<?php
/*
A PHP class to parse Endnote XML files into PHP arrays.

Released through http://bibliophile.sourceforge.net under the GPL licence.
Do whatever you like with this -- some credit to the author(s) would be appreciated.

The XML parsing is indebted to code by Dante Lorenso at:
http://www.devarticles.com/c/a/PHP/Converting-XML-Into-a-PHP-Data-Structure/
My modifications have mainly been to prune empty array elements in the nodal array.

If you make improvements, please consider contacting the administrators at bibliophile.sourceforge.net so that your improvements can be added to the release package.

Mark Grimshaw 2004
http://bibliophile.sourceforge.net
*/


/*
* Endnote XML format has no newlines
*/

// For a quick command-line test (php -f PARSEMODSENTRIES.php) after installation, uncomment these lines:
/*********************************
$parse = new ENDNOTEPARSEXML();
$parse->openFile("test.xml~");
$parse->extractEntries();
$entries = $parse->returnArrays();
$parse->closeFile();

foreach($entries as $node => $array)
{
	print "$node: "; print_r($array); print "\n";
}
print "\n\n";
exit;
*********************************/

// Parses Endnote XML into nested PHP arrays, one array per <record> element.
// Typical use: openFile(), extractEntries(), returnArrays(), closeFile().
// Each parsed node is an array with a "_NAME" key and, only when non-empty, "_DATA" (trimmed text),
// "_ELEMENTS" (list of child nodes) and, for 'ref-type' only, "_ATTRIBUTES".
class ENDNOTEPARSEXML
{
// Properties are declared (with defaults) so that they exist before any method touches them and so that
// PHP 8.2+ does not report dynamic property creation.
// Parsed records, appended to by getEntry().
	var $entries = array();
// Not used inside this class; kept because the original constructor initialised it.
	var $lastPart = FALSE;
// TRUE once a <source-app> element reporting version 8.0 or later has been seen.
	var $version8 = FALSE;
// UTF8 helper object created by the constructor (not used inside this class).
	var $utf8 = NULL;
// File handle set by openFile().
	var $fid = FALSE;
// The XML parser currently in use by parse().
	var $parser = NULL;
// Stack of nodes that have been opened but not yet closed.
	var $nodeStack = array();
// TRUE while character data should be discarded (see startElement()/endElement()).
	var $noCollect = FALSE;

// Constructor.  PHP 8 no longer treats a method named after the class as a constructor, so the
// initialisation lives here.
	function __construct()
	{
		$this->entries = array();
		$this->lastPart = FALSE;
		$this->version8 = FALSE;
		include_once("core/messages/UTF8.php");
		$this->utf8 = new UTF8();
	}
// Old-style constructor name, kept so that code calling it explicitly continues to work.
	function ENDNOTEPARSEXML()
	{
		$this->__construct();
	}
// Open file.
// $file: path of the file to read.  The script is terminated (as before) if $file is not a regular file;
// it is now also terminated if the file cannot be opened, because an invalid handle would otherwise make
// the feof() loop in extractEntries() misbehave.
	function openFile($file)
	{
		if(!is_file($file))
			die;
		$this->fid = fopen ($file,'r');
		if($this->fid === FALSE)
			die;
	}
// Close file.  Does nothing if no file is open, so calling it twice is harmless.
	function closeFile()
	{
		if($this->fid)
		{
			fclose($this->fid);
			$this->fid = FALSE;
		}
	}
// Parse each raw record body and append the resulting node array to $this->entries.
// $entries: array of strings, each the text found between <record> and </record>.
	function getEntry($entries)
	{
		foreach($entries as $entry)
		{
// create root node in node array
			$this->nodeStack = array();
			$this->startElement(NULL, 'ROOT', array());
// Remove <style>...</style> from string, keeping the enclosed text.
// The 's' modifier lets a styled run span a line break.
			$entry = preg_replace("/<style.*>(.*)<\/style>/Uis", "$1", $entry);
// complete $xmlString and parse it
			$xmlString = "<record>" . $entry . "</record>";
			$this->entries[] = $this->parse($xmlString);
		}
	}
// This method starts the whole process: read the open file and parse every <record> found in it.
// The input is buffered in full and matched with the 's' modifier so that a record spread over several
// lines is found as well as the usual single-line form.
	function extractEntries()
	{
		if(!$this->fid)
			return;
		$contents = '';
		while(!feof($this->fid))
		{
			$line = fgets($this->fid);
			if($line === FALSE)
				break;
			$contents .= $line;
		}
		if(preg_match_all("/<record>(.*)<\/record>/Uis", $contents, $startEntry))
			$this->getEntry($startEntry[1]);
	}
// Parse one XML string and return the node array of its outermost element.
// Expects $this->nodeStack to hold just the ROOT node created by getEntry().
// Returns NULL when the XML could not be parsed down to a single closed outermost element.
	function parse($xmlString="")
	{
// set up a new XML parser to do all the work for us
		$this->parser = xml_parser_create('UTF-8');
		xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, false);
// Handlers are given as array callables so that no separate xml_set_object() call is needed.
		xml_set_element_handler($this->parser, array($this, "startElement"), array($this, "endElement"));
		xml_set_character_data_handler($this->parser, array($this, "characterData"));
// parse the data
		xml_parse($this->parser, $xmlString);
		xml_parser_free($this->parser);
// Anything other than exactly one node (ROOT) left on the stack means an element was never closed:
// there is no complete record to return.
		if(count($this->nodeStack) != 1)
			return NULL;
// recover the root node from the node stack
		$rnode = array_pop($this->nodeStack);
		if(empty($rnode["_ELEMENTS"]))
			return NULL;
// return the first element collected under the root node
		return($rnode["_ELEMENTS"][0]);
	}
// Elements whose own tags and directly enclosed text are not stored.
// 'Periodical' field is duplicated in the 'titles' field as 'secondary-title' (ditto 'full-title').
// 'source-app' is deliberately not in this list: its version attribute is needed by startElement().
	function isSkippedElement($name)
	{
		return in_array($name, array('database', 'rec-number', 'periodical', 'full-title'), TRUE);
	}
// Start-tag handler: create a node and push it on the node stack.
// $parser: the XML parser (unused), $name: element name, $attrs: array of attribute name => value.
	function startElement($parser, $name, $attrs)
	{
// We don't require some elements here: no node is created for them and their text is discarded.
		$this->noCollect = $this->isSkippedElement($name);
		if($this->noCollect)
			return;
		$node = array();
		$node["_NAME"] = $name;
// Endnote attributes are the library, file path, font size etc. - not needed here so don't clutter array.
		if(!empty($attrs))
		{
// Endnote 8 (which has a _very_ different XML format to previous versions *&^$*&^!) identifies itself as:
// <source-app name="EndNote" version="8.0">EndNote</source-app>.
// Need to capture this for ENDNOTEIMPORTSTAGE2
// The version is cast to float explicitly: PHP 8 would otherwise compare a multi-part version string
// such as "19.3.3.13966" with 8.0 as strings and get the wrong answer.
			if(($name == 'source-app') && isset($attrs['version']) && ((float) $attrs['version'] >= 8.0))
				$this->version8 = TRUE;
			if($name == 'ref-type')
				$node["_ATTRIBUTES"] = $attrs;
		}
		$node["_DATA"] = "";
		$node["_ELEMENTS"] = array();
// add the new node to the end of the node stack
		array_push($this->nodeStack, $node);
	}
// End-tag handler: pop the finished node, prune its empty parts and attach it to its parent.
// $parser: the XML parser (unused), $name: element name.
	function endElement($parser, $name)
	{
// A skipped element never pushed a node, so there is nothing to pop.  Text that follows its end tag is
// discarded until the next tag of a stored element is seen.
		$this->noCollect = $this->isSkippedElement($name);
		if($this->noCollect)
			return;
// pop this element off the node stack.....
		$node = array_pop($this->nodeStack);
		$data = trim($node["_DATA"]);
// (Don't store empty DATA strings and empty ELEMENTS arrays)
// The comparison is against '' so that a value of "0" is kept.
		if($data !== '')
			$node["_DATA"] = $data;
		else
			unset($node["_DATA"]);
		if(empty($node["_ELEMENTS"]))
			unset($node["_ELEMENTS"]);
// and add it as an element of the last node in the stack...
		$lastnode = count($this->nodeStack);
		array_push($this->nodeStack[$lastnode - 1]["_ELEMENTS"], $node);
	}
// Character data handler: collect the data onto the end of the current node's text.
// $parser: the XML parser (unused), $data: the chunk of text.
	function characterData($parser, $data)
	{
		if($this->noCollect || empty($this->nodeStack))
			return;
// add this data to the last node in the stack...
// NOTE(review): utf8_decode() is deprecated as of PHP 8.2; left as is because replacing it needs a
// decision on which extension to depend on.
		$lastnode = count($this->nodeStack);
		$this->nodeStack[$lastnode - 1]["_DATA"] .= $this->version8 ? utf8_decode($data) : $data;
	}
// Return arrays of entries to the calling process, or FALSE if there are none.
// $this->entries itself is left untouched so that further entries can still be appended afterwards.
	function returnArrays()
	{
		if(empty($this->entries))
			return FALSE;
		return $this->entries;
	}
}
?>
