📄 xml_saxy_parser.php
字号:
<?php
/**
* SAXY is a non-validating, but lightweight and fast SAX parser for PHP, modelled on the Expat parser
* @package saxy-xmlparser
* @subpackage saxy-xmlparser-main
* @version 1.0
* @copyright (C) 2004 John Heinstein. All rights reserved
* @license http://www.gnu.org/copyleft/lesser.html LGPL License
* @author John Heinstein <johnkarl@nbnet.nb.ca>
* @link http://www.engageinteractive.com/saxy/ SAXY Home Page
* SAXY is Free Software
**/

if (!defined('SAXY_INCLUDE_PATH')) {
    /** directory (with trailing slash) that the other SAXY files are loaded from */
    define('SAXY_INCLUDE_PATH', (dirname(__FILE__) . "/"));
}

/** current version of SAXY */
define('SAXY_VERSION', '1.0');

/**
* default XML namespace
* NOTE(review): the W3C XML namespace URI is spelled with an upper-case "XML"
* ('http://www.w3.org/XML/1998/namespace'); this value is all lower-case.
* Confirm how consumers compare namespace URIs before changing it.
*/
define('SAXY_XML_NAMESPACE', 'http://www.w3.org/xml/1998/namespace');

/** saxy parse state, before prolog is encountered */
define('SAXY_STATE_PROLOG_NONE', 0);
/** saxy parse state, in processing instruction */
define('SAXY_STATE_PROLOG_PROCESSINGINSTRUCTION', 1);
/** saxy parse state, an exclamation mark has been encountered */
define('SAXY_STATE_PROLOG_EXCLAMATION', 2);
/** saxy parse state, in DTD */
define('SAXY_STATE_PROLOG_DTD', 3);
/** saxy parse state, an inline DTD */
define('SAXY_STATE_PROLOG_INLINEDTD', 4);
/** saxy parse state, a comment */
define('SAXY_STATE_PROLOG_COMMENT', 5);
/** saxy parse state, processing main document */
define('SAXY_STATE_PARSING', 6);
/** saxy parse state, processing comment in main document */
define('SAXY_STATE_PARSING_COMMENT', 7);

//SAXY error codes; same as EXPAT error codes
/** no error */
define('SAXY_XML_ERROR_NONE', 0);
/** out of memory error */
define('SAXY_XML_ERROR_NO_MEMORY', 1);
/** syntax error */
define('SAXY_XML_ERROR_SYNTAX', 2);
/** no elements in document */
define('SAXY_XML_ERROR_NO_ELEMENTS', 3);
/** invalid token encountered error */
define('SAXY_XML_ERROR_INVALID_TOKEN', 4);
/** unclosed token error */
define('SAXY_XML_ERROR_UNCLOSED_TOKEN', 5);
/** partial character error */
define('SAXY_XML_ERROR_PARTIAL_CHAR', 6);
/** mismatched tag error */
define('SAXY_XML_ERROR_TAG_MISMATCH', 7);
/** duplicate attribute error */
define('SAXY_XML_ERROR_DUPLICATE_ATTRIBUTE', 8);
/** junk after document element error */
define('SAXY_XML_ERROR_JUNK_AFTER_DOC_ELEMENT', 9);
/** parameter entity reference error */
define('SAXY_XML_ERROR_PARAM_ENTITY_REF', 10);
/** undefined entity error */
define('SAXY_XML_ERROR_UNDEFINED_ENTITY', 11);
/** recursive entity error */
define('SAXY_XML_ERROR_RECURSIVE_ENTITY_REF', 12);
/** asynchronous entity error */
define('SAXY_XML_ERROR_ASYNC_ENTITY', 13);
/** bad character reference error */
define('SAXY_XML_ERROR_BAD_CHAR_REF', 14);
/** binary entity reference error */
define('SAXY_XML_ERROR_BINARY_ENTITY_REF', 15);
/** attribute external entity error */
define('SAXY_XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF', 16);
/** misplaced processing instruction error */
define('SAXY_XML_ERROR_MISPLACED_XML_PI', 17);
/** unknown encoding error */
define('SAXY_XML_ERROR_UNKNOWN_ENCODING', 18);
/** incorrect encoding error */
define('SAXY_XML_ERROR_INCORRECT_ENCODING', 19);
/** unclosed CDATA Section error */
define('SAXY_XML_ERROR_UNCLOSED_CDATA_SECTION', 20);
/** external entity handling error */
define('SAXY_XML_ERROR_EXTERNAL_ENTITY_HANDLING', 21);

// Supplies SAXY_Parser_Base, which SAXY_Parser extends below.
require_once(SAXY_INCLUDE_PATH . 'xml_saxy_shared.php');

/**
* The SAX Parser class
*
* @package saxy-xmlparser
* @subpackage saxy-xmlparser-main
* @author John Heinstein <johnkarl@nbnet.nb.ca>
*/
class SAXY_Parser extends SAXY_Parser_Base {
    /** @var int The current error number */
    var $errorCode = SAXY_XML_ERROR_NONE;
    /** @var Object A reference to the DocType event handler */
    var $DTDHandler = null;
    /** @var Object A reference to the Comment event handler */
    var $commentHandler = null;
    /** @var Object A reference to the Processing Instruction event handler */
    var $processingInstructionHandler = null;
    /** @var Object A reference to the Start Namespace Declaration event handler */
    var $startNamespaceDeclarationHandler = null;
    /** @var Object A reference to the End Namespace Declaration event handler */
    var $endNamespaceDeclarationHandler = null;
    /** @var boolean True if SAXY takes namespaces into consideration when parsing element tags */
    var $isNamespaceAware = false;
    /** @var array An indexed array containing associative arrays of namespace prefixes mapped to their namespace URIs */
    var $namespaceMap = array();
    /** @var array A stack used to determine when an end namespace event should be fired */
    var $namespaceStack = array();
    /** @var array A stack used to track the uri of the current default namespace */
    var $defaultNamespaceStack = array();
    /** @var array A stack containing tag names of unclosed elements */
    var $elementNameStack = array();

    /**
    * Constructor for SAX parser
    *
    * Runs the base-class initialiser and puts the parser into the
    * "before prolog" state. Declared ahead of the legacy SAXY_Parser()
    * method so that PHP 5 does not report a redefined constructor.
    */
    function __construct() {
        // Invoked by method name, exactly as the legacy constructor did;
        // presumably this is the PHP 4-style constructor of the base class.
        $this->SAXY_Parser_Base();
        $this->state = SAXY_STATE_PROLOG_NONE;
    } //__construct

    /**
    * Legacy (PHP 4-style) constructor for SAX parser
    *
    * PHP 8 no longer treats a method named after its class as a constructor,
    * so the real work lives in __construct(). This method is kept so that
    * PHP 4 and any code calling SAXY_Parser() explicitly keep working.
    */
    function SAXY_Parser() {
        $this->__construct();
    } //SAXY_Parser

    /**
    * Sets a reference to the handler for the DocType event
    * @param mixed A reference to the DocType handler
    */
    function xml_set_doctype_handler($handler) {
        // $handler is received by value, so a plain assignment is all that is needed.
        $this->DTDHandler = $handler;
    } //xml_set_doctype_handler

    /**
    * Sets a reference to the handler for the Comment event
    * @param mixed A reference to the Comment handler
    */
    function xml_set_comment_handler($handler) {
        // $handler is received by value, so a plain assignment is all that is needed.
        $this->commentHandler = $handler;
    }
//xml_set_comment_handler

    /**
    * Sets a reference to the handler for the Processing Instruction event
    * @param mixed A reference to the Processing Instruction handler
    */
    function xml_set_processing_instruction_handler($handler) {
        // $handler is received by value, so a plain assignment is all that is needed.
        $this->processingInstructionHandler = $handler;
    } //xml_set_processing_instruction_handler

    /**
    * Sets a reference to the handler for the Start Namespace Declaration event
    * @param mixed A reference to the Start Namespace Declaration handler
    */
    function xml_set_start_namespace_decl_handler($handler) {
        $this->startNamespaceDeclarationHandler = $handler;
    } //xml_set_start_namespace_decl_handler

    /**
    * Sets a reference to the handler for the End Namespace Declaration event
    * @param mixed A reference to the End Namespace Declaration handler
    */
    function xml_set_end_namespace_decl_handler($handler) {
        $this->endNamespaceDeclarationHandler = $handler;
    } //xml_set_end_namespace_decl_handler

    /**
    * Specifies whether SAXY is namespace sensitive
    * @param boolean True if SAXY is namespace aware
    */
    function setNamespaceAwareness($isNamespaceAware) {
        $this->isNamespaceAware = $isNamespaceAware;
    } //setNamespaceAwareness

    /**
    * Returns the current version of SAXY
    * @return string The current version of SAXY (the SAXY_VERSION constant)
    */
    function getVersion() {
        return SAXY_VERSION;
    } //getVersion

    /**
    * Returns the single character at a given offset, or an empty string
    * when the offset lies outside the text
    *
    * Used for look-ahead / look-behind while scanning. Unlike direct string
    * offsets it never raises a notice past the end of the text and never
    * wraps a negative offset round to the end of the string.
    * @param string The text being scanned
    * @param int The zero-based offset of the wanted character
    * @return string The character at that offset, or '' if out of range
    */
    function _peekChar($text, $index) {
        if (($index < 0) || ($index >= strlen($text))) {
            return '';
        }

        return substr($text, $index, 1);
    } //_peekChar

    /**
    * Processes the xml prolog, doctype, and any other nodes that exist outside of the main xml document
    *
    * Scans character by character, driven by $this->state. Processing
    * instructions and comments found ahead of the first element are passed to
    * parseProcessingInstruction() and parseComment(); doctype declarations are
    * passed to fireDTDEvent() when a DocType handler has been set. Scanning
    * stops at the first '<' that is not followed by '?' or '!'.
    * @param string The xml text to be processed
    * @return string The xml text from the first element tag onwards, or an empty string if none was found
    */
    function preprocessXML($xmlText) {
        //strip prolog
        $xmlText = trim($xmlText);
        $total = strlen($xmlText);

        for ($i = 0; $i < $total; $i++) {
            $currentChar = substr($xmlText, $i, 1);

            switch ($this->state) {
                case SAXY_STATE_PROLOG_NONE:
                    if ($currentChar == '<') {
                        $nextChar = $this->_peekChar($xmlText, $i + 1);

                        if ($nextChar == '?') {
                            $this->state = SAXY_STATE_PROLOG_PROCESSINGINSTRUCTION;
                            $this->charContainer = '';
                        }
                        else if ($nextChar == '!') {
                            $this->state = SAXY_STATE_PROLOG_EXCLAMATION;
                            $this->charContainer .= $currentChar;
                            break;
                        }
                        else {
                            //first real element tag: the prolog is finished
                            $this->charContainer = '';
                            $this->state = SAXY_STATE_PARSING;
                            return (substr($xmlText, $i));
                        }
                    }
                    break;

                case SAXY_STATE_PROLOG_EXCLAMATION:
                    if ($currentChar == 'D') {
                        $this->state = SAXY_STATE_PROLOG_DTD;
                        $this->charContainer .= $currentChar;
                    }
                    else if ($currentChar == '-') {
                        $this->state = SAXY_STATE_PROLOG_COMMENT;
                        $this->charContainer = '';
                    }
                    else {
                        //will trap ! and add it
                        $this->charContainer .= $currentChar;
                    }
                    break;

                case SAXY_STATE_PROLOG_PROCESSINGINSTRUCTION:
                    if ($currentChar == '>') {
                        $this->state = SAXY_STATE_PROLOG_NONE;
                        $this->parseProcessingInstruction($this->charContainer);
                        $this->charContainer = '';
                    }
                    else {
                        $this->charContainer .= $currentChar;
                    }
                    break;

                case SAXY_STATE_PROLOG_COMMENT:
                    if ($currentChar == '>') {
                        $this->state = SAXY_STATE_PROLOG_NONE;
                        $this->parseComment($this->charContainer);
                        $this->charContainer = '';
                    }
                    else if ($currentChar == '-') {
                        //a dash is dropped when it belongs to the comment
                        //delimiters: followed by "->", followed by ">",
                        //or preceded by "!-"
                        if ((($this->_peekChar($xmlText, $i + 1) == '-') && ($this->_peekChar($xmlText, $i + 2) == '>')) ||
                            ($this->_peekChar($xmlText, $i + 1) == '>') ||
                            (($this->_peekChar($xmlText, $i - 1) == '-') && ($this->_peekChar($xmlText, $i - 2) == '!'))) {
                            //do nothing
                        }
                        else {
                            $this->charContainer .= $currentChar;
                        }
                    }
                    else {
                        $this->charContainer .= $currentChar;
                    }
                    break;

                case SAXY_STATE_PROLOG_DTD:
                    if ($currentChar == '[') {
                        $this->charContainer .= $currentChar;
                        $this->state = SAXY_STATE_PROLOG_INLINEDTD;
                    }
                    else if ($currentChar == '>') {
                        $this->state = SAXY_STATE_PROLOG_NONE;

                        if ($this->DTDHandler != null) {
                            $this->fireDTDEvent($this->charContainer . $currentChar);
                        }

                        $this->charContainer = '';
                    }
                    else {
                        $this->charContainer .= $currentChar;
                    }
                    break;

                case SAXY_STATE_PROLOG_INLINEDTD:
                    $previousChar = $this->_peekChar($xmlText, $i - 1);

                    //an inline DTD ends only at a '>' that directly follows ']'
                    if (($currentChar == '>') && ($previousChar == ']')) {
                        $this->state = SAXY_STATE_PROLOG_NONE;

                        if ($this->DTDHandler != null) {
                            $this->fireDTDEvent($this->charContainer . $currentChar);
                        }

                        $this->charContainer = '';
                    }
                    else {
                        $this->charContainer .= $currentChar;
                    }
                    break;
            }
        }

        //no element tag was found; return a string, as documented, rather
        //than falling through and handing null to the caller's strlen()
        return '';
    } //preprocessXML

    /**
    * The controlling method for the parsing process
    *
    * Runs preprocessXML() first, then walks the remaining text one character
    * at a time. Text between tags goes to parseBetweenTags(), tag interiors
    * go to parseTag(), and comments go to fireCommentEvent().
    * @param string The xml text to be processed
    * @return boolean True if parsing is successful
    */
    function parse ($xmlText) {
        $xmlText = $this->preprocessXML($xmlText);
        $total = strlen($xmlText);

        for ($i = 0; $i < $total; $i++) {
            $currentChar = substr($xmlText, $i, 1);

            switch ($this->state) {
                case SAXY_STATE_PARSING:
                    switch ($currentChar) {
                        case '<':
                            if (substr($this->charContainer, 0, SAXY_CDATA_LEN) == SAXY_SEARCH_CDATA) {
                                //inside a CDATA section '<' is ordinary text
                                $this->charContainer .= $currentChar;
                            }
                            else {
                                $this->parseBetweenTags($this->charContainer);
                                $this->charContainer = '';
                            }
                            break;

                        case '-':
                            //this dash is the second one of a "<!--" opener
                            if (($this->_peekChar($xmlText, $i - 1) == '-') &&
                                ($this->_peekChar($xmlText, $i - 2) == '!') &&
                                ($this->_peekChar($xmlText, $i - 3) == '<')) {
                                $this->state = SAXY_STATE_PARSING_COMMENT;
                                $this->charContainer = '';
                            }
                            else {
                                $this->charContainer .= $currentChar;
                            }
                            break;

                        case '>':
                            //inside a CDATA section only a '>' preceded by "]]" closes the tag
                            if ((substr($this->charContainer, 0, SAXY_CDATA_LEN) == SAXY_SEARCH_CDATA) &&
                                !(($this->getCharFromEnd($this->charContainer, 0) == ']') &&
                                ($this->getCharFromEnd($this->charContainer, 1) == ']'))) {
                                $this->charContainer .= $currentChar;
                            }
                            else {
                                $this->parseTag($this->charContainer);
                                $this->charContainer = '';
                            }
                            break;

                        default:
                            $this->charContainer .= $currentChar;
                    }
                    break;

                case SAXY_STATE_PARSING_COMMENT:
                    switch ($currentChar) {
                        case '>':
                            if (($this->_peekChar($xmlText, $i - 1) == '-') &&
                                ($this->_peekChar($xmlText, $i - 2) == '-')) {
                                //drop the two trailing dashes collected before the '>'
                                $this->fireCommentEvent(substr($this->charContainer, 0,
                                    (strlen($this->charContainer) - 2)));
                                $this->charContainer = '';
                                $this->state = SAXY_STATE_PARSING;
                            }
                            else {
                                $this->charContainer .= $currentChar;
                            }
                            break;

                        default:
                            $this->charContainer .= $currentChar;
                    }
                    break;
            }
        }

        return ($this->errorCode == SAXY_XML_ERROR_NONE);
    } //parse

    /**
    * Parses an element tag
    * @param string The interior text of the element tag
    */
    function parseTag($tagText) {
        $tagText = trim($tagText);
        // NOTE(review): curly-brace string offsets were removed in PHP 8.0;
        // this method still uses them and needs migrating before the file
        // will parse there.
        $firstChar = $tagText{0};
$myAttributes = array(); switch ($firstChar) { case '/': $tagName = substr($tagText, 1); $this->_fireEndElementEvent($tagName); break; case '!': $upperCaseTagText = strtoupper($tagText); if (strpos($upperCaseTagText, SAXY_SEARCH_CDATA) !== false) { //CDATA Section $total = strlen($tagText); $openBraceCount = 0; $textNodeText = ''; for ($i = 0; $i < $total; $i++) {// $currentChar = $tagText{$i}; $currentChar = substr($tagText, $i, 1); if (($currentChar == ']') && ($tagText{($i + 1)} == ']')) { break; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -