📄 parser.c.svn-base
字号:
/* * parser.c : an XML 1.0 parser, namespaces and validity support are mostly * implemented on top of the SAX interfaces * * References: * The XML specification: * http://www.w3.org/TR/REC-xml * Original 1.0 version: * http://www.w3.org/TR/1998/REC-xml-19980210 * XML second edition working draft * http://www.w3.org/TR/2000/WD-xml-2e-20000814 * * Okay this is a big file, the parser core is around 7000 lines, then it * is followed by the progressive parser top routines, then the various * high level APIs to call the parser and a few miscellaneous functions. * A number of helper functions and deprecated ones have been moved to * parserInternals.c to reduce this file size. * As much as possible the functions are associated with their relative * production in the XML specification. A few productions defining the * different ranges of character are actually implanted either in * parserInternals.h or parserInternals.c * The DOM tree build is realized from the default SAX callbacks in * the module SAX.c. * The routines doing the validation checks are in valid.c and called either * from the SAX callbacks or as standalone functions using a preparsed * document. * * See Copyright for the status of this software. * * daniel@veillard.com */#define IN_LIBXML#include "libxml.h"#if defined(WIN32) && !defined (__CYGWIN__)#define XML_DIR_SEP '\\'#else#define XML_DIR_SEP '/'#endif#include <stdlib.h>#include <string.h>#include <stdarg.h>#include <libxml/xmlmemory.h>#include <libxml/threads.h>#include <libxml/globals.h>#include <libxml/tree.h>#include <libxml/parser.h>#include <libxml/parserInternals.h>#include <libxml/valid.h>#include <libxml/entities.h>#include <libxml/xmlerror.h>#include <libxml/encoding.h>#include <libxml/xmlIO.h>#include <libxml/uri.h>#ifdef LIBXML_CATALOG_ENABLED#include <libxml/catalog.h>#endif#ifdef HAVE_CTYPE_H#include <ctype.h>#endif#ifdef HAVE_STDLIB_H#include <stdlib.h>#endif#ifdef HAVE_SYS_STAT_H#include <sys/stat.h>#endif#ifdef HAVE_FCNTL_H#include <fcntl.h>#endif#ifdef HAVE_UNISTD_H#include <unistd.h>#endif#ifdef HAVE_ZLIB_H#include <zlib.h>#endif/** * xmlParserMaxDepth: * * arbitrary depth limit for the XML documents that we allow to * process. This is not a limitation of the parser but a safety * boundary feature. */unsigned int xmlParserMaxDepth = 1024;#define SAX2 1#define XML_PARSER_BIG_BUFFER_SIZE 300#define XML_PARSER_BUFFER_SIZE 100#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"/* * List of XML prefixed PI allowed by W3C specs */static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL};/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str);static xmlParserErrorsxmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, xmlSAXHandlerPtr sax, void *user_data, int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list);#ifdef LIBXML_LEGACY_ENABLEDstatic voidxmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, xmlNodePtr lastNode);#endif /* LIBXML_LEGACY_ENABLED */static xmlParserErrorsxmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, const xmlChar *string, void *user_data, xmlNodePtr *lst);/************************************************************************ * * * Some factorized error routines * * * ************************************************************************//** * xmlErrAttributeDup: * @ctxt: an XML parser context * @prefix: the attribute prefix * @localname: the attribute localname * * Handle a redefinition of attribute error */static voidxmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, const xmlChar * localname){ if ((ctxt != NULL) && (ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF)) return; ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; if (prefix == NULL) __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, ctxt->errNo, XML_ERR_FATAL, NULL, 0, (const char *) localname, NULL, NULL, 0, 0, "Attribute %s redefined\n", localname); else __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, ctxt->errNo, XML_ERR_FATAL, NULL, 0, (const char *) prefix, (const char *) localname, NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, localname); ctxt->wellFormed = 0; if (ctxt->recovery == 0) ctxt->disableSAX = 1;}/** * xmlFatalErr: * @ctxt: an XML parser context * @error: the error number * @extra: extra information string * * Handle a fatal parser error, i.e. violating Well-Formedness constraints */static voidxmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info){ const char *errmsg; if ((ctxt != NULL) && (ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF)) return; switch (error) { case XML_ERR_INVALID_HEX_CHARREF: errmsg = "CharRef: invalid hexadecimal value\n"; break; case XML_ERR_INVALID_DEC_CHARREF: errmsg = "CharRef: invalid decimal value\n"; break; case XML_ERR_INVALID_CHARREF: errmsg = "CharRef: invalid value\n"; break; case XML_ERR_INTERNAL_ERROR: errmsg = "internal error"; break; case XML_ERR_PEREF_AT_EOF: errmsg = "PEReference at end of document\n"; break; case XML_ERR_PEREF_IN_PROLOG: errmsg = "PEReference in prolog\n"; break; case XML_ERR_PEREF_IN_EPILOG: errmsg = "PEReference in epilog\n"; break; case XML_ERR_PEREF_NO_NAME: errmsg = "PEReference: no name\n"; break; case XML_ERR_PEREF_SEMICOL_MISSING: errmsg = "PEReference: expecting ';'\n"; break; case XML_ERR_ENTITY_LOOP: errmsg = "Detected an entity reference loop\n"; break; case XML_ERR_ENTITY_NOT_STARTED: errmsg = "EntityValue: \" or ' expected\n"; break; case XML_ERR_ENTITY_PE_INTERNAL: errmsg = "PEReferences forbidden in internal subset\n"; break; case XML_ERR_ENTITY_NOT_FINISHED: errmsg = "EntityValue: \" or ' expected\n"; break; case XML_ERR_ATTRIBUTE_NOT_STARTED: errmsg = "AttValue: \" or ' expected\n"; break; case XML_ERR_LT_IN_ATTRIBUTE: errmsg = "Unescaped '<' not allowed in attributes values\n"; break; case XML_ERR_LITERAL_NOT_STARTED: errmsg = "SystemLiteral \" or ' expected\n"; break; case XML_ERR_LITERAL_NOT_FINISHED: errmsg = "Unfinished System or Public ID \" or ' expected\n"; break; case XML_ERR_MISPLACED_CDATA_END: errmsg = "Sequence ']]>' not allowed in content\n"; break; case XML_ERR_URI_REQUIRED: errmsg = "SYSTEM or PUBLIC, the URI is missing\n"; break; case XML_ERR_PUBID_REQUIRED: errmsg = "PUBLIC, the Public Identifier is missing\n"; break; case XML_ERR_HYPHEN_IN_COMMENT: errmsg = "Comment must not contain '--' (double-hyphen)\n"; break; case XML_ERR_PI_NOT_STARTED: errmsg = "xmlParsePI : no target name\n"; break; case XML_ERR_RESERVED_XML_NAME: errmsg = "Invalid PI name\n"; break; case XML_ERR_NOTATION_NOT_STARTED: errmsg = "NOTATION: Name expected here\n"; break; case XML_ERR_NOTATION_NOT_FINISHED: errmsg = "'>' required to close NOTATION declaration\n"; break; case XML_ERR_VALUE_REQUIRED: errmsg = "Entity value required\n"; break; case XML_ERR_URI_FRAGMENT: errmsg = "Fragment not allowed"; break; case XML_ERR_ATTLIST_NOT_STARTED: errmsg = "'(' required to start ATTLIST enumeration\n"; break; case XML_ERR_NMTOKEN_REQUIRED: errmsg = "NmToken expected in ATTLIST enumeration\n"; break; case XML_ERR_ATTLIST_NOT_FINISHED: errmsg = "')' required to finish ATTLIST enumeration\n"; break; case XML_ERR_MIXED_NOT_STARTED: errmsg = "MixedContentDecl : '|' or ')*' expected\n"; break; case XML_ERR_PCDATA_REQUIRED: errmsg = "MixedContentDecl : '#PCDATA' expected\n"; break; case XML_ERR_ELEMCONTENT_NOT_STARTED: errmsg = "ContentDecl : Name or '(' expected\n"; break; case XML_ERR_ELEMCONTENT_NOT_FINISHED: errmsg = "ContentDecl : ',' '|' or ')' expected\n"; break; case XML_ERR_PEREF_IN_INT_SUBSET: errmsg = "PEReference: forbidden within markup decl in internal subset\n"; break; case XML_ERR_GT_REQUIRED: errmsg = "expected '>'\n"; break; case XML_ERR_CONDSEC_INVALID: errmsg = "XML conditional section '[' expected\n"; break; case XML_ERR_EXT_SUBSET_NOT_FINISHED: errmsg = "Content error in the external subset\n"; break; case XML_ERR_CONDSEC_INVALID_KEYWORD: errmsg = "conditional section INCLUDE or IGNORE keyword expected\n"; break; case XML_ERR_CONDSEC_NOT_FINISHED: errmsg = "XML conditional section not closed\n"; break; case XML_ERR_XMLDECL_NOT_STARTED: errmsg = "Text declaration '<?xml' required\n"; break; case XML_ERR_XMLDECL_NOT_FINISHED: errmsg = "parsing XML declaration: '?>' expected\n"; break; case XML_ERR_EXT_ENTITY_STANDALONE: errmsg = "external parsed entities cannot be standalone\n"; break; case XML_ERR_ENTITYREF_SEMICOL_MISSING: errmsg = "EntityRef: expecting ';'\n"; break; case XML_ERR_DOCTYPE_NOT_FINISHED: errmsg = "DOCTYPE improperly terminated\n"; break; case XML_ERR_LTSLASH_REQUIRED: errmsg = "EndTag: '</' not found\n"; break; case XML_ERR_EQUAL_REQUIRED: errmsg = "expected '='\n"; break; case XML_ERR_STRING_NOT_CLOSED: errmsg = "String not closed expecting \" or '\n"; break; case XML_ERR_STRING_NOT_STARTED: errmsg = "String not started expecting ' or \"\n"; break; case XML_ERR_ENCODING_NAME: errmsg = "Invalid XML encoding name\n"; break; case XML_ERR_STANDALONE_VALUE: errmsg = "standalone accepts only 'yes' or 'no'\n"; break; case XML_ERR_DOCUMENT_EMPTY: errmsg = "Document is empty\n"; break; case XML_ERR_DOCUMENT_END: errmsg = "Extra content at the end of the document\n"; break; case XML_ERR_NOT_WELL_BALANCED: errmsg = "chunk is not well balanced\n"; break; case XML_ERR_EXTRA_CONTENT: errmsg = "extra content at the end of well balanced chunk\n"; break; case XML_ERR_VERSION_MISSING: errmsg = "Malformed declaration expecting version\n"; break;#if 0 case: errmsg = "\n"; break;#endif default: errmsg = "Unregistered error message\n"; } ctxt->errNo = error; __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg, info); ctxt->wellFormed = 0; if (ctxt->recovery == 0) ctxt->disableSAX = 1;}/** * xmlFatalErrMsg: * @ctxt: an XML parser context * @error: the error number * @msg: the error message * * Handle a fatal parser error, i.e. violating Well-Formedness constraints */static voidxmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *msg){ if ((ctxt != NULL) && (ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF)) return; ctxt->errNo = error; __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg); ctxt->wellFormed = 0; if (ctxt->recovery == 0) ctxt->disableSAX = 1;}/** * xmlWarningMsg: * @ctxt: an XML parser context * @error: the error number * @msg: the error message * @str1: extra data * @str2: extra data * * Handle a warning. */static voidxmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *msg, const xmlChar *str1, const xmlChar *str2){ xmlStructuredErrorFunc schannel = NULL; if ((ctxt != NULL) && (ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF)) return; ctxt->errNo = error; if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) schannel = ctxt->sax->serror; __xmlRaiseError(schannel, (ctxt->sax) ? ctxt->sax->warning : NULL, ctxt->userData, ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING, NULL, 0, (const char *) str1, (const char *) str2, NULL, 0, 0, msg, (const char *) str1, (const char *) str2);}/** * xmlValidityError: * @ctxt: an XML parser context * @error: the error number * @msg: the error message * @str1: extra data * * Handle a validity error. */static voidxmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *msg, const xmlChar *str1){ xmlStructuredErrorFunc schannel = NULL; if ((ctxt != NULL) && (ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF)) return; ctxt->errNo = error; if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) schannel = ctxt->sax->serror; __xmlRaiseError(schannel, ctxt->vctxt.error, ctxt->vctxt.userData, ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR, NULL, 0, (const char *) str1,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -