📄 parser.c
字号:
/* * parser.c : an XML 1.0 non-verifying parser * * See Copyright for the status of this software. * * Daniel.Veillard@w3.org */#ifdef WIN32#include "win32config.h"#else#include "config.h"#endif#include <stdio.h>#include <string.h> /* for memset() only */#ifdef HAVE_CTYPE_H#include <ctype.h>#endif#ifdef HAVE_STDLIB_H#include <stdlib.h>#endif#ifdef HAVE_SYS_STAT_H#include <sys/stat.h>#endif#ifdef HAVE_FCNTL_H#include <fcntl.h>#endif#ifdef HAVE_UNISTD_H#include <unistd.h>#endif#ifdef HAVE_ZLIB_H#include <zlib.h>#endif#include <libxml/xmlmemory.h>#include <libxml/tree.h>#include <libxml/parser.h>#include <libxml/entities.h>#include <libxml/encoding.h>#include <libxml/valid.h>#include <libxml/parserInternals.h>#include <libxml/xmlIO.h>#include "xml-error.h"#define XML_PARSER_BIG_BUFFER_SIZE 1000#define XML_PARSER_BUFFER_SIZE 100const char *xmlParserVersion = LIBXML_VERSION_STRING;int xmlGetWarningsDefaultValue = 1;/* * List of XML prefixed PI allowed by W3C specs */const char *xmlW3CPIs[] = { "xml-stylesheet", NULL};void xmlParserHandleReference(xmlParserCtxtPtr ctxt);void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str);/************************************************************************ * * * Input handling functions for progressive parsing * * * ************************************************************************//* #define DEBUG_INPUT *//* #define DEBUG_STACK *//* #define DEBUG_PUSH */#define INPUT_CHUNK 250/* we need to keep enough input to show errors in context */#define LINE_LEN 80#ifdef DEBUG_INPUT#define CHECK_BUFFER(in) check_buffer(in)void check_buffer(xmlParserInputPtr in) { if (in->base != in->buf->buffer->content) { fprintf(stderr, "xmlParserInput: base mismatch problem\n"); } if (in->cur < in->base) { fprintf(stderr, "xmlParserInput: cur < base problem\n"); } if (in->cur > in->base + in->buf->buffer->use) { fprintf(stderr, "xmlParserInput: cur > base + use 
problem\n"); } fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n", (int) in, (int) in->buf->buffer->content, in->cur - in->base, in->buf->buffer->use, in->buf->buffer->size);}#else#define CHECK_BUFFER(in) #endif/** * xmlParserInputRead: * @in: an XML parser input * @len: an indicative size for the lookahead * * This function refresh the input for the parser. It doesn't try to * preserve pointers to the input buffer, and discard already read data * * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the * end of this entity */intxmlParserInputRead(xmlParserInputPtr in, int len) { int ret; int used; int index;#ifdef DEBUG_INPUT fprintf(stderr, "Read\n");#endif if (in->buf == NULL) return(-1); if (in->base == NULL) return(-1); if (in->cur == NULL) return(-1); if (in->buf->buffer == NULL) return(-1); CHECK_BUFFER(in); used = in->cur - in->buf->buffer->content; ret = xmlBufferShrink(in->buf->buffer, used); if (ret > 0) { in->cur -= ret; in->consumed += ret; } ret = xmlParserInputBufferRead(in->buf, len); if (in->base != in->buf->buffer->content) { /* * the buffer has been realloced */ index = in->cur - in->base; in->base = in->buf->buffer->content; in->cur = &in->buf->buffer->content[index]; } CHECK_BUFFER(in); return(ret);}/** * xmlParserInputGrow: * @in: an XML parser input * @len: an indicative size for the lookahead * * This function increase the input for the parser. 
It tries to
 * preserve pointers to the input buffer, and keep already read data
 *
 * Nothing is read, and 0 is returned, when the buffer already holds more
 * than INPUT_CHUNK bytes past the cursor or when the input buffer has no
 * read callback.
 *
 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
 * end of this entity
 */
int
xmlParserInputGrow(xmlParserInputPtr in, int len) {
    int ret;
    int offset;

#ifdef DEBUG_INPUT
    fprintf(stderr, "Grow\n");
#endif
    if (in->buf == NULL) return(-1);
    if (in->base == NULL) return(-1);
    if (in->cur == NULL) return(-1);
    if (in->buf->buffer == NULL) return(-1);

    CHECK_BUFFER(in);

    /* position of the cursor inside the buffer, valid across a realloc */
    offset = in->cur - in->base;
    if (in->buf->buffer->use > offset + INPUT_CHUNK) {

        CHECK_BUFFER(in);

        return(0);
    }
    if (in->buf->readcallback == NULL)
        return(0);
    ret = xmlParserInputBufferGrow(in->buf, len);

    /*
     * The buffer may have been realloced by the grow. in->base and in->cur
     * would then point into freed memory, so they are not used for pointer
     * arithmetic here: the offset computed before the grow is applied to
     * the current content instead. This is a no-op if the buffer did not
     * move.
     */
    in->base = in->buf->buffer->content;
    in->cur = &in->buf->buffer->content[offset];

    CHECK_BUFFER(in);

    return(ret);
}

/**
 * xmlParserInputShrink:
 * @in:  an XML parser input
 *
 * This function removes used input for the parser.
*/voidxmlParserInputShrink(xmlParserInputPtr in) { int used; int ret; int index;#ifdef DEBUG_INPUT fprintf(stderr, "Shrink\n");#endif if (in->buf == NULL) return; if (in->base == NULL) return; if (in->cur == NULL) return; if (in->buf->buffer == NULL) return; CHECK_BUFFER(in); used = in->cur - in->buf->buffer->content; if (used > INPUT_CHUNK) { ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN); if (ret > 0) { in->cur -= ret; in->consumed += ret; } } CHECK_BUFFER(in); if (in->buf->buffer->use > INPUT_CHUNK) { return; } xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); if (in->base != in->buf->buffer->content) { /* * the buffer has been realloced */ index = in->cur - in->base; in->base = in->buf->buffer->content; in->cur = &in->buf->buffer->content[index]; } CHECK_BUFFER(in);}/************************************************************************ * * * Parser stacks related functions and macros * * * ************************************************************************/int xmlSubstituteEntitiesDefaultValue = 0;int xmlDoValidityCheckingDefaultValue = 0;int xmlKeepBlanksDefaultValue = 1;xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str);/* * Generic function for accessing stacks in the Parser Context */#define PUSH_AND_POP(scope, type, name) \scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \ if (ctxt->name##Nr >= ctxt->name##Max) { \ ctxt->name##Max *= 2; \ ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \ ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \ if (ctxt->name##Tab == NULL) { \ fprintf(stderr, "realloc failed !\n"); \ return(0); \ } \ } \ ctxt->name##Tab[ctxt->name##Nr] = value; \ ctxt->name = value; \ return(ctxt->name##Nr++); \} \scope type name##Pop(xmlParserCtxtPtr ctxt) { \ type ret; \ if (ctxt->name##Nr <= 0) return(0); \ ctxt->name##Nr--; \ if (ctxt->name##Nr > 0) \ ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \ else \ ctxt->name = NULL; \ ret = ctxt->name##Tab[ctxt->name##Nr]; \ 
ctxt->name##Tab[ctxt->name##Nr] = 0; \ return(ret); \} \PUSH_AND_POP(extern, xmlParserInputPtr, input)PUSH_AND_POP(extern, xmlNodePtr, node)PUSH_AND_POP(extern, xmlChar*, name)int spacePush(xmlParserCtxtPtr ctxt, int val) { if (ctxt->spaceNr >= ctxt->spaceMax) { ctxt->spaceMax *= 2; ctxt->spaceTab = (void *) xmlRealloc(ctxt->spaceTab, ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); if (ctxt->spaceTab == NULL) { fprintf(stderr, "realloc failed !\n"); return(0); } } ctxt->spaceTab[ctxt->spaceNr] = val; ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; return(ctxt->spaceNr++);}int spacePop(xmlParserCtxtPtr ctxt) { int ret; if (ctxt->spaceNr <= 0) return(0); ctxt->spaceNr--; if (ctxt->spaceNr > 0) ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; else ctxt->space = NULL; ret = ctxt->spaceTab[ctxt->spaceNr]; ctxt->spaceTab[ctxt->spaceNr] = -1; return(ret);}/* * Macros for accessing the content. Those should be used only by the parser, * and not exported. * * Dirty macros, i.e. one need to make assumption on the context to use them * * CUR_PTR return the current pointer to the xmlChar to be parsed. * To be used with extreme caution since operations consuming * characters may move the input buffer to a different location ! * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled * in ISO-Latin or UTF-8. * This should be used internally by the parser * only to compare to ASCII values otherwise it would break when * running with UTF-8 encoding. * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only * to compare on ASCII based substring. * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined * strings within the parser. * * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding * * NEXT Skip to the next character, this does the proper decoding * in UTF-8 mode. It also pop-up unfinished entities on the fly. 
 *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
 *   CUR_CHAR Return the current char as an int as well as its lenght.
 */

/*
 * All the macros below expect a variable named ctxt (the parser context)
 * to be in scope at the point of use.
 *
 * RAW evaluates to -1 while ctxt->token is set and to the byte under the
 * input cursor otherwise; CUR evaluates to ctxt->token when it is set and
 * to the byte under the input cursor otherwise. NXT(val) indexes from the
 * cursor and CUR_PTR is the cursor itself; neither looks at ctxt->token.
 */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP(val) advances ctxt->nbChars and the cursor by val, then hands over
 * to the PE-reference / reference handlers if the cursor now sits on '%'
 * or '&', and finally pops the current input when the cursor is on a 0
 * byte and xmlParserInputGrow() returns <= 0.
 *
 * NOTE: SKIP, SHRINK and GROW expand to several statements that are not
 * wrapped in do { } while (0). Used as the body of an unbraced if/else or
 * loop, only the first statement would be controlled by it.
 */
#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val);     \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt)

/*
 * SHRINK calls xmlParserInputShrink() on the current input; the current
 * input is then popped when the cursor is on a 0 byte and
 * xmlParserInputGrow() returns <= 0.
 */
#define SHRINK xmlParserInputShrink(ctxt->input);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt)

/*
 * GROW calls xmlParserInputGrow() for INPUT_CHUNK more bytes; the current
 * input is then popped when the cursor is on a 0 byte and a second grow
 * attempt returns <= 0.
 */
#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK);              \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt)

/*
 * NOTE: the expansions of SKIP_BLANKS, NEXT, CUR_CHAR and CUR_SCHAR already
 * end with a ';'. Writing "NEXT;" therefore yields an extra empty statement,
 * and these macros cannot be used inside a larger expression.
 */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt);

#define NEXT xmlNextChar(ctxt);

/*
 * NEXTL(l) updates the line/column counters from the byte under the cursor
 * ('\n' starts a new line at column 1), clears ctxt->token, advances the
 * cursor by l and hands over to the PE-reference / reference handlers if
 * the cursor now sits on '%' or '&'. It is a multi-statement expansion and
 * l is substituted without parentheses.
 */
#define NEXTL(l)                                                        \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += l;                             \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);

/* Both pass the address of l, so l must be an lvalue. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);

/*
 * COPY_BUF(l,b,i,v) appends v to buffer b at index i: when l is 1 it is
 * stored directly as one xmlChar and i is incremented, otherwise
 * xmlCopyChar() is called and i is advanced by its return value.
 *
 * NOTE: the expansion is a bare if/else, so an "else" following a use of
 * this macro inside an unbraced "if" would not attach where it appears to.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if (l == 1) b[i++] = (xmlChar) v;                                   \
    else i += xmlCopyChar(l,&b[i],v);

/**
 * xmlNextChar:
 * @ctxt:  the XML parser context
 *
 * Skip to the next char input char.
 */

void
xmlNextChar(xmlParserCtxtPtr ctxt) {
    /*
     * TODO: 2.11 End-of-Line Handling
     *   the literal two-character sequence "#xD#xA" or a standalone
     *   literal #xD, an XML processor must pass to the application
     *   the single character #xA.
*/ if (ctxt->token != 0) ctxt->token = 0; else { if ((*ctxt->input->cur == 0) && (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && (ctxt->instate != XML_PARSER_COMMENT)) { /* * If we are at the end of the current entity and * the context allows it, we pop consumed entities * automatically. * TODO: the auto closing should be blocked in other cases */ xmlPopInput(ctxt); } else { if (*(ctxt->input->cur) == '\n') { ctxt->input->line++; ctxt->input->col = 1; } else ctxt->input->col++; if (ctxt->encoding == NULL) { /* * We are supposed to handle UTF8, check it's valid * From rfc2044: encoding of the Unicode values on UTF-8: * * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * * Check for the 0x110000 limit too */ const unsigned char *cur = ctxt->input->cur; unsigned char c; c = *cur; if (c & 0x80) { if (cur[1] == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); if ((cur[1] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xe0) == 0xe0) { unsigned int val; if (cur[2] == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -