📄 parserinternals.c.svn-base
字号:
/* * parserInternals.c : Internal routines (and obsolete ones) needed for the * XML and HTML parsers. * * See Copyright for the status of this software. * * daniel@veillard.com */#define IN_LIBXML#include "libxml.h"#if defined(WIN32) && !defined (__CYGWIN__)#define XML_DIR_SEP '\\'#else#define XML_DIR_SEP '/'#endif#include <string.h>#ifdef HAVE_CTYPE_H#include <ctype.h>#endif#ifdef HAVE_STDLIB_H#include <stdlib.h>#endif#ifdef HAVE_SYS_STAT_H#include <sys/stat.h>#endif#ifdef HAVE_FCNTL_H#include <fcntl.h>#endif#ifdef HAVE_UNISTD_H#include <unistd.h>#endif#ifdef HAVE_ZLIB_H#include <zlib.h>#endif#include <libxml/xmlmemory.h>#include <libxml/tree.h>#include <libxml/parser.h>#include <libxml/parserInternals.h>#include <libxml/valid.h>#include <libxml/entities.h>#include <libxml/xmlerror.h>#include <libxml/encoding.h>#include <libxml/valid.h>#include <libxml/xmlIO.h>#include <libxml/uri.h>#include <libxml/dict.h>#include <libxml/SAX.h>#ifdef LIBXML_CATALOG_ENABLED#include <libxml/catalog.h>#endif#include <libxml/globals.h>#include <libxml/chvalid.h>/* * Various global defaults for parsing *//** * xmlCheckVersion: * @version: the include version number * * check the compiled lib version against the include one. * This can warn or immediately kill the application */voidxmlCheckVersion(int version) { int myversion = (int) LIBXML_VERSION; xmlInitParser(); if ((myversion / 10000) != (version / 10000)) { xmlGenericError(xmlGenericErrorContext, "Fatal: program compiled against libxml %d using libxml %d\n", (version / 10000), (myversion / 10000)); fprintf(stderr, "Fatal: program compiled against libxml %d using libxml %d\n", (version / 10000), (myversion / 10000)); } if ((myversion / 100) < (version / 100)) { xmlGenericError(xmlGenericErrorContext, "Warning: program compiled against libxml %d using older %d\n", (version / 100), (myversion / 100)); }}/************************************************************************ * * * Some factorized error routines * * * ************************************************************************//** * xmlErrMemory: * @ctxt: an XML parser context * @extra: extra informations * * Handle a redefinition of attribute error */voidxmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra){ if ((ctxt != NULL) && (ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF)) return; if (ctxt != NULL) { ctxt->errNo = XML_ERR_NO_MEMORY; ctxt->instate = XML_PARSER_EOF; ctxt->disableSAX = 1; } if (extra) __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, NULL, NULL, 0, 0, "Memory allocation failed : %s\n", extra); else __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "Memory allocation failed\n");}/** * __xmlErrEncoding: * @ctxt: an XML parser context * @error: the error number * @msg: the error message * @str1: an string info * @str2: an string info * * Handle an encoding error */void__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *msg, const xmlChar * str1, const xmlChar * str2){ if ((ctxt != NULL) && (ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF)) return; if (ctxt != NULL) ctxt->errNo = error; __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, NULL, 0, (const char *) str1, (const char *) str2, NULL, 0, 0, msg, str1, str2); if (ctxt != NULL) { ctxt->wellFormed = 0; if (ctxt->recovery == 0) ctxt->disableSAX = 1; }}/** * xmlErrInternal: * @ctxt: an XML parser context * @msg: the error message * @str: error informations * * Handle an internal error */static voidxmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str){ if ((ctxt != NULL) && (ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF)) return; if (ctxt != NULL) ctxt->errNo = XML_ERR_INTERNAL_ERROR; __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, 0, 0, msg, str); if (ctxt != NULL) { ctxt->wellFormed = 0; if (ctxt->recovery == 0) ctxt->disableSAX = 1; }}/** * xmlErrEncodingInt: * @ctxt: an XML parser context * @error: the error number * @msg: the error message * @val: an integer value * * n encoding error */static voidxmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *msg, int val){ if ((ctxt != NULL) && (ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF)) return; if (ctxt != NULL) ctxt->errNo = error; __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, val, 0, msg, val); if (ctxt != NULL) { ctxt->wellFormed = 0; if (ctxt->recovery == 0) ctxt->disableSAX = 1; }}/** * xmlIsLetter: * @c: an unicode character (int) * * Check whether the character is allowed by the production * [84] Letter ::= BaseChar | Ideographic * * Returns 0 if not, non-zero otherwise */intxmlIsLetter(int c) { return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));}/************************************************************************ * * * Input handling functions for progressive parsing * * * ************************************************************************//* #define DEBUG_INPUT *//* #define DEBUG_STACK *//* #define DEBUG_PUSH *//* we need to keep enough input to show errors in context */#define LINE_LEN 80#ifdef DEBUG_INPUT#define CHECK_BUFFER(in) check_buffer(in)staticvoid check_buffer(xmlParserInputPtr in) { if (in->base != in->buf->buffer->content) { xmlGenericError(xmlGenericErrorContext, "xmlParserInput: base mismatch problem\n"); } if (in->cur < in->base) { xmlGenericError(xmlGenericErrorContext, "xmlParserInput: cur < base problem\n"); } if (in->cur > in->base + in->buf->buffer->use) { xmlGenericError(xmlGenericErrorContext, "xmlParserInput: cur > base + use problem\n"); } xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n", (int) in, (int) in->buf->buffer->content, in->cur - in->base, in->buf->buffer->use, in->buf->buffer->size);}#else#define CHECK_BUFFER(in) #endif/** * xmlParserInputRead: * @in: an XML parser input * @len: an indicative size for the lookahead * * This function refresh the input for the parser. It doesn't try to * preserve pointers to the input buffer, and discard already read data * * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the * end of this entity */intxmlParserInputRead(xmlParserInputPtr in, int len) { int ret; int used; int indx;#ifdef DEBUG_INPUT xmlGenericError(xmlGenericErrorContext, "Read\n");#endif if (in->buf == NULL) return(-1); if (in->base == NULL) return(-1); if (in->cur == NULL) return(-1); if (in->buf->buffer == NULL) return(-1); if (in->buf->readcallback == NULL) return(-1); CHECK_BUFFER(in); used = in->cur - in->buf->buffer->content; ret = xmlBufferShrink(in->buf->buffer, used); if (ret > 0) { in->cur -= ret; in->consumed += ret; } ret = xmlParserInputBufferRead(in->buf, len); if (in->base != in->buf->buffer->content) { /* * the buffer has been reallocated */ indx = in->cur - in->base; in->base = in->buf->buffer->content; in->cur = &in->buf->buffer->content[indx]; } in->end = &in->buf->buffer->content[in->buf->buffer->use]; CHECK_BUFFER(in); return(ret);}/** * xmlParserInputGrow: * @in: an XML parser input * @len: an indicative size for the lookahead * * This function increase the input for the parser. It tries to * preserve pointers to the input buffer, and keep already read data * * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the * end of this entity */intxmlParserInputGrow(xmlParserInputPtr in, int len) { int ret; int indx;#ifdef DEBUG_INPUT xmlGenericError(xmlGenericErrorContext, "Grow\n");#endif if (in->buf == NULL) return(-1); if (in->base == NULL) return(-1); if (in->cur == NULL) return(-1); if (in->buf->buffer == NULL) return(-1); CHECK_BUFFER(in); indx = in->cur - in->base; if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) { CHECK_BUFFER(in); return(0); } if (in->buf->readcallback != NULL) ret = xmlParserInputBufferGrow(in->buf, len); else return(0); /* * NOTE : in->base may be a "dangling" i.e. freed pointer in this * block, but we use it really as an integer to do some * pointer arithmetic. Insure will raise it as a bug but in * that specific case, that's not ! */ if (in->base != in->buf->buffer->content) { /* * the buffer has been reallocated */ indx = in->cur - in->base; in->base = in->buf->buffer->content; in->cur = &in->buf->buffer->content[indx]; } in->end = &in->buf->buffer->content[in->buf->buffer->use]; CHECK_BUFFER(in); return(ret);}/** * xmlParserInputShrink: * @in: an XML parser input * * This function removes used input for the parser. */voidxmlParserInputShrink(xmlParserInputPtr in) { int used; int ret; int indx;#ifdef DEBUG_INPUT xmlGenericError(xmlGenericErrorContext, "Shrink\n");#endif if (in->buf == NULL) return; if (in->base == NULL) return; if (in->cur == NULL) return; if (in->buf->buffer == NULL) return; CHECK_BUFFER(in); used = in->cur - in->buf->buffer->content; /* * Do not shrink on large buffers whose only a tiny fraction * was consumed */ if (used > INPUT_CHUNK) { ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN); if (ret > 0) { in->cur -= ret; in->consumed += ret; } in->end = &in->buf->buffer->content[in->buf->buffer->use]; } CHECK_BUFFER(in); if (in->buf->buffer->use > INPUT_CHUNK) { return; } xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); if (in->base != in->buf->buffer->content) { /* * the buffer has been reallocated */ indx = in->cur - in->base; in->base = in->buf->buffer->content; in->cur = &in->buf->buffer->content[indx]; } in->end = &in->buf->buffer->content[in->buf->buffer->use]; CHECK_BUFFER(in);}/************************************************************************ * * * UTF8 character input and related functions * * * ************************************************************************//** * xmlNextChar: * @ctxt: the XML parser context * * Skip to the next char input char. */voidxmlNextChar(xmlParserCtxtPtr ctxt){ if (ctxt->instate == XML_PARSER_EOF) return; if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { if ((*ctxt->input->cur == 0) && (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && (ctxt->instate != XML_PARSER_COMMENT)) { /* * If we are at the end of the current entity and * the context allows it, we pop consumed entities * automatically. * the auto closing should be blocked in other cases */ xmlPopInput(ctxt); } else { const unsigned char *cur; unsigned char c; /* * 2.11 End-of-Line Handling * the literal two-character sequence "#xD#xA" or a standalone * literal #xD, an XML processor must pass to the application * the single character #xA. */ if (*(ctxt->input->cur) == '\n') { ctxt->input->line++; ctxt->input->col = 1; } else ctxt->input->col++; /* * We are supposed to handle UTF8, check it's valid * From rfc2044: encoding of the Unicode values on UTF-8: * * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * * Check for the 0x110000 limit too */ cur = ctxt->input->cur; c = *cur; if (c & 0x80) { if (c == 0xC0) goto encoding_error; if (cur[1] == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); if ((cur[1] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xe0) == 0xe0) { unsigned int val; if (cur[2] == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); if ((cur[2] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xf0) == 0xf0) { if (cur[3] == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) goto encoding_error;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -