📄 xmlreader.c.svn-base
字号:
/* * xmlreader.c: implements the xmlTextReader streaming node API * * NOTE: * XmlTextReader.Normalization Property won't be supported, since * it makes the parser non compliant to the XML recommendation * * See Copyright for the status of this software. * * daniel@veillard.com *//* * TODOs: * - XML Schemas validation * - setting(s) for NoBlanks * - performances and tuning ... */#define IN_LIBXML#include "libxml.h"#ifdef LIBXML_READER_ENABLED#include <string.h> /* for memset() only ! */#include <stdarg.h>#ifdef HAVE_CTYPE_H#include <ctype.h>#endif#ifdef HAVE_STDLIB_H#include <stdlib.h>#endif#include <libxml/xmlmemory.h>#include <libxml/xmlIO.h>#include <libxml/xmlreader.h>#include <libxml/parserInternals.h>#include <libxml/relaxng.h>#include <libxml/uri.h>#ifdef LIBXML_XINCLUDE_ENABLED#include <libxml/xinclude.h>#endif#ifdef LIBXML_PATTERN_ENABLED#include <libxml/pattern.h>#endif/* #define DEBUG_CALLBACKS *//* #define DEBUG_READER *//** * TODO: * * macro to flag unimplemented blocks */#define TODO \ xmlGenericError(xmlGenericErrorContext, \ "Unimplemented block at %s:%d\n", \ __FILE__, __LINE__);#ifdef DEBUG_READER#define DUMP_READER xmlTextReaderDebug(reader);#else#define DUMP_READER#endif#define CHUNK_SIZE 512/************************************************************************ * * * The parser: maps the Text Reader API on top of the existing * * parsing routines building a tree * * * ************************************************************************/#define XML_TEXTREADER_INPUT 1#define XML_TEXTREADER_CTXT 2typedef enum { XML_TEXTREADER_NONE = -1, XML_TEXTREADER_START= 0, XML_TEXTREADER_ELEMENT= 1, XML_TEXTREADER_END= 2, XML_TEXTREADER_EMPTY= 3, XML_TEXTREADER_BACKTRACK= 4, XML_TEXTREADER_DONE= 5, XML_TEXTREADER_ERROR= 6} xmlTextReaderState;typedef enum { XML_TEXTREADER_NOT_VALIDATE = 0, XML_TEXTREADER_VALIDATE_DTD = 1, XML_TEXTREADER_VALIDATE_RNG = 2} xmlTextReaderValidate;struct _xmlTextReader { int mode; /* the parsing mode */ xmlDocPtr doc; /* when 
walking an existing doc */ xmlTextReaderValidate validate;/* is there any validation */ int allocs; /* what structure were deallocated */ xmlTextReaderState state; xmlParserCtxtPtr ctxt; /* the parser context */ xmlSAXHandlerPtr sax; /* the parser SAX callbacks */ xmlParserInputBufferPtr input; /* the input */ startElementSAXFunc startElement;/* initial SAX callbacks */ endElementSAXFunc endElement; /* idem */ startElementNsSAX2Func startElementNs;/* idem */ endElementNsSAX2Func endElementNs; /* idem */ charactersSAXFunc characters; cdataBlockSAXFunc cdataBlock; unsigned int base; /* base of the segment in the input */ unsigned int cur; /* current position in the input */ xmlNodePtr node; /* current node */ xmlNodePtr curnode;/* current attribute node */ int depth; /* depth of the current node */ xmlNodePtr faketext;/* fake xmlNs chld */ int preserve;/* preserve the resulting document */ xmlBufferPtr buffer; /* used to return const xmlChar * */ xmlDictPtr dict; /* the context dictionnary */ /* entity stack when traversing entities content */ xmlNodePtr ent; /* Current Entity Ref Node */ int entNr; /* Depth of the entities stack */ int entMax; /* Max depth of the entities stack */ xmlNodePtr *entTab; /* array of entities */ /* error handling */ xmlTextReaderErrorFunc errorFunc; /* callback function */ void *errorFuncArg; /* callback function user argument */#ifdef LIBXML_SCHEMAS_ENABLED /* Handling of RelaxNG validation */ xmlRelaxNGPtr rngSchemas; /* The Relax NG schemas */ xmlRelaxNGValidCtxtPtr rngValidCtxt;/* The Relax NG validation context */ int rngValidErrors;/* The number of errors detected */ xmlNodePtr rngFullNode; /* the node if RNG not progressive */#endif#ifdef LIBXML_XINCLUDE_ENABLED /* Handling of XInclude processing */ int xinclude; /* is xinclude asked for */ const xmlChar * xinclude_name; /* the xinclude name from dict */ xmlXIncludeCtxtPtr xincctxt; /* the xinclude context */ int in_xinclude; /* counts for xinclude */#endif#ifdef 
LIBXML_PATTERN_ENABLED int patternNr; /* number of preserve patterns */ int patternMax; /* max preserve patterns */ xmlPatternPtr *patternTab; /* array of preserve patterns */#endif int preserves; /* level of preserves */ int parserFlags; /* the set of options set */ /* Structured error handling */ xmlStructuredErrorFunc sErrorFunc; /* callback function */};#define NODE_IS_EMPTY 0x1#define NODE_IS_PRESERVED 0x2#define NODE_IS_SPRESERVED 0x4/** * CONSTSTR: * * Macro used to return an interned string */#define CONSTSTR(str) xmlDictLookup(reader->dict, (str), -1)#define CONSTQSTR(p, str) xmlDictQLookup(reader->dict, (p), (str))static int xmlTextReaderReadTree(xmlTextReaderPtr reader);static int xmlTextReaderNextTree(xmlTextReaderPtr reader);/************************************************************************ * * * Our own version of the freeing routines as we recycle nodes * * * ************************************************************************//** * DICT_FREE: * @str: a string * * Free a string if it is not owned by the "dict" dictionnary in the * current scope */#define DICT_FREE(str) \ if ((str) && ((!dict) || \ (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ xmlFree((char *)(str));static void xmlTextReaderFreeNode(xmlTextReaderPtr reader, xmlNodePtr cur);static void xmlTextReaderFreeNodeList(xmlTextReaderPtr reader, xmlNodePtr cur);/** * xmlFreeID: * @not: A id * * Deallocate the memory used by an id definition */static voidxmlFreeID(xmlIDPtr id) { xmlDictPtr dict = NULL; if (id == NULL) return; if (id->doc != NULL) dict = id->doc->dict; if (id->value != NULL) DICT_FREE(id->value) xmlFree(id);}/** * xmlTextReaderRemoveID: * @doc: the document * @attr: the attribute * * Remove the given attribute from the ID table maintained internally. 
*
 * Returns -1 if the lookup failed and 0 otherwise
 */
static int
xmlTextReaderRemoveID(xmlDocPtr doc, xmlAttrPtr attr) {
    xmlIDTablePtr table;
    xmlIDPtr id;
    xmlChar *ID;

    if ((doc == NULL) || (attr == NULL))
        return(-1);
    table = (xmlIDTablePtr) doc->ids;
    if (table == NULL)
        return(-1);

    /* the table is keyed by the attribute's string value */
    ID = xmlNodeListGetString(doc, attr->children, 1);
    if (ID == NULL)
        return(-1);
    id = xmlHashLookup(table, ID);
    xmlFree(ID);
    if ((id == NULL) || (id->attr != attr))
        return(-1);

    /*
     * The entry stays in the table; it only stops pointing at the
     * attribute and keeps the attribute name instead.
     */
    id->name = attr->name;
    id->attr = NULL;
    return(0);
}

/**
 * xmlTextReaderFreeProp:
 * @reader:  the xmlTextReaderPtr used
 * @cur:  the attribute node
 *
 * Free an attribute node and its children. The attribute structure is
 * pushed on the parser context's freeAttrs list for reuse while that
 * list holds fewer than 100 entries, otherwise it is released with
 * xmlFree().
 */
static void
xmlTextReaderFreeProp(xmlTextReaderPtr reader, xmlAttrPtr cur) {
    xmlDictPtr dict;

    /*
     * reader or reader->ctxt may be NULL (the recycling test below
     * allows for it), so do not dereference them unconditionally.
     * With a NULL dict, DICT_FREE always frees the string.
     */
    if ((reader != NULL) && (reader->ctxt != NULL))
        dict = reader->ctxt->dict;
    else
        dict = NULL;

    if (cur == NULL)
        return;

    if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue))
        xmlDeregisterNodeDefaultValue((xmlNodePtr) cur);

    /* Check for ID removal -> leading to invalid references ! */
    if ((cur->parent != NULL) && (cur->parent->doc != NULL) &&
        ((cur->parent->doc->intSubset != NULL) ||
         (cur->parent->doc->extSubset != NULL))) {
        if (xmlIsID(cur->parent->doc, cur->parent, cur))
            xmlTextReaderRemoveID(cur->parent->doc, cur);
    }
    if (cur->children != NULL)
        xmlTextReaderFreeNodeList(reader, cur->children);

    DICT_FREE(cur->name);

    if ((reader != NULL) && (reader->ctxt != NULL) &&
        (reader->ctxt->freeAttrsNr < 100)) {
        /* recycle the structure instead of freeing it */
        cur->next = reader->ctxt->freeAttrs;
        reader->ctxt->freeAttrs = cur;
        reader->ctxt->freeAttrsNr++;
    } else {
        xmlFree(cur);
    }
}

/**
 * xmlTextReaderFreePropList:
 * @reader:  the xmlTextReaderPtr used
 * @cur:  the first property in the list
 *
 * Free a property and all its siblings, all the children are freed too.
*/
static void
xmlTextReaderFreePropList(xmlTextReaderPtr reader, xmlAttrPtr cur) {
    xmlAttrPtr next;

    while (cur != NULL) {
        /* save the link first: freeing may recycle cur and rewrite cur->next */
        next = cur->next;
        xmlTextReaderFreeProp(reader, cur);
        cur = next;
    }
}

/**
 * xmlTextReaderFreeNodeList:
 * @reader:  the xmlTextReaderPtr used
 * @cur:  the first node in the list
 *
 * Free a node and all its siblings, this is a recursive behaviour, all
 * the children are freed too.
 * If @cur is a namespace declaration or a document node, the matching
 * tree routine (xmlFreeNsList / xmlFreeDoc) is called on it instead.
 * DTD nodes met in the list are skipped, not freed.
 */
static void
xmlTextReaderFreeNodeList(xmlTextReaderPtr reader, xmlNodePtr cur) {
    xmlNodePtr next;
    xmlDictPtr dict;

    /*
     * reader or reader->ctxt may be NULL (the recycling test below
     * allows for it), so do not dereference them unconditionally.
     * With a NULL dict, DICT_FREE always frees the string.
     */
    if ((reader != NULL) && (reader->ctxt != NULL))
        dict = reader->ctxt->dict;
    else
        dict = NULL;

    if (cur == NULL)
        return;
    if (cur->type == XML_NAMESPACE_DECL) {
        xmlFreeNsList((xmlNsPtr) cur);
        return;
    }
    if ((cur->type == XML_DOCUMENT_NODE) ||
        (cur->type == XML_HTML_DOCUMENT_NODE)) {
        xmlFreeDoc((xmlDocPtr) cur);
        return;
    }

    while (cur != NULL) {
        next = cur->next;
        /* unroll to speed up freeing the document */
        if (cur->type != XML_DTD_NODE) {
            if ((cur->children != NULL) &&
                (cur->type != XML_ENTITY_REF_NODE)) {
                /* only free children that really belong to this node */
                if (cur->children->parent == cur)
                    xmlTextReaderFreeNodeList(reader, cur->children);
                cur->children = NULL;
            }

            if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue))
                xmlDeregisterNodeDefaultValue(cur);

            if (((cur->type == XML_ELEMENT_NODE) ||
                 (cur->type == XML_XINCLUDE_START) ||
                 (cur->type == XML_XINCLUDE_END)) &&
                (cur->properties != NULL))
                xmlTextReaderFreePropList(reader, cur->properties);
            if ((cur->type != XML_ELEMENT_NODE) &&
                (cur->type != XML_XINCLUDE_START) &&
                (cur->type != XML_XINCLUDE_END) &&
                (cur->type != XML_ENTITY_REF_NODE)) {
                DICT_FREE(cur->content);
            }
            if (((cur->type == XML_ELEMENT_NODE) ||
                 (cur->type == XML_XINCLUDE_START) ||
                 (cur->type == XML_XINCLUDE_END)) &&
                (cur->nsDef != NULL))
                xmlFreeNsList(cur->nsDef);

            /*
             * we don't free element names here they are interned now
             * (DICT_FREE leaves strings owned by the dictionary alone)
             */
            if ((cur->type != XML_TEXT_NODE) &&
                (cur->type != XML_COMMENT_NODE))
                DICT_FREE(cur->name);

            if (((cur->type == XML_ELEMENT_NODE) ||
                 (cur->type == XML_TEXT_NODE)) &&
                (reader != NULL) && (reader->ctxt != NULL) &&
                (reader->ctxt->freeElemsNr < 100)) {
                /* recycle element and text nodes, up to 100 of them */
                cur->next = reader->ctxt->freeElems;
                reader->ctxt->freeElems = cur;
                reader->ctxt->freeElemsNr++;
            } else {
                xmlFree(cur);
            }
        }
        cur = next;
    }
}

/**
 * xmlTextReaderFreeNode:
 * @reader:  the xmlTextReaderPtr used
 * @cur:  the node
 *
 * Free a node, this is a recursive behaviour, all the children are freed too.
 * This doesn't unlink the child from the list, use xmlUnlinkNode() first.
 * DTD, namespace and attribute nodes are handed to xmlFreeDtd(),
 * xmlFreeNs() and xmlTextReaderFreeProp() respectively.
 * A NULL @cur is ignored.
 */
static void
xmlTextReaderFreeNode(xmlTextReaderPtr reader, xmlNodePtr cur) {
    xmlDictPtr dict;

    /*
     * reader or reader->ctxt may be NULL (the recycling test below
     * allows for it), so do not dereference them unconditionally.
     * With a NULL dict, DICT_FREE always frees the string.
     */
    if ((reader != NULL) && (reader->ctxt != NULL))
        dict = reader->ctxt->dict;
    else
        dict = NULL;

    if (cur == NULL)
        return;

    if (cur->type == XML_DTD_NODE) {
        xmlFreeDtd((xmlDtdPtr) cur);
        return;
    }
    if (cur->type == XML_NAMESPACE_DECL) {
        xmlFreeNs((xmlNsPtr) cur);
        return;
    }
    if (cur->type == XML_ATTRIBUTE_NODE) {
        xmlTextReaderFreeProp(reader, (xmlAttrPtr) cur);
        return;
    }

    if ((cur->children != NULL) &&
        (cur->type != XML_ENTITY_REF_NODE)) {
        /* only free children that really belong to this node */
        if (cur->children->parent == cur)
            xmlTextReaderFreeNodeList(reader, cur->children);
        cur->children = NULL;
    }

    if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue))
        xmlDeregisterNodeDefaultValue(cur);

    if (((cur->type == XML_ELEMENT_NODE) ||
         (cur->type == XML_XINCLUDE_START) ||
         (cur->type == XML_XINCLUDE_END)) &&
        (cur->properties != NULL))
        xmlTextReaderFreePropList(reader, cur->properties);
    if ((cur->type != XML_ELEMENT_NODE) &&
        (cur->type != XML_XINCLUDE_START) &&
        (cur->type != XML_XINCLUDE_END) &&
        (cur->type != XML_ENTITY_REF_NODE)) {
        DICT_FREE(cur->content);
    }
    if (((cur->type == XML_ELEMENT_NODE) ||
         (cur->type == XML_XINCLUDE_START) ||
         (cur->type == XML_XINCLUDE_END)) &&
        (cur->nsDef != NULL))
        xmlFreeNsList(cur->nsDef);

    /*
     * we don't free names here they are interned now
     * (DICT_FREE leaves strings owned by the dictionary alone)
     */
    if ((cur->type != XML_TEXT_NODE) &&
        (cur->type != XML_COMMENT_NODE))
        DICT_FREE(cur->name);

    if (((cur->type == XML_ELEMENT_NODE) ||
         (cur->type == XML_TEXT_NODE)) &&
        (reader != NULL) && (reader->ctxt != NULL) &&
        (reader->ctxt->freeElemsNr < 100)) {
        /* recycle element and text nodes, up to 100 of them */
        cur->next = reader->ctxt->freeElems;
        reader->ctxt->freeElems = cur;
        reader->ctxt->freeElemsNr++;
    } else {
        xmlFree(cur);
    }
}

/**
 * xmlTextReaderFreeIDTable:
 * @table:  An id table
 *
 * Deallocate the memory used by an ID hash table.
 */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -