/* Listing: xmlreader.c */
/* (code-viewer page residue: "Font size:" control) */
/*
* xmlreader.c: implements the xmlTextReader streaming node API
*
* NOTE:
* XmlTextReader.Normalization Property won't be supported, since
* it makes the parser non compliant to the XML recommendation
*
* See Copyright for the status of this software.
*
* daniel@veillard.com
*/
/*
* TODOs:
* - XML Schemas validation
*/
#define IN_LIBXML
#include "libxml.h"
#ifdef LIBXML_READER_ENABLED
#include <string.h> /* for memset() only ! */
#include <stdarg.h>
#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#include <libxml/xmlmemory.h>
#include <libxml/xmlIO.h>
#include <libxml/xmlreader.h>
#include <libxml/parserInternals.h>
#ifdef LIBXML_SCHEMAS_ENABLED
#include <libxml/relaxng.h>
#include <libxml/xmlschemas.h>
#endif
#include <libxml/uri.h>
#ifdef LIBXML_XINCLUDE_ENABLED
#include <libxml/xinclude.h>
#endif
#ifdef LIBXML_PATTERN_ENABLED
#include <libxml/pattern.h>
#endif
/* #define DEBUG_CALLBACKS */
/* #define DEBUG_READER */
/**
* TODO:
*
* Macro to flag unimplemented blocks: it reports the file name and line
* number of the expansion site through xmlGenericError().
*
* The expansion already ends with a semicolon, so the macro forms a
* complete statement on its own.
*/
#define TODO \
xmlGenericError(xmlGenericErrorContext, \
"Unimplemented block at %s:%d\n", \
__FILE__, __LINE__);
/*
* DUMP_READER:
*
* Expands to a call to xmlTextReaderDebug(reader) when DEBUG_READER is
* defined and to nothing otherwise. It relies on a variable named
* "reader" being in scope at the expansion site.
*/
#ifdef DEBUG_READER
#define DUMP_READER xmlTextReaderDebug(reader);
#else
#define DUMP_READER
#endif
/*
* CHUNK_SIZE:
*
* NOTE(review): presumably the number of input bytes handed to the parser
* per step; the use sites are not in this section - confirm.
*/
#define CHUNK_SIZE 512
/************************************************************************
* *
* The parser: maps the Text Reader API on top of the existing *
* parsing routines building a tree *
* *
************************************************************************/
/*
* Two distinct bit values (1 and 2).
* NOTE(review): presumably OR-ed into the reader's "allocs" field to record
* which of the input buffer / parser context the reader is responsible
* for - confirm at the use sites.
*/
#define XML_TEXTREADER_INPUT 1
#define XML_TEXTREADER_CTXT 2
/*
* xmlTextReaderState:
*
* Internal state of the reader; this is the type of the "state" field of
* struct _xmlTextReader.
* NOTE(review): the meaning of each value is only suggested by its name in
* this section - confirm against the code driving the state machine.
*/
typedef enum {
XML_TEXTREADER_NONE = -1,
XML_TEXTREADER_START= 0,
XML_TEXTREADER_ELEMENT= 1,
XML_TEXTREADER_END= 2,
XML_TEXTREADER_EMPTY= 3,
XML_TEXTREADER_BACKTRACK= 4,
XML_TEXTREADER_DONE= 5,
XML_TEXTREADER_ERROR= 6
} xmlTextReaderState;
/*
* xmlTextReaderValidate:
*
* Kind of validation attached to the reader; this is the type of the
* "validate" field of struct _xmlTextReader. The non-zero values are
* distinct bits (1, 2 and 4).
*/
typedef enum {
XML_TEXTREADER_NOT_VALIDATE = 0,
XML_TEXTREADER_VALIDATE_DTD = 1,
XML_TEXTREADER_VALIDATE_RNG = 2,
XML_TEXTREADER_VALIDATE_XSD = 4
} xmlTextReaderValidate;
/*
* struct _xmlTextReader:
*
* Private state of a text reader. Besides the core parsing fields it holds
* an entity stack, the error callbacks and, depending on the build
* configuration, Relax NG / XML Schemas validation state
* (LIBXML_SCHEMAS_ENABLED), XInclude state (LIBXML_XINCLUDE_ENABLED) and
* the preserve patterns (LIBXML_PATTERN_ENABLED).
*/
struct _xmlTextReader {
int mode; /* the parsing mode */
xmlDocPtr doc; /* when walking an existing doc */
xmlTextReaderValidate validate;/* is there any validation */
int allocs; /* what structures were deallocated */
xmlTextReaderState state;
xmlParserCtxtPtr ctxt; /* the parser context */
xmlSAXHandlerPtr sax; /* the parser SAX callbacks */
xmlParserInputBufferPtr input; /* the input */
startElementSAXFunc startElement;/* initial SAX callbacks */
endElementSAXFunc endElement; /* idem */
startElementNsSAX2Func startElementNs;/* idem */
endElementNsSAX2Func endElementNs; /* idem */
charactersSAXFunc characters;
cdataBlockSAXFunc cdataBlock;
unsigned int base; /* base of the segment in the input */
unsigned int cur; /* current position in the input */
xmlNodePtr node; /* current node */
xmlNodePtr curnode;/* current attribute node */
int depth; /* depth of the current node */
xmlNodePtr faketext;/* fake xmlNs child */
int preserve;/* preserve the resulting document */
xmlBufferPtr buffer; /* used to return const xmlChar * */
xmlDictPtr dict; /* the context dictionary */
/* entity stack when traversing entities content */
xmlNodePtr ent; /* Current Entity Ref Node */
int entNr; /* Depth of the entities stack */
int entMax; /* Max depth of the entities stack */
xmlNodePtr *entTab; /* array of entities */
/* error handling */
xmlTextReaderErrorFunc errorFunc; /* callback function */
void *errorFuncArg; /* callback function user argument */
#ifdef LIBXML_SCHEMAS_ENABLED
/* Handling of RelaxNG validation */
xmlRelaxNGPtr rngSchemas; /* The Relax NG schemas */
xmlRelaxNGValidCtxtPtr rngValidCtxt;/* The Relax NG validation context */
int rngValidErrors;/* The number of errors detected */
xmlNodePtr rngFullNode; /* the node if RNG not progressive */
/* Handling of Schemas validation */
xmlSchemaPtr xsdSchemas; /* The Schemas schemas */
xmlSchemaValidCtxtPtr xsdValidCtxt;/* The Schemas validation context */
int xsdValidErrors;/* The number of errors detected */
xmlSchemaSAXPlugPtr xsdPlug; /* the schemas plug in SAX pipeline */
#endif
#ifdef LIBXML_XINCLUDE_ENABLED
/* Handling of XInclude processing */
int xinclude; /* is xinclude asked for */
const xmlChar * xinclude_name; /* the xinclude name from dict */
xmlXIncludeCtxtPtr xincctxt; /* the xinclude context */
int in_xinclude; /* counts for xinclude */
#endif
#ifdef LIBXML_PATTERN_ENABLED
int patternNr; /* number of preserve patterns */
int patternMax; /* max preserve patterns */
xmlPatternPtr *patternTab; /* array of preserve patterns */
#endif
int preserves; /* level of preserves */
int parserFlags; /* the set of options set */
/* Structured error handling */
xmlStructuredErrorFunc sErrorFunc; /* callback function */
};
/*
* Per-node marker flags; the three values are distinct bits so they can be
* combined. Where they are stored is not visible in this section.
* NOTE(review): meanings inferred from the names (empty element, preserved
* node, preserved subtree) - confirm at the use sites.
*/
#define NODE_IS_EMPTY 0x1
#define NODE_IS_PRESERVED 0x2
#define NODE_IS_SPRESERVED 0x4
/**
* CONSTSTR:
*
* Macro used to return an interned string: looks @str up in reader->dict
* through xmlDictLookup(), passing -1 as the length argument. It relies on
* a variable named "reader" being in scope at the expansion site.
*/
#define CONSTSTR(str) xmlDictLookup(reader->dict, (str), -1)
/*
* CONSTQSTR:
*
* Two-part variant: looks the (p, str) pair up in reader->dict through
* xmlDictQLookup(). Also relies on "reader" being in scope.
*/
#define CONSTQSTR(p, str) xmlDictQLookup(reader->dict, (p), (str))
/* Forward declarations; the definitions are not in this part of the file. */
static int xmlTextReaderReadTree(xmlTextReaderPtr reader);
static int xmlTextReaderNextTree(xmlTextReaderPtr reader);
/************************************************************************
* *
* Our own version of the freeing routines as we recycle nodes *
* *
************************************************************************/
/**
* DICT_FREE:
* @str: a string
*
* Free a string if it is not owned by the "dict" dictionary in the
* current scope.
*
* The macro reads a variable literally named "dict" at the expansion site;
* when that variable is NULL a non-NULL @str is always passed to xmlFree().
* A NULL @str is ignored. @str is evaluated more than once.
*
* The expansion is a bare "if" statement that already ends with a
* semicolon (it is not wrapped in do { } while (0)), so take care when
* using it as the body of an if/else.
*/
#define DICT_FREE(str) \
if ((str) && ((!dict) || \
(xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
xmlFree((char *)(str));
/*
* Forward declarations: xmlTextReaderFreeProp() below calls
* xmlTextReaderFreeNodeList() before the latter is defined.
*/
static void xmlTextReaderFreeNode(xmlTextReaderPtr reader, xmlNodePtr cur);
static void xmlTextReaderFreeNodeList(xmlTextReaderPtr reader, xmlNodePtr cur);
/**
 * xmlFreeID:
 * @id: an ID definition, may be NULL
 *
 * Release an ID definition: its value string is freed unless it is owned
 * by the dictionary of the document the ID refers to, then the structure
 * itself is freed. A NULL @id is ignored.
 */
static void
xmlFreeID(xmlIDPtr id) {
    xmlDictPtr dict;

    if (id == NULL)
        return;

    /* DICT_FREE() below consults the local variable named "dict". */
    dict = (id->doc != NULL) ? id->doc->dict : NULL;

    if (id->value != NULL) {
        DICT_FREE(id->value);
    }
    xmlFree(id);
}
/**
 * xmlTextReaderRemoveID:
 * @doc: the document
 * @attr: the attribute
 *
 * Detach the given attribute from the ID table maintained internally.
 * The table entry itself is kept: it is looked up by the attribute's
 * string value, and if it currently refers to @attr its attribute link is
 * cleared and its name is set to the attribute's name.
 *
 * Returns -1 if @doc or @attr is NULL, the document has no ID table, the
 *         attribute value cannot be built, or no entry refers to @attr;
 *         0 otherwise
 */
static int
xmlTextReaderRemoveID(xmlDocPtr doc, xmlAttrPtr attr) {
    xmlIDTablePtr table;
    xmlIDPtr id;
    xmlChar *ID;

    if ((doc == NULL) || (attr == NULL))
        return(-1);

    table = (xmlIDTablePtr) doc->ids;
    if (table == NULL)
        return(-1);

    /* The table is keyed by the attribute's value. */
    ID = xmlNodeListGetString(doc, attr->children, 1);
    if (ID == NULL)
        return(-1);
    id = xmlHashLookup(table, ID);
    xmlFree(ID);

    /* Only touch the entry if it really belongs to this attribute. */
    if ((id == NULL) || (id->attr != attr))
        return(-1);

    /*
     * NOTE(review): id->name now aliases attr->name without taking a copy.
     * xmlTextReaderFreeProp() releases attr->name right after calling this
     * function unless the dictionary owns it - confirm the name always
     * outlives the ID entry.
     */
    id->name = attr->name;
    id->attr = NULL;
    return(0);
}
/**
 * xmlTextReaderFreeProp:
 * @reader: the xmlTextReaderPtr used, may be NULL
 * @cur: the attribute node, may be NULL
 *
 * Free an attribute node together with its children. The structure is
 * pushed on the parser context's list of recycled attributes while that
 * list holds fewer than 100 entries; otherwise it is released with
 * xmlFree().
 *
 * When @reader or its parser context is NULL no dictionary is consulted,
 * so the attribute name is always freed, and the structure is never
 * recycled.
 */
static void
xmlTextReaderFreeProp(xmlTextReaderPtr reader, xmlAttrPtr cur) {
    xmlDictPtr dict;

    if (cur == NULL) return;

    /*
     * DICT_FREE() reads the local "dict". Guard the lookup the same way
     * the recycling test below guards its own access to reader->ctxt.
     */
    if ((reader != NULL) && (reader->ctxt != NULL))
        dict = reader->ctxt->dict;
    else
        dict = NULL;

    if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue))
        xmlDeregisterNodeDefaultValue((xmlNodePtr) cur);

    /* Check for ID removal -> leading to invalid references ! */
    if ((cur->parent != NULL) && (cur->parent->doc != NULL) &&
        ((cur->parent->doc->intSubset != NULL) ||
         (cur->parent->doc->extSubset != NULL))) {
        if (xmlIsID(cur->parent->doc, cur->parent, cur))
            xmlTextReaderRemoveID(cur->parent->doc, cur);
    }
    if (cur->children != NULL)
        xmlTextReaderFreeNodeList(reader, cur->children);

    DICT_FREE(cur->name);

    /* Recycle the structure when the context's cache still has room. */
    if ((reader != NULL) && (reader->ctxt != NULL) &&
        (reader->ctxt->freeAttrsNr < 100)) {
        cur->next = reader->ctxt->freeAttrs;
        reader->ctxt->freeAttrs = cur;
        reader->ctxt->freeAttrsNr++;
    } else {
        xmlFree(cur);
    }
}
/**
 * xmlTextReaderFreePropList:
 * @reader: the xmlTextReaderPtr used
 * @cur: the first property in the list, may be NULL
 *
 * Walk the list starting at @cur through the "next" links and hand every
 * property to xmlTextReaderFreeProp(), which also frees its children.
 */
static void
xmlTextReaderFreePropList(xmlTextReaderPtr reader, xmlAttrPtr cur) {
    xmlAttrPtr following;

    for (; cur != NULL; cur = following) {
        /* Read the link first: freeing may overwrite or release the node. */
        following = cur->next;
        xmlTextReaderFreeProp(reader, cur);
    }
}
/**
 * xmlTextReaderFreeNodeList:
 * @reader: the xmlTextReaderPtr used, may be NULL
 * @cur: the first node in the list, may be NULL
 *
 * Free a node and all its siblings, this is a recursive behaviour, all
 * the children are freed too.
 *
 * Special cases handled on the first node only: a namespace declaration
 * is passed to xmlFreeNsList() and a document node to xmlFreeDoc().
 * Siblings of type XML_DTD_NODE are left untouched by the loop.
 * Element and text structures are pushed on the parser context's list of
 * recycled nodes while that list holds fewer than 100 entries; everything
 * else is released with xmlFree().
 *
 * When @reader or its parser context is NULL no dictionary is consulted
 * and no structure is recycled.
 */
static void
xmlTextReaderFreeNodeList(xmlTextReaderPtr reader, xmlNodePtr cur) {
    xmlNodePtr next;
    xmlDictPtr dict;

    if (cur == NULL) return;

    /*
     * DICT_FREE() reads the local "dict". Guard the lookup the same way
     * the recycling test below guards its own access to reader->ctxt.
     */
    if ((reader != NULL) && (reader->ctxt != NULL))
        dict = reader->ctxt->dict;
    else
        dict = NULL;

    if (cur->type == XML_NAMESPACE_DECL) {
        xmlFreeNsList((xmlNsPtr) cur);
        return;
    }
    if ((cur->type == XML_DOCUMENT_NODE) ||
        (cur->type == XML_HTML_DOCUMENT_NODE)) {
        xmlFreeDoc((xmlDocPtr) cur);
        return;
    }
    while (cur != NULL) {
        /* Saved up front: cur->next is reused as the recycle-list link. */
        next = cur->next;
        /* unroll to speed up freeing the document */
        if (cur->type != XML_DTD_NODE) {
            if ((cur->children != NULL) &&
                (cur->type != XML_ENTITY_REF_NODE)) {
                /* Only descend into children that point back to this node. */
                if (cur->children->parent == cur)
                    xmlTextReaderFreeNodeList(reader, cur->children);
                cur->children = NULL;
            }

            if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue))
                xmlDeregisterNodeDefaultValue(cur);

            if (((cur->type == XML_ELEMENT_NODE) ||
                 (cur->type == XML_XINCLUDE_START) ||
                 (cur->type == XML_XINCLUDE_END)) &&
                (cur->properties != NULL))
                xmlTextReaderFreePropList(reader, cur->properties);
            if ((cur->type != XML_ELEMENT_NODE) &&
                (cur->type != XML_XINCLUDE_START) &&
                (cur->type != XML_XINCLUDE_END) &&
                (cur->type != XML_ENTITY_REF_NODE)) {
                DICT_FREE(cur->content);
            }
            if (((cur->type == XML_ELEMENT_NODE) ||
                 (cur->type == XML_XINCLUDE_START) ||
                 (cur->type == XML_XINCLUDE_END)) &&
                (cur->nsDef != NULL))
                xmlFreeNsList(cur->nsDef);

            /*
             * The name is released unless the dictionary owns it; text
             * and comment nodes are skipped.
             * NOTE(review): presumably because those node types share
             * static name strings - confirm.
             */
            if ((cur->type != XML_TEXT_NODE) &&
                (cur->type != XML_COMMENT_NODE))
                DICT_FREE(cur->name);

            /* Recycle element/text structures while the cache has room. */
            if (((cur->type == XML_ELEMENT_NODE) ||
                 (cur->type == XML_TEXT_NODE)) &&
                (reader != NULL) && (reader->ctxt != NULL) &&
                (reader->ctxt->freeElemsNr < 100)) {
                cur->next = reader->ctxt->freeElems;
                reader->ctxt->freeElems = cur;
                reader->ctxt->freeElemsNr++;
            } else {
                xmlFree(cur);
            }
        }
        cur = next;
    }
}
/**
* xmlTextReaderFreeNode:
* @reader: the xmlTextReaderPtr used
* @cur: the node
*
* Free a node, this is a recursive behaviour, all the children are freed too.
* This doesn't unlink the child from the list, use xmlUnlinkNode() first.
*/
static void
xmlTextReaderFreeNode(xmlTextReaderPtr reader, xmlNodePtr cur) {
xmlDictPtr dict;
dict = reader->ctxt->dict;
if (cur->type == XML_DTD_NODE) {
xmlFreeDtd((xmlDtdPtr) cur);
return;
}
if (cur->type == XML_NAMESPACE_DECL) {
xmlFreeNs((xmlNsPtr) cur);
return;
}
if (cur->type == XML_ATTRIBUTE_NODE) {
xmlTextReaderFreeProp(reader, (xmlAttrPtr) cur);
return;
}
if ((cur->children != NULL) &&
(cur->type != XML_ENTITY_REF_NODE)) {
if (cur->children->parent == cur)
xmlTextReaderFreeNodeList(reader, cur->children);
cur->children = NULL;
}
if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue))
xmlDeregisterNodeDefaultValue(cur);
if (((cur->type == XML_ELEMENT_NODE) ||
(cur->type == XML_XINCLUDE_START) ||
(cur->type == XML_XINCLUDE_END)) &&
(cur->properties != NULL))
xmlTextReaderFreePropList(reader, cur->properties);
if ((cur->type != XML_ELEMENT_NODE) &&
(cur->type != XML_XINCLUDE_START) &&
(cur->type != XML_XINCLUDE_END) &&
(cur->type != XML_ENTITY_REF_NODE)) {
DICT_FREE(cur->content);
}
if (((cur->type == XML_ELEMENT_NODE) ||
(cur->type == XML_XINCLUDE_START) ||
(cur->type == XML_XINCLUDE_END)) &&
(cur->nsDef != NULL))
xmlFreeNsList(cur->nsDef);
/*
* we don't free names here they are interned now
*/
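/*
 * Code-viewer page residue (not part of xmlreader.c) - keyboard shortcuts:
 *   Copy code:            Ctrl + C
 *   Search code:          Ctrl + F
 *   Fullscreen mode:      F11
 *   Toggle theme:         Ctrl + Shift + D
 *   Show shortcuts:       ?
 *   Increase font size:   Ctrl + =
 *   Decrease font size:   Ctrl + -
 */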