📄 parser.h
字号:
/*
* Summary: the core parser module
* Description: Interfaces, constants and types related to the XML parser
*
* Copy: See Copyright for the status of this software.
*
* Author: Daniel Veillard
*/
#ifndef __XML_PARSER_H__
#define __XML_PARSER_H__
#include <stdarg.h>
#include <libxml/xmlversion.h>
#include <libxml/tree.h>
#include <libxml/dict.h>
#include <libxml/hash.h>
#include <libxml/valid.h>
#include <libxml/entities.h>
#include <libxml/xmlerror.h>
#include <libxml/xmlstring.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
 * XML_DEFAULT_VERSION:
 *
 * The default version of XML used: 1.0.
 * Expands to the string literal "1.0".
 */
#define XML_DEFAULT_VERSION "1.0"
/**
 * xmlParserInput:
 *
 * An xmlParserInput is an input flow for the XML processor.
 * Each entity parsed is associated with an xmlParserInput (except the
 * few predefined ones). This is the case both for internal entities
 * - in which case the flow is already completely in memory - and for
 * external entities - in which case we use the buf structure for
 * progressive reading and I18N conversions to the internal UTF-8 format.
 */
/**
 * xmlParserInputDeallocate:
 * @str: the string to deallocate
 *
 * Signature of the callback used to free some of the allocations made
 * for a parser input. It receives the string to release and returns
 * nothing.
 */
typedef void (*xmlParserInputDeallocate)(xmlChar *str);
/*
 * State of one parser input: the backing buffer, where the data came
 * from, the array of characters being parsed and the position within it.
 */
struct _xmlParserInput {
    /* Input buffer */
    xmlParserInputBufferPtr buf;    /* UTF-8 encoded buffer */

    const char *filename;           /* the file analyzed, if any */
    const char *directory;          /* the directory/base of the file */
    const xmlChar *base;            /* base of the array to parse */
    const xmlChar *cur;             /* current char being parsed */
    const xmlChar *end;             /* end of the array to parse */
    int length;                     /* length, if known */
    int line;                       /* current line */
    int col;                        /* current column */
    /*
     * NOTE: consumed is only tested for equality in the parser code,
     * so even an overflow of this counter should not cause trouble
     * when parsing very large instances.
     */
    unsigned long consumed;         /* how many xmlChars already consumed */
    xmlParserInputDeallocate free;  /* function to deallocate the base */
    const xmlChar *encoding;        /* the encoding string for the entity */
    const xmlChar *version;         /* the version string for the entity */
    int standalone;                 /* was that entity marked standalone */
    int id;                         /* a unique identifier for the entity */
};
/**
 * xmlParserNodeInfo:
 *
 * The parser can be asked to collect node information, i.e. at what
 * place in the file each node was detected.
 * NOTE: This is off by default and not very well tested.
 */
typedef struct _xmlParserNodeInfo xmlParserNodeInfo;
typedef xmlParserNodeInfo *xmlParserNodeInfoPtr;

struct _xmlParserNodeInfo {
    const struct _xmlNode *node;    /* the node this record describes */

    /*
     * Position and line number at which the text that created the node
     * begins and ends.
     */
    unsigned long begin_pos;
    unsigned long begin_line;
    unsigned long end_pos;
    unsigned long end_line;
};
/*
 * xmlParserNodeInfoSeq:
 *
 * A sequence of xmlParserNodeInfo records, described by a storage
 * pointer and two counters.
 */
typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;

struct _xmlParserNodeInfoSeq {
    unsigned long maximum;      /* NOTE(review): presumably the capacity of buffer - confirm */
    unsigned long length;       /* NOTE(review): presumably the number of records in use - confirm */
    xmlParserNodeInfo *buffer;  /* storage for the xmlParserNodeInfo records */
};
/**
 * xmlParserInputState:
 *
 * The parser now also works as a state based parser.
 * The recursive parser uses the state information for entity processing.
 */
typedef enum {
    XML_PARSER_EOF             = -1,  /* nothing is to be parsed */
    XML_PARSER_START           = 0,   /* nothing has been parsed */
    XML_PARSER_MISC            = 1,   /* Misc* before int subset */
    XML_PARSER_PI              = 2,   /* within a processing instruction */
    XML_PARSER_DTD             = 3,   /* within some DTD content */
    XML_PARSER_PROLOG          = 4,   /* Misc* after internal subset */
    XML_PARSER_COMMENT         = 5,   /* within a comment */
    XML_PARSER_START_TAG       = 6,   /* within a start tag */
    XML_PARSER_CONTENT         = 7,   /* within the content */
    XML_PARSER_CDATA_SECTION   = 8,   /* within a CDATA section */
    XML_PARSER_END_TAG         = 9,   /* within a closing tag */
    XML_PARSER_ENTITY_DECL     = 10,  /* within an entity declaration */
    XML_PARSER_ENTITY_VALUE    = 11,  /* within an entity value in a decl */
    XML_PARSER_ATTRIBUTE_VALUE = 12,  /* within an attribute value */
    XML_PARSER_SYSTEM_LITERAL  = 13,  /* within a SYSTEM value */
    XML_PARSER_EPILOG          = 14,  /* the Misc* after the last end tag */
    XML_PARSER_IGNORE          = 15,  /* within an IGNORED section */
    XML_PARSER_PUBLIC_LITERAL  = 16   /* within a PUBLIC value */
} xmlParserInputState;
/**
 * XML_DETECT_IDS:
 *
 * Bit (value 2) in the loadsubset context field that tells the parser
 * to do ID/REFs lookups.
 * Use it to initialize xmlLoadExtDtdDefaultValue.
 */
#define XML_DETECT_IDS 2
/**
 * XML_COMPLETE_ATTRS:
 *
 * Bit (value 4) in the loadsubset context field that tells the parser
 * to complete the elements' attribute lists with the ones defaulted
 * from the DTDs.
 * Use it to initialize xmlLoadExtDtdDefaultValue.
 */
#define XML_COMPLETE_ATTRS 4
/**
 * XML_SKIP_IDS:
 *
 * Bit (value 8) in the loadsubset context field that tells the parser
 * not to do ID/REFs registration.
 * Used to initialize xmlLoadExtDtdDefaultValue in some special cases.
 */
#define XML_SKIP_IDS 8
/**
 * xmlParserMode:
 *
 * The modes a parser can operate in. The numeric values are spelled
 * out explicitly and run from 0 to 5.
 */
typedef enum {
    XML_PARSE_UNKNOWN  = 0,
    XML_PARSE_DOM      = 1,
    XML_PARSE_SAX      = 2,
    XML_PARSE_PUSH_DOM = 3,
    XML_PARSE_PUSH_SAX = 4,
    XML_PARSE_READER   = 5
} xmlParserMode;
/**
 * xmlParserCtxt:
 *
 * The parser context.
 * NOTE: This doesn't completely define the parser state. The (current?)
 * design of the parser uses recursive function calls since this allows
 * an easy mapping from the production rules of the specification
 * to the actual code. The drawback is that the actual function calls
 * also reflect the parser state. However most of the parsing routines
 * take the parser context pointer as their only argument, so migrating
 * to a state based parser for progressive parsing shouldn't be too hard.
 *
 * The order and types of the fields are part of the binary layout of
 * the structure; only the comments should be edited casually.
 */
struct _xmlParserCtxt {
struct _xmlSAXHandler *sax; /* The SAX handler */
void *userData; /* For SAX interface only, used by DOM build */
xmlDocPtr myDoc; /* the document being built */
int wellFormed; /* is the document well formed */
int replaceEntities; /* shall we replace entities ? */
const xmlChar *version; /* the XML version string */
const xmlChar *encoding; /* the declared encoding, if any */
int standalone; /* standalone document */
int html; /* an HTML(1)/Docbook(2) document */
/* Input stream stack */
xmlParserInputPtr input; /* Current input stream */
int inputNr; /* Number of current input streams */
int inputMax; /* Max number of input streams */
xmlParserInputPtr *inputTab; /* stack of inputs */
/* Node analysis stack only used for DOM building */
xmlNodePtr node; /* Current parsed Node */
int nodeNr; /* Depth of the parsing stack */
int nodeMax; /* Max depth of the parsing stack */
xmlNodePtr *nodeTab; /* array of nodes */
int record_info; /* Whether node info should be kept */
xmlParserNodeInfoSeq node_seq; /* info about each node parsed */
int errNo; /* error code */
int hasExternalSubset; /* reference and external subset */
int hasPErefs; /* the internal subset has PE refs */
int external; /* are we parsing an external entity */
int valid; /* is the document valid */
int validate; /* shall we try to validate ? */
xmlValidCtxt vctxt; /* The validity context */
xmlParserInputState instate; /* current type of input */
int token; /* next char look-ahead */
char *directory; /* the data directory */
/* Node name stack */
const xmlChar *name; /* Name of the current parsed node */
int nameNr; /* Depth of the parsing stack */
int nameMax; /* Max depth of the parsing stack */
const xmlChar * *nameTab; /* array of node names */
long nbChars; /* number of xmlChar processed */
long checkIndex; /* used by progressive parsing lookup */
int keepBlanks; /* ugly but ... */
int disableSAX; /* SAX callbacks are disabled */
int inSubset; /* Parsing is in int 1/ext 2 subset */
const xmlChar * intSubName; /* name of subset */
xmlChar * extSubURI; /* URI of external subset */
xmlChar * extSubSystem; /* SYSTEM ID of external subset */
/* xml:space values */
int * space; /* Should the parser preserve spaces */
int spaceNr; /* Depth of the parsing stack */
int spaceMax; /* Max depth of the parsing stack */
int * spaceTab; /* array of space infos */
int depth; /* to prevent entity substitution loops */
xmlParserInputPtr entity; /* used to check entities boundaries */
int charset; /* encoding of the in-memory content
actually an xmlCharEncoding */
int nodelen; /* Those two fields are there to */
int nodemem; /* speed up large node parsing */
int pedantic; /* signal pedantic warnings */
void *_private; /* For user data, libxml won't touch it */
int loadsubset; /* should the external subset be loaded */
int linenumbers; /* set line number in element content */
void *catalogs; /* document's own catalog */
int recovery; /* run in recovery mode */
int progressive; /* is this a progressive parsing */
xmlDictPtr dict; /* dictionary for the parser */
const xmlChar * *atts; /* array for the attributes callbacks */
int maxatts; /* the size of the array */
int docdict; /* use strings from dict to build tree */
/*
 * pre-interned strings
 */
const xmlChar *str_xml;
const xmlChar *str_xmlns;
const xmlChar *str_xml_ns;
/*
 * Everything below is used only by the new SAX mode
 */
int sax2; /* operating in the new SAX mode */
int nsNr; /* the number of inherited namespaces */
int nsMax; /* the size of the arrays */
const xmlChar * *nsTab; /* the array of prefix/namespace name */
int *attallocs; /* which attributes were allocated */
void * *pushTab; /* array of data for push */
xmlHashTablePtr attsDefault; /* defaulted attributes if any */
xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */
int nsWellFormed; /* is the document XML Namespace okay */
int options; /* Extra options */
/*
 * Those fields are needed only for streaming parsing so far
 */
int dictNames; /* Use dictionary names for the tree */
int freeElemsNr; /* number of freed element nodes */
xmlNodePtr freeElems; /* List of freed element nodes */
int freeAttrsNr; /* number of freed attributes nodes */
xmlAttrPtr freeAttrs; /* List of freed attributes nodes */
/*
 * the complete error information for the last error.
 */
xmlError lastError;
xmlParserMode parseMode; /* the parser mode */
};
/**
 * xmlSAXLocator:
 *
 * A SAX Locator: a table of four callbacks, each taking an opaque
 * context pointer.
 * NOTE(review): judging by their names the callbacks report the public
 * ID, the system ID, the line number and the column number - confirm
 * the exact semantics against the implementation.
 */
struct _xmlSAXLocator {
    const xmlChar *(*getPublicId)(void *ctx);
    const xmlChar *(*getSystemId)(void *ctx);
    int (*getLineNumber)(void *ctx);
    int (*getColumnNumber)(void *ctx);
};
/**
 * xmlSAXHandler:
 *
 * A SAX handler is a bunch of callbacks called by the parser when the
 * processing of the input generates data or structure information.
 */
/**
 * resolveEntitySAXFunc:
 * @ctx: the user data (XML parser context)
 * @publicId: The public ID of the entity
 * @systemId: The system ID of the entity
 *
 * Callback:
 * The entity loader. To control the loading of external entities,
 * the application can either:
 *   - override this resolveEntity() callback in the SAX block
 *   - or, better, use the xmlSetExternalEntityLoader() function to
 *     set up its own entity resolution routine
 *
 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
 */
typedef xmlParserInputPtr (*resolveEntitySAXFunc)(void *ctx,
                                                  const xmlChar *publicId,
                                                  const xmlChar *systemId);
/**
 * internalSubsetSAXFunc:
 * @ctx: the user data (XML parser context)
 * @name: the root element name
 * @ExternalID: the external ID
 * @SystemID: the SYSTEM ID (e.g. filename or URL)
 *
 * Callback invoked on the internal subset declaration.
 * It returns nothing.
 */
typedef void (*internalSubsetSAXFunc)(void *ctx,
                                      const xmlChar *name,
                                      const xmlChar *ExternalID,
                                      const xmlChar *SystemID);
/**
 * externalSubsetSAXFunc:
 * @ctx: the user data (XML parser context)
 * @name: the root element name
 * @ExternalID: the external ID
 * @SystemID: the SYSTEM ID (e.g. filename or URL)
 *
 * Callback invoked on the external subset declaration.
 * It returns nothing.
 */
typedef void (*externalSubsetSAXFunc)(void *ctx,
                                      const xmlChar *name,
                                      const xmlChar *ExternalID,
                                      const xmlChar *SystemID);
/**
 * getEntitySAXFunc:
 * @ctx: the user data (XML parser context)
 * @name: The entity name
 *
 * Callback used to get an entity by name.
 *
 * Returns the xmlEntityPtr if found.
 * NOTE(review): presumably NULL when no such entity exists - confirm.
 */
typedef xmlEntityPtr (*getEntitySAXFunc)(void *ctx,
                                         const xmlChar *name);
/**
 * getParameterEntitySAXFunc:
 * @ctx: the user data (XML parser context)
 * @name: The entity name
 *
 * Callback used to get a parameter entity by name.
 *
 * Returns the xmlEntityPtr if found.
 * NOTE(review): presumably NULL when no such entity exists - confirm.
 */
typedef xmlEntityPtr (*getParameterEntitySAXFunc)(void *ctx,
                                                  const xmlChar *name);
/**
* entityDeclSAXFunc:
* @ctx: the user data (XML parser context)
* @name: the entity name
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -