⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xmlregexp.c

📁 xml开源解析代码.版本为libxml2-2.6.29,可支持GB2312.网络消息发送XML时很有用.
💻 C
📖 第 1 页 / 共 5 页
字号:
/* * regexp.c: generic and extensible Regular Expression engine * * Basically designed with the purpose of compiling regexps for  * the variety of validation/shemas mechanisms now available in * XML related specifications these include: *    - XML-1.0 DTD validation *    - XML Schemas structure part 1 *    - XML Schemas Datatypes part 2 especially Appendix F *    - RELAX-NG/TREX i.e. the counter proposal * * See Copyright for the status of this software. * * Daniel Veillard <veillard@redhat.com> */#define IN_LIBXML#include "libxml.h"#ifdef LIBXML_REGEXP_ENABLED/* #define DEBUG_ERR */#include <stdio.h>#include <string.h>#ifdef HAVE_LIMITS_H#include <limits.h>#endif#include <libxml/tree.h>#include <libxml/parserInternals.h>#include <libxml/xmlregexp.h>#include <libxml/xmlautomata.h>#include <libxml/xmlunicode.h>#ifndef INT_MAX#define INT_MAX 123456789 /* easy to flag and big enough for our needs */#endif/* #define DEBUG_REGEXP_GRAPH *//* #define DEBUG_REGEXP_EXEC *//* #define DEBUG_PUSH *//* #define DEBUG_COMPACTION */#define MAX_PUSH 10000000#define ERROR(str)							\    ctxt->error = XML_REGEXP_COMPILE_ERROR;				\    xmlRegexpErrCompile(ctxt, str);#define NEXT ctxt->cur++#define CUR (*(ctxt->cur))#define NXT(index) (ctxt->cur[index])#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)#define NEXTL(l) ctxt->cur += l;#define XML_REG_STRING_SEPARATOR '|'/* * Need PREV to check on a '-' within a Character Group. May only be used * when it's guaranteed that cur is not at the beginning of ctxt->string! 
*/#define PREV (ctxt->cur[-1])/** * TODO: * * macro to flag unimplemented blocks */#define TODO 								\    xmlGenericError(xmlGenericErrorContext,				\	    "Unimplemented block at %s:%d\n",				\            __FILE__, __LINE__);/************************************************************************ * 									* * 			Datatypes and structures			* * 									* ************************************************************************//* * Note: the order of the enums below is significant, do not shuffle */typedef enum {    XML_REGEXP_EPSILON = 1,    XML_REGEXP_CHARVAL,    XML_REGEXP_RANGES,    XML_REGEXP_SUBREG,  /* used for () sub regexps */    XML_REGEXP_STRING,    XML_REGEXP_ANYCHAR, /* . */    XML_REGEXP_ANYSPACE, /* \s */    XML_REGEXP_NOTSPACE, /* \S */    XML_REGEXP_INITNAME, /* \l */    XML_REGEXP_NOTINITNAME, /* \L */    XML_REGEXP_NAMECHAR, /* \c */    XML_REGEXP_NOTNAMECHAR, /* \C */    XML_REGEXP_DECIMAL, /* \d */    XML_REGEXP_NOTDECIMAL, /* \D */    XML_REGEXP_REALCHAR, /* \w */    XML_REGEXP_NOTREALCHAR, /* \W */    XML_REGEXP_LETTER = 100,    XML_REGEXP_LETTER_UPPERCASE,    XML_REGEXP_LETTER_LOWERCASE,    XML_REGEXP_LETTER_TITLECASE,    XML_REGEXP_LETTER_MODIFIER,    XML_REGEXP_LETTER_OTHERS,    XML_REGEXP_MARK,    XML_REGEXP_MARK_NONSPACING,    XML_REGEXP_MARK_SPACECOMBINING,    XML_REGEXP_MARK_ENCLOSING,    XML_REGEXP_NUMBER,    XML_REGEXP_NUMBER_DECIMAL,    XML_REGEXP_NUMBER_LETTER,    XML_REGEXP_NUMBER_OTHERS,    XML_REGEXP_PUNCT,    XML_REGEXP_PUNCT_CONNECTOR,    XML_REGEXP_PUNCT_DASH,    XML_REGEXP_PUNCT_OPEN,    XML_REGEXP_PUNCT_CLOSE,    XML_REGEXP_PUNCT_INITQUOTE,    XML_REGEXP_PUNCT_FINQUOTE,    XML_REGEXP_PUNCT_OTHERS,    XML_REGEXP_SEPAR,    XML_REGEXP_SEPAR_SPACE,    XML_REGEXP_SEPAR_LINE,    XML_REGEXP_SEPAR_PARA,    XML_REGEXP_SYMBOL,    XML_REGEXP_SYMBOL_MATH,    XML_REGEXP_SYMBOL_CURRENCY,    XML_REGEXP_SYMBOL_MODIFIER,    XML_REGEXP_SYMBOL_OTHERS,    XML_REGEXP_OTHER,    XML_REGEXP_OTHER_CONTROL,    XML_REGEXP_OTHER_FORMAT,    
XML_REGEXP_OTHER_PRIVATE,    XML_REGEXP_OTHER_NA,    XML_REGEXP_BLOCK_NAME} xmlRegAtomType;typedef enum {    XML_REGEXP_QUANT_EPSILON = 1,    XML_REGEXP_QUANT_ONCE,    XML_REGEXP_QUANT_OPT,    XML_REGEXP_QUANT_MULT,    XML_REGEXP_QUANT_PLUS,    XML_REGEXP_QUANT_ONCEONLY,    XML_REGEXP_QUANT_ALL,    XML_REGEXP_QUANT_RANGE} xmlRegQuantType;typedef enum {    XML_REGEXP_START_STATE = 1,    XML_REGEXP_FINAL_STATE,    XML_REGEXP_TRANS_STATE,    XML_REGEXP_SINK_STATE,    XML_REGEXP_UNREACH_STATE} xmlRegStateType;typedef enum {    XML_REGEXP_MARK_NORMAL = 0,    XML_REGEXP_MARK_START,    XML_REGEXP_MARK_VISITED} xmlRegMarkedType;typedef struct _xmlRegRange xmlRegRange;typedef xmlRegRange *xmlRegRangePtr;struct _xmlRegRange {    int neg;		/* 0 normal, 1 not, 2 exclude */    xmlRegAtomType type;    int start;    int end;    xmlChar *blockName;};typedef struct _xmlRegAtom xmlRegAtom;typedef xmlRegAtom *xmlRegAtomPtr;typedef struct _xmlAutomataState xmlRegState;typedef xmlRegState *xmlRegStatePtr;struct _xmlRegAtom {    int no;    xmlRegAtomType type;    xmlRegQuantType quant;    int min;    int max;    void *valuep;    void *valuep2;    int neg;    int codepoint;    xmlRegStatePtr start;    xmlRegStatePtr stop;    int maxRanges;    int nbRanges;    xmlRegRangePtr *ranges;    void *data;};typedef struct _xmlRegCounter xmlRegCounter;typedef xmlRegCounter *xmlRegCounterPtr;struct _xmlRegCounter {    int min;    int max;};typedef struct _xmlRegTrans xmlRegTrans;typedef xmlRegTrans *xmlRegTransPtr;struct _xmlRegTrans {    xmlRegAtomPtr atom;    int to;    int counter;    int count;    int nd;};struct _xmlAutomataState {    xmlRegStateType type;    xmlRegMarkedType mark;    xmlRegMarkedType reached;    int no;    int maxTrans;    int nbTrans;    xmlRegTrans *trans;    /*  knowing states ponting to us can speed things up */    int maxTransTo;    int nbTransTo;    int *transTo;};typedef struct _xmlAutomata xmlRegParserCtxt;typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;struct 
_xmlAutomata {    xmlChar *string;    xmlChar *cur;    int error;    int neg;    xmlRegStatePtr start;    xmlRegStatePtr end;    xmlRegStatePtr state;    xmlRegAtomPtr atom;    int maxAtoms;    int nbAtoms;    xmlRegAtomPtr *atoms;    int maxStates;    int nbStates;    xmlRegStatePtr *states;    int maxCounters;    int nbCounters;    xmlRegCounter *counters;    int determinist;    int negs;};struct _xmlRegexp {    xmlChar *string;    int nbStates;    xmlRegStatePtr *states;    int nbAtoms;    xmlRegAtomPtr *atoms;    int nbCounters;    xmlRegCounter *counters;    int determinist;    /*     * That's the compact form for determinists automatas     */    int nbstates;    int *compact;    void **transdata;    int nbstrings;    xmlChar **stringMap;};typedef struct _xmlRegExecRollback xmlRegExecRollback;typedef xmlRegExecRollback *xmlRegExecRollbackPtr;struct _xmlRegExecRollback {    xmlRegStatePtr state;/* the current state */    int index;		/* the index in the input stack */    int nextbranch;	/* the next transition to explore in that state */    int *counts;	/* save the automata state if it has some */};typedef struct _xmlRegInputToken xmlRegInputToken;typedef xmlRegInputToken *xmlRegInputTokenPtr;struct _xmlRegInputToken {    xmlChar *value;    void *data;};struct _xmlRegExecCtxt {    int status;		/* execution status != 0 indicate an error */    int determinist;	/* did we find an indeterministic behaviour */    xmlRegexpPtr comp;	/* the compiled regexp */    xmlRegExecCallbacks callback;    void *data;    xmlRegStatePtr state;/* the current state */    int transno;	/* the current transition on that state */    int transcount;	/* the number of chars in char counted transitions */    /*     * A stack of rollback states     */    int maxRollbacks;    int nbRollbacks;    xmlRegExecRollback *rollbacks;    /*     * The state of the automata if any     */    int *counts;    /*     * The input stack     */    int inputStackMax;    int inputStackNr;    int index;    int 
*charStack;    const xmlChar *inputString; /* when operating on characters */    xmlRegInputTokenPtr inputStack;/* when operating on strings */    /*     * error handling     */    int errStateNo;		/* the error state number */    xmlRegStatePtr errState;    /* the error state */    xmlChar *errString;		/* the string raising the error */    int *errCounts;		/* counters at the error state */    int nbPush;};#define REGEXP_ALL_COUNTER	0x123456#define REGEXP_ALL_LAX_COUNTER	0x123457static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);static void xmlRegFreeState(xmlRegStatePtr state);static void xmlRegFreeAtom(xmlRegAtomPtr atom);static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr);static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,                  int neg, int start, int end, const xmlChar *blockName);/************************************************************************ *									* * 		Regexp memory error handler				* *									* ************************************************************************//** * xmlRegexpErrMemory: * @extra:  extra information * * Handle an out of memory condition */static voidxmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra){    const char *regexp = NULL;    if (ctxt != NULL) {        regexp = (const char *) ctxt->string;	ctxt->error = XML_ERR_NO_MEMORY;    }    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,		    XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,		    regexp, NULL, 0, 0,		    "Memory allocation failed : %s\n", extra);}/** * xmlRegexpErrCompile: * @extra:  extra information * * Handle a compilation failure */static voidxmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra){    const char *regexp = NULL;    int idx = 0;    if (ctxt != NULL) {        regexp = (const char *) ctxt->string;	idx = ctxt->cur - ctxt->string;	ctxt->error = XML_REGEXP_COMPILE_ERROR;  
  }    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,		    XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra,		    regexp, NULL, idx, 0,		    "failed to compile: %s\n", extra);}/************************************************************************ * 									* * 			Allocation/Deallocation				* * 									* ************************************************************************/static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);/** * xmlRegEpxFromParse: * @ctxt:  the parser context used to build it * * Allocate a new regexp and fill it with the result from the parser * * Returns the new regexp or NULL in case of error */static xmlRegexpPtrxmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {    xmlRegexpPtr ret;    ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));    if (ret == NULL) {	xmlRegexpErrMemory(ctxt, "compiling regexp");	return(NULL);    }    memset(ret, 0, sizeof(xmlRegexp));    ret->string = ctxt->string;    ret->nbStates = ctxt->nbStates;    ret->states = ctxt->states;    ret->nbAtoms = ctxt->nbAtoms;    ret->atoms = ctxt->atoms;    ret->nbCounters = ctxt->nbCounters;    ret->counters = ctxt->counters;    ret->determinist = ctxt->determinist;    if (ret->determinist == -1) {        xmlRegexpIsDeterminist(ret);    }    if ((ret->determinist != 0) &&	(ret->nbCounters == 0) &&	(ctxt->negs == 0) &&	(ret->atoms != NULL) &&	(ret->atoms[0] != NULL) &&	(ret->atoms[0]->type == XML_REGEXP_STRING)) {	int i, j, nbstates = 0, nbatoms = 0;	int *stateRemap;	int *stringRemap;	int *transitions;	void **transdata;	xmlChar **stringMap;        xmlChar *value;	/*	 * Switch to a compact representation	 * 1/ counting the effective number of states left

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -