⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xmlregexp.c.svn-base

📁 这是一个用于解析xml文件的类库。使用这个类库
💻 SVN-BASE
📖 第 1 页 / 共 5 页
字号:
/*
 * regexp.c: generic and extensible Regular Expression engine
 *
 * Basically designed with the purpose of compiling regexps for
 * the variety of validation/schema mechanisms now available in
 * XML related specifications; these include:
 *    - XML-1.0 DTD validation
 *    - XML Schemas structure part 1
 *    - XML Schemas Datatypes part 2 especially Appendix F
 *    - RELAX-NG/TREX i.e. the counter proposal
 *
 * See Copyright for the status of this software.
 *
 * Daniel Veillard <veillard@redhat.com>
 */

#define IN_LIBXML
#include "libxml.h"

/*
 * Everything below is compiled only when regexp support is enabled.
 * The matching #endif is not in this part of the listing.
 */
#ifdef LIBXML_REGEXP_ENABLED

#include <stdio.h>
#include <string.h>
#ifdef HAVE_LIMITS_H
#include <limits.h>
#endif

#include <libxml/tree.h>
#include <libxml/parserInternals.h>
#include <libxml/xmlregexp.h>
#include <libxml/xmlautomata.h>
#include <libxml/xmlunicode.h>

/* Fallback used only when no header above provided INT_MAX. */
#ifndef INT_MAX
#define INT_MAX 123456789 /* easy to flag and big enough for our needs */
#endif

/* Debugging switches, all disabled by default. */
/* #define DEBUG_REGEXP_GRAPH  */
/* #define DEBUG_REGEXP_EXEC */
/* #define DEBUG_PUSH */
/* #define DEBUG_COMPACTION */

/*
 * Parser helper macros.  Each one expands to code that references a
 * variable named "ctxt", so they can only be used where such a variable
 * is in scope.
 *
 * ERROR(str):  stores XML_REGEXP_COMPILE_ERROR in ctxt->error and then
 *              calls xmlRegexpErrCompile(ctxt, str), which stores the
 *              same code again when ctxt is not NULL.
 *              NOTE(review): the expansion is two separate statements,
 *              the second already terminated by ';', and it is not
 *              wrapped in do { } while (0).  Used as the body of an
 *              unbraced if/else, only the first statement would be
 *              conditional.  Callers are not visible in this part of
 *              the file - confirm before changing the macro.
 * NEXT:        post-increments ctxt->cur.
 * CUR:         the value ctxt->cur points at.
 * NXT(index):  ctxt->cur[index].
 */
#define ERROR(str)                                                      \
    ctxt->error = XML_REGEXP_COMPILE_ERROR;                             \
    xmlRegexpErrCompile(ctxt, str);
#define NEXT ctxt->cur++
#define CUR (*(ctxt->cur))
#define NXT(index) (ctxt->cur[index])

/*
 * CUR_SCHAR(s, l):  calls xmlStringCurrentChar(NULL, s, &l); the address
 *                   of l is taken, so l must be an lvalue.
 * NEXTL(l):         adds l to ctxt->cur; the expansion already ends
 *                   with ';'.
 */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
#define NEXTL(l) ctxt->cur += l;

/**
 * TODO:
 *
 * macro to flag unimplemented blocks: reports the current __FILE__ and
 * __LINE__ through xmlGenericError.  The expansion already ends with ';'.
 */
#define TODO                                                            \
    xmlGenericError(xmlGenericErrorContext,                             \
            "Unimplemented block at %s:%d\n",                           \
            __FILE__, __LINE__);


/************************************************************************
 *                                                                      *
 *                      Datatypes and structures                        *
 *                                                                      *
 ************************************************************************/

/*
 * Kind of an atom or of a range entry (used as the "type" field of both
 * struct _xmlRegAtom and struct _xmlRegRange below).
 *
 * The escape noted beside a member is the regexp escape it stands for.
 * The original comments repeated the lowercase letter on each negated
 * class; they are given here as the lowercase/uppercase pairs of the
 * XML Schema regexp multi-character escapes.
 * NOTE(review): the code mapping escapes to these values is not in this
 * part of the file - confirm against the parser.
 * NOTE(review): the LETTER .. OTHER_NA names appear to mirror the Unicode
 * general categories - confirm against the code that evaluates them.
 */
typedef enum {
    XML_REGEXP_EPSILON = 1,
    XML_REGEXP_CHARVAL,
    XML_REGEXP_RANGES,
    XML_REGEXP_SUBREG,
    XML_REGEXP_STRING,
    XML_REGEXP_ANYCHAR, /* . */
    XML_REGEXP_ANYSPACE, /* \s */
    XML_REGEXP_NOTSPACE, /* \S */
    XML_REGEXP_INITNAME, /* \i */
    XML_REGEXP_NOTINITNAME, /* \I */
    XML_REGEXP_NAMECHAR, /* \c */
    XML_REGEXP_NOTNAMECHAR, /* \C */
    XML_REGEXP_DECIMAL, /* \d */
    XML_REGEXP_NOTDECIMAL, /* \D */
    XML_REGEXP_REALCHAR, /* \w */
    XML_REGEXP_NOTREALCHAR, /* \W */
    XML_REGEXP_LETTER,
    XML_REGEXP_LETTER_UPPERCASE,
    XML_REGEXP_LETTER_LOWERCASE,
    XML_REGEXP_LETTER_TITLECASE,
    XML_REGEXP_LETTER_MODIFIER,
    XML_REGEXP_LETTER_OTHERS,
    XML_REGEXP_MARK,
    XML_REGEXP_MARK_NONSPACING,
    XML_REGEXP_MARK_SPACECOMBINING,
    XML_REGEXP_MARK_ENCLOSING,
    XML_REGEXP_NUMBER,
    XML_REGEXP_NUMBER_DECIMAL,
    XML_REGEXP_NUMBER_LETTER,
    XML_REGEXP_NUMBER_OTHERS,
    XML_REGEXP_PUNCT,
    XML_REGEXP_PUNCT_CONNECTOR,
    XML_REGEXP_PUNCT_DASH,
    XML_REGEXP_PUNCT_OPEN,
    XML_REGEXP_PUNCT_CLOSE,
    XML_REGEXP_PUNCT_INITQUOTE,
    XML_REGEXP_PUNCT_FINQUOTE,
    XML_REGEXP_PUNCT_OTHERS,
    XML_REGEXP_SEPAR,
    XML_REGEXP_SEPAR_SPACE,
    XML_REGEXP_SEPAR_LINE,
    XML_REGEXP_SEPAR_PARA,
    XML_REGEXP_SYMBOL,
    XML_REGEXP_SYMBOL_MATH,
    XML_REGEXP_SYMBOL_CURRENCY,
    XML_REGEXP_SYMBOL_MODIFIER,
    XML_REGEXP_SYMBOL_OTHERS,
    XML_REGEXP_OTHER,
    XML_REGEXP_OTHER_CONTROL,
    XML_REGEXP_OTHER_FORMAT,
    XML_REGEXP_OTHER_PRIVATE,
    XML_REGEXP_OTHER_NA,
    XML_REGEXP_BLOCK_NAME
} xmlRegAtomType;

/*
 * Quantifier attached to an atom (the "quant" field of struct
 * _xmlRegAtom).  Numbering starts at 1.
 */
typedef enum {
    XML_REGEXP_QUANT_EPSILON = 1,
    XML_REGEXP_QUANT_ONCE,
    XML_REGEXP_QUANT_OPT,
    XML_REGEXP_QUANT_MULT,
    XML_REGEXP_QUANT_PLUS,
    XML_REGEXP_QUANT_ONCEONLY,
    XML_REGEXP_QUANT_ALL,
    XML_REGEXP_QUANT_RANGE
} xmlRegQuantType;

/* Role of an automata state (the "type" field of struct _xmlAutomataState). */
typedef enum {
    XML_REGEXP_START_STATE = 1,
    XML_REGEXP_FINAL_STATE,
    XML_REGEXP_TRANS_STATE
} xmlRegStateType;

/*
 * Marker values for the "mark" and "reached" fields of struct
 * _xmlAutomataState.  NORMAL is 0, so a zero-filled state is unmarked.
 */
typedef enum {
    XML_REGEXP_MARK_NORMAL = 0,
    XML_REGEXP_MARK_START,
    XML_REGEXP_MARK_VISITED
} xmlRegMarkedType;

/* One entry of an atom's "ranges" array. */
typedef struct _xmlRegRange xmlRegRange;
typedef xmlRegRange *xmlRegRangePtr;

struct _xmlRegRange {
    int neg;		/* 0 normal, 1 not, 2 exclude */
    xmlRegAtomType type;
    int start;
    int end;
    xmlChar *blockName;
};

typedef struct _xmlRegAtom xmlRegAtom;
typedef xmlRegAtom *xmlRegAtomPtr;

/* xmlRegState is the internal name for struct _xmlAutomataState. */
typedef struct _xmlAutomataState xmlRegState;
typedef xmlRegState *xmlRegStatePtr;

/*
 * An atom of the regexp.  For atoms of type XML_REGEXP_STRING,
 * xmlRegEpxFromParse() reads valuep as the xmlChar string value.
 * ranges is an array of range pointers; presumably nbRanges entries are
 * in use out of maxRanges allocated - the code maintaining it is not in
 * this part of the file.
 */
struct _xmlRegAtom {
    int no;
    xmlRegAtomType type;
    xmlRegQuantType quant;
    int min;
    int max;

    void *valuep;
    void *valuep2;
    int neg;
    int codepoint;
    xmlRegStatePtr start;
    xmlRegStatePtr stop;
    int maxRanges;
    int nbRanges;
    xmlRegRangePtr *ranges;
    void *data;
};

/* A counter with its minimum and maximum. */
typedef struct _xmlRegCounter xmlRegCounter;
typedef xmlRegCounter *xmlRegCounterPtr;

struct _xmlRegCounter {
    int min;
    int max;
};

/*
 * One transition stored in a state's "trans" array.
 * NOTE(review): "to" is presumably the index of the target state and
 * "counter"/"count" indexes into the counters array - confirm against
 * the code that builds transitions.
 */
typedef struct _xmlRegTrans xmlRegTrans;
typedef xmlRegTrans *xmlRegTransPtr;

struct _xmlRegTrans {
    xmlRegAtomPtr atom;
    int to;
    int counter;
    int count;
};

/* A state of the automata, holding an array of transitions. */
struct _xmlAutomataState {
    xmlRegStateType type;
    xmlRegMarkedType mark;
    xmlRegMarkedType reached;
    int no;

    int maxTrans;
    int nbTrans;
    xmlRegTrans *trans;
};

/*
 * The regexp parser context; xmlRegParserCtxt is the internal name for
 * struct _xmlAutomata.
 *
 * string and cur are used by xmlRegexpErrCompile() to report the error
 * offset as cur - string.  error is set by the two error handlers below.
 * string, states, atoms, counters (with their nb* counts) and determinist
 * are copied into the resulting xmlRegexp by xmlRegEpxFromParse().
 */
typedef struct _xmlAutomata xmlRegParserCtxt;
typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;

struct _xmlAutomata {
    xmlChar *string;
    xmlChar *cur;

    int error;
    int neg;

    xmlRegStatePtr start;
    xmlRegStatePtr end;
    xmlRegStatePtr state;

    xmlRegAtomPtr atom;

    int maxAtoms;
    int nbAtoms;
    xmlRegAtomPtr *atoms;

    int maxStates;
    int nbStates;
    xmlRegStatePtr *states;

    int maxCounters;
    int nbCounters;
    xmlRegCounter *counters;

    int determinist;
};

/*
 * A compiled regexp.  The first group of fields is filled by
 * xmlRegEpxFromParse() from the parser context fields of the same names.
 */
struct _xmlRegexp {
    xmlChar *string;
    int nbStates;
    xmlRegStatePtr *states;
    int nbAtoms;
    xmlRegAtomPtr *atoms;
    int nbCounters;
    xmlRegCounter *counters;
    int determinist;
    /*
     * That's the compact form for deterministic automata
     */
    int nbstates;
    int *compact;
    void **transdata;
    int nbstrings;
    xmlChar **stringMap;
};

/* One entry of the execution context's rollback stack. */
typedef struct _xmlRegExecRollback xmlRegExecRollback;
typedef xmlRegExecRollback *xmlRegExecRollbackPtr;

struct _xmlRegExecRollback {
    xmlRegStatePtr state;/* the current state */
    int index;		/* the index in the input stack */
    int nextbranch;	/* the next transition to explore in that state */
    int *counts;	/* save the automata state if it has some */
};

/* One entry of the execution context's input stack. */
typedef struct _xmlRegInputToken xmlRegInputToken;
typedef xmlRegInputToken *xmlRegInputTokenPtr;

struct _xmlRegInputToken {
    xmlChar *value;
    void *data;
};

/* State kept while running a compiled regexp against some input. */
struct _xmlRegExecCtxt {
    int status;		/* execution status != 0 indicate an error */
    int determinist;	/* did we find an indeterministic behaviour */
    xmlRegexpPtr comp;	/* the compiled regexp */
    xmlRegExecCallbacks callback;
    void *data;

    xmlRegStatePtr state;/* the current state */
    int transno;	/* the current transition on that state */
    int transcount;	/* the number of chars in char counted transitions */

    /*
     * A stack of rollback states
     */
    int maxRollbacks;
    int nbRollbacks;
    xmlRegExecRollback *rollbacks;

    /*
     * The state of the automata if any
     */
    int *counts;

    /*
     * The input stack
     */
    int inputStackMax;
    int inputStackNr;
    int index;
    int *charStack;
    const xmlChar *inputString; /* when operating on characters */
    xmlRegInputTokenPtr inputStack;/* when operating on strings */

};

/*
 * Two distinct magic values.
 * NOTE(review): where they are used is not visible in this part of the
 * file - see the later pages before relying on their meaning.
 */
#define REGEXP_ALL_COUNTER	0x123456
#define REGEXP_ALL_LAX_COUNTER	0x123457

/* Forward declarations of static functions not shown in this part of the file. */
static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
static void xmlRegFreeState(xmlRegStatePtr state);
static void xmlRegFreeAtom(xmlRegAtomPtr atom);

/************************************************************************
 *									*
 * 		Regexp memory error handler				*
 *									*
 ************************************************************************/
/**
 * xmlRegexpErrMemory:
 * @ctxt:  the regexp parser context, may be NULL
 * @extra:  extra information
 *
 * Handle an out of memory condition.
 *
 * When @ctxt is not NULL, its error field is set to XML_ERR_NO_MEMORY and
 * its regexp string is passed along with the report.  The error is then
 * raised through __xmlRaiseError() in the XML_FROM_REGEXP domain at
 * XML_ERR_FATAL level, with @extra appended to the message.
 */
static void
xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra)
{
    const char *regexp = NULL;
    if (ctxt != NULL) {
        regexp = (const char *) ctxt->string;
        ctxt->error = XML_ERR_NO_MEMORY;
    }
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
                    XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
                    regexp, NULL, 0, 0,
                    "Memory allocation failed : %s\n", extra);
}

/**
 * xmlRegexpErrCompile:
 * @ctxt:  the regexp parser context, may be NULL
 * @extra:  extra information
 *
 * Handle a compilation failure.
 *
 * When @ctxt is not NULL, its error field is set to
 * XML_REGEXP_COMPILE_ERROR, and the regexp string plus the current offset
 * in it are passed along with the report.  The error is then raised
 * through __xmlRaiseError() in the XML_FROM_REGEXP domain at
 * XML_ERR_FATAL level, with @extra appended to the message.
 */
static void
xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
{
    const char *regexp = NULL;
    int idx = 0;

    if (ctxt != NULL) {
        regexp = (const char *) ctxt->string;
        /* offset of the parse position; the pointer difference is stored in an int */
        idx = ctxt->cur - ctxt->string;
        ctxt->error = XML_REGEXP_COMPILE_ERROR;
    }
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
                    XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra,
                    regexp, NULL, idx, 0,
                    "failed to compile: %s\n", extra);
}

/************************************************************************
 * 									*
 * 			Allocation/Deallocation				*
 * 									*
 ************************************************************************/

static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
/**
 * xmlRegEpxFromParse:
 * @ctxt:  the parser context used to build it
 *
 * Allocate a new regexp and fill it with the result from the parser.
 *
 * The string, states, atoms and counters pointers are copied from @ctxt
 * as-is (no deep copy), so the new regexp initially shares those arrays
 * with the context.
 * NOTE(review): @ctxt is dereferenced without a NULL check, and whether
 * the context's pointers are cleared afterwards is decided in code that
 * is outside this part of the file.
 *
 * Returns the new regexp or NULL in case of error
 */
static xmlRegexpPtr
xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
    xmlRegexpPtr ret;

    ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
    if (ret == NULL) {
        xmlRegexpErrMemory(ctxt, "compiling regexp");
        return(NULL);
    }
    /* zero everything first so the compact-form fields start out NULL/0 */
    memset(ret, 0, sizeof(xmlRegexp));
    ret->string = ctxt->string;
    ret->nbStates = ctxt->nbStates;
    ret->states = ctxt->states;
    ret->nbAtoms = ctxt->nbAtoms;
    ret->atoms = ctxt->atoms;
    ret->nbCounters = ctxt->nbCounters;
    ret->counters = ctxt->counters;
    ret->determinist = ctxt->determinist;

    /*
     * The compact form is only attempted when the automata is flagged
     * determinist, uses no counters, and its first atom exists and is of
     * the string type.
     */
    if ((ret->determinist != 0) &&
        (ret->nbCounters == 0) &&
        (ret->atoms != NULL) &&
        (ret->atoms[0] != NULL) &&
        (ret->atoms[0]->type == XML_REGEXP_STRING)) {
        int i, j, nbstates = 0, nbatoms = 0;
        int *stateRemap;
        int *stringRemap;
        int *transitions;
        void **transdata;
        xmlChar **stringMap;
        xmlChar *value;

        /*
         * Switch to a compact representation
         * 1/ counting the effective number of states left
         * 2/ counting the unique number of atoms, and check that
         *    they are all of the string type
         * 3/ build a table state x atom for the transitions
         */

        /*
         * NOTE(review): the allocation sizes below are int * sizeof
         * products computed without an overflow check.
         */
        stateRemap = xmlMalloc(ret->nbStates * sizeof(int));
        if (stateRemap == NULL) {
            xmlRegexpErrMemory(ctxt, "compiling regexp");
            /* only the wrapper is freed; the arrays it aliases still belong to ctxt */
            xmlFree(ret);
            return(NULL);
        }
        /* number the non-NULL states consecutively; NULL slots map to -1 */
        for (i = 0;i < ret->nbStates;i++) {
            if (ret->states[i] != NULL) {
                stateRemap[i] = nbstates;
                nbstates++;
            } else {
                stateRemap[i] = -1;
            }
        }
#ifdef DEBUG_COMPACTION
        printf("Final: %d states\n", nbstates);
#endif
        /* sized with sizeof(char *) although the elements are xmlChar * */
        stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));
        if (stringMap == NULL) {
            xmlRegexpErrMemory(ctxt, "compiling regexp");
            xmlFree(stateRemap);
            xmlFree(ret);
            return(NULL);
        }
        stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));
        if (stringRemap == NULL) {
            xmlRegexpErrMemory(ctxt, "compiling regexp");
            xmlFree(stringMap);
            xmlFree(stateRemap);
            xmlFree(ret);
            return(NULL);
        }
        /*
         * Map every atom to the index of its string in stringMap, so
         * atoms carrying equal strings share one entry.
         * NOTE(review): ret->atoms[i] is dereferenced without a NULL
         * check; only atoms[0] was tested above.
         */
        for (i = 0;i < ret->nbAtoms;i++) {
            if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
                (ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
                value = ret->atoms[i]->valuep;
                /* linear search among the strings collected so far */
                for (j = 0;j < nbatoms;j++) {
                    if (xmlStrEqual(stringMap[j], value)) {
                        stringRemap[i] = j;
                        break;
                    }
                }
                /* not found: store a private copy as a new entry */
                if (j >= nbatoms) {
                    stringRemap[i] = nbatoms;
                    stringMap[nbatoms] = xmlStrdup(value);
                    if (stringMap[nbatoms] == NULL) {
                        /*
                         * Copy failed: release the strings duplicated so
                         * far.  NOTE(review): this loop reuses the outer
                         * index i; the rest of this error path is outside
                         * this part of the file.
                         */
                        for (i = 0;i < nbatoms;i++)
                            xmlFree(stringMap[i]);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -