⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xmlregexp.c.svn-base

📁 这是一个用于解析xml文件的类库。使用这个类库
💻 SVN-BASE
📖 第 1 页 / 共 5 页
字号:
	    ret = xmlUCSIsCatNd(codepoint);	    break;        case XML_REGEXP_REALCHAR:	    neg = !neg;        case XML_REGEXP_NOTREALCHAR:	    ret = xmlUCSIsCatP(codepoint);	    if (ret == 0)		ret = xmlUCSIsCatZ(codepoint);	    if (ret == 0)		ret = xmlUCSIsCatC(codepoint);	    break;        case XML_REGEXP_LETTER:	    ret = xmlUCSIsCatL(codepoint);	    break;        case XML_REGEXP_LETTER_UPPERCASE:	    ret = xmlUCSIsCatLu(codepoint);	    break;        case XML_REGEXP_LETTER_LOWERCASE:	    ret = xmlUCSIsCatLl(codepoint);	    break;        case XML_REGEXP_LETTER_TITLECASE:	    ret = xmlUCSIsCatLt(codepoint);	    break;        case XML_REGEXP_LETTER_MODIFIER:	    ret = xmlUCSIsCatLm(codepoint);	    break;        case XML_REGEXP_LETTER_OTHERS:	    ret = xmlUCSIsCatLo(codepoint);	    break;        case XML_REGEXP_MARK:	    ret = xmlUCSIsCatM(codepoint);	    break;        case XML_REGEXP_MARK_NONSPACING:	    ret = xmlUCSIsCatMn(codepoint);	    break;        case XML_REGEXP_MARK_SPACECOMBINING:	    ret = xmlUCSIsCatMc(codepoint);	    break;        case XML_REGEXP_MARK_ENCLOSING:	    ret = xmlUCSIsCatMe(codepoint);	    break;        case XML_REGEXP_NUMBER:	    ret = xmlUCSIsCatN(codepoint);	    break;        case XML_REGEXP_NUMBER_DECIMAL:	    ret = xmlUCSIsCatNd(codepoint);	    break;        case XML_REGEXP_NUMBER_LETTER:	    ret = xmlUCSIsCatNl(codepoint);	    break;        case XML_REGEXP_NUMBER_OTHERS:	    ret = xmlUCSIsCatNo(codepoint);	    break;        case XML_REGEXP_PUNCT:	    ret = xmlUCSIsCatP(codepoint);	    break;        case XML_REGEXP_PUNCT_CONNECTOR:	    ret = xmlUCSIsCatPc(codepoint);	    break;        case XML_REGEXP_PUNCT_DASH:	    ret = xmlUCSIsCatPd(codepoint);	    break;        case XML_REGEXP_PUNCT_OPEN:	    ret = xmlUCSIsCatPs(codepoint);	    break;        case XML_REGEXP_PUNCT_CLOSE:	    ret = xmlUCSIsCatPe(codepoint);	    break;        case XML_REGEXP_PUNCT_INITQUOTE:	    ret = xmlUCSIsCatPi(codepoint);	    break;        case XML_REGEXP_PUNCT_FINQUOTE:	    ret = xmlUCSIsCatPf(codepoint);	    break;        case XML_REGEXP_PUNCT_OTHERS:	    ret = xmlUCSIsCatPo(codepoint);	    break;        case XML_REGEXP_SEPAR:	    ret = xmlUCSIsCatZ(codepoint);	    break;        case XML_REGEXP_SEPAR_SPACE:	    ret = xmlUCSIsCatZs(codepoint);	    break;        case XML_REGEXP_SEPAR_LINE:	    ret = xmlUCSIsCatZl(codepoint);	    break;        case XML_REGEXP_SEPAR_PARA:	    ret = xmlUCSIsCatZp(codepoint);	    break;        case XML_REGEXP_SYMBOL:	    ret = xmlUCSIsCatS(codepoint);	    break;        case XML_REGEXP_SYMBOL_MATH:	    ret = xmlUCSIsCatSm(codepoint);	    break;        case XML_REGEXP_SYMBOL_CURRENCY:	    ret = xmlUCSIsCatSc(codepoint);	    break;        case XML_REGEXP_SYMBOL_MODIFIER:	    ret = xmlUCSIsCatSk(codepoint);	    break;        case XML_REGEXP_SYMBOL_OTHERS:	    ret = xmlUCSIsCatSo(codepoint);	    break;        case XML_REGEXP_OTHER:	    ret = xmlUCSIsCatC(codepoint);	    break;        case XML_REGEXP_OTHER_CONTROL:	    ret = xmlUCSIsCatCc(codepoint);	    break;        case XML_REGEXP_OTHER_FORMAT:	    ret = xmlUCSIsCatCf(codepoint);	    break;        case XML_REGEXP_OTHER_PRIVATE:	    ret = xmlUCSIsCatCo(codepoint);	    break;        case XML_REGEXP_OTHER_NA:	    /* ret = xmlUCSIsCatCn(codepoint); */	    /* Seems it doesn't exist anymore in recent Unicode releases */	    ret = 0;	    break;        case XML_REGEXP_BLOCK_NAME:	    ret = xmlUCSIsBlock(codepoint, (const char *) blockName);	    break;    }    if (neg)	return(!ret);    return(ret);}static intxmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {    int i, ret = 0;    xmlRegRangePtr range;    if ((atom == NULL) || (!IS_CHAR(codepoint)))	return(-1);    switch (atom->type) {        case XML_REGEXP_SUBREG:        case XML_REGEXP_EPSILON:	    return(-1);        case XML_REGEXP_CHARVAL:            return(codepoint == atom->codepoint);        case XML_REGEXP_RANGES: {	    int accept = 0;	    for (i = 0;i < atom->nbRanges;i++) {		range = atom->ranges[i];		if (range->neg == 2) {		    ret = xmlRegCheckCharacterRange(range->type, codepoint,						0, range->start, range->end,						range->blockName);		    if (ret != 0)			return(0); /* excluded char */		} else if (range->neg) {		    ret = xmlRegCheckCharacterRange(range->type, codepoint,						0, range->start, range->end,						range->blockName);		    if (ret == 0)		        accept = 1;		    else		        return(0);		} else {		    ret = xmlRegCheckCharacterRange(range->type, codepoint,						0, range->start, range->end,						range->blockName);		    if (ret != 0)			accept = 1; /* might still be excluded */		}	    }	    return(accept);	}        case XML_REGEXP_STRING:	    printf("TODO: XML_REGEXP_STRING\n");	    return(-1);        case XML_REGEXP_ANYCHAR:        case XML_REGEXP_ANYSPACE:        case XML_REGEXP_NOTSPACE:        case XML_REGEXP_INITNAME:        case XML_REGEXP_NOTINITNAME:        case XML_REGEXP_NAMECHAR:        case XML_REGEXP_NOTNAMECHAR:        case XML_REGEXP_DECIMAL:        case XML_REGEXP_NOTDECIMAL:        case XML_REGEXP_REALCHAR:        case XML_REGEXP_NOTREALCHAR:        case XML_REGEXP_LETTER:        case XML_REGEXP_LETTER_UPPERCASE:        case XML_REGEXP_LETTER_LOWERCASE:        case XML_REGEXP_LETTER_TITLECASE:        case XML_REGEXP_LETTER_MODIFIER:        case XML_REGEXP_LETTER_OTHERS:        case XML_REGEXP_MARK:        case XML_REGEXP_MARK_NONSPACING:        case XML_REGEXP_MARK_SPACECOMBINING:        case XML_REGEXP_MARK_ENCLOSING:        case XML_REGEXP_NUMBER:        case XML_REGEXP_NUMBER_DECIMAL:        case XML_REGEXP_NUMBER_LETTER:        case XML_REGEXP_NUMBER_OTHERS:        case XML_REGEXP_PUNCT:        case XML_REGEXP_PUNCT_CONNECTOR:        case XML_REGEXP_PUNCT_DASH:        case XML_REGEXP_PUNCT_OPEN:        case XML_REGEXP_PUNCT_CLOSE:        case XML_REGEXP_PUNCT_INITQUOTE:        case XML_REGEXP_PUNCT_FINQUOTE:        case XML_REGEXP_PUNCT_OTHERS:        case XML_REGEXP_SEPAR:        case XML_REGEXP_SEPAR_SPACE:        case XML_REGEXP_SEPAR_LINE:        case XML_REGEXP_SEPAR_PARA:        case XML_REGEXP_SYMBOL:        case XML_REGEXP_SYMBOL_MATH:        case XML_REGEXP_SYMBOL_CURRENCY:        case XML_REGEXP_SYMBOL_MODIFIER:        case XML_REGEXP_SYMBOL_OTHERS:        case XML_REGEXP_OTHER:        case XML_REGEXP_OTHER_CONTROL:        case XML_REGEXP_OTHER_FORMAT:        case XML_REGEXP_OTHER_PRIVATE:        case XML_REGEXP_OTHER_NA:	case XML_REGEXP_BLOCK_NAME:	    ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,		                            (const xmlChar *)atom->valuep);	    if (atom->neg)		ret = !ret;	    break;    }    return(ret);}/************************************************************************ * 									* *	Saving and restoring state of an execution context		* * 									* ************************************************************************/#ifdef DEBUG_REGEXP_EXECstatic voidxmlFARegDebugExec(xmlRegExecCtxtPtr exec) {    printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);    if (exec->inputStack != NULL) {	int i;	printf(": ");	for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)	    printf("%s ", exec->inputStack[exec->inputStackNr - (i + 1)]);    } else {	printf(": %s", &(exec->inputString[exec->index]));    }    printf("\n");}#endifstatic voidxmlFARegExecSave(xmlRegExecCtxtPtr exec) {#ifdef DEBUG_REGEXP_EXEC    printf("saving ");    exec->transno++;    xmlFARegDebugExec(exec);    exec->transno--;#endif    if (exec->maxRollbacks == 0) {	exec->maxRollbacks = 4;	exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *		                             sizeof(xmlRegExecRollback));	if (exec->rollbacks == NULL) {	    xmlRegexpErrMemory(NULL, "saving regexp");	    exec->maxRollbacks = 0;	    return;	}	memset(exec->rollbacks, 0,	       exec->maxRollbacks * sizeof(xmlRegExecRollback));    } else if (exec->nbRollbacks >= exec->maxRollbacks) {	xmlRegExecRollback *tmp;	int len = exec->maxRollbacks;	exec->maxRollbacks *= 2;	tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,			exec->maxRollbacks * sizeof(xmlRegExecRollback));	if (tmp == NULL) {	    xmlRegexpErrMemory(NULL, "saving regexp");	    exec->maxRollbacks /= 2;	    return;	}	exec->rollbacks = tmp;	tmp = &exec->rollbacks[len];	memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));    }    exec->rollbacks[exec->nbRollbacks].state = exec->state;    exec->rollbacks[exec->nbRollbacks].index = exec->index;    exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;    if (exec->comp->nbCounters > 0) {	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {	    exec->rollbacks[exec->nbRollbacks].counts = (int *)		xmlMalloc(exec->comp->nbCounters * sizeof(int));	    if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {		xmlRegexpErrMemory(NULL, "saving regexp");		exec->status = -5;		return;	    }	}	memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,	       exec->comp->nbCounters * sizeof(int));    }    exec->nbRollbacks++;}static voidxmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {    if (exec->nbRollbacks <= 0) {	exec->status = -1;#ifdef DEBUG_REGEXP_EXEC	printf("rollback failed on empty stack\n");#endif	return;    }    exec->nbRollbacks--;    exec->state = exec->rollbacks[exec->nbRollbacks].state;    exec->index = exec->rollbacks[exec->nbRollbacks].index;    exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;    if (exec->comp->nbCounters > 0) {	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {	    fprintf(stderr, "exec save: allocation failed");	    exec->status = -6;	    return;	}	memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,	       exec->comp->nbCounters * sizeof(int));    }#ifdef DEBUG_REGEXP_EXEC    printf("restored ");    xmlFARegDebugExec(exec);#endif}/************************************************************************ * 									* *	Verifier, running an input against a compiled regexp		* * 									* ************************************************************************/static intxmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {    xmlRegExecCtxt execval;    xmlRegExecCtxtPtr exec = &execval;    int ret, codepoint, len;    exec->inputString = content;    exec->index = 0;    exec->determinist = 1;    exec->maxRollbacks = 0;    exec->nbRollbacks = 0;    exec->rollbacks = NULL;    exec->status = 0;    exec->comp = comp;    exec->state = comp->states[0];    exec->transno = 0;    exec->transcount = 0;    exec->inputStack = NULL;    exec->inputStackMax = 0;    if (comp->nbCounters > 0) {	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));	if (exec->counts == NULL) {	    xmlRegexpErrMemory(NULL, "running regexp");	    return(-1);	}        memset(exec->counts, 0, comp->nbCounters * sizeof(int));    } else	exec->counts = NULL;    while ((exec->status == 0) &&	   ((exec->inputString[exec->index] != 0) ||	    (exec->state->type != XML_REGEXP_FINAL_STATE))) {	xmlRegTransPtr trans;	xmlRegAtomPtr atom;	/*	 * If end of input on non-terminal state, rollback, however we may	 * still have epsilon like transition for counted transitions	 * on counters, in that case don't break too early.  Additionally,	 * if we are working on a range like "AB{0,2}", where B is not present,	 * we don't want to break.	 */	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) {	    /*	     * if there is a transition, we must check if	     *  atom allows minOccurs of 0	     */	    if (exec->transno < exec->state->nbTrans) {	        trans = &exec->state->trans[exec->transno];		if (trans->to >=0) {		    atom = trans->atom;		    if (!((atom->min == 0) && (atom->max > 0)))		        goto rollback;		}	    } else	        goto rollback;	}	exec->transcount = 0;	for (;exec->transno < exec->state->nbTrans;exec->transno++) {	    trans = &exec->state->trans[exec->transno];	    if (trans->to < 0)		continue;	    atom = trans->atom;	    ret = 0;	    if (trans->count >= 0) {		int count;		xmlRegCounterPtr counter;		/*		 * A counted transition.		 */		count = exec->counts[trans->count];		counter = &exec->comp->counters[trans->count];#ifdef DEBUG_REGEXP_EXEC		printf("testing count %d: val %d, min %d, max %d\n",		       trans->count, count, counter->min,  counter->max);#endif		ret = ((count >= counter->min) && (count <= counter->max));	    } else if (atom == NULL) {		fprintf(stderr, "epsilon transition left at runtime\n");		exec->status = -2;		break;	    } else if (exec->inputString[exec->index] != 0) {                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);		ret = xmlRegCheckCharacter(atom, codepoint);		if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) {		    xmlRegStatePtr to = comp->states[trans->to];		    /*		     * this is a multiple input sequence		     */		    if (exec->state->nbTrans > exec->transno + 1) {			xmlFARegExecSave(exec);		    }		    exec->transcount = 1;		    do {			/*			 * Try to progress as much as possible on the input			 */			if (exec->transcount == atom->max) {			    break;			}			exec->index += len;			/*			 * End of input: stop here			 */			if (exec->inputString[exec->index] == 0) {			    exec->index -= len;			    break;			}			if (exec->transcount >= atom->min) {			    int transno = exec->transno;			    xmlRegStatePtr state = exec->state;			    /*			     * The transition is acceptable save it			     */			    exec->transno = -1; /* trick */			    exec->state = to;			    xmlFARegExecSave(exec);			    exec->transno = transno;			    exec->state = state;			}			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),		

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -