⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xmlregexp.c

📁 xml开源解析代码.版本为libxml2-2.6.29,可支持GB3212.网络消息发送XML时很有用.
💻 C
📖 第 1 页 / 共 5 页
字号:
 * @ctxt:  a regexp parser context * @from:  the from state * @to:  the target state or NULL for building a new one * @lax: * */static voidxmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,			   xmlRegStatePtr from, xmlRegStatePtr to,			   int lax) {    if (to == NULL) {	to = xmlRegNewState(ctxt);	xmlRegStatePush(ctxt, to);	ctxt->state = to;    }    if (lax)	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER);    else	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);}/** * xmlFAGenerateEpsilonTransition: * @ctxt:  a regexp parser context * @from:  the from state * @to:  the target state or NULL for building a new one * */static voidxmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,			       xmlRegStatePtr from, xmlRegStatePtr to) {    if (to == NULL) {	to = xmlRegNewState(ctxt);	xmlRegStatePush(ctxt, to);	ctxt->state = to;    }    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);}/** * xmlFAGenerateCountedEpsilonTransition: * @ctxt:  a regexp parser context * @from:  the from state * @to:  the target state or NULL for building a new one * counter:  the counter for that transition * */static voidxmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {    if (to == NULL) {	to = xmlRegNewState(ctxt);	xmlRegStatePush(ctxt, to);	ctxt->state = to;    }    xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);}/** * xmlFAGenerateCountedTransition: * @ctxt:  a regexp parser context * @from:  the from state * @to:  the target state or NULL for building a new one * counter:  the counter for that transition * */static voidxmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {    if (to == NULL) {	to = xmlRegNewState(ctxt);	xmlRegStatePush(ctxt, to);	ctxt->state = to;    }    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);}/** * xmlFAGenerateTransitions: * @ctxt:  a regexp parser context * @from:  the from state * @to:  the target state or NULL for building a new one * @atom:  the atom generating the transition * * Returns 0 if success and -1 in case of error. */static intxmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,	                 xmlRegStatePtr to, xmlRegAtomPtr atom) {    if (atom == NULL) {	ERROR("genrate transition: atom == NULL");	return(-1);    }    if (atom->type == XML_REGEXP_SUBREG) {	/*	 * this is a subexpression handling one should not need to	 * create a new node except for XML_REGEXP_QUANT_RANGE.	 */	if (xmlRegAtomPush(ctxt, atom) < 0) {	    return(-1);	}	if ((to != NULL) && (atom->stop != to) &&	    (atom->quant != XML_REGEXP_QUANT_RANGE)) {	    /*	     * Generate an epsilon transition to link to the target	     */	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);#ifdef DV	} else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) && 		   (atom->quant != XML_REGEXP_QUANT_ONCE)) {	    to = xmlRegNewState(ctxt);	    xmlRegStatePush(ctxt, to);	    ctxt->state = to;	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);#endif	}	switch (atom->quant) {	    case XML_REGEXP_QUANT_OPT:		atom->quant = XML_REGEXP_QUANT_ONCE;		/*		 * transition done to the state after end of atom.		 *      1. set transition from atom start to new state		 *      2. set transition from atom end to this state. 		 */		xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, ctxt->state);		break;	    case XML_REGEXP_QUANT_MULT:		atom->quant = XML_REGEXP_QUANT_ONCE;		xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);		break;	    case XML_REGEXP_QUANT_PLUS:		atom->quant = XML_REGEXP_QUANT_ONCE;		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);		break;	    case XML_REGEXP_QUANT_RANGE: {		int counter;		xmlRegStatePtr newstate;		/*		 * This one is nasty:		 *   1/ if range has minOccurs == 0, create a new state		 *	and create epsilon transitions from atom->start		 *	to atom->stop, as well as atom->start to the new		 *	state		 *   2/ register a new counter		 *   3/ register an epsilon transition associated to		 *      this counter going from atom->stop to atom->start		 *   4/ create a new state		 *   5/ generate a counted transition from atom->stop to		 *      that state		 */		if (atom->min == 0) {		    xmlFAGenerateEpsilonTransition(ctxt, atom->start,			atom->stop);		    newstate = xmlRegNewState(ctxt);		    xmlRegStatePush(ctxt, newstate);		    ctxt->state = newstate;		    xmlFAGenerateEpsilonTransition(ctxt, atom->start,			newstate);		}		counter = xmlRegGetCounter(ctxt);		ctxt->counters[counter].min = atom->min - 1;		ctxt->counters[counter].max = atom->max - 1;		atom->min = 0;		atom->max = 0;		atom->quant = XML_REGEXP_QUANT_ONCE;		if (to != NULL) {		    newstate = to;		} else {		    newstate = xmlRegNewState(ctxt);		    xmlRegStatePush(ctxt, newstate);		}		ctxt->state = newstate;		xmlFAGenerateCountedTransition(ctxt, atom->stop,			                       newstate, counter);                                /*		 * first check count and if OK jump forward;                  * if checking fail increment count and jump back		 */		xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,			                              atom->start, counter);	    }	    default:		break;	}	return(0);    }     if ((atom->min == 0) && (atom->max == 0) &&               (atom->quant == XML_REGEXP_QUANT_RANGE)) {        /*	 * we can discard the atom and generate an epsilon transition instead	 */	if (to == NULL) {	    to = xmlRegNewState(ctxt);	    if (to != NULL)		xmlRegStatePush(ctxt, to);	    else {		return(-1);	    }	}	xmlFAGenerateEpsilonTransition(ctxt, from, to);	ctxt->state = to;	xmlRegFreeAtom(atom);	return(0);    }    if (to == NULL) {	to = xmlRegNewState(ctxt);	if (to != NULL)	    xmlRegStatePush(ctxt, to);	else {	    return(-1);	}    }    if (xmlRegAtomPush(ctxt, atom) < 0) {	return(-1);    }    xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);    ctxt->state = to;    switch (atom->quant) {	case XML_REGEXP_QUANT_OPT:	    atom->quant = XML_REGEXP_QUANT_ONCE;	    xmlFAGenerateEpsilonTransition(ctxt, from, to);	    break;	case XML_REGEXP_QUANT_MULT:	    atom->quant = XML_REGEXP_QUANT_ONCE;	    xmlFAGenerateEpsilonTransition(ctxt, from, to);	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);	    break;	case XML_REGEXP_QUANT_PLUS:	    atom->quant = XML_REGEXP_QUANT_ONCE;	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);	    break;	case XML_REGEXP_QUANT_RANGE: 	    if (atom->min == 0) {		xmlFAGenerateEpsilonTransition(ctxt, from, to);	    }	    break;	default:	    break;    }    return(0);}/** * xmlFAReduceEpsilonTransitions: * @ctxt:  a regexp parser context * @fromnr:  the from state * @tonr:  the to state  * @counter:  should that transition be associated to a counted * */static voidxmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,	                      int tonr, int counter) {    int transnr;    xmlRegStatePtr from;    xmlRegStatePtr to;#ifdef DEBUG_REGEXP_GRAPH    printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);#endif    from = ctxt->states[fromnr];    if (from == NULL)	return;    to = ctxt->states[tonr];    if (to == NULL)	return;    if ((to->mark == XML_REGEXP_MARK_START) ||	(to->mark == XML_REGEXP_MARK_VISITED))	return;    to->mark = XML_REGEXP_MARK_VISITED;    if (to->type == XML_REGEXP_FINAL_STATE) {#ifdef DEBUG_REGEXP_GRAPH	printf("State %d is final, so %d becomes final\n", tonr, fromnr);#endif	from->type = XML_REGEXP_FINAL_STATE;    }    for (transnr = 0;transnr < to->nbTrans;transnr++) {        if (to->trans[transnr].to < 0)	    continue;	if (to->trans[transnr].atom == NULL) {	    /*	     * Don't remove counted transitions	     * Don't loop either	     */	    if (to->trans[transnr].to != fromnr) {		if (to->trans[transnr].count >= 0) {		    int newto = to->trans[transnr].to;		    xmlRegStateAddTrans(ctxt, from, NULL,					ctxt->states[newto], 					-1, to->trans[transnr].count);		} else {#ifdef DEBUG_REGEXP_GRAPH		    printf("Found epsilon trans %d from %d to %d\n",			   transnr, tonr, to->trans[transnr].to);#endif		    if (to->trans[transnr].counter >= 0) {			xmlFAReduceEpsilonTransitions(ctxt, fromnr,					      to->trans[transnr].to,					      to->trans[transnr].counter);		    } else {			xmlFAReduceEpsilonTransitions(ctxt, fromnr,					      to->trans[transnr].to,					      counter);		    }		}	    }	} else {	    int newto = to->trans[transnr].to;	    if (to->trans[transnr].counter >= 0) {		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 				    ctxt->states[newto], 				    to->trans[transnr].counter, -1);	    } else {		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 				    ctxt->states[newto], counter, -1);	    }	}    }    to->mark = XML_REGEXP_MARK_NORMAL;}/** * xmlFAEliminateSimpleEpsilonTransitions: * @ctxt:  a regexp parser context * * Eliminating general epsilon transitions can get costly in the general  * algorithm due to the large amount of generated new transitions and * associated comparisons. However for simple epsilon transition used just * to separate building blocks when generating the automata this can be * reduced to state elimination: *    - if there exists an epsilon from X to Y *    - if there is no other transition from X * then X and Y are semantically equivalent and X can be eliminated * If X is the start state then make Y the start state, else replace the * target of all transitions to X by transitions to Y. */static voidxmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {    int statenr, i, j, newto;    xmlRegStatePtr state, tmp;    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {	state = ctxt->states[statenr];	if (state == NULL)	    continue;	if (state->nbTrans != 1)	    continue;	if (state->type == XML_REGEXP_UNREACH_STATE)	    continue;	/* is the only transition out a basic transition */	if ((state->trans[0].atom == NULL) &&	    (state->trans[0].to >= 0) &&	    (state->trans[0].to != statenr) &&	    (state->trans[0].counter < 0) &&	    (state->trans[0].count < 0)) {	    newto = state->trans[0].to;            if (state->type == XML_REGEXP_START_STATE) {#ifdef DEBUG_REGEXP_GRAPH		printf("Found simple epsilon trans from start %d to %d\n",		       statenr, newto);#endif                 } else {#ifdef DEBUG_REGEXP_GRAPH		printf("Found simple epsilon trans from %d to %d\n",		       statenr, newto);#endif     	        for (i = 0;i < state->nbTransTo;i++) {		    tmp = ctxt->states[state->transTo[i]];		    for (j = 0;j < tmp->nbTrans;j++) {			if (tmp->trans[j].to == statenr) {#ifdef DEBUG_REGEXP_GRAPH			    printf("Changed transition %d on %d to go to %d\n",				   j, tmp->no, newto);#endif     			    tmp->trans[j].to = -1;			    xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,			    			ctxt->states[newto],					        tmp->trans[j].counter,						tmp->trans[j].count);			}		    }		}		if (state->type == XML_REGEXP_FINAL_STATE)		    ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;		/* eliminate the transition completely */		state->nbTrans = 0;                state->type = XML_REGEXP_UNREACH_STATE;	    }            	}    }}/** * xmlFAEliminateEpsilonTransitions: * @ctxt:  a regexp parser context * */static voidxmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {    int statenr, transnr;    xmlRegStatePtr state;    int has_epsilon;    if (ctxt->states == NULL) return;    /*     * Eliminate simple epsilon transition and the associated unreachable     * states.     */    xmlFAEliminateSimpleEpsilonTransitions(ctxt);    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {	state = ctxt->states[statenr];	if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) {#ifdef DEBUG_REGEXP_GRAPH	    printf("Removed unreachable state %d\n", statenr);#endif	    xmlRegFreeState(state);	    ctxt->states[statenr] = NULL;	}    }    has_epsilon = 0;    /*     * Build the completed transitions bypassing the epsilons     * Use a marking algorithm to avoid loops     * Mark sink states too.     * Process from the latests states backward to the start when     * there is long cascading epsilon chains this minimize the     * recursions and transition compares when adding the new ones     */    for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) {	state = ctxt->states[statenr];	if (state == NULL)	    continue;	if ((state->nbTrans == 0) &&	    (state->type != XML_REGEXP_FINAL_STATE)) {	    state->type = XML_REGEXP_SINK_STATE;	}	for (transnr = 0;transnr < state->nbTrans;transnr++) {	    if ((state->trans[transnr].atom == NULL) &&		(state->trans[transnr].to >= 0)) {		if (state->trans[transnr].to == statenr) {		    state->trans[transnr].to = -1;#ifdef DEBUG_REGEXP_GRAPH		    printf("Removed loopback epsilon trans %d on %d\n",

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -