📄 xmlregexp.c
字号:
* @ctxt: a regexp parser context * @from: the from state * @to: the target state or NULL for building a new one * @lax: * */static voidxmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, xmlRegStatePtr to, int lax) { if (to == NULL) { to = xmlRegNewState(ctxt); xmlRegStatePush(ctxt, to); ctxt->state = to; } if (lax) xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER); else xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);}/** * xmlFAGenerateEpsilonTransition: * @ctxt: a regexp parser context * @from: the from state * @to: the target state or NULL for building a new one * */static voidxmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, xmlRegStatePtr to) { if (to == NULL) { to = xmlRegNewState(ctxt); xmlRegStatePush(ctxt, to); ctxt->state = to; } xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);}/** * xmlFAGenerateCountedEpsilonTransition: * @ctxt: a regexp parser context * @from: the from state * @to: the target state or NULL for building a new one * counter: the counter for that transition * */static voidxmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, xmlRegStatePtr to, int counter) { if (to == NULL) { to = xmlRegNewState(ctxt); xmlRegStatePush(ctxt, to); ctxt->state = to; } xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);}/** * xmlFAGenerateCountedTransition: * @ctxt: a regexp parser context * @from: the from state * @to: the target state or NULL for building a new one * counter: the counter for that transition * */static voidxmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, xmlRegStatePtr to, int counter) { if (to == NULL) { to = xmlRegNewState(ctxt); xmlRegStatePush(ctxt, to); ctxt->state = to; } xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);}/** * xmlFAGenerateTransitions: * @ctxt: a regexp parser context * @from: the from state * @to: the target state or NULL for building a new one * @atom: the atom generating the transition * * Returns 0 if success and -1 in case of error. */static intxmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, xmlRegStatePtr to, xmlRegAtomPtr atom) { if (atom == NULL) { ERROR("genrate transition: atom == NULL"); return(-1); } if (atom->type == XML_REGEXP_SUBREG) { /* * this is a subexpression handling one should not need to * create a new node except for XML_REGEXP_QUANT_RANGE. */ if (xmlRegAtomPush(ctxt, atom) < 0) { return(-1); } if ((to != NULL) && (atom->stop != to) && (atom->quant != XML_REGEXP_QUANT_RANGE)) { /* * Generate an epsilon transition to link to the target */ xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);#ifdef DV } else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) && (atom->quant != XML_REGEXP_QUANT_ONCE)) { to = xmlRegNewState(ctxt); xmlRegStatePush(ctxt, to); ctxt->state = to; xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);#endif } switch (atom->quant) { case XML_REGEXP_QUANT_OPT: atom->quant = XML_REGEXP_QUANT_ONCE; /* * transition done to the state after end of atom. * 1. set transition from atom start to new state * 2. set transition from atom end to this state. */ xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0); xmlFAGenerateEpsilonTransition(ctxt, atom->stop, ctxt->state); break; case XML_REGEXP_QUANT_MULT: atom->quant = XML_REGEXP_QUANT_ONCE; xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop); xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start); break; case XML_REGEXP_QUANT_PLUS: atom->quant = XML_REGEXP_QUANT_ONCE; xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start); break; case XML_REGEXP_QUANT_RANGE: { int counter; xmlRegStatePtr newstate; /* * This one is nasty: * 1/ if range has minOccurs == 0, create a new state * and create epsilon transitions from atom->start * to atom->stop, as well as atom->start to the new * state * 2/ register a new counter * 3/ register an epsilon transition associated to * this counter going from atom->stop to atom->start * 4/ create a new state * 5/ generate a counted transition from atom->stop to * that state */ if (atom->min == 0) { xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop); newstate = xmlRegNewState(ctxt); xmlRegStatePush(ctxt, newstate); ctxt->state = newstate; xmlFAGenerateEpsilonTransition(ctxt, atom->start, newstate); } counter = xmlRegGetCounter(ctxt); ctxt->counters[counter].min = atom->min - 1; ctxt->counters[counter].max = atom->max - 1; atom->min = 0; atom->max = 0; atom->quant = XML_REGEXP_QUANT_ONCE; if (to != NULL) { newstate = to; } else { newstate = xmlRegNewState(ctxt); xmlRegStatePush(ctxt, newstate); } ctxt->state = newstate; xmlFAGenerateCountedTransition(ctxt, atom->stop, newstate, counter); /* * first check count and if OK jump forward; * if checking fail increment count and jump back */ xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop, atom->start, counter); } default: break; } return(0); } if ((atom->min == 0) && (atom->max == 0) && (atom->quant == XML_REGEXP_QUANT_RANGE)) { /* * we can discard the atom and generate an epsilon transition instead */ if (to == NULL) { to = xmlRegNewState(ctxt); if (to != NULL) xmlRegStatePush(ctxt, to); else { return(-1); } } xmlFAGenerateEpsilonTransition(ctxt, from, to); ctxt->state = to; xmlRegFreeAtom(atom); return(0); } if (to == NULL) { to = xmlRegNewState(ctxt); if (to != NULL) xmlRegStatePush(ctxt, to); else { return(-1); } } if (xmlRegAtomPush(ctxt, atom) < 0) { return(-1); } xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1); ctxt->state = to; switch (atom->quant) { case XML_REGEXP_QUANT_OPT: atom->quant = XML_REGEXP_QUANT_ONCE; xmlFAGenerateEpsilonTransition(ctxt, from, to); break; case XML_REGEXP_QUANT_MULT: atom->quant = XML_REGEXP_QUANT_ONCE; xmlFAGenerateEpsilonTransition(ctxt, from, to); xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); break; case XML_REGEXP_QUANT_PLUS: atom->quant = XML_REGEXP_QUANT_ONCE; xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); break; case XML_REGEXP_QUANT_RANGE: if (atom->min == 0) { xmlFAGenerateEpsilonTransition(ctxt, from, to); } break; default: break; } return(0);}/** * xmlFAReduceEpsilonTransitions: * @ctxt: a regexp parser context * @fromnr: the from state * @tonr: the to state * @counter: should that transition be associated to a counted * */static voidxmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr, int tonr, int counter) { int transnr; xmlRegStatePtr from; xmlRegStatePtr to;#ifdef DEBUG_REGEXP_GRAPH printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);#endif from = ctxt->states[fromnr]; if (from == NULL) return; to = ctxt->states[tonr]; if (to == NULL) return; if ((to->mark == XML_REGEXP_MARK_START) || (to->mark == XML_REGEXP_MARK_VISITED)) return; to->mark = XML_REGEXP_MARK_VISITED; if (to->type == XML_REGEXP_FINAL_STATE) {#ifdef DEBUG_REGEXP_GRAPH printf("State %d is final, so %d becomes final\n", tonr, fromnr);#endif from->type = XML_REGEXP_FINAL_STATE; } for (transnr = 0;transnr < to->nbTrans;transnr++) { if (to->trans[transnr].to < 0) continue; if (to->trans[transnr].atom == NULL) { /* * Don't remove counted transitions * Don't loop either */ if (to->trans[transnr].to != fromnr) { if (to->trans[transnr].count >= 0) { int newto = to->trans[transnr].to; xmlRegStateAddTrans(ctxt, from, NULL, ctxt->states[newto], -1, to->trans[transnr].count); } else {#ifdef DEBUG_REGEXP_GRAPH printf("Found epsilon trans %d from %d to %d\n", transnr, tonr, to->trans[transnr].to);#endif if (to->trans[transnr].counter >= 0) { xmlFAReduceEpsilonTransitions(ctxt, fromnr, to->trans[transnr].to, to->trans[transnr].counter); } else { xmlFAReduceEpsilonTransitions(ctxt, fromnr, to->trans[transnr].to, counter); } } } } else { int newto = to->trans[transnr].to; if (to->trans[transnr].counter >= 0) { xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, ctxt->states[newto], to->trans[transnr].counter, -1); } else { xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, ctxt->states[newto], counter, -1); } } } to->mark = XML_REGEXP_MARK_NORMAL;}/** * xmlFAEliminateSimpleEpsilonTransitions: * @ctxt: a regexp parser context * * Eliminating general epsilon transitions can get costly in the general * algorithm due to the large amount of generated new transitions and * associated comparisons. However for simple epsilon transition used just * to separate building blocks when generating the automata this can be * reduced to state elimination: * - if there exists an epsilon from X to Y * - if there is no other transition from X * then X and Y are semantically equivalent and X can be eliminated * If X is the start state then make Y the start state, else replace the * target of all transitions to X by transitions to Y. */static voidxmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { int statenr, i, j, newto; xmlRegStatePtr state, tmp; for (statenr = 0;statenr < ctxt->nbStates;statenr++) { state = ctxt->states[statenr]; if (state == NULL) continue; if (state->nbTrans != 1) continue; if (state->type == XML_REGEXP_UNREACH_STATE) continue; /* is the only transition out a basic transition */ if ((state->trans[0].atom == NULL) && (state->trans[0].to >= 0) && (state->trans[0].to != statenr) && (state->trans[0].counter < 0) && (state->trans[0].count < 0)) { newto = state->trans[0].to; if (state->type == XML_REGEXP_START_STATE) {#ifdef DEBUG_REGEXP_GRAPH printf("Found simple epsilon trans from start %d to %d\n", statenr, newto);#endif } else {#ifdef DEBUG_REGEXP_GRAPH printf("Found simple epsilon trans from %d to %d\n", statenr, newto);#endif for (i = 0;i < state->nbTransTo;i++) { tmp = ctxt->states[state->transTo[i]]; for (j = 0;j < tmp->nbTrans;j++) { if (tmp->trans[j].to == statenr) {#ifdef DEBUG_REGEXP_GRAPH printf("Changed transition %d on %d to go to %d\n", j, tmp->no, newto);#endif tmp->trans[j].to = -1; xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom, ctxt->states[newto], tmp->trans[j].counter, tmp->trans[j].count); } } } if (state->type == XML_REGEXP_FINAL_STATE) ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE; /* eliminate the transition completely */ state->nbTrans = 0; state->type = XML_REGEXP_UNREACH_STATE; } } }}/** * xmlFAEliminateEpsilonTransitions: * @ctxt: a regexp parser context * */static voidxmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { int statenr, transnr; xmlRegStatePtr state; int has_epsilon; if (ctxt->states == NULL) return; /* * Eliminate simple epsilon transition and the associated unreachable * states. */ xmlFAEliminateSimpleEpsilonTransitions(ctxt); for (statenr = 0;statenr < ctxt->nbStates;statenr++) { state = ctxt->states[statenr]; if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) {#ifdef DEBUG_REGEXP_GRAPH printf("Removed unreachable state %d\n", statenr);#endif xmlRegFreeState(state); ctxt->states[statenr] = NULL; } } has_epsilon = 0; /* * Build the completed transitions bypassing the epsilons * Use a marking algorithm to avoid loops * Mark sink states too. * Process from the latests states backward to the start when * there is long cascading epsilon chains this minimize the * recursions and transition compares when adding the new ones */ for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) { state = ctxt->states[statenr]; if (state == NULL) continue; if ((state->nbTrans == 0) && (state->type != XML_REGEXP_FINAL_STATE)) { state->type = XML_REGEXP_SINK_STATE; } for (transnr = 0;transnr < state->nbTrans;transnr++) { if ((state->trans[transnr].atom == NULL) && (state->trans[transnr].to >= 0)) { if (state->trans[transnr].to == statenr) { state->trans[transnr].to = -1;#ifdef DEBUG_REGEXP_GRAPH printf("Removed loopback epsilon trans %d on %d\n",
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -