📄 xmlregexp.c.svn-base
字号:
* 2/ register a new counter * 3/ register an epsilon transition associated to * this counter going from atom->stop to atom->start * 4/ create a new state * 5/ generate a counted transition from atom->stop to * that state */ if (atom->min == 0) { xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop); newstate = xmlRegNewState(ctxt); xmlRegStatePush(ctxt, newstate); ctxt->state = newstate; xmlFAGenerateEpsilonTransition(ctxt, atom->start, newstate); } counter = xmlRegGetCounter(ctxt); ctxt->counters[counter].min = atom->min - 1; ctxt->counters[counter].max = atom->max - 1; atom->min = 0; atom->max = 0; atom->quant = XML_REGEXP_QUANT_ONCE; xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop, atom->start, counter); if (to != NULL) { newstate = to; } else { newstate = xmlRegNewState(ctxt); xmlRegStatePush(ctxt, newstate); ctxt->state = newstate; } xmlFAGenerateCountedTransition(ctxt, atom->stop, newstate, counter); } default: break; } return(0); } else { if (to == NULL) { to = xmlRegNewState(ctxt); if (to != NULL) xmlRegStatePush(ctxt, to); else { return(-1); } } if (xmlRegAtomPush(ctxt, atom) < 0) { return(-1); } xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1); ctxt->state = to; } switch (atom->quant) { case XML_REGEXP_QUANT_OPT: atom->quant = XML_REGEXP_QUANT_ONCE; xmlFAGenerateEpsilonTransition(ctxt, from, to); break; case XML_REGEXP_QUANT_MULT: atom->quant = XML_REGEXP_QUANT_ONCE; xmlFAGenerateEpsilonTransition(ctxt, from, to); xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); break; case XML_REGEXP_QUANT_PLUS: atom->quant = XML_REGEXP_QUANT_ONCE; xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); break; default: break; } return(0);}/** * xmlFAReduceEpsilonTransitions: * @ctxt: a regexp parser context * @fromnr: the from state * @tonr: the to state * @counter: should that transition be associated to a counted * */static voidxmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr, int tonr, int counter) { int transnr; xmlRegStatePtr from; xmlRegStatePtr to;#ifdef DEBUG_REGEXP_GRAPH printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);#endif from = ctxt->states[fromnr]; if (from == NULL) return; to = ctxt->states[tonr]; if (to == NULL) return; if ((to->mark == XML_REGEXP_MARK_START) || (to->mark == XML_REGEXP_MARK_VISITED)) return; to->mark = XML_REGEXP_MARK_VISITED; if (to->type == XML_REGEXP_FINAL_STATE) {#ifdef DEBUG_REGEXP_GRAPH printf("State %d is final, so %d becomes final\n", tonr, fromnr);#endif from->type = XML_REGEXP_FINAL_STATE; } for (transnr = 0;transnr < to->nbTrans;transnr++) { if (to->trans[transnr].atom == NULL) { /* * Don't remove counted transitions * Don't loop either */ if (to->trans[transnr].to != fromnr) { if (to->trans[transnr].count >= 0) { int newto = to->trans[transnr].to; xmlRegStateAddTrans(ctxt, from, NULL, ctxt->states[newto], -1, to->trans[transnr].count); } else {#ifdef DEBUG_REGEXP_GRAPH printf("Found epsilon trans %d from %d to %d\n", transnr, tonr, to->trans[transnr].to);#endif if (to->trans[transnr].counter >= 0) { xmlFAReduceEpsilonTransitions(ctxt, fromnr, to->trans[transnr].to, to->trans[transnr].counter); } else { xmlFAReduceEpsilonTransitions(ctxt, fromnr, to->trans[transnr].to, counter); } } } } else { int newto = to->trans[transnr].to; if (to->trans[transnr].counter >= 0) { xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, ctxt->states[newto], to->trans[transnr].counter, -1); } else { xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, ctxt->states[newto], counter, -1); } } } to->mark = XML_REGEXP_MARK_NORMAL;}/** * xmlFAEliminateEpsilonTransitions: * @ctxt: a regexp parser context * */static voidxmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { int statenr, transnr; xmlRegStatePtr state; if (ctxt->states == NULL) return; /* * build the completed transitions bypassing the epsilons * Use a marking algorithm to avoid loops */ for (statenr = 0;statenr < ctxt->nbStates;statenr++) { state = ctxt->states[statenr]; if (state == NULL) continue; for (transnr = 0;transnr < state->nbTrans;transnr++) { if ((state->trans[transnr].atom == NULL) && (state->trans[transnr].to >= 0)) { if (state->trans[transnr].to == statenr) { state->trans[transnr].to = -1;#ifdef DEBUG_REGEXP_GRAPH printf("Removed loopback epsilon trans %d on %d\n", transnr, statenr);#endif } else if (state->trans[transnr].count < 0) { int newto = state->trans[transnr].to;#ifdef DEBUG_REGEXP_GRAPH printf("Found epsilon trans %d from %d to %d\n", transnr, statenr, newto);#endif state->mark = XML_REGEXP_MARK_START; xmlFAReduceEpsilonTransitions(ctxt, statenr, newto, state->trans[transnr].counter); state->mark = XML_REGEXP_MARK_NORMAL;#ifdef DEBUG_REGEXP_GRAPH } else { printf("Found counted transition %d on %d\n", transnr, statenr);#endif } } } } /* * Eliminate the epsilon transitions */ for (statenr = 0;statenr < ctxt->nbStates;statenr++) { state = ctxt->states[statenr]; if (state == NULL) continue; for (transnr = 0;transnr < state->nbTrans;transnr++) { if ((state->trans[transnr].atom == NULL) && (state->trans[transnr].count < 0) && (state->trans[transnr].to >= 0)) { state->trans[transnr].to = -1; } } } /* * Use this pass to detect unreachable states too */ for (statenr = 0;statenr < ctxt->nbStates;statenr++) { state = ctxt->states[statenr]; if (state != NULL) state->reached = XML_REGEXP_MARK_NORMAL; } state = ctxt->states[0]; if (state != NULL) state->reached = XML_REGEXP_MARK_START; while (state != NULL) { xmlRegStatePtr target = NULL; state->reached = XML_REGEXP_MARK_VISITED; /* * Mark all states reachable from the current reachable state */ for (transnr = 0;transnr < state->nbTrans;transnr++) { if ((state->trans[transnr].to >= 0) && ((state->trans[transnr].atom != NULL) || (state->trans[transnr].count >= 0))) { int newto = state->trans[transnr].to; if (ctxt->states[newto] == NULL) continue; if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) { ctxt->states[newto]->reached = XML_REGEXP_MARK_START; target = ctxt->states[newto]; } } } /* * find the next accessible state not explored */ if (target == NULL) { for (statenr = 1;statenr < ctxt->nbStates;statenr++) { state = ctxt->states[statenr]; if ((state != NULL) && (state->reached == XML_REGEXP_MARK_START)) { target = state; break; } } } state = target; } for (statenr = 0;statenr < ctxt->nbStates;statenr++) { state = ctxt->states[statenr]; if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) {#ifdef DEBUG_REGEXP_GRAPH printf("Removed unreachable state %d\n", statenr);#endif xmlRegFreeState(state); ctxt->states[statenr] = NULL; } }}/** * xmlFACompareAtoms: * @atom1: an atom * @atom2: an atom * * Compares two atoms to check whether they are equivalents * * Returns 1 if yes and 0 otherwise */static intxmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) { if (atom1 == atom2) return(1); if ((atom1 == NULL) || (atom2 == NULL)) return(0); if (atom1->type != atom2->type) return(0); switch (atom1->type) { case XML_REGEXP_STRING: return(xmlStrEqual((xmlChar *)atom1->valuep, (xmlChar *)atom2->valuep)); case XML_REGEXP_EPSILON: return(1); case XML_REGEXP_CHARVAL: return(atom1->codepoint == atom2->codepoint); case XML_REGEXP_RANGES: TODO; return(0); default: break; } return(1);}/** * xmlFARecurseDeterminism: * @ctxt: a regexp parser context * * Check whether the associated regexp is determinist, * should be called after xmlFAEliminateEpsilonTransitions() * */static intxmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, int to, xmlRegAtomPtr atom) { int ret = 1; int transnr; xmlRegTransPtr t1; if (state == NULL) return(ret); for (transnr = 0;transnr < state->nbTrans;transnr++) { t1 = &(state->trans[transnr]); /* * check transitions conflicting with the one looked at */ if (t1->atom == NULL) { if (t1->to == -1) continue; ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], to, atom); if (ret == 0) return(0); continue; } if (t1->to != to) continue; if (xmlFACompareAtoms(t1->atom, atom)) return(0); } return(ret);}/** * xmlFAComputesDeterminism: * @ctxt: a regexp parser context * * Check whether the associated regexp is determinist, * should be called after xmlFAEliminateEpsilonTransitions() * */static intxmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) { int statenr, transnr; xmlRegStatePtr state; xmlRegTransPtr t1, t2; int i; int ret = 1;#ifdef DEBUG_REGEXP_GRAPH printf("xmlFAComputesDeterminism\n"); xmlRegPrintCtxt(stdout, ctxt);#endif if (ctxt->determinist != -1) return(ctxt->determinist); /* * Check for all states that there aren't 2 transitions * with the same atom and a different target. */ for (statenr = 0;statenr < ctxt->nbStates;statenr++) { state = ctxt->states[statenr]; if (state == NULL) continue; for (transnr = 0;transnr < state->nbTrans;transnr++) { t1 = &(state->trans[transnr]); /* * Determinism checks in case of counted or all transitions * will have to be handled separately */ if (t1->atom == NULL) continue; if (t1->to == -1) /* eliminated */ continue; for (i = 0;i < transnr;i++) { t2 = &(state->trans[i]); if (t2->to == -1) /* eliminated */ continue; if (t2->atom != NULL) { if (t1->to == t2->to) { if (xmlFACompareAtoms(t1->atom, t2->atom)) t2->to = -1; /* eliminated */ } else { /* not determinist ! */ if (xmlFACompareAtoms(t1->atom, t2->atom)) ret = 0; } } else if (t1->to != -1) { /* * do the closure in case of remaining specific * epsilon transitions like choices or all */ ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], t2->to, t2->atom); if (ret == 0) return(0); } } if (ret == 0) break; } if (ret == 0) break; } ctxt->determinist = ret; return(ret);}/************************************************************************ * * * Routines to check input against transition atoms * * * ************************************************************************/static intxmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg, int start, int end, const xmlChar *blockName) { int ret = 0; switch (type) { case XML_REGEXP_STRING: case XML_REGEXP_SUBREG: case XML_REGEXP_RANGES: case XML_REGEXP_EPSILON: return(-1); case XML_REGEXP_ANYCHAR: ret = ((codepoint != '\n') && (codepoint != '\r')); break; case XML_REGEXP_CHARVAL: ret = ((codepoint >= start) && (codepoint <= end)); break; case XML_REGEXP_NOTSPACE: neg = !neg; case XML_REGEXP_ANYSPACE: ret = ((codepoint == '\n') || (codepoint == '\r') || (codepoint == '\t') || (codepoint == ' ')); break; case XML_REGEXP_NOTINITNAME: neg = !neg; case XML_REGEXP_INITNAME: ret = (IS_LETTER(codepoint) || (codepoint == '_') || (codepoint == ':')); break; case XML_REGEXP_NOTNAMECHAR: neg = !neg; case XML_REGEXP_NAMECHAR: ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) || (codepoint == '.') || (codepoint == '-') || (codepoint == '_') || (codepoint == ':') || IS_COMBINING(codepoint) || IS_EXTENDER(codepoint)); break; case XML_REGEXP_NOTDECIMAL: neg = !neg; case XML_REGEXP_DECIMAL:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -