📄 xmlregexp.c
字号:
ctxt->state = to;
}
xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);
}
/**
* xmlFAGenerateTransitions:
* @ctxt: a regexp parser context
* @from: the from state
* @to: the target state or NULL for building a new one
* @atom: the atom generating the transition
*
* Returns 0 if success and -1 in case of error.
*/
static int
xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
xmlRegStatePtr to, xmlRegAtomPtr atom) {
if (atom == NULL) {
ERROR("genrate transition: atom == NULL");
return(-1);
}
if (atom->type == XML_REGEXP_SUBREG) {
/*
* this is a subexpression handling one should not need to
* create a new node except for XML_REGEXP_QUANT_RANGE.
*/
if (xmlRegAtomPush(ctxt, atom) < 0) {
return(-1);
}
if ((to != NULL) && (atom->stop != to) &&
(atom->quant != XML_REGEXP_QUANT_RANGE)) {
/*
* Generate an epsilon transition to link to the target
*/
xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
}
switch (atom->quant) {
case XML_REGEXP_QUANT_OPT:
atom->quant = XML_REGEXP_QUANT_ONCE;
xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
break;
case XML_REGEXP_QUANT_MULT:
atom->quant = XML_REGEXP_QUANT_ONCE;
xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
break;
case XML_REGEXP_QUANT_PLUS:
atom->quant = XML_REGEXP_QUANT_ONCE;
xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
break;
case XML_REGEXP_QUANT_RANGE: {
int counter;
xmlRegStatePtr newstate;
/*
* This one is nasty:
* 1/ if range has minOccurs == 0, create a new state
* and create epsilon transitions from atom->start
* to atom->stop, as well as atom->start to the new
* state
* 2/ register a new counter
* 3/ register an epsilon transition associated to
* this counter going from atom->stop to atom->start
* 4/ create a new state
* 5/ generate a counted transition from atom->stop to
* that state
*/
if (atom->min == 0) {
xmlFAGenerateEpsilonTransition(ctxt, atom->start,
atom->stop);
newstate = xmlRegNewState(ctxt);
xmlRegStatePush(ctxt, newstate);
ctxt->state = newstate;
xmlFAGenerateEpsilonTransition(ctxt, atom->start,
newstate);
}
counter = xmlRegGetCounter(ctxt);
ctxt->counters[counter].min = atom->min - 1;
ctxt->counters[counter].max = atom->max - 1;
atom->min = 0;
atom->max = 0;
atom->quant = XML_REGEXP_QUANT_ONCE;
xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
atom->start, counter);
if (to != NULL) {
newstate = to;
} else {
newstate = xmlRegNewState(ctxt);
xmlRegStatePush(ctxt, newstate);
ctxt->state = newstate;
}
xmlFAGenerateCountedTransition(ctxt, atom->stop,
newstate, counter);
}
default:
break;
}
return(0);
} else {
if (to == NULL) {
to = xmlRegNewState(ctxt);
if (to != NULL)
xmlRegStatePush(ctxt, to);
else {
return(-1);
}
}
if (xmlRegAtomPush(ctxt, atom) < 0) {
return(-1);
}
xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
ctxt->state = to;
}
switch (atom->quant) {
case XML_REGEXP_QUANT_OPT:
atom->quant = XML_REGEXP_QUANT_ONCE;
xmlFAGenerateEpsilonTransition(ctxt, from, to);
break;
case XML_REGEXP_QUANT_MULT:
atom->quant = XML_REGEXP_QUANT_ONCE;
xmlFAGenerateEpsilonTransition(ctxt, from, to);
xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
break;
case XML_REGEXP_QUANT_PLUS:
atom->quant = XML_REGEXP_QUANT_ONCE;
xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
break;
default:
break;
}
return(0);
}
/**
* xmlFAReduceEpsilonTransitions:
* @ctxt: a regexp parser context
* @fromnr: the from state
* @tonr: the to state
* @counter: should that transition be associated to a counted
*
*/
static void
xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
int tonr, int counter) {
int transnr;
xmlRegStatePtr from;
xmlRegStatePtr to;
#ifdef DEBUG_REGEXP_GRAPH
printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
#endif
from = ctxt->states[fromnr];
if (from == NULL)
return;
to = ctxt->states[tonr];
if (to == NULL)
return;
if ((to->mark == XML_REGEXP_MARK_START) ||
(to->mark == XML_REGEXP_MARK_VISITED))
return;
to->mark = XML_REGEXP_MARK_VISITED;
if (to->type == XML_REGEXP_FINAL_STATE) {
#ifdef DEBUG_REGEXP_GRAPH
printf("State %d is final, so %d becomes final\n", tonr, fromnr);
#endif
from->type = XML_REGEXP_FINAL_STATE;
}
for (transnr = 0;transnr < to->nbTrans;transnr++) {
if (to->trans[transnr].atom == NULL) {
/*
* Don't remove counted transitions
* Don't loop either
*/
if (to->trans[transnr].to != fromnr) {
if (to->trans[transnr].count >= 0) {
int newto = to->trans[transnr].to;
xmlRegStateAddTrans(ctxt, from, NULL,
ctxt->states[newto],
-1, to->trans[transnr].count);
} else {
#ifdef DEBUG_REGEXP_GRAPH
printf("Found epsilon trans %d from %d to %d\n",
transnr, tonr, to->trans[transnr].to);
#endif
if (to->trans[transnr].counter >= 0) {
xmlFAReduceEpsilonTransitions(ctxt, fromnr,
to->trans[transnr].to,
to->trans[transnr].counter);
} else {
xmlFAReduceEpsilonTransitions(ctxt, fromnr,
to->trans[transnr].to,
counter);
}
}
}
} else {
int newto = to->trans[transnr].to;
if (to->trans[transnr].counter >= 0) {
xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
ctxt->states[newto],
to->trans[transnr].counter, -1);
} else {
xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
ctxt->states[newto], counter, -1);
}
}
}
to->mark = XML_REGEXP_MARK_NORMAL;
}
/**
* xmlFAEliminateEpsilonTransitions:
* @ctxt: a regexp parser context
*
*/
static void
xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
int statenr, transnr;
xmlRegStatePtr state;
if (ctxt->states == NULL) return;
/*
* build the completed transitions bypassing the epsilons
* Use a marking algorithm to avoid loops
* mark sink states too.
*/
for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
state = ctxt->states[statenr];
if (state == NULL)
continue;
if ((state->nbTrans == 0) &&
(state->type != XML_REGEXP_FINAL_STATE)) {
state->type = XML_REGEXP_SINK_STATE;
}
for (transnr = 0;transnr < state->nbTrans;transnr++) {
if ((state->trans[transnr].atom == NULL) &&
(state->trans[transnr].to >= 0)) {
if (state->trans[transnr].to == statenr) {
state->trans[transnr].to = -1;
#ifdef DEBUG_REGEXP_GRAPH
printf("Removed loopback epsilon trans %d on %d\n",
transnr, statenr);
#endif
} else if (state->trans[transnr].count < 0) {
int newto = state->trans[transnr].to;
#ifdef DEBUG_REGEXP_GRAPH
printf("Found epsilon trans %d from %d to %d\n",
transnr, statenr, newto);
#endif
state->mark = XML_REGEXP_MARK_START;
xmlFAReduceEpsilonTransitions(ctxt, statenr,
newto, state->trans[transnr].counter);
state->mark = XML_REGEXP_MARK_NORMAL;
#ifdef DEBUG_REGEXP_GRAPH
} else {
printf("Found counted transition %d on %d\n",
transnr, statenr);
#endif
}
}
}
}
/*
* Eliminate the epsilon transitions
*/
for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
state = ctxt->states[statenr];
if (state == NULL)
continue;
for (transnr = 0;transnr < state->nbTrans;transnr++) {
if ((state->trans[transnr].atom == NULL) &&
(state->trans[transnr].count < 0) &&
(state->trans[transnr].to >= 0)) {
state->trans[transnr].to = -1;
}
}
}
/*
* Use this pass to detect unreachable states too
*/
for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
state = ctxt->states[statenr];
if (state != NULL)
state->reached = XML_REGEXP_MARK_NORMAL;
}
state = ctxt->states[0];
if (state != NULL)
state->reached = XML_REGEXP_MARK_START;
while (state != NULL) {
xmlRegStatePtr target = NULL;
state->reached = XML_REGEXP_MARK_VISITED;
/*
* Mark all states reachable from the current reachable state
*/
for (transnr = 0;transnr < state->nbTrans;transnr++) {
if ((state->trans[transnr].to >= 0) &&
((state->trans[transnr].atom != NULL) ||
(state->trans[transnr].count >= 0))) {
int newto = state->trans[transnr].to;
if (ctxt->states[newto] == NULL)
continue;
if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) {
ctxt->states[newto]->reached = XML_REGEXP_MARK_START;
target = ctxt->states[newto];
}
}
}
/*
* find the next accessible state not explored
*/
if (target == NULL) {
for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
state = ctxt->states[statenr];
if ((state != NULL) && (state->reached ==
XML_REGEXP_MARK_START)) {
target = state;
break;
}
}
}
state = target;
}
for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
state = ctxt->states[statenr];
if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) {
#ifdef DEBUG_REGEXP_GRAPH
printf("Removed unreachable state %d\n", statenr);
#endif
xmlRegFreeState(state);
ctxt->states[statenr] = NULL;
}
}
}
/**
* xmlFACompareAtoms:
* @atom1: an atom
* @atom2: an atom
*
* Compares two atoms to check whether they are equivalents
*
* Returns 1 if yes and 0 otherwise
*/
static int
xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
if (atom1 == atom2)
return(1);
if ((atom1 == NULL) || (atom2 == NULL))
return(0);
if (atom1->type != atom2->type)
return(0);
switch (atom1->type) {
case XML_REGEXP_STRING:
return(xmlStrEqual((xmlChar *)atom1->valuep,
(xmlChar *)atom2->valuep));
case XML_REGEXP_EPSILON:
return(1);
case XML_REGEXP_CHARVAL:
return(atom1->codepoint == atom2->codepoint);
case XML_REGEXP_RANGES:
TODO;
return(0);
default:
break;
}
return(1);
}
/**
* xmlFARecurseDeterminism:
* @ctxt: a regexp parser context
*
* Check whether the associated regexp is determinist,
* should be called after xmlFAEliminateEpsilonTransitions()
*
*/
static int
xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
int to, xmlRegAtomPtr atom) {
int ret = 1;
int transnr;
xmlRegTransPtr t1;
if (state == NULL)
return(ret);
for (transnr = 0;transnr < state->nbTrans;transnr++) {
t1 = &(state->trans[transnr]);
/*
* check transitions conflicting with the one looked at
*/
if (t1->atom == NULL) {
if (t1->to == -1)
continue;
ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
to, atom);
if (ret == 0)
return(0);
continue;
}
if (t1->to != to)
continue;
if (xmlFACompareAtoms(t1->atom, atom))
return(0);
}
return(ret);
}
/**
* xmlFAComputesDeterminism:
* @ctxt: a regexp parser context
*
* Check whether the associated regexp is determinist,
* should be called after xmlFAEliminateEpsilonTransitions()
*
*/
static int
xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
int statenr, transnr;
xmlRegStatePtr state;
xmlRegTransPtr t1, t2;
int i;
int ret = 1;
#ifdef DEBUG_REGEXP_GRAPH
printf("xmlFAComputesDeterminism\n");
xmlRegPrintCtxt(stdout, ctxt);
#endif
if (ctxt->determinist != -1)
return(ctxt->determinist);
/*
* Check for all states that there aren't 2 transitions
* with the same atom and a different target.
*/
for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
state = ctxt->states[statenr];
if (state == NULL)
continue;
for (transnr = 0;transnr < state->nbTrans;transnr++) {
t1 = &(state->trans[transnr]);
/*
* Determinism checks in case of counted or all transitions
* will have to be handled separately
*/
if (t1->atom == NULL)
continue;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -