📄 xmlregexp.c
字号:
fprintf(output, "SYMBOL_MATH "); break;
case XML_REGEXP_SYMBOL_CURRENCY:
fprintf(output, "SYMBOL_CURRENCY "); break;
case XML_REGEXP_SYMBOL_MODIFIER:
fprintf(output, "SYMBOL_MODIFIER "); break;
case XML_REGEXP_SYMBOL_OTHERS:
fprintf(output, "SYMBOL_OTHERS "); break;
case XML_REGEXP_OTHER:
fprintf(output, "OTHER "); break;
case XML_REGEXP_OTHER_CONTROL:
fprintf(output, "OTHER_CONTROL "); break;
case XML_REGEXP_OTHER_FORMAT:
fprintf(output, "OTHER_FORMAT "); break;
case XML_REGEXP_OTHER_PRIVATE:
fprintf(output, "OTHER_PRIVATE "); break;
case XML_REGEXP_OTHER_NA:
fprintf(output, "OTHER_NA "); break;
case XML_REGEXP_BLOCK_NAME:
fprintf(output, "BLOCK "); break;
}
}
/**
 * xmlRegPrintQuantType:
 * @output: the stream to write to
 * @type: the quantifier type to describe
 *
 * Writes a short label (with a trailing space) for a quantifier type.
 * A value that matches none of the cases below produces no output.
 */
static void
xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
    const char *label = NULL;

    switch (type) {
        case XML_REGEXP_QUANT_EPSILON:
            label = "epsilon ";
            break;
        case XML_REGEXP_QUANT_ONCE:
            label = "once ";
            break;
        case XML_REGEXP_QUANT_OPT:
            label = "? ";
            break;
        case XML_REGEXP_QUANT_MULT:
            label = "* ";
            break;
        case XML_REGEXP_QUANT_PLUS:
            label = "+ ";
            break;
        case XML_REGEXP_QUANT_RANGE:
            label = "range ";
            break;
        case XML_REGEXP_QUANT_ONCEONLY:
            label = "onceonly ";
            break;
        case XML_REGEXP_QUANT_ALL:
            label = "all ";
            break;
    }

    /* label stays NULL when no case matched: print nothing then. */
    if (label != NULL)
        fprintf(output, "%s", label);
}
/**
 * xmlRegPrintRange:
 * @output: the stream to write to
 * @range: the range to dump (dereferenced without a NULL check)
 *
 * Writes one line describing a range: an optional "negative " marker,
 * the atom type label, then the start and end values printed with %c.
 */
static void
xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
    fprintf(output, " range: %s", range->neg ? "negative " : "");
    xmlRegPrintAtomType(output, range->type);
    fprintf(output, "%c - %c\n", range->start, range->end);
}
/**
 * xmlRegPrintAtom:
 * @output: the stream to write to
 * @atom: the atom to dump, may be NULL
 *
 * Writes a description of an atom: its type and quantifier labels, the
 * min-max bounds for a range quantifier, the string value for string
 * atoms, and then a type specific tail (character, list of ranges, or
 * sub-expression start/end state numbers).
 */
static void
xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
    int idx;

    if (atom == NULL) {
        fprintf(output, " atom: NULL\n");
        return;
    }

    fprintf(output, " atom: ");
    xmlRegPrintAtomType(output, atom->type);
    xmlRegPrintQuantType(output, atom->quant);

    if (atom->quant == XML_REGEXP_QUANT_RANGE)
        fprintf(output, "%d-%d ", atom->min, atom->max);
    if (atom->type == XML_REGEXP_STRING)
        fprintf(output, "'%s' ", (char *) atom->valuep);

    /* Every branch below is responsible for terminating the line. */
    switch (atom->type) {
        case XML_REGEXP_CHARVAL:
            fprintf(output, "char %c\n", atom->codepoint);
            break;
        case XML_REGEXP_RANGES:
            fprintf(output, "%d entries\n", atom->nbRanges);
            for (idx = 0; idx < atom->nbRanges; idx++)
                xmlRegPrintRange(output, atom->ranges[idx]);
            break;
        case XML_REGEXP_SUBREG:
            fprintf(output, "start %d end %d\n",
                    atom->start->no, atom->stop->no);
            break;
        default:
            fprintf(output, "\n");
            break;
    }
}
/**
 * xmlRegPrintTrans:
 * @output: the stream to write to
 * @trans: the transition to dump, may be NULL
 *
 * Writes one line describing a transition.  A negative target is
 * reported as "removed".  Otherwise the counter and count information
 * is printed when present, followed by either "epsilon to N" when the
 * transition carries no atom, or the atom number and the target.
 */
static void
xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
    xmlRegAtomPtr label;

    if (trans == NULL) {
        fprintf(output, " trans: NULL\n");
        return;
    }
    if (trans->to < 0) {
        fprintf(output, " trans: removed\n");
        return;
    }

    fprintf(output, " trans: ");
    if (trans->counter >= 0)
        fprintf(output, "counted %d, ", trans->counter);

    if (trans->count == REGEXP_ALL_COUNTER)
        fprintf(output, "all transition, ");
    else if (trans->count >= 0)
        fprintf(output, "count based %d, ", trans->count);

    label = trans->atom;
    if (label == NULL) {
        fprintf(output, "epsilon to %d\n", trans->to);
        return;
    }

    if (label->type == XML_REGEXP_CHARVAL)
        fprintf(output, "char %c ", label->codepoint);
    fprintf(output, "atom %d, to %d\n", label->no, trans->to);
}
/**
 * xmlRegPrintState:
 * @output: the stream to write to
 * @state: the state to dump, may be NULL
 *
 * Writes a header line for a state (START/FINAL marker, state number
 * and transition count) followed by one line per transition.
 */
static void
xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
    int idx;

    if (state == NULL) {
        fprintf(output, " state: NULL\n");
        return;
    }

    fprintf(output, " state: %s%s%d, %d transitions:\n",
            (state->type == XML_REGEXP_START_STATE) ? "START " : "",
            (state->type == XML_REGEXP_FINAL_STATE) ? "FINAL " : "",
            state->no, state->nbTrans);

    for (idx = 0; idx < state->nbTrans; idx++)
        xmlRegPrintTrans(output, state->trans + idx);
}
#ifdef DEBUG_REGEXP_GRAPH
/**
 * xmlRegPrintCtxt:
 * @output: the stream to write to
 * @ctxt: the regexp parser context to dump, may be NULL
 *
 * Writes a full dump of a parser context: the expression string, the
 * error and neg flags, every registered atom, the current atom if any,
 * every state with its transitions, and the min/max of each counter.
 */
static void
xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
    int idx;

    if (ctxt == NULL) {
        fprintf(output, " ctxt: NULL\n");
        return;
    }

    /* Header line: expression followed by the flags that are set. */
    fprintf(output, " ctxt: '%s' ", ctxt->string);
    if (ctxt->error)
        fprintf(output, "error ");
    if (ctxt->neg)
        fprintf(output, "neg ");
    fprintf(output, "\n");

    /* Atoms, each prefixed by its index in the array. */
    fprintf(output, "%d atoms:\n", ctxt->nbAtoms);
    for (idx = 0; idx < ctxt->nbAtoms; idx++) {
        fprintf(output, " %02d ", idx);
        xmlRegPrintAtom(output, ctxt->atoms[idx]);
    }
    if (ctxt->atom != NULL) {
        fprintf(output, "current atom:\n");
        xmlRegPrintAtom(output, ctxt->atom);
    }

    /* States, with the start/end state numbers on the summary line. */
    fprintf(output, "%d states:", ctxt->nbStates);
    if (ctxt->start != NULL)
        fprintf(output, " start: %d", ctxt->start->no);
    if (ctxt->end != NULL)
        fprintf(output, " end: %d", ctxt->end->no);
    fprintf(output, "\n");
    for (idx = 0; idx < ctxt->nbStates; idx++)
        xmlRegPrintState(output, ctxt->states[idx]);

    /* Counters. */
    fprintf(output, "%d counters:\n", ctxt->nbCounters);
    for (idx = 0; idx < ctxt->nbCounters; idx++)
        fprintf(output, " %d: min %d max %d\n", idx,
                ctxt->counters[idx].min, ctxt->counters[idx].max);
}
#endif
/************************************************************************
* *
* Finite Automata structures manipulations *
* *
************************************************************************/
/**
 * xmlRegAtomAddRange:
 * @ctxt: a regexp parser context (used for error reporting)
 * @atom: the atom receiving the range; must be of type XML_REGEXP_RANGES
 * @neg: passed through to xmlRegNewRange
 * @type: passed through to xmlRegNewRange
 * @start: passed through to xmlRegNewRange
 * @end: passed through to xmlRegNewRange
 * @blockName: stored in the new range's blockName field
 *
 * Creates a new range and appends it to the atom's range array.  The
 * array starts with room for 4 entries and doubles whenever it is full.
 * On any failure (bad atom, allocation failure, range creation failure)
 * the function returns without appending anything.
 */
static void
xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
                   int neg, xmlRegAtomType type, int start, int end,
                   xmlChar *blockName) {
    xmlRegRangePtr added;

    if (atom == NULL) {
        ERROR("add range: atom is NULL");
        return;
    }
    if (atom->type != XML_REGEXP_RANGES) {
        ERROR("add range: atom is not ranges");
        return;
    }

    if (atom->maxRanges == 0) {
        /* First range: allocate the initial array. */
        atom->maxRanges = 4;
        atom->ranges = (xmlRegRangePtr *)
            xmlMalloc(atom->maxRanges * sizeof(xmlRegRangePtr));
        if (atom->ranges == NULL) {
            xmlRegexpErrMemory(ctxt, "adding ranges");
            atom->maxRanges = 0;
            return;
        }
    } else if (atom->nbRanges >= atom->maxRanges) {
        /* Array is full: double it, keeping the old one on failure. */
        xmlRegRangePtr *grown;

        atom->maxRanges *= 2;
        grown = (xmlRegRangePtr *)
            xmlRealloc(atom->ranges,
                       atom->maxRanges * sizeof(xmlRegRangePtr));
        if (grown == NULL) {
            xmlRegexpErrMemory(ctxt, "adding ranges");
            atom->maxRanges /= 2;
            return;
        }
        atom->ranges = grown;
    }

    added = xmlRegNewRange(ctxt, neg, type, start, end);
    if (added == NULL)
        return;

    added->blockName = blockName;
    atom->ranges[atom->nbRanges] = added;
    atom->nbRanges++;
}
/**
 * xmlRegGetCounter:
 * @ctxt: a regexp parser context
 *
 * Reserves the next slot of the context's counter array, growing the
 * array when needed (initial capacity 4, then doubling), and sets the
 * slot's min and max to -1.
 *
 * Returns the index of the reserved counter, or -1 if memory could not
 * be allocated.
 */
static int
xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
    int slot;

    if (ctxt->maxCounters == 0) {
        ctxt->maxCounters = 4;
        ctxt->counters = (xmlRegCounter *)
            xmlMalloc(ctxt->maxCounters * sizeof(xmlRegCounter));
        if (ctxt->counters == NULL) {
            xmlRegexpErrMemory(ctxt, "allocating counter");
            ctxt->maxCounters = 0;
            return(-1);
        }
    } else if (ctxt->nbCounters >= ctxt->maxCounters) {
        xmlRegCounter *grown;

        ctxt->maxCounters *= 2;
        grown = (xmlRegCounter *)
            xmlRealloc(ctxt->counters,
                       ctxt->maxCounters * sizeof(xmlRegCounter));
        if (grown == NULL) {
            /* Undo the doubling; the old array is still in place. */
            xmlRegexpErrMemory(ctxt, "allocating counter");
            ctxt->maxCounters /= 2;
            return(-1);
        }
        ctxt->counters = grown;
    }

    slot = ctxt->nbCounters++;
    ctxt->counters[slot].min = -1;
    ctxt->counters[slot].max = -1;
    return(slot);
}
/**
 * xmlRegAtomPush:
 * @ctxt: a regexp parser context
 * @atom: the atom to register
 *
 * Appends @atom to the context's atom array, growing the array when
 * needed (initial capacity 4, then doubling), and records the atom's
 * index in its `no` field.
 *
 * Returns 0 on success, -1 if @atom is NULL or memory could not be
 * allocated.
 */
static int
xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
    if (atom == NULL) {
        ERROR("atom push: atom is NULL");
        return(-1);
    }
    if (ctxt->maxAtoms == 0) {
        ctxt->maxAtoms = 4;
        ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
                                                 sizeof(xmlRegAtomPtr));
        if (ctxt->atoms == NULL) {
            xmlRegexpErrMemory(ctxt, "pushing atom");
            ctxt->maxAtoms = 0;
            return(-1);
        }
    } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
        xmlRegAtomPtr *tmp;

        ctxt->maxAtoms *= 2;
        tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
                                           sizeof(xmlRegAtomPtr));
        if (tmp == NULL) {
            /*
             * Report the operation that actually failed; this path used
             * to say "allocating counter", which pointed at the wrong
             * routine.
             */
            xmlRegexpErrMemory(ctxt, "pushing atom");
            /* Undo the doubling; the old array is still in place. */
            ctxt->maxAtoms /= 2;
            return(-1);
        }
        ctxt->atoms = tmp;
    }
    atom->no = ctxt->nbAtoms;
    ctxt->atoms[ctxt->nbAtoms++] = atom;
    return(0);
}
/**
 * xmlRegStateAddTrans:
 * @ctxt: a regexp parser context (used for error reporting)
 * @state: the state the transition leaves from
 * @atom: the atom labelling the transition, or NULL
 * @target: the state the transition leads to
 * @counter: counter value stored on the transition
 * @count: count value stored on the transition
 *
 * Appends a transition to @state unless an identical one (same atom,
 * target number, counter and count) is already present.  The transition
 * array starts with room for 4 entries and doubles whenever it is full.
 * On error nothing is added.
 */
static void
xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
                    xmlRegAtomPtr atom, xmlRegStatePtr target,
                    int counter, int count) {
    int idx;
    xmlRegTrans *slot;

    if (state == NULL) {
        ERROR("add state: state is NULL");
        return;
    }
    if (target == NULL) {
        ERROR("add state: target is NULL");
        return;
    }

    /*
     * Callers tend to add transitions liberally, so an exact duplicate
     * of an existing transition is silently dropped here.
     */
    for (idx = 0; idx < state->nbTrans; idx++) {
        slot = &state->trans[idx];
        if ((slot->atom != atom) ||
            (slot->to != target->no) ||
            (slot->counter != counter) ||
            (slot->count != count))
            continue;
#ifdef DEBUG_REGEXP_GRAPH
        printf("Ignoring duplicate transition from %d to %d\n",
               state->no, target->no);
#endif
        return;
    }

    if (state->maxTrans == 0) {
        /* First transition: allocate the initial array. */
        state->maxTrans = 4;
        state->trans = (xmlRegTrans *)
            xmlMalloc(state->maxTrans * sizeof(xmlRegTrans));
        if (state->trans == NULL) {
            xmlRegexpErrMemory(ctxt, "adding transition");
            state->maxTrans = 0;
            return;
        }
    } else if (state->nbTrans >= state->maxTrans) {
        /* Array is full: double it, keeping the old one on failure. */
        xmlRegTrans *grown;

        state->maxTrans *= 2;
        grown = (xmlRegTrans *)
            xmlRealloc(state->trans,
                       state->maxTrans * sizeof(xmlRegTrans));
        if (grown == NULL) {
            xmlRegexpErrMemory(ctxt, "adding transition");
            state->maxTrans /= 2;
            return;
        }
        state->trans = grown;
    }

#ifdef DEBUG_REGEXP_GRAPH
    printf("Add trans from %d to %d ", state->no, target->no);
    if (count == REGEXP_ALL_COUNTER)
        printf("all transition\n");
    else if (count >= 0)
        printf("count based %d\n", count);
    else if (counter >= 0)
        printf("counted %d\n", counter);
    else if (atom == NULL)
        printf("epsilon transition\n");
    else
        xmlRegPrintAtom(stdout, atom);
#endif

    slot = &state->trans[state->nbTrans];
    slot->atom = atom;
    slot->to = target->no;
    slot->counter = counter;
    slot->count = count;
    state->nbTrans++;
}
/**
 * xmlRegStatePush:
 * @ctxt: a regexp parser context
 * @state: the state to register
 *
 * Appends @state to the context's state array, growing the array when
 * needed (initial capacity 4, then doubling), and records the state's
 * index in its `no` field.
 *
 * Returns 0 on success, -1 if @state is NULL or memory could not be
 * allocated.
 */
static int
xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
    int slot;

    if (state == NULL)
        return(-1);

    if (ctxt->maxStates == 0) {
        ctxt->maxStates = 4;
        ctxt->states = (xmlRegStatePtr *)
            xmlMalloc(ctxt->maxStates * sizeof(xmlRegStatePtr));
        if (ctxt->states == NULL) {
            xmlRegexpErrMemory(ctxt, "adding state");
            ctxt->maxStates = 0;
            return(-1);
        }
    } else if (ctxt->nbStates >= ctxt->maxStates) {
        xmlRegStatePtr *grown;

        ctxt->maxStates *= 2;
        grown = (xmlRegStatePtr *)
            xmlRealloc(ctxt->states,
                       ctxt->maxStates * sizeof(xmlRegStatePtr));
        if (grown == NULL) {
            /* Undo the doubling; the old array is still in place. */
            xmlRegexpErrMemory(ctxt, "adding state");
            ctxt->maxStates /= 2;
            return(-1);
        }
        ctxt->states = grown;
    }

    slot = ctxt->nbStates++;
    state->no = slot;
    ctxt->states[slot] = state;
    return(0);
}
/**
 * xmlFAGenerateAllTransition:
 * @ctxt: a regexp parser context
 * @from: the from state
 * @to: the target state or NULL for building a new one
 * @lax: when non-zero the transition count is REGEXP_ALL_LAX_COUNTER,
 *       otherwise REGEXP_ALL_COUNTER
 *
 * Adds an atom-less "all" transition from @from to @to.  When @to is
 * NULL a new state is created, pushed on the context and made the
 * context's current state before the transition is added.
 *
 * NOTE(review): the results of xmlRegNewState and xmlRegStatePush are
 * not checked here - confirm how failures are expected to propagate.
 */
static void
xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
                           xmlRegStatePtr from, xmlRegStatePtr to,
                           int lax) {
    int count = lax ? REGEXP_ALL_LAX_COUNTER : REGEXP_ALL_COUNTER;

    if (to == NULL) {
        to = xmlRegNewState(ctxt);
        xmlRegStatePush(ctxt, to);
        ctxt->state = to;
    }
    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, count);
}
/**
 * xmlFAGenerateEpsilonTransition:
 * @ctxt: a regexp parser context
 * @from: the from state
 * @to: the target state or NULL for building a new one
 *
 * Adds a transition without atom, counter or count from @from to @to.
 * When @to is NULL a new state is created, pushed on the context and
 * made the context's current state before the transition is added.
 *
 * NOTE(review): the results of xmlRegNewState and xmlRegStatePush are
 * not checked here - confirm how failures are expected to propagate.
 */
static void
xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
                               xmlRegStatePtr from, xmlRegStatePtr to) {
    xmlRegStatePtr dest = to;

    if (dest == NULL) {
        dest = xmlRegNewState(ctxt);
        xmlRegStatePush(ctxt, dest);
        ctxt->state = dest;
    }
    xmlRegStateAddTrans(ctxt, from, NULL, dest, -1, -1);
}
/**
 * xmlFAGenerateCountedEpsilonTransition:
 * @ctxt: a regexp parser context
 * @from: the from state
 * @to: the target state or NULL for building a new one
 * @counter: the counter for that transition
 *
 * Adds an atom-less transition from @from to @to that carries @counter
 * as its counter and -1 as its count.  When @to is NULL a new state is
 * created, pushed on the context and made the context's current state
 * before the transition is added.
 *
 * NOTE(review): the results of xmlRegNewState and xmlRegStatePush are
 * not checked here - confirm how failures are expected to propagate.
 */
static void
xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
        xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
    xmlRegStatePtr dest = to;

    if (dest == NULL) {
        dest = xmlRegNewState(ctxt);
        xmlRegStatePush(ctxt, dest);
        ctxt->state = dest;
    }
    xmlRegStateAddTrans(ctxt, from, NULL, dest, counter, -1);
}
/**
* xmlFAGenerateCountedTransition:
* @ctxt: a regexp parser context
* @from: the from state
* @to: the target state or NULL for building a new one
* counter: the counter for that transition
*
*/
static void
xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
if (to == NULL) {
to = xmlRegNewState(ctxt);
xmlRegStatePush(ctxt, to);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -