📄 xmlregexp.c.svn-base
字号:
ret = xmlUCSIsCatNd(codepoint); break; case XML_REGEXP_REALCHAR: neg = !neg; case XML_REGEXP_NOTREALCHAR: ret = xmlUCSIsCatP(codepoint); if (ret == 0) ret = xmlUCSIsCatZ(codepoint); if (ret == 0) ret = xmlUCSIsCatC(codepoint); break; case XML_REGEXP_LETTER: ret = xmlUCSIsCatL(codepoint); break; case XML_REGEXP_LETTER_UPPERCASE: ret = xmlUCSIsCatLu(codepoint); break; case XML_REGEXP_LETTER_LOWERCASE: ret = xmlUCSIsCatLl(codepoint); break; case XML_REGEXP_LETTER_TITLECASE: ret = xmlUCSIsCatLt(codepoint); break; case XML_REGEXP_LETTER_MODIFIER: ret = xmlUCSIsCatLm(codepoint); break; case XML_REGEXP_LETTER_OTHERS: ret = xmlUCSIsCatLo(codepoint); break; case XML_REGEXP_MARK: ret = xmlUCSIsCatM(codepoint); break; case XML_REGEXP_MARK_NONSPACING: ret = xmlUCSIsCatMn(codepoint); break; case XML_REGEXP_MARK_SPACECOMBINING: ret = xmlUCSIsCatMc(codepoint); break; case XML_REGEXP_MARK_ENCLOSING: ret = xmlUCSIsCatMe(codepoint); break; case XML_REGEXP_NUMBER: ret = xmlUCSIsCatN(codepoint); break; case XML_REGEXP_NUMBER_DECIMAL: ret = xmlUCSIsCatNd(codepoint); break; case XML_REGEXP_NUMBER_LETTER: ret = xmlUCSIsCatNl(codepoint); break; case XML_REGEXP_NUMBER_OTHERS: ret = xmlUCSIsCatNo(codepoint); break; case XML_REGEXP_PUNCT: ret = xmlUCSIsCatP(codepoint); break; case XML_REGEXP_PUNCT_CONNECTOR: ret = xmlUCSIsCatPc(codepoint); break; case XML_REGEXP_PUNCT_DASH: ret = xmlUCSIsCatPd(codepoint); break; case XML_REGEXP_PUNCT_OPEN: ret = xmlUCSIsCatPs(codepoint); break; case XML_REGEXP_PUNCT_CLOSE: ret = xmlUCSIsCatPe(codepoint); break; case XML_REGEXP_PUNCT_INITQUOTE: ret = xmlUCSIsCatPi(codepoint); break; case XML_REGEXP_PUNCT_FINQUOTE: ret = xmlUCSIsCatPf(codepoint); break; case XML_REGEXP_PUNCT_OTHERS: ret = xmlUCSIsCatPo(codepoint); break; case XML_REGEXP_SEPAR: ret = xmlUCSIsCatZ(codepoint); break; case XML_REGEXP_SEPAR_SPACE: ret = xmlUCSIsCatZs(codepoint); break; case XML_REGEXP_SEPAR_LINE: ret = xmlUCSIsCatZl(codepoint); break; case XML_REGEXP_SEPAR_PARA: ret = xmlUCSIsCatZp(codepoint); break; case XML_REGEXP_SYMBOL: ret = xmlUCSIsCatS(codepoint); break; case XML_REGEXP_SYMBOL_MATH: ret = xmlUCSIsCatSm(codepoint); break; case XML_REGEXP_SYMBOL_CURRENCY: ret = xmlUCSIsCatSc(codepoint); break; case XML_REGEXP_SYMBOL_MODIFIER: ret = xmlUCSIsCatSk(codepoint); break; case XML_REGEXP_SYMBOL_OTHERS: ret = xmlUCSIsCatSo(codepoint); break; case XML_REGEXP_OTHER: ret = xmlUCSIsCatC(codepoint); break; case XML_REGEXP_OTHER_CONTROL: ret = xmlUCSIsCatCc(codepoint); break; case XML_REGEXP_OTHER_FORMAT: ret = xmlUCSIsCatCf(codepoint); break; case XML_REGEXP_OTHER_PRIVATE: ret = xmlUCSIsCatCo(codepoint); break; case XML_REGEXP_OTHER_NA: /* ret = xmlUCSIsCatCn(codepoint); */ /* Seems it doesn't exist anymore in recent Unicode releases */ ret = 0; break; case XML_REGEXP_BLOCK_NAME: ret = xmlUCSIsBlock(codepoint, (const char *) blockName); break; } if (neg) return(!ret); return(ret);}static intxmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) { int i, ret = 0; xmlRegRangePtr range; if ((atom == NULL) || (!IS_CHAR(codepoint))) return(-1); switch (atom->type) { case XML_REGEXP_SUBREG: case XML_REGEXP_EPSILON: return(-1); case XML_REGEXP_CHARVAL: return(codepoint == atom->codepoint); case XML_REGEXP_RANGES: { int accept = 0; for (i = 0;i < atom->nbRanges;i++) { range = atom->ranges[i]; if (range->neg == 2) { ret = xmlRegCheckCharacterRange(range->type, codepoint, 0, range->start, range->end, range->blockName); if (ret != 0) return(0); /* excluded char */ } else if (range->neg) { ret = xmlRegCheckCharacterRange(range->type, codepoint, 0, range->start, range->end, range->blockName); if (ret == 0) accept = 1; else return(0); } else { ret = xmlRegCheckCharacterRange(range->type, codepoint, 0, range->start, range->end, range->blockName); if (ret != 0) accept = 1; /* might still be excluded */ } } return(accept); } case XML_REGEXP_STRING: printf("TODO: XML_REGEXP_STRING\n"); return(-1); case XML_REGEXP_ANYCHAR: case XML_REGEXP_ANYSPACE: case XML_REGEXP_NOTSPACE: case XML_REGEXP_INITNAME: case XML_REGEXP_NOTINITNAME: case XML_REGEXP_NAMECHAR: case XML_REGEXP_NOTNAMECHAR: case XML_REGEXP_DECIMAL: case XML_REGEXP_NOTDECIMAL: case XML_REGEXP_REALCHAR: case XML_REGEXP_NOTREALCHAR: case XML_REGEXP_LETTER: case XML_REGEXP_LETTER_UPPERCASE: case XML_REGEXP_LETTER_LOWERCASE: case XML_REGEXP_LETTER_TITLECASE: case XML_REGEXP_LETTER_MODIFIER: case XML_REGEXP_LETTER_OTHERS: case XML_REGEXP_MARK: case XML_REGEXP_MARK_NONSPACING: case XML_REGEXP_MARK_SPACECOMBINING: case XML_REGEXP_MARK_ENCLOSING: case XML_REGEXP_NUMBER: case XML_REGEXP_NUMBER_DECIMAL: case XML_REGEXP_NUMBER_LETTER: case XML_REGEXP_NUMBER_OTHERS: case XML_REGEXP_PUNCT: case XML_REGEXP_PUNCT_CONNECTOR: case XML_REGEXP_PUNCT_DASH: case XML_REGEXP_PUNCT_OPEN: case XML_REGEXP_PUNCT_CLOSE: case XML_REGEXP_PUNCT_INITQUOTE: case XML_REGEXP_PUNCT_FINQUOTE: case XML_REGEXP_PUNCT_OTHERS: case XML_REGEXP_SEPAR: case XML_REGEXP_SEPAR_SPACE: case XML_REGEXP_SEPAR_LINE: case XML_REGEXP_SEPAR_PARA: case XML_REGEXP_SYMBOL: case XML_REGEXP_SYMBOL_MATH: case XML_REGEXP_SYMBOL_CURRENCY: case XML_REGEXP_SYMBOL_MODIFIER: case XML_REGEXP_SYMBOL_OTHERS: case XML_REGEXP_OTHER: case XML_REGEXP_OTHER_CONTROL: case XML_REGEXP_OTHER_FORMAT: case XML_REGEXP_OTHER_PRIVATE: case XML_REGEXP_OTHER_NA: case XML_REGEXP_BLOCK_NAME: ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0, (const xmlChar *)atom->valuep); if (atom->neg) ret = !ret; break; } return(ret);}/************************************************************************ * * * Saving and restoring state of an execution context * * * ************************************************************************/#ifdef DEBUG_REGEXP_EXECstatic voidxmlFARegDebugExec(xmlRegExecCtxtPtr exec) { printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index); if (exec->inputStack != NULL) { int i; printf(": "); for (i = 0;(i < 3) && (i < exec->inputStackNr);i++) printf("%s ", exec->inputStack[exec->inputStackNr - (i + 1)]); } else { printf(": %s", &(exec->inputString[exec->index])); } printf("\n");}#endifstatic voidxmlFARegExecSave(xmlRegExecCtxtPtr exec) {#ifdef DEBUG_REGEXP_EXEC printf("saving "); exec->transno++; xmlFARegDebugExec(exec); exec->transno--;#endif if (exec->maxRollbacks == 0) { exec->maxRollbacks = 4; exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks * sizeof(xmlRegExecRollback)); if (exec->rollbacks == NULL) { xmlRegexpErrMemory(NULL, "saving regexp"); exec->maxRollbacks = 0; return; } memset(exec->rollbacks, 0, exec->maxRollbacks * sizeof(xmlRegExecRollback)); } else if (exec->nbRollbacks >= exec->maxRollbacks) { xmlRegExecRollback *tmp; int len = exec->maxRollbacks; exec->maxRollbacks *= 2; tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks, exec->maxRollbacks * sizeof(xmlRegExecRollback)); if (tmp == NULL) { xmlRegexpErrMemory(NULL, "saving regexp"); exec->maxRollbacks /= 2; return; } exec->rollbacks = tmp; tmp = &exec->rollbacks[len]; memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback)); } exec->rollbacks[exec->nbRollbacks].state = exec->state; exec->rollbacks[exec->nbRollbacks].index = exec->index; exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1; if (exec->comp->nbCounters > 0) { if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { exec->rollbacks[exec->nbRollbacks].counts = (int *) xmlMalloc(exec->comp->nbCounters * sizeof(int)); if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { xmlRegexpErrMemory(NULL, "saving regexp"); exec->status = -5; return; } } memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts, exec->comp->nbCounters * sizeof(int)); } exec->nbRollbacks++;}static voidxmlFARegExecRollBack(xmlRegExecCtxtPtr exec) { if (exec->nbRollbacks <= 0) { exec->status = -1;#ifdef DEBUG_REGEXP_EXEC printf("rollback failed on empty stack\n");#endif return; } exec->nbRollbacks--; exec->state = exec->rollbacks[exec->nbRollbacks].state; exec->index = exec->rollbacks[exec->nbRollbacks].index; exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch; if (exec->comp->nbCounters > 0) { if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { fprintf(stderr, "exec save: allocation failed"); exec->status = -6; return; } memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts, exec->comp->nbCounters * sizeof(int)); }#ifdef DEBUG_REGEXP_EXEC printf("restored "); xmlFARegDebugExec(exec);#endif}/************************************************************************ * * * Verifier, running an input against a compiled regexp * * * ************************************************************************/static intxmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) { xmlRegExecCtxt execval; xmlRegExecCtxtPtr exec = &execval; int ret, codepoint, len; exec->inputString = content; exec->index = 0; exec->determinist = 1; exec->maxRollbacks = 0; exec->nbRollbacks = 0; exec->rollbacks = NULL; exec->status = 0; exec->comp = comp; exec->state = comp->states[0]; exec->transno = 0; exec->transcount = 0; exec->inputStack = NULL; exec->inputStackMax = 0; if (comp->nbCounters > 0) { exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)); if (exec->counts == NULL) { xmlRegexpErrMemory(NULL, "running regexp"); return(-1); } memset(exec->counts, 0, comp->nbCounters * sizeof(int)); } else exec->counts = NULL; while ((exec->status == 0) && ((exec->inputString[exec->index] != 0) || (exec->state->type != XML_REGEXP_FINAL_STATE))) { xmlRegTransPtr trans; xmlRegAtomPtr atom; /* * If end of input on non-terminal state, rollback, however we may * still have epsilon like transition for counted transitions * on counters, in that case don't break too early. Additionally, * if we are working on a range like "AB{0,2}", where B is not present, * we don't want to break. */ if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) { /* * if there is a transition, we must check if * atom allows minOccurs of 0 */ if (exec->transno < exec->state->nbTrans) { trans = &exec->state->trans[exec->transno]; if (trans->to >=0) { atom = trans->atom; if (!((atom->min == 0) && (atom->max > 0))) goto rollback; } } else goto rollback; } exec->transcount = 0; for (;exec->transno < exec->state->nbTrans;exec->transno++) { trans = &exec->state->trans[exec->transno]; if (trans->to < 0) continue; atom = trans->atom; ret = 0; if (trans->count >= 0) { int count; xmlRegCounterPtr counter; /* * A counted transition. */ count = exec->counts[trans->count]; counter = &exec->comp->counters[trans->count];#ifdef DEBUG_REGEXP_EXEC printf("testing count %d: val %d, min %d, max %d\n", trans->count, count, counter->min, counter->max);#endif ret = ((count >= counter->min) && (count <= counter->max)); } else if (atom == NULL) { fprintf(stderr, "epsilon transition left at runtime\n"); exec->status = -2; break; } else if (exec->inputString[exec->index] != 0) { codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len); ret = xmlRegCheckCharacter(atom, codepoint); if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) { xmlRegStatePtr to = comp->states[trans->to]; /* * this is a multiple input sequence */ if (exec->state->nbTrans > exec->transno + 1) { xmlFARegExecSave(exec); } exec->transcount = 1; do { /* * Try to progress as much as possible on the input */ if (exec->transcount == atom->max) { break; } exec->index += len; /* * End of input: stop here */ if (exec->inputString[exec->index] == 0) { exec->index -= len; break; } if (exec->transcount >= atom->min) { int transno = exec->transno; xmlRegStatePtr state = exec->state; /* * The transition is acceptable save it */ exec->transno = -1; /* trick */ exec->state = to; xmlFARegExecSave(exec); exec->transno = transno; exec->state = state; } codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -