📄 qregexp.cpp
字号:
error(RXERR_LIMIT); return 0; } } return setupState(BackRefBit | bref);}#endif/* The two following functions add a transition between all pairs of states (i, j) where i is fond in from, and j is found in to. Cat-transitions are distinguished from plus-transitions for capturing.*/void QRegExpEngine::addCatTransitions(const QVector<int> &from, const QVector<int> &to){ for (int i = 0; i < from.size(); i++) mergeInto(&s[from.at(i)].outs, to);}#ifndef QT_NO_REGEXP_CAPTUREvoid QRegExpEngine::addPlusTransitions(const QVector<int> &from, const QVector<int> &to, int atom){ for (int i = 0; i < from.size(); i++) { QRegExpAutomatonState &st = s[from.at(i)]; const QVector<int> oldOuts = st.outs; mergeInto(&st.outs, to); if (f.at(atom).capture != QRegExpAtom::NoCapture) { for (int j = 0; j < to.size(); j++) { // ### st.reenter.contains(to.at(j)) check looks suspicious if (!st.reenter.contains(to.at(j)) && qBinaryFind(oldOuts.begin(), oldOuts.end(), to.at(j)) == oldOuts.end()) st.reenter.insert(to.at(j), atom); } } }}#endif#ifndef QT_NO_REGEXP_ANCHOR_ALT/* Returns an anchor that means a OR b.*/int QRegExpEngine::anchorAlternation(int a, int b){ if (((a & b) == a || (a & b) == b) && ((a | b) & Anchor_Alternation) == 0) return a & b; int n = aa.size();#ifndef QT_NO_REGEXP_OPTIM if (n > 0 && aa.at(n - 1).a == a && aa.at(n - 1).b == b) return Anchor_Alternation | (n - 1);#endif aa.resize(n + 1); aa[n].a = a; aa[n].b = b; return Anchor_Alternation | n;}/* Returns an anchor that means a AND b.*/int QRegExpEngine::anchorConcatenation(int a, int b){ if (((a | b) & Anchor_Alternation) == 0) return a | b; if ((b & Anchor_Alternation) != 0) qSwap(a, b); int aprime = anchorConcatenation(aa.at(a ^ Anchor_Alternation).a, b); int bprime = anchorConcatenation(aa.at(a ^ Anchor_Alternation).b, b); return anchorAlternation(aprime, bprime);}#endif/* Adds anchor a on a transition caracterised by its from state and its to state.*/void QRegExpEngine::addAnchors(int from, int to, int a){ QRegExpAutomatonState &st = s[from]; if (st.anchors.contains(to)) a = anchorAlternation(st.anchors.value(to), a); st.anchors.insert(to, a);}#ifndef QT_NO_REGEXP_OPTIM/* This function chooses between the good-string and the bad-character heuristics. It computes two scores and chooses the heuristic with the highest score. Here are some common-sense constraints on the scores that should be respected if the formulas are ever modified: (1) If goodStr is empty, the good-string heuristic scores 0. (2) If the regular expression is trivial, the good-string heuristic should be used. (3) If the search is case insensitive, the good-string heuristic should be used, unless it scores 0. (Case insensitivity turns all entries of occ1 to 0.) (4) If (goodLateStart - goodEarlyStart) is big, the good-string heuristic should score less.*/void QRegExpEngine::heuristicallyChooseHeuristic(){ if (minl == 0) { useGoodStringHeuristic = false; } else if (trivial) { useGoodStringHeuristic = true; } else { /* Magic formula: The good string has to constitute a good proportion of the minimum-length string, and appear at a more-or-less known index. */ int goodStringScore = (64 * goodStr.length() / minl) - (goodLateStart - goodEarlyStart); /* Less magic formula: We pick some characters at random, and check whether they are good or bad. */ int badCharScore = 0; int step = qMax(1, NumBadChars / 32); for (int i = 1; i < NumBadChars; i += step) { if (occ1.at(i) == NoOccurrence) badCharScore += minl; else badCharScore += occ1.at(i); } badCharScore /= minl; useGoodStringHeuristic = (goodStringScore > badCharScore); }}#endif#if defined(QT_DEBUG)void QRegExpEngine::dump() const{ int i, j; qDebug("Case %ssensitive engine", cs ? "" : "in"); qDebug(" States"); for (i = 0; i < s.size(); i++) { qDebug(" %d%s", i, i == InitialState ? " (initial)" : i == FinalState ? " (final)" : "");#ifndef QT_NO_REGEXP_CAPTURE if (nf > 0) qDebug(" in atom %d", s[i].atom);#endif int m = s[i].match; if ((m & CharClassBit) != 0) { qDebug(" match character class %d", m ^ CharClassBit);#ifndef QT_NO_REGEXP_CCLASS cl[m ^ CharClassBit].dump();#else qDebug(" negative character class");#endif } else if ((m & BackRefBit) != 0) { qDebug(" match back-reference %d", m ^ BackRefBit); } else if (m >= 0x20 && m <= 0x7e) { qDebug(" match 0x%.4x (%c)", m, m); } else { qDebug(" match 0x%.4x", m); } for (j = 0; j < s[i].outs.size(); j++) { int next = s[i].outs[j]; qDebug(" -> %d", next); if (s[i].reenter.contains(next)) qDebug(" [reenter %d]", s[i].reenter[next]); if (s[i].anchors.value(next) != 0) qDebug(" [anchors 0x%.8x]", s[i].anchors[next]); } }#ifndef QT_NO_REGEXP_CAPTURE if (nf > 0) { qDebug(" Atom Parent Capture"); for (i = 0; i < nf; i++) { if (f[i].capture == QRegExpAtom::NoCapture) { qDebug(" %6d %6d nil", i, f[i].parent); } else { int cap = f[i].capture; bool official = captureForOfficialCapture.contains(cap); qDebug(" %6d %6d %6d %s", i, f[i].parent, f[i].capture, official ? "official" : ""); } } }#endif#ifndef QT_NO_REGEXP_ANCHOR_ALT for (i = 0; i < aa.size(); i++) qDebug(" Anchor alternation 0x%.8x: 0x%.8x 0x%.9x", i, aa[i].a, aa[i].b);#endif}#endifvoid QRegExpEngine::setup(){ ref = 1;#ifndef QT_NO_REGEXP_CAPTURE f.resize(32); nf = 0; cf = -1;#endif officialncap = 0; ncap = 0;#ifndef QT_NO_REGEXP_OPTIM caretAnchored = true; trivial = true;#endif valid = false;#ifndef QT_NO_REGEXP_BACKREF nbrefs = 0;#endif#ifndef QT_NO_REGEXP_OPTIM useGoodStringHeuristic = true; minl = 0; occ1.fill(0, NumBadChars);#endif}int QRegExpEngine::setupState(int match){#ifndef QT_NO_REGEXP_CAPTURE s += QRegExpAutomatonState(cf, match);#else s += QRegExpAutomatonState(match);#endif return s.size() - 1;}#ifndef QT_NO_REGEXP_CAPTURE/* Functions startAtom() and finishAtom() should be called to delimit atoms. When a state is created, it is assigned to the current atom. The information is later used for capturing.*/int QRegExpEngine::startAtom(bool officialCapture){ if ((nf & (nf + 1)) == 0 && nf + 1 >= f.size()) f.resize((nf + 1) << 1); f[nf].parent = cf; cf = nf++; f[cf].capture = officialCapture ? QRegExpAtom::OfficialCapture : QRegExpAtom::NoCapture; return cf;}void QRegExpEngine::finishAtom(int atom, bool needCapture){ if (greedyQuantifiers && needCapture && f[atom].capture == QRegExpAtom::NoCapture) f[atom].capture = QRegExpAtom::UnofficialCapture; cf = f.at(atom).parent;}#endif#ifndef QT_NO_REGEXP_LOOKAHEAD/* Creates a lookahead anchor.*/int QRegExpEngine::addLookahead(QRegExpEngine *eng, bool negative){ int n = ahead.size(); if (n == MaxLookaheads) { error(RXERR_LIMIT); return 0; } ahead += new QRegExpLookahead(eng, negative); return Anchor_FirstLookahead << n;}#endif#ifndef QT_NO_REGEXP_CAPTURE/* We want the longest leftmost captures.*/static bool isBetterCapture(int ncap, const int *begin1, const int *end1, const int *begin2, const int *end2){ for (int i = 0; i < ncap; i++) { int delta = begin2[i] - begin1[i]; // it has to start early... if (delta == 0) delta = end1[i] - end2[i]; // ...and end late if (delta != 0) return delta > 0; } return false;}#endif/* Returns true if anchor a matches at position pos + i in the input string, otherwise false.*/bool QRegExpMatchState::testAnchor(int i, int a, const int *capBegin){ int j;#ifndef QT_NO_REGEXP_ANCHOR_ALT if ((a & QRegExpEngine::Anchor_Alternation) != 0) return testAnchor(i, eng->aa.at(a ^ QRegExpEngine::Anchor_Alternation).a, capBegin) || testAnchor(i, eng->aa.at(a ^ QRegExpEngine::Anchor_Alternation).b, capBegin);#endif if ((a & QRegExpEngine::Anchor_Caret) != 0) { if (pos + i != caretPos) return false; } if ((a & QRegExpEngine::Anchor_Dollar) != 0) { if (pos + i != len) return false; }#ifndef QT_NO_REGEXP_ESCAPE if ((a & (QRegExpEngine::Anchor_Word | QRegExpEngine::Anchor_NonWord)) != 0) { bool before = false; bool after = false; if (pos + i != 0) before = isWord(in[pos + i - 1]); if (pos + i != len) after = isWord(in[pos + i]); if ((a & QRegExpEngine::Anchor_Word) != 0 && (before == after)) return false; if ((a & QRegExpEngine::Anchor_NonWord) != 0 && (before != after)) return false; }#endif#ifndef QT_NO_REGEXP_LOOKAHEAD if ((a & QRegExpEngine::Anchor_LookaheadMask) != 0) { QString cstr((QChar *) in + pos + i, len - pos - i); const QVector<QRegExpLookahead *> &ahead = eng->ahead; for (j = 0; j < ahead.size(); j++) { if ((a & (QRegExpEngine::Anchor_FirstLookahead << j)) != 0) { QRegExpMatchState matchState; matchState.prepareForMatch(ahead[j]->eng); matchState.match(cstr, 0, true, true, matchState.caretPos - matchState.pos - i); if ((matchState.captured.at(0) == 0) == ahead[j]->neg) return false; } } }#endif#ifndef QT_NO_REGEXP_CAPTURE#ifndef QT_NO_REGEXP_BACKREF for (j = 0; j < eng->nbrefs; j++) { if ((a & (QRegExpEngine::Anchor_BackRef1Empty << j)) != 0) { int i = eng->captureForOfficialCapture.at(j); if (capBegin[i] != EmptyCapture) return false; } }#endif#endif return true;}#ifndef QT_NO_REGEXP_OPTIM/* The three following functions are what Jeffrey Friedl would call transmissions (or bump-alongs). Using one or the other should make no difference except in performance.*/bool QRegExpEngine::goodStringMatch(QRegExpMatchState &matchState) const{ int k = matchState.pos + goodEarlyStart; while ((k = matchState.str->indexOf(goodStr, k, cs)) != -1) { int from = k - goodLateStart;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -