📄 qregexp.cpp
字号:
#endif#if defined(QT_DEBUG)void QRegExpEngine::dump() const{ int i, j; qDebug("Case %ssensitive engine", cs ? "" : "in"); qDebug(" States"); for (i = 0; i < s.size(); i++) { qDebug(" %d%s", i, i == InitialState ? " (initial)" : i == FinalState ? " (final)" : "");#ifndef QT_NO_REGEXP_CAPTURE qDebug(" in atom %d", s[i]->atom);#endif int m = s[i]->match; if ((m & CharClassBit) != 0) { qDebug(" match character class %d", m ^ CharClassBit);#ifndef QT_NO_REGEXP_CCLASS cl[m ^ CharClassBit]->dump();#else qDebug(" negative character class");#endif } else if ((m & BackRefBit) != 0) { qDebug(" match back-reference %d", m ^ BackRefBit); } else if (m >= 0x20 && m <= 0x7e) { qDebug(" match 0x%.4x (%c)", m, m); } else { qDebug(" match 0x%.4x", m); } for (j = 0; j < s[i]->outs.size(); j++) { int next = s[i]->outs[j]; qDebug(" -> %d", next); if (s[i]->reenter != 0 && s[i]->reenter->contains(next)) qDebug(" [reenter %d]", (*s[i]->reenter)[next]); if (s[i]->anchors != 0 && s[i]->anchors->value(next, 0) != 0) qDebug(" [anchors 0x%.8x]", (*s[i]->anchors)[next]); } }#ifndef QT_NO_REGEXP_CAPTURE if (nf > 0) { qDebug(" Atom Parent Capture"); for (i = 0; i < nf; i++) qDebug(" %6d %6d %6d", i, f[i].parent, f[i].capture); }#endif#ifndef QT_NO_REGEXP_ANCHOR_ALT for (i = 0; i < aa.size(); i++) qDebug(" Anchor alternation 0x%.8x: 0x%.8x 0x%.9x", i, aa[i].a, aa[i].b);#endif}#endifvoid QRegExpEngine::setup(Qt::CaseSensitivity caseSensitive){ ref = 1;#ifndef QT_NO_REGEXP_CAPTURE f.resize(32); nf = 0; cf = -1;#endif officialncap = 0; ncap = 0;#ifndef QT_NO_REGEXP_OPTIM caretAnchored = true; trivial = true;#endif valid = false; cs = caseSensitive;#ifndef QT_NO_REGEXP_BACKREF nbrefs = 0;#endif#ifndef QT_NO_REGEXP_OPTIM useGoodStringHeuristic = true; minl = 0; occ1.fill(0, NumBadChars);#endif}int QRegExpEngine::setupState(int match){#ifndef QT_NO_REGEXP_CAPTURE s += new State(cf, match);#else s += new State(match);#endif return s.size() - 1;}#ifndef QT_NO_REGEXP_CAPTURE/* Functions startAtom() and finishAtom() should be called to delimit atoms. When a state is created, it is assigned to the current atom. The information is later used for capturing.*/int QRegExpEngine::startAtom(bool capture){ if ((nf & (nf + 1)) == 0 && nf + 1 >= f.size()) f.resize((nf + 1) << 1); f[nf].parent = cf; cf = nf++; f[cf].capture = capture ? ncap++ : -1; return cf;}#endif#ifndef QT_NO_REGEXP_LOOKAHEAD/* Creates a lookahead anchor.*/int QRegExpEngine::addLookahead(QRegExpEngine *eng, bool negative){ int n = ahead.size(); if (n == MaxLookaheads) { error(RXERR_LIMIT); return 0; } ahead += new Lookahead(eng, negative); return Anchor_FirstLookahead << n;}#endif#ifndef QT_NO_REGEXP_CAPTURE/* We want the longest leftmost captures.*/bool QRegExpEngine::isBetterCapture(const int *begin1, const int *end1, const int *begin2, const int *end2){ for (int i = 0; i < ncap; i++) { int delta = begin2[i] - begin1[i]; // it has to start early... if (delta == 0) delta = end1[i] - end2[i]; // ...and end late (like a party) if (delta != 0) return delta > 0; } return false;}#endif/* Returns true if anchor a matches at position mmPos + i in the input string, otherwise false.*/bool QRegExpEngine::testAnchor(int i, int a, const int *capBegin){ int j;#ifndef QT_NO_REGEXP_ANCHOR_ALT if ((a & Anchor_Alternation) != 0) return testAnchor(i, aa[a ^ Anchor_Alternation].a, capBegin) || testAnchor(i, aa[a ^ Anchor_Alternation].b, capBegin);#endif if ((a & Anchor_Caret) != 0) { if (mmPos + i != mmCaretPos) return false; } if ((a & Anchor_Dollar) != 0) { if (mmPos + i != mmLen) return false; }#ifndef QT_NO_REGEXP_ESCAPE if ((a & (Anchor_Word | Anchor_NonWord)) != 0) { bool before = false; bool after = false; if (mmPos + i != 0) before = isWord(mmIn[mmPos + i - 1]); if (mmPos + i != mmLen) after = isWord(mmIn[mmPos + i]); if ((a & Anchor_Word) != 0 && (before == after)) return false; if ((a & Anchor_NonWord) != 0 && (before != after)) return false; }#endif#ifndef QT_NO_REGEXP_LOOKAHEAD if ((a & Anchor_LookaheadMask) != 0) { QString cstr((QChar *) mmIn + mmPos + i, mmLen - mmPos - i); for (j = 0; j < ahead.size(); j++) { if ((a & (Anchor_FirstLookahead << j)) != 0) { QVector<int> captured; ahead[j]->eng->match(cstr, 0, true, true, mmCaretPos - mmPos - i, captured); if ((captured[0] == 0) == ahead[j]->neg) return false; } } }#endif#ifndef QT_NO_REGEXP_CAPTURE#ifndef QT_NO_REGEXP_BACKREF for (j = 0; j < nbrefs; j++) { if ((a & (Anchor_BackRef1Empty << j)) != 0) { if (capBegin[j] != EmptyCapture) return false; } }#endif#endif return true;}#ifndef QT_NO_REGEXP_OPTIM/* The three following functions are what Jeffrey Friedl would call transmissions (or bump-alongs). Using one or the other should make no difference except in performance.*/bool QRegExpEngine::goodStringMatch(){ int k = mmPos + goodEarlyStart; while ((k = mmStr->indexOf(goodStr, k, cs)) != -1) { int from = k - goodLateStart; int to = k - goodEarlyStart; if (from > mmPos) mmPos = from; while (mmPos <= to) { if (matchHere()) return true; ++mmPos; } ++k; } return false;}bool QRegExpEngine::badCharMatch(){ int slideHead = 0; int slideNext = 0; int i; int lastPos = mmLen - minl; memset(mmSlideTab, 0, mmSlideTabSize * sizeof(int)); /* Set up the slide table, used for the bad-character heuristic, using the table of first occurrence of each character. */ for (i = 0; i < minl; i++) { int sk = occ1[BadChar(mmIn[mmPos + i])]; if (sk == NoOccurrence) sk = i + 1; if (sk > 0) { int k = i + 1 - sk; if (k < 0) { sk = i + 1; k = 0; } if (sk > mmSlideTab[k]) mmSlideTab[k] = sk; } } if (mmPos > lastPos) return false; for (;;) { if (++slideNext >= mmSlideTabSize) slideNext = 0; if (mmSlideTab[slideHead] > 0) { if (mmSlideTab[slideHead] - 1 > mmSlideTab[slideNext]) mmSlideTab[slideNext] = mmSlideTab[slideHead] - 1; mmSlideTab[slideHead] = 0; } else { if (matchHere()) return true; } if (mmPos == lastPos) break; /* Update the slide table. This code has much in common with the initialization code. */ int sk = occ1[BadChar(mmIn[mmPos + minl])]; if (sk == NoOccurrence) { mmSlideTab[slideNext] = minl; } else if (sk > 0) { int k = slideNext + minl - sk; if (k >= mmSlideTabSize) k -= mmSlideTabSize; if (sk > mmSlideTab[k]) mmSlideTab[k] = sk; } slideHead = slideNext; ++mmPos; } return false;}#elsebool QRegExpEngine::bruteMatch(){ while (mmPos <= mmLen) { if (matchHere()) return true; ++mmPos; } return false;}#endif/* Here's the core of the engine. It tries to do a match here and now.*/bool QRegExpEngine::matchHere(){ int ncur = 1, nnext = 0; int i = 0, j, k, m; bool stop = false; mmMatchLen = -1; mmOneTestMatchedLen = -1; mmCurStack[0] = InitialState;#ifndef QT_NO_REGEXP_CAPTURE if (ncap > 0) { for (j = 0; j < ncap; j++) { mmCurCapBegin[j] = EmptyCapture; mmCurCapEnd[j] = EmptyCapture; } }#endif#ifndef QT_NO_REGEXP_BACKREF QVector<int> zzZ; while ((ncur > 0 || !mmSleeping.isEmpty()) && i <= mmLen - mmPos && !stop)#else while (ncur > 0 && i <= mmLen - mmPos && !stop)#endif { int ch = (i < mmLen - mmPos) ? mmIn[mmPos + i].unicode() : 0; for (j = 0; j < ncur; j++) { int cur = mmCurStack[j]; State *scur = s[cur]; QVector<int> &outs = scur->outs; for (k = 0; k < outs.size(); k++) { int next = outs[k]; State *snext = s[next]; bool in = true;#ifndef QT_NO_REGEXP_BACKREF int needSomeSleep = 0;#endif /* First, check if the anchors are anchored properly. */ if (scur->anchors != 0) { int a = scur->anchors->value(next, 0); if (a != 0 && !testAnchor(i, a, mmCurCapBegin + j * ncap)) in = false; } /* If indeed they are, check if the input character is correct for this transition. */ if (in) { m = snext->match; if ((m & (CharClassBit | BackRefBit)) == 0) { if (cs) in = (m == ch); else in = (QChar(m).toLower() == QChar(ch).toLower()); } else if (next == FinalState) { mmMatchLen = i; stop = mmMinimal; in = true; } else if ((m & CharClassBit) != 0) {#
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -