📄 qregexp.cpp
字号:
}#endif#ifndef QT_NO_REGEXP_CAPTURE/* We want the longest leftmost captures.*/bool QRegExpEngine::isBetterCapture( const int *begin1, const int *end1, const int *begin2, const int *end2 ){ for ( int i = 0; i < ncap; i++ ) { int delta = begin2[i] - begin1[i]; // it has to start early... if ( delta == 0 ) delta = end1[i] - end2[i]; // ...and end late (like a party) if ( delta != 0 ) return delta > 0; } return FALSE;}#endif/* Returns TRUE if anchor a matches at position mmPos + i in the input string, otherwise FALSE.*/bool QRegExpEngine::testAnchor( int i, int a, const int *capBegin ){ int j;#ifndef QT_NO_REGEXP_ANCHOR_ALT if ( (a & Anchor_Alternation) != 0 ) { return testAnchor( i, aa[a ^ Anchor_Alternation].a, capBegin ) || testAnchor( i, aa[a ^ Anchor_Alternation].b, capBegin ); }#endif if ( (a & Anchor_Caret) != 0 ) { if ( mmPos + i != mmCaretPos ) return FALSE; } if ( (a & Anchor_Dollar) != 0 ) { if ( mmPos + i != mmLen ) return FALSE; }#ifndef QT_NO_REGEXP_ESCAPE if ( (a & (Anchor_Word | Anchor_NonWord)) != 0 ) { bool before = FALSE; bool after = FALSE; if ( mmPos + i != 0 ) before = isWord( mmIn[mmPos + i - 1] ); if ( mmPos + i != mmLen ) after = isWord( mmIn[mmPos + i] ); if ( (a & Anchor_Word) != 0 && (before == after) ) return FALSE; if ( (a & Anchor_NonWord) != 0 && (before != after) ) return FALSE; }#endif#ifndef QT_NO_REGEXP_LOOKAHEAD if ( (a & Anchor_LookaheadMask) != 0 ) { QConstString cstr = QConstString( (QChar *) mmIn + mmPos + i, mmLen - mmPos - i ); for ( j = 0; j < (int) ahead.size(); j++ ) { if ( (a & (Anchor_FirstLookahead << j)) != 0 ) { QMemArray<int> captured; ahead[j]->eng->match( cstr.string(), 0, TRUE, TRUE, mmCaretPos - mmPos - i, captured ); if ( (captured[0] == 0) == ahead[j]->neg ) return FALSE; } } }#endif#ifndef QT_NO_REGEXP_CAPTURE#ifndef QT_NO_REGEXP_BACKREF for ( j = 0; j < nbrefs; j++ ) { if ( (a & (Anchor_BackRef1Empty << j)) != 0 ) { if ( capBegin[j] != EmptyCapture ) return FALSE; } }#endif#endif return TRUE;}#ifndef QT_NO_REGEXP_OPTIM/* The three following functions are what Jeffrey Friedl would call transmissions (or bump-alongs). Using one or the other should make no difference except in performance.*/bool QRegExpEngine::goodStringMatch(){ int k = mmPos + goodEarlyStart; while ( (k = mmStr->find(goodStr, k, cs)) != -1 ) { int from = k - goodLateStart; int to = k - goodEarlyStart; if ( from > mmPos ) mmPos = from; while ( mmPos <= to ) { if ( matchHere() ) return TRUE; mmPos++; } k++; } return FALSE;}bool QRegExpEngine::badCharMatch(){ int slideHead = 0; int slideNext = 0; int i; int lastPos = mmLen - minl; memset( mmSlideTab, 0, mmSlideTabSize * sizeof(int) ); /* Set up the slide table, used for the bad-character heuristic, using the table of first occurrence of each character. */ for ( i = 0; i < minl; i++ ) { int sk = occ1[BadChar(mmIn[mmPos + i])]; if ( sk == NoOccurrence ) sk = i + 1; if ( sk > 0 ) { int k = i + 1 - sk; if ( k < 0 ) { sk = i + 1; k = 0; } if ( sk > mmSlideTab[k] ) mmSlideTab[k] = sk; } } if ( mmPos > lastPos ) return FALSE; for ( ;; ) { if ( ++slideNext >= mmSlideTabSize ) slideNext = 0; if ( mmSlideTab[slideHead] > 0 ) { if ( mmSlideTab[slideHead] - 1 > mmSlideTab[slideNext] ) mmSlideTab[slideNext] = mmSlideTab[slideHead] - 1; mmSlideTab[slideHead] = 0; } else { if ( matchHere() ) return TRUE; } if ( mmPos == lastPos ) break; /* Update the slide table. This code has much in common with the initialization code. */ int sk = occ1[BadChar(mmIn[mmPos + minl])]; if ( sk == NoOccurrence ) { mmSlideTab[slideNext] = minl; } else if ( sk > 0 ) { int k = slideNext + minl - sk; if ( k >= mmSlideTabSize ) k -= mmSlideTabSize; if ( sk > mmSlideTab[k] ) mmSlideTab[k] = sk; } slideHead = slideNext; mmPos++; } return FALSE;}#elsebool QRegExpEngine::bruteMatch(){ while ( mmPos <= mmLen ) { if ( matchHere() ) return TRUE; mmPos++; } return FALSE;}#endif/* Here's the core of the engine. It tries to do a match here and now.*/bool QRegExpEngine::matchHere(){ int ncur = 1, nnext = 0; int i = 0, j, k, m; bool stop = FALSE; mmMatchLen = -1; mmOneTestMatchedLen = -1; mmCurStack[0] = InitialState;#ifndef QT_NO_REGEXP_CAPTURE if ( ncap > 0 ) { for ( j = 0; j < ncap; j++ ) { mmCurCapBegin[j] = EmptyCapture; mmCurCapEnd[j] = EmptyCapture; } }#endif#ifndef QT_NO_REGEXP_BACKREF int *zzZ = 0; while ( (ncur > 0 || !mmSleeping.isEmpty()) && i <= mmLen - mmPos && !stop )#else while ( ncur > 0 && i <= mmLen - mmPos && !stop )#endif { int ch = ( i < mmLen - mmPos ) ? mmIn[mmPos + i].unicode() : 0; for ( j = 0; j < ncur; j++ ) { int cur = mmCurStack[j]; State *scur = s[cur]; QMemArray<int>& outs = scur->outs; for ( k = 0; k < (int) outs.size(); k++ ) { int next = outs[k]; State *snext = s[next]; bool in = TRUE;#ifndef QT_NO_REGEXP_BACKREF int needSomeSleep = 0;#endif /* First, check if the anchors are anchored properly. */ if ( scur->anchors != 0 ) { int a = at( *scur->anchors, next ); if ( a != 0 && !testAnchor(i, a, mmCurCapBegin + j * ncap) ) in = FALSE; } /* If indeed they are, check if the input character is correct for this transition. */ if ( in ) { m = snext->match; if ( (m & (CharClassBit | BackRefBit)) == 0 ) { if ( cs ) in = ( m == ch ); else in = ( QChar(m).lower() == QChar(ch).lower() ); } else if ( next == FinalState ) { mmMatchLen = i; stop = mmMinimal; in = TRUE; } else if ( (m & CharClassBit) != 0 ) {#ifndef QT_NO_REGEXP_CCLASS const CharClass *cc = cl[m ^ CharClassBit]; if ( cs ) in = cc->in( ch ); else if ( cc->negative() ) in = cc->in( QChar(ch).lower() ) && cc->in( QChar(ch).upper() ); else in = cc->in( QChar(ch).lower() ) || cc->in( QChar(ch).upper() );#endif#ifndef QT_NO_REGEXP_BACKREF } else { /* ( (m & BackRefBit) != 0 ) */ int bref = m ^ BackRefBit; int ell = j * ncap + ( bref - 1 ); in = bref <= ncap && mmCurCapBegin[ell] != EmptyCapture; if ( in ) { if ( cs ) in = ( mmIn[mmPos + mmCurCapBegin[ell]] == QChar(ch) ); else in = ( mmIn[mmPos + mmCurCapBegin[ell]].lower() == QChar(ch).lower() ); } if ( in ) { int delta; if ( mmCurCapEnd[ell] == EmptyCapture ) delta = i - mmCurCapBegin[ell]; else delta = mmCurCapEnd[ell] - mmCurCapBegin[ell]; in = ( delta <= mmLen - (mmPos + i) ); if ( in && delta > 1 ) { int n = 1; if ( cs ) { while ( n < delta ) { if ( mmIn[mmPos + mmCurCapBegin[ell] + n] != mmIn[mmPos + i + n] ) break; n++; } } else { while ( n < delta ) { QChar a = mmIn[mmPos + mmCurCapBegin[ell] + n]; QChar b = mmIn[mmPos + i + n]; if ( a.lower() != b.lower() ) break; n++; } } in = ( n == delta ); if ( in ) needSomeSleep = delta - 1; } }#endif } } /* We must now update our data structures. */ if ( in ) {#ifndef QT_NO_REGEXP_CAPTURE int *capBegin, *capEnd;#endif /* If the next state was not encountered yet, all is fine. */ if ( (m = mmInNextStack[next]) == -1 ) { m = nnext++; mmNextStack[m] = next; mmInNextStack[next] = m;#ifndef QT_NO_REGEXP_CAPTURE capBegin = mmNextCapBegin + m * ncap; capEnd = mmNextCapEnd + m * ncap; /* Otherwise, we'll first maintain captures in temporary arrays, and decide at the end whether it's best to keep the previous capture zones or the new ones. */ } else { capBegin = mmTempCapBegin; capEnd = mmTempCapEnd;#endif }#ifndef QT_NO_REGEXP_CAPTURE /* Updating the capture zones is much of a task. */ if ( ncap > 0 ) { memcpy( capBegin, mmCurCapBegin + j * ncap, ncap * sizeof(int) ); memcpy( capEnd, mmCurCapEnd + j * ncap, ncap * sizeof(int) ); int c = scur->atom, n = snext->atom; int p = -1, q = -1; int cap; /* Lemma 1. For any x in the range [0..nf), we have f[x].parent < x. Proof. By looking at startAtom(), it is clear that cf < nf holds all the time, and thus that f[nf].parent < nf. */ /* If we are reentering an atom, we empty all capture zones inside it. */ if ( scur->reenter != 0 && (q = at(*scur->reenter, next)) != 0 ) { QBitArray b; b.fill( FALSE, nf ); b.setBit( q, TRUE ); for ( int ell = q + 1; ell < nf; ell++ ) { if ( b.testBit(f[ell].parent) ) { b.setBit( ell, TRUE ); cap = f[ell].capture; if ( cap >= 0 ) { capBegin[cap] = EmptyCapture; capEnd[cap] = EmptyCapture; } } } p = f[q].parent; /* Otherwise, close the capture zones we are leaving. We are leaving f[c].capture, f[f[c].parent].capture, f[f[f[c].parent].parent].capture, ..., until f[x].capture, with x such that f[x].parent is the youngest common ancestor for c and n. We go up along c's and n's ancestry until we find x.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -