📄 qregexp.cpp
字号:
QVector<int> c(csize); int i = 0, j = 0, k = 0; while (i < asize) { if (j < bsize) { if (a->at(i) == b[j]) { ++i; --csize; } else if (a->at(i) < b[j]) { c[k++] = a->at(i++); } else { c[k++] = b[j++]; } } else { memcpy(c.data() + k, a->constData() + i, (asize - i) * sizeof(int)); break; } } c.resize(csize); if (j < bsize) memcpy(c.data() + k, b.constData() + j, (bsize - j) * sizeof(int)); *a = c; }}/* Merges two disjoint QMaps of (int, int) pairs and puts the result into the first one.*/static void mergeInto(QMap<int, int> *a, const QMap<int, int> &b){ for (QMap<int, int>::ConstIterator it = b.constBegin(); it != b.constEnd(); ++it) a->insert(it.key(), *it);}#ifndef QT_NO_REGEXP_WILDCARD/* Translates a wildcard pattern to an equivalent regular expression pattern (e.g., *.cpp to .*\.cpp).*/static QString wc2rx(const QString &wc_str){ int wclen = wc_str.length(); QString rx = QLatin1String(""); int i = 0; const QChar *wc = wc_str.unicode(); while (i < wclen) { QChar c = wc[i++]; switch (c.unicode()) { case '*': rx += QLatin1String(".*"); break; case '?': rx += QLatin1Char('.'); break; case '$': case '(': case ')': case '+': case '.': case '\\': case '^': case '{': case '|': case '}': rx += QLatin1Char('\\'); rx += c; break; case '[': rx += c; if (wc[i] == QLatin1Char('^')) rx += wc[i++]; if (i < wclen) { if (rx[i] == QLatin1Char(']')) rx += wc[i++]; while (i < wclen && wc[i] != QLatin1Char(']')) { if (wc[i] == QLatin1Char('\\')) rx += QLatin1Char('\\'); rx += wc[i++]; } } break; default: rx += c; } } return rx;}#endifstatic int caretIndex(int offset, QRegExp::CaretMode caretMode){ if (caretMode == QRegExp::CaretAtZero) { return 0; } else if (caretMode == QRegExp::CaretAtOffset) { return offset; } else { // QRegExp::CaretWontMatch return -1; }}/* The class QRegExpEngine encapsulates a modified nondeterministic finite automaton (NFA).*/class QRegExpEngine{public:#ifndef QT_NO_REGEXP_CCLASS /* The class CharClass represents a set of characters, such as can be found in regular expressions (e.g., [a-z] denotes the set {a, b, ..., z}). */ class CharClass { public: CharClass(); CharClass(const CharClass &cc) { operator=(cc); } CharClass &operator=(const CharClass &cc); void clear(); bool negative() const { return n; } void setNegative(bool negative); void addCategories(int cats); void addRange(ushort from, ushort to); void addSingleton(ushort ch) { addRange(ch, ch); } bool in(QChar ch) const;#ifndef QT_NO_REGEXP_OPTIM const QVector<int> &firstOccurrence() const { return occ1; }#endif#if defined(QT_DEBUG) void dump() const;#endif /* The struct Range represents a range of characters (e.g., [0-9] denotes range 48 to 57). */ struct Range { ushort from; // 48 ushort to; // 57 }; private: int c; // character classes QVector<Range> r; // character ranges bool n; // negative?#ifndef QT_NO_REGEXP_OPTIM QVector<int> occ1; // first-occurrence array#endif };#else struct CharClass { int dummy;#ifndef QT_NO_REGEXP_OPTIM CharClass() { occ1.fill(0, NumBadChars); } const QVector<int> &firstOccurrence() const { return occ1; } QVector<int> occ1;#endif };#endif QRegExpEngine(Qt::CaseSensitivity cs) { setup(cs); } QRegExpEngine(const QString &rx, Qt::CaseSensitivity cs); ~QRegExpEngine(); bool isValid() const { return valid; } Qt::CaseSensitivity caseSensitivity() const { return cs; } const QString &errorString() const { return yyError; } int numCaptures() const { return officialncap; } void match(const QString &str, int pos, bool minimal, bool oneTest, int caretIndex, QVector<int> &captured); int partialMatchLength() const { return mmOneTestMatchedLen; } int createState(QChar ch); int createState(const CharClass &cc);#ifndef QT_NO_REGEXP_BACKREF int createState(int bref);#endif void addCatTransitions(const QVector<int> &from, const QVector<int> &to);#ifndef QT_NO_REGEXP_CAPTURE void addPlusTransitions(const QVector<int> &from, const QVector<int> &to, int atom);#endif#ifndef QT_NO_REGEXP_ANCHOR_ALT int anchorAlternation(int a, int b); int anchorConcatenation(int a, int b);#else int anchorAlternation(int a, int b) { return a & b; } int anchorConcatenation(int a, int b) { return a | b; }#endif void addAnchors(int from, int to, int a);#ifndef QT_NO_REGEXP_OPTIM void heuristicallyChooseHeuristic();#endif#if defined(QT_DEBUG) void dump() const;#endif int ref;private: enum { CharClassBit = 0x10000, BackRefBit = 0x20000 }; /* The struct State represents one state in a modified NFA. The input characters matched are stored in the state instead of on the transitions, something possible for an automaton constructed from a regular expression. */ struct State {#ifndef QT_NO_REGEXP_CAPTURE int atom; // which atom does this state belong to?#endif int match; // what does it match? (see CharClassBit and BackRefBit) QVector<int> outs; // out-transitions QMap<int, int> *reenter; // atoms reentered when transiting out QMap<int, int> *anchors; // anchors met when transiting out#ifndef QT_NO_REGEXP_CAPTURE State(int a, int m) : atom(a), match(m), reenter(0), anchors(0) { }#else State(int m) : match(m), reenter(0), anchors(0) { }#endif ~State() { delete reenter; delete anchors; } };#ifndef QT_NO_REGEXP_LOOKAHEAD /* The struct Lookahead represents a lookahead a la Perl (e.g., (?=foo) and (?!bar)). */ struct Lookahead { QRegExpEngine *eng; // NFA representing the embedded regular expression bool neg; // negative lookahead? Lookahead(QRegExpEngine *eng0, bool neg0) : eng(eng0), neg(neg0) { } ~Lookahead() { delete eng; } };#endif#ifndef QT_NO_REGEXP_CAPTURE /* The struct Atom represents one node in the hierarchy of regular expression atoms. */ struct Atom { int parent; // index of parent in array of atoms int capture; // index of capture, from 1 to ncap };#endif#ifndef QT_NO_REGEXP_ANCHOR_ALT /* The struct AnchorAlternation represents a pair of anchors with OR semantics. */ struct AnchorAlternation { int a; // this anchor... int b; // ...or this one };#endif enum { InitialState = 0, FinalState = 1 }; void setup(Qt::CaseSensitivity cs); int setupState(int match); /* Let's hope that 13 lookaheads and 14 back-references are enough. */ enum { MaxLookaheads = 13, MaxBackRefs = 14 }; enum { Anchor_Dollar = 0x00000001, Anchor_Caret = 0x00000002, Anchor_Word = 0x00000004, Anchor_NonWord = 0x00000008, Anchor_FirstLookahead = 0x00000010, Anchor_BackRef1Empty = Anchor_FirstLookahead << MaxLookaheads, Anchor_BackRef0Empty = Anchor_BackRef1Empty >> 1, Anchor_Alternation = Anchor_BackRef1Empty << MaxBackRefs, Anchor_LookaheadMask = (Anchor_FirstLookahead - 1) ^ ((Anchor_FirstLookahead << MaxLookaheads) - 1) };#ifndef QT_NO_REGEXP_CAPTURE int startAtom(bool capture); void finishAtom(int atom) { cf = f[atom].parent; }#endif#ifndef QT_NO_REGEXP_LOOKAHEAD int addLookahead(QRegExpEngine *eng, bool negative);#endif#ifndef QT_NO_REGEXP_CAPTURE bool isBetterCapture(const int *begin1, const int *end1, const int *begin2, const int *end2);#endif bool testAnchor(int i, int a, const int *capBegin);#ifndef QT_NO_REGEXP_OPTIM bool goodStringMatch(); bool badCharMatch();#else bool bruteMatch();#endif bool matchHere(); QList<State *> s; // array of states#ifndef QT_NO_REGEXP_CAPTURE QVector<Atom> f; // atom hierarchy int nf; // number of atoms int cf; // current atom#endif int officialncap; // number of captures, seen from the outside int ncap; // number of captures, seen from the inside#ifndef QT_NO_REGEXP_CCLASS QList<CharClass *> cl; // array of character classes#endif#ifndef QT_NO_REGEXP_LOOKAHEAD QList<Lookahead *> ahead; // array of lookaheads#endif#ifndef QT_NO_REGEXP_ANCHOR_ALT QVector<AnchorAlternation> aa; // array of (a, b) pairs of anchors#endif#ifndef QT_NO_REGEXP_OPTIM bool caretAnchored; // does the regexp start with ^? bool trivial; // is the good-string all that needs to match?#endif bool valid; // is the regular expression valid? Qt::CaseSensitivity cs; // case sensitive?#ifndef QT_NO_REGEXP_BACKREF int nbrefs; // number of back-references#endif#ifndef QT_NO_REGEXP_OPTIM bool useGoodStringHeuristic; // use goodStringMatch? otherwise badCharMatch int goodEarlyStart; // the index where goodStr can first occur in a match int goodLateStart; // the index where goodStr can last occur in a match QString goodStr; // the string that any match has to contain int minl; // the minimum length of a match QVector<int> occ1; // first-occurrence array#endif /* The class Box is an abstraction for a regular expression fragment. It can also be seen as one node in the syntax tree of a regular expression with synthetized attributes. Its interface is ugly for performance reasons. */ class Box { public: Box(QRegExpEngine *engine); Box(const Box &b) { operator=(b); } Box &operator=(const Box &b); void clear() { operator=(Box(eng)); } void set(QChar ch); void set(const CharClass &cc);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -