⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 qregexp.cpp

📁 qt-x11-opensource-src-4.1.4.tar.gz源码
💻 CPP
📖 第 1 页 / 共 5 页
字号:
#ifndef QT_NO_REGEXP_BACKREF        void set(int bref);#endif        void cat(const Box &b);        void orx(const Box &b);        void plus(int atom);        void opt();        void catAnchor(int a);#ifndef QT_NO_REGEXP_OPTIM        void setupHeuristics();#endif#if defined(QT_DEBUG)        void dump() const;#endif    private:        void addAnchorsToEngine(const Box &to) const;        QRegExpEngine *eng; // the automaton under construction        QVector<int> ls; // the left states (firstpos)        QVector<int> rs; // the right states (lastpos)        QMap<int, int> lanchors; // the left anchors        QMap<int, int> ranchors; // the right anchors        int skipanchors; // the anchors to match if the box is skipped#ifndef QT_NO_REGEXP_OPTIM        int earlyStart; // the index where str can first occur        int lateStart; // the index where str can last occur        QString str; // a string that has to occur in any match        QString leftStr; // a string occurring at the left of this box        QString rightStr; // a string occurring at the right of this box        int maxl; // the maximum length of this box (possibly InftyLen)#endif        int minl; // the minimum length of this box#ifndef QT_NO_REGEXP_OPTIM        QVector<int> occ1; // first-occurrence array#endif    };    friend class Box;    /*      This is the lexical analyzer for regular expressions.    */    enum { Tok_Eos, Tok_Dollar, Tok_LeftParen, Tok_MagicLeftParen, Tok_PosLookahead,           Tok_NegLookahead, Tok_RightParen, Tok_CharClass, Tok_Caret, Tok_Quantifier, Tok_Bar,           Tok_Word, Tok_NonWord, Tok_Char = 0x10000, Tok_BackRef = 0x20000 };    int getChar();    int getEscape();#ifndef QT_NO_REGEXP_INTERVAL    int getRep(int def);#endif#ifndef QT_NO_REGEXP_LOOKAHEAD    void skipChars(int n);#endif    void error(const char *msg);    void startTokenizer(const QChar *rx, int len);    int getToken();    const QChar *yyIn; // a pointer to the input regular expression pattern    int yyPos0; // the position of yyTok in the input pattern    int yyPos; // the position of the next character to read    int yyLen; // the length of yyIn    int yyCh; // the last character read    CharClass *yyCharClass; // attribute for Tok_CharClass tokens    int yyMinRep; // attribute for Tok_Quantifier    int yyMaxRep; // ditto    QString yyError; // syntax error or overflow during parsing?    /*      This is the syntactic analyzer for regular expressions.    */    int parse(const QChar *rx, int len);    void parseAtom(Box *box);    void parseFactor(Box *box);    void parseTerm(Box *box);    void parseExpression(Box *box);    int yyTok; // the last token read    bool yyMayCapture; // set this to false to disable capturing    /*      This is the engine state during matching.    */    const QString *mmStr; // a pointer to the input QString    const QChar *mmIn; // a pointer to the input string data    int mmPos; // the current position in the string    int mmCaretPos;    int mmLen; // the length of the input string    bool mmMinimal; // minimal matching?    QVector<int> mmBigArray; // big QVector<int> array    int *mmInNextStack; // is state is mmNextStack?    int *mmCurStack; // stack of current states    int *mmNextStack; // stack of next states    int *mmCurCapBegin; // start of current states' captures    int *mmNextCapBegin; // start of next states' captures    int *mmCurCapEnd; // end of current states' captures    int *mmNextCapEnd; // end of next states' captures    int *mmTempCapBegin; // start of temporary captures    int *mmTempCapEnd; // end of temporary captures    int *mmCapBegin; // start of captures for a next state    int *mmCapEnd; // end of captures for a next state    int *mmSlideTab; // bump-along slide table for bad-character heuristic    int mmSlideTabSize; // size of slide table#ifndef QT_NO_REGEXP_BACKREF    QList<QVector<int> > mmSleeping; // list of back-reference sleepers#endif    int mmMatchLen; // length of match    int mmOneTestMatchedLen; // length of partial match};QRegExpEngine::QRegExpEngine(const QString &rx, Qt::CaseSensitivity cs){    setup(cs);    valid = (parse(rx.unicode(), rx.length()) == rx.length());    if (!valid) {#ifndef QT_NO_REGEXP_OPTIM        trivial = false;#endif        error(RXERR_LEFTDELIM);    }}QRegExpEngine::~QRegExpEngine(){    while (!s.isEmpty())        delete s.takeFirst();#ifndef QT_NO_REGEXP_CCLASS    while (!cl.isEmpty())        delete cl.takeFirst();#endif#ifndef QT_NO_REGEXP_LOOKAHEAD    while (!ahead.isEmpty())        delete ahead.takeFirst();#endif}/*  Tries to match in str and returns an array of (begin, length) pairs  for captured text. If there is no match, all pairs are (-1, -1).*/void QRegExpEngine::match(const QString &str, int pos, bool minimal, bool oneTest, int caretIndex,                          QVector<int> &captured){    bool matched = false;    QChar char_null;#ifndef QT_NO_REGEXP_OPTIM    if (trivial && !oneTest) {        mmPos = str.indexOf(goodStr, pos, cs);        mmMatchLen = goodStr.length();        matched = (mmPos != -1);    } else#endif    {        mmStr = &str;        mmIn = str.unicode();        if (mmIn == 0)            mmIn = &char_null;        mmPos = pos;        mmCaretPos = caretIndex;        mmLen = str.length();        mmMinimal = minimal;        mmMatchLen = 0;        mmOneTestMatchedLen = 0;        if (valid && mmPos >= 0 && mmPos <= mmLen) {#ifndef QT_NO_REGEXP_OPTIM            if (oneTest) {                matched = matchHere();            } else {                if (mmPos <= mmLen - minl) {                    if (caretAnchored) {                        matched = matchHere();                    } else if (useGoodStringHeuristic) {                        matched = goodStringMatch();                    } else {                        matched = badCharMatch();                    }                }            }#else            matched = oneTest ? matchHere() : bruteMatch();#endif        }    }    int capturedSize = 2 + 2 * officialncap;    captured.resize(capturedSize);    if (matched) {        int *c = captured.data();        c[0] = mmPos;        c[1] = mmMatchLen;        for (int j = 0; j < officialncap; j++) {            int len = mmCapEnd[j] - mmCapBegin[j];            c[2 + 2 * j] = len > 0 ? mmPos + mmCapBegin[j] : 0;            c[2 + 2 * j + 1] = len;        }    } else {        // we rely on 2's complement here        memset(captured.data(), -1, capturedSize * sizeof(int));    }}/*  The three following functions add one state to the automaton and  return the number of the state.*/int QRegExpEngine::createState(QChar ch){    return setupState(ch.unicode());}int QRegExpEngine::createState(const CharClass &cc){#ifndef QT_NO_REGEXP_CCLASS    int n = cl.size();    cl += new CharClass(cc);    return setupState(CharClassBit | n);#else    Q_UNUSED(cc);    return setupState(CharClassBit);#endif}#ifndef QT_NO_REGEXP_BACKREFint QRegExpEngine::createState(int bref){    if (bref > nbrefs) {        nbrefs = bref;        if (nbrefs > MaxBackRefs) {            error(RXERR_LIMIT);            return 0;        }    }    return setupState(BackRefBit | bref);}#endif/*  The two following functions add a transition between all pairs of  states (i, j) where i is fond in from, and j is found in to.  Cat-transitions are distinguished from plus-transitions for  capturing.*/void QRegExpEngine::addCatTransitions(const QVector<int> &from, const QVector<int> &to){    for (int i = 0; i < from.size(); i++) {        State *st = s.at(from.at(i));        mergeInto(&st->outs, to);    }}#ifndef QT_NO_REGEXP_CAPTUREvoid QRegExpEngine::addPlusTransitions(const QVector<int> &from, const QVector<int> &to, int atom){    for (int i = 0; i < from.size(); i++) {        State *st = s.at(from.at(i));        QVector<int> oldOuts = st->outs;        mergeInto(&st->outs, to);        if (f.at(atom).capture >= 0) {            if (st->reenter == 0)                st->reenter = new QMap<int, int>;            for (int j = 0; j < to.size(); j++) {                if (!st->reenter->contains(to[j]) &&                     qBinaryFind(oldOuts.begin(), oldOuts.end(), to[j]) == oldOuts.end())                    st->reenter->insert(to[j], atom);            }        }    }}#endif#ifndef QT_NO_REGEXP_ANCHOR_ALT/*  Returns an anchor that means a OR b.*/int QRegExpEngine::anchorAlternation(int a, int b){    if (((a & b) == a || (a & b) == b) && ((a | b) & Anchor_Alternation) == 0)        return a & b;    int n = aa.size();#ifndef QT_NO_REGEXP_OPTIM    if (n > 0 && aa.at(n - 1).a == a && aa.at(n - 1).b == b)        return Anchor_Alternation | (n - 1);#endif    aa.resize(n + 1);    aa[n].a = a;    aa[n].b = b;    return Anchor_Alternation | n;}/*  Returns an anchor that means a AND b.*/int QRegExpEngine::anchorConcatenation(int a, int b){    if (((a | b) & Anchor_Alternation) == 0)        return a | b;    if ((b & Anchor_Alternation) != 0)        qSwap(a, b);    int aprime = anchorConcatenation(aa.at(a ^ Anchor_Alternation).a, b);    int bprime = anchorConcatenation(aa.at(a ^ Anchor_Alternation).b, b);    return anchorAlternation(aprime, bprime);}#endif/*  Adds anchor a on a transition caracterised by its from state and  its to state.*/void QRegExpEngine::addAnchors(int from, int to, int a){    State *st = s.at(from);    if (st->anchors == 0)        st->anchors = new QMap<int, int>;    if (st->anchors->contains(to))        a = anchorAlternation((*st->anchors)[to], a);    st->anchors->insert(to, a);}#ifndef QT_NO_REGEXP_OPTIM/*  This function chooses between the good-string and the bad-character  heuristics. It computes two scores and chooses the heuristic with  the highest score.  Here are some common-sense constraints on the scores that should be  respected if the formulas are ever modified: (1) If goodStr is  empty, the good-string heuristic scores 0. (2) If the regular  expression is trivial, the good-string heuristic should be used.  (3) If the search is case insensitive, the good-string heuristic  should be used, unless it scores 0. (Case insensitivity turns all  entries of occ1 to 0.) (4) If (goodLateStart - goodEarlyStart) is  big, the good-string heuristic should score less.*/void QRegExpEngine::heuristicallyChooseHeuristic(){    if (minl == 0) {        useGoodStringHeuristic = false;    } else if (trivial) {        useGoodStringHeuristic = true;    } else {        /*          Magic formula: The good string has to constitute a good          proportion of the minimum-length string, and appear at a          more-or-less known index.        */        int goodStringScore = (64 * goodStr.length() / minl) -                              (goodLateStart - goodEarlyStart);        /*          Less magic formula: We pick some characters at random, and          check whether they are good or bad.        */        int badCharScore = 0;        int step = qMax(1, NumBadChars / 32);        for (int i = 1; i < NumBadChars; i += step) {            if (occ1[i] == NoOccurrence)                badCharScore += minl;            else                badCharScore += occ1[i];        }        badCharScore /= minl;        useGoodStringHeuristic = (goodStringScore > badCharScore);    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -