⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 qregexp.cpp

📁 奇趣公司比较新的qt/emd版本
💻 CPP
📖 第 1 页 / 共 5 页
字号:
private:
    enum { CharClassBit = 0x10000, BackRefBit = 0x20000 };
    enum { InitialState = 0, FinalState = 1 };

    void setup();
    int setupState(int match);

    /*
      Let's hope that 13 lookaheads and 14 back-references are
      enough.
     */
    enum { MaxLookaheads = 13, MaxBackRefs = 14 };

    /*
      Anchor bit flags. With the constants above the derived values
      work out to:
        Anchor_LookaheadMask  == 0x0001fff0
        Anchor_BackRef0Empty  == 0x00010000
        Anchor_BackRef1Empty  == 0x00020000
        Anchor_Alternation    == 0x80000000
     */
    enum { Anchor_Dollar = 0x00000001, Anchor_Caret = 0x00000002, Anchor_Word = 0x00000004,
           Anchor_NonWord = 0x00000008, Anchor_FirstLookahead = 0x00000010,
           Anchor_BackRef1Empty = Anchor_FirstLookahead << MaxLookaheads,
           Anchor_BackRef0Empty = Anchor_BackRef1Empty >> 1,
           Anchor_Alternation = Anchor_BackRef1Empty << MaxBackRefs,
           Anchor_LookaheadMask = (Anchor_FirstLookahead - 1) ^
                   ((Anchor_FirstLookahead << MaxLookaheads) - 1) };
#ifndef QT_NO_REGEXP_CAPTURE
    int startAtom(bool officialCapture);
    void finishAtom(int atom, bool needCapture);
#endif

#ifndef QT_NO_REGEXP_LOOKAHEAD
    int addLookahead(QRegExpEngine *eng, bool negative);
#endif

#ifndef QT_NO_REGEXP_OPTIM
    bool goodStringMatch(QRegExpMatchState &matchState) const;
    bool badCharMatch(QRegExpMatchState &matchState) const;
#else
    bool bruteMatch(QRegExpMatchState &matchState) const;
#endif

    QVector<QRegExpAutomatonState> s; // array of states
#ifndef QT_NO_REGEXP_CAPTURE
    QVector<QRegExpAtom> f; // atom hierarchy
    int nf; // number of atoms
    int cf; // current atom
    QVector<int> captureForOfficialCapture;
#endif
    int officialncap; // number of captures, seen from the outside
    int ncap; // number of captures, seen from the inside
#ifndef QT_NO_REGEXP_CCLASS
    QVector<QRegExpCharClass> cl; // array of character classes
#endif
#ifndef QT_NO_REGEXP_LOOKAHEAD
    QVector<QRegExpLookahead *> ahead; // array of lookaheads
#endif
#ifndef QT_NO_REGEXP_ANCHOR_ALT
    QVector<QRegExpAnchorAlternation> aa; // array of (a, b) pairs of anchors
#endif
#ifndef QT_NO_REGEXP_OPTIM
    bool caretAnchored; // does the regexp start with ^?
    bool trivial; // is the good-string all that needs to match?
#endif
    bool valid; // is the regular expression valid?
    Qt::CaseSensitivity cs; // case sensitive?
    bool greedyQuantifiers; // RegExp2?
#ifndef QT_NO_REGEXP_BACKREF
    int nbrefs; // number of back-references
#endif
#ifndef QT_NO_REGEXP_OPTIM
    bool useGoodStringHeuristic; // use goodStringMatch? otherwise badCharMatch
    int goodEarlyStart; // the index where goodStr can first occur in a match
    int goodLateStart; // the index where goodStr can last occur in a match
    QString goodStr; // the string that any match has to contain
    int minl; // the minimum length of a match
    QVector<int> occ1; // first-occurrence array
#endif

    /*
      The class Box is an abstraction for a regular expression
      fragment. It can also be seen as one node in the syntax tree of
      a regular expression with synthesized attributes.

      Its interface is ugly for performance reasons.
    */
    class Box
    {
    public:
        Box(QRegExpEngine *engine);
        // Copy construction is expressed through the assignment operator.
        Box(const Box &b) { operator=(b); }

        Box &operator=(const Box &b);

        // Resets this box by assigning a freshly constructed Box for the
        // same engine.
        void clear() { operator=(Box(eng)); }
        void set(QChar ch);
        void set(const QRegExpCharClass &cc);
#ifndef QT_NO_REGEXP_BACKREF
        void set(int bref);
#endif

        void cat(const Box &b);
        void orx(const Box &b);
        void plus(int atom);
        void opt();
        void catAnchor(int a);
#ifndef QT_NO_REGEXP_OPTIM
        void setupHeuristics();
#endif

#if defined(QT_DEBUG)
        void dump() const;
#endif

    private:
        void addAnchorsToEngine(const Box &to) const;

        QRegExpEngine *eng; // the automaton under construction
        QVector<int> ls; // the left states (firstpos)
        QVector<int> rs; // the right states (lastpos)
        QMap<int, int> lanchors; // the left anchors
        QMap<int, int> ranchors; // the right anchors
        int skipanchors; // the anchors to match if the box is skipped

#ifndef QT_NO_REGEXP_OPTIM
        int earlyStart; // the index where str can first occur
        int lateStart; // the index where str can last occur
        QString str; // a string that has to occur in any match
        QString leftStr; // a string occurring at the left of this box
        QString rightStr; // a string occurring at the right of this box
        int maxl; // the maximum length of this box (possibly InftyLen)
#endif

        int minl; // the minimum length of this box
#ifndef QT_NO_REGEXP_OPTIM
        QVector<int> occ1; // first-occurrence array
#endif
    };

    friend class Box;

    /*
      This is the lexical analyzer for regular expressions.
    */

    enum { Tok_Eos, Tok_Dollar, Tok_LeftParen, Tok_MagicLeftParen, Tok_PosLookahead,
           Tok_NegLookahead, Tok_RightParen, Tok_CharClass, Tok_Caret, Tok_Quantifier, Tok_Bar,
           Tok_Word, Tok_NonWord, Tok_Char = 0x10000, Tok_BackRef = 0x20000 };
    int getChar();
    int getEscape();
#ifndef QT_NO_REGEXP_INTERVAL
    int getRep(int def);
#endif
#ifndef QT_NO_REGEXP_LOOKAHEAD
    void skipChars(int n);
#endif
    void error(const char *msg);
    void startTokenizer(const QChar *rx, int len);
    int getToken();

    const QChar *yyIn; // a pointer to the input regular expression pattern
    int yyPos0; // the position of yyTok in the input pattern
    int yyPos; // the position of the next character to read
    int yyLen; // the length of yyIn
    int yyCh; // the last character read
    QRegExpCharClass *yyCharClass; // attribute for Tok_CharClass tokens
    int yyMinRep; // attribute for Tok_Quantifier
    int yyMaxRep; // ditto
    QString yyError; // syntax error or overflow during parsing?

    /*
      This is the syntactic analyzer for regular expressions.
    */

    int parse(const QChar *rx, int len);
    void parseAtom(Box *box);
    void parseFactor(Box *box);
    void parseTerm(Box *box);
    void parseExpression(Box *box);

    int yyTok; // the last token read
    bool yyMayCapture; // set this to false to disable capturing

    friend struct QRegExpMatchState;
};

#ifndef QT_NO_REGEXP_LOOKAHEAD
/*
  The struct QRegExpLookahead represents a lookahead a la Perl (e.g.,
  (?=foo) and (?!bar)).

  The destructor deletes eng, so an instance owns the engine it was
  constructed with.
*/
struct QRegExpLookahead
{
    QRegExpEngine *eng; // NFA representing the embedded regular expression
    bool neg; // negative lookahead?

    inline QRegExpLookahead(QRegExpEngine *eng0, bool neg0)
        : eng(eng0), neg(neg0) { }
    inline ~QRegExpLookahead() { delete eng; }
};
#endif

/*
  Builds an engine from key. After setup(), the pattern is first
  brought into regular expression form according to key.patternSyntax:
  wildcard patterns go through wc2rx() (unless QT_NO_REGEXP_WILDCARD is
  defined, in which case the pattern stays empty), fixed strings are
  escaped with QRegExp::escape(), and anything else is used verbatim.

  The engine is valid only if parse() reports having consumed the
  whole pattern.
*/
QRegExpEngine::QRegExpEngine(const QRegExpEngineKey &key)
    : cs(key.cs), greedyQuantifiers(key.patternSyntax == QRegExp::RegExp2)
{
    setup();

    QString rx;

    switch (key.patternSyntax) {
    case QRegExp::Wildcard:
#ifndef QT_NO_REGEXP_WILDCARD
        rx = wc2rx(key.pattern);
#endif
        break;
    case QRegExp::FixedString:
        rx = QRegExp::escape(key.pattern);
        break;
    default:
        rx = key.pattern;
    }

    // parse() returning anything other than the full length means the
    // pattern was not accepted in its entirety.
    valid = (parse(rx.unicode(), rx.length()) == rx.length());
    if (!valid) {
#ifndef QT_NO_REGEXP_OPTIM
        // match() must not take its goodStr-only shortcut for an
        // invalid engine.
        trivial = false;
#endif
        error(RXERR_LEFTDELIM);
    }
}

/*
  Deletes every lookahead object stored in ahead.
*/
QRegExpEngine::~QRegExpEngine()
{
#ifndef QT_NO_REGEXP_LOOKAHEAD
    qDeleteAll(ahead);
#endif
}

/*
  Sizes bigArray for eng and points the working arrays into it, then
  remembers eng as the engine to match with.

  With ns states and ncap captures, the layout inside bigArray is, in
  order:

    inNextStack, curStack, nextStack            ns entries each
    curCapBegin, nextCapBegin,
    curCapEnd, nextCapEnd                       ncap * ns entries each
    tempCapBegin, tempCapEnd, capBegin, capEnd  ncap entries each
    slideTab                                    slideTabSize entries

  Only the inNextStack region is initialized here (every byte set to
  0xff, i.e. -1 in each int).
*/
void QRegExpMatchState::prepareForMatch(QRegExpEngine *eng)
{
    /*
      We use one QVector<int> for all the big data used a lot in
      matchHere() and friends.
    */
    int ns = eng->s.size(); // number of states
    int ncap = eng->ncap;
#ifndef QT_NO_REGEXP_OPTIM
    slideTabSize = qMax(eng->minl + 1, 16);
#else
    slideTabSize = 0;
#endif
    bigArray.resize((3 + 4 * ncap) * ns + 4 * ncap + slideTabSize);

    inNextStack = bigArray.data();
    memset(inNextStack, -1, ns * sizeof(int));
    curStack = inNextStack + ns;
    nextStack = inNextStack + 2 * ns;

    curCapBegin = inNextStack + 3 * ns;
    nextCapBegin = curCapBegin + ncap * ns;
    curCapEnd = curCapBegin + 2 * ncap * ns;
    nextCapEnd = curCapBegin + 3 * ncap * ns;

    tempCapBegin = curCapBegin + 4 * ncap * ns;
    tempCapEnd = tempCapBegin + ncap;
    capBegin = tempCapBegin + 2 * ncap;
    capEnd = tempCapBegin + 3 * ncap;

    slideTab = tempCapBegin + 4 * ncap;

    this->eng = eng;
}

/*
  Tries to match in str0, starting at pos0, and stores the result in
  captured as an array of (begin, length) pairs: the first pair
  describes the whole match, followed by one pair per capture reported
  by eng->numCaptures(). If there is no match, all pairs are (-1, -1).

  If oneTest is true, only a single matchHere() attempt is made instead
  of a search. minimal0 and caretIndex are stored in minimal and
  caretPos for the matching code to use.
*/
void QRegExpMatchState::match(const QString &str0, int pos0, bool minimal0, bool oneTest,
                              int caretIndex)
{
    bool matched = false;
    QChar char_null;

#ifndef QT_NO_REGEXP_OPTIM
    // Shortcut: when the engine is flagged trivial, a plain substring
    // search for goodStr replaces running the automaton.
    if (eng->trivial && !oneTest) {
        pos = str0.indexOf(eng->goodStr, pos0, eng->cs);
        matchLen = eng->goodStr.length();
        matched = (pos != -1);
    } else
#endif
    {
        str = &str0;
        in = str0.unicode();
        // NOTE(review): when unicode() yields 0, in is pointed at the
        // local char_null, whose lifetime ends with this call -- confirm
        // that in is never read after match() returns.
        if (in == 0)
            in = &char_null;
        pos = pos0;
        caretPos = caretIndex;
        len = str0.length();
        minimal = minimal0;
        matchLen = 0;
        oneTestMatchedLen = 0;

        if (eng->valid && pos >= 0 && pos <= len) {
#ifndef QT_NO_REGEXP_OPTIM
            if (oneTest) {
                matched = matchHere();
            } else {
                // No search is attempted when fewer than eng->minl
                // characters remain from pos onwards.
                if (pos <= len - eng->minl) {
                    if (eng->caretAnchored) {
                        matched = matchHere();
                    } else if (eng->useGoodStringHeuristic) {
                        matched = eng->goodStringMatch(*this);
                    } else {
                        matched = eng->badCharMatch(*this);
                    }
                }
            }
#else
            matched = oneTest ? matchHere() : eng->bruteMatch(*this);
#endif
        }
    }

    // Two ints for the whole match plus two per capture.
    int numCaptures = eng->numCaptures();
    int capturedSize = 2 + 2 * numCaptures;
    captured.resize(capturedSize);
    if (matched) {
        int *c = captured.data();
        *c++ = pos;
        *c++ = matchLen;

#ifndef QT_NO_REGEXP_CAPTURE
        for (int i = 0; i < numCaptures; ++i) {
            int j = eng->captureForOfficialCapture.at(i);
            // This local len shadows the len assigned from str0.length()
            // above; inside the loop it is the length of capture j.
            int len = capEnd[j] - capBegin[j];
            // A capture whose length is not positive is reported with
            // begin 0 rather than an offset from pos.
            *c++ = (len > 0) ? pos + capBegin[j] : 0;
            *c++ = len;
        }
#endif
    } else {
        // we rely on 2's complement here
        memset(captured.data(), -1, capturedSize * sizeof(int));
    }
}

/*
  The three following functions add one state to the automaton and
  return the number of the state.
*/

// State for a single character; the state's match value is the
// character's Unicode code.
int QRegExpEngine::createState(QChar ch)
{
    return setupState(ch.unicode());
}

// State for a character class. A copy of cc is appended to cl and its
// index is combined with CharClassBit to form the state's match value.
// Without character-class support only CharClassBit is used.
int QRegExpEngine::createState(const QRegExpCharClass &cc)
{
#ifndef QT_NO_REGEXP_CCLASS
    int n = cl.size();
    cl += QRegExpCharClass(cc);
    return setupState(CharClassBit | n);
#else
    Q_UNUSED(cc);
    return setupState(CharClassBit);
#endif
}

#ifndef QT_NO_REGEXP_BACKREF
int QRegExpEngine::createState(int bref)
{
    if (bref > nbrefs) {
        nbrefs = bref;
        if (nbrefs > MaxBackRefs) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -