⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 qregexp.cpp

📁 Trolltech公司发布的图形界面操作系统。可在qt-embedded-2.3.10平台上编译为嵌入式图形界面操作系统。
💻 CPP
📖 第 1 页 / 共 5 页
字号:
	case '+':	case '.':	case '\\':	case '^':	case '{':	case '|':	case '}':	    rx += QChar( '\\' );	    rx += c;	    break;	case '[':	    rx += c;	    if ( wc[i] == QChar('^') )		rx += wc[i++];	    if ( i < wclen ) {		if ( rx[i] == ']' )		    rx += wc[i++];		while ( i < wclen && wc[i] != QChar(']') ) {		    if ( wc[i] == '\\' )			rx += QChar( '\\' );		    rx += wc[i++];		}	    }	    break;	default:	    rx += c;	}    }    return rx;}#endif/*  The class QRegExpEngine encapsulates a modified nondeterministic  finite automaton (NFA).*/class QRegExpEngine : public QShared{public:#ifndef QT_NO_REGEXP_CCLASS    /*      The class CharClass represents a set of characters, such as can      be found in regular expressions (e.g., [a-z] denotes the set      {a, b, ..., z}).    */    class CharClass    {    public:	CharClass();	CharClass( const CharClass& cc ) { operator=( cc ); }	CharClass& operator=( const CharClass& cc );	void clear();	bool negative() const { return n; }	void setNegative( bool negative );	void addCategories( int cats );	void addRange( ushort from, ushort to );	void addSingleton( ushort ch ) { addRange( ch, ch ); }	bool in( QChar ch ) const;#ifndef QT_NO_REGEXP_OPTIM	const QMemArray<int>& firstOccurrence() const { return occ1; }#endif#if defined(QT_DEBUG)	void dump() const;#endif    private:	/*	  The struct Range represents a range of characters (e.g.,	  [0-9] denotes range 48 to 57).	*/	struct Range	{	    ushort from; // 48	    ushort to; // 57	};	int c; // character classes	QMemArray<Range> r; // character ranges	bool n; // negative?#ifndef QT_NO_REGEXP_OPTIM	QMemArray<int> occ1; // first-occurrence array#endif    };#else    struct CharClass    {	int dummy;#ifndef QT_NO_REGEXP_OPTIM	CharClass() { occ1.fill( 0, NumBadChars ); }	const QMemArray<int>& firstOccurrence() const { return occ1; }	QMemArray<int> occ1;#endif    };#endif    QRegExpEngine( bool caseSensitive ) { setup( caseSensitive ); }    QRegExpEngine( const QString& rx, bool caseSensitive );#ifndef QT_NO_REGEXP_OPTIM    ~QRegExpEngine();#endif    bool isValid() const { return valid; }    bool caseSensitive() const { return cs; }    const QString& errorString() const { return yyError; }    int numCaptures() const { return officialncap; }    void match( const QString& str, int pos, bool minimal, bool oneTest,		int caretIndex, QMemArray<int>& captured );    int partialMatchLength() const { return mmOneTestMatchedLen; }    int createState( QChar ch );    int createState( const CharClass& cc );#ifndef QT_NO_REGEXP_BACKREF    int createState( int bref );#endif    void addCatTransitions( const QMemArray<int>& from,			    const QMemArray<int>& to );#ifndef QT_NO_REGEXP_CAPTURE    void addPlusTransitions( const QMemArray<int>& from,			     const QMemArray<int>& to, int atom );#endif#ifndef QT_NO_REGEXP_ANCHOR_ALT    int anchorAlternation( int a, int b );    int anchorConcatenation( int a, int b );#else    int anchorAlternation( int a, int b ) { return a & b; }    int anchorConcatenation( int a, int b ) { return a | b; }#endif    void addAnchors( int from, int to, int a );#ifndef QT_NO_REGEXP_OPTIM    void heuristicallyChooseHeuristic();#endif#if defined(QT_DEBUG)    void dump() const;#endifprivate:    enum { CharClassBit = 0x10000, BackRefBit = 0x20000 };    /*      The struct State represents one state in a modified NFA. The      input characters matched are stored in the state instead of on      the transitions, something possible for an automaton      constructed from a regular expression.    */    struct State    {#ifndef QT_NO_REGEXP_CAPTURE	int atom; // which atom does this state belong to?#endif	int match; // what does it match? (see CharClassBit and BackRefBit)	QMemArray<int> outs; // out-transitions	QMap<int, int> *reenter; // atoms reentered when transiting out	QMap<int, int> *anchors; // anchors met when transiting out#ifndef QT_NO_REGEXP_CAPTURE	State( int a, int m )	    : atom( a ), match( m ), reenter( 0 ), anchors( 0 ) { }#else	State( int m )	    : match( m ), reenter( 0 ), anchors( 0 ) { }#endif	~State() { delete reenter; delete anchors; }    };#ifndef QT_NO_REGEXP_LOOKAHEAD    /*      The struct Lookahead represents a lookahead a la Perl (e.g.,      (?=foo) and (?!bar)).    */    struct Lookahead    {	QRegExpEngine *eng; // NFA representing the embedded regular expression	bool neg; // negative lookahead?	Lookahead( QRegExpEngine *eng0, bool neg0 )	    : eng( eng0 ), neg( neg0 ) { }	~Lookahead() { delete eng; }    };#endif#ifndef QT_NO_REGEXP_CAPTURE    /*      The struct Atom represents one node in the hierarchy of regular      expression atoms.    */    struct Atom    {	int parent; // index of parent in array of atoms	int capture; // index of capture, from 1 to ncap    };#endif#ifndef QT_NO_REGEXP_ANCHOR_ALT    /*      The struct AnchorAlternation represents a pair of anchors with      OR semantics.    */    struct AnchorAlternation    {	int a; // this anchor...	int b; // ...or this one    };#endif    enum { InitialState = 0, FinalState = 1 };    void setup( bool caseSensitive );    int setupState( int match );    /*      Let's hope that 13 lookaheads and 14 back-references are      enough.     */    enum { MaxLookaheads = 13, MaxBackRefs = 14 };    enum { Anchor_Dollar = 0x00000001, Anchor_Caret = 0x00000002,	   Anchor_Word = 0x00000004, Anchor_NonWord = 0x00000008,	   Anchor_FirstLookahead = 0x00000010,	   Anchor_BackRef1Empty = Anchor_FirstLookahead << MaxLookaheads,	   Anchor_BackRef0Empty = Anchor_BackRef1Empty >> 1,	   Anchor_Alternation = Anchor_BackRef1Empty << MaxBackRefs,	   Anchor_LookaheadMask = ( Anchor_FirstLookahead - 1 ) ^		   ( (Anchor_FirstLookahead << MaxLookaheads) - 1 ) };#ifndef QT_NO_REGEXP_CAPTURE    int startAtom( bool capture );    void finishAtom( int atom ) { cf = f[atom].parent; }#endif#ifndef QT_NO_REGEXP_LOOKAHEAD    int addLookahead( QRegExpEngine *eng, bool negative );#endif#ifndef QT_NO_REGEXP_CAPTURE    bool isBetterCapture( const int *begin1, const int *end1, const int *begin2,			  const int *end2 );#endif    bool testAnchor( int i, int a, const int *capBegin );#ifndef QT_NO_REGEXP_OPTIM    bool goodStringMatch();    bool badCharMatch();#else    bool bruteMatch();#endif    bool matchHere();    QPtrVector<State> s; // array of states    int ns; // number of states#ifndef QT_NO_REGEXP_CAPTURE    QMemArray<Atom> f; // atom hierarchy    int nf; // number of atoms    int cf; // current atom#endif    int officialncap; // number of captures, seen from the outside    int ncap; // number of captures, seen from the inside#ifndef QT_NO_REGEXP_CCLASS    QPtrVector<CharClass> cl; // array of character classes#endif#ifndef QT_NO_REGEXP_LOOKAHEAD    QPtrVector<Lookahead> ahead; // array of lookaheads#endif#ifndef QT_NO_REGEXP_ANCHOR_ALT    QMemArray<AnchorAlternation> aa; // array of (a, b) pairs of anchors#endif#ifndef QT_NO_REGEXP_OPTIM    bool caretAnchored; // does the regexp start with ^?    bool trivial; // is the good-string all that needs to match?#endif    bool valid; // is the regular expression valid?    bool cs; // case sensitive?#ifndef QT_NO_REGEXP_BACKREF    int nbrefs; // number of back-references#endif#ifndef QT_NO_REGEXP_OPTIM    bool useGoodStringHeuristic; // use goodStringMatch? otherwise badCharMatch    int goodEarlyStart; // the index where goodStr can first occur in a match    int goodLateStart; // the index where goodStr can last occur in a match    QString goodStr; // the string that any match has to contain    int minl; // the minimum length of a match    QMemArray<int> occ1; // first-occurrence array#endif    /*      The class Box is an abstraction for a regular expression      fragment. It can also be seen as one node in the syntax tree of      a regular expression with synthetized attributes.      Its interface is ugly for performance reasons.    */    class Box    {    public:	Box( QRegExpEngine *engine );	Box( const Box& b ) { operator=( b ); }	Box& operator=( const Box& b );	void clear() { operator=( Box(eng) ); }	void set( QChar ch );	void set( const CharClass& cc );#ifndef QT_NO_REGEXP_BACKREF	void set( int bref );#endif	void cat( const Box& b );	void orx( const Box& b );	void plus( int atom );	void opt();	void catAnchor( int a );#ifndef QT_NO_REGEXP_OPTIM	void setupHeuristics();#endif#if defined(QT_DEBUG)	void dump() const;#endif    private:	void addAnchorsToEngine( const Box& to ) const;	QRegExpEngine *eng; // the automaton under construction	QMemArray<int> ls; // the left states (firstpos)	QMemArray<int> rs; // the right states (lastpos)	QMap<int, int> lanchors; // the left anchors	QMap<int, int> ranchors; // the right anchors	int skipanchors; // the anchors to match if the box is skipped#ifndef QT_NO_REGEXP_OPTIM	int earlyStart; // the index where str can first occur	int lateStart; // the index where str can last occur	QString str; // a string that has to occur in any match	QString leftStr; // a string occurring at the left of this box	QString rightStr; // a string occurring at the right of this box	int maxl; // the maximum length of this box (possibly InftyLen)#endif	int minl; // the minimum length of this box#ifndef QT_NO_REGEXP_OPTIM	QMemArray<int> occ1; // first-occurrence array#endif    };    friend class Box;    /*      This is the lexical analyzer for regular expressions.    */    enum { Tok_Eos, Tok_Dollar, Tok_LeftParen, Tok_MagicLeftParen,	   Tok_PosLookahead, Tok_NegLookahead, Tok_RightParen, Tok_CharClass,	   Tok_Caret, Tok_Quantifier, Tok_Bar, Tok_Word, Tok_NonWord,	   Tok_Char = 0x10000, Tok_BackRef = 0x20000 };    int getChar();    int getEscape();#ifndef QT_NO_REGEXP_INTERVAL    int getRep( int def );#endif#ifndef QT_NO_REGEXP_LOOKAHEAD    void skipChars( int n );#endif    void error( const char *msg );    void startTokenizer( const QChar *rx, int len );    int getToken();    const QChar *yyIn; // a pointer to the input regular expression pattern    int yyPos0; // the position of yyTok in the input pattern    int yyPos; // the position of the next character to read    int yyLen; // the length of yyIn    int yyCh; // the last character read    CharClass *yyCharClass; // attribute for Tok_CharClass tokens    int yyMinRep; // attribute for Tok_Quantifier    int yyMaxRep; // ditto    QString yyError; // syntax error or overflow during parsing?    /*      This is the syntactic analyzer for regular expressions.    */    int parse( const QChar *rx, int len );    void parseAtom( Box *box );    void parseFactor( Box *box );    void parseTerm( Box *box );    void parseExpression( Box *box );    int yyTok; // the last token read    bool yyMayCapture; // set this to FALSE to disable capturing    /*      This is the engine state during matching.    */    const QString *mmStr; // a pointer to the input QString    const QChar *mmIn; // a pointer to the input string data    int mmPos; // the current position in the string    int mmCaretPos;    int mmLen; // the length of the input string    bool mmMinimal; // minimal matching?    QMemArray<int> mmBigArray; // big QMemArray<int> array    int *mmInNextStack; // is state is mmNextStack?    int *mmCurStack; // stack of current states

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -