⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regexpr.pas

📁 正则表达式的分析解释控件
💻 PAS
📖 第 1 页 / 共 2 页
字号:
    regstart : REChar; // char that must begin a match; '\0' if none obvious
    reganch : REChar; // is the match anchored (at beginning-of-line only)?
    regmust : PRegExprChar; // string (pointer into program) that match must include, or nil
    regmlen : integer; // length of regmust string
    // Regstart and reganch permit very fast decisions on suitable starting points
    // for a match, cutting down the work a lot.  Regmust permits fast rejection
    // of lines that cannot possibly match.  The regmust tests are costly enough
    // that regcomp() supplies a regmust only if the r.e. contains something
    // potentially expensive (at present, the only such thing detected is * or +
    // at the start of the r.e., which can involve a lot of backup).  Regmlen is
    // supplied because the test in regexec() needs it and regcomp() is computing
    // it anyway.
    {$IFDEF UseFirstCharSet} //###0.929
    FirstCharSet : TSetOfREChar;
    {$ENDIF}

    // work variables for Exec's routins - save stack in recursion}
    reginput : PRegExprChar; // String-input pointer.
    fInputStart : PRegExprChar; // Pointer to first char of input string.
    fInputEnd : PRegExprChar; // Pointer to char AFTER last char of input string

    // work variables for compiler's routines
    regparse : PRegExprChar;  // Input-scan pointer.
    regnpar : integer; // count.
    regdummy : char;
    regcode : PRegExprChar;   // Code-emit pointer; @regdummy = don't.
    regsize : integer; // Code size.

    regexpbeg : PRegExprChar; // only for error handling. Contains
    // pointer to beginning of r.e. while compiling
    fExprIsCompiled : boolean; // true if r.e. successfully compiled

    // programm is essentially a linear encoding
    // of a nondeterministic finite-state machine (aka syntax charts or
    // "railroad normal form" in parsing technology).  Each node is an opcode
    // plus a "next" pointer, possibly plus an operand.  "Next" pointers of
    // all nodes except BRANCH implement concatenation; a "next" pointer with
    // a BRANCH on both ends of it is connecting two alternatives.  (Here we
    // have one of the subtle syntax dependencies:  an individual BRANCH (as
    // opposed to a collection of them) is never concatenated with anything
    // because of operator precedence.)  The operand of some types of node is
    // a literal string; for others, it is a node leading into a sub-FSM.  In
    // particular, the operand of a BRANCH node is the first node of the branch.
    // (NB this is *not* a tree structure:  the tail of the branch connects
    // to the thing following the set of BRANCHes.)  The opcodes are:
    programm : PRegExprChar; // Unwarranted chumminess with compiler.

    fExpression : PRegExprChar; // source of compiled r.e.
    fInputString : PRegExprChar; // input string

    fLastError : integer; // see Error, LastError

    fModifiers : integer; // modifiers
    fCompModifiers : integer; // compiler's copy of modifiers
    fProgModifiers : integer; // modifiers values from last programm compilation

    fSpaceChars : RegExprString; //###0.927
    fWordChars : RegExprString; //###0.929
    fInvertCase : TRegExprInvertCaseFunction; //###0.927

    fLineSeparators : RegExprString; //###0.941
    fLinePairedSeparatorAssigned : boolean;
    fLinePairedSeparatorHead,
    fLinePairedSeparatorTail : REChar;
    {$IFNDEF UniCode}
    fLineSeparatorsSet : set of REChar;
    {$ENDIF}

    procedure InvalidateProgramm;
    // Mark programm as have to be [re]compiled

    function IsProgrammOk : boolean; //###0.941
    // Check if we can use precompiled r.e. or
    // [re]compile it if something changed

    function GetExpression : RegExprString;
    procedure SetExpression (const s : RegExprString);

    function GetModifierStr : RegExprString;
    class function ParseModifiersStr (const AModifiers : RegExprString;
      var AModifiersInt : integer) : boolean; //###0.941 class function now
    // Parse AModifiers string and return true and set AModifiersInt
    // if it's in format 'ismxrg-ismxrg'.
    procedure SetModifierStr (const AModifiers : RegExprString);

    function GetModifier (AIndex : integer) : boolean;
    procedure SetModifier (AIndex : integer; ASet : boolean);

    procedure Error (AErrorID : integer); virtual; // error handler.
    // Default handler raise exception ERegExpr with
    // Message = ErrorMsg (AErrorID), ErrorCode = AErrorID
    // and CompilerErrorPos = value of property CompilerErrorPos.


    {==================== Compiler section ===================}
    function CompileRegExpr (exp : PRegExprChar) : boolean;
    // compile a regular expression into internal code

    procedure Tail (p : PRegExprChar; val : PRegExprChar);
    // set the next-pointer at the end of a node chain

    procedure OpTail (p : PRegExprChar; val : PRegExprChar);
    // regoptail - regtail on operand of first argument; nop if operandless

    function EmitNode (op : TREOp) : PRegExprChar;
    // regnode - emit a node, return location

    procedure EmitC (b : REChar);
    // emit (if appropriate) a byte of code

    procedure InsertOperator (op : TREOp; opnd : PRegExprChar; sz : integer); //###0.90
    // insert an operator in front of already-emitted operand
    // Means relocating the operand.

    function ParseReg (paren : integer; var flagp : integer) : PRegExprChar;
    // regular expression, i.e. main body or parenthesized thing

    function ParseBranch (var flagp : integer) : PRegExprChar;
    // one alternative of an | operator

    function ParsePiece (var flagp : integer) : PRegExprChar;
    // something followed by possible [*+?]

    function ParseAtom (var flagp : integer) : PRegExprChar;
    // the lowest level

    function GetCompilerErrorPos : integer;
    // current pos in r.e. - for error hanling

    {$IFDEF UseFirstCharSet} //###0.929
    procedure FillFirstCharSet (prog : PRegExprChar);
    {$ENDIF}

    {===================== Mathing section ===================}
    function regrepeat (p : PRegExprChar; AMax : integer) : integer;
    // repeatedly match something simple, report how many

    function regnext (p : PRegExprChar) : PRegExprChar;
    // dig the "next" pointer out of a node

    function MatchPrim (prog : PRegExprChar) : boolean;
    // recursively matching routine

    function ExecPrim (AOffset: integer) : boolean;
    // Exec for stored InputString

    {$IFDEF RegExpPCodeDump}
    function DumpOp (op : REChar) : RegExprString;
    {$ENDIF}

    function GetSubExprMatchCount : integer;
    function GetMatchPos (Idx : integer) : integer;
    function GetMatchLen (Idx : integer) : integer;
    function GetMatch (Idx : integer) : RegExprString;

    function GetInputString : RegExprString;
    procedure SetInputString (const AInputString : RegExprString);

    {$IFNDEF UseSetOfChar}
    function StrScanCI (s : PRegExprChar; ch : REChar) : PRegExprChar; //###0.928
    {$ENDIF}

    procedure SetLineSeparators (const AStr : RegExprString);
    procedure SetLinePairedSeparator (const AStr : RegExprString);
    function GetLinePairedSeparator : RegExprString;

   public
    constructor Create;
    destructor Destroy; override;

    class function VersionMajor : integer; //###0.944
    class function VersionMinor : integer; //###0.944

    property Expression : RegExprString read GetExpression write SetExpression;
    // Regular expression.
    // For optimization, TRegExpr will automatically compiles it into 'P-code'
    // (You can see it with help of Dump method) and stores in internal
    // structures. Real [re]compilation occures only when it really needed -
    // while calling Exec[Next], Substitute, Dump, etc
    // and only if Expression or other P-code affected properties was changed
    // after last [re]compilation.
    // If any errors while [re]compilation occures, Error method is called
    // (by default Error raises exception - see below)

    property ModifierStr : RegExprString read GetModifierStr write SetModifierStr;
    // Set/get default values of r.e.syntax modifiers. Modifiers in
    // r.e. (?ismx-ismx) will replace this default values.
    // If you try to set unsupported modifier, Error will be called
    // (by defaul Error raises exception ERegExpr).

    property ModifierI : boolean index 1 read GetModifier write SetModifier;
    // Modifier /i - caseinsensitive, initialized from RegExprModifierI

    property ModifierR : boolean index 2 read GetModifier write SetModifier;
    // Modifier /r - use r.e.syntax extended for russian,
    // (was property ExtSyntaxEnabled in previous versions)
    // If true, then 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -