📄 regexpr.pas
字号:
unit RegExpr;
{
TRegExpr class library
Delphi Regular Expressions
Copyright (c) 1999-2004 Andrey V. Sorokin, St.Petersburg, Russia
You may use this software in any kind of development,
including comercial, redistribute, and modify it freely,
under the following restrictions :
1. This software is provided as it is, without any kind of
warranty given. Use it at Your own risk.The author is not
responsible for any consequences of use of this software.
2. The origin of this software may not be mispresented, You
must not claim that You wrote the original software. If
You use this software in any kind of product, it would be
appreciated that there in a information box, or in the
documentation would be an acknowledgement like
Partial Copyright (c) 2004 Andrey V. Sorokin
http://RegExpStudio.com
mailto:anso@mail.ru
3. You may not have any income from distributing this source
(or altered version of it) to other developers. When You
use this product in a comercial package, the source may
not be charged seperatly.
4. Altered versions must be plainly marked as such, and must
not be misrepresented as being the original software.
5. RegExp Studio application and all the visual components as
well as documentation is not part of the TRegExpr library
and is not free for usage.
mailto:anso@mail.ru
http://RegExpStudio.com
http://anso.da.ru/
}
interface
// ======== Determine compiler
{$IFDEF VER80} Sorry, TRegExpr is for 32-bits Delphi only. Delphi 1 is not supported (and whos really care today?!). {$ENDIF}
{$IFDEF VER90} {$DEFINE D2} {$ENDIF} // D2
{$IFDEF VER93} {$DEFINE D2} {$ENDIF} // CPPB 1
{$IFDEF VER100} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D3
{$IFDEF VER110} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // CPPB 3
{$IFDEF VER120} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D4
{$IFDEF VER130} {$DEFINE D5} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D5
{$IFDEF VER140} {$DEFINE D6} {$DEFINE D5} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D6
{$IFDEF VER150} {$DEFINE D7} {$DEFINE D6} {$DEFINE D5} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D7
// ======== Define base compiler options
{$BOOLEVAL OFF}
{$EXTENDEDSYNTAX ON}
{$LONGSTRINGS ON}
{$OPTIMIZATION ON}
{$IFDEF D6}
{$WARN SYMBOL_PLATFORM OFF} // Suppress .Net warnings
{$ENDIF}
{$IFDEF D7}
{$WARN UNSAFE_CAST OFF} // Suppress .Net warnings
{$WARN UNSAFE_TYPE OFF} // Suppress .Net warnings
{$WARN UNSAFE_CODE OFF} // Suppress .Net warnings
{$ENDIF}
{$IFDEF FPC}
{$MODE DELPHI} // Delphi-compatible mode in FreePascal
{$ENDIF}
// ======== Define options for TRegExpr engine
{.$DEFINE UniCode} // Unicode support
{$DEFINE RegExpPCodeDump} // p-code dumping (see Dump method)
{$IFNDEF FPC} // the option is not supported in FreePascal
{$DEFINE reRealExceptionAddr} // exceptions will point to appropriate source line, not to Error procedure
{$ENDIF}
{$DEFINE ComplexBraces} // support braces in complex cases
{$IFNDEF UniCode} // the option applicable only for non-UniCode mode
{$DEFINE UseSetOfChar} // Significant optimization by using set of char
{$ENDIF}
{$IFDEF UseSetOfChar}
{$DEFINE UseFirstCharSet} // Fast skip between matches for r.e. that starts with determined set of chars
{$ENDIF}
// ======== Define Pascal-language options
// Define 'UseAsserts' option (do not edit this definitions).
// Asserts used to catch 'strange bugs' in TRegExpr implementation (when something goes
// completely wrong). You can swith asserts on/off with help of {$C+}/{$C-} compiler options.
{$IFDEF D3} {$DEFINE UseAsserts} {$ENDIF}
{$IFDEF FPC} {$DEFINE UseAsserts} {$ENDIF}
// Define 'use subroutine parameters default values' option (do not edit this definition).
{$IFDEF D4} {$DEFINE DefParam} {$ENDIF}
// Define 'OverMeth' options, to use method overloading (do not edit this definitions).
{$IFDEF D5} {$DEFINE OverMeth} {$ENDIF}
{$IFDEF FPC} {$DEFINE OverMeth} {$ENDIF}
uses
Classes, // TStrings in Split method
SysUtils; // Exception
type
{$IFDEF UniCode}
PRegExprChar = PWideChar;
RegExprString = WideString;
REChar = WideChar;
{$ELSE}
PRegExprChar = PChar;
RegExprString = AnsiString; //###0.952 was string
REChar = Char;
{$ENDIF}
TREOp = REChar; // internal p-code type //###0.933
PREOp = ^TREOp;
TRENextOff = integer; // internal Next "pointer" (offset to current p-code) //###0.933
PRENextOff = ^TRENextOff; // used for extracting Next "pointers" from compiled r.e. //###0.933
TREBracesArg = integer; // type of {m,n} arguments
PREBracesArg = ^TREBracesArg;
const
REOpSz = SizeOf (TREOp) div SizeOf (REChar); // size of p-code in RegExprString units
RENextOffSz = SizeOf (TRENextOff) div SizeOf (REChar); // size of Next 'pointer' -"-
REBracesArgSz = SizeOf (TREBracesArg) div SizeOf (REChar); // size of BRACES arguments -"-
type
TRegExprInvertCaseFunction = function (const Ch : REChar) : REChar
of object;
const
EscChar = '\'; // 'Escape'-char ('\' in common r.e.) used for escaping metachars (\w, \d etc).
RegExprModifierI : boolean = False; // default value for ModifierI
RegExprModifierR : boolean = True; // default value for ModifierR
RegExprModifierS : boolean = True; // default value for ModifierS
RegExprModifierG : boolean = True; // default value for ModifierG
RegExprModifierM : boolean = False; // default value for ModifierM
RegExprModifierX : boolean = False; // default value for ModifierX
RegExprSpaceChars : RegExprString = // default value for SpaceChars
' '#$9#$A#$D#$C;
RegExprWordChars : RegExprString = // default value for WordChars
'0123456789' //###0.940
+ 'abcdefghijklmnopqrstuvwxyz'
+ 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_';
RegExprLineSeparators : RegExprString =// default value for LineSeparators
#$d#$a{$IFDEF UniCode}+#$b#$c#$2028#$2029#$85{$ENDIF}; //###0.947
RegExprLinePairedSeparator : RegExprString =// default value for LinePairedSeparator
#$d#$a;
{ if You need Unix-styled line separators (only \n), then use:
RegExprLineSeparators = #$a;
RegExprLinePairedSeparator = '';
}
const
NSUBEXP = 15; // max number of subexpression //###0.929
// Cannot be more than NSUBEXPMAX
// Be carefull - don't use values which overflow CLOSE opcode
// (in this case you'll get compiler erorr).
// Big NSUBEXP will cause more slow work and more stack required
NSUBEXPMAX = 255; // Max possible value for NSUBEXP. //###0.945
// Don't change it! It's defined by internal TRegExpr design.
MaxBracesArg = $7FFFFFFF - 1; // max value for {n,m} arguments //###0.933
{$IFDEF ComplexBraces}
LoopStackMax = 10; // max depth of loops stack //###0.925
{$ENDIF}
TinySetLen = 3;
// if range includes more then TinySetLen chars, //###0.934
// then use full (32 bytes) ANYOFFULL instead of ANYOF[BUT]TINYSET
// !!! Attension ! If you change TinySetLen, you must
// change code marked as "//!!!TinySet"
type
{$IFDEF UseSetOfChar}
PSetOfREChar = ^TSetOfREChar;
TSetOfREChar = set of REChar;
{$ENDIF}
TRegExpr = class;
TRegExprReplaceFunction = function (ARegExpr : TRegExpr): string
of object;
TRegExpr = class
private
startp : array [0 .. NSUBEXP - 1] of PRegExprChar; // founded expr starting points
endp : array [0 .. NSUBEXP - 1] of PRegExprChar; // founded expr end points
{$IFDEF ComplexBraces}
LoopStack : array [1 .. LoopStackMax] of integer; // state before entering loop
LoopStackIdx : integer; // 0 - out of all loops
{$ENDIF}
// The "internal use only" fields to pass info from compile
// to execute that permits the execute phase to run lots faster on
// simple cases.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -