📄 perlregex.pas
字号:
{**************************************************************************************************}
{ }
{ Perl Regular Expressions VCL component }
{ }
{ The contents of this file are subject to the Mozilla Public License Version 1.1 (the "License"); }
{ you may not use this file except in compliance with the License. You may obtain a copy of the }
{ License at http://www.mozilla.org/MPL/ }
{ }
{ Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF }
{ ANY KIND, either express or implied. See the License for the specific language governing rights }
{ and limitations under the License. }
{ }
{ The Original Code is PerlRegEx.pas. }
{ }
{ The Initial Developer of the Original Code is Jan Goyvaerts. }
{ Portions created by Jan Goyvaerts are Copyright (C) 1999, 2005, 2008 Jan Goyvaerts. }
{ All rights reserved. }
{ }
{ Design & implementation, by Jan Goyvaerts, 1999, 2005, 2008 }
{ }
{ TPerlRegEx is available at http://www.regular-expressions.info/delphi.html }
{ }
{**************************************************************************************************}
unit PerlRegEx;
interface
uses
Windows, Messages, SysUtils, Classes,
pcre;
type
TPerlRegExOptions = set of (
preCaseLess, // /i -> Case insensitive
preMultiLine, // /m -> ^ and $ also match before/after a newline, not just at the beginning and the end of the PCREString
preSingleLine, // /s -> Dot matches any character, including \n (newline). Otherwise, it matches anything except \n
preExtended, // /x -> Allow regex to contain extra whitespace, newlines and Perl-style comments, all of which will be filtered out
preAnchored, // /A -> Successful match can only occur at the start of the subject or right after the previous match
preUnGreedy, // Repeat operators (+, *, ?) are not greedy by default (i.e. they try to match the minimum number of characters instead of the maximum)
preNoAutoCapture // (group) is a non-capturing group; only named groups capture
);
type
TPerlRegExState = set of (
preNotBOL, // Not Beginning Of Line: ^ does not match at the start of Subject
preNotEOL, // Not End Of Line: $ does not match at the end of Subject
preNotEmpty // Empty matches not allowed
);
const
// Maximum number of subexpressions (backreferences)
// Subexpressions are created by placing round brackets in the regex, and are referenced by \1, \2, ...
// In Perl, they are available as $1, $2, ... after the regex matched; with TPerlRegEx, use the Subexpressions property
// You can also insert \1, \2, ... in the Replacement PCREString; \0 is the complete matched expression
MAX_SUBEXPRESSIONS = 99;
{$IFDEF UNICODE}
type
PCREString = UTF8String;
{$ELSE}
type
PCREString = AnsiString;
{$ENDIF}
type
TPerlRegExReplaceEvent = procedure(Sender: TObject; var ReplaceWith: PCREString) of object;
type
TPerlRegEx = class(TComponent)
private // *** Property storage, getters and setters
FCompiled, FStudied: Boolean;
FOptions: TPerlRegExOptions;
FState: TPerlRegExState;
FRegEx, FReplacement, FSubject: PCREString;
FStart, FStop: Integer;
FOnMatch: TNotifyEvent;
FOnReplace: TPerlRegExReplaceEvent;
function GetMatchedExpression: PCREString;
function GetMatchedExpressionLength: Integer;
function GetMatchedExpressionOffset: Integer;
procedure SetOptions(Value: TPerlRegExOptions);
procedure SetRegEx(const Value: PCREString);
function GetSubExpressionCount: Integer;
function GetSubExpressions(Index: Integer): PCREString;
function GetSubExpressionLengths(Index: Integer): Integer;
function GetSubExpressionOffsets(Index: Integer): Integer;
procedure SetSubject(const Value: PCREString);
procedure SetStart(const Value: Integer);
procedure SetStop(const Value: Integer);
function GetFoundMatch: Boolean;
private // *** Variables used by pcrelib.dll
Offsets: array[0..(MAX_SUBEXPRESSIONS+1)*3] of Integer;
OffsetCount: Integer;
pcreOptions: Integer;
pattern, hints, chartable: Pointer;
FSubjectPChar: PAnsiChar;
FHasStoredSubExpressions: Boolean;
FStoredSubExpressions: array of PCREString;
function GetSubjectLeft: PCREString;
function GetSubjectRight: PCREString;
protected
procedure CleanUp;
// Dispose off whatever we created, so we can start over. Called automatically when needed, so it is not made public
procedure ClearStoredSubExpressions;
public
constructor Create(AOwner: TComponent); override;
// Come to life
destructor Destroy; override;
// Clean up after ourselves
class function EscapeRegExChars(const S: string): string;
// Escapes regex characters in S so that the regex engine can be used to match S as plain text
procedure Compile;
// Compile the regex. Called automatically by Match
procedure Study;
// Study the regex. Studying takes time, but will make the execution of the regex a lot faster.
// Call study if you will be using the same regex many times
function Match: Boolean;
// Attempt to match the regex
function MatchAgain: Boolean;
// Attempt to match the regex to the remainder of the string after the previous match
// To avoid problems (when using ^ in the regex), call MatchAgain only after a succesful Match()
function Replace: PCREString;
// Replace matched expression in Subject with ComputeReplacement. Returns the actual replacement text from ComputeReplacement
function ReplaceAll: Boolean;
// Repeat MatchAgain and Replace until you drop. Returns True if anything was replaced at all.
function ComputeReplacement: PCREString;
// Returns Replacement with backreferences filled in
procedure StoreSubExpressions;
// Stores duplicates of SubExpressions[] so they and ComputeReplacement will still return the proper strings
// even if FSubject is changed or cleared
function NamedSubExpression(const SEName: PCREString): Integer;
// Returns the index of the named group SEName
procedure Split(Strings: TStrings; Limit: Integer);
// Split Subject along regex matches. Items are appended to PCREStrings.
property Compiled: Boolean read FCompiled;
// True if the RegEx has already been compiled.
property FoundMatch: Boolean read GetFoundMatch;
// Returns True when MatchedExpression* and SubExpression* indicate a match
property Studied: Boolean read FStudied;
// True if the RegEx has already been studied
property MatchedExpression: PCREString read GetMatchedExpression;
// The matched PCREString
property MatchedExpressionLength: Integer read GetMatchedExpressionLength;
// Length of the matched PCREString
property MatchedExpressionOffset: Integer read GetMatchedExpressionOffset;
// Character offset in the Subject PCREString at which the matched subPCREString starts
property Start: Integer read FStart write SetStart;
// Starting position in Subject from which MatchAgain begins
property Stop: Integer read FStop write SetStop;
// Last character in Subject that Match and MatchAgain search through
property State: TPerlRegExState read FState write FState;
// State of Subject
property SubExpressionCount: Integer read GetSubExpressionCount;
// Number of matched subexpressions
property SubExpressions[Index: Integer]: PCREString read GetSubExpressions;
// Matched subexpressions after a regex has been matched
property SubExpressionLengths[Index: Integer]: Integer read GetSubExpressionLengths;
// Lengths of the subexpressions
property SubExpressionOffsets[Index: Integer]: Integer read GetSubExpressionOffsets;
// Character offsets in the Subject PCREString of the subexpressions
property Subject: PCREString read FSubject write SetSubject;
// The PCREString on which Match() will try to match RegEx
property SubjectLeft: PCREString read GetSubjectLeft;
// Part of the subject to the left of the match
property SubjectRight: PCREString read GetSubjectRight;
// Part of the subject to the right of the match
published
property Options: TPerlRegExOptions read FOptions write SetOptions;
// Options
property RegEx: PCREString read FRegEx write SetRegEx;
// The regular expression to be matched
property Replacement: PCREString read FReplacement write FReplacement;
// PCREString to replace matched expression with. \number backreferences will be substituted with SubExpressions
// TPerlRegEx supports the "JGsoft" replacement text flavor as explained at http://www.regular-expressions.info/refreplace.html
property OnMatch: TNotifyEvent read FOnMatch write FOnMatch;
// Triggered by Match and MatchAgain after a successful match
property OnReplace: TPerlRegExReplaceEvent read FOnReplace write FOnReplace;
// Triggered by Replace and ReplaceAll just before the replacement is done, allowing you to determine the new PCREString
end;
{
You can add TPerlRegEx components to a TPerlRegExList to match them all together on the same subject,
as if they were one regex regex1|regex2|regex3|...
TPerlRegExList does not own the TPerlRegEx components, just like a TList
If a TPerlRegEx has been added to a TPerlRegExList, it should not be used in any other situation
until it is removed from the list
}
type
TPerlRegExList = class
private
FList: TList;
FSubject: PCREString;
FMatchedRegEx: TPerlRegEx;
FStart, FStop: Integer;
function GetRegEx(Index: Integer): TPerlRegEx;
procedure SetRegEx(Index: Integer; Value: TPerlRegEx);
procedure SetSubject(const Value: PCREString);
procedure SetStart(const Value: Integer);
procedure SetStop(const Value: Integer);
function GetCount: Integer;
protected
procedure UpdateRegEx(ARegEx: TPerlRegEx);
public
constructor Create;
destructor Destroy; override;
public
function Add(ARegEx: TPerlRegEx): Integer;
procedure Clear;
procedure Delete(Index: Integer);
function IndexOf(ARegEx: TPerlRegEx): Integer;
procedure Insert(Index: Integer; ARegEx: TPerlRegEx);
public
function Match: Boolean;
function MatchAgain: Boolean;
property RegEx[Index: Integer]: TPerlRegEx read GetRegEx write SetRegEx;
property Count: Integer read GetCount;
property Subject: PCREString read FSubject write SetSubject;
property Start: Integer read FStart write SetStart;
property Stop: Integer read FStop write SetStop;
property MatchedRegEx: TPerlRegEx read FMatchedRegEx;
end;
procedure Register;
implementation
{ ********* Unit support routines ********* }
procedure Register;
begin
RegisterComponents('JGsoft', [TPerlRegEx]);
end;
function FirstCap(const S: string): string;
begin
if S = '' then Result := ''
else begin
Result := AnsiLowerCase(S);
{$IFDEF UNICODE}
CharUpperBuffW(@Result[1], 1);
{$ELSE}
CharUpperBuffA(@Result[1], 1);
{$ENDIF}
end
end;
function InitialCaps(const S: string): string;
var
I: Integer;
Up: Boolean;
begin
Result := AnsiLowerCase(S);
Up := True;
{$IFDEF UNICODE}
for I := 1 to Length(Result) do begin
case Result[I] of
#0..'&', '(', '*', '+', ',', '-', '.', '<', '[', '{', '
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -