📄 perlregex.pas

📁 正则表达式类
💻 PAS
字号:
{**************************************************************************************************}
{                                                                                                  }
{ Perl Regular Expressions VCL component                                                           }
{                                                                                                  }
{ The contents of this file are subject to the Mozilla Public License Version 1.1 (the "License"); }
{ you may not use this file except in compliance with the License. You may obtain a copy of the    }
{ License at http://www.mozilla.org/MPL/                                                           }
{                                                                                                  }
{ Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF   }
{ ANY KIND, either express or implied. See the License for the specific language governing rights  }
{ and limitations under the License.                                                               }
{                                                                                                  }
{ The Original Code is PerlRegEx.pas.                                                              }
{                                                                                                  }
{ The Initial Developer of the Original Code is Jan Goyvaerts.                                     }
{ Portions created by Jan Goyvaerts are Copyright (C) 1999, 2005, 2008  Jan Goyvaerts.             }
{ All rights reserved.                                                                             }
{                                                                                                  }
{ Design & implementation, by Jan Goyvaerts, 1999, 2005, 2008                                      }
{                                                                                                  }
{ TPerlRegEx is available at http://www.regular-expressions.info/delphi.html                       }
{                                                                                                  }
{**************************************************************************************************}

unit PerlRegEx;

interface

uses
  Windows, Messages, SysUtils, Classes,
  pcre;

type
  TPerlRegExOptions = set of (
    preCaseLess,       // /i -> Case insensitive
    preMultiLine,      // /m -> ^ and $ also match before/after a newline, not just at the beginning and the end of the PCREString
    preSingleLine,     // /s -> Dot matches any character, including \n (newline). Otherwise, it matches anything except \n
    preExtended,       // /x -> Allow regex to contain extra whitespace, newlines and Perl-style comments, all of which will be filtered out
    preAnchored,       // /A -> Successful match can only occur at the start of the subject or right after the previous match
    preUnGreedy,       // Repeat operators (+, *, ?) are not greedy by default (i.e. they try to match the minimum number of characters instead of the maximum)
    preNoAutoCapture   // (group) is a non-capturing group; only named groups capture
  );

type
  TPerlRegExState = set of (
    preNotBOL,         // Not Beginning Of Line: ^ does not match at the start of Subject
    preNotEOL,         // Not End Of Line: $ does not match at the end of Subject
    preNotEmpty        // Empty matches not allowed
  );

const
  // Maximum number of subexpressions (backreferences)
  // Subexpressions are created by placing round brackets in the regex, and are referenced by \1, \2, ...
  // In Perl, they are available as $1, $2, ... after the regex matched; with TPerlRegEx, use the Subexpressions property
  // You can also insert \1, \2, ... in the Replacement PCREString; \0 is the complete matched expression
  MAX_SUBEXPRESSIONS = 99;

{$IFDEF UNICODE}
type
  PCREString = UTF8String;
{$ELSE}
type
  PCREString = AnsiString;
{$ENDIF}

type
  TPerlRegExReplaceEvent = procedure(Sender: TObject; var ReplaceWith: PCREString) of object;

type
  TPerlRegEx = class(TComponent)
  private    // *** Property storage, getters and setters
    FCompiled, FStudied: Boolean;
    FOptions: TPerlRegExOptions;
    FState: TPerlRegExState;
    FRegEx, FReplacement, FSubject: PCREString;
    FStart, FStop: Integer;
    FOnMatch: TNotifyEvent;
    FOnReplace: TPerlRegExReplaceEvent;
    function GetMatchedExpression: PCREString;
    function GetMatchedExpressionLength: Integer;
    function GetMatchedExpressionOffset: Integer;
    procedure SetOptions(Value: TPerlRegExOptions);
    procedure SetRegEx(const Value: PCREString);
    function GetSubExpressionCount: Integer;
    function GetSubExpressions(Index: Integer): PCREString;
    function GetSubExpressionLengths(Index: Integer): Integer;
    function GetSubExpressionOffsets(Index: Integer): Integer;
    procedure SetSubject(const Value: PCREString);
    procedure SetStart(const Value: Integer);
    procedure SetStop(const Value: Integer);
    function GetFoundMatch: Boolean;
  private    // *** Variables used by pcrelib.dll
    Offsets: array[0..(MAX_SUBEXPRESSIONS+1)*3] of Integer;
    OffsetCount: Integer;
    pcreOptions: Integer;
    pattern, hints, chartable: Pointer;
    FSubjectPChar: PAnsiChar;
    FHasStoredSubExpressions: Boolean;
    FStoredSubExpressions: array of PCREString;
    function GetSubjectLeft: PCREString;
    function GetSubjectRight: PCREString;
  protected
    procedure CleanUp;
        // Dispose off whatever we created, so we can start over. Called automatically when needed, so it is not made public
    procedure ClearStoredSubExpressions;
  public
    constructor Create(AOwner: TComponent); override;
        // Come to life
    destructor Destroy; override;
        // Clean up after ourselves
    class function EscapeRegExChars(const S: string): string;
        // Escapes regex characters in S so that the regex engine can be used to match S as plain text
    procedure Compile;
        // Compile the regex. Called automatically by Match
    procedure Study;
        // Study the regex. Studying takes time, but will make the execution of the regex a lot faster.
        // Call study if you will be using the same regex many times
    function Match: Boolean;
        // Attempt to match the regex
    function MatchAgain: Boolean;
        // Attempt to match the regex to the remainder of the string after the previous match
        // To avoid problems (when using ^ in the regex), call MatchAgain only after a succesful Match()
    function Replace: PCREString;
        // Replace matched expression in Subject with ComputeReplacement.  Returns the actual replacement text from ComputeReplacement
    function ReplaceAll: Boolean;
        // Repeat MatchAgain and Replace until you drop.  Returns True if anything was replaced at all.
    function ComputeReplacement: PCREString;
        // Returns Replacement with backreferences filled in
    procedure StoreSubExpressions;
        // Stores duplicates of SubExpressions[] so they and ComputeReplacement will still return the proper strings
        // even if FSubject is changed or cleared
    function NamedSubExpression(const SEName: PCREString): Integer;
        // Returns the index of the named group SEName
    procedure Split(Strings: TStrings; Limit: Integer);
        // Split Subject along regex matches.  Items are appended to PCREStrings.
    property Compiled: Boolean read FCompiled;
        // True if the RegEx has already been compiled.
    property FoundMatch: Boolean read GetFoundMatch;
        // Returns True when MatchedExpression* and SubExpression* indicate a match
    property Studied: Boolean read FStudied;
        // True if the RegEx has already been studied
    property MatchedExpression: PCREString read GetMatchedExpression;
        // The matched PCREString
    property MatchedExpressionLength: Integer read GetMatchedExpressionLength;
        // Length of the matched PCREString
    property MatchedExpressionOffset: Integer read GetMatchedExpressionOffset;
        // Character offset in the Subject PCREString at which the matched subPCREString starts
    property Start: Integer read FStart write SetStart;
        // Starting position in Subject from which MatchAgain begins
    property Stop: Integer read FStop write SetStop;
        // Last character in Subject that Match and MatchAgain search through
    property State: TPerlRegExState read FState write FState;
        // State of Subject
    property SubExpressionCount: Integer read GetSubExpressionCount;
        // Number of matched subexpressions
    property SubExpressions[Index: Integer]: PCREString read GetSubExpressions;
        // Matched subexpressions after a regex has been matched
    property SubExpressionLengths[Index: Integer]: Integer read GetSubExpressionLengths;
        // Lengths of the subexpressions
    property SubExpressionOffsets[Index: Integer]: Integer read GetSubExpressionOffsets;
        // Character offsets in the Subject PCREString of the subexpressions
    property Subject: PCREString read FSubject write SetSubject;
        // The PCREString on which Match() will try to match RegEx
    property SubjectLeft: PCREString read GetSubjectLeft;
        // Part of the subject to the left of the match
    property SubjectRight: PCREString read GetSubjectRight;
        // Part of the subject to the right of the match
  published
    property Options: TPerlRegExOptions read FOptions write SetOptions;
        // Options
    property RegEx: PCREString read FRegEx write SetRegEx;
        // The regular expression to be matched
    property Replacement: PCREString read FReplacement write FReplacement;
        // PCREString to replace matched expression with. \number backreferences will be substituted with SubExpressions
        // TPerlRegEx supports the "JGsoft" replacement text flavor as explained at http://www.regular-expressions.info/refreplace.html
    property OnMatch: TNotifyEvent read FOnMatch write FOnMatch;
        // Triggered by Match and MatchAgain after a successful match
    property OnReplace: TPerlRegExReplaceEvent read FOnReplace write FOnReplace;
        // Triggered by Replace and ReplaceAll just before the replacement is done, allowing you to determine the new PCREString
  end;

{
  You can add TPerlRegEx components to a TPerlRegExList to match them all together on the same subject,
  as if they were one regex regex1|regex2|regex3|...
  TPerlRegExList does not own the TPerlRegEx components, just like a TList
  If a TPerlRegEx has been added to a TPerlRegExList, it should not be used in any other situation
  until it is removed from the list
}

type
  TPerlRegExList = class
  private
    FList: TList;
    FSubject: PCREString;
    FMatchedRegEx: TPerlRegEx;
    FStart, FStop: Integer;
    function GetRegEx(Index: Integer): TPerlRegEx;
    procedure SetRegEx(Index: Integer; Value: TPerlRegEx);
    procedure SetSubject(const Value: PCREString);
    procedure SetStart(const Value: Integer);
    procedure SetStop(const Value: Integer);
    function GetCount: Integer;
  protected
    procedure UpdateRegEx(ARegEx: TPerlRegEx);
  public
    constructor Create;
    destructor Destroy; override;
  public
    function Add(ARegEx: TPerlRegEx): Integer;
    procedure Clear;
    procedure Delete(Index: Integer);
    function IndexOf(ARegEx: TPerlRegEx): Integer;
    procedure Insert(Index: Integer; ARegEx: TPerlRegEx);
  public
    function Match: Boolean;
    function MatchAgain: Boolean;
    property RegEx[Index: Integer]: TPerlRegEx read GetRegEx write SetRegEx;
    property Count: Integer read GetCount;
    property Subject: PCREString read FSubject write SetSubject;
    property Start: Integer read FStart write SetStart;
    property Stop: Integer read FStop write SetStop;
    property MatchedRegEx: TPerlRegEx read FMatchedRegEx;
  end;

procedure Register;

implementation


         { ********* Unit support routines ********* }

procedure Register;
begin
  RegisterComponents('JGsoft', [TPerlRegEx]);
end;

function FirstCap(const S: string): string;
begin
  if S = '' then Result := ''
  else begin
    Result := AnsiLowerCase(S);
  {$IFDEF UNICODE}
    CharUpperBuffW(@Result[1], 1);
  {$ELSE}
    CharUpperBuffA(@Result[1], 1);
  {$ENDIF}
  end
end;

function InitialCaps(const S: string): string;
var
  I: Integer;
  Up: Boolean;
begin
  Result := AnsiLowerCase(S);
  Up := True;
{$IFDEF UNICODE}
  for I := 1 to Length(Result) do begin
    case Result[I] of
      #0..'&', '(', '*', '+', ',', '-', '.', '<', '[', '{', '
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -