📄 unicode.pas
字号:
end;
// Pairs a dynamic array of TElement with an Integer counter.
// NOTE(review): ExpressionsUsed presumably holds the number of occupied slots in
// Expressions (the array may be allocated larger) - confirm against the implementation.
TExpressionList = record
Expressions: array of TElement;
ExpressionsUsed: Integer;
end;
// Pairs a dynamic array of TSymbolTableEntry with an Integer counter.
// NOTE(review): SymbolsUsed presumably holds the number of occupied slots in
// Symbols - confirm against the implementation.
TSymbolTable = record
Symbols: array of TSymbolTableEntry;
SymbolsUsed: Integer;
end;
// Pairs a dynamic array of TEquivalent with an Integer counter.
// NOTE(review): EquivalentsUsed presumably holds the number of occupied slots in
// Equivalents - confirm against the implementation.
TEquivalentList = record
Equivalents: array of TEquivalent;
EquivalentsUsed: Integer;
end;
// structure used for constructing the NFA and reducing to a minimal FDFA
// PUREBuffer is the pointer type for TUREBuffer.
PUREBuffer = ^TUREBuffer;
TUREBuffer = record
// NOTE(review): presumably True while the NFA reduction is in progress - confirm
Reducing: Boolean;
// NOTE(review): presumably an error code recorded during compilation - confirm value meanings
Error: Integer;
// NOTE(review): bit flags; meaning of the individual bits is not visible in this section
Flags: Cardinal;
// NOTE(review): presumably the stack operated on by TURESearch.Push/Pop/Peek - confirm
Stack: TStateList;
SymbolTable: TSymbolTable; // table of unique symbols encountered
ExpressionList: TExpressionList; // tracks the unique expressions generated for the NFA and when the NFA is reduced
States: TStateTable; // the reduced table of unique groups of NFA states
EquivalentList: TEquivalentList; // tracks states when equivalent states are merged
end;
// A single transition entry: a symbol paired with a next state, both stored as Cardinal.
// NOTE(review): both values are presumably indices (into the symbol table and the
// state list respectively) - confirm against the implementation.
TTransition = record
Symbol,
NextState: Cardinal;
end;
// PDFAState is the pointer type for TDFAState.
PDFAState = ^TDFAState;
// Describes one DFA state: an accepting flag plus two Integer values locating its transitions.
// NOTE(review): StartTransition is presumably the index of the first of
// NumberTransitions consecutive entries in a TTransitions list - confirm.
TDFAState = record
Accepting: Boolean;
NumberTransitions: Integer;
StartTransition: Integer;
end;
// Pairs a dynamic array of TDFAState with an Integer counter.
// NOTE(review): StatesUsed presumably holds the number of occupied slots in
// States - confirm against the implementation.
TDFAStates = record
States: array of TDFAState;
StatesUsed: Integer;
end;
// Pairs a dynamic array of TTransition with an Integer counter.
// NOTE(review): TransitionsUsed presumably holds the number of occupied slots in
// Transitions - confirm against the implementation.
TTransitions = record
Transitions: array of TTransition;
TransitionsUsed: Integer;
end;
// Aggregates everything stored for a DFA: flags, a symbol table, a state list and a
// transition list.
// NOTE(review): presumably the compiled, executable form of a regular expression
// produced from TUREBuffer - confirm against the implementation.
TDFA = record
// NOTE(review): bit flags; meaning of the individual bits is not visible in this section
Flags: Cardinal;
SymbolTable: TSymbolTable;
StateList: TDFAStates;
TransitionList: TTransitions;
end;
// TSearchEngine descendant that owns a TUREBuffer (construction state) and a TDFA.
// It overrides Create, Destroy, Clear and both overloads each of FindPrepare, FindFirst and FindAll.
// NOTE(review): judging by the member names (CompileURE, ConvertRE2NFA, Reduce, ExecuteURE)
// this is presumably a Unicode regular expression matcher that compiles a pattern into
// an NFA, reduces it to a DFA and then runs the DFA over the text - confirm against the implementation.
TURESearch = class(TSearchEngine)
private
// working buffer used while building the NFA (see TUREBuffer)
FUREBuffer: TUREBuffer;
// DFA data kept by this engine (see TDFA)
FDFA: TDFA;
protected
// Internal helpers; their bodies are not part of this section, so no behavior is documented here.
procedure AddEquivalentPair(L, R: Cardinal);
procedure AddRange(var CCL: TCClass; Range: TRange);
function AddState(NewStates: array of Cardinal): Cardinal;
procedure AddSymbolState(Symbol, State: Cardinal);
function BuildCharacterClass(CP: PUCS2; Limit: Cardinal; Symbol: PSymbolTableEntry): Cardinal;
procedure CCLSetup(Symbol: PSymbolTableEntry; Mask: Cardinal);
procedure ClearUREBuffer;
function CompileSymbol(S: PUCS2; Limit: Cardinal; Symbol: PSymbolTableEntry): Cardinal;
procedure CompileURE(RE: PWideChar; RELength: Cardinal; Casefold: Boolean);
procedure CollectPendingOperations(var State: Cardinal);
function ConvertRE2NFA(RE: PWideChar; RELength: Cardinal): Cardinal;
function ExecuteURE(Flags: Cardinal; Text: PUCS2; TextLen: Cardinal; var MatchStart, MatchEnd: Cardinal): Boolean;
procedure ClearDFA;
procedure HexDigitSetup(Symbol: PSymbolTableEntry; Mask: Cardinal);
function MakeExpression(AType, LHS, RHS: Cardinal): Cardinal;
function MakeHexNumber(NP: PUCS2; Limit: Cardinal; var Number: Cardinal): Cardinal;
function MakeSymbol(S: PUCS2; Limit: Cardinal; var Consumed: Cardinal): Cardinal;
function MatchesProperties(Props, C: Cardinal): Boolean;
procedure MergeEquivalents;
function ParsePropertyList(Properties: PUCS2; Limit: Cardinal; var Mask: Cardinal): Cardinal;
// NOTE(review): Peek/Pop/Push presumably operate on FUREBuffer.Stack - confirm
function Peek: Cardinal;
function Pop: Cardinal;
function PosixCCL(CP: PUCS2; Limit: Cardinal; Symbol: PSymbolTableEntry): Cardinal;
function ProbeLowSurrogate(LeftState: PUCS2; Limit: Cardinal; var Code: UCS4): Cardinal;
procedure Push(V: Cardinal);
procedure Reduce(Start: Cardinal);
procedure SpaceSetup(Symbol: PSymbolTableEntry; Mask: Cardinal);
function SymbolsAreDifferent(A, B: PSymbolTableEntry): Boolean;
public
// Overrides of the TSearchEngine interface. Each search method comes in two flavours:
// one taking a WideString and one taking a PWideChar together with an explicit length.
constructor Create(AOwner: TWideStrings); override;
destructor Destroy; override;
procedure Clear; override;
procedure FindPrepare(const Pattern: WideString; Options: TSearchFlags); override;
procedure FindPrepare(const Pattern: PWideChar; PatternLength: Cardinal; Options: TSearchFlags); override;
function FindFirst(const Text: WideString; var Start, Stop: Cardinal): Boolean; override;
function FindFirst(const Text: PWideChar; TextLen: Cardinal; var Start, Stop: Cardinal): Boolean; override;
function FindAll(const Text: WideString): Boolean; override;
function FindAll(const Text: PWideChar; TextLen: Cardinal): Boolean; override;
end;
// Event that gives the application a chance to change how the text in a TWideStrings
// instance is saved when the text contains characters outside the ANSI block but the
// save type is ANSI. While handling the event the application can change the property
// SaveUnicode as needed. This property is checked again after the callback returns.
// Sender is the TWideStrings instance raising the event.
// NOTE(review): Allowed is presumably set by the handler to permit or refuse the
// conversion - confirm against the code that triggers the event.
TConfirmConversionEvent = procedure(Sender: TWideStrings; var Allowed: Boolean) of object;
// Abstract TPersistent descendant describing a list of WideString items with optional
// associated objects. Get, GetCount, Clear, Delete and Insert are declared abstract and
// must be implemented by descendants.
// NOTE(review): the member set parallels Classes.TStrings, so this is presumably its
// WideString counterpart - confirm against the implementation.
TWideStrings = class(TPersistent)
private
// NOTE(review): presumably the BeginUpdate/EndUpdate nesting counter - confirm
FUpdateCount: Integer;
FLanguage: LCID; // language can usually be left alone, the system's default is used
FSaved, // set in SaveToStream, True in case saving was successful otherwise False
FSaveUnicode: Boolean; // flag set on loading to keep track in which format to save
// (can be set explicitly, but expect losses if there's true Unicode content
// and this flag is set to False)
FOnConfirmConversion: TConfirmConversionEvent;
// accessors backing the CommaText, Names and Values properties plus streaming helpers
function GetCommaText: WideString;
function GetName(Index: Integer): WideString;
function GetValue(const Name: WideString): WideString;
procedure ReadData(Reader: TReader);
procedure SetCommaText(const Value: WideString);
procedure SetValue(const Name, Value: WideString);
procedure WriteData(Writer: TWriter);
protected
procedure DefineProperties(Filer: TFiler); override;
procedure Error(const Msg: String; Data: Integer);
// abstract: descendants supply item storage
function Get(Index: Integer): WideString; virtual; abstract;
function GetCapacity: Integer; virtual;
// abstract: descendants supply the item count
function GetCount: Integer; virtual; abstract;
function GetObject(Index: Integer): TObject; virtual;
function GetTextStr: WideString; virtual;
procedure Put(Index: Integer; const S: WideString); virtual;
procedure PutObject(Index: Integer; AObject: TObject); virtual;
procedure SetCapacity(NewCapacity: Integer); virtual;
procedure SetTextStr(const Value: WideString); virtual;
procedure SetUpdateState(Updating: Boolean); virtual;
procedure SetLanguage(Value: LCID); virtual;
public
constructor Create;
destructor Destroy; override;
function Add(const S: WideString): Integer; virtual;
function AddObject(const S: WideString; AObject: TObject): Integer; virtual;
procedure Append(const S: WideString);
// AddStrings accepts either an ANSI TStrings or another TWideStrings
procedure AddStrings(Strings: TStrings); overload; virtual;
procedure AddStrings(Strings: TWideStrings); overload; virtual;
procedure Assign(Source: TPersistent); override;
procedure AssignTo(Dest: TPersistent); override;
procedure BeginUpdate;
// abstract: implemented by descendants
procedure Clear; virtual; abstract;
// abstract: implemented by descendants
procedure Delete(Index: Integer); virtual; abstract;
procedure EndUpdate;
function Equals(Strings: TWideStrings): Boolean;
procedure Exchange(Index1, Index2: Integer); virtual;
function GetText: PWideChar; virtual;
function IndexOf(const S: WideString): Integer; virtual;
function IndexOfName(const Name: WideString): Integer;
function IndexOfObject(AObject: TObject): Integer;
// abstract: implemented by descendants
procedure Insert(Index: Integer; const S: WideString); virtual; abstract;
procedure InsertObject(Index: Integer; const S: WideString; AObject: TObject);
procedure LoadFromFile(const FileName: String); virtual;
procedure LoadFromStream(Stream: TStream); virtual;
procedure Move(CurIndex, NewIndex: Integer); virtual;
procedure SaveToFile(const FileName: String); virtual;
procedure SaveToStream(Stream: TStream); virtual;
procedure SetText(Text: PWideChar); virtual;
property Capacity: Integer read GetCapacity write SetCapacity;
property CommaText: WideString read GetCommaText write SetCommaText;
property Count: Integer read GetCount;
property Language: LCID read FLanguage write SetLanguage;
property Names[Index: Integer]: WideString read GetName;
property Objects[Index: Integer]: TObject read GetObject write PutObject;
property Values[const Name: WideString]: WideString read GetValue write SetValue;
// read-only view of FSaved
property Saved: Boolean read FSaved;
// reads and writes FSaveUnicode directly, without a setter method
property SaveUnicode: Boolean read FSaveUnicode write FSaveUnicode;
// default array property, so List[I] is shorthand for List.Strings[I]
property Strings[Index: Integer]: WideString read Get write Put; default;
property Text: WideString read GetTextStr write SetTextStr;
property OnConfirmConversion: TConfirmConversionEvent read FOnConfirmConversion write FOnConfirmConversion;
end;
// TWideStringList class
// TWideStringItem is one list entry: a WideString together with an object reference.
TWideStringItem = record
FString: WideString;
FObject: TObject;
end;
// dynamic array of entries, used as the type of TWideStringList.FList
TWideStringItemList = array of TWideStringItem;
// Concrete TWideStrings descendant that keeps its items in a dynamic array (FList).
// It overrides all members declared abstract in TWideStrings (Get, GetCount, Clear,
// Delete, Insert) and adds sorting support and change notification events.
TWideStringList = class(TWideStrings)
private
FList: TWideStringItemList;
// NOTE(review): presumably the number of entries of FList in use - confirm
FCount: Integer;
FSorted: Boolean;
FDuplicates: TDuplicates;
FOnChange: TNotifyEvent;
FOnChanging: TNotifyEvent;
procedure ExchangeItems(Index1, Index2: Integer);
procedure Grow;
procedure QuickSort(L, R: Integer);
procedure InsertItem(Index: Integer; const S: WideString);
procedure SetSorted(Value: Boolean);
protected
// NOTE(review): Changed/Changing presumably fire OnChange/OnChanging - confirm
procedure Changed; virtual;
procedure Changing; virtual;
function Get(Index: Integer): WideString; override;
function GetCapacity: Integer; override;
function GetCount: Integer; override;
function GetObject(Index: Integer): TObject; override;
procedure Put(Index: Integer; const S: WideString); override;
procedure PutObject(Index: Integer; AObject: TObject); override;
procedure SetCapacity(NewCapacity: Integer); override;
procedure SetUpdateState(Updating: Boolean); override;
procedure SetLanguage(Value: LCID); override;
public
destructor Destroy; override;
function Add(const S: WideString): Integer; override;
procedure Clear; override;
procedure Delete(Index: Integer); override;
procedure Exchange(Index1, Index2: Integer); override;
// NOTE(review): presumably a search in a sorted list that returns the match or
// insertion position in Index - confirm against the implementation
function Find(const S: WideString; var Index: Integer): Boolean; virtual;
function IndexOf(const S: WideString): Integer; override;
procedure Insert(Index: Integer; const S: WideString); override;
procedure Sort; virtual;
// reads and writes FDuplicates directly
property Duplicates: TDuplicates read FDuplicates write FDuplicates;
// writing goes through SetSorted
property Sorted: Boolean read FSorted write SetSorted;
property OnChange: TNotifyEvent read FOnChange write FOnChange;
property OnChanging: TNotifyEvent read FOnChanging write FOnChanging;
end;
// result type for number retrieval functions (returned by UnicodeGetNumber)
// The value is expressed as two Integer fields, Numerator and Denominator.
TUNumber = record
Numerator,
Denominator: Integer;
end;
// functions involving null-terminated strings
// NOTE: PWideChars as well as WideStrings are NOT managed by reference counting (in contrast to 8 bit strings)!
// NOTE(review): the names parallel the SysUtils Str* routines with a W suffix; the
// semantics presumably match their ANSI namesakes - confirm against the implementation.
function StrLenW(Str: PWideChar): Cardinal;
function StrEndW(Str: PWideChar): PWideChar;
function StrMoveW(Dest, Source: PWideChar; Count: Cardinal): PWideChar;
function StrCopyW(Dest, Source: PWideChar): PWideChar;
function StrECopyW(Dest, Source: PWideChar): PWideChar;
function StrLCopyW(Dest, Source: PWideChar; MaxLen: Cardinal): PWideChar;
// StrPCopyW and StrPLCopyW take an 8 bit String as source and a PWideChar as destination
function StrPCopyW(Dest: PWideChar; const Source: String): PWideChar;
function StrPLCopyW(Dest: PWideChar; const Source: String; MaxLen: Cardinal): PWideChar;
function StrCatW(Dest, Source: PWideChar): PWideChar;
function StrLCatW(Dest, Source: PWideChar; MaxLen: Cardinal): PWideChar;
function StrCompW(Str1, Str2: PWideChar): Integer;
function StrICompW(Str1, Str2: PWideChar): Integer;
function StrLCompW(Str1, Str2: PWideChar; MaxLen: Cardinal): Integer;
function StrLICompW(Str1, Str2: PWideChar; MaxLen: Cardinal): Integer;
function StrNScanW(S1, S2: PWideChar): Integer;
function StrRNScanW(S1, S2: PWideChar): Integer;
// StrScanW is overloaded: the second form additionally takes an explicit string length
function StrScanW(Str: PWideChar; Chr: WideChar): PWideChar; overload;
function StrScanW(Str: PWideChar; Chr: WideChar; StrLen: Cardinal): PWideChar; overload;
function StrRScanW(Str: PWideChar; Chr: WideChar): PWideChar;
function StrPosW(Str, SubStr: PWideChar): PWideChar;
function StrUpperW(Str: PWideChar): PWideChar;
function StrLowerW(Str: PWideChar): PWideChar;
function StrTitleW(Str: PWideChar): PWideChar;
// NOTE(review): strings from StrAllocW/StrNewW are presumably to be released with
// StrDisposeW - confirm against the implementation
function StrAllocW(Size: Cardinal): PWideChar;
function StrBufSizeW(Str: PWideChar): Cardinal;
function StrNewW(Str: PWideChar): PWideChar;
procedure StrDisposeW(Str: PWideChar);
procedure StrSwapByteOrder(Str: PWideChar);
// functions involving Delphi wide strings
// Apart from WideCharPos and WideExtractQuotedStr, every routine here returns a
// WideString or a WideChar.
// NOTE(review): the implementations are not part of this section; behavior is
// presumably what the names suggest - confirm before relying on details.
function WideAdjustLineBreaks(const S: WideString): WideString;
function WideCharPos(const S: WideString; const Ch: WideChar; const Index: Integer): Integer; //az
function WideCompose(const S: WideString): WideString;
// note: Source is passed by value here, not as const
function WideComposeHangul(Source: WideString): WideString;
function WideDecompose(const S: WideString): WideString;
function WideLoCase(C: WideChar): WideChar;
function WideLowerCase(const S: WideString): WideString;
// Src is a var parameter, so the routine can move the caller's pointer
function WideExtractQuotedStr(var Src: PWideChar; Quote: WideChar): WideString;
function WideQuotedStr(const S: WideString; Quote: WideChar): WideString;
function WideStringOfChar(C: WideChar; Count: Cardinal): WideString;
function WideTitleCaseChar(C: WideChar): WideChar;
function WideTitleCaseString(const S: WideString): WideString;
function WideTrim(const S: WideString): WideString;
function WideTrimLeft(const S: WideString): WideString;
function WideTrimRight(const S: WideString): WideString;
function WideUpCase(C: WideChar): WideChar;
function WideUpperCase(const S: WideString): WideString;
// low level character routines
// All take a single UCS4 code point.
// UnicodeGetNumber returns its result as a TUNumber record (Numerator and Denominator).
function UnicodeGetDigit(Code: UCS4): Integer;
function UnicodeGetNumber(Code: UCS4): TUNumber;
// case mapping routines: UCS4 in, UCS4 out
function UnicodeToUpper(Code: UCS4): UCS4;
function UnicodeToLower(Code: UCS4): UCS4;
function UnicodeToTitle(Code: UCS4): UCS4;
// character test routines
// Each routine takes one UCS4 code point and returns a Boolean.
// NOTE(review): the exact property sets tested are defined in the implementation,
// which is not part of this section - confirm there before relying on edge cases.
function UnicodeIsAlpha(C: UCS4): Boolean;
function UnicodeIsDigit(C: UCS4): Boolean;
function UnicodeIsAlphaNum(C: UCS4): Boolean;
function UnicodeIsControl(C: UCS4): Boolean;
function UnicodeIsSpace(C: UCS4): Boolean;
function UnicodeIsWhiteSpace(C: UCS4): Boolean;
function UnicodeIsBlank(C: UCS4): Boolean;
function UnicodeIsPunctuation(C: UCS4): Boolean;
function UnicodeIsGraph(C: UCS4): Boolean;
function UnicodeIsPrintable(C: UCS4): Boolean;
function UnicodeIsUpper(C: UCS4): Boolean;
function UnicodeIsLower(C: UCS4): Boolean;
function UnicodeIsTitle(C: UCS4): Boolean;
function UnicodeIsHexDigit(C: UCS4): Boolean;
function UnicodeIsIsoControl(C: UCS4): Boolean;
function UnicodeIsFormatControl(C: UCS4): Boolean;
function UnicodeIsSymbol(C: UCS4): Boolean;
function UnicodeIsNumber(C: UCS4): Boolean;
function UnicodeIsNonSpacing(C: UCS4): Boolean;
function UnicodeIsOpenPunctuation(C: UCS4): Boolean;
function UnicodeIsClosePunctuation(C: UCS4): Boolean;
function UnicodeIsInitialPunctuation(C: UCS4): Boolean;
function UnicodeIsFinalPunctuation(C: UCS4): Boolean;
function UnicodeIsComposite(C: UCS4): Boolean;
function UnicodeIsQuotationMark(C: UCS4): Boolean;
function UnicodeIsSymmetric(C: UCS4): Boolean;
function UnicodeIsMirroring(C: UCS4): Boolean;
function UnicodeIsNonBreaking(C: UCS4): Boolean;
// Directionality functions
// Each routine takes one UCS4 code point and returns a Boolean.
// NOTE(review): presumably these test the bidirectional category of the code point
// (RTL = right-to-left, LTR = left-to-right) - confirm against the implementation.
function UnicodeIsRTL(C: UCS4): Boolean;
function UnicodeIsLTR(C: UCS4): Boolean;
function UnicodeIsStrong(C: UCS4): Boolean;
function UnicodeIsWeak(C: UCS4): Boolean;
function UnicodeIsNeutral(C: UCS4): Boolean;
function UnicodeIsSeparator(C: UCS4): Boolean;
// Other character test functions
// Each routine takes one UCS4 code point and returns a Boolean.
function UnicodeIsMark(C: UCS4): Boolean;
function UnicodeIsModifier(C: UCS4): Boolean;
function UnicodeIsLetterNumber(C: UCS4): Boolean;
function UnicodeIsConnectionPunctuation(C: UCS4): Boolean;
function UnicodeIsDash(C: UCS4): Boolean;
function UnicodeIsMath(C: UCS4): Boolean;
function UnicodeIsCurrency(C: UCS4): Boolean;
function UnicodeIsModifierSymbol(C: UCS4): Boolean;
function UnicodeIsNonSpacingMark(C: UCS4): Boolean;
function UnicodeIsSpacingMark(C: UCS4): Boolean;
function UnicodeIsEnclosing(C: UCS4): Boolean;
function UnicodeIsPrivate(C: UCS4): Boolean;
function UnicodeIsSurrogate(C: UCS4): Boolean;
function UnicodeIsLineSeparator(C: UCS4): Boolean;
function UnicodeIsParagraphSeparator(C: UCS4): Boolean;
// NOTE(review): "Idenifier" (sic) is the declared public name; it is inconsistent with
// UnicodeIsIdentifierPart below, but renaming it would break existing callers.
function UnicodeIsIdenifierStart(C: UCS4): Boolean;
function UnicodeIsIdentifierPart(C: UCS4): Boolean;
function UnicodeIsDefined(C: UCS4): Boolean;
function UnicodeIsUndefined(C: UCS4): Boolean;
function UnicodeIsHan(C: UCS4): Boolean;
function UnicodeIsHangul(C: UCS4): Boolean;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -