📄 jclstrings.pas
字号:
{**************************************************************************************************}
{ }
{ Project JEDI Code Library (JCL) }
{ }
{ The contents of this file are subject to the Mozilla Public License Version 1.1 (the "License"); }
{ you may not use this file except in compliance with the License. You may obtain a copy of the }
{ License at http://www.mozilla.org/MPL/ }
{ }
{ Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF }
{ ANY KIND, either express or implied. See the License for the specific language governing rights }
{ and limitations under the License. }
{ }
{ The Original Code is JclStrings.pas. }
{ }
{ The Initial Developer of the Original Code is Marcel van Brakel. }
{ Portions created by Marcel van Brakel are Copyright (C) Marcel van Brakel. All rights reserved. }
{ }
{ Contributor(s): }
{ Alexander Radchenko }
{ Anthony Steele }
{ Azret Botash }
{ Barry Kelly }
{ Huanlin Tsai }
{ Jack N.A. Bakker }
{ Jean-Fabien Connault }
{ John C Molyneux }
{ Leonard Wennekers }
{ Martin Kimmings }
{ Martin Kubecka }
{ Massimo Maria Ghisalberti }
{ Matthias Thoma (mthoma) }
{ Michael Winter }
{ Nick Hodges }
{ Olivier Sannier }
{ Pelle F. S. Liljendal }
{ Petr Vones }
{ Robert Lee }
{ Robert Marquardt }
{ Robert Rossmair (rrossmair) }
{ }
{**************************************************************************************************}
{ }
{ Various character and string routines (searching, testing and transforming) }
{ }
{**************************************************************************************************}
// Last modified: $Date: 2005/03/08 16:10:08 $
// For history see end of file
unit JclStrings;
{$I jcl.inc}
interface
uses
Classes, SysUtils,
JclBase, JclWideStrings;
// Character constants and sets
const
// Misc. often used character definitions
AnsiNull = AnsiChar(#0);
AnsiSoh = AnsiChar(#1);
AnsiStx = AnsiChar(#2);
AnsiEtx = AnsiChar(#3);
AnsiEot = AnsiChar(#4);
AnsiEnq = AnsiChar(#5);
AnsiAck = AnsiChar(#6);
AnsiBell = AnsiChar(#7);
AnsiBackspace = AnsiChar(#8);
AnsiTab = AnsiChar(#9);
AnsiLineFeed = JclBase.AnsiLineFeed;
AnsiVerticalTab = AnsiChar(#11);
AnsiFormFeed = AnsiChar(#12);
AnsiCarriageReturn = JclBase.AnsiCarriageReturn;
AnsiCrLf = JclBase.AnsiCrLf;
AnsiSo = AnsiChar(#14);
AnsiSi = AnsiChar(#15);
AnsiDle = AnsiChar(#16);
AnsiDc1 = AnsiChar(#17);
AnsiDc2 = AnsiChar(#18);
AnsiDc3 = AnsiChar(#19);
AnsiDc4 = AnsiChar(#20);
AnsiNak = AnsiChar(#21);
AnsiSyn = AnsiChar(#22);
AnsiEtb = AnsiChar(#23);
AnsiCan = AnsiChar(#24);
AnsiEm = AnsiChar(#25);
AnsiEndOfFile = AnsiChar(#26);
AnsiEscape = AnsiChar(#27);
AnsiFs = AnsiChar(#28);
AnsiGs = AnsiChar(#29);
AnsiRs = AnsiChar(#30);
AnsiUs = AnsiChar(#31);
AnsiSpace = AnsiChar(' ');
AnsiComma = AnsiChar(',');
AnsiBackslash = AnsiChar('\');
AnsiForwardSlash = AnsiChar('/');
AnsiDoubleQuote = AnsiChar('"');
AnsiSingleQuote = AnsiChar('''');
AnsiLineBreak = JclBase.AnsiLineBreak;
// Misc. character sets
AnsiWhiteSpace = [AnsiTab, AnsiLineFeed, AnsiVerticalTab,
AnsiFormFeed, AnsiCarriageReturn, AnsiSpace];
AnsiSigns = ['-', '+'];
AnsiUppercaseLetters = JclBase.AnsiUppercaseLetters;
AnsiLowercaseLetters = JclBase.AnsiLowercaseLetters;
AnsiLetters = JclBase.AnsiLetters;
AnsiDecDigits = JclBase.AnsiDecDigits;
AnsiOctDigits = JclBase.AnsiOctDigits;
AnsiHexDigits = JclBase.AnsiHexDigits;
AnsiValidIdentifierLetters = JclBase.AnsiValidIdentifierLetters;
const
// CharType return values
C1_UPPER = $0001; // Uppercase
C1_LOWER = $0002; // Lowercase
C1_DIGIT = $0004; // Decimal digits
C1_SPACE = $0008; // Space characters
C1_PUNCT = $0010; // Punctuation
C1_CNTRL = $0020; // Control characters
C1_BLANK = $0040; // Blank characters
C1_XDIGIT = $0080; // Hexadecimal digits
C1_ALPHA = $0100; // Any linguistic character: alphabetic, syllabary, or ideographic
{$IFDEF MSWINDOWS}
{$IFDEF SUPPORTS_EXTSYM}
{$EXTERNALSYM C1_UPPER}
{$EXTERNALSYM C1_LOWER}
{$EXTERNALSYM C1_DIGIT}
{$EXTERNALSYM C1_SPACE}
{$EXTERNALSYM C1_PUNCT}
{$EXTERNALSYM C1_CNTRL}
{$EXTERNALSYM C1_BLANK}
{$EXTERNALSYM C1_XDIGIT}
{$EXTERNALSYM C1_ALPHA}
{$ENDIF SUPPORTS_EXTSYM}
{$ENDIF MSWINDOWS}
// String Test Routines
function StrIsAlpha(const S: AnsiString): Boolean;
function StrIsAlphaNum(const S: AnsiString): Boolean;
function StrIsAlphaNumUnderscore(const S: AnsiString): Boolean;
function StrContainsChars(const S: AnsiString; Chars: TSysCharSet; CheckAll: Boolean): Boolean;
function StrConsistsOfNumberChars(const S: AnsiString): Boolean;
function StrIsDigit(const S: AnsiString): Boolean;
function StrIsSubset(const S: AnsiString; const ValidChars: TSysCharSet): Boolean;
function StrSame(const S1, S2: AnsiString): Boolean;
// String Transformation Routines
function StrCenter(const S: AnsiString; L: Integer; C: AnsiChar = ' '): AnsiString;
function StrCharPosLower(const S: AnsiString; CharPos: Integer): AnsiString;
function StrCharPosUpper(const S: AnsiString; CharPos: Integer): AnsiString;
function StrDoubleQuote(const S: AnsiString): AnsiString;
function StrEnsureNoPrefix(const Prefix, Text: AnsiString): AnsiString;
function StrEnsureNoSuffix(const Suffix, Text: AnsiString): AnsiString;
function StrEnsurePrefix(const Prefix, Text: AnsiString): AnsiString;
function StrEnsureSuffix(const Suffix, Text: AnsiString): AnsiString;
function StrEscapedToString(const S: AnsiString): AnsiString;
function StrLower(const S: AnsiString): AnsiString;
procedure StrLowerInPlace(var S: AnsiString);
procedure StrLowerBuff(S: PAnsiChar);
procedure StrMove(var Dest: AnsiString; const Source: AnsiString; const ToIndex,
FromIndex, Count: Integer);
function StrPadLeft(const S: AnsiString; Len: Integer; C: AnsiChar = AnsiSpace ): AnsiString;
function StrPadRight(const S: AnsiString; Len: Integer; C: AnsiChar = AnsiSpace ): AnsiString;
function StrProper(const S: AnsiString): AnsiString;
procedure StrProperBuff(S: PAnsiChar);
function StrQuote(const S: AnsiString; C: AnsiChar): AnsiString;
function StrRemoveChars(const S: AnsiString; const Chars: TSysCharSet): AnsiString;
function StrKeepChars(const S: AnsiString; const Chars: TSysCharSet): AnsiString;
procedure StrReplace(var S: AnsiString; const Search, Replace: AnsiString; Flags: TReplaceFlags = []);
function StrReplaceChar(const S: AnsiString; const Source, Replace: Char): AnsiString;
function StrReplaceChars(const S: AnsiString; const Chars: TSysCharSet; Replace: Char): AnsiString;
function StrReplaceButChars(const S: AnsiString; const Chars: TSysCharSet; Replace: Char): AnsiString;
function StrRepeat(const S: AnsiString; Count: Integer): AnsiString;
function StrRepeatLength(const S: AnsiString; Const L: Integer): AnsiString;
function StrReverse(const S: AnsiString): AnsiString;
procedure StrReverseInPlace(var S: AnsiString);
function StrSingleQuote(const S: AnsiString): AnsiString;
function StrSmartCase(const S: AnsiString; Delimiters: TSysCharSet): AnsiString;
function StrStringToEscaped(const S: AnsiString): AnsiString;
function StrStripNonNumberChars(const S: AnsiString): AnsiString;
function StrToHex(const Source: AnsiString): AnsiString;
function StrTrimCharLeft(const S: AnsiString; C: AnsiChar): AnsiString;
function StrTrimCharsLeft(const S: AnsiString; const Chars: TSysCharSet): AnsiString;
function StrTrimCharRight(const S: AnsiString; C: AnsiChar): AnsiString;
function StrTrimCharsRight(const S: AnsiString; const Chars: TSysCharSet): AnsiString;
function StrTrimQuotes(const S: AnsiString): AnsiString;
function StrUpper(const S: AnsiString): AnsiString;
procedure StrUpperInPlace(var S: AnsiString);
procedure StrUpperBuff(S: PAnsiChar);
{$IFDEF WIN32}
function StrOemToAnsi(const S: AnsiString): AnsiString;
function StrAnsiToOem(const S: AnsiString): AnsiString;
{$ENDIF WIN32}
// String Management
procedure StrAddRef(var S: AnsiString);
function StrAllocSize(const S: AnsiString): Longint;
procedure StrDecRef(var S: AnsiString);
function StrLen(S: PChar): Integer;
function StrLength(const S: AnsiString): Longint;
function StrRefCount(const S: AnsiString): Longint;
procedure StrResetLength(var S: AnsiString);
// String Search and Replace Routines
function StrCharCount(const S: AnsiString; C: AnsiChar): Integer;
function StrCharsCount(const S: AnsiString; Chars: TSysCharSet): Integer;
function StrStrCount(const S, SubS: AnsiString): Integer;
function StrCompare(const S1, S2: AnsiString): Integer;
function StrCompareRange(const S1, S2: AnsiString; const Index, Count: Integer): Integer;
function StrFillChar(const C: AnsiChar; const Count: Integer): AnsiString;
function StrFind(const Substr, S: AnsiString; const Index: Integer = 1): Integer;
function StrHasPrefix(const S: AnsiString; const Prefixes: array of string): Boolean;
function StrIndex(const S: AnsiString; const List: array of AnsiString): Integer;
function StrILastPos(const SubStr, S: AnsiString): Integer;
function StrIPos(const SubStr, S: AnsiString): Integer;
function StrIsOneOf(const S: AnsiString; const List: array of AnsiString): Boolean;
function StrLastPos(const SubStr, S: AnsiString): Integer;
function StrMatch(const Substr, S: AnsiString; const Index: Integer = 1): Integer;
function StrMatches(const Substr, S: AnsiString; const Index: Integer = 1): Boolean;
function StrNIPos(const S, SubStr: AnsiString; N: Integer): Integer;
function StrNPos(const S, SubStr: AnsiString; N: Integer): Integer;
function StrPrefixIndex(const S: AnsiString; const Prefixes: array of string): Integer;
function StrSearch(const Substr, S: AnsiString; const Index: Integer = 1): Integer;
// String Extraction
function StrAfter(const SubStr, S: AnsiString): AnsiString;
function StrBefore(const SubStr, S: AnsiString): AnsiString;
function StrBetween(const S: AnsiString; const Start, Stop: AnsiChar): AnsiString;
function StrChopRight(const S: AnsiString; N: Integer): AnsiString;
function StrLeft(const S: AnsiString; Count: Integer): AnsiString;
function StrMid(const S: AnsiString; Start, Count: Integer): AnsiString;
function StrRestOf(const S: AnsiString; N: Integer): AnsiString;
function StrRight(const S: AnsiString; Count: Integer): AnsiString;
// Character Test Routines
function CharEqualNoCase(const C1, C2: AnsiChar): Boolean;
function CharIsAlpha(const C: AnsiChar): Boolean;
function CharIsAlphaNum(const C: AnsiChar): Boolean;
function CharIsBlank(const C: AnsiChar): Boolean;
function CharIsControl(const C: AnsiChar): Boolean;
function CharIsDelete(const C: AnsiChar): Boolean;
function CharIsDigit(const C: AnsiChar): Boolean;
function CharIsLower(const C: AnsiChar): Boolean;
function CharIsNumberChar(const C: AnsiChar): Boolean;
function CharIsPrintable(const C: AnsiChar): Boolean;
function CharIsPunctuation(const C: AnsiChar): Boolean;
function CharIsReturn(const C: AnsiChar): Boolean;
function CharIsSpace(const C: AnsiChar): Boolean;
function CharIsUpper(const C: AnsiChar): Boolean;
function CharIsWhiteSpace(const C: AnsiChar): Boolean;
function CharType(const C: AnsiChar): Word;
// Character Transformation Routines
function CharHex(const C: AnsiChar): Byte;
function CharLower(const C: AnsiChar): AnsiChar;
function CharUpper(const C: AnsiChar): AnsiChar;
function CharToggleCase(const C: AnsiChar): AnsiChar;
// Character Search and Replace
function CharPos(const S: AnsiString; const C: AnsiChar; const Index: Integer = 1): Integer;
function CharLastPos(const S: AnsiString; const C: AnsiChar; const Index: Integer = 1): Integer;
function CharIPos(const S: AnsiString; const C: AnsiChar; const Index: Integer = 1 ): Integer;
function CharReplace(var S: AnsiString; const Search, Replace: AnsiChar): Integer;
// PCharVector
type
PCharVector = ^PChar;
function StringsToPCharVector(var Dest: PCharVector; const Source: TStrings): PCharVector;
function PCharVectorCount(Source: PCharVector): Integer;
procedure PCharVectorToStrings(const Dest: TStrings; Source: PCharVector);
procedure FreePCharVector(var Dest: PCharVector);
// MultiSz Routines
type
PMultiSz = PChar;
PWideMultiSz = PWideChar;
function StringsToMultiSz(var Dest: PMultiSz; const Source: TStrings): PMultiSz;
procedure MultiSzToStrings(const Dest: TStrings; const Source: PMultiSz);
function MultiSzLength(const Source: PMultiSz): Integer;
procedure AllocateMultiSz(var Dest: PMultiSz; Len: Integer);
procedure FreeMultiSz(var Dest: PMultiSz);
function MultiSzDup(const Source: PMultiSz): PMultiSz;
function WideStringsToWideMultiSz(var Dest: PWideMultiSz; const Source: TWideStrings): PWideMultiSz;
procedure WideMultiSzToWideStrings(const Dest: TWideStrings; const Source: PWideMultiSz);
function WideMultiSzLength(const Source: PWideMultiSz): Integer;
procedure AllocateWideMultiSz(var Dest: PWideMultiSz; Len: Integer);
procedure FreeWideMultiSz(var Dest: PWideMultiSz);
function WideMultiSzDup(const Source: PWideMultiSz): PWideMultiSz;
// TStrings Manipulation
procedure StrIToStrings(S, Sep: AnsiString; const List: TStrings; const AllowEmptyString: Boolean = True);
procedure StrToStrings(S, Sep: AnsiString; const List: TStrings; const AllowEmptyString: Boolean = True);
function StringsToStr(const List: TStrings; const Sep: AnsiString; const AllowEmptyString: Boolean = True): AnsiString;
procedure TrimStrings(const List: TStrings; DeleteIfEmpty: Boolean = True );
procedure TrimStringsRight(const List: TStrings; DeleteIfEmpty: Boolean = True);
procedure TrimStringsLeft(const List: TStrings; DeleteIfEmpty: Boolean = True );
function AddStringToStrings(const S: string; Strings: TStrings; const Unique: Boolean): Boolean;
// Miscellaneous
function BooleanToStr(B: Boolean): AnsiString;
function FileToString(const FileName: AnsiString): AnsiString;
procedure StringToFile(const FileName, Contents: AnsiString);
function StrToken(var S: AnsiString; Separator: AnsiChar): AnsiString;
procedure StrTokens(const S: AnsiString; const List: TStrings);
procedure StrTokenToStrings(S: AnsiString; Separator: AnsiChar; const List: TStrings);
function StrWord(var S: PAnsiChar; out Word: AnsiString): Boolean;
function StrToFloatSafe(const S: AnsiString): Float;
function StrToIntSafe(const S: AnsiString): Integer;
procedure StrNormIndex(const StrLen: integer; var Index: integer; var Count: integer); overload;
// Exceptions
type
EJclStringError = EJclError;
implementation
uses
{$IFDEF MSWINDOWS}
Windows,
{$ENDIF MSWINDOWS}
{$IFDEF HAS_UNIT_LIBC}
Libc,
{$ENDIF HAS_UNIT_LIBC}
JclSysUtils, JclLogic, JclResources;
//=== Internal ===============================================================
type
TAnsiStrRec = packed record
AllocSize: Longint;
RefCount: Longint;
Length: Longint;
end;
const
AnsiStrRecSize = SizeOf(TAnsiStrRec); // size of the AnsiString header rec
AnsiCharCount = Ord(High(AnsiChar)) + 1; // # of chars in one set
AnsiLoOffset = AnsiCharCount * 0; // offset to lower case chars
AnsiUpOffset = AnsiCharCount * 1; // offset to upper case chars
AnsiReOffset = AnsiCharCount * 2; // offset to reverse case chars
AnsiAlOffset = 12; // offset to AllocSize in StrRec
AnsiRfOffset = 8; // offset to RefCount in StrRec
AnsiLnOffset = 4; // offset to Length in StrRec
AnsiCaseMapSize = AnsiCharCount * 3; // # of chars is a table
var
AnsiCaseMap: array [0..AnsiCaseMapSize - 1] of AnsiChar; // case mappings
AnsiCaseMapReady: Boolean = False; // true if case map exists
AnsiCharTypes: array [AnsiChar] of Word;
procedure LoadCharTypes;
var
CurrChar: AnsiChar;
CurrType: Word;
begin
for CurrChar := Low(AnsiChar) to High(AnsiChar) do
begin
{$IFDEF MSWINDOWS}
GetStringTypeExA(LOCALE_USER_DEFAULT, CT_CTYPE1, @CurrChar, SizeOf(AnsiChar), CurrType);
{$DEFINE CHAR_TYPES_INITIALIZED}
{$ENDIF MSWINDOWS}
{$IFDEF LINUX}
CurrType := 0;
if isupper(Byte(CurrChar)) <> 0 then
CurrType := CurrType or C1_UPPER;
if islower(Byte(CurrChar)) <> 0 then
CurrType := CurrType or C1_LOWER;
if isdigit(Byte(CurrChar)) <> 0 then
CurrType := CurrType or C1_DIGIT;
if isspace(Byte(CurrChar)) <> 0 then
CurrType := CurrType or C1_SPACE;
if ispunct(Byte(CurrChar)) <> 0 then
CurrType := CurrType or C1_PUNCT;
if iscntrl(Byte(CurrChar)) <> 0 then
CurrType := CurrType or C1_CNTRL;
if isblank(Byte(CurrChar)) <> 0 then
CurrType := CurrType or C1_BLANK;
if isxdigit(Byte(CurrChar)) <> 0 then
CurrType := CurrType or C1_XDIGIT;
if isalpha(Byte(CurrChar)) <> 0 then
CurrType := CurrType or C1_ALPHA;
{$DEFINE CHAR_TYPES_INITIALIZED}
{$ENDIF LINUX}
AnsiCharTypes[CurrChar] := CurrType;
{$IFNDEF CHAR_TYPES_INITIALIZED}
Implement case map initialization here
{$ENDIF ~CHAR_TYPES_INITIALIZED}
end;
end;
procedure LoadCaseMap;
var
CurrChar, UpCaseChar, LoCaseChar, ReCaseChar: AnsiChar;
begin
if not AnsiCaseMapReady then
begin
for CurrChar := Low(AnsiChar) to High(AnsiChar) do
begin
{$IFDEF MSWINDOWS}
LoCaseChar := CurrChar;
UpCaseChar := CurrChar;
Windows.CharLowerBuff(@LoCaseChar, 1);
Windows.CharUpperBuff(@UpCaseChar, 1);
{$DEFINE CASE_MAP_INITIALIZED}
{$ENDIF MSWINDOWS}
{$IFDEF LINUX}
LoCaseChar := AnsiChar(tolower(Byte(CurrChar)));
UpCaseChar := AnsiChar(toupper(Byte(CurrChar)));
{$DEFINE CASE_MAP_INITIALIZED}
{$ENDIF LINUX}
{$IFNDEF CASE_MAP_INITIALIZED}
Implement case map initialization here
{$ENDIF ~CASE_MAP_INITIALIZED}
if CharIsUpper(CurrChar) then
ReCaseChar := LoCaseChar
else
if CharIsLower(CurrChar) then
ReCaseChar := UpCaseChar
else
ReCaseChar := CurrChar;
AnsiCaseMap[Ord(CurrChar) + AnsiLoOffset] := LoCaseChar;
AnsiCaseMap[Ord(CurrChar) + AnsiUpOffset] := UpCaseChar;
AnsiCaseMap[Ord(CurrChar) + AnsiReOffset] := ReCaseChar;
end;
AnsiCaseMapReady := True;
end;
end;
// Uppercases or Lowercases a give AnsiString depending on the
// passed offset. (UpOffset or LoOffset)
procedure StrCase(var Str: AnsiString; const Offset: Integer); register; assembler;
asm
// make sure that the string is not null
TEST EAX, EAX
JZ @@StrIsNull
// create unique string if this one is ref-counted
PUSH EDX
CALL UniqueString
POP EDX
// make sure that the new string is not null
TEST EAX, EAX
JZ @@StrIsNull
// get the length, and prepare the counter
MOV ECX, [EAX - AnsiStrRecSize].TAnsiStrRec.Length
DEC ECX
JS @@StrIsNull
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -