📄 ucharacter.pas
字号:
{*******************************************************}
{ }
{ CodeGear Delphi Runtime Library }
{ }
{ Copyright (c) 1995-2008 CodeGear }
{ }
{*******************************************************}
unit uCharacter;
interface
uses SysUtils;
{ Localizable message texts declared by this unit.
  Entries containing %d are format strings; the raise sites that supply the
  arguments are outside the visible part of the unit. }
resourcestring
  // Surrogate range descriptions (UTF-16 high: $D800..$DBFF, low: $DC00..$DFFF).
  sArgumentOutOfRange_InvalidHighSurrogate = 'A valid high surrogate character is >= $D800 and <= $DBFF';
  sArgumentOutOfRange_InvalidLowSurrogate = 'A valid low surrogate character is >= $DC00 and <= $DFFF';
  // Index diagnostics: first %d is the offending index, second %d the bound.
  sArgumentOutOfRange_Index = 'Index out of range (%d). Must be >= 0 and < %d';
  sArgumentOutOfRange_StringIndex = 'String index out of range (%d). Must be >= 1 and <= %d';
  // The last Unicode code point is $10FFFF (the message previously said $10FFF,
  // which would wrongly describe most supplementary-plane values as invalid).
  sArgumentOutOfRange_InvalidUTF32 = 'Invalid UTF32 character value. Must be >= 0 and <= $10FFFF, excluding surrogate pair ranges';
  // Unpaired surrogate diagnostics: %d is the index of the offending char.
  sArgument_InvalidHighSurrogate = 'High surrogate char without a following low surrogate char at index: %d. Check that the string is encoded properly';
  sArgument_InvalidLowSurrogate = 'Low surrogate char without a preceding high surrogate char at index: %d. Check that the string is encoded properly';
  // %s is a class name.
  sNoConstruct = 'Class %s is not intended to be constructed';
//{$SCOPEDENUMS ON}
{ Exception classes introduced by this unit.
  NOTE(review): the raise sites are not in the visible part of the unit; the
  intended use below is inferred from the class names and from the
  sArgument*/sNoConstruct messages - confirm against the implementation. }
type
// Base class for argument-related errors (derives directly from Exception).
EArgumentException = class(Exception);
// Specialisation of EArgumentException; presumably for out-of-range indexes/values.
EArgumentOutOfRangeException = class(EArgumentException);
// Separate from the argument hierarchy; presumably raised with sNoConstruct.
ENoConstructException = class(Exception);
type
{ Category assigned to a character. This is the element type of the category
  lookup tables in the implementation section, so the declaration order
  (and therefore each ordinal value) must not be changed.
  NOTE(review): the ordinals presumably have to match the data stored in the
  linked resource - confirm before reordering or inserting members. }
TUnicodeCategory = (
  // Control / format / unassigned / private-use / surrogate
  ucControl,
  ucFormat,
  ucUnassigned,
  ucPrivateUse,
  ucSurrogate,
  // Letters
  ucLowercaseLetter,
  ucModifierLetter,
  ucOtherLetter,
  ucTitlecaseLetter,
  ucUppercaseLetter,
  // Marks
  ucCombiningMark,
  ucEnclosingMark,
  ucNonSpacingMark,
  // Numbers
  ucDecimalNumber,
  ucLetterNumber,
  ucOtherNumber,
  // Punctuation
  ucConnectPunctuation,
  ucDashPunctuation,
  ucClosePunctuation,
  ucFinalPunctuation,
  ucInitialPunctuation,
  ucOtherPunctuation,
  ucOpenPunctuation,
  // Symbols
  ucCurrencySymbol,
  ucModifierSymbol,
  ucMathSymbol,
  ucOtherSymbol,
  // Separators
  ucLineSeparator,
  ucParagraphSeparator,
  ucSpaceSeparator
);
{ Break classification of a character.
  NOTE(review): nothing in the visible part of this unit references this
  type; the member names look like Unicode line-breaking classes - confirm
  the intended use. Declaration order (ordinal values) is kept unchanged. }
TUnicodeBreak = (
  // Hard breaks and non-printing classes
  ubMandatory,
  ubCarriageReturn,
  ubLineFeed,
  ubCombiningMark,
  ubSurrogate,
  ubZeroWidthSpace,
  // Glue / break-opportunity classes
  ubInseparable,
  ubNonBreakingGlue,
  ubContingent,
  ubSpace,
  ubAfter,
  ubBefore,
  ubBeforeAndAfter,
  ubHyphen,
  ubNonStarter,
  // Punctuation-like classes
  ubOpenPunctuation,
  ubClosePunctuation,
  ubQuotation,
  ubExclamation,
  // Text and numeric classes
  ubIdeographic,
  ubNumeric,
  ubInfixSeparator,
  ubSymbol,
  ubAlphabetic,
  ubPrefix,
  ubPostfix,
  ubComplexContext,
  ubAmbiguous,
  ubUnknown,
  ubNextLine,
  ubWordJoiner,
  // Hangul classes
  ubHangulLJamo,
  ubHangulVJamo,
  ubHangulTJamo,
  ubHangulLvSyllable,
  ubHangulLvtSyllable
);
type
{ Sealed container for character classification and conversion routines.
  Apart from the constructor every member is a static class method, so the
  routines are called on the class itself (TCharacter.IsDigit(...)).
  Most queries come in two overloads: one taking a single WideChar and one
  taking a Widestring plus an Index into it.
  NOTE(review): the method bodies are outside the visible part of the unit;
  the sNoConstruct message suggests Create is not meant to succeed - confirm. }
TCharacter = class sealed
private
  // Called by the table lookup helpers in the implementation section while
  // their table pointers are still nil.
  class procedure Initialize; static;

  // Range tests on a single UTF-16 code unit.
  class function IsLatin1(C: WideChar): Boolean; inline; static;
  class function IsAscii(C: WideChar): Boolean; inline; static;

  // Predicates over an already-resolved category value.
  class function CheckLetter(uc: TUnicodeCategory): Boolean; inline; static;
  class function CheckLetterOrDigit(uc: TUnicodeCategory): Boolean; inline; static;
  class function CheckNumber(uc: TUnicodeCategory): Boolean; inline; static;
  class function CheckPunctuation(uc: TUnicodeCategory): Boolean; inline; static;
  class function CheckSymbol(uc: TUnicodeCategory): Boolean; inline; static;
  class function CheckSeparator(uc: TUnicodeCategory): Boolean; inline; static;
public
  constructor Create;

  // UTF-32 <-> UTF-16 conversion.
  class function ConvertFromUtf32(C: UCS4Char): Widestring; static;
  class function ConvertToUtf32(const S: Widestring; Index: Integer): UCS4Char; overload; inline; static;
  class function ConvertToUtf32(const S: Widestring; Index: Integer; out CharLength: Integer): UCS4Char; overload; static;
  class function ConvertToUtf32(const HighSurrogate, LowSurrogate: WideChar): UCS4Char; overload; static;

  // Property lookups.
  class function GetNumericValue(C: WideChar): Double; overload; static;
  class function GetNumericValue(const S: Widestring; Index: Integer): Double; overload; static;
  class function GetUnicodeCategory(C: WideChar): TUnicodeCategory; overload; static;
  class function GetUnicodeCategory(const S: Widestring; Index: Integer): TUnicodeCategory; overload; static;

  // Classification predicates.
  class function IsControl(C: WideChar): Boolean; overload; static;
  class function IsControl(const S: Widestring; Index: Integer): Boolean; overload; static;
  class function IsDigit(C: WideChar): Boolean; overload; static;
  class function IsDigit(const S: Widestring; Index: Integer): Boolean; overload; static;
  class function IsHighSurrogate(C: WideChar): Boolean; overload; inline; static;
  class function IsHighSurrogate(const S: Widestring; Index: Integer): Boolean; overload; inline; static;
  class function IsLetter(C: WideChar): Boolean; overload; static;
  class function IsLetter(const S: Widestring; Index: Integer): Boolean; overload; static;
  class function IsLetterOrDigit(C: WideChar): Boolean; overload; static;
  class function IsLetterOrDigit(const S: Widestring; Index: Integer): Boolean; overload; static;
  class function IsLower(C: WideChar): Boolean; overload; static;
  class function IsLower(const S: Widestring; Index: Integer): Boolean; overload; static;
  class function IsLowSurrogate(C: WideChar): Boolean; overload; inline; static;
  class function IsLowSurrogate(const S: Widestring; Index: Integer): Boolean; overload; inline; static;
  class function IsNumber(C: WideChar): Boolean; overload; static;
  class function IsNumber(const S: Widestring; Index: Integer): Boolean; overload; static;
  class function IsPunctuation(C: WideChar): Boolean; overload; static;
  class function IsPunctuation(const S: Widestring; Index: Integer): Boolean; overload; static;
  class function IsSeparator(C: WideChar): Boolean; overload; static;
  class function IsSeparator(const S: Widestring; Index: Integer): Boolean; overload; static;
  class function IsSurrogate(Surrogate: WideChar): Boolean; overload; inline; static;
  class function IsSurrogate(const S: Widestring; Index: Integer): Boolean; overload; static;
  class function IsSurrogatePair(const HighSurrogate, LowSurrogate: WideChar): Boolean; overload; inline; static;
  class function IsSurrogatePair(const S: Widestring; Index: Integer): Boolean; overload; static;
  class function IsSymbol(C: WideChar): Boolean; overload; static;
  class function IsSymbol(const S: Widestring; Index: Integer): Boolean; overload; static;
  class function IsUpper(C: WideChar): Boolean; overload; static;
  class function IsUpper(const S: Widestring; Index: Integer): Boolean; overload; static;
  class function IsWhiteSpace(C: WideChar): Boolean; overload; static;
  class function IsWhiteSpace(const S: Widestring; Index: Integer): Boolean; overload; static;

  // Case conversion of a single char or of a whole string.
  class function ToLower(C: WideChar): WideChar; overload; static;
  class function ToLower(const S: Widestring): Widestring; overload; static;
  class function ToUpper(C: WideChar): WideChar; overload; static;
  class function ToUpper(const S: Widestring): Widestring; overload; static;
end;
{ Unit-level routines. Each declaration below has the same name, parameters
  and result type as a public static method of TCharacter, so callers can
  write IsDigit(C) instead of TCharacter.IsDigit(C).
  NOTE(review): the bodies are outside the visible part of the unit;
  presumably each one simply forwards to the TCharacter method - confirm. }

// UTF-32 <-> UTF-16 conversion.
function ConvertFromUtf32(C: UCS4Char): Widestring; inline;
function ConvertToUtf32(const S: Widestring; Index: Integer): UCS4Char; overload; inline;
function ConvertToUtf32(const S: Widestring; Index: Integer; out CharLength: Integer): UCS4Char; overload; inline;
function ConvertToUtf32(const HighSurrogate, LowSurrogate: WideChar): UCS4Char; overload; inline;
// Property lookups.
function GetNumericValue(C: WideChar): Double; overload; inline;
function GetNumericValue(const S: Widestring; Index: Integer): Double; overload; inline;
function GetUnicodeCategory(C: WideChar): TUnicodeCategory; overload; inline;
function GetUnicodeCategory(const S: Widestring; Index: Integer): TUnicodeCategory; overload; inline;
// Classification predicates: a single-char overload and a (string, index) overload each.
function IsControl(C: WideChar): Boolean; overload; inline;
function IsControl(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsDigit(C: WideChar): Boolean; overload; inline;
function IsDigit(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsHighSurrogate(C: WideChar): Boolean; overload; inline;
function IsHighSurrogate(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsLetter(C: WideChar): Boolean; overload; inline;
function IsLetter(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsLetterOrDigit(C: WideChar): Boolean; overload; inline;
function IsLetterOrDigit(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsLower(C: WideChar): Boolean; overload; inline;
function IsLower(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsLowSurrogate(C: WideChar): Boolean; overload; inline;
function IsLowSurrogate(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsNumber(C: WideChar): Boolean; overload; inline;
function IsNumber(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsPunctuation(C: WideChar): Boolean; overload; inline;
function IsPunctuation(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsSeparator(C: WideChar): Boolean; overload; inline;
function IsSeparator(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsSurrogate(Surrogate: WideChar): Boolean; overload; inline;
function IsSurrogate(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsSurrogatePair(const HighSurrogate, LowSurrogate: WideChar): Boolean; overload; inline;
function IsSurrogatePair(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsSymbol(C: WideChar): Boolean; overload; inline;
function IsSymbol(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsUpper(C: WideChar): Boolean; overload; inline;
function IsUpper(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsWhiteSpace(C: WideChar): Boolean; overload; inline;
function IsWhiteSpace(const S: Widestring; Index: Integer): Boolean; overload; inline;
// Case conversion of a single char or of a whole string.
function ToLower(C: WideChar): WideChar; overload; inline;
function ToLower(const S: Widestring): Widestring; overload; inline;
function ToUpper(C: WideChar): WideChar; overload; inline;
function ToUpper(const S: Widestring): Widestring; overload; inline;
implementation
uses Windows;
{$RESOURCE 'uCharacter.res'}
{ Implementation-private layouts for the lookup tables.
  The array types are only used through their pointer types (see the table
  pointer variables below), so the declared bounds act as the maximum
  addressable index rather than as an allocation size. }
type
// Index table of 16-bit entries; used for both index levels of each lookup.
TIndexArray = array[0..32767] of Word;
PIndexArray = ^TIndexArray;
// Final-level table of category values.
TCategoryArray = array[0..65535] of TUnicodeCategory;
PCategoryArray = ^TCategoryArray;
// Final-level table of numeric values.
TNumberArray = array[0..4095] of Double;
PNumberArray = ^TNumberArray;
PDataTableOffsets = ^TDataTableOffsets;
// Six Integer offsets, one per table.
// NOTE(review): presumably a header describing where each table starts
// inside the loaded data; the code that reads it is not in view - confirm.
TDataTableOffsets = record
IndexTable1Offset: Integer;
IndexTable2Offset: Integer;
DataTableOffset: Integer;
NumberIndex1Offset: Integer;
NumberIndex2Offset: Integer;
NumberDataOffset: Integer;
end;
{ Unit-level table pointers. InternalGetUnicodeCategory and NumberValue call
  TCharacter.Initialize when CategoryTable / NumericValueTable is still nil,
  so these are expected to be filled in lazily by that routine.
  NOTE(review): Initialize is outside the visible part of the unit - confirm
  how (and whether thread-safely) the pointers are assigned. }
var
// Untyped pointer; presumably the base of the loaded table data - TODO confirm.
DataTable: Pointer;
// Category lookup: first index level, second index level, final values.
CatIndexPrimary: PIndexArray;
CatIndexSecondary: PIndexArray;
CategoryTable: PCategoryArray;
// Numeric value lookup: first index level, second index level, final values.
NumIndexPrimary: PIndexArray;
NumIndexSecondary: PIndexArray;
NumericValueTable: PNumberArray;
{ TCharacter }
{ Returns the category stored for code point C using a three-level table
  lookup: C shr 8 selects an entry in CatIndexPrimary, bits 4..7 of C are
  added to it to address CatIndexSecondary, and the low four bits of C are
  added to that entry to address CategoryTable.
  TCharacter.Initialize is called first when CategoryTable is still nil.
  C is not range-checked here. }
function InternalGetUnicodeCategory(C: UCS4Char): TUnicodeCategory; inline;
var
  SecondarySlot: Cardinal;
  CategorySlot: Cardinal;
begin
  // Tables are set up on first use.
  if not Assigned(CategoryTable) then
    TCharacter.Initialize;

  // Level 1 -> level 2: primary entry plus the second-lowest nibble of C.
  SecondarySlot := CatIndexPrimary[C shr 8] + ((C shr 4) and $F);
  // Level 2 -> value: secondary entry plus the lowest nibble of C.
  CategorySlot := CatIndexSecondary[SecondarySlot] + (C and $F);

  Result := CategoryTable[CategorySlot];
end;
{ Returns the numeric value stored for code point C using a three-level
  table lookup: C shr 8 selects an entry in NumIndexPrimary, bits 4..7 of C
  are added to it to address NumIndexSecondary, and the low four bits of C
  are added to that entry to address NumericValueTable.
  TCharacter.Initialize is called first when NumericValueTable is still nil.
  C is not range-checked here. }
function NumberValue(C: UCS4Char): Double; inline;
var
  SecondarySlot: Cardinal;
  ValueSlot: Cardinal;
begin
  // Tables are set up on first use.
  if not Assigned(NumericValueTable) then
    TCharacter.Initialize;

  // Level 1 -> level 2: primary entry plus the second-lowest nibble of C.
  SecondarySlot := NumIndexPrimary[C shr 8] + ((C shr 4) and $F);
  // Level 2 -> value: secondary entry plus the lowest nibble of C.
  ValueSlot := NumIndexSecondary[SecondarySlot] + (C and $F);

  Result := NumericValueTable[ValueSlot];
end;
const
Latin1Categories: array[0..255] of TUnicodeCategory =
( ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucSpaceSeparator, ucOtherPunctuation,
ucOtherPunctuation,
ucOtherPunctuation, ucCurrencySymbol,
ucOtherPunctuation,
ucOtherPunctuation,
ucOtherPunctuation,
ucOpenPunctuation,
ucClosePunctuation,
ucOtherPunctuation, ucMathSymbol,
ucOtherPunctuation,
ucDashPunctuation,
ucOtherPunctuation,
ucOtherPunctuation, ucDecimalNumber,
ucDecimalNumber, ucDecimalNumber,
ucDecimalNumber, ucDecimalNumber,
ucDecimalNumber, ucDecimalNumber,
ucDecimalNumber, ucDecimalNumber,
ucDecimalNumber, ucOtherPunctuation,
ucOtherPunctuation, ucMathSymbol,
ucMathSymbol, ucMathSymbol,
ucOtherPunctuation,
ucOtherPunctuation,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucOpenPunctuation,
ucOtherPunctuation,
ucClosePunctuation, ucModifierSymbol,
ucConnectPunctuation,
ucModifierSymbol, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucOpenPunctuation,
ucMathSymbol, ucClosePunctuation,
ucMathSymbol, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucControl, ucControl,
ucSpaceSeparator, ucOtherPunctuation,
ucCurrencySymbol, ucCurrencySymbol,
ucCurrencySymbol, ucCurrencySymbol,
ucOtherSymbol, ucOtherSymbol,
ucModifierSymbol, ucOtherSymbol,
ucLowercaseLetter,
ucInitialPunctuation, ucMathSymbol,
ucDashPunctuation, ucOtherSymbol,
ucModifierSymbol, ucOtherSymbol,
ucMathSymbol, ucOtherNumber,
ucOtherNumber, ucModifierSymbol,
ucLowercaseLetter, ucOtherSymbol,
ucOtherPunctuation, ucModifierSymbol,
ucOtherNumber, ucLowercaseLetter,
ucFinalPunctuation, ucOtherNumber,
ucOtherNumber, ucOtherNumber,
ucOtherPunctuation,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucMathSymbol,
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucUppercaseLetter,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -