📄 ucharacter.pas
字号:
ucUppercaseLetter, ucUppercaseLetter,
ucUppercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucMathSymbol,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter,
ucLowercaseLetter, ucLowercaseLetter
);
function InternalGetLatin1Category(C: WideChar): TUnicodeCategory; inline;
begin
Result := Latin1Categories[Byte(C)];
end;
procedure CheckStringRange(const S: Widestring; Index: Integer); inline;
begin
if (Index > Length(S)) or (Index < 1) then
raise EArgumentOutOfRangeException.CreateResFmt(@sArgumentOutOfRange_StringIndex, [Index, Length(S)]);
end;
class function TCharacter.CheckLetter(uc: TUnicodeCategory): Boolean;
begin
case uc of
ucUppercaseLetter,
ucLowercaseLetter,
ucTitlecaseLetter,
ucModifierLetter,
ucOtherLetter: Result := True;
else
Result := False;
end;
end;
class function TCharacter.CheckLetterOrDigit(uc: TUnicodeCategory): Boolean;
begin
case uc of
ucUppercaseLetter,
ucLowercaseLetter,
ucTitlecaseLetter,
ucModifierLetter,
ucOtherLetter,
ucDecimalNumber: Result := True;
else
Result := False;
end;
end;
class function TCharacter.CheckNumber(uc: TUnicodeCategory): Boolean;
begin
case uc of
ucOtherNumber,
ucLetterNumber,
ucDecimalNumber: Result := True;
else
Result := False;
end;
end;
class function TCharacter.CheckPunctuation(uc: TUnicodeCategory): Boolean;
begin
case uc of
ucConnectPunctuation,
ucDashPunctuation,
ucClosePunctuation,
ucFinalPunctuation,
ucInitialPunctuation,
ucOtherPunctuation,
ucOpenPunctuation: Result := True;
else
Result := False;
end;
end;
class function TCharacter.CheckSeparator(uc: TUnicodeCategory): Boolean;
begin
case uc of
ucLineSeparator,
ucParagraphSeparator,
ucSpaceSeparator: Result := True;
else
Result := False;
end;
end;
class function TCharacter.CheckSymbol(uc: TUnicodeCategory): Boolean;
begin
case uc of
ucCurrencySymbol,
ucModifierSymbol,
ucMathSymbol,
ucOtherSymbol: Result := True;
else
Result := False;
end;
end;
class function TCharacter.IsLatin1(C: WideChar): Boolean;
begin
Result := Integer(C) <= $FF;
end;
class function TCharacter.IsLetter(C: WideChar): Boolean;
begin
if not IsLatin1(C) then
Result := CheckLetter(InternalGetUnicodeCategory(UCS4Char(C)))
else if not IsAscii(C) then
Result := CheckLetter(InternalGetLatin1Category(C))
else
begin
C := WideChar(Integer(C) or Ord(' '));
Result := (C >= 'a') and (C <= 'z');
end;
end;
class function TCharacter.IsLetter(const S: Widestring; Index: Integer): Boolean;
begin
CheckStringRange(S, Index);
Result := IsLetter(S[Index]);
end;
class function TCharacter.IsLetterOrDigit(const S: Widestring; Index: Integer): Boolean;
var
C: WideChar;
begin
CheckStringRange(S, Index);
C := S[Index];
if IsLatin1(C) then
Result := CheckLetterOrDigit(InternalGetLatin1Category(C))
else
Result := CheckLetterOrDigit(GetUnicodeCategory(S, Index));
end;
class function TCharacter.IsLetterOrDigit(C: WideChar): Boolean;
begin
if IsLatin1(C) then
Result := CheckLetterOrDigit(InternalGetLatin1Category(C))
else
Result := CheckLetterOrDigit(InternalGetUnicodeCategory(UCS4Char(C)));
end;
class function TCharacter.IsAscii(C: WideChar): Boolean;
begin
Result := Integer(C) <= $7F;
end;
class function TCharacter.IsControl(const S: Widestring; Index: Integer): Boolean;
var
C: WideChar;
begin
CheckStringRange(S, Index);
C := S[Index];
if IsLatin1(C) then
Result := InternalGetLatin1Category(C) = ucControl
else
Result := GetUnicodeCategory(S, Index) = ucControl;
end;
class function TCharacter.IsControl(C: WideChar): Boolean;
begin
if IsLatin1(C) then
Result := InternalGetLatin1Category(C) = ucControl
else
Result := InternalGetUnicodeCategory(UCS4Char(C)) = ucControl;
end;
class function TCharacter.IsDigit(C: WideChar): Boolean;
begin
if not IsLatin1(C) then
Result := InternalGetUnicodeCategory(UCS4Char(C)) = ucDecimalNumber
else
Result := (C >= '0') and (C <= '9');
end;
class function TCharacter.IsDigit(const S: Widestring; Index: Integer): Boolean;
var
C: WideChar;
begin
CheckStringRange(S, Index);
C := S[Index];
if not IsLatin1(C) then
Result := GetUnicodeCategory(S, Index) = ucDecimalNumber
else
Result := (C >= '0') and (C <= '9');
end;
class function TCharacter.IsHighSurrogate(C: WideChar): Boolean;
begin
Result := (Integer(C) >= $D800) and (Integer(C) <= $DBFF);
end;
class function TCharacter.IsLowSurrogate(C: WideChar): Boolean;
begin
Result := (Integer(C) >= $DC00) and (Integer(C) <= $DFFF);
end;
class function TCharacter.IsSurrogate(Surrogate: WideChar): Boolean;
begin
Result := (Integer(Surrogate) >= $D800) and (Integer(Surrogate) <= $DFFF);
end;
class function TCharacter.IsSurrogatePair(const HighSurrogate, LowSurrogate: WideChar): Boolean;
begin
Result := (Integer(HighSurrogate) >= $D800) and (Integer(HighSurrogate) <= $DBFF) and
(Integer(LowSurrogate) >= $DC00) and (Integer(LowSurrogate) <= $DFFF);
end;
class function TCharacter.GetUnicodeCategory(C: WideChar): TUnicodeCategory;
begin
if IsLatin1(C) then
Result := InternalGetLatin1Category(C)
else
Result := InternalGetUnicodeCategory(UCS4Char(C));
end;
class function TCharacter.ConvertToUtf32(const S: Widestring; Index: Integer; out CharLength: Integer): UCS4Char;
var
LowSurrogate, HighSurrogate: Integer;
begin
CheckStringRange(S, Index);
CharLength := 1;
HighSurrogate := Integer(S[Index]) - $D800;
if (HighSurrogate < 0) or (HighSurrogate > $7FF) then
Result := UCS4Char(S[Index])
else
begin
if HighSurrogate > $3FF then
raise EArgumentException.CreateResFmt(@sArgument_InvalidLowSurrogate, [Index]);
if Index > Length(S) - 1 then
raise EArgumentException.CreateResFmt(@sArgument_InvalidHighSurrogate, [Index]);
LowSurrogate := Integer(S[Index + 1]) - $DC00;
if (LowSurrogate < 0) or (LowSurrogate > $3FF) then
raise EArgumentException.CreateResFmt(@sArgument_InvalidHighSurrogate, [Index]);
Inc(CharLength);
Result := (HighSurrogate * $400) + LowSurrogate + $10000;
end;
end;
class function TCharacter.ConvertToUtf32(const S: Widestring; Index: Integer): UCS4Char;
var
CharLength: Integer;
begin
Result := ConvertToUtf32(S, Index, CharLength);
end;
class function TCharacter.ConvertFromUtf32(C: UCS4Char): Widestring;
begin
if (C > $10FFFF) or ((C >= $D800) and (C <= $DFFF)) then
raise EArgumentOutOfRangeException.CreateRes(@sArgumentOutOfRange_InvalidUTF32);
if C < $10000 then
Result := WideChar(C)
else
begin
Dec(C, $10000);
Result := WideChar(C div $400 + $D800) + WideChar(C mod $400 + $DC00);
end;
end;
class function TCharacter.ConvertToUtf32(const HighSurrogate, LowSurrogate: WideChar): UCS4Char;
begin
if not IsHighSurrogate(HighSurrogate) then
raise EArgumentOutOfRangeException.CreateRes(@sArgumentOutOfRange_InvalidHighSurrogate);
if not IsLowSurrogate(LowSurrogate) then
raise EArgumentOutOfRangeException.CreateRes(@sArgumentOutOfRange_InvalidLowSurrogate);
Result := ((Integer(HighSurrogate) - $D800) * $400) + (Integer(LowSurrogate) - $DC00) + $10000;
end;
constructor TCharacter.Create;
begin
raise ENoConstructException.CreateResFmt(@sNoConstruct, [ClassName]);
end;
class procedure TCharacter.Initialize;
var
Res: HRSRC;
ResData: HGLOBAL;
Offsets: PDataTableOffsets;
begin
Res := FindResource(HInstance, PChar('CHARTABLE'), RT_RCDATA);
if Res = 0 then
RaiseLastOSError;
ResData := LoadResource(HInstance, Res);
if ResData = 0 then
RaiseLastOSError;
DataTable := LockResource(ResData);
if DataTable = nil then
RaiseLastOSError;
Offsets := DataTable;
CatIndexPrimary := Pointer(Integer(DataTable) + Offsets.IndexTable1Offset);
CatIndexSecondary := Pointer(Integer(DataTable) + Offsets.IndexTable2Offset);
CategoryTable := Pointer(Integer(DataTable) + Offsets.DataTableOffset);
NumIndexPrimary := Pointer(Integer(DataTable) + Offsets.NumberIndex1Offset);
NumIndexSecondary := Pointer(Integer(DataTable) + Offsets.NumberIndex2Offset);
NumericValueTable := Pointer(Integer(DataTable) + Offsets.NumberDataOffset);
end;
class function TCharacter.GetNumericValue(C: WideChar): Double;
begin
Result := NumberValue(UCS4Char(C));
end;
class function TCharacter.GetNumericValue(const S: Widestring; Index: Integer): Double;
begin
Result := NumberValue(ConvertToUTF32(S, Index));
end;
class function TCharacter.GetUnicodeCategory(const S: Widestring; Index: Integer): TUnicodeCategory;
begin
CheckStringRange(S, Index);
if IsLatin1(S[Index]) then
Result := InternalGetLatin1Category(S[Index])
else
Result := InternalGetUnicodeCategory(ConvertToUtf32(S, Index));
end;
class function TCharacter.IsHighSurrogate(const S: Widestring; Index: Integer): Boolean;
begin
CheckStringRange(S, Index);
Result := IsHighSurrogate(S[Index]);
end;
class function TCharacter.IsLower(C: WideChar): Boolean;
begin
if not IsLatin1(C) then
Result := InternalGetUnicodeCategory(UCS4Char(C)) = ucLowercaseLetter
else if not IsAscii(C) then
Result := InternalGetLatin1Category(C) = ucLowercaseLetter
else
Result := (C >= 'a') and (C <= 'z');
end;
class function TCharacter.IsLower(const S: Widestring; Index: Integer): Boolean;
var
C: WideChar;
begin
CheckStringRange(S, Index);
C := S[Index];
if not IsLatin1(C) then
Result := GetUnicodeCategory(S, Index) = ucLowercaseLetter
else if not IsAscii(C) then
Result := InternalGetLatin1Category(C) = ucLowercaseLetter
else
Result := (C >= 'a') and (C <= 'z');
end;
class function TCharacter.IsLowSurrogate(const S: Widestring; Index: Integer): Boolean;
begin
CheckStringRange(S, Index);
Result := IsLowSurrogate(S[Index]);
end;
class function TCharacter.IsNumber(C: WideChar): Boolean;
begin
if not IsLatin1(C) then
Result := CheckNumber(InternalGetUnicodeCategory(UCS4Char(C)))
else if not IsAscii(C) then
Result := CheckNumber(InternalGetLatin1Category(C))
else
Result := (C >= '0') and (C <= '9');
end;
class function TCharacter.IsNumber(const S: Widestring; Index: Integer): Boolean;
var
C: WideChar;
begin
CheckStringRange(S, Index);
C := S[Index];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -