⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ucharacter.pas

📁 uEncoding字符串UNICODE处理单元 用于处理unicode国际通用
💻 PAS
📖 第 1 页 / 共 3 页
字号:
      ucUppercaseLetter, ucUppercaseLetter,
      ucUppercaseLetter, ucLowercaseLetter,
      ucLowercaseLetter, ucLowercaseLetter,
      ucLowercaseLetter, ucLowercaseLetter,
      ucLowercaseLetter, ucLowercaseLetter,
      ucLowercaseLetter, ucLowercaseLetter,
      ucLowercaseLetter, ucLowercaseLetter,
      ucLowercaseLetter, ucLowercaseLetter,
      ucLowercaseLetter, ucLowercaseLetter,
      ucLowercaseLetter, ucLowercaseLetter,
      ucLowercaseLetter, ucLowercaseLetter,
      ucLowercaseLetter, ucLowercaseLetter,
      ucLowercaseLetter, ucLowercaseLetter,
      ucLowercaseLetter, ucMathSymbol,
      ucLowercaseLetter, ucLowercaseLetter,
      ucLowercaseLetter, ucLowercaseLetter,
      ucLowercaseLetter, ucLowercaseLetter,
      ucLowercaseLetter, ucLowercaseLetter
     );  

function InternalGetLatin1Category(C: WideChar): TUnicodeCategory; inline;
begin
  Result := Latin1Categories[Byte(C)];
end;

procedure CheckStringRange(const S: Widestring; Index: Integer); inline;
begin
  if (Index > Length(S)) or (Index < 1) then
    raise EArgumentOutOfRangeException.CreateResFmt(@sArgumentOutOfRange_StringIndex, [Index, Length(S)]);
end;

class function TCharacter.CheckLetter(uc: TUnicodeCategory): Boolean;
begin
  case uc of
    ucUppercaseLetter,
    ucLowercaseLetter,
    ucTitlecaseLetter,
    ucModifierLetter,
    ucOtherLetter: Result := True;
  else
    Result := False;
  end;
end;

class function TCharacter.CheckLetterOrDigit(uc: TUnicodeCategory): Boolean;
begin
  case uc of
    ucUppercaseLetter,
    ucLowercaseLetter,
    ucTitlecaseLetter,
    ucModifierLetter,
    ucOtherLetter,
    ucDecimalNumber: Result := True;
  else
    Result := False;
  end;
end;

class function TCharacter.CheckNumber(uc: TUnicodeCategory): Boolean;
begin
  case uc of
    ucOtherNumber,
    ucLetterNumber,
    ucDecimalNumber: Result := True;
  else
    Result := False;
  end;
end;

class function TCharacter.CheckPunctuation(uc: TUnicodeCategory): Boolean;
begin
  case uc of
    ucConnectPunctuation,
    ucDashPunctuation,
    ucClosePunctuation,
    ucFinalPunctuation,
    ucInitialPunctuation,
    ucOtherPunctuation,
    ucOpenPunctuation: Result := True;
  else
    Result := False;
  end;
end;

class function TCharacter.CheckSeparator(uc: TUnicodeCategory): Boolean;
begin
  case uc of
    ucLineSeparator,
    ucParagraphSeparator,
    ucSpaceSeparator: Result := True;
  else
    Result := False;
  end;
end;

class function TCharacter.CheckSymbol(uc: TUnicodeCategory): Boolean;
begin
  case uc of
    ucCurrencySymbol,
    ucModifierSymbol,
    ucMathSymbol,
    ucOtherSymbol: Result := True;
  else
    Result := False;
  end;
end;

class function TCharacter.IsLatin1(C: WideChar): Boolean;
begin
  Result := Integer(C) <= $FF;
end;

class function TCharacter.IsLetter(C: WideChar): Boolean;
begin
  if not IsLatin1(C) then
    Result := CheckLetter(InternalGetUnicodeCategory(UCS4Char(C)))
  else if not IsAscii(C) then
    Result := CheckLetter(InternalGetLatin1Category(C))
  else
  begin
    C := WideChar(Integer(C) or Ord(' '));
    Result := (C >= 'a') and (C <= 'z');
  end;
end;

class function TCharacter.IsLetter(const S: Widestring; Index: Integer): Boolean;
begin
  CheckStringRange(S, Index);
  Result := IsLetter(S[Index]);
end;

class function TCharacter.IsLetterOrDigit(const S: Widestring; Index: Integer): Boolean;
var
  C: WideChar;
begin
  CheckStringRange(S, Index);
  C := S[Index];
  if IsLatin1(C) then
    Result := CheckLetterOrDigit(InternalGetLatin1Category(C))
  else
    Result := CheckLetterOrDigit(GetUnicodeCategory(S, Index));
end;

class function TCharacter.IsLetterOrDigit(C: WideChar): Boolean;
begin
  if IsLatin1(C) then
    Result := CheckLetterOrDigit(InternalGetLatin1Category(C))
  else
    Result := CheckLetterOrDigit(InternalGetUnicodeCategory(UCS4Char(C)));
end;

class function TCharacter.IsAscii(C: WideChar): Boolean;
begin
  Result := Integer(C) <= $7F;
end;

class function TCharacter.IsControl(const S: Widestring; Index: Integer): Boolean;
var
  C: WideChar;
begin
  CheckStringRange(S, Index);
  C := S[Index];
  if IsLatin1(C) then
    Result := InternalGetLatin1Category(C) = ucControl
  else
    Result := GetUnicodeCategory(S, Index) = ucControl;
end;

class function TCharacter.IsControl(C: WideChar): Boolean;
begin
  if IsLatin1(C) then
    Result := InternalGetLatin1Category(C) = ucControl
  else
    Result := InternalGetUnicodeCategory(UCS4Char(C)) = ucControl;
end;

class function TCharacter.IsDigit(C: WideChar): Boolean;
begin
  if not IsLatin1(C) then
    Result := InternalGetUnicodeCategory(UCS4Char(C)) = ucDecimalNumber
  else
    Result := (C >= '0') and (C <= '9');
end;

class function TCharacter.IsDigit(const S: Widestring; Index: Integer): Boolean;
var
  C: WideChar;
begin
  CheckStringRange(S, Index);
  C := S[Index];
  if not IsLatin1(C) then
    Result := GetUnicodeCategory(S, Index) = ucDecimalNumber
  else
    Result := (C >= '0') and (C <= '9');
end;

class function TCharacter.IsHighSurrogate(C: WideChar): Boolean;
begin
  Result := (Integer(C) >= $D800) and (Integer(C) <= $DBFF);
end;

class function TCharacter.IsLowSurrogate(C: WideChar): Boolean;
begin
  Result := (Integer(C) >= $DC00) and (Integer(C) <= $DFFF);
end;

class function TCharacter.IsSurrogate(Surrogate: WideChar): Boolean;
begin
  Result := (Integer(Surrogate) >= $D800) and (Integer(Surrogate) <= $DFFF);
end;

class function TCharacter.IsSurrogatePair(const HighSurrogate, LowSurrogate: WideChar): Boolean;
begin
  Result := (Integer(HighSurrogate) >= $D800) and (Integer(HighSurrogate) <= $DBFF) and
    (Integer(LowSurrogate) >= $DC00) and (Integer(LowSurrogate) <= $DFFF);
end;

class function TCharacter.GetUnicodeCategory(C: WideChar): TUnicodeCategory;
begin
  if IsLatin1(C) then
    Result := InternalGetLatin1Category(C)
  else
    Result := InternalGetUnicodeCategory(UCS4Char(C));
end;

class function TCharacter.ConvertToUtf32(const S: Widestring; Index: Integer; out CharLength: Integer): UCS4Char;
var
  LowSurrogate, HighSurrogate: Integer;
begin
  CheckStringRange(S, Index);
  CharLength := 1;
  HighSurrogate := Integer(S[Index]) - $D800;
  if (HighSurrogate < 0) or (HighSurrogate > $7FF) then
    Result := UCS4Char(S[Index])
  else
  begin
    if HighSurrogate > $3FF then
      raise EArgumentException.CreateResFmt(@sArgument_InvalidLowSurrogate, [Index]);
    if Index > Length(S) - 1 then
      raise EArgumentException.CreateResFmt(@sArgument_InvalidHighSurrogate, [Index]);
    LowSurrogate := Integer(S[Index + 1]) - $DC00;
    if (LowSurrogate < 0) or (LowSurrogate > $3FF) then
      raise EArgumentException.CreateResFmt(@sArgument_InvalidHighSurrogate, [Index]);
    Inc(CharLength);
    Result := (HighSurrogate * $400) + LowSurrogate + $10000;
  end;
end;

class function TCharacter.ConvertToUtf32(const S: Widestring; Index: Integer): UCS4Char;
var
  CharLength: Integer;
begin
  Result := ConvertToUtf32(S, Index, CharLength);
end;

class function TCharacter.ConvertFromUtf32(C: UCS4Char): Widestring;
begin
  if (C > $10FFFF) or ((C >= $D800) and (C <= $DFFF)) then
    raise EArgumentOutOfRangeException.CreateRes(@sArgumentOutOfRange_InvalidUTF32);
  if C < $10000 then
    Result := WideChar(C)
  else
  begin
    Dec(C, $10000);
    Result := WideChar(C div $400 + $D800) + WideChar(C mod $400 + $DC00);
  end;
end;

class function TCharacter.ConvertToUtf32(const HighSurrogate, LowSurrogate: WideChar): UCS4Char;
begin
  if not IsHighSurrogate(HighSurrogate) then
    raise EArgumentOutOfRangeException.CreateRes(@sArgumentOutOfRange_InvalidHighSurrogate);
  if not IsLowSurrogate(LowSurrogate) then
    raise EArgumentOutOfRangeException.CreateRes(@sArgumentOutOfRange_InvalidLowSurrogate);
  Result := ((Integer(HighSurrogate) - $D800) * $400) + (Integer(LowSurrogate) - $DC00) + $10000;
end;

constructor TCharacter.Create;
begin
  raise ENoConstructException.CreateResFmt(@sNoConstruct, [ClassName]);
end;

class procedure TCharacter.Initialize;
var
  Res: HRSRC;
  ResData: HGLOBAL;
  Offsets: PDataTableOffsets;
begin
  Res := FindResource(HInstance, PChar('CHARTABLE'), RT_RCDATA);
  if Res = 0 then
    RaiseLastOSError;
  ResData := LoadResource(HInstance, Res);
  if ResData = 0 then
    RaiseLastOSError;
  DataTable := LockResource(ResData);
  if DataTable = nil then
    RaiseLastOSError;
  Offsets := DataTable;
  CatIndexPrimary := Pointer(Integer(DataTable) + Offsets.IndexTable1Offset);
  CatIndexSecondary := Pointer(Integer(DataTable) + Offsets.IndexTable2Offset);
  CategoryTable := Pointer(Integer(DataTable) + Offsets.DataTableOffset);
  NumIndexPrimary := Pointer(Integer(DataTable) + Offsets.NumberIndex1Offset);
  NumIndexSecondary := Pointer(Integer(DataTable) + Offsets.NumberIndex2Offset);
  NumericValueTable := Pointer(Integer(DataTable) + Offsets.NumberDataOffset);
end;

class function TCharacter.GetNumericValue(C: WideChar): Double;
begin
  Result := NumberValue(UCS4Char(C)); 
end;

class function TCharacter.GetNumericValue(const S: Widestring; Index: Integer): Double;
begin
  Result := NumberValue(ConvertToUTF32(S, Index));
end;

class function TCharacter.GetUnicodeCategory(const S: Widestring; Index: Integer): TUnicodeCategory;
begin
  CheckStringRange(S, Index);
  if IsLatin1(S[Index]) then
    Result := InternalGetLatin1Category(S[Index])
  else
    Result := InternalGetUnicodeCategory(ConvertToUtf32(S, Index));
end;

class function TCharacter.IsHighSurrogate(const S: Widestring; Index: Integer): Boolean;
begin
  CheckStringRange(S, Index);
  Result := IsHighSurrogate(S[Index]);
end;

class function TCharacter.IsLower(C: WideChar): Boolean;
begin
  if not IsLatin1(C) then
    Result := InternalGetUnicodeCategory(UCS4Char(C)) = ucLowercaseLetter
  else if not IsAscii(C) then
    Result := InternalGetLatin1Category(C) = ucLowercaseLetter
  else
    Result := (C >= 'a') and (C <= 'z');
end;

class function TCharacter.IsLower(const S: Widestring; Index: Integer): Boolean;
var
  C: WideChar;
begin
  CheckStringRange(S, Index);
  C := S[Index];
  if not IsLatin1(C) then
    Result := GetUnicodeCategory(S, Index) = ucLowercaseLetter
  else if not IsAscii(C) then
    Result := InternalGetLatin1Category(C) = ucLowercaseLetter
  else
    Result := (C >= 'a') and (C <= 'z');
end;

class function TCharacter.IsLowSurrogate(const S: Widestring; Index: Integer): Boolean;
begin
  CheckStringRange(S, Index);
  Result := IsLowSurrogate(S[Index]);
end;

class function TCharacter.IsNumber(C: WideChar): Boolean;
begin
  if not IsLatin1(C) then
    Result := CheckNumber(InternalGetUnicodeCategory(UCS4Char(C)))
  else if not IsAscii(C) then
    Result := CheckNumber(InternalGetLatin1Category(C))
  else
    Result := (C >= '0') and (C <= '9');
end;

class function TCharacter.IsNumber(const S: Widestring; Index: Integer): Boolean;
var
  C: WideChar;
begin
  CheckStringRange(S, Index);
  C := S[Index];

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -