⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ucharacter.pas

📁 uEncoding字符串UNICODE处理单元 用于处理unicode国际通用
💻 PAS
📖 第 1 页 / 共 3 页
字号:
{*******************************************************}
{                                                       }
{           CodeGear Delphi Runtime Library             }
{                                                       }
{           Copyright (c) 1995-2008 CodeGear            }
{                                                       }
{*******************************************************}

unit uCharacter;

interface

uses SysUtils;

resourcestring
  // Messages used when reporting invalid arguments to the routines of this unit.
  // The %d placeholders are filled in by the code that raises the exception
  // (not visible in this part of the file).

  // Surrogate range checks on a single UTF-16 code unit.
  sArgumentOutOfRange_InvalidHighSurrogate = 'A valid high surrogate character is >= $D800 and <= $DBFF';
  sArgumentOutOfRange_InvalidLowSurrogate = 'A valid low surrogate character is >= $DC00 and <= $DFFF';
  // Index checks: first %d is the offending index, second %d is the bound.
  sArgumentOutOfRange_Index = 'Index out of range (%d).  Must be >= 0 and < %d';
  sArgumentOutOfRange_StringIndex = 'String index out of range (%d).  Must be >= 1 and <= %d';
  // The highest Unicode code point is $10FFFF (the previous text said $10FFF,
  // which misreports the valid range by a factor of 16).
  sArgumentOutOfRange_InvalidUTF32 = 'Invalid UTF32 character value.  Must be >= 0 and <= $10FFFF, excluding surrogate pair ranges';
  // Unpaired surrogate found inside a string; %d is the index of the stray unit.
  sArgument_InvalidHighSurrogate = 'High surrogate char without a following low surrogate char at index: %d. Check that the string is encoded properly';
  sArgument_InvalidLowSurrogate = 'Low surrogate char without a preceding high surrogate char at index: %d. Check that the string is encoded properly';
  // %s is the class name.
  sNoConstruct = 'Class %s is not intended to be constructed';


//{$SCOPEDENUMS ON}
type
  // Base class for argument errors reported by this unit.
  EArgumentException = class(Exception);
  // Argument error for values or indexes outside their permitted range.
  // NOTE(review): presumably paired with the sArgumentOutOfRange_* messages;
  // the raise sites are not visible in this part of the file.
  EArgumentOutOfRangeException = class(EArgumentException);

  // Separate hierarchy (derives directly from Exception).
  // NOTE(review): presumably raised with sNoConstruct by TCharacter.Create - confirm.
  ENoConstructException = class(Exception);

type
  // Classification of a character, as returned by GetUnicodeCategory and used
  // as the element type of the category lookup tables in the implementation
  // section (TCategoryArray, Latin1Categories).
  //
  // NOTE(review): the declaration order fixes each member's ordinal value.
  // The category table appears to be loaded from external data, which
  // presumably stores these ordinals - do not reorder or insert members
  // without confirming against that data.
  TUnicodeCategory = (
    ucControl,
    ucFormat,
    ucUnassigned,
    ucPrivateUse,
    ucSurrogate,
    // Letters
    ucLowercaseLetter,
    ucModifierLetter,
    ucOtherLetter,
    ucTitlecaseLetter,
    ucUppercaseLetter,
    // Marks
    ucCombiningMark,
    ucEnclosingMark,
    ucNonSpacingMark,
    // Numbers
    ucDecimalNumber,
    ucLetterNumber,
    ucOtherNumber,
    // Punctuation
    ucConnectPunctuation,
    ucDashPunctuation,
    ucClosePunctuation,
    ucFinalPunctuation,
    ucInitialPunctuation,
    ucOtherPunctuation,
    ucOpenPunctuation,
    // Symbols
    ucCurrencySymbol,
    ucModifierSymbol,
    ucMathSymbol,
    ucOtherSymbol,
    // Separators
    ucLineSeparator,
    ucParagraphSeparator,
    ucSpaceSeparator
  );

  // Break classification for a character.
  // NOTE(review): the member names suggest line-breaking classes, but this
  // type is not referenced anywhere in the visible part of the unit, so its
  // consumers and exact semantics cannot be confirmed from here.
  // The declaration order fixes the ordinal values; keep it stable in case
  // external data depends on it.
  TUnicodeBreak = (
    ubMandatory,
    ubCarriageReturn,
    ubLineFeed,
    ubCombiningMark,
    ubSurrogate,
    ubZeroWidthSpace,
    ubInseparable,
    ubNonBreakingGlue,
    ubContingent,
    ubSpace,
    ubAfter,
    ubBefore,
    ubBeforeAndAfter,
    ubHyphen,
    ubNonStarter,
    ubOpenPunctuation,
    ubClosePunctuation,
    ubQuotation,
    ubExclamation,
    ubIdeographic,
    ubNumeric,
    ubInfixSeparator,
    ubSymbol,
    ubAlphabetic,
    ubPrefix,
    ubPostfix,
    ubComplexContext,
    ubAmbiguous,
    ubUnknown,
    ubNextLine,
    ubWordJoiner,
    ubHangulLJamo,
    ubHangulVJamo,
    ubHangulTJamo,
    ubHangulLvSyllable,
    ubHangulLvtSyllable
  );

type
  // Sealed container class for character classification and conversion
  // routines. Every routine is a static class method, so callers use
  // TCharacter.Xxx(...) without an instance.
  //
  // Most public routines come in two overloads: one taking a single WideChar
  // and one taking a Widestring plus an Index into it.
  // NOTE(review): sArgumentOutOfRange_StringIndex says "Must be >= 1", which
  // suggests the Index parameters are 1-based - confirm in the implementation.
  TCharacter = class sealed
  private
    // Called by the unit-level lookup helpers (InternalGetUnicodeCategory,
    // NumberValue) when their table pointer is still nil.
    class procedure Initialize; static;
    // Range predicates on a single code unit (bodies not visible here).
    class function IsLatin1(C: WideChar): Boolean; inline; static;
    class function IsAscii(C: WideChar): Boolean; inline; static;
    // Predicates over an already-determined TUnicodeCategory.
    class function CheckLetter(uc: TUnicodeCategory): Boolean; inline; static;
    class function CheckLetterOrDigit(uc: TUnicodeCategory): Boolean; inline; static;
    class function CheckNumber(uc: TUnicodeCategory): Boolean; inline; static;
    class function CheckPunctuation(uc: TUnicodeCategory): Boolean; inline; static;
    class function CheckSymbol(uc: TUnicodeCategory): Boolean; inline; static;
    class function CheckSeparator(uc: TUnicodeCategory): Boolean; inline; static;
  public
    // NOTE(review): presumably raises ENoConstructException (see sNoConstruct);
    // the body is not visible in this part of the file.
    constructor Create;
    // UTF-32 <-> UTF-16 conversion. The three-argument overload also reports,
    // through CharLength, a length for the character read at Index.
    class function ConvertFromUtf32(C: UCS4Char): Widestring; static;
    class function ConvertToUtf32(const S: Widestring; Index: Integer): UCS4Char; overload; inline; static;
    class function ConvertToUtf32(const S: Widestring; Index: Integer; out CharLength: Integer): UCS4Char; overload; static;
    class function ConvertToUtf32(const HighSurrogate, LowSurrogate: WideChar): UCS4Char; overload; static;
    // Property lookups.
    class function GetNumericValue(C: WideChar): Double; overload; static;
    class function GetNumericValue(const S: Widestring; Index: Integer): Double; overload; static;
    class function GetUnicodeCategory(C: WideChar): TUnicodeCategory; overload; static;
    class function GetUnicodeCategory(const S: Widestring; Index: Integer): TUnicodeCategory; overload; static;
    // Classification predicates.
    class function IsControl(C: WideChar): Boolean; overload; static;
    class function IsControl(const S: Widestring; Index: Integer): Boolean; overload; static;
    class function IsDigit(C: WideChar): Boolean; overload; static;
    class function IsDigit(const S: Widestring; Index: Integer): Boolean; overload; static;
    class function IsHighSurrogate(C: WideChar): Boolean; overload; inline; static;
    class function IsHighSurrogate(const S: Widestring; Index: Integer): Boolean; overload; inline; static;
    class function IsLetter(C: WideChar): Boolean; overload; static;
    class function IsLetter(const S: Widestring; Index: Integer): Boolean; overload; static;
    class function IsLetterOrDigit(C: WideChar): Boolean; overload; static;
    class function IsLetterOrDigit(const S: Widestring; Index: Integer): Boolean; overload; static;
    class function IsLower(C: WideChar): Boolean; overload; static;
    class function IsLower(const S: Widestring; Index: Integer): Boolean; overload; static;
    class function IsLowSurrogate(C: WideChar): Boolean; overload; inline; static;
    class function IsLowSurrogate(const S: Widestring; Index: Integer): Boolean; overload; inline; static;
    class function IsNumber(C: WideChar): Boolean; overload; static;
    class function IsNumber(const S: Widestring; Index: Integer): Boolean; overload; static;
    class function IsPunctuation(C: WideChar): Boolean; overload; static;
    class function IsPunctuation(const S: Widestring; Index: Integer): Boolean; overload; static;
    class function IsSeparator(C: WideChar): Boolean; overload; static;
    class function IsSeparator(const S: Widestring; Index: Integer): Boolean; overload; static;
    class function IsSurrogate(Surrogate: WideChar): Boolean; overload; inline; static;
    class function IsSurrogate(const S: Widestring; Index: Integer): Boolean; overload; static;
    class function IsSurrogatePair(const HighSurrogate, LowSurrogate: WideChar): Boolean; overload; inline; static;
    class function IsSurrogatePair(const S: Widestring; Index: Integer): Boolean; overload; static;
    class function IsSymbol(C: WideChar): Boolean; overload; static;
    class function IsSymbol(const S: Widestring; Index: Integer): Boolean; overload; static;
    class function IsUpper(C: WideChar): Boolean; overload; static;
    class function IsUpper(const S: Widestring; Index: Integer): Boolean; overload; static;
    class function IsWhiteSpace(C: WideChar): Boolean; overload; static;
    class function IsWhiteSpace(const S: Widestring; Index: Integer): Boolean; overload; static;
    // Case conversion for a single code unit or a whole string.
    class function ToLower(C: WideChar): WideChar; overload; static;
    class function ToLower(const S: Widestring): Widestring; overload; static;
    class function ToUpper(C: WideChar): WideChar; overload; static;
    class function ToUpper(const S: Widestring): Widestring; overload; static;
  end;

// Unit-level routines with the same names and signatures as the public static
// methods of TCharacter, so callers can write IsDigit(C) instead of
// TCharacter.IsDigit(C). All are declared inline.
// NOTE(review): presumably each one simply forwards to the TCharacter method
// of the same name - the bodies are not visible in this part of the file.
function ConvertFromUtf32(C: UCS4Char): Widestring; inline;
function ConvertToUtf32(const S: Widestring; Index: Integer): UCS4Char; overload; inline;
function ConvertToUtf32(const S: Widestring; Index: Integer; out CharLength: Integer): UCS4Char; overload; inline;
function ConvertToUtf32(const HighSurrogate, LowSurrogate: WideChar): UCS4Char; overload; inline;
function GetNumericValue(C: WideChar): Double; overload; inline;
function GetNumericValue(const S: Widestring; Index: Integer): Double; overload; inline;
function GetUnicodeCategory(C: WideChar): TUnicodeCategory; overload; inline;
function GetUnicodeCategory(const S: Widestring; Index: Integer): TUnicodeCategory; overload; inline;
function IsControl(C: WideChar): Boolean; overload; inline;
function IsControl(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsDigit(C: WideChar): Boolean; overload; inline;
function IsDigit(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsHighSurrogate(C: WideChar): Boolean; overload; inline;
function IsHighSurrogate(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsLetter(C: WideChar): Boolean; overload; inline;
function IsLetter(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsLetterOrDigit(C: WideChar): Boolean; overload; inline;
function IsLetterOrDigit(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsLower(C: WideChar): Boolean; overload; inline;
function IsLower(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsLowSurrogate(C: WideChar): Boolean; overload; inline;
function IsLowSurrogate(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsNumber(C: WideChar): Boolean; overload; inline;
function IsNumber(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsPunctuation(C: WideChar): Boolean; overload; inline;
function IsPunctuation(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsSeparator(C: WideChar): Boolean; overload; inline;
function IsSeparator(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsSurrogate(Surrogate: WideChar): Boolean; overload; inline;
function IsSurrogate(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsSurrogatePair(const HighSurrogate, LowSurrogate: WideChar): Boolean; overload; inline;
function IsSurrogatePair(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsSymbol(C: WideChar): Boolean; overload; inline;
function IsSymbol(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsUpper(C: WideChar): Boolean; overload; inline;
function IsUpper(const S: Widestring; Index: Integer): Boolean; overload; inline;
function IsWhiteSpace(C: WideChar): Boolean; overload; inline;
function IsWhiteSpace(const S: Widestring; Index: Integer): Boolean; overload; inline;
function ToLower(C: WideChar): WideChar; overload; inline;
function ToLower(const S: Widestring): Widestring; overload; inline;
function ToUpper(C: WideChar): WideChar; overload; inline;
function ToUpper(const S: Widestring): Widestring; overload; inline;

implementation

uses Windows;

{$RESOURCE 'uCharacter.res'}




// Implementation-private types describing the layout of the lookup tables.
// The array bounds are maximum extents used only to give the pointer types an
// indexable shape; nothing in the visible code allocates arrays of this size.
type
  // Index stage of a lookup: each entry is a Word offset into the next stage.
  TIndexArray = array[0..32767] of Word;
  PIndexArray = ^TIndexArray;
  // Final stage of the category lookup.
  TCategoryArray = array[0..65535] of TUnicodeCategory;
  PCategoryArray = ^TCategoryArray;
  // Final stage of the numeric-value lookup.
  TNumberArray = array[0..4095] of Double;
  PNumberArray = ^TNumberArray;
  PDataTableOffsets = ^TDataTableOffsets;
  // Six Integer offsets, one per table pointer declared below.
  // NOTE(review): presumably a header at the start of the loaded table data,
  // with offsets relative to DataTable - the code that reads it (Initialize)
  // is not visible here, so confirm before relying on this.
  TDataTableOffsets = record
    IndexTable1Offset: Integer;
    IndexTable2Offset: Integer;
    DataTableOffset: Integer;
    NumberIndex1Offset: Integer;
    NumberIndex2Offset: Integer;
    NumberDataOffset: Integer;
  end;

// Table pointers used by InternalGetUnicodeCategory and NumberValue.
// CategoryTable and NumericValueTable double as "not yet initialized" flags:
// the lookup helpers call TCharacter.Initialize while they are still nil.
var
  // NOTE(review): presumably the base address of the table data from
  // uCharacter.res - set by Initialize, which is not visible here.
  DataTable: Pointer;
  // Category lookup: primary index -> secondary index -> category.
  CatIndexPrimary: PIndexArray;
  CatIndexSecondary: PIndexArray;
  CategoryTable: PCategoryArray;
  // Numeric-value lookup: primary index -> secondary index -> value.
  NumIndexPrimary: PIndexArray;
  NumIndexSecondary: PIndexArray;
  NumericValueTable: PNumberArray;

{ TCharacter }

// Looks up the TUnicodeCategory of code point C through a three-stage table:
//   stage 1: CatIndexPrimary,   indexed by C shr 8
//   stage 2: CatIndexSecondary, indexed by stage-1 value + bits 4..7 of C
//   stage 3: CategoryTable,     indexed by stage-2 value + bits 0..3 of C
// The tables are set up on first use via TCharacter.Initialize.
// No range check is made on C.
function InternalGetUnicodeCategory(C: UCS4Char): TUnicodeCategory; inline;
var
  PageBase, BlockBase: Cardinal;
begin
  // A nil final table means the lookup data has not been set up yet.
  if not Assigned(CategoryTable) then
    TCharacter.Initialize;
  PageBase := CatIndexPrimary[C shr 8];
  BlockBase := CatIndexSecondary[PageBase + ((C shr 4) and $F)];
  // Parentheses are explicit here; "and" already binds tighter than "+".
  Result := CategoryTable[BlockBase + (C and $F)];
end;

// Looks up the numeric value of code point C through a three-stage table:
//   stage 1: NumIndexPrimary,   indexed by C shr 8
//   stage 2: NumIndexSecondary, indexed by stage-1 value + bits 4..7 of C
//   stage 3: NumericValueTable, indexed by stage-2 value + bits 0..3 of C
// The tables are set up on first use via TCharacter.Initialize.
// No range check is made on C.
function NumberValue(C: UCS4Char): Double; inline;
var
  PageBase, BlockBase: Cardinal;
begin
  // A nil final table means the lookup data has not been set up yet.
  if not Assigned(NumericValueTable) then
    TCharacter.Initialize;
  PageBase := NumIndexPrimary[C shr 8];
  BlockBase := NumIndexSecondary[PageBase + ((C shr 4) and $F)];
  // Parentheses are explicit here; "and" already binds tighter than "+".
  Result := NumericValueTable[BlockBase + (C and $F)];
end;

const
  Latin1Categories: array[0..255] of TUnicodeCategory =
    ( ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucSpaceSeparator, ucOtherPunctuation,
      ucOtherPunctuation,
      ucOtherPunctuation, ucCurrencySymbol,
      ucOtherPunctuation,
      ucOtherPunctuation,
      ucOtherPunctuation,
      ucOpenPunctuation,
      ucClosePunctuation,
      ucOtherPunctuation, ucMathSymbol,
      ucOtherPunctuation,
      ucDashPunctuation,
      ucOtherPunctuation,
      ucOtherPunctuation, ucDecimalNumber,
      ucDecimalNumber, ucDecimalNumber, 
      ucDecimalNumber, ucDecimalNumber, 
      ucDecimalNumber, ucDecimalNumber, 
      ucDecimalNumber, ucDecimalNumber, 
      ucDecimalNumber, ucOtherPunctuation, 
      ucOtherPunctuation, ucMathSymbol, 
      ucMathSymbol, ucMathSymbol, 
      ucOtherPunctuation, 
      ucOtherPunctuation, 
      ucUppercaseLetter, ucUppercaseLetter, 
      ucUppercaseLetter, ucUppercaseLetter, 
      ucUppercaseLetter, ucUppercaseLetter, 
      ucUppercaseLetter, ucUppercaseLetter, 
      ucUppercaseLetter, ucUppercaseLetter, 
      ucUppercaseLetter, ucUppercaseLetter, 
      ucUppercaseLetter, ucUppercaseLetter, 
      ucUppercaseLetter, ucUppercaseLetter, 
      ucUppercaseLetter, ucUppercaseLetter, 
      ucUppercaseLetter, ucUppercaseLetter, 
      ucUppercaseLetter, ucUppercaseLetter, 
      ucUppercaseLetter, ucUppercaseLetter, 
      ucUppercaseLetter, ucUppercaseLetter, 
      ucOpenPunctuation, 
      ucOtherPunctuation, 
      ucClosePunctuation, ucModifierSymbol,
      ucConnectPunctuation, 
      ucModifierSymbol, ucLowercaseLetter, 
      ucLowercaseLetter, ucLowercaseLetter, 
      ucLowercaseLetter, ucLowercaseLetter, 
      ucLowercaseLetter, ucLowercaseLetter, 
      ucLowercaseLetter, ucLowercaseLetter, 
      ucLowercaseLetter, ucLowercaseLetter, 
      ucLowercaseLetter, ucLowercaseLetter, 
      ucLowercaseLetter, ucLowercaseLetter, 
      ucLowercaseLetter, ucLowercaseLetter, 
      ucLowercaseLetter, ucLowercaseLetter, 
      ucLowercaseLetter, ucLowercaseLetter,
      ucLowercaseLetter, ucLowercaseLetter,
      ucLowercaseLetter, ucLowercaseLetter,
      ucLowercaseLetter, ucOpenPunctuation,
      ucMathSymbol, ucClosePunctuation,
      ucMathSymbol, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucControl, ucControl,
      ucSpaceSeparator, ucOtherPunctuation,
      ucCurrencySymbol, ucCurrencySymbol,
      ucCurrencySymbol, ucCurrencySymbol,
      ucOtherSymbol, ucOtherSymbol,
      ucModifierSymbol, ucOtherSymbol,
      ucLowercaseLetter,
      ucInitialPunctuation, ucMathSymbol,
      ucDashPunctuation, ucOtherSymbol,
      ucModifierSymbol, ucOtherSymbol,
      ucMathSymbol, ucOtherNumber,
      ucOtherNumber, ucModifierSymbol,
      ucLowercaseLetter, ucOtherSymbol,
      ucOtherPunctuation, ucModifierSymbol,
      ucOtherNumber, ucLowercaseLetter,
      ucFinalPunctuation, ucOtherNumber,
      ucOtherNumber, ucOtherNumber,
      ucOtherPunctuation,
      ucUppercaseLetter, ucUppercaseLetter,
      ucUppercaseLetter, ucUppercaseLetter,
      ucUppercaseLetter, ucUppercaseLetter,
      ucUppercaseLetter, ucUppercaseLetter,
      ucUppercaseLetter, ucUppercaseLetter,
      ucUppercaseLetter, ucUppercaseLetter,
      ucUppercaseLetter, ucUppercaseLetter,
      ucUppercaseLetter, ucUppercaseLetter,
      ucUppercaseLetter, ucUppercaseLetter,
      ucUppercaseLetter, ucUppercaseLetter,
      ucUppercaseLetter, ucUppercaseLetter,
      ucUppercaseLetter, ucMathSymbol,
      ucUppercaseLetter, ucUppercaseLetter,
      ucUppercaseLetter, ucUppercaseLetter,

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -