📄 jclunicode.pas
字号:
// after the callback returns.
TConfirmConversionEvent = procedure (Sender: TWideStrings; var Allowed: Boolean) of object;
TWideStrings = class(TPersistent)
private
FUpdateCount: Integer;
FLanguage: LCID; // language can usually left alone, the system's default is used
FSaved: Boolean; // set in SaveToStream, True in case saving was successfull otherwise False
FSaveUnicode: Boolean; // flag set on loading to keep track in which format to save
// (can be set explicitely, but expect losses if there's true Unicode content
// and this flag is set to False)
FNormalizationForm: TNormalizationForm; // determines in which form Unicode strings should be stored
FOnConfirmConversion: TConfirmConversionEvent;
function GetCommaText: WideString;
function GetName(Index: Integer): WideString;
function GetValue(const Name: WideString): WideString;
procedure ReadData(Reader: TReader);
procedure SetCommaText(const Value: WideString);
procedure SetNormalizationForm(const Value: TNormalizationForm);
procedure SetValue(const Name, Value: WideString);
procedure WriteData(Writer: TWriter);
protected
procedure DefineProperties(Filer: TFiler); override;
procedure DoConfirmConversion(var Allowed: Boolean); virtual;
procedure Error(const Msg: string; Data: Integer);
function Get(Index: Integer): WideString; virtual; abstract;
function GetCapacity: Integer; virtual;
function GetCount: Integer; virtual; abstract;
function GetObject(Index: Integer): TObject; virtual;
function GetText: WideString; virtual;
procedure Put(Index: Integer; const S: WideString); virtual; abstract;
procedure PutObject(Index: Integer; AObject: TObject); virtual; abstract;
procedure SetCapacity(NewCapacity: Integer); virtual;
procedure SetUpdateState(Updating: Boolean); virtual;
procedure SetLanguage(Value: LCID); virtual;
public
constructor Create;
function Add(const S: WideString): Integer; virtual;
function AddObject(const S: WideString; AObject: TObject): Integer; virtual;
procedure Append(const S: WideString);
procedure AddStrings(Strings: TStrings); overload; virtual;
procedure AddStrings(Strings: TWideStrings); overload; virtual;
procedure Assign(Source: TPersistent); override;
procedure AssignTo(Dest: TPersistent); override;
procedure BeginUpdate;
procedure Clear; virtual; abstract;
procedure Delete(Index: Integer); virtual; abstract;
procedure EndUpdate;
function Equals(Strings: TWideStrings): Boolean;
procedure Exchange(Index1, Index2: Integer); virtual;
function GetSeparatedText(Separators: WideString): WideString; virtual;
function IndexOf(const S: WideString): Integer; virtual;
function IndexOfName(const Name: WideString): Integer;
function IndexOfObject(AObject: TObject): Integer;
procedure Insert(Index: Integer; const S: WideString); virtual; abstract;
procedure InsertObject(Index: Integer; const S: WideString; AObject: TObject);
procedure LoadFromFile(const FileName: string); virtual;
procedure LoadFromStream(Stream: TStream); virtual;
procedure Move(CurIndex, NewIndex: Integer); virtual;
procedure SaveToFile(const FileName: string); virtual;
procedure SaveToStream(Stream: TStream; WithBOM: Boolean = True); virtual;
procedure SetText(const Value: WideString); virtual;
property Capacity: Integer read GetCapacity write SetCapacity;
property CommaText: WideString read GetCommaText write SetCommaText;
property Count: Integer read GetCount;
property Language: LCID read FLanguage write SetLanguage;
property Names[Index: Integer]: WideString read GetName;
property NormalizationForm: TNormalizationForm read FNormalizationForm write SetNormalizationForm default nfC;
property Objects[Index: Integer]: TObject read GetObject write PutObject;
property Values[const Name: WideString]: WideString read GetValue write SetValue;
property Saved: Boolean read FSaved;
property SaveUnicode: Boolean read FSaveUnicode write FSaveUnicode default True;
property Strings[Index: Integer]: WideString read Get write Put; default;
property Text: WideString read GetText write SetText;
property OnConfirmConversion: TConfirmConversionEvent read FOnConfirmConversion write FOnConfirmConversion;
end;
//----- TWideStringList class
TDynWideCharArray = array of WideChar;
TWideStringItem = record
{$IFDEF OWN_WIDESTRING_MEMMGR}
FString: PWideChar; // "array of WideChar";
{$ELSE}
FString: WideString;
{$ENDIF OWN_WIDESTRING_MEMMGR}
FObject: TObject;
end;
TWideStringItemList = array of TWideStringItem;
TWideStringList = class(TWideStrings)
private
FList: TWideStringItemList;
FCount: Integer;
FSorted: Boolean;
FDuplicates: TDuplicates;
FOnChange: TNotifyEvent;
FOnChanging: TNotifyEvent;
procedure ExchangeItems(Index1, Index2: Integer);
procedure Grow;
procedure QuickSort(L, R: Integer);
procedure InsertItem(Index: Integer; const S: WideString);
procedure SetSorted(Value: Boolean);
{$IFDEF OWN_WIDESTRING_MEMMGR}
procedure SetListString(Index: Integer; const S: WideString);
{$ENDIF OWN_WIDESTRING_MEMMGR}
protected
procedure Changed; virtual;
procedure Changing; virtual;
function Get(Index: Integer): WideString; override;
function GetCapacity: Integer; override;
function GetCount: Integer; override;
function GetObject(Index: Integer): TObject; override;
procedure Put(Index: Integer; const S: WideString); override;
procedure PutObject(Index: Integer; AObject: TObject); override;
procedure SetCapacity(NewCapacity: Integer); override;
procedure SetUpdateState(Updating: Boolean); override;
procedure SetLanguage(Value: LCID); override;
public
destructor Destroy; override;
function Add(const S: WideString): Integer; override;
procedure Clear; override;
procedure Delete(Index: Integer); override;
procedure Exchange(Index1, Index2: Integer); override;
function Find(const S: WideString; var Index: Integer): Boolean; virtual;
function IndexOf(const S: WideString): Integer; override;
procedure Insert(Index: Integer; const S: WideString); override;
procedure Sort; virtual;
property Duplicates: TDuplicates read FDuplicates write FDuplicates;
property Sorted: Boolean read FSorted write SetSorted;
property OnChange: TNotifyEvent read FOnChange write FOnChange;
property OnChanging: TNotifyEvent read FOnChanging write FOnChanging;
end;
// result type for number retrieval functions
TUcNumber = record
Numerator,
Denominator: Integer;
end;
TFontCharSet = 0..255;
const
ReplacementCharacter: UCS4 = $0000FFFD;
MaximumUCS2: UCS4 = $0000FFFF;
MaximumUTF16: UCS4 = $0010FFFF;
MaximumUCS4: UCS4 = $7FFFFFFF;
SurrogateHighStart: UCS4 = $D800;
SurrogateHighEnd: UCS4 = $DBFF;
SurrogateLowStart: UCS4 = $DC00;
SurrogateLowEnd: UCS4 = $DFFF;
// functions involving null-terminated strings
// NOTE: PWideChars as well as WideStrings are NOT managed by reference counting under Win32.
// In Kylix this is different. WideStrings are reference counted there, just like ANSI strings.
function StrLenW(Str: PWideChar): Cardinal;
function StrEndW(Str: PWideChar): PWideChar;
function StrMoveW(Dest, Source: PWideChar; Count: Cardinal): PWideChar;
function StrCopyW(Dest, Source: PWideChar): PWideChar;
function StrECopyW(Dest, Source: PWideChar): PWideChar;
function StrLCopyW(Dest, Source: PWideChar; MaxLen: Cardinal): PWideChar;
function StrPCopyW(Dest: PWideChar; const Source: WideString): PWideChar;
function StrPLCopyW(Dest: PWideChar; const Source: WideString; MaxLen: Cardinal): PWideChar;
function StrCatW(Dest, Source: PWideChar): PWideChar;
function StrLCatW(Dest, Source: PWideChar; MaxLen: Cardinal): PWideChar;
function StrCompW(Str1, Str2: PWideChar): Integer;
function StrICompW(Str1, Str2: PWideChar): Integer;
function StrLCompW(Str1, Str2: PWideChar; MaxLen: Cardinal): Integer;
function StrLICompW(Str1, Str2: PWideChar; MaxLen: Cardinal): Integer;
function StrNScanW(S1, S2: PWideChar): Integer;
function StrRNScanW(S1, S2: PWideChar): Integer;
function StrScanW(Str: PWideChar; Chr: WideChar): PWideChar; overload;
function StrScanW(Str: PWideChar; Chr: WideChar; StrLen: Cardinal): PWideChar; overload;
function StrRScanW(Str: PWideChar; Chr: WideChar): PWideChar;
function StrPosW(Str, SubStr: PWideChar): PWideChar;
function StrAllocW(Size: Cardinal): PWideChar;
function StrBufSizeW(Str: PWideChar): Cardinal;
function StrNewW(Str: PWideChar): PWideChar;
procedure StrDisposeW(Str: PWideChar);
procedure StrSwapByteOrder(Str: PWideChar);
// functions involving Delphi wide strings
function WideAdjustLineBreaks(const S: WideString): WideString;
function WideCharPos(const S: WideString; const Ch: WideChar; const Index: Integer): Integer; //az
function WideCompose(const S: WideString): WideString;
function WideDecompose(const S: WideString; Compatible: Boolean): WideString;
function WideExtractQuotedStr(var Src: PWideChar; Quote: WideChar): WideString;
function WideQuotedStr(const S: WideString; Quote: WideChar): WideString;
function WideStringOfChar(C: WideChar; Count: Cardinal): WideString;
function WideCaseFolding(C: WideChar): WideString; overload;
function WideCaseFolding(const S: WideString): WideString; overload;
function WideLowerCase(C: WideChar): WideString; overload;
function WideLowerCase(const S: WideString): WideString; overload;
function WideNormalize(const S: WideString; Form: TNormalizationForm): WideString;
function WideSameText(const Str1, Str2: WideString): Boolean;
function WideTitleCase(C: WideChar): WideString; overload;
function WideTitleCase(const S: WideString): WideString; overload;
function WideTrim(const S: WideString): WideString;
function WideTrimLeft(const S: WideString): WideString;
function WideTrimRight(const S: WideString): WideString;
function WideUpperCase(C: WideChar): WideString; overload;
function WideUpperCase(const S: WideString): WideString; overload;
// Low level character routines
function UnicodeNumberLookup(Code: UCS4; var Number: TUcNumber): Boolean;
function UnicodeComposePair(First, Second: UCS4; var Composite: UCS4): Boolean;
function UnicodeCaseFold(Code: UCS4): TUCS4Array;
function UnicodeToUpper(Code: UCS4): TUCS4Array;
function UnicodeToLower(Code: UCS4): TUCS4Array;
function UnicodeToTitle(Code: UCS4): TUCS4Array;
// Character test routines
function UnicodeIsAlpha(C: UCS4): Boolean;
function UnicodeIsDigit(C: UCS4): Boolean;
function UnicodeIsAlphaNum(C: UCS4): Boolean;
function UnicodeIsCased(C: UCS4): Boolean;
function UnicodeIsControl(C: UCS4): Boolean;
function UnicodeIsSpace(C: UCS4): Boolean;
function UnicodeIsWhiteSpace(C: UCS4): Boolean;
function UnicodeIsBlank(C: UCS4): Boolean;
function UnicodeIsPunctuation(C: UCS4): Boolean;
function UnicodeIsGraph(C: UCS4): Boolean;
function UnicodeIsPrintable(C: UCS4): Boolean;
function UnicodeIsUpper(C: UCS4): Boolean;
function UnicodeIsLower(C: UCS4): Boolean;
function UnicodeIsTitle(C: UCS4): Boolean;
function UnicodeIsHexDigit(C: UCS4): Boolean;
function UnicodeIsIsoControl(C: UCS4): Boolean;
function UnicodeIsFormatControl(C: UCS4): Boolean;
function UnicodeIsSymbol(C: UCS4): Boolean;
function UnicodeIsNumber(C: UCS4): Boolean;
function UnicodeIsNonSpacing(C: UCS4): Boolean;
function UnicodeIsOpenPunctuation(C: UCS4): Boolean;
function UnicodeIsClosePunctuation(C: UCS4): Boolean;
function UnicodeIsInitialPunctuation(C: UCS4): Boolean;
function UnicodeIsFinalPunctuation(C: UCS4): Boolean;
function UnicodeIsComposed(C: UCS4): Boolean;
function UnicodeIsQuotationMark(C: UCS4): Boolean;
function UnicodeIsSymmetric(C: UCS4): Boolean;
function UnicodeIsMirroring(C: UCS4): Boolean;
function UnicodeIsNonBreaking(C: UCS4): Boolean;
// Directionality functions
function UnicodeIsRightToLeft(C: UCS4): Boolean;
function UnicodeIsLeftToRight(C: UCS4): Boolean;
function UnicodeIsStrong(C: UCS4): Boolean;
function UnicodeIsWeak(C: UCS4): Boolean;
function UnicodeIsNeutral(C: UCS4): Boolean;
function UnicodeIsSeparator(C: UCS4): Boolean;
// Other character test functions
function UnicodeIsMark(C: UCS4): Boolean;
function UnicodeIsModifier(C: UCS4): Boolean;
function UnicodeIsLetterNumber(C: UCS4): Boolean;
function UnicodeIsConnectionPunctuation(C: UCS4): Boolean;
function UnicodeIsDash(C: UCS4): Boolean;
function UnicodeIsMath(C: UCS4): Boolean;
function UnicodeIsCurrency(C: UCS4): Boolean;
function UnicodeIsModifierSymbol(C: UCS4): Boolean;
function UnicodeIsNonSpacingMark(C: UCS4): Boolean;
function UnicodeIsSpacingMark(C: UCS4): Boolean;
function UnicodeIsEnclosing(C: UCS4): Boolean;
function UnicodeIsPrivate(C: UCS4): Boolean;
function UnicodeIsSurrogate(C: UCS4): Boolean;
function UnicodeIsLineSeparator(C: UCS4): Boolean;
function UnicodeIsParagraphSeparator(C: UCS4): Boolean;
function UnicodeIsIdentifierStart(C: UCS4): Boolean;
function UnicodeIsIdentifierPart(C: UCS4): Boolean;
function UnicodeIsDefined(C: UCS4): Boolean;
function UnicodeIsUndefined(C: UCS4): Boolean;
function UnicodeIsHan(C: UCS4): Boolean;
function UnicodeIsHangul(C: UCS4): Boolean;
// Utility functions
function CharSetFromLocale(Language: LCID): TFontCharSet;
function GetCharSetFromLocale(Language: LCID; out FontCharSet: TFontCharSet): Boolean;
function CodePageFromLocale(Language: LCID): Integer;
function CodeBlockFromChar(const C: UCS4): TUnicodeBlock;
function KeyboardCodePage: Word;
function KeyUnicode(C: Char): WideChar;
function StringToWideStringEx(const S: string; CodePage: Word): WideString;
function TranslateString(const S: string; CP1, CP2: Word): string;
function WideStringToStringEx(const WS: WideString; CodePage: Word): string;
// WideString conversion routines
procedure ExpandANSIString(const Source: PChar; Target: PWideChar; Count: Cardinal);
function WideStringToUTF8(S: WideString): AnsiString;
function UTF8ToWideString(S: AnsiString): WideString;
type
TCompareFunc = function (const W1, W2: WideString; Locale: LCID): Integer;
var
WideCompareText: TCompareFunc;
{$ENDIF SUPPORTS_WIDESTRING}
type
EJclUnicodeError = class(EJclError);
implementation
{$IFDEF SUPPORTS_WIDESTRING}
// Unicode data for case mapping, decomposition, numbers etc. This data is
// loaded on demand which means only those parts will be put in memory which are
// needed by one of the lookup functions.
// Note: There is a little tool called UDExtract which creates a resouce script from
// the Unicode database file which can be compiled to the needed res file.
// This tool, including its source code, can be downloaded from www.lischke-online.de/Unicode.html.
{$R JclUnicode.res}
uses
{$IFDEF HAS_UNIT_RTLCONSTS}
RtlConsts,
{$ELSE}
{$IFNDEF FPC}
Consts,
{$ENDIF ~FPC}
{$ENDIF HAS_UNIT_RTLCONSTS}
SysUtils,
JclResources, JclSynch;
const
{$IFDEF FPC} // declarations from unit [Rtl]Consts
SDuplicateString = 'String list does not allow duplicates';
SListIndexError = 'List index out of bounds (%d)';
SSortedListError = 'Operation not allowed on sorted string list';
{$ENDIF FPC}
// some predefined sets to shorten parameter lists below and ease repeative usage
ClassLetter = [ccLetterUppercase, ccLetterLowercase, ccLetterTitlecase, ccLetterModifier, ccLetterOther];
ClassSpace = [ccSeparatorSpace, ccSpaceOther];
ClassPunctuation = [ccPunctuationConnector, ccPunctuationDash, ccPunctuationOpen, ccPunctuationClose,
ccPunctuationOther, ccPunctuationInitialQuote, ccPunctuationFinalQuote];
ClassMark = [ccMarkNonSpacing, ccMarkSpacingCombining, ccMarkEnclosing];
ClassNumber = [ccNumberDecimalDigit, ccNumberLetter, ccNumberOther];
ClassSymbol = [ccSymbolMath, ccSymbolCurrency, ccSymbolModifier, ccSymbolOther];
ClassEuropeanNumber = [ccEuropeanNumber, ccEuropeanNumberSeparator, ccEuropeanNumberTerminator];
// used to negate a set of categories
ClassAll = [Low(TCharacterCategory)..High(TCharacterCategory)];
var
// As the global data can be accessed by several threads it should be guarded
// while the data is loaded.
LoadInProgress: TJclCriticalSection;
//----------------- support for character categories -----------------------------------------------
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -