📄 unicode.pas
字号:
L, R: Integer;
begin
// load case mapping data if not already done
if CaseMap = nil then LoadUnicodeCaseData;
if UnicodeIsUpper(Code) then Result := Code
else
begin
if UnicodeIsLower(Code) then
begin
Field := 2;
L := CaseLengths[0];
R := (L + CaseLengths[1]) - 3;
end
else
begin
Field := 1;
L := CaseLengths[0] + CaseLengths[1];
R := CaseMapSize - 3;
end;
Result := CaseLookup(Code, L, R, Field);
end;
end;
//----------------------------------------------------------------------------------------------------------------------
function UnicodeToLower(Code: UCS4): UCS4;
var
Field,
L, R: Integer;
begin
// load case mapping data if not already done
if CaseMap = nil then LoadUnicodeCaseData;
if UnicodeIsLower(Code) then Result := Code
else
begin
if UnicodeIsUpper(Code) then
begin
Field := 1;
L := 0;
R := CaseLengths[0] - 3;
end
else
begin
Field := 2;
L := CaseLengths[0] + CaseLengths[1];
R := CaseMapSize - 3;
end;
Result := CaseLookup(Code, L, R, Field);
end;
end;
//----------------------------------------------------------------------------------------------------------------------
function UnicodeToTitle(Code: UCS4): UCS4;
var
Field,
L, R: Integer;
begin
// load case mapping data if not already done
if CaseMap = nil then LoadUnicodeCaseData;
if UnicodeIsTitle(Code) then Result := Code
else
begin
// The offset will always be the same for converting to title case.
Field := 2;
if UnicodeIsUpper(Code) then
begin
L := 0;
R := CaseLengths[0] - 3;
end
else
begin
L := CaseLengths[0];
R := (L + CaseLengths[1]) - 3;
end;
Result := CaseLookup(Code, L, R, Field);
end;
end;
//----------------- Support for decomposition --------------------------------------------------------------------------
const // constants for hangul composition and decomposition (this is done algorithmically
// saving so significant memory)
SBase = $AC00;
LBase = $1100;
VBase = $1161;
TBase = $11A7;
LCount = 19;
VCount = 21;
TCount = 28;
NCount = VCount * TCount; // 588
SCount = LCount * NCount; // 11172
var
DecompositionSize: Cardinal;
DecompositionNodes,
Decompositions: TCardinalArray;
//----------------------------------------------------------------------------------------------------------------------
procedure LoadUnicodeDecompositionData;
var
Stream: TResourceStream;
I: Cardinal;
Header: TUHeader;
begin
// make sure no other code is currently modifying the global data area
if LoadInProgress = nil then LoadInProgress := TCriticalSection.Create;
LoadInProgress.Enter;
if Decompositions = nil then
begin
Stream := TResourceStream.Create(HInstance, 'DECOMPOSE', 'UNICODE');
Stream.Read(Header, SizeOf(Header));
if Header.BOM = BOM_MSB_FIRST then
begin
Header.Count := Swap(Header.Count);
Header.Bytes := SwapCardinal(Header.Bytes);
end;
DecompositionSize := Header.Count shl 1; // two values per node
SetLength(DecompositionNodes, DecompositionSize + 1); // one entry more (the sentinel)
Stream.Read(DecompositionNodes[0], (DecompositionSize + 1) * SizeOf(Cardinal));
SetLength(Decompositions, (Header.Bytes div SizeOf(Cardinal)) - DecompositionSize - 1);
Stream.Read(Decompositions[0], Length(Decompositions) * SizeOf(Cardinal));
// Do an endian swap if necessary.
if Header.BOM = BOM_MSB_FIRST then
begin
for I := 0 to High(DecompositionNodes) do
DecompositionNodes[I] := SwapCardinal(DecompositionNodes[I]);
for I := 0 to High(Decompositions) do
Decompositions[I] := SwapCardinal(Decompositions[I]);
end;
Stream.Free;
end;
LoadInProgress.Leave;
end;
//----------------------------------------------------------------------------------------------------------------------
function UnicodeDecomposeHangul(Code: UCS4): TCardinalArray;
// algorithmically decompose hangul character using some predefined contstants
var
Rest: Integer;
begin
if not UnicodeIsHangul(Code) then Result := nil
else
begin
Dec(Code, SBase);
Rest := Code mod TCount;
if Rest = 0 then SetLength(Result, 2)
else SetLength(Result, 3);
Result[0] := LBase + (Code div NCount);
Result[1] := VBase + ((Code mod NCount) div TCount);
if Rest <> 0 then Result[2] := TBase + Rest;
end;
end;
//----------------------------------------------------------------------------------------------------------------------
function UnicodeDecompose(Code: UCS4): TCardinalArray;
var
L, R, M: Integer;
begin
// load decomposition data if not already done
if Decompositions = nil then LoadUnicodeDecompositionData;
if not UnicodeIsComposite(Code) then
begin
// return the code itself if it is not a composite
SetLength(Result, 1);
Result[0] := Code;
end
else
begin
// if the code is hangul then decomposition is algorithmically
Result := UnicodeDecomposeHangul(Code);
if Result = nil then
begin
L := 0;
R := DecompositionNodes[DecompositionSize] - 1;
while L <= R do
begin
// Determine a "mid" point and adjust to make sure the mid point is at
// the beginning of a code + offset pair.
M := (L + R) shr 1;
Dec(M, M and 1);
if Code > DecompositionNodes[M] then L := M + 2
else
if Code < DecompositionNodes[M] then R := M - 2
else
if Code = DecompositionNodes[M] then
begin
// found a decomposition, return the codes
SetLength(Result, DecompositionNodes[M + 3] - DecompositionNodes[M + 1] - 1);
Move(Decompositions[DecompositionNodes[M + 1]], Result[0], Length(Result) * SizeOf(Cardinal));
Break;
end;
end;
end;
end;
end;
//----------------- Support for combining classes ----------------------------------------------------------------------
var
CCLSize: Cardinal;
CCLNodes: TCardinalArray;
//----------------------------------------------------------------------------------------------------------------------
procedure LoadUnicodeCombiningData;
var
Stream: TResourceStream;
I: Cardinal;
Header: TUHeader;
begin
// make sure no other code is currently modifying the global data area
if LoadInProgress = nil then LoadInProgress := TCriticalSection.Create;
LoadInProgress.Enter;
if CCLNodes = nil then
begin
Stream := TResourceStream.Create(HInstance, 'COMBINE', 'UNICODE');
Stream.Read(Header, SizeOf(Header));
if Header.BOM = BOM_MSB_FIRST then
begin
Header.Count := Swap(Header.Count);
Header.Bytes := SwapCardinal(Header.Bytes);
end;
CCLSize := Header.Count * 3;
SetLength(CCLNodes, CCLSize);
Stream.Read(CCLNodes[0], CCLSize * SizeOf(Cardinal));
if Header.BOM = BOM_MSB_FIRST then
for I := 0 to CCLSize - 1 do
CCLNodes[I] := SwapCardinal(CCLNodes[I]);
Stream.Free;
end;
LoadInProgress.Leave;
end;
//----------------------------------------------------------------------------------------------------------------------
function UnicodeCanonicalClass(Code: Cardinal): Cardinal;
var
L, R, M: Integer;
begin
// load combination data if not already done
if CCLNodes = nil then LoadUnicodeCombiningData;
Result := 0;
L := 0;
R := CCLSize - 1;
while L <= R do
begin
M := (L + R) shr 1;
Dec(M, M mod 3);
if Code > CCLNodes[M + 1] then L := M + 3
else
if Code < CCLNodes[M] then R := M - 3
else
if (Code >= CCLNodes[M]) and (Code <= CCLNodes[M + 1]) then
begin
Result := CCLNodes[M + 2];
Break;
end;
end;
end;
//----------------- Support for numeric values -------------------------------------------------------------------------
var
NumberSize: Cardinal;
NumberNodes: TCardinalArray;
NumberValues: TWordArray;
//----------------------------------------------------------------------------------------------------------------------
procedure LoadUnicodeNumberData;
var
Stream: TResourceStream;
I: Cardinal;
Header: TUHeader;
begin
// make sure no other code is currently modifying the global data area
if LoadInProgress = nil then LoadInProgress := TCriticalSection.Create;
LoadInProgress.Enter;
if NumberNodes = nil then
begin
Stream := TResourceStream.Create(HInstance, 'NUMBERS', 'UNICODE');
Stream.Read(Header, SizeOf(Header));
if Header.BOM = BOM_MSB_FIRST then
begin
Header.Count := Swap(Header.Count);
Header.Bytes := SwapCardinal(Header.Bytes);
end;
NumberSize := Header.Count;
SetLength(NumberNodes, NumberSize);
Stream.Read(NumberNodes[0], NumberSize * SizeOf(Cardinal));
SetLength(NumberValues, (Header.Bytes - NumberSize * SizeOf(Cardinal)) div SizeOf(Word));
Stream.Read(NumberValues[0], Length(NumberValues) * SizeOf(Word));
if Header.BOM = BOM_MSB_FIRST then
begin
for I := 0 to High(NumberNodes) do
NumberNodes[I] := SwapCardinal(NumberNodes[I]);
for I := 0 to High(NumberValues) do
NumberValues[I] := Swap(NumberValues[I]);
end;
Stream.Free;
end;
LoadInProgress.Leave;
end;
//----------------------------------------------------------------------------------------------------------------------
function UnicodeNumberLookup(Code: UCS4; var num: TUNumber): Boolean;
var
L, R, M: Integer;
VP: PWord;
begin
// load number data if not already done
if NumberNodes = nil then LoadUnicodeNumberData;
Result := False;
L := 0;
R := NumberSize - 1;
while L <= R do
begin
// Determine a "mid" point and adjust to make sure the mid point is at
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -