⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 unicode.pas

📁 几十个处理Unicode的函数
💻 PAS
📖 第 1 页 / 共 5 页
字号:
  L, R: Integer;

begin
  // load case mapping data if not already done
  if CaseMap = nil then LoadUnicodeCaseData;

  if UnicodeIsUpper(Code) then Result := Code
                          else
  begin
    if UnicodeIsLower(Code) then
    begin
      Field := 2;
      L := CaseLengths[0];
      R := (L + CaseLengths[1]) - 3;
    end
    else
    begin
      Field := 1;
      L := CaseLengths[0] + CaseLengths[1];
      R := CaseMapSize - 3;
    end;
    Result := CaseLookup(Code, L, R, Field);
  end;
end;

//----------------------------------------------------------------------------------------------------------------------

function UnicodeToLower(Code: UCS4): UCS4;

var
  Field,
  L, R: Integer;
  
begin
  // load case mapping data if not already done
  if CaseMap = nil then LoadUnicodeCaseData;

  if UnicodeIsLower(Code) then Result := Code
                          else
  begin
    if UnicodeIsUpper(Code) then
    begin
      Field := 1;
      L := 0;
      R := CaseLengths[0] - 3;
    end
    else
    begin
      Field := 2;
      L := CaseLengths[0] + CaseLengths[1];
      R := CaseMapSize - 3;
    end;
    Result := CaseLookup(Code, L, R, Field);
  end;
end;

//----------------------------------------------------------------------------------------------------------------------

function UnicodeToTitle(Code: UCS4): UCS4;

var
  Field,
  L, R: Integer;

begin
  // load case mapping data if not already done
  if CaseMap = nil then LoadUnicodeCaseData;

  if UnicodeIsTitle(Code) then Result := Code
                          else
  begin
    // The offset will always be the same for converting to title case.
    Field := 2;

    if UnicodeIsUpper(Code) then
    begin
      L := 0;
      R := CaseLengths[0] - 3;
    end
    else
    begin
      L := CaseLengths[0];
      R := (L + CaseLengths[1]) - 3;
    end;
    Result := CaseLookup(Code, L, R, Field);
  end;
end;

//----------------- Support for decomposition --------------------------------------------------------------------------

const // constants for hangul composition and decomposition (this is done algorithmically
      // saving so significant memory)
  SBase = $AC00;
  LBase = $1100;
  VBase = $1161;
  TBase = $11A7;
  LCount = 19;
  VCount = 21;
  TCount = 28;
  NCount = VCount * TCount;   // 588
  SCount = LCount * NCount;   // 11172
  
var
  DecompositionSize: Cardinal;
  DecompositionNodes,
  Decompositions: TCardinalArray;

//----------------------------------------------------------------------------------------------------------------------

procedure LoadUnicodeDecompositionData;

var
  Stream: TResourceStream;
  I: Cardinal;
  Header: TUHeader;
  
begin
  // make sure no other code is currently modifying the global data area
  if LoadInProgress = nil then LoadInProgress := TCriticalSection.Create;
  LoadInProgress.Enter;

  if Decompositions = nil then
  begin
    Stream := TResourceStream.Create(HInstance, 'DECOMPOSE', 'UNICODE');
    Stream.Read(Header, SizeOf(Header));

    if Header.BOM = BOM_MSB_FIRST then
    begin
      Header.Count := Swap(Header.Count);
      Header.Bytes := SwapCardinal(Header.Bytes);
    end;

    DecompositionSize := Header.Count shl 1; // two values per node
    SetLength(DecompositionNodes, DecompositionSize + 1); // one entry more (the sentinel)
    Stream.Read(DecompositionNodes[0], (DecompositionSize + 1) * SizeOf(Cardinal));
    SetLength(Decompositions, (Header.Bytes div SizeOf(Cardinal)) - DecompositionSize - 1);
    Stream.Read(Decompositions[0], Length(Decompositions) * SizeOf(Cardinal));

    // Do an endian swap if necessary.
    if Header.BOM = BOM_MSB_FIRST then
    begin
      for I := 0 to High(DecompositionNodes) do
          DecompositionNodes[I] := SwapCardinal(DecompositionNodes[I]);
      for I := 0 to High(Decompositions) do
          Decompositions[I] := SwapCardinal(Decompositions[I]);
    end;
    Stream.Free;
  end;

  LoadInProgress.Leave;
end;

//----------------------------------------------------------------------------------------------------------------------

function UnicodeDecomposeHangul(Code: UCS4): TCardinalArray;

// algorithmically decompose hangul character using some predefined contstants

var
  Rest: Integer;                             
  
begin
  if not UnicodeIsHangul(Code) then Result := nil
                               else
  begin
    Dec(Code, SBase);
    Rest := Code mod TCount;
    if Rest = 0 then SetLength(Result, 2)
                else SetLength(Result, 3);
    Result[0] := LBase + (Code div NCount);
    Result[1] := VBase + ((Code mod NCount) div TCount);
    if Rest <> 0 then Result[2] := TBase + Rest;
  end;
end;

//----------------------------------------------------------------------------------------------------------------------

function UnicodeDecompose(Code: UCS4): TCardinalArray;

var
  L, R, M: Integer;

begin
  // load decomposition data if not already done
  if Decompositions = nil then LoadUnicodeDecompositionData;

  if not UnicodeIsComposite(Code) then
  begin
    // return the code itself if it is not a composite
    SetLength(Result, 1);
    Result[0] := Code;
  end
  else
  begin
    // if the code is hangul then decomposition is algorithmically 
    Result := UnicodeDecomposeHangul(Code);
    if Result = nil then
    begin
      L := 0;
      R := DecompositionNodes[DecompositionSize] - 1;

      while L <= R do
      begin
        // Determine a "mid" point and adjust to make sure the mid point is at
        // the beginning of a code + offset pair.
        M := (L + R) shr 1;
        Dec(M, M and 1);
        if Code > DecompositionNodes[M] then L := M + 2
                                        else
          if Code < DecompositionNodes[M] then R := M - 2
                                          else
            if Code = DecompositionNodes[M] then
            begin
              // found a decomposition, return the codes
              SetLength(Result, DecompositionNodes[M + 3] - DecompositionNodes[M + 1] - 1);
              Move(Decompositions[DecompositionNodes[M + 1]], Result[0], Length(Result) * SizeOf(Cardinal));
              Break;
            end;
      end;
    end;
  end;
end;

//----------------- Support for combining classes ----------------------------------------------------------------------

var
  CCLSize: Cardinal;
  CCLNodes: TCardinalArray;

//----------------------------------------------------------------------------------------------------------------------

procedure LoadUnicodeCombiningData;

var
  Stream: TResourceStream;
  I: Cardinal;
  Header: TUHeader;

begin
  // make sure no other code is currently modifying the global data area
  if LoadInProgress = nil then LoadInProgress := TCriticalSection.Create;
  LoadInProgress.Enter;

  if CCLNodes = nil then
  begin
    Stream := TResourceStream.Create(HInstance, 'COMBINE', 'UNICODE');
    Stream.Read(Header, SizeOf(Header));

    if Header.BOM = BOM_MSB_FIRST then
    begin
      Header.Count := Swap(Header.Count);
      Header.Bytes := SwapCardinal(Header.Bytes);
    end;

    CCLSize := Header.Count * 3;
    SetLength(CCLNodes, CCLSize);
    Stream.Read(CCLNodes[0], CCLSize * SizeOf(Cardinal));

    if Header.BOM = BOM_MSB_FIRST then
      for I := 0 to CCLSize - 1 do
        CCLNodes[I] := SwapCardinal(CCLNodes[I]);

    Stream.Free;
  end;
  LoadInProgress.Leave;
end;

//----------------------------------------------------------------------------------------------------------------------

function UnicodeCanonicalClass(Code: Cardinal): Cardinal;

var
  L, R, M: Integer;

begin
  // load combination data if not already done
  if CCLNodes = nil then LoadUnicodeCombiningData;

  Result := 0;
  L := 0;
  R := CCLSize - 1;

  while L <= R do
  begin
    M := (L + R) shr 1;
    Dec(M, M mod 3);
    if Code > CCLNodes[M + 1] then L := M + 3
                              else
      if Code < CCLNodes[M] then R := M - 3
                            else
        if (Code >= CCLNodes[M]) and (Code <= CCLNodes[M + 1]) then
        begin
          Result := CCLNodes[M + 2];
          Break;
        end;
  end;
end;

//----------------- Support for numeric values -------------------------------------------------------------------------

var
  NumberSize: Cardinal;
  NumberNodes: TCardinalArray;
  NumberValues: TWordArray;

//----------------------------------------------------------------------------------------------------------------------

procedure LoadUnicodeNumberData;

var
  Stream: TResourceStream;
  I: Cardinal;
  Header: TUHeader;

begin
  // make sure no other code is currently modifying the global data area
  if LoadInProgress = nil then LoadInProgress := TCriticalSection.Create;
  LoadInProgress.Enter;

  if NumberNodes = nil then
  begin
    Stream := TResourceStream.Create(HInstance, 'NUMBERS', 'UNICODE');
    Stream.Read(Header, SizeOf(Header));

    if Header.BOM = BOM_MSB_FIRST then
    begin
      Header.Count := Swap(Header.Count);
      Header.Bytes := SwapCardinal(Header.Bytes);     
    end;

    NumberSize := Header.Count;
    SetLength(NumberNodes, NumberSize);
    Stream.Read(NumberNodes[0], NumberSize * SizeOf(Cardinal));
    SetLength(NumberValues, (Header.Bytes - NumberSize * SizeOf(Cardinal)) div SizeOf(Word));
    Stream.Read(NumberValues[0], Length(NumberValues) * SizeOf(Word));

    if Header.BOM = BOM_MSB_FIRST then
    begin
      for I := 0 to High(NumberNodes) do
        NumberNodes[I] := SwapCardinal(NumberNodes[I]);
      for I := 0 to High(NumberValues) do
        NumberValues[I] := Swap(NumberValues[I]);
    end;
    Stream.Free;
  end;
  LoadInProgress.Leave;
end;

//----------------------------------------------------------------------------------------------------------------------

function UnicodeNumberLookup(Code: UCS4; var num: TUNumber): Boolean;

var
  L, R, M: Integer;
  VP: PWord;

begin
  // load number data if not already done
  if NumberNodes = nil then LoadUnicodeNumberData;

  Result := False;
  L := 0;
  R := NumberSize - 1;
  while L <= R do
  begin
    // Determine a "mid" point and adjust to make sure the mid point is at

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -