📄 jclunicode.pas
字号:
// Character category data is quite a large block since every defined character in Unicode is assigned at least
// one category. Because of this we cannot use a sparse matrix to provide quick access as implemented for
// e.g. composition data.
// The approach used here is based on the fact that an application seldom uses all characters defined in Unicode
// simultaneously. In fact the opposite is true. Most applications will use either Western European or Arabic or
// Far East character data, but very rarely all of them together. Based on this fact is the implementation of virtual
// memory using the system's paging file (aka file mapping) to load into virtual memory only what is currently used.
// The implementation is not yet finished and still needs a lot of improvement.
type
  // start and stop of a range of code points; both bounds are inclusive
  // (see the "for K := Start to Stop" loop in LoadCharacterCategories)
  TRange = record
    Start,
    Stop: Cardinal;
  end;
  TRangeArray = array of TRange;
  TCategoriesArray = array of TCharacterCategories;

var
  // True once LoadCharacterCategories has run; guards lazy loading
  CategoriesLoaded: Boolean;
  // Character categories, organized as a sparse two-stage matrix: the first
  // stage is indexed by the high byte of a (BMP) code point, the second stage
  // array (allocated on demand, 256 entries) by the low byte.
  Categories: array [Byte] of TCategoriesArray;
procedure LoadCharacterCategories;
// Loads the character category data (as saved by the Unicode database extractor, see also
// the comments about JclUnicode.res above) from the 'CATEGORIES'/'UNICODEDATA' resource
// linked into this module and fills the two-stage Categories matrix.
// The stream layout per record is: 1 byte category tag, 4 bytes range count, then that
// many TRange records; records repeat until the stream is exhausted.
// The LoadInProgress critical section serializes loading against other initializers.
var
  Size: Integer;
  Stream: TResourceStream;
  Category: TCharacterCategory;
  Buffer: TRangeArray;
  First,
  Second: Byte;
  J, K: Integer;
begin
  // Data already loaded?
  if not CategoriesLoaded then
  begin
    // make sure no other code is currently modifying the global data area
    LoadInProgress.Enter;
    try
      // set the flag before actually loading so re-entrant calls do not retry
      CategoriesLoaded := True;
      Stream := TResourceStream.Create(HInstance, 'CATEGORIES', 'UNICODEDATA');
      try
        while Stream.Position < Stream.Size do
        begin
          // a) read which category is current in the stream
          // NOTE(review): only one byte is read into Category — this assumes the
          // enum's ordinal fits into (and is stored in) the low byte; confirm the
          // extractor writes single-byte tags.
          Stream.ReadBuffer(Category, 1);
          // b) read the size of the ranges and the ranges themselves
          Stream.ReadBuffer(Size, 4);
          if Size > 0 then
          begin
            SetLength(Buffer, Size);
            Stream.ReadBuffer(Buffer[0], Size * SizeOf(TRange));
            // c) go through every range and add the current category to each code point
            for J := 0 to Size - 1 do
              for K := Buffer[J].Start to Buffer[J].Stop do
              begin
                // only the BMP ($0000..$FFFF) is tabulated; ignore the rest of a
                // range once it leaves the BMP
                if K > $FFFF then
                  Break;
                First := (K shr 8) and $FF;
                Second := K and $FF;
                // add second stage array if not yet done
                if Categories[First] = nil then
                  SetLength(Categories[First], 256);
                Include(Categories[First, Second], Category);
              end;
          end;
        end;
      finally
        Stream.Free;
      end;
    finally
      LoadInProgress.Leave;
    end;
  end;
end;
function CategoryLookup(Code: Cardinal; Cats: TCharacterCategories): Boolean; overload;
// Returns True if Code belongs to at least one of the categories given in Cats.
// Triggers on-demand loading of the category tables on first use.
var
  HighByte,
  LowByte: Byte;
begin
  // make sure the category tables are available
  if not CategoriesLoaded then
    LoadCharacterCategories;
  HighByte := (Code shr 8) and $FF;
  LowByte := Code and $FF;
  Result := False;
  // a nil first stage entry means no character in this 256-code block carries
  // any category at all
  if Categories[HighByte] <> nil then
    Result := Categories[HighByte, LowByte] * Cats <> [];
end;
//----------------- support for case mapping -------------------------------------------------------
type
  // the four mappings of one code point: case fold, lower, title and upper, in this order
  TCase = array [0..3] of TUCS4Array;
  TCaseArray = array of TCase;

var
  // An array for all case mappings (including 1-to-many casing if saved by the
  // extraction program). The organization is a sparse, two-stage matrix indexed by
  // the high and low byte of a (BMP) code point.
  // SingletonMapping is a shared one-element buffer used by CaseLookup to quickly
  // return a single default mapping.
  CaseDataLoaded: Boolean;
  CaseMapping: array [Byte] of TCaseArray;
  SingletonMapping: TUCS4Array;
procedure LoadCaseMappingData;
// Loads the case mapping table from the 'CASE'/'UNICODEDATA' resource into the
// two-stage CaseMapping matrix. The stream starts with a 4-byte entry count; each
// entry is a 4-byte code point followed by four length-prefixed UCS4 arrays in the
// order: fold, lower, title, upper (a length of 0 means "no such mapping").
// Also allocates the shared one-element SingletonMapping buffer used by CaseLookup.
var
  Stream: TResourceStream;
  I, Code,
  Size: Cardinal;
  First,
  Second: Byte;
begin
  if not CaseDataLoaded then
  begin
    // make sure no other code is currently modifying the global data area
    LoadInProgress.Enter;
    try
      SetLength(SingletonMapping, 1);
      // set the flag before loading so re-entrant calls do not retry
      CaseDataLoaded := True;
      Stream := TResourceStream.Create(HInstance, 'CASE', 'UNICODEDATA');
      try
        // the first entry in the stream is the number of entries in the case mapping table
        Stream.ReadBuffer(Size, 4);
        // Note: Size is reused below for the per-entry array lengths. This is safe
        // because Delphi evaluates a for-loop's upper bound only once, on entry.
        for I := 0 to Size - 1 do
        begin
          // a) read actual code point
          Stream.ReadBuffer(Code, 4);
          // only BMP characters are expected to carry case mappings
          Assert(Code < $10000, LoadResString(@RsCasedUnicodeChar));
          // if there is no high byte entry in the first stage table then create one
          First := (Code shr 8) and $FF;
          Second := Code and $FF;
          if CaseMapping[First] = nil then
            SetLength(CaseMapping[First], 256);
          // b) read fold case array
          Stream.ReadBuffer(Size, 4);
          if Size > 0 then
          begin
            SetLength(CaseMapping[First, Second, 0], Size);
            Stream.ReadBuffer(CaseMapping[First, Second, 0, 0], Size * SizeOf(UCS4));
          end;
          // c) read lower case array
          Stream.ReadBuffer(Size, 4);
          if Size > 0 then
          begin
            SetLength(CaseMapping[First, Second, 1], Size);
            Stream.ReadBuffer(CaseMapping[First, Second, 1, 0], Size * SizeOf(UCS4));
          end;
          // d) read title case array
          Stream.ReadBuffer(Size, 4);
          if Size > 0 then
          begin
            SetLength(CaseMapping[First, Second, 2], Size);
            Stream.ReadBuffer(CaseMapping[First, Second, 2, 0], Size * SizeOf(UCS4));
          end;
          // e) read upper case array
          Stream.ReadBuffer(Size, 4);
          if Size > 0 then
          begin
            SetLength(CaseMapping[First, Second, 3], Size);
            Stream.ReadBuffer(CaseMapping[First, Second, 3, 0], Size * SizeOf(UCS4));
          end;
        end;
      finally
        Stream.Free;
      end;
    finally
      LoadInProgress.Leave;
    end;
  end;
end;
function CaseLookup(Code: Cardinal; CaseType: Cardinal): TUCS4Array;
// Performs a lookup of the given code and returns its case mapping if found.
// CaseType must be 0 for case folding, 1 for lower case, 2 for title case and
// 3 for upper case, respectively.
// If Code could not be found (or there is no such mapping) then the result is a
// mapping of length 1 holding the code itself. Otherwise an array of code points
// is returned which represents the mapping.
var
  First,
  Second: Byte;
begin
  // load case mapping data if not already done
  if not CaseDataLoaded then
    LoadCaseMappingData;
  First := (Code shr 8) and $FF;
  Second := Code and $FF;
  // Check the first stage table whether there is a mapping for the particular block
  // and (if so) whether there is a mapping for this particular code point.
  // Only BMP code points are stored (see the Assert in LoadCaseMappingData), so a
  // code >= $10000 must not be keyed by its low 16 bits — it would hit the bucket
  // of an unrelated BMP character; treat such codes as unmapped instead.
  if (Code < $10000) and (CaseMapping[First] <> nil) and
    (CaseMapping[First, Second, CaseType] <> nil) then
    Result := CaseMapping[First, Second, CaseType]
  else
  begin
    // Build a fresh single-element result instead of handing out the shared
    // SingletonMapping buffer. Dynamic arrays are reference types: returning the
    // shared buffer would alias every caller to the same storage, and the next
    // lookup would silently overwrite a result a previous caller still holds.
    SetLength(Result, 1);
    Result[0] := Code;
  end;
end;
function UnicodeCaseFold(Code: UCS4): TUCS4Array;
// Returns the case fold mapping defined for the given code point. For code points
// without a fold mapping, CaseLookup yields the code point itself unchanged.
begin
  Result := CaseLookup(Code, 0); // 0 = fold mapping (see CaseLookup)
end;
function UnicodeToUpper(Code: UCS4): TUCS4Array;
// Returns the (possibly 1-to-many) upper case mapping of the given code point;
// code points without an upper case mapping are returned unchanged.
begin
  Result := CaseLookup(Code, 3); // 3 = upper case mapping (see CaseLookup)
end;
function UnicodeToLower(Code: UCS4): TUCS4Array;
// Returns the (possibly 1-to-many) lower case mapping of the given code point;
// code points without a lower case mapping are returned unchanged.
begin
  Result := CaseLookup(Code, 1); // 1 = lower case mapping (see CaseLookup)
end;
function UnicodeToTitle(Code: UCS4): TUCS4Array;
// Returns the (possibly 1-to-many) title case mapping of the given code point;
// code points without a title case mapping are returned unchanged.
begin
  Result := CaseLookup(Code, 2); // 2 = title case mapping (see CaseLookup)
end;
//----------------- support for decomposition ------------------------------------------------------
const
  // constants for hangul composition and hangul-to-jamo decomposition
  // (see The Unicode Standard, "Conjoining Jamo Behavior")
  SBase = $AC00;  // hangul syllables start code point
  LBase = $1100;  // first leading consonant jamo
  VBase = $1161;  // first vowel jamo
  TBase = $11A7;  // first trailing consonant jamo
  LCount = 19;    // number of leading consonants
  VCount = 21;    // number of vowels
  TCount = 28;    // number of trailing consonants, including "no trailing consonant"
  NCount = VCount * TCount; // 588
  SCount = LCount * NCount; // 11172

type
  TDecompositions = array of TUCS4Array;
  TDecompositionsArray = array [Byte] of TDecompositions;

var
  // Lists of decompositions, organized (again) as sparse two-stage matrices
  // indexed by the high and low byte of a (BMP) code point.
  // Note: there are two tables, one for canonical decompositions and the other one
  // for compatibility decompositions.
  DecompositionsLoaded: Boolean;
  CanonicalDecompositions,
  CompatibleDecompositions: TDecompositionsArray;
procedure LoadDecompositionData;
// Loads decomposition data from the 'DECOMPOSITION'/'UNICODEDATA' resource into the
// CanonicalDecompositions and CompatibleDecompositions matrices. The stream starts
// with a 4-byte entry count; each entry is a 4-byte code point (with bit 30 set for
// compatibility decompositions) followed by a length-prefixed UCS4 array.
var
  Stream: TResourceStream;
  I, Code,
  Size: Cardinal;
  First,
  Second: Byte;
begin
  if not DecompositionsLoaded then
  begin
    // make sure no other code is currently modifying the global data area
    LoadInProgress.Enter;
    try
      // set the flag before loading so re-entrant calls do not retry
      DecompositionsLoaded := True;
      Stream := TResourceStream.Create(HInstance, 'DECOMPOSITION', 'UNICODEDATA');
      try
        // determine how many decomposition entries we have
        Stream.ReadBuffer(Size, 4);
        // Note: Size is reused below for the per-entry array lengths. This is safe
        // because Delphi evaluates a for-loop's upper bound only once, on entry.
        for I := 0 to Size - 1 do
        begin
          Stream.ReadBuffer(Code, 4);
          // apart from the flag in bit 30, only BMP code points are expected
          Assert((Code and not $40000000) < $10000, LoadResString(@RsDecomposedUnicodeChar));
          // if there is no high byte entry in the first stage table then create one
          First := (Code shr 8) and $FF;
          Second := Code and $FF;
          // insert into the correct table depending on bit 30
          // (if set then it is a compatibility decomposition)
          if Code and $40000000 <> 0 then
          begin
            if CompatibleDecompositions[First] = nil then
              SetLength(CompatibleDecompositions[First], 256);
            Stream.ReadBuffer(Size, 4);
            if Size > 0 then
            begin
              SetLength(CompatibleDecompositions[First, Second], Size);
              Stream.ReadBuffer(CompatibleDecompositions[First, Second, 0], Size * SizeOf(UCS4));
            end;
          end
          else
          begin
            if CanonicalDecompositions[First] = nil then
              SetLength(CanonicalDecompositions[First], 256);
            Stream.ReadBuffer(Size, 4);
            if Size > 0 then
            begin
              SetLength(CanonicalDecompositions[First, Second], Size);
              Stream.ReadBuffer(CanonicalDecompositions[First, Second, 0], Size * SizeOf(UCS4));
            end;
          end;
        end;
      finally
        Stream.Free;
      end;
    finally
      LoadInProgress.Leave;
    end;
  end;
end;
function UnicodeDecomposeHangul(Code: UCS4): TUCS4Array;
// Algorithmically decomposes a precomposed hangul syllable into its conjoining
// jamo: a leading consonant, a vowel and (when present) a trailing consonant.
// Code is expected to lie in the hangul syllable range starting at SBase; the
// caller is responsible for checking this.
var
  SIndex,
  TIndex: Integer;
begin
  SIndex := Code - SBase;
  TIndex := SIndex mod TCount;
  // a trailing index of 0 means the syllable has no trailing consonant,
  // so only two jamo (L, V) result instead of three (L, V, T)
  if TIndex = 0 then
    SetLength(Result, 2)
  else
    SetLength(Result, 3);
  Result[0] := LBase + SIndex div NCount;
  Result[1] := VBase + (SIndex mod NCount) div TCount;
  if TIndex <> 0 then
    Result[2] := TBase + TIndex;
end;
function UnicodeDecompose(Code: UCS4; Compatible: Boolean): TUCS4Array;
var
First,
Second: Byte;
begin
// load decomposition data if not already done
if not DecompositionsLoaded then
LoadDecompositionData;
Result := nil;
// if the code is hangul then decomposition is algorithmically
if UnicodeIsHangul(Code) then
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -