📄 unicode.~pas

📁 几十个处理Unicode的函数
💻 ~PAS
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
    // the beginning of a code+offset pair.
    M := (L + R) shr 1;
    Dec(M, M and 1);
    if Code > NumberNodes[M] then L := M + 2
                             else
      if Code < NumberNodes[M] then R := M - 2
                               else
      begin
        VP := Pointer(Cardinal(@NumberValues[0]) + NumberNodes[M + 1]);
        num.numerator := VP^;
        Inc(VP);
        num.denominator := VP^;
        Result := True;
        Break;
      end;
  end;
end;

//----------------------------------------------------------------------------------------------------------------------

function UnicodeDigitLookup(Code: UCS4; var Digit: Integer): Boolean;

var
  L, R, M: Integer;
  VP: PWord;

begin
  // load number data if not already done
  if NumberNodes = nil then LoadUnicodeNumberData;

  Result := False;
  L := 0;
  R := NumberSize - 1;
  while L <= R do
  begin
    // Determine a "mid" point and adjust to make sure the mid point is at
    // the beginning of a code+offset pair.
    M := (L + R) shr 1;
    Dec(M, M and 1);
    if Code > NumberNodes[M] then L := M + 2
                             else
      if Code < NumberNodes[M] then R := M - 2
                               else
      begin
        VP := Pointer(Cardinal(@NumberValues[0]) + NumberNodes[M + 1]);
        M := VP^;
        Inc(VP);
        if M = VP^ then
        begin
          Digit := M;
          Result := True;
        end;
        Break;
      end;
  end;
end;

//----------------------------------------------------------------------------------------------------------------------

function UnicodeGetNumber(Code: UCS4): TUNumber;

begin
  // Initialize with some arbitrary value, because the caller simply cannot
  // tell for sure if the code is a number without calling the ucisnumber()
  // macro before calling this function.
  Result.Numerator := -111;
  Result.Denominator := -111;

  UnicodeNumberLookup(Code, Result);
end;

//----------------------------------------------------------------------------------------------------------------------

function UnicodeGetDigit(Code: UCS4): Integer;

begin
  // Initialize with some arbitrary value, because the caller simply cannot
  // tell for sure if the code is a number without calling the ucisdigit()
  //  macro before calling this function.
  Result := -111;

  UnicodeDigitLookup(Code, Result);
end;

//----------------- TSearchEngine --------------------------------------------------------------------------------------

constructor TSearchEngine.Create(AOwner: TWideStrings);

begin
  FOwner := AOwner;
  FResults := TList.Create;
end;

//----------------------------------------------------------------------------------------------------------------------

destructor TSearchEngine.Destroy;

begin
  Clear;
  FResults.Free;
  inherited;
end;

//----------------------------------------------------------------------------------------------------------------------

procedure TSearchEngine.AddResult(Start, Stop: Cardinal);

begin
  FResults.Add(Pointer(Start));
  FResults.Add(Pointer(Stop));
end;

//----------------------------------------------------------------------------------------------------------------------

procedure TSearchEngine.Clear;

begin
  ClearResults;
end;

//----------------------------------------------------------------------------------------------------------------------

procedure TSearchEngine.ClearResults;

begin
  FResults.Clear;
end;

//----------------------------------------------------------------------------------------------------------------------

procedure TSearchEngine.DeleteResult(Index: Cardinal);

// explicitly deletes a search result

begin
  with FResults do
  begin
    // start index
    Delete(2 * Index);
    // stop index
    Delete(2 * Index);
  end;
end;

//----------------------------------------------------------------------------------------------------------------------

function TSearchEngine.GetCount: Integer;

// returns the number of matches found

begin
  Result := FResults.Count div 2;
end;

//----------------------------------------------------------------------------------------------------------------------

procedure TSearchEngine.GetResult(Index: Cardinal; var Start, Stop: Integer);

// returns the start position of a match (end position can be determined by adding the length
// of the pattern to the start position)

begin
  Start := Cardinal(FResults[2 * Index]);
  Stop := Cardinal(FResults[2 * Index + 1]);
end;

//----------------- TUTBMSearch ----------------------------------------------------------------------------------------

constructor TUTBMSearch.Create(AOwner: TWideStrings);

begin
  inherited;
end;

//----------------------------------------------------------------------------------------------------------------------

destructor TUTBMSearch.Destroy;

begin
  inherited;
end;

//----------------------------------------------------------------------------------------------------------------------

procedure TUTBMSearch.ClearPattern;

begin
  FreeMem(FPattern);
  FPattern := nil;
  FFlags := [];
  FPatternUsed := 0;
  FPatternSize := 0;
  FPatternLength := 0;
  FreeMem(FSkipValues);
  FSkipValues := nil;
  FSkipsUsed := 0;
  FMD4 := 0;
end;

//----------------------------------------------------------------------------------------------------------------------

function TUTBMSearch.GetSkipValue(TextStart, TextEnd: PUCS2): Cardinal;

// looks up the SkipValues value for a character

var
 I: Integer;
 C1, C2: UCS4;
 Sp: PUTBMSkip;

begin
  Result := 0;
  if Cardinal(TextStart) < Cardinal(TextEnd) then
  begin
    C1 := Word(TextStart^);
    if (TextStart + 1) < TextEnd then C2 := Word((TextStart + 1)^)
                                 else C2 := $FFFFFFFF;
    if (SurrogateHighStart <= C1) and
       (C1 <= SurrogateHighEnd) and
       (SurrogateLowStart <= C2) and
       (C2 <= $DDDD) then C1 := $10000 + (((C1 and $03FF) shl 10) or (C2 and $03FF));

    Sp := FSkipValues;
    for I := 0 to FSkipsUsed - 1 do
    begin
      if not (Boolean(C1 xor Sp.BMChar.UpCase) and
              Boolean(C1 xor Sp.BMChar.LoCase) and
              Boolean(C1 xor Sp.BMChar.TitleCase)) then
      begin
        if (TextEnd - TextStart) < Sp.SkipValues then Result := TextEnd - TextStart
                                                 else Result := Sp.SkipValues;
        Exit;
      end;
      Inc(Sp);
    end;
    Result := FPatternLength;
  end;
end;

//----------------------------------------------------------------------------------------------------------------------

function TUTBMSearch.Match(Text, Start, Stop: PUCS2; var MatchStart, MatchEnd: Cardinal): Boolean;

// Checks once whether the text at position Start (which points to the end of the current text part to be matched)
// matches.
// Note: If whole words only are allowed then the left and right border tests are done here too. The keypoint for the
//       right border is that the next character after the search string is either the text end or a space character.
//       For the left side this is similar, but there is nothing like a string start marker (like the string end marker #0).
//
//       It seems not obvious, but we still can use the passed Text pointer to do the left check. Although this pointer
//       might not point to the real string start (e.g. in TUTBMSearch.FindAll Text is incremented as needed) it is
//       still a valid check mark. The reason is that Text either points to the real string start or a previous match
//       (happend already, keep in mind the search options do not change in the FindAll loop) and the character just
//       before Text is a space character.
//       This fact implies, though, that strings passed to Find (or FindFirst, FindAll in TUTBMSearch) always really
//       start at the given address. Although this might not be the case in some circumstances (e.g. if you pass only
//       the selection from an editor) it is still assumed that a pattern matching from the first position on (from the
//       search string start) also matches when whole words only are allowed.

var
  CheckSpace: Boolean;
  C1, C2: UCS4;
  Count: Integer;
  Cp: PUTBMChar;

begin
  // be pessimistic
  Result := False;

  // set the potential match endpoint first
  MatchEnd := (Start - Text) + 1;

  C1 := Word(Start^);
  if (Start + 1) < Stop then C2 := Word((Start + 1)^)
                        else C2 := $FFFFFFFF;
  if (SurrogateHighStart <= C1) and
     (C1 <= SurrogateHighEnd) and
     (SurrogateLowStart <= C2) and
     (C2 <= SurrogateLowEnd) then
  begin
    C1 := $10000 + (((C1 and $03FF) shl 10) or (C2 and $03FF));
    // Adjust the match end point to occur after the UTF-16 character.
    Inc(MatchEnd);
  end;

  // check special cases
  if FPatternUsed = 1 then
  begin
    MatchStart := Start - Text;
    Result := True;
    Exit;
  end;

  // Early out if entire words need to be matched and the next character 
  // in the search string is neither the string end nor a space character.
  if (sfWholeWordOnly in FFlags) and
     not ((Start + 1)^ = WideNull) and
     not UnicodeIsWhiteSpace(Word((Start + 1)^)) then Exit;

  // compare backward
  Cp := FPattern;
  Inc(Cp, FPatternUsed - 1);

  Count := FPatternLength;
  while (Start >= Text) and (Count > 0) do
  begin
    // ignore non-spacing characters if indicated
    if sfIgnoreNonSpacing in FFlags then
    begin
      while (Start > Text) and UnicodeIsNonSpacing(C1) do
      begin
        Dec(Start);
        C2 := Word(Start^);
        if (Start - 1) > Text then C1 := Word((Start - 1)^)
                              else C1 := $FFFFFFFF;
        if (SurrogateLowStart <= C2) and (C2 <= SurrogateLowEnd) and
           (SurrogateHighStart <= C1) and (C1 <= SurrogateHighEnd) then
        begin
          C1 := $10000 + (((C1 and $03FF) shl 10) or (C2 and $03FF));
          Dec(Start);
        end
        else C1 := C2;
      end;
    end;

    // handle space compression if indicated
    if sfSpaceCompress in FFlags then
    begin
      CheckSpace := False;
      while (Start > Text) and
            (UnicodeIsWhiteSpace(C1) or UnicodeIsControl(C1)) do
      begin
        CheckSpace := UnicodeIsWhiteSpace(C1);
        Dec(Start);
        C2 := Word(Start^);
        if (Start - 1) > Text then C1 := Word((Start - 1)^)
                              else C1 := $FFFFFFFF;
        if (SurrogateLowStart <= C2) and (C2 <= SurrogateLowEnd) and
           (SurrogateHighStart <= C1) and (C1 <= SurrogateHighEnd) then
        begin
          C1 := $10000 + (((C1 and $03FF) shl 10) or (C2 and $03FF));
          Dec(Start);
        end
        else C1 := C2;
      end;
      // Handle things if space compression was indicated and one or
      // more member characters were found.
      if CheckSpace then
      begin
        if Cp.UpCase <> $20 then Exit;
        Dec(Cp);
        Dec(Count);
        // If Count is 0 at this place then the space character(s) was the first
        // in the pattern and we need to correct the start position.
        if Count = 0 then Inc(Start);
      end;
    end;

    // handle the normal comparison cases
    if (Count > 0) and
       (Boolean(C1 xor Cp.UpCase) and
        Boolean(C1 xor Cp.LoCase) and
        Boolean(C1 xor C
上一页 1 2 3 45
💿 文件大小 111 K
👤 上传用户 zhou28
📂 所属分类 Delphi控件源码
🏷️ 相关标签

#Unicode #函数
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -