⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 adxbase.pas

📁 测试用例
💻 PAS
📖 第 1 页 / 共 2 页
字号:
(***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is TurboPower Async Professional
 *
 * The Initial Developer of the Original Code is
 * TurboPower Software
 *
 * Portions created by the Initial Developer are Copyright (C) 1991-2002
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *
 * ***** END LICENSE BLOCK ***** *)

{*********************************************************}
{*                   ADXBASE.PAS 4.06                    *}
{*********************************************************}
{*     XML parser Base types and conversion routines     *}
{*********************************************************}

{Global defines potentially affecting this unit}
{$I AWDEFINE.INC}

unit AdXBase;

interface

uses
  classes,
  OOMisc;


{===System functions=================================================}

type
  TApdUcs4Char = Longint;
  TApdUtf8Char = string[6];
  DOMChar = WideChar;
  PDOMChar = PWideChar;

  { Character encoding types}
  TApdCharEncoding = (ceUnknown, ceUTF8);

  {The TApdMemoryStream class is used to expose TMemoryStream's SetPointer
   method.}
  TApdMemoryStream = class(TMemoryStream)
  public
    procedure SetPointer(Ptr : Pointer; Size : Longint);
  end;

  TApdFileStream = class(TFileStream)
    FFileName : string;
  public
    constructor CreateEx(Mode : Word; const FileName : string);

    property Filename : string read FFileName;
  end;

{ Utility methods }
function ApxPos(const aSubStr, aString : DOMString) : Integer;
function ApxRPos(const sSubStr, sTerm : DOMString) : Integer;
{ character conversion routines }
function ApxIso88591ToUcs4(aInCh  : AnsiChar;
                      var aOutCh : TApdUcs4Char) : Boolean;
function ApxUcs4ToIso88591(aInCh  : TApdUcs4Char;
                      var aOutCh : AnsiChar) : Boolean;
function ApxUcs4ToWideChar(const aInChar : TApdUcs4Char;
                            var aOutWS  : DOMChar) : Boolean;
function ApxUtf16ToUcs4(aInChI,
                       aInChII   : DOMChar;
                   var aOutCh    : TApdUcs4Char;
                   var aBothUsed : Boolean) : Boolean;
function ApxUcs4ToUtf8(aInCh  : TApdUcs4Char;
                  var aOutCh : TApdUtf8Char) : Boolean;
function ApxUtf8ToUcs4(const aInCh  : TApdUtf8Char;
                            aBytes : Integer;
                        var aOutCh : TApdUcs4Char) : Boolean;

{ UTF specials }
function ApxGetLengthUtf8(const aCh : AnsiChar) : byte;

{ character classes }
function ApxIsBaseChar(aCh : TApdUcs4Char) : Boolean;
function ApxIsChar(const aCh : TApdUcs4Char) : Boolean;
function ApxIsCombiningChar(aCh : TApdUcs4Char) : Boolean;
function ApxIsDigit(aCh : TApdUcs4Char) : Boolean;
function ApxIsExtender(aCh : TApdUcs4Char) : Boolean;
function ApxIsIdeographic(aCh : TApdUcs4Char) : Boolean;
function ApxIsLetter(aCh : TApdUcs4Char) : Boolean;
function ApxIsNameChar(aCh : TApdUcs4Char) : Boolean;
function ApxIsNameCharFirst(aCh : TApdUcs4Char) : Boolean;
function ApxIsPubidChar(aCh : TApdUcs4Char) : Boolean;
function ApxIsSpace(aCh : TApdUcs4Char) : Boolean;

implementation

uses
  sysutils;

{== Utility methods ==================================================}
function ApxPos(const aSubStr, aString : DOMString) : Integer;
begin
  Result := AnsiPos(aSubStr, aString);
end;
{--------}
function ApxRPos(const sSubStr, sTerm : DOMString) : Integer;
var
  cLast : DOMChar;
  i, j  : Integer;
begin
  j := Length(sSubStr);
  cLast := sSubStr[j];
  for i := Length(sTerm) downto j do begin
    if (sTerm[i] = cLast) and
       (Copy(sTerm, i - j + 1, j) = sSubStr) then begin
      Result := i - j + 1;
      Exit;
    end;
  end;
  Result := 0;
end;
{===character conversion routines====================================}
function ApxIso88591ToUcs4(aInCh  : AnsiChar;
                      var aOutCh : TApdUcs4Char) : boolean;
begin
  {Note: the conversion from ISO-8859-1 to UCS-4 is very simple: the
         result is the original character}
  aOutCh := ord(aInCh);
  Result := true; {cannot fail}
end;
{--------}
function ApxUcs4ToIso88591(aInCh  : TApdUcs4Char;
                      var aOutCh : AnsiChar) : Boolean;
begin
  {Note: the conversion from UCS-4 to ISO-8859-1 is very simple: if
         the character is contained in a byte, the result is the
         original character; otherwise the conversion cannot be done}
  aInCh := abs(aInCh);
  if (($00 <= aInCh) and (aInCh <= $FF)) then begin
    aOutCh := AnsiChar(aInCh and $FF);
    Result := true;
  end
  else begin
    Result := false;
    aOutCh := #0;
  end;
end;
{--------}
function ApxUcs4ToWideChar(const aInChar : TApdUcs4Char;
                            var aOutWS  : DOMChar) : Boolean;
var
  Temp : Longint;
begin
  Temp := abs(aInChar);
  if (Temp < $10000) then begin
    aOutWS := DOMChar(Temp);
    Result := True;
  end else if (Temp <= $10FFFF) then begin
    dec(Temp, $10000);
    Temp := $DC00 or (Temp and $3FF);
    Temp := $D800 or (Temp shr 10);
    aOutWS := DOMChar(Temp);
    Result := True;
  end else begin
    aOutWS := #0;
    Result := False;
  end;
end;
{--------}
function ApxUtf16ToUcs4(aInChI,
                       aInChII   : DOMChar;
                   var aOutCh    : TApdUcs4Char;
                   var aBothUsed : Boolean) : Boolean;
begin
  aBothUsed := False;
  if (aInChI < #$D800) or (aInChI > #$DFFF) then begin
    aOutCh := Integer(aInChI);
    Result := True;
  end
  else if (aInChI < #$DC00) and
          ((#$DC00 <= aInChII) and (aInChII <= #$DFFF)) then begin
    aOutCh := ((integer(aInChI) and $3FF) shl 10) or
              (integer(aInChII) and $3FF);
    aBothUsed := True;
    Result := True;
  end
  else begin
    Result := False;
    aOUtCh := 0;
  end;
end;
{--------}
function ApxUcs4ToUtf8(aInCh  : TApdUcs4Char;
                  var aOutCh : TApdUtf8Char) : Boolean;
begin
  aInCh := abs(aInCh);
  {if the UCS-4 value is $00 to $7f, no conversion is required}
  if (aInCh < $80) then begin
    aOutCh[0] := #1;
    aOutCh[1] := AnsiChar(aInCh);
  end
  {if the UCS-4 value is $80 to $7ff, a two character string is
   produced}
  else if (aInCh < $800) then begin
    aOutCh[0] := #2;
    aOutCh[1] := AnsiChar($C0 or (aInCh shr 6));
    aOutCh[2] := AnsiChar($80 or (aInCh and $3F));
  end
  {if the UCS-4 value is $800 to $ffff, a three character string is
   produced}
  else if (aInCh < $10000) then begin
    aOutCh[0] := #3;
    aOutCh[1] := AnsiChar($E0 or (aInCh shr 12));
    aOutCh[2] := AnsiChar($80 or ((aInCh shr 6) and $3F));
    aOutCh[3] := AnsiChar($80 or (aInCh and $3F));
  end
  {NOTE: the following if clauses will be very rarely used since the
         majority of characters will be unicode characters: $0000 to
         $FFFF}
  {if the UCS-4 value is $10000 to $1fffff, a four character string
   is produced}
  else if (aInCh < $200000) then begin
    aOutCh[0] := #4;
    aOutCh[1] := AnsiChar($F0 or (aInCh shr 18));
    aOutCh[2] := AnsiChar($80 or ((aInCh shr 12) and $3F));
    aOutCh[3] := AnsiChar($80 or ((aInCh shr 6) and $3F));
    aOutCh[4] := AnsiChar($80 or (aInCh and $3F));
  end
  {if the UCS-4 value is $200000 to $3ffffff, a five character
   string is produced}
  else if (aInCh < $4000000) then begin
    aOutCh[0] := #5;
    aOutCh[1] := AnsiChar($F8 or (aInCh shr 24));
    aOutCh[2] := AnsiChar($80 or ((aInCh shr 18) and $3F));
    aOutCh[3] := AnsiChar($80 or ((aInCh shr 12) and $3F));
    aOutCh[4] := AnsiChar($80 or ((aInCh shr 6) and $3F));
    aOutCh[5] := AnsiChar($80 or (aInCh and $3F));
  end
  {for all other UCS-4 values, a six character string is produced}
  else begin
    aOutCh[0] := #6;
    aOutCh[1] := AnsiChar($FC or (aInCh shr 30));
    aOutCh[2] := AnsiChar($80 or ((aInCh shr 24) and $3F));
    aOutCh[3] := AnsiChar($80 or ((aInCh shr 18) and $3F));
    aOutCh[4] := AnsiChar($80 or ((aInCh shr 12) and $3F));
    aOutCh[5] := AnsiChar($80 or ((aInCh shr 6) and $3F));
    aOutCh[6] := AnsiChar($80 or (aInCh and $3F));
  end;
  Result := True; {cannot fail}
end;
{--------}
function ApxUtf8ToUcs4(const aInCh  : TApdUtf8Char;
                            aBytes : Integer;
                        var aOutCh : TApdUcs4Char) : Boolean;
var
  InFirstByte : AnsiChar;
  InCharLen   : Integer;
  i           : Integer;
begin
  InFirstByte := aInCh[1];
  InCharLen := Length(aInCh);
  {the length of the UTF-8 character cannot be zero and must match
   that of the first ASCII character in the string}
  if ((InCharLen = 0) or
      (InCharLen <> aBytes)) then begin
    Result := False;
    aOutCh := 0;
    Exit;
  end;
  {all subsequent characters must have the most significant bit set
   and the next to most significant digit clear; we'll test for this
   as we go along}
  {get the bits from the first ASCII character}
  if (InFirstByte <= #$7F) then
    aOutCh := Ord(InFirstByte)
  else if (InFirstByte <= #$DF) then
    aOutCh := Ord(InFirstByte) and $1F
  else if (InFirstByte <= #$EF) then
    aOutCh := Ord(InFirstByte) and $0F
  else if (InFirstByte <= #$F7) then
    aOutCh := Ord(InFirstByte) and $07
  else if (InFirstByte <= #$FB) then
    aOutCh := Ord(InFirstByte) and $03
  else
    aOutCh := Ord(InFirstByte) and $01;
  {get the bits from the remaining ASCII characters}
  for i := 2 to InCharLen do begin
    if ((Byte(aInCh[i]) and $C0) <> $80) then begin
      Result := False;
      aOutCh := 0;
      Exit;
    end;
    aOutCh := (aOutCh shl 6) or (Byte(aInCh[i]) and $3F);
  end;
  {success}
  Result := True;
end;
{====================================================================}


{===UTF specials=====================================================}
function ApxGetLengthUtf8(const aCh : AnsiChar) : Byte;
begin
  if (aCh <= #$7F) then
    Result := 1
  else if (aCh <= #$BF) then
    Result := 0              { $80--$BF is an error }
  else if (aCh <= #$DF) then
    Result := 2
  else if (aCh <= #$EF) then
    Result := 3
  else if (aCh <= #$F7) then
    Result := 4
  else if (aCh <= #$FB) then
    Result := 5
  else if (aCh <= #$FD) then
    Result := 6
  else
    Result := 0;             { $FE, $FF is an error }
end;
{====================================================================}


{===character classes================================================}
function ApxIsBaseChar(aCh : TApdUcs4Char) : boolean;
begin
  Result := (($0041 <= aCh) and (aCh <= $005A)) or
            (($0061 <= aCh) and (aCh <= $007A)) or
            (($00C0 <= aCh) and (aCh <= $00D6)) or
            (($00D8 <= aCh) and (aCh <= $00F6)) or

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -