⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmlparse2.pas

📁 一个经典的读写Excel的控件
💻 PAS
📖 第 1 页 / 共 2 页
字号:
unit HTMLParse2;

{
********************************************************************************
******* XLSReadWriteII V2.00                                             *******
*******                                                                  *******
******* Copyright(C) 1999,2004 Lars Arvidsson, Axolot Data               *******
*******                                                                  *******
******* email: components@axolot.com                                     *******
******* URL:   http://www.axolot.com                                     *******
********************************************************************************
** Users of the XLSReadWriteII component must accept the following            **
** disclaimer of warranty:                                                    **
**                                                                            **
** XLSReadWriteII is supplied as is. The author disclaims all warranties,     **
** expressedor implied, including, without limitation, the warranties of      **
** merchantability and of fitness for any purpose. The author assumes no      **
** liability for damages, direct or consequential, which may result from the  **
** use of XLSReadWriteII.                                                     **
********************************************************************************
}
 
{$B-}

interface

uses Classes, SysUtils, Forms, Dialogs, HTMLTypes2, Graphics;

type THTMLParser = class(TObject)
private
     ElementNamesHash: array[0..High(ElementNames)] of word;
     AttributeNamesHash: array[0..High(AttributeNames)] of word;
     FStream: TStream;
     Buffer: PChar;
     BufferPos,BytesInBuffer: integer;
     ElementCount: integer;
     Elements: PHTMLElementArray;
     LineCount: integer;

     procedure CreateHash;
     procedure DecodeHTMLText(var S: string);
     procedure NewElement(ID: THTMLElementID);
     function  ReadStream: boolean;
     function  GetNextChar(var C: char): boolean;
     function  GetNextTag: boolean;
     function  GetElementID(S: string): THTMLElementID;
     function  GetAttributeID(S: string): TElementAttributeID;
     procedure AddAttribute(Index: integer; ID: TElementAttributeID; Data: string);
     procedure ScanAttributes(Index: integer; S: string);
     procedure DoComment;
     procedure DoScript;
public
     constructor Create(var E: THTMLElementArray);
     destructor Destroy; override;
     procedure Clear;
     procedure LoadFromStream(Stream: TStream);
     procedure LoadFromFile(const Filename: string);
     function  GetAttribute(E: THTMLElement; Attr: TElementAttributeID): PElementAttribute;
     function  GetStrAttribute(E: THTMLElement; Attr: TElementAttributeID; var S: string): boolean;
     end;

implementation

const
  BUFFER_SIZE = 8192;

function GetHashCode(const Buffer; Count: Integer): Word; assembler;
asm
        CMP     EDX,0
        JNE     @@2
        MOV     EAX,0
        JMP     @@3
@@2:
        MOV     ECX,EDX
        MOV     EDX,EAX
        XOR     EAX,EAX
@@1:    ROL     AX,5
        XOR     AL,[EDX]
        INC     EDX
        DEC     ECX
        JNE     @@1
@@3:
end;

function CPos(C: char; S: string): integer;
begin
  for Result := 1 to Length(S) do begin
    if S[Result] = C then
      Exit;
  end;
  Result := -1;
end;

function FastPos(const aSourceString, aFindString : String; const aSourceLen, aFindLen, StartPos : integer) : integer;
var
  SourceLen : integer;
begin
  SourceLen := aSourceLen;
  SourceLen := SourceLen - aFindLen;
  if (StartPos-1) > SourceLen then begin
    Result := 0;
    Exit;
  end;
  SourceLen := SourceLen - StartPos;
  SourceLen := SourceLen +2;
  asm
    push ESI
    push EDI
    push EBX

    mov EDI, aSourceString
    add EDI, StartPos
    Dec EDI

    mov ESI, aFindString

    mov ECX, SourceLen

    Mov  Al, [ESI]

    @ScaSB:
    Mov  Ah, [EDI]

    cmp  Ah,Al
    jne  @NextChar

    @CompareStrings:
    mov  EBX, aFindLen

    dec  EBX

    @CompareNext:

    mov  Al, [ESI+EBX]

    mov  Ah, [EDI+EBX]

    cmp  Al, Ah
    Jz   @Matches

    Mov  Al, [ESI]
    Jmp  @NextChar

    @Matches:
    Dec  EBX
    Jnz  @CompareNext

    mov  EAX, EDI
    sub  EAX, aSourceString
    inc  EAX
    mov  Result, EAX
    jmp  @TheEnd
    @NextChar:

    Inc  EDI
    dec  ECX
    jnz  @ScaSB

    mov  Result,0

    @TheEnd:
    pop  EBX
    pop  EDI
    pop  ESI
  end;
end;

function FastPosNoCase(const aSourceString, aFindString : String; const aSourceLen, aFindLen, StartPos : integer) : integer;
var
  SourceLen : integer;
begin
  SourceLen := aSourceLen;
  SourceLen := SourceLen - aFindLen;
  if (StartPos-1) > SourceLen then begin
    Result := 0;
    Exit;
  end;
  SourceLen := SourceLen - StartPos;
  SourceLen := SourceLen +2;
  asm
    push ESI
    push EDI
    push EBX

    mov EDI, aSourceString
    add EDI, StartPos
    Dec EDI
    mov ESI, aFindString
    mov ECX, SourceLen

    Mov  Al, [ESI]

    and  Al, $df
    @ScaSB:
    Mov  Ah, [EDI]
    and  Ah, $df
    cmp  Ah,Al
    jne  @NextChar

    @CompareStrings:
    mov  EBX, aFindLen
    dec  EBX
    @CompareNext:
    mov  Al, [ESI+EBX]
    mov  Ah, [EDI+EBX]
    and  Al, $df
    and  Ah, $df
    cmp  Al, Ah
    Jz   @Matches
    Mov  Al, [ESI]
    and  Al, $df
    Jmp  @NextChar
    @Matches:
    Dec  EBX
    Jnz  @CompareNext
    mov  EAX, EDI
    sub  EAX, aSourceString
    inc  EAX
    mov  Result, EAX
    jmp  @TheEnd
    @NextChar:
    Inc  EDI
    dec  ECX
    jnz  @ScaSB
    mov  Result,0

    @TheEnd:
    pop  EBX
    pop  EDI
    pop  ESI
  end;
end;

procedure MyMove(const Source; var Dest; Count : Integer);
asm
  cmp   ECX,0
  Je    @JustQuit

  push  ESI
  push  EDI

  mov   ESI, EAX
  mov   EDI, EDX

  @Loop:
  Mov   AL, [ESI]
  Inc   ESI
  mov   [EDI], AL
  Inc   EDI
  Dec   ECX
  Jnz   @Loop


  pop   EDI
  pop   ESI
  @JustQuit:
end;

Type
  TFastPosProc = function(
    const aSourceString, aFindString : String;
    const aSourceLen, aFindLen, StartPos : integer
    ) : integer;

function FastReplace(var aSourceString : String; const aFindString, aReplaceString : String; CaseSensitive : Boolean = False) : String;
var
  ActualResultLen,
  CurrentPos,
  LastPos,
  BytesToCopy,
  ResultLen,
  FindLen,
  ReplaceLen,
  SourceLen         : Integer;

  FastPosProc       : TFastPosProc;
begin
  if CaseSensitive then
    FastPosProc := FastPOS
  else
    FastPOSProc := FastPOSNoCase;

  Result := '';

  FindLen := Length(aFindString);
  ReplaceLen := Length(aReplaceString);
  SourceLen := Length(aSourceString);


  if ReplaceLen <= FindLen then
    ActualResultLen := SourceLen
  else
    ActualResultLen := SourceLen + (SourceLen * ReplaceLen div FindLen) + ReplaceLen;

  SetLength(Result,ActualResultLen);

  CurrentPos := 1;
  ResultLen := 0;
  LastPos := 1;

  if ReplaceLen > 0 then begin
    repeat
      CurrentPos := FastPosProc(aSourceString, aFindString,SourceLen, FindLen, CurrentPos);

      if CurrentPos = 0 then break;

      BytesToCopy := CurrentPos-LastPos;

      MyMove(aSourceString[LastPos],Result[ResultLen+1], BytesToCopy);

      MyMove(aReplaceString[1],Result[ResultLen+1+BytesToCopy], ReplaceLen);

      ResultLen := ResultLen + BytesToCopy + ReplaceLen;

      CurrentPos := CurrentPos + FindLen;
      LastPos := CurrentPos;
    until false;
  end else begin
    repeat
      CurrentPos := FastPos(aSourceString,
        aFindString, SourceLen, FindLen, CurrentPos);
      if CurrentPos = 0 then break;

      BytesToCopy := CurrentPos-LastPos;

      MyMove(aSourceString[LastPos],
        Result[ResultLen+1], BytesToCopy);
      ResultLen := ResultLen +
        BytesToCopy + ReplaceLen;

      CurrentPos := CurrentPos + FindLen;
      LastPos := CurrentPos;
    until false;
  end;

  Dec(LastPOS);

  SetLength(Result, ResultLen + (SourceLen-LastPos));

  if LastPOS+1 <= SourceLen then
    MyMove(aSourceString[LastPos+1],Result[ResultLen+1],SourceLen-LastPos);
end;

constructor THTMLParser.Create(var E: THTMLElementArray);
begin
  Elements := @E;
  GetMem(Buffer,BUFFER_SIZE);
  CreateHash;
end;

destructor THTMLParser.Destroy;
begin
  FreeMem(Buffer);
  Clear;
  inherited Destroy;
end;

procedure THTMLParser.Clear;
var
  i,j: integer;
begin
  for i := 0 to High(Elements^) do begin
    for j := 0 to High(Elements^[i].Attributes) do begin
      if Elements^[i].Attributes[j].AttType = atString then
        StrDispose(Elements^[i].Attributes[j].StrVal);
    end;
    SetLength(Elements^[i].Attributes,0);
  end;
  SetLength(Elements^,0);
end;

procedure THTMLParser.CreateHash;
var
  i: integer;
begin
  for i := 0 to High(ElementNames) do
    ElementNamesHash[i] := GetHashCode(ElementNames[i][1],Length(ElementNames[i]));
  for i := 0 to High(AttributeNames) do
    AttributeNamesHash[i] := GetHashCode(AttributeNames[i][1],Length(AttributeNames[i]));
end;

procedure THTMLParser.NewElement(ID: THTMLElementID);
begin
  if ElementCount >= Length(Elements^) then
    SetLength(Elements^,Length(Elements^) + 2000);
  FillChar(Elements^[ElementCount],SizeOf(THTMLElement),0);
  Elements^[ElementCount].ID := ID;
  Inc(ElementCount);
end;

function THTMLParser.GetElementID(S: string): THTMLElementID;
var
  i: integer;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -