📄 htmlparse2.pas
字号:
unit HTMLParse2;
{
********************************************************************************
******* XLSReadWriteII V2.00 *******
******* *******
******* Copyright(C) 1999,2004 Lars Arvidsson, Axolot Data *******
******* *******
******* email: components@axolot.com *******
******* URL: http://www.axolot.com *******
********************************************************************************
** Users of the XLSReadWriteII component must accept the following **
** disclaimer of warranty: **
** **
** XLSReadWriteII is supplied as is. The author disclaims all warranties, **
** expressedor implied, including, without limitation, the warranties of **
** merchantability and of fitness for any purpose. The author assumes no **
** liability for damages, direct or consequential, which may result from the **
** use of XLSReadWriteII. **
********************************************************************************
}
{$B-}
interface
uses Classes, SysUtils, Forms, Dialogs, HTMLTypes2, Graphics;
type THTMLParser = class(TObject)
private
ElementNamesHash: array[0..High(ElementNames)] of word;
AttributeNamesHash: array[0..High(AttributeNames)] of word;
FStream: TStream;
Buffer: PChar;
BufferPos,BytesInBuffer: integer;
ElementCount: integer;
Elements: PHTMLElementArray;
LineCount: integer;
procedure CreateHash;
procedure DecodeHTMLText(var S: string);
procedure NewElement(ID: THTMLElementID);
function ReadStream: boolean;
function GetNextChar(var C: char): boolean;
function GetNextTag: boolean;
function GetElementID(S: string): THTMLElementID;
function GetAttributeID(S: string): TElementAttributeID;
procedure AddAttribute(Index: integer; ID: TElementAttributeID; Data: string);
procedure ScanAttributes(Index: integer; S: string);
procedure DoComment;
procedure DoScript;
public
constructor Create(var E: THTMLElementArray);
destructor Destroy; override;
procedure Clear;
procedure LoadFromStream(Stream: TStream);
procedure LoadFromFile(const Filename: string);
function GetAttribute(E: THTMLElement; Attr: TElementAttributeID): PElementAttribute;
function GetStrAttribute(E: THTMLElement; Attr: TElementAttributeID; var S: string): boolean;
end;
implementation
const
BUFFER_SIZE = 8192;
function GetHashCode(const Buffer; Count: Integer): Word; assembler;
asm
CMP EDX,0
JNE @@2
MOV EAX,0
JMP @@3
@@2:
MOV ECX,EDX
MOV EDX,EAX
XOR EAX,EAX
@@1: ROL AX,5
XOR AL,[EDX]
INC EDX
DEC ECX
JNE @@1
@@3:
end;
function CPos(C: char; S: string): integer;
begin
for Result := 1 to Length(S) do begin
if S[Result] = C then
Exit;
end;
Result := -1;
end;
function FastPos(const aSourceString, aFindString : String; const aSourceLen, aFindLen, StartPos : integer) : integer;
var
SourceLen : integer;
begin
SourceLen := aSourceLen;
SourceLen := SourceLen - aFindLen;
if (StartPos-1) > SourceLen then begin
Result := 0;
Exit;
end;
SourceLen := SourceLen - StartPos;
SourceLen := SourceLen +2;
asm
push ESI
push EDI
push EBX
mov EDI, aSourceString
add EDI, StartPos
Dec EDI
mov ESI, aFindString
mov ECX, SourceLen
Mov Al, [ESI]
@ScaSB:
Mov Ah, [EDI]
cmp Ah,Al
jne @NextChar
@CompareStrings:
mov EBX, aFindLen
dec EBX
@CompareNext:
mov Al, [ESI+EBX]
mov Ah, [EDI+EBX]
cmp Al, Ah
Jz @Matches
Mov Al, [ESI]
Jmp @NextChar
@Matches:
Dec EBX
Jnz @CompareNext
mov EAX, EDI
sub EAX, aSourceString
inc EAX
mov Result, EAX
jmp @TheEnd
@NextChar:
Inc EDI
dec ECX
jnz @ScaSB
mov Result,0
@TheEnd:
pop EBX
pop EDI
pop ESI
end;
end;
function FastPosNoCase(const aSourceString, aFindString : String; const aSourceLen, aFindLen, StartPos : integer) : integer;
var
SourceLen : integer;
begin
SourceLen := aSourceLen;
SourceLen := SourceLen - aFindLen;
if (StartPos-1) > SourceLen then begin
Result := 0;
Exit;
end;
SourceLen := SourceLen - StartPos;
SourceLen := SourceLen +2;
asm
push ESI
push EDI
push EBX
mov EDI, aSourceString
add EDI, StartPos
Dec EDI
mov ESI, aFindString
mov ECX, SourceLen
Mov Al, [ESI]
and Al, $df
@ScaSB:
Mov Ah, [EDI]
and Ah, $df
cmp Ah,Al
jne @NextChar
@CompareStrings:
mov EBX, aFindLen
dec EBX
@CompareNext:
mov Al, [ESI+EBX]
mov Ah, [EDI+EBX]
and Al, $df
and Ah, $df
cmp Al, Ah
Jz @Matches
Mov Al, [ESI]
and Al, $df
Jmp @NextChar
@Matches:
Dec EBX
Jnz @CompareNext
mov EAX, EDI
sub EAX, aSourceString
inc EAX
mov Result, EAX
jmp @TheEnd
@NextChar:
Inc EDI
dec ECX
jnz @ScaSB
mov Result,0
@TheEnd:
pop EBX
pop EDI
pop ESI
end;
end;
procedure MyMove(const Source; var Dest; Count : Integer);
asm
cmp ECX,0
Je @JustQuit
push ESI
push EDI
mov ESI, EAX
mov EDI, EDX
@Loop:
Mov AL, [ESI]
Inc ESI
mov [EDI], AL
Inc EDI
Dec ECX
Jnz @Loop
pop EDI
pop ESI
@JustQuit:
end;
Type
TFastPosProc = function(
const aSourceString, aFindString : String;
const aSourceLen, aFindLen, StartPos : integer
) : integer;
function FastReplace(var aSourceString : String; const aFindString, aReplaceString : String; CaseSensitive : Boolean = False) : String;
var
ActualResultLen,
CurrentPos,
LastPos,
BytesToCopy,
ResultLen,
FindLen,
ReplaceLen,
SourceLen : Integer;
FastPosProc : TFastPosProc;
begin
if CaseSensitive then
FastPosProc := FastPOS
else
FastPOSProc := FastPOSNoCase;
Result := '';
FindLen := Length(aFindString);
ReplaceLen := Length(aReplaceString);
SourceLen := Length(aSourceString);
if ReplaceLen <= FindLen then
ActualResultLen := SourceLen
else
ActualResultLen := SourceLen + (SourceLen * ReplaceLen div FindLen) + ReplaceLen;
SetLength(Result,ActualResultLen);
CurrentPos := 1;
ResultLen := 0;
LastPos := 1;
if ReplaceLen > 0 then begin
repeat
CurrentPos := FastPosProc(aSourceString, aFindString,SourceLen, FindLen, CurrentPos);
if CurrentPos = 0 then break;
BytesToCopy := CurrentPos-LastPos;
MyMove(aSourceString[LastPos],Result[ResultLen+1], BytesToCopy);
MyMove(aReplaceString[1],Result[ResultLen+1+BytesToCopy], ReplaceLen);
ResultLen := ResultLen + BytesToCopy + ReplaceLen;
CurrentPos := CurrentPos + FindLen;
LastPos := CurrentPos;
until false;
end else begin
repeat
CurrentPos := FastPos(aSourceString,
aFindString, SourceLen, FindLen, CurrentPos);
if CurrentPos = 0 then break;
BytesToCopy := CurrentPos-LastPos;
MyMove(aSourceString[LastPos],
Result[ResultLen+1], BytesToCopy);
ResultLen := ResultLen +
BytesToCopy + ReplaceLen;
CurrentPos := CurrentPos + FindLen;
LastPos := CurrentPos;
until false;
end;
Dec(LastPOS);
SetLength(Result, ResultLen + (SourceLen-LastPos));
if LastPOS+1 <= SourceLen then
MyMove(aSourceString[LastPos+1],Result[ResultLen+1],SourceLen-LastPos);
end;
constructor THTMLParser.Create(var E: THTMLElementArray);
begin
Elements := @E;
GetMem(Buffer,BUFFER_SIZE);
CreateHash;
end;
destructor THTMLParser.Destroy;
begin
FreeMem(Buffer);
Clear;
inherited Destroy;
end;
procedure THTMLParser.Clear;
var
i,j: integer;
begin
for i := 0 to High(Elements^) do begin
for j := 0 to High(Elements^[i].Attributes) do begin
if Elements^[i].Attributes[j].AttType = atString then
StrDispose(Elements^[i].Attributes[j].StrVal);
end;
SetLength(Elements^[i].Attributes,0);
end;
SetLength(Elements^,0);
end;
procedure THTMLParser.CreateHash;
var
i: integer;
begin
for i := 0 to High(ElementNames) do
ElementNamesHash[i] := GetHashCode(ElementNames[i][1],Length(ElementNames[i]));
for i := 0 to High(AttributeNames) do
AttributeNamesHash[i] := GetHashCode(AttributeNames[i][1],Length(AttributeNames[i]));
end;
procedure THTMLParser.NewElement(ID: THTMLElementID);
begin
if ElementCount >= Length(Elements^) then
SetLength(Elements^,Length(Elements^) + 2000);
FillChar(Elements^[ElementCount],SizeOf(THTMLElement),0);
Elements^[ElementCount].ID := ID;
Inc(ElementCount);
end;
function THTMLParser.GetElementID(S: string): THTMLElementID;
var
i: integer;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -