📄 htmlparse2.pas
字号:
Hash: word;
begin
S := Uppercase(S);
Hash := GetHashCode(Pointer(S)^,Length(S));
for i := 0 to High(ElementNames) do begin
if (Hash = ElementNamesHash[i]) and (ElementNames[i] = S) then begin
Result := THTMLElementID(i);
Exit;
end;
end;
Result := heUNKNOWN;
end;
function THTMLParser.GetAttributeID(S: string): TElementAttributeID;
var
i: integer;
Hash: word;
begin
S := Uppercase(S);
Hash := GetHashCode(Pointer(S)^,Length(S));
for i := 0 to High(AttributeNames) do begin
if (Hash = AttributeNamesHash[i]) and (AttributeNames[i] = S) then begin
Result := TElementAttributeID(i);
Exit;
end;
end;
Result := eaUNKNOWN;
end;
function THTMLParser.ReadStream: boolean;
begin
BytesInBuffer := FStream.Read(Buffer^,BUFFER_SIZE);
BufferPos := 0;
Result := BytesInBuffer > 0;
end;
procedure THTMLParser.DoComment;
var
C,PrevC: char;
begin
PrevC := #0;
while GetNextChar(C) do begin
if (C = '>') and (PrevC = '-') then
Exit;
PrevC := C;
end;
end;
procedure THTMLParser.DoScript;
var
S: string;
C: char;
p: integer;
begin
S := '</script>';
while GetNextChar(C) do begin
if C = '<' then begin
p := 2;
while GetNextChar(C) and (Lowercase(C) = S[p]) and (p < Length(S)) do
Inc(p);
if (Lowercase(C) = S[p]) and (p = Length(S)) then begin
NewElement(heEndScript);
Exit;
end;
end;
end;
end;
function THTMLParser.GetNextChar(var C: char): boolean;
function RStripBlank: boolean;
var
C: char;
begin
Result := True;
if BufferPos >= BytesInBuffer then
Result := ReadStream;
if Result then begin
C := Buffer[BufferPos];
if C = ' ' then begin
Inc(BufferPos);
Result := RStripBlank;
end;
end;
end;
begin
Result := True;
if BufferPos >= BytesInBuffer then
Result := ReadStream;
if Result then begin
C := Buffer[BufferPos];
Inc(BufferPos);
if C in [#10,#13] then begin
if C = #13 then
Inc(LineCount);
Result := RStripBlank;
if Result then
Result := GetNextChar(C);
end;
end;
end;
procedure THTMLParser.AddAttribute(Index: integer; ID: TElementAttributeID; Data: string);
var
i: integer;
S: string;
function GetHtmlColor(s: string): TColor;
var
Col: TColor;
begin
if (CompareText(s, 'white') = 0) then Result := clWhite else
if (CompareText(s, 'black') = 0) then Result := clBlack else
if (CompareText(s, 'red') = 0) then Result := clRed else
if (CompareText(s, 'green') = 0) then Result := clGreen else
if (CompareText(s, 'blue') = 0) then Result := clBlue else
if (CompareText(s, 'aqua') = 0) then Result := clAqua else
if (CompareText(s, 'fuchsia') = 0) then Result := clFuchsia else
if (CompareText(s, 'gray') = 0) then Result := clDkGray else
if (CompareText(s, 'lime') = 0) then Result := clLime else
if (CompareText(s, 'maroon') = 0) then Result := clMaroon else
if (CompareText(s, 'navy') = 0) then Result := clNavy else
if (CompareText(s, 'olive') = 0) then Result := clOlive else
if (CompareText(s, 'purple') = 0) then Result := clPurple else
if (CompareText(s, 'silver') = 0) then Result := clGray else
if (CompareText(s, 'teal') = 0) then Result := clTeal else
if (CompareText(s, 'yellow') = 0) then Result := clYellow else
begin
if s[1] = '#' then
Delete(s, 1, 1);
Col := StrToIntDef('$'+s, -1);
if Col <> -1 then
Result := ((Col and $FF) shl 16) or (Col and $FF00) or ((Col and $FF0000) shr 16)
else
Result := 0;
end;
end;
begin
if ID = eaText then
DecodeHTMLText(Data);
i := Length(Elements^[Index].Attributes);
SetLength(Elements^[Index].Attributes,i + 1);
Elements^[Index].Attributes[i].ID := ID;
if ID in [eaSize] then begin
Elements^[Index].Attributes[i].AttType := atNumber;
Elements^[Index].Attributes[i].NumVal := StrToIntDef(Data,0);
if (Data <> '') and (Data[1] = '+') then
Elements^[Index].Attributes[i].NumVal := -(Elements^[Index].Attributes[i].NumVal + 100000);
end
else if ID in [eaAlign] then begin
S := Lowercase(Data);
Elements^[Index].Attributes[i].AttType := atAlign;
with Elements^[Index].Attributes[i] do begin
if S = 'left' then AlignVal := aaLeft
else if S = 'center' then AlignVal := aaCenter
else if S = 'right' then AlignVal := aaRight
else if S = 'justify' then AlignVal := aaJustify
else if S = 'char' then AlignVal := aaChar
else if S = 'top' then AlignVal := aaTop
else if S = 'middle' then AlignVal := aaMiddle
else if S = 'bottom' then AlignVal := aaBottom
else AlignVal := aaLeft;
end;
end
else if ID in [eaColor,eaBgcolor] then begin
Elements^[Index].Attributes[i].AttType := atColor;
Elements^[Index].Attributes[i].ColorVal := GetHtmlColor(Data);
end
else if ID in [eaHeight,eaWidth] then begin
if CPos('%',Data) > 0 then begin
Elements^[Index].Attributes[i].AttType := atPercent;
Elements^[Index].Attributes[i].PercentVal := StrToIntDef(Copy(Data,1,Length(Data) - 1),100);
end
else begin
Elements^[Index].Attributes[i].AttType := atNumber;
Elements^[Index].Attributes[i].NumVal := StrToIntDef(Data,0);
end;
end
else begin
Elements^[Index].Attributes[i].AttType := atString;
Elements^[Index].Attributes[i].StrVal := StrAlloc(Length(Data) + 1);
StrPCopy(Elements^[Index].Attributes[i].StrVal,Data);
end;
end;
procedure THTMLParser.ScanAttributes(Index: integer; S: string);
var
p,i: integer;
Val: string;
ID: TElementAttributeID;
begin
repeat
S := Trim(S);
p := CPos('=',S);
if p > 0 then begin
ID := GetAttributeID(Copy(S,1,p - 1));
if (Length(S) >= p + 1) and (S[p + 1] = '"') then begin
i := p + 2;
while (i < Length(S)) and (S[i] <> '"') do
Inc(i);
Val := Copy(S,P + 2,i - (P + 2));
S := Copy(S,i + 1,1024);
end
else begin
i := CPos(' ',S);
if i > 0 then begin
Val := Copy(S,p + 1,i - (p + 1));
S := Copy(S,i + 1,1024);
end
else begin
Val := Copy(S,p + 1,MAXINT);
S := '';
end;
end;
AddAttribute(Index,ID,Val);
end;
until (p <= 0);
end;
procedure THTMLParser.DecodeHTMLText(var S: string);
type
TCTableEntry = record
CData : string;
CChar : byte;
end;
const
CTable : array[0..99] of TCTableEntry =
(
(CData:' '; CChar: 32),(CData:'"'; CChar: 34),(CData:'&'; CChar: 38),
(CData:'<'; CChar: 60),(CData:'>'; CChar: 62),
(* ISO Latin-1 (cnt=95) *)
(CData:'¡'; CChar: 161),(CData:'¢'; CChar: 162),
(CData:'£'; CChar: 163),(CData:'¤'; CChar: 164),(CData:'¥'; CChar: 165),
(CData:'¦'; CChar: 166),(CData:'§'; CChar: 167),(CData:'¨'; CChar: 168),
(CData:'©'; CChar: 169),(CData:'ª'; CChar: 170),(CData:'«'; CChar: 171),
(CData:'¬'; CChar: 172),(CData:'­'; CChar: 173),(CData:'®'; CChar: 174),
(CData:'¯'; CChar: 175),(CData:'°'; CChar: 176),(CData:'±'; CChar: 177),
(CData:'²'; CChar: 178),(CData:'³'; CChar: 179),(CData:'´'; CChar: 180),
(CData:'µ'; CChar: 181),(CData:'¶'; CChar: 182),(CData:'·'; CChar: 183),
(CData:'¸'; CChar: 184),(CData:'¹'; CChar: 185),(CData:'º'; CChar: 186),
(CData:'»'; CChar: 187),(CData:'¼'; CChar: 188),(CData:'½'; CChar: 189),
(CData:'¾'; CChar: 190),(CData:'¿'; CChar: 191),(CData:'À'; CChar: 192),
(CData:'Á'; CChar: 193),(CData:'Â'; CChar: 194),(CData:'Ã'; CChar: 195),
(CData:'Ä'; CChar: 196),(CData:'Å'; CChar: 197),(CData:'Æ'; CChar: 198),
(CData:'Ç'; CChar: 199),(CData:'È'; CChar: 200),(CData:'É'; CChar: 201),
(CData:'Ê'; CChar: 202),(CData:'Ë'; CChar: 203),(CData:'Ì'; CChar: 204),
(CData:'Í'; CChar: 205),(CData:'Î'; CChar: 206),(CData:'Ï'; CChar: 207),
(CData:'Ð'; CChar: 208),(CData:'Ñ'; CChar: 209),(CData:'Ò'; CChar: 210),
(CData:'Ó'; CChar: 211),(CData:'Ô'; CChar: 212),(CData:'Õ'; CChar: 213),
(CData:'Ö'; CChar: 214),(CData:'×'; CChar: 215),(CData:'Ø'; CChar: 216),
(CData:'Ù'; CChar: 217),(CData:'Ú'; CChar: 218),(CData:'Ûv;'; CChar: 219),
(CData:'Ü'; CChar: 220),(CData:'Ý'; CChar: 221),(CData:'Þ'; CChar: 222),
(CData:'ß'; CChar: 223),(CData:'à'; CChar: 224),(CData:'á'; CChar: 225),
(CData:'â'; CChar: 226),(CData:'ã'; CChar: 227),(CData:'ä'; CChar: 228),
(CData:'å'; CChar: 229),(CData:'æ'; CChar: 230),(CData:'ç'; CChar: 231),
(CData:'è'; CChar: 232),(CData:'é'; CChar: 233),(CData:'ê'; CChar: 234),
(CData:'ë'; CChar: 235),(CData:'ì'; CChar: 236),(CData:'í'; CChar: 237),
(CData:'î'; CChar: 238),(CData:'ï'; CChar: 239),(CData:'ð'; CChar: 240),
(CData:'ñ'; CChar: 241),(CData:'ò'; CChar: 242),(CData:'ó'; CChar: 243),
(CData:'ô'; CChar: 244),(CData:'õ'; CChar: 245),(CData:'ö'; CChar: 246),
(CData:'÷'; CChar: 247),(CData:'ø'; CChar: 248),(CData:'ù'; CChar: 249),
(CData:'ú'; CChar: 250),(CData:'û'; CChar: 251),(CData:'ü'; CChar: 252),
(CData:'ý'; CChar: 253),(CData:'þ'; CChar: 254),(CData:'ÿ'; CChar: 255)
);
var
i: integer;
SC: string;
begin
SetLength(SC,1);
for i := 0 to High(CTable) do begin
SC[1] := Char(CTable[i].CChar);
S := FastReplace(S,CTable[i].CData,SC,True);
end;
end;
function THTMLParser.GetNextTag: boolean;
var
C: char;
i,p: integer;
OK: boolean;
S,Tag: string;
begin
Result := True;
NewElement(heUNKNOWN);
i := ElementCount - 1;
Tag := '';
S := '';
C := #0;
while GetNextChar(C) and (C <> '<') do
S := S + C;
if S <> '' then begin
Elements^[i].ID := heText;
AddAttribute(i,eaText,S);
// AddAttribute(i,eaText,TrimLeft(S));
NewElement(heUNKNOWN);
i := ElementCount - 1;
end;
OK := GetNextChar(C);
if OK then begin
if C = '!' then begin
if GetNextChar(C) and (C = '-') then
DoComment
end
else if C = '/' then begin
while GetNextChar(C) and (C <> '>') do
Tag := Tag + C;
if C = '>' then
Elements^[i].ID := THTMLElementID(Integer(GetElementID(Tag)) + Integer(heBeginEndtags) + 1)
else
Elements^[i].ID := heEndUNKNOWN;
Exit;
end;
end;
if (C in ['A'..'Z']) or (C in ['a'..'z']) then begin
Tag := C;
while GetNextChar(C) and (C <> '>') do
Tag := Tag + C;
if C = '>' then begin
p := CPos(' ',Tag);
if p > 0 then begin
S := Copy(Tag,1,p - 1);
ScanAttributes(i,Copy(Tag,p + 1,1024));
end
else
S := Tag;
Elements^[i].ID := GetElementID(S);
if Elements^[i].ID = heScript then
DoScript;
end;
end
else if not OK then begin
Elements^[i].ID := heEOF;
Result := False;
end;
end;
procedure THTMLParser.LoadFromStream(Stream: TStream);
begin
FStream := Stream;
LineCount := 0;
try
while GetNextTag do ;
except
raise Exception.Create('HTML Error on line #' + IntToStr(LineCount));
end;
SetLength(Elements^,ElementCount);
end;
procedure THTMLParser.LoadFromFile(const Filename: string);
var
Stream: TFileStream;
begin
Stream := TFileStream.Create(Filename,fmOpenRead + fmShareDenyNone);
try
LoadFromStream(Stream);
finally
Stream.Free;
end;
end;
function THTMLParser.GetStrAttribute(E: THTMLElement; Attr: TElementAttributeID; var S: string): boolean;
var
i: integer;
begin
for i := 0 to High(E.Attributes) do begin
if (E.Attributes[i].ID = Attr) and (E.Attributes[i].AttType = atString) then begin
S := E.Attributes[i].StrVal;
Result := True;
Exit;
end;
end;
S := '';
Result := False;
end;
function THTMLParser.GetAttribute(E: THTMLElement; Attr: TElementAttributeID): PElementAttribute;
var
i: integer;
begin
for i := 0 to High(E.Attributes) do begin
if E.Attributes[i].ID = Attr then begin
Result := @E.Attributes[i];
Exit;
end;
end;
Result := Nil;
end;
end.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -