📄 readhtml.pas
字号:
GetChBasic;
if Assigned(IncludeEvent) and (Ch = '#') then
DoInclude
else
DoDashDash; {a <!-- comment}
end
else ReadToGT;
end
else ReadToGT;
end
else if Peek = '%' then { <%....%> regarded as comment }
begin
Comment := True;
GetChBasic;
repeat
GetChBasic;
until (Ch = '%') and (Peek = '>') or (Ch = EOFChar);
GetChBasic;
end;
end;
until not Comment;
end;
{-------------SkipWhiteSpace}
procedure SkipWhiteSpace;
begin
while (LCh in [' ', Tab, ^M]) do
GetCh;
end;
procedure GetEntity(T: TokenObj; CodePage: integer); forward;
function GetEntityStr(CodePage: integer): string; forward;
function GetQuotedValue(var S: String): boolean;
{get a quoted string but strip the quotes}
var
Term: char;
SaveSy: Symb;
begin
Result := False;
Term := Ch;
if (Term <> '"') and (Term <> '''') then Exit;
Result := True;
SaveSy := Sy;
GetCh;
while not (Ch in [Term, EofChar]) do
begin
if LCh = '&' then
S := S + GetEntityStr(CP_ACP)
else
begin
if LCh = ^M then
S := S + ' '
else S := S + LCh;
GetCh;
end;
end;
if Ch = Term then GetCh; {pass termination char}
Sy := SaveSy;
end;
{----------------GetNameValueParameter}
function GetNameValueParameter(var Name, Value: String): boolean;
begin
Result := False;
SkipWhiteSpace;
Name := '';
if not (Ch in ['A'..'Z']) then Exit;
while Ch in ['A'..'Z', '_', '0'..'9'] do
begin
Name := Name+LCh;
GetCh;
end;
SkipWhiteSpace;
Value := '';
Result := True; {at least have an ID}
if Ch <> '=' then Exit;
GetCh;
SkipWhiteSpace;
if not GetQuotedValue(Value) then
{in case quotes left off string}
while not (Ch in [' ', Tab, ^M, '-', '>', EofChar]) do {need to exclude '-' to find '-->'}
begin
Value := Value+LCh;
GetCh;
end;
end;
{----------------GetValue}
function GetValue(var S: String; var Value: integer): boolean;
{read a numeric. Also reads a string if it looks like a numeric initially}
var
Code: integer;
ValD: double;
begin
Result := Ch in ['-', '+', '0'..'9'];
if not Result then Exit;
Value := 0;
if Ch in ['-', '+'] then
begin
S := Ch;
GetCh;
end
else S := '';
while not (Ch in [' ', Tab, ^M, '>', '%', EofChar]) do
if LCh = '&' then
S := S + GetEntityStr(PropStack.Last.CodePage)
else
begin
S := S + LCh;
GetCh;
end;
SkipWhiteSpace;
{see if a numerical value is appropriate.
avoid the exception that happens when the likes of 'e1234' occurs}
try
Val(S, ValD, Code);
Value := Round(ValD);
except
end;
if LCh = '%' then
begin
S := S + '%';
GetCh;
end;
end;
{----------------GetQuotedStr}
function GetQuotedStr(var S: String; var Value: integer; WantCrLf: boolean; Sym: Symb): boolean;
{get a quoted string but strip the quotes, check to see if it is numerical}
var
Term: char;
S1: string;
Code: integer;
ValD: double;
SaveSy: Symb;
begin
Result := False;
Term := Ch;
if (Term <> '"') and (Term <> '''') then Exit;
Result := True;
SaveSy := Sy;
GetCh;
while not (Ch in [Term, EofChar]) do
begin
if LCh <> ^M then
begin
if LCh = '&' then
begin
if (Sym = ValueSy) and UnicodeControls then
S := S + GetEntityStr(PropStack.Last.CodePage)
else
S := S + GetEntityStr(CP_ACP);
end
else
begin
S := S + LCh;
GetCh;
end;
end
else if WantCrLf then
begin
S := S + ^M+^J;
GetCh;
end
else
GetCh;
end;
if Ch = Term then GetCh; {pass termination char}
S1 := Trim(S);
if Pos('%', S1) = Length(S1) then
SetLength(S1, Length(S1)-1);
{see if S1 evaluates to a numerical value. Note that something like
S1 = 'e8196' can give exception because of the 'e'}
Value := 0;
if (Length(S1) > 0) and (S1[1] in ['0'..'9', '+', '-', '.']) then
try
Val(S1, ValD, Code);
Value := Round(ValD);
except
end;
Sy := SaveSy;
end;
{----------------GetSomething}
procedure GetSomething(var S: string);
begin
while not (Ch in [' ', Tab, ^M, '>', EofChar]) do
if LCh = '&' then
S := S + GetEntityStr(PropStack.Last.CodePage)
else
begin
S := S+LCh;
GetCh;
end;
end;
{----------------GetID}
function GetID(var S: String): boolean;
begin
Result := False;
if not (Ch in ['A'..'Z']) then Exit;
while Ch in ['A'..'Z', '-', '0'..'9'] do
begin
S := S+Ch;
GetCh;
end;
Result := True;
end;
{----------------GetAttribute}
function GetAttribute(var Sym: Symb; var St: String;
var S: string; var Val: integer): boolean;
const
MaxAttr = 84;
Attrib : array[1..MaxAttr] of string[16] =
('HREF', 'NAME', 'SRC', 'ALT', 'ALIGN', 'TEXT', 'BGCOLOR', 'LINK',
'BACKGROUND', 'COLSPAN', 'ROWSPAN', 'BORDER', 'CELLPADDING',
'CELLSPACING', 'VALIGN', 'WIDTH', 'START', 'VALUE', 'TYPE',
'CHECKBOX', 'RADIO', 'METHOD', 'ACTION', 'CHECKED', 'SIZE',
'MAXLENGTH', 'COLS', 'ROWS', 'MULTIPLE', 'VALUE', 'SELECTED',
'FACE', 'COLOR', 'TRANSP', 'CLEAR', 'ISMAP', 'BORDERCOLOR',
'USEMAP', 'SHAPE', 'COORDS', 'NOHREF', 'HEIGHT', 'PLAIN', 'TARGET',
'NORESIZE', 'SCROLLING', 'HSPACE', 'LANGUAGE', 'FRAMEBORDER',
'MARGINWIDTH', 'MARGINHEIGHT', 'LOOP', 'ONCLICK', 'WRAP', 'NOSHADE',
'HTTP-EQUIV', 'CONTENT', 'ENCTYPE', 'VLINK', 'OLINK', 'ACTIVE',
'VSPACE', 'CLASS', 'ID', 'STYLE', 'REL', 'REV', 'NOWRAP',
'BORDERCOLORLIGHT', 'BORDERCOLORDARK', 'CHARSET', 'RATIO',
'TITLE', 'ONFOCUS', 'ONBLUR', 'ONCHANGE', 'SPAN', 'TABINDEX',
'BGPROPERTIES', 'DISABLED', 'TOPMARGIN', 'LEFTMARGIN', 'LABEL',
'READONLY');
AttribSym: array[1..MaxAttr] of Symb =
(HrefSy, NameSy, SrcSy, AltSy, AlignSy, TextSy, BGColorSy, LinkSy,
BackgroundSy, ColSpanSy, RowSpanSy, BorderSy, CellPaddingSy,
CellSpacingSy, VAlignSy, WidthSy, StartSy, ValueSy, TypeSy,
CheckBoxSy, RadioSy, MethodSy, ActionSy, CheckedSy, SizeSy,
MaxLengthSy, ColsSy, RowsSy, MultipleSy, ValueSy, SelectedSy,
FaceSy, ColorSy, TranspSy, ClearSy, IsMapSy, BorderColorSy,
UseMapSy, ShapeSy, CoordsSy, NoHrefSy, HeightSy, PlainSy, TargetSy,
NoResizeSy, ScrollingSy, HSpaceSy, LanguageSy, FrameBorderSy,
MarginWidthSy, MarginHeightSy, LoopSy, OnClickSy, WrapSy, NoShadeSy,
HttpEqSy, ContentSy, EncTypeSy, VLinkSy, OLinkSy, ActiveSy,
VSpaceSy, ClassSy, IDSy, StyleSy, RelSy, RevSy, NoWrapSy,
BorderColorLightSy, BorderColorDarkSy, CharSetSy, RatioSy,
TitleSy, OnFocusSy, OnBlurSy, OnChangeSy, SpanSy, TabIndexSy,
BGPropertiesSy, DisabledSy, TopMarginSy, LeftMarginSy, LabelSy,
ReadonlySy);
var
I: integer;
begin
Sym := OtherAttribute;
Result := False;
SkipWhiteSpace;
St := '';
if GetID(St) then
begin
for I := 1 to MaxAttr do
if St = Attrib[I] then
begin
Sym := AttribSym[I];
Break;
end;
end
else Exit; {no ID}
SkipWhiteSpace;
S := '';
if Sym = BorderSy then Val := 1 else Val := 0;
Result := True; {at least have an ID}
if Ch <> '=' then Exit;
GetCh;
SkipWhiteSpace;
if not GetQuotedStr(S, Val, Sym in [TitleSy, AltSy], Sym) then {either it's a quoted string or a number}
if not GetValue(S, Val) then
GetSomething(S); {in case quotes left off string}
if (Sym = IDSy) and (S <> '') and Assigned(MasterList) and not LinkSearch then
MasterList.IDNameList.AddChPosObject(S, SIndex);
end;
{-------------GetTag}
function GetTag: boolean; {Pick up a Tag or pass a single '<'}
Var
Done, EndTag : Boolean;
Compare: String[255];
SymStr: string;
AttrStr: string;
I: Integer;
L: integer;
Save: integer;
Sym: Symb;
begin
if Ch <> '<' then
begin
Result := False;
Exit;
end
else Result := True;
Save := SIndex;
TagIndex := SIndex;
Compare := '';
GetCh;
if Ch = '/' then
begin
EndTag := True;
GetCh;
end
else if not (Ch in ['A'..'Z', '?']) then
begin {an odd '<'}
Sy := TextSy;
LCToken.AddUnicodeChar('<', Save);
Exit;
end
else
EndTag := False;
Sy := CommandSy;
Done := False;
while not Done do
case Ch of
'A'..'Z', '0'..'9', '/', '_' :
begin
if (Ch = '/') and (Length(Compare) > 0) then {allow xhtml's <br/>, etc }
Done := True
else if Length(Compare) < 255 then
begin
Inc(Compare[0]);
Compare[Length(Compare)] := Ch;
end;
GetCh;
Done := Done or (Ch in ['1'..'6']) and (Compare = 'H');
end;
else Done := True;
end;
for I := 1 to MaxRes do
if Compare = ResWords[I] then
begin
if not EndTag then
Sy := ResSy[I]
else
if I <= MaxEndRes then
Sy := EndResSy[I]; {else Sy := CommandSy}
Break;
end;
SkipWhiteSpace;
Value := 0;
if ((Sy = HeadingSy) or (Sy = HeadingEndSy)) and (Ch in ['1'..'6']) then
begin
Value := ord(Ch)-ord('0');
GetCh;
end;
Attributes.Clear;
while GetAttribute(Sym, SymStr, AttrStr, L) do
Attributes.Add(TAttribute.Create(Sym, L, SymStr, AttrStr, CodePage));
while (Ch <> '>') and (Ch <> EofChar) do
GetCh;
if not (Sy in [StyleSy, ScriptSy]) then {in case <!-- comment immediately follows}
GetCh;
end;
function CollectText: boolean;
// Considers the current data as pure text and collects everything until
// the input end or one of the reserved tokens is found.
var
SaveIndex: Integer;
Buffer: TCharCollection;
CodePage: Integer;
begin
Sy := TextSy;
CodePage := PropStack.Last.CodePage;
Buffer := TCharCollection.Create;
try
Result := not (LCh in [#0..#8, EOFChar, '<']);
while not (LCh in [#0..#8, EOFChar, '<']) do
begin
while LCh = '&' do
GetEntity(LCToken, CodePage);
// Get any normal text.
repeat
SaveIndex := SIndex;
// Collect all leading white spaces.
if LCh in [' ', #13, #10, #9] then
begin
if not LinkSearch then
Buffer.Add(' ', SaveIndex);
// Skip other white spaces.
repeat
GetCh;
until not (LCh in [' ', #13, #10, #9]);
end;
// Collect any non-white space characters which are not special.
while not (LCh in [#0..#8, EOFChar, '<', '&', ' ', #13, #10, #9]) do
begin
if not LinkSearch then
Buffer.Add(LCh, SIndex);
GetCh;
end;
until LCh in [#0..#8, EOFChar, '<', '&'];
if Buffer.Size > 0 then
begin
LCToken.AddString(Buffer, CodePage);
Buffer.Clear;
end;
end;
// Flush any pending ANSI string data.
if Buffer.Size > 0 then
LCToken.AddString(Buffer, CodePage);
finally
Buffer.Free;
end;
end;
{-----------Next}
PROCEDURE Next;
{Get the next token}
begin {already have fresh character loaded here}
LCToken.Clear;
if LCh = EofChar then
Sy := EofSy
else
if not GetTag then
if not CollectText then
if LCh in [#0..#8] then
Raise EParseError.Create('Not an HTML or Text document');
end;
function PushNewProp(const Tag, AClass, AnID, APseudo, ATitle: string; AProp: TProperties): boolean;
{add a TProperties to the Prop stack}
begin
PropStack.Add(TProperties.Create);
PropStack.Last.Inherit(Tag, PropStack[PropStackIndex-1]);
PropStack.Last.Combine(MasterList.Styles, Tag, AClass, AnID, APseudo, ATitle, AProp);
Result := True;
end;
procedure PopProp;
{pop and free a TProperties from the Prop stack}
begin
if PropStackIndex > 0 then
PropStack.Delete(PropStackIndex);
end;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -