⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 readhtml.pas

📁 查看html文件的控件
💻 PAS
📖 第 1 页 / 共 5 页
字号:
          GetChBasic;
          if Assigned(IncludeEvent) and (Ch = '#') then
            DoInclude
          else
            DoDashDash;  {a <!-- comment}
          end
        else ReadToGT;
        end
      else ReadToGT;
      end
    else if Peek = '%' then    { <%....%> regarded as comment }
      begin
      Comment := True;
      GetChBasic;
      repeat
        GetChBasic;
      until (Ch = '%') and (Peek = '>') or (Ch = EOFChar);
      GetChBasic;
      end;
    end;
until not Comment;
end;

{-------------SkipWhiteSpace}
procedure SkipWhiteSpace;
begin
while (LCh in [' ', Tab, ^M]) do
  GetCh;
end;

procedure GetEntity(T: TokenObj; CodePage: integer); forward;
function GetEntityStr(CodePage: integer): string; forward;

function GetQuotedValue(var S: String): boolean;  
{get a quoted string but strip the quotes}
var
  Term: char;
  SaveSy: Symb;
begin
Result := False;
Term := Ch;
if (Term <> '"') and (Term <> '''') then Exit;
Result := True;
SaveSy := Sy;
GetCh;
while not (Ch in [Term, EofChar]) do
  begin
  if LCh = '&' then
    S := S + GetEntityStr(CP_ACP)
  else
    begin
    if LCh = ^M then
      S := S + ' '
    else S := S + LCh;
    GetCh;
    end;
  end;
if Ch = Term then GetCh;  {pass termination char}
Sy := SaveSy;
end;

{----------------GetNameValueParameter}
function GetNameValueParameter(var Name, Value: String): boolean;
begin
Result := False;
SkipWhiteSpace;
Name := '';
if not (Ch in ['A'..'Z']) then Exit;
while Ch in ['A'..'Z', '_', '0'..'9'] do
  begin
  Name := Name+LCh;
  GetCh;
  end;

SkipWhiteSpace;
Value := '';
Result := True;      {at least have an ID}
if Ch <> '=' then Exit;
GetCh;

SkipWhiteSpace;
if not GetQuotedValue(Value) then
  {in case quotes left off string}
  while not (Ch in [' ', Tab, ^M, '-', '>', EofChar]) do {need to exclude '-' to find '-->'}
    begin
    Value := Value+LCh;
    GetCh;
    end;
end;

{----------------GetValue}
function GetValue(var S: String; var Value: integer): boolean;
{read a numeric.  Also reads a string if it looks like a numeric initially}
var
  Code: integer;
  ValD: double;
begin
Result := Ch in ['-', '+', '0'..'9'];
if not Result then Exit;
Value := 0;
if Ch in ['-', '+'] then
  begin
  S := Ch;
  GetCh;
  end
else S := '';
while not (Ch in [' ', Tab, ^M, '>', '%', EofChar]) do
  if LCh = '&' then
    S := S + GetEntityStr(PropStack.Last.CodePage)    
  else
    begin
    S := S + LCh;
    GetCh;
    end;
SkipWhiteSpace;
{see if a numerical value is appropriate.
 avoid the exception that happens when the likes of 'e1234' occurs}
try
  Val(S, ValD, Code);
    Value := Round(ValD);
except
  end;

if LCh = '%' then
  begin
  S := S + '%';
  GetCh;
  end;
end;

{----------------GetQuotedStr}
function GetQuotedStr(var S: String; var Value: integer; WantCrLf: boolean; Sym: Symb): boolean;
{get a quoted string but strip the quotes, check to see if it is numerical}
var
  Term: char;
  S1: string;
  Code: integer;
  ValD: double;
  SaveSy: Symb;
begin
Result := False;
Term := Ch;
if (Term <> '"') and (Term <> '''') then Exit;
Result := True;
SaveSy := Sy;
GetCh;
while not (Ch in [Term, EofChar]) do
  begin
  if LCh <> ^M then
    begin
    if LCh = '&' then
      begin
      if (Sym = ValueSy) and UnicodeControls then   
        S := S + GetEntityStr(PropStack.Last.CodePage)
      else
        S := S + GetEntityStr(CP_ACP);   
      end
    else
      begin
      S := S + LCh;
      GetCh;
      end;
    end
  else if WantCrLf then
    begin
    S := S + ^M+^J;
    GetCh;
    end
  else
    GetCh;
  end;
if Ch = Term then GetCh;  {pass termination char}
S1 := Trim(S);
if Pos('%', S1) = Length(S1) then
  SetLength(S1, Length(S1)-1);
{see if S1 evaluates to a numerical value.  Note that something like
 S1 = 'e8196' can give exception because of the 'e'}
Value := 0;
if (Length(S1) > 0) and (S1[1] in ['0'..'9', '+', '-', '.']) then   
  try
    Val(S1, ValD, Code);
      Value := Round(ValD);
  except
    end;
Sy := SaveSy;
end;

{----------------GetSomething}
procedure GetSomething(var S: string);  
begin
while not (Ch in [' ', Tab, ^M, '>', EofChar]) do
  if LCh = '&' then
    S := S + GetEntityStr(PropStack.Last.CodePage)    
  else
    begin
    S := S+LCh;
    GetCh;
    end;
end;

{----------------GetID}
function GetID(var S: String): boolean;

begin
Result := False;
if not (Ch in ['A'..'Z']) then Exit;
while Ch in ['A'..'Z', '-', '0'..'9'] do  
  begin
  S := S+Ch;
  GetCh;
  end;
Result := True;
end;

{----------------GetAttribute}
function GetAttribute(var Sym: Symb; var St: String;
           var S: string; var Val: integer): boolean;   

const
  MaxAttr = 84;
  Attrib : array[1..MaxAttr] of string[16] =
     ('HREF', 'NAME', 'SRC', 'ALT', 'ALIGN', 'TEXT', 'BGCOLOR', 'LINK',
      'BACKGROUND', 'COLSPAN', 'ROWSPAN', 'BORDER', 'CELLPADDING',
      'CELLSPACING', 'VALIGN', 'WIDTH', 'START', 'VALUE', 'TYPE',
      'CHECKBOX', 'RADIO', 'METHOD', 'ACTION', 'CHECKED', 'SIZE',
      'MAXLENGTH', 'COLS', 'ROWS', 'MULTIPLE', 'VALUE', 'SELECTED',
      'FACE', 'COLOR', 'TRANSP', 'CLEAR', 'ISMAP', 'BORDERCOLOR',
      'USEMAP', 'SHAPE', 'COORDS', 'NOHREF', 'HEIGHT', 'PLAIN', 'TARGET',
      'NORESIZE', 'SCROLLING', 'HSPACE', 'LANGUAGE', 'FRAMEBORDER',
      'MARGINWIDTH', 'MARGINHEIGHT', 'LOOP', 'ONCLICK', 'WRAP', 'NOSHADE',
      'HTTP-EQUIV', 'CONTENT', 'ENCTYPE', 'VLINK', 'OLINK', 'ACTIVE',
      'VSPACE', 'CLASS', 'ID', 'STYLE', 'REL', 'REV', 'NOWRAP',
      'BORDERCOLORLIGHT', 'BORDERCOLORDARK', 'CHARSET', 'RATIO',
      'TITLE', 'ONFOCUS', 'ONBLUR', 'ONCHANGE', 'SPAN', 'TABINDEX',  
      'BGPROPERTIES', 'DISABLED', 'TOPMARGIN', 'LEFTMARGIN', 'LABEL',
      'READONLY');
  AttribSym: array[1..MaxAttr] of Symb =
     (HrefSy, NameSy, SrcSy, AltSy, AlignSy, TextSy, BGColorSy, LinkSy,
      BackgroundSy, ColSpanSy, RowSpanSy, BorderSy, CellPaddingSy,
      CellSpacingSy, VAlignSy, WidthSy, StartSy, ValueSy, TypeSy,
      CheckBoxSy, RadioSy, MethodSy, ActionSy, CheckedSy, SizeSy,
      MaxLengthSy, ColsSy, RowsSy, MultipleSy, ValueSy, SelectedSy,
      FaceSy, ColorSy, TranspSy, ClearSy, IsMapSy, BorderColorSy,
      UseMapSy, ShapeSy, CoordsSy, NoHrefSy, HeightSy, PlainSy, TargetSy,
      NoResizeSy, ScrollingSy, HSpaceSy, LanguageSy, FrameBorderSy,
      MarginWidthSy, MarginHeightSy, LoopSy, OnClickSy, WrapSy, NoShadeSy,
      HttpEqSy, ContentSy, EncTypeSy, VLinkSy, OLinkSy, ActiveSy,
      VSpaceSy, ClassSy, IDSy, StyleSy, RelSy, RevSy, NoWrapSy,
      BorderColorLightSy, BorderColorDarkSy, CharSetSy, RatioSy,
      TitleSy, OnFocusSy, OnBlurSy, OnChangeSy, SpanSy, TabIndexSy,
      BGPropertiesSy, DisabledSy, TopMarginSy, LeftMarginSy, LabelSy,
      ReadonlySy);
var
  I: integer;
begin
Sym := OtherAttribute;
Result := False;
SkipWhiteSpace;
St := '';
if GetID(St) then
  begin
  for I := 1 to MaxAttr do
    if St = Attrib[I] then
      begin
      Sym := AttribSym[I];
      Break;
      end;
  end
else Exit;   {no ID}
SkipWhiteSpace;
S := '';
if Sym = BorderSy then Val := 1 else Val := 0;
Result := True;      {at least have an ID}
if Ch <> '=' then Exit;
GetCh;

SkipWhiteSpace;
if not GetQuotedStr(S, Val, Sym in [TitleSy, AltSy], Sym) then   {either it's a quoted string or a number}
if not GetValue(S, Val) then
  GetSomething(S);    {in case quotes left off string}
if (Sym = IDSy) and (S <> '') and Assigned(MasterList) and not LinkSearch then
  MasterList.IDNameList.AddChPosObject(S, SIndex);
end;

{-------------GetTag}
function GetTag: boolean;  {Pick up a Tag or pass a single '<'}
Var
  Done, EndTag : Boolean;
  Compare: String[255];
  SymStr: string;
  AttrStr: string;   
  I: Integer;
  L: integer;
  Save: integer;  
  Sym: Symb;
begin
if Ch <> '<' then
  begin
  Result := False;
  Exit;
  end
else Result := True;
Save := SIndex;
TagIndex := SIndex;
Compare := '';
GetCh;
if Ch = '/' then
  begin
  EndTag := True;
  GetCh;
  end
else if not (Ch in ['A'..'Z', '?']) then    
  begin     {an odd '<'}
  Sy := TextSy;
  LCToken.AddUnicodeChar('<', Save);
  Exit;
  end
else
  EndTag := False;
Sy := CommandSy;
Done := False;
while not Done do
  case Ch of
    'A'..'Z', '0'..'9', '/', '_' :        
          begin
          if (Ch = '/') and (Length(Compare) > 0) then    {allow xhtml's <br/>, etc }
            Done := True
          else if Length(Compare) < 255 then
            begin
            Inc(Compare[0]);
            Compare[Length(Compare)] := Ch;
            end;
          GetCh;
          Done := Done or (Ch in ['1'..'6']) and (Compare = 'H');
          end;
    else Done := True;
   end;
for I := 1 to MaxRes do
  if Compare = ResWords[I] then
    begin
    if not EndTag then
      Sy := ResSy[I]
    else
      if I <= MaxEndRes then
        Sy := EndResSy[I];   {else Sy  := CommandSy}
    Break;
    end;
SkipWhiteSpace;
Value := 0;
if ((Sy = HeadingSy) or (Sy = HeadingEndSy)) and (Ch in ['1'..'6']) then
  begin
  Value := ord(Ch)-ord('0');
  GetCh;
  end;

Attributes.Clear;
while GetAttribute(Sym, SymStr, AttrStr, L) do
    Attributes.Add(TAttribute.Create(Sym, L, SymStr, AttrStr, CodePage)); 

while (Ch <> '>') and (Ch <> EofChar) do
  GetCh;
if not (Sy in [StyleSy, ScriptSy]) then {in case <!-- comment immediately follows}
  GetCh;
end;

function CollectText: boolean;
// Considers the current data as pure text and collects everything until
// the input end or one of the reserved tokens is found.
var
  SaveIndex: Integer;
  Buffer: TCharCollection;
  CodePage: Integer;

begin
  Sy := TextSy;
  CodePage := PropStack.Last.CodePage;
  Buffer := TCharCollection.Create;
  try
    Result := not (LCh in [#0..#8, EOFChar, '<']);
    while not (LCh in [#0..#8, EOFChar, '<']) do
      begin
      while LCh = '&' do
        GetEntity(LCToken, CodePage);

      // Get any normal text.
      repeat
        SaveIndex := SIndex;
        // Collect all leading white spaces.
        if LCh in [' ', #13, #10, #9] then
        begin
          if not LinkSearch then     
            Buffer.Add(' ', SaveIndex);
          // Skip other white spaces.
          repeat
            GetCh;
          until not (LCh in [' ', #13, #10, #9]);
        end;
        // Collect any non-white space characters which are not special.
        while not (LCh in [#0..#8, EOFChar, '<', '&', ' ', #13, #10, #9]) do
        begin
          if not LinkSearch then   
            Buffer.Add(LCh, SIndex);
          GetCh;
        end;
      until LCh in [#0..#8, EOFChar, '<', '&'];
      if Buffer.Size > 0 then
        begin
        LCToken.AddString(Buffer, CodePage);
        Buffer.Clear;
        end;
      end;

    // Flush any pending ANSI string data.
    if Buffer.Size > 0 then
      LCToken.AddString(Buffer, CodePage);
  finally
    Buffer.Free;
  end;
end;

{-----------Next}
PROCEDURE Next;
  {Get the next token}
begin  {already have fresh character loaded here}
LCToken.Clear;
  if LCh = EofChar then
    Sy := EofSy
else
    if not GetTag then
      if not CollectText then
        if LCh in [#0..#8] then
          Raise EParseError.Create('Not an HTML or Text document'); 
 end;

function PushNewProp(const Tag, AClass, AnID, APseudo, ATitle: string; AProp: TProperties): boolean;
{add a TProperties to the Prop stack}
begin
PropStack.Add(TProperties.Create);
PropStack.Last.Inherit(Tag, PropStack[PropStackIndex-1]);  
PropStack.Last.Combine(MasterList.Styles, Tag, AClass, AnID, APseudo, ATitle, AProp);
Result := True;
end;

procedure PopProp;
{pop and free a TProperties from the Prop stack}
begin
if PropStackIndex > 0 then
  PropStack.Delete(PropStackIndex);
end;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -