⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 readhtml.pas

📁 查看html文件的控件
💻 PAS
📖 第 1 页 / 共 5 页
字号:

{Version 9.4}
{*********************************************************}
{*                     READHTML.PAS                      *}
{*              Copyright (c) 1995-2006 by               *}
{*                   L. David Baldwin                    *}
{*                 All rights reserved.                  *}
{*                                                       *}
{*           Thanks to Mike Lischke for his              *}
{*        assistance with the Unicode conversion         *} 
{*                                                       *}
{*********************************************************}

{$i htmlcons.inc}

{             The Parser
This module contains the parser which reads thru the document.  It divides it
into sections storing the pertinent information in Section objects.  The
document itself is then a TList of section objects.  See the HTMLSubs unit for
the definition of the section objects.

Key Variables:

  Sy:
      An enumerated type which indicates what the current token is.  For
      example, a value of TextSy would indicate a hunk of text, PSy that a <P>
      tag was encountered, etc.
  LCh:
      The next character in the stream to be analyzed.  In mixed case.
  Ch:
      The same character in upper case.
  LCToken:
      A string which is associated with the current token.  If Sy is TextSy,
      then LCToken contains the text.
  Attributes:
      A list of TAttribute's for tokens such as <img>, <a>, which have
      attributes.
  Section:
      The current section being built.
  SectionList:
      The list of sections which form the document.  When in a Table,
      SectionList will contain the list that makes up the current cell.

Key Routines:

  GetCh:
      Gets the next character from the stream.  Fills Ch and LCh.  Skips
      comments.
  Next:
      Gets the next token.  Fills Sy, LCToken, Attributes.  Calls GetCh so the
      next character after the present token is available.  Each part of the
      parser is responsible for calling Next after it does its thing.
}

unit Readhtml;

interface
uses
  SysUtils, WinTypes, WinProcs, Messages, Classes, Graphics, Controls, 
  Dialogs, StdCtrls, HTMLUn2, StyleUn;

type
  LoadStyleType = (lsFile, lsString, lsInclude);
  TIncludeType = procedure(Sender: TObject; const Command: string;
                    Params: TStrings; var IString: string) of Object;
  TSoundType = procedure(Sender: TObject; const SRC: string; Loop: integer;
                    Terminate: boolean) of Object;
  TMetaType = procedure(Sender: TObject; const HttpEq, Name, Content: string) of Object;
  TLinkType = procedure(Sender: TObject; const Rel, Rev, Href: string) of Object;

  TGetStreamEvent = procedure(Sender: TObject; const SRC: string;
                    var Stream: TMemoryStream) of Object;
  TFrameViewerBase = class(TWinControl)
    private
      procedure wmerase(var msg:TMessage); message wm_erasebkgnd;
    protected
    FOnInclude: TIncludeType;
    FOnSoundRequest: TSoundType;
    FOnScript: TScriptEvent;
    FOnLink: TLinkType;

    procedure AddFrame(FrameSet: TObject; Attr: TAttributeList; const FName: string); virtual; abstract;
    function CreateSubFrameSet(FrameSet: TObject): TObject; virtual; abstract;
    procedure DoAttributes(FrameSet: TObject; Attr: TAttributeList); virtual; abstract;
    procedure EndFrameSet(FrameSet: TObject); virtual; abstract;
    end;

  TPropStack = class(TFreeList)
    private
      function GetProp(Index: integer): TProperties;
    public
      property AnItem[Index: integer]: TProperties read GetProp; default;
      function Last: TProperties;
      procedure Delete(Index: integer);
    end;

var
  PropStack: TPropStack;
  Title: string;
  Base: string;
  CodePage: integer;   
  BaseTarget: string;
  NoBreak: boolean;       {set when in <NoBr>}

procedure ParseHTMLString(const S: string; ASectionList: TList;
                   AIncludeEvent: TIncludeType;
                   ASoundEvent: TSoundType; AMetaEvent: TMetaType; ALinkEvent: TLinkType);
procedure ParseTextString(const S: string; ASectionList: TList);  

procedure FrameParseString(FrameViewer: TFrameViewerBase; FrameSet: TObject;
              ALoadStyle: LoadStyleType; const FName, S: string; AMetaEvent: TMetaType);
function IsFrameString(ALoadStyle: LoadStyleType; const FName, S : string;
             FrameViewer: TObject): boolean;  
function TranslateCharset(const Content: string; var Charset: TFontCharset): boolean;
procedure InitializeFontSizes(Size: integer);
function PushNewProp(const Tag, AClass, AnID, APseudo, ATitle: string; AProp: TProperties): boolean;
procedure PopAProp(Tag: string);

implementation

uses
  htmlsubs, htmlsbs1, htmlview, StylePars, UrlSubs;

Const
  Tab = #9;
  EofChar = #0;

var
  Sy : Symb;
  Section : TSection;
  SectionList: TCellBasic;
  MasterList: TSectionList;
  CurrentURLTarget: TURLTarget;
  InHref: boolean;
  Attributes : TAttributeList;
  BaseFontSize: integer;
  InScript: boolean;     {when in a <SCRIPT>}
  TagIndex: integer;
  BodyBlock: TBodyBlock;
  ListLevel: integer;
  TableLevel: integer;
  Entities: TStringList;
  InComment: boolean;
  LinkSearch: boolean;
  SIndex: integer;
  IsUTF8: boolean;  


type
  SymString = string[12];

Const
  MaxRes = 80;
  MaxEndRes = 57;
  ResWords : array[1..MaxRes] of SymString =
     ('HTML', 'TITLE', 'BODY', 'HEAD', 'B', 'I', 'H', 'EM', 'STRONG',
      'U', 'CITE', 'VAR', 'TT', 'CODE', 'KBD', 'SAMP', 'OL', 'UL', 'DIR',
      'MENU', 'DL',
      'A', 'ADDRESS', 'BLOCKQUOTE', 'PRE', 'CENTER', 'TABLE', 'TD', 'TH',
      'CAPTION', 'FORM', 'TEXTAREA', 'SELECT', 'OPTION', 'FONT', 'SUB', 'SUP',
      'BIG', 'SMALL', 'P', 'MAP', 'FRAMESET', 'NOFRAMES', 'SCRIPT', 'DIV',
      'S', 'STRIKE', 'TR', 'NOBR', 'STYLE', 'SPAN', 'COLGROUP', 'LABEL',  
      'THEAD', 'TBODY', 'TFOOT', 'OBJECT',

      'LI', 'BR', 'HR', 'DD', 'DT', 'IMG', 'BASE', 'BUTTON','INPUT',
      'SELECTED', 'BASEFONT', 'AREA', 'FRAME', 'PAGE', 'BGSOUND', 'WRAP',
      'META', 'PANEL', 'WBR', 'LINK', 'COL', 'PARAM', 'READONLY');

  ResSy : array[1..MaxRes] of Symb =
     (htmlSy, TitleSy, BodySy, HeadSy, BSy, ISy, HeadingSy, EmSy, StrongSy,
      USy, CiteSy, VarSy, TTSy, CodeSy, KbdSy, SampSy, OLSy, ULSy, DirSy,
      MenuSy, DLSy, ASy, AddressSy, BlockQuoteSy, PreSy, CenterSy,TableSy,
      TDsy, THSy, CaptionSy, FormSy, TextAreaSy,  SelectSy, OptionSy, FontSy,
      SubSy, SupSy, BigSy, SmallSy, PSy, MapSy, FrameSetSy, NoFramesSy,
      ScriptSy, DivSy, SSy, StrikeSy, TRSy, NoBrSy, StyleSy, SpanSy, ColGroupSy,
      LabelSy, THeadSy, TBodySy, TFootSy, ObjectSy,   

      LISy, BRSy, HRSy, DDSy, DTSy, ImageSy, BaseSy, ButtonSy,
      InputSy, SelectedSy, BaseFontSy, AreaSy, FrameSy, PageSy, BgSoundSy,
      WrapSy, MetaSy, PanelSy, WbrSy, LinkSy, ColSy, ParamSy, ReadonlySy);     

  {keep these in order with those above}
  EndResSy : array[1..MaxEndRes] of Symb =
     (HtmlEndSy, TitleEndSy, BodyEndSy, HeadEndSy, BEndSy, IEndSy, HeadingEndSy,
      EmEndSy, StrongEndSy, UEndSy, CiteEndSy, VarEndSy, TTEndSy, CodeEndSy,
      KbdEndSy, SampEndSy,
      OLEndSy, ULEndSy, DirEndSy, MenuEndSy, DLEndSy, AEndSy, AddressEndSy,
      BlockQuoteEndSy, PreEndSy, CenterEndSy, TableEndSy, TDEndSy, THEndSy,
      CaptionEndSy, FormEndSy, TextAreaEndSy, SelectEndSy, OptionEndSy, FontEndSy,
      SubEndSy, SupEndSy, BigEndSy, SmallEndSy, PEndSy, MapEndSy, FrameSetEndSy,
      NoFramesEndSy, ScriptEndSy, DivEndSy, SEndSy, StrikeEndSy, TREndSy,
      NoBrEndSy, StyleEndSy, SpanEndSy, ColGroupEndSy, LabelEndSy,
      THeadEndSy, TBodyEndSy, TFootEndSy, ObjectEndSy);

Type
  EParseError = class(Exception);
Var
  LCh, Ch: Char;
  LastChar: (lcOther, lcCR, lcLF);
  Value : integer;
  LCToken : TokenObj;
  LoadStyle: LoadStyleType;
  Buff, BuffEnd: PChar;
  DocS: string;
  HaveTranslated: boolean;   

  IBuff, IBuffEnd: PChar;
  SIBuff: string;     
  IncludeEvent: TIncludeType;
  CallingObject: TObject;
  SaveLoadStyle: LoadStyleType;
  SoundEvent: TSoundType;
  MetaEvent: TMetaType;
  LinkEvent: TLinkType;

function PropStackIndex: integer;
begin
Result := PropStack.Count-1;
end;

function SymbToStr(Sy: Symb): string;
var
  I: integer;
begin
for I := 1 to MaxRes do
  if ResSy[I] = Sy then
    begin
    Result := Lowercase(ResWords[I]);
    Exit;
    end;
Result := '';
end;

function EndSymbToStr(Sy: Symb): string;
var
  I: integer;
begin
for I := 1 to MaxEndRes do
  if EndResSy[I] = Sy then
    begin
    Result := Lowercase(ResWords[I]);
    Exit;
    end;
Result := '';
end;

function EndSymbFromSymb(Sy: Symb): Symb;
var
  I: integer;
begin
for I := 1 to MaxEndRes do
  if ResSy[I] = Sy then
    begin
    Result := EndResSy[I];
    Exit;
    end;
Result := HtmlSy;  {won't match}
end;

function StrToSymb(const S: string): Symb;
var
  I: integer;
  S1: string;
begin
S1 := UpperCase(S);
for I := 1 to MaxRes do
  if ResWords[I] = S1 then
    begin
    Result := ResSy[I];
    Exit;
    end;
Result := OtherSy;
end;

function GetNameValueParameter(var Name, Value: String): boolean; forward;

function ReadChar: char;
begin
case LoadStyle of
  lsString:
     begin
     if Buff < BuffEnd then
       begin
       Result := Buff^;
       Inc(Buff);
       Inc(SIndex);
       end
     else
       Result := EOFChar;
     end;

  lsInclude:
     if IBuff < IBuffEnd then
       begin
       Result := IBuff^;
       Inc(IBuff);
       end
     else
       begin
       IBuff := Nil;       {reset for next include}
       LoadStyle := SaveLoadStyle;
       Result := ReadChar;
       end;
  else Result := #0;       {to prevent warning msg}
  end;
if (Integer(Buff) and $FFF = 0)    {about every 4000 chars}   
      and not LinkSearch and Assigned(MasterList) and (DocS <> '') then
  ThtmlViewer(CallingObject).htProgress(((Buff-PChar(DocS)) *MasterList.ProgressStart) div (BuffEnd-PChar(DocS)));
end;

{----------------GetchBasic; }
function GetchBasic: char; {read a character}
begin
LCh := ReadChar;
case LCh of    {skip a ^J after a ^M or a ^M after a ^J}
  ^M: if LastChar = lcLF then
        LCh := ReadChar;
  ^J: if LastChar = lcCR then
        LCh := ReadChar;
  end;
case LCh of
  ^M: LastChar := lcCR;
  ^J: begin
      LastChar := lcLF;
      LCh := ^M;
      end;
  else
      begin
      LastChar := lcOther;
      if LCh = Tab then LCh := ' ';
      end;
  end;
Ch := UpCase(LCh);
if (LCh = EofChar) and InComment then   
  Raise EParseError.Create('Open Comment at End of HTML File');
Result := LCh
end;

{-------------GetCh}
PROCEDURE GetCh;
{Return next char in Lch, its uppercase value in Ch.  Ignore comments}
var
  Done, Comment: boolean;   

  function Peek: char;  {take a look at the next char}
  begin
  case LoadStyle of
    lsString:
       begin
       if Buff < BuffEnd then
         Result := Buff^
       else
         Result := EOFChar;
       end;

    lsInclude:
       if IBuff < IBuffEnd then
         Result := IBuff^
       else
         begin
         IBuff := Nil;       
         LoadStyle := SaveLoadStyle;
         Result := Peek;
         end;
    else Result := #0;       {to prevent warning msg}
    end;
  end;

  procedure DoDashDash;   {do the comment after a <!-- }
  begin
  repeat
    while Ch <> '-' do GetChBasic; {get first '-'}
    GetChBasic;
    if Ch = '-' then  {second '-'}
      begin
      while Ch = '-' do GetChBasic;  {any number of '-'}
      while (Ch = ' ') or (Ch = ^M) do GetChBasic;  {eat white space}
      if Ch = '!' then GetChBasic;    {accept --!> also}
      Done := Ch = '>';
      end
    else Done := False;
  until Done;
  InComment := False;    
  end;

  procedure ReadToGT;    {read to the next '>' }
  begin
  while Ch <> '>' do
    GetChBasic;
  InComment := False;    
  end;

  procedure DoInclude;
  {recursive suggestions by Ben Geerdes}
  var
    S, Name, Value: string;
    Rest : string; 
    SL: TStringList;
    SaveLCToken: TokenObj;
  begin
  S := '';
  SaveLCToken := LCToken;  
  LCToken := TokenObj.Create; 
  try
    GetChBasic;
    while Ch in ['A'..'Z', '_', '0'..'9'] do
      begin
      S := S + LCh;
      GetChBasic;
      end;
    SL := TStringList.Create;
    while GetNameValueParameter(Name, Value) do
      SL.Add(Name+'='+Value);
    DoDashDash;
    Rest := IBuff; 
    SIBuff := '';
    IncludeEvent(CallingObject, S, SL, SIBuff);   
    if Length(SIBuff) > 0 then
      begin
      if LoadStyle <> lsInclude then 
        SaveLoadStyle := LoadStyle;
      LoadStyle := lsInclude;
      SIBuff := SIBuff + Rest; 
      IBuff := PAnsiChar(SIBuff);
      IBuffEnd := IBuff+Length(SIBuff);
      end;
  finally
    LCToken.Free;     
    LCToken := SaveLCToken;  
    end;
  end;

begin  {Getch}
repeat    {in case a comment immediately follows another comment}
   {comments may be either '<! stuff >' or '<!-- stuff -->'  }
  Comment := False;
  GetchBasic;
  if (Ch = '<') and not InScript then
    begin
    if Peek = '!' then
      begin
      GetChBasic;
      Comment:=True;
      InComment := True;
      GetChBasic;
      if Ch = '-' then
        begin
        GetChBasic;
        if Ch = '-' then
          begin

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -