⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 udownlistthread.pas

📁 这是一个从指定网页格式分离单词的小程序
💻 PAS
字号:
{-----------------------------------------------------------------------------
 Unit Name: uDownListThread
 Author:    Piao
 Date:      2005-3-12 15:21:29
 Purpose:   下载线程单元
 History:
-----------------------------------------------------------------------------}
unit uDownListThread;

interface

uses
  Classes, Windows, ADODB, Activex, IdHTTP, SHDocVw, MSHTML_TLB,
  OleServer, SysUtils, Forms;

type
  TRefreshDownList = class(TThread)
  private
    { Private declarations }
    FmdbLinkStr: string;       //数据库连接字串
    FstrURL: string;           //URL链接
    FProHandle: HWND;          //处理消息的Handle
    FADOCon: TADOConnection;
    FProADOQ: TADOQuery;       //添加指定数据
    FIdHTTPDown: TIdHTTP;      //下载数据组件
    FPHtmlWB: TWebBrowser;     //分析Html
    FLoadHtmlComplete: boolean;//是否Html加载完毕
    FFailedTry: integer;       //下载文件失败重试次数
    FUrlList: TStringList;     //已经抓取的链接
  protected
    function InitADOConn: boolean;
    procedure Execute; override;
    procedure WebBrowserDocComplete(Sender: TObject;
      const pDisp: IDispatch; var URL: OleVariant);
    procedure LoadHtmlToWB(WebBro: TWebBrowser; StrHtml: String);
    function DownHtml(var StrHtml: string): boolean;
  public
    constructor Create(mdbLinkStr, strURL:String; ProHandle: HWND; FailedTry: integer);
    destructor Destroy; override;
  end;

implementation

uses uPublic, uConst;

{ Important: Methods and properties of objects in visual components can only be
  used in a method called using Synchronize, for example,

      Synchronize(UpdateCaption);

  and UpdateCaption could look like,

    procedure RefreshDownList.UpdateCaption;
    begin
      Form1.Caption := 'Updated in a thread';
    end; }

{ RefreshDownList }

function TRefreshDownList.InitADOConn: boolean;
begin
  Result := FADOCon.Connected;
  with FADOCon do
  if not Result then
  try
    Close;
    LoginPrompt := False;
    ConnectionString := FmdbLinkStr;
    Open;
  except
    Result := False;
  end;//try
  Result := FADOCon.Connected;
end;

constructor TRefreshDownList.Create(mdbLinkStr, strURL: String;
  ProHandle: HWND; FailedTry: integer);
begin
  inherited Create(False);
  FreeOnTerminate := True;         //当线程结束时自动释放
  FmdbLinkStr := mdbLinkStr;       //数据库连接字串
  FstrURL := strURL;               //URL链接
  FProHandle := ProHandle;         //处理消息的Handle
  FFailedTry := FailedTry;         //下载文件失败重试次数
  Activex.CoInitialize(nil);
  FADOCon := TADOConnection.Create(nil);
  FProADOQ := TADOQuery.Create(nil);
  FProADOQ.Connection := FADOCon;
  FIdHTTPDown := TIdHTTP.Create(nil);
  FPHtmlWB := TWebBrowser.Create(nil);
  FPHtmlWB.ParentWindow := ProHandle;
  FPHtmlWB.Left := 0;
  FPHtmlWB.Top := 0;
  FPHtmlWB.Width := 0;
  FPHtmlWB.Height := 0;

  FLoadHtmlComplete := False;     //默认标识为没有加载完毕,注意每次加载前置为False
  FUrlList := TStringList.Create;
end;

destructor TRefreshDownList.Destroy;
begin
  FADOCon.Free;
  FProADOQ.Free;
  FIdHTTPDown.Free;
  FPHtmlWB.Free;
  FUrlList.Free;
  Activex.CoUninitialize;
  inherited Destroy;
end;

procedure TRefreshDownList.Execute;
var DownStrHtml: string;
begin
  { Place thread code here }
  if InitADOConn then
  if DownHtml(DownStrHtml) and (not Terminated) then
  begin
//    LoadHtmlToWB(FPHtmlWB, DownStrHtml);
    FPHtmlWB.OnDocumentComplete := WebBrowserDocComplete;
    FPHtmlWB.Navigate('D:\应聘实验\Test01.html');
    while not FLoadHtmlComplete do
    begin
      Sleep(10000);
      //Application.ProcessMessages;
    end;//while
    if FLoadHtmlComplete then SendMessage(FProHandle,WM_SendErrorMsg,0,0);
  end;//if
end;

procedure TRefreshDownList.WebBrowserDocComplete(Sender: TObject;
  const pDisp: IDispatch; var URL: OleVariant);
var Doc: IHTMLDocument2;
    ElementCollection: IHTMLElementCollection;
    HtmlElement: IHTMLElement;
    AnchorStr, AnchorLinkStr: string;
    i: integer;
begin

  Doc := FPHtmlWB.Document as IHTMLDocument2;
  if not Assigned(Doc) then
  begin
    Exit;
  end;//if
  //Doc.url := FstrURL;
  ElementCollection := Doc.all;//夺取web上的所有元素。
  SendMessage(FProHandle,WM_SendErrorMsg,0,ElementCollection.Length);
  for i := 0 to ElementCollection.Length - 1 do
  begin
    HtmlElement := ElementCollection.Item(i,i) as IHTMLElement;
    if Assigned(HtmlElement) then
    if SameText(HtmlElement.tagName,'A') then
    begin
      AnchorLinkStr := (HtmlElement as IHTMLAnchorElement).href;
      AnchorLinkStr := UpperCase(AnchorLinkStr);//URL链接
      AnchorStr := Trim(HtmlElement.innerText); //链接名称
      if (Pos('ARTICLE_VIEW.ASP',AnchorLinkStr) > 0) and (AnchorStr <> '') then
      begin
        if FUrlList.IndexOf(AnchorLinkStr) = -1 then
        begin
          FUrlList.Add(AnchorLinkStr);

          //AnchorLinkStr := StringReplace(AnchorLinkStr,Doc.url,' ',[rfReplaceAll]);
          if GetCountCondition('FName','TEnWordClass','FName=''' + AnchorStr + '''',FProADOQ) = 0 then
          begin
            ExecADOQ('insert into TEnWordClass(FName,FURL) ' +
              ' values(''' + AnchorStr + ''',''' + AnchorLinkStr + ''')',FProADOQ)
          end
          else
          begin
            ExecADOQ('update TEnWordClass set FURL=''' +
               AnchorLinkStr + ''' where FName=''' + AnchorStr + '''' ,FProADOQ)
          end;//if
        end;//if
      end;//if
    end;//if
  end;//for i
//  FLoadHtmlComplete := ElementCollection.Length <> 0;
  FLoadHtmlComplete := True;
end;


procedure TRefreshDownList.LoadHtmlToWB(WebBro: TWebBrowser;
  StrHtml: String);
var MyMemStream: TMemoryStream;
begin
  FLoadHtmlComplete := False;     //默认标识为没有加载完毕,注意每次加载前置为False
  WebBro.OnDocumentComplete := nil;
  WebBro.Navigate('about:blank');
  while (WebBro.ReadyState <> READYSTATE_COMPLETE)  do
  begin
    Sleep(5);
    //Application.ProcessMessages;
  end;//while
  if Terminated then Exit;
  FLoadHtmlComplete := False;
  WebBro.OnDocumentComplete := WebBrowserDocComplete;
  MyMemStream := TMemoryStream.Create;
  MyMemStream.Write(Pointer(StrHtml)^,Length(StrHtml));
  //SendMessage(FProHandle,WM_SendErrorMsg,0,Length(StrHtml));
  MyMemStream.Seek(0,0);
  (WebBro.Document as IPersistStreamInit).Load(
    TStreamAdapter.Create(MyMemStream));
  MyMemStream.SaveToFile('c:\ok.txt');
  MyMemStream.Free;

end;

function TRefreshDownList.DownHtml(var StrHtml: string): boolean;
var iTryNum: integer;
begin
  Result := False;
  if FIdHTTPDown.Connected then FIdHTTPDown.Disconnect;
  for iTryNum := 0 to FFailedTry - 1 do
  begin
    try
      Result := True;
      StrHtml := '';
      StrHtml := FIdHTTPDown.Get(FstrURL);
    except
      Result := False;
    end;//try
    if (StrHtml <> '') or Result or Terminated then Break;
  end;//for iTryNum
  SendMessage(FProHandle,WM_SendErrorMsg,0,11);
end;

end.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -