📄 udownlistthread.pas
字号:
{-----------------------------------------------------------------------------
Unit Name: uDownListThread
Author: Piao
Date: 2005-3-12 15:21:29
Purpose: 下载线程单元
History:
-----------------------------------------------------------------------------}
unit uDownListThread;
interface
uses
Classes, Windows, ADODB, Activex, IdHTTP, SHDocVw, MSHTML_TLB,
OleServer, SysUtils, Forms;
type
TRefreshDownList = class(TThread)
private
{ Private declarations }
FmdbLinkStr: string; //数据库连接字串
FstrURL: string; //URL链接
FProHandle: HWND; //处理消息的Handle
FADOCon: TADOConnection;
FProADOQ: TADOQuery; //添加指定数据
FIdHTTPDown: TIdHTTP; //下载数据组件
FPHtmlWB: TWebBrowser; //分析Html
FLoadHtmlComplete: boolean;//是否Html加载完毕
FFailedTry: integer; //下载文件失败重试次数
FUrlList: TStringList; //已经抓取的链接
protected
function InitADOConn: boolean;
procedure Execute; override;
procedure WebBrowserDocComplete(Sender: TObject;
const pDisp: IDispatch; var URL: OleVariant);
procedure LoadHtmlToWB(WebBro: TWebBrowser; StrHtml: String);
function DownHtml(var StrHtml: string): boolean;
public
constructor Create(mdbLinkStr, strURL:String; ProHandle: HWND; FailedTry: integer);
destructor Destroy; override;
end;
implementation
uses uPublic, uConst;
{ Important: Methods and properties of objects in visual components can only be
used in a method called using Synchronize, for example,
Synchronize(UpdateCaption);
and UpdateCaption could look like,
procedure RefreshDownList.UpdateCaption;
begin
Form1.Caption := 'Updated in a thread';
end; }
{ RefreshDownList }
function TRefreshDownList.InitADOConn: boolean;
begin
Result := FADOCon.Connected;
with FADOCon do
if not Result then
try
Close;
LoginPrompt := False;
ConnectionString := FmdbLinkStr;
Open;
except
Result := False;
end;//try
Result := FADOCon.Connected;
end;
constructor TRefreshDownList.Create(mdbLinkStr, strURL: String;
ProHandle: HWND; FailedTry: integer);
begin
inherited Create(False);
FreeOnTerminate := True; //当线程结束时自动释放
FmdbLinkStr := mdbLinkStr; //数据库连接字串
FstrURL := strURL; //URL链接
FProHandle := ProHandle; //处理消息的Handle
FFailedTry := FailedTry; //下载文件失败重试次数
Activex.CoInitialize(nil);
FADOCon := TADOConnection.Create(nil);
FProADOQ := TADOQuery.Create(nil);
FProADOQ.Connection := FADOCon;
FIdHTTPDown := TIdHTTP.Create(nil);
FPHtmlWB := TWebBrowser.Create(nil);
FPHtmlWB.ParentWindow := ProHandle;
FPHtmlWB.Left := 0;
FPHtmlWB.Top := 0;
FPHtmlWB.Width := 0;
FPHtmlWB.Height := 0;
FLoadHtmlComplete := False; //默认标识为没有加载完毕,注意每次加载前置为False
FUrlList := TStringList.Create;
end;
destructor TRefreshDownList.Destroy;
begin
FADOCon.Free;
FProADOQ.Free;
FIdHTTPDown.Free;
FPHtmlWB.Free;
FUrlList.Free;
Activex.CoUninitialize;
inherited Destroy;
end;
procedure TRefreshDownList.Execute;
var DownStrHtml: string;
begin
{ Place thread code here }
if InitADOConn then
if DownHtml(DownStrHtml) and (not Terminated) then
begin
// LoadHtmlToWB(FPHtmlWB, DownStrHtml);
FPHtmlWB.OnDocumentComplete := WebBrowserDocComplete;
FPHtmlWB.Navigate('D:\应聘实验\Test01.html');
while not FLoadHtmlComplete do
begin
Sleep(10000);
//Application.ProcessMessages;
end;//while
if FLoadHtmlComplete then SendMessage(FProHandle,WM_SendErrorMsg,0,0);
end;//if
end;
procedure TRefreshDownList.WebBrowserDocComplete(Sender: TObject;
const pDisp: IDispatch; var URL: OleVariant);
var Doc: IHTMLDocument2;
ElementCollection: IHTMLElementCollection;
HtmlElement: IHTMLElement;
AnchorStr, AnchorLinkStr: string;
i: integer;
begin
Doc := FPHtmlWB.Document as IHTMLDocument2;
if not Assigned(Doc) then
begin
Exit;
end;//if
//Doc.url := FstrURL;
ElementCollection := Doc.all;//夺取web上的所有元素。
SendMessage(FProHandle,WM_SendErrorMsg,0,ElementCollection.Length);
for i := 0 to ElementCollection.Length - 1 do
begin
HtmlElement := ElementCollection.Item(i,i) as IHTMLElement;
if Assigned(HtmlElement) then
if SameText(HtmlElement.tagName,'A') then
begin
AnchorLinkStr := (HtmlElement as IHTMLAnchorElement).href;
AnchorLinkStr := UpperCase(AnchorLinkStr);//URL链接
AnchorStr := Trim(HtmlElement.innerText); //链接名称
if (Pos('ARTICLE_VIEW.ASP',AnchorLinkStr) > 0) and (AnchorStr <> '') then
begin
if FUrlList.IndexOf(AnchorLinkStr) = -1 then
begin
FUrlList.Add(AnchorLinkStr);
//AnchorLinkStr := StringReplace(AnchorLinkStr,Doc.url,' ',[rfReplaceAll]);
if GetCountCondition('FName','TEnWordClass','FName=''' + AnchorStr + '''',FProADOQ) = 0 then
begin
ExecADOQ('insert into TEnWordClass(FName,FURL) ' +
' values(''' + AnchorStr + ''',''' + AnchorLinkStr + ''')',FProADOQ)
end
else
begin
ExecADOQ('update TEnWordClass set FURL=''' +
AnchorLinkStr + ''' where FName=''' + AnchorStr + '''' ,FProADOQ)
end;//if
end;//if
end;//if
end;//if
end;//for i
// FLoadHtmlComplete := ElementCollection.Length <> 0;
FLoadHtmlComplete := True;
end;
procedure TRefreshDownList.LoadHtmlToWB(WebBro: TWebBrowser;
StrHtml: String);
var MyMemStream: TMemoryStream;
begin
FLoadHtmlComplete := False; //默认标识为没有加载完毕,注意每次加载前置为False
WebBro.OnDocumentComplete := nil;
WebBro.Navigate('about:blank');
while (WebBro.ReadyState <> READYSTATE_COMPLETE) do
begin
Sleep(5);
//Application.ProcessMessages;
end;//while
if Terminated then Exit;
FLoadHtmlComplete := False;
WebBro.OnDocumentComplete := WebBrowserDocComplete;
MyMemStream := TMemoryStream.Create;
MyMemStream.Write(Pointer(StrHtml)^,Length(StrHtml));
//SendMessage(FProHandle,WM_SendErrorMsg,0,Length(StrHtml));
MyMemStream.Seek(0,0);
(WebBro.Document as IPersistStreamInit).Load(
TStreamAdapter.Create(MyMemStream));
MyMemStream.SaveToFile('c:\ok.txt');
MyMemStream.Free;
end;
function TRefreshDownList.DownHtml(var StrHtml: string): boolean;
var iTryNum: integer;
begin
Result := False;
if FIdHTTPDown.Connected then FIdHTTPDown.Disconnect;
for iTryNum := 0 to FFailedTry - 1 do
begin
try
Result := True;
StrHtml := '';
StrHtml := FIdHTTPDown.Get(FstrURL);
except
Result := False;
end;//try
if (StrHtml <> '') or Result or Terminated then Break;
end;//for iTryNum
SendMessage(FProHandle,WM_SendErrorMsg,0,11);
end;
end.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -