📄 clmultidownloader.pas
字号:
DoExtractUrls(Item, AUrlList, handled);
if not handled then
begin
urlParser := nil;
parser := nil;
try
urlParser := TclUrlParser.Create();
parser := TclHtmlParser.Create(nil);
parser.ParseMethod := pmTagsOnly;
parser.Parse(AHtml);
AUrlList.Clear();
for i := 0 to parser.Frames.Count - 1 do
begin
attr := parser.Frames[i].AttributeByName('src');
if (attr <> nil) and (attr.Value <> '') then
begin
AUrlList.Add(attr.Value, attr.InnerTextPos);
if (gaGrabFrames in GrabOptions) then
begin
urlParser.Parse(AUrlList[AUrlList.Count - 1].URL);
if (urlParser.Host <> '') then
begin
FAllowedUrls.Add(LowerCase(urlParser.Host));
end;
end;
end;
end;
for i := 0 to parser.Links.Count - 1 do
begin
attr := parser.Links[i].AttributeByName('href');
if (attr <> nil) and (attr.Value <> '') then
begin
AUrlList.Add(attr.Value, attr.InnerTextPos);
end;
end;
for i := 0 to parser.Images.Count - 1 do
begin
attr := parser.Images[i].AttributeByName('src');
if (attr <> nil) and (attr.Value <> '') then
begin
AUrlList.Add(attr.Value, attr.InnerTextPos);
end;
end;
finally
parser.Free();
urlParser.Free();
end;
end;
end;
function TclCustomMultiDownLoaderControl.GetFullFileNameByUrl(const AUrl, ALocalFolder: string): string;
var
s: string;
URLParser: TclUrlParser;
begin
URLParser := TclUrlParser.Create();
try
URLParser.Parse(AUrl);
Result := AddTrailingBackSlash(ALocalFolder);
s := StringReplace(URLParser.Urlpath, '/', '\', [rfReplaceAll]);
if (s <> '') and (s[1] = '\') then
begin
system.Delete(s, 1, 1);
end;
if (s = '') or (s[Length(s)] = '\') then
begin
s := s + 'index.htm';
end;
Result := Result + s;
if not (gaReplaceExisting in GrabOptions) then
begin
Result := GetUniqueFileName(Result);
end;
finally
URLParser.Free();
end;
end;
function TclCustomMultiDownLoaderControl.ProcessNextUrl(const AUrl: string;
APrevItem: TclDownLoadItem): string;
var
i: Integer;
item: TclDownLoadItem;
canProcess, handled: Boolean;
prevUrl, fullUrl: string;
BaseURLParser: TclUrlParser;
begin
if (APrevItem <> nil) then
begin
prevUrl := APrevItem.URL;
canProcess := (FMaxNestLevel = 0) or (APrevItem.NestLevel < FMaxNestLevel - 1);
end else
begin
canProcess := True;
prevUrl := AUrl;
end;
fullUrl := LowerCase(GetFullUrlByRoot(AUrl, prevUrl));
if canProcess then
begin
BaseURLParser := TclUrlParser.Create();
try
BaseURLParser.Parse(FRootUrl);
canProcess := (system.Pos(BaseURLParser.Host, fullUrl) > 0);
if not canProcess then
begin
BaseURLParser.Parse(fullUrl);
canProcess := FAllowedUrls.IndexOf(LowerCase(BaseURLParser.Host)) > -1;
end;
finally
BaseURLParser.Free();
end;
end;
if canProcess then
begin
canProcess := (system.Pos('#', AUrl) < 1);
end;
if canProcess then
begin
for i := 0 to DownLoadList.Count - 1 do
begin
canProcess := (DownLoadList[i].URL <> fullUrl);
if (not canProcess) then Break;
end;
end;
item := DownLoadList.Add();
if (APrevItem <> nil) then
begin
item.FNestLevel := APrevItem.NestLevel + 1;
end;
item.URL := fullUrl;
item.LocalFile := GetFullFileNameByUrl(fullUrl, LocalFolder);
handled := False;
DoProcessNextUrl(item, canProcess, handled);
Result := item.LocalFile;
if canProcess then
begin
item.Start(True);
end else
begin
item.Free();
if not FileExists(Result) then
begin
Result := '';
end;
end;
end;
function TclCustomMultiDownLoaderControl.GetInternetItems(Index: Integer): TclInternetItem;
begin
Result := FDownLoadList[Index];
end;
function TclCustomMultiDownLoaderControl.GetInternetItemsCount: Integer;
begin
Result := FDownLoadList.Count;
end;
procedure TclCustomMultiDownLoaderControl.GrabWebsite(const ARootUrl: string;
AMaxNestLevel: Integer);
begin
if IsGrabbing or IsBusy then
begin
raise EclInternetError.Create(cOperationIsInProgress, -1);
end;
FAllowedUrls.Clear();
FIsGrabbing := True;
DownLoadList.Clear();
FMaxNestLevel := AMaxNestLevel;
if (FMaxNestLevel < 0) then
begin
FMaxNestLevel := 0;
end;
FRootUrl := ARootUrl;
ProcessNextUrl(FRootUrl, nil);
end;
procedure TclCustomMultiDownLoaderControl.IsBusyChanged;
begin
if not IsBusy then
begin
FIsGrabbing := False;
end;
inherited IsBusyChanged();
end;
procedure TclCustomMultiDownLoaderControl.SetDownLoadList(const Value: TclDownLoadList);
begin
FDownLoadList.Assign(Value);
if (csLoading in ComponentState) then Exit;
AssignSaveToFiles();
end;
procedure TclCustomMultiDownLoaderControl.SetLocalFolder(const Value: string);
begin
if (FLocalFolder = Value) then Exit;
FLocalFolder := Value;
if (csLoading in ComponentState) then Exit;
AssignSaveToFiles();
end;
procedure TclCustomMultiDownLoaderControl.SetPreviewCharCount(const Value: Integer);
begin
if (FPreviewCharCount <> Value) and (Value > - 1) then
begin
FPreviewCharCount := Value;
end;
end;
procedure TclCustomMultiDownLoaderControl.InternalStop(Item: TclInternetItem);
begin
FIsGrabbing := False;
inherited InternalStop(Item);
end;
procedure TclCustomMultiDownLoaderControl.StartNextItem(APrevItem: TclInternetItem);
var
i: Integer;
html: TStrings;
list: TclUrlList;
urlFile: string;
begin
inherited StartNextItem(APrevItem);
if IsGrabbing and CanProcessGrabbedUrl(APrevItem as TclDownLoadItem) then
begin
list := nil;
html := nil;
try
list := TclUrlList.Create(TclUrlItem);
html := TStringList.Create();
html.LoadFromFile(APrevItem.LocalFile);
ExtractUrls(APrevItem as TclDownLoadItem, html, list);
for i := 0 to list.Count - 1 do
begin
urlFile := ProcessNextUrl(list[i].URL, APrevItem as TclDownLoadItem);
if (gaMakeBrowsable in GrabOptions) then
begin
ReplaceHtmlUrl(html, list[i], APrevItem.LocalFile, urlFile);
end;
end;
if (gaMakeBrowsable in GrabOptions) then
begin
//TODO html.SaveToFile(ChangeFileExt(APrevItem.LocalFile, '.htm'));
// DeleteFile(PChar(APrevItem.LocalFile));
html.SaveToFile(APrevItem.LocalFile);
end;
finally
html.Free();
list.Free();
end;
end;
end;
procedure TclCustomMultiDownLoaderControl.ReplaceHtmlUrl(AHtml: TStrings; const AUrlItem: TclUrlItem;
const AHtmlFile, AUrlFile: string);
var
newUrl, src, dst: string;
begin
if (AUrlFile = '') or (AHtmlFile = '') or (AUrlItem.URL = '') or (AUrlItem.InnerTextPos = 0) then Exit;
newUrl := MakeRelativePath(AHtmlFile, AUrlFile);
if (newUrl <> '') then
begin
src := AHtml.Text;
dst := system.Copy(src, 1, AUrlItem.InnerTextPos - 1);
dst := dst + newUrl + system.Copy(src, AUrlItem.InnerTextPos + Length(AUrlItem.URL), Length(src));
AHtml.Text := dst;
end;
end;
procedure TclCustomMultiDownLoaderControl.DoFileExists(const AFileName: string;
var ANewName: string; var Action: TclFileExistsAction);
begin
if Assigned(OnFileExists) then
begin
OnFileExists(Self, AFileName, ANewName, Action);
end;
end;
type TclInternetItemAccess = class(TclInternetItem);
function TclCustomMultiDownLoaderControl.CanProcess(Item: TclInternetItem): Boolean;
var
s, newName: string;
action: TclFileExistsAction;
begin
Result := inherited CanProcess(Item);
if Result and IsGrabbing and (Item.ResourceInfo <> nil) then
begin
s := LowerCase(Item.ResourceInfo.ContentType);
Result := (Item.ResourceInfo.StatusCode < HTTP_STATUS_BAD_REQUEST) and (
(gaGrabAll in GrabOptions)
or (system.Pos('html', s) > 0)
or ((system.Pos('image', s) > 0) and (gaGrabImages in GrabOptions))
or ((system.Pos('audio', s) > 0) and (gaGrabAudioVideo in GrabOptions))
or ((system.Pos('video', s) > 0) and (gaGrabAudioVideo in GrabOptions))
or ((system.Pos('application', s) > 0) and (gaGrabData in GrabOptions))
);
end;
if not TclInternetItemAccess(Item).IsCommit and Result and (Item.LocalFile <> '') and FileExists(Item.LocalFile) then
begin
newName := '';
action := faReplace;
DoFileExists(Item.LocalFile, newName, action);
case action of
faRename: TclInternetItemAccess(Item).FLocalFile := newName;
faSkip: Result := False;
faStop:
begin
Result := False;
Stop();
end;
end;
end;
end;
{ TclUrlList }
function TclUrlList.Add(const AUrl: string; AInnerTextPos: Integer): TclUrlItem;
begin
Result := TclUrlItem(inherited Add());
Result.URL := AUrl;
Result.InnerTextPos := AInnerTextPos;
end;
function TclUrlList.GetItem(Index: Integer): TclUrlItem;
begin
Result := TclUrlItem(inherited GetItem(Index));
end;
procedure TclUrlList.SetItem(Index: Integer; const Value: TclUrlItem);
begin
inherited SetItem(Index, Value);
end;
{ TclUrlItem }
procedure TclUrlItem.Assign(Source: TPersistent);
begin
if (Source is TclUrlItem) then
begin
FInnerTextPos := TclUrlItem(Source).InnerTextPos;
FURL := TclUrlItem(Source).URL;
end else
begin
inherited Assign(Source);
end;
end;
end.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -