📄 frmwebrobot.frm
字号:
txtOutput.Text = ""
Set Analyst = New clsDocumentAnalyst
Analyst.Init txtSearchString.Text
If RootWebNode.AlreadyContains(txtURL.Text, ExistingNode) Then
Set Diplomat = DispatchNewDiplomat(ExistingNode.URL)
If Not (Diplomat Is Nothing) Then
SearchWeb ExistingNode, SearchDepth, Diplomat
End If
Else
Set NewWebNode = New clsHTMLPageResourceNode
NewWebNode.URL = txtURL.Text
NewWebNode.Path = txtURL.Text
Set NewWebNode.Parent = RootWebNode
Set Diplomat = DispatchNewDiplomat(NewWebNode.URL)
If Not (Diplomat Is Nothing) Then
RootWebNode.PageLinks.Add NewWebNode, NewWebNode.URL
SearchWeb NewWebNode, SearchDepth, Diplomat
End If
End If
txtURL.Enabled = True
mnuPerformSearch.Enabled = True
mnuStopSearch.Enabled = False
tbToolbar.Buttons("btnSearch").Enabled = True
tbToolbar.Buttons("btnStop").Enabled = False
tmrBusy.Enabled = False
Me.MousePointer = vbDefault
bPerformingSearch = False
bPerformingRequest = False
End Select
End If
End Sub
Private Function ValidSearchDepth(SearchDepth As Integer)
ValidSearchDepth = True
If IsNumeric(txtSearchDepth) Then
SearchDepth = Val(txtSearchDepth.Text)
If Not (SearchDepth = Abs(Int(SearchDepth)) And _
(SearchDepth > 0)) Then
MsgBox "Please enter an integer value for Search Depth.", _
vbOKOnly + vbCritical, _
"Invalid Search Depth"
ValidSearchDepth = False
End If
Else
MsgBox "Please enter an integer value for Search Depth.", _
vbOKOnly + vbCritical, _
"Invalid Search Depth"
ValidSearchDepth = False
End If
End Function
Private Sub udSearchDepth_DownClick()
Dim SearchDepth As Integer
If ValidSearchDepth(SearchDepth) Then
SearchDepth = MaxLong(1, SearchDepth - 1)
txtSearchDepth.Text = Str(SearchDepth)
End If
End Sub
Private Sub udSearchDepth_UpClick()
Dim SearchDepth As Integer
If ValidSearchDepth(SearchDepth) Then
SearchDepth = MinLong(SearchDepth + 1, 999)
txtSearchDepth.Text = Str(SearchDepth)
End If
End Sub
Private Sub SearchWeb(CurrentWebNode As clsHTMLPageResourceNode, _
ByVal LevelsToSearch As Integer, _
ByVal Diplomat As clsDiplomat)
Dim WebNode As clsHTMLPageResourceNode
Dim NodeIcon As Integer
Dim TempText As String
' Evaluate Access Restrictions
If Diplomat.NewServerURL(CurrentWebNode.URL) Then
Set Diplomat = DispatchNewDiplomat(CurrentWebNode.URL)
End If
If Diplomat.Rejects(CurrentWebNode.URL) Then
AddNodeToTreeView CurrentWebNode, icoERROR
Exit Sub
End If
If AbleToGetWebDocument(CurrentWebNode.URL) Then
TimeOfLastTransfer = Now
CurrentWebNode.MatchesSearch = Analyst.DocumentMatches(txtOutput.Text)
If CurrentWebNode.MatchesSearch Then
NodeIcon = icoHIT
ElseIf InStr(CurrentWebNode.Path, "://") Then
NodeIcon = icoSERVER
Else
NodeIcon = icoDOCUMENT
End If
AddNodeToTreeView CurrentWebNode, NodeIcon
If (LevelsToSearch > 1) Then
TempText = txtOutput.Text
ExtractLinksFromDocument TempText, "href", CurrentWebNode
ExtractLinksFromDocument TempText, "src", CurrentWebNode
For Each WebNode In CurrentWebNode.PageElements
DoEvents
If bSearchCancelled Then Exit For
AddNodeToTreeView WebNode, icoIMAGE
Next WebNode
For Each WebNode In CurrentWebNode.PageLinks
DoEvents
If bSearchCancelled Then Exit For
Do Until DateDiff("s", TimeOfLastTransfer, Now) > 5
DoEvents
Loop
SearchWeb WebNode, LevelsToSearch - 1, Diplomat
Debug.Print "Call SearchWeb "; WebNode.URL
Next WebNode
If bSearchCancelled Then MsgBox "Search Cancelled"
End If
Else
AddNodeToTreeView CurrentWebNode, icoERROR
End If
End Sub
Public Sub AddNodeToTreeView(CurrentWebNode As clsHTMLPageResourceNode, _
TreeNodeIcon As Integer)
Dim CurrentTreeViewNode As Node
Dim WebNode As clsHTMLPageResourceNode
On Error Resume Next
Set CurrentTreeViewNode = tvURLTreeView.Nodes(CurrentWebNode.URL)
If CurrentTreeViewNode Is Nothing Then
If (CurrentWebNode.Parent Is Nothing) Then
Set CurrentTreeViewNode = tvURLTreeView.Nodes.Add _
(, , _
CurrentWebNode.URL, CurrentWebNode.Path, _
TreeNodeIcon, TreeNodeIcon)
Else
Set CurrentTreeViewNode = tvURLTreeView.Nodes.Add _
(CurrentWebNode.Parent.URL, tvwChild, _
CurrentWebNode.URL, CurrentWebNode.Path, _
TreeNodeIcon, TreeNodeIcon)
End If
Else
tvURLTreeView.Nodes(CurrentWebNode.URL).Image = TreeNodeIcon
tvURLTreeView.Nodes(CurrentWebNode.URL).SelectedImage = TreeNodeIcon
End If
If Not (CurrentTreeViewNode Is Nothing) Then
Set ActiveTreeNode = CurrentTreeViewNode
CurrentTreeViewNode.EnsureVisible
End If
End Sub
Public Function DispatchNewDiplomat(ByVal URL As String) As clsDiplomat
Dim AgentFileReceived As Boolean
Dim Diplomat As clsDiplomat
URL = GetServerURLFrom(URL)
AgentFileReceived = AbleToGetWebDocument(URL & "/robots.txt")
If HTTPSemaphore.Cancelled Then
Set DispatchNewDiplomat = Nothing
Else
Set Diplomat = New clsDiplomat
Diplomat.Init URL, txtOutput.Text
Set DispatchNewDiplomat = Diplomat
End If
End Function
Public Function AbleToGetWebDocument(URL As String) As Boolean
' Initialize Semaphore Flags.
HTTPSemaphore.Error = False
HTTPSemaphore.Timeout = False
HTTPSemaphore.Connected = False
HTTPSemaphore.ConnectionFailed = False
HTTPSemaphore.Complete = False
HTTPSemaphore.Cancelled = False
' Request HTML document retrieval.
txtOutput.Text = ""
bPerformingRequest = True
Inet1.Execute URL
' Wait until asynchronous transfer terminates:
Do
DoEvents
Debug.Print "Waiting for transfer..."
Loop Until (HTTPSemaphore.Complete Or _
HTTPSemaphore.Cancelled Or _
bSearchCancelled)
' I also monitored for a ConnectionFailed event:
' HTTPSemaphore.ConnectionFailed Or _
'
' but this seemed to fire even when the document transfer
' was able to continue and complete successfully,
' which caused the program to make the next request
' before the previous request had finished, resulting
' in an error.
' The correct way to detect activity on the Inet control
' would be to watch the StillExecuting property. But, at
' this writing, this property never indicates True, which
' prevents me from using it:
'Do While Inet1.StillExecuting
' DoEvents
' Loop
AbleToGetWebDocument = HTTPSemaphore.Complete
End Function
Public Function AbleToGetWebDocumentHeaders(URL As String) As Boolean
' Initialize Semaphore Flags.
HTTPSemaphore.Error = False
HTTPSemaphore.Timeout = False
HTTPSemaphore.Connected = False
HTTPSemaphore.ConnectionFailed = False
HTTPSemaphore.Complete = False
HTTPSemaphore.Cancelled = False
' Request HTML document header retrieval.
txtOutput.Text = ""
bPerformingRequest = True
bRequestingHeaders = True
Inet1.Execute URL, "HEAD"
' Wait until asynchronous transfer terminates:
Do
DoEvents
Loop Until (HTTPSemaphore.Complete Or _
HTTPSemaphore.Cancelled Or _
bSearchCancelled)
bRequestingHeaders = False
AbleToGetWebDocumentHeaders = HTTPSemaphore.Complete
End Function
Private Sub ExtractLinksFromDocument(Document As String, _
TagParamName As String, _
CurrentWebNode As clsHTMLPageResourceNode)
Dim HostCollection As Collection
Dim ParentURL As String
Dim ServerURL As String
Dim TargetURL As String
Dim URLType As String
Dim StringPos As Long
Dim CurrentStartPos As Long
Dim WebNode As clsHTMLPageResourceNode
Dim ExistingNode As clsHTMLPageResourceNode
Document = LCase(Document)
CurrentWebNode.URL = LCase(CurrentWebNode.URL)
' Extract ParentURL from Document URL.
ParentURL = GetParentURLFrom(CurrentWebNode.URL)
' Extract ServerURL from Document URL.
ServerURL = GetServerURLFrom(CurrentWebNode.URL)
CurrentStartPos = 1
Do
TargetURL = GetNextTargetURLFromDocument(Document, CurrentStartPos, TagParamName)
If TargetURL <> "" Then
On Error Resume Next
StringPos = InStr(TargetURL, ":")
If StringPos > 0 Then
URLType = Left(TargetURL, StringPos - 1)
If URLType = "http" Then
Set WebNode = New clsHTMLPageResourceNode
Set WebNode.Parent = CurrentWebNode
WebNode.URL = TargetURL
WebNode.Path = TargetURL
Set HostCollection = SelectHostCollection(WebNode.URL, CurrentWebNode)
If Not (HostCollection Is Nothing) Then
If Not WebNode.Root.AlreadyContains(WebNode.URL, ExistingNode) Then
HostCollection.Add WebNode, WebNode.URL
End If
End If
End If
ElseIf Left(TargetURL, 1) = "/" Then
Set WebNode = New clsHTMLPageResourceNode
Set WebNode.Parent = CurrentWebNode
WebNode.URL = ServerURL & TargetURL
WebNode.Path = TargetURL
Set HostCollection = SelectHostCollection(WebNode.URL, CurrentWebNode)
If Not (HostCollection Is Nothing) Then
If Not WebNode.Root.AlreadyContains(WebNode.URL, ExistingNode) Then
HostCollection.Add WebNode, WebNode.URL
End If
End If
Else
Set WebNode = New clsHTMLPageResourceNode
Set WebNode.Parent = CurrentWebNode
WebNode.URL = ParentURL & TargetURL
WebNode.Path = TargetURL
Set HostCollection = SelectHostCollection(WebNode.URL, CurrentWebNode)
If Not (HostCollection Is Nothing) Then
If Not WebNode.Root.AlreadyContains(WebNode.URL, ExistingNode) Then
HostCollection.Add WebNode, WebNode.URL
End If
End If
End If
Debug.Print "Collection Count: ", HostCollection.Count
Else
' No URL found.
End If 'TargetURL <> ""
Loop While (CurrentStartPos > 0)
End Sub
Private Function GetNextTargetURLFromDocument(Document As String, _
CurrentStartPos As Long, _
TagParamName As String) As String
Dim TempURL As String
Dim EndOfCurrentTagPos As Long
Dim CurrentEndPos As Long
TempURL = ""
CurrentStartPos = InStr(CurrentStartPos, Document, TagParamName)
If CurrentStartPos > 0 Then
EndOfCurrentTagPos = InStr(CurrentStartPos, Document, ">")
CurrentStartPos = InStr(CurrentStartPos, Document, """")
If CurrentStartPos > 0 Then
CurrentStartPos = CurrentStartPos + 1
CurrentEndPos = InStr(CurrentStartPos, Document, """")
If (CurrentEndPos > CurrentStartPos) And _
(CurrentEndPos < EndOfCurrentTagPos) Then
TempURL = Mid(Document, CurrentStartPos, _
CurrentEndPos - CurrentStartPos)
Else
CurrentStartPos = EndOfCurrentTagPos
End If
End If
End If
GetNextTargetURLFromDocument = TempURL
End Function
Public Function SelectHostCollection(URL As String, _
WebNode As clsHTMLPageResourceNode) As Collection
Dim sHeaderValue As String
If AbleToGetWebDocumentHeaders(URL) Then
sHeaderValue = Inet1.GetHeader("Content-type")
If (InStr(1, sHeaderValue, "text/", 1) > 0) Then
Set SelectHostCollection = WebNode.PageLinks
Else
Set SelectHostCollection = WebNode.PageElements
End If
Else
Set SelectHostCollection = Nothing
End If
Debug.Print URL
Debug.Print sHeaderValue
End Function
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -