⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmlpageresources.old

📁 vb写的网络蜘蛛程序
💻 OLD
字号:
VERSION 1.0 CLASS
BEGIN
  MultiUse = -1  'True
END
Attribute VB_Name = "HTMLPageResources"
Attribute VB_Creatable = False
Attribute VB_Exposed = False
Option Explicit

Public URL As String
Public Path As String
Public PageLinks As New Collection
Public PageElements As New Collection

Private Function GetParentURL(URL As String)
    Dim StringPos As Long
    
    If URLEndsWithFilename(URL) Then
        StringPos = Len(URL)
        Do Until (StringPos = 1) Or (Mid(URL, StringPos, 1) = "/")
            StringPos = StringPos - 1
          Loop
        GetParentURL = Left(URL, StringPos)
      Else
        GetParentURL = URL
      End If

End Function



Private Sub Class_Terminate()
    Dim Counter As Integer
    ' As a precaution, remove all items
    ' from the two collections.
    For Counter = 1 To PageLinks.Count
        PageLinks.Remove 1
    Next Counter
    For Counter = 1 To PageElements.Count
        PageElements.Remove 1
    Next Counter
End Sub



Private Sub ExtractLinksFromDocument(Document As String, _
                                    DocumentURL As String, _
                                    TagParamName As String, _
                                    HostCollection As Collection)
    Dim ParentURL As String
    Dim ServerURL As String
    Dim TargetURL As String
    Dim URLType As String
    Dim StringPos As Long
    Dim CurrentStartPos As Long
    Dim CurrentEndPos As Long
    Dim WebNode As HTMLPageResourceNodes

    Document = LCase(Document)
    DocumentURL = LCase(DocumentURL)
    ' Extract ParentURL from DocumentURL.
    ParentURL = GetParentURL(DocumentURL)
    ' Extract ServerURL from DocumentURL.
    ServerURL = GetServerURL(DocumentURL)
    
    CurrentStartPos = 1
    CurrentEndPos = 0
    Do
        TargetURL = GetNextTargetURLFromDocument(Document, CurrentStartPos, CurrentEndPos, TagParamName)
        If TargetURL <> "" Then
            On Error Resume Next
            StringPos = InStr(TargetURL, ":")
            If StringPos > 0 Then
                URLType = Left(TargetURL, StringPos - 1)
                If URLType = "http" Then
                    Set WebNode = New HTMLPageResourceNodes
                    WebNode.URL = TargetURL
                    WebNode.Path = TargetURL
                    HostCollection.Add WebNode, TargetURL
                    'HostCollection.Add TargetURL, TargetURL
                  End If
              Else
                If Left(TargetURL, 1) = "/" Then
                    Set WebNode = New HTMLPageResourceNodes
                    WebNode.URL = ServerURL & TargetURL
                    WebNode.Path = TargetURL
                    HostCollection.Add WebNode, TargetURL
                    'HostCollection.Add TargetURL, ServerURL & TargetURL
                  Else
                    Set WebNode = New HTMLPageResourceNodes
                    WebNode.URL = ParentURL & TargetURL
                    WebNode.Path = TargetURL
                    HostCollection.Add WebNode, TargetURL
                    'HostCollection.Add TargetURL, ParentURL & TargetURL
                  End If
              End If
          End If
        Debug.Print "Collection Count: ", HostCollection.Count
        Loop Until (TargetURL = "") Or _
            (Not ((CurrentStartPos > 1) And (CurrentEndPos > 0)))
        For Each WebNode In HostCollection
            Debug.Print WebNode.URL
            Next WebNode
    End Sub

Private Function GetServerURL(URL As String)
    Dim StringPos As Long
    
    StringPos = InStr(1, URL, "//")
    If StringPos > 0 Then
        StringPos = InStr(StringPos + 2, URL, "/")
        If StringPos > 0 Then
            GetServerURL = Left(URL, StringPos - 1)
          Else
            GetServerURL = URL
          End If
      End If
End Function
      
            
            
  

Private Function GetNextTargetURLFromDocument(Document As String, _
                                             CurrentStartPos As Long, _
                                             CurrentEndPos As Long, _
                                             TagParamName As String) As String
    Dim TempURL As String
    
    CurrentStartPos = InStr(CurrentStartPos, Document, TagParamName)
    If CurrentStartPos > 0 Then
        CurrentStartPos = InStr(CurrentStartPos, Document, """")
        If CurrentStartPos > 0 Then
            CurrentStartPos = CurrentStartPos + 1
            CurrentEndPos = InStr(CurrentStartPos, Document, """")
            If CurrentEndPos > CurrentStartPos Then
                CurrentEndPos = CurrentEndPos - 1
                TempURL = Mid(Document, CurrentStartPos, _
                              CurrentEndPos - CurrentStartPos + 1)
                If Right(TempURL, 1) <> "/" Then
                    If Not URLEndsWithFilename(TempURL) Then
                        TempURL = TempURL & "/"
                      End If
                  End If
                GetNextTargetURLFromDocument = TempURL
              End If
          End If
      End If
            
End Function

Private Function URLEndsWithFilename(URL As String) As Boolean
    URLEndsWithFilename = _
        ((Left(Right(URL, 4), 1) = ".") Or _
         (Left(Right(URL, 5), 1) = "."))
End Function


Public Sub AddLinksFromDocument(Document As String, DocumentURL As String)
    ExtractLinksFromDocument Document, DocumentURL, "href", PageLinks
    ExtractLinksFromDocument Document, DocumentURL, "src", PageElements
    End Sub

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -