📄 parser.bas
字号:
'
'If DisableDatabase is set to true then you will need to comment out the
'following Global Declarations from the General Declarations section of
'this module:
'
'Global db As database 'Handle for HTML.mdb
'Global Dt As Dynaset 'Handle for TempLinks table
'Global Dp As Dynaset 'Handle for PermLinks table
'Global PDt As Dynaset 'Handle for Projects table
'
'This module makes judicious use of form and componant object variables so
'using it should be a simple matter of changing the form and control names
'in the appropriate declarations.
'
'
End Sub
Sub ParseLinks (FileName As String)
Dim FileNum As Integer 'Number of next available free filenumber
Dim LinkText As String 'Contents of HTML document opened
Dim TempVar As String 'Temporary container for document labels
Dim LinkStart As Long 'Holds starting position of element to be parsed
Dim LinkEnd As Long 'Holds ending position of element to be parsed
Dim FileSize As Long 'Used for status line (percentage completed)
Dim Lycos As Integer 'Flags true if user answers yes on lycos search
Dim NotLycos As Integer 'Flags for second try on lycos query
'----------------------------------------------------------------------------
'Check for existance of "html.mdb". If it does not exist routine will create it.
If DisableDatabase = False Then CheckDatabase
'If using database
If DisableDatabase = False Then
Set Dt = db.CreateDynaset("TempLinks") 'Global Var declared in General Declarations.
db.Execute "Delete From Templinks" 'Delete any pre-existing records in temp database
End If
'============================================================================
' Open HTML file for parsing
'============================================================================
'FIND FIRST AVAILABLE FILE NUMBER
'--------------------------------
FileNum = FreeFile
'OPEN FILE WITH FILE NUMBER
'--------------------------
Open FileName For Input As FileNum 'ATT: You must write code to assign value to Filename
'ASSIGN FILE CONTENTS TO VARIABLE
'--------------------------------
LinkText = Input$(LOF(FileNum), FileNum)
FileSize = LOF(FileNum)
'============================================================================
' Get document Title
'============================================================================
LinkStart = InStr(UCase(LinkText), "<TITLE>")
LinkStart = LinkStart + Len("<TITLE>")
LinkEnd = InStr(UCase(LinkText), "</TITLE>")
DocTitle = Mid$(LinkText, LinkStart, LinkEnd - LinkStart)
'============================================================================
' Begin Parsing Document
'============================================================================
'PROGRAM LABEL
'-------------
ParseLinksStart:
URLRecognized = False
'ERROR CONTROL
'-------------
On Error Resume Next
'FIND NEXT HOTLINK REFERENCE
'---------------------------
LinkStart = InStr(1, UCase(LinkText), "HREF")
'*** CHECK FOR NO MORE HOTLINKS LEFT AND IF TRUE THEN ESCAPE FROM ROUTINE ***
'----------------------------------------------------------------------------
If LinkStart = 0 Then 'Or LinkStart = "" Then
Exit Sub
End If
'INFORM USER OF PERCENTAGE COMPLETED
'-----------------------------------
Hotlist.Caption = "Scanning File: " + Format((Len(LinkText) / FileSize), "0%")
'TRUNCATE TEXT TO BEGINING OF HREF
'---------------------------------
LinkText = Mid$(LinkText, LinkStart, Len(LinkText) - LinkStart)
'FIND POSITION OF FIRST URL DELIMTER
'------------------------------------
LinkStart = InStr(1, LinkText, Chr$(34))
'TRUNCATE TEXT UP TO AND INCLUDING FIRST URL DELIMITER
'-----------------------------------------------------
LinkText = Mid$(LinkText, LinkStart + 1, Len(LinkText) - LinkStart)
'FIND POSITION OF SECOND URL DELIMITER
'-------------------------------------
LinkEnd = InStr(1, LinkText, Chr$(34))
'EXTRACT URL AND ASSIGN TO LOCAL VAR
'-----------------------------------
LinkURL = Mid$(LinkText, 1, LinkEnd - 1)
'TRUNCATE TEXT UP TO AND INCLUDING SECOND URL DELIMITER
'------------------------------------------------------
LinkText = Mid$(LinkText, LinkEnd + 1, Len(LinkText) - LinkEnd)
'FIND CAPTION DELIMITER
'----------------------
LinkStart = InStr(1, LinkText, ">")
'TRUNCATE TEXT UP TO AND INCLUDING FIRST CAPTION DELIMITER
'---------------------------------------------------------
LinkText = Mid$(LinkText, LinkStart + 1, Len(LinkText) - LinkStart)
'FIND SECOND CAPTION DELIMITER
'-----------------------------
LinkEnd = InStr(1, LinkText, "</")
'EXTRACT CAPTION AND ASSIGN TO LOCAL VAR
'---------------------------------------
LinkCaption = Mid$(LinkText, 1, LinkEnd - 1)
'CLEAN UP VARIOUS FORMATTING ITEMS EMBEDDED IN LINK CAPTION
'AND ASSIGN RESULT TO BACK TO LINK CAPTION VARIABLE
'----------------------------------------------------------
TempVar = LinkCaption
LinkCaption = CleanUpCaption(TempVar)
LinkCaption = TempVar
'REMOVE EXCESS WHITE SPACE FROM LOCAL VARS
'-----------------------------------------
LinkCaption = Trim$(LinkCaption)
LinkURL = Trim$(LinkURL)
'IF NOT LINK CAPTION ASSIGN DEFAULT CAPTION
'------------------------------------------
If LinkCaption = "" Then LinkCaption = LinkURL
'============================================================================
' Check for Lycos Search
'============================================================================
'NotLycos = TRUE IF USER RESPONDS TO NEXT LINE THAT HTML DOC IS A LYCOS
'SEARCH. IF IT IS A LYCOS SEARCH THIS LINE CAUSES THE EXECUTION TO SKIP
'THE OTHER TWO CODE SECTIONS.
'-----------------------------------------------------------------------
If NotLycos = True Then GoTo ParseLinksNextSegment
'ASK USER IF HTML DOC IS RESULTS OF LYCOS SEARCH
'IF TRUE THEN NotLycos IS SET TO TRUE
'-----------------------------------------------
If Lycos = False And InStr(UCase(LinkURL), "LYCOS") <> 0 Then
If MsgBox("There are indications that the document you submited was the result of a Lycos Search. Is this correct?", 4, "Confirm Document Origin") = 6 Then
Lycos = True
Else
NotLycos = True
End If
End If
'IF LYCOS IS FOUND IN SUBSEQUENTLY PARSED URL'S THEN THE PARSE
'ROUTINE REPEATS ITSELF WITH THE NEXT LINK, FAILING TO ADD THE
'LYCOS LINK TO THE ARRAY
'-------------------------------------------------------------
If Lycos = True And InStr(UCase(LinkURL), "LYCOS") <> 0 Then
GoTo ParseLinksStart
End If
'============================================================================
' Build Hotlink Variable Arrays
'============================================================================
ParseLinksNextSegment:
'ADD DESCRIPTION TO PROPER LIST
'------------------------------
ReDim Preserve SrcLabel(SrcCount)
ReDim Preserve SrcURL(SrcCount)
SrcLabel(SrcCount) = LinkCaption
SrcURL(SrcCount) = LinkURL
'IF DATABASE IS ENABLED THEN
'ADD RECORD TO TEMP TABLE OF DATABASE
'------------------------------------
If DisableDatabase = False Then
Dt.AddNew
Dt("LinkID") = SrcCount
Dt("LinkText") = SrcLabel(SrcCount)
Dt("LinkURL") = SrcURL(SrcCount)
Dt("URLType") = URLType(SrcURL(SrcCount)) '<-- This is a User Function
Dt.Update
End If
'INCREMENT SrcCount VARIABLE
'---------------------------
SrcCount = SrcCount + 1
'RETURN TO BEGINING OF ROUTINE
'-----------------------------
GoTo ParseLinksStart
End Sub
Function URLType (URL As String) As String
Select Case UCase(Left$(URL, 7))
Case "HTTP://"
URLType = "WWW"
Exit Function
Case "TELNET:"
URLType = "TELNET"
Exit Function
Case "GOPHER:"
URLType = "GOPHER"
Exit Function
Case "MAILTO:"
URLType = "MAILTO"
Exit Function
Case "FILE://"
URLType = "FILE"
Exit Function
End Select
If UCase(Left$(URL, 6)) = "FTP://" Then
URLType = "FTP"
Exit Function
End If
If UCase(Left$(URL, 5)) = "NEWS:" Then
URLType = "NEWS"
Exit Function
End If
URLType = "FILE"
End Function
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -