⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 parser.bas

📁 从html中分析提取链接
💻 BAS
📖 第 1 页 / 共 2 页
字号:
'============================================================================
'                           VARIABLE DECLARATIONS
'============================================================================

'PLEASE SEE THE PROCEDURE TITLED "LICENSE" FOR DISCLAIMER AND OTHER
'PERTINENT INFO.


Global FileName As String   'Stores name of html doc to open (you must write
			    'the code that assigns a value to this variable.
			    'This module will use the value to open the file
			    'for parsing.

Global SrcCount As Long         'Stores number of links found by parse engine
Global SrcLabel() As String     'Stores the caption of the hotlink
Global SrcURL() As String       'Stores the URL of the hotlink

Global db As database           'Handle for HTML.mdb
Global Dt As Dynaset            'Handle for TempLinks table
Global Dp As Dynaset            'Handle for PermLinks table
Global PDt As Dynaset           'Handle for Projects table

Global DisableDatabase          'Disables use of Access database in module.
				'Set value to TRUE if you experience
				'prolems with the database and wish to
				'use procedure without database storage.


'Pgm Constant for KeyDown
Global Const KEY_DELETE = &H2E

'Data Constants for Database use
Global Const DB_LANG_GENERAL = ";LANGID=0x0809;CP=1252;COUNTRY=0"
Global Const DB_LONG = 4
Global Const DB_TEXT = 10
Global Const DB_MEMO = 12


Global ProjectTitle As String   'Stores name of Project
Global DocTitle As String       'Stores Doc Title as found within <title></title>

Function BuildLink (Txt As String, URL As String) As String

BuildLink = "<a href=" + Chr$(34) + URL + Chr$(34) + ">" + Txt + "</a>"

End Function

Sub BuildLists (Order As Integer)

'THIS ROUTINE BUILDS THE INITIAL LISTS OF DESCRIPTIONS AND URL'S.
'IT ALSO USES THE LOOP TO ADD THE RECORDS TO THE TEMPLINKS TABLE
'IN THE HTML.MDB DATABASE PROVIDING THE GLOBAL VAR DisableDatabse = False

'The case argument tells the routine whether to sort the records in the order
'they were found or in alphabetical order.  This is only the case when using
'the database capabilities and will not work with DisableDatabase set to TRUE

Dim F As Form
Dim L1 As Control
Dim L2 As Control
Dim Total As Integer, Count As Integer

Set F = Hotlist
Set L1 = F.List1
Set L2 = F.List2

L1.Clear
L2.Clear

'Does not add links to database if DisableDatabase = True
If DisableDefault = True Then Order = 2

Select Case Order

'Create list of references in default order as pulled from doc
Case 0
    Dim Ds1 As Dynaset
    Dim Ds2 As Dynaset
    Set Ds1 = db.CreateDynaset("Select Templinks.* From Templinks")
    Ds1.MoveLast
    Total = Ds1.RecordCount
    Ds1.MoveFirst
    Count = 0
    Do Until Ds1.EOF
	Count = Count + 1
	F.Caption = "Creating database " + Str(Count) + " of" + Str(Total) + " links"
	L1.AddItem Ds1("LinkText")
	L1.ItemData(L1.NewIndex) = Ds1("LinkID")
	L2.AddItem Ds1("LinkURL")
	L2.ItemData(L2.NewIndex) = Ds1("LinkID")
    Ds1.MoveNext
    Loop

'Create list of references in alphabetical order both by caption and URL
Case 1
    Dim Ds1 As Dynaset
    Dim Ds2 As Dynaset
    Set Ds1 = db.CreateDynaset("Select Templinks.* From Templinks Order by LinkText")
    Set Ds2 = db.CreateDynaset("Select Templinks.* From Templinks Order by LinkURL")
    Ds1.MoveLast
    Total = Ds1.RecordCount
    Ds1.MoveFirst
    Ds2.MoveFirst
    Count = 0
    Do Until Ds1.EOF
	Count = Count + 1
	F.Caption = "Creating database " + Str(Count) + " of" + Str(Total) + " links"
	L1.AddItem Ds1("LinkText")
	L1.ItemData(L1.NewIndex) = Ds1("LinkID")
	L2.AddItem Ds2("LinkURL")
	L2.ItemData(L2.NewIndex) = Ds2("LinkID")
    Ds1.MoveNext
    Ds2.MoveNext
    Loop

'Avoid using database, Case 2 only used if DisableDatabase = TRUE
Case 2
    Total = SrcCount
    Count = 0
    For x = 0 To SrcCount - 1
	Count = Count + 1
	F.Caption = "Creating database " + Str(Count) + " of" + Str(Total) + " links"
	L1.AddItem SrcLabel(x)
	L1.ItemData(L1.NewIndex) = x
	L2.AddItem SrcURL(x)
	L2.ItemData(L2.NewIndex) = x
    Next x

End Select

F.Caption = FileName

End Sub

Sub CheckDatabase ()

'Check to see if database already exists or is disabled - if so exit sub
If Dir$("html.mdb") <> "" Or DisableDatabase = True Then Exit Sub

'Declare database object variables
Dim NewDb As database
Dim NewTd As New TableDef
Dim F1 As New Field, F2 As New Field, F3 As New Field, F4 As New Field

'Create database file HTML.mdb
Set NewDb = CreateDatabase("html.mdb", DB_LANG_GENERAL)

'Error handler
If db Is Nothing Then MsgBox "Could not create database": Exit Sub
If Err <> 0 Then MsgBox "Error during open:" & Err & "/" & Error$(Err)
On Error GoTo 0

'Define new table name
NewTd.Name = "TempLinks"

'Define new table fields
F1.Name = "LinkID"
F1.Type = DB_LONG
NewTd.Fields.Append F1

F2.Name = "LinkText"
F2.Type = DB_MEMO
NewTd.Fields.Append F2

F3.Name = "LinkURL"
F3.Type = DB_MEMO
NewTd.Fields.Append F3

F4.Name = "URLType"
F4.Type = DB_TEXT
F4.Size = 10
NewTd.Fields.Append F4

'Create field def's in table def
NewDb.TableDefs.Append NewTd

'Msg user that database has been created
MsgBox "html.mdb created"

'Close the new database
NewDb.Close

End Sub

Function CleanUpCaption (TempCap As String) As String

Dim Clean1 As Integer, Clean2 As Integer, Temp1 As String, Temp2 As String
Dim Linktrim As Integer

'FIND STYLE/IMG TAG DELIMITERS
'-----------------------------
Clean1 = InStr(TempCap, "<")
Clean2 = InStr(TempCap, ">")

'COPY TEXT NOT IN STYLE/IMG TAG
'------------------------------
If Clean1 <> 0 Then 'Or Clean1 <> "" Then
    Temp1 = Left$(TempCap, Clean1 - 1)
End If

If Clean2 <> 0 Then 'Or Clean2 <> "" Then
    Temp2 = Trim$(Right$(TempCap, Len(TempCap) - Clean2))
End If

'COPY REMAINING TEXT BACK TO CAPTION VARIABLE
'--------------------------------------------
If Clean1 > 0 And Clean2 > 0 Then
    TempCap = Temp1 + Temp2
End If

'EXCHANGE CR AND LF FOR SPACES
'-----------------------------
For Linktrim = 1 To Len(TempCap)
    If Asc(Mid$(TempCap, Linktrim, 1)) = 10 Or Asc(Mid$(TempCap, Linktrim, 1)) = 13 Then
	Mid$(TempCap, Linktrim, 1) = " "
    End If
Next Linktrim

'RETURN CLEANED UP CAPTION TO PARSE ROUTINE
'------------------------------------------
CleanUpCaption = TempCap

End Function

Sub LICENSE ()
'===========================================================================
'                         HTML HOTLINK PARSE ROUTINE
'===========================================================================
'DISCLAIMER
'----------
'
'This module is made freely available to the public and may be reproduced,
'in whole or in part, as well as redistributed without royalty.  Use of the
'routines in this module are the sole responsibility of you, the user.  I
'make no warranty, written or implied, regarding any portion of this module.
'It has run on my US3486/33 board with 20 megs RAM without incident.  Before
'using this module with any critical data please read through all the
'functions and procedures and make sure you understand what they're doing.
'There are always potential problems when disk I/O is involved.
'
'This module utilizes form, componant, and database object variables.  Use of
'these are only available in the PRO edition of VB.  Please contact Microsoft
'or your local software vendor for more information on updating to VB-Pro.
'
'This routine will create a Microsoft Access Database the first time you use
'it.  I am currently using Microsoft Access 2.0 with the compatability layer
'installed.  If you experience problems with the database portion of the
'module I recommend you:
'
    'a) install the compatability layer on your system.
    '   This can be found using FTP to "microsoft.com".
'
    'b) disable the use of the database by changing the
    '   "DisableDatabase" global variable from False to
    '   True.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -