📄 collecting.asp
字号:
SourceFootSetting = RsSiteObj("SourceFootSetting")
AddDateHeadSetting = RsSiteObj("AddDateHeadSetting")
AddDateFootSetting = RsSiteObj("AddDateFootSetting")
SysClassID = RsSiteObj("SysClass")
SysTemplet = RsSiteObj("SysTemplet")
TextTF = RsSiteObj("TextTF")
SaveRemotePic = RsSiteObj("SaveRemotePic")
CollectObjURL = RsSiteObj("objURL")
SaveIMGPath = RsSiteObj("SaveIMGPath")
Dim TempSaveIMGPath
TempSaveIMGPath = SaveIMGPath
SaveIMGPath =SaveIMGPath &"/"& Year(Date) &"-"& Month(Date) &"/"& Day(Date)
CreateDateDir(Server.mappath(DummyDir & TempSaveIMGPath))
IsStyle = RsSiteObj("IsStyle")
IsDiv = RsSiteObj("IsDiv")
IsA = RsSiteObj("IsA")
IsClass = RsSiteObj("IsClass")
IsFont = RsSiteObj("IsFont")
IsSpan = RsSiteObj("IsSpan")
IsObjectTF = RsSiteObj("IsObject")
IsIFrame = RsSiteObj("IsIFrame")
IsScript = RsSiteObj("IsScript")
IndexRule = RsSiteObj("IndexRule")
StartPageNum = RsSiteObj("StartPageNum")
EndPageNum = RsSiteObj("EndPageNum")
HandPageContent = RsSiteObj("HandPageContent")
OtherType = RsSiteObj("OtherType")
HandSetAuthor = RsSiteObj("HandSetAuthor")
HandSetSource = RsSiteObj("HandSetSource")
HandSetAddDate = RsSiteObj("HandSetAddDate")
ObjURL = GetOtherURL(CollectPageNumber,RsSiteObj)
IsReverse=RsSiteObj("IsReverse")
if ObjURL = "" then
CollectPageNumber = 0
CollectStartLocation = 0
CollectedPageURL = ""
CollectSiteIndex = CollectSiteIndex + 1
Set RsSiteObj = Nothing
GetCollectPara
Exit Function
else
if CollectPageNumber > NewsListPagesNumber then
CollectPageNumber = 0
CollectStartLocation = 0
CollectedPageURL = ""
CollectSiteIndex = CollectSiteIndex + 1
Set RsSiteObj = Nothing
GetCollectPara
Exit Function
end if
end if
end if
Set RsSiteObj = Nothing
End Function
Function GetOtherURL(PageNum,Obj) '取得其他新闻列表的URL
Dim OtherObjURL,OtherResponseAllStr,OtherNewsListArray,i
if PageNum = 0 then
GetOtherURL = CollectObjURL
CollectedPageURL = ""
else
Select Case OtherType
Case 0 '不分页
GetOtherURL = ""
Case 1 '标记分页
if IsNull(OtherPageHeadSetting) OR IsNull(OtherPageFootSetting) OR (OtherPageFootSetting = "") OR (OtherPageHeadSetting = "") then
GetOtherURL = ""
else
if PageNum = 1 then
CollectedPageURL = CollectObjURL
end if
OtherResponseAllStr = GetPageContent(FormatUrl(CollectedPageURL,CollectObjURL))
OtherObjURL = GetOtherContent(OtherResponseAllStr,OtherPageHeadSetting,OtherPageFootSetting)
if OtherObjURL <> "" then
OtherObjURL = FormatUrl(OtherObjURL,CollectObjURL)
else
OtherObjURL = ""
end if
GetOtherURL = OtherObjURL
end if
Case 2 '索引分页
if IsNull(IndexRule) OR (IndexRule = "") OR IsNull(StartPageNum) OR (StartPageNum = "") OR IsNull(EndPageNum) OR (EndPageNum = "") then
GetOtherURL = ""
else
if Not IsNumeric(StartPageNum) OR Not IsNumeric(EndPageNum) then
GetOtherURL = ""
else
if CInt(StartPageNum) < CInt(EndPageNum) Then '按从小到大的页数
if PageNum >= CInt(EndPageNum) then
GetOtherURL = ""
else
if PageNum = 1 then
IndexRule = Replace(FormatUrl(IndexRule,CollectObjURL),"^$^",StartPageNum)
else
StartPageNum = CInt(StartPageNum) + PageNum - 1
IndexRule = Replace(FormatUrl(IndexRule,CollectObjURL),"^$^",StartPageNum)
end if
GetOtherURL = IndexRule
end if
Else '按从大到小的页数,从而实现倒序采集,比如从10到1
if PageNum >= CInt(StartPageNum) then
GetOtherURL = ""
else
if PageNum = 1 then
IndexRule = Replace(FormatUrl(IndexRule,CollectObjURL),"^$^",StartPageNum)
else
EndPageNum = CInt(StartPageNum) - PageNum + 1
IndexRule = Replace(FormatUrl(IndexRule,CollectObjURL),"^$^",EndPageNum)
end if
GetOtherURL = IndexRule
end if
end if
end if
end if
Case 3 '手工分页
if IsNull(HandPageContent) OR (HandPageContent = "") then
GetOtherURL = ""
ElseIf InStr(HandPageContent,Chr(10))=0 And PageNum<2 Then
GetOtherURL = HandPageContent
Else
HandPageContent = Split(HandPageContent,Chr(10))
if PageNum > UBound(HandPageContent) then
GetOtherURL = ""
else
if HandPageContent(PageNum - 1) <> "" then
GetOtherURL = HandPageContent(PageNum - 1)
else
GetOtherURL = ""
end if
end if
end if
Case Else
GetOtherURL = ""
End Select
end if
End Function
Function GetNewsPageContent()
Dim NewsPageStr,TitleStr,ContentStr,AuthorStr,SourceStr,AddDate,i
Dim ResponseAllStr,NewsListStr,NewsLinkStr,RsCheckNewsObj
Dim NewsListStrArray,TempArray
ResponseAllStr = GetPageContent(FormatUrl(ObjURL,CollectObjURL))
if ResponseAllStr = False then
CollectPageNumber = CollectPageNumber + 1
ReturnValue = ReturnValue & "<br> <strong>错误</strong>:读取新闻列表页面失败<br>"
Exit Function
end if
Dim BLinkHeadSetting,BLinkFootSetting
BLinkHeadSetting = False
BLinkFootSetting = False
If Instr(LinkHeadSetting,"[变量]")<=0 Then
BLinkHeadSetting = True
ElseIf Instr(LinkFootSetting,"[变量]")<=0 Then
BLinkFootSetting = True
End If
If InStr(ResponseAllStr,ListHeadSetting)>0 And InStr(ResponseAllStr,ListFootSetting) Then
NewsListStr = GetOtherContent(ResponseAllStr,ListHeadSetting,ListFootSetting)
Else
NewsListStr = ResponseAllStr
End If
If BLinkHeadSetting Then
NewsListStr = Mid(NewsListStr,Instr(NewsListStr,LinkHeadSetting)+len(LinkHeadSetting))
NewsListStrArray = Split(NewsListStr,LinkHeadSetting)
elseif BLinkFootSetting Then
NewsListStr = Left(NewsListStr,InstrRev(NewsListStr,LinkFootSetting))
NewsListStrArray = Split(NewsListStr,LinkFootSetting)
End If
'倒序采集
If IsReverse="1" then
Dim TempArr,j
TempArr=NewsListStrArray
For j =0 to UBound(NewsListStrArray)
NewsListStrArray(j)=TempArr(UBound(NewsListStrArray)-j)
Next
If Num>0 and Num<=UBound(NewsListStrArray)Then
TempArr=NewsListStrArray
For j =0 to Num-1 'UBound(NewsListStrArray)
NewsListStrArray(j)=TempArr(UBound(NewsListStrArray)-Num+j+1)
Next
End If
End If
For i = CollectStartLocation to CollectStartLocation + CollectMaxOfOnePage - 1
if i > UBound(NewsListStrArray) Or (i >= Num And Num<>0) then
CollectPageNumber = CollectPageNumber + 1
CollectStartLocation = 0
CollectedPageURL = ObjURL
Exit Function
end If
AllNewsNumber = AllNewsNumber + 1
if NewsListStrArray(i) <> "" then
If BLinkHeadSetting=True Then
TempArray = GetOtherContent(LinkHeadSetting&NewsListStrArray(i),LinkHeadSetting,LinkFootSetting)
ElseIf BLinkFootSetting=True Then
TempArray = GetOtherContent(NewsListStrArray(i)&LinkFootSetting,LinkHeadSetting,LinkFootSetting)
End If
if TempArray <> "" Then
NewsLinkStr = LoseHtml(FormatUrl(TempArray,CollectObjURL))
NewsPageStr = GetPageContent(NewsLinkStr)
if NewsPageStr <> False then
TitleStr = LoseHtml(GetOtherContent(NewsPageStr,PageTitleHeadSetting,PageTitleFootSetting))
Set RsCheckNewsObj = CollectConn.Execute("Select * from FS_News where Links='" & NewsLinkStr & "'")
if Not RsCheckNewsObj.Eof then
ReturnValue = GetOneNewsReturnValue(1,i + 1,TitleStr,"",NewsLinkStr) & ReturnValue
else
ContentStr = GetOneNewsContent(NewsPageStr,NewsLinkStr)
ContentStr = ReplaceContentStr(ContentStr)
ContentStr = ReplaceIMGRemoteUrl(ContentStr,SaveIMGPath,AvailableDoMain,DummyDir,NewsLinkStr,SaveRemotePic)
if TitleStr = "" then
ReturnValue = GetOneNewsReturnValue(2,i + 1,"","",NewsLinkStr) & ReturnValue
elseif ContentStr = "" then
ReturnValue = GetOneNewsReturnValue(3,i + 1,TitleStr,"",NewsLinkStr) & ReturnValue
else
ReturnValue = GetOneNewsReturnValue(4,i + 1,TitleStr,ContentStr,NewsLinkStr) & ReturnValue
if IsNull(HandSetAuthor) OR (HandSetAuthor = "") then
AuthorStr = LoseHtml(GetOtherContent(NewsPageStr,AuthorHeadSetting,AuthorFootSetting))
else
AuthorStr = HandSetAuthor
end if
if IsNull(HandSetSource) OR (HandSetSource = "") then
SourceStr = LoseHtml(GetOtherContent(NewsPageStr,SourceHeadSetting,SourceFootSetting))
else
SourceStr = HandSetSource
end if
if IsNull(HandSetAddDate) OR Not IsDate(HandSetSource) then
AddDate = LoseHtml(GetOtherContent(NewsPageStr,AddDateHeadSetting,AddDateFootSetting))
else
AddDate = HandSetAddDate
end if
if AddDate <> "" then
if Not IsDate(AddDate) then AddDate = Now
else
AddDate = Now
end if
SaveCollectContent TitleStr,NewsLinkStr,ContentStr,SysClassID,AuthorStr,SourceStr,AddDate
end if
end if
Set RsCheckNewsObj = Nothing
else
ReturnValue = GetOneNewsReturnValue(5,i + 1,"","",NewsLinkStr) & ReturnValue
end if
else
ReturnValue = GetOneNewsReturnValue(5,i + 1,"","",NewsLinkStr) & ReturnValue
end if
else
ReturnValue = GetOneNewsReturnValue(5,i + 1,"","",NewsLinkStr) & ReturnValue
end if
Next
CollectStartLocation = i
End Function
Function ResumeGetNewsPageContent()
dim ResumeSql,RsResumeNewsObj,ResumeNewsURL,ResumeNewsURL1,ResumeNewsLocation
ResumeSql = "Select top 1 Links from FS_News where SiteID='" & CollectingSiteID &"' order by ID DESC"
Set RsResumeNewsObj = CollectConn.Execute(ResumeSql)
If RsResumeNewsObj.EOF Then
set RsResumeNewsObj = nothing
response.Write("<script>alert(""无法确定您以前的采集记录,\n续采失败!"");history.go(-2);</script>")
else
ResumeNewsURL = RsResumeNewsObj("Links")
set RsResumeNewsObj = nothing
End If
Dim NewsPageStr,TitleStr,ContentStr,AuthorStr,SourceStr,AddDate,i,n
Dim ResponseAllStr,NewsListStr,NewsLinkStr,RsCheckNewsObj
Dim NewsListStrArray,TempArray
ResponseAllStr = GetPageContent(FormatUrl(ObjURL,CollectObjURL))
if ResponseAllStr = False then
CollectPageNumber = CollectPageNumber + 1
ReturnValue = ReturnValue & "<br> <strong>错误</strong>:读取新闻列表页面失败<br>"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -