⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 collecting.asp

📁 asp源码 图片ASP整站
💻 ASP
📖 第 1 页 / 共 3 页
字号:
		SourceFootSetting = RsSiteObj("SourceFootSetting")
		AddDateHeadSetting = RsSiteObj("AddDateHeadSetting")
		AddDateFootSetting = RsSiteObj("AddDateFootSetting")
		SysClassID = RsSiteObj("SysClass")
		SysTemplet = RsSiteObj("SysTemplet")
		TextTF = RsSiteObj("TextTF")
		SaveRemotePic = RsSiteObj("SaveRemotePic")
		CollectObjURL = RsSiteObj("objURL")
		SaveIMGPath = RsSiteObj("SaveIMGPath")
		Dim TempSaveIMGPath
		TempSaveIMGPath = SaveIMGPath
		SaveIMGPath =SaveIMGPath &"/"& Year(Date) &"-"& Month(Date) &"/"& Day(Date)
		CreateDateDir(Server.mappath(DummyDir & TempSaveIMGPath))
		IsStyle = RsSiteObj("IsStyle")
		IsDiv = RsSiteObj("IsDiv")
		IsA = RsSiteObj("IsA")
		IsClass = RsSiteObj("IsClass")
		IsFont = RsSiteObj("IsFont")
		IsSpan = RsSiteObj("IsSpan")
		IsObjectTF = RsSiteObj("IsObject")
		IsIFrame = RsSiteObj("IsIFrame")
		IsScript = RsSiteObj("IsScript")
		IndexRule = RsSiteObj("IndexRule")
		StartPageNum = RsSiteObj("StartPageNum")
		EndPageNum = RsSiteObj("EndPageNum")
		HandPageContent = RsSiteObj("HandPageContent")
		OtherType = RsSiteObj("OtherType")
		HandSetAuthor = RsSiteObj("HandSetAuthor")
		HandSetSource = RsSiteObj("HandSetSource")
		HandSetAddDate = RsSiteObj("HandSetAddDate")
		ObjURL = GetOtherURL(CollectPageNumber,RsSiteObj)
		IsReverse=RsSiteObj("IsReverse")
		if ObjURL = "" then
			CollectPageNumber = 0
			CollectStartLocation = 0
			CollectedPageURL = ""
			CollectSiteIndex = CollectSiteIndex + 1
			Set RsSiteObj = Nothing
			GetCollectPara
			Exit Function
		else
			if CollectPageNumber > NewsListPagesNumber then
				CollectPageNumber = 0
				CollectStartLocation = 0
				CollectedPageURL = ""
				CollectSiteIndex = CollectSiteIndex + 1
				Set RsSiteObj = Nothing
				GetCollectPara
				Exit Function
			end if
		end if
	end if
	Set RsSiteObj = Nothing
End Function

Function GetOtherURL(PageNum,Obj) '取得其他新闻列表的URL
	Dim OtherObjURL,OtherResponseAllStr,OtherNewsListArray,i
	if PageNum = 0 then
		GetOtherURL = CollectObjURL
		CollectedPageURL = ""
	else
		Select Case OtherType
			Case 0 '不分页
				GetOtherURL = ""
			Case 1 '标记分页
				if IsNull(OtherPageHeadSetting) OR IsNull(OtherPageFootSetting) OR (OtherPageFootSetting = "") OR (OtherPageHeadSetting = "") then
					GetOtherURL = ""
				else
					if PageNum = 1 then
						CollectedPageURL = CollectObjURL
					end if
					OtherResponseAllStr = GetPageContent(FormatUrl(CollectedPageURL,CollectObjURL))
					OtherObjURL = GetOtherContent(OtherResponseAllStr,OtherPageHeadSetting,OtherPageFootSetting)
					if OtherObjURL <> "" then
						OtherObjURL = FormatUrl(OtherObjURL,CollectObjURL)
					else
						OtherObjURL = ""
					end if
					GetOtherURL = OtherObjURL
				end if
			Case 2 '索引分页
				if IsNull(IndexRule) OR (IndexRule = "") OR IsNull(StartPageNum) OR (StartPageNum = "") OR IsNull(EndPageNum) OR (EndPageNum = "") then
					GetOtherURL = ""
				else
					if Not IsNumeric(StartPageNum) OR Not IsNumeric(EndPageNum) then
						GetOtherURL = ""
					else
						if CInt(StartPageNum) < CInt(EndPageNum) Then '按从小到大的页数
							if PageNum >= CInt(EndPageNum) then
								GetOtherURL = ""
							else
								if PageNum = 1 then
									IndexRule = Replace(FormatUrl(IndexRule,CollectObjURL),"^$^",StartPageNum)
								else
									StartPageNum = CInt(StartPageNum) + PageNum - 1
									IndexRule = Replace(FormatUrl(IndexRule,CollectObjURL),"^$^",StartPageNum)
								end if
								GetOtherURL = IndexRule
							end if
						Else  '按从大到小的页数,从而实现倒序采集,比如从10到1
							if PageNum >= CInt(StartPageNum) then
								GetOtherURL = ""
							else
								if PageNum = 1 then
									IndexRule = Replace(FormatUrl(IndexRule,CollectObjURL),"^$^",StartPageNum)
								else
									EndPageNum = CInt(StartPageNum) - PageNum + 1
									IndexRule = Replace(FormatUrl(IndexRule,CollectObjURL),"^$^",EndPageNum)
								end if
								GetOtherURL = IndexRule
							end if
						end if
					end if
				end if
			Case 3 '手工分页
				if IsNull(HandPageContent) OR (HandPageContent = "") then
					GetOtherURL = ""
				ElseIf InStr(HandPageContent,Chr(10))=0 And PageNum<2 Then
					GetOtherURL = HandPageContent
				Else
					HandPageContent = Split(HandPageContent,Chr(10))
					if PageNum > UBound(HandPageContent) then
						GetOtherURL = ""
					else
						if HandPageContent(PageNum - 1) <> "" then
							GetOtherURL = HandPageContent(PageNum - 1)
						else
							GetOtherURL = ""
						end if
					end if
				end if
			Case Else
				GetOtherURL = ""
		End Select
	end if
End Function

Function GetNewsPageContent()
	Dim NewsPageStr,TitleStr,ContentStr,AuthorStr,SourceStr,AddDate,i
	Dim ResponseAllStr,NewsListStr,NewsLinkStr,RsCheckNewsObj
	Dim NewsListStrArray,TempArray
	ResponseAllStr = GetPageContent(FormatUrl(ObjURL,CollectObjURL))	
	if ResponseAllStr = False then
		CollectPageNumber = CollectPageNumber + 1
		ReturnValue = ReturnValue & "<br>&nbsp;&nbsp;&nbsp;&nbsp;<strong>错误</strong>:读取新闻列表页面失败<br>"
		Exit Function
	end if

	Dim BLinkHeadSetting,BLinkFootSetting
	BLinkHeadSetting = False
	BLinkFootSetting = False
	If Instr(LinkHeadSetting,"[变量]")<=0 Then
		BLinkHeadSetting = True
	ElseIf Instr(LinkFootSetting,"[变量]")<=0 Then
		BLinkFootSetting = True
	End If
	If InStr(ResponseAllStr,ListHeadSetting)>0 And InStr(ResponseAllStr,ListFootSetting) Then
		NewsListStr = GetOtherContent(ResponseAllStr,ListHeadSetting,ListFootSetting)
	Else 
		NewsListStr = ResponseAllStr
	End If

	If BLinkHeadSetting Then
		NewsListStr = Mid(NewsListStr,Instr(NewsListStr,LinkHeadSetting)+len(LinkHeadSetting))
		NewsListStrArray = Split(NewsListStr,LinkHeadSetting)
	elseif BLinkFootSetting Then 
		NewsListStr = Left(NewsListStr,InstrRev(NewsListStr,LinkFootSetting))
		NewsListStrArray = Split(NewsListStr,LinkFootSetting)
	End If

	'倒序采集
	If IsReverse="1" then 
		Dim TempArr,j
		TempArr=NewsListStrArray
		For j =0 to UBound(NewsListStrArray)
			NewsListStrArray(j)=TempArr(UBound(NewsListStrArray)-j)
		Next 
		If Num>0 and Num<=UBound(NewsListStrArray)Then
			TempArr=NewsListStrArray
			For j =0 to Num-1 'UBound(NewsListStrArray)
				NewsListStrArray(j)=TempArr(UBound(NewsListStrArray)-Num+j+1)
			Next 	
		End If 
	End If

	For i = CollectStartLocation to CollectStartLocation + CollectMaxOfOnePage - 1
		if i > UBound(NewsListStrArray) Or (i >= Num And Num<>0) then
			CollectPageNumber = CollectPageNumber + 1
			CollectStartLocation = 0
			CollectedPageURL = ObjURL

			Exit Function
		end If

		AllNewsNumber = AllNewsNumber + 1
		if NewsListStrArray(i) <> "" then
			If BLinkHeadSetting=True Then
				TempArray = GetOtherContent(LinkHeadSetting&NewsListStrArray(i),LinkHeadSetting,LinkFootSetting) 
			ElseIf BLinkFootSetting=True Then 
				TempArray = GetOtherContent(NewsListStrArray(i)&LinkFootSetting,LinkHeadSetting,LinkFootSetting) 
			End If 
			if TempArray <> "" Then
				NewsLinkStr = LoseHtml(FormatUrl(TempArray,CollectObjURL))
				NewsPageStr = GetPageContent(NewsLinkStr)
				if NewsPageStr <> False then		
					TitleStr = LoseHtml(GetOtherContent(NewsPageStr,PageTitleHeadSetting,PageTitleFootSetting))
					Set RsCheckNewsObj = CollectConn.Execute("Select * from FS_News where Links='" & NewsLinkStr & "'")
					if Not RsCheckNewsObj.Eof then
						ReturnValue = GetOneNewsReturnValue(1,i + 1,TitleStr,"",NewsLinkStr) & ReturnValue
					else
						ContentStr = GetOneNewsContent(NewsPageStr,NewsLinkStr)
						ContentStr = ReplaceContentStr(ContentStr)
						ContentStr = ReplaceIMGRemoteUrl(ContentStr,SaveIMGPath,AvailableDoMain,DummyDir,NewsLinkStr,SaveRemotePic)
						if TitleStr = "" then
							ReturnValue = GetOneNewsReturnValue(2,i + 1,"","",NewsLinkStr) & ReturnValue
						elseif ContentStr = "" then
							ReturnValue = GetOneNewsReturnValue(3,i + 1,TitleStr,"",NewsLinkStr) & ReturnValue
						else
							ReturnValue = GetOneNewsReturnValue(4,i + 1,TitleStr,ContentStr,NewsLinkStr) & ReturnValue
							if IsNull(HandSetAuthor) OR (HandSetAuthor = "") then
								AuthorStr = LoseHtml(GetOtherContent(NewsPageStr,AuthorHeadSetting,AuthorFootSetting))
							else
								AuthorStr = HandSetAuthor
							end if
							if IsNull(HandSetSource) OR (HandSetSource = "") then
								SourceStr = LoseHtml(GetOtherContent(NewsPageStr,SourceHeadSetting,SourceFootSetting))
							else
								SourceStr = HandSetSource
							end if
							if IsNull(HandSetAddDate) OR Not IsDate(HandSetSource) then
								AddDate = LoseHtml(GetOtherContent(NewsPageStr,AddDateHeadSetting,AddDateFootSetting))
							else
								AddDate = HandSetAddDate
							end if
							if AddDate <> "" then
								if Not IsDate(AddDate) then	AddDate = Now
							else
								AddDate = Now
							end if
							SaveCollectContent TitleStr,NewsLinkStr,ContentStr,SysClassID,AuthorStr,SourceStr,AddDate
						end if
					end if
					Set RsCheckNewsObj = Nothing
				else
					ReturnValue = GetOneNewsReturnValue(5,i + 1,"","",NewsLinkStr) & ReturnValue
				end if
			else
				ReturnValue = GetOneNewsReturnValue(5,i + 1,"","",NewsLinkStr) & ReturnValue
			end if
		else
			ReturnValue = GetOneNewsReturnValue(5,i + 1,"","",NewsLinkStr) & ReturnValue
		end if
	Next
	CollectStartLocation = i
End Function

Function ResumeGetNewsPageContent()
	dim ResumeSql,RsResumeNewsObj,ResumeNewsURL,ResumeNewsURL1,ResumeNewsLocation
	ResumeSql = "Select top 1 Links from FS_News where SiteID='" & CollectingSiteID &"' order by ID DESC"
	Set RsResumeNewsObj = CollectConn.Execute(ResumeSql)	
	If RsResumeNewsObj.EOF Then 
		set RsResumeNewsObj = nothing
		response.Write("<script>alert(""无法确定您以前的采集记录,\n续采失败!"");history.go(-2);</script>")	
	else
		ResumeNewsURL = RsResumeNewsObj("Links")
		set RsResumeNewsObj = nothing
	End If
	

	Dim NewsPageStr,TitleStr,ContentStr,AuthorStr,SourceStr,AddDate,i,n
	Dim ResponseAllStr,NewsListStr,NewsLinkStr,RsCheckNewsObj
	Dim NewsListStrArray,TempArray
	ResponseAllStr = GetPageContent(FormatUrl(ObjURL,CollectObjURL))	
	if ResponseAllStr = False then
		CollectPageNumber = CollectPageNumber + 1
		ReturnValue = ReturnValue & "<br>&nbsp;&nbsp;&nbsp;&nbsp;<strong>错误</strong>:读取新闻列表页面失败<br>"

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -