⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 collect_itemmodify5.asp

📁 SK信息采集2.0功能介绍: 1.可针对任何静态网页,动态网页进行采集。包括htm,html,shtml,ASP,ASPX,JSP,PHP等。 2.增加自定文件采集.用户可采集网页中的所有文件.
💻 ASP
📖 第 1 页 / 共 2 页
字号:
				  RsItem("NewsPageEnd") = NewsPageEnd
			   ElseIf NewsPageType = 2 Then
			   End If
			   RsItem.Update
			   RsItem.Close
			   Set RsItem = Nothing
			End If
			End Sub
			
			'==================================================
			'过程名:GetTest
			'作  用:采集测试
			'参  数:无
			'==================================================
			Sub GetTest()
			   SqlItem = "Select * From KS_CollectItem Where ItemID=" & ItemID
			   Set RsItem = Server.CreateObject("adodb.recordset")
			   RsItem.Open SqlItem, ConnItem, 1, 1
			   If RsItem.EOF And RsItem.BOF Then
					 FoundErr = True
				  ErrMsg = ErrMsg & "<br><li>参数错误,找不到该项目</li>"
			   Else
				  LoginType = RsItem("LoginType")
				  LoginUrl = RsItem("LoginUrl")
				  LoginPostUrl = RsItem("LoginPostUrl")
				  LoginUser = RsItem("LoginUser")
				  LoginPass = RsItem("LoginPass")
				  LoginFalse = RsItem("LoginFalse")
				  
				  ListStr = RsItem("ListStr")
				  LsString = RsItem("LsString")
				  LoString = RsItem("LoString")
				  ListPageType = RsItem("ListPageType")
				  LPsString = RsItem("LPsString")
				  LPoString = RsItem("LPoString")
				  ListPageStr1 = RsItem("ListPageStr1")
				  ListPageStr2 = RsItem("ListPageStr2")
				  ListPageID1 = RsItem("ListPageID1")
				  ListPageID2 = RsItem("ListPageID2")
				  ListPageStr3 = RsItem("ListPageStr3")
				  
				  HsString = RsItem("HsString")
				  HoString = RsItem("HoString")
				  HttpUrlType = RsItem("HttpUrlType")
				  HttpUrlStr = RsItem("HttpUrlStr")
				  
				  TsString = RsItem("TsString")
				  ToString = RsItem("ToString")
				  CsString = RsItem("CsString")
				  CoString = RsItem("CoString")
				  
				  DateType = RsItem("DateType")
				  DsString = RsItem("DsString")
				  DoString = RsItem("DoString")
			
				  AuthorType = RsItem("AuthorType")
				  AsString = RsItem("AsString")
				  AoString = RsItem("AoString")
				  AuthorStr = RsItem("AuthorStr")
			
				  CopyFromType = RsItem("CopyFromType")
				  FsString = RsItem("FsString")
				  FoString = RsItem("FoString")
				  CopyFromStr = RsItem("CopyFromStr")
			
				  KeyType = RsItem("KeyType")
				  KsString = RsItem("KsString")
				  KoString = RsItem("KoString")
				  KeyStr = RsItem("KeyStr")
			
				  NewsPageType = RsItem("NewsPageType")
				  NPsString = RsItem("NPsString")
				  NPoString = RsItem("NPoString")
				  NewsPageStr = RsItem("NewsPageStr")
				  NewsPageEnd = RsItem("NewsPageEnd")
				  
				  UpDateType = RsItem("UpDateType")
			   End If
			   RsItem.Close
			   Set RsItem = Nothing
			
			   If LoginType = 1 Then
				  If LoginUrl = "" Or LoginPostUrl = "" Or LoginUser = "" Or LoginPass = "" Or LoginFalse = "" Then
					 FoundErr = True
					 ErrMsg = ErrMsg & "<br><li>您要采集的网站需要登录!请将登录信息填写完整</li>"
				  End If
			   End If
			   If LsString = "" Then
				  FoundErr = True
				  ErrMsg = ErrMsg & "<br><li>列表开始标记不能为空!</li>"
			   End If
			   If LoString = "" Then
				  FoundErr = True
				  ErrMsg = ErrMsg & "<br><li>列表结束标记不能为空!</li>"
			   End If
			   If ListPageType = 0 Or ListPageType = 1 Then
				  If ListStr = "" Then
					 FoundErr = True
					 ErrMsg = ErrMsg & "<br><li>列表索引页不能为空!</li>"
				  End If
				  If ListPageType = 1 Then
					 If LPsString = "" Or LPoString = "" Then
						FoundErr = True
						ErrMsg = ErrMsg & "<br><li>索引分页开始、结束标记不能为空!</li>"
					 End If
				  End If
				  If ListPageStr1 <> "" And Len(ListPageStr1) < 15 Then
					 FoundErr = True
					 ErrMsg = ErrMsg & "<br><li>索引分页重定向设置不正确!</li>"
						End If
			   ElseIf ListPageType = 2 Then
				  If ListPageStr2 = "" Then
					 FoundErr = True
					 ErrMsg = ErrMsg & "<br><li>批量生成原字符串不能为空!</li>"
				  End If
				  If IsNumeric(ListPageID1) = False Or IsNumeric(ListPageID2) = False Then
					 FoundErr = True
					 ErrMsg = ErrMsg & "<br><li>批量生成的范围只能是数字!</li>"
				  Else
					 ListPageID1 = CLng(ListPageID1)
					 ListPageID2 = CLng(ListPageID2)
					 If ListPageID1 = 0 And ListPageID2 = 0 Then
						FoundErr = True
						ErrMsg = ErrMsg & "<br><li>批量生成的范围不正确!</li>"
					 End If
				  End If
			   ElseIf ListPageType = 3 Then
				  If ListPageStr3 = "" Then
					 FoundErr = True
					 ErrMsg = ErrMsg & "<br><li>索引分页不能为空!</li>"
				  End If
			   Else
				  FoundErr = True
				  ErrMsg = ErrMsg & "<br><li>请选择索引分页类型</li>"
			   End If
				 If HsString = "" Or HoString = "" Then
				  FoundErr = True
				  ErrMsg = ErrMsg & "<br><li>链接开始/结束标记不能为空!</li>"
				  End If
			
			
			   If FoundErr <> True And Action <> "SaveEdit" Then
				  Select Case ListPageType
				  Case 0, 1
						ListUrl = ListStr
				  Case 2
					 ListUrl = Replace(ListPageStr2, "{$ID}", CStr(ListPageID1))
				  Case 3
					 If InStr(ListPageStr3, "|") > 0 Then
						ListUrl = Left(ListPageStr3, InStr(ListPageStr3, "|") - 1)
					 Else
						ListUrl = ListPageStr3
					 End If
				  End Select
			   End If
			   
				  If FoundErr <> True And Action <> "SaveEdit" And LoginType = 1 Then
				  LoginData = KMCObj.UrlEncoding(LoginUser & "&" & LoginPass)
				  LoginResult = KMCObj.PostHttpPage(LoginUrl, LoginPostUrl, LoginData)
				  If InStr(LoginResult, LoginFalse) > 0 Then
					 FoundErr = True
					 ErrMsg = ErrMsg & "<br><li>登录网站时发生错误,请确认登录信息的正确性!</li>"
				  End If
				  End If
			   
			   If FoundErr <> True And Action <> "SaveEdit" Then
					 ListCode = KMCObj.GetHttpPage(ListUrl)
					 If ListCode <> "Error" Then
						ListCode = KMCObj.GetBody(ListCode, LsString, LoString, False, False)
						If ListCode <> "Error" Then
						   NewsArrayCode = KMCObj.GetArray(ListCode, HsString, HoString, False, False)
						   If NewsArrayCode <> "Error" Then
							  If InStr(NewsArrayCode, "$Array$") > 0 Then
								 NewsArray = Split(NewsArrayCode, "$Array$")
								 If HttpUrlType = 1 Then
									NewsUrl = Trim(Replace(HttpUrlStr, "{$ID}", NewsArray(0)))
								 Else
									NewsUrl = Trim(KMCObj.DefiniteUrl(NewsArray(0), ListUrl))
								 End If
							  Else
								 FoundErr = True
								 ErrMsg = ErrMsg & "<br><li>只发现一个有效链接?:" & NewsArrayCode & "</li>"
							 End If
						  Else
							 FoundErr = True
							 ErrMsg = ErrMsg & "<br><li>在获取链接列表时出错。</li>"
						  End If
					   Else
						   FoundErr = True
						  ErrMsg = ErrMsg & "<br><li>在截取列表时发生错误。</li>"
					   End If
					Else
						FoundErr = True
					   ErrMsg = ErrMsg & "<br><li>在获取:" & ListUrl & "网页源码时发生错误。</li>"
					End If
				 End If
			
			If FoundErr <> True Then
			   NewsCode = KMCObj.GetHttpPage(NewsUrl)
			   If NewsCode <> "Error" Then
				  Title = KMCObj.GetBody(NewsCode, TsString, ToString, False, False)
				  Content = KMCObj.GetBody(NewsCode, CsString, CoString, False, False)
				  If Title = "Error" Or Content = "Error" Then
					 FoundErr = True
					 ErrMsg = ErrMsg & "<br><li>在截取新闻标题/正文的时候发生错误:" & NewsUrl & "</li>"
				  Else
					 Title = KMCObj.FpHtmlEnCode(Title)
					 Title = KMCObj.dvHTMLEncode(Title)
			
					 '新闻分页
					' If NewsPageType = 1 Then
					'	NewsPageNext = KMCObj.GetPage(NewsCode, NPsString, NPoString, False, False)
					'	Do While NewsPageNext <> "Error"
					'	   If NewsPageStr = "" Or IsNull(NewsPageStr) = True Then
					'		  NewsPageNext = KMCObj.DefiniteUrl(NewsPageNext, NewsUrl)
					'	   Else
					'		  NewsPageNext = Replace(NewsPageStr, "{$ID}", NewsPageNext)
					'	   End If
					'	   If NewsPageNext = "" Or NewsPageNext = "Error" Then Exit Do
					'	   NewsPageNextCode = KMCObj.GetHttpPage(NewsPageNext)
					'	   ContentTemp = KMCObj.GetBody(NewsPageNextCode, CsString, CoString, False, False)
					'	   If ContentTemp = "Error" Then
					'		  Exit Do
					'	   Else
					'		  Content = Content & NewsPageEnd & ContentTemp
					'		  NewsPageNext = KMCObj.GetPage(NewsPageNextCode, NPsString, NPoString, False, False)
					'	   End If
					'	Loop
					' End If
					
					
					'源代码中获取分页URL   
					If NewsPageType = 1 Then
						InfoPageStr = KMCObj.GetBody(NewsCode, NPsString, NPoString, False, False)
						If InfoPageStr = "Error" Then  '正文没有分页
							
				        Else
							 InfoPageArrayCode = KMCObj.GetArray(InfoPageStr, NewsPageStr, NewsPageEnd, False, False)
							 If InfoPageArrayCode = "Error" Then
								 FoundErr = True
								 ErrMsg = ErrMsg & "<br><li>在分析:新闻正文分页时发生错误,请检查分页链接的开始代码和结束代码!</li>"
							  Else
								 InfoPageArray = Split(InfoPageArrayCode, "$Array$")
								 If IsArray(InfoPageArray) = True Then
									For Testi = 0 To UBound(InfoPageArray)
										  InfoPageArray(Testi) = KMCObj.DefiniteUrl(InfoPageArray(Testi), NewsUrl)
									Next
									'UrlTest = InfoPageArray(0)
									'NewsCode = KMCObj.GetHttpPage(UrlTest)
								 Else
									FoundErr = True
									ErrMsg = ErrMsg & "<br><li>在分析:" & NewsUrl & "新闻列表时发生错误!</li>"
								 End If
							  End If
						End if
			        End If
			     
					 If UpDateType = 0 Then
						UpDateTime = Now()
					 ElseIf UpDateType = 1 Then
						If DateType = 0 Then
						   UpDateTime = Now()
						Else
						   UpDateTime = KMCObj.GetBody(NewsCode, DsString, DoString, False, False)
						   UpDateTime = KMCObj.FpHtmlEnCode(UpDateTime)
						   If IsDate(UpDateTime) = True Then
							  UpDateTime = CDate(UpDateTime)
						   Else
							  UpDateTime = Now()
						   End If
						End If
					 ElseIf UpDateType = 2 Then
					 Else
						UpDateTime = Now()
					 End If
			
					 '作者
					 If AuthorType = 1 Then
						Author = KMCObj.GetBody(NewsCode, AsString, AoString, False, False)
					 ElseIf AuthorType = 2 Then
						Author = AuthorStr
					 End If
					 If Author = "Error" Or Trim(Author) = "" Then
						Author = "佚名"
					 Else
						Author = KMCObj.FpHtmlEnCode(Author)
					 End If
			
					 '来源
					 If CopyFromType = 1 Then
						CopyFrom = KMCObj.GetBody(NewsCode, FsString, FoString, False, False)
					 ElseIf CopyFromType = 2 Then
						CopyFrom = CopyFromStr
					 End If
					 If CopyFrom = "Error" Or Trim(CopyFrom) = "" Then
						CopyFrom = "不详"
					 Else
						CopyFrom = KMCObj.FpHtmlEnCode(CopyFrom)
					 End If
			
					 If KeyType = 0 Then
						Key = Title
						Key = KMCObj.CreateKeyWord(Key, 2)
					 ElseIf KeyType = 1 Then
						Key = KMCObj.GetBody(NewsCode, KsString, KoString, False, False)
						Key = KMCObj.FpHtmlEnCode(Key)
						Key = KMCObj.CreateKeyWord(Key, 2)
					 ElseIf KeyType = 2 Then
						Key = KMCObj.FpHtmlEnCode(Key)
					 End If
					 If Key = "Error" Or Trim(Key) = "" Then
						Key = ""
					 End If
				 End If
			   Else
				 FoundErr = True
				 ErrMsg = ErrMsg & "<br><li>在获取源码时发生错误:" & NewsUrl & "</li>"
			   End If
			End If
			
			If FoundErr <> True Then
			   Call GetFilters
			   Call Filters
			   Content = KMCObj.ReplaceSaveRemoteFile(UploadFiles, Content, strInstallDir, strChannelDir, False, NewsUrl)
			End If
			
			End Sub
			
			
			'==================================================
			'过程名:GetFilters
			'作  用:提取过滤信息
			'参  数:无
			'==================================================
			Sub GetFilters()
			   SqlF = "Select * From KS_Filters Where Flag=True And (PublicTf=True Or ItemID=" & ItemID & ") order by FilterID ASC"
			   Set RsF = ConnItem.Execute(SqlF)
			   If RsF.EOF And RsF.BOF Then
				  Arr_Filters = ""
			   Else
				  Arr_Filters = RsF.GetRows()
			   End If
			   RsF.Close
			   Set RsF = Nothing
			End Sub
			
			
			'==================================================
			'过程名:Filters
			'作  用:过滤
			'==================================================
			Sub Filters()
			If IsArray(Arr_Filters) = False Then
			   Exit Sub
			End If
			
			   For Filteri = 0 To UBound(Arr_Filters, 2)
				  FilterStr = ""
				  If Arr_Filters(1, Filteri) = ItemID Or Arr_Filters(10, Filteri) = True Then
					 If Arr_Filters(3, Filteri) = 1 Then '标题过滤
						If Arr_Filters(4, Filteri) = 1 Then
						   Title = Replace(Title, Arr_Filters(5, Filteri), Arr_Filters(8, Filteri))
						ElseIf Arr_Filters(4, Filteri) = 2 Then
						   FilterStr = KMCObj.GetBody(Title, Arr_Filters(6, Filteri), Arr_Filters(7, Filteri), True, True)
						   Do While FilterStr <> "Error"
							  Title = Replace(Title, FilterStr, Arr_Filters(8, Filteri))
							  FilterStr = KMCObj.GetBody(Title, Arr_Filters(6, Filteri), Arr_Filters(7, Filteri), True, True)
						   Loop
						End If
					 ElseIf Arr_Filters(3, Filteri) = 2 Then '正文过滤
						If Arr_Filters(4, Filteri) = 1 Then
						   Content = Replace(Content, Arr_Filters(5, Filteri), Arr_Filters(8, Filteri))
						ElseIf Arr_Filters(4, Filteri) = 2 Then
						   FilterStr = KMCObj.GetBody(Content, Arr_Filters(6, Filteri), Arr_Filters(7, Filteri), True, True)
						   Do While FilterStr <> "Error"
							  Content = Replace(Content, FilterStr, Arr_Filters(8, Filteri))
							  FilterStr = KMCObj.GetBody(Content, Arr_Filters(6, Filteri), Arr_Filters(7, Filteri), True, True)
						   Loop
						End If
					 End If
				  End If
			   Next
			End Sub
End Class
%>

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -