⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 collect_itemcollecfast.asp

📁 SK信息采集2.0功能介绍: 1.可针对任何静态网页,动态网页进行采集。包括htm,html,shtml,ASP,ASPX,JSP,PHP等。 2.增加自定文件采集.用户可采集网页中的所有文件.
💻 ASP
📖 第 1 页 / 共 3 页
字号:
<%@LANGUAGE="VBSCRIPT" CODEPAGE="936"%>
<%option explicit%>
<!--#include file="../../Conn.asp"-->
<!--#include file="../../SysCls/KS_CommonCls.asp"-->
<!--#include file="../../SysCls/KS_CollectCommonCls.asp"-->
<!--#include file="../Inc/Session.asp"-->
<%
'===================================================================================================================
'软件名称:科汛网站管理系统
'当前版本:科汛网站管理系统 V2.2 0628个人Access版
'Copyright (C) 2006-2008 Kesion.Com  All rights reserved.
'产品咨询QQ:9537636,41904294
'技术支持QQ:111394,54004407 
'程序版权:科汛网络
'程序开发:科汛网络开发组(总策划:林文仲)
'E-Mail  :kesioncms@hotmail.com webmaster@kesion.com
'官方网站:http://www.kesion.com  
'演示站点:http://test.kesion.com 
'郑重声明:
'    ①、免费版本请在程序首页保留版权信息,并做上本站LOGO友情连接,商业版本无此要求;
'    ②、任何个人或组织不得在授权允许的情况下删除、修改、拷贝本软件及其他副本上一切关于版权的信息;
'    ③、科汛网络保留此软件的法律追究权利
'===================================================================================================================
Response.Buffer = True
Server.ScriptTimeout = 999
Response.Expires = -1
Response.ExpiresAbsolute = Now() - 1
Response.Expires = 0
Response.CacheControl = "no-cache"

Dim KSCls
Set KSCls = New Collect_ItemCollectFast
KSCls.Execute()
Set KSCls = Nothing

Class Collect_ItemCollectFast
        Private KSCMS
		Private KMCObj
		Private ConnItem
		Private ItemNum, ListNum, PageNum, NewsSuccesNum, NewsFalseNum
		Private Rs, Sql, RsItem, SqlItem, FoundErr, ErrMsg, ItemEnd, ListEnd
		
		'项目变量
		Private ItemID, ItemName, ChannelID, strChannelDir, ClassID, SpecialID, LoginType, LoginUrl, LoginPostUrl, LoginUser, LoginPass, LoginFalse
		Private ListStr, LsString, LoString, ListPageType, LPsString, LPoString, ListPageStr1, ListPageStr2, ListPageID1, ListPageID2, ListPageStr3, HsString, HoString, HttpUrlType, HttpUrlStr
		Private TsString, ToString, CsString, CoString, DateType, DsString, DoString, AuthorType, AsString, AoString, AuthorStr, CopyFromType, FsString, FoString
		Private CopyFromStr, KeyType, KsString, KoString, KeyStr, NewsPageType, NPsString, NPoString, NewsPageStr, NewsPageEnd
		Private ItemCollecDate, PaginationType, MaxCharPerPage, ReadLevel, Stars, ReadPoint, Hits, UpDateType, UpDateTime, Strip, Rolls, Comment, Recommend, Popular
		Private FnameType, TemplateID, Script_Iframe, Script_Object, Script_Script, Script_Div, Script_Class, Script_Span, Script_Img, Script_Font, Script_A, Script_Html, CollecListNum, CollecNewsNum, IntoBase, BeyondSavePic, CollecOrder, Verific, InputerType, Inputer, EditorType, Editor, ShowComment, Script_Table, Script_Tr, Script_Td
		
		Private InfoPageArrayCode ,InfoPageArray,Testi,NewsNextPageStr

		
		'过滤变量
		Private Arr_Filters, FilterStr, Filteri
		
		'采集相关的变量
		Private ContentTemp, NewsPageNext, NewsPageNextCode, Arr_i, NewsUrl, NewsCode
		
		'文章保存变量
		Private ArticleID, Title, Content, Author, CopyFrom, Key, IncludePic, UploadFiles, DefaultPicUrl
		
		'其它变量
		Private LoginData, LoginResult, OrderTemp
		Private Arr_Item, CollecTest, Content_View, CollecNewsAll
		Private StepID
		
		'历史记录
		Private Arr_Historys, His_Title, His_CollecDate, His_Result, His_Repeat, His_i
		
		'执行时间变量
		Private StartTime, OverTime
		
		'图片统计
		Private Arr_Images, ImagesNum, ImagesNumAll
		
		'列表
		Private ListUrl, ListCode, NewsArrayCode, NewsArray, ListArray, ListPageNext
		
		'安装路径
		Private strInstallDir, CacheTemp
		
		Private Sub Class_Initialize()
		  Set KSCMS=New CommonCls
		  Set KMCObj=New CollectCommonCls
		  Set ConnItem = KSCMS.ConnItem()
		End Sub
        Private Sub Class_Terminate()
		 Call KSCMS.CloseConnItem()
		 Call KSCMS.Closeconn
		 Set KSCMS=Nothing
		 Set KMCObj=Nothing
		End Sub
		Sub Execute()
		
		strInstallDir = Trim(Request.ServerVariables("SCRIPT_NAME"))
		strInstallDir = Left(strInstallDir, InStrRev(LCase(strInstallDir), "/") - 1)
		'strInstallDir=left(strInstallDir,instrrev(lcase(strInstallDir),"/"))
		'缓存路径
		CacheTemp = LCase(Trim(Request.ServerVariables("SCRIPT_NAME")))
		CacheTemp = Left(CacheTemp, InStrRev(CacheTemp, "/"))
		CacheTemp = Replace(CacheTemp, "\", "_")
		CacheTemp = Replace(CacheTemp, "/", "_")
		CacheTemp = "ansir" & CacheTemp
		
		'数据初始化
		CollecListNum = 0
		CollecNewsNum = 0
		ArticleID = 0
		ItemNum = CLng(Trim(Request("ItemNum")))
		ListNum = CLng(Trim(Request("ListNum")))
		NewsSuccesNum = CLng(Trim(Request("NewsSuccesNum")))
		NewsFalseNum = CLng(Trim(Request("NewsFalseNum")))
		ImagesNumAll = CLng(Trim(Request("ImagesNumAll")))
		ListPageNext = Trim(Request("ListPageNext"))
		FoundErr = False
		ItemEnd = False
		ListEnd = False
		ErrMsg = ""
		
		Call SetCache
		
		If ItemEnd <> True Then
		   If (ItemNum - 1) > UBound(Arr_Item, 2) Then
			  ItemEnd = True
		   Else
			  Call SetItems
		   End If
		End If
		
		If ItemEnd <> True Then
		   If ListPageType = 0 Then
			  If ListNum = 1 Then
				 ListUrl = ListStr
			  Else
				 ListEnd = True
			  End If
		   ElseIf ListPageType = 1 Then
			  If ListNum = 1 Then
				 ListUrl = ListStr
			  Else
				 If ListPageNext = "" Or ListPageNext = "Error" Then
					ListEnd = True
				 Else
					ListPageNext = Replace(ListPageNext, "{$ID}", "&")
					ListUrl = ListPageNext
				 End If
			  End If
		   ElseIf ListPageType = 2 Then  '索引方式
					If ListNum = 1 Then
					 ListUrl = ListStr
					Else
						If ListPageID1 > ListPageID2 Then
						   If (ListPageID1 - ListNum + 1) < ListPageID2 Or (ListPageID1 - ListNum + 1) < 0 Then
							  ListEnd = True
						   Else
							  ListUrl = Replace(ListPageStr2, "{$ID}", CStr(ListPageID1 - ListNum + 1))
						   End If
						Else
						   If (ListPageID1 + ListNum - 1) > ListPageID2 Then
							  ListEnd = True
						   Else
							  ListUrl = Replace(ListPageStr2, "{$ID}", CStr(ListPageID1 + ListNum - 1))
						   End If
						End If
				   End If
		   ElseIf ListPageType = 3 Then
			  ListArray = Split(ListPageStr3, "|")
			  If (ListNum - 1) > UBound(ListArray) Then
				 ListEnd = True
			  Else
				 ListUrl = ListArray(ListNum - 1)
			  End If
		   End If
		   If ListNum > CollecListNum And CollecListNum <> 0 Then
			  ListEnd = True
		   End If
		End If
		
		If ItemEnd = True Then
		   ErrMsg = "<br>采集任务全部完成"
		   ErrMsg = ErrMsg & "<br>成功采集: " & NewsSuccesNum & "  篇,失败: " & NewsFalseNum & "  篇,图片:" & ImagesNumAll & "  张"
		   Call DelCache
		Else
		   If ListEnd = True Then
			  ItemNum = ItemNum + 1
			  ListNum = 1
			  ErrMsg = "<br>" & ItemName & "  项目所有列表采集完成,正在整理数据请稍后..."
			  ErrMsg = ErrMsg & "<meta http-equiv=""refresh"" content=""3;url=Collect_ItemCollecFast.asp?ItemNum=" & ItemNum & "&ListNum=" & ListNum & "&NewsSuccesNum=" & NewsSuccesNum & "&NewsFalseNum=" & NewsFalseNum & "&ImagesNumAll=" & ImagesNumAll & """>"
		   End If
		End If
		
		Call TopItem
		If ItemEnd = True Or ListEnd = True Then
		   If ItemEnd <> True Then
			  Call SetCache_His
		   End If
		   Call KMCObj.WriteCollectSucced(ErrMsg)
		Else
		   FoundErr = False
		   ErrMsg = ""
		   Call StartCollection
		   Call FootItem2
		End If
		Response.Flush
		End Sub
		'==================================================
		'过程名:StartCollection
		'作  用:开始采集
		'参  数:无
		'==================================================
		Sub StartCollection()
		Dim Rs
		'第一次采集时登录
		If LoginType = 1 And ListNum = 1 Then
		   LoginData = KMCObj.UrlEncoding(LoginUser & "&" & LoginPass)
		   LoginResult = KMCObj.PostHttpPage(LoginUrl, LoginPostUrl, LoginData)
		   If InStr(LoginResult, LoginFalse) > 0 Then
			  FoundErr = True
			  ErrMsg = ErrMsg & "<br><li>在登录网站时发生错误,请确保登录信息的正确性!</li>"
		   End If
		End If
		Set Rs = Server.CreateObject("Adodb.Recordset")
			 Rs.Open "Select ID From KS_Class Where ID='" & ClassID & "'", conn, 1, 1
			If Rs.EOF And Rs.BOF Then
				  FoundErr = True
				  ErrMsg = ErrMsg & "<br>系统检测到栏目ID[<font color=red>" & ClassID & "</font>]在主数据库中已删除,请修改项目属性的所属栏目后,再采集"
				  Call KMCObj.WriteCollectSuccedStart(ErrMsg)
			   Response.End
			End If
			Rs.Close
			Set Rs = Nothing
			
		If FoundErr <> True Then
		   ListCode = KMCObj.GetHttpPage(ListUrl)
		   Call GetListPage
		   If ListCode = "Error" Then
			  FoundErr = True
			  ErrMsg = ErrMsg & "<br><li>在获取列表:" & ListUrl & "网页源码时发生错误!</li>"
		   Else
			  ListCode = KMCObj.GetBody(ListCode, LsString, LoString, False, False)
			  If ListCode = "Error" Or ListCode = "" Then
				 FoundErr = True
				 ErrMsg = ErrMsg & "<br><li>在截取:" & ListUrl & "的文章列表时发生错误!</li>"
			  End If
		   End If
		End If
		
		If FoundErr <> True Then
		   NewsArrayCode = KMCObj.GetArray(ListCode, HsString, HoString, False, False)
		   If NewsArrayCode = "Error" Then
			  FoundErr = True
			  ErrMsg = ErrMsg & "<br><li>在分析:" & ListUrl & "文章列表时发生错误!</li>"
		   Else
			  NewsArray = Split(NewsArrayCode, "$Array$")
			  For Arr_i = 0 To UBound(NewsArray)
				 If HttpUrlType = 1 Then
					NewsArray(Arr_i) = Trim(Replace(HttpUrlStr, "{$ID}", NewsArray(Arr_i)))
				  
				 Else
					NewsArray(Arr_i) = Trim(KMCObj.DefiniteUrl(NewsArray(Arr_i), ListUrl))
				  
				 End If
				 NewsArray(Arr_i) = KMCObj.CheckUrl(NewsArray(Arr_i))
			  Next
			  If CollecOrder = True Then
				 For Arr_i = 0 To Fix(UBound(NewsArray) / 2)
					OrderTemp = NewsArray(Arr_i)
					NewsArray(Arr_i) = NewsArray(UBound(NewsArray) - Arr_i)
					NewsArray(UBound(NewsArray) - Arr_i) = OrderTemp
				 Next
			  End If
		   End If
		End If
		
		If FoundErr <> True Then
		   Call TopItem2
		   '边框开始
		  Response.Write "<div style=""border: double #E7E7E7;height:355; overflow: auto; width:100%"" align=""center"">"

		   CollecNewsAll = 0
		   For Arr_i = 0 To UBound(NewsArray)
			  If CollecNewsAll >= CollecNewsNum And CollecNewsNum <> 0 Then
				 Exit For
			  End If
			  CollecNewsAll = CollecNewsAll + 1
			  '变量初始化
			  UploadFiles = ""
			  DefaultPicUrl = ""
			  IncludePic = 0
			  ImagesNum = 0
			  NewsCode = ""
			  FoundErr = False
			  ErrMsg = ""
			  His_Repeat = False
			  NewsUrl = NewsArray(Arr_i)
			  'Response.Write NewsArray(Arr_i)
			  Title = ""
			  PageNum = 1
			  '………………………………………………
			  If Response.IsClientConnected Then
				 Response.Flush
			  Else
				 Response.End
			  End If
			  '………………………………………………
		
			  If CollecTest = False Then
				 His_Repeat = CheckRepeat(NewsUrl)
			  Else
				 His_Repeat = False
			  End If
			  If His_Repeat = True Then
				 FoundErr = True
			  End If
					  
			   
			  If FoundErr <> True Then
				 NewsCode = KMCObj.GetHttpPage(NewsUrl)
				 If NewsCode = "Error" Then
					FoundErr = True
					ErrMsg = ErrMsg & "<br>在获取:" & NewsUrl & "文章源码时发生错误!"
					Title = "获取网页源码失败"
				 End If
			  End If
		
			  If FoundErr <> True Then
				 Title = KMCObj.GetBody(NewsCode, TsString, ToString, False, False)
				 If Title = "Error" Or Title = "" Then
					FoundErr = True
					ErrMsg = ErrMsg & "<br>在分析:" & NewsUrl & "的文章标题时发生错误"
					Title = "<br>标题分析错误"
				 End If
				 If FoundErr <> True Then
					Content = KMCObj.GetBody(NewsCode, CsString, CoString, False, False)
					If Content = "Error" Or Content = "" Then
					   FoundErr = True
					   ErrMsg = ErrMsg & "<br>在分析:" & NewsUrl & "的文章正文时发生错误"
					   Title = Title & "<br>正文分析错误"
					End If
				 End If
				 If FoundErr <> True Then
				 
				    '源代码中获取分页URL  
					 
					 '正文分页
					If NewsPageType = 1 Then
						NewsPageNext = KMCObj.GetBody(NewsCode, NPsString, NPoString, False, False)
						If NewsPageNext = "Error" Then  '正文没有分页
						
				        Else
								 InfoPageArrayCode = KMCObj.GetArray(NewsPageNext, NewsPageStr, NewsPageEnd, False, False)
								 If InfoPageArrayCode = "Error" Then
									 FoundErr = True
									 ErrMsg = ErrMsg & "<br><li>在分析:新闻正文分页时发生错误,请检查分页链接的开始代码和结束代码!</li>"

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -