📄 collect_itemcollecfast.asp
字号:
<%@LANGUAGE="VBSCRIPT" CODEPAGE="936"%>
<%option explicit%>
<!--#include file="../../Conn.asp"-->
<!--#include file="../../SysCls/KS_CommonCls.asp"-->
<!--#include file="../../SysCls/KS_CollectCommonCls.asp"-->
<!--#include file="../Inc/Session.asp"-->
<%
'===================================================================================================================
'软件名称:科汛网站管理系统
'当前版本:科汛网站管理系统 V2.2 0628个人Access版
'Copyright (C) 2006-2008 Kesion.Com All rights reserved.
'产品咨询QQ:9537636,41904294
'技术支持QQ:111394,54004407
'程序版权:科汛网络
'程序开发:科汛网络开发组(总策划:林文仲)
'E-Mail :kesioncms@hotmail.com webmaster@kesion.com
'官方网站:http://www.kesion.com
'演示站点:http://test.kesion.com
'郑重声明:
' ①、免费版本请在程序首页保留版权信息,并做上本站LOGO友情连接,商业版本无此要求;
' ②、任何个人或组织不得在授权允许的情况下删除、修改、拷贝本软件及其他副本上一切关于版权的信息;
' ③、科汛网络保留此软件的法律追究权利
'===================================================================================================================
Response.Buffer = True
Server.ScriptTimeout = 999
Response.Expires = -1
Response.ExpiresAbsolute = Now() - 1
Response.Expires = 0
Response.CacheControl = "no-cache"
Dim KSCls
Set KSCls = New Collect_ItemCollectFast
KSCls.Execute()
Set KSCls = Nothing
Class Collect_ItemCollectFast
Private KSCMS
Private KMCObj
Private ConnItem
Private ItemNum, ListNum, PageNum, NewsSuccesNum, NewsFalseNum
Private Rs, Sql, RsItem, SqlItem, FoundErr, ErrMsg, ItemEnd, ListEnd
'项目变量
Private ItemID, ItemName, ChannelID, strChannelDir, ClassID, SpecialID, LoginType, LoginUrl, LoginPostUrl, LoginUser, LoginPass, LoginFalse
Private ListStr, LsString, LoString, ListPageType, LPsString, LPoString, ListPageStr1, ListPageStr2, ListPageID1, ListPageID2, ListPageStr3, HsString, HoString, HttpUrlType, HttpUrlStr
Private TsString, ToString, CsString, CoString, DateType, DsString, DoString, AuthorType, AsString, AoString, AuthorStr, CopyFromType, FsString, FoString
Private CopyFromStr, KeyType, KsString, KoString, KeyStr, NewsPageType, NPsString, NPoString, NewsPageStr, NewsPageEnd
Private ItemCollecDate, PaginationType, MaxCharPerPage, ReadLevel, Stars, ReadPoint, Hits, UpDateType, UpDateTime, Strip, Rolls, Comment, Recommend, Popular
Private FnameType, TemplateID, Script_Iframe, Script_Object, Script_Script, Script_Div, Script_Class, Script_Span, Script_Img, Script_Font, Script_A, Script_Html, CollecListNum, CollecNewsNum, IntoBase, BeyondSavePic, CollecOrder, Verific, InputerType, Inputer, EditorType, Editor, ShowComment, Script_Table, Script_Tr, Script_Td
Private InfoPageArrayCode ,InfoPageArray,Testi,NewsNextPageStr
'过滤变量
Private Arr_Filters, FilterStr, Filteri
'采集相关的变量
Private ContentTemp, NewsPageNext, NewsPageNextCode, Arr_i, NewsUrl, NewsCode
'文章保存变量
Private ArticleID, Title, Content, Author, CopyFrom, Key, IncludePic, UploadFiles, DefaultPicUrl
'其它变量
Private LoginData, LoginResult, OrderTemp
Private Arr_Item, CollecTest, Content_View, CollecNewsAll
Private StepID
'历史记录
Private Arr_Historys, His_Title, His_CollecDate, His_Result, His_Repeat, His_i
'执行时间变量
Private StartTime, OverTime
'图片统计
Private Arr_Images, ImagesNum, ImagesNumAll
'列表
Private ListUrl, ListCode, NewsArrayCode, NewsArray, ListArray, ListPageNext
'安装路径
Private strInstallDir, CacheTemp
Private Sub Class_Initialize()
Set KSCMS=New CommonCls
Set KMCObj=New CollectCommonCls
Set ConnItem = KSCMS.ConnItem()
End Sub
Private Sub Class_Terminate()
Call KSCMS.CloseConnItem()
Call KSCMS.Closeconn
Set KSCMS=Nothing
Set KMCObj=Nothing
End Sub
Sub Execute()
strInstallDir = Trim(Request.ServerVariables("SCRIPT_NAME"))
strInstallDir = Left(strInstallDir, InStrRev(LCase(strInstallDir), "/") - 1)
'strInstallDir=left(strInstallDir,instrrev(lcase(strInstallDir),"/"))
'缓存路径
CacheTemp = LCase(Trim(Request.ServerVariables("SCRIPT_NAME")))
CacheTemp = Left(CacheTemp, InStrRev(CacheTemp, "/"))
CacheTemp = Replace(CacheTemp, "\", "_")
CacheTemp = Replace(CacheTemp, "/", "_")
CacheTemp = "ansir" & CacheTemp
'数据初始化
CollecListNum = 0
CollecNewsNum = 0
ArticleID = 0
ItemNum = CLng(Trim(Request("ItemNum")))
ListNum = CLng(Trim(Request("ListNum")))
NewsSuccesNum = CLng(Trim(Request("NewsSuccesNum")))
NewsFalseNum = CLng(Trim(Request("NewsFalseNum")))
ImagesNumAll = CLng(Trim(Request("ImagesNumAll")))
ListPageNext = Trim(Request("ListPageNext"))
FoundErr = False
ItemEnd = False
ListEnd = False
ErrMsg = ""
Call SetCache
If ItemEnd <> True Then
If (ItemNum - 1) > UBound(Arr_Item, 2) Then
ItemEnd = True
Else
Call SetItems
End If
End If
If ItemEnd <> True Then
If ListPageType = 0 Then
If ListNum = 1 Then
ListUrl = ListStr
Else
ListEnd = True
End If
ElseIf ListPageType = 1 Then
If ListNum = 1 Then
ListUrl = ListStr
Else
If ListPageNext = "" Or ListPageNext = "Error" Then
ListEnd = True
Else
ListPageNext = Replace(ListPageNext, "{$ID}", "&")
ListUrl = ListPageNext
End If
End If
ElseIf ListPageType = 2 Then '索引方式
If ListNum = 1 Then
ListUrl = ListStr
Else
If ListPageID1 > ListPageID2 Then
If (ListPageID1 - ListNum + 1) < ListPageID2 Or (ListPageID1 - ListNum + 1) < 0 Then
ListEnd = True
Else
ListUrl = Replace(ListPageStr2, "{$ID}", CStr(ListPageID1 - ListNum + 1))
End If
Else
If (ListPageID1 + ListNum - 1) > ListPageID2 Then
ListEnd = True
Else
ListUrl = Replace(ListPageStr2, "{$ID}", CStr(ListPageID1 + ListNum - 1))
End If
End If
End If
ElseIf ListPageType = 3 Then
ListArray = Split(ListPageStr3, "|")
If (ListNum - 1) > UBound(ListArray) Then
ListEnd = True
Else
ListUrl = ListArray(ListNum - 1)
End If
End If
If ListNum > CollecListNum And CollecListNum <> 0 Then
ListEnd = True
End If
End If
If ItemEnd = True Then
ErrMsg = "<br>采集任务全部完成"
ErrMsg = ErrMsg & "<br>成功采集: " & NewsSuccesNum & " 篇,失败: " & NewsFalseNum & " 篇,图片:" & ImagesNumAll & " 张"
Call DelCache
Else
If ListEnd = True Then
ItemNum = ItemNum + 1
ListNum = 1
ErrMsg = "<br>" & ItemName & " 项目所有列表采集完成,正在整理数据请稍后..."
ErrMsg = ErrMsg & "<meta http-equiv=""refresh"" content=""3;url=Collect_ItemCollecFast.asp?ItemNum=" & ItemNum & "&ListNum=" & ListNum & "&NewsSuccesNum=" & NewsSuccesNum & "&NewsFalseNum=" & NewsFalseNum & "&ImagesNumAll=" & ImagesNumAll & """>"
End If
End If
Call TopItem
If ItemEnd = True Or ListEnd = True Then
If ItemEnd <> True Then
Call SetCache_His
End If
Call KMCObj.WriteCollectSucced(ErrMsg)
Else
FoundErr = False
ErrMsg = ""
Call StartCollection
Call FootItem2
End If
Response.Flush
End Sub
'==================================================
'过程名:StartCollection
'作 用:开始采集
'参 数:无
'==================================================
Sub StartCollection()
Dim Rs
'第一次采集时登录
If LoginType = 1 And ListNum = 1 Then
LoginData = KMCObj.UrlEncoding(LoginUser & "&" & LoginPass)
LoginResult = KMCObj.PostHttpPage(LoginUrl, LoginPostUrl, LoginData)
If InStr(LoginResult, LoginFalse) > 0 Then
FoundErr = True
ErrMsg = ErrMsg & "<br><li>在登录网站时发生错误,请确保登录信息的正确性!</li>"
End If
End If
Set Rs = Server.CreateObject("Adodb.Recordset")
Rs.Open "Select ID From KS_Class Where ID='" & ClassID & "'", conn, 1, 1
If Rs.EOF And Rs.BOF Then
FoundErr = True
ErrMsg = ErrMsg & "<br>系统检测到栏目ID[<font color=red>" & ClassID & "</font>]在主数据库中已删除,请修改项目属性的所属栏目后,再采集"
Call KMCObj.WriteCollectSuccedStart(ErrMsg)
Response.End
End If
Rs.Close
Set Rs = Nothing
If FoundErr <> True Then
ListCode = KMCObj.GetHttpPage(ListUrl)
Call GetListPage
If ListCode = "Error" Then
FoundErr = True
ErrMsg = ErrMsg & "<br><li>在获取列表:" & ListUrl & "网页源码时发生错误!</li>"
Else
ListCode = KMCObj.GetBody(ListCode, LsString, LoString, False, False)
If ListCode = "Error" Or ListCode = "" Then
FoundErr = True
ErrMsg = ErrMsg & "<br><li>在截取:" & ListUrl & "的文章列表时发生错误!</li>"
End If
End If
End If
If FoundErr <> True Then
NewsArrayCode = KMCObj.GetArray(ListCode, HsString, HoString, False, False)
If NewsArrayCode = "Error" Then
FoundErr = True
ErrMsg = ErrMsg & "<br><li>在分析:" & ListUrl & "文章列表时发生错误!</li>"
Else
NewsArray = Split(NewsArrayCode, "$Array$")
For Arr_i = 0 To UBound(NewsArray)
If HttpUrlType = 1 Then
NewsArray(Arr_i) = Trim(Replace(HttpUrlStr, "{$ID}", NewsArray(Arr_i)))
Else
NewsArray(Arr_i) = Trim(KMCObj.DefiniteUrl(NewsArray(Arr_i), ListUrl))
End If
NewsArray(Arr_i) = KMCObj.CheckUrl(NewsArray(Arr_i))
Next
If CollecOrder = True Then
For Arr_i = 0 To Fix(UBound(NewsArray) / 2)
OrderTemp = NewsArray(Arr_i)
NewsArray(Arr_i) = NewsArray(UBound(NewsArray) - Arr_i)
NewsArray(UBound(NewsArray) - Arr_i) = OrderTemp
Next
End If
End If
End If
If FoundErr <> True Then
Call TopItem2
'边框开始
Response.Write "<div style=""border: double #E7E7E7;height:355; overflow: auto; width:100%"" align=""center"">"
CollecNewsAll = 0
For Arr_i = 0 To UBound(NewsArray)
If CollecNewsAll >= CollecNewsNum And CollecNewsNum <> 0 Then
Exit For
End If
CollecNewsAll = CollecNewsAll + 1
'变量初始化
UploadFiles = ""
DefaultPicUrl = ""
IncludePic = 0
ImagesNum = 0
NewsCode = ""
FoundErr = False
ErrMsg = ""
His_Repeat = False
NewsUrl = NewsArray(Arr_i)
'Response.Write NewsArray(Arr_i)
Title = ""
PageNum = 1
'………………………………………………
If Response.IsClientConnected Then
Response.Flush
Else
Response.End
End If
'………………………………………………
If CollecTest = False Then
His_Repeat = CheckRepeat(NewsUrl)
Else
His_Repeat = False
End If
If His_Repeat = True Then
FoundErr = True
End If
If FoundErr <> True Then
NewsCode = KMCObj.GetHttpPage(NewsUrl)
If NewsCode = "Error" Then
FoundErr = True
ErrMsg = ErrMsg & "<br>在获取:" & NewsUrl & "文章源码时发生错误!"
Title = "获取网页源码失败"
End If
End If
If FoundErr <> True Then
Title = KMCObj.GetBody(NewsCode, TsString, ToString, False, False)
If Title = "Error" Or Title = "" Then
FoundErr = True
ErrMsg = ErrMsg & "<br>在分析:" & NewsUrl & "的文章标题时发生错误"
Title = "<br>标题分析错误"
End If
If FoundErr <> True Then
Content = KMCObj.GetBody(NewsCode, CsString, CoString, False, False)
If Content = "Error" Or Content = "" Then
FoundErr = True
ErrMsg = ErrMsg & "<br>在分析:" & NewsUrl & "的文章正文时发生错误"
Title = Title & "<br>正文分析错误"
End If
End If
If FoundErr <> True Then
'源代码中获取分页URL
'正文分页
If NewsPageType = 1 Then
NewsPageNext = KMCObj.GetBody(NewsCode, NPsString, NPoString, False, False)
If NewsPageNext = "Error" Then '正文没有分页
Else
InfoPageArrayCode = KMCObj.GetArray(NewsPageNext, NewsPageStr, NewsPageEnd, False, False)
If InfoPageArrayCode = "Error" Then
FoundErr = True
ErrMsg = ErrMsg & "<br><li>在分析:新闻正文分页时发生错误,请检查分页链接的开始代码和结束代码!</li>"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -