📄 collecting.asp
字号:
<%@language=vbscript codepage=936 %>
<% Option Explicit %>
<%
response.buffer=true
Const ChannelID=1
Server.ScriptTimeOut=999
Response.Expires = -1
Response.ExpiresAbsolute = Now() - 1
Response.Expires = 0
Response.CacheControl = "no-cache"
%>
<!--#include file="inc/Cls_DB.asp" -->
<!--#include file="inc/Function.asp" -->
<!--#include file="Inc/Const.asp" -->
<%
Dim ServerScriptTimeout
ServerScriptTimeout = Request("ServerScriptTimeout")
if ServerScriptTimeout = "" then
ServerScriptTimeout = 10000
else
ServerScriptTimeout = Clng(ServerScriptTimeout)
end if
Server.ScriptTimeout = ServerScriptTimeout
Dim DBC,CollectConn,Conn
Set DBC = New DataBaseClass
Set CollectConn = DBC.OpenConnection()
DBC.ConnStr = "DBQ=" + Server.MapPath(DataBaseConnectStr) + ";DefaultDir=;DRIVER={Microsoft Access Driver (*.mdb)};"
Set Conn = DBC.OpenConnection()
Set DBC = Nothing
'判断权限
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
<title>正在采集</title>
</head>
<link href="Inc/Collect.css" rel="stylesheet">
<body topmargin="2" leftmargin="2" oncontextmenu="//return false;">
</body>
</html>
<%
Sub InitialFun(InfoStr)
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<% = InfoStr %>
<link href="Inc/Collect.css" rel="stylesheet">
<body topmargin="2" leftmargin="2" oncontextmenu="//return false;">
<table width="100%" height="100%" border="0" cellpadding="0" cellspacing="0">
<tr>
<td valign="middle" width="50%;"><div align="right"><font color="#FF0000">正在连接远程目标地址</font></div></td>
<td valign="middle" width="50%;"><div align="left"><font color="#FF0000" id="ShowInfoArea" size="+1"></font></div></td>
</tr>
</table>
</body>
</html>
<script language="JavaScript">
var ForwardShow=true;
function ShowPromptInfo()
{
var TempStr=document.all.ShowInfoArea.innerText;
if (ForwardShow==true)
{
if (TempStr.length>4) ForwardShow=false;
document.all.ShowInfoArea.innerText=TempStr+'.';
}
else
{
if (TempStr.length==1) ForwardShow=true;
document.all.ShowInfoArea.innerText=TempStr.substr(0,TempStr.length-1);
}
}
window.setInterval('ShowPromptInfo()',300)
</script>
<%
End Sub
Dim SiteID,CollectCount,SysClassID,SiteUrl,demoid
Dim ListHeadSetting,ListFootSetting
Dim LinkHeadSetting,LinkFootSetting
Dim PagebodyHeadSetting,PagebodyFootSetting
Dim PageTitleHeadSetting,PageTitleFootSetting
Dim SysTemplet,SiteName '新闻模板
Dim ObjURL,NewsLinkStr,OldNewsLinkStr
Dim ErrorInfoStr '错误信息
Dim AvailCollectFlag
Dim SessionTitleAndLinkList,ResponseInfoStr,SessionResponseInfoStr
ErrorInfoStr = ""
AvailCollectFlag = True
SiteID = Request("SiteID")
CollectCount = Request("CollectCount")
OldNewsLinkStr = Request("Link")
if CollectCount = "" then
CollectCount = 100
else
CollectCount = CLng(CollectCount)
end if
if SiteID <> "" then
if Session("SessionCollectIndex") = -1 then
Session("SessionCollectIndex") = Session("SessionCollectIndex") + 1
InitialFun "<meta http-equiv=""refresh"" content=""1;url=Collecting.asp?SiteID=" & SiteID & "&Link=" & OldNewsLinkStr & "&CollectCount=" & CollectCount & "&AllowNewsSameName=" & Request("AllowNewsSameName") & "&OnlyText=" & Request("OnlyText") & "&SaveRemoteImage=" & Request("SaveRemoteImage") & "&ServerScriptTimeout=" & Request("ServerScriptTimeout") & """>"
else
GetCollectPara
if ErrorInfoStr <> "" then
ShowInfo ErrorInfoStr
Response.End
end if
AvailCollectFlag = GetNewsPageContent
if AvailCollectFlag =True then
ResponseInfoStr = Session("SessionResponseInfoStr") & "<script language=""javascript"">parent.SetProcessBar('" & CollectCount & "','" & Session("SessionCollectIndex") & "');</script><meta http-equiv=""refresh"" content=""1;url=Collecting.asp?SiteID=" & SiteID & "&Link=" & OldNewsLinkStr & "&CollectCount=" & CollectCount & "&AllowNewsSameName=" & Request("AllowNewsSameName") & "&OnlyText=" & Request("OnlyText") & "&SaveRemoteImage=" & Request("SaveRemoteImage") & "&ServerScriptTimeout=" & Request("ServerScriptTimeout") & """>"
ShowInfo ResponseInfoStr
else
if ErrorInfoStr = "" then
ShowInfo "<tr><td><font color=""#FF0000"">结果</font></td><td colspan=""2""><font color=""#FF0000"">共读取<strong>" & Session("SessionCollectIndex") & "</strong>条新闻,采集成功<strong>" & Session("SessionAlreadySaveNum") & "</strong>条新闻,<strong><font color=""#FF0000"">" & Session("SessionCollectIndex") - Session("SessionAlreadySaveNum") & "</font></strong>条新闻发生错误,保存" &Session("SessionSaveRemotePicNumber") & "张远程图片</font></td></tr>" & Session("SessionResponseInfoStr") & "<script language=""javascript"">parent.document.all.BtnCollect.disabled=false;parent.InitialProcessBar();</script>"
else
ShowInfo ErrorInfoStr & "<script language=""javascript"">parent.document.all.BtnCollect.disabled=false;parent.InitialProcessBar();</script>"
end if
Session("SessionCollectIndex") = -1
Session("SessionAlreadySaveNum") = 0
Session("SessionSaveRemotePicNumber") = 0
Session("SessionResponseInfoStr") = ""
Session("SessionTitleAndLinkList") = ""
end if
Set CollectConn = Nothing
end if
end if
Function GetNewsPageContent()
Dim NewsPageStr,TitleStr,ContentStr
Dim ResponseAllStr,NewsListStr
if Session("SessionCollectIndex") >= CollectCount then '采集结束
GetNewsPageContent = False
Exit Function
end if
if Session("SessionTitleAndLinkList") = "" then
ResponseAllStr = GetPageContent(FormatUrl(ObjURL,SiteUrl))
if ResponseAllStr = False then
GetNewsPageContent = False
ErrorInfoStr = "<td>读取采集目标页失败</td>"
Exit Function
end if
NewsListStr = GetContent(ResponseAllStr,ListHeadSetting,ListFootSetting,0)
Session("SessionTitleAndLinkList") = NewsListStr
else
NewsListStr = Session("SessionTitleAndLinkList")
end if
if OldNewsLinkStr <> "" then
NewsListStr = Mid(NewsListStr,InStr(NewsListStr,OldNewsLinkStr)+Len(OldNewsLinkStr))
end if
NewsLinkStr = GetContent(NewsListStr,LinkHeadSetting,LinkFootSetting,0)
if NewsLinkStr = "" then
GetNewsPageContent = False
Exit Function
end if
OldNewsLinkStr = Replace(Replace(NewsLinkStr,"""",""),"'","")
NewsLinkStr = FormatUrl(NewsLinkStr,SiteUrl)
NewsPageStr = GetPageContent(NewsLinkStr)
Session("SessionCollectIndex") = Session("SessionCollectIndex") + 1
if NewsPageStr <> False then
TitleStr = LoseHtml(Replace(GetContent(NewsPageStr,PageTitleHeadSetting,PageTitleFootSetting,0),"'",""))
Dim RsCheckNewsObj
Set RsCheckNewsObj = CollectConn.Execute("Select * from News where Title='" & TitleStr & "'")
if Not RsCheckNewsObj.Eof then
if Request("AllowNewsSameName") <> "1" then
GetNewsPageContent = True
Session("SessionCollectIndex") = Session("SessionCollectIndex") - 1
if RsCheckNewsObj("History") = True then
Session("SessionResponseInfoStr") = "<tr><td nowrap><font color=""#FF0000"">历史数据</font></td><td nowrap>" & TitleStr & "</td><td nowrap>" & NewsLinkStr & "</td></tr>" & Session("SessionResponseInfoStr")
else
Session("SessionResponseInfoStr") = "<tr><td nowrap><font color=""#FF0000"">等待审核</font></td><td nowrap>" & TitleStr & "</td><td nowrap>" & NewsLinkStr & "</td></tr>" & Session("SessionResponseInfoStr")
end if
Set RsCheckNewsObj = Nothing
Exit Function
end if
end if
Set RsCheckNewsObj = Nothing
ContentStr = GetContent(NewsPageStr,PagebodyHeadSetting,PagebodyFootSetting,0)
ContentStr = ReplaceKeyWords(ContentStr)
if Not (TitleStr = "" Or NewsLinkStr = "" Or ContentStr = "") then
if Request("OnlyText") = "1" then
ContentStr = LoseHtml(ContentStr)
ContentStr = Replace(ContentStr," ","")
else
if Request("SaveRemoteImage") = "1" then
Session("SessionSaveRemotePicNumber") = Session("SessionSaveRemotePicNumber") + 1
ContentStr = ReplaceRemoteUrl(ContentStr,SaveImagePath)
end if
end if
SaveCollectContent TitleStr,NewsLinkStr,ContentStr,SysClassID
Session("SessionAlreadySaveNum") = Session("SessionAlreadySaveNum") + 1
Session("SessionResponseInfoStr") = "<tr><td nowrap><font color=""#FF0000"">NO:" & Session("SessionCollectIndex") & "</font></td><td nowrap><a target=""_blank"" href=""" & NewsLinkStr & """>" & TitleStr & "</a><td nowrap>" & NewsLinkStr & "</td></td></tr>" & Session("SessionResponseInfoStr")
else
if TitleStr = "" then
Session("SessionResponseInfoStr") = "<tr><td nowrap><font color=""#FF0000"">NO:" & Session("SessionCollectIndex") & "</font></td><td nowrap>标题为空,没有保存</td><td nowrap>" & NewsLinkStr & "</td></tr>" & Session("SessionResponseInfoStr")
elseif NewsLinkStr = "" then
Session("SessionResponseInfoStr") = "<tr><td nowrap><font color=""#FF0000"">NO:" & Session("SessionCollectIndex") & "</font></td><td nowrap>链接为空,没有保存</td><td nowrap>" & NewsLinkStr & "</td></tr>" & Session("SessionResponseInfoStr")
elseif ContentStr = "" then
Session("SessionResponseInfoStr") = "<tr><td nowrap><font color=""#FF0000"">NO:" & Session("SessionCollectIndex") & "</font></td><td nowrap>新闻内容为空,没有保存</td><td nowrap>" & NewsLinkStr & "</td></tr>" & Session("SessionResponseInfoStr")
end if
end if
else
Session("SessionResponseInfoStr") = "<tr><td nowrap><font color=""#FF0000"">NO:" & Session("SessionCollectIndex") & "</font></td><td nowrap>读取新闻目标页出错</td><td nowrap>" & NewsLinkStr & "</td></tr>" & Session("SessionResponseInfoStr")
end if
GetNewsPageContent = True
End Function
Function GetCollectPara()
Dim RsSiteObj,Sql
if SiteID = "" then
ErrorInfoStr = "<td>没有采集站点,请重试</td>"
Exit Function
end if
Sql = "Select * from Site where ID=" & SiteID
Set RsSiteObj = CollectConn.Execute(Sql)
if RsSiteObj.Eof then
ErrorInfoStr = "<td>没有采集站点,请重试</td>"
else
ListHeadSetting = RsSiteObj("ListHeadSetting")
ListFootSetting = RsSiteObj("ListFootSetting")
LinkHeadSetting = RsSiteObj("LinkHeadSetting")
LinkFootSetting = RsSiteObj("LinkFootSetting")
PagebodyHeadSetting = RsSiteObj("PagebodyHeadSetting")
PagebodyFootSetting = RsSiteObj("PagebodyFootSetting")
PageTitleHeadSetting = RsSiteObj("PageTitleHeadSetting")
PageTitleFootSetting = RsSiteObj("PageTitleFootSetting")
ObjURL = RsSiteObj("objURL")
SysClassID = RsSiteObj("SysClass")
SiteUrl = RsSiteObj("SiteUrl")
SysTemplet = RsSiteObj("SysTemplet")
SiteName = RsSiteObj("SiteName")
demoid = RsSiteObj("demoid")
end if
Set RsSiteObj = Nothing
End Function
Function SaveCollectContent(Title,Links,Content,ClassID)
Dim RsNewsObj,RsTempObj,BoardnameObj,ClassnameObj
Set RsNewsObj = Server.CreateObject("Adodb.RecordSet")
RsNewsObj.Open "Select * from News where 1=0",CollectConn,3,3
RsNewsObj.AddNew
RsNewsObj("Title") = LoseHtml(Title)
RsNewsObj("Links") = Links
RsNewsObj("Content") = Content
RsNewsObj("ContentLength") = Len(Content)
RsNewsObj("AddDate") = Now
RsNewsObj("ImagesCount") = 0
RsNewsObj("boardid") = ClassID
set BoardnameObj=conn.execute("select boardname,classid from [board] where boardid="&ClassID&"")
RsNewsObj("boardname") = BoardnameObj("boardname")
RsNewsObj("classid") = BoardnameObj("classid")
set ClassnameObj=conn.execute("select classname,classid from [class] where classid="&BoardnameObj("classid")&"")
RsNewsObj("classname") = ClassnameObj("classname")
RsNewsObj("SysTemplet") = SysTemplet
RsNewsObj("SiteName") = SiteName
RsNewsObj("SiteID") = SiteID
RsNewsObj("demoid") = demoid
RsNewsObj.UpDate
RsNewsObj.Close
Set RsNewsObj = Nothing
End Function
Function ReplaceKeyWords(Content)
Dim RsRuleObj,HeadSeting,FootSeting,ReContent,regEx
Set RsRuleObj = CollectConn.Execute("Select * from Rule where SiteID=" & SiteID)
do while Not RsRuleObj.Eof
HeadSeting = RsRuleObj("HeadSeting")
FootSeting = RsRuleObj("FootSeting")
ReContent = RsRuleObj("ReContent")
if IsNull(FootSeting) or FootSeting = "" then
if HeadSeting <> "" then
Content = Replace(Content,HeadSeting,ReContent)
end if
end if
if Not IsNull(FootSeting) and FootSeting <> "" and Not IsNull(HeadSeting) and HeadSeting <> "" then
Set regEx = New RegExp
regEx.Pattern = HeadSeting & "[^\0]*" & FootSeting
regEx.IgnoreCase = False
regEx.Global = True
'Dim Matches,Match,HaveTF,ShowStr
'HaveTF = False
'Set Matches = regEx.Execute(Content)
'For Each Match in Matches
'ShowStr = ShowStr & Match.Value & "<br>"
'HaveTF = True
'Next
'if HaveTF = True then
'Response.Write(ShowStr)
'Response.End
'end if
if IsNull(ReContent) then
Content = regEx.Replace(Content,"")
else
Content = regEx.Replace(Content,ReContent)
end if
Set regEx = Nothing
end if
RsRuleObj.MoveNext
loop
Set RsRuleObj = Nothing
ReplaceKeyWords = Content
End Function
Sub ShowInfo(InfoStr)
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
<title>正在采集</title>
</head>
<link href="Inc/Collect.css" rel="stylesheet">
<body topmargin="2" leftmargin="2" oncontextmenu="//return false;">
<table width="100%" border="0" cellpadding="0" cellspacing="0">
<% = InfoStr %>
</table>
</body>
</html>
<%
End Sub
%>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -