⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 processlist.asp

📁 新闻采集系统1.0.2 For 4.03(2005.05.15更新) 【使用环境】 本系统是基于动易4.03免费版的基础上的新闻采集系统
💻 ASP
字号:
<%@LANGUAGE="VBSCRIPT" CODEPAGE="936"%>
<!-- #include file="Conn.asp" -->
<!-- #include file="Function.asp" -->
<!-- #include file="Config.asp" -->
<%
'===============请保留以下版权信息==================
'程序名称:Goaler 采集
'程序版本:Ver 0.01
'运行环境:ASP+ACCESS+XMLHTTP
'采集目标:http://edu.cnzz.cn
'程序编写:goaler
'个人主页:http://www.goalercn.com
'	       http://blog.goalercn.com
'===============请保留以上版权信息==================
' ---------------------------------------------------------------------------
' Request-parameter setup for one harvesting pass.
' CheckStr, Conn, ReadTemplate, CreatHTML, TemplatePath and MainURL are not
' defined in this file; they presumably come from Conn.asp / Function.asp /
' Config.asp (TODO confirm). From the way its result is compared below,
' CheckStr(value,"i") is assumed to yield False for a non-integer value.
' ---------------------------------------------------------------------------
Dim BID,SID,CategoryName
BID = CheckStr(Request.QueryString("BID"),"i")
SID = CheckStr(Request.QueryString("SID"),"i")
CategoryName = Request.QueryString("CategoryName")
' Both ids are mandatory: they are concatenated into SQL statements further
' down. The original condition tested SID twice and never validated BID.
IF(BID=False OR SID=False)THEN
	Response.Write("目标参数不正确")
	Response.End
END IF 

' When static HTML output is enabled, cache the page template in Application
' scope on first use and look up the name of the parent category once.
IF(CreatHTML = True)THEN
	Dim FSO,MyFSO,strTemplate,artID,artBName
	IF(Application("strTemplate")="")THEN Application("strTemplate") = ReadTemplate(TemplatePath)
	artBName = Conn.Execute("SELECT BName FROM artBClass WHERE BID="&BID)(0)
END IF 

' Running counters; they travel from page to page through the query string
' (see the redirect emitted at the end of a pass).
Dim SuccessNum,NoActNum,FailNum,TotalNum
SuccessNum = CheckStr(Request.QueryString("SuccessNum"),"i")
NoActNum = CheckStr(Request.QueryString("NoActNum"),"i")
FailNum = CheckStr(Request.QueryString("FailNum"),"i")
TotalNum = CheckStr(Request.QueryString("TotalNum"),"i")
IF(SuccessNum=False)THEN SuccessNum = 0
IF(NoActNum=False)THEN NoActNum = 0
IF(FailNum=False)THEN FailNum = 0
IF(TotalNum=False)THEN TotalNum = 0

Dim ReturnMessagePosition,ReturnMessage
Dim Action,CurrPage,StartPage,EndPage,CurrURL,i,RS
Action = Request.QueryString("Action")
CurrPage = CheckStr(Request.QueryString("CurrPage"),"i")
StartPage = CheckStr(Request.QueryString("StartPage"),"i")
EndPage = CheckStr(Request.QueryString("EndPage"),"i")
IF(StartPage<=0 OR StartPage=False)THEN StartPage = 1
IF(EndPage<=0 OR EndPage=False)THEN EndPage = 1
' Normalise the range BEFORE defaulting CurrPage, so that a missing CurrPage
' starts at the corrected end page instead of at a page below StartPage.
IF(EndPage<StartPage)THEN EndPage = StartPage
IF(CurrPage<=0 OR CurrPage=False)THEN CurrPage = EndPage
' URL of the remote list page that this pass will fetch.
CurrURL = MainURL&"list.aspx?id="&SID&"&page="&CurrPage
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
<title>采集</title>
<link href="style.css" rel="stylesheet" type="text/css">
</head>
<body>

<%
' Control form plus status rows. The "curAction" and "Result" spans are the
' ids later passed to Message() by the harvesting code further down the page.
' CategoryName comes straight from the query string, so it is HTML-encoded
' wherever it is echoed (attribute value and element content) to prevent
' markup/script injection. & "" guards against an Empty value.
%>
<table width="100%" cellpadding="3" cellspacing="1" bgcolor="#cccccc" border="0">
<tr bgcolor="#ffffff">
	<td>
	<form method="get">
	起始页:<input type="text" name="StartPage" value="<%=StartPage%>" onfocus="select()" size="3">
	终止页:<input type="text" name="EndPage" value="<%=EndPage%>" onfocus="select()" size="3">
	&nbsp;&nbsp;&nbsp;<input type="submit" value="开始采集">
	<input type="hidden" name="BID" value="<%=BID%>">
	<input type="hidden" name="SID" value="<%=SID%>">
	<input type="hidden" name="CategoryName" value="<%=Server.HTMLEncode(CategoryName & "")%>">
	<input type="hidden" name="Action" value="1">
	<font color="#ff0000">请注意页码范围,实际采集是从终止页向起始页开始采集(倒序)</font>
	</form>
	</td>
</tr>
<tr bgcolor="#ffffff">
	<td>当前:<font color="#ff0000"><%=Server.HTMLEncode(CategoryName & "")%></font> → 第<font color="#ff0000"><%=CurrPage%></font>页,URL=<a href="<%=Server.HTMLEncode(CurrURL & "")%>" target="_blank"><%=Server.HTMLEncode(CurrURL & "")%></a></td>
</tr>
<tr bgcolor="#ffffff">
	<td>状态:<span id="curAction">等待中。。。</span></td>
</tr>
<tr bgcolor="#ffffff">
	<td>结果:<span id="Result">等待中。。。</span></td>
</tr>
</table>
<%
' ---------------------------------------------------------------------------
' Harvesting pass for the current list page (CurrURL).
'   1. Fetch the list page and extract the article paths it links to.
'   2. Walk the paths from last to first; fetch each article, extract its
'      fields, and insert it into artList unless a row with the same title,
'      BID and SID already exists.
'   3. Optionally render a static HTML file per inserted article.
'   4. Redirect to the previous page number until StartPage is reached,
'      carrying the counters in the query string; then print the summary.
' getHTTPPage, RegExpText, ReplaceRemoteUrl, ReplaceRemoteImage, makeHTML and
' Message are defined outside this file (presumably Function.asp - TODO
' confirm). The Err checks below assume those helpers leave Err set on
' failure.
' ---------------------------------------------------------------------------
IF(Action="")THEN
	Response.Write("<div align=center><div class=""info"">请设定开始采集的页码,然后点击开始采集<br><br>开始采集前请注意相看终止页码大小</div></div>")
ELSE
	ReturnMessagePosition = "Result"
	ReturnMessage = "正在抓取数据,请稍等。。。"
	call Message(ReturnMessagePosition,ReturnMessage)
	Dim L_Content,L_Array_ID
	' Drop any stale error so the check below reflects this fetch only.
	Err.Clear
	L_Content = getHTTPPage(CurrURL)

	IF(Err<>0)THEN
		Response.Write("<div align=center><div class=""info"">抓取列表页出现错误或已经采集完所有页面,请检查参数</div></div>")
	ELSE
		' Narrow the page down to the region between the two table markers.
		L_Content = RegExpText(L_Content,"<TABLE cellSpacing=""0"" cellPadding=""0"" width=""450"" align=""center"" border=""0"">","<table width=""500"" height=""40"" border=""0"" align=""center"" cellpadding=""0""",0)

		' Collect every link target as a comma-separated list, then split it.
		L_Array_ID = RegExpText(L_Content,"&nbsp; <a href='/","' target=_blank>",1)
		L_Array_ID = Split(L_Array_ID,",")
		
		' Start one below the upper bound: the last element is skipped
		' (presumably an empty trailing entry - TODO confirm in RegExpText).
		i = Ubound(L_Array_ID)-1
		
		Dim V_PageContent,V_TempContent
		Dim V_Title,V_Author,V_From,V_Content
		Dim V_ID,V_UpdateTime
		While(i >= 0)
			'Response.Write(L_Array_ID(i) & "<br>")
			' Reset Err per article; otherwise one failure (fetch, parse or
			' database) would make every following article count as failed.
			Err.Clear
			V_PageContent = getHTTPPage(MainURL&L_Array_ID(i))
			IF(NOT Response.IsClientConnected)THEN Response.End	' stop immediately if the client has disconnected

			IF(Err<>0)THEN
				Response.Write("<div class=""result"">抓取内容页面出现错误,跳过</div>")
			ELSE
				V_Title = RegExpText(V_PageContent,"<span id=""Title"">","</span>",0)
				V_TempContent = RegExpText(V_PageContent,"<span id=""lblTime"">","</td>",0)
				V_Author = RegExpText(V_TempContent,"<a id=""lnkAuthor"" target=""_blank"">","</a>",0)
				V_From = RegExpText(V_TempContent,"<a id=""lnkSource"" target=""_blank"">","</a>",0)
				V_Content = RegExpText(V_PageContent,"<td height=""400"" colspan=""2""><span id=""Content"">","</span><br>"&VBCrlf&"								</td>",0)
				IF(SaveToLocal = True)THEN
					V_Content = ReplaceRemoteUrl(V_Content, SavePath, SaveFileTypeExt)
				ELSE
					V_Content = ReplaceRemoteImage(V_Content)
				END IF 
				IF(Err<>0)THEN
					ReturnMessagePosition = "curAction"
					ReturnMessage = L_Array_ID(i)&" 采集失败,跳过"
					call Message(ReturnMessagePosition,ReturnMessage)
					FailNum = FailNum + 1
					Response.Write("<div class=""result"">"&L_Array_ID(i)&" → "&V_Title&" → <font color=""#ff0000"">采集失败,跳过</font></div>")
				ELSE
					' Duplicate check: same title within the same BID/SID.
					SET RS=Conn.Execute("SELECT artTitle FROM artList WHERE artTitle='"&CheckStr(V_Title,"s")&"' AND artBID="&BID&" AND artSID="&SID)
					IF(RS.EOF)THEN
						V_UpdateTime = Now()
						Conn.Execute("INSERT INTO artList(artBID,artSID,artTitle,artAuthor,artFrom,artContent,artTime) VALUES("&BID&","&SID&",'"&CheckStr(V_Title,"s")&"','"&CheckStr(V_Author,"s")&"','"&CheckStr(V_From,"s")&"','"&CheckStr(V_Content,"s")&"','"&V_UpdateTime&"')")
						
						ReturnMessagePosition = "curAction"
						ReturnMessage = L_Array_ID(i)&" 采集成功,已入库"
						call Message(ReturnMessagePosition,ReturnMessage)
						SuccessNum = SuccessNum + 1
						Response.Write("<div class=""result""><a href="""&MainURL&L_Array_ID(i)&""" target=""_blank"">"&L_Array_ID(i)&"</a> → "&V_Title&" → <font color=""#339933"">已入库</font></div>")
						IF(CreatHTML = True)THEN
							' NOTE(review): takes the highest Id as "the row just
							' inserted"; concurrent inserts could return another
							' row's id.
							artID = Conn.Execute("SELECT TOP 1 Id FROM artList ORDER BY Id DESC")(0)
							strTemplate = Application("strTemplate")
							strTemplate = Replace(strTemplate,"{artBID}",BID)
							strTemplate = Replace(strTemplate,"{artSID}",SID)
							strTemplate = Replace(strTemplate,"{artBName}",artBName)
							strTemplate = Replace(strTemplate,"{artSName}",CategoryName)
							strTemplate = Replace(strTemplate,"{artTitle}",V_Title)
							strTemplate = Replace(strTemplate,"{artAuthor}",V_Author)
							strTemplate = Replace(strTemplate,"{artFrom}",V_From)
							strTemplate = Replace(strTemplate,"{artUpdateTime}",V_UpdateTime)
							strTemplate = Replace(strTemplate,"{artContent}",V_Content)
							Call makeHTML(strTemplate,artID,HtmlFilePath)
							strTemplate = ""
						END IF 
					ELSE
						ReturnMessagePosition = "curAction"
						ReturnMessage = L_Array_ID(i)&" 已存在,跳过"
						call Message(ReturnMessagePosition,ReturnMessage)
						NoActNum = NoActNum + 1
						Response.Write("<div class=""result""><a href="""&MainURL&L_Array_ID(i)&""" target=""_blank"">"&L_Array_ID(i)&"</a> → "&V_Title&" → <font color=""#A53127"">已存在,跳过</font></div>")
					END IF 
					RS.CLOSE
					SET RS=NOTHING
				END IF 
				
			END IF 
			
			i=i-1
			' Push this article's result line to the browser right away.
			Response.Flush()
		Wend
	END IF
	' Count the articles of this page. L_Array_ID is only an array when the
	' list fetch succeeded, and Split("") yields an upper bound of -1, so guard
	' both cases instead of calling Ubound unconditionally.
	IF(IsArray(L_Array_ID))THEN
		IF(Ubound(L_Array_ID) > 0)THEN TotalNum = TotalNum + Ubound(L_Array_ID)
	END IF 
	IF(CurrPage>StartPage)THEN
		' Continue with the previous page. CategoryName is user-supplied, so it
		' is URL-encoded before being placed inside the script string and URL.
		Response.Write("<script language=javascript>window.location='ProcessList.asp?BID="&BID&"&SID="&SID&"&StartPage="&StartPage&"&EndPage="&EndPage&"&CategoryName="&Server.URLEncode(CategoryName & "")&"&Action=1&SuccessNum="&SuccessNum&"&NoActNum="&NoActNum&"&FailNum="&FailNum&"&TotalNum="&TotalNum&"&CurrPage="&CurrPage-1&"';</script>")
	ELSE
		ReturnMessagePosition = "curAction"
		ReturnMessage = "采集完毕"
		call Message(ReturnMessagePosition,ReturnMessage)

		ReturnMessagePosition = "Result"
		ReturnMessage = "采集完毕,共计<font color='#ff0000'>"&TotalNum&"</font>个,入库<font color='#ff0000'>"&SuccessNum&"</font>个,错误<font color='#ff0000'>"&FailNum&"</font>个,跳过<font color='#ff0000'>"&NoActNum&"</font>个"
		call Message(ReturnMessagePosition,ReturnMessage)
	END IF 
END IF 
%>
</body>
</html>

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -