📄 admin_itemcollecnews.asp
字号:
<%@language=vbscript codepage=936 %>
<%
'********************************************************
'程序名称:最强新闻采集系统
'版权所有:火红的五月(qq:88389917)
'程序制作:火红的五月
%>
<%
option explicit
Response.Buffer = True
Server.ScriptTimeOut=999
Response.Expires = -1
Response.ExpiresAbsolute = Now() - 1
Response.Expires = 0
Response.CacheControl = "no-cache"
%>
<!--#include file="inc/conn.asp"-->
<!--#include file="inc/function.asp"-->
<!--#include file="Admin_ChkPurview.asp"-->
<!--#include file="inc/ubbcode.asp"-->
<!--#include file="inc/clsCache.asp"-->
<%
Dim ItemNum,NewsNum,PaingNum,NewsSuccesNum,NewsFalseNum,NewsNumAll
Dim Rs,Sql,RsItem,SqlItem,FoundErr,ErrMsg,ItemEnd,NewsEnd
'项目变量
Dim ItemID,ItemName,ChannelID,strChannelDir,ClassID,SpecialID
Dim TsString,ToString,CsString,CoString,DateType,DsString,DoString,AuthorType,AsString,AoString,AuthorStr,CopyFromType,FsString,FoString
Dim CopyFromStr,KeyType,KsString,KoString,KeyStr,NewsPaingType,NPsString,NpoString,NewsPaingStr,NewsPaingHtml
Dim PaginationType,MaxCharPerPage,ReadLevel,Stars,ReadPoint,Hits,UpDateType,UpDateTime,IncludePicYn,DefaultPicYn,OnTop,Elite,Hot
Dim SkinID,TemplateID,Script_Iframe,Script_Object,Script_Script,Script_Div,Script_Class,Script_Span,Script_Img,Script_Font,Script_A,Script_Html,CollecNewsNum,Passed,SaveFiles,CollecOrder,LinkUrlYn,InputerType,Inputer,EditorType,Editor,ShowCommentLink
'过滤变量
Dim Arr_Filters,FilterStr,Filteri
'采集相关的变量
Dim ContentTemp,NewsPaingNext,NewsPaingNextCode,Arr_i,NewsUrl,NewsCode
'文章保存变量
Dim ArticleID,Title,Content,Author,CopyFrom,Key,IncludePic,UploadFiles,DefaultPicUrl
'其它变量
Dim Arr_Item,Arr_News,CollecTest,Content_View
'历史记录
Dim Arr_Histrolys,His_Title,His_CollecDate,His_Result,His_Repeat,His_i
'执行时间变量
Dim StartTime,OverTime
'图片统计
Dim Arr_Images,ImagesNum,ImagesNumAll
Dim strInstallDir,CacheTemp
strInstallDir=trim(request.ServerVariables("SCRIPT_NAME"))
strInstallDir=left(strInstallDir,instrrev(lcase(strInstallDir),"/")-1)
strInstallDir=left(strInstallDir,instrrev(lcase(strInstallDir),"/"))
CacheTemp=Lcase(trim(request.ServerVariables("SCRIPT_NAME")))
CacheTemp=left(CacheTemp,instrrev(CacheTemp,"/"))
CacheTemp=replace(CacheTemp,"\","_")
CacheTemp=replace(CacheTemp,"/","_")
CacheTemp="ansir" & CacheTemp
ItemNum=Clng(Trim(Request("ItemNum")))
NewsNum=Clng(Trim(Request("NewsNum")))
NewsSuccesNum=Clng(Trim(Request("NewsSuccesNum")))
NewsFalseNum=Clng(Trim(Request("NewsFalseNum")))
ImagesNumAll=Clng(Trim(Request("ImagesNumAll")))
NewsPaingNext=Trim(Request("NewsPaingNext"))
ArticleID=Trim(Request("ArticleID"))
NewsNumAll=Trim(Request("NewsNumAll"))
If ArticleID="" Then
ArticleID=0
Else
ArticleID=Clng(ArticleID)
End If
If NewsNumAll="" Then
NewsNumAll=0
Else
NewsNumAll=Clng(NewsNumAll)
End If
FoundErr=False
ItemEnd=False
NewsEnd=False
Call SetCache
If ItemEnd<>True Then
If (ItemNum-1)>Ubound(Arr_Item,2) then
ItemEnd=True
Else
Call SetItems()
End If
If ItemEnd<>True Then
If NewsNum=1 Then
Call SetNews()
Else
Call GetNews()
End if
If NewsEnd<>True Then
If (NewsNum-1)>Ubound(Arr_News,2) Then
NewsEnd=True
Else
NewsUrl=Arr_News(0,NewsNum-1)
End If
End If
End If
End If
If ItemEnd=True Then
ErrMsg="<br>采集任务全部完成"
ErrMsg=ErrMsg & "<br>全部新闻:" & NewsNumAll & " 条,成功采集: " & NewsSuccesNum & " 条新闻,失败: " & NewsFalseNum & " 条,图片: " & ImagesNumAll & " 张"
Call DelCache()
Else
If NewsEnd=True Then
ItemNum=ItemNum+1
NewsNum=1
Call SetHistroly()
ErrMsg="<br>" & ItemName & " 项目所有列表采集完成,正在整理数据请稍后..."
ErrMsg=ErrMsg & "<meta http-equiv=""refresh"" content=""3;url=Admin_ItemCollecNews.asp?ItemNum=" & ItemNum & "&NewsNum=" & NewsNum & "&NewsSuccesNum=" & NewsSuccesNum & "&NewsFalseNum=" & NewsFalseNum & "&ImagesNumAll=" & ImagesNumAll & "&NewsNumAll=" & NewsNumAll & """>"
End If
End If
Call TopItem()
Response.Flush
If ItemEnd=True Or NewsEnd=True Then
Call WriteSucced(ErrMsg)
Else
FoundErr=False
ErrMsg=""
Call TopItem2()
Response.Flush
Call StartCollection()
Call FootItem2()
End If
Call FootItem()
Response.Flush()
'关闭数据库链接
Call CloseConn()
Call CloseConnItem()
%>
<%
'==================================================
'过程名:StartCollection
'作 用:开始采集
'参 数:无
'==================================================
Sub StartCollection()
'变量初始化
UploadFiles=""
DefaultPicUrl=""
IncludePic=0
ImagesNum=0
NewsCode=""
FoundErr=False
ErrMsg=""
His_Repeat=False
Title=""
PaingNum=1
'………………………………………………
If Response.IsClientConnected Then
Response.Flush
Else
Response.End
End If
'………………………………………………
If CollecTest=False Then
His_Repeat=CheckRepeat(NewsUrl)
Else
His_Repeat=False
End If
If His_Repeat=True Then
FoundErr=True
End If
If FoundErr<>True Then
NewsCode=GetHttpPage(NewsUrl)
If NewsCode="$False$" Then
FoundErr=True
ErrMsg=ErrMsg & "<br>在获取:" & NewsUrl & "新闻源码时发生错误!"
Title="分析源码错误"
End If
End If
If FoundErr<>True Then
Title=GetBody(NewsCode,TsString,ToString,False,False)
If Title="$False$" or Title="" then
FoundErr=True
ErrMsg=ErrMsg & "<br>在分析:" & NewsUrl & "的新闻标题时发生错误"
Title="<br>标题分析错误"
End If
If FoundErr<>True Then
Content=GetBody(NewsCode,CsString,CoString,False,False)
If Content="$False$" or Content="" Then
FoundErr=True
ErrMsg=ErrMsg & "<br>在分析:" & NewsUrl & "的新闻正文时发生错误"
Title=Title & "<br>正文分析错误"
End If
End If
End If
If FoundErr<>True Then
'新闻分页
If NewsPaingType=1 Then
NewsPaingNext=GetPaing(NewsCode,NPsString,NPoString,False,False)
NewsPaingNext=FpHtmlEnCode(NewsPaingNext)
Do While NewsPaingNext<>"$False$" And NewsPaingNext<>""
If NewsPaingStr="" or IsNull(NewsPaingStr)=True Then
NewsPaingNext=DefiniteUrl(NewsPaingNext,NewsUrl)
Else
NewsPaingNext=Replace(NewsPaingStr,"{$ID}",NewsPaingNext)
End If
If NewsPaingNext="" or NewsPaingNext="$False$" Then
Exit Do
End If
NewsPaingNextCode=GetHttpPage(NewsPaingNext)
ContentTemp=GetBody(NewsPaingNextCode,CsString,CoString,False,False)
If ContentTemp="$False$" Then
Exit Do
Else
PaingNum=PaingNum+1
Content=Content & NewsPaingHtml & ContentTemp
NewsPaingNext=GetPaing(NewsPaingNextCode,NPsString,NPoString,False,False)
NewsPaingNext=FpHtmlEnCode(NewsPaingNext)
End If
Loop
End If
'过滤
Call Filters()
Title=FpHtmlEnCode(Title)
Call FilterScript()
Content=Ubbcode(Content)
End If
'分开写(太长了照顾不过来)
If FoundErr<>True Then
'时间
If UpDateType=0 Then
UpDateTime=Now()
ElseIf UpDateType=1 Then
If DateType=0 then
UpDateTime=Now()
Else
UpDateTime=GetBody(NewsCode,DsString,DoString,False,False)
UpDateTime=Lcase(FpHtmlEncode(UpDateTime))
UpDateTime=Trim(Replace(UpDateTime," "," "))
If IsDate(UpDateTime)=True Then
UpDateTime=CDate(UpDateTime)
Else
UpDateTime=Now()
End If
End If
ElseIf UpDateType=2 Then
Else
UpDateTime=Now()
End If
'作者
If AuthorType=1 Then
Author=GetBody(NewsCode,AsString,AoString,False,False)
ElseIf AuthorType=2 Then
Author=AuthorStr
Else
Author="佚名"
End If
Author=FpHtmlEncode(Author)
If Author="" or Author="$False$" then
Author="佚名"
Else
If Len(Author)>255 then
Author=Left(Author,255)
End If
End If
'来源
If CopyFromType=1 Then
CopyFrom=GetBody(NewsCode,FsString,FoString,False,False)
ElseIf CopyFromType=2 Then
CopyFrom=CopyFromStr
Else
CopyFrom="不详"
End If
CopyFrom=FpHtmlEncode(CopyFrom)
If CopyFrom="" or CopyFrom="$False$" Then
CopyFrom="不详"
Else
If Len(CopyFrom)>255 Then
CopyFrom=Left(CopyFrom,255)
End If
End If
'关键字
If KeyType=0 Then
Key=Title
Key=CreateKeyWord(Key,2)
ElseIf KeyType=1 Then
Key=GetBody(NewsCode,KsString,KoString,False,False)
Key=FpHtmlEncode(Key)
Key=CreateKeyWord(Key)
ElseIf KeyType=2 Then
Key=KeyStr
Key=FpHtmlEncode(Key)
If Len(Key)>253 Then
Key="|" & Left(Key,253) & "|"
Else
Key="|" & Key & "|"
End If
End If
If Key="" or Key="$False$" Then
Key="|南国都市|新闻|"
End If
End If
If FoundErr<>True Then
'转换图片相对地址为绝对地址/保存
If CollecTest=False And SaveFiles=True then
Content=ReplaceSaveRemoteFile(Content,strInstallDir,strChannelDir,True,NewsUrl)
Else
Content=ReplaceSaveRemoteFile(Content,strInstallDir,strChannelDir,False,NewsUrl)
End If
'转换swf文件地址
Content=ReplaceSwfFile(Content,NewsUrl)
'图片统计、文章图片属性设置
If UploadFiles<>"" Then
If Instr(UploadFiles,"|")>0 Then
Arr_Images=Split(UploadFiles,"|")
ImagesNum=Ubound(Arr_Images)+1
DefaultPicUrl=Arr_Images(0)
Else
ImagesNum=1
DefaultPicUrl=UploadFiles
End If
If DefaultPicYn=False then
DefaultPicUrl=""
End If
If IncludePicYn=True Then
IncludePic=-1
Else
IncludePic=0
End If
If SaveFiles<>True Then
UploadFiles=""
End If
Else
ImagesNum=0
DefaultPicUrl=""
IncludePic=0
End If
ImagesNumAll=ImagesNumAll+ImagesNum
End If
If FoundErr<>True Then
If CollecTest=False Then
Call SaveArticle
SqlItem="INSERT INTO Histroly(ItemID,ChannelID,ClassID,SpecialID,ArticleID,Title,CollecDate,NewsUrl,Result) VALUES ('" & ItemID & "','" & ChannelID & "','" & ClassID & "','" & SpecialID & "','" & ArticleID & "','" & Title & "','" & Now() & "','" & NewsUrl & "',True)"
ConnItem.Execute(SqlItem)
Content=Replace(Content,"[InstallDir_ChannelDir]",strInstallDir & strChannelDir & "/")
End If
NewsSuccesNum=NewsSuccesNum+1
ErrMsg=ErrMsg & "No:<font color=red>" & NewsSuccesNum+NewsFalseNum & "</font><br>"
ErrMsg=ErrMsg & "新闻标题:"
ErrMsg=ErrMsg & "<font color=red>" & Title & "</font><br>"
ErrMsg=ErrMsg & "更新时间:" & UpDateTime & "<br>"
ErrMsg=ErrMsg & "新闻作者:" & Author & "<br>"
ErrMsg=ErrMsg & "新闻来源:" & CopyFrom & "<br>"
ErrMsg=ErrMsg & "采集页面:<a href=" & NewsUrl & " target=_blank>" & NewsUrl & "</a><br>"
ErrMsg=ErrMsg & "其它信息:分页--" & PaingNum & " 页,图片--" & ImagesNum & " 张<br>"
ErrMsg=ErrMsg & "正文预览:"
If Content_View=True Then
ErrMsg=ErrMsg & "<br>" & Content
Else
ErrMsg=ErrMsg & "您没有启用正文预览功能"
End If
ErrMsg=ErrMsg & "<br><br>关 键 字:" & Key & ""
Else
NewsFalseNum=NewsFalseNum+1
If His_Repeat=True Then
ErrMsg=ErrMsg & "No:<font color=red>" & NewsSuccesNum+NewsFalseNum & "</font><br>"
ErrMsg=ErrMsg & "目标新闻:<font color=red>"
If His_Result=True Then
ErrMsg=ErrMsg & His_Title
Else
ErrMsg=ErrMsg & NewsUrl
End If
ErrMsg=ErrMsg & "</font> 的记录已存在,不予采集。<br>"
ErrMsg=ErrMsg & "采集时间:" & His_CollecDate & "<br>"
ErrMsg=ErrMsg & "新闻来源:<a href='" & NewsUrl & "' target=_blank>"&NewsUrl&"</a><br>"
ErrMsg=ErrMsg & "采集结果:"
If His_Result=False Then
ErrMsg=ErrMsg & "失败"
ErrMsg=ErrMsg & "<br>失败原因:" & Title
Else
ErrMsg=ErrMsg & "成功"
End If
ErrMsg=ErrMsg & "<br>提示信息:如想再次采集,请先将该新闻的历史记录<font color=red>删除</font><br>"
End If
If CollecTest=False And His_Repeat=False Then
SqlItem="INSERT INTO Histroly(ItemID,ChannelID,ClassID,SpecialID,Title,CollecDate,NewsUrl,Result) VALUES ('" & ItemID & "','" & ChannelID & "','" & ClassID & "','" & SpecialID & "','" & Title & "','" & Now() & "','" & NewsUrl & "',False)"
ConnItem.Execute(SqlItem)
End If
End If
ErrMsg=ErrMsg & "<table width=""100%"" border=""0"" align=""center"" cellpadding=""2"" cellspacing=""1"">"
ErrMsg=ErrMsg & "<tr>"
ErrMsg=ErrMsg & "<td height=""22"" colspan=""2"" align=""left"" class=""tdbg"">"
ErrMsg=ErrMsg & "数据整理中,3秒后继续......3秒后如果还没反应请点击 <a href='Admin_ItemCollecNews.asp?ItemNum=" & ItemNum & "&NewsNum=" & NewsNum+1 & "&NewsSuccesNum=" & NewsSuccesNum & "&NewsFalseNum=" & NewsFalseNum & "&ImagesNumAll=" & ImagesNumAll & "&ArticleID=" & ArticleID & "&NewsNumAll=" & NewsNumAll & "'><font color=red>这里</font></a> 继续<br>"
ErrMsg=ErrMsg & "<meta http-equiv=""refresh"" content=""3;url=Admin_ItemCollecNews.asp?ItemNum=" & ItemNum & "&NewsNum=" & NewsNum+1 & "&NewsSuccesNum=" & NewsSuccesNum & "&NewsFalseNum=" & NewsFalseNum & "&ImagesNumAll=" & ImagesNumAll & "&ArticleID=" & ArticleID & "&NewsNumAll=" & NewsNumAll & """>"
ErrMsg=ErrMsg & "</td></tr>"
ErrMsg=ErrMsg & "</table>"
Call ShowMsg(ErrMsg)
Response.Flush()'刷新
End Sub
'==================================================
'过程名:SetCache
'作 用:获取变量
'参 数:无
'==================================================
Sub SetCache()
Dim myCache
Set myCache=new clsCache
'项目信息
myCache.name=CacheTemp & "items"
If myCache.valid then
Arr_Item=myCache.value
Else
ItemEnd=True
ErrMsg="<br><li>参数错误,请重新运行!</li>"
End If
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -