⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 admin_itemcollecnews.asp

📁 用ASP开发环境写出来的新闻采集系统
💻 ASP
📖 第 1 页 / 共 2 页
字号:
<%@language=vbscript codepage=936 %>
<%
'********************************************************
'程序名称:最强新闻采集系统
'版权所有:火红的五月(qq:88389917)
'程序制作:火红的五月
%>
<%
option explicit
Response.Buffer = True 
Server.ScriptTimeOut=999
Response.Expires = -1
Response.ExpiresAbsolute = Now() - 1 
Response.Expires = 0 
Response.CacheControl = "no-cache" 
%>
<!--#include file="inc/conn.asp"-->
<!--#include file="inc/function.asp"-->
<!--#include file="Admin_ChkPurview.asp"-->
<!--#include file="inc/ubbcode.asp"-->
<!--#include file="inc/clsCache.asp"-->
<%
Dim ItemNum,NewsNum,PaingNum,NewsSuccesNum,NewsFalseNum,NewsNumAll
Dim Rs,Sql,RsItem,SqlItem,FoundErr,ErrMsg,ItemEnd,NewsEnd

'项目变量
Dim ItemID,ItemName,ChannelID,strChannelDir,ClassID,SpecialID
Dim TsString,ToString,CsString,CoString,DateType,DsString,DoString,AuthorType,AsString,AoString,AuthorStr,CopyFromType,FsString,FoString
Dim CopyFromStr,KeyType,KsString,KoString,KeyStr,NewsPaingType,NPsString,NpoString,NewsPaingStr,NewsPaingHtml
Dim PaginationType,MaxCharPerPage,ReadLevel,Stars,ReadPoint,Hits,UpDateType,UpDateTime,IncludePicYn,DefaultPicYn,OnTop,Elite,Hot
Dim SkinID,TemplateID,Script_Iframe,Script_Object,Script_Script,Script_Div,Script_Class,Script_Span,Script_Img,Script_Font,Script_A,Script_Html,CollecNewsNum,Passed,SaveFiles,CollecOrder,LinkUrlYn,InputerType,Inputer,EditorType,Editor,ShowCommentLink

'过滤变量
Dim Arr_Filters,FilterStr,Filteri

'采集相关的变量
Dim ContentTemp,NewsPaingNext,NewsPaingNextCode,Arr_i,NewsUrl,NewsCode

'文章保存变量
Dim ArticleID,Title,Content,Author,CopyFrom,Key,IncludePic,UploadFiles,DefaultPicUrl

'其它变量
Dim Arr_Item,Arr_News,CollecTest,Content_View

'历史记录
Dim Arr_Histrolys,His_Title,His_CollecDate,His_Result,His_Repeat,His_i 

'执行时间变量
Dim StartTime,OverTime

'图片统计
Dim Arr_Images,ImagesNum,ImagesNumAll

Dim strInstallDir,CacheTemp
strInstallDir=trim(request.ServerVariables("SCRIPT_NAME"))
strInstallDir=left(strInstallDir,instrrev(lcase(strInstallDir),"/")-1)
strInstallDir=left(strInstallDir,instrrev(lcase(strInstallDir),"/"))

CacheTemp=Lcase(trim(request.ServerVariables("SCRIPT_NAME")))
CacheTemp=left(CacheTemp,instrrev(CacheTemp,"/"))
CacheTemp=replace(CacheTemp,"\","_")
CacheTemp=replace(CacheTemp,"/","_")
CacheTemp="ansir" & CacheTemp

ItemNum=Clng(Trim(Request("ItemNum")))
NewsNum=Clng(Trim(Request("NewsNum")))
NewsSuccesNum=Clng(Trim(Request("NewsSuccesNum")))
NewsFalseNum=Clng(Trim(Request("NewsFalseNum")))
ImagesNumAll=Clng(Trim(Request("ImagesNumAll")))
NewsPaingNext=Trim(Request("NewsPaingNext"))
ArticleID=Trim(Request("ArticleID"))
NewsNumAll=Trim(Request("NewsNumAll"))
If ArticleID="" Then
   ArticleID=0
Else
   ArticleID=Clng(ArticleID)
End If
If NewsNumAll="" Then
   NewsNumAll=0
Else
   NewsNumAll=Clng(NewsNumAll)
End If
FoundErr=False
ItemEnd=False
NewsEnd=False

Call SetCache
If ItemEnd<>True Then
   If (ItemNum-1)>Ubound(Arr_Item,2) then
      ItemEnd=True
   Else
      Call SetItems()
   End If
   If ItemEnd<>True Then
      If NewsNum=1 Then
         Call SetNews()
      Else
         Call GetNews()
      End if
      If NewsEnd<>True Then
         If (NewsNum-1)>Ubound(Arr_News,2) Then
            NewsEnd=True
         Else
            NewsUrl=Arr_News(0,NewsNum-1)
         End If
      End If
   End If
End If

If ItemEnd=True Then
   ErrMsg="<br>采集任务全部完成"
   ErrMsg=ErrMsg & "<br>全部新闻:" & NewsNumAll & " 条,成功采集: "  &  NewsSuccesNum  &  "  条新闻,失败: "    &  NewsFalseNum  &  "  条,图片: " & ImagesNumAll & "  张"
   Call DelCache()
Else
   If NewsEnd=True Then
      ItemNum=ItemNum+1
      NewsNum=1
      Call SetHistroly()
      ErrMsg="<br>" & ItemName & "  项目所有列表采集完成,正在整理数据请稍后..."
      ErrMsg=ErrMsg & "<meta http-equiv=""refresh"" content=""3;url=Admin_ItemCollecNews.asp?ItemNum=" & ItemNum & "&NewsNum=" & NewsNum & "&NewsSuccesNum=" & NewsSuccesNum & "&NewsFalseNum=" & NewsFalseNum & "&ImagesNumAll=" & ImagesNumAll & "&NewsNumAll=" & NewsNumAll & """>"
   End If
End If

Call TopItem()
Response.Flush
If ItemEnd=True Or NewsEnd=True Then
   Call WriteSucced(ErrMsg)
Else
   FoundErr=False
   ErrMsg=""
   Call TopItem2()
   Response.Flush
   Call StartCollection()
   Call FootItem2()
End  If
Call FootItem()
Response.Flush()
'关闭数据库链接
Call CloseConn()
Call CloseConnItem()
%>

<%
'==================================================
'过程名:StartCollection
'作  用:开始采集
'参  数:无
'==================================================
Sub StartCollection()
   '变量初始化
   UploadFiles=""
   DefaultPicUrl=""
   IncludePic=0
   ImagesNum=0
   NewsCode=""
   FoundErr=False
   ErrMsg=""
   His_Repeat=False
   Title=""
   PaingNum=1
   '……………………………………………… 
   If Response.IsClientConnected Then 
      Response.Flush 
   Else 
      Response.End 
   End If
   '……………………………………………… 

   If CollecTest=False Then
      His_Repeat=CheckRepeat(NewsUrl)
   Else
      His_Repeat=False
   End If
   If His_Repeat=True Then
      FoundErr=True
   End If

   If FoundErr<>True Then
      NewsCode=GetHttpPage(NewsUrl)
      If NewsCode="$False$" Then
         FoundErr=True
         ErrMsg=ErrMsg & "<br>在获取:" & NewsUrl & "新闻源码时发生错误!"
         Title="分析源码错误"
      End If
   End If

   If FoundErr<>True Then
      Title=GetBody(NewsCode,TsString,ToString,False,False)
      If Title="$False$" or Title="" then
         FoundErr=True
         ErrMsg=ErrMsg & "<br>在分析:" & NewsUrl & "的新闻标题时发生错误"
         Title="<br>标题分析错误" 
      End If
      If FoundErr<>True Then
         Content=GetBody(NewsCode,CsString,CoString,False,False)
         If Content="$False$" or Content="" Then
            FoundErr=True
            ErrMsg=ErrMsg & "<br>在分析:" & NewsUrl & "的新闻正文时发生错误"
            Title=Title & "<br>正文分析错误" 
         End If
      End If
   End If
   If FoundErr<>True Then
      '新闻分页
      If NewsPaingType=1 Then
         NewsPaingNext=GetPaing(NewsCode,NPsString,NPoString,False,False)
         NewsPaingNext=FpHtmlEnCode(NewsPaingNext)
         Do While NewsPaingNext<>"$False$" And NewsPaingNext<>""
            If NewsPaingStr="" or IsNull(NewsPaingStr)=True Then
               NewsPaingNext=DefiniteUrl(NewsPaingNext,NewsUrl)
            Else
               NewsPaingNext=Replace(NewsPaingStr,"{$ID}",NewsPaingNext)
            End If
            If NewsPaingNext="" or NewsPaingNext="$False$" Then
               Exit Do
            End If
            NewsPaingNextCode=GetHttpPage(NewsPaingNext)                  
            ContentTemp=GetBody(NewsPaingNextCode,CsString,CoString,False,False)
            If ContentTemp="$False$" Then
               Exit Do
            Else
               PaingNum=PaingNum+1
               Content=Content & NewsPaingHtml & ContentTemp
               NewsPaingNext=GetPaing(NewsPaingNextCode,NPsString,NPoString,False,False)
               NewsPaingNext=FpHtmlEnCode(NewsPaingNext)
            End If
         Loop
      End If
      '过滤
      Call Filters()
      Title=FpHtmlEnCode(Title)
      Call FilterScript()
      Content=Ubbcode(Content)
   End If

   '分开写(太长了照顾不过来)
   If FoundErr<>True Then
      '时间
      If UpDateType=0 Then
         UpDateTime=Now()
      ElseIf UpDateType=1 Then
         If DateType=0 then
            UpDateTime=Now()
         Else
            UpDateTime=GetBody(NewsCode,DsString,DoString,False,False)
            UpDateTime=Lcase(FpHtmlEncode(UpDateTime))
            UpDateTime=Trim(Replace(UpDateTime,"&nbsp;"," "))
            If IsDate(UpDateTime)=True Then
               UpDateTime=CDate(UpDateTime)
            Else
               UpDateTime=Now()
            End If
         End If
      ElseIf UpDateType=2 Then  
      Else
         UpDateTime=Now()
      End If
                
      '作者
      If AuthorType=1 Then
         Author=GetBody(NewsCode,AsString,AoString,False,False)
      ElseIf AuthorType=2 Then
         Author=AuthorStr
      Else
         Author="佚名"
      End If
      Author=FpHtmlEncode(Author)
      If Author="" or Author="$False$" then
         Author="佚名"
      Else
         If Len(Author)>255 then
            Author=Left(Author,255)
         End If
      End If
         
      '来源
      If CopyFromType=1 Then
         CopyFrom=GetBody(NewsCode,FsString,FoString,False,False)
      ElseIf CopyFromType=2 Then
         CopyFrom=CopyFromStr
      Else
         CopyFrom="不详"
      End If
      CopyFrom=FpHtmlEncode(CopyFrom)
      If CopyFrom="" or CopyFrom="$False$" Then
         CopyFrom="不详"
      Else
         If Len(CopyFrom)>255 Then 
            CopyFrom=Left(CopyFrom,255)
         End If
      End If

      '关键字
      If KeyType=0 Then
         Key=Title
         Key=CreateKeyWord(Key,2)
      ElseIf KeyType=1 Then
         Key=GetBody(NewsCode,KsString,KoString,False,False)
         Key=FpHtmlEncode(Key)
         Key=CreateKeyWord(Key)
      ElseIf KeyType=2 Then
         Key=KeyStr
         Key=FpHtmlEncode(Key)
         If Len(Key)>253 Then
            Key="|" & Left(Key,253) & "|"
         Else
            Key="|" & Key & "|"
         End If
      End If
      If Key="" or Key="$False$" Then
         Key="|南国都市|新闻|"
      End If
   End If

   If FoundErr<>True Then 
      '转换图片相对地址为绝对地址/保存
      If CollecTest=False And SaveFiles=True then
         Content=ReplaceSaveRemoteFile(Content,strInstallDir,strChannelDir,True,NewsUrl)              
      Else
         Content=ReplaceSaveRemoteFile(Content,strInstallDir,strChannelDir,False,NewsUrl)
      End If
      '转换swf文件地址
      Content=ReplaceSwfFile(Content,NewsUrl)

      '图片统计、文章图片属性设置
      If UploadFiles<>"" Then
         If Instr(UploadFiles,"|")>0 Then
            Arr_Images=Split(UploadFiles,"|") 
            ImagesNum=Ubound(Arr_Images)+1
            DefaultPicUrl=Arr_Images(0)
         Else
            ImagesNum=1
            DefaultPicUrl=UploadFiles
         End If
         If DefaultPicYn=False then
            DefaultPicUrl=""
         End If
         If IncludePicYn=True Then
            IncludePic=-1
         Else
            IncludePic=0
         End If
         If SaveFiles<>True Then
            UploadFiles=""
         End If
      Else
         ImagesNum=0
         DefaultPicUrl=""
         IncludePic=0         
      End If
      ImagesNumAll=ImagesNumAll+ImagesNum
   End If

   If FoundErr<>True Then
      If CollecTest=False Then
         Call SaveArticle
         SqlItem="INSERT INTO Histroly(ItemID,ChannelID,ClassID,SpecialID,ArticleID,Title,CollecDate,NewsUrl,Result) VALUES ('" & ItemID & "','" & ChannelID & "','" & ClassID & "','" & SpecialID & "','" & ArticleID & "','" & Title & "','" & Now() & "','" & NewsUrl & "',True)"
         ConnItem.Execute(SqlItem)
         Content=Replace(Content,"[InstallDir_ChannelDir]",strInstallDir & strChannelDir & "/")
      End If
      NewsSuccesNum=NewsSuccesNum+1
      ErrMsg=ErrMsg & "No:<font color=red>" & NewsSuccesNum+NewsFalseNum & "</font><br>"
      ErrMsg=ErrMsg & "新闻标题:"
      ErrMsg=ErrMsg & "<font color=red>" & Title & "</font><br>"
      ErrMsg=ErrMsg & "更新时间:" & UpDateTime & "<br>"
      ErrMsg=ErrMsg & "新闻作者:" & Author & "<br>"
      ErrMsg=ErrMsg & "新闻来源:" & CopyFrom & "<br>"
      ErrMsg=ErrMsg & "采集页面:<a href=" & NewsUrl & " target=_blank>" & NewsUrl & "</a><br>"
      ErrMsg=ErrMsg & "其它信息:分页--" & PaingNum & " 页,图片--" & ImagesNum & " 张<br>"
      ErrMsg=ErrMsg & "正文预览:"
      If Content_View=True Then
         ErrMsg=ErrMsg & "<br>" & Content
      Else
         ErrMsg=ErrMsg & "您没有启用正文预览功能"
      End If
      ErrMsg=ErrMsg & "<br><br>关 键 字:" & Key & ""
   Else
      NewsFalseNum=NewsFalseNum+1
      If His_Repeat=True Then
         ErrMsg=ErrMsg & "No:<font color=red>" & NewsSuccesNum+NewsFalseNum & "</font><br>"
         ErrMsg=ErrMsg & "目标新闻:<font color=red>"
         If His_Result=True Then
            ErrMsg=ErrMsg & His_Title
         Else
            ErrMsg=ErrMsg & NewsUrl
         End If
         ErrMsg=ErrMsg & "</font> 的记录已存在,不予采集。<br>"
         ErrMsg=ErrMsg & "采集时间:" & His_CollecDate & "<br>"
         ErrMsg=ErrMsg & "新闻来源:<a href='" & NewsUrl & "' target=_blank>"&NewsUrl&"</a><br>"
         ErrMsg=ErrMsg & "采集结果:"
         If His_Result=False Then
            ErrMsg=ErrMsg & "失败"
            ErrMsg=ErrMsg & "<br>失败原因:" & Title
         Else
            ErrMsg=ErrMsg & "成功"
         End If            
         ErrMsg=ErrMsg & "<br>提示信息:如想再次采集,请先将该新闻的历史记录<font color=red>删除</font><br>"
      End If
      If CollecTest=False And His_Repeat=False Then
         SqlItem="INSERT INTO Histroly(ItemID,ChannelID,ClassID,SpecialID,Title,CollecDate,NewsUrl,Result) VALUES ('" & ItemID & "','" & ChannelID & "','" & ClassID & "','" & SpecialID & "','" & Title & "','" & Now() & "','" & NewsUrl & "',False)"
         ConnItem.Execute(SqlItem)
      End If
   End If

   ErrMsg=ErrMsg & "<table width=""100%"" border=""0"" align=""center"" cellpadding=""2"" cellspacing=""1"">"
   ErrMsg=ErrMsg & "<tr>"
   ErrMsg=ErrMsg & "<td height=""22"" colspan=""2"" align=""left"" class=""tdbg"">"
   ErrMsg=ErrMsg & "数据整理中,3秒后继续......3秒后如果还没反应请点击 <a href='Admin_ItemCollecNews.asp?ItemNum=" & ItemNum & "&NewsNum=" & NewsNum+1 & "&NewsSuccesNum=" & NewsSuccesNum & "&NewsFalseNum=" & NewsFalseNum & "&ImagesNumAll=" & ImagesNumAll & "&ArticleID=" & ArticleID & "&NewsNumAll=" & NewsNumAll & "'><font color=red>这里</font></a> 继续<br>"
   ErrMsg=ErrMsg & "<meta http-equiv=""refresh"" content=""3;url=Admin_ItemCollecNews.asp?ItemNum=" & ItemNum & "&NewsNum=" & NewsNum+1 & "&NewsSuccesNum=" & NewsSuccesNum & "&NewsFalseNum=" & NewsFalseNum & "&ImagesNumAll=" & ImagesNumAll & "&ArticleID=" & ArticleID & "&NewsNumAll=" & NewsNumAll & """>"
   ErrMsg=ErrMsg & "</td></tr>"
   ErrMsg=ErrMsg & "</table>"

   Call ShowMsg(ErrMsg)
   Response.Flush()'刷新
End Sub



'==================================================
'过程名:SetCache
'作  用:获取变量
'参  数:无
'==================================================
Sub SetCache()
   Dim myCache
   Set myCache=new clsCache

   '项目信息
   myCache.name=CacheTemp & "items"
   If myCache.valid then 
      Arr_Item=myCache.value
   Else
      ItemEnd=True
      ErrMsg="<br><li>参数错误,请重新运行!</li>"
   End If

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -