⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 admin_itemcollecfast.asp

📁 用ASP开发环境写出来的新闻采集系统
💻 ASP
📖 第 1 页 / 共 3 页
字号:
<%@language=vbscript codepage=936 %>
<%
'********************************************************
'程序名称:最强新闻采集系统
'版权所有:火红的五月(qq:88389917)
'程序制作:火红的五月
%>
<%
option explicit
Response.Buffer = True 
Server.ScriptTimeOut=999
Response.Expires = -1
Response.ExpiresAbsolute = Now() - 1 
Response.Expires = 0 
Response.CacheControl = "no-cache" 
%>
<!--#include file="inc/conn.asp"-->
<!--#include file="inc/function.asp"-->
<!--#include file="Admin_ChkPurview.asp"-->
<!--#include file="inc/ubbcode.asp"-->
<!--#include file="inc/clsCache.asp"-->
<%
Dim ItemNum,ListNum,PaingNum,NewsSuccesNum,NewsFalseNum
Dim Rs,Sql,RsItem,SqlItem,FoundErr,ErrMsg,ItemEnd,ListEnd

'项目变量
Dim ItemID,ItemName,ChannelID,strChannelDir,ClassID,SpecialID,LoginType,LoginUrl,LoginPostUrl,LoginUser,LoginPass,LoginFalse
Dim ListStr,LsString,LoString,ListPaingType,LPsString,LPoString,ListPaingStr1,ListPaingStr2,ListPaingID1,ListPaingID2,ListPaingStr3,HsString,HoString,HttpUrlType,HttpUrlStr
Dim TsString,ToString,CsString,CoString,DateType,DsString,DoString,AuthorType,AsString,AoString,AuthorStr,CopyFromType,FsString,FoString
Dim CopyFromStr,KeyType,KsString,KoString,KeyStr,NewsPaingType,NPsString,NpoString,NewsPaingStr,NewsPaingHtml
Dim ItemCollecDate,PaginationType,MaxCharPerPage,ReadLevel,Stars,ReadPoint,Hits,UpDateType,UpDateTime,IncludePicYn,DefaultPicYn,OnTop,Elite,Hot
Dim SkinID,TemplateID,Script_Iframe,Script_Object,Script_Script,Script_Div,Script_Class,Script_Span,Script_Img,Script_Font,Script_A,Script_Html,CollecListNum,CollecNewsNum,Passed,SaveFiles,CollecOrder,LinkUrlYn,InputerType,Inputer,EditorType,Editor,ShowCommentLink,Script_Table,Script_Tr,Script_Td

'过滤变量
Dim Arr_Filters,FilterStr,Filteri

'采集相关的变量
Dim ContentTemp,NewsPaingNext,NewsPaingNextCode,Arr_i,NewsUrl,NewsCode

'文章保存变量
Dim ArticleID,Title,Content,Author,CopyFrom,Key,IncludePic,UploadFiles,DefaultPicUrl

'其它变量
Dim LoginData,LoginResult,OrderTemp
Dim Arr_Item,CollecTest,Content_View,CollecNewsAll
Dim StepID

'历史记录
Dim Arr_Histrolys,His_Title,His_CollecDate,His_Result,His_Repeat,His_i 

'执行时间变量
Dim StartTime,OverTime

'图片统计
Dim Arr_Images,ImagesNum,ImagesNumAll

'列表
Dim ListUrl,ListCode,NewsArrayCode,NewsArray,ListArray,ListPaingNext

'安装路径
Dim strInstallDir,CacheTemp
strInstallDir=trim(request.ServerVariables("SCRIPT_NAME"))
strInstallDir=left(strInstallDir,instrrev(lcase(strInstallDir),"/")-1)
'strInstallDir=left(strInstallDir,instrrev(lcase(strInstallDir),"/"))
'缓存路径
CacheTemp=Lcase(trim(request.ServerVariables("SCRIPT_NAME")))
CacheTemp=left(CacheTemp,instrrev(CacheTemp,"/"))
CacheTemp=replace(CacheTemp,"\","_")
CacheTemp=replace(CacheTemp,"/","_")
CacheTemp="ansir" & CacheTemp

'数据初始化
CollecListNum=0
CollecNewsNum=0
ArticleID=0
ItemNum=Clng(Trim(Request("ItemNum")))
ListNum=Clng(Trim(Request("ListNum")))
NewsSuccesNum=Clng(Trim(Request("NewsSuccesNum")))
NewsFalseNum=Clng(Trim(Request("NewsFalseNum")))
ImagesNumAll=Clng(Trim(Request("ImagesNumAll")))
ListPaingNext=Trim(Request("ListPaingNext"))
FoundErr=False
ItemEnd=False
ListEnd=False
ErrMsg=""

Call SetCache

If ItemEnd<>True Then
   If (ItemNum-1)>Ubound(Arr_Item,2) then
      ItemEnd=True
   Else
      Call SetItems()
   End If
End If

If ItemEnd<>True Then
   If ListPaingType=0 Then
      If ListNum=1 Then
         ListUrl=ListStr
      Else
         ListEnd=True
      End If
   ElseIf ListPaingType=1 Then
      If ListNum=1 Then
         ListUrl=ListStr
      Else
         If ListPaingNext="" or ListPaingNext="$False$" Then
            ListEnd=True
         Else
            ListPaingNext=Replace(ListPaingNext,"{$ID}","&")
            ListUrl=ListPaingNext
         End If
      End If
   ElseIf ListPaingType=2 Then
      If ListPaingID1>ListPaingID2 then
         If (ListPaingID1-ListNum+1)<ListPaingID2 or (ListPaingID1-ListNum+1)<0 Then
            Listend=True
         Else
            ListUrl=Replace(ListPaingStr2,"{$ID}",Cstr(ListpaingID1-ListNum+1))
         End if
      Else
         If (ListPaingID1+ListNum-1)>ListPaingID2 Then
            ListEnd=True
         Else
            ListUrl=Replace(ListPaingStr2,"{$ID}",CStr(ListPaingID1+ListNum-1))
         End If
      End If      
   ElseIf ListPaingType=3  Then
      ListArray=Split(ListPaingStr3,"|")
      If (ListNum-1)>Ubound(ListArray) Then
         ListEnd=True
      Else
         ListUrl=ListArray(ListNum-1)
      End If    
   End If
   If ListNum>CollecListNum And CollecListNum<>0 Then
      ListEnd=True
   End if
End If

If ItemEnd=True Then
   ErrMsg="<br>采集任务全部完成"
   ErrMsg=ErrMsg & "<br>成功采集: "  &  NewsSuccesNum  &  "  条,失败: "    &  NewsFalseNum  &  "  条,图片:" & ImagesNumAll & "  张"
   Call DelCache()
Else
   If ListEnd=True Then
      ItemNum=ItemNum+1
      ListNum=1
      ErrMsg="<br>" & ItemName & "  项目所有列表采集完成,正在整理数据请稍后..."
      ErrMsg=ErrMsg & "<meta http-equiv=""refresh"" content=""3;url=Admin_ItemCollecFast.asp?ItemNum=" & ItemNum & "&ListNum=" & ListNum & "&NewsSuccesNum=" & NewsSuccesNum & "&NewsFalseNum=" & NewsFalseNum & "&ImagesNumAll=" & ImagesNumAll & """>"
   End If
End If

Call TopItem()
If ItemEnd=True Or ListEnd=True Then
   If ItemEnd<>True Then
      Call SetCache_His()
   End If
   Call WriteSucced(ErrMsg)
Else
   FoundErr=False
   ErrMsg=""
   Call StartCollection()
   Call FootItem2()
End  If
Call FootItem()
Response.Flush()
'关闭数据库链接
Call CloseConn()
Call CloseConnItem()
%>

<%
'==================================================
'过程名:StartCollection
'作  用:开始采集
'参  数:无
'==================================================
Sub StartCollection

'第一次采集时登录
If LoginType=1 And ListNum=1 then
   LoginData=UrlEncoding(LoginUser & "&" & LoginPass)
   LoginResult=PostHttpPage(LoginUrl,LoginPostUrl,LoginData)
   If Instr(LoginResult,LoginFalse)>0 Then
      FoundErr=True
      ErrMsg=ErrMsg & "<br><li>在登录网站时发生错误,请确保登录信息的正确性!</li>"
   End If
End If

If FoundErr<>True then
   ListCode=GetHttpPage(ListUrl)
   Call GetListPaing()
   If ListCode="$False$" Then
      FoundErr=True
      ErrMsg=ErrMsg & "<br><li>在获取列表:" & ListUrl & "网页源码时发生错误!</li>"
   Else
      ListCode=GetBody(ListCode,LsString,LoString,False,False)
      If ListCode="$False$" Or ListCode="" Then
         FoundErr=True
         ErrMsg=ErrMsg & "<br><li>在截取:" & ListUrl & "的新闻列表时发生错误!</li>"
      End If
   End If
End If

If FoundErr<>True Then
   NewsArrayCode=GetArray(ListCode,HsString,HoString,False,False)
   If NewsArrayCode="$False$" Then
      FoundErr=True
      ErrMsg=ErrMsg & "<br><li>在分析:" & ListUrl & "新闻列表时发生错误!</li>"
   Else
      NewsArray=Split(NewsArrayCode,"$Array$")
      For Arr_i=0 to Ubound(NewsArray)
         If HttpUrlType=1 Then
            NewsArray(Arr_i)=Trim(Replace(HttpUrlStr,"{$ID}",NewsArray(Arr_i)))
         Else
            NewsArray(Arr_i)=Trim(DefiniteUrl(NewsArray(Arr_i),ListUrl))           
         End If
         NewsArray(Arr_i)=CheckUrl(NewsArray(Arr_i))
      Next
      If CollecOrder=True Then
         For Arr_i=0 to Fix(Ubound(NewsArray)/2)
            OrderTemp=NewsArray(Arr_i)
            NewsArray(Arr_i)=NewsArray(Ubound(NewsArray)-Arr_i)
            NewsArray(Ubound(NewsArray)-Arr_i)=OrderTemp
         Next
      End If
   End If
End If

If FoundErr<>True Then
   Call TopItem2()
   CollecNewsAll=0
   For Arr_i=0 to Ubound(NewsArray)
      If CollecNewsAll>=CollecNewsNum And CollecNewsNum<>0 Then
         Exit For
      End If
      CollecNewsAll=CollecNewsAll+1
      '变量初始化
      UploadFiles=""
      DefaultPicUrl=""
      IncludePic=0
      ImagesNum=0
      NewsCode=""
      FoundErr=False
      ErrMsg=""
      His_Repeat=False
      NewsUrl=NewsArray(Arr_i)
      Title=""
      PaingNum=1
      '……………………………………………… 
      If Response.IsClientConnected Then 
         Response.Flush 
      Else 
         Response.End 
      End If
      '……………………………………………… 

      If CollecTest=False Then
         His_Repeat=CheckRepeat(NewsUrl)
      Else
         His_Repeat=False
      End If
      If His_Repeat=True Then
         FoundErr=True
      End If

      If FoundErr<>True Then
         NewsCode=GetHttpPage(NewsUrl)
         If NewsCode="$False$" Then
            FoundErr=True
            ErrMsg=ErrMsg & "<br>在获取:" & NewsUrl & "新闻源码时发生错误!"
            Title="获取网页源码失败"
         End If
      End If

      If FoundErr<>True Then
         Title=GetBody(NewsCode,TsString,ToString,False,False)
         If Title="$False$" or Title="" then
            FoundErr=True
            ErrMsg=ErrMsg & "<br>在分析:" & NewsUrl & "的新闻标题时发生错误"
            Title="<br>标题分析错误" 
         End If
         If FoundErr<>True Then
            Content=GetBody(NewsCode,CsString,CoString,False,False)
            If Content="$False$" or Content="" Then
               FoundErr=True
               ErrMsg=ErrMsg & "<br>在分析:" & NewsUrl & "的新闻正文时发生错误"
               Title=Title & "<br>正文分析错误" 
            End If
         End If
         If FoundErr<>True Then
            '新闻分页
            If NewsPaingType=1 Then
               NewsPaingNext=GetPaing(NewsCode,NPsString,NPoString,False,False)
               NewsPaingNext=FpHtmlEnCode(NewsPaingNext)
               Do While NewsPaingNext<>"$False$" And NewsPaingNext<>""
                  If NewsPaingStr="" or IsNull(NewsPaingStr)=True Then
                     NewsPaingNext=DefiniteUrl(NewsPaingNext,NewsUrl)
                  Else
                     NewsPaingNext=Replace(NewsPaingStr,"{$ID}",NewsPaingNext)
                  End If
                  If NewsPaingNext="" or NewsPaingNext="$False$" Then
                     Exit Do
                  End If
                  NewsPaingNextCode=GetHttpPage(NewsPaingNext)                  
                  ContentTemp=GetBody(NewsPaingNextCode,CsString,CoString,False,False)
                  If ContentTemp="$False$" Then
                     Exit Do
                  Else
                     PaingNum=PaingNum+1
                     Content=Content & NewsPaingHtml & ContentTemp
                     NewsPaingNext=GetPaing(NewsPaingNextCode,NPsString,NPoString,False,False)
                     NewsPaingNext=FpHtmlEnCode(NewsPaingNext)
                  End If
               Loop
            End If
            '过滤
            Call Filters
            Title=FpHtmlEnCode(Title)
            Call FilterScript()
            Content=Ubbcode(Content)
         End If
      End If

      '分开写(太长了照顾不过来)
      If FoundErr<>True Then
         '时间
         If UpDateType=0 Then
            UpDateTime=Now()
         ElseIf UpDateType=1 Then

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -