📄 admin_collection.asp
字号:
His_Repeat = False '是否采集过
NewsUrl = Trim(NewsArray(Arr_i)) '要采集的正文链接页
If ThumbnailType = 1 Then
ThumbnailUrl = Trim(ThumbnailArray(Arr_i))
End If
PaingNum = 1 '正文中有多少分页
UploadFiles = "" '上传的图片地址
ErrMsg = ""
'………………………………………………
'检测客户连接是否仍然有效
If Response.IsClientConnected Then
Response.Flush '强迫输出Html 到浏览器
Else
Exit Sub
End If
If CollecTest = False Then
His_Repeat = CheckRepeat(NewsUrl)
Else
His_Repeat = False
End If
If His_Repeat = True Then
FoundErr = True
End If
'标题 正文 获取过滤
If FoundErr <> True Then
'NewsCode 获取内容页Html
NewsCode = GetHttpPage(NewsUrl, PE_CLng(WebUrl))
If NewsCode = "$False$" Then
FoundErr = True
ErrMsg = ErrMsg & "<li>在获取:" & NewsUrl & "新闻源码时发生错误!</li>"
End If
End If
If FoundErr <> True Then
Title = FpHtmlEnCode(Trim(GetBody(NewsCode, TsString, ToString, False, False))) '获得标题代码
If Title = "$False$" Or Title = "" Or Len(Title) > 200 Then
FoundErr = True
ErrMsg = ErrMsg & "<li>在采集:" & NewsUrl & "新闻标题时发生错误</li>"
End If
If CollecTest = False And IsTitle = True And FoundErr <> True Then
If PE_CLng(Conn.Execute("Select count(*) From PE_Article Where Title='" & Title & "' And ClassID =" & ClassID)(0)) > 0 Then
FoundErr = True
End If
End If
End If
If FoundErr <> True Then
If CollecType <> 2 Then
Content = Trim(GetBody(NewsCode, CsString, CoString, False, False)) '获得正文代码
End If
If Content = "$False$" Or Content = "" And CollecType <> 2 Then '如果标题和正文产生错误
FoundErr = True
ErrMsg = ErrMsg & "<li>在采集:" & NewsUrl & "新闻正文时发生错误</li>"
If CollecTest = False Then '不为测试时
'写入历史记录
sql = "INSERT INTO PE_HistrolyNews(ItemID,ChannelID,ClassID,NewsCollecDate,NewsUrl,Result) VALUES ('" & ItemID & "','" & ChannelID & "','" & ClassID & "','" & Now() & "','" & NewsUrl & "'," & PE_False & ")"
Conn.Execute (sql)
End If
Else
If CollecType <> 2 Then
If NewsPaingType = 1 Then '新闻分页 正文分页为 设置标签时
NewsPaingNext = GetPaing(NewsCode, NPsString, NPoString, False, False) '获取分页地址
'影响了部分内容页分页暂时中止
'If Left(NewsPaingNext,1) = "/" then
ConversionTrails = NewsUrl
'Else
' ConversionTrails = ListStr
'End If
NewsPaingNext = DefiniteUrl(NewsPaingNext, ConversionTrails) '将相对路径转绝对路径
Dim NewsPaingNextTemp
Do While NewsPaingNext <> "$False$" And NewsPaingNext <> ""
If NewsPaingNextTemp <> "" Then
If FoundInArr(NewsPaingNextTemp, NewsPaingNext, "$$$") = True Then
Exit Do
Else
NewsPaingNextTemp = NewsPaingNextTemp & "$$$" & NewsPaingNext
End If
Else
NewsPaingNextTemp = NewsPaingNext
End If
If CheckUrl(NewsPaingNext) = False Then
Response.Write "<font color=red>内容页分页代码不正确,不是有效的网页链接代码。</a>"
Exit Do
End If
NewsPaingNextCode = GetHttpPage(NewsPaingNext, PE_CLng(WebUrl)) '获得分页html代码
If NewsPaingNextCode = "$False$" Or NewsPaingNextCode = "" Then Exit Do
ContentTemp = GetBody(NewsPaingNextCode, CsString, CoString, False, False) '截取正文代码
If ContentTemp = "$False$" Or ContentTemp = "" Then
Exit Do
Else
PaingNum = PaingNum + 1
If PaginationType = 2 Then '加一行段落链接上一正文
Content = Content & "<p> </p>[NextPage]<p> </p>" & ContentTemp
Else
Content = Content & "<p> </p>" & ContentTemp
End If
'得到下一分页链接代码
NewsPaingNext = GetPaing(NewsPaingNextCode, NPsString, NPoString, False, False) '获取分页地址
''影响了部分内容页分页暂时中止
'If Left(NewsPaingNext,1) = "/" then
ConversionTrails = NewsUrl
'Else
' ConversionTrails = ListStr
'End If
NewsPaingNext = DefiniteUrl(NewsPaingNext, ConversionTrails) '将相对路径转绝对路径
End If
Loop
ElseIf NewsPaingType = 2 Then
PageListCode = GetBody(NewsCode, PsString, PoString, False, False) '获取列表页
If PageListCode <> "$False$" Then
PageArrayCode = GetArray(PageListCode, PhsString, PhoString, False, False) '获取链接地址
If PageArrayCode <> "$False$" Then
If InStr(PageArrayCode, "$Array$") > 0 Then
'去掉地址开始
Dim tempk, TempPaingNext
PageArray = Split(PageArrayCode, "$Array$") '分割得到地址
TempPaingNext = ""
For tempk = 0 To UBound(PageArray)
If InStr(LCase(TempPaingNext), LCase(PageArray(tempk))) < 1 Then
TempPaingNext = TempPaingNext & "$Array$" & PageArray(tempk)
End If
Next
TempPaingNext = Right(TempPaingNext, Len(TempPaingNext) - 7)
PageArray = Split(TempPaingNext, "$Array$")
'去掉地址结束
For i = 0 To UBound(PageArray)
NewsPaingNextCode = GetHttpPage(DefiniteUrl(PageArray(i), NewsUrl), PE_CLng(WebUrl)) '获得分页html代码
If NewsPaingNextCode <> "$False$" Or NewsPaingNextCode <> "" Then
ContentTemp = GetBody(NewsPaingNextCode, CsString, CoString, False, False) '截取正文代码
If ContentTemp <> "$False$" Or ContentTemp <> "" Then
PaingNum = PaingNum + 1
If PaginationType = 2 Then '加一行段落链接上一正文
Content = Content & "<p> </p>[NextPage]<p> </p>" & ContentTemp
Else
Content = Content & "<p> </p>" & ContentTemp
End If
End If
End If
Next
Else
NewsPaingNextCode = GetHttpPage(DefiniteUrl(PageArrayCode, NewsUrl), PE_CLng(WebUrl)) '获得分页html代码
If NewsPaingNextCode <> "$False$" Or NewsPaingNextCode <> "" Then
ContentTemp = GetBody(NewsPaingNextCode, CsString, CoString, False, False) '截取正文代码
If ContentTemp <> "$False$" Or ContentTemp <> "" Then
PaingNum = PaingNum + 1
If PaginationType = 2 Then '加一行段落链接上一正文
Content = Content & "<p> </p>[NextPage]<p> </p>" & ContentTemp
Else
Content = Content & "<p> </p>" & ContentTemp
End If
End If
End If
End If
Else
Response.Write "<li>在获取分页链接列表时出错。</li>"
End If
Else
Response.Write "<li>在截取分页代码发生错误。</li>"
End If
End If
End If
Call Filters ' 标题过滤 正文过滤 广告
End If
End If
If FoundErr <> True Then
'………………………………………………
'时间
If UpDateType = 0 Or UpDateType = "" Then
UpdateTime = Now()
ElseIf UpDateType = 1 Then
If DateType = 0 Then
UpdateTime = Now()
Else
UpdateTime = GetBody(NewsCode, DsString, DoString, False, False)
UpdateTime = FpHtmlEnCode(UpdateTime)
UpdateTime = PE_CDate(Trim(Replace(UpdateTime, " ", " ")))
End If
ElseIf UpDateType = 2 Then
Else
UpdateTime = Now()
End If
'………………………………………………
'作者获取过滤
If AuthorType = 1 Then
Author = GetBody(NewsCode, AsString, AoString, False, False) '获得当前作
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -