📄 pub_collection.php
字号:
//生成缩略图
if($mtype=='img' && $this->breImage=='' && !$isError){
$this->breImage = $filename;
if(!eregi("^http://",$this->breImage) && file_exists($GLOBALS['cfg_basedir'].$filename)){
$filenames = explode('/',$filename);
$filenamed = $filenames[count($filenames)-1];
$nfilename = "lit_".$filenamed;
$nfilename = str_replace($filenamed,$nfilename,$filename);
if(file_exists($GLOBALS['cfg_basedir'].$nfilename)){
$this->breImage = $nfilename;
}else if(copy($GLOBALS['cfg_basedir'].$filename,$GLOBALS['cfg_basedir'].$nfilename)){
ImageResize($GLOBALS['cfg_basedir'].$nfilename,$GLOBALS['cfg_ddimg_width'],$GLOBALS['cfg_ddimg_height']);
$this->breImage = $nfilename;
}
}
}
if($wi && !$isError) @WaterImg($GLOBALS['cfg_basedir'].$filename,'up');
if(!$isError) return $filename;
else return $errfile;
}
//------------------------------
//获得下载媒体的随机名称
//------------------------------
/**
 * Build a unique, randomised storage path for a media file about to be downloaded.
 *
 * Side effects: increments $this->MediaCount and creates the storage directory
 * ($this->Item["imgurl"]) and its per-day sub-directory below cfg_basedir when
 * they do not exist yet.
 *
 * @param string $url Source URL of the media; only inspected to choose an image extension.
 * @param string $v   Media kind: "img" (.jpg/.gif/.png), "embed" (.swf); anything else gets no extension.
 * @return string Storage directory + yymmdd directory + generated file name, with repeated slashes collapsed.
 */
function GetRndName($url,$v)
{
    global $threadnum;
    $this->MediaCount++;
    $mnum = $this->MediaCount;
    // date("ymd") produces the same digits as strftime("%y%m%d"), which is deprecated since PHP 8.1.
    $timedir = date("ymd");

    // Storage path: the configured image directory, then one sub-directory per day.
    $fullurl = preg_replace("/\/{1,}/","/",$this->Item["imgurl"]."/");
    if(!is_dir($GLOBALS['cfg_basedir']."/$fullurl")){
        MkdirAll($GLOBALS['cfg_basedir']."/$fullurl",$GLOBALS['cfg_dir_purview']);
    }
    $fullurl = $fullurl.$timedir."/";
    if(!is_dir($GLOBALS['cfg_basedir']."/$fullurl")){
        MkdirAll($GLOBALS['cfg_basedir']."/$fullurl",$GLOBALS['cfg_dir_purview']);
    }

    // File name: timer value + thread number + per-instance media counter + random suffix.
    $timename = str_replace(".","",ExecTime());
    $nthreadnum = (!empty($threadnum) ? $threadnum : 0);
    $filename = $timename.$nthreadnum.$mnum.mt_rand(1000,9999);
    // Let dd2char() turn suitable digits into letters.
    $filename = dd2char($filename);

    // Pick the extension. Images default to .jpg unless the URL ends in
    // .gif/.png, optionally followed by a query string.
    // preg_match() replaces eregi(), which was removed in PHP 7.0.
    if($v=="img"){
        $shortname = ".jpg";
        if(preg_match('/\.gif(?:\?.*)?$/isD',$url)) $shortname = ".gif";
        else if(preg_match('/\.png(?:\?.*)?$/isD',$url)) $shortname = ".png";
    }
    else if($v=="embed") $shortname = ".swf";
    else $shortname = "";

    $fullname = $fullurl.$filename.$shortname;
    return preg_replace("/\/{1,}/","/",$fullname);
}
//------------------------------------------------
//按载入的网页内容获取规则,从一个HTML文件中获取内容
//-------------------------------------------------
/**
 * Apply the article rules in $this->ArtNote to the page held in $this->tmpHtml
 * and return the extracted fields as "{dede:field name='...'}...{/dede:field}" lines.
 *
 * Each rule is an array read through the keys 'match', 'value', 'isunit',
 * 'trim', 'isdown' and 'function'.
 *
 * @param string $dourl    URL of the page being processed; passed on to the media handlers.
 * @param bool   $needDown When true, media in fields flagged isdown=1 is downloaded
 *                         (DownMedias); otherwise it is only rewritten (MediasReplace).
 * @return string Concatenated field blocks, or "" when $this->tmpHtml is empty.
 */
function GetPageFields($dourl,$needDown)
{
    if($this->tmpHtml == "") return "";
    $artitem = "";
    $isPutUnit = false;
    $tmpLtKeys = array();
    foreach($this->ArtNote as $k=>$sarr)
    {
        // Skip the paging entries and anything that is not a rule array.
        if($k=="sppage"||$k=="sptype") continue;
        if(!is_array($sarr)) continue;

        $hasFixedValue = ($sarr['value']!='[var:内容]');
        if($sarr['match']==''||trim($sarr['match'])=='[var:内容]'||$hasFixedValue){
            // No usable match rule, or a fixed value was configured: use the fixed value (or nothing).
            $v = $hasFixedValue ? $sarr['value'] : "";
        }
        else
        {
            if($this->tmpUnitValue!="" && !$isPutUnit && $sarr["isunit"]==1){
                // Multi-page content: the pre-assembled value is used once, for the first isunit field.
                $v = $this->tmpUnitValue;
                $isPutUnit = true;
            }else{
                $v = $this->GetHtmlArea("[var:内容]",$sarr["match"],$this->tmpHtml);
            }

            // Strip everything matched by the configured filter patterns.
            if(isset($sarr["trim"]) && $v!=""){
                foreach($sarr["trim"] as $nv){
                    if($nv=="") continue;
                    $nv = str_replace("/","\\/",$nv);
                    $filtered = preg_replace("/$nv/isU","",$v);
                    // preg_replace() returns NULL on an invalid pattern; keep the
                    // previous value so one bad rule does not wipe the field.
                    if($filtered !== null) $v = $filtered;
                }
            }

            // Remote resources: download them, or only rewrite their URLs.
            if($sarr["isdown"] == '1'){
                if($needDown) $v = $this->DownMedias($v,$dourl);
                else $v = $this->MediasReplace($v,$dourl);
            }
        }

        // Hook for user-supplied post-processing code.
        if($sarr["function"]!=""){
            // preg_match() replaces eregi(), which was removed in PHP 7.0.
            if(!preg_match('/@litpic/i',$sarr["function"])){
                $v = $this->RunPHP($v,$sarr["function"]);
                $artitem .= "{dede:field name='$k'}$v{/dede:field}\r\n";
            }else{
                // Code that references @litpic is deferred until every other field
                // has been handled — presumably so the thumbnail is known by then.
                $tmpLtKeys[$k]['v'] = $v;
                $tmpLtKeys[$k]['f'] = $sarr["function"];
            }
        }else{
            $artitem .= "{dede:field name='$k'}$v{/dede:field}\r\n";
        }
    }

    // Now run the deferred fields that use the thumbnail variable.
    foreach($tmpLtKeys as $k=>$sarr){
        $v = $this->RunPHP($sarr['v'],$sarr['f']);
        $artitem .= "{dede:field name='$k'}$v{/dede:field}\r\n";
    }
    return $artitem;
}
//----------------------------------
//下载内容里的资源
//----------------------------------
/**
 * Download the media referenced by an HTML fragment and replace each reference
 * with the value returned by DownMedia().
 *
 * Handles both the entries $this->CDedeHtml reports as Medias and hyperlinks
 * that point at .jpg/.gif/.png/.swf files.
 *
 * @param string $html HTML fragment; passed by reference and modified in place.
 * @param string $url  URL of the page the fragment came from (handed to SetSource).
 * @return string The rewritten fragment (same value as $html after the call).
 */
function DownMedias(&$html,$url)
{
    $this->CDedeHtml->GetLinkType = "media";
    $this->CDedeHtml->SetSource($html,$url,false);

    // Media tags. Embedded objects are only fetched when they point at a .swf
    // file, optionally followed by a query string.
    // preg_match() replaces eregi(), which was removed in PHP 7.0.
    foreach($this->CDedeHtml->Medias as $k=>$v){
        $furl = $this->CDedeHtml->FillUrl($k);
        if($v=="embed" && !preg_match('/\.swf(?:\?.*)?$/isD',$k)){ continue; }
        $okurl = $this->DownMedia($furl,$v);
        $html = str_replace($k,$okurl,$html);
    }

    // Hyperlinks that point directly at an image or a flash file.
    foreach($this->CDedeHtml->Links as $v=>$k){
        if(preg_match('/\.(?:jpg|gif|png)(?:\?.*)?$/isD',$v)){ $m = "img"; }
        else if(preg_match('/\.swf(?:\?.*)?$/isD',$v)){ $m = "embed"; }
        else continue;
        $furl = $this->CDedeHtml->FillUrl($v);
        $okurl = $this->DownMedia($furl,$m);
        $html = str_replace($v,$okurl,$html);
    }
    return $html;
}
//---------------------------------
//仅替换内容里的资源为绝对网址
//----------------------------------
/**
 * Rewrite media references in an HTML fragment to the result of FillUrl(),
 * without downloading anything.
 *
 * @param string $html  HTML fragment; passed by reference and modified in place.
 * @param string $dourl URL of the page the fragment came from (handed to SetSource).
 * @return string The rewritten fragment (same value as $html after the call).
 */
function MediasReplace(&$html,$dourl)
{
    $this->CDedeHtml->GetLinkType = "media";
    $this->CDedeHtml->SetSource($html,$dourl,false);
    foreach($this->CDedeHtml->Medias as $k=>$v)
    {
        $k = trim($k);
        // References that already start with http:// are left alone.
        // preg_match() replaces eregi(), which was removed in PHP 7.0.
        // NOTE(review): https:// references are still passed through FillUrl() — confirm that is intended.
        if(!preg_match('#^http://#i',$k)){
            $okurl = $this->CDedeHtml->FillUrl($k);
            $html = str_replace($k,$okurl,$html);
        }
    }
    return $html;
}
//---------------------
//测试列表
//---------------------
/**
 * Dry-run the list rules: print the URLs found on the first configured list
 * page (or the first ten configured URLs when the source type is "archives").
 *
 * @return string|null The last URL printed ("" when none matched); null (bare
 *                     return) when the configuration or the download is unusable.
 */
function TestList()
{
    if(isset($this->List["url"][0])) $dourl = $this->List["url"][0];
    else{
        echo "配置中指定列表的网址错误!\r\n";
        return ;
    }

    // "archives": the configured URLs are the documents themselves; show at most ten.
    if($this->List["sourcetype"]=="archives")
    {
        echo "配置中指定的源参数为文档的原始URL:\r\n";
        $i=0;
        $v = "";
        foreach($this->List["url"] as $v){
            echo $v."\r\n"; $i++; if($i>9) break;
        }
        return $v;
    }

    $dhtml = new DedeHtml2();
    $html = $this->DownOnePage($dourl);
    if($html==""){
        echo "读取其中的一个网址: $dourl 时失败!\r\n";
        return ;
    }
    // Narrow the page down to the configured link area, when one is set.
    if(trim($this->List["linkarea"])!=""&&trim($this->List["linkarea"])!="[var:区域]"){
        $html = $this->GetHtmlArea("[var:区域]",$this->List["linkarea"],$html);
    }
    $dhtml->GetLinkType = "link";
    $dhtml->SetSource($html,$dourl,false);
    $TestPage = "";
    if(!is_array($dhtml->Links)){
        echo "分析网页的HTML时失败!\r\n";
        return ;
    }

    // The "need"/"cannot" rules are user-written regular expressions. eregi()
    // was removed in PHP 7.0, so they are wrapped in "/" delimiters (escaping
    // any "/" inside) and matched case-insensitively with preg_match().
    $needRule = $this->List["need"];
    $cannotRule = isset($this->List["cannot"]) ? $this->List["cannot"] : "";
    $needPattern = ($needRule!="") ? '/'.str_replace('/','\/',$needRule).'/i' : '';
    $cannotPattern = ($cannotRule!="") ? '/'.str_replace('/','\/',$cannotRule).'/i' : '';

    echo "按指定规则在 $dourl 发现的网址:\r\n";
    echo $this->List["need"];
    foreach($dhtml->Links as $k=>$v)
    {
        $k = $dhtml->FillUrl($k);
        // The "cannot" rule is only consulted when a "need" rule exists, as before.
        if($needPattern!=''){
            if(!preg_match($needPattern,$k)) continue;
            if($cannotPattern!='' && preg_match($cannotPattern,$k)) continue;
        }
        echo "$k - ".$v."\r\n";
        $TestPage = $k;
    }
    return $TestPage;
}
//测试文章规则
/**
 * Dry-run the article rules against one page and print the extracted fields.
 *
 * Downloads $dourl into $this->tmpHtml and echoes the result of
 * GetPageFields() without downloading media. Terminates the script when no
 * URL is given.
 *
 * @param string $dourl URL of the article page to test.
 */
function TestArt($dourl)
{
    if($dourl!=""){
        $this->tmpHtml = $this->DownOnePage($dourl);
        $fields = $this->GetPageFields($dourl,false);
        print $fields;
        return;
    }
    // No URL submitted: report it and stop.
    echo "没有递交测试的网址!";
    exit();
}
//--------------------------------
//采集种子网址
//--------------------------------
/**
 * Collect seed URLs for this collection node and queue them in #@__courl.
 *
 * List pages are processed in slices: positions $glstart+1 .. $glstart+$pagesize
 * of $this->List["url"] are downloaded per call.
 *
 * @param int $downall  1 = re-collect everything (the node's queue is cleared when
 *                      $glstart is 0); 0 = skip URLs already present in #@__co_listenurl.
 * @param int $glstart  Number of list pages already processed by earlier calls.
 * @param int $pagesize Number of list pages to process in this call.
 * @return int 0 when all list pages are done, the number of list pages still
 *             remaining otherwise, or -1 when this slice produced no links.
 */
function GetSourceUrl($downall=0,$glstart=0,$pagesize=10)
{
    // A full re-collect starting from the first slice wipes the node's previous records.
    if($downall==1 && $glstart==0){
        $this->dsql->ExecuteNoneQuery("Delete From #@__courl where nid='".$this->NoteId."'");
        $this->dsql->ExecuteNoneQuery("Delete From #@__co_listenurl where nid='".$this->NoteId."'");
    }

    // "archives": the configured URLs are the documents themselves.
    if($this->List["sourcetype"]=="archives")
    {
        echo "配置中指定的源参数为文档的原始URL:<br/>处理中...<br/>\r\n";
        foreach($this->List["url"] as $v)
        {
            // Escape once and use the escaped value in BOTH statements; the
            // INSERT previously embedded the raw URL.
            $safeUrl = addslashes($v);
            if($downall==0){
                $lrow = $this->dsql->GetOne("Select * From #@__co_listenurl where url like '".$safeUrl."'");
                if(is_array($lrow)) continue;
            }
            $inquery = "INSERT INTO #@__courl(nid,title,url,dtime,isdown,result) "
                     . "VALUES ('".$this->NoteId."','用户手工指定的网址','".$safeUrl."','".time()."','0','');";
            $this->dsql->ExecuteNoneQuery($inquery);
        }
        echo "完成种子网址的处理!<br/>\r\n";
        return 0;
    }

    // The "need"/"cannot" rules are user-written regular expressions. eregi()
    // was removed in PHP 7.0, so they are wrapped in "/" delimiters (escaping
    // any "/" inside) and matched case-insensitively with preg_match().
    $needRule = $this->List["need"];
    $cannotRule = isset($this->List["cannot"]) ? $this->List["cannot"] : "";
    $needPattern = ($needRule!="") ? '/'.str_replace('/','\/',$needRule).'/i' : '';
    $cannotPattern = ($cannotRule!="") ? '/'.str_replace('/','\/',$cannotRule).'/i' : '';

    $tmplink = array();
    $position = 0;
    $endpos = $glstart + $pagesize;
    $totallen = count($this->List["url"]);
    foreach($this->List["url"] as $listUrl)
    {
        $position++;
        if($position > $endpos) break;
        if($position <= $glstart) continue; // already handled by an earlier call

        $html = $this->DownOnePage($listUrl);
        // Narrow the page down to the configured link area, when one is set.
        if(trim($this->List["linkarea"])!=""&&trim($this->List["linkarea"])!="[var:区域]"){
            $html = $this->GetHtmlArea("[var:区域]",$this->List["linkarea"],$html);
        }
        $this->CDedeHtml->GetLinkType = "link";
        $this->CDedeHtml->SetSource($html,$listUrl,false);
        foreach($this->CDedeHtml->Links as $href=>$title)
        {
            $link = $this->CDedeHtml->FillUrl($href);
            // The "cannot" rule is only consulted when a "need" rule exists, as before.
            if($needPattern!=''){
                if(!preg_match($needPattern,$link)) continue;
                if($cannotPattern!='' && preg_match($cannotPattern,$link)) continue;
            }
            // NOTE(review): the original passed the already-filled URL through
            // FillUrl() a second time; kept because FillUrl() is not visible here.
            $tmplink[] = array($this->CDedeHtml->FillUrl($link), $title);
        }
        $this->CDedeHtml->Clear();
    }

    // Reverse the key order so the links found last are inserted first.
    krsort($tmplink);
    if(count($tmplink)==0){
        echo "按指定规则没找到任何链接!";
        return -1;
    }

    // NOTE(review): argument-less call kept from the original; what it executes
    // depends on the dsql class, which is not visible here — confirm it is needed.
    $this->dsql->ExecuteNoneQuery();
    foreach($tmplink as $pair)
    {
        $url = addslashes($pair[0]);
        $title = addslashes($pair[1]);
        if($downall==0){
            // Compare the URL column with the URL; the original compared it
            // with the link title, so known URLs were never skipped.
            $lrow = $this->dsql->GetOne("Select * From #@__co_listenurl where url like '$url' ");
            if(is_array($lrow)) continue;
        }
        if($title=="") $title="无标题,可能是图片链接";
        $inquery = "INSERT INTO #@__courl(nid,title,url,dtime,isdown,result) "
                 . "VALUES ('".$this->NoteId."','$title','$url','".time()."','0','');";
        $this->dsql->ExecuteNoneQuery($inquery);
    }

    if($endpos >= $totallen) return 0;
    return ($totallen-$endpos);
}
//---------------------------------
//用扩展函数处理采集到的原始数据
//-------------------------------
/**
 * Run a user-configured PHP snippet against a collected field value.
 *
 * The placeholders @me, @litpic and @body in the snippet (bare or quoted) are
 * replaced by the local variables $DedeMeValue (the field value),
 * $DedeLitPicValue ($this->breImage) and $DedeBodyValue ($this->tmpHtml).
 * Those variable names are part of the snippet contract and must not be renamed.
 *
 * @param mixed  $fvalue  Field value exposed to the snippet as @me.
 * @param string $phpcode PHP source to evaluate.
 * @return mixed The value of $DedeMeValue after the snippet has run.
 */
function RunPHP($fvalue,$phpcode)
{
    $DedeMeValue = $fvalue;
    $phpcode = preg_replace("/'@me'|\"@me\"|@me/isU",'$DedeMeValue',$phpcode);
    $DedeLitPicValue = $this->breImage;
    $phpcode = preg_replace("/'@litpic'|\"@litpic\"|@litpic/isU",'$DedeLitPicValue',$phpcode);
    // The page body is only exposed when the snippet asks for it.
    // preg_match() replaces eregi(), which was removed in PHP 7.0.
    if(preg_match('/@body/i',$phpcode)){
        $DedeBodyValue = $this->tmpHtml;
        $phpcode = preg_replace("/'@body'|\"@body\"|@body/isU",'$DedeBodyValue',$phpcode);
    }
    // SECURITY: eval() runs the configured rule code with full privileges.
    // It is only safe while rule definitions come from trusted administrators.
    eval($phpcode.";");
    return $DedeMeValue;
}
//-----------------------
//编码转换
//-----------------------
/**
 * Convert collected text from the source encoding ($this->Item["language"])
 * to the site encoding ($GLOBALS['cfg_ver_lang']).
 *
 * @param string $str Text to convert; passed by reference and replaced in place.
 *                    Left untouched when no conversion applies.
 */
function ChangeCode(&$str)
{
    $siteIsUtf8 = ($GLOBALS['cfg_ver_lang']=='utf-8');
    $sourceLang = $this->Item["language"];

    if($siteIsUtf8){
        // UTF-8 site: GB2312 converts directly, Big5 goes through GB first.
        if($sourceLang=="gb2312"){
            $str = gb2utf8($str);
        }
        if($sourceLang=="big5"){
            $asGb = big52gb($str);
            $str = gb2utf8($asGb);
        }
        return;
    }

    // Non-UTF-8 site: bring UTF-8 and Big5 sources to GB.
    if($sourceLang=="utf-8"){
        $str = utf82gb($str);
    }
    if($sourceLang=="big5"){
        $str = big52gb($str);
    }
}
}
?>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -