⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dedecollection.func.php

📁 这是matlab的一个小程序
💻 PHP
字号:
<?php
if(!defined('DEDEINC'))
{
	exit('dedecms');
}
require_once(DEDEINC."/dedehttpdown.class.php");
require_once(DEDEINC."/dedetag.class.php");
require_once(DEDEINC."/charset.func.php");

function DownImageKeep($gurl,$rfurl,$filename,$gcookie="",$JumpCount=0,$maxtime=30)
{
	$urlinfos = GetHostInfo($gurl);
	$ghost = trim($urlinfos['host']);
	if($ghost=='')
	{
		return false;
	}
	$gquery = $urlinfos['query'];
	if($gcookie=="" && !empty($rfurl))
	{
		$gcookie = RefurlCookie($rfurl);
	}
	$sessionQuery = "GET $gquery HTTP/1.1\r\n";
	$sessionQuery .= "Host: $ghost\r\n";
	$sessionQuery .= "Referer: $rfurl\r\n";
	$sessionQuery .= "Accept: */*\r\n";
	$sessionQuery .= "User-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\r\n";
	if($gcookie!=""&&!ereg("[\r\n]",$gcookie))
	{
		$sessionQuery .= $gcookie."\r\n";
	}
	$sessionQuery .= "Connection: Keep-Alive\r\n\r\n";
	$errno = "";
	$errstr = "";
	$m_fp = fsockopen($ghost, 80, $errno, $errstr,10);
	fwrite($m_fp,$sessionQuery);
	$lnum = 0;

	//获取详细应答头
	$m_httphead = Array();
	$httpstas = explode(" ",fgets($m_fp,256));
	$m_httphead["http-edition"] = trim($httpstas[0]);
	$m_httphead["http-state"] = trim($httpstas[1]);
	while(!feof($m_fp))
	{
		$line = trim(fgets($m_fp,256));
		if($line == "" || $lnum>100)
		{
			break;
		}
		$hkey = "";
		$hvalue = "";
		$v = 0;
		for($i=0;$i<strlen($line);$i++)
		{
			if($v==1)
			{
				$hvalue .= $line[$i];
			}
			if($line[$i]==":")
			{
				$v = 1;
			}
			if($v==0)
			{
				$hkey .= $line[$i];
			}
		}
		$hkey = trim($hkey);
		if($hkey!="")
		{
			$m_httphead[strtolower($hkey)] = trim($hvalue);
		}
	}

	//分析返回记录
	if(ereg("^3",$m_httphead["http-state"]))
	{
		if(isset($m_httphead["location"]) && $JumpCount<3)
		{
			$JumpCount++;
			DownImageKeep($gurl,$rfurl,$filename,$gcookie,$JumpCount);
		}
		else
		{
			return false;
		}
	}
	if(!ereg("^2",$m_httphead["http-state"]))
	{
		return false;
	}
	if(!isset($m_httphead))
	{
		return false;
	}
	$contentLength = $m_httphead['content-length'];

	//保存文件
	$fp = fopen($filename,"w") or die("写入文件:{$filename} 失败!");
	$i=0;
	$okdata = "";
	$starttime = time();
	while(!feof($m_fp))
	{
		$okdata .= fgetc($m_fp);
		$i++;

		//超时结束
		if(time()-$starttime>$maxtime)
		{
			break;
		}

		//到达指定大小结束
		if($i >= $contentLength)
		{
			break;
		}
	}
	if($okdata!="")
	{
		fwrite($fp,$okdata);
	}
	fclose($fp);
	if($okdata=="")
	{
		@unlink($filename);
		fclose($m_fp);
		return false;
	}
	fclose($m_fp);
	return true;
}

//获得某页面返回的Cookie信息
function RefurlCookie($gurl)
{
	global $gcookie,$lastRfurl;
	$gurl = trim($gurl);
	if(!empty($gcookie) && $lastRfurl==$gurl)
	{
		return $gcookie;
	}
	else
	{
		$lastRfurl=$gurl;
	}
	if(trim($gurl)=='')
	{
		return '';
	}
	$urlinfos = GetHostInfo($gurl);
	$ghost = $urlinfos['host'];
	$gquery = $urlinfos['query'];
	$sessionQuery = "GET $gquery HTTP/1.1\r\n";
	$sessionQuery .= "Host: $ghost\r\n";
	$sessionQuery .= "Accept: */*\r\n";
	$sessionQuery .= "User-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\r\n";
	$sessionQuery .= "Connection: Close\r\n\r\n";
	$errno = "";
	$errstr = "";
	$m_fp = fsockopen($ghost, 80, $errno, $errstr,10) or die($ghost.'<br />');
	fwrite($m_fp,$sessionQuery);
	$lnum = 0;

	//获取详细应答头
	$gcookie = "";
	while(!feof($m_fp))
	{
		$line = trim(fgets($m_fp,256));
		if($line == "" || $lnum>100)
		{
			break;
		}
		else
		{
			if(eregi("^cookie",$line))
			{
				$gcookie = $line;
				break;
			}
		}
	}
	fclose($m_fp);
	return $gcookie;
}

//获得网址的host和query部份
function GetHostInfo($gurl)
{
	$gurl = eregi_replace("^http://","",trim($gurl));
	$garr['host'] = eregi_replace("/(.*)$","",$gurl);
	$garr['query'] = "/".eregi_replace("^([^/]*)/","",$gurl);
	return $garr;
}

//HTML里的图片转DEDE格式
function TurnImageTag(&$body)
{
	global $cfg_album_width,$cfg_ddimg_width;
	if(empty($cfg_album_width))
	{
		$cfg_album_width = 800;
	}
	if(empty($cfg_ddimg_width))
	{
		$cfg_ddimg_width = 150;
	}
	preg_match_all('/src=[\'"](.+?)[\'"]/is',$body,$match);
	$ttx = '';
	if(is_array($match[1]) && count($match[1])>0)
	{
		for($i=0;isset($match[1][$i]);$i++)
		{
			$ttx .= "{dede:img text='' }".$match[1][$i]." {/dede:img}"."\r\n";
		}
	}
	$ttx = "\r\n{dede:pagestyle maxwidth='{$cfg_album_width}' ddmaxwidth='{$cfg_ddimg_width}' row='3' col='3' value='2'/}\r\n{dede:comments}图集类型会采集时生成此配置是正常的,不过如果后面没有跟着img标记则表示规则无效{/dede:comments}\r\n".$ttx;
	return $ttx;
}

//HTML里的网址格式转换
function TurnLinkTag(&$body)
{
	$ttx = '';
	$handid = '服务器';
	preg_match_all("/<a href=['\"](.+?)['\"]([^>]+?)>(.+?)<\/a>/is",$body,$match);
	if(is_array($match[1]) && count($match[1])>0)
	{
		for($i=0;isset($match[1][$i]);$i++)
		{
			$servername = (isset($match[3][$i]) ? str_replace("'","`",$match[3][$i]) : $handid.($i+1));
			if(ereg("[<>]",$servername) || strlen($servername)>40)
			{
				$servername = $handid.($i+1);
			}
			$ttx .= "{dede:link text='$servername'} {$match[1][$i]} {/dede:link}\r\n";
		}
	}
	return $ttx;
}

//替换XML的CDATA
function RpCdata($str)
{
	$str = str_replace('<![CDATA[','',$str);
	$str = str_replace(']]>','',$str);
	return  $str;
}

//分析RSS里的链接
function GetRssLinks($rssurl)
{
	global $cfg_soft_lang;
	$dhd = new DedeHttpDown();
	$dhd->OpenUrl($rssurl);
	$rsshtml = $dhd->GetHtml();

	//分析编码
	preg_match("/encoding=[\"']([^\"']*)[\"']/is",$rsshtml,$infos);
	if(isset($infos[1]))
	{
		$pcode = strtolower(trim($infos[1]));
	}
	else
	{
		$pcode = strtolower($cfg_soft_lang);
	}
	if($cfg_soft_lang=='gb2312')
	{
		if($pcode=='utf-8')
		{
			$rsshtml = utf82gb($rsshtml);
		}
		else if($pcode=='big5')
		{
			$rsshtml = big52gb($rsshtml);
		}
	}
	else if($cfg_soft_lang=='utf-8')
	{
		if($pcode=='gbk'||$pcode=='gb2312')
		{
			$rsshtml = gb2utf8($rsshtml);
		}
		else if($pcode=='big5')
		{
			$rsshtml = gb2utf8(big52gb($rsshtml));
		}
	}
	$rsarr = array();
	preg_match_all("/<item(.*)<title>(.*)<\/title>/isU",$rsshtml,$titles);
	preg_match_all("/<item(.*)<link>(.*)<\/link>/isU",$rsshtml,$links);
	preg_match_all("/<item(.*)<description>(.*)<\/description>/isU",$rsshtml,$descriptions);
	if(!isset($links[2]))
	{
		return '';
	}
	foreach($links[2] as $k=>$v)
	{
		$rsarr[$k]['link'] = RpCdata($v);

		if(isset($titles[2][$k]))
		{
			$rsarr[$k]['title'] = RpCdata($titles[2][$k]);
		}
		else
		{
			$rsarr[$k]['title'] = ereg_replace("^(.*)/","",RpCdata($titles[2][$k]));
		}
		if(isset($descriptions[2][$k]))
		{
			$rsarr[$k]['image'] = GetddImgFromRss($descriptions[2][$k],$rssurl);
		}
		else
		{
			$rsarr[$k]['image'] = '';
		}
	}
	return $rsarr;
}

//从RSS摘要获取图片信息
function GetddImgFromRss($descriptions,$refurl)
{
	if($descriptions=='')
	{
		return '';
	}
	preg_match_all("/<img(.*)src=[\"']{0,1}(.*)[\"']{0,1}[> \r\n\t]{1,}/isU",$descriptions,$imgs);
	if(isset($imgs[2][0]))
	{
		$imgs[2][0] = ereg_replace("[\"']",'',$imgs[2][0]);
		$imgs[2][0] = ereg_replace("/{1,}",'/',$imgs[2][0]);
		return FillUrl($refurl,$imgs[2][0]);
	}
	else
	{
		return '';
	}
}

//补全网址
function FillUrl($refurl,$surl)
{
	$i = $pathStep = 0;
	$dstr = $pstr = $okurl = '';
	$refurl = trim($refurl);
	$surl = trim($surl);
	$urls = @parse_url($refurl);
	$basehost = ( (!isset($urls['port']) || $urls['port']=='80') ? $urls['host'] : $urls['host'].':'.$urls['port']);

	//$basepath = $basehost.(!isset($urls['path']) ? '' : '/'.$urls['path']);
	//由于直接获得的path在处理 http://xxxx/nnn/aaa?fdsafd 这种情况时会有错误,因此用其它方式处理
	$basepath = $basehost;
	$paths = explode('/',eregi_replace("^http://","",$refurl));
	$n = count($paths);
	for($i=1;$i < ($n-1);$i++)
	{
		if(!ereg("[\?]",$paths[$i])) $basepath .= '/'.$paths[$i];
	}
	if(!ereg("[\?\.]",$paths[$n-1]))
	{
		$basepath .= '/'.$paths[$n-1];
	}
	if($surl=='')
	{
		return $basepath;
	}
	$pos = strpos($surl,"#");
	if($pos>0)
	{
		$surl = substr($surl,0,$pos);
	}

	//用 '/' 表示网站根的网址
	if($surl[0]=='/')
	{
		$okurl = $basehost.$surl;
	}
	else if($surl[0]=='.')
	{
		if(strlen($surl)<=2)
		{
			return '';
		}
		else if($surl[1]=='/')
		{
			$okurl = $basepath.ereg_replace('^.','',$surl);
		}
		else
		{
			$okurl = $basepath.'/'.$surl;
		}
	}
	else
	{
		if( strlen($surl) < 7 )
		{
			$okurl = $basepath.'/'.$surl;
		}
		else if( eregi('^http://',$surl) )
		{
			$okurl = $surl;
		}
		else
		{
			$okurl = $basepath.'/'.$surl;
		}
	}
	$okurl = eregi_replace('^http://','',$okurl);
	$okurl = 'http://'.eregi_replace('/{1,}','/',$okurl);
	return $okurl;
}

//从匹配规则中获取列表网址
function GetUrlFromListRule($regxurl='',$handurl='',$startid=0,$endid=0,$addv=1,$usemore=0,$batchrule='')
{
	global $dsql,$islisten;

	$lists = array();
	$n = 0;
	$islisten = (empty($islisten) ? 0 : $islisten);
	if($handurl!='')
	{
		$handurls = explode("\n",$handurl);
		foreach($handurls as $handurl)
		{
			$handurl = trim($handurl);
			if(eregi("^http://",$handurl))
			{
				$lists[$n][0] = $handurl;
				$lists[$n][1] = 0;
				$n++;
				if($islisten==1)
				{
					break;
				}
			}
		}
	}
	if($regxurl!='')
	{
		//没指定(#)和(*)
		if(!ereg("\(\*\)",$regxurl) && !ereg("\(#\)",$regxurl))
		{
			$lists[$n][0] = $regxurl;
			$lists[$n][1] = 0;
			$n++;
		}
		else
		{
			if($addv <= 0)
			{
				$addv = 1;
			}

			//没指定多栏目匹配规则
			if($usemore==0)
			{
				while($startid <= $endid)
				{
					$lists[$n][0] = str_replace("(*)",sprintf('%0'.strlen($startid).'d',$startid),$regxurl);
					$lists[$n][1] = 0;
					$startid = sprintf('%0'.strlen($startid).'d',$startid + $addv);
					$n++;
					if($n>2000 || $islisten==1)
					{
						break;
					}
				}
			}

			//匹配多个栏目
			//规则表达式 [(#)=>(#)匹配的网址; (*)=>(*)的范围,如:1-20; typeid=>栏目id; addurl=>附加的网址(用|分开多个)]
			else
			{
				$nrules = explode(']',trim($batchrule));
				foreach($nrules as $nrule)
				{
					$nrule = trim($nrule);
					$nrule = ereg_replace("^\[|\]$",'',$nrule);
					$nrules  = explode(';',$nrule);
					if(count($nrules)<3)
					{
						continue;
					}
					$brtag = '';
					$startid = 0;
					$endid = 0;
					$typeid = 0;
					$addurls = array();
					foreach($nrules as $nrule)
					{
						$nrule = trim($nrule);
						list($k,$v) = explode('=>',$nrule);
						if(trim($k)=='(#)')
						{
							$brtag = trim($v);
						}
						else if(trim($k)=='typeid')
						{
							$typeid = trim($v);
						}
						else if(trim($k)=='addurl')
						{
							$addurl = trim($v);
							$addurls = explode('|',$addurl);
						}
						else if(trim($k)=='(*)')
						{
							$v = ereg_replace("[ \r\n\t]",'',trim($v));
							list($startid,$endid) = explode('-',$v);
						}
					}

					//如果栏目用栏目名称
					if(ereg('[^0-9]',$typeid))
					{
						$arr = $dsql->GetOne("Select id From `#@__arctype` where typename like '$typeid' ");
						if(is_array($arr))
						{
							$typeid = $arr['id'];
						}
						else
						{
							$typeid = 0;
						}
					}

					//附加网址优先
					$mjj = 0;
					if(isset($addurls[0]))
					{
						foreach($addurls as $addurl)
						{
							$addurl = trim($addurl);
							if($addurl=='')
							{
								continue;
							}
							$lists[$n][0] = $addurl;
							$lists[$n][1] = $typeid;
							$n++;
							$mjj++;
							if($islisten==1)
							{
								break;
							}
						}
					}

					//如果为非监听模式或监听模式没手工指定的附加网址
					if($islisten!=1 || $mjj==0 )
					{
						//匹配规则里的网址,注:(#)的网址是是允许使用(*)的
						while($startid <= $endid)
						{
							$lists[$n][0] = str_replace("(#)",$brtag,$regxurl);
							$lists[$n][0] = str_replace("(*)",sprintf('%0'.strlen($startid).'d',$startid),$lists[$n][0]);
							$lists[$n][1] = $typeid;
							$startid = sprintf('%0'.strlen($startid).'d',$startid + $addv);
							$n++;
							if($islisten==1)
							{
								break;
							}
							if($n>20000)
							{
								break;
							}
						}
					}
				}
			} //End 匹配多栏目

		} //End使用规则匹配的情况

	}

	return $lists;
}
?>

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -