📄 snoopy.class.inc

📁 下载系统 # 汉化：Fising # 邮箱：fising@163.com # 网址：http://www.fising.cn # 声明：本人水平有限
💻 INC
📖 第 1 页 / 共 2 页
字号:
12 下一页
<?php

/*************************************************

Snoopy - the PHP net client
Author: Monte Ohrt <monte@ispi.net>
Copyright (c): 1999-2000 ispi, all rights reserved
Version: 1.0

 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

You may contact the author of Snoopy by e-mail at:
monte@ispi.net

Or, write to:
Monte Ohrt
CTO, ispi
237 S. 70th suite 220
Lincoln, NE 68510

The latest version of Snoopy can be obtained from:
http://snoopy.sourceforge.com

*************************************************/

/*
 * Snoopy: a small HTTP client class.
 *
 * The public properties below configure a request (target host/port,
 * proxy, user agent, referer, cookies, raw headers, redirect and frame
 * limits, credentials) and receive the outcome of a fetch (results,
 * headers, response_code, error, status, timed_out).
 *
 * Every declaration sits on its own line: the trailing "//" comments run
 * to the end of the physical line, so joining lines would comment out the
 * declarations that follow.
 */
class Snoopy
{
	/**** Public variables ****/

	/* user definable vars */

	var $host			=	"www.php.net";		// host name we are connecting to
	var $port			=	80;					// port we are connecting to
	var $proxy_host		=	"";					// proxy host to use
	var $proxy_port		=	"";					// proxy port to use
	var $agent			=	"Snoopy v1.0";		// agent we masquerade as
	var $referer		=	"";					// referer info to pass
	var $cookies		=	array();			// array of cookies to pass
												// $cookies["username"]="joe";
	var $rawheaders		=	array();			// array of raw headers to send
												// $rawheaders["Content-type"]="text/html";

	var $maxredirs		=	5;					// http redirection depth maximum. 0 = disallow
	var $lastredirectaddr	=	"";				// contains address of last redirected address
	var $offsiteok		=	true;				// allows redirection off-site
	var $maxframes		=	0;					// frame content depth maximum. 0 = disallow
	var $expandlinks	=	true;				// expand links to fully qualified URLs.
												// this only applies to fetchlinks()
												// or submitlinks()
	var $passcookies	=	true;				// pass set cookies back through redirects
												// NOTE: this currently does not respect
												// dates, domains or paths.

	var $user			=	"";					// user for http authentication
	var $pass			=	"";					// password for http authentication

	// http accept types
	var $accept			=	"image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

	var $results		=	"";					// where the content is put

	var $error			=	"";					// error messages sent here
	var $response_code	=	"";					// response code returned from server
	var $headers		=	array();			// headers returned from server sent here
	var $maxlength		=	500000;				// max return data length (body)
	var $read_timeout	=	0;					// timeout on read operations, in seconds
												// supported only since PHP 4 Beta 4
												// set to 0 to disallow timeouts
	var $timed_out		=	false;				// if a read operation timed out
	var $status			=	0;					// http request status

	var $curl_path		=	"/usr/bin/curl";
												// Snoopy will use cURL for fetching
												// SSL content if a full system path to
												// the cURL binary is supplied here.
												// set to false if you do not have
												// cURL installed. See http://curl.haxx.se
												// for details on installing cURL.
												// Snoopy does *not* use the cURL
												// library functions built into php,
												// as these functions are not stable
												// as of this Snoopy release.

	// send Accept-encoding: gzip?
var $use_gzip		= true;

	/**** Private variables ****/

	var $_maxlinelen	=	4096;				// max line length (headers)

	var $_httpmethod	=	"GET";				// default http request method
	var $_httpversion	=	"HTTP/1.0";			// default http request version
	var $_submit_method	=	"POST";				// default submit method
	var $_submit_type	=	"application/x-www-form-urlencoded";	// default submit type
	var $_mime_boundary	=   "";					// MIME boundary for multipart/form-data submit type
	var $_redirectaddr	=	false;				// will be set if page fetched is a redirect
	var $_redirectdepth	=	0;					// increments on an http redirect
	var $_frameurls		= 	array();			// frame src urls
	var $_framedepth	=	0;					// increments on frame depth

	var $_isproxy		=	false;				// set if using a proxy server
	var $_fp_timeout	=	30;					// timeout for socket connection

/*======================================================================*\
	Function:	fetch
	Purpose:	fetch the contents of a web page
				(and possibly other protocols in the
				future like ftp, nntp, gopher, etc.)
	Input:		$URI	the location of the page to fetch
	Output:		$this->results	the output text from the fetch
	Returns:	false when the scheme is not http/https, when the
				connection cannot be opened (http) or when the curl
				binary is unusable (https); true otherwise. The result
				of a followed redirect or frame fetch is not propagated.
	Notes:		- user/pass embedded in the URI overwrite $this->user
				  and $this->pass.
				- redirects are followed while $this->maxredirs is
				  greater than $this->_redirectdepth; frames are fetched
				  while $this->_framedepth is below $this->maxframes.
\*======================================================================*/
	function fetch($URI)
	{
		$URI_PARTS = parse_url($URI);

		// parse_url() returns false for seriously malformed URLs. Treat that
		// as "no recognisable parts" so the call ends in the invalid-protocol
		// branch instead of indexing into a boolean.
		if(!is_array($URI_PARTS))
			$URI_PARTS = array();

		if(!empty($URI_PARTS["user"]))
			$this->user = $URI_PARTS["user"];
		if(!empty($URI_PARTS["pass"]))
			$this->pass = $URI_PARTS["pass"];

		$scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";
		$host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";

		// Request target used when no proxy is involved: path plus optional
		// query string. A URI without a path ("http://host" or
		// "http://host?x=1") is requested as "/".
		$path = (isset($URI_PARTS["path"]) ? $URI_PARTS["path"] : "/")
				.(isset($URI_PARTS["query"]) ? "?".$URI_PARTS["query"] : "");

		// URI schemes are case-insensitive, so "HTTP://..." is accepted too.
		switch(strtolower($scheme))
		{
			case "http":
				$this->host = $host;
				if(!empty($URI_PARTS["port"]))
					$this->port = $URI_PARTS["port"];

				if(!$this->_connect($fp))
					return false;

				if($this->_isproxy)
				{
					// using proxy, send entire URI
					$this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
				}
				else
				{
					// no proxy, send only the path
					$this->_httprequest($path, $fp, $URI, $this->_httpmethod);
				}

				$this->_disconnect($fp);

				if($this->_redirectaddr)
				{
					/* url was redirected, check if we've hit the max depth */
					if($this->maxredirs > $this->_redirectdepth)
					{
						// only follow redirect if it's on this site, or offsiteok is true
						if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
						{
							/* follow the redirect */
							$this->_redirectdepth++;
							$this->lastredirectaddr=$this->_redirectaddr;
							$this->fetch($this->_redirectaddr);
						}
					}
				}

				if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
				{
					// work on a copy: the nested fetch() calls refill
					// $this->_frameurls with the frames of the sub-document
					$frameurls = $this->_frameurls;
					$this->_frameurls = array();

					foreach($frameurls as $frameurl)
					{
						if($this->_framedepth >= $this->maxframes)
							break;

						$this->fetch($frameurl);
						$this->_framedepth++;
					}
				}

				return true;

			case "https":
				if(!$this->curl_path || (!is_executable($this->curl_path))) {
					$this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n";
					return false;
				}
				$this->host = $host;
				if(!empty($URI_PARTS["port"]))
					$this->port = $URI_PARTS["port"];

				if($this->_isproxy)
				{
					// using proxy, send entire URI
					$this->_httpsrequest($URI,$URI,$this->_httpmethod);
				}
				else
				{
					// no proxy, send only the path
					$this->_httpsrequest($path, $URI, $this->_httpmethod);
				}

				if($this->_redirectaddr)
				{
					/* url was redirected, check if we've hit the max depth */
					if($this->maxredirs > $this->_redirectdepth)
					{
						// only follow redirect if it's on this site, or offsiteok is true
						// NOTE(review): the same-site test only recognises
						// "http://" addresses, so with offsiteok disabled an
						// "https://" redirect to this host is not followed.
						// Confirm whether that is intended.
						if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
						{
							/* follow the redirect */
							$this->_redirectdepth++;
							$this->lastredirectaddr=$this->_redirectaddr;
							$this->fetch($this->_redirectaddr);
						}
					}
				}

				if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
				{
					$frameurls = $this->_frameurls;
					$this->_frameurls = array();

					foreach($frameurls as $frameurl)
					{
						if($this->_framedepth >= $this->maxframes)
							break;

						$this->fetch($frameurl);
						$this->_framedepth++;
					}
				}

				return true;

			default:
				// not a valid protocol
				// (the newline is double-quoted so a real line break is
				// emitted, matching the "Bad curl" message above)
				$this->error	=	'Invalid protocol "'.$scheme.'"'."\n";
				return false;
		}
	}

/*======================================================================*\
	Private functions
\*======================================================================*/


/*======================================================================*\
	Function:	_striplinks
	Purpose:	strip the hyperlinks from an html document
	Input:		$document	document to strip.
	Output:		$match		an array of the links: first every quoted
							href value, then every unquoted one. The
							array is empty when no link is found.
\*======================================================================*/
	function _striplinks($document)
	{
		// The pattern is written in extended (x) mode: whitespace is ignored
		// and each "#" comment runs to the end of its line, so the line
		// breaks inside this string are significant.
		preg_match_all("'<\s*a\s+.*href\s*=\s*			# find <a href=
						([\"\'])?					# find single or double quote
						(?(1) (.*?)\\1 | ([^\s\>]+))		# if quote found, match up to next matching
													# quote, otherwise match up to next space
						'isx",$document,$links);

		// Always hand back an array, even when nothing matched.
		$match = array();

		// catenate the non-empty matches from the conditional subpattern:
		// group 2 holds quoted values, group 3 unquoted ones
		foreach($links[2] as $val)
		{
			if(!empty($val))
				$match[] = $val;
		}

		foreach($links[3] as $val)
		{
			if(!empty($val))
				$match[] = $val;
		}

		// return the links
		return $match;
	}

/*======================================================================*\
	Function:	_stripform
	Purpose:	strip the form elements from an html document
	Input:		$document	document to strip.
	Output:		$match		a string holding every matched form-related
							tag (FORM, INPUT, SELECT, TEXTAREA, OPTION),
							joined with "\r\n"
\*======================================================================*/
	function _stripform($document)
	{
		preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements);

		// catenate the matches
		$match = implode("\r\n",$elements[0]);

		// return the form elements
		return $match;
	}



/*======================================================================*\
	Function:	_striptext
	Purpose:	strip the text from an html document
	Input:		$document	document to strip.
	Output:		$text		the resulting text
	Notes:		the replacements run in list order, so "&amp;" is
				decoded last; decoding it first would turn "&amp;lt;"
				into "<" instead of "&lt;".
\*======================================================================*/
	function _striptext($document)
	{
		// I didn't use preg eval (//e) since that is only available in PHP 4.0.
		// so, list your entities one by one here. I included some of the
		// more common ones.

		$search = array("'<script[^>]*?>.*?</script>'si",	// strip out javascript
						"'<[\/\!]*?[^<>]*?>'si",			// strip out html tags
						"'([\r\n])[\s]+'",					// strip out white space
						"'&(quote?|#34);'i",				// replace html entities
															// (&quot; plus the legacy
															// misspelling &quote;)
						"'&(lt|#60);'i",
						"'&(gt|#62);'i",
						"'&(nbsp|#160);'i",
						"'&(iexcl|#161);'i",
						"'&(cent|#162);'i",
						"'&(pound|#163);'i",
						"'&(copy|#169);'i",
						"'&(amp|#38);'i"					// must stay last, see Notes
						);
		$replace = array(	"",
							"",
							"\\1",
							"\"",
							"<",
							">",
							" ",
							chr(161),
							chr(162),
							chr(163),
							chr(169),
							"&");

		$text = preg_replace($search,$replace,$document);

		return $text;
	}

/*======================================================================*\
	Function:	_expandlinks
	Purpose:	expand each link into a fully qualified URL
	Input:		$links			the links to qualify (passed straight to
								preg_replace())
				$URI			the full URI to get the base from
	Output:		$expandedLinks	the expanded links
	Notes:		the base is $URI up to the first "?", minus a trailing
				"/name.ext" segment. Each link then has a leading
				"http://<host>" removed, gets the base prepended unless
				it starts with "http://" or "mailto:", and has "/./"
				and "/dir/../" sequences collapsed to "/".
\*======================================================================*/
	function _expandlinks($links,$URI)
	{
		// everything before the query string
		preg_match("/^[^\?]+/",$URI,$match);

		// drop a trailing file name (a last segment containing a dot)
		$match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]);

		$search = array( 	"|^http://".preg_quote($this->host)."|i",
							"|^(?!http://)(\/)?(?!mailto:)|i",
							"|/\./|",
							"|/[^\/]+/\.\./|"
						);

		$replace = array(	"",
							$match."/",
							"/",
							"/"
						);

		$expandedLinks = preg_replace($search,$replace,$links);

		return $expandedLinks;
	}

/*======================================================================*\
	Function:	_httprequest
	Purpose:	go get the http data from the server
	Input:		$url		the url to fetch
				$fp			the current open file pointer
				$URI		the full URI
				$body		body contents to send if any (POST)
	Output:		
\*======================================================================*/
	
	function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
	{
		if($this->passcookies && $this->_redirectaddr)
$this->setcookies();					$URI_PARTS = parse_url($URI);		if(empty($url))			$url = "/";		$headers = $http_method." ".$url." ".$this->_httpversion."\r\n";				if(!empty($this->agent))			$headers .= "User-Agent: ".$this->agent."\r\n";		if(!empty($this->host) && !isset($this->rawheaders['Host']))			$headers .= "Host: ".$this->host."\r\n";		if(!empty($this->accept))			$headers .= "Accept: ".$this->accept."\r\n";				if($this->use_gzip) {			// make sure PHP was built with --with-zlib			// and we can handle gzipp'ed data			if ( function_exists(gzinflate) ) {			   $headers .= "Accept-encoding: gzip\r\n";			}			else {			   trigger_error(			   	"use_gzip is on, but PHP was built without zlib support.".				"  Requesting file(s) without gzip encoding.", 				E_USER_NOTICE);			}		}				if(!empty($this->referer))			$headers .= "Referer: ".$this->referer."\r\n";		if(!empty($this->cookies))		{						if(!is_array($this->cookies))				$this->cookies = (array)$this->cookies;				reset($this->cookies);			if ( count($this->cookies) > 0 ) {				$cookie_headers .= 'Cookie: ';				foreach ( $this->cookies as $cookieKey => $cookieVal ) {				$cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";				}				$headers .= substr($cookie_headers,0,-2) . "\r\n";			}
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -