📄 class_httpretriever.php
字号:
<?php/* HTTP Retriever * Version v1.1.10 * Copyright 2004-2007, Steve Blinch * http://code.blitzaffe.com * ============================================================================ * * DESCRIPTION * * Provides a pure-PHP implementation of an HTTP v1.1 client, including support * for chunked transfer encoding and user agent spoofing. Both GET and POST * requests are supported. * * This can be used in place of something like CURL or WGET for HTTP requests. * Native SSL (HTTPS) requests are also supported if the OpenSSL extension is * installed under PHP v4.3.0 or greater. * * If native SSL support is not available, the class will also check for the * CURL extension; if it's installed, it will transparently be used for SSL * (HTTPS) requests. * * If neither native SSL support nor the CURL extension are available, and * libcurlemu (a CURL emulation library available from our web site) is found, * the class will also check for the CURL console binary (usually in * /usr/bin/curl); if it's installed, it will transparently be used for SSL * requests. * * In short, if it's possible to make an HTTP/HTTPS request from your server, * this class can most likely do it. * * * HISTORY * * 1.1.10 (13-Feb-2006) * - Fixed bug wherein libcurlemu may not be correctly included when * needed. 
* - Fixed bug wherein stream read timeouts may not be recognized * - Adjusted timeout handling code to better handle timeout conditions * - Added intelligent caching support * - Caching is now better-handled for high-volume requests * - Added postprocessing callback support * - Improved redirect support * - Fixed bug in which POST requests couldn't use GET-style query strings * - Added header cleanup between requests * - Added partial proxy support via $http->curl_proxy (only useable when * $http->force_curl is TRUE; internal support not yet implemented) * * * 1.1.9 (11-Oct-2006) * - Added set_transfer_display() and default_transfer_callback() * methods for transfer progress tracking * - Suppressed possible "fatal protocol error" when remote SSL server * closes the connection early * - Added get_content_type() method * - make_query_string() now handles arrays * * 1.1.8 (19-Jun-2006) * - Added set_progress_display() and default_progress_callback() * methods for debug output * - Added support for relative URLs in HTTP redirects * - Added cookie support (sending and receiving) * - Numerous bug fixes * * 1.1.7 (18-Apr-2006) * - Added support for automatically following HTTP redirects * - Added ::get_error() method to get any available error message (be * it an HTTP result error or an internal/connection error) * - Added ::cache_hit variable to determine whether the page was cached * * 1.1.6 (04-Mar-2006) * - Added stream_timeout class variable. * - Added progress_callback class variable. 
*    - Added support for braindead servers that ignore Connection: close
 *
 *
 * EXAMPLE
 *
 * // HTTPRetriever usage example
 * require_once("class_HTTPRetriever.php");
 * $http = new HTTPRetriever();
 *
 *
 * // Example GET request:
 * // ----------------------------------------------------------------------------
 * $keyword = "blitzaffe code"; // search Google for this keyword
 * if (!$http->get("http://www.google.com/search?hl=en&q=%22".urlencode($keyword)."%22&btnG=Search&meta=")) {
 *     echo "HTTP request error: #{$http->result_code}: {$http->result_text}";
 *     return false;
 * }
 * echo "HTTP response headers:<br><pre>";
 * var_dump($http->response_headers);
 * echo "</pre><br>";
 *
 * echo "Page content:<br><pre>";
 * echo $http->response;
 * echo "</pre>";
 * // ----------------------------------------------------------------------------
 *
 *
 * // Example POST request:
 * // ----------------------------------------------------------------------------
 * $keyword = "blitzaffe code"; // search Google for this keyword
 * // Note: values are passed raw; make_query_string() URL-encodes them itself.
 * $values = array(
 *     "hl"=>"en",
 *     "q"=>'"'.$keyword.'"',
 *     "btnG"=>"Search",
 *     "meta"=>""
 * );
 * // Note: This example is just to demonstrate the POST equivalent of the GET
 * // example above; running this script will return a 501 Not Implemented, as
 * // Google does not support POST requests.
* if (!$http->post("http://www.google.com/search",$http->make_query_string($values))) {
 *     echo "HTTP request error: #{$http->result_code}: {$http->result_text}";
 *     return false;
 * }
 * echo "HTTP response headers:<br><pre>";
 * var_dump($http->response_headers);
 * echo "</pre><br>";
 *
 * echo "Page content:<br><pre>";
 * echo $http->response;
 * echo "</pre>";
 * // ----------------------------------------------------------------------------
 *
 *
 * LICENSE
 *
 * This script is free software; you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the License, or (at your option) any later
 * version.
 *
 * This script is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this script; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

// define user agent ID's (indexes into the template list in set_user_agent())
define("UA_EXPLORER",0);
define("UA_MOZILLA",1);
define("UA_FIREFOX",2);
define("UA_OPERA",3);

// define progress message severity levels
define('HRP_DEBUG',0);
define('HRP_INFO',1);
define('HRP_ERROR',2);

// path of the CURL console binary; may be pre-defined by the including script
if (!defined("CURL_PATH")) define("CURL_PATH","/usr/bin/curl");

// if the CURL extension is not loaded (and the including script has not opted
// out by defining HTTPR_NO_REDECLARE_CURL), load the CURL Emulation Library.
// NOTE: the path is relative to the current working directory, and because
// require_once is used, a missing library file is a fatal error.
if (!extension_loaded("curl") && !defined('HTTPR_NO_REDECLARE_CURL') ) {
    require_once('./modules/member/openid_lib/libcurlemu.inc.php');
}

// All settings of this class are stored in properties that are created on
// assignment rather than declared.  PHP 8.2+ deprecates that unless the class
// carries the attribute below; older PHP versions read the line as a "#" comment.
#[AllowDynamicProperties]
class HTTPRetriever {

    // Constructor: resets every public setting to its default value.
    function __construct() {
        // default HTTP headers to send with all requests
        $this->headers = array(
            "Referer"=>"",
            "User-Agent"=>"HTTPRetriever/1.0",
            "Connection"=>"close"
        );

        // HTTP version (has no effect if using CURL)
        $this->version = "1.1";

        // Normally, CURL is only used for HTTPS requests; setting this to
        // TRUE will force CURL for HTTP requests as well.  Not recommended.
        $this->force_curl = false;

        // If you don't want to use CURL at all, set this to TRUE.
        $this->disable_curl = false;

        // If HTTPS requests return an error message about SSL certificates in
        // $this->error and you don't care about security, set this to TRUE
        $this->insecure_ssl = false;

        // Set the maximum time to wait for a connection
        $this->connect_timeout = 15;

        // Set the maximum time to allow a transfer to run, or 0 to disable.
        $this->max_time = 0;

        // Set the maximum time for a socket read/write operation, or 0 to disable.
        $this->stream_timeout = 0;

        // If you're making an HTTPS request to a host whose SSL certificate
        // doesn't match its domain name, AND YOU FULLY UNDERSTAND THE
        // SECURITY IMPLICATIONS OF IGNORING THIS PROBLEM, set this to TRUE.
        $this->ignore_ssl_hostname = false;

        // If TRUE, the get() and post() methods will close the connection
        // and return immediately after receiving the HTTP result code
        $this->result_close = false;

        // If set to a positive integer value, retrieved pages will be cached
        // for this number of seconds.  Any subsequent calls within the cache
        // period will return the cached page, without contacting the remote
        // server.
        $this->caching = false;

        // If TRUE and $this->caching is not false, retrieved pages/files will be
        // cached only if they appear to be static.
        $this->caching_intelligent = false;

        // If TRUE, cached files will be stored in subdirectories corresponding
        // to the first 2 letters of the hash filename
        $this->caching_highvolume = false;

        // If $this->caching is enabled, this specifies the folder under which
        // cached pages are saved.
        $this->cache_path = '/tmp/';

        // Set these to perform basic HTTP authentication
        $this->auth_username = '';
        $this->auth_password = '';

        // Optionally set this to a valid callback method to have HTTPRetriever
        // provide page preprocessing capabilities to your script.  If set, this
        // method should accept two arguments: an object representing an instance
        // of HTTPRetriever, and a string containing the page contents
        $this->page_preprocessor = null;

        // Optionally set this to a valid callback method to have HTTPRetriever
        // provide progress messages.  Your callback must accept 2 parameters:
        // an integer representing the severity (0=debug, 1=information, 2=error),
        // and a string representing the progress message
        $this->progress_callback = null;

        // Optionally set this to a valid callback method to have HTTPRetriever
        // provide bytes-transferred messages.  Your callback must accept 2
        // parameters: an integer representing the number of bytes transferred,
        // and an integer representing the total number of bytes expected (or
        // -1 if unknown).
        $this->transfer_callback = null;

        // Set this to TRUE if you want HTTPRetriever to transparently follow HTTP
        // redirects (code 301, 302, 303, and 307).  Optionally set this to a
        // numeric value to limit the maximum number of redirects to the specified
        // value.  (Redirection loops are detected automatically.)
        // Note that non-GET/HEAD requests will NOT be redirected except on code
        // 303, as per HTTP standards.
        $this->follow_redirects = false;
    }

    // PHP 4-style constructor, kept only for backward compatibility with code
    // that calls it by name (e.g. parent::HTTPRetriever() in a subclass).
    // PHP 8 no longer treats a method named after its class as a constructor,
    // so the defaults are set in __construct() and this merely forwards to it.
    function HTTPRetriever() {
        $this->__construct();
    }

    // Send an HTTP GET request to $url; if $ipaddress is specified, the
    // connection will be made to the selected IP instead of resolving the
    // hostname in $url.
    //
    // If $cookies is set, it should be an array in one of two formats.
    //
    // Either: $cookies[ 'cookiename' ] = array (
    //     '/path/'=>array(
    //         'expires'=>time(),
    //         'domain'=>'yourdomain.com',
    //         'value'=>'cookievalue'
    //     )
    // );
    //
    // Or, a more simplified format:
    //     $cookies[ 'cookiename' ] = 'value';
    //
    // The former format will automatically check to make sure that the path, domain,
    // and expiration values match the HTTP request, and will only send the cookie if
    // they do match.  The latter will force the cookie to be set for the HTTP request
    // unconditionally.
//
    function get($url,$ipaddress = false,$cookies = false) {
        $this->method = "GET";
        $this->post_data = "";
        $this->connect_ip = $ipaddress;

        return $this->_execute_request($url,$cookies);
    }

    // Send an HTTP POST request to $url containing the POST data $data.  See ::get()
    // for a description of the remaining arguments.
    function post($url,$data="",$ipaddress = false,$cookies = false) {
        $this->method = "POST";
        $this->post_data = $data;
        $this->connect_ip = $ipaddress;

        return $this->_execute_request($url,$cookies);
    }

    // Send an HTTP HEAD request to $url.  See ::get() for a description of the arguments.
    function head($url,$ipaddress = false,$cookies = false) {
        $this->method = "HEAD";
        $this->post_data = "";
        $this->connect_ip = $ipaddress;

        return $this->_execute_request($url,$cookies);
    }

    // Send an alternate (non-GET/POST) HTTP request to $url, using $method as
    // the request method and $data as the request body.  See ::get() for a
    // description of the remaining arguments.
    function custom($method,$url,$data="",$ipaddress = false,$cookies = false) {
        $this->method = $method;
        $this->post_data = $data;
        $this->connect_ip = $ipaddress;

        return $this->_execute_request($url,$cookies);
    }

    // (helper for make_query_string) Flattens the array $arraycontents into
    // "name[key]=value&" pairs, recursing into nested arrays so that they become
    // "name[key][subkey]=value&".  $arrayname is used as-is (the caller is
    // expected to have encoded it already); keys and values are URL-encoded here.
    // Every pair, including the last, is followed by "&".
    function array_to_query($arrayname,$arraycontents) {
        $output = "";

        foreach ($arraycontents as $key=>$value) {
            if (is_array($value)) {
                $output .= $this->array_to_query(sprintf('%s[%s]',$arrayname,urlencode($key)),$value);
            } else {
                $output .= sprintf('%s[%s]=%s&',$arrayname,urlencode($key),urlencode($value));
            }
        }

        return $output;
    }

    // Builds a query string from the associative array $data; returns a string
    // that can be passed to $this->post().
    //
    // Array values are expanded with array_to_query(), scalar values are
    // URL-encoded, and any other value is serialize()d before being URL-encoded.
    // If $data is not an array or yields no pairs, an empty string is returned.
    function make_query_string($data) {
        $output = "";

        if (is_array($data)) {
            foreach ($data as $name=>$value) {
                if (is_array($value)) {
                    $output .= $this->array_to_query(urlencode($name),$value);
                } elseif (is_scalar($value)) {
                    $output .= urlencode($name)."=".urlencode($value)."&";
                } else {
                    $output .= urlencode($name)."=".urlencode(serialize($value)).'&';
                }
            }
        }

        // Nothing was appended: return "" explicitly, because substr("",0,-1)
        // yields FALSE rather than "" on PHP versions before 8.0.
        if ($output === "") return "";

        // drop the "&" that follows the final pair
        return substr($output,0,-1);
    }

    // this is pretty limited...
// but really, if you're going to spoof your UA, you'll probably
    // want to use a Windows OS for the spoof anyway
    //
    // if you want to set the user agent to a custom string, just assign your string to
    // $this->headers["User-Agent"] directly
    //
    // $agenttype selects one of the templates below by position (the UA_*
    // constants); $agentversion and $windowsversion replace the %agent% and %os%
    // placeholders.  An unknown $agenttype leaves the current header untouched.
    function set_user_agent($agenttype,$agentversion,$windowsversion) {
        $useragents = array(
            "Mozilla/4.0 (compatible; MSIE %agent%; Windows NT %os%)", // IE
            "Mozilla/5.0 (Windows; U; Windows NT %os%; en-US; rv:%agent%) Gecko/20040514", // Moz
            "Mozilla/5.0 (Windows; U; Windows NT %os%; en-US; rv:1.7) Gecko/20040803 Firefox/%agent%", // FFox
            "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT %os%) Opera %agent% [en]", // Opera
        );

        // previously an unknown type raised an undefined-offset notice and
        // replaced the User-Agent header with an empty string
        if (!isset($useragents[$agenttype])) return;

        $agent = $useragents[$agenttype];
        $this->headers["User-Agent"] = str_replace(array("%agent%","%os%"),array($agentversion,$windowsversion),$agent);
    }

    // this isn't presently used as it's now handled inline by the request parser
    //
    // Decodes a chunked-transfer-encoded body held in $this->response in place:
    // each chunk is "<hex size>[;extensions]\r\n<data>\r\n", and a chunk of size
    // 0 ends the body.  Whatever follows the terminating chunk is appended as-is.
    function remove_chunkiness() {
        $remaining = $this->response;
        $this->response = "";

        while ($remaining) {
            $hexlen = strpos($remaining,"\r");
            if ($hexlen===false) {
                // no chunk-size line left, so this is not (or no longer) chunked
                // data; keep it verbatim.  Without this check the offsets below
                // were applied anyway and the first 4 bytes were silently lost.
                $this->response .= $remaining;
                return;
            }

            $chunksize = substr($remaining,0,$hexlen);

            // strip any chunk extensions (";name=value") from the size field
            $argstart = strpos($chunksize,';');
            if ($argstart!==false) $chunksize = substr($chunksize,0,$argstart);

            $chunksize = (int) @hexdec($chunksize);

            // skip the size line and its CRLF, copy the chunk data, then skip
            // the CRLF that follows the data
            $this->response .= substr($remaining,$hexlen+2,$chunksize);
            $remaining = substr($remaining,$hexlen+2+$chunksize+2);

            if (!$chunksize) {
                // either we're done, or something's borked... exit
                $this->response .= $remaining;
                return;
            }
        }
    }

    // (internal) store a page in the cache
    function _cache_store($token,$url) {
        if ($this->caching_intelligent) {
            $urlinfo = parse_url($url);
            if ($this->method=='POST') {
                $this->progress(HRP_DEBUG,"POST request; not caching");
                return;
            } else if (strlen($urlinfo['query'])) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -