⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 crobotinternet.cpp

📁 Visual C++自动查询和智能代理程序设计书籍源码-SkyBot
💻 CPP
📖 第 1 页 / 共 4 页
字号:
////////////////////////////////////////////////////////////////////
//
//  CRobotInternet.cpp - CRobotInternet class implementation
//
//  Source: "Programming Robots, Spiders and Intelligent Agents
//           using Visual C++"
//
//	Copyright (C) 1999 David Pallmann. All Rights Reserved.


#include <stdafx.h>
#include <afxinet.h>
#include "CRobot.h"
#include "CRobotInternet.h"


// Constructor

CRobotInternet::CRobotInternet()
{
	// Identification sent as the user agent when a session is created
	m_sUserAgent = "Mozilla";

	// Session context counter; httpGet pre-increments it for every
	// new CInternetSession
	m_nContext = 0;

	// Both cache flags start out false, which makes httpGet add the
	// RELOAD and DONT_CACHE flags to its requests
	m_bReadFromCache = m_bWriteToCache = false;

	// No server logon credentials yet
	m_sLogonUsername = m_sLogonPassword = "";

	// No proxy logon method or credentials yet
	m_sProxyLogonMethod = "";
	m_sProxyLogonUsername = m_sProxyLogonPassword = "";
}


// Destructor

CRobotInternet::~CRobotInternet()
{
	// Reset the three proxy logon strings to empty (password first,
	// then user name, then method; the order is not significant)
	m_sProxyLogonPassword.Empty();
	m_sProxyLogonUsername.Empty();
	m_sProxyLogonMethod.Empty();
}


////////////////////////////////////////////////////////////////////
//
// Support functions


// ********************** private
// *                    *
// *  EncodeTextBase64  *
// *                    *
// **********************
// Function: Returns the Base64-encoded version of a text string.

CString CRobotInternet::EncodeTextBase64(const CString& sText)
{
	// Inputs:	sText             - The text to encode. Each character
	//								 is reduced to its low 8 bits before
	//								 encoding.
	//
	// Outputs:	<function_result> - The Base64 text, padded with '=' to
	//								 a multiple of four characters. An
	//								 empty input yields an empty string.
	//
	// The input is read straight from sText, so there is no limit on
	// its length (no fixed-size intermediate buffer).

	// Base64 alphabet: entries 0..63 are the encoding symbols,
	// entry 64 is the '=' padding character.
	static const char cTable[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789+/"
		"=";

	const int nChars = sText.GetLength();
	CString sBase64;

	//      cByte1          cByte2          cByte3
	//        |               |               |
	// -------+------- -------+------- -------+-------
	// 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
	// | | | | | | | | | | | | | | | | | | | | | | | |
	// x x x x x x x x x x x x x x x x x x x x x x x x
	// | | | | | | | | | | | | | | | | | | | | | | | |
	// 5 4 3 2 1 0 5 4 3 2 1 0 5 4 3 2 1 0 5 4 3 2 1 0
	// -----+----- -----+----- -----+----- -----+-----
	//      |           |           |           |
	//   nIndex1     nIndex2     nIndex3     nIndex4
	//

	for (int nPos = 0; nPos < nChars; nPos += 3)
	{
		const bool bHaveByte2 = (nPos + 1 < nChars);
		const bool bHaveByte3 = (nPos + 2 < nChars);

		// Bytes missing from the final group are taken as zero so that
		// the unused low bits of the last real symbol are zero.
		unsigned char cByte1 = static_cast<unsigned char>(sText.GetAt(nPos));
		unsigned char cByte2 = 0;
		unsigned char cByte3 = 0;
		if (bHaveByte2)
			cByte2 = static_cast<unsigned char>(sText.GetAt(nPos + 1));
		if (bHaveByte3)
			cByte3 = static_cast<unsigned char>(sText.GetAt(nPos + 2));

		int nIndex1 = (cByte1 >> 2) & 0x3F;
		int nIndex2 = ( ((cByte1 & 0x03) << 4)
						| ((cByte2 & 0xF0) >> 4) ) & 0x3F;
		int nIndex3 = ( ((cByte2 & 0x0F) << 2)
						| ((cByte3 & 0xC0) >> 6) ) & 0x3F;
		int nIndex4 = cByte3 & 0x3F;

		// Symbols that would carry no input bits become padding
		if (!bHaveByte2)
			nIndex3 = 64;
		if (!bHaveByte3)
			nIndex4 = 64;

		sBase64 += cTable[nIndex1];
		sBase64 += cTable[nIndex2];
		sBase64 += cTable[nIndex3];
		sBase64 += cTable[nIndex4];
	} // End for

	return sBase64;
}


// ********************* private
// *                   *
// *  ResponseMessage  *
// *                   *
// *********************
// Function: Returns a textual message describing 
//           an HTTP response (status) code.

CString CRobotInternet::ResponseMessage(const int nCode)
{
	// One row per response code that has a dedicated description.
	struct CodeText
	{
		int nStatus;
		const char* pszText;
	};

	static const CodeText aKnownCodes[] =
	{
		// 200 series (success)
		{ 200, "OK, request succeeded" },
		{ 201, "OK, new resource created." },
		{ 202, "Request accepted but processing not completed." },
		{ 204, "OK, but no content to return." },
		// 300 series (redirection)
		{ 301, "Requested resource has been assigned a "
			   "new permanent URL." },
		{ 302, "Requested resource resides temporarily "
			   "under a different URL." },
		{ 304, "Document has not been modified." },
		// 400 series (client error)
		{ 400, "Bad request." },
		{ 401, "Unauthorized; request requires "
			   "user authentication." },
		{ 403, "Forbidden for unspecified reason." },
		{ 404, "Not Found." },
		{ 407, "Unauthorized; reject by proxy server." },
		// 500 series (server error)
		{ 500, "Internal server error." },
		{ 501, "Not implemented." },
		{ 502, "Bad gateway; invalid response from "
			   "gateway or upstream server." },
		{ 503, "Service temporarily unavailable." }
	};
	const int nKnownCodes = sizeof(aKnownCodes) / sizeof(aKnownCodes[0]);

	CString sMessage;

	for (int nRow = 0; nRow < nKnownCodes; nRow++)
	{
		if (aKnownCodes[nRow].nStatus == nCode)
		{
			sMessage = aKnownCodes[nRow].pszText;
			return sMessage;
		} // End if
	} // End for

	// Any code without its own row is reported by number only
	sMessage.Format("Error %d", nCode);
	return sMessage;
}


// ****************** private
// *                *
// *  ErrorMessage  *
// *                *
// ******************
// Function: Returns a textual message describing a CRobot
//           error code.

CString CRobotInternet::ErrorMessage(const int nError)
{
	// Fixed description for the error code; stays NULL when the code
	// is not one of the values handled below.
	const char* pszDescription = NULL;

	switch (nError)
	{
	case CROBOT_ERR_SUCCESS:           pszDescription = "Successful";        break;
	case CROBOT_ERR_INVALID_URL:       pszDescription = "Invalid URL";       break;
	case CROBOT_ERR_INVALID_PARAMETER: pszDescription = "Invalid parameter"; break;
	case CROBOT_ERR_CONNECTION_FAILED: pszDescription = "Connection failed"; break;
	case CROBOT_ERR_TIMED_OUT:         pszDescription = "Timed out";         break;
	case CROBOT_ERR_NOT_FOUND:         pszDescription = "Not found";         break;
	case CROBOT_ERR_NOT_AUTHORIZED:    pszDescription = "Not authorized";    break;
	case CROBOT_ERR_DISK_FILE_ERROR:   pszDescription = "Disk/file error";   break;
	default:                                                                 break;
	} // End switch

	CString sText;
	if (pszDescription != NULL)
		sText = pszDescription;
	else
		// Unrecognized code: report it by number
		sText.Format("CRobotInternet error %d", nError);

	return sText;
}


// **************************************************************
// *															*
// *															*
// *				 H T T P  F u n c t i o n s					*
// *															*
// *															*
// **************************************************************


// ---------------------------------------------------------------
// ************************** private
// *                        *
// *  CreateStandardHeader  *
// *                        *
// **************************
// Function: Return a standard header to use with OpenURL calls.
//           If a call has been made to set proxy logon information,
//           the authentication string is included in the header
//			 that is returned.
//
// This is a private function called by various public functions.

CString CRobotInternet::CreateStandardHeader()
{
	// Every request accepts any content type
	CString sHeader("Accept: */*\r\n");

	// Proxy credentials are added only for the "basic" method
	// (compared case-sensitively) and only when a user name is set.
	const bool bProxyBasicLogon = (m_sProxyLogonMethod == "basic")
								  && !m_sProxyLogonUsername.IsEmpty();
	if (bProxyBasicLogon)
	{
		CString sProxyCredentials = m_sProxyLogonUsername;
		sProxyCredentials += ":";
		sProxyCredentials += m_sProxyLogonPassword;

		sHeader += "Proxy-authorization: Basic ";
		sHeader += EncodeTextBase64(sProxyCredentials);
		sHeader += "\r\n";
	} // End if

	// Server credentials are added whenever a logon user name is set;
	// "username:password" is sent Base64-encoded, not as plain text.
	if (!m_sLogonUsername.IsEmpty())
	{
		CString sLogonCredentials = m_sLogonUsername;
		sLogonCredentials += ":";
		sLogonCredentials += m_sLogonPassword;

		sHeader += "Authorization: Basic ";
		sHeader += EncodeTextBase64(sLogonCredentials);
		sHeader += "\r\n";
	} // End if

	// A blank line closes the header block
	sHeader += "\r\n";
	return sHeader;
}


// --------------------------------------------------------------
// ************* public
// *           *
// *  httpGet  *
// *           *
// *************
// Function: Retrieves a URL and returns it in CString form.
//
// Inputs:	sURL              - The URL to access
//								 (example: "www.mysite.com")
//
// Outputs:	<function_result> - True if data was successfully
//								 retrieved, false otherwise
//			sResponse         - The HTML retrieved.
//			nResult           - Completion code. 0 = success,
//								 n = error (defined in CRobot.h)
//			sErrMsg           - The error message, if nResult != 0

BOOL CRobotInternet::httpGet(const CString& sURL,
							 CString& sResponse,
							 int& nResult,
							 CString& sErrMsg)
{
	// Notes on behavior:
	//  - sResponse is assigned only when the body was read, or when an
	//    exception interrupted the transfer (it then receives whatever
	//    had been read so far). It is left untouched when the URL is
	//    rejected or the server answers with a status of 400 or above.
	//  - The body is accumulated as NUL-terminated text, so a chunk
	//    containing a zero byte is cut short at that byte.
	//  - sErrMsg is always set from nResult ("Successful" on success).

	// Resources are initialized here, before the try block, so the
	// cleanup code at the end never sees an indeterminate pointer.
	CInternetSession* pSession = NULL;
	CHttpFile* pHttpFile = NULL;
	char szBuffer[1024];		// Read buffer; last byte reserved for the NUL
	CString sResult;
	DWORD dwHttpStatus = 0;

	sErrMsg = "";
	nResult = CROBOT_ERR_SUCCESS;

	try 
	{
		CString sHeader = CreateStandardHeader();
		CString sWorkingUrl = sURL;

		/* Trim URL and add http:// if it contains no 
		   protocol identifier */
		sWorkingUrl.TrimLeft();
		sWorkingUrl.TrimRight();
		if (sWorkingUrl.Find(":") == -1) 
		{
			if (sWorkingUrl.Left(1) == "/")
				sWorkingUrl = "http:" + sWorkingUrl;
			else
				sWorkingUrl = "http://" + sWorkingUrl;
		} // End if

		DWORD dwServiceType;
		CString sServer, sObject;
		unsigned short nPort;

		if (sURL.IsEmpty())
		{
			// Empty URL
			nResult = CROBOT_ERR_INVALID_PARAMETER;
		}
		else if (!AfxParseURL(sWorkingUrl,
							  dwServiceType,
							  sServer,
							  sObject,
							  nPort)
				 || dwServiceType != AFX_INET_SERVICE_HTTP)
		{
			// Invalid URL, or not of the 'http:' service type
			nResult = CROBOT_ERR_INVALID_URL;
		}
		else
		{
			pSession = new CInternetSession(
							m_sUserAgent,
							++m_nContext,
							INTERNET_OPEN_TYPE_PRECONFIG);

			// Bypass the cache for reading and/or writing unless the
			// corresponding member flag allows it
			DWORD dwFlags = INTERNET_FLAG_TRANSFER_BINARY 
							| INTERNET_FLAG_EXISTING_CONNECT;
			if (!m_bReadFromCache) 
				dwFlags = dwFlags | INTERNET_FLAG_RELOAD;
			if (!m_bWriteToCache)
				dwFlags = dwFlags | INTERNET_FLAG_DONT_CACHE;

			pHttpFile = (CHttpFile*) 
							pSession->OpenURL(sWorkingUrl,
											  1,
											  dwFlags,
											  sHeader,
											  -1L);
			if (pHttpFile == NULL)
			{
				// OpenURL failed
				nResult = CROBOT_ERR_CONNECTION_FAILED;
			}
			else
			{
				// Check the HTTP return code; if it cannot be
				// queried, treat the request as successful
				if (!pHttpFile->QueryInfoStatusCode(dwHttpStatus))
					dwHttpStatus = 200;

				if (dwHttpStatus >= 400)
				{
					switch (dwHttpStatus)
					{
					case 404:
						nResult = CROBOT_ERR_NOT_FOUND;
						break;
					case 401:	// server requires authentication
					case 403:
					case 407:	// proxy requires authentication
						nResult = CROBOT_ERR_NOT_AUTHORIZED;
						break;
					default:
						nResult = CROBOT_ERR_CONNECTION_FAILED;
						break;
					} // End switch
				} // End if dwHttpStatus
				else /* No error - read response data */
				{
					nResult = CROBOT_ERR_SUCCESS;

					// Read until the server has no more data,
					// terminating each chunk before appending it
					UINT nRead = 0;
					do 
					{
						nRead = pHttpFile->Read(szBuffer,
												sizeof(szBuffer) - 1);
						if (nRead != 0) 
						{
							szBuffer[nRead] = 0;
							sResult += szBuffer;
						} // End if
					} while (nRead != 0);
					sResponse = sResult;
				} // End else
			} // End else pHttpFile
		} // End else
	} // End try

	catch (CInternetException* e) 
	{
		e->Delete();

		// Internet exception occurred - hand back any partial data
		sResponse = sResult;
		nResult = CROBOT_ERR_CONNECTION_FAILED;
	} // End catch
	catch (...) 
	{
		// Other exception occurred - hand back any partial data
		sResponse = sResult;
		nResult = CROBOT_ERR_CONNECTION_FAILED;
	} // End catch

	// Clean up and exit function

	if (pHttpFile != NULL) 
	{
		pHttpFile->Close();
		delete pHttpFile;
	} // End if

	if (pSession != NULL) 
	{
		pSession->Close();
		delete pSession;
	} // End if

	sErrMsg = ErrorMessage(nResult);
	return (nResult == CROBOT_ERR_SUCCESS);
}


// --------------------------------------------------------------
// ***************** public
// *               *
// *  httpGetFile  *
// *               *
// *****************
// Function: Retrieves a URL and outputs it to a local file
//
// Inputs:	sURL              - The URL to access
//                               (example: "www.mysite.com")
//			sOutputFilespec   - File to output to
//                               (example: "c:\temp\file1.gif")
//
// Outputs:	<function_result> - True if data was successfully
//                               retrieved, false otherwise
//			nResult           - Completion code. 0 = success,
//                               n = error (defined in CRobot.h)
//			sErrMsg           - The error message, if nResult != 0

BOOL CRobotInternet::httpGetFile(const CString& sURL,
								 const CString& sOutputFilespec,
								 int& nResult,
								 CString& sErrMsg)
{
	CInternetSession* pSession;
	CHttpFile* pHttpFile;
	CFile* pLocalFile;
	CFileException exFile;
	CString sHeader;
	int nRead;
	LPSTR pBuffer = NULL;
	CString sResult;
	CString sWorkingUrl;
	CString sTemp;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -