⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ghttp.cpp

📁 一个由Mike Gashler完成的机器学习方面的includes neural net, naive bayesian classifier, decision tree, KNN, a genet
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/*
	Copyright (C) 2006, Mike Gashler

	This library is free software; you can redistribute it and/or
	modify it under the terms of the GNU Lesser General Public
	License as published by the Free Software Foundation; either
	version 2.1 of the License, or (at your option) any later version.

	see http://www.gnu.org/copyleft/lesser.html
*/

#include "GFile.h"
#include "GHttp.h"
#include "GSocket.h"
#include "GString.h"
#include "GMacros.h"
#include "GQueue.h"
#include "GTime.h"
#include "GThread.h" // Sleep
#include "GHashTable.h"
#include <time.h>

class GHttpClient;

// Client socket owned by a GHttpClient. Its only job beyond GSocketClient is
// to forward the lost-connection notification to the owning client.
class GHttpClientSocket : public GSocketClient
{
protected:
	GHttpClient* m_pParent;

public:
	GHttpClientSocket(GHttpClient* pParent, int nMaxPacketSize) : GSocketClient(false, nMaxPacketSize)
	{
		m_pParent = pParent;
	}

	virtual ~GHttpClientSocket()
	{
	}

	// Allocates a socket bound to pParent and connects it to szHost:nPort.
	// Returns the connected socket (caller owns it), or NULL if the
	// connection could not be established.
	static GHttpClientSocket* ConnectToTCPSocket(GHttpClient* pParent, const char* szHost, int nPort)
	{
		GHttpClientSocket* pSocket = new GHttpClientSocket(pParent, 0);
		if(!pSocket)
			return NULL;
		if(!pSocket->Connect(szHost, nPort))
		{
			delete(pSocket);
			return NULL;
		}
		return pSocket;
	}

protected:
	// Relays the notification to the owning GHttpClient.
	virtual void OnLoseConnection(int nSocketNumber)
	{
		m_pParent->OnLoseConnection();
	}
};

// -------------------------------------------------------------------------------

//FILE* g_pFile;

// Puts the client into an idle state: no socket, no data, status Error.
// Every member that other methods read is initialized here so that calling
// CheckStatus/GimmeWhatYouGot before the first Get is well defined.
GHttpClient::GHttpClient()
{
	m_pSocket = NULL;
	m_status = Error;
	m_pData = NULL;
	m_pChunkQueue = NULL;
	m_bPastHeader = false;
	m_bChunked = false;
	m_bAmCurrentlyDoingJustHeaders = false;
	m_nContentSize = 0;
	m_nDataPos = 0;
	m_nHeaderPos = 0;
	m_szServer[0] = '\0';
	m_szRedirect = NULL;
	m_dLastReceiveTime = 0;
	strcpy(m_szClientName, "GHttpClient/1.0");
	caLastModifiedString[0] = '\0';
//g_pFile = fopen("tmp.txt", "w");
}

// Releases the socket, the chunk queue and both heap buffers.
GHttpClient::~GHttpClient()
{
	delete(m_pSocket);
	// m_pData and m_szRedirect are allocated with new[] in this file, so they
	// must be released with delete[].
	// NOTE(review): m_pData may also come from GQueue::DumpToString(); this
	// assumes that function allocates with new[] as well -- confirm.
	delete[] m_pData;
	delete(m_pChunkQueue);
	delete[] m_szRedirect;
}

// Disconnects the current socket (if any) and marks the transfer Aborted.
void GHttpClient::Abort()
{
	// Original author's note [rdp]: explicitly disconnecting here works around
	// "hanging" network bugs. The suspicion is that the connection is sometimes
	// closed by the server and then silently retried at the lowest socket
	// level when the same connection is reused; the HTTP "Connection: close"
	// message is currently ignored. Worth investigating and fixing properly.
	if(m_pSocket) // there is no socket before the first Get or after a lost connection
		m_pSocket->Disconnect();
	m_status = Aborted;
}

// Sets the value sent in the User-Agent header. At most 31 characters of
// szClientName are kept.
void GHttpClient::SetClientName(const char* szClientName)
{
	strncpy(m_szClientName, szClientName, 32);
	m_szClientName[31] = '\0';
}

// Drains every message currently waiting on the socket, feeding each one to
// the header or body parser, and returns the resulting status.
// If pfProgress is non-NULL and at least one message was processed, it
// receives m_nDataPos / m_nContentSize, or 0 when the content size is zero.
GHttpClient::Status GHttpClient::CheckStatus(float* pfProgress)
{
	const unsigned char* szChunk;
	int nSize;
	// m_pSocket is re-tested on every iteration: processing a message can
	// replace it (redirect) or clear it (failed reconnect, lost connection).
	while(m_pSocket && m_pSocket->GetMessageCount() > 0)
	{
		m_dLastReceiveTime = GTime::GetTime();
		szChunk = m_pSocket->GetNextMessage(&nSize);
//fwrite(szChunk, nSize, 1, g_pFile);
//fflush(g_pFile);
		if(m_bPastHeader)
			ProcessBody(szChunk, nSize);
		else
			ProcessHeader(szChunk, nSize);
		if(pfProgress)
		{
			if(m_nContentSize)
				*pfProgress = (float)m_nDataPos / m_nContentSize;
			else
				*pfProgress = 0;
		}
	}
	return m_status;
}

// Sends an HTTP/1.1 request for szUrl and resets the download state.
//   szUrl           - URL to request; parsed with GHttpClientSocket::ParseURL.
//   actuallyGetData - true sends GET, false sends HEAD (the original comment
//                     states that the declared default is true).
// Returns true if the request was sent, false if connecting or sending failed.
// todo: rename to something like sendRequestToServer
// todo: make it more lenient with the timeout values for downloading the heads...
bool GHttpClient::Get(const char* szUrl, bool actuallyGetData)
{
	GTEMPBUF(char, szNewUrl, strlen(szUrl) + 1);
	strcpy(szNewUrl, szUrl);
	GFile::CondensePath(szNewUrl);

	// Get the port
	int nHostIndex, nPortIndex, nPathIndex;
	GHttpClientSocket::ParseURL(szNewUrl, &nHostIndex, &nPortIndex, &nPathIndex, NULL);
	int nPort;
	if(nPathIndex > nPortIndex)
		nPort = atoi(&szNewUrl[nPortIndex + 1]); // the "+1" is for the ':'
	else
		nPort = 80;

	// Copy the host name
	int nTempBufSize = nPortIndex - nHostIndex + 1;
	GTEMPBUF(char, szHost, nTempBufSize);
	memcpy(szHost, &szNewUrl[nHostIndex], nPortIndex - nHostIndex);
	szHost[nPortIndex - nHostIndex] = '\0';

	// Connect. The existing socket is reused only if it is still connected,
	// has received something within the last 10 time units reported by
	// GTime::GetTime (presumably seconds), and targets the same host.
	if(!m_pSocket || GTime::GetTime() - m_dLastReceiveTime > 10 || !m_pSocket->IsConnected() || strcmp(szHost, m_szServer) != 0)
	{
		delete(m_pSocket);
		m_pSocket = GHttpClientSocket::ConnectToTCPSocket(this, szHost, nPort);
		if(!m_pSocket)
			return false;
		strncpy(m_szServer, szHost, 255);
		m_szServer[255] = '\0';
	}

	// Send the request
	const char* szPath = &szNewUrl[nPathIndex];
	if(szPath[0] == 0)
		szPath = "/index.html";
	GString s;
	if(actuallyGetData)
	{
		// do GET
		m_bAmCurrentlyDoingJustHeaders = false;
		s.Add(L"GET ");
	}
	else
	{
		// do HEAD
		m_bAmCurrentlyDoingJustHeaders = true;
		s.Add(L"HEAD ");
	}
	while(*szPath != '\0')
	{
		// Spaces are the only characters that get percent-encoded
		if(*szPath == ' ')
			s.Add(L"%20");
		else
			s.Add(*szPath);
		szPath++;
	}
	s.Add(L" HTTP/1.1\r\n");
	s.Add(L"Host: ");
	s.Add(szHost);
	s.Add(L":");
	s.Add(nPort);
// todo: undo the next line
//	s.Add(L"\r\nUser-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.12) Gecko/20051010 Firefox/1.0.7 (Ubuntu package 1.0.7)\r\nAccept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5\r\nAccept-Language: en-us,en;q=0.5\r\nAccept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\nKeep-Alive: 300\r\nConnection: keep-alive\r\n\r\n");
	s.Add(L"\r\nUser-Agent: ");
	s.Add(m_szClientName);
	s.Add("\r\nKeep-Alive: 60\r\nConnection: keep-alive\r\n\r\n");
	int nTempBufLen = s.GetLength() + 1;
	GTEMPBUF(char, szRequest, nTempBufLen);
	s.GetAnsi(szRequest);
	if(!m_pSocket->Send((unsigned char*)szRequest, s.GetLength()))
	{
		return false;
	}

	// Update [reset] status
	m_nContentSize = 0;
	m_nDataPos = 0;
	m_bChunked = false;
	m_bPastHeader = false;
	m_nHeaderPos = 0;
	delete[] m_pData; // allocated with new[] (see ProcessHeader)
	m_pData = NULL;
	m_status = Downloading;
	return true;
}

// Consumes header bytes one at a time, accumulating each line in
// m_szHeaderBuf (only the first 256 bytes of a line are kept) and
// interpreting it when its '\n' arrives. A line of at most two bytes ends the
// header section; any bytes that follow it in the same packet are handed to
// ProcessBody, unless a redirect was requested, in which case the redirect
// target is fetched instead.
void GHttpClient::ProcessHeader(const unsigned char* szData, int nSize)
{
	while(nSize > 0)
	{
		if(m_nHeaderPos < 256)
			m_szHeaderBuf[m_nHeaderPos++] = *szData;
		if(*szData == '\n')
		{
			// NOTE(review): this writes index 256 when a line fills the
			// buffer, so m_szHeaderBuf must hold at least 257 bytes -- confirm
			// against the declaration in GHttp.h.
			m_szHeaderBuf[m_nHeaderPos] = '\0';
			if(m_nHeaderPos <= 2)
			{
				// done with headers
				szData++;
				nSize--;
				m_bPastHeader = true;
				if(m_bAmCurrentlyDoingJustHeaders)
				{
					// A HEAD request is complete as soon as the headers are
					m_status = Done;
					//lodo could reset
					GAssert(!(nSize > 0 && (m_bChunked || m_nContentSize > 0)), "uh we shouldn't have a body we were just waiting for the headers!");
				}

				if(m_szRedirect)
				{
					// Get() resets all of the download state as if this were a
					// fresh request for the redirect target.
					if(!Get(m_szRedirect))
						m_status = Error; // todo make it a redirect error
					delete[] m_szRedirect;
					m_szRedirect = NULL;
				}
				else if(nSize > 0 && (m_bChunked || m_nContentSize > 0 || !m_bAmCurrentlyDoingJustHeaders))
				{
					// The packet also carries the start of the body. This now
					// includes responses with neither Content-Length nor
					// chunked encoding, whose first body bytes used to be
					// dropped; ProcessBody queues them until the connection
					// closes.
					ProcessBody(szData, nSize);
				}
				return;
			}
			if(strnicmp(m_szHeaderBuf, "HTTP/", 5) == 0)
			{
				// Status line: skip the version token, then inspect the first
				// digit of the status code. The NUL test keeps the scan inside
				// the buffer when an over-long line was truncated.
				char* szTmp = m_szHeaderBuf + 5;
				while(*szTmp != '\0' && *szTmp != ' ' && *szTmp != '\n')
					szTmp++;
				if(*szTmp == ' ')
					szTmp++;
				if(*szTmp == '4')
					m_status = NotFound;
				else if(*szTmp != '2')
					m_status = Error;
				else
					m_nContentSize = 0;
			} // lodo all catches return correctly?
			else if(strnicmp(m_szHeaderBuf, "Content-Length:", 15) == 0)
			{
				char* szTmp = m_szHeaderBuf + 15;
				while(*szTmp != '\0' && *szTmp != '\n' && *szTmp <= ' ')
					szTmp++;
				m_nContentSize = atoi(szTmp);
				if(m_nContentSize > 0)
				{
					// Release any earlier buffer so a repeated header does not leak
					delete[] m_pData;
					m_pData = new unsigned char[m_nContentSize + 1]; // +1 for the trailing '\0'
					GAssert(m_pData, "out of memory");
					m_nDataPos = 0;
				}
			}
			else if(strnicmp(m_szHeaderBuf, "Transfer-Encoding: chunked", 26) == 0)
			{
				m_bChunked = true;
			}
			else if(strnicmp(m_szHeaderBuf, "Location:", 9) == 0)
			{
				// Keep the URL without surrounding whitespace. The stored line
				// still ends in "\r\n", which must not leak into the request
				// line that Get() builds from this URL.
				const char* szLoc = m_szHeaderBuf + 9;
				while(*szLoc > '\0' && *szLoc <= ' ')
					szLoc++;
				int nLen = (int)strlen(szLoc);
				while(nLen > 0 && szLoc[nLen - 1] > '\0' && szLoc[nLen - 1] <= ' ')
					nLen--;
				delete[] m_szRedirect;
				m_szRedirect = new char[nLen + 1];
				memcpy(m_szRedirect, szLoc, nLen);
				m_szRedirect[nLen] = '\0';
			}
			else if(strnicmp(m_szHeaderBuf, "Last-Modified:", 14) == 0)
			{
				// Copies the whole header line, including its terminator.
				// NOTE(review): unbounded copy of up to 257 bytes -- confirm
				// that caLastModifiedString is at least that large.
				strcpy(caLastModifiedString, m_szHeaderBuf);
			}
			m_nHeaderPos = 0;
		}
		szData++;
		nSize--;
	}
}

// Routes body bytes according to how the response is framed:
//   chunked          -> ProcessChunkBody
//   Content-Length   -> copied into m_pData; status becomes Done when full
//   neither          -> queued until the connection closes
void GHttpClient::ProcessBody(const unsigned char* szData, int nSize)
{
	if(m_bChunked)
		ProcessChunkBody(szData, nSize);
	else if(m_nContentSize > 0)
	{
		// Never copy past the announced content size
		if(m_nDataPos + nSize > m_nContentSize)
			nSize = m_nContentSize - m_nDataPos;
		memcpy(m_pData + m_nDataPos, szData, nSize);
		m_nDataPos += nSize;
		if(m_nDataPos >= m_nContentSize)
		{
			if(m_status == Downloading)
			{
				m_status = Done; // too bad this involves polling...teeny teeny lag :)
			}
			m_pData[m_nContentSize] = '\0';
			m_bPastHeader = false; // ready for the next response's headers
		}
	}
	else
	{
		if(!m_pChunkQueue)
			m_pChunkQueue = new GQueue();
		m_pChunkQueue->Push(szData, nSize);
	}
}

// Called (via GHttpClientSocket) when the connection drops. An unfinished
// chunked or Content-Length download becomes an Error; a download with no
// declared length is considered complete and its queued bytes become the
// result. The socket is then released.
void GHttpClient::OnLoseConnection()
{
	if(m_bChunked)
	{
		if(m_status == Downloading)
			m_status = Error;
	}
	else if(m_nContentSize > 0)
	{
		if(m_status == Downloading)
			m_status = Error;
	}
	else
	{
		if(m_status == Downloading)
			m_status = Done;
		// The queue does not exist if no body bytes ever arrived
		if(m_pChunkQueue)
		{
			m_nContentSize = m_pChunkQueue->GetSize();
			delete[] m_pData;
			m_pData = (unsigned char*)m_pChunkQueue->DumpToString();
		}
		m_bPastHeader = false;
	}

	// todo: take a lock around this
	// NOTE(review): this is reached from the socket's own OnLoseConnection
	// callback, so the socket is deleted while one of its member functions is
	// still on the call stack -- confirm GSocketClient tolerates that.
	GHttpClientSocket* pSocket = m_pSocket;
	m_pSocket = NULL;
	delete(pSocket);
}

// Parses "Transfer-Encoding: chunked" body data. While inside this function
// with m_bChunked set, m_nContentSize holds the number of bytes still
// expected for the current chunk (0 means a chunk-size line comes next).
// A zero-sized chunk ends the body: the queued bytes become m_pData and
// m_nContentSize becomes the total size.
void GHttpClient::ProcessChunkBody(const unsigned char* szData, int nSize)
{
	if(!m_pChunkQueue)
		m_pChunkQueue = new GQueue();
	while(nSize > 0)
	{
		if(m_nContentSize == 0)
		{
			// Read the chunk size. Each loop tests the bound before touching
			// szData[n] so the buffer is never read past its end.
			int n;
			for(n = 0; n < nSize && (szData[n] < '0' || szData[n] > 'f'); n++)
			{
			}
			int nHexStart = n;
			for( ; n < nSize && szData[n] >= '0' && szData[n] <= 'f'; n++)
			{
			}
			if(n >= nSize)
				break; // the size line is not terminated within this packet

			// Convert it from hex to an integer
			int nPow = 1;
			int nDig;
			int i;
			for(i = n - 1; i >= nHexStart; i--)
			{
				if(szData[i] >= '0' && szData[i] <= '9')
					nDig = szData[i] - '0';
				else if(szData[i] >= 'a' && szData[i] <= 'f')
					nDig = szData[i] - 'a' + 10;
				else if(szData[i] >= 'A' && szData[i] <= 'F')
					nDig = szData[i] - 'A' + 10;
				else
				{
					nDig = 0;
					GAssert(false, "expected a hex digit");
				}
				m_nContentSize += (nDig * nPow);
				nPow *= 16;
			}

			// Skip the remainder of the size line
			for( ; n < nSize && szData[n] != '\n'; n++)
			{
			}
			if(n < nSize && szData[n] == '\n')
				n++;
			szData += n;
			nSize -= n;
		}
		if(m_nContentSize == 0)
		{
			// Zero-sized chunk: the body is complete
			m_nContentSize = m_pChunkQueue->GetSize();
			delete[] m_pData;
			m_pData = (unsigned char*)m_pChunkQueue->DumpToString();
			m_bChunked = false;
			m_bPastHeader = false;
			if(m_status == Downloading)
				m_status = Done;
			break;
		}
		else
		{
			int nChunkSize = MIN(m_nContentSize, nSize);
			m_pChunkQueue->Push(szData, nChunkSize);
			szData += nChunkSize;
			nSize -= nChunkSize;
			m_nContentSize -= nChunkSize;
		}
	}
}

// todo: this is a hack--fix it properly
// Finishes the download early with whatever has arrived so far, provided
// more than 64 bytes are available; otherwise the state is left untouched.
void GHttpClient::GimmeWhatYouGot()
{
	if(m_bChunked)
	{
		// m_nContentSize is only overwritten once we commit to finishing;
		// until then it still counts the bytes left in the current chunk.
		int nQueued = m_pChunkQueue ? m_pChunkQueue->GetSize() : 0;
		if(nQueued > 64)
		{
			m_nContentSize = nQueued;
			delete[] m_pData;
			m_pData = (unsigned char*)m_pChunkQueue->DumpToString();
			m_bChunked = false;
			m_bPastHeader = false;
			if(m_status == Downloading)
				m_status = Done;
		}
	}
	else if(m_nContentSize > 0)
	{
		if(m_nDataPos > 64)
		{
			if(m_status == Downloading)
				m_status = Done;
			m_pData[m_nDataPos] = '\0';
			m_bPastHeader = false;
			m_nContentSize = m_nDataPos; // report only what was received
		}
	}
	else
	{
		if(m_pChunkQueue && m_pChunkQueue->GetSize() > 64)
		{
			if(m_status == Downloading)
				m_status = Done;
			m_nContentSize = m_pChunkQueue->GetSize();
			delete[] m_pData;
			m_pData = (unsigned char*)m_pChunkQueue->DumpToString();
			m_bPastHeader = false;
		}
	}
}

unsigned char* GHttpClient::GetData(int* pnSize)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -