⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 http.cpp

📁 Linux TSE 源代码! 十分宝贵
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include <stdlib.h>#include <stdio.h>#include <string.h>#include <strings.h>#include <errno.h>#include <netdb.h>#include <unistd.h>#include <netinet/in.h>#include <sys/types.h>#include <sys/socket.h>#include <sys/time.h>#include <fcntl.h>#include <iostream>#include "Http.h"#include "Tse.h"#include "CommonDef.h"#include "Url.h"#include "Page.h"#include "StrFun.h"char *userAgent = NULL;int timeout = DEFAULT_TIMEOUT;int hideUserAgent = 0;CHttp::CHttp(){}CHttp::~CHttp(){}	/*         * Actually downloads the page, registering a hit (donation)         *      If the fileBuf passed in is NULL, the url is downloaded and then         *      freed; otherwise the necessary space is allocated for fileBuf.         *      Returns size of download on success, 			-1 on error is set,	 		-2 out of ip block,	 		-3 invalid host,			-4 MIME is imag/xxx	 		-300 on 301.         */int CHttp::Fetch(string strUrl, char **fileBuf, char **fileHeadBuf, char **location, int* nPSock ){	char *tmp, *url, *requestBuf, *pageBuf;	const char *host, *path;	int sock, bytesRead = 0, bufsize = REQUEST_BUF_SIZE;	int ret = -1, tempSize, selectRet;	int port = 80;	if( strUrl.empty() ){		cout << "strUrl is NULL" << endl;		return -1;	}	/* Copy the url passed in into a buffer we can work with, change, etc. 
*//*	url = (char*)malloc(strUrl.length()+1);	if( url == NULL ){		cout << "can not allocate enought memory for url" << endl;		return -1;	} else {		memset(url, 0,strUrl.length()+1);		memcpy(url, strUrl.c_str(), strUrl.length() );	}*/	//pthread_mutex_lock(&mutexMemory);	url = strdup(strUrl.c_str());	//pthread_mutex_unlock(&mutexMemory);	if( url == NULL ){		cout << "!error: stdup() in Fetch()" << endl;		return -1;	}	// parse the url	CUrl u;	if( u.ParseUrlEx(url) == false ){		cout << "ParseUrlEx error in Fetch(): " << strUrl << endl;		return -1;	}	host = u.m_sHost.c_str();	path = u.m_sPath.c_str();	if( u.m_nPort > 0 ) port = u.m_nPort;	/* Compose a request string */	//pthread_mutex_lock(&mutexMemory);	requestBuf = (char*)malloc(bufsize);	//pthread_mutex_unlock(&mutexMemory);	if(requestBuf == NULL){		if (url)		{			//pthread_mutex_lock(&mutexMemory);			free(url); url=NULL;			//pthread_mutex_unlock(&mutexMemory);		}		cout << "can not allocate enought memory for requestBuf" << endl;		return -1;	}	requestBuf[0] = 0;	if( strlen(path) < 1 ){		/* The url has no '/' in it, assume the user is making a root-level                 *      request */		tempSize = strlen("GET /") + strlen(HTTP_VERSION) +2;/*		if( tempSize > bufsize ){			free(url);			free(requestBuf);			cout << "tempSize larger than bufsize" << endl;			return -1;		}*/		if(checkBufSize(&requestBuf, &bufsize, tempSize) ||			snprintf(requestBuf, bufsize, "GET / %s\r\n", 			HTTP_VERSION) < 0 )		{			//pthread_mutex_lock(&mutexMemory);			if (url)			{				 free(url); url=NULL;			}			if (requestBuf)			{				 free(requestBuf); requestBuf=NULL;			}			//pthread_mutex_unlock(&mutexMemory);			cout << "1.checkBuffSize(&requestBuf..) 
error" << endl;			return -1;		}	}else{		tempSize = strlen("GET ") + strlen(path) + strlen(HTTP_VERSION) + 4;		if(checkBufSize(&requestBuf, &bufsize, tempSize) ||			snprintf(requestBuf, bufsize, "GET %s %s\r\n", 			path, HTTP_VERSION) < 0)		{			//pthread_mutex_lock(&mutexMemory);			if (url)			{				 free(url); url=NULL;			}			if (requestBuf)			{				 free(requestBuf); requestBuf=NULL;			}			//pthread_mutex_unlock(&mutexMemory);			cout << "2._checkBuffSize(&requestBuf..) error" << endl;			return -1;		}	}	/* Use Host: even though 1.0 doesn't specify it.  Some servers         *      won't play nice if we don't send Host, and it shouldn't hurt anything */	tempSize = (int)strlen("Host: ") + (int)strlen(host) + 3;/* +3 for "\r\n\0" */	if(checkBufSize(&requestBuf, &bufsize, tempSize + 128)){		//pthread_mutex_lock(&mutexMemory);		if (url)		{			 free(url); url=NULL;		}		if (requestBuf)		{			 free(requestBuf); requestBuf=NULL;		}		//pthread_mutex_unlock(&mutexMemory);		cout << "3._checkBuffSize(&requestBuf..) error" << endl;		return -1;	}	strcat(requestBuf, "Host: ");	strcat(requestBuf, host);	strcat(requestBuf, "\r\n");	if(!hideUserAgent && userAgent == NULL) {		tempSize = (int)strlen("User-Agent: ") +			(int)strlen(DEFAULT_USER_AGENT) + (int)strlen(VERSION) + 4;		if(checkBufSize(&requestBuf, &bufsize, tempSize)) {			//pthread_mutex_lock(&mutexMemory);			if (url)			{			 	free(url); url=NULL;			}			if (requestBuf)			{			 	free(requestBuf); requestBuf=NULL;			}			//pthread_mutex_unlock(&mutexMemory);			cout << "4._checkBuffSize(&requestBuf..) 
error" << endl;			return -1;		}		strcat(requestBuf, "User-Agent: ");		strcat(requestBuf, DEFAULT_USER_AGENT);		strcat(requestBuf, "/");		strcat(requestBuf, VERSION);		strcat(requestBuf, "\r\n");	} else if(!hideUserAgent) {		tempSize = (int)strlen("User-Agent: ") + (int)strlen(userAgent) + 3;		if(checkBufSize(&requestBuf, &bufsize, tempSize)) {			//pthread_mutex_lock(&mutexMemory);			if (url)			{			 	free(url); url=NULL;			}			if (requestBuf)			{			 	free(requestBuf); requestBuf=NULL;			}			//pthread_mutex_unlock(&mutexMemory);			cout << "5._checkBuffSize(&requestBuf..) error" << endl;			return -1;		}		strcat(requestBuf, "User-Agent: ");		strcat(requestBuf, userAgent);		strcat(requestBuf, "\r\n");	}	//tempSize = (int)strlen("Connection: Close\n\n");	tempSize = (int)strlen("Connection: Keep-Alive\r\n\r\n");	if(checkBufSize(&requestBuf, &bufsize, tempSize)) {		//pthread_mutex_lock(&mutexMemory);		if (url)		{		 	free(url); url=NULL;		}		if (requestBuf)		{		 	free(requestBuf); requestBuf=NULL;		}		//pthread_mutex_unlock(&mutexMemory);		cout << "6._checkBuffSize(&requestBuf..) 
error" << endl;		return -1;	}	//strcat(requestBuf, "Connection: Close\n\n");	strcat(requestBuf, "Connection: Keep-Alive\r\n\r\n");	/* Now free any excess memory allocated to the buffer */	//pthread_mutex_lock(&mutexMemory);	tmp = (char *)realloc(requestBuf, strlen(requestBuf) + 1);	//pthread_mutex_unlock(&mutexMemory);	if(tmp == NULL){		//pthread_mutex_lock(&mutexMemory);		if (url)		{		 	free(url); url=NULL;		}		if (requestBuf)		{		 	free(requestBuf); requestBuf=NULL;		}		//pthread_mutex_unlock(&mutexMemory);		cout << "realloc for tmp error" << endl;		return -1;	}	requestBuf = tmp;	if( *nPSock != -1 ){		sock = *nPSock;		cout << "using privous socket " << *nPSock << endl;	}else{		// cout << "1.get a new one" << endl;		sock = CreateSocket( host, port );		if(sock == -1) { // invalid host			//pthread_mutex_lock(&mutexMemory);			if (url)			{		 		free(url); url=NULL;			}			if (requestBuf)			{		 		free(requestBuf); requestBuf=NULL;			}			//pthread_mutex_unlock(&mutexMemory);			return -3;		}		if(sock == -2) { // out of ip block			//pthread_mutex_lock(&mutexMemory);			if (url)			{		 		free(url); url=NULL;			}			if (requestBuf)			{		 		free(requestBuf); requestBuf=NULL;			}			//pthread_mutex_unlock(&mutexMemory);			//cout << "2.not able to MakeSocket" << endl;			return -2;		}	}		ret = write(sock, requestBuf, strlen(requestBuf));	if( ret == 0 ){		cout << "requestBuf is " << requestBuf << endl;		cout << "write nothing" << endl;		//pthread_mutex_lock(&mutexMemory);		if (url)		{			free(url); url=NULL;		}		if (requestBuf)		{			free(requestBuf); requestBuf=NULL;		}		//pthread_mutex_unlock(&mutexMemory);		close(sock);		*nPSock = -1;		return -1;			}	if( ret == -1){		//cout << "write error" << endl;		// sock is invalid,we should make a new one		close(sock);		*nPSock  = -1;		cout << "2.close previous socket " << *nPSock << " and get a new one" << endl;		//maybe sock is dead,try again		sock = CreateSocket( host, port );		if(sock == -1) { 			//pthread_mutex_lock(&mutexMemory);			if 
(url)			{				free(url); url=NULL;			}			if (requestBuf)			{				free(requestBuf); requestBuf=NULL;			}			//pthread_mutex_unlock(&mutexMemory);			cout << "3.not able to MakeSocket" << endl;			return -1;		}		if(sock == -2) { 			//pthread_mutex_lock(&mutexMemory);			if (url)			{				free(url); url=NULL;			}			if (requestBuf)			{				free(requestBuf); requestBuf=NULL;			}			//pthread_mutex_unlock(&mutexMemory);			cout << "4.not able to MakeSocket" << endl;			return -1;		}		if(write(sock, requestBuf, strlen(requestBuf)) == -1){			//pthread_mutex_lock(&mutexMemory);			if (url)			{				free(url); url=NULL;			}			if (requestBuf)			{				free(requestBuf); requestBuf=NULL;			}			//pthread_mutex_unlock(&mutexMemory);			close(sock);			*nPSock = -1;			cout << "write error" << endl;			return -1;		}	}	//pthread_mutex_lock(&mutexMemory);	if (url)	{		free(url); url=NULL;	}	if (requestBuf)	{		free(requestBuf); requestBuf=NULL;	}	//pthread_mutex_unlock(&mutexMemory);	char headerBuf[HEADER_BUF_SIZE];	/* Grab enough of the response to get the metadata */	memset( headerBuf,0,HEADER_BUF_SIZE );	//cout << "old sock is " << sock << endl;	ret = read_header(sock, headerBuf);	//cout << "ret = " << ret << endl;	if(ret < 0) { 		close(sock); 		*nPSock = -1;		return -1;	}	//cout << headerBuf << endl;	if( strlen(headerBuf) == 0 ){		cout << "strlen(headerBuf) = 0" << headerBuf << endl;		cout << "strUrl: " << strUrl << endl << endl;;		close(sock);                *nPSock = -1;		return -1;	}	CPage iPage;	iPage.ParseHeaderInfo(headerBuf);	if (iPage.m_nStatusCode == -1)	{		close(sock);		*nPSock = -1;		cout << "headerBuf: " << headerBuf << endl;		cout << "!header error: not find HTTP" << endl;		return -1;	}	#ifdef DEBUG	// http return code		cout <<"######Http return code: ######" << endl << i << endl;	#endif	// deal with http://net.cs.pku.edu.cn/~cnds	if (iPage.m_nStatusCode == 301 || iPage.m_nStatusCode == 302)	{		if (iPage.m_sLocation.empty() || iPage.m_sLocation.size()>URL_LEN)		{				close(sock);			*nPSock 
= -1;			cout << headerBuf << endl;			cout << "!error: Location" << endl;			return -1;		} else{			//pthread_mutex_lock(&mutexMemory);			char *loc=strdup(iPage.m_sLocation.c_str());			//pthread_mutex_unlock(&mutexMemory);			*location = loc;			close(sock);			*nPSock = -1;			return -300;		}	}	if(iPage.m_nStatusCode<200 || iPage.m_nStatusCode>299 ){		close(sock);		*nPSock = -1;		cout << "!header code = " << iPage.m_nStatusCode << endl;		return -1;	}	// when crawling images for ImgSE, remember to comment the paragraph	// when crawling plain text for SE, remember to open the paragraph	// paragraph begin

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -