⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 httptse.cpp

📁 小型搜索引擎,用C/C++编写,属于全文搜索引擎
💻 CPP
📖 第 1 页 / 共 2 页
字号:
		if(DEFAULT_TIMEOUT >= 0)			selectRet = select(sock+1, &rfds, NULL, NULL, &tv);		else            /* No timeout, can block indefinately */			selectRet = select(sock+1, &rfds, NULL, NULL, NULL);		if(selectRet == 0 && timeout < 0){			close(sock);			*nPSock = -1;			free(pageBuf);			cout << "selectRet == 0 && timeout < 0" << endl;			return -1;		} else if(selectRet == -1){			close(sock);			*nPSock = -1;			free(pageBuf);			cout << "selectRet == -1" << endl;			return -1;		}                ret = read(sock, pageBuf + bytesRead, contentLength);                //ret = read(sock, (char*)pageBuf.c_str() + bytesRead, contentLength);		if(ret == 0) break;		if(ret == -1 && pre_ret==0) {			close(sock);			*nPSock = -1;			free(pageBuf);			cout << "read()'s retval=-1" << endl;			return -1;		}else if( ret == -1 && pre_ret ){			//cout << "2. pre_ret = " << pre_ret << endl;/*			if( bytesRead < contentLength){	// meaning we lost the connection too soon				cout << "lost the connection too soon" << endl;				freeOpageBuf);				return -1;			}*/			break;		}		pre_ret = ret;		//cout << "1.pre_ret = " << pre_ret << endl;		bytesRead += ret;			/* To be tolerant of inaccurate Content-Length fields, we'll			 *      allocate another read-sized chunk to make sure we have			 *      enough room.			 */		if(ret > 0) {			pageBuf = (char *)realloc(pageBuf, bytesRead + contentLength);			if(pageBuf == NULL) {				close(sock);				*nPSock = -1;				free(pageBuf);				cout << "realloc()" << endl;				return -1;			}		}	}	/*	 * The download buffer is too large.  Trim off the safety padding.	
*/	pageBuf = (char *)realloc(pageBuf, bytesRead+1);	if(pageBuf == NULL){		close(sock);		*nPSock = -1;		free(pageBuf);		cout << "2.realloc()" << endl;		return -1;	}	pageBuf[bytesRead] = '\0';	if(fileBuf == NULL){	/* They just wanted us to "hit" the url */		free(pageBuf);	}else{		char *tmp;		//tmp = (char *)malloc(HEADER_BUF_SIZE);		tmp = (char *)malloc(strlen(headerBuf)+1);        	if(tmp == NULL){                	close(sock);			*nPSock = -1;			free(pageBuf);			cout << "malloc() for headerBuf" << endl;                	return -1;        	}		//memcpy( tmp, headerBuf, HEADER_BUF_SIZE-1 );		strncpy( tmp, headerBuf, strlen(headerBuf)+1 );		*fileHeadBuf = tmp;		*fileBuf = pageBuf;	}			//close(sock);	*nPSock = sock;	return bytesRead;}	int MakeSocket(const char *host){	int sock;		/* Socket descriptor */	struct sockaddr_in sa;	/* Socket address */	unsigned long   inaddr;	int ret;	inaddr = (unsigned long)inet_addr(host);	if ( (int)inaddr != -1){ 		memcpy((char *)&sa.sin_addr, (char *)&inaddr, sizeof(inaddr));        }//firt find from cache	else{	map<string,string>::iterator it  = mapCacheHostLookup.find(host);	if( it != mapCacheHostLookup.end() ){	// find in host lookup cache		string strHostIp;		strHostIp = (*it).second;		inaddr = (unsigned long)inet_addr( strHostIp.c_str() );		if ( (int)inaddr != -1){ 			memcpy((char *)&sa.sin_addr, (char *)&inaddr, sizeof(inaddr));			cout << ":)" ;        	}	}else{		struct hostent *hp;	/* Host entity */		hp = gethostbyname(host);		if(hp == NULL) { 			cout << "gethostbyname() in MakeSock: " << host << endl;			// do not consider MAX_UNREACHABLE_HOST_NUM			vector<string>::iterator itResult;			itResult = find( vsUnreachHost.begin(), vsUnreachHost.end(), host );			if( itResult == vsUnreachHost.end() ){				pthread_mutex_lock(&mymutex);				vsUnreachHost.push_back(host);				pthread_mutex_unlock(&mymutex);			}			return -1;		}/*		struct hostent *hp, hp_buf;		//char buf[2048];		char *buf;		int tse_errno;		buf = (char*)malloc(1024);		if(buf == NULL){			
cout << "allocate error for buf of gethostbyname_r" << endl;			return -1;		}		memset(buf,0,1024);		gethostbyname_r(host, &hp_buf, buf, 1024, &hp, &tse_errno);		if( tse_errno != 0){			cout << "gethostbyname_r() in MakeSock:" << tse_errno 				<< " host: " << host << " " << strerror(tse_errno) << endl;			free(buf);                	return -1;        	}*/		memcpy((char *)&sa.sin_addr, (char *)hp->h_addr, hp->h_length);				// cache host lookup        	struct  in_addr in;		bcopy(*(hp->h_addr_list), (caddr_t)&in, hp->h_length);				char    abuf[INET_ADDRSTRLEN];        	if( inet_ntop(AF_INET, (void *)&in,abuf, sizeof(abuf)) == NULL ){			cout << "inet_ntop() in MakeSock" << endl;			//free(buf);			return -1;		}		inaddr = (unsigned long)inet_addr( abuf );		pthread_mutex_lock(&mymutex);		if( mapCacheHostLookup.count(host) == 0){			//cout << endl << host << " and " << abuf << endl;			mapCacheHostLookup.insert( valTypeCHL ( host, abuf));		}		pthread_mutex_unlock(&mymutex);		//free(buf);	}}	if( mapIpBlock.size() > 0){		map<unsigned long,unsigned long>::iterator pos;		bool b_fContinue = false;		for(pos=mapIpBlock.begin(); pos != mapIpBlock.end(); ++pos ){			unsigned long ret;			ret = inaddr & ~((*pos).second);			if( ret == (*pos).first ){				b_fContinue = true;				break;			}		}		if( b_fContinue == false ){		/*			//save blocked Urls			string ofsName = IP_BLOCKED_URL + "." 
+ CStrFun::itos(pthread_self());			ofstream ofsBlockUrl(ofsName.c_str(),ios::app|ios::binary);			if(!ofsBlockUrl){				cerr << "cannot open " << ofsName.c_str() << "for output" << endl;				exit(-1);			}			ofsBlockUrl << host << endl;			ofsBlockUrl.close();*/			cout << "b_fContinue == false" << endl;			return -2;		}	}	/* Copy host address from hostent to (server) socket address */	sa.sin_family = AF_INET;			sa.sin_port = htons(PORT_NUMBER);	/* Put portnum into sockaddr */	sock = -1;	sock = socket(AF_INET, SOCK_STREAM, 0);	if(sock < 0 ) { 		cout << "socket() in MakeSocket" << endl;		return -1;	}	int optval = 1;  	if (setsockopt (sock, SOL_SOCKET, SO_REUSEADDR,		(char *)&optval, sizeof (optval)) < 0){		cout << "setsockopt() in MakeSocket" << endl;		close(sock);		return -1;	}        //ret = connect(sock, (struct sockaddr *)&sa, sizeof(sa));        ret = nonb_connect(sock, (struct sockaddr *)&sa, DEFAULT_TIMEOUT);        if(ret == -1) { 		cout << "nonb_connect() in MakeSocket" << endl;		close(sock);		return -1; 	}        return sock;}		int _http_read_header(int sock, char *headerPtr){	fd_set rfds;	struct timeval tv;	int bytesRead = 0, newlines = 0, ret, selectRet;	int flags;	flags=fcntl(sock,F_GETFL,0);	if(flags<0){		cout << "1.fcntl() < 0" << endl;		return -1;	}        flags|=O_NONBLOCK;        if(fcntl(sock,F_SETFL,flags)<0){		cout << "2.fcntl() < 0" << endl;		return -1;	}	while(newlines != 2 && bytesRead != HEADER_BUF_SIZE-1) {		FD_ZERO(&rfds);		FD_SET(sock, &rfds);		tv.tv_sec = timeout;		tv.tv_usec = 0;		if(timeout >= 0)			selectRet = select(sock+1, &rfds, NULL, NULL, &tv);		else            /* No timeout, can block indefinately */			selectRet = select(sock+1, &rfds, NULL, NULL, NULL);		if(selectRet == 0 && timeout < 0) {			cout << "selectRet == 0 && timeout < 0" << endl;			return -1;                }else if(selectRet == -1) {			cout << "selectRet == 0 && timeout < 0 else" << endl;			return -1;		}                		ret = read(sock, headerPtr, 1);                if(ret 
== -1){			cout << "read()" << endl;			return -1;		}		bytesRead++;                		if(*headerPtr == '\r') {                 /* Ignore CR */			/* Basically do nothing special, just don't set newlines			 *      to 0 */			headerPtr++;			continue;		}		else if(*headerPtr == '\n')             /* LF is the separator */			newlines++;		else    			newlines = 0;                		headerPtr++;	}        	//headerPtr -= 3;         /* Snip the trailing LF's */				  /* to be compatible with Tianwang format, we have to retain them*/	headerPtr -= 2;	*headerPtr = '\0';	//cout << "in it " << headerPtr << endl;	return bytesRead;}/* *function nonblocking connect *parameter sec is the second of timing out */int nonb_connect(int sockfd,struct sockaddr* sa,int sec){	int flags;	int status;	fd_set mask;	struct timeval timeout;	//set the socket as nonblocking	flags=fcntl(sockfd,F_GETFL,0);	if(flags<0) return -1;	flags|=O_NONBLOCK;	if(fcntl(sockfd,F_SETFL,flags) < 0){		cout << "1.fcntl() in nonb_connect" << endl;		return -1;	}	if( connect(sockfd,sa,sizeof(struct sockaddr)) == 0){		flags&=~O_NONBLOCK;		fcntl(sockfd,F_SETFL,flags);		return sockfd;//connected immediately        }	FD_ZERO(&mask);	FD_SET(sockfd,&mask);	timeout.tv_sec=sec;	timeout.tv_usec=0;	status=select(sockfd+1,NULL,&mask,NULL,&timeout);	switch(status){		case -1:		// Select error, set the socket as default blocking			flags&=~O_NONBLOCK;			fcntl(sockfd,F_SETFL,flags);			cout << "2.fcntl() in nonb_connect" << endl;			return -1;		case 0: 		//Connection timed out.			flags&=~O_NONBLOCK;			fcntl(sockfd,F_SETFL,flags);			cout << "3.fcntl() in nonb_connect" << endl;			return -1;		default: 		// Connected successfully.			FD_CLR(sockfd,&mask);			flags&=~O_NONBLOCK;			fcntl(sockfd,F_SETFL,flags);			return 0;	}}	/*	 * Determines if the given NULL-terminated buffer is large enough to	 *      concatenate the given number of characters.  If not, it attempts to	 *      grow the buffer to fit.	 
// Returns: 0 on success, or -1 on error (original buffer is unchanged). */
/*
 * _checkBufSize
 *
 * Parameters:
 *      buf      address of a malloc'ed, NUL-terminated buffer; updated if
 *               the buffer is reallocated
 *      bufsize  address of the buffer's current capacity in bytes; grown by
 *               more + 1 when the buffer is reallocated
 *      more     number of characters the caller wants to append
 *
 * Nothing is done when the free space (capacity minus string length minus
 * the terminator) is strictly greater than `more`.  Otherwise the buffer is
 * reallocated to *bufsize + more + 1 bytes.
 *
 * Returns 0 on success.  Returns -1, leaving *buf and *bufsize untouched,
 * if realloc() fails or if the new size would not be a positive value
 * representable as an int.
 */
int _checkBufSize(char **buf, int *bufsize, int more)
{
	/* Use one wide signed type throughout: mixing int with the size_t from
	 * strlen() made the original subtraction unsigned, and the int addition
	 * below could overflow. */
	const long long used = static_cast<long long>(strlen(*buf)) + 1;
	const long long roomLeft = static_cast<long long>(*bufsize) - used;
	if (roomLeft > more) return 0;

	const long long wanted = static_cast<long long>(*bufsize) + more + 1;
	const long long kIntMax = static_cast<long long>(~0u >> 1);
	/* A non-positive size could make realloc() free the buffer; a size above
	 * the int range cannot be stored back into *bufsize. */
	if (wanted <= 0 || wanted > kIntMax) return -1;

	char *grown = static_cast<char *>(realloc(*buf, static_cast<size_t>(wanted)));
	if (grown == NULL) return -1;

	*buf = grown;
	*bufsize = static_cast<int>(wanted);
	return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -