⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cgetwebpage.cpp

📁 可以进行模板定制的动态网页下载分析的源程序
💻 CPP
📖 第 1 页 / 共 2 页
字号:
	{
		*(cstatus+i++)=*pstatus;
		pstatus++;
	}
	if(i==11)
	{
		delete ppageheadlower;
		return 500;
	}
	m_dwRet=atol(cstatus);
	if(m_dwRet!=200)
	{
		if( m_dwRet >= 300 && m_dwRet < 400 ) //首先检测一下服务器的应答是否为重定向
		{
			delete ppageheadlower;
			return m_dwRet;
		}
		else if( m_dwRet >=500 )// 服务器错误,可以重试
		{
			delete ppageheadlower;
			return m_dwRet;
		}
		// 客户端错误,重试无用
		else if( m_dwRet >=400 && m_dwRet <500 )
		{
			delete ppageheadlower;
			return m_dwRet;
		}
	}
	char *pfilelen="content-length:";
	char *pfilelenplace=strstr(ppageheadlower,pfilelen);
	i=0;
	if(pfilelenplace==NULL)
		FileSize=0;
	else
	{
		pfilelenplace+=strlen(pfilelen);
		while(*pfilelenplace==' ')
			pfilelenplace++;
		char cfilelen[24];
		memset(cfilelen,0,24);
		while(*pfilelenplace>=48&&*pfilelenplace<=57)
		{
			*(cfilelen+i++)=*pfilelenplace;
			pfilelenplace++;
		}
		FileSize=atol(cfilelen);
		if(FileSize>MAX_INET_BUFFER)
		{
			FileSize=MAX_INET_BUFFER;
		}
	}
	char *plastmodifyt="last-modified:";
	char *pmodifyplace=strstr(ppageheadlower,plastmodifyt);
	detecttime(pmodifyplace);
	delete ppageheadlower;
	if(lasttime>0&&FileTime>0&&FileTime<=lasttime)
		return -10;//没有更新
	return m_dwRet;
}
// Returns a freshly allocated, NUL-terminated copy of the header section of an
// HTTP response: everything in `buffer` up to and including the first blank
// line ("\r\n\r\n").
//
// buffer - NUL-terminated response text; it is only read.
//
// Returns NULL when `buffer` is NULL, when no blank line is present, or when
// the blank line starts 1500 or more bytes into the buffer. Otherwise returns
// a 1560-byte array obtained with new[]; the caller owns it and must release
// it with delete[].
char *cgetwebpage::outgetmodheadinfo(char *buffer)
{
	if(buffer==NULL)
		return NULL;

	const char *ptwoenter="\r\n\r\n";
	const char *pheadend=strstr(buffer,ptwoenter);
	// Validate before allocating so the failure path has nothing to free.
	if(pheadend==NULL||pheadend-buffer>=1500)
		return NULL;

	// Header length including the terminating blank line; at most 1503 bytes,
	// which leaves room for the NUL inside the 1560-byte array.
	const size_t nheadlen=(size_t)(pheadend-buffer)+strlen(ptwoenter);

	char *phead=new char[1560];
	memset(phead,0,1560);
	// Copy by length instead of temporarily patching a NUL into the caller's buffer.
	memcpy(phead,buffer,nheadlen);
	return phead;
}

// Folds an ASCII upper-case letter to lower case; every other byte is returned
// unchanged. Independent of the current locale.
static unsigned char zyyasciilower(unsigned char c)
{
	if(c>='A'&&c<='Z')
		return (unsigned char)(c+('a'-'A'));
	return c;
}

// Case-insensitive (ASCII) substring search.
//
// str1 - NUL-terminated text to search in.
// str2 - NUL-terminated text to search for.
//
// Returns a pointer to the first position in str1 at which str2 matches when
// ASCII letter case is ignored, or NULL when there is no match or either
// argument is NULL. When both arguments are the same pointer, str1 is
// returned. An empty str2 matches at the start of a non-empty str1; an empty
// str1 never matches.
char * stristrzyy(char *str1,char *str2)
{
	if(str1 == NULL || str2 == NULL)
		return NULL;
	if(str1 == str2)
		return str1;

	size_t nremain=strlen(str1);          // bytes left in str1 starting at p
	const size_t nneedle=strlen(str2);

	// Stop as soon as the remaining text is shorter than the pattern.
	for(char *p=str1; nremain>=nneedle && *p; ++p,--nremain)
	{
		size_t k=0;
		while(k<nneedle &&
			zyyasciilower((unsigned char)p[k])==zyyasciilower((unsigned char)str2[k]))
		{
			++k;
		}
		if(k==nneedle)
			return p;
	}
	return NULL;
}
// Classifies a response by the value of its Content-Type header.
//
// pheader - NUL-terminated response text. The whole text is searched for
//           "Content-Type" (case-insensitive), not only the header section.
//
// Returns 0 when the content is acceptable: no Content-Type was found, the
// name is not followed by a ':', or the value starts with "text" (other than
// "text/plain"). Returns -1 when the value starts with "application" or
// "text/plain" (a match within the first three characters of the value is
// accepted), or when it is any other non-text type.
int cgetwebpage::getapptype(char *pheader)
{
	// Local arrays rather than literals because stristrzyy takes non-const char*.
	char ctype[]="Content-Type";
	char capplication[]="application";
	char ctextplain[]="text/plain";
	char ctext[]="text";

	char *pvalue=stristrzyy(pheader,ctype);
	if(pvalue==NULL)
		return 0;

	// Advance to the ':' that separates name and value, never past the NUL.
	pvalue+=strlen(ctype);
	while(*pvalue&&*pvalue!=':')
		pvalue++;
	if(*pvalue==0)
		return 0;       // no value present: treat like a missing header
	pvalue++;
	while(*pvalue==' ')
		pvalue++;

	// application/*: presumably a binary/executable download, rejected.
	char *phit=stristrzyy(pvalue,capplication);
	if(phit&&phit-pvalue<3)
		return -1;

	// text/plain is rejected as well.
	phit=stristrzyy(pvalue,ctextplain);
	if(phit&&phit-pvalue<3)
		return -1;

	// Any other text/* is accepted, but only when "text" starts the value.
	// Comparing the pointers avoids pointer arithmetic on a NULL result.
	phit=stristrzyy(pvalue,ctext);
	if(phit==pvalue)
		return 0;
	return -1;
}

int cgetwebpage::DoGet(char *strU, const char *httpHeaders, const char *body,int PageType,int AcceptType ,time_t fTime,int flag)
{
	FileSize=0;
	FileTime=0;
	m_dwRet=0;
	m_nport=80;
	memset(m_credirecturl,0,512);

	m_dwRet=0;
	m_nport=80;
	int nurllen=strlen(strU);
	memset(m_curlbak,0,512);
	strcpy(m_curlbak,strU);
	int ret=0;
	int nheadlen=0;
	int bredirectone=0;
	char *pnohosturl;
	unsigned short port;
	int retval;
	SOCKET  conn_socket;
	char *pnullnohosturl="/";
	int nrevlen=0;
	int outtime=1000;
	m_cookie[0] = 0;
	while(1)
	{
		while(bredirectone<3)
		{
			char *server_name=getmyhostname(m_curlbak);
			if(server_name==NULL||strlen(server_name)>127)
			{
				return -1;
			}
			memset(m_chost,0,128);
			strcpy(m_chost,server_name);
			delete server_name;
			server_name=NULL;
			pnohosturl=strstr(m_curlbak,m_chost);
			pnohosturl+=strlen(m_chost);
			while(*pnohosturl&&*(pnohosturl)!='/')
				pnohosturl++;
			if(*pnohosturl==0)
			{
				pnohosturl=pnullnohosturl;
			}
			port = m_nport;
			PHOSTENT hp;
			if(stricmp(gbufferaddr.curlhost,m_chost)!=0)
			{
				memset(gbufferaddr.curlhost,0,128);
				strcpy(gbufferaddr.curlhost,m_chost);
				if (isalpha(m_chost[0])) 
				{
					hp = gethostbyname(m_chost);
					if (hp == NULL ) 
					{
						m_dwRet=173;
						return -1;
					}
					memset(&(gbufferaddr.serverlast),0,sizeof(sockaddr_in));
					memcpy(&(gbufferaddr.serverlast.sin_addr),hp->h_addr,hp->h_length);
					gbufferaddr.serverlast.sin_family =  hp->h_addrtype;
					gbufferaddr.serverlast.sin_port = htons(port);
				}
				else
				{
					memset(&(gbufferaddr.serverlast),0,sizeof(sockaddr_in));
					gbufferaddr.serverlast.sin_addr.s_addr=inet_addr(m_chost);//m_chost);//可连不可用62.5.170.130:3128
					gbufferaddr.serverlast.sin_family =AF_INET;
					gbufferaddr.serverlast.sin_port = htons(port);
				}
			}
			memset(chttphead,0,1024);
			memset(chttpget,0,1024);
			if(flag==0)
			{
				if(bredirectone>0)
				{
					sprintf(chttphead,"HEAD %s HTTP/1.0\r\nHost: %s\r\nAccept: text/*\r\nPragma: no-cache\r\nCache-Control: no-cache\r\nUser-Agent: HTML_GET_APP\r\nReferer: %s\r\nAccept-Language: zh-cn\r\nContent-Type: application/x-www-form-urlencoded\r\n\r\n",pnohosturl,m_chost,strU);
					sprintf(chttpget,"GET %s HTTP/1.0\r\nHost: %s\r\nAccept: text/*\r\nPragma: no-cache\r\nCache-Control: no-cache\r\nUser-Agent: HTML_GET_APP\r\nReferer: %s\r\nAccept-Language: zh-cn\r\nContent-Type: application/x-www-form-urlencoded\r\n\r\n",pnohosturl,m_chost,strU);
				}
				else
				{
					sprintf(chttphead,"HEAD %s HTTP/1.0\r\nHost: %s\r\nAccept: text/*\r\nPragma: no-cache\r\nCache-Control: no-cache\r\nUser-Agent: HTML_GET_APP\r\nAccept-Language: zh-cn\r\nContent-Type: application/x-www-form-urlencoded\r\n\r\n",pnohosturl,m_chost);
					sprintf(chttpget,"GET %s HTTP/1.0\r\nHost: %s\r\nAccept: text/*\r\nPragma: no-cache\r\nCache-Control: no-cache\r\nUser-Agent: HTML_GET_APP\r\nAccept-Language: zh-cn\r\nContent-Type: application/x-www-form-urlencoded\r\n\r\n",pnohosturl,m_chost);
				}
			}
			else
			{
				sprintf(chttphead,"HEAD %s HTTP/1.0\r\nHost: %s\r\nAccept: text/*\r\nPragma: no-cache\r\nCache-Control: no-cache\r\nUser-Agent: HTML_GET_APP\r\nAccept-Language: zh-cn\r\nContent-Type: application/x-www-form-urlencoded\r\n\r\n",pnohosturl,m_chost);
//				strcpy(m_cookie,"SrchJob=page=1&where=+FROM+searchjob++WHERE++%%28%%28ind%%5Fid1+%%3E%%3D+3600+and+ind%%5Fid1+%%3C+3700%%29+or+%%28ind%%5Fid2+%%3E%%3D+3600+and+ind%%5Fid2+%%3C+3700%%29+or+%%28ind%%5Fid3+%%3E%%3D+3600+and+ind%%5Fid3+%%3C+3700%%29%%29++and+job%%5Fid+in+%%28Select+job%%5Fid+From+searchjobloc+Where+searchjobloc%%2E.job%%5Fid+%%3D+searchjob%%2E.job%%5Fid++and+loc%%5Fid+in+%%285%%29%%29;JobAgent=; domain=.chinahr.com;path=/;");
				if(m_cookie[0])
					sprintf(chttpget,"POST %s HTTP/1.0\r\nHost: %s\r\nCookie:%s\r\ncontent-type:application/x-www-form-urlencoded\r\nContent-Length:%d\r\n\r\n%s\r\n",pnohosturl,m_chost,m_cookie,strlen(body),body);
				else
					sprintf(chttpget,"POST %s HTTP/1.0\r\nHost: %s\r\ncontent-type:application/x-www-form-urlencoded\r\nContent-Length:%d\r\n\r\n%s\r\n",pnohosturl,m_chost,strlen(body),body);
			}
			int iii=0;
			iii++;
			if(fTime>0)
			{
				conn_socket = socket(AF_INET,SOCK_STREAM,0); 
				if(conn_socket== INVALID_SOCKET)
				{
					m_dwRet=173;
					return -1;
				}
				setsockopt(conn_socket,SOL_SOCKET,SO_RCVTIMEO,(const char *)&outtime,sizeof(int));
				setsockopt(conn_socket,SOL_SOCKET,SO_SNDTIMEO,(const char *)&outtime,sizeof(int));

				if(connect(conn_socket,(struct sockaddr*)&(gbufferaddr.serverlast),sizeof(gbufferaddr.serverlast))== SOCKET_ERROR) 
				{
					closesocket(conn_socket);
					m_dwRet=173;
					return -1;
				}
				retval=send(conn_socket,chttphead,strlen(chttphead),0);
				if (retval == SOCKET_ERROR ) 
				{
					closesocket(conn_socket);
					m_dwRet=173;
					return -1;
				}
				memset(Bufferhead,0,1024);
				retval = recv(conn_socket,Bufferhead,1023,0 );
				closesocket(conn_socket);
				if (retval == SOCKET_ERROR ) 
				{
					m_dwRet=173;
					return -1;
				}
				if (retval==0) 
				{
					m_dwRet=173;
					return -1;
				}
				nheadlen=strlen(Bufferhead);
				ret=analypagehead(Bufferhead,fTime);
				if(ret==-10)//没有更新
				{
					FileSize=0;
					m_dwRet=-10;
					return -1;
				}
				else if(ret!=200)
				{
					if(ret>=300&&ret<306)
					{
						if(getredirectfun(Bufferhead)!=0)
						{
							return -1;
						}
						else
						{
							bredirectone+=1;
							strcpy(m_curlbak,m_credirecturl);
							m_cookie[0] = 0;
							char *p=NULL,*p1 = NULL;
							long lLen=0;
							p = Buffer;
							while(p = strstr(p,"Set-Cookie:"))
							{
								p1 = p+11;
								p=p1;
								while(*p==' ')p++;
								p1 = p;
								while(*p && *p !=0x0d&&*p!=0x0a)
									p++;
								memcpy(&m_cookie[lLen],p1,p-p1);
								lLen+=p-p1;
								if(m_cookie[lLen-1]!=';')
									m_cookie[lLen++]=';';

							}
							if(lLen>0&&m_cookie[lLen-1]==';')
								m_cookie[lLen-1]=0;
							else
								m_cookie[lLen] = 0;
							continue;
						}
					}
					else
					{
						return -1;
					}
				}
				else 
				{
					break;
				}
			}
			else
				break;
		}
		if(FileSize>=MAX_INET_BUFFER)
		{
			return -1;
		}
		if(bredirectone>=3)
		{
			m_dwRet=173;
			return -1;
			
		}
		FileSize=0;
		//以下是取得全文
		conn_socket = socket(AF_INET,SOCK_STREAM,0);
		if(conn_socket==INVALID_SOCKET )
		{
			m_dwRet=173;
			return -1;
		}
		setsockopt(conn_socket,SOL_SOCKET,SO_RCVTIMEO,(const char *)&outtime,sizeof(int));
		setsockopt(conn_socket,SOL_SOCKET,SO_SNDTIMEO,(const char *)&outtime,sizeof(int));
		if (connect(conn_socket,(struct sockaddr*)&gbufferaddr.serverlast,sizeof(sockaddr_in))== SOCKET_ERROR) 
		{
			closesocket(conn_socket);
			m_dwRet=173;
			return -1;
		}
		retval=send(conn_socket,chttpget,strlen(chttpget),0);
		if (retval == SOCKET_ERROR ) 
		{
			closesocket(conn_socket);
			m_dwRet=173;
			return -1;
		}
		memset(Buffer,0,DYNBUFFER);
		nrevlen=DYNBUFFER-1;
		retval = recv(conn_socket,Buffer,nrevlen,0 );
		if(getapptype(Buffer)!=0)
		{
			closesocket(conn_socket);
			m_dwRet=173;			
			return -1;
		}
		if (retval == SOCKET_ERROR ) 
		{
			closesocket(conn_socket);
			m_dwRet=173;			
			return -1;
		}
		if (retval == 0) 
		{
			closesocket(conn_socket);
			m_dwRet=173;
			return -1;
		}
		//增加第一遍文件返回信息处理方式。
		if(fTime==0)
		{
			char *pgetmodhead=outgetmodheadinfo(Buffer);
			if(pgetmodhead==NULL)
			{
				closesocket(conn_socket);
				m_dwRet=173;			
				return-1;
			}
			nheadlen=strlen(pgetmodhead);
			ret=analypagehead(pgetmodhead,fTime);
			delete pgetmodhead;
			pgetmodhead=NULL;
			if(ret!=200)
			{
				if(ret>=300&&ret<306)
				{
					if(getredirectfun(Buffer)!=0)
					{

						closesocket(conn_socket);
						m_dwRet=173;			
						return -1;
					}
					else
					{
						bredirectone+=1;
						strcpy(m_curlbak,m_credirecturl);
						closesocket(conn_socket);
						m_cookie[0] = 0;
						char *p=NULL,*p1 = NULL;
						long lLen=0;
						p = Buffer;
						while(p = strstr(p,"Set-Cookie:"))
						{
							p1 = p+11;
							p=p1;
							while(*p==' ')p++;
							p1 = p;
							while(*p && *p !=0x0d&&*p!=0x0a)
								p++;
							memcpy(&m_cookie[lLen],p1,p-p1);
							lLen+=p-p1;
							if(m_cookie[lLen-1]!=';')
								m_cookie[lLen++]=';';

						}
						if(lLen>0&&m_cookie[lLen-1]==';')
							m_cookie[lLen-1]=0;
						else
							m_cookie[lLen] = 0;
						continue;
					}
				}
				else
				{
					closesocket(conn_socket);
					m_dwRet=173;			
					return -1;
				}
			}
			else
			{
				if(FileSize>=MAX_INET_BUFFER)
				{
					closesocket(conn_socket);
					return -1;
				}
			}
		}
		break;
	}
	//
	int kkzero=strlen(Buffer);
	if(kkzero<(retval-1))
	{
		while(kkzero<(retval-1)&&*(Buffer+kkzero)==0)
		{
			*(Buffer+kkzero)=32;
			kkzero++;
		}
	}
	int tmp=strlen(Buffer);
	char *pouthead;
	if(nheadlen>0)
		pouthead=Buffer+nheadlen;
	else
		pouthead=Buffer;
	int ncontlen=strlen(pouthead);
	strcpy(HTMLFileBuf,pouthead);
	while(retval>0)
	{
		memset(Buffer,0,DYNBUFFER);
		retval = recv(conn_socket,Buffer,nrevlen,0 );
		if (retval == SOCKET_ERROR ) 
		{
			closesocket(conn_socket);
			m_dwRet=173;
			return -1;
		}
		if (retval == 0) 
		{
			break;
		}
		kkzero=strlen(Buffer);
		if(kkzero<retval)
		{
			while(kkzero<retval&&*(Buffer+kkzero)==0)
			{
				*(Buffer+kkzero)=32;
				kkzero++;
			}
		}
		char *ppagenowplace=HTMLFileBuf+ncontlen;
		ncontlen+=strlen(Buffer);
		if(ncontlen>=MAX_INET_BUFFER)
		{
			ncontlen=0;
			break;
		}
		strcpy(ppagenowplace,Buffer);
	}
	FileSize=ncontlen;
	closesocket(conn_socket);
	return m_dwRet;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -