⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cgetwebpage.cpp

📁 可以进行模板定制的动态网页下载分析的源程序
💻 CPP
📖 第 1 页 / 共 2 页
字号:
// cgetwebpage.cpp: implementation of the cgetwebpage class.
//
//////////////////////////////////////////////////////////////////////

#include <stdio.h> 
#include "cgetwebpage.h"
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
//extern CRITICAL_SECTION AddHostcache;
//CRITICAL_SECTION Addrurlcache;
//#define _TESTOFZYY
// Compatibility definitions for builds where WIN32 is not defined.
// The matching #endif follows the stricmp/strnicmp definitions below.
#ifndef WIN32
// Socket handles are plain ints in this configuration.
#define SOCKET int
// Alias for a pointer to struct hostent.
#define PHOSTENT struct hostent *
// All-ones SOCKET value used as the "no socket" marker.
#define INVALID_SOCKET  (SOCKET)(~0)
// Error return value for socket calls.
#define SOCKET_ERROR            (-1)
// ASCII-only upper-casing: maps 'a'..'z' to 'A'..'Z', leaves everything else
// untouched. The argument is evaluated more than once, so it must not have
// side effects.
#define MYUpper(x) (((x)>='a'&&(x)<='z')?((x)-'a'+'A'):(x))
// ASCII-only range test 'A'..'Z'. Being a macro, it replaces any function
// named isupper for the rest of this file; the argument is evaluated twice.
#define isupper(x) ((x)>='A'&&(x)<='Z')
// closesocket() is spelled close() in this configuration.
#define closesocket(x) close(x)
// ASCII-only letter test built from the two macros above.
#define isalpha(x) isupper(MYUpper(x))
// Case-insensitive (ASCII only) comparison of two NUL-terminated strings.
// Returns 0 when they are equal ignoring case, 1 when str1 sorts after str2,
// and -1 otherwise. Ordering is decided on the upper-cased bytes treated as
// unsigned char.
int stricmp(const char *str1,const char *str2)
{
	// Walk both strings in step for as long as they agree.
	while(*str1&&*str2&&MYUpper(*str1)==MYUpper(*str2))
	{
		++str1;
		++str2;
	}
	// Either one string ended or the current characters differ.
	const unsigned char lhs=(unsigned char)MYUpper(*str1);
	const unsigned char rhs=(unsigned char)MYUpper(*str2);
	if(lhs==rhs)
		return 0;
	return lhs>rhs?1:-1;
}

// Case-insensitive (ASCII only) comparison of at most len characters of two
// NUL-terminated strings.
// Returns 0 when the compared prefixes are equal ignoring case, 1 when str1
// sorts after str2, and -1 otherwise. Ordering is decided on the upper-cased
// bytes treated as unsigned char.
// Strings that both end before len characters and match up to that point
// compare equal (the previous implementation reported -1 for that case).
int strnicmp(const char *str1,const char *str2,size_t len)
{
	for(size_t i=0;i<len;i++)
	{
		unsigned char c1=(unsigned char)str1[i];
		unsigned char c2=(unsigned char)str2[i];
		// Fold ASCII lower case to upper case; all other bytes are left as-is.
		if(c1>='a'&&c1<='z')
			c1=(unsigned char)(c1-'a'+'A');
		if(c2>='a'&&c2<='z')
			c2=(unsigned char)(c2-'a'+'A');
		if(c1!=c2)
			return c1>c2?1:-1;
		// Both strings ended together before len characters: equal.
		if(c1==0)
			return 0;
	}
	return 0;
}
#endif
// Constructor: zero-fills gbufferaddr (sizeof(bufferaddr) bytes) and the
// first 512 bytes of the redirect-URL buffer m_credirecturl.
cgetwebpage::cgetwebpage()
{
	memset(&gbufferaddr,0,sizeof(bufferaddr));
	memset(m_credirecturl,0,512);
}
// Destructor: the body is empty; nothing is released here.
cgetwebpage::~cgetwebpage()
{
}
// Placeholder: ignores both url and newurl and always returns 0.
int cgetwebpage::standardurl(char *url,char *newurl)
{
	return 0;
}
// Extracts the host part of a URL of the form "scheme://host[:port][/path]".
//
// url     NUL-terminated URL; may be NULL.
// returns a newly allocated (new char[512]) NUL-terminated host name that the
//         caller owns and must release with delete[]; NULL when url is NULL,
//         contains no "//", or the host does not fit in the buffer.
//
// Side effect: when the host is followed by ":<digits>", m_nport is set to
// that number and the ":port" suffix is removed from the returned host.
// m_nport is left untouched otherwise.
char *cgetwebpage::getmyhostname(char *url)
{
	if(url==NULL)
		return NULL;
	const char *pscheme=strstr(url,"//");
	if(pscheme==NULL)
		return NULL;

	const int nhostbuf=512;
	const char *p=pscheme+2;
	while(*p==' ')
		p++;

	// Allocate only once we know there is something to copy, so no early
	// exit above needs a matching release.
	char *hostname=new char[nhostbuf];
	memset(hostname,0,nhostbuf);
	int i=0;
	while(*p!='/'&&*p)
	{
		// Refuse hosts that would overflow the buffer instead of writing
		// past its end.
		if(i>=nhostbuf-1)
		{
			delete[] hostname;
			return NULL;
		}
		hostname[i++]=*p++;
	}

	// p now points at the end of the host; a ':' before it introduces a port.
	const char *pport=strchr(pscheme,':');
	if(pport&&pport<p)
	{
		pport++;
		char pportn[100];
		memset(pportn,0,sizeof(pportn));
		size_t j=0;
		while(*pport!='/'&&*pport&&j<sizeof(pportn)-1)
			pportn[j++]=*pport++;
		if(pportn[0]>='0'&&pportn[0]<='9')
		{
			m_nport=atol(pportn);
			// Cut the host at its last ':' so only the name remains.
			char *pcolon=strrchr(hostname,':');
			if(pcolon)
				*pcolon=0;
		}
	}
	return hostname;
}
// Copies one field of a date string into pdst.
// Reads from psrc up to (not including) cdelim, NUL-terminates pdst, and
// returns a pointer just past the delimiter. Returns NULL when the string
// ends before the delimiter is seen or the field does not fit in ndstlen bytes.
static const char *detecttime_readfield(const char *psrc,char cdelim,char *pdst,size_t ndstlen)
{
	size_t n=0;
	while(*psrc!=cdelim&&*psrc)
	{
		if(n+1>=ndstlen)
			return NULL;
		pdst[n++]=*psrc++;
	}
	pdst[n]=0;
	if(*psrc==0)
		return NULL;
	return psrc+1;
}

// Parses a lower-cased "last-modified:" header line into FileTime.
//
// pmodifyplace  pointer to the start of the header line, i.e. to the text
//               "last-modified: mon, 04 mar 2002 09:39:23 gmt" (already
//               lower-cased); may be NULL.
// returns       0 when pmodifyplace is NULL or the date was parsed,
//               -1 when the line could not be parsed.
//
// FileTime is always set: to the parsed time (capped at the current time) on
// success, and to the current time in every other case.
//
// NOTE(review): the header is in GMT but mktime() interprets the fields as
// local time; this matches the previous behaviour and is left unchanged.
int cgetwebpage::detecttime(char *pmodifyplace)
{
	// Fallback value; overwritten only when the whole date parses.
	FileTime=time(NULL);
	if(pmodifyplace==NULL)
		return 0;
	if(strstr(pmodifyplace,"gmt")==NULL)
		return -1;

	// Step over the header name without running past the end of the string.
	size_t nskip=strlen("last-modified:");
	while(nskip>0)
	{
		if(*pmodifyplace==0)
			return -1;
		pmodifyplace++;
		nskip--;
	}
	while(*pmodifyplace==' ')
		pmodifyplace++;

	// Copy everything up to the "gm" of "gmt" into a bounded local buffer.
	char pgmt[128];
	size_t i=0;
	while(!(*pmodifyplace=='g'&&*(pmodifyplace+1)=='m'))
	{
		if(*pmodifyplace==0||i+1>=sizeof(pgmt))
			return -1;
		pgmt[i++]=*pmodifyplace++;
	}
	pgmt[i]=0;

	char ctmp[8];
	struct tm t;
	memset(&t,0,sizeof(t));
	const char *pcur=pgmt;

	// Skip the weekday ("mon,").
	while(*pcur!=' '&&*pcur)
		pcur++;
	if(*pcur==0)
		return -1;
	pcur++;

	// Day of month.
	pcur=detecttime_readfield(pcur,' ',ctmp,sizeof(ctmp));
	if(pcur==NULL)
		return -1;
	t.tm_mday=(int)atol(ctmp);

	// Month: must be exactly one of the three-letter names. Each name sits
	// at a multiple of 4 in the list, so offset/4 is the month index.
	pcur=detecttime_readfield(pcur,' ',ctmp,sizeof(ctmp));
	if(pcur==NULL)
		return -1;
	const char *strAllMonth="jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,dec";
	const char *pmon=strstr(strAllMonth,ctmp);
	if(strlen(ctmp)!=3||pmon==NULL||(pmon-strAllMonth)%4!=0)
		return -1;
	t.tm_mon=(int)((pmon-strAllMonth)/4);

	// Year; anything earlier than 1990 is clamped to 1990.
	pcur=detecttime_readfield(pcur,' ',ctmp,sizeof(ctmp));
	if(pcur==NULL)
		return -1;
	int nyear=(int)atol(ctmp);
	if(nyear<1990)
		nyear=1990;
	t.tm_year=nyear-1900;

	// hh:mm:ss followed by a space.
	pcur=detecttime_readfield(pcur,':',ctmp,sizeof(ctmp));
	if(pcur==NULL)
		return -1;
	t.tm_hour=(int)atol(ctmp);
	pcur=detecttime_readfield(pcur,':',ctmp,sizeof(ctmp));
	if(pcur==NULL)
		return -1;
	t.tm_min=(int)atol(ctmp);
	pcur=detecttime_readfield(pcur,' ',ctmp,sizeof(ctmp));
	if(pcur==NULL)
		return -1;
	t.tm_sec=(int)atol(ctmp);

	t.tm_isdst=0;
	const time_t tparsed=mktime(&t);
	if(tparsed==(time_t)-1)
		return -1;	// not representable; FileTime keeps the fallback
	// A modification time in the future is capped at "now".
	const time_t tnow=time(NULL);
	FileTime=(tparsed>tnow)?tnow:tparsed;
	//Last-Modified:       Mon, 04 Mar 2002 09:39:23 GMT
	//Mon, 04 Mar 2002 06:46:15 GMT
	return 0;
}

// Decides whether url should be treated as a page URL.
//
// Trailing spaces, tabs, CRs and LFs are stripped from url in place first.
// Returns -1 when nothing is left, or when the URL is longer than and ends
// with (ignoring case) one of the listed file extensions; returns 0 otherwise.
int cgetwebpage::IsUrl(char *url)
{
	// File extensions that cause a URL to be rejected.
	static const char *const kskipext[]={
		".mp3",".gif",".dat",".zip",".bmp",".jpg",
		".doc",".xls",".mid",".chm",".ram",
		".mpeg",".jpeg",
		".rm",".ra"
	};

	int l=strlen(url);
	for(;l>0;l--)
	{
		const char ctail=url[l-1];
		if(ctail!=' '&&ctail!=(char)0x0d&&ctail!=(char)0x0a&&ctail!=(char)0x09)
			break;
	}
	url[l]=0;
	if(l==0)
		return -1;

	for(size_t k=0;k<sizeof(kskipext)/sizeof(kskipext[0]);k++)
	{
		const int nextlen=strlen(kskipext[k]);
		// The URL must be strictly longer than the extension itself.
		if(l>nextlen&&strnicmp(&url[l-nextlen],kskipext[k],nextlen)==0)
			return -1;
	}
	return 0;
}
// Extracts the target of a "Location:" header from a response head and stores
// the resulting absolute URL in m_credirecturl.
//
// ppagehead  NUL-terminated response head (original case); may be NULL.
// returns    0 when m_credirecturl now holds a URL accepted by IsUrl(),
//            -1 otherwise (no Location header, value too long, target on a
//            different domain, result would not fit in 512 bytes, ...).
//
// Resolution rules:
//  - a value containing "http://" is taken from that point on, but only if it
//    contains the domain suffix derived from m_chost (see below);
//  - a value starting with '/' is appended to "http://" + m_chost;
//  - a value starting with one or more "../" climbs that many directories
//    above the directory of m_curlbak;
//  - anything else is appended to the directory of m_curlbak.
// Backslashes in the header value and in the base URL are treated as '/'.
//
// NOTE(review): the header name is matched as the substring "location:", so
// a "content-location:" line would match as well; unchanged from before.
int cgetwebpage::getredirectfun(char *ppagehead)
{
	if(ppagehead==NULL)
		return -1;
	// m_credirecturl is cleared with a length of 512 throughout this file.
	const size_t kurlbuflen=512;

	// Search a lower-cased copy so the header name matches in any case, then
	// map the hit back onto the original text to keep the URL's case.
	const size_t npagelen=strlen(ppagehead);
	char *ppageheadlower=new char[npagelen+1];
	for(size_t n=0;n<npagelen;n++)
	{
		const char c=ppagehead[n];
		ppageheadlower[n]=(c>='A'&&c<='Z')?(char)(c+32):c;
	}
	ppageheadlower[npagelen]=0;
	const char *plocation="location:";
	const char *plowerhit=strstr(ppageheadlower,plocation);
	if(plowerhit==NULL)
	{
		delete[] ppageheadlower;
		return -1;
	}
	char *plocplace=ppagehead+(plowerhit-ppageheadlower)+strlen(plocation);
	delete[] ppageheadlower;

	while(*plocplace==' ')
		plocplace++;

	// Copy the header value up to CRLF (or the end of the text).
	char credirectstr[512];
	memset(credirectstr,0,sizeof(credirectstr));
	int i=0;
	while(*plocplace&&!(*plocplace=='\r'&&*(plocplace+1)=='\n')&&i<510)
		credirectstr[i++]=*plocplace++;
	if(i==510)
		return -1;

	const char *phttp="http://";
	plocplace=strstr(credirectstr,phttp);
	if(plocplace!=NULL)
	{
		// Absolute URL: accept it only when it contains the trailing part of
		// the current host. Hosts ending in "cn" keep one more label
		// ("sina.com.cn") than others ("sina.com").
		const int nhostlen=strlen(m_chost);
		if(nhostlen<1)
			return -1;
		char *phostbak=m_chost+nhostlen-1;
		int ndotwanted=1;
		if(nhostlen>=2&&*phostbak=='n'&&*(phostbak-1)=='c')
			ndotwanted=2;
		int ndotnum=0;
		i=0;
		while(i<nhostlen)
		{
			if(*(phostbak-i)=='.')
				ndotnum++;
			if(ndotnum==ndotwanted)
				break;
			i++;
		}
		if(i==nhostlen)
			return -1;
		// phostbak moves to the dot just found, then back to the start of
		// the label in front of it.
		phostbak-=i;
		if(phostbak>m_chost)
			phostbak--;
		while(phostbak>m_chost&&*phostbak!='.')
			phostbak--;
		if(*phostbak=='.')
			phostbak++;
		if(strstr(plocplace,phostbak)==NULL)
			return -1;
		// plocplace lies inside credirectstr (< 512 bytes), so this fits.
		memset(m_credirecturl,0,kurlbuflen);
		strcpy(m_credirecturl,plocplace);
		return IsUrl(m_credirecturl)==0?0:-1;
	}

	// Not absolute: normalise path separators in the header value.
	for(i=0;credirectstr[i];i++)
	{
		if(credirectstr[i]=='\\')
			credirectstr[i]='/';
	}

	if(credirectstr[0]=='/')
	{
		// Host-relative path.
		if(strlen(phttp)+strlen(m_chost)+strlen(credirectstr)>=kurlbuflen)
			return -1;
		memset(m_credirecturl,0,kurlbuflen);
		sprintf(m_credirecturl,"http://%s%s",m_chost,credirectstr);
		return IsUrl(m_credirecturl)==0?0:-1;
	}

	// Everything below resolves against the current URL.
	char curltmp[512];
	if(strlen(m_curlbak)>=sizeof(curltmp))
		return -1;
	strcpy(curltmp,m_curlbak);
	if(curltmp[0]==0)
		return -1;
	for(char *pfix=curltmp;*pfix;pfix++)
	{
		if(*pfix=='\\')
			*pfix='/';
	}
	// First character of the host; path handling must never go in front of it.
	char *phoststart=strstr(curltmp,"//");
	phoststart=phoststart?phoststart+2:curltmp;

	if(strncmp(credirectstr,"../",3)==0)
	{
		// Count the leading "../" segments; ptmpsun ends on the remainder.
		char *ptmpsun=credirectstr;
		int nparantnum=0;
		while(strncmp(ptmpsun,"../",3)==0)
		{
			nparantnum++;
			ptmpsun+=3;
		}
		// Drop the file name plus one directory per "../" from the base.
		char *purlplace=curltmp+strlen(curltmp)-1;
		for(i=0;i<=nparantnum;i++)
		{
			while(purlplace>phoststart&&*purlplace!='/')
				purlplace--;
			// More "../" than the base URL has directories.
			if(purlplace<=phoststart)
				return -1;
			*purlplace=0;
		}
		if(strlen(curltmp)+1+strlen(ptmpsun)>=kurlbuflen)
			return -1;
		memset(m_credirecturl,0,kurlbuflen);
		if(*ptmpsun=='/')
			sprintf(m_credirecturl,"%s%s",curltmp,ptmpsun);
		else
			sprintf(m_credirecturl,"%s/%s",curltmp,ptmpsun);
		return IsUrl(m_credirecturl)==0?0:-1;
	}

	// Relative to the current directory: reduce the base URL to its
	// directory (ending in '/') and append the header value.
	char *ppath=strchr(phoststart,'/');
	if(ppath==NULL)
	{
		// e.g. http://www.sina.com.cn - no path at all, directory is the root.
		const size_t nbaselen=strlen(curltmp);
		if(nbaselen+1>=sizeof(curltmp))
			return -1;
		curltmp[nbaselen]='/';
		curltmp[nbaselen+1]=0;
	}
	else
	{
		// e.g. http://www.sina.com.cn/1.html - cut after the last '/'.
		char *plastslash=strrchr(ppath,'/');
		plastslash[1]=0;
	}
	if(strlen(curltmp)+strlen(credirectstr)>=kurlbuflen)
		return -1;
	memset(m_credirecturl,0,kurlbuflen);
	sprintf(m_credirecturl,"%s%s",curltmp,credirectstr);
	return IsUrl(m_credirecturl)==0?0:-1;
}
int cgetwebpage::analypagehead(char *ppagehead,time_t lasttime)
{
	char *ppageheadlower=new char[strlen(ppagehead)+1];
	memset(ppageheadlower,0,strlen(ppagehead)+1);
	strcpy(ppageheadlower,ppagehead);
	int i=0;
	while(*(ppageheadlower+i)!=0)
	{
		if(isupper(*(ppageheadlower+i)))
			*(ppageheadlower+i)+=32;
		i++;
	}
	char *pstatus=ppageheadlower;
	while(*pstatus!=' ')
		pstatus++;
	pstatus++;
	char cstatus[12];
	memset(cstatus,0,12);
	i=0;
	while(*pstatus!=' '&&i<11)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -