📄 cgetwebpage.cpp
字号:
// cgetwebpage.cpp: implementation of the cgetwebpage class.
//
//////////////////////////////////////////////////////////////////////
#include <stdio.h>
#include "cgetwebpage.h"
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
//extern CRITICAL_SECTION AddHostcache;
//CRITICAL_SECTION Addrurlcache;
//#define _TESTOFZYY
#ifndef WIN32
// Compatibility shims for non-Windows builds: socket type/constant aliases,
// ASCII-only character helpers, and case-insensitive string comparison
// functions under the names used by the rest of this file.
#define SOCKET int
#define PHOSTENT struct hostent *
#define INVALID_SOCKET (SOCKET)(~0)
#define SOCKET_ERROR (-1)
// ASCII upper-casing. The argument is evaluated more than once, so never
// pass an expression with side effects.
#define MYUpper(x) (((x)>='a'&&(x)<='z')?((x)-'a'+'A'):(x))
#define isupper(x) ((x)>='A'&&(x)<='Z')
#define closesocket(x) close(x)
#define isalpha(x) isupper(MYUpper(x))

// Case-insensitive (ASCII only) comparison of two NUL-terminated strings.
// Returns 0 when equal, 1 when str1 sorts after str2, -1 otherwise.
int stricmp(const char *str1,const char *str2)
{
    for(;*str1&&*str2;str1++,str2++)
        if(MYUpper(*str1)!=MYUpper(*str2))break;
    // Compare as unsigned char so bytes >= 0x80 order after ASCII.
    const unsigned char c1=(unsigned char)MYUpper(*str1);
    const unsigned char c2=(unsigned char)MYUpper(*str2);
    if(c1==c2)
        return 0;
    return c1>c2?1:-1;
}

// Case-insensitive (ASCII only) comparison of at most len characters.
// Returns 0 when the compared prefixes are equal, 1 when str1 sorts after
// str2, -1 otherwise.
int strnicmp(const char *str1,const char *str2,size_t len)
{
    size_t i=0;
    for(;i<len&&*str1&&*str2;i++,str1++,str2++)
        if(MYUpper(*str1)!=MYUpper(*str2))break;
    if(i==len)return 0;
    const unsigned char c1=(unsigned char)MYUpper(*str1);
    const unsigned char c2=(unsigned char)MYUpper(*str2);
    // Both strings ended together before len characters: they are equal.
    if(c1==c2)
        return 0;
    return c1>c2?1:-1;
}
#endif
// Constructor: starts from a zeroed state for the two buffers touched here.
cgetwebpage::cgetwebpage()
{
    // Clear gbufferaddr over sizeof(bufferaddr) bytes.
    memset(&gbufferaddr, 0, sizeof(bufferaddr));
    // Clear the first 512 bytes of the redirect URL buffer.
    memset(m_credirecturl, 0, 512);
}
// Destructor: the body is empty, so no explicit cleanup is performed here.
cgetwebpage::~cgetwebpage()
{
}
// Stub: ignores both url and newurl and always returns 0.
// NOTE(review): the name suggests URL normalization was planned; nothing is
// implemented here and newurl is never written.
int cgetwebpage::standardurl(char *url,char *newurl)
{
return 0;
}
// Extracts the host part of a URL of the form "scheme://host[:port][/path]".
//
// url      NUL-terminated URL; may be NULL.
// returns  a buffer of 512 chars allocated with new[] holding the host name
//          (the caller must release it with delete[]), or NULL when url is
//          NULL or contains no "//".
//
// Side effect: when the authority contains ":<digits>", the number is stored
// in m_nport and the host name is cut at the colon. Host names longer than
// 511 characters and port strings longer than 99 characters are truncated
// instead of overrunning their buffers.
char *cgetwebpage::getmyhostname(char *url)
{
    const int hostcap=512;
    if(url==NULL)
        return NULL;
    char *pauthority=strstr(url,"//");
    if(pauthority==NULL)
        return NULL;        // nothing allocated yet, so nothing to free

    char *hostname=new char[hostcap];
    memset(hostname,0,hostcap);

    // Copy everything between "//" (plus optional blanks) and the next '/'.
    char *p=pauthority+2;
    while(*p==' ')
        p++;
    int i=0;
    while(*p!='/'&&*p)
    {
        if(i<hostcap-1)     // keep the terminating NUL intact
            hostname[i++]=*p;
        p++;
    }
    i--;                    // index of the last stored character (-1 if none)

    // A ':' located before the end of the authority introduces a port.
    char *pport=strchr(pauthority,':');
    if(pport&&pport<p)
    {
        pport++;
        char pportn[100];
        memset(pportn,0,sizeof(pportn));
        int j=0;
        while(*pport!='/'&&*pport&&j<(int)sizeof(pportn)-1)
            pportn[j++]=*pport++;
        if(pportn[0]>='0'&&pportn[0]<='9')
        {
            m_nport=atol(pportn);
            // Cut the copied host at its last ':'; the bound protects
            // against a colon that fell outside a truncated copy.
            while(i>=0&&hostname[i]!=':')
                i--;
            if(i>=0)
                hostname[i]=0;
        }
    }
    return hostname;
}
// Reads one field of the date text into out (capacity cap, NUL included).
// The field ends at delim; on success the cursor is moved past delim.
// Fails when the text ends before delim or the field does not fit in out.
static bool lm_nexttoken(const char *&cursor,char delim,char *out,size_t cap)
{
    size_t n=0;
    while(*cursor!=delim&&*cursor)
    {
        if(n+1>=cap)
            return false;
        out[n++]=*cursor++;
    }
    out[n]=0;
    if(*cursor==0)
        return false;
    cursor++;
    return true;
}

// Maps a lower-case three-letter month name to 0..11, or -1 when unknown.
static int lm_monthindex(const char *name)
{
    static const char *const allmonths="jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,dec";
    if(strlen(name)!=3)
        return -1;
    const char *hit=strstr(allmonths,name);
    // Entries are 4 characters apart; reject matches that straddle a comma.
    if(hit==NULL||(hit-allmonths)%4!=0)
        return -1;
    return (int)((hit-allmonths)/4);
}

// Parses "last-modified: mon, 04 mar 2002 09:39:23 gmt" (lower-case text)
// into *t. Returns false when the text does not have that shape.
static bool lm_parse(const char *pmodifyplace,struct tm *t)
{
    const char *plastmodifyt="last-modified:";
    if(strstr(pmodifyplace,"gmt")==NULL)
        return false;

    // Step over the header name without running past the terminator.
    const char *p=pmodifyplace;
    for(size_t k=strlen(plastmodifyt);k>0&&*p;k--)
        p++;
    while(*p==' ')
        p++;

    // Isolate the text between the header name and the "gm" of "gmt".
    char pgmt[128];
    size_t n=0;
    while(*p&&!(p[0]=='g'&&p[1]=='m'))
    {
        if(n+1>=sizeof(pgmt))
            return false;
        pgmt[n++]=*p++;
    }
    pgmt[n]=0;
    if(*p==0)
        return false;

    // Skip the weekday field ("mon,").
    const char *cur=pgmt;
    while(*cur!=' '&&*cur)
        cur++;
    if(*cur==0)
        return false;
    cur++;

    char ctmp[8];
    memset(t,0,sizeof(*t));

    if(!lm_nexttoken(cur,' ',ctmp,sizeof(ctmp)))
        return false;
    t->tm_mday=atol(ctmp);

    if(!lm_nexttoken(cur,' ',ctmp,sizeof(ctmp)))
        return false;
    const int nmon=lm_monthindex(ctmp);
    if(nmon<0)
        return false;
    t->tm_mon=nmon;

    if(!lm_nexttoken(cur,' ',ctmp,sizeof(ctmp)))
        return false;
    int nyear=atol(ctmp);
    if(nyear<1990)          // years before 1990 are clamped to 1990
        nyear=1990;
    t->tm_year=nyear-1900;

    if(!lm_nexttoken(cur,':',ctmp,sizeof(ctmp)))
        return false;
    t->tm_hour=atol(ctmp);

    if(!lm_nexttoken(cur,':',ctmp,sizeof(ctmp)))
        return false;
    t->tm_min=atol(ctmp);

    // The seconds field must be followed by the blank that precedes "gmt".
    if(!lm_nexttoken(cur,' ',ctmp,sizeof(ctmp)))
        return false;
    t->tm_sec=atol(ctmp);

    t->tm_isdst=0;
    return true;
}

// Sets FileTime from a lower-cased "last-modified:" header line.
//
// pmodifyplace  text starting at the header name, e.g.
//               "last-modified: mon, 04 mar 2002 09:39:23 gmt"; may be NULL.
// returns       0 when pmodifyplace is NULL (FileTime = now) or the date was
//               parsed; -1 when the text could not be parsed (FileTime = now).
//
// A parsed time that lies in the future is clamped to the current time.
// Over-long fields are rejected rather than copied past their buffers.
// NOTE(review): mktime interprets the fields as local time although the
// header is GMT; this matches the previous behaviour and is left unchanged.
int cgetwebpage::detecttime(char *pmodifyplace)
{
    if(pmodifyplace==NULL)
    {
        FileTime=time(NULL);
        return 0;
    }
    struct tm t;
    if(!lm_parse(pmodifyplace,&t))
    {
        FileTime=time(NULL);
        return -1;
    }
    FileTime=mktime(&t);
    const time_t FileTimenow=time(NULL);
    if(FileTime>FileTimenow)
        FileTime=FileTimenow;
    return 0;
}
// Decides whether url should be treated as a page URL.
// Trailing blanks, CR, LF and TAB are removed from url in place first.
// Returns -1 when the trimmed url is empty or ends (ignoring case) with one
// of the listed file extensions, 0 otherwise.
int cgetwebpage::IsUrl(char *url)
{
    int l=strlen(url);
    for(;l>0;l--)
    {
        const char tail=url[l-1];
        if(tail!=' '&&tail!=(char)0x0d&&tail!=(char)0x0a&&tail!=(char)0x09)
            break;
    }
    url[l]=0;
    if(l==0)
        return -1;

    // Extensions that disqualify a URL; the url must be strictly longer
    // than the extension for it to count as a match.
    static const char *const skipped[]=
    {
        ".mp3",".gif",".dat",".zip",".bmp",".jpg",".doc",".xls",
        ".mid",".chm",".ram",".mpeg",".jpeg",".rm",".ra"
    };
    const int nskipped=(int)(sizeof(skipped)/sizeof(skipped[0]));
    for(int k=0;k<nskipped;k++)
    {
        const int extlen=(int)strlen(skipped[k]);
        if(l>extlen&&strnicmp(&url[l-extlen],skipped[k],extlen)==0)
            return -1;
    }
    return 0;
}
// Finds the first place in text that matches lowername when ASCII upper-case
// letters in text are folded to lower case. lowername must be lower-case.
static const char *redir_findci(const char *text,const char *lowername)
{
    for(;*text;text++)
    {
        const char *a=text;
        const char *b=lowername;
        while(*b)
        {
            char c=*a;
            if(c>='A'&&c<='Z')
                c+=32;
            if(c!=*b)
                break;
            a++;
            b++;
        }
        if(*b==0)
            return text;
    }
    return NULL;
}

// Writes a, b and c back to back into dst (capacity cap, NUL included).
// Returns false and leaves dst untouched when the result would not fit.
static bool redir_join(char *dst,size_t cap,const char *a,const char *b,const char *c)
{
    const size_t la=strlen(a);
    const size_t lb=strlen(b);
    const size_t lc=strlen(c);
    if(la+lb+lc>=cap)
        return false;
    memcpy(dst,a,la);
    memcpy(dst+la,b,lb);
    memcpy(dst+la+lb,c,lc+1);
    return true;
}

// Returns the part of host used for the same-site test: the label before the
// last dot plus everything after it ("www.google.com" -> "google.com"), or,
// for hosts ending in "cn", the label before the second-to-last dot
// ("www.sina.com.cn" -> "sina.com.cn"). Returns NULL when host is empty or
// has too few dots.
static const char *redir_domain(const char *host)
{
    const int hostlen=(int)strlen(host);
    if(hostlen==0)
        return NULL;
    const char *last=host+hostlen-1;
    const int dotsneeded=(hostlen>=2&&last[0]=='n'&&last[-1]=='c')?2:1;
    int dots=0;
    int back=0;
    while(back<hostlen)
    {
        if(*(last-back)=='.')
            dots++;
        if(dots==dotsneeded)
            break;
        back++;
    }
    if(back==hostlen)
        return NULL;
    // Walk left from the dot to the start of the preceding label.
    const char *p=last-back;
    if(p>host)
        p--;
    while(p>host&&*p!='.')
        p--;
    if(*p=='.')
        p++;
    return p;
}

// Extracts the "Location:" header from a response head and stores the
// resulting absolute URL in m_credirecturl.
//
// ppagehead  NUL-terminated response header text; may be NULL.
// returns    0 when m_credirecturl holds a redirect target accepted by
//            IsUrl, -1 otherwise (no header, value of 510+ characters,
//            different site, result too long, rejected by IsUrl, ...).
//
// Resolution rules:
//   - a value containing "http://" is taken from that point on and accepted
//     only when it contains the domain derived from m_chost;
//   - a value starting with '/' is appended to "http://" + m_chost;
//   - a value containing "../" drops the last path component of m_curlbak
//     plus one more per counted "../" (the counter advances three characters
//     per hit, i.e. it expects the "../" sequences to be consecutive);
//   - anything else is resolved against the directory of m_curlbak.
// Backslashes in the value are treated as '/'.
//
// Assumes m_credirecturl holds 512 bytes, as every memset on it in this file
// does; results that would not fit are rejected instead of written.
int cgetwebpage::getredirectfun(char *ppagehead)
{
    const char *plocation="location:";
    if(ppagehead==NULL)
        return -1;
    const char *pvalue=redir_findci(ppagehead,plocation);
    if(pvalue==NULL)
        return -1;
    pvalue+=strlen(plocation);
    while(*pvalue==' ')
        pvalue++;

    // Copy the header value up to CRLF; also stop at the end of the text so
    // a head without a trailing CRLF is not read past its terminator.
    char credirectstr[512];
    memset(credirectstr,0,sizeof(credirectstr));
    int i=0;
    while(*pvalue&&!(pvalue[0]=='\r'&&pvalue[1]=='\n')&&i<510)
        credirectstr[i++]=*pvalue++;
    if(i==510)
        return -1;

    // Absolute redirect: only followed when it stays on the same site.
    char *pabsolute=strstr(credirectstr,"http://");
    if(pabsolute!=NULL)
    {
        const char *pdomain=redir_domain(m_chost);
        if(pdomain==NULL)
            return -1;
        if(strstr(pabsolute,pdomain)==NULL)
            return -1;
        memset(m_credirecturl,0,512);
        strcpy(m_credirecturl,pabsolute);   // at most 509 characters
        return IsUrl(m_credirecturl)==0?0:-1;
    }

    for(i=0;credirectstr[i];i++)
        if(credirectstr[i]=='\\')
            credirectstr[i]='/';

    // Host-relative redirect.
    if(credirectstr[0]=='/')
    {
        memset(m_credirecturl,0,512);
        if(!redir_join(m_credirecturl,512,"http://",m_chost,credirectstr))
            return -1;
        return IsUrl(m_credirecturl)==0?0:-1;
    }

    // The remaining forms are resolved against a private copy of the URL
    // that produced this response.
    char curltmp[512];
    if(strlen(m_curlbak)>=sizeof(curltmp))
        return -1;
    memset(curltmp,0,sizeof(curltmp));
    strcpy(curltmp,m_curlbak);

    char *pparent=strstr(credirectstr,"../");
    if(pparent!=NULL)
    {
        int nparantnum=0;
        char *ptmpsun=pparent;
        while(pparent)
        {
            nparantnum++;
            ptmpsun+=3;
            pparent=strstr(ptmpsun,"../");
        }
        for(char *q=curltmp;*q;q++)
            if(*q=='\\')
                *q='/';
        // Drop the file name, then one directory per counted "../".
        char *ptail=curltmp+strlen(curltmp);
        for(i=0;i<=nparantnum;i++)
        {
            while(ptail>curltmp&&*ptail!='/')
                ptail--;
            if(*ptail!='/')
                return -1;      // more levels requested than the URL has
            *ptail=0;
        }
        memset(m_credirecturl,0,512);
        const char *psep=(*ptmpsun=='/')?"":"/";
        if(!redir_join(m_credirecturl,512,curltmp,psep,ptmpsun))
            return -1;
        return IsUrl(m_credirecturl)==0?0:-1;
    }

    int klen=strlen(curltmp);
    memset(m_credirecturl,0,512);
    if(klen==0)
        return -1;

    if(*credirectstr!='.'&&*credirectstr!='/')
    {
        // Plain relative name: resolve against the current directory.
        while(klen>0&&curltmp[klen-1]!='/'&&curltmp[klen-1]!='\\')
            klen--;
        if(klen<=0)
            return -1;
        const char *pscheme=strstr(curltmp,"//");
        bool bjoined;
        if(pscheme!=NULL&&curltmp+klen-1==pscheme+1)
        {
            // The only slashes belong to "scheme://", so the URL is just a
            // host: keep it whole and add the missing '/'.
            bjoined=redir_join(m_credirecturl,512,curltmp,"/",credirectstr);
        }
        else
        {
            curltmp[klen]=0;    // keep the directory including its slash
            bjoined=redir_join(m_credirecturl,512,curltmp,"",credirectstr);
        }
        if(!bjoined)
            return -1;
        return IsUrl(m_credirecturl)==0?0:-1;
    }

    // The value starts with '.' here (for example "./page.html").
    bool bjoined;
    if(curltmp[klen-1]=='/')
    {
        bjoined=redir_join(m_credirecturl,512,curltmp,"",credirectstr);
    }
    else
    {
        // Look for a path separator after the "http://" prefix.
        const char *ppath=curltmp+(klen<7?klen:7);
        while(*ppath&&*ppath!='/')
            ppath++;
        if(*ppath=='/')
        {
            // e.g. http://www.sina.com.cn/1.html: cut the file name so the
            // value is appended to the directory.
            while(curltmp[klen-1]!='/')
                klen--;
            curltmp[klen]=0;
            bjoined=redir_join(m_credirecturl,512,curltmp,"",credirectstr);
        }
        else
        {
            // e.g. http://www.sina.com.cn: host only, add the slash.
            bjoined=redir_join(m_credirecturl,512,curltmp,"/",credirectstr);
        }
    }
    if(!bjoined)
        return -1;
    return IsUrl(m_credirecturl)==0?0:-1;
}
int cgetwebpage::analypagehead(char *ppagehead,time_t lasttime)
{
char *ppageheadlower=new char[strlen(ppagehead)+1];
memset(ppageheadlower,0,strlen(ppagehead)+1);
strcpy(ppageheadlower,ppagehead);
int i=0;
while(*(ppageheadlower+i)!=0)
{
if(isupper(*(ppageheadlower+i)))
*(ppageheadlower+i)+=32;
i++;
}
char *pstatus=ppageheadlower;
while(*pstatus!=' ')
pstatus++;
pstatus++;
char cstatus[12];
memset(cstatus,0,12);
i=0;
while(*pstatus!=' '&&i<11)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -