📄 cgetwebpage.cpp
字号:
{
*(cstatus+i++)=*pstatus;
pstatus++;
}
if(i==11)
{
delete ppageheadlower;
return 500;
}
m_dwRet=atol(cstatus);
if(m_dwRet!=200)
{
if( m_dwRet >= 300 && m_dwRet < 400 ) //首先检测一下服务器的应答是否为重定向
{
delete ppageheadlower;
return m_dwRet;
}
else if( m_dwRet >=500 )// 服务器错误,可以重试
{
delete ppageheadlower;
return m_dwRet;
}
// 客户端错误,重试无用
else if( m_dwRet >=400 && m_dwRet <500 )
{
delete ppageheadlower;
return m_dwRet;
}
}
char *pfilelen="content-length:";
char *pfilelenplace=strstr(ppageheadlower,pfilelen);
i=0;
if(pfilelenplace==NULL)
FileSize=0;
else
{
pfilelenplace+=strlen(pfilelen);
while(*pfilelenplace==' ')
pfilelenplace++;
char cfilelen[24];
memset(cfilelen,0,24);
while(*pfilelenplace>=48&&*pfilelenplace<=57)
{
*(cfilelen+i++)=*pfilelenplace;
pfilelenplace++;
}
FileSize=atol(cfilelen);
if(FileSize>MAX_INET_BUFFER)
{
FileSize=MAX_INET_BUFFER;
}
}
char *plastmodifyt="last-modified:";
char *pmodifyplace=strstr(ppageheadlower,plastmodifyt);
detecttime(pmodifyplace);
delete ppageheadlower;
if(lasttime>0&&FileTime>0&&FileTime<=lasttime)
return -10;//没有更新
return m_dwRet;
}
/*
 * Returns a freshly allocated, NUL-terminated copy of the HTTP header section
 * of `buffer`, i.e. everything up to and including the first "\r\n\r\n".
 *
 * buffer : NUL-terminated response text; it is only read, never modified.
 * return : a 1560-byte array allocated with new char[] (zero-filled past the
 *          copied header), or NULL when `buffer` is NULL, contains no
 *          "\r\n\r\n", or the terminator starts 1500 or more bytes in.
 *
 * The result comes from new char[], so the caller must release it with
 * delete[].
 */
char *cgetwebpage::outgetmodheadinfo(char *buffer)
{
    if(buffer==NULL)
        return NULL;

    const char *pterminator=strstr(buffer,"\r\n\r\n");
    // Validate before allocating so the failure path has nothing to free.
    if(pterminator==NULL||pterminator-buffer>=1500)
        return NULL;

    // Header length including the 4-byte terminator; at most 1503 bytes,
    // which always fits in the 1560-byte result together with its NUL.
    const size_t nheadbytes=(size_t)(pterminator-buffer)+4;

    char *phead=new char[1560];
    memset(phead,0,1560);
    // Bounded copy instead of temporarily writing a NUL into the caller's buffer.
    memcpy(phead,buffer,nheadbytes);
    return phead;
}
/*
 * Case-insensitive substring search (a case-folding counterpart of strstr).
 *
 * str1 : NUL-terminated text to search in.
 * str2 : NUL-terminated text to look for.
 * return : pointer to the first position in str1 where str2 matches ignoring
 *          case, or NULL when there is no match or either argument is NULL.
 *
 * Edge cases kept from the historical behaviour:
 *   - str1 == str2 (same pointer) returns str1, even for an empty string;
 *   - an empty str1 otherwise never matches, not even an empty str2;
 *   - an empty str2 matches at the start of any non-empty str1.
 *
 * The comparison uses tolower() on unsigned char values rather than the
 * non-standard strnicmp(), so the function builds on any hosted C++
 * implementation and never passes a negative value to the <ctype.h> routines.
 */
char * stristrzyy(char *str1,char *str2)
{
    if(str1==NULL||str2==NULL)
        return NULL;
    if(str1==str2)
        return str1;

    const size_t haylen=strlen(str1);
    const size_t needlelen=strlen(str2);
    if(haylen==0||needlelen>haylen)
        return NULL;

    // Last index at which a full-length match can still start.
    const size_t laststart=haylen-needlelen;
    for(size_t start=0;start<=laststart;++start)
    {
        size_t matched=0;
        while(matched<needlelen&&
              tolower((unsigned char)str1[start+matched])==tolower((unsigned char)str2[matched]))
        {
            ++matched;
        }
        if(matched==needlelen)
            return str1+start;
    }
    return NULL;
}
/*
 * Classifies a response by the value of its "Content-Type" field.
 *
 * pheader : NUL-terminated response text (the field name is searched for
 *           case-insensitively anywhere in it).
 * return  : 0  when the page may be downloaded: no "Content-Type" text is
 *              present, no ':' follows it, or the value starts with "text"
 *              (other than "text/plain");
 *           -1 when the value starts (within the first 3 characters) with
 *              "application" or "text/plain", or does not start with "text".
 */
int cgetwebpage::getapptype(char *pheader)
{
    // Local arrays rather than literals: stristrzyy() takes non-const char*.
    char fieldname[]="Content-Type";
    char appprefix[]="application";
    char textplain[]="text/plain";
    char textprefix[]="text";

    char *pvalue=stristrzyy(pheader,fieldname);
    if(pvalue==NULL)
        return 0;

    // Skip to the ':' that ends the field name, but never past the end of
    // the string (a response without the colon used to be over-read).
    pvalue+=strlen(fieldname);
    while(*pvalue&&*pvalue!=':')
        pvalue++;
    if(*pvalue==0)
        return 0; // no value to inspect: treated like a missing Content-Type
    pvalue++;
    while(*pvalue==' ')
        pvalue++;

    // Rejected types must begin within the first three characters of the value.
    char *phit=stristrzyy(pvalue,appprefix);
    if(phit&&phit-pvalue<3)
        return -1; // application/* (e.g. executable downloads)

    phit=stristrzyy(pvalue,textplain);
    if(phit&&phit-pvalue<3)
        return -1; // text/plain is rejected as well

    // Only a value that begins exactly with "text" is accepted.
    phit=stristrzyy(pvalue,textprefix);
    if(phit==pvalue)
        return 0;
    return -1;
}
/*
 * Collects the value of every "Set-Cookie:" line found in `response` into
 * `cookieOut`, separated by ';' and NUL-terminated without a trailing ';'.
 * When no cookie is present `cookieOut` becomes the empty string.
 * Empty cookie values are skipped.
 *
 * NOTE(review): the capacity of `cookieOut` is not known here, so the copy is
 * as unbounded as the code it replaces; the destination must be able to hold
 * the whole response text - TODO confirm against the declaration of m_cookie.
 */
static void collectSetCookieValues(const char *response, char *cookieOut)
{
    long nused=0;
    const char *pscan=response;
    while((pscan=strstr(pscan,"Set-Cookie:"))!=NULL)
    {
        pscan+=11; // strlen("Set-Cookie:")
        while(*pscan==' ')
            pscan++;
        const char *pvalue=pscan;
        while(*pscan&&*pscan!=0x0d&&*pscan!=0x0a)
            pscan++;
        const long nvalue=(long)(pscan-pvalue);
        if(nvalue>0)
        {
            memcpy(cookieOut+nused,pvalue,nvalue);
            nused+=nvalue;
            if(cookieOut[nused-1]!=';')
                cookieOut[nused++]=';';
        }
    }
    // Every stored value is followed by ';', so drop the final separator.
    if(nused>0&&cookieOut[nused-1]==';')
        cookieOut[nused-1]=0;
    else
        cookieOut[nused]=0;
}

/*
 * Downloads the page at `strU` over a plain TCP connection (HTTP/1.0, port 80),
 * following up to three redirects, and stores the response body in HTMLFileBuf.
 *
 * strU        : NUL-terminated URL; must be shorter than 512 characters.
 * httpHeaders : not used by this function.
 * body        : request body for the POST form (flag != 0); NULL is treated
 *               as an empty body.
 * PageType    : not used by this function.
 * AcceptType  : not used by this function.
 * fTime       : when > 0, a HEAD request is sent first and its headers are
 *               passed to analypagehead() together with fTime; a result of
 *               -10 ("not updated") aborts the download. When == 0, the
 *               headers of the GET/POST response itself are analysed.
 * flag        : 0 sends GET; any other value sends POST with `body` and the
 *               cookies collected from a preceding redirect response.
 *
 * return : m_dwRet (the value left by analypagehead) once the transfer
 *          finished, -1 on any failure. On most transport failures m_dwRet is
 *          set to 173; when the page is not updated m_dwRet is -10.
 *
 * Side effects: resets FileSize, FileTime, m_dwRet, m_nport, m_credirecturl,
 * m_curlbak and m_cookie; updates the host/address cache in gbufferaddr;
 * FileSize receives the body length (0 when the body reached MAX_INET_BUFFER).
 */
int cgetwebpage::DoGet(char *strU, const char *httpHeaders, const char *body,int PageType,int AcceptType ,time_t fTime,int flag)
{
    // chttphead / chttpget are cleared as 1024-byte buffers below. The fixed
    // text of the longest request template is under 200 bytes, so 256 bytes
    // of slack covers the template, the Content-Length digits and the NUL.
    const size_t kRequestCap=1024;
    const size_t kTemplateSlack=256;

    FileSize=0;
    FileTime=0;
    m_dwRet=0;
    m_nport=80;
    memset(m_credirecturl,0,512);
    memset(m_curlbak,0,512);
    m_cookie[0] = 0;

    // m_curlbak is handled as a 512-byte buffer; refuse URLs that do not fit.
    if(strU==NULL||strlen(strU)>=512)
        return -1;
    strcpy(m_curlbak,strU);

    const char *postBody=(body!=NULL)?body:"";
    char rootPath[]="/"; // used when the URL has no path after the host
    int ret=0;
    int nheadlen=0;
    int bredirectone=0; // number of redirects followed so far
    char *pnohosturl;
    unsigned short port;
    int retval=0;
    SOCKET conn_socket;
    int nrevlen=0;
    int outtime=1000; // send/receive timeout handed to setsockopt

    while(1)
    {
        // ---- Per-URL preparation and optional HEAD probe --------------------
        while(bredirectone<3)
        {
            char *server_name=getmyhostname(m_curlbak);
            if(server_name==NULL)
                return -1;
            if(strlen(server_name)>127)
            {
                // NOTE(review): released the same way as below; if
                // getmyhostname() allocates with new[] this should be delete[].
                delete server_name;
                return -1;
            }
            memset(m_chost,0,128);
            strcpy(m_chost,server_name);
            delete server_name;
            server_name=NULL;

            // Path part of the URL: everything from the first '/' after the host.
            pnohosturl=strstr(m_curlbak,m_chost);
            if(pnohosturl==NULL)
                return -1;
            pnohosturl+=strlen(m_chost);
            while(*pnohosturl&&*(pnohosturl)!='/')
                pnohosturl++;
            if(*pnohosturl==0)
                pnohosturl=rootPath;

            port = m_nport;
            PHOSTENT hp;
            // Resolve the host only when it differs from the cached one.
            if(stricmp(gbufferaddr.curlhost,m_chost)!=0)
            {
                memset(gbufferaddr.curlhost,0,128);
                strcpy(gbufferaddr.curlhost,m_chost);
                // NOTE(review): a host name that starts with a digit is
                // treated as a dotted IP address here.
                if (isalpha((unsigned char)m_chost[0]))
                {
                    hp = gethostbyname(m_chost);
                    if (hp == NULL )
                    {
                        // Do not leave the unresolved name in the cache, or the
                        // next call would reuse the previous host's address.
                        gbufferaddr.curlhost[0]=0;
                        m_dwRet=173;
                        return -1;
                    }
                    memset(&(gbufferaddr.serverlast),0,sizeof(sockaddr_in));
                    memcpy(&(gbufferaddr.serverlast.sin_addr),hp->h_addr,hp->h_length);
                    gbufferaddr.serverlast.sin_family = hp->h_addrtype;
                    gbufferaddr.serverlast.sin_port = htons(port);
                }
                else
                {
                    memset(&(gbufferaddr.serverlast),0,sizeof(sockaddr_in));
                    gbufferaddr.serverlast.sin_addr.s_addr=inet_addr(m_chost);
                    gbufferaddr.serverlast.sin_family =AF_INET;
                    gbufferaddr.serverlast.sin_port = htons(port);
                }
            }

            // Make sure the formatted requests fit before calling sprintf.
            size_t nvariable=strlen(pnohosturl)+strlen(m_chost);
            if(flag==0)
            {
                if(bredirectone>0)
                    nvariable+=strlen(strU); // Referer value
            }
            else
            {
                nvariable+=strlen(m_cookie)+strlen(postBody);
            }
            if(nvariable+kTemplateSlack>=kRequestCap)
            {
                m_dwRet=173;
                return -1;
            }

            memset(chttphead,0,1024);
            memset(chttpget,0,1024);
            if(flag==0)
            {
                if(bredirectone>0)
                {
                    // After a redirect the original URL is sent as Referer.
                    sprintf(chttphead,"HEAD %s HTTP/1.0\r\nHost: %s\r\nAccept: text/*\r\nPragma: no-cache\r\nCache-Control: no-cache\r\nUser-Agent: HTML_GET_APP\r\nReferer: %s\r\nAccept-Language: zh-cn\r\nContent-Type: application/x-www-form-urlencoded\r\n\r\n",pnohosturl,m_chost,strU);
                    sprintf(chttpget,"GET %s HTTP/1.0\r\nHost: %s\r\nAccept: text/*\r\nPragma: no-cache\r\nCache-Control: no-cache\r\nUser-Agent: HTML_GET_APP\r\nReferer: %s\r\nAccept-Language: zh-cn\r\nContent-Type: application/x-www-form-urlencoded\r\n\r\n",pnohosturl,m_chost,strU);
                }
                else
                {
                    sprintf(chttphead,"HEAD %s HTTP/1.0\r\nHost: %s\r\nAccept: text/*\r\nPragma: no-cache\r\nCache-Control: no-cache\r\nUser-Agent: HTML_GET_APP\r\nAccept-Language: zh-cn\r\nContent-Type: application/x-www-form-urlencoded\r\n\r\n",pnohosturl,m_chost);
                    sprintf(chttpget,"GET %s HTTP/1.0\r\nHost: %s\r\nAccept: text/*\r\nPragma: no-cache\r\nCache-Control: no-cache\r\nUser-Agent: HTML_GET_APP\r\nAccept-Language: zh-cn\r\nContent-Type: application/x-www-form-urlencoded\r\n\r\n",pnohosturl,m_chost);
                }
            }
            else
            {
                sprintf(chttphead,"HEAD %s HTTP/1.0\r\nHost: %s\r\nAccept: text/*\r\nPragma: no-cache\r\nCache-Control: no-cache\r\nUser-Agent: HTML_GET_APP\r\nAccept-Language: zh-cn\r\nContent-Type: application/x-www-form-urlencoded\r\n\r\n",pnohosturl,m_chost);
                if(m_cookie[0])
                    sprintf(chttpget,"POST %s HTTP/1.0\r\nHost: %s\r\nCookie:%s\r\ncontent-type:application/x-www-form-urlencoded\r\nContent-Length:%d\r\n\r\n%s\r\n",pnohosturl,m_chost,m_cookie,(int)strlen(postBody),postBody);
                else
                    sprintf(chttpget,"POST %s HTTP/1.0\r\nHost: %s\r\ncontent-type:application/x-www-form-urlencoded\r\nContent-Length:%d\r\n\r\n%s\r\n",pnohosturl,m_chost,(int)strlen(postBody),postBody);
            }

            // Without a reference time there is no HEAD probe.
            if(!(fTime>0))
                break;

            conn_socket = socket(AF_INET,SOCK_STREAM,0);
            if(conn_socket== INVALID_SOCKET)
            {
                m_dwRet=173;
                return -1;
            }
            setsockopt(conn_socket,SOL_SOCKET,SO_RCVTIMEO,(const char *)&outtime,sizeof(int));
            setsockopt(conn_socket,SOL_SOCKET,SO_SNDTIMEO,(const char *)&outtime,sizeof(int));
            if(connect(conn_socket,(struct sockaddr*)&(gbufferaddr.serverlast),sizeof(gbufferaddr.serverlast))== SOCKET_ERROR)
            {
                closesocket(conn_socket);
                m_dwRet=173;
                return -1;
            }
            retval=send(conn_socket,chttphead,strlen(chttphead),0);
            if (retval == SOCKET_ERROR )
            {
                closesocket(conn_socket);
                m_dwRet=173;
                return -1;
            }
            memset(Bufferhead,0,1024);
            retval = recv(conn_socket,Bufferhead,1023,0 );
            closesocket(conn_socket);
            if (retval == SOCKET_ERROR || retval==0)
            {
                m_dwRet=173;
                return -1;
            }

            nheadlen=strlen(Bufferhead);
            ret=analypagehead(Bufferhead,fTime);
            if(ret==-10) // page not updated since fTime
            {
                FileSize=0;
                m_dwRet=-10;
                return -1;
            }
            if(ret==200)
                break;
            if(ret<300||ret>=306)
                return -1;
            if(getredirectfun(Bufferhead)!=0)
                return -1;

            // Redirect: retry with the new URL, carrying over the cookies of
            // the HEAD response (which lives in Bufferhead, not Buffer).
            bredirectone+=1;
            strcpy(m_curlbak,m_credirecturl);
            collectSetCookieValues(Bufferhead,m_cookie);
            continue;
        }

        if(FileSize>=MAX_INET_BUFFER)
        {
            return -1;
        }
        if(bredirectone>=3) // too many redirects
        {
            m_dwRet=173;
            return -1;
        }
        FileSize=0;

        // ---- Fetch the full document ----------------------------------------
        conn_socket = socket(AF_INET,SOCK_STREAM,0);
        if(conn_socket==INVALID_SOCKET )
        {
            m_dwRet=173;
            return -1;
        }
        setsockopt(conn_socket,SOL_SOCKET,SO_RCVTIMEO,(const char *)&outtime,sizeof(int));
        setsockopt(conn_socket,SOL_SOCKET,SO_SNDTIMEO,(const char *)&outtime,sizeof(int));
        if (connect(conn_socket,(struct sockaddr*)&gbufferaddr.serverlast,sizeof(sockaddr_in))== SOCKET_ERROR)
        {
            closesocket(conn_socket);
            m_dwRet=173;
            return -1;
        }
        retval=send(conn_socket,chttpget,strlen(chttpget),0);
        if (retval == SOCKET_ERROR )
        {
            closesocket(conn_socket);
            m_dwRet=173;
            return -1;
        }
        memset(Buffer,0,DYNBUFFER);
        nrevlen=DYNBUFFER-1;
        retval = recv(conn_socket,Buffer,nrevlen,0 );
        if (retval == SOCKET_ERROR || retval == 0)
        {
            closesocket(conn_socket);
            m_dwRet=173;
            return -1;
        }
        if(getapptype(Buffer)!=0) // not a downloadable text document
        {
            closesocket(conn_socket);
            m_dwRet=173;
            return -1;
        }

        // First-pass download: analyse the headers of this response itself.
        if(fTime==0)
        {
            char *pgetmodhead=outgetmodheadinfo(Buffer);
            if(pgetmodhead==NULL)
            {
                closesocket(conn_socket);
                m_dwRet=173;
                return-1;
            }
            nheadlen=strlen(pgetmodhead);
            ret=analypagehead(pgetmodhead,fTime);
            delete[] pgetmodhead; // allocated with new char[] by outgetmodheadinfo
            pgetmodhead=NULL;
            if(ret!=200)
            {
                if(ret<300||ret>=306)
                {
                    closesocket(conn_socket);
                    m_dwRet=173;
                    return -1;
                }
                if(getredirectfun(Buffer)!=0)
                {
                    closesocket(conn_socket);
                    m_dwRet=173;
                    return -1;
                }
                // Redirect: start over with the new URL and the cookies of
                // this response.
                bredirectone+=1;
                strcpy(m_curlbak,m_credirecturl);
                closesocket(conn_socket);
                collectSetCookieValues(Buffer,m_cookie);
                continue;
            }
            if(FileSize>=MAX_INET_BUFFER)
            {
                closesocket(conn_socket);
                return -1;
            }
        }
        break;
    }

    // After a HEAD probe nheadlen still describes the HEAD response; measure
    // the header of the response that is actually being stored instead.
    if(fTime>0)
    {
        char *pheadend=strstr(Buffer,"\r\n\r\n");
        if(pheadend!=NULL)
            nheadlen=(int)(pheadend-Buffer)+4;
    }

    // Replace the first run of embedded NUL bytes with spaces so the string
    // functions below see more of the received data.
    int kkzero=strlen(Buffer);
    while(kkzero<(retval-1)&&*(Buffer+kkzero)==0)
    {
        *(Buffer+kkzero)=32;
        kkzero++;
    }

    // Never step past the end of the received text when skipping the header.
    int nfirstlen=strlen(Buffer);
    if(nheadlen>nfirstlen)
        nheadlen=nfirstlen;

    char *pouthead;
    if(nheadlen>0)
        pouthead=Buffer+nheadlen;
    else
        pouthead=Buffer;
    int ncontlen=strlen(pouthead);

    if(ncontlen>=MAX_INET_BUFFER)
    {
        // Same outcome as an oversized body detected in the loop below.
        ncontlen=0;
    }
    else
    {
        strcpy(HTMLFileBuf,pouthead);
        while(retval>0)
        {
            memset(Buffer,0,DYNBUFFER);
            retval = recv(conn_socket,Buffer,nrevlen,0 );
            if (retval == SOCKET_ERROR )
            {
                closesocket(conn_socket);
                m_dwRet=173;
                return -1;
            }
            if (retval == 0) // peer closed the connection: transfer complete
            {
                break;
            }
            kkzero=strlen(Buffer);
            while(kkzero<retval&&*(Buffer+kkzero)==0)
            {
                *(Buffer+kkzero)=32;
                kkzero++;
            }
            char *ppagenowplace=HTMLFileBuf+ncontlen;
            ncontlen+=strlen(Buffer);
            if(ncontlen>=MAX_INET_BUFFER)
            {
                // Body too large: report an empty document.
                ncontlen=0;
                break;
            }
            strcpy(ppagenowplace,Buffer);
        }
    }
    FileSize=ncontlen;
    closesocket(conn_socket);
    return m_dwRet;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -