📄 templetidentify.cpp
字号:
lLastSign = 0;
}
}
else
{
*p1++=*p++;
lLastSign = 0;
}
}
*p1 = 0;
return 0;
}
// Matches the text in Content against the chain of template controls that
// starts at pTemplet and copies the text found between consecutive controls
// into the output buffers as "&<variable>=<text>" fields.
//
// Parameters:
//   pTemplet  - first control of the chain; nodes are followed through
//               pNextControl and pLoopControl.
//   Content   - NUL-terminated text to be matched (read only here).
//   OutStr    - receives the fields captured while no loop is active;
//               NUL-terminated on success.
//   pLoopStr  - receives the fields captured while a loop is active; a NUL is
//               written each time the chain jumps back through pLoopControl
//               (if anything has been written), and once more at the end.
//   lLoopLen  - set on success to the number of bytes written to pLoopStr,
//               including the final NUL.
//
// Returns -1 when the content does not fit the template, otherwise
// lLoopNum + HaveLoop (number of jumps taken through pLoopControl, plus 1 if
// any visited node had lLoopFlag set).
//
// No buffer sizes are passed in, so nothing in this function bounds the
// writes to OutStr / pLoopStr.
long TempletIdentify::TempletCompare(TempletControl *pTemplet,char *Content,char *OutStr,char *pLoopStr,long &lLoopLen)
{
TempletControl *pNode1 = pTemplet;
// p2: read cursor in Content; p1: read cursor in the current description.
char *p2 = Content,*p1;
// p3 / p4: write cursors in OutStr / pLoopStr.
char *p3 = OutStr;
char *p4 = pLoopStr;
// lLoopFlag: non-zero while captured text is routed to pLoopStr.
long lLoopFlag = 0;
// lFlag: how the scan for the following control ended
// (0 = content exhausted, 1 = next control found, 2 = loop control found).
long lFlag;
long lLoopNum = 0;
long HaveLoop = 0;
while(pNode1)
{
// A node with lLoopFlag set switches capturing over to the loop buffer.
if(pNode1->lLoopFlag)
{
HaveLoop = 1;
lLoopFlag = 1;
}
p1 = pNode1->description;
if(*p1)
{
// Step 1: the node's description must appear at the current content
// position; blanks, tabs, CR and LF on either side are ignored.
while(*p1)
{
if(*p1 == *p2)
{
p1++;
p2++;
continue;
}
else if (*p2 == ' ' || *p2 == '\t' || *p2 == (char)0x0d || *p2 == (char)0x0a)
{
p2++;
continue;
}
else if(*p1 == ' ' || *p1 == '\t' || *p1 == (char)0x0d || *p1 == (char)0x0a)
{
p1++;
continue;
}
// NOTE(review): the next two branches compare 2 bytes against the literal
// and then skip 2 bytes. Presumably the literal was a two-byte (full-width)
// blank in the original file encoding - confirm. If it really is a single
// ASCII space, these branches can never be taken because a leading space is
// already handled above.
else if(strncmp(p1," ",2) == 0)
{
p1+=2;
continue;
}
else if(strncmp(p2," ",2) == 0)
{
p2+=2;
continue;
}
break;
}
// Description not fully consumed: the content does not match this control.
if(*p1)
return -1;
// Step 2: if this node names a variable, open a "&<variable>=" field in
// whichever buffer is currently active.
if(*pNode1->pVariable)
{
if(lLoopFlag)
{
*p4++ = '&';
strcpy(p4,pNode1->pVariable);
p4+=strlen(pNode1->pVariable);
*p4++ = '=';
}
else
{
*p3++ = '&';
strcpy(p3,pNode1->pVariable);
p3+=strlen(pNode1->pVariable);
*p3++ = '=';
}
}
// Step 3: scan forward until the following control (next or loop) matches.
lFlag = 0;
// lTabFlag: set after a '<' has been seen and until its closing '>'.
long lTabFlag = 0;
// pBak is assigned but not used again in this function.
char *pBak = p2;
// lYhFlag1 / lYhFlag2: toggled by ' and " seen while lTabFlag is set.
long lYhFlag1 = 0;
long lYhFlag2 = 0;
// The following if only declares an unused local; it has no effect.
if(pNode1->pLoopControl)
long kk = 0;
while(*p2)
{
if(pNode1->pNextControl)
{
if(MyCompareString(p2,pNode1->pNextControl->description,pNode1->pNextControl->lDescriptionLen) == 0)
{
lFlag = 1;
break;
}
}
if(pNode1->pLoopControl)
{
if(MyCompareString(p2,pNode1->pLoopControl->description,pNode1->pLoopControl->lDescriptionLen) == 0)
{
lFlag = 2;
break;
}
}
// Only nodes with a variable capture text; everything between '<' and the
// matching '>' is dropped, the rest is copied to the active buffer.
if(*pNode1->pVariable)
{
if(*p2 == '<')
{
lTabFlag = 1;
p2++;
continue;
}
else if(*p2 == '>')
{
if(lTabFlag)
{
// A '>' seen while a quote flag is set does not end the tag.
if(lYhFlag1 || lYhFlag2)
{
p2++;
continue;
}
lTabFlag = 0;
p2++;
continue;
}
// A '>' outside of any tag is ordinary text and is copied.
if(lLoopFlag)
*p4++ = *p2++;
else
*p3++ = *p2++;
}
else if(lTabFlag)
{
// Inside a tag: track quote state, discard the character.
if(*p2 == '\'')
{
if(lYhFlag1 == 1)
lYhFlag1 = 0;
else
lYhFlag1 = 1;
}
else if(*p2 == '"')
{
if(lYhFlag2 == 1)
lYhFlag2 = 0;
else
lYhFlag2 = 1;
}
p2++;
}
else
{
if(lLoopFlag)
*p4++ = *p2++;
else
*p3++ = *p2++;
}
}
else
p2++;
}
// Content ran out before any following control matched.
if(lFlag == 0)
{
// Still expecting another control: mismatch.
if(pNode1->pNextControl || pNode1->pLoopControl)
return -1;
// This was the last control: terminate both buffers and report success.
*p4++ = 0;
lLoopLen = p4 - pLoopStr;
*p3++ = 0;
return lLoopNum+HaveLoop;
}
// Leaving a node that has a loop control through its next control ends
// the loop, so later captures go to OutStr again.
if(lLoopFlag == 1 && pNode1->pLoopControl && lFlag==1)
lLoopFlag = 0;
if(lFlag == 1)
pNode1 = pNode1->pNextControl;
else if(lFlag == 2)
{
// Jump back through the loop control: close the current loop record
// (only if something has been written) and count the jump.
pNode1 = pNode1->pLoopControl;
if(p4 != pLoopStr)
*p4++ = 0;
lLoopNum ++;
}
}
else
{
// Node with an empty description: nothing is matched or captured; the
// next control (preferred) or the loop control must match right here.
lFlag = 0;
if(pNode1->pNextControl)
{
if(MyCompareString(p2,pNode1->pNextControl->description,pNode1->pNextControl->lDescriptionLen) == 0)
{
pNode1 = pNode1->pNextControl;
lFlag = 1;
}
}
if(lFlag == 0 &&pNode1->pLoopControl)
{
if(MyCompareString(p2,pNode1->pLoopControl->description,pNode1->pLoopControl->lDescriptionLen) == 0)
{
pNode1 = pNode1->pLoopControl;
lFlag = 2;
}
}
if(lFlag == 0)
return -1;
}
}
// The chain is exhausted here (pNode1 is null), so this rejects content
// that still has unread characters.
if(pNode1 || *p2)
return -1;
*p3 = 0;
*p4++ = 0;
lLoopLen = p4 - pLoopStr;
return lLoopNum+HaveLoop;
}
// Main crawl loop.
//
// For every combination of client argument values it:
//   1. builds the "name=value&..." request string and fetches pClientUrl,
//   2. matches the page against pControlHead[0] (TempletCompare) to obtain
//      the fixed fields and the list of loop records,
//   3. for each loop record containing "URL=", asks pGetTree whether the URL
//      should be processed, optionally expands it through pSecondUrlMode
//      (the "<?URL?>" placeholder), fetches it and matches the result against
//      pControlHead[1],
//   4. combines the server-side argument strings with the captured fields,
//      filters them through GetFinalStr(pServerArg) and sends the result to
//      pServerUrl.
// The whole sweep repeats until "<syspath>\stop.dat" exists, at which point
// the tree is saved to "<syspath>\indextree.dat" and the loop ends.
//
// All working buffers are carved out of pAllBuf; the first 512 KiB are handed
// to cget as its page buffer. Always returns 0.
//
// NOTE(review): WSAStartup() is never balanced by WSACleanup() in this
// function; left as is because other code may rely on Winsock staying
// initialised - confirm.
long TempletIdentify::ExcuteProc()
{
    WSADATA wsaData;
    ::WSAStartup(0x0101,&wsaData);
    cgetwebpage cget;
    pNowPos = pAllBuf;
    cget.HTMLFileBuf = pNowPos;
    pNowPos += 512<<10;
    long i,j,k,m;
    char *str1;
    char *str2;
    char *pBuf;
    char *LoopStr1,*LoopBuf;
    char Url[300];
    char *pPage1,*pPage2;
    char *p,*p2,*p3,*p4;
    long lRetNum = 0;
    long lLoopLen,lLoopLen1;
    int flag;
    char *pLoop;
    long lBreakFlag = 1;
    long lMaxj = 1;
    long lTmp,lTmp1;
    // Index of the value currently selected for each client argument.
    long *lpNowArgIdx = new long[lClientArgNum];

    // Total number of argument-value combinations.
    for(i = 0;i<lClientArgNum;i++)
        lMaxj*=pClientArg[i].lArgValueNum;

    for(;lBreakFlag;)
    {
        j = 0;
        for(;j<lMaxj;j++,Sleep(100))
        {
            // A stop file requests shutdown: persist the tree and leave.
            sprintf(Url,"%s\\stop.dat",syspath);
            if(access(Url,0) == 0)
            {
                sprintf(Url,"%s\\indextree.dat",syspath);
                pGetTree->SaveTree(Url);
                lBreakFlag = 0;
                break;
            }

            pPage1 = pNowPos;
            p = pPage1;
            // Build the request string and download the page. j is decoded
            // as a mixed-radix number, one digit per client argument.
            lTmp = j;
            for(i=0;i<lClientArgNum;i++)
            {
                lTmp1 = lTmp%pClientArg[i].lArgValueNum;
                lTmp/=pClientArg[i].lArgValueNum;
                strcpy(p,pClientArg[i].argName);
                p+=strlen(p);
                *p++ = '=';
                lpNowArgIdx[i] = lTmp1;
                strcpy(p,pClientArg[i].pArgv[lTmp1]);
                p+=strlen(p);
                *p++='&';
            }
            // Replace the trailing '&' with the terminator. With no client
            // arguments nothing was written, so terminate in place instead
            // of writing one byte in front of the buffer.
            if(p != pPage1)
                p--;
            *p = 0;
            cget.DoGet(pClientUrl,"",pPage1,1,1,0,1);
            if(cget.m_dwRet != 200)
                continue;

            // Analyse the page.
            pPage2 = pPage1+strlen(pPage1)+1;
            strcpy(pPage2,cget.HTMLFileBuf);
            DelBlank(pPage2);
            str1 = pPage2+strlen(pPage2)+1;
            LoopStr1 = str1 + (100<<10);
            lRetNum = TempletCompare(pControlHead[0],pPage2,str1,LoopStr1,lLoopLen);
            if(lRetNum < 0)
                continue;

            // Compact: fixed fields first, loop records right behind them.
            strcpy(pPage2,str1);
            str1 = pPage2;
            pPage2 = str1 + strlen(str1)+1;
            memcpy(pPage2,LoopStr1,lLoopLen);
            LoopStr1 = pPage2;
            pLoop = LoopStr1;

            // Walk the NUL-separated loop records; never read past the
            // lLoopLen bytes TempletCompare reported as valid.
            for(i=0;i<lRetNum && pLoop < LoopStr1 + lLoopLen;i++)
            {
                p4 = strstr(pLoop,"URL=");
                if(p4 == NULL)
                {
                    pLoop+=strlen(pLoop)+1;
                    continue;
                }
                p4+=4;
                pGetTree->Find_A_dataNode(p4,flag);
                if(flag == 0)
                {
                    // Skip this record, but move on to the next one; without
                    // the advance every remaining iteration would look at the
                    // same record again.
                    pLoop+=strlen(pLoop)+1;
                    continue;
                }
                pGetTree->AddWords(p4,0);

                if(strlen(pSecondUrlMode) != 0)
                {
                    p2 = strstr(pSecondUrlMode,"<?URL?>");
                    if(p2 == NULL)
                    {
                        pLoop+=strlen(pLoop)+1;
                        continue;
                    }
                    // The URL text comes from a downloaded page: refuse
                    // anything that would not fit into Url.
                    const size_t prefixLen = p2-pSecondUrlMode;
                    const size_t urlLen = strlen(p4);
                    const size_t suffixLen = strlen(p2+7);
                    if(prefixLen + urlLen + suffixLen >= sizeof(Url))
                    {
                        pLoop+=strlen(pLoop)+1;
                        continue;
                    }
                    // Url = <text before placeholder> + URL + <text after it>.
                    memcpy(Url,pSecondUrlMode,prefixLen);
                    p3 = Url+prefixLen;
                    strcpy(p3,p4);
                    p3+=urlLen;
                    strcpy(p3,p2+7);
                }
                else
                {
                    if(strlen(p4) >= sizeof(Url))
                    {
                        pLoop+=strlen(pLoop)+1;
                        continue;
                    }
                    strcpy(Url,p4);
                }
                pLoop+=strlen(pLoop)+1;

                cget.DoGet(Url,NULL,0,0,0,0);
                if(cget.m_dwRet == 200)
                {
                    p = cget.HTMLFileBuf;
                    DelBlank(p);
                    // Scratch space behind the first-level loop records.
                    pBuf = LoopStr1 + lLoopLen + 1;
                    LoopBuf = pBuf + (100<<10);
                    long lRet = TempletCompare(pControlHead[1],p,pBuf,LoopBuf,lLoopLen1);
                    if(lRet < 0)
                        continue;
                    // A page without loop records is still submitted once.
                    if(lRet == 0) lRet++;
                    p2 = LoopBuf;
                    str2 = LoopBuf + lLoopLen1+1;
                    for(k=0;k<lRet;k++)
                    {
                        // str2 = server-side argument strings, then the fixed
                        // fields, then the k-th loop record, '&'-separated.
                        p3 = str2;
                        for(m=0;m<lClientArgNum;m++)
                        {
                            if(pClientArg[m].pArgvServer[lpNowArgIdx[m]] && *(pClientArg[m].pArgvServer[lpNowArgIdx[m]]))
                            {
                                strcpy(p3,pClientArg[m].pArgvServer[lpNowArgIdx[m]]);
                                p3+=strlen(p3);
                                *p3++ = '&';
                            }
                        }
                        strcpy(p3,pBuf);
                        p3+=strlen(pBuf);
                        *p3++ = '&';
                        strcpy(p3,p2);
                        p2+=strlen(p2)+1;
                        // Keep only the fields listed in pServerArg and submit.
                        str1 = str2+strlen(str2)+10;
                        GetFinalStr(str1,str2,pServerArg);
                        cget.DoGet(pServerUrl,"",str1,1,1,0,1);
                    }
                }
            }
        }
    }
    // Allocated with new[], so it must be released with delete[].
    delete[] lpNowArgIdx;
    return 0;
}
// Builds the final "name=value&" string for the fields listed in rule.
//
//   outbuf   - receives the result; for every selected field "name=value&"
//              is appended (so a non-empty result ends with '&'), then a NUL.
//   inputbuf - NUL-terminated "&name=value&name=value..." text to pick from.
//   rule     - NUL-terminated list of field names separated by '&'.
//
// For each name in rule, the first occurrence of "&<name>=" in inputbuf is
// looked up; if present, the text following it up to the next '&' (or the end
// of inputbuf) is taken as the value. Names that do not occur are skipped.
// Note that a field at the very start of inputbuf without a leading '&' is
// not found. Always returns 0.
//
// The lookup compares by length, so rule is never modified and no
// fixed-size scratch buffer is needed for the search key.
long TempletIdentify::GetFinalStr(char *outbuf,char *inputbuf,char *rule)
{
    char *out = outbuf;
    const char *name = rule;

    while(*name)
    {
        // Delimit the current field name: [name, nameEnd).
        const char *nameEnd = name;
        while(*nameEnd && *nameEnd != '&')
            nameEnd++;
        const size_t nameLen = nameEnd - name;

        // Find the first "&<name>=" in inputbuf. A name never contains '&',
        // so every candidate starts at an '&' of inputbuf.
        const char *value = NULL;
        for(const char *amp = strchr(inputbuf,'&'); amp != NULL; amp = strchr(amp+1,'&'))
        {
            // strncmp stops at a NUL in amp+1, so amp[1+nameLen] is only
            // read when the nameLen characters before it exist.
            if(strncmp(amp+1,name,nameLen) == 0 && amp[1+nameLen] == '=')
            {
                value = amp + nameLen + 2;
                break;
            }
        }

        if(value != NULL)
        {
            memcpy(out,name,nameLen);
            out += nameLen;
            *out++ = '=';
            while(*value && *value != '&')
                *out++ = *value++;
            *out++ = '&';
        }

        // Step over the separator, if any, to the next name.
        name = nameEnd;
        if(*name)
            name++;
    }
    *out = 0;
    return 0;
}
// Case-insensitive, whitespace-insensitive prefix comparison.
//
// Checks whether the first lLen characters of str2 (or all of str2 if it is
// shorter) occur at the start of str1. Blanks, tabs, CR and LF are skipped on
// both sides and letters are compared through Lowcase().
//
// Returns 0 on a match, -1 otherwise.
//
// The counter advances only when a character of str2 is consumed. Whitespace
// skipped in str1 must not count: otherwise a run of blanks in str1 would use
// up lLen and report a match although fewer than lLen characters of str2 had
// been compared.
// NOTE(review): whitespace skipped in str2 still counts towards lLen, on the
// assumption that callers pass the raw length of str2 (lDescriptionLen) -
// confirm.
long TempletIdentify::MyCompareString(char *str1,char *str2,long lLen)
{
    long lUsed = 0;   // characters of str2 consumed so far

    while(*str1 && *str2 && lUsed < lLen)
    {
        const char c1 = *str1;
        if(c1 == ' ' || c1 == '\t' || c1 == (char)0x0d || c1 == (char)0x0a)
        {
            str1++;
            continue;
        }
        const char c2 = *str2;
        if(c2 == ' ' || c2 == '\t' || c2 == (char)0x0d || c2 == (char)0x0a)
        {
            str2++;
            lUsed++;
            continue;
        }
        if(Lowcase(c1) != Lowcase(c2))
            break;
        str1++;
        str2++;
        lUsed++;
    }

    // Match when str2 is exhausted or its first lLen characters were consumed.
    if(*str2 == 0 || lLen == lUsed)
        return 0;
    return -1;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -