⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 templetidentify.cpp

📁 可以进行模板定制的动态网页下载分析的源程序
💻 CPP
📖 第 1 页 / 共 2 页
字号:
				lLastSign = 0;
			}
		}
		else
		{
			*p1++=*p++;
			lLastSign = 0;
		}
	}
	*p1 = 0;
	return 0;
}
// Matches Content against the chain of template controls that starts at
// pTemplet and extracts the text captured by each control's variable.
//
// Every control contributes a literal `description` that must appear next in
// Content (whitespace on either side is ignored) and, optionally, a variable
// name. The text between this control's description and the description of the
// following control is written to an output buffer as "&<name>=<text>", with
// anything inside HTML tags (<...>) left out.
//
//   pTemplet  first control of the chain to match.
//   Content   NUL-terminated text to match; only read here.
//   OutStr    receives captures made while not in loop mode, NUL-terminated.
//   pLoopStr  receives captures made in loop mode; a NUL is written after each
//             loop-back, so it holds consecutive NUL-terminated records.
//   lLoopLen  out: number of bytes written to pLoopStr, final terminator included.
//
// Returns -1 when Content does not fit the template; otherwise the number of
// loop-backs taken plus 1 if any visited control had lLoopFlag set.
//
// No buffer sizes are passed in, so nothing here bounds the writes to OutStr
// or pLoopStr.
long TempletIdentify::TempletCompare(TempletControl *pTemplet,char *Content,char *OutStr,char *pLoopStr,long &lLoopLen)
{
	TempletControl *pNode1 = pTemplet;	// control currently being matched
	char *p2 = Content,*p1;	
	char *p3 = OutStr;			// write cursor for non-loop captures
	char *p4 = pLoopStr;			// write cursor for loop captures
	long lLoopFlag = 0;			// 1 while captures go to pLoopStr
	long lFlag;				// 0 = content ended, 1 = next control found, 2 = loop control found
	long lLoopNum = 0;			// loop-backs taken so far
	long HaveLoop = 0;			// 1 once any control with lLoopFlag was visited
	while(pNode1)
	{
		// A control flagged as loop start switches output to the loop buffer.
		if(pNode1->lLoopFlag)
		{
			HaveLoop = 1;
			lLoopFlag = 1;
		}
		p1 = pNode1->description;
		if(*p1)
		{
			// Step 1: the description must match at the current content position.
			// Comparison is exact per character; whitespace in either string is skipped.
			while(*p1)
			{
				if(*p1 == *p2)
				{
					p1++;
					p2++;
					continue;
				}
				else if (*p2 == ' ' || *p2 == '\t' || *p2 == (char)0x0d || *p2 == (char)0x0a)
				{
					p2++;
					continue;
				}
				else if(*p1 == ' ' || *p1 == '\t' || *p1 == (char)0x0d || *p1 == (char)0x0a)
				{
					p1++;
					continue;
				}
				// NOTE(review): as written, the two branches below compare against a single
				// space followed by the terminator, and a leading ' ' has already been
				// consumed above, so they cannot be taken. The 2-byte skip suggests the
				// literal may originally have been a double-byte space - confirm against
				// the original file encoding.
				else if(strncmp(p1," ",2) == 0)
				{
					p1+=2;
					continue;
				}
				else if(strncmp(p2," ",2) == 0)
				{
					p2+=2;
					continue;
				}
				break;
			}
			// Description not fully consumed: the content does not fit the template.
			if(*p1)
				return -1;
			// Step 2: open a "&<name>=" entry in whichever buffer is active.
			if(*pNode1->pVariable)
			{
				if(lLoopFlag)
				{
					*p4++ = '&';
					strcpy(p4,pNode1->pVariable);
					p4+=strlen(pNode1->pVariable);
					*p4++ = '=';
				}
				else
				{
					*p3++ = '&';
					strcpy(p3,pNode1->pVariable);
					p3+=strlen(pNode1->pVariable);
					*p3++ = '=';
				}
			}
			// Step 3: scan forward until the next control (or the loop control) starts here.
			lFlag = 0;
			long lTabFlag = 0;	// 1 while inside an HTML tag
			char *pBak = p2;	// not read anywhere in this function
			long lYhFlag1 = 0;	// 1 while inside a '...' quoted run within a tag
			long lYhFlag2 = 0;	// 1 while inside a "..." quoted run within a tag
			// No-op: declares an unused local. Looks like a leftover debugging anchor.
			if(pNode1->pLoopControl)
				long kk = 0;
			while(*p2)
			{
				// The next control is tested before the loop control, so it wins
				// when both descriptions match at this position.
				if(pNode1->pNextControl)
				{
					if(MyCompareString(p2,pNode1->pNextControl->description,pNode1->pNextControl->lDescriptionLen) == 0)
					{
						lFlag = 1;
						break;
					}
				}
				if(pNode1->pLoopControl)
				{
					if(MyCompareString(p2,pNode1->pLoopControl->description,pNode1->pLoopControl->lDescriptionLen) == 0)
					{
						lFlag = 2;
						break;
					}
				}
				if(*pNode1->pVariable)
				{
					// Capturing: copy text outside of tags, drop everything inside <...>.
					if(*p2 == '<')
					{
						lTabFlag = 1;
						p2++;
						continue;
					}
					else if(*p2 == '>')
					{
						if(lTabFlag)
						{
							// A '>' inside a quoted attribute value does not close the tag.
							if(lYhFlag1 || lYhFlag2)
							{
								p2++;
								continue;
							}
							lTabFlag = 0;
							p2++;
							continue;
						}
						// A '>' outside any tag is ordinary text.
						if(lLoopFlag)
							*p4++ = *p2++;
						else
							*p3++ = *p2++;
					}
					else if(lTabFlag)
					{
						// Inside a tag: toggle the quote state, copy nothing.
						if(*p2 == '\'')
						{
							if(lYhFlag1 == 1)
								lYhFlag1 = 0;
							else
								lYhFlag1 = 1;
						}
						else if(*p2 == '"')
						{
							if(lYhFlag2 == 1)
								lYhFlag2 = 0;
							else
								lYhFlag2 = 1;
						}
						p2++;
					}
					else
					{
						if(lLoopFlag)
							*p4++ = *p2++;
						else
							*p3++ = *p2++;
					}
				}
				else
					p2++;	// control has no variable: skip the text
			}
			if(lFlag == 0)
			{
				// Content ran out. That is only acceptable on the last control of the chain.
				if(pNode1->pNextControl || pNode1->pLoopControl)
					return -1;
				*p4++ = 0;
				lLoopLen = p4 - pLoopStr;
				*p3++ = 0;
				return lLoopNum+HaveLoop;
			}
			// Moving on to the next control from a control that could have looped back
			// ends loop mode; later captures go to OutStr again.
			if(lLoopFlag == 1 && pNode1->pLoopControl && lFlag==1)
				lLoopFlag = 0;
			if(lFlag == 1)
				pNode1 = pNode1->pNextControl;
			else if(lFlag == 2)
			{
				// Loop back: close the current record (if anything was written) and count it.
				pNode1 = pNode1->pLoopControl;
				if(p4 != pLoopStr)
					*p4++ = 0;
				lLoopNum ++;
			}
		}
		else
		{
			// Control without a description: the next control or the loop control must
			// match right at the current position, otherwise matching fails. Unlike the
			// branch above, a loop-back here neither terminates a record nor counts.
			lFlag = 0;
			if(pNode1->pNextControl)
			{
				if(MyCompareString(p2,pNode1->pNextControl->description,pNode1->pNextControl->lDescriptionLen) == 0)
				{
					pNode1 = pNode1->pNextControl;
					lFlag = 1;
				}			
			}
			if(lFlag == 0 &&pNode1->pLoopControl)
			{
				if(MyCompareString(p2,pNode1->pLoopControl->description,pNode1->pLoopControl->lDescriptionLen) == 0)
				{
					pNode1 = pNode1->pLoopControl;
					lFlag = 2;
				}
			}
			if(lFlag == 0)
				return -1;
		}
	}
	// The chain is exhausted here (pNode1 is null), so this rejects leftover content.
	if(pNode1 || *p2)
		return -1;
	*p3 = 0;
	*p4++ = 0;
	lLoopLen = p4 - pLoopStr;
	return lLoopNum+HaveLoop;
}
// Main crawl loop.
//
// Repeatedly walks every combination of client-argument values. For each one:
//   1. builds "name=value&name=value..." and requests pClientUrl with it,
//   2. matches the returned page against pControlHead[0],
//   3. for every loop record that carries "URL=", fetches that URL (optionally
//      wrapped into pSecondUrlMode at its "<?URL?>" marker) and matches the
//      result against pControlHead[1],
//   4. combines the server-side argument values with both capture sets, filters
//      them through pServerArg (GetFinalStr) and submits them to pServerUrl.
//
// The loop ends when the file stop.dat exists under syspath; the tree is then
// saved to indextree.dat in the same directory. Always returns 0.
//
// All working strings are carved out of pAllBuf one after another; no sizes
// are tracked, so the fixed 100 KB offsets used below are assumptions about how
// large a capture can get.
long TempletIdentify::ExcuteProc()
{
	WSADATA wsaData;
	// Requests Winsock version 0x0101. NOTE(review): the result is not checked and
	// this function has no matching WSACleanup - confirm whether the process
	// balances it elsewhere.
	::WSAStartup(0x0101,&wsaData);
	cgetwebpage cget;
	// The first 512 KB of pAllBuf are given to cget as its page buffer; everything
	// built below lives in the scratch area that starts at pNowPos.
	pNowPos = pAllBuf;
	cget.HTMLFileBuf = pNowPos;
	pNowPos += 512<<10;
	long i,j,k,m;
	char *str1;
	char *str2;
	char *pBuf;
	char *LoopStr1,*LoopBuf;
	char Url[300];
	char *pPage1,*pPage2;
	char *p,*p2,*p3,*p4;
	long lRetNum = 0;
	long lLoopLen,lLoopLen1;
	int flag;
	char *pLoop;
	long lBreakFlag = 1;
	long lMaxj = 1;
	long lTmp,lTmp1;
	// lpNowArgIdx[i] remembers which value of client argument i the current request uses.
	long *lpNowArgIdx = new long[lClientArgNum];
	// lMaxj = number of value combinations across all client arguments.
	for(i = 0;i<lClientArgNum;i++)
		lMaxj*=pClientArg[i].lArgValueNum;
	// Keep cycling through all combinations until the stop file shows up.
	for(;lBreakFlag;)
	{
		j = 0;
		for(;j<lMaxj;j++,Sleep(100))
		{
			// stop.dat under syspath is the shutdown signal: save the tree and leave.
			sprintf(Url,"%s\\stop.dat",syspath);
			if(access(Url,0) == 0)
			{
				sprintf(Url,"%s\\indextree.dat",syspath);
				pGetTree->SaveTree(Url);
				lBreakFlag = 0;
				break;
			}
			pPage1 = pNowPos;
			p = pPage1;
			// Build the request argument string and download the page.
			// j is decoded as a mixed-radix number: one digit per client argument.
			lTmp = j;

			for(i=0;i<lClientArgNum;i++)
			{
				lTmp1 = lTmp%pClientArg[i].lArgValueNum;
				lTmp/=pClientArg[i].lArgValueNum;
				strcpy(p,pClientArg[i].argName);
				p+=strlen(p);
				*p++ = '=';
				lpNowArgIdx[i] = lTmp1;
				strcpy(p,pClientArg[i].pArgv[lTmp1]);
				p+=strlen(p);
				*p++='&';
			}
			// Replace the trailing '&' with the terminator. With no client arguments
			// nothing was written, so terminate in place instead of touching the byte
			// in front of the buffer.
			if(p != pPage1)
				*(p-1) = 0;
			else
				*p = 0;
			cget.DoGet(pClientUrl,"",pPage1,1,1,0,1);
			if(cget.m_dwRet != 200)
				continue;				
			// Page analysis: copy the page behind the argument string, strip blanks,
			// then match it against the first template.
			pPage2 = pPage1+strlen(pPage1)+1;
			strcpy(pPage2,cget.HTMLFileBuf);
			DelBlank(pPage2);
			str1 = pPage2+strlen(pPage2)+1;
			LoopStr1 = str1 + (100<<10);
			lRetNum = TempletCompare(pControlHead[0],pPage2,str1,LoopStr1,lLoopLen);
			if(lRetNum < 0)
				continue;
			// The page text is no longer needed: compact the two capture buffers
			// down onto its place.
			strcpy(pPage2,str1);
			str1 = pPage2;
			pPage2 = str1 + strlen(str1)+1;
			memcpy(pPage2,LoopStr1,lLoopLen);
			LoopStr1 = pPage2;
			pLoop = LoopStr1;
			// Walk the NUL-separated loop records; pLoop always points at the current one.
			for(i=0;i<lRetNum;i++)
			{
				p4 = strstr(pLoop,"URL=");
				if(p4 == NULL)
				{
					pLoop+=strlen(pLoop)+1;
					continue;
				}
				p4+=4;
				pGetTree->Find_A_dataNode(p4,flag);
				if(flag == 0)
				{
					// Step to the next record like every other skip path does. Without
					// this, all remaining iterations re-read the same record and every
					// later record on the page is ignored.
					// NOTE(review): flag == 0 is treated as "skip this entry", presumably
					// because the URL is already in the tree - confirm against
					// Find_A_dataNode.
					pLoop+=strlen(pLoop)+1;
					continue;
				}
				pGetTree->AddWords(p4,0);
				if(strlen(pSecondUrlMode) != 0)
				{
					// Substitute the captured URL for the "<?URL?>" marker (7 characters).
					p2 = strstr(pSecondUrlMode,"<?URL?>");
					if(p2 == NULL)
					{
						pLoop+=strlen(pLoop)+1;
						continue;
					}
					// The captured URL comes from a downloaded page: skip it if the
					// expanded string would not fit into Url.
					if((p2-pSecondUrlMode)+strlen(p4)+strlen(p2+7) >= sizeof(Url))
					{
						pLoop+=strlen(pLoop)+1;
						continue;
					}
					memcpy(Url,pSecondUrlMode,p2-pSecondUrlMode);
					p3 = Url+(p2-pSecondUrlMode);
					strcpy(p3,p4);
					p3+=strlen(p3);
					strcpy(p3,p2+7);
				}
				else
				{
					// Same protection for the unwrapped URL.
					if(strlen(p4) >= sizeof(Url))
					{
						pLoop+=strlen(pLoop)+1;
						continue;
					}
					strcpy(Url,p4);
				}
				pLoop+=strlen(pLoop)+1;
				cget.DoGet(Url,NULL,0,0,0,0);
				if(cget.m_dwRet == 200)
				{
					// Match the second-level page against the second template.
					p = cget.HTMLFileBuf;
					DelBlank(p);
					pBuf = LoopStr1 + lLoopLen + 1;
					LoopBuf = pBuf + (100<<10);
					long lRet = TempletCompare(pControlHead[1],p,pBuf,LoopBuf,lLoopLen1);
					if(lRet < 0)
						continue;
					// A page without loop records is still submitted once.
					if(lRet == 0) lRet++;
					p2 = LoopBuf;
					str2 = LoopBuf + lLoopLen1+1;
					for(k=0;k<lRet;k++)
					{
						// str2 = server-side values of the current client arguments
						//        + non-loop captures + '&' + the k-th loop record.
						p3 = str2;
						for(m=0;m<lClientArgNum;m++)
						{
							if(pClientArg[m].pArgvServer[lpNowArgIdx[m]] && *(pClientArg[m].pArgvServer[lpNowArgIdx[m]]))
							{
								strcpy(p3,pClientArg[m].pArgvServer[lpNowArgIdx[m]]);
								p3+=strlen(p3);
								*p3++ = '&';
							}
						}
						strcpy(p3,pBuf);
						p3+=strlen(pBuf);
						*p3++ = '&';
						strcpy(p3,p2);
						p2+=strlen(p2)+1;
						// Keep only the fields named in pServerArg and submit them.
						str1 = str2+strlen(str2)+10;
						GetFinalStr(str1,str2,pServerArg);
						cget.DoGet(pServerUrl,"",str1,1,1,0,1);
					}
				}
			}
		}
	}
	// Allocated with new[], so it must be released with delete[].
	delete[] lpNowArgIdx;
	return 0;
}
// Filters inputbuf down to the fields listed in rule.
//
//   outbuf    receives the result: "name=value&" for every rule name found,
//             in rule order, NUL-terminated (the trailing '&' is kept).
//   inputbuf  NUL-terminated "...&name=value&name=value..." string. A field is
//             only recognised when its name is preceded by '&'.
//   rule      NUL-terminated list of field names separated by '&'. Only read.
//
// For each name the first "&<name>=" occurrence in inputbuf is used; names
// that do not occur are left out. Always returns 0. outbuf must be large
// enough for the result; no size is passed in.
//
// The lookup compares in place, so names of any length are handled and rule
// is never written to.
long TempletIdentify::GetFinalStr(char *outbuf,char *inputbuf,char *rule)
{
	char *pOut = outbuf;
	const char *pRule = rule;
	while(*pRule)
	{
		// Isolate the next name: [pName, pName + lNameLen).
		const char *pName = pRule;
		while(*pRule && *pRule != '&')pRule++;
		const long lNameLen = (long)(pRule - pName);

		// Find the first "&<name>=" in inputbuf. strncmp stops at the end of
		// inputbuf, so the '=' probe never reads past the terminator.
		const char *pValue = NULL;
		for(const char *pScan = inputbuf;*pScan;pScan++)
		{
			if(*pScan == '&' && strncmp(pScan+1,pName,lNameLen) == 0 && pScan[1+lNameLen] == '=')
			{
				pValue = pScan+lNameLen+2;
				break;
			}
		}
		if(pValue != NULL)
		{
			// Emit "name=value&"; the value runs up to the next '&' or the end.
			memcpy(pOut,pName,lNameLen);
			pOut+=lNameLen;
			*pOut++='=';
			while(*pValue && *pValue != '&')
				*pOut++ = *pValue++;
			*pOut++ = '&';
		}
		// Step over the separator, if any.
		if(*pRule)pRule++;
	}
	*pOut = 0;
	return 0;
}
// Tests whether str1 starts with the first lLen characters of str2.
//
// Characters are compared through Lowcase(); blanks (space, tab, CR, LF) in
// either string are skipped. Returns 0 on a match, -1 otherwise.
//
//   str1  text being searched (read only).
//   str2  pattern (read only).
//   lLen  number of leading str2 characters that have to match. Skipped blanks
//         in str2 count towards it.
//
// Only characters consumed from str2 count against lLen. Skipping a blank in
// str1 consumes nothing from the pattern, so it must not bring the comparison
// closer to "complete" - otherwise lLen blanks in str1 would match any pattern.
long TempletIdentify::MyCompareString(char *str1,char *str2,long lLen)
{
	long lUsed = 0;	// characters of str2 consumed so far
	while(*str1 && *str2 && lUsed < lLen)
	{
		if(*str1 == ' ' || *str1 == '\t' || *str1 == (char)0x0d || *str1 == (char)0x0a)
		{
			str1++;
			continue;
		}
		if(*str2 == ' ' || *str2 == '\t' || *str2 == (char)0x0d || *str2 == (char)0x0a)
		{
			str2++;
			lUsed++;
			continue;
		}
		if(Lowcase(*str1) != Lowcase(*str2))
			break;
		str1++;
		str2++;
		lUsed++;
	}
	// Match when the whole pattern, or its first lLen characters, were consumed.
	if(*str2 == 0 || lUsed == lLen)
		return 0;
	return -1;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -