⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 utily.cpp

📁 VC下的INTERNET的建立
💻 CPP
字号:

// Utily.cpp : implementation of the HTML parsing helpers (hrefs, e-mail addresses, tags, text)
//
/****************************************************************
Pre-emptive Multithreading Web Spider
Copyright (c) 1998 by Sim Ayers.
**************************************************************/

#include "stdafx.h"
#include "Spider.h"
#include <string.h>
#include "utily.h"


// FindHref
//
// Searches a piece of text (normally one HTML tag) for a keyword, compared
// without regard to case, and extracts the first double-quoted value that
// follows the start of that keyword.
//
// NOTE on the parameter names, which are kept for compatibility:
//   lpszFind   - the text that is SEARCHED (the "haystack").
//   lpszString - the keyword that is looked for, e.g. "href".
//   lpszResult - receives the characters between the first pair of double
//                quotes found at or after the keyword; always reset to "".
//   nLen       - number of characters of lpszFind to consider.
//
// Returns TRUE when the keyword is found and either
//   - a complete "quoted" value follows (lpszResult holds it), or
//   - no double quote follows at all (lpszResult stays empty).
// Returns FALSE when the keyword is not found, when an argument is NULL,
// or when an opening quote has no matching closing quote.
//
// The input text is never modified, and no access is made past its
// terminating NUL.
BOOL FindHref(LPCTSTR lpszFind,LPCTSTR lpszString,CString& lpszResult,UINT nLen)
{
	lpszResult = "";

	if (lpszFind == NULL || lpszString == NULL || nLen == 0)
		return FALSE;

	const UINT nKeyLen = (UINT)lstrlen(lpszString);
	if (nKeyLen > nLen)
		return FALSE;		// keyword cannot fit into the text at all

	// Only try start positions where the whole keyword still fits inside
	// the first nLen characters, and stop early at the terminating NUL in
	// case nLen overstates the real length of the text.
	for (UINT nPos = 0; nPos <= nLen - nKeyLen && lpszFind[nPos] != '\0'; nPos++)
	{
		// Bounded, case-insensitive compare; unlike a temporary '\0'
		// patch this neither writes to the caller's buffer nor reads
		// beyond the end of the string.
		if (_tcsnicmp(lpszFind + nPos, lpszString, nKeyLen) != 0)
			continue;

		// Keyword found: look for the opening quote from the keyword on.
		LPCTSTR lpszOpen = _tcschr(lpszFind + nPos, _T('"'));
		if (lpszOpen == NULL)
			return TRUE;	// keyword present, but no quoted value

		LPCTSTR lpszClose = _tcschr(lpszOpen + 1, _T('"'));
		if (lpszClose == NULL)
			return FALSE;	// unterminated quote: malformed value

		lpszResult = CString(lpszOpen + 1, (int)(lpszClose - lpszOpen - 1));
		return TRUE;
	}

	return FALSE;
}



// GetHref
//
// Collects link targets from an HTML buffer.
//
//   szBuffer - NUL-terminated HTML text.
//   szfind   - attribute keyword handed to FindHref (e.g. "href").
//   list     - receives the extracted links; existing entries are kept.
//
// The buffer is first cut into tags (everything from '<' up to and including
// the next '>').  Each tag is then examined:
//   - a tag containing "NOINDEX" or "noindex" aborts the scan and FALSE is
//     returned (robot exclusion); links appended before that point stay in
//     the list;
//   - if FindHref matches szfind, the quoted value is appended unless it is
//     empty or contains "mailto:";
//   - otherwise, if FindHref matches "option", the quoted value is appended
//     when it contains "http".
//
// Returns TRUE when list holds at least one entry on exit, FALSE otherwise
// (including NULL or empty input).
BOOL GetHref(LPCSTR szBuffer,LPCSTR szfind, CStringList& list)
{
	if (szBuffer == NULL || szfind == NULL)
		return FALSE;

	const int nLen = (int)strlen(szBuffer);
	if (nLen <= 0)
		return FALSE;

	// Pass 1: split the buffer into "<...>" tags.
	CStringList tagList;
	CString strTag;
	BOOL bInTag = FALSE;
	for (int i = 0; i < nLen; i++)
	{
		const char ch = szBuffer[i];

		if (ch == '<')
			bInTag = TRUE;

		if (bInTag)
			strTag += ch;

		if (ch == '>')
		{
			tagList.AddTail(strTag);
			strTag = "";
			bInTag = FALSE;
		}
	}

	// Pass 2: walk the tags once.  GetNext advances the POSITION directly,
	// avoiding a FindIndex(i) lookup from the head for every element.
	CString strHref;
	POSITION pos = tagList.GetHeadPosition();
	while (pos != NULL)
	{
		strTag = tagList.GetNext(pos);

		// robot exclusion
		if (strTag.Find(_T("NOINDEX")) >= 0 || strTag.Find(_T("noindex")) >= 0)
			return FALSE;

		if (FindHref((LPCTSTR)strTag, szfind, strHref, strTag.GetLength()))
		{
			// FindHref reports TRUE with an empty result when the keyword
			// has no quoted value; an empty link is useless, so skip it.
			if (!strHref.IsEmpty() && strHref.Find(_T("mailto:")) < 0)
				list.AddTail(strHref);
		}
		else if (FindHref((LPCTSTR)strTag, _T("option"), strHref, strTag.GetLength()))
		{
			if (strHref.Find(_T("http")) >= 0)
				list.AddTail(strHref);
		}
	}

	return (list.GetCount() > 0) ? TRUE : FALSE;
}

// GetEmail
//
// Collects e-mail addresses from "mailto:" links in an HTML buffer.
//
//   szBuffer - NUL-terminated HTML text.
//   szfind   - attribute keyword handed to FindHref (e.g. "href").
//   list     - receives the addresses; existing entries are kept.
//
// The buffer is first cut into tags (everything from '<' up to and including
// the next '>').  Each tag is then examined:
//   - a tag containing "NOINDEX" or "noindex" aborts the scan and FALSE is
//     returned (robot exclusion); addresses appended before that point stay
//     in the list;
//   - if FindHref matches szfind and the quoted value contains "mailto:",
//     the text after "mailto:" is appended, unless that text is empty.
//
// Returns TRUE when list holds at least one entry on exit, FALSE otherwise
// (including NULL or empty input).
BOOL GetEmail(LPCSTR szBuffer,LPCSTR szfind, CStringList& list)
{
	if (szBuffer == NULL || szfind == NULL)
		return FALSE;

	const int nLen = (int)strlen(szBuffer);
	if (nLen <= 0)
		return FALSE;

	// Pass 1: split the buffer into "<...>" tags.
	CStringList tagList;
	CString strTag;
	BOOL bInTag = FALSE;
	for (int i = 0; i < nLen; i++)
	{
		const char ch = szBuffer[i];

		if (ch == '<')
			bInTag = TRUE;

		if (bInTag)
			strTag += ch;

		if (ch == '>')
		{
			tagList.AddTail(strTag);
			strTag = "";
			bInTag = FALSE;
		}
	}

	// Pass 2: walk the tags once.  GetNext advances the POSITION directly,
	// avoiding a FindIndex(i) lookup from the head for every element.
	const int nSchemeLen = 7;	// length of "mailto:"
	CString strHref;
	POSITION pos = tagList.GetHeadPosition();
	while (pos != NULL)
	{
		strTag = tagList.GetNext(pos);

		// robot exclusion
		if (strTag.Find(_T("NOINDEX")) >= 0 || strTag.Find(_T("noindex")) >= 0)
			return FALSE;

		if (!FindHref((LPCTSTR)strTag, szfind, strHref, strTag.GetLength()))
			continue;

		// A single Find both tests for the scheme and locates it.
		const int nAt = strHref.Find(_T("mailto:"));
		if (nAt < 0)
			continue;

		CString strAddress = strHref.Mid(nAt + nSchemeLen);
		if (!strAddress.IsEmpty())	// a bare "mailto:" carries no address
			list.AddTail(strAddress);
	}

	return (list.GetCount() > 0) ? TRUE : FALSE;
}

// GetHTMLTags
//
// Appends every tag of an HTML buffer to list.  A tag is the run of
// characters from a '<' up to and including the next '>'.  A '>' that is
// met while no tag is open appends an empty string.  Characters of a tag
// that is still open when the buffer ends are not appended.
//
// Returns TRUE when list holds at least one entry on exit, FALSE otherwise
// (including an empty buffer).
BOOL GetHTMLTags(LPCSTR szBuffer, CStringList& list)
{
	const int nTotal = strlen(szBuffer);
	if (nTotal <= 0)
		return FALSE;

	CString strTag;
	BOOL bCollecting = FALSE;

	for (int nIdx = 0; nIdx < nTotal; nIdx++)
	{
		const char ch = szBuffer[nIdx];

		// '<' switches collection on before the character is stored,
		// so the bracket itself becomes part of the tag.
		if (ch == '<')
			bCollecting = TRUE;

		if (bCollecting)
			strTag += ch;

		// '>' has already been stored above (if collecting); flush.
		if (ch == '>')
		{
			list.AddTail(strTag);
			strTag.Empty();
			bCollecting = FALSE;
		}
	}

	return (list.GetCount() > 0) ? TRUE : FALSE;
}

// GetHTMLText
//
// Appends the text found between the tags of an HTML buffer to list.
// Collection starts after the first '>' in the buffer; from then on the
// characters up to the next '<' are gathered, and the gathered text
// (possibly empty) is appended each time a '>' is reached.  Text before the
// first '>' and text after the last '>' is therefore not appended.
//
// Returns TRUE when list holds at least one entry on exit, FALSE otherwise
// (including an empty buffer).
BOOL GetHTMLText(LPCSTR szBuffer, CStringList& list)
{
	const int nTotal = strlen(szBuffer);
	if (nTotal <= 0)
		return FALSE;

	CString strText;
	BOOL bInText = FALSE;	// TRUE while positioned between '>' and '<'

	for (int nIdx = 0; nIdx < nTotal; nIdx++)
	{
		const char ch = szBuffer[nIdx];

		// An opening bracket ends the current run of text.
		if (ch == '<')
			bInText = FALSE;

		if (ch == '>')
		{
			// Closing bracket: flush what was gathered and start
			// gathering the text that follows this tag.
			list.AddTail(strText);
			strText.Empty();
			bInText = TRUE;
		}
		else if (bInText)
		{
			strText += ch;
		}
	}

	return (list.GetCount() > 0) ? TRUE : FALSE;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -