⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 thread.cpp

📁 VC下的INTERNET的建立
💻 CPP
📖 第 1 页 / 共 2 页
字号:
	{
		// catch errors from WinINet
		dwRet = pEx->m_dwError;
		m_pServer = NULL;
		pEx->Delete();
		CleanUp();
		return   dwRet;
	}

	m_strCurrentServer = ServerName;

	try
	{

		// This can never return NULL. The call may have failed, but it can
		// never be null.
		m_pFile = m_pServer->OpenRequest(_T("GET"),strObject,NULL, m_nThreadID, NULL, NULL, dwHttpRequestFlags);
		m_pFile->SendRequest();

	}

	catch (CInternetException* pEx)
	{
		// catch errors from WinINet
		dwRet = pEx->m_dwError;
		m_pFile = NULL;
		pEx->Delete();
		CleanUp();
		return dwRet;
	}

	return dwRet;
}


// Sends an HTTP GET for strObject to lpServerName and returns the outcome.
//
// Parameters:
//   lpServerName - host to connect to (default HTTP port is used).
//   strObject    - object/path to request on that host.
//
// Return value:
//   0                  - an argument was NULL, or m_bDone was set while working
//                        (also returned as FALSE when CleanUp() fails).
//   1                  - there is no session (m_pSession == NULL).
//   2                  - the request was issued but no status code was stored
//                        by QueryInfoStatusCode().
//   WinINet error code - GetHttpConnection() threw a CInternetException.
//   otherwise          - the value written by QueryInfoStatusCode(), or the
//                        result of NewConnection() when the retry did not
//                        return HTTP_STATUS_OK.
//
// The connection in m_pServer is reused as long as the server name matches
// m_strCurrentServer; the request object in m_pFile is left open on success.
DWORD CSpiderThread::GetHttpStatus(LPCTSTR lpServerName,LPCTSTR strObject)
{
	if(lpServerName == NULL || strObject == NULL) return 0;

	INTERNET_PORT nPort = INTERNET_DEFAULT_HTTP_PORT;
	DWORD dwRet = 1;

	if (m_pSession == NULL) return dwRet;

	// Record which URL this session is currently working on.
	m_pSession->m_strHttpSite.Format("%s%s",lpServerName,strObject);

	dwRet = 2;

	// Release the request object left over from the previous call.
	if(m_pFile != NULL)
	{
		m_pFile->Close();
		delete m_pFile;
		m_pFile = NULL;
	}

	if (m_bDone) return 0;

	// Picked a new server: close out the old connection so a new one is made.
	if (m_strCurrentServer != lpServerName && m_pServer != NULL)
	{
		m_pServer->Close();
		delete m_pServer;
		m_pServer = NULL;
	}

	if (m_pServer == NULL)
	{
		try
		{
			m_pServer = m_pSession->GetHttpConnection(lpServerName,nPort);
		}
		catch (CInternetException* pEx)
		{
			// catch errors from WinINet
			dwRet = pEx->m_dwError;
			m_pServer = NULL;
			pEx->Delete();
			if(!CleanUp()) return FALSE;
			return dwRet;
		}

		if (m_bDone) return 0;

		m_strCurrentServer = lpServerName;
	}

	if (m_bDone) return 0;
	if (m_pServer == NULL) return dwRet;

	try
	{
		// This can never return NULL. The call may have failed, but it can
		// never be null.
		m_pFile = m_pServer->OpenRequest(_T("GET"),strObject,NULL, m_nThreadID, NULL, NULL, dwHttpRequestFlags);
		m_pFile->SendRequest();
	}
	catch (CInternetException* pEx)
	{
		// catch errors from WinINet
		dwRet = pEx->m_dwError;
		pEx->Delete();

		// m_pFile is deliberately NOT reset here: when OpenRequest() succeeded
		// and SendRequest() threw, m_pFile still owns a request object and
		// CleanUp() must close and delete it. (If OpenRequest() itself threw,
		// m_pFile is still NULL from the release above.)
		if(!CleanUp()) return FALSE;

		if(dwRet == ERROR_INTERNET_TIMEOUT ) Sleep(1000);  // Connection timed out, try again on new connection
		dwRet = NewConnection(lpServerName,strObject);
		if(dwRet != HTTP_STATUS_OK) return dwRet;
	}

	if (m_bDone) return 0;

	if(m_pFile != NULL)
		m_pFile->QueryInfoStatusCode(dwRet);

	return dwRet;
}

// Extracts every href from the page text in pThreadParams->m_Contents and
// dispatches a check for each resulting URL.
//
// Parameters:
//   ServerName    - server name used to tell links on this server from
//                   external ones (substring match on the URL).
//   pThreadParams - per-thread parameter block; m_pszURL is the page URL,
//                   m_Contents the page text, m_hwndNotifyProgress the window
//                   that receives the notifications.
//
// Returns FALSE when an argument is missing, the page URL is empty or no href
// could be extracted; 0 when m_bDone is set while working; TRUE otherwise.
//
// Three passes are made over the URL list produced by GetServerList():
//   1. URLs that do not contain ServerName are passed to GetStatus() with
//      HTTP_CHECK_URL.
//   2. URLs that contain ServerName are parsed and their object part is passed
//      to CheckURL().
//   3. Only when m_RootLinks is set: URLs that contain both ServerName and the
//      page directory, and differ from the page URL itself, are passed to
//      GetStatus() with HTTP_CHECK_URL_ROOT.
BOOL CSpiderThread::CheckAllURLs(LPCTSTR ServerName,ThreadParams *pThreadParams)
{
	if(ServerName == NULL || pThreadParams == NULL) return FALSE;
	if(pThreadParams->m_pszURL.IsEmpty()) return FALSE;

	CString strMainURL = pThreadParams->m_pszURL;

	CStringList list;
	CString strObject,strSub;
	CString strServer,strURL;
	DWORD dwServiceType;
	INTERNET_PORT nPort;
	POSITION pos = NULL;

	// Collect the hrefs while the buffer is locked, and always unlock it again
	// before leaving, including when nothing was found.
	LPCTSTR lpszText = pThreadParams->m_Contents.LockBuffer();
	const BOOL bHaveLinks = (lpszText != NULL) && GetHref(lpszText,_T("href"),list);
	pThreadParams->m_Contents.UnlockBuffer();
	if(!bHaveLinks) return FALSE;

	if (m_bDone)
		return 0;

	// Rewrites list in place and reports the page directory in strSub.
	int count = GetServerList(pThreadParams->m_pszURL,list,strSub);

	// NOTE(review): plock presumably guards lURLCount until this function
	// returns, which includes the INFINITE waits below - confirm that this
	// cannot block other threads indefinitely.
	CriticalSectionLock  plock;

	lURLCount += count;

	::SendMessage(pThreadParams->m_hwndNotifyProgress,
				WM_USER_URL_STATUS, 0, (LONG)lURLCount);

	if (m_bDone)	return 0;

	if(count <= 0)
		return TRUE;

	// Pass 1: links that do not mention this server.
	for(pos = list.GetHeadPosition(); pos != NULL; )
	{
		strObject = list.GetNext(pos);

		if( strObject.Find(ServerName) < 0 )   // external link check
			GetStatus(pThreadParams,HTTP_CHECK_URL,strObject);		// create new threads for each URL

		if (m_bDone)	return 0;

		if(lThreadCount >= MAXIMUM_WAIT_OBJECTS)
			WaitForSingleObject(hConnection,INFINITE);
	}

	// Pass 2: links on this server.
	for(pos = list.GetHeadPosition(); pos != NULL; )
	{
		strObject = list.GetNext(pos);

		if( strObject.Find(ServerName) > 0 )
		{
			if(AfxParseURL(strObject,dwServiceType,strServer,strURL,nPort))
			{
				if(!strURL.IsEmpty())
					CheckURL(ServerName,strURL,pThreadParams);  // check root links in the current file
			}
		}

		if (m_bDone)	return 0;
	}

	// Pass 3 (optional): files in the page's own directory.
	if(pThreadParams->m_RootLinks)
	{
		for(pos = list.GetHeadPosition(); pos != NULL; )
		{
			strObject = list.GetNext(pos);

			// get root files and check the links in those files
			if( strObject.Find(ServerName) > 0
				&& strObject.Find(strSub) > 0
				&& strMainURL.Compare(strObject) != 0 )
			{
				GetStatus(pThreadParams,HTTP_CHECK_URL_ROOT,strObject);  // create new thread
			}

			if (m_bDone)	return 0;

			if(lThreadCount >= MAXIMUM_WAIT_OBJECTS)
				WaitForSingleObject(hConnection,INFINITE);
		}
	}

	return TRUE;
}




// Stores `line` in pThreadParams->m_string and sends WM_USER_THREAD_PRINT to
// the notification window, passing the parameter block as lParam.
// Always returns TRUE.
BOOL CSpiderThread::PrintLine(ThreadParams *pThreadParams,LPCSTR line)
{
	pThreadParams->m_string = line;

	const LPARAM lParamBlock = (LPARAM)pThreadParams;
	::SendMessage(pThreadParams->m_hwndNotifyProgress, WM_USER_THREAD_PRINT, 0, lParamBlock);

	return TRUE;
}

// Sends WM_USER_THREAD_FILE to the notification window with the text pointer
// `line` as lParam. Always returns TRUE.
BOOL CSpiderThread::PrintFile(ThreadParams *pThreadParams,
							LPCSTR line)
{
	const LPARAM lParamText = (LPARAM)line;
	::SendMessage(pThreadParams->m_hwndNotifyProgress, WM_USER_THREAD_FILE, 0, lParamText);

	return TRUE;
}
// Sends WM_USER_THREAD_STATUS to the notification window with the text pointer
// `line` as lParam. Always returns TRUE.
BOOL CSpiderThread::PrintStatus(ThreadParams *pThreadParams,
							LPCSTR line)
{
	const LPARAM lParamText = (LPARAM)line;
	::SendMessage(pThreadParams->m_hwndNotifyProgress, WM_USER_THREAD_STATUS, 0, lParamText);

	return TRUE;
}

// Copies `line` into pThreadParams->m_checkURLName and sends
// WM_USER_THREAD_GETSTATUS to the notification window, with `ntype` as wParam
// and the parameter block as lParam. Always returns TRUE.
BOOL CSpiderThread::GetStatus(ThreadParams *pThreadParams,UINT ntype,LPCSTR line)
{
	pThreadParams->m_checkURLName.Format("%s",(LPCSTR)line);

	const LPARAM lParamBlock = (LPARAM)pThreadParams;
	::SendMessage(pThreadParams->m_hwndNotifyProgress, WM_USER_THREAD_GETSTATUS,
				(UINT)ntype, lParamBlock);

	return TRUE;
}

// Copies `line` into pThreadParams->m_checkURLName and sends
// WM_USER_THREAD_GETNEWFILE to the notification window, with `ntype` as wParam
// and the parameter block as lParam. Always returns TRUE.
BOOL CSpiderThread::GetNewFile(ThreadParams *pThreadParams,UINT ntype,LPCSTR line)
{
	pThreadParams->m_checkURLName.Format("%s",(LPCSTR)line);

	const LPARAM lParamBlock = (LPARAM)pThreadParams;
	::SendMessage(pThreadParams->m_hwndNotifyProgress, WM_USER_THREAD_GETNEWFILE,
				(UINT)ntype, lParamBlock);

	return TRUE;
}

// Forgets the current server name, then closes and deletes the request
// (m_pFile), the connection (m_pServer) and the session (m_pSession), in that
// order, resetting each pointer to NULL.
//
// Returns FALSE as soon as one of the Close() calls throws a
// CInternetException (the remaining objects are then left as they are),
// 0 when everything was released but m_bDone is set, and TRUE otherwise.
BOOL CSpiderThread::CleanUp()
{
	m_strCurrentServer.Empty();

	try
	{
		// Request object first ...
		if (m_pFile)
		{
			m_pFile->Close();
			delete m_pFile;
			m_pFile = NULL;
		}

		// ... then the server connection ...
		if (m_pServer)
		{
			m_pServer->Close();
			delete m_pServer;
			m_pServer = NULL;
		}

		// ... and finally the session itself.
		if (m_pSession)
		{
			m_pSession->Close();
			delete m_pSession;
			m_pSession = NULL;
		}
	}
	catch (CInternetException* pEx)
	{
		// catch errors from WinINet
		pEx->Delete();
		return FALSE;
	}

	return m_bDone ? 0 : TRUE;
}


// Normalises pThreadParams->m_pszURL and splits it into service type, server
// name, object and port (stored back into pThreadParams).
//
// Normalisation applied:
//   - a URL with fewer than three '/' characters (for example
//     "http://server") gets a trailing '/' appended before parsing;
//   - an object that is longer than one character, contains no '.', and does
//     not end in '/' gets a trailing '/' appended after parsing.
//     NOTE(review): the '.' test looks like a "has a file extension"
//     heuristic; it scans the whole object, not just the last segment.
//
// Returns TRUE on success. Returns FALSE when pThreadParams is NULL, the URL
// is empty, or AfxParseURL() rejects the URL.
BOOL CSpiderThread::ParseURL(ThreadParams *pThreadParams)
{
	if(pThreadParams == NULL) return FALSE;
	if(pThreadParams->m_pszURL.IsEmpty()) return FALSE;

	// Count the slashes to detect a URL that has no object part at all.
	int nSlashes = 0;
	for (LPCTSTR lpsz = pThreadParams->m_pszURL; *lpsz != _T('\0'); ++lpsz)
	{
		if (*lpsz == _T('/')) nSlashes++;
	}
	if (nSlashes < 3) pThreadParams->m_pszURL += _T("/");

	try
	{
		// AfxParseURL reports a malformed URL through its return value, so
		// the result must be checked; the handler below alone would never
		// notice a parse failure.
		if(!AfxParseURL(pThreadParams->m_pszURL,
						pThreadParams->m_dwServiceType,
						pThreadParams->m_strServerName,
						pThreadParams->m_strObject,
						pThreadParams->m_nPort))
		{
			return FALSE;
		}
	}
	catch (CInternetException* pEx)
	{
		// catch errors from WinINet
		pEx->Delete();
		return FALSE;
	}

	// Treat an object without any '.' as a directory and make sure it ends
	// with '/'.
	const int nObjectLen = pThreadParams->m_strObject.GetLength();
	if (nObjectLen > 1
		&& pThreadParams->m_strObject.Find(_T('.')) < 0
		&& pThreadParams->m_strObject[nObjectLen - 1] != _T('/'))
	{
		pThreadParams->m_strObject += _T("/");
	}

	return TRUE;
}



// Turns the raw href values in `list` into absolute URLs, using pszURL (the
// URL of the page the hrefs came from) as the base.
//
// Parameters:
//   pszURL - URL of the page being processed.
//   list   - in: raw href values; out: absolute URLs for which LookUpEntry()
//            returned a negative value.
//            NOTE(review): a negative LookUpEntry() result presumably means
//            "not seen before" - confirm.
//   lpSub  - out: directory of the page on its server, always starting and
//            ending with '/' ("/" for a page at the server root).
//
// Returns the number of distinct candidate links found before the
// LookUpEntry() filter is applied (0 when pszURL is NULL). This can be larger
// than the number of entries left in `list`.
//
// Processing steps:
//   1. Skip links shorter than two characters, links containing '#', ' ' or
//      '%', links containing "ftp:", and links starting with '.'. When the
//      last ':' of a link belongs to an embedded "http:", only the part from
//      that "http:" on is kept. Duplicates are collapsed.
//   2. Split pszURL into server name and directory.
//   3. Links starting with "http" are taken as absolute; links starting with
//      '/' are resolved against the server root; everything else is resolved
//      against the page directory. "&amp;" is decoded to "&".
//      Relative links are always given the "http://" scheme
//      (need to fix for https).
int CSpiderThread::GetServerList(LPCTSTR pszURL,CStringList& list, CString& lpSub)
{
	if(pszURL == NULL) return 0;

	// ---- Step 1: filter the raw links and drop duplicates ------------------
	CStringList candidates;
	POSITION pos = list.GetHeadPosition();
	while (pos != NULL)
	{
		CString strLink = list.GetNext(pos);
		if (strLink.GetLength() < 2)
			continue;

		// Something like  javascript:open('http://...')  : keep only the
		// embedded URL. All four characters before the ':' are compared so
		// that "http://host:8080/x" is not cut down to "host:8080/x".
		const int nColon = strLink.ReverseFind(_T(':'));
		if (nColon > 6 && strLink.Mid(nColon - 4, 4) == _T("http"))
			strLink = strLink.Mid(nColon - 4);

		if (strLink.FindOneOf(_T("# %")) >= 0)	// don't check   /index.html#new
			continue;
		if (strLink.Find(_T("ftp:")) >= 0)		// don't check   ftp://somefile.zip
			continue;
		if (strLink[0] == _T('.'))				// don't check   ../somedirectory
			continue;

		// remove duplicate URLs: the most recent occurrence is the one kept
		POSITION posDup = candidates.Find(strLink);
		if (posDup != NULL)
			candidates.RemoveAt(posDup);
		candidates.AddTail(strLink);
	}

	// ---- Step 2: take the base URL apart -----------------------------------
	CString strURL = pszURL;

	int nSlashes = 0;
	for (LPCTSTR lpsz = pszURL; *lpsz != _T('\0'); ++lpsz)
	{
		if (*lpsz == _T('/')) nSlashes++;
	}
	if (nSlashes <= 2) strURL += _T("/");		// "http://server" -> "http://server/"

	CString strServer;
	int nPos = strURL.Find(_T("//"));
	if (nPos >= 0)
	{
		CString strAfterScheme = strURL.Mid(nPos + 2);	// get rid of http://
		const int nSlash = strAfterScheme.Find(_T('/'));
		if (nSlash >= 0)
			strServer = strAfterScheme.Left(nSlash);	// find server name
	}

	CString strBase;
	nPos = strURL.ReverseFind(_T('/'));			// get rid of URL Object
	if (nPos >= 0)
		strBase = strURL.Left(nPos);

	CString strDirectory;						// find directory on server
	if (!strBase.IsEmpty())
	{
		nPos = strBase.Find(strServer);
		if (nPos >= 0)
		{
			const int nDirLen = strBase.GetLength() - 1 - nPos - strServer.GetLength();
			if (nDirLen > 0)
				strDirectory = strBase.Right(nDirLen);
		}
	}

	// Directory with a leading and a trailing '/'. A one-character directory
	// name ("http://server/a/page.html") is kept as well.
	CString strSub;
	if (!strDirectory.IsEmpty())
	{
		if (strDirectory[0] != _T('/'))
			strSub = _T("/") + strDirectory;
		else
			strSub = strDirectory;

		if (strSub[strSub.GetLength() - 1] != _T('/'))
			strSub += _T("/");
	}
	if (strSub.IsEmpty()) strSub = _T("/");

	lpSub = strSub;

	// ---- Step 3: build the absolute URLs and rewrite `list` ----------------
	const CString strRoot = _T("http://") + strServer;	// need to fix for https
	const int count = static_cast<int>(candidates.GetCount());

	list.RemoveAll();

	pos = candidates.GetHeadPosition();
	while (pos != NULL)
	{
		CString strLink = candidates.GetNext(pos);
		CString strAbsolute;

		if (strLink.Find(_T("http")) == 0)
			strAbsolute = strLink;						// already absolute
		else if (strLink[0] == _T('/'))
			strAbsolute = strRoot + strLink;			// relative to the server root
		else
			strAbsolute = strRoot + strSub + strLink;	// relative to the page directory

		// fix  /index.cgi?&amp;file=....  : decode every "&amp;" and do not
		// append a NUL character to the stored string.
		strAbsolute.Replace(_T("&amp;"), _T("&"));

		if (LookUpEntry((LPCTSTR)strAbsolute) < 0)
			list.AddTail(strAbsolute);
	}

	return count;
}





⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -