📄 thread.cpp
字号:
// InetThread.cpp : implementation file
//
/*******************************************
Pre-emptive Multithreading Web Spider
Copyright (c) 1998 by Sim Ayers.
*******************************************************************/
#include "stdafx.h"
#include "Spider.h"
#include "ThreadParams.h"
#include "Thread.h"
#include "utily.h"
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
// Defined in another translation unit; not referenced in the visible part of this file.
extern HANDLE hConnection;
// Number of entries stored in g_entry; incremented by AddEntry().
UINT g_nEntries = 0;
// Table of URLs that have already been checked, together with their status.
// Read by LookUpEntry()/CheckURL() and appended to by AddEntry().
CArray<CEntry*, CEntry*> g_entry;
// Counters defined in another translation unit; not referenced in the visible part of this file.
extern long lThreadCount;
extern long lURLCount;
// Request header restricting responses to text content types.
// NOTE(review): not referenced in the visible part of this file - presumably used when opening requests.
static const TCHAR szHeaders[] = _T("Accept: text/*\r\n");
// Agent name handed to the CMyInternetSession constructor in InitServer().
static const TCHAR AgentName[] = _T("XYZ Spider");
// WinINet request flag sets; the second one additionally disables automatic redirects.
// NOTE(review): neither is referenced in the visible part of this file.
static DWORD dwHttpRequestFlags = INTERNET_FLAG_EXISTING_CONNECT | INTERNET_FLAG_RELOAD | INTERNET_FLAG_DONT_CACHE ;
static DWORD dwHttpRequestFlags2 = INTERNET_FLAG_EXISTING_CONNECT | INTERNET_FLAG_RELOAD | INTERNET_FLAG_DONT_CACHE |INTERNET_FLAG_NO_AUTO_REDIRECT;
// WinINet status callback: translates the status code into a short,
// human-readable message and forwards it to the notification window.
//
// dwContext               - context value supplied by WinINet (unused here).
// dwInternetStatus        - INTERNET_STATUS_* code describing the event.
// lpvStatusInformation    - extra data for the event; formatted as a string
//                           for the two name-resolution events.
// dwStatusInformationLen  - size of the extra data (unused here).
//
// If m_pMainWnd is set, two messages are sent synchronously:
//   WM_USER_SERVER_STATUS with the status text, and
//   WM_USER_THREAD_STATUS with m_strHttpSite.
// Both string pointers are only valid for the duration of the SendMessage call.
void CMyInternetSession::OnStatusCallback(DWORD dwContext , DWORD dwInternetStatus,
	LPVOID lpvStatusInformation , DWORD dwStatusInformationLen )
{
	CString strStatus = "";
	switch (dwInternetStatus)
	{
	case INTERNET_STATUS_STATE_CHANGE:
		strStatus = "Busy";
		break;
	case INTERNET_STATUS_REQUEST_COMPLETE:
		strStatus = "request complete";
		break;
	case INTERNET_STATUS_CONNECTING_TO_SERVER:
		strStatus = "Connecting to server...";
		break;
	case INTERNET_STATUS_RESOLVING_NAME:
		strStatus.Format("resolving name for %s", lpvStatusInformation);
		break;
	case INTERNET_STATUS_NAME_RESOLVED:
		strStatus.Format("resolved name for %s!", lpvStatusInformation);
		break;
	case INTERNET_STATUS_CONNECTED_TO_SERVER:
		strStatus = "Connected to server!";
		break;
	case INTERNET_STATUS_SENDING_REQUEST:
		strStatus = "Sending request...";
		break;
	case INTERNET_STATUS_REQUEST_SENT:
		strStatus = "Request sent";
		break;
	case INTERNET_STATUS_RECEIVING_RESPONSE:
		strStatus = "Receiving response...";
		break;
	case INTERNET_STATUS_RESPONSE_RECEIVED:
		strStatus = "Response received!";
		break;
	case INTERNET_STATUS_CLOSING_CONNECTION:
		strStatus = "Closing connection...";
		break;
	case INTERNET_STATUS_CONNECTION_CLOSED:
		strStatus = "Connection close!";
		break;
	case INTERNET_STATUS_HANDLE_CLOSING:
		strStatus = "Closing Connection...";
		// This break was missing: the case fell through and the text was
		// always overwritten with "Connection created!".
		break;
	case INTERNET_STATUS_HANDLE_CREATED:
		strStatus = "Connection created!";
		break;
	case INTERNET_STATUS_REDIRECT:
		strStatus = "Redirected to URL";
		break;
	case INTERNET_STATUS_CTL_RESPONSE_RECEIVED:
	default:
		// Deliberately shared: this status is reported like any unknown one.
		strStatus.Format("Unknown status: %d", dwInternetStatus);
		break;
	}

	LPCSTR line = strStatus;
	if (m_pMainWnd != NULL)
	{
		::SendMessage(m_pMainWnd, WM_USER_SERVER_STATUS, 0, (LPARAM)line);
		LPCSTR lpstr = m_strHttpSite;
		::SendMessage(m_pMainWnd, WM_USER_THREAD_STATUS, 0, (LPARAM)lpstr);
	}
}
// Worker-thread entry point.
// pParam is the ThreadParams block for this thread; its m_pThread member
// identifies the CSpiderThread that performs the work.
// Always returns 0.
UINT CSpiderThread::ThreadFunc(LPVOID pParam)
{
	ThreadParams* const pParams = static_cast<ThreadParams*>(pParam);
	CSpiderThread* const pWorker = static_cast<CSpiderThread*>(pParams->m_pThread);

	pWorker->ThreadRun(pParams);

	// SendMessage (not PostMessage) is used on purpose so that the thread
	// count stays synchronized. If the number of threads grows beyond
	// MAXIMUM_WAIT_OBJECTS (64) the program becomes unresponsive to user input.
	// Per the original note, the receiver deletes the ThreadParams block and
	// decrements the thread count.
	::SendMessage(pParams->m_hwndNotifyProgress,
		WM_USER_THREAD_DONE, 0, (LPARAM)pParams);

	return 0;
}
/////////////////////////////////////////////////////////////////////////////
// CSpiderThread
// Creates a spider thread object that will run pfnThreadProc with
// pThreadParams as its argument. All connection members start out empty,
// and the parameter block is linked back to this object through m_pThread.
// pThreadParams must not be NULL (it is dereferenced unconditionally).
CSpiderThread::CSpiderThread(AFX_THREADPROC pfnThreadProc,ThreadParams *pThreadParams)
	: CWinThread(pfnThreadProc, pThreadParams)
{
	// Let the thread procedure find its owning object.
	pThreadParams->m_pThread = this;

	// No session, server connection or file is open yet.
	m_pFile = NULL;
	m_pServer = NULL;
	m_pSession = NULL;

	m_bDone = FALSE;
	m_strCurrentServer = "";
}
// Destructor: hands all teardown to CleanUp().
CSpiderThread::~CSpiderThread()
{
	this->CleanUp();
}
// Asks the worker to stop and tears down its resources.
void CSpiderThread::KillThread()
{
// Note: this function is called in the context of other threads,
// not the thread itself.
// The stop flag is raised first; the worker polls m_bDone between steps
// and returns early once it is set.
m_bDone = TRUE;
// NOTE(review): CleanUp() runs here on the calling thread while the worker
// may still be using its members - confirm CleanUp() is safe for that.
CleanUp();
}
// Creates and configures the WinINet session used by this thread.
//
// Returns TRUE with m_pSession set on success. On a CInternetException
// the partially built session is destroyed, m_pSession is set to NULL and
// FALSE is returned.
BOOL CSpiderThread::InitServer()
{
	// Built in a local first so that a failure part-way through can release
	// it; previously the session leaked when a later call threw.
	CMyInternetSession* pSession = NULL;
	try
	{
		pSession = new CMyInternetSession(AgentName, m_nThreadID);

		// Very important: can cause a server time-out if set too low,
		// or hang the thread if set too high.
		const int nTimeoutSeconds = 30;

		// Time-out in milliseconds for Internet connection requests. A
		// request that takes longer is canceled. The default is infinite.
		pSession->SetOption(INTERNET_OPTION_CONNECT_TIMEOUT, 1000 * nTimeoutSeconds);

		// Delay in milliseconds to wait between connection retries.
		pSession->SetOption(INTERNET_OPTION_CONNECT_BACKOFF, 1000);

		// Retry count for connection requests. If a connection attempt still
		// fails after this many tries the request is canceled (default: five).
		pSession->SetOption(INTERNET_OPTION_CONNECT_RETRIES, 1);

		pSession->EnableStatusCallback(TRUE);
	}
	catch (CInternetException* pEx)
	{
		// Errors from WinINet. pSession is still NULL if the constructor
		// itself threw (operator new already released the memory then).
		//pEx->ReportError();
		if (pSession != NULL)
		{
			pSession->Close();
			delete pSession;
		}
		m_pSession = NULL;
		pEx->Delete();
		return FALSE;
	}

	m_pSession = pSession;
	return TRUE;
}
// Message map for CSpiderThread; it currently contains no handlers.
BEGIN_MESSAGE_MAP(CSpiderThread, CWinThread)
//{{AFX_MSG_MAP(CSpiderThread)
// NOTE - the ClassWizard will add and remove mapping macros here.
//}}AFX_MSG_MAP
END_MESSAGE_MAP()
/////////////////////////////////////////////////////////////////////////////
// CSpiderThread message handlers
// Body of the worker thread: opens the session, parses the requested URL
// and carries out the operation selected by pThreadParams->m_type.
//
// Returns FALSE when the session cannot be created, the URL is empty or
// cannot be parsed, or a stop was requested; TRUE once the selected
// operation has been attempted (even if that operation reported an error).
BOOL CSpiderThread::ThreadRun(ThreadParams *pThreadParams)
{
	if (!InitServer())
		return FALSE;

	if (pThreadParams->m_pszURL.IsEmpty())
		return FALSE;

	if (m_bDone)
		return FALSE;

	// Status callbacks of the session report to the progress window.
	m_pSession->m_pMainWnd = pThreadParams->m_hwndNotifyProgress;

	if (!ParseURL(pThreadParams))
		return FALSE;

	PrintStatus(pThreadParams, pThreadParams->m_pszURL);

	switch (pThreadParams->m_type)
	{
	case HTTP_GET_FILE:
		{
			const BOOL bFetched = GetHttpFile(pThreadParams->m_strServerName,
				pThreadParams->m_strObject, pThreadParams);
			if (!bFetched)
			{
				CString strError;
				strError.Format("Error in getting %s", (LPCTSTR)pThreadParams->m_pszURL);
				AfxMessageBox(strError, MB_OK);
			}
		}
		break;

	case HTTP_CHECK_URL_ROOT:
		{
			// Fetch the root page without displaying it, then check its links.
			const BOOL bFetched = GetHttpFile(pThreadParams->m_strServerName,
				pThreadParams->m_strObject, pThreadParams, FALSE);
			if (bFetched)
			{
				if (m_bDone)
					return FALSE;
				CheckAllURLs(pThreadParams->m_strServerName, pThreadParams);
			}
		}
		break;

	case HTTP_CHECK_URL:
		CheckURL(pThreadParams->m_strServerName, pThreadParams->m_strObject, pThreadParams);
		break;

	case HTTP_GET_HEADER:
		{
			const BOOL bGotHeader = GetServerHeader(pThreadParams->m_strServerName,
				pThreadParams->m_strObject, pThreadParams);
			if (!bGotHeader)
			{
				CString strError;
				strError.Format("Error in getting Server Response Header for\n%s", (LPCTSTR)pThreadParams->m_pszURL);
				AfxMessageBox(strError, MB_OK);
			}
		}
		break;
	}

	return TRUE;
}
// Requests strObject from ServerName and prints the raw response headers.
//
// The status returned by GetHttpStatus() is stored in pThreadParams->m_Status.
// Returns FALSE when an argument is NULL, a stop was requested, or the
// status is outside the 2xx/3xx range; otherwise TRUE.
BOOL CSpiderThread::GetServerHeader(LPCTSTR ServerName, LPCTSTR strObject,ThreadParams *pThreadParams)
{
	if (ServerName == NULL || strObject == NULL)
		return FALSE;

	const DWORD dwRet = GetHttpStatus(ServerName, strObject);
	pThreadParams->m_Status = dwRet;

	if (m_bDone)
		return FALSE;

	// Only 2xx and 3xx count as success. The previous test (dwRet > 400)
	// let 400 "Bad Request" through as if it had succeeded.
	if (dwRet < 200 || dwRet >= 400)
		return FALSE;

	if (m_pFile != NULL && m_pServer != NULL && m_pSession != NULL)
	{
		CString strHeaders;
		m_pFile->QueryInfo(HTTP_QUERY_RAW_HEADERS_CRLF, strHeaders);
		PrintLine(pThreadParams, strHeaders);
	}
	return TRUE;
}
// Downloads a text resource into pThreadParams->m_Contents.
//
// The contents are cleared first. The request must return status 200 and,
// when a file and server connection exist, a content type containing
// "text"; otherwise FALSE is returned. The body is read line by line and
// every '\n' is expanded to "\r\n". A CInternetException while reading
// resets m_pFile and returns 0, leaving whatever was read so far in
// m_Contents. When ViewFile is nonzero the progress window is notified
// with WM_USER_THREAD_FILE once the download is done.
BOOL CSpiderThread::GetHttpFile(LPCTSTR ServerName,LPCTSTR strObject,ThreadParams *pThreadParams,BOOL ViewFile)
{
	TCHAR szLine[4096] = { 0 };

	pThreadParams->m_Contents = "";

	if (strObject == NULL)
		return FALSE;

	if (GetHttpStatus(ServerName, strObject) != 200)
		return FALSE;

	// Refuse anything that is not some kind of text.
	if (m_pFile != NULL && m_pServer != NULL)
	{
		CString strContentType;
		m_pFile->QueryInfo(HTTP_QUERY_CONTENT_TYPE, strContentType);
		if (strContentType.Find("text") < 0)
			return FALSE;
	}

	if (m_pFile != NULL && m_pServer != NULL && m_pSession != NULL)
	{
		try
		{
			while (m_pFile->ReadString(szLine, 4095))
			{
				// Copy the line, turning bare line feeds into CR/LF pairs.
				CString strConverted;
				const int nLength = lstrlen(szLine);
				for (int i = 0; i < nLength; ++i)
				{
					if (szLine[i] == '\n')
						strConverted += "\r\n";
					else
						strConverted += szLine[i];
				}
				pThreadParams->m_Contents += strConverted;
			}
		}
		catch (CInternetException* pEx)
		{
			// Errors from WinINet.
			//pEx->ReportError();
			m_pFile = NULL;
			pEx->Delete();
			return 0;
		}
	}

	if (ViewFile)
	{
		::SendMessage(pThreadParams->m_hwndNotifyProgress,
			WM_USER_THREAD_FILE, pThreadParams->m_threadID, (LPARAM)pThreadParams);
	}
	return TRUE;
}
// Searches the global entry table for pszURL (case-insensitive).
//
// When the URL contains a ':' after its first character, the text up to
// and including the two characters following that colon is dropped before
// comparing (for "http://host/x" this removes "http://").
// Returns the index of the first matching entry, or -1 if pszURL is NULL
// or no entry matches. The table is scanned while a CriticalSectionLock
// object is alive.
int LookUpEntry(LPCTSTR pszURL)
{
	if (pszURL == NULL)
		return -1;

	CriticalSectionLock plock;

	const CString strFull(pszURL);
	const int nColon = strFull.Find(':');

	CString strKey;
	if (nColon > 0)
		strKey = strFull.Right(strFull.GetLength() - nColon - 3);
	else
		strKey = strFull;

	for (UINT nIndex = 0; nIndex < g_nEntries; ++nIndex)
	{
		if (strKey.CompareNoCase(g_entry[nIndex]->m_URL) == 0)
			return nIndex;
	}
	return -1;
}
// Appends a copy of *lpEntry to the global entry table.
//
// Returns the index of the new element, or -1 when lpEntry is NULL, its
// URL is empty, or an entry with exactly the same URL (case-sensitive)
// already exists. The table is accessed while a CriticalSectionLock
// object is alive.
int AddEntry(URLStatus * lpEntry)
{
	if (lpEntry == NULL)
		return -1;

	CriticalSectionLock plock;

	if (lpEntry->m_URL.IsEmpty())
		return -1;

	// Only add each URL once.
	for (UINT i = 0; i < g_nEntries; i++)
	{
		if (lpEntry->m_URL.Compare(g_entry[i]->m_URL) == 0)
			return -1;
	}

	CEntry* newEntry = new CEntry;
	int retVal = -1;
	try
	{
		newEntry->m_URL = lpEntry->m_URL;
		newEntry->m_URLPage = lpEntry->m_URLPage;
		newEntry->m_Status = lpEntry->m_Status;
		newEntry->m_StatusString = lpEntry->m_StatusString;
		newEntry->m_LastModified = lpEntry->m_LastModified;
		newEntry->m_ContentType = lpEntry->m_ContentType;
		newEntry->m_ContentLength = lpEntry->m_ContentLength;

		// Store in the array; the index is passed to the view for updating.
		retVal = (int)g_entry.Add(newEntry);
	}
	catch (...)
	{
		// Nothing was stored: release the entry and let the caller see the
		// original exception.
		delete newEntry;
		throw;
	}

	// Count the entry only after it is really in the array, so g_nEntries
	// can never run ahead of g_entry when Add() fails.
	g_nEntries++;
	return retVal;
}
BOOL CSpiderThread::CheckURL(LPCTSTR ServerName, LPCTSTR strObject,ThreadParams *pThreadParams)
{
if(ServerName == NULL || strObject==NULL) return FALSE;
DWORD dwRet;
CString rString = "";
rString.Format("%s%s",ServerName,strObject);
int npos = LookUpEntry((LPCTSTR)rString);
if (m_bDone) return 0;
if(npos >= 0 && g_entry[npos]->m_Status != ERROR_INTERNET_TIMEOUT) // just print the status out to the view window if we have already visited the web page in question
{
pThreadParams->m_pStatus.m_URL = rString;
pThreadParams->m_pStatus.m_URLPage = pThreadParams->m_pszURL;
pThreadParams->m_pStatus.m_Status = g_entry[npos]->m_Status;
pThreadParams->m_pStatus.m_ContentType = g_entry[npos]->m_ContentType;
pThreadParams->m_pStatus.m_ContentLength = g_entry[npos]->m_ContentLength;
pThreadParams->m_pStatus.m_LastModified = g_entry[npos]->m_LastModified;
pThreadParams->m_pStatus.m_StatusString = g_entry[npos]->m_StatusString;
}
else
{
dwRet = GetHttpStatus(ServerName, strObject);
pThreadParams->m_Status = dwRet;
pThreadParams->m_pStatus.m_URL = rString;
pThreadParams->m_pStatus.m_URLPage = pThreadParams->m_pszURL;
pThreadParams->m_pStatus.m_Status = dwRet;
pThreadParams->m_pStatus.m_ContentType ="";
pThreadParams->m_pStatus.m_ContentLength = "";
pThreadParams->m_pStatus.m_LastModified ="";
if (m_bDone) return 0;
if(m_pFile != NULL && m_pServer != NULL)
{
m_pFile->QueryInfo(HTTP_QUERY_CONTENT_TYPE,pThreadParams->m_pStatus.m_ContentType);
m_pFile->QueryInfo(HTTP_QUERY_CONTENT_LENGTH,pThreadParams->m_pStatus.m_ContentLength);
m_pFile->QueryInfo(HTTP_QUERY_LAST_MODIFIED,pThreadParams->m_pStatus.m_LastModified);
}
if (m_bDone) return 0;
switch(dwRet)
{
case 200:
pThreadParams->m_pStatus.m_StatusString = _T("ok");
break;
case 301:
pThreadParams->m_pStatus.m_StatusString = _T("redirected to a new URL");
break;
case 302:
pThreadParams->m_pStatus.m_StatusString = _T("resides under a different URL");
break;
case 401:
pThreadParams->m_pStatus.m_StatusString = _T("auth required");
break;
case 402:
pThreadParams->m_pStatus.m_StatusString = _T("payment required");
break;
case 403:
pThreadParams->m_pStatus.m_StatusString = _T("forbidden");
break;
case 404:
pThreadParams->m_pStatus.m_StatusString = _T("not found");
break;
case 400:
pThreadParams->m_pStatus.m_StatusString = _T("Unintelligble request");
break;
case 405:
pThreadParams->m_pStatus.m_StatusString = _T("requested method not supported");
break;
case 500:
pThreadParams->m_pStatus.m_StatusString = _T("Unknown server error");
break;
case 501:
pThreadParams->m_pStatus.m_StatusString = _T("Not implemented on Server");
break;
case 502:
pThreadParams->m_pStatus.m_StatusString = _T("Server temporarily overloaded,busy");
break;
case 503:
pThreadParams->m_pStatus.m_StatusString = _T("Server capacity reached, busy");
break;
case ERROR_INTERNET_TIMEOUT:
pThreadParams->m_pStatus.m_StatusString = _T("Connection timed out");
break;
case ERROR_INTERNET_INVALID_URL:
pThreadParams->m_pStatus.m_StatusString = _T("URL is invalid");
break;
case ERROR_INTERNET_NAME_NOT_RESOLVED:
case 12029: // this seems to be the error for unresolved host names
pThreadParams->m_pStatus.m_StatusString = _T("Could not resolve host name. Check it, and try again");
break;
case ERROR_INTERNET_SHUTDOWN:
pThreadParams->m_pStatus.m_StatusString = _T("Win32 internet functions have been shut down");
break;
case ERROR_INTERNET_CONNECTION_ABORTED:
pThreadParams->m_pStatus.m_StatusString = _T("Connection was aborted prematurely");
break;
case 0:
case 1:
case 2:
default:
pThreadParams->m_pStatus.m_StatusString = _T("no connection");
break;
}
}
AddEntry(&pThreadParams->m_pStatus);
if (m_bDone) return 0;
if(pThreadParams->m_hwndNotifyView != NULL)
::SendMessage(pThreadParams->m_hwndNotifyView,WM_USER_CHECK_DONE, 0, (LPARAM) &pThreadParams->m_pStatus);
if(dwRet != 200) return FALSE;
return TRUE;
}
DWORD CSpiderThread::NewConnection(LPCTSTR ServerName,LPCTSTR strObject)
{
if(ServerName == NULL || strObject == NULL) return 0;
DWORD dwRet = HTTP_STATUS_OK;
INTERNET_PORT nPort = INTERNET_DEFAULT_HTTP_PORT;
try{
m_pSession = new CMyInternetSession(ServerName,m_nThreadID);
int ntimeOut = 30;
/*
The time-out value in milliseconds to use for Internet connection requests.
If a connection request takes longer than this timeout, the request is canceled.
The default timeout is infinite. */
m_pSession->SetOption(INTERNET_OPTION_CONNECT_TIMEOUT,1000* ntimeOut);
/* The delay value in milliseconds to wait between connection retries.*/
m_pSession->SetOption(INTERNET_OPTION_CONNECT_BACKOFF,1000);
/* The retry count to use for Internet connection requests. If a connection
attempt still fails after the specified number of tries, the request is canceled.
The default is five. */
m_pSession->SetOption(INTERNET_OPTION_CONNECT_RETRIES,2);
m_pSession->EnableStatusCallback(TRUE);
m_pSession->m_strHttpSite.Format("%s%s",ServerName,strObject);
}
catch (CInternetException* pEx)
{
// catch errors from WinINet
dwRet = pEx->m_dwError;
m_pSession=NULL;
pEx->Delete();
return dwRet;
}
try
{
m_pServer = m_pSession->GetHttpConnection(ServerName,nPort);
}
catch (CInternetException* pEx)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -