📄 thread.cpp
字号:
{
// catch errors from WinINet
dwRet = pEx->m_dwError;
m_pServer = NULL;
pEx->Delete();
CleanUp();
return dwRet;
}
m_strCurrentServer = ServerName;
try
{
// This can never return NULL. The call may have failed, but it can
// never be null.
m_pFile = m_pServer->OpenRequest(_T("GET"),strObject,NULL, m_nThreadID, NULL, NULL, dwHttpRequestFlags);
m_pFile->SendRequest();
}
catch (CInternetException* pEx)
{
// catch errors from WinINet
dwRet = pEx->m_dwError;
m_pFile = NULL;
pEx->Delete();
CleanUp();
return dwRet;
}
return dwRet;
}
// Sends an HTTP GET for strObject to lpServerName and reports the result.
//
// Any request left open from a previous call is closed first. The cached
// connection in m_pServer is reused when lpServerName equals
// m_strCurrentServer; otherwise it is closed and a new one is opened on the
// default HTTP port.
//
// Parameters:
//   lpServerName - host name to connect to.
//   strObject    - object (path) to request from that host.
//
// Returns:
//   0 when an argument is NULL or m_bDone is set; 1 when there is no session;
//   the WinINet error code when the connection cannot be opened; FALSE when
//   CleanUp() fails after an error; the result of NewConnection() when the
//   retry does not yield HTTP_STATUS_OK; otherwise the value stored by
//   QueryInfoStatusCode().
DWORD CSpiderThread::GetHttpStatus(LPCTSTR lpServerName,LPCTSTR strObject)
{
    if (lpServerName == NULL || strObject == NULL) return 0;

    INTERNET_PORT nPort = INTERNET_DEFAULT_HTTP_PORT;
    DWORD dwRet = 1;
    if (m_pSession == NULL) return dwRet;

    m_pSession->m_strHttpSite.Format("%s%s",lpServerName,strObject);
    dwRet = 2;

    // Release the request object left over from the previous call.
    if (m_pFile != NULL)
    {
        m_pFile->Close();
        delete m_pFile;
        m_pFile = NULL;
    }
    if (m_bDone) return 0;

    if (m_strCurrentServer != lpServerName)
    {
        // Picked a new server, close out connection and make a new one:
        if (m_pServer != NULL)
        {
            m_pServer->Close();
            delete m_pServer;
            m_pServer = NULL;
        }
    }

    if (m_pServer == NULL)
    {
        try
        {
            m_pServer = m_pSession->GetHttpConnection(lpServerName,nPort);
        }
        catch (CInternetException* pEx)
        {
            // catch errors from WinINet
            dwRet = pEx->m_dwError;
            m_pServer = NULL;
            pEx->Delete();
            if (!CleanUp()) return FALSE;
            return dwRet;
        }
        if (m_bDone) return 0;
        m_strCurrentServer = lpServerName;
    }

    if (m_bDone) return 0;
    if (m_pServer == NULL) return dwRet;

    try
    {
        // This can never return NULL. The call may have failed, but it can
        // never be null.
        m_pFile = m_pServer->OpenRequest(_T("GET"),strObject,NULL, m_nThreadID, NULL, NULL, dwHttpRequestFlags);
        m_pFile->SendRequest();
    }
    catch (CInternetException* pEx)
    {
        // catch errors from WinINet
        dwRet = pEx->m_dwError;
        pEx->Delete();
        // m_pFile is deliberately NOT reset here: if OpenRequest() succeeded and
        // SendRequest() threw, the request object still has to be closed and
        // deleted, which CleanUp() does for any non-NULL m_pFile. Clearing the
        // pointer first would leak it.
        if (!CleanUp()) return FALSE;
        if (dwRet == ERROR_INTERNET_TIMEOUT) Sleep(1000); // Connection timed out, try again on new connection
        dwRet = NewConnection(lpServerName,strObject);
        if (dwRet != HTTP_STATUS_OK) return dwRet;
    }

    if (m_bDone) return 0;
    if (m_pFile != NULL)
        m_pFile->QueryInfoStatusCode(dwRet);
    return dwRet;
}
// Extracts the href links from pThreadParams->m_Contents, normalises them with
// GetServerList(), and dispatches a check for each one.
//
// Links are classified by where ServerName occurs in the URL:
//   - not found (Find() < 0): treated as external, reported via
//     GetStatus(HTTP_CHECK_URL).
//   - found at an index > 0: parsed with AfxParseURL() and checked with
//     CheckURL(); when m_RootLinks is set, links that also contain the current
//     sub-directory and differ from the page's own URL are reported via
//     GetStatus(HTTP_CHECK_URL_ROOT).
//   - found at index 0: not handled by any of the passes.
//
// Parameters:
//   ServerName    - server name used to classify links.
//   pThreadParams - parameter block holding the page URL, contents and the
//                   notification window handle.
//
// Returns FALSE when an argument is NULL, the URL is empty, the contents
// buffer cannot be locked or GetHref() fails; 0 when m_bDone is observed;
// TRUE otherwise.
BOOL CSpiderThread::CheckAllURLs(LPCTSTR ServerName,ThreadParams *pThreadParams)
{
    if (ServerName == NULL || pThreadParams == NULL) return FALSE;
    if (pThreadParams->m_pszURL.IsEmpty()) return FALSE;

    CString strMainURL = pThreadParams->m_pszURL;
    CStringList list;
    POSITION pos = NULL;
    CString strObject, strSub;
    CString strServer, strURL;
    DWORD dwServiceType;
    INTERNET_PORT nPort;

    LPCTSTR lpszText = pThreadParams->m_Contents.LockBuffer();
    if (lpszText == NULL) return FALSE;
    if (!GetHref(lpszText,_T("href"),list))
    {
        // Balance the LockBuffer() above on the failure path as well.
        pThreadParams->m_Contents.UnlockBuffer();
        return FALSE;
    }
    pThreadParams->m_Contents.UnlockBuffer();

    if (m_bDone)
        return 0;

    int count = GetServerList(pThreadParams->m_pszURL,list,strSub);
    int pdest;

    // NOTE(review): presumably a scoped lock protecting lURLCount; it stays in
    // scope until the function returns - confirm against its definition.
    CriticalSectionLock plock;
    lURLCount += count;
    ::SendMessage(pThreadParams->m_hwndNotifyProgress,
        WM_USER_URL_STATUS, 0, (LONG)lURLCount);
    if (m_bDone) return 0;

    // count is GetServerList()'s return value and may differ from the number of
    // entries left in list, so each pass walks the list itself.
    if (count > 0)
    {
        // Pass 1: links that do not mention ServerName.
        for (pos = list.GetHeadPosition(); pos != NULL; )
        {
            strObject = list.GetNext(pos);
            pdest = strObject.Find(ServerName); // external link check
            if (pdest < 0)
                GetStatus(pThreadParams,HTTP_CHECK_URL,strObject); // create new threads for each URL
            if (m_bDone) return 0;
            if (lThreadCount >= MAXIMUM_WAIT_OBJECTS)
                WaitForSingleObject(hConnection,INFINITE);
        }

        // Pass 2: links on this server.
        for (pos = list.GetHeadPosition(); pos != NULL; )
        {
            strObject = list.GetNext(pos);
            pdest = strObject.Find(ServerName);
            if (pdest > 0)
            {
                if (AfxParseURL(strObject,dwServiceType,strServer,strURL,nPort))
                {
                    if (!strURL.IsEmpty())
                        CheckURL(ServerName,strURL,pThreadParams); // check root links in the current file
                }
            }
            if (m_bDone) return 0;
        }

        // Pass 3 (optional): follow links below the current directory.
        if (pThreadParams->m_RootLinks)
        {
            for (pos = list.GetHeadPosition(); pos != NULL; )
            {
                strObject = list.GetNext(pos);
                pdest = strObject.Find(ServerName); // get root files and check the links in those files
                if (pdest > 0)
                {
                    pdest = strObject.Find(strSub);
                    if ((pdest > 0) && (strMainURL.Compare(strObject) != 0))
                        GetStatus(pThreadParams,HTTP_CHECK_URL_ROOT,strObject); // create new thread
                }
                if (m_bDone) return 0;
                if (lThreadCount >= MAXIMUM_WAIT_OBJECTS)
                    WaitForSingleObject(hConnection,INFINITE);
            }
        }
    }
    return TRUE;
}
// Stores `line` in pThreadParams->m_string and sends WM_USER_THREAD_PRINT to
// the window in m_hwndNotifyProgress, passing the parameter block as LPARAM.
// Always returns TRUE.
BOOL CSpiderThread::PrintLine(ThreadParams *pThreadParams,LPCSTR line)
{
    pThreadParams->m_string = line;

    const LPARAM lParam = reinterpret_cast<LPARAM>(pThreadParams);
    ::SendMessage(pThreadParams->m_hwndNotifyProgress, WM_USER_THREAD_PRINT, 0, lParam);

    return TRUE;
}
// Sends WM_USER_THREAD_FILE to the window in m_hwndNotifyProgress with `line`
// as the LPARAM. Always returns TRUE.
BOOL CSpiderThread::PrintFile(ThreadParams *pThreadParams,
                              LPCSTR line)
{
    const LPARAM lParam = reinterpret_cast<LPARAM>(line);
    ::SendMessage(pThreadParams->m_hwndNotifyProgress, WM_USER_THREAD_FILE, 0, lParam);

    return TRUE;
}
// Sends WM_USER_THREAD_STATUS to the window in m_hwndNotifyProgress with
// `line` as the LPARAM. Always returns TRUE.
BOOL CSpiderThread::PrintStatus(ThreadParams *pThreadParams,
                                LPCSTR line)
{
    const LPARAM lParam = reinterpret_cast<LPARAM>(line);
    ::SendMessage(pThreadParams->m_hwndNotifyProgress, WM_USER_THREAD_STATUS, 0, lParam);

    return TRUE;
}
// Copies `line` into pThreadParams->m_checkURLName and sends
// WM_USER_THREAD_GETSTATUS to the window in m_hwndNotifyProgress, with ntype
// as WPARAM and the parameter block as LPARAM. Always returns TRUE.
BOOL CSpiderThread::GetStatus(ThreadParams *pThreadParams,UINT ntype,LPCSTR line)
{
    pThreadParams->m_checkURLName.Format("%s", line);

    const WPARAM wParam = static_cast<WPARAM>(ntype);
    const LPARAM lParam = reinterpret_cast<LPARAM>(pThreadParams);
    ::SendMessage(pThreadParams->m_hwndNotifyProgress, WM_USER_THREAD_GETSTATUS, wParam, lParam);

    return TRUE;
}
// Copies `line` into pThreadParams->m_checkURLName and sends
// WM_USER_THREAD_GETNEWFILE to the window in m_hwndNotifyProgress, with ntype
// as WPARAM and the parameter block as LPARAM. Always returns TRUE.
BOOL CSpiderThread::GetNewFile(ThreadParams *pThreadParams,UINT ntype,LPCSTR line)
{
    pThreadParams->m_checkURLName.Format("%s", line);

    const WPARAM wParam = static_cast<WPARAM>(ntype);
    const LPARAM lParam = reinterpret_cast<LPARAM>(pThreadParams);
    ::SendMessage(pThreadParams->m_hwndNotifyProgress, WM_USER_THREAD_GETNEWFILE, wParam, lParam);

    return TRUE;
}
// Forgets the current server name, then closes and deletes the request file,
// the server connection and the session, in that order, resetting each
// pointer to NULL.
//
// Returns FALSE if a CInternetException is caught (objects not yet reached
// are left as they were), 0 if m_bDone is set, TRUE otherwise.
BOOL CSpiderThread::CleanUp()
{
    m_strCurrentServer.Empty();

    try
    {
        if (m_pFile)
        {
            m_pFile->Close();
            delete m_pFile;
            m_pFile = NULL;
        }

        if (m_pServer)
        {
            m_pServer->Close();
            delete m_pServer;
            m_pServer = NULL;
        }

        if (m_pSession)
        {
            m_pSession->Close();
            delete m_pSession;
            m_pSession = NULL;
        }
    }
    catch (CInternetException* pError)
    {
        // WinINet reported a failure while closing; release the exception object.
        pError->Delete();
        return FALSE;
    }

    return m_bDone ? 0 : TRUE;
}
// Splits pThreadParams->m_pszURL into service type, server name, object and
// port using AfxParseURL(), storing the parts back into pThreadParams.
//
// Before parsing, a "/" is appended to the URL when it contains fewer than
// three '/' characters. After parsing, a "/" is appended to m_strObject when
// it is longer than one character, contains no '.', and does not already end
// in '/'.
//
// Returns FALSE when the URL is empty or a CInternetException is caught,
// TRUE otherwise. The return value of AfxParseURL() itself is not checked.
BOOL CSpiderThread::ParseURL(ThreadParams *pThreadParams)
{
    if (pThreadParams->m_pszURL.IsEmpty())
        return FALSE;

    LPCTSTR pszCursor = pThreadParams->m_pszURL;
    if (pszCursor == NULL)
        return FALSE;

    // Count the '/' characters in the URL.
    int nSlashes = 0;
    for (; *pszCursor != '\0'; ++pszCursor)
    {
        if (*pszCursor == '/')
            ++nSlashes;
    }
    if (nSlashes < 3)
        pThreadParams->m_pszURL += "/";

    try
    {
        AfxParseURL(pThreadParams->m_pszURL,
                    pThreadParams->m_dwServiceType,
                    pThreadParams->m_strServerName,
                    pThreadParams->m_strObject,
                    pThreadParams->m_nPort);
    }
    catch (CInternetException* pError)
    {
        // catch errors from WinINet
        pError->Delete();
        return FALSE;
    }

    pszCursor = pThreadParams->m_strObject;
    if (pszCursor == NULL)
        return FALSE;

    // Does the object contain a '.' anywhere?
    BOOL bHasDot = FALSE;
    for (; *pszCursor != '\0'; ++pszCursor)
    {
        if (*pszCursor == '.')
            bHasDot = TRUE;
    }

    const int nObjectLen = pThreadParams->m_strObject.GetLength();
    if (nObjectLen > 1)
    {
        if (!bHasDot && pThreadParams->m_strObject[nObjectLen - 1] != '/')
            pThreadParams->m_strObject += "/";
    }

    return TRUE;
}
// Normalises the raw link list extracted from a page into absolute URLs.
//
// Steps, in order:
//   1. Drop entries of length <= 1, entries containing '#', ' ' or '%',
//      entries containing "ftp:" and entries starting with '.'. When the last
//      ':' sits past index 6 and the character four places before it is 'h',
//      everything in front of that 'h' is cut off. Duplicates are collapsed,
//      keeping the position of the last occurrence.
//   2. Strip a single leading '/' from each remaining entry.
//   3. Derive the server name and the directory of pszURL; the directory
//      (with leading and trailing '/') is returned through lpSub, or "/" when
//      none is found.
//   4. Prefix "http://" + server + directory to every entry that does not
//      contain "http".
//   5. Remove the first "amp;" found at an index > 0 in each URL, and keep only
//      URLs for which LookUpEntry() returns a negative value.
//
// Parameters:
//   pszURL - URL of the page the links came from.
//   list   - in: raw links; out: normalised URLs that passed LookUpEntry().
//   lpSub  - out: directory part of pszURL.
//
// Returns 0 when pszURL is NULL; otherwise the number of URLs produced by
// step 4, which can be larger than the number of entries left in list.
int CSpiderThread::GetServerList(LPCTSTR pszURL,CStringList& list, CString& lpSub)
{
    if (pszURL == NULL) return 0;

    CStringList Tlist;
    CString strObject, strTemp, strURL;
    CString strSub, strServer, strDirectory;
    POSITION pos = NULL;
    int pdest;

    // Step 1: filter and de-duplicate the raw links.
    for (pos = list.GetHeadPosition(); pos != NULL; )
    {
        strObject = list.GetNext(pos);
        if (strObject.GetLength() - 1 <= 0)
            continue;

        // Index of the last ':' in the link (0 when there is none).
        int k = 0;
        LPCTSTR lpsz = strObject;
        for (int j = 0; lpsz[j] != '\0'; ++j)
        {
            if (lpsz[j] == ':') k = j;
        }
        if (k > 6)
        {
            if (strObject[k-4] == 'h')
            {
                strURL = strObject.Right(strObject.GetLength() - k+4);
                strObject = strURL;
            }
        }

        pdest = strObject.FindOneOf("# %");             // don't check /index.html#new
        const int pdest2 = strObject.Find("ftp:");      // don't check ftp://somefile.zip
        if (pdest >= 0 || pdest2 >= 0 || strObject[0] == '.') // don't check ../somedirectory
            continue;

        // Remove an earlier copy so the URL appears once, at the tail.
        // Tlist never holds duplicates, so a single Find() is sufficient.
        POSITION dup = Tlist.Find(strObject);
        if (dup != NULL)
            Tlist.RemoveAt(dup);
        Tlist.AddTail(strObject);
    }

    // Step 2: get rid of / in /directory/somefile.html
    list.RemoveAll();
    for (pos = Tlist.GetHeadPosition(); pos != NULL; )
    {
        strTemp = Tlist.GetNext(pos);
        if (strTemp.GetLength() - 1 <= 0)
            continue;
        if (strTemp[0] != '/')
            list.AddTail(strTemp);
        else
            list.AddTail(strTemp.Right(strTemp.GetLength()-1));
    }

    // Step 3: work out the server name and directory of the page URL.
    strURL = pszURL;
    strTemp = "";
    int nSlashes = 0;
    for (LPCTSTR p = pszURL; *p != '\0'; ++p)
    {
        if (*p == '/') ++nSlashes;
    }
    if (nSlashes <= 2) strURL += "/";

    if (!strURL.IsEmpty())
    {
        pdest = strURL.Find("//");
        if (pdest >= 0)
            strTemp = strURL.Mid(pdest +2);             // get rid of http://
    }
    if (!strTemp.IsEmpty())
    {
        pdest = strTemp.Find('/');
        if (pdest >= 0)
            strServer = strTemp.Left(pdest);            // find server name
    }
    if (!strURL.IsEmpty())
    {
        pdest = strURL.ReverseFind('/');                // get rid of URL Object
        if (pdest >= 0)
            strSub = strURL.Left(pdest);
    }
    if (!strSub.IsEmpty())                              // find directory on server
    {
        pdest = strSub.Find(strServer);
        if (pdest >= 0)
            strDirectory = strSub.Right(strSub.GetLength()-1 - pdest - strServer.GetLength());
    }

    strSub = "";
    // NOTE(review): a directory name of exactly one character fails this test
    // and falls back to "/" - confirm whether that is intended.
    if (strDirectory.GetLength()-1 > 0)
    {
        if (strDirectory[0] != '/')
            strSub += "/" + strDirectory;
        else
            strSub = strDirectory;
        if (strSub.GetLength()-1 > 0)
        {
            if (strDirectory[strDirectory.GetLength()-1] != '/' && strSub[strSub.GetLength()-1] != '/')
                strSub += "/";
        }
    }
    if (strSub.IsEmpty()) strSub = "/";
    lpSub = strSub;

    // Step 4: add the server name and directory to the URL
    Tlist.RemoveAll();
    for (pos = list.GetHeadPosition(); pos != NULL; )
    {
        strObject = list.GetNext(pos);
        if (strObject.IsEmpty())
            continue;
        pdest = strObject.Find(_T("http"));
        if (pdest < 0)
            strURL = _T("http://") + strServer + strSub + strObject; // need to fix for https
        else
            strURL = strObject;
        Tlist.AddTail(strURL);
    }

    // Step 5: rewrite the CStringList list
    list.RemoveAll();
    const int count = static_cast<int>(Tlist.GetCount());
    for (pos = Tlist.GetHeadPosition(); pos != NULL; )
    {
        strObject = Tlist.GetNext(pos);
        if (strObject.IsEmpty())
            continue;

        pdest = strObject.Find("amp;");                 // fix /index.cgi?&amp;file=....#new
        if (pdest > 0)
        {
            strURL = strObject.Left(pdest);
            strTemp = strObject.Right(strObject.GetLength() - pdest - 1 -3);
            // No explicit '\0' is appended: CString is already terminated, and
            // adding one would store a NUL character inside the string and
            // make GetLength() one too large.
            strObject = strURL + strTemp;
        }

        int npos = LookUpEntry((LPCTSTR)strObject);
        if (npos < 0)
            list.AddTail(strObject);
    }
    return count;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -