📄 mainfrm.cpp
字号:
return FALSE;
}
if (!pThread->CreateThread()) // Starts execution of a CWinThread object
{
AfxMessageBox("Cannot Start New Thread");
delete pThread;
pThread = NULL;
delete pThreadParams;
return FALSE;
}
pThreadParams->m_threadID = pThread->m_nThreadID;
// since everything is successful, add the thread to our list
m_threadList.AddTail(pThread);
CString string;
::InterlockedIncrement(&lThreadCount);
string.Format ("Threads:%d",lThreadCount);
m_wndStatusBar.SetPaneText (2, (LPCTSTR) string, TRUE);
return TRUE;
}
void CMainFrame::OnDestroy()
{
OnToolsKillthread();
CMDIFrameWnd::OnDestroy();
}
void CMainFrame::GetConnected()
{
DWORD
dwConnectionTypes = INTERNET_CONNECTION_LAN |
INTERNET_CONNECTION_MODEM |
INTERNET_CONNECTION_PROXY;
if (!InternetGetConnectedState(&dwConnectionTypes, 0))
{
InternetAutodial(INTERNET_AUTODIAL_FORCE_UNATTENDED,
0);
}
}
/*************************************
* Get a File from a URL Location
**************************************/
void CMainFrame::OnToolsGetURL()
{
CUrlDlg dlg;
CString str;
CSpiderApp *pApp = (CSpiderApp*) AfxGetApp();
char* pFileName = "Urls.log";
char lpFileName[MAX_PATH];
strcpy(lpFileName,pApp->m_HomeDir);
strcat(lpFileName,"\\");
strcat(lpFileName,pFileName);
dlg.LoadFile(lpFileName);
if(dlg.DoModal() != IDOK) return;
dlg.SaveFile(lpFileName);
if(!dlg.m_WebFileName.IsEmpty())
{
BOOL Result= GetURL(dlg.m_WebFileName,NULL,HTTP_GET_FILE,FALSE);
if(!Result)
{
str.Format("Error in thread file transfer");
AfxMessageBox(str, MB_OK);
}
}
}
/*************************************
* Check Broken URLs in a HTML file on the Web
**************************************/
void CMainFrame::OnToolCheckURLs()
{
CUrlDlg dlg;
CSpiderApp *pApp = (CSpiderApp*) AfxGetApp();
HWND hwndNotifyView = NULL;
char* pFileName = "Urls.log";
char lpFileName[MAX_PATH];
strcpy(lpFileName,pApp->m_HomeDir);
strcat(lpFileName,"\\");
strcat(lpFileName,pFileName);
dlg.LoadFile(lpFileName);
if(dlg.DoModal() != IDOK) return;
dlg.SaveFile(lpFileName);
CString str ="";
if(!dlg.m_WebFileName.IsEmpty())
{
if(RobotCheck(dlg.m_WebFileName))
{
AfxMessageBox("Robot exclusion text file found.\nGoing to bail out, before we get caught.");
return;
}
CSpiderApp *pApp = (CSpiderApp*) AfxGetApp();
pApp->ShowURL(dlg.m_WebFileName,str);
CMDIChildWnd * pChild =
((CMDIFrameWnd*)(AfxGetApp()->m_pMainWnd))->MDIGetActive();
::SendMessage(pChild->m_hWnd,WM_USER_LIST, 1, 0);
CSpiderList* pActiveView = NULL;
pActiveView = CSpiderList::GetView();
if(pActiveView)
hwndNotifyView = pActiveView->m_hWnd;
BOOL Result= GetURL(dlg.m_WebFileName,hwndNotifyView,HTTP_CHECK_URL_ROOT,dlg.m_root);
if(!Result)
{
str.Format("Error in thread ");
AfxMessageBox(str, MB_OK);
return;
}
}
lURLCount = 0;
str ="URLs:0";
m_wndStatusBar.SetPaneText (3, (LPCTSTR) str, TRUE);
}
/******************************************
Get a Server Response Header for a given URL
*******************************************/
void CMainFrame::OnToolsGetServerHeader()
{
CUrlDlg dlg;
CSpiderApp *pApp = (CSpiderApp*) AfxGetApp();
char* pFileName = "Urls.log";
char lpFileName[MAX_PATH];
strcpy(lpFileName,pApp->m_HomeDir);
strcat(lpFileName,"\\");
strcat(lpFileName,pFileName);
dlg.LoadFile(lpFileName);
if(dlg.DoModal() != IDOK) return;
dlg.SaveFile(lpFileName);
CString str ="";
if(!dlg.m_WebFileName.IsEmpty())
{
CSpiderApp *pApp = (CSpiderApp*) AfxGetApp();
pApp->ShowURL(dlg.m_WebFileName,str);
BOOL Result= GetURL(dlg.m_WebFileName,NULL,HTTP_GET_HEADER,FALSE);
if(!Result)
{
str.Format("Error in thread ");
AfxMessageBox(str, MB_OK);
return;
}
}
}
/******************************************
View the list of URLS that did NOT checked out OK
*******************************************/
void CMainFrame::OnToolsURLsNotFound()
{
if(g_nEntries == 0)
{
AfxMessageBox("No URL's checked yet!");
return;
}
CString string="";
CSpiderApp *pApp = (CSpiderApp*) AfxGetApp();
pApp->ShowURL("Not_Found",string);
CSpiderDoc *PDoc = NULL;
PDoc =CSpiderDoc::GetDoc();
for (UINT i = 0; i < g_nEntries; i ++)
{
if(g_entry[i]->m_Status != 200)
{
string.Format("%d...%s...%s",i+1,g_entry[i]->m_URL,g_entry[i]->m_StatusString);
if (PDoc)
PDoc->CheckURLUpdate(string);
}
}
}
/******************************************
View the list of all URLS that were checked
*******************************************/
void CMainFrame::OnToolsViewURLList()
{
if(g_nEntries == 0)
{
AfxMessageBox("No URL's checked yet!");
return;
}
CString string="";
CSpiderApp *pApp = (CSpiderApp*) AfxGetApp();
pApp->ShowURL("URL_LIST",string);
CSpiderDoc *PDoc = NULL;
PDoc =CSpiderDoc::GetDoc();
for (UINT i = 0; i < g_nEntries; i ++)
{
string.Format("%d...%s...%s",i+1,g_entry[i]->m_URL,g_entry[i]->m_StatusString);
if (PDoc)
PDoc->CheckURLUpdate(string);
}
}
/***********************************
* Let all the threads stop, without crashing the program
*
* Calls KillThread() on every listed thread whose m_hThread is not NULL,
* writes the number of threads asked to stop into status-bar pane 2 and,
* when that number is non-zero, also reports it in a message box.
* Entries are not removed from m_threadList here.
************************************/
void CMainFrame::OnToolsThreadsStop()
{
    int nCount = 0;   // threads that still had a handle and were asked to stop

    // Walk the list by POSITION: one linear pass, where FindIndex(i) per
    // element would rescan the list from the head every time.
    POSITION pos = m_threadList.GetHeadPosition();
    while (pos != NULL)
    {
        CSpiderThread* pThread = m_threadList.GetNext(pos);
        if (pThread->m_hThread != NULL)
        {
            nCount++;
            pThread->KillThread();
        }
    }

    CString string;
    string.Format("Threads:%d", nCount);
    m_wndStatusBar.SetPaneText(2, (LPCTSTR) string, TRUE);

    if (nCount > 0)
    {
        string.Format("Number of Threads %d \nStill Active ", nCount);
        AfxMessageBox(string);
    }
}
/***********************************
* Enables the command UI item only while lThreadCount is non-zero.
************************************/
void CMainFrame::OnUpdateToolsThreadsStop(CCmdUI* pCmdUI)
{
    const BOOL bThreadsCounted = (lThreadCount != 0);
    pCmdUI->Enable(bThreadsCounted);
}
/***********************************
* Kill off All threads, might lock the program up at times
************************************/
void CMainFrame::OnToolsKillthread()
{
int nCount,tCount,i;
int nThreadsLeft;
POSITION pos= NULL;
DWORD dwStatus;
nCount = 0,tCount=0;
nThreadsLeft = m_threadList.GetCount();
for(i=0; i<nThreadsLeft; i++)
{
if((pos = m_threadList.FindIndex(i))!= NULL )
{
CSpiderThread* pThread = m_threadList.GetAt(pos);
if(pThread->m_hThread != NULL)
pThread->KillThread();
}
}
Sleep(200);
nThreadsLeft = m_threadList.GetCount();
for(i=0; i<nThreadsLeft; i++)
{
if((pos = m_threadList.FindIndex(i))!= NULL )
{
CSpiderThread* pThread = m_threadList.GetAt(pos);
if(pThread->m_hThread != NULL)
{
tCount++;
pThread->KillThread();
::GetExitCodeThread(pThread->m_hThread, &dwStatus);
if (dwStatus == STILL_ACTIVE)
nCount++;
else
m_threadList.RemoveAt(pos);
}
}
}
CString string;
string.Format ("Threads:%d",nCount);
m_wndStatusBar.SetPaneText (2, (LPCTSTR) string, TRUE);
if(nCount > 0)
{
string.Format ("Number of Threads %d \nStill Active %d",tCount,nCount );
AfxMessageBox(string);
}
}
/***********************************
* Enables the command UI item only while m_threadList holds at least
* one entry.
************************************/
void CMainFrame::OnUpdateToolsKillthread(CCmdUI* pCmdUI)
{
    const int nListed = m_threadList.GetCount();
    if (nListed != 0)
        pCmdUI->Enable(TRUE);
    else
        pCmdUI->Enable(FALSE);
}
/***************************
* for robot exclusion
* will return true if the server has a robots.txt file.
*
* Parses pszURL, opens an HTTP connection to its server and issues
* "GET /robots.txt". The contents of the file are not examined: any
* HTTP 200 answer counts as "robots.txt present".
*
* pszURL  - URL whose server is probed.
* returns - TRUE when the request was answered with status 200;
*           FALSE when the URL cannot be parsed, on any other status,
*           or when WinINet raises a CInternetException.
**************************/
BOOL CMainFrame::RobotCheck(LPCTSTR pszURL)
{
    CString strServerName;
    CString strObject;
    DWORD dwServiceType = 0;
    INTERNET_PORT nPort = 0;

    // Without a parsable URL there is no server to ask.
    if (!AfxParseURL(pszURL, dwServiceType, strServerName, strObject, nPort))
        return FALSE;

    DWORD dwRet = 0;
    BOOL bRet = FALSE;
    CMyInternetSession* pSession = NULL;
    CHttpConnection* pServer = NULL;
    CHttpFile* pFile = NULL;

    try
    {
        pSession = new CMyInternetSession("Robot", 1);
        pSession->SetOption(INTERNET_OPTION_CONNECT_TIMEOUT, 30000);
        /* The delay value in milliseconds to wait between connection retries.*/
        pSession->SetOption(INTERNET_OPTION_CONNECT_BACKOFF, 1000);
        /* The retry count to use for Internet connection requests. If a connection
        attempt still fails after the specified number of tries, the request is canceled.
        The default is five. */
        pSession->SetOption(INTERNET_OPTION_CONNECT_RETRIES, 2);
        pSession->EnableStatusCallback(TRUE);
        pSession->m_pMainWnd = AfxGetMainWnd()->m_hWnd;
        pSession->m_strHttpSite.Format("%s", pszURL);

        pServer = pSession->GetHttpConnection(strServerName, nPort);
        pFile = pServer->OpenRequest(_T("GET"), "/robots.txt");

        // Check the request object before it is used, not afterwards.
        if (pFile != NULL)
        {
            pFile->SendRequest();
            pFile->QueryInfoStatusCode(dwRet);
        }
        if (dwRet == 200)
            bRet = TRUE;
    }
    catch (CInternetException* pEx)
    {
        // catch errors from WinINet
        //pEx->ReportError();
        pEx->Delete();
        bRet = FALSE;
    }

    // Release the WinINet objects on every path. Doing this only inside the
    // try block would leak all three whenever an exception is thrown.
    try
    {
        if (pFile != NULL)
            pFile->Close();
        if (pServer != NULL)
            pServer->Close();
        if (pSession != NULL)
            pSession->Close();
    }
    catch (CInternetException* pEx)
    {
        pEx->Delete();
    }
    delete pFile;
    delete pServer;
    delete pSession;

    return bRet;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -