// document.cpp
linkEntry.arrMedia.Add(strMedia);
}
}
// Success
bRet = TRUE;
}
return(bRet);
}
// Uses CInet to retrieve the multimedia item specified by strMedia. It saves
// the new file under the name specified in strFileName.
BOOL CSnaggerDoc::GetMedia(CString& strMedia, CString& strFileName)
{
BYTE *pbyBuffer = m_byBuffer;
int nLen;
BOOL bRet = FALSE;
// Retrieve the specified media file from the INet
CInet::RESULTS ret;
ret = m_Inet.GetFile(strMedia,&pbyBuffer,nLen);
if(ret == CInet::SUCCESS)
{
// Add the size to the statistics count
m_nTotalBytes += nLen;
// Write the file
m_pProgress->SetActionTitle("Saving File: "+strMedia);
m_Inet.SaveFile(strFileName,m_strDirectory,pbyBuffer,nLen);
bRet = TRUE;
}
return(bRet);
}
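// A minimal usage sketch for GetMedia() -- pDoc, the URL, and the local file
// name below are hypothetical:
//   CString strUrl("http://www.example.com/logo.gif");
//   CString strLocal("logo0001.gif");
//   if(pDoc->GetMedia(strUrl,strLocal))
//       TRACE("Saved %s\n",(LPCTSTR)strLocal);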
// Returns TRUE if the specified page has NOT yet been retrieved from the INet
// (i.e. it should be downloaded). If it has already been retrieved, pMapEntry
// receives a pointer to its entry in the map of retrieved pages.
BOOL CSnaggerDoc::ShouldGetPage(CString& strPage, MAP_FILES*& pMapEntry)
{
// Page names shouldn't be case sensitive
CString strNewPage = strPage;
strNewPage.MakeLower();
strNewPage = strNewPage.SpanExcluding("#");
// Did we find it??
return(!m_arrPagesDone.Lookup(strNewPage,(CObject *&) pMapEntry));
}
// Returns TRUE if the specified multimedia file has NOT yet been retrieved from
// the INet (i.e. it should be downloaded). If it has already been retrieved,
// pMapEntry receives a pointer to its entry in the map of retrieved files.
BOOL CSnaggerDoc::ShouldGetMedia(CString& strMedia, MAP_FILES*& pMapEntry)
{
// Media file names shouldn't be case sensitive
CString strNewMedia = strMedia;
strNewMedia.MakeLower();
strNewMedia = strNewMedia.SpanExcluding("#");
// Did we find it??
return(!m_arrMediaDone.Lookup(strNewMedia,(CObject *&) pMapEntry));
}
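// Typical caller pattern for the Should* helpers above (a sketch; pDoc,
// strUrl, and the MAP_FILES member strFileName are assumptions):
//   MAP_FILES* pEntry = NULL;
//   if(pDoc->ShouldGetMedia(strUrl,pEntry))
//       pDoc->GetMedia(strUrl,strLocal);   // not cached yet -- download it
//   else
//       strLocal = pEntry->strFileName;    // reuse the cached entry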
// Returns TRUE if the specified page should be added to the download queue.
// This basically means that the page is neither queued at a lower level nor
// previously downloaded at a lower level. If those criteria are met, it then
// checks whether this is an offsite page and, if so, whether offsite pages
// should be downloaded.
BOOL CSnaggerDoc::ShouldQueuePage(CString& strNewPage, BOOL bOffsite)
{
MAP_FILES* pMapEntry;
// Have we downloaded this page yet???
if(ShouldGetPage(strNewPage,pMapEntry))
{
// No...then look for it in the queue of waiting pages at previous
// levels
for(int i = 0; i < m_nLevel; i++)
{
for(int j = 0; j < m_aLinks[i].arrLinks.GetSize(); j++)
{
if(strNewPage == m_aLinks[i].arrLinks.GetAt(j))
return(FALSE);
}
}
}
else
{
// Yes...did we follow its links all the way to the
// maximum level?
if(m_Options.nMaxDepth && m_nLevel >= pMapEntry->nMaxLevel)
return(TRUE);
}
// Don't queue offsite pages unless offsite links are allowed
if(bOffsite && !m_Options.bOffsiteLinks)
return(FALSE);
return(TRUE);
}
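// Example (a sketch only; strUrl is hypothetical, and the direct member
// access assumes the caller can reach m_aLinks):
//   if(pDoc->ShouldQueuePage(strUrl,TRUE /*offsite*/))
//       pDoc->m_aLinks[pDoc->m_nLevel].arrLinks.Add(strUrl);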
// Initializes the specified link stack entry
void CSnaggerDoc::ResetLink(int nLevel)
{
m_aLinks[nLevel].nIndex = 0;
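// Note: in MFC, SetSize(0,nGrowBy) empties the array and sets its grow-by
// increment, so the Add() calls below reallocate in 100-element chunks
// instead of one element at a time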
m_aLinks[nLevel].arrLinks.SetSize(0,100);
m_aLinks[nLevel].arrMedia.SetSize(0,100);
m_aLinks[nLevel].arrOffsite.SetSize(0,100);
}
// Updates the information in the statistics window (if m_pProgress contains
// a valid window class pointer)
void CSnaggerDoc::UpdateStatus()
{
// Does the statistics window exist?
if(m_pProgress)
{
// Yep...update the info in its fields
m_pProgress->SetQueuedFiles(m_nQueuedPageCount);
m_pProgress->SetDownloadedPages(m_nGottenPageCount);
m_pProgress->SetDownloadedFiles(m_nGottenFileCount);
m_pProgress->SetKBDownloaded(m_nTotalBytes);
m_pProgress->SetLevel(m_nLevel+1);
}
}
// The workhorse thread routine that recursively navigates linked web pages and
// retrieves each of them along with their multimedia files. This process is
// spawned indirectly in RecursiveDownload() using the AfxBeginThread() call.
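// The spawn in RecursiveDownload() (not shown in this listing) presumably
// looks something like:
//   AfxBeginThread(DownloadThread,this);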
UINT CSnaggerDoc::DownloadThread(LPVOID lpvData)
{
HTREEITEM htreePage;
// Static methods have no "this" pointer, so the caller passes the owning
// object's pointer as the thread parameter
CSnaggerDoc *pThis = (CSnaggerDoc *) lpvData;
int nMaxDepth = pThis->m_Options.nMaxDepth-1;
int nCount;
CString strPage = pThis->m_strStartPage;
CString strFileName;
CString strLogData;
CString strText;
POSITION pos = pThis->GetFirstViewPosition();
CSnaggerView* pView = (CSnaggerView *) pThis->GetNextView(pos);
BOOL bIsOffsite = FALSE;
// Establish the WinInet Session
try
{
pThis->m_Inet.OpenSession(pThis->m_Options.bUseProxy,pThis->m_Options.strProxyName);
}
catch(...)
{
// Swallow any exception from OpenSession(); failures will surface when the
// downloads below are attempted
}
// Create the log file
pThis->m_fileLog.Open(pThis->m_strDirectory+"sitesnag.log",
CFile::modeCreate|CFile::modeWrite);
// Create the table of contents file
if(pThis->m_Options.bContents)
{
pThis->m_fileContents.Open(pThis->m_strDirectory+"SnagCon1.htm",
CFile::modeCreate|CFile::modeWrite);
// Add the TOC to the list of downloaded files
pThis->SetPageCacheEntry("snagcon1.htm","SnagCon1.htm",0);
// Add the TOC to the tree control
CString strTitle = "Contents Page 1 (SnagCon1.htm)";
pView->AddTreeContent(strTitle);
// Write the beginning of the first TOC page
strText = "<HTML>\r\n<HEAD>\r\n<TITLE>SiteSnagger Contents</TITLE>\r\n";
strText += "</HEAD\r\n<BODY>\r\n";
strText += "<H1><center>SiteSnagger Table of Contents</center><br><br></H1>\r\n<UL>\r\n";
pThis->m_fileContents.Write(strText,strText.GetLength());
}
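// The writes above produce an HTML skeleton like this in SnagCon1.htm:
//   <HTML>
//   <HEAD>
//   <TITLE>SiteSnagger Contents</TITLE>
//   </HEAD>
//   <BODY>
//   <H1><center>SiteSnagger Table of Contents</center><br><br></H1>
//   <UL>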
// Initialize the link index for the first level
pThis->m_nLevel = 0;
pThis->m_aLinks[0].nIndex = 0;
pThis->m_Inet.ResetUniqueCount();
// Recursively search web links until either we've searched them all (m_nLevel
// drops to -1) or the user decides to abort
while(pThis->m_nLevel >= 0 && !pThis->m_pProgress->IsAborted())
{
// Get the name of a new page in a second dimension element
if(pThis->m_aLinks[pThis->m_nLevel].nIndex > 0)
{
// Save the URL and whether it's offsite
int nIndex = pThis->m_aLinks[pThis->m_nLevel].nIndex;
strPage = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetAt(nIndex);
bIsOffsite = pThis->m_aLinks[pThis->m_nLevel].arrOffsite.GetAt(nIndex);
// Bump to the next level so we can get the page's links
pThis->m_nLevel++;
}
// Generate a unique filename for this page
pThis->m_Inet.GenerateUniqueFileName(strPage,strFileName,
pThis->m_arrPagesDone,TRUE);
pThis->m_pProgress->SetActionTitle("Getting Page: "+strPage);
// Write a log entry for this page -- leave room for the result
strLogData.Format("[%02d] Getting page %s ",pThis->m_nLevel+1,strPage);
pThis->m_fileLog.Write(strLogData,strLogData.GetLength());
CString strOrigPage = strPage;
// Get the page from Inet or from local file
if(pThis->GetPage(strPage,strFileName,pThis->m_aLinks[pThis->m_nLevel]))
{
MAP_FILES *pMapEntry;
// Get the count of links
nCount = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetSize();
// Did we just download this new page??
if(pThis->ShouldGetPage(strPage,pMapEntry))
{
// Yes, add it to the list of retrieved pages
pThis->SetPageCacheEntry(strPage,strFileName,pThis->m_nLevel);
// If the page was redirected then add its original name too
if(strPage != strOrigPage && pThis->ShouldGetPage(strOrigPage,pMapEntry))
pThis->SetPageCacheEntry(strOrigPage,strFileName,pThis->m_nLevel);
// Prefix offsite pages with their URL (i.e. http://www.xxx.yyy)
if(bIsOffsite)
strText = strPage+" - ";
else strText.Empty();
// Add the page's title and local filename
strText += pThis->m_strPageTitle+" ("+
strFileName.SpanExcluding("#")+")";
htreePage = pView->AddTreePage(strText,bIsOffsite);
strText.Format("<a href=%s><li> %s (%s - %s)<br>\r\n",strFileName,
pThis->m_strPageTitle,
strFileName.SpanExcluding("#"),strPage);
pThis->m_fileContents.Write(strText,strText.GetLength());
// Update the statistics
pThis->m_nGottenPageCount++;
pThis->m_nGottenFileCount++;
pThis->UpdateStatus();
}
else
{
// Set the new depth level if necessary
if(nMaxDepth)
{
// Have we gone to the max level yet???
if(pThis->m_nLevel >= pMapEntry->nMaxLevel)
nCount = 0;
else pMapEntry->nMaxLevel = pThis->m_nLevel;
}
}
// Log the results
pThis->m_fileLog.Write("[OK]\n",5);
// Check for offsite links, don't follow the current page's
// links if it is an offsite page
if(bIsOffsite)
nCount = 0;
// Should we get multimedia files??
if(pThis->m_Options.bMultimedia)
{
// Iterate through the list of multimedia links
CString strMedia;
for(int j = 0; j < pThis->m_aLinks[pThis->m_nLevel].arrMedia.GetSize() &&
!pThis->m_pProgress->IsAborted(); j++)
{
strMedia = pThis->m_aLinks[pThis->m_nLevel].arrMedia.GetAt(j);
// Should we get this file?
if(pThis->ShouldGetMedia(strMedia,pMapEntry))
{
// Yep, make sure it has a unique name
pThis->m_Inet.GenerateUniqueFileName(strMedia,
strFileName,pThis->m_arrMediaDone,FALSE);
pThis->m_pProgress->SetActionTitle("Getting File: "+strFileName);
// Log the info
strLogData.Format("[%02d] Getting media %s ",pThis->m_nLevel,
strMedia);
pThis->m_fileLog.Write(strLogData,strLogData.GetLength());
// We don't need to download EMAIL links so just make
// them look like a successful file entry
BOOL bMail;
if(strMedia.Left(7) == "mailto:")
{
bMail = TRUE;
strFileName = strMedia;
}
else bMail = FALSE;
// Did everything work okay??
if(bMail || pThis->GetMedia(strMedia,strFileName))
{
// Yep...add this file to our file list and to the tree
pThis->SetMediaCacheEntry(strMedia,strFileName);
pView->AddTreeMedia(strFileName.SpanExcluding("#"),
CTree::GetMediaType(strFileName));
// Increment the statistics count
if(!bMail)
pThis->m_nGottenFileCount++;
pThis->UpdateStatus();
// Log the results
pThis->m_fileLog.Write("[OK]\n",5);
}
else
{
// Log the results
pThis->m_fileLog.Write("[FAILED] ",9);
// Show a detailed error -- if possible
CString strError = pThis->m_Inet.GetErrorText();
pThis->m_fileLog.Write(strError,strError.GetLength());
pThis->m_fileLog.Write("\n",1);
}
}
}
}
}
else
{
// Log the results
pThis->m_fileLog.Write("[FAILED] ",9);
// Show a detailed error -- if possible
CString strError = pThis->m_Inet.GetErrorText();
pThis->m_fileLog.Write(strError,strError.GetLength());
pThis->m_fileLog.Write("\n",1);
nCount = 0;
}
// Make sure the statistics window is updated properly
pThis->UpdateStatus();
// If we've hit the max page count then just get out
if(pThis->m_Options.nMaxPages > 0 &&
pThis->m_nGottenPageCount >= pThis->m_Options.nMaxPages)
break;
// Continue recursion if we haven't hit maximum depth yet
// and as long as we have links on this page
if(pThis->m_nLevel < nMaxDepth && nCount > 0)
{
// Get the next page to parse
strPage = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetAt(0);
bIsOffsite = pThis->m_aLinks[pThis->m_nLevel].arrOffsite.GetAt(0);
// Move to the next level, initialize its link info
pThis->m_nLevel++;
pThis->ResetLink(pThis->m_nLevel);
// Queue the links
pThis->m_nQueuedPageCount += nCount;
continue;
}
// Finished with all links on this page, reset its link info
pThis->ResetLink(pThis->m_nLevel);
// Move back to the previous level
pThis->m_nLevel--;
// Find the next page on the second dimension
if(pThis->m_nLevel >= 0)
{
int nMaxCount;
// Find another page that has links
while(pThis->m_nLevel >= 0)
{
// How many second dimension entries do we have??
nMaxCount = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetSize();
// Did we have another valid page at this level?
if(pThis->m_aLinks[pThis->m_nLevel].nIndex < nMaxCount-1)
{
// Yes, get the next page
pThis->m_aLinks[pThis->m_nLevel].nIndex++;
pThis->m_nQueuedPageCount--;
break;
}
else
{
// No, back up a level in the tree
pThis->m_nLevel--;
pThis->m_nQueuedPageCount--;
}
}
}
}
// Make sure the "stopping, please wait" message isn't displayed
pView->EndWait();
// Sanity check that the link tree was fully traversed -- the queued-page
// count should always be back to 0 here
pThis->m_nLevel = pThis->m_nQueuedPageCount;
// Should we fix up the links for browsing??