⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 document.cpp

📁 下载整个站点,将某一站点的资源全部下载,只要输入正确合法的站址!
💻 CPP
📖 第 1 页 / 共 3 页
字号:
					linkEntry.arrMedia.Add(strMedia);
			}
		}

		// Success
		bRet = TRUE;
	}

	return(bRet);
}

// Retrieves the multimedia item named by strMedia via CInet and writes it to
// disk under strFileName in the document's download directory.  Also folds the
// file's size into the running byte-count statistic.  Returns TRUE on success,
// FALSE if the download failed.
BOOL CSnaggerDoc::GetMedia(CString& strMedia, CString& strFileName)
{
	BYTE *pbyData = m_byBuffer;
	int nBytes;

	// Pull the file down from the INet
	CInet::RESULTS result = m_Inet.GetFile(strMedia,&pbyData,nBytes);
	if(result != CInet::SUCCESS)
		return(FALSE);

	// Count the download toward the statistics total
	m_nTotalBytes += nBytes;

	// Let the user know what we're doing, then save the data to disk
	m_pProgress->SetActionTitle("Saving File: "+strMedia);
	m_Inet.SaveFile(strFileName,m_strDirectory,pbyData,nBytes);

	return(TRUE);
}

// Decides whether the page named by strPage still needs to be fetched.
// Returns TRUE when the page is absent from the list of already-retrieved
// pages; returns FALSE when it was retrieved before, in which case pMapEntry
// is set to the existing entry in that list.
BOOL CSnaggerDoc::ShouldGetPage(CString& strPage, MAP_FILES*& pMapEntry)
{
	// Normalize the lookup key: page names are compared case-insensitively
	// and any "#fragment" suffix is ignored
	CString strKey = strPage;
	strKey.MakeLower();
	strKey = strKey.SpanExcluding("#");

	// Fetch the page only if it isn't already in the done list
	return(!m_arrPagesDone.Lookup(strKey,(CObject *&) pMapEntry));
}

// Decides whether the multimedia file named by strMedia still needs to be
// fetched.  Returns TRUE when the file is absent from the list of
// already-retrieved media; returns FALSE when it was retrieved before, in
// which case pMapEntry is set to the existing entry in that list.
BOOL CSnaggerDoc::ShouldGetMedia(CString& strMedia, MAP_FILES*& pMapEntry)
{
	// Normalize the lookup key: media names are compared case-insensitively
	// and any "#fragment" suffix is ignored
	CString strKey = strMedia;
	strKey.MakeLower();
	strKey = strKey.SpanExcluding("#");

	// Fetch the file only if it isn't already in the done list
	return(!m_arrMediaDone.Lookup(strKey,(CObject *&) pMapEntry));
}

// Returns TRUE if the specified page should be added to the download queue.
// This basically means that the page is not in the list of queued pages at a 
// lower level or that it hasn't been previously downloaded at a lower level.
// If the above criteria are met -- it thens check to see if this is an offsite
// page and determines whether it should be downloaded.
BOOL CSnaggerDoc::ShouldQueuePage(CString& strNewPage, BOOL bOffsite)
{
	MAP_FILES* pMapEntry;

	// Have we downloaded this page yet???  (ShouldGetPage returns TRUE when
	// the page is NOT in the list of already-retrieved pages)
	if(ShouldGetPage(strNewPage,pMapEntry))
	{
		// No...then look for it in the queue of waiting pages at previous
		// levels; a duplicate anywhere shallower means don't queue it again
		for(int i = 0; i < m_nLevel; i++)
		{
			for(int j = 0; j < m_aLinks[i].arrLinks.GetSize(); j++)
			{
				if(strNewPage == m_aLinks[i].arrLinks.GetAt(j))
					return(FALSE);
			}
		}
	}
	else
	{
		// Yes -- pMapEntry now points at the page's cache entry.  Did we
		// follow its links all the way to the maximum level?  Note that this
		// early TRUE bypasses the offsite check below.
		// NOTE(review): queuing when m_nLevel >= nMaxLevel looks inverted
		// compared with the similar test in DownloadThread (which stops
		// following links in that case) -- confirm the intent.
		if(m_Options.nMaxDepth && m_nLevel >= pMapEntry->nMaxLevel)
			return(TRUE);
	}

	// Make sure that we allow offsite links for offsite pages
	if(bOffsite && !m_Options.bOffsiteLinks)
		return(FALSE);

	return(TRUE);
}

// Reinitializes the link-stack entry for the given recursion level so it can
// be reused: empties its offsite, media and link arrays (each keeping a
// grow-by increment of 100 elements) and rewinds its current index to 0.
void CSnaggerDoc::ResetLink(int nLevel)
{
	m_aLinks[nLevel].arrOffsite.SetSize(0,100);
	m_aLinks[nLevel].arrMedia.SetSize(0,100);
	m_aLinks[nLevel].arrLinks.SetSize(0,100);
	m_aLinks[nLevel].nIndex = 0;
}

// Refreshes every field of the statistics window -- queued, downloaded-page,
// downloaded-file and kilobyte counts plus the current (1-based) level.
// Does nothing when m_pProgress is NULL (no window to update).
void CSnaggerDoc::UpdateStatus()
{
	// Bail out quietly when the statistics window doesn't exist
	if(!m_pProgress)
		return;

	// Push the current counters into the window's fields
	m_pProgress->SetQueuedFiles(m_nQueuedPageCount);
	m_pProgress->SetDownloadedPages(m_nGottenPageCount);
	m_pProgress->SetDownloadedFiles(m_nGottenFileCount);
	m_pProgress->SetKBDownloaded(m_nTotalBytes);
	m_pProgress->SetLevel(m_nLevel+1);
}

// The workhouse thread routine that recursively navigates linked web pages and
// retrieves each of them along with their multimedia files.  This process is
// spawned indirectrly in RecursiveDownload() using the AfxBeginThread() call.
UINT CSnaggerDoc::DownloadThread(LPVOID lpvData)
{
	HTREEITEM htreePage;

	// Static methods can't have a "this" pointer to get the parent class's 
	// pointer which the call passes as a parameter
	CSnaggerDoc *pThis = (CSnaggerDoc *) lpvData;

	int nMaxDepth = pThis->m_Options.nMaxDepth-1;
	int nCount;
	CString strPage = pThis->m_strStartPage;
	CString strFileName;
	CString strLogData;
	CString strText;
	POSITION pos = pThis->GetFirstViewPosition();
	CSnaggerView* pView = (CSnaggerView *) pThis->GetNextView(pos);	
	BOOL bIsOffsite = FALSE;

	// Establish the WinInet Session
	try
	{
		pThis->m_Inet.OpenSession(pThis->m_Options.bUseProxy,pThis->m_Options.strProxyName);
	}
	catch(...)
	{
	}

	// Create the log file
	pThis->m_fileLog.Open(pThis->m_strDirectory+"sitesnag.log",
						CFile::modeCreate|CFile::modeWrite);

	// Create the table of contents file
	if(pThis->m_Options.bContents)
	{
		pThis->m_fileContents.Open(pThis->m_strDirectory+"SnagCon1.htm",
								CFile::modeCreate|CFile::modeWrite);

		// Add the TOC to the list of downloaded files 
		pThis->SetPageCacheEntry("snagcon1.htm","SnagCon1.htm",0);

		// Add the TOC to the tree control
		CString strTitle = "Contents Page 1 (SnagCon1.htm)";
		pView->AddTreeContent(strTitle);

		// Write the beginning of the first TOC page
		strText = "<HTML>\r\n<HEAD>\r\n<TITLE>SiteSnagger Contents</TITLE>\r\n";
		strText += "</HEAD\r\n<BODY>\r\n";
		strText += "<H1><center>SiteSnagger Table of Contents</center><br><br></H1>\r\n<UL>\r\n";
		pThis->m_fileContents.Write(strText,strText.GetLength());
	}

	// Initialize the index for the first link level, start with the first level
	pThis->m_nLevel = 0;
	pThis->m_aLinks[0].nIndex = 0;
	pThis->m_Inet.ResetUniqueCount();

	// Recusively search web links until either we've searched them all (m_nLevel is
	// -1 or if the user decides to abort
	while(pThis->m_nLevel >= 0 && !pThis->m_pProgress->IsAborted())
	{
		// Get the name of a new page in a second dimension element
		if(pThis->m_aLinks[pThis->m_nLevel].nIndex > 0)
		{
			// Save the URL and whether it's offsite
			int nIndex = pThis->m_aLinks[pThis->m_nLevel].nIndex;
			strPage = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetAt(nIndex);
			bIsOffsite = pThis->m_aLinks[pThis->m_nLevel].arrOffsite.GetAt(nIndex);

			// Bump to the next level so we can get the page's links
			pThis->m_nLevel++;
		}

		// Generate a unique filename for this page
		pThis->m_Inet.GenerateUniqueFileName(strPage,strFileName,
						pThis->m_arrPagesDone,TRUE);
		pThis->m_pProgress->SetActionTitle("Getting Page: "+strPage);

		// Write a log entry for this page -- leave room for the result
		strLogData.Format("[%02d] Getting page %s ",pThis->m_nLevel+1,strPage);
		pThis->m_fileLog.Write(strLogData,strLogData.GetLength());

		CString strOrigPage = strPage;

		// Get the page from Inet or from local file
		if(pThis->GetPage(strPage,strFileName,pThis->m_aLinks[pThis->m_nLevel]))
		{
			MAP_FILES *pMapEntry;
		
			// Get the count of links
			nCount = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetSize();

			// Did we just download this new page??
			if(pThis->ShouldGetPage(strPage,pMapEntry))
			{
				// Yes, add it to the list of retrieved pages
				pThis->SetPageCacheEntry(strPage,strFileName,pThis->m_nLevel);

				// If the page was redirected then add its original name too
				if(strPage != strOrigPage && pThis->ShouldGetPage(strOrigPage,pMapEntry))
					pThis->SetPageCacheEntry(strOrigPage,strFileName,pThis->m_nLevel);

				// Prefix offsite pages with their URL (i.e. http://www.xxx.yyy)
				if(bIsOffsite)
					strText = strPage+" - ";
				else strText.Empty();

				// Add the page's title and local filename 
				strText += pThis->m_strPageTitle+"  ("+
							strFileName.SpanExcluding("#")+")";

				htreePage = pView->AddTreePage(strText,bIsOffsite);
				strText.Format("<a href=%s><li> %s (%s - %s)<br>\r\n",strFileName,
									pThis->m_strPageTitle,
									strFileName.SpanExcluding("#"),strPage);
				pThis->m_fileContents.Write(strText,strText.GetLength());

				// Update the statistics
				pThis->m_nGottenPageCount++;
				pThis->m_nGottenFileCount++;
				pThis->UpdateStatus();
			}
			else
			{
				// Set the new depth level if necessary
				if(nMaxDepth)
				{
					// Have we gone to the max level yet???
					if(pThis->m_nLevel >= pMapEntry->nMaxLevel)
						nCount = 0;
					else pMapEntry->nMaxLevel = pThis->m_nLevel;
				}
			}

			// Log the results
			pThis->m_fileLog.Write("[OK]\n",5);

			// Check for offsite links, don't follow the current page's
			// links if it is an offsite page
			if(bIsOffsite)
				nCount = 0;

			// Should we get multimedia files??
			if(pThis->m_Options.bMultimedia)
			{
				// Iterate through the list of multimedia links
				CString strMedia;
				for(int j = 0; j < pThis->m_aLinks[pThis->m_nLevel].arrMedia.GetSize() &&
							!pThis->m_pProgress->IsAborted(); j++)
				{
					strMedia = pThis->m_aLinks[pThis->m_nLevel].arrMedia.GetAt(j);

					// Should we get this file?
					if(pThis->ShouldGetMedia(strMedia,pMapEntry))
					{
						// Yep, make sure it has a unique name
						pThis->m_Inet.GenerateUniqueFileName(strMedia,
										strFileName,pThis->m_arrMediaDone,FALSE);
						pThis->m_pProgress->SetActionTitle("Getting File: "+strFileName);

						// Log the info
						strLogData.Format("[%02d] Getting media %s ",pThis->m_nLevel,
														strMedia);
						pThis->m_fileLog.Write(strLogData,strLogData.GetLength());

						// We don't need to download EMAIL links so just make
						// them look like a successful file entry
						BOOL bMail;
						if(strMedia.Left(7) == "mailto:")
						{
							bMail = TRUE;
							strFileName = strMedia;
						}
						else bMail = FALSE;

						// Did everything work okay??
						if(bMail || pThis->GetMedia(strMedia,strFileName))
						{
							// Yep...add this file to our file list and to the tree
							pThis->SetMediaCacheEntry(strMedia,strFileName);
							pView->AddTreeMedia(strFileName.SpanExcluding("#"),
											CTree::GetMediaType(strFileName));

							// Increment the statistics count
							if(!bMail)
								pThis->m_nGottenFileCount++;
							pThis->UpdateStatus();

							// Log the results
							pThis->m_fileLog.Write("[OK]\n",5);
						}
						else
						{
							// Log the results
							pThis->m_fileLog.Write("[FAILED] ",9);

							// Show a detailed error -- if possible
							CString strError = pThis->m_Inet.GetErrorText();
							pThis->m_fileLog.Write(strError,strError.GetLength());
							pThis->m_fileLog.Write("\n",1);
						}
					}
				}
			}
		}
		else
		{
			// Log the results
			pThis->m_fileLog.Write("[FAILED] ",9);

			// Show a detailed error -- if possible
			CString strError = pThis->m_Inet.GetErrorText();
			pThis->m_fileLog.Write(strError,strError.GetLength());
			pThis->m_fileLog.Write("\n",1);
			
			nCount = 0;
		}

		// Make sure the statistics window is updated properly
		pThis->UpdateStatus();

		// If we've hit the max page count then just get out
		if(pThis->m_Options.nMaxPages > 0 && 
				pThis->m_nGottenPageCount >= pThis->m_Options.nMaxPages) 
			break;

		// Continue recursion if we haven't hit maximum depth yet
		// and as long as we have links on this page
		if(pThis->m_nLevel < nMaxDepth && nCount > 0)
		{
			// Get the next page to parse
			strPage = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetAt(0);
			bIsOffsite = pThis->m_aLinks[pThis->m_nLevel].arrOffsite.GetAt(0);

			// Move to the next level, initialize its link info
			pThis->m_nLevel++;
			pThis->ResetLink(pThis->m_nLevel);

			// Queue the links
			pThis->m_nQueuedPageCount += nCount;
			continue;
		}

		// Finished will all links on this page, reset its link info
		pThis->ResetLink(pThis->m_nLevel);

		// Move back to the previous level
		pThis->m_nLevel--;

		// Find the next page on the second dimension
		if(pThis->m_nLevel >= 0)
		{
			int nMaxCount;

			// Find another page that has links
			while(pThis->m_nLevel >= 0)
			{
				// How many second dimension entries do we have??
				nMaxCount = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetSize();

				// Did we have another valid page at this level?
				if(pThis->m_aLinks[pThis->m_nLevel].nIndex < nMaxCount-1)
				{
					// Yes, get the next page
					pThis->m_aLinks[pThis->m_nLevel].nIndex++;
					pThis->m_nQueuedPageCount--;
					break;
				}
				else 
				{
					// No, back up a level in the tree
					pThis->m_nLevel--;
					pThis->m_nQueuedPageCount--;
				}
			}
		}
	}

	// Make sure the "stopping, please wait" message isn't displayed
	pView->EndWait();

	// Make sure that the tree was correctly parsed
	// nNodeCount should always be 0
	pThis->m_nLevel = pThis->m_nQueuedPageCount;

	// Should we fix up the links for browsing??

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -