⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 document.cpp

📁 下载整个站点,将某一站点的资源全部下载,只要输入正确合法的站址!
💻 CPP
📖 第 1 页 / 共 3 页
字号:

/*	
	Document.cpp : implementation of the CSnaggerDoc class
	
	Implements project file persistence for options, tree data and statistics.
	Also performs the actual retrieval of files from the host using the 
	CInet class.

	Author: Steven E. Sipe
*/

#include "stdafx.h"

#include "SiteSnag.h"
#include "Document.h"
#include "View.h"
#include "progress.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

// File version for serialization
const long g_lFileVersion = 0x0101;

/////////////////////////////////////////////////////////////////////////////
// CSnaggerDoc

IMPLEMENT_DYNCREATE(CSnaggerDoc, CDocument)

BEGIN_MESSAGE_MAP(CSnaggerDoc, CDocument)
	//{{AFX_MSG_MAP(CSnaggerDoc)
	//}}AFX_MSG_MAP
END_MESSAGE_MAP()


/////////////////////////////////////////////////////////////////////////////
// CSnaggerDoc construction/destruction

// Constructor -- sets the default project options, sizes the "done" hash
// tables and puts the document into its initial "no project loaded" state.
CSnaggerDoc::CSnaggerDoc()
{
	// Set some default project options
	m_Options.nMaxDepth = 2;
	m_Options.nMaxPages = 0;
	m_Options.bFixupLinks = TRUE;
	m_Options.bContents = TRUE;
	m_Options.bMultimedia = TRUE;
	m_Options.bOffsiteLinks = FALSE;

	// Set the initial hash table sizes
	m_arrPagesDone.InitHashTable(1200);
	m_arrMediaDone.InitHashTable(2400);

	// Initialize some flags
	m_bProjectLoaded = FALSE;
	m_pProgress = NULL;
	m_bAutoMode = FALSE;

	// Zero the statistics counters.  The first (empty project) pass through
	// OnNewDocument() returns before it resets them, so without this they
	// would hold indeterminate values until Reset() or a later
	// OnNewDocument() call runs.
	m_nGottenPageCount = 0;
	m_nGottenFileCount = 0;
	m_nQueuedPageCount = 0;
	m_nTotalBytes = 0;

	m_nLevel = 0;
}

// Destructor -- discards the page and media maps via ClearCacheMaps().
CSnaggerDoc::~CSnaggerDoc()
{
	// Cleanup is best-effort: whatever ClearCacheMaps() throws is dropped
	// here so that nothing propagates out of the destructor.
	try { ClearCacheMaps(); }
	catch(...) { }
}

// Handles creation of a new document.  The very first call creates the
// empty "(No project)" document: only the title is set and the base class
// is invoked.  Every later call marks a project as loaded, clears the
// statistics and resets the action title in the statistics window.
//
// Returns TRUE on success, FALSE if CDocument::OnNewDocument() fails.
BOOL CSnaggerDoc::OnNewDocument()
{
	// Explicitly typed: the original "static bFirstTime" relied on implicit
	// int, which is not valid C++.
	static BOOL bFirstTime = TRUE;

	// Is this the empty project file?
	if(bFirstTime)
	{
		bFirstTime = FALSE;

		// Yes, set the title to the IDS_NO_PROJECT string
		CString strDefName;
		strDefName.LoadString(IDS_NO_PROJECT);
		SetTitle(strDefName);

		// Call the base class and get out...
		if (!CDocument::OnNewDocument())
			return FALSE;

		return(TRUE);
	}

	// Call the base class
	if (!CDocument::OnNewDocument())
		return FALSE;

	// Clear the statistics and indicate that we now have a 
	// project loaded
	m_bProjectLoaded = TRUE;
	m_strStartPage.Empty();
	m_nGottenPageCount = 0;
	m_nGottenFileCount = 0;
	m_nQueuedPageCount = 0;
	m_nTotalBytes = 0;

	// Make sure that the info in the statistics window is reset.  The view
	// and its progress object are checked before use so that a document
	// with no attached view does not dereference a null pointer.
	POSITION pos = GetFirstViewPosition();
	CSnaggerView* pView = pos ? static_cast<CSnaggerView *>(GetNextView(pos)) : NULL;
	if(pView)
	{
		m_pProgress = pView->GetProgress();
		if(m_pProgress)
			m_pProgress->SetActionTitle("");
	}

	return TRUE;
}

// Opens an existing project file.  Any modified current project is saved
// first, the tree is cleared, and the base class loads the file (which ends
// up in Serialize()).  On success the project directory and title are
// derived from lpszPathName and the document is flagged as loaded.
//
// lpszPathName - full path of the project file to open.
// Returns TRUE on success, FALSE if the base class could not read the file.
BOOL CSnaggerDoc::OnOpenDocument(LPCTSTR lpszPathName) 
{
	POSITION viewPos = GetFirstViewPosition();
	CSnaggerView* pSnagView = static_cast<CSnaggerView *>(GetNextView(viewPos));

	// Flush pending changes to the current project before replacing it
	SaveModified();

	// Start from an empty tree
	pSnagView->ClearTree();

	// Let the base class read the document's contents from disk
	const BOOL bLoaded = CDocument::OnOpenDocument(lpszPathName);
	if(!bLoaded)
		return FALSE;

	// Remember where the project lives: the directory is built from the
	// drive, path and base file name of the project file plus a trailing
	// backslash
	SetPathName(lpszPathName);
	m_strDirectory = CInet::SplitFileName(lpszPathName,
				CInet::DRIVE|CInet::PATH|CInet::FNAME)+"\\";

	// The title is the file name together with its extension
	SetTitle(CInet::SplitFileName(lpszPathName,CInet::FNAME|CInet::EXT));

	// A project is now loaded
	m_bProjectLoaded = TRUE;

	// Refresh the statistics window.
	// NOTE(review): the guard tests the previously stored m_pProgress, not
	// the pointer fetched inside the branch -- confirm this is intentional.
	if(m_pProgress)
	{
		m_pProgress = pSnagView->GetProgress();
		m_pProgress->SetActionTitle("");
		UpdateStatus();
	}

	// Mark the document dirty so that it gets saved at the end
	SetModifiedFlag(TRUE);

	return TRUE;
}

// Saves the document if it has been changed.  (DoFileSave() calls the
// document's Serialize() method.)
//
// Returns TRUE when there was nothing to save, otherwise the result of
// CDocument::DoFileSave().
BOOL CSnaggerDoc::SaveModified() 
{
	// Untouched documents need no work
	if(!IsModified())
		return(TRUE);

	return CDocument::DoFileSave();
}

// Keeps the user from exiting the application while a snagging operation
// is in progress.
//
// pFrame - frame being closed (not consulted here).
// Returns FALSE while the first view reports that it is snagging, TRUE
// otherwise.
BOOL CSnaggerDoc::CanCloseFrame(CFrameWnd* pFrame) 
{
	POSITION viewPos = GetFirstViewPosition();
	CSnaggerView* pSnagView = static_cast<CSnaggerView *>(GetNextView(viewPos));

	// Closing is refused for as long as the view is busy
	if(pSnagView->GetSnagging())
		return FALSE;

	return TRUE;
}

// Reloads the empty project (after a delete): forgets the project path and
// directory, clears the loaded/modified flags, zeroes the statistics and
// refreshes the statistics window.
//
// lpszProjName - title to give the reset document; when NULL the default
//                IDS_NO_PROJECT string is used.
void CSnaggerDoc::Reset(LPCTSTR lpszProjName)
{
	CString strNewProjName;

	// Use the caller's name when one is given, otherwise fall back to the
	// default name.  The default used to be loaded unconditionally, which
	// overwrote the supplied name and left the parameter without effect.
	if(lpszProjName)
		strNewProjName = lpszProjName;
	else
		strNewProjName.LoadString(IDS_NO_PROJECT);

	m_strPathName.Empty();
	m_strDirectory.Empty();
	m_bProjectLoaded = FALSE;
	SetModifiedFlag(FALSE);
	SetTitle(strNewProjName);

	// Reset the statistics window's information
	m_strStartPage.Empty();
	m_nGottenPageCount = 0;
	m_nGottenFileCount = 0;
	m_nQueuedPageCount = 0;
	m_nTotalBytes = 0;

	// Update the statistics window.  The view and its progress object are
	// checked before use so a document with no attached view does not
	// dereference a null pointer.
	POSITION pos = GetFirstViewPosition();
	CSnaggerView* pView = pos ? static_cast<CSnaggerView *>(GetNextView(pos)) : NULL;
	if(pView)
	{
		m_pProgress = pView->GetProgress();
		if(m_pProgress)
			m_pProgress->SetActionTitle("");
	}

	UpdateStatus();
}

// Handles closing the document -- i.e. the user pressed the X button or chose the
// close menu item
void CSnaggerDoc::OnCloseDocument() 
{
	// Make sure that we save the current document
	SaveModified();

	CDocument::OnCloseDocument();
}

// Copies the project's current option values into the caller's structure.
//
// Options - receives the six option fields held in m_Options.
void CSnaggerDoc::GetOptions(CSnagOptions& Options)
{
	// On/off switches
	Options.bOffsiteLinks = m_Options.bOffsiteLinks;
	Options.bMultimedia = m_Options.bMultimedia;
	Options.bContents = m_Options.bContents;
	Options.bFixupLinks = m_Options.bFixupLinks;

	// Numeric limits
	Options.nMaxPages = m_Options.nMaxPages;
	Options.nMaxDepth = m_Options.nMaxDepth;
}

// Replaces the project's option values with the caller's -- this is called
// during the command line mode of operation.
//
// Options - supplies the six option fields copied into m_Options.
void CSnaggerDoc::SetOptions(CSnagOptions& Options) 
{ 
	// On/off switches
	m_Options.bOffsiteLinks = Options.bOffsiteLinks;
	m_Options.bMultimedia = Options.bMultimedia;
	m_Options.bContents = Options.bContents;
	m_Options.bFixupLinks = Options.bFixupLinks;

	// Numeric limits
	m_Options.nMaxPages = Options.nMaxPages;
	m_Options.nMaxDepth = Options.nMaxDepth;
}

/////////////////////////////////////////////////////////////////////////////
// CSnaggerDoc serialization

// Document persistence -- called by the framework during OnOpenDocument()
// and OnSaveDocument().  The archive layout, in order, is: file version,
// gotten page count, gotten file count, total bytes, the project options
// and finally the tree contents.
//
// ar - archive to write to (storing) or read from (loading).
void CSnaggerDoc::Serialize(CArchive& ar)
{
	// When storing this stays at the current version; when loading it is
	// replaced by the version read from the file
	long lVersion = g_lFileVersion;

	if(!ar.IsStoring())
	{
		// Loading: the version comes first (right now it is only handed on
		// to the options object), then the statistics window values
		ar >> lVersion;
		ar >> m_nGottenPageCount >> m_nGottenFileCount >> m_nTotalBytes;

		// The queued page count is not persisted; start it from zero
		m_nQueuedPageCount = 0;
	}
	else
	{
		// Storing: version information first, then the statistics window
		// values
		ar << lVersion;
		ar << m_nGottenPageCount << m_nGottenFileCount << m_nTotalBytes;
	}

	// Get/Save the project's options, telling them which version is in play
	m_Options.SetVersion(lVersion);
	m_Options.Serialize(ar);

	// Get/Save the tree's contents through the first view
	POSITION viewPos = GetFirstViewPosition();
	CSnaggerView* pSnagView = static_cast<CSnaggerView *>(GetNextView(viewPos));
	pSnagView->SerializeTree(ar);

	m_nLevel = 0;
}

// Retrieves the specified page using CInet or retrieves it from disk.  
// If the page is gotten using CInet it saves the new page to the 
// name specified in strFileName.  It also parses the page and returns its list 
// of links and multimedia (in linkEntry).
BOOL CSnaggerDoc::GetPage(CString& strPage, CString& strFileName, LINKS& linkEntry)
{
	BYTE *pbyBuffer = m_byBuffer;
	int nLen;
	BOOL bPageInCache = FALSE;
	BOOL bRet = FALSE;
	CInet::RESULTS ret;
	MAP_FILES* pMapEntry;

	// Initialize the link stack entry
	linkEntry.arrLinks.SetSize(0,100);
	linkEntry.arrMedia.SetSize(0,100);
	linkEntry.arrOffsite.SetSize(0,100);
	linkEntry.nIndex = 0;

	// Should we request the page from the host or use a local cached copy?
	if(ShouldGetPage(strPage,pMapEntry))
	{
		// Ask the web server to transmit the page
		ret = m_Inet.GetPage(strPage,&pbyBuffer,nLen,TRUE);
		if(ret == CInet::SUCCESS)
		{
			// Add the total bytes to the statistics window count
			bRet = TRUE;
			m_nTotalBytes += nLen;
		}
	}
	else
	{
		// Using a local cached copy...open the file and read it.  This is
		// done because we still may need the links for a previously saved
		// page.
		CFile fileIn;
		CFileException ex;

		// Make sure we get the full location of the file
		strFileName = pMapEntry->strFileName;
		CString strTempFileName = m_strDirectory+strFileName;

		// Open the local file for create 
		if(fileIn.Open(strTempFileName,CFile::modeRead,&ex))
		{
			// Read the data
			nLen = fileIn.Read(pbyBuffer,MAX_INET_BUFFER);
			fileIn.Close();
			bRet = TRUE;
		}

		// Indicate that we didn't download a new page from the INet
		bPageInCache = TRUE;
	}

	// Did everything work okay?
	if(bRet)
	{
		// Yes...get the links and media info for this page
		CSnaggerHtmlParser Parser;
		Parser.SetPageURL(strPage);
		m_pProgress->SetActionTitle("Parsing Page: "+strPage);

		// Just for safety
		if(nLen > MAX_INET_BUFFER)
			nLen = MAX_INET_BUFFER;

		// Initialize and call the parser
		pbyBuffer = m_byBuffer;
		Parser.SetFixupMode(FALSE);
		Parser.ResetArrays();
		Parser.SetGetMedia(m_Options.bMultimedia);
		Parser.ParseText((char *)pbyBuffer,nLen);
		m_strPageTitle = Parser.GetTitle();

		// Save the page to a local file (if it doesn't already exist)
		if(!bPageInCache)
		{
			m_pProgress->SetActionTitle("Saving Page: "+strPage);
			pbyBuffer = m_byBuffer;
			m_Inet.SaveFile(strFileName,m_strDirectory,pbyBuffer,nLen);
		}

		// Determine the number of linked pages that we have
		int nLinks;
		BOOL bOffsite;
		CString strNewPage;
		nLinks = Parser.GetLinks().GetSize();

		// Evaluate each of the links for this page to determine if we need to 
		// add them to the download queue
		for(int i = 0; i < nLinks; i++)
		{
			// Get the page's URL
			strNewPage = Parser.GetLinks().GetAt(i);

			// Get the offsite link flag for this page
			bOffsite = Parser.GetOffsiteFlags().GetAt(i);

			// See if we should at it to the download queue
			if(ShouldQueuePage(strNewPage,bOffsite)) 
			{
				linkEntry.arrLinks.Add(strNewPage);
				linkEntry.arrOffsite.Add(bOffsite);
			}
		}

		// Don't need the images if we've already parsed this page
		// before
		if(!bPageInCache)
		{
			// New page, so get the all of the media information
			int nMedia = Parser.GetMedia().GetSize();
			CString strMedia;
			for(i = 0; i < nMedia; i++)
			{
				strMedia = Parser.GetMedia().GetAt(i);
				if(ShouldGetMedia(strMedia,pMapEntry))

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -