📄 document.cpp
字号:
/*
Document.cpp : implementation of the CSnaggerDoc class
Implements project file persistence for options, tree data and statistics.
Also performs the actual retrieval of files from the host using the
CInet class.
Author: Steven E. Sipe
*/
#include "stdafx.h"
#include "SiteSnag.h"
#include "Document.h"
#include "View.h"
#include "progress.h"
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
// File version for serialization
const long g_lFileVersion = 0x0101;
/////////////////////////////////////////////////////////////////////////////
// CSnaggerDoc
IMPLEMENT_DYNCREATE(CSnaggerDoc, CDocument)
BEGIN_MESSAGE_MAP(CSnaggerDoc, CDocument)
//{{AFX_MSG_MAP(CSnaggerDoc)
//}}AFX_MSG_MAP
END_MESSAGE_MAP()
/////////////////////////////////////////////////////////////////////////////
// CSnaggerDoc construction/destruction
// Constructor
CSnaggerDoc::CSnaggerDoc()
{
    // Default project options used until a project file or the command
    // line supplies different ones
    m_Options.nMaxDepth = 2;
    m_Options.nMaxPages = 0;
    m_Options.bFixupLinks = TRUE;
    m_Options.bContents = TRUE;
    m_Options.bMultimedia = TRUE;
    m_Options.bOffsiteLinks = FALSE;

    // Set the initial hash table sizes for the "done" maps
    m_arrPagesDone.InitHashTable(1200);
    m_arrMediaDone.InitHashTable(2400);

    // No project is loaded yet and no progress window has been attached
    m_bProjectLoaded = FALSE;
    m_pProgress = NULL;
    m_bAutoMode = FALSE;
    m_nLevel = 0;

    // Give the statistics counters defined values.  OnNewDocument() returns
    // early on its first call without resetting them, so without this they
    // would be indeterminate until a project is created, loaded or reset.
    m_nGottenPageCount = 0;
    m_nGottenFileCount = 0;
    m_nQueuedPageCount = 0;
    m_nTotalBytes = 0;
}
// Destructor
CSnaggerDoc::~CSnaggerDoc()
{
    // Release the page and media maps.  Anything thrown during the cleanup
    // is intentionally discarded so that no exception leaves the destructor.
    try { ClearCacheMaps(); }
    catch(...) { }
}
// Handles creation of a new document.  The framework calls this routine the
// first time to create the empty project; in that case no additional work
// is done.
BOOL CSnaggerDoc::OnNewDocument()
{
    // TRUE only until the first call, which creates the empty
    // "(No project)" document.  (The type must be spelled out; an implicit
    // int declaration is not valid C++.)
    static BOOL bFirstTime = TRUE;

    // Is this the empty project file?
    if(bFirstTime)
    {
        bFirstTime = FALSE;

        // Yes, set the title to the IDS_NO_PROJECT string
        CString strDefName;
        strDefName.LoadString(IDS_NO_PROJECT);
        SetTitle(strDefName);

        // Call the base class and get out...
        if (!CDocument::OnNewDocument())
            return FALSE;

        return(TRUE);
    }

    // Call the base class
    if (!CDocument::OnNewDocument())
        return FALSE;

    // Clear the statistics and indicate that we now have a
    // project loaded
    m_bProjectLoaded = TRUE;
    m_strStartPage.Empty();
    m_nGottenPageCount = 0;
    m_nGottenFileCount = 0;
    m_nQueuedPageCount = 0;
    m_nTotalBytes = 0;

    // Make sure that the info in the statistics window is reset.
    // GetNextView() returns NULL when the document has no view attached,
    // so only touch the progress window when both pointers are valid.
    POSITION pos = GetFirstViewPosition();
    CSnaggerView* pView = static_cast<CSnaggerView *>(GetNextView(pos));
    if(pView != NULL)
    {
        m_pProgress = pView->GetProgress();
        if(m_pProgress != NULL)
            m_pProgress->SetActionTitle("");
    }

    return TRUE;
}
// Called to handle opening an existing document
BOOL CSnaggerDoc::OnOpenDocument(LPCTSTR lpszPathName)
{
    POSITION pos = GetFirstViewPosition();
    CSnaggerView* pView = static_cast<CSnaggerView *>(GetNextView(pos));

    // The tree lives in the view; without one there is nothing to load into
    if(pView == NULL)
        return FALSE;

    // Save the current project (if necessary).  If that fails, stop here
    // rather than clearing the tree and discarding the unsaved project.
    if(!SaveModified())
        return FALSE;

    // Initialize the tree
    pView->ClearTree();

    // Call the base class to read the document's contents from disk
    // (this ends up in Serialize())
    if (!CDocument::OnOpenDocument(lpszPathName))
        return FALSE;

    // Set the project location: the project directory is built from the
    // drive, path and file name parts of the project file, plus a backslash
    SetPathName(lpszPathName);
    m_strDirectory = CInet::SplitFileName(lpszPathName,
                        CInet::DRIVE|CInet::PATH|CInet::FNAME)+"\\";

    // Set the document's title to the file name and extension
    SetTitle(CInet::SplitFileName(lpszPathName,CInet::FNAME|CInet::EXT));

    // Indicate that the project is loaded
    m_bProjectLoaded = TRUE;

    // Update the project's information in the statistics window.
    // NOTE(review): the guard tests the previously cached pointer, so the
    // window is only refreshed once a progress window has been attached
    // elsewhere -- confirm that this is intended.
    if(m_pProgress)
    {
        m_pProgress = pView->GetProgress();
        m_pProgress->SetActionTitle("");
        UpdateStatus();
    }

    // Make sure this document gets saved at the end
    SetModifiedFlag(TRUE);

    return TRUE;
}
// Saves files that have been changed (DoFileSave() calls the document's
// ::Serialize() method)
BOOL CSnaggerDoc::SaveModified()
{
    // An unchanged document needs no saving
    if(!IsModified())
        return(TRUE);

    // Otherwise write it out and report how that went
    return CDocument::DoFileSave();
}
// Prevents the user from exiting the application if a snagging operation is
// in progress
BOOL CSnaggerDoc::CanCloseFrame(CFrameWnd* pFrame)
{
    // Ask the document's first view whether a snag is running
    POSITION posView = GetFirstViewPosition();
    CSnaggerView* pSnagView = (CSnaggerView *) GetNextView(posView);

    // Closing is refused for as long as the view reports snagging
    if(pSnagView->GetSnagging())
        return(FALSE);

    return(TRUE);
}
// Reloads the empty project (after a delete)
void CSnaggerDoc::Reset(LPCTSTR lpszProjName)
{
    // Pick the title for the reset document: the caller's name when one is
    // given, otherwise the default IDS_NO_PROJECT string.  (Previously the
    // default was loaded unconditionally and overwrote the supplied name.)
    CString strNewProjName;
    if(lpszProjName)
        strNewProjName = lpszProjName;
    else
        strNewProjName.LoadString(IDS_NO_PROJECT);

    // Forget the project's location and mark the document as unloaded
    // and unmodified
    m_strPathName.Empty();
    m_strDirectory.Empty();
    m_bProjectLoaded = FALSE;
    SetModifiedFlag(FALSE);
    SetTitle(strNewProjName);

    // Reset the statistics window's information
    m_strStartPage.Empty();
    m_nGottenPageCount = 0;
    m_nGottenFileCount = 0;
    m_nQueuedPageCount = 0;
    m_nTotalBytes = 0;

    // Update the statistics window
    POSITION pos = GetFirstViewPosition();
    CSnaggerView* pView = (CSnaggerView *) GetNextView(pos);
    m_pProgress = pView->GetProgress();
    m_pProgress->SetActionTitle("");
    UpdateStatus();
}
// Handles closing the document -- i.e. the user pressed the X button or chose the
// close menu item
void CSnaggerDoc::OnCloseDocument()
{
    // Write out any pending changes first...
    SaveModified();

    // ...then hand over to the base class to close the document
    CDocument::OnCloseDocument();
}
// Get the project's options
void CSnaggerDoc::GetOptions(CSnagOptions& Options)
{
    // Copy the document's current settings into the caller's structure,
    // field by field
    const CSnagOptions& current = m_Options;

    // Depth and page limits
    Options.nMaxDepth     = current.nMaxDepth;
    Options.nMaxPages     = current.nMaxPages;

    // On/off flags
    Options.bFixupLinks   = current.bFixupLinks;
    Options.bContents     = current.bContents;
    Options.bMultimedia   = current.bMultimedia;
    Options.bOffsiteLinks = current.bOffsiteLinks;
}
// Set the project's options -- this is called during the command line
// mode of operation
void CSnaggerDoc::SetOptions(CSnagOptions& Options)
{
    // Take over the caller's settings, field by field
    const CSnagOptions& requested = Options;

    // Depth and page limits
    m_Options.nMaxDepth     = requested.nMaxDepth;
    m_Options.nMaxPages     = requested.nMaxPages;

    // On/off flags
    m_Options.bFixupLinks   = requested.bFixupLinks;
    m_Options.bContents     = requested.bContents;
    m_Options.bMultimedia   = requested.bMultimedia;
    m_Options.bOffsiteLinks = requested.bOffsiteLinks;
}
/////////////////////////////////////////////////////////////////////////////
// CSnaggerDoc serialization
// Handles document persistence -- called by the framework during OnOpenDocument()
// and during OnSaveDocument()
void CSnaggerDoc::Serialize(CArchive& ar)
{
    // The tree contents are owned by the document's first view
    POSITION posView = GetFirstViewPosition();
    CSnaggerView* pSnagView = (CSnaggerView *) GetNextView(posView);

    // Version of the data being transferred; handed to the options below
    long lVersion;

    if (!ar.IsStoring())
    {
        // Loading: the stored version comes first (it is only passed on
        // to the options, nothing else here looks at it)
        ar >> lVersion;

        // Statistics shown in the statistics window
        ar >> m_nGottenPageCount;
        ar >> m_nGottenFileCount;
        ar >> m_nTotalBytes;

        // The queued page count is not stored -- start from zero
        m_nQueuedPageCount = 0;
    }
    else
    {
        // Saving: stamp the file with the current SiteSnagger file version
        ar << g_lFileVersion;
        lVersion = g_lFileVersion;

        // Statistics shown in the statistics window
        ar << m_nGottenPageCount;
        ar << m_nGottenFileCount;
        ar << m_nTotalBytes;
    }

    // Options follow the statistics in the file, in either direction
    m_Options.SetVersion(lVersion);
    m_Options.Serialize(ar);

    // The tree's contents come last
    pSnagView->SerializeTree(ar);

    m_nLevel = 0;
}
// Retrieves the specified page using CInet or retrieves it from disk.
// If the page is gotten using CInet it saves the new page to the
// name specified in strFileName. It also parses the page and returns its list
// of links and multimedia (in linkEntry).
BOOL CSnaggerDoc::GetPage(CString& strPage, CString& strFileName, LINKS& linkEntry)
{
BYTE *pbyBuffer = m_byBuffer;
int nLen;
BOOL bPageInCache = FALSE;
BOOL bRet = FALSE;
CInet::RESULTS ret;
MAP_FILES* pMapEntry;
// Initialize the link stack entry
linkEntry.arrLinks.SetSize(0,100);
linkEntry.arrMedia.SetSize(0,100);
linkEntry.arrOffsite.SetSize(0,100);
linkEntry.nIndex = 0;
// Should we request the page from the host or use a local cached copy?
if(ShouldGetPage(strPage,pMapEntry))
{
// Ask the web server to transmit the page
ret = m_Inet.GetPage(strPage,&pbyBuffer,nLen,TRUE);
if(ret == CInet::SUCCESS)
{
// Add the total bytes to the statistics window count
bRet = TRUE;
m_nTotalBytes += nLen;
}
}
else
{
// Using a local cached copy...open the file and read it. This is
// done because we still may need the links for a previously saved
// page.
CFile fileIn;
CFileException ex;
// Make sure we get the full location of the file
strFileName = pMapEntry->strFileName;
CString strTempFileName = m_strDirectory+strFileName;
// Open the local file for create
if(fileIn.Open(strTempFileName,CFile::modeRead,&ex))
{
// Read the data
nLen = fileIn.Read(pbyBuffer,MAX_INET_BUFFER);
fileIn.Close();
bRet = TRUE;
}
// Indicate that we didn't download a new page from the INet
bPageInCache = TRUE;
}
// Did everything work okay?
if(bRet)
{
// Yes...get the links and media info for this page
CSnaggerHtmlParser Parser;
Parser.SetPageURL(strPage);
m_pProgress->SetActionTitle("Parsing Page: "+strPage);
// Just for safety
if(nLen > MAX_INET_BUFFER)
nLen = MAX_INET_BUFFER;
// Initialize and call the parser
pbyBuffer = m_byBuffer;
Parser.SetFixupMode(FALSE);
Parser.ResetArrays();
Parser.SetGetMedia(m_Options.bMultimedia);
Parser.ParseText((char *)pbyBuffer,nLen);
m_strPageTitle = Parser.GetTitle();
// Save the page to a local file (if it doesn't already exist)
if(!bPageInCache)
{
m_pProgress->SetActionTitle("Saving Page: "+strPage);
pbyBuffer = m_byBuffer;
m_Inet.SaveFile(strFileName,m_strDirectory,pbyBuffer,nLen);
}
// Determine the number of linked pages that we have
int nLinks;
BOOL bOffsite;
CString strNewPage;
nLinks = Parser.GetLinks().GetSize();
// Evaluate each of the links for this page to determine if we need to
// add them to the download queue
for(int i = 0; i < nLinks; i++)
{
// Get the page's URL
strNewPage = Parser.GetLinks().GetAt(i);
// Get the offsite link flag for this page
bOffsite = Parser.GetOffsiteFlags().GetAt(i);
// See if we should at it to the download queue
if(ShouldQueuePage(strNewPage,bOffsite))
{
linkEntry.arrLinks.Add(strNewPage);
linkEntry.arrOffsite.Add(bOffsite);
}
}
// Don't need the images if we've already parsed this page
// before
if(!bPageInCache)
{
// New page, so get the all of the media information
int nMedia = Parser.GetMedia().GetSize();
CString strMedia;
for(i = 0; i < nMedia; i++)
{
strMedia = Parser.GetMedia().GetAt(i);
if(ShouldGetMedia(strMedia,pMapEntry))
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -