📄 document.cpp
字号:
/*
Document.cpp : implementation of the CSnaggerDoc class
Implements project file persistence for options, tree data and statistics.
Also performs the actual retrieval of files from the host using the
CInet class.
Author: Steven E. Sipe
*/
#include "stdafx.h"
#include "SiteSnag.h"
#include "Document.h"
#include "View.h"
#include "progress.h"
// Debug builds only: redefine `new` as DEBUG_NEW and keep this source file's
// name in the file-local THIS_FILE array.
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
// File version for serialization.
// CSnaggerDoc::Serialize() writes this value first when storing a project,
// and passes the version (this one when storing, the stored one when loading)
// to the options object through SetVersion().
const long g_lFileVersion = 0x0101;
/////////////////////////////////////////////////////////////////////////////
// CSnaggerDoc
// Dynamic-creation support for CSnaggerDoc (base class: CDocument)
IMPLEMENT_DYNCREATE(CSnaggerDoc, CDocument)
// Message map for CSnaggerDoc -- no handlers are registered between the
// AFX_MSG_MAP markers.
BEGIN_MESSAGE_MAP(CSnaggerDoc, CDocument)
//{{AFX_MSG_MAP(CSnaggerDoc)
//}}AFX_MSG_MAP
END_MESSAGE_MAP()
/////////////////////////////////////////////////////////////////////////////
// CSnaggerDoc construction/destruction
// Constructor
//
// Sets the default project options, sizes the page and media maps, and puts
// the document into its initial "no project loaded" state with all
// statistics counters at zero.
CSnaggerDoc::CSnaggerDoc()
{
	// Set some default project options
	m_Options.nMaxDepth = 2;
	m_Options.nMaxPages = 0;
	m_Options.bFixupLinks = TRUE;
	m_Options.bContents = TRUE;
	m_Options.bMultimedia = TRUE;
	m_Options.bOffsiteLinks = FALSE;

	// Set the initial hash table sizes
	m_arrPagesDone.InitHashTable(1200);
	m_arrMediaDone.InitHashTable(2400);

	// Initialize some flags
	m_bProjectLoaded = FALSE;
	m_pProgress = NULL;
	m_bAutoMode = FALSE;
	m_nLevel = 0;

	// Start the statistics at zero.  The first call to OnNewDocument()
	// returns before it resets these counters, so without this they would
	// hold indeterminate values until a project is created, loaded or reset.
	m_nGottenPageCount = 0;
	m_nGottenFileCount = 0;
	m_nQueuedPageCount = 0;
	m_nTotalBytes = 0;
}
// Destructor
//
// Releases the page and media maps.  Any exception raised while doing so is
// swallowed so that nothing propagates out of the destructor.
CSnaggerDoc::~CSnaggerDoc()
{
	try
	{
		ClearCacheMaps();
	}
	catch(...)
	{
		// Deliberately ignored -- see the note above
	}
}
// Creates a new document.
//
// The very first call is treated as the empty start-up project: the title is
// set to the IDS_NO_PROJECT string and no project is marked as loaded.  Every
// later call marks a project as loaded, clears the statistics and blanks the
// action title in the statistics window.
//
// Returns FALSE if CDocument::OnNewDocument() fails, TRUE otherwise.
BOOL CSnaggerDoc::OnNewDocument()
{
	// Needs an explicit type: a declaration without one ("implicit int")
	// is not valid C++.
	static BOOL bFirstTime = TRUE;

	// Is this the empty project file?
	if(bFirstTime)
	{
		bFirstTime = FALSE;

		// Yes, set the title to "(No project)"
		CString strDefName;
		strDefName.LoadString(IDS_NO_PROJECT);
		SetTitle(strDefName);

		// Call the base class and get out...
		if(!CDocument::OnNewDocument())
			return FALSE;

		return TRUE;
	}

	// Call the base class
	if(!CDocument::OnNewDocument())
		return FALSE;

	// Clear the statistics and indicate that we now have a
	// project loaded
	m_bProjectLoaded = TRUE;
	m_strStartPage.Empty();
	m_nGottenPageCount = 0;
	m_nGottenFileCount = 0;
	m_nQueuedPageCount = 0;
	m_nTotalBytes = 0;

	// Make sure that the info in the statistics window is reset.
	// GetNextView() yields NULL when no view is attached, so guard the
	// pointers instead of dereferencing them blindly.
	POSITION pos = GetFirstViewPosition();
	CSnaggerView* pView = static_cast<CSnaggerView*>(GetNextView(pos));
	if(pView != NULL)
	{
		m_pProgress = pView->GetProgress();
		if(m_pProgress != NULL)
			m_pProgress->SetActionTitle("");
	}

	return TRUE;
}
// Opens a previously saved document.
//
// Saves the current project if it was modified, clears the view's tree, lets
// the base class read the file, then records the project's path, directory
// and title and marks the project as loaded.
//
// lpszPathName - full path of the project file to open.
// Returns FALSE if CDocument::OnOpenDocument() fails, TRUE otherwise.
BOOL CSnaggerDoc::OnOpenDocument(LPCTSTR lpszPathName)
{
	POSITION pos = GetFirstViewPosition();
	CSnaggerView* pTreeView = static_cast<CSnaggerView*>(GetNextView(pos));

	// Flush the project that is currently open (if it changed), then start
	// with an empty tree
	SaveModified();
	pTreeView->ClearTree();

	// Let the base class read the document's contents from disk
	const BOOL bLoaded = CDocument::OnOpenDocument(lpszPathName);
	if(!bLoaded)
		return FALSE;

	// Remember where the project lives: the directory is built from the
	// drive, path and file-name parts of the project file plus a trailing
	// backslash
	SetPathName(lpszPathName);
	m_strDirectory =
		CInet::SplitFileName(lpszPathName, CInet::DRIVE|CInet::PATH|CInet::FNAME)
		+ "\\";

	// The title is the file name together with its extension
	SetTitle(CInet::SplitFileName(lpszPathName, CInet::FNAME|CInet::EXT));

	// A project is now loaded
	m_bProjectLoaded = TRUE;

	// Refresh the statistics window.
	// NOTE(review): this only runs when m_pProgress was already set by an
	// earlier call, even though it is re-fetched inside -- confirm that this
	// guard is intentional.
	if(m_pProgress != NULL)
	{
		m_pProgress = pTreeView->GetProgress();
		m_pProgress->SetActionTitle("");
		UpdateStatus();
	}

	// Make sure this document gets saved at the end
	SetModifiedFlag(TRUE);

	return TRUE;
}
// Saves the document if it has been changed.  The save goes through
// CDocument::DoFileSave(), which calls the document's Serialize() method.
//
// Returns TRUE when nothing needed saving, otherwise the result of
// CDocument::DoFileSave().
BOOL CSnaggerDoc::SaveModified()
{
	// Nothing to do for an unchanged document
	if(!IsModified())
		return TRUE;

	return CDocument::DoFileSave();
}
// Prevents the user from exiting the application while a snagging operation
// is in progress.
//
// pFrame - the frame being closed (not used here).
// Returns FALSE while the view reports that it is snagging, TRUE otherwise.
BOOL CSnaggerDoc::CanCloseFrame(CFrameWnd* pFrame)
{
	POSITION pos = GetFirstViewPosition();
	CSnaggerView* pSnagView = static_cast<CSnaggerView*>(GetNextView(pos));

	// Closing is allowed only when no snag is running
	if(pSnagView->GetSnagging())
		return FALSE;

	return TRUE;
}
// Loads an empty project.
//
// Clears the path, directory, modified flag and statistics, marks the
// document as having no project loaded, and refreshes the statistics window.
//
// lpszProjName - title to give the document; when NULL the IDS_NO_PROJECT
//                string is used instead.
void CSnaggerDoc::Reset(LPCTSTR lpszProjName)
{
	CString strNewProjName;

	if(lpszProjName)
		strNewProjName = lpszProjName;
	else
	{
		// Use the default name -- (No Project).  This must only happen when
		// the caller gave no name; without the else the supplied name would
		// always be overwritten.
		strNewProjName.LoadString(IDS_NO_PROJECT);
	}

	m_strPathName.Empty();
	m_strDirectory.Empty();
	m_bProjectLoaded = FALSE;
	SetModifiedFlag(FALSE);
	SetTitle(strNewProjName);

	// Reset the statistics window's information
	m_strStartPage.Empty();
	m_nGottenPageCount = 0;
	m_nGottenFileCount = 0;
	m_nQueuedPageCount = 0;
	m_nTotalBytes = 0;

	// Update the statistics window
	POSITION pos = GetFirstViewPosition();
	CSnaggerView* pView = (CSnaggerView *) GetNextView(pos);
	m_pProgress = pView->GetProgress();
	m_pProgress->SetActionTitle("");
	UpdateStatus();
}
// Handles closing the document -- i.e. the user pressed the X button or chose
// the close menu item.  The current document is saved (if modified) before
// the base class closes it.
void CSnaggerDoc::OnCloseDocument()
{
	// Save first; the result of the save is not examined
	(void) SaveModified();

	CDocument::OnCloseDocument();
}
// Gets the project options.
//
// Options - receives a field-by-field copy of the document's current
//           depth, page-limit and flag settings.
void CSnaggerDoc::GetOptions(CSnagOptions& Options)
{
	const CSnagOptions& current = m_Options;

	// Limits
	Options.nMaxDepth = current.nMaxDepth;
	Options.nMaxPages = current.nMaxPages;

	// Flags
	Options.bFixupLinks = current.bFixupLinks;
	Options.bContents = current.bContents;
	Options.bMultimedia = current.bMultimedia;
	Options.bOffsiteLinks = current.bOffsiteLinks;
}
// Sets the project options; called when the program is run from the
// command line.
//
// Options - the depth, page-limit and flag settings to copy, field by field,
//           into the document.
void CSnaggerDoc::SetOptions(CSnagOptions& Options)
{
	CSnagOptions& target = m_Options;

	// Limits
	target.nMaxDepth = Options.nMaxDepth;
	target.nMaxPages = Options.nMaxPages;

	// Flags
	target.bFixupLinks = Options.bFixupLinks;
	target.bContents = Options.bContents;
	target.bMultimedia = Options.bMultimedia;
	target.bOffsiteLinks = Options.bOffsiteLinks;
}
// Serializes the project file.
//
// Archive layout, in order: file version, gotten-page count, gotten-file
// count, total bytes, the project options, and the contents of the view's
// tree.
//
// ar - archive to store to or load from.
void CSnaggerDoc::Serialize(CArchive& ar)
{
	long lVersion;
	const BOOL bSaving = ar.IsStoring();

	if(!bSaving)
	{
		// Loading: read the version the file was written with
		ar >> lVersion;

		// Read the statistics
		ar >> m_nGottenPageCount;
		ar >> m_nGottenFileCount;
		ar >> m_nTotalBytes;

		// Reset the queued page count (used in the statistics window)
		m_nQueuedPageCount = 0;
	}
	else
	{
		// Storing: write the current file version first
		ar << g_lFileVersion;
		lVersion = g_lFileVersion;

		// Write the statistics window's information
		ar << m_nGottenPageCount;
		ar << m_nGottenFileCount;
		ar << m_nTotalBytes;
	}

	// Load or save the project options, telling them which file version
	// is in effect
	m_Options.SetVersion(lVersion);
	m_Options.Serialize(ar);

	// Load or save the contents of the tree control
	POSITION pos = GetFirstViewPosition();
	CSnaggerView* pTreeView = static_cast<CSnaggerView*>(GetNextView(pos));
	pTreeView->SerializeTree(ar);

	m_nLevel = 0;
}
//下载指定的页面,通过CInet或者从硬盘上现在
//如果通过CInet下载,则保存程文件名strFileName,同时分析该文件中的链接
BOOL CSnaggerDoc::GetPage(CString& strPage, CString& strFileName, LINKS& linkEntry)
{
BYTE *pbyBuffer = m_byBuffer;
int nLen;
BOOL bPageInCache = FALSE;
BOOL bRet = FALSE;
CInet::RESULTS ret;
MAP_FILES* pMapEntry;
// 初始化链接堆栈入口
linkEntry.arrLinks.SetSize(0,100);
linkEntry.arrMedia.SetSize(0,100);
linkEntry.arrOffsite.SetSize(0,100);
linkEntry.nIndex = 0;
// 判断是否需要下载该页
if(ShouldGetPage(strPage,pMapEntry))
{
// 向服务器提出申请,要下载该文件
ret = m_Inet.GetPage(strPage,&pbyBuffer,nLen,TRUE);
if(ret == CInet::SUCCESS)
{
// 将总下载字节数增加
bRet = TRUE;
m_nTotalBytes += nLen;
}
}
else
{
// 如果已经下载过了,则复制本地文件
// 因为需要链接指向原来下载过的文件
CFile fileIn;
CFileException ex;
// 获得完整的文件路径
strFileName = pMapEntry->strFileName;
CString strTempFileName = m_strDirectory+strFileName;
// 打开该文件
if(fileIn.Open(strTempFileName,CFile::modeRead,&ex))
{
// 读入数据
nLen = fileIn.Read(pbyBuffer,MAX_INET_BUFFER);
fileIn.Close();
bRet = TRUE;
}
// 指示并没有重新下载该文件
bPageInCache = TRUE;
}
// 是否完毕
if(bRet)
{
// 获得链接以及多媒体文件信息
CSnaggerHtmlParser Parser;
Parser.SetPageURL(strPage);
m_pProgress->SetActionTitle("Parsing Page: "+strPage);
// 判断是否超出最大缓冲区
if(nLen > MAX_INET_BUFFER)
nLen = MAX_INET_BUFFER;
// 初始化并调用分析子程序parser
pbyBuffer = m_byBuffer;
Parser.SetFixupMode(FALSE);
Parser.ResetArrays();
Parser.SetGetMedia(m_Options.bMultimedia);
Parser.ParseText((char *)pbyBuffer,nLen);
m_strPageTitle = Parser.GetTitle();
// 保存该文件
if(!bPageInCache)
{
m_pProgress->SetActionTitle("Saving Page: "+strPage);
pbyBuffer = m_byBuffer;
m_Inet.SaveFile(strFileName,m_strDirectory,pbyBuffer,nLen);
}
// 分析被链接的页面的数量
int nLinks;
BOOL bOffsite;
CString strNewPage;
nLinks = Parser.GetLinks().GetSize();
// 分析每一个链接,是否要添加到下载等待列表中
for(int i = 0; i < nLinks; i++)
{
// 获得网页的URL地址
strNewPage = Parser.GetLinks().GetAt(i);
// Get the offsite link flag for this page
bOffsite = Parser.GetOffsiteFlags().GetAt(i);
// See if we should at it to the download queue
if(ShouldQueuePage(strNewPage,bOffsite))
{
linkEntry.arrLinks.Add(strNewPage);
linkEntry.arrOffsite.Add(bOffsite);
}
}
// Don't need the images if we've already parsed this page
// before
if(!bPageInCache)
{
// New page, so get the all of the media information
int nMedia = Parser.GetMedia().GetSize();
CString strMedia;
for(i = 0; i < nMedia; i++)
{
strMedia = Parser.GetMedia().GetAt(i);
if(ShouldGetMedia(strMedia,pMapEntry))
linkEntry.arrMedia.Add(strMedia);
}
}
// Success
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -