// mydocument.cpp
#include "stdafx.h"
#include "SiteDownload.h"
#include "MyDocument.h"
#include "MyView.h"
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
/////////////////////////////////////////////////////////////////////////////
// CMyDoc
// Run-time class support: declares CMyDoc (derived from CDocument) as
// dynamically creatable.
IMPLEMENT_DYNCREATE(CMyDoc, CDocument)
// Message map for CMyDoc. No handlers are listed between the AFX_MSG_MAP
// markers, so every message falls through to CDocument.
BEGIN_MESSAGE_MAP(CMyDoc, CDocument)
//{{AFX_MSG_MAP(CMyDoc)
//}}AFX_MSG_MAP
END_MESSAGE_MAP()
/////////////////////////////////////////////////////////////////////////////
// CMyDoc construction/destruction
// Constructs an empty document: installs the default download options, sizes
// the hash tables of the "pages done" and "media done" maps, and puts the
// project state and the statistics counters into a known starting state.
CMyDoc::CMyDoc()
{
	// Default download options
	m_Options.nMaxDepth = 3;
	m_Options.nMaxPages = 0;
	m_Options.bFixupLinks = TRUE;
	m_Options.bContents = TRUE;
	m_Options.bMultimedia = TRUE;
	m_Options.bOffsiteLinks = TRUE;

	// Set the hash table sizes
	m_arrPagesDone.InitHashTable(1200);
	m_arrMediaDone.InitHashTable(2400);

	m_bProjectLoaded = FALSE;	// no project loaded yet
	m_bAutoMode = FALSE;
	m_nLevel = 0;			// start at level zero

	// Statistics counters. They are otherwise only assigned by Reset(), by
	// Serialize() when loading and by the non-first OnNewDocument() call, so
	// zero them here to keep Serialize() from ever storing indeterminate
	// values.
	m_nGottenPageCount = 0;
	m_nGottenFileCount = 0;
	m_nQueuedPageCount = 0;
	m_nTotalBytes = 0;
}
// Destroys the document. Calls ClearCacheMaps() and deliberately swallows
// anything it throws so that no exception can escape the destructor.
CMyDoc::~CMyDoc()
{
	try { ClearCacheMaps(); }
	catch (...) { /* intentionally ignored */ }
}
// Handles creation of a new document.
// The very first call (tracked by a function-local static flag) only gives the
// document the IDS_NO_PROJECT title and runs the base-class handler. Every
// later call additionally marks a project as loaded and clears the start page
// and the download statistics.
// Returns FALSE when CDocument::OnNewDocument() fails, TRUE otherwise.
BOOL CMyDoc::OnNewDocument()
{
	// Explicit type: the original relied on the non-standard "implicit int".
	static BOOL bFirstTime = TRUE;

	if (bFirstTime)
	{
		bFirstTime = FALSE;

		CString strDefName;
		strDefName.LoadString(IDS_NO_PROJECT);
		SetTitle(strDefName);

		if (!CDocument::OnNewDocument())
			return FALSE;
		return TRUE;
	}

	if (!CDocument::OnNewDocument())
		return FALSE;

	m_bProjectLoaded = TRUE;
	m_strStartPage.Empty();

	// Reset the statistics for the new project
	m_nGottenPageCount = 0;
	m_nGottenFileCount = 0;
	m_nQueuedPageCount = 0;
	m_nTotalBytes = 0;

	return TRUE;
}
// Opens the project file lpszPathName.
// The current project is saved first, the view's tree is cleared, and the
// base class then loads the file (which ends up in Serialize()). On success
// the path name, the download directory (the project path without its
// extension, plus a trailing backslash) and the title are updated.
// Returns FALSE if there is no view, if saving the current project fails, or
// if the base-class open fails; TRUE otherwise.
BOOL CMyDoc::OnOpenDocument(LPCTSTR lpszPathName)
{
	POSITION pos = GetFirstViewPosition();
	CMyView* pView = (CMyView *) GetNextView(pos);

	// Serialize() reads the tree into the view, so loading without a view
	// cannot work; fail cleanly instead of dereferencing a null pointer.
	if (pView == NULL)
		return FALSE;

	// Save the current project first. If that fails, keep the current project
	// instead of clearing the tree and replacing it.
	if (!SaveModified())
		return FALSE;

	// Clear what the tree control currently shows
	pView->ClearTree();

	if (!CDocument::OnOpenDocument(lpszPathName))
		return FALSE;

	SetPathName(lpszPathName);
	m_strDirectory = CInternetDownload::SplitFileName(lpszPathName,
		CInternetDownload::DRIVE|CInternetDownload::PATH|CInternetDownload::FNAME)+"\\";
	SetTitle(CInternetDownload::SplitFileName(lpszPathName,CInternetDownload::FNAME|CInternetDownload::EXT));
	m_bProjectLoaded = TRUE;

	// The freshly opened project is flagged as modified, as in the original.
	SetModifiedFlag(TRUE);
	return TRUE;
}
// Saves pending changes.
// A document that is not modified needs no work and yields TRUE; otherwise
// the result of CDocument::DoFileSave() is returned.
BOOL CMyDoc::SaveModified()
{
	if (!IsModified())
		return TRUE;

	return CDocument::DoFileSave();
}
// Tells the framework whether the frame may be closed.
// Closing is refused while the first view reports GetSnagging(). pFrame is
// not used. When the document has no view there is nothing to query, so the
// close is allowed.
BOOL CMyDoc::CanCloseFrame(CFrameWnd* pFrame)
{
	POSITION pos = GetFirstViewPosition();
	CMyView* pView = (CMyView *) GetNextView(pos);

	// GetNextView() returns NULL when there are no views; do not dereference.
	if (pView == NULL)
		return TRUE;

	return(!pView->GetSnagging());
}
// Resets the document to an empty state: path, directory, start page and all
// statistics are cleared, the modified flag is dropped and no project is
// marked as loaded.
// lpszProjName - title to give the document; when NULL the IDS_NO_PROJECT
//                string resource is used instead.
void CMyDoc::Reset(LPCTSTR lpszProjName)
{
	CString strNewProjName;

	// Fall back to the default title only when no name is supplied. The
	// original had no "else", so the supplied name was always overwritten.
	if (lpszProjName)
		strNewProjName = lpszProjName;
	else
		strNewProjName.LoadString(IDS_NO_PROJECT);

	m_strPathName.Empty();
	m_strDirectory.Empty();
	m_bProjectLoaded = FALSE;
	SetModifiedFlag(FALSE);
	SetTitle(strNewProjName);

	// Clear the start page and the statistics
	m_strStartPage.Empty();
	m_nGottenPageCount = 0;
	m_nGottenFileCount = 0;
	m_nQueuedPageCount = 0;
	m_nTotalBytes = 0;
}
// Saves the document and then closes it.
void CMyDoc::OnCloseDocument()
{
	// The result of the save is not examined; the close always proceeds.
	SaveModified();

	// Hand over to the base class for the actual close.
	CDocument::OnCloseDocument();
}
// Copies the project's download options into Options.
// Only the six fields below are transferred.
// NOTE(review): m_Options also carries bUseProxy / strProxyName (read when the
// download session is opened); they are not copied here - confirm that this
// is intended.
void CMyDoc::GetOptions(CConfigure& Options)
{
	const CConfigure& current = m_Options;

	// Limits
	Options.nMaxPages     = current.nMaxPages;
	Options.nMaxDepth     = current.nMaxDepth;

	// Feature switches
	Options.bOffsiteLinks = current.bOffsiteLinks;
	Options.bMultimedia   = current.bMultimedia;
	Options.bContents     = current.bContents;
	Options.bFixupLinks   = current.bFixupLinks;
}
// Replaces the project's download options with the values in Options.
// Only the six fields below are transferred.
// NOTE(review): bUseProxy / strProxyName of m_Options are left untouched by
// this function - confirm that this is intended.
void CMyDoc::SetOptions(CConfigure& Options)
{
	CConfigure& target = m_Options;

	// Limits
	target.nMaxPages     = Options.nMaxPages;
	target.nMaxDepth     = Options.nMaxDepth;

	// Feature switches
	target.bOffsiteLinks = Options.bOffsiteLinks;
	target.bMultimedia   = Options.bMultimedia;
	target.bContents     = Options.bContents;
	target.bFixupLinks   = Options.bFixupLinks;
}
// Serializes the document to or from the archive.
// Order in the archive: page count, file count, total bytes, the options
// object, then the contents of the view's tree control. The queued-page count
// is not stored; it is reset to zero on load. The current level is reset to
// zero in both directions.
void CMyDoc::Serialize(CArchive& ar)
{
	if (ar.IsLoading())
	{
		ar >> m_nGottenPageCount;
		ar >> m_nGottenFileCount;
		ar >> m_nTotalBytes;
		m_nQueuedPageCount = 0;
	}
	else
	{
		ar << m_nGottenPageCount;
		ar << m_nGottenFileCount;
		ar << m_nTotalBytes;
	}

	m_Options.Serialize(ar);

	// The tree control held by the view is part of the project file too
	POSITION posView = GetFirstViewPosition();
	CMyView* pTreeView = (CMyView *) GetNextView(posView);
	pTreeView->SerializeTree(ar);

	m_nLevel = 0;
}
// Retrieves one page and collects the links and media items it refers to.
// If the page is not yet in the "pages done" map it is fetched through
// m_Inet, counted in m_nTotalBytes and saved under strFileName in the project
// directory. Otherwise the previously saved file is read back from disk and
// strFileName is set to the cached file name. In both cases the page is
// parsed; links accepted by ShouldQueuePage() go into linkEntry.arrLinks /
// arrOffsite. Media items accepted by ShouldGetMedia() go into
// linkEntry.arrMedia, but only for pages that were just downloaded.
//
// strPage     - URL of the page (passed by reference to m_Inet.GetPage, which
//               may presumably update it, e.g. on a redirect - verify).
// strFileName - in: local file name for a newly downloaded page;
//               out: cached file name when the page came from disk.
// linkEntry   - link stack entry that receives the results; it is reset here.
// Returns TRUE when the page contents were obtained, FALSE otherwise.
BOOL CMyDoc::GetPage(CString& strPage, CString& strFileName, LINKS& linkEntry)
{
	BYTE *pbyBuffer = m_byBuffer;
	int nLen = 0;
	BOOL bPageInCache = FALSE;
	BOOL bRet = FALSE;
	CInternetDownload::RESULTS ret;
	MAP_FILES* pMapEntry = NULL;

	// Initialize the link stack entry
	linkEntry.arrLinks.SetSize(0,100);
	linkEntry.arrMedia.SetSize(0,100);
	linkEntry.arrOffsite.SetSize(0,100);
	linkEntry.nIndex = 0;

	// Decide whether the page has to come from the network or from disk
	if(ShouldGetPage(strPage,pMapEntry))
	{
		// Network resource
		ret = m_Inet.GetPage(strPage,&pbyBuffer,nLen,TRUE);
		if(ret == CInternetDownload::SUCCESS)
		{
			bRet = TRUE;
			m_nTotalBytes += nLen;
		}
	}
	else
	{
		// Disk resource: read back the file saved earlier
		CFile fileIn;
		CFileException ex;
		strFileName = pMapEntry->strFileName;
		CString strTempFileName = m_strDirectory+strFileName;
		if(fileIn.Open(strTempFileName,CFile::modeRead,&ex))
		{
			nLen = fileIn.Read(pbyBuffer,MAX_INET_BUFFER);
			fileIn.Close();
			bRet = TRUE;
		}

		// Remember that the page did not come from CInternetDownload
		bPageInCache = TRUE;
	}

	if(bRet)
	{
		// Parse the page for the links and media it contains
		CHTMLFileParser Parser;
		Parser.SetPageURL(strPage);

		// Never look past the end of the page buffer
		if(nLen > MAX_INET_BUFFER)
			nLen = MAX_INET_BUFFER;

		pbyBuffer = m_byBuffer;
		Parser.SetFixupMode(FALSE);
		Parser.ResetArrays();
		Parser.SetGetMedia(m_Options.bMultimedia);
		Parser.ParseText((char *)pbyBuffer,nLen);
		m_strPageTitle = Parser.GetTitle();

		// Save a newly downloaded page to disk
		if(!bPageInCache)
		{
			pbyBuffer = m_byBuffer;
			m_Inet.SaveFile(strFileName,m_strDirectory,pbyBuffer,nLen);
		}

		// Evaluate every link and decide whether it joins the download queue
		int nLinks;
		BOOL bOffsite;
		CString strNewPage;
		nLinks = Parser.GetLinks().GetSize();
		for(int nLink = 0; nLink < nLinks; nLink++)
		{
			// URL of this link
			strNewPage = Parser.GetLinks().GetAt(nLink);

			// Offsite flag of this link
			bOffsite = Parser.GetOffsiteFlags().GetAt(nLink);

			if(ShouldQueuePage(strNewPage,bOffsite))
			{
				linkEntry.arrLinks.Add(strNewPage);
				linkEntry.arrOffsite.Add(bOffsite);
			}
		}

		// Media items are collected only for pages that were just downloaded
		if(!bPageInCache)
		{
			int nMedia = Parser.GetMedia().GetSize();
			CString strMedia;

			// Own loop variable: the original reused the index declared in the
			// previous for statement, which only compiles with the old
			// non-conforming for-scope rule.
			for(int nItem = 0; nItem < nMedia; nItem++)
			{
				strMedia = Parser.GetMedia().GetAt(nItem);
				if(ShouldGetMedia(strMedia,pMapEntry))
					linkEntry.arrMedia.Add(strMedia);
			}
		}
	}

	return(bRet);
}
// Downloads one media item through m_Inet and saves it in the project
// directory under strFileName. The number of bytes received is added to
// m_nTotalBytes.
// Returns TRUE when the download reported SUCCESS, FALSE otherwise.
BOOL CMyDoc::GetMedia(CString& strMedia, CString& strFileName)
{
	BYTE *pData = m_byBuffer;
	int cbData;

	// Fetch the file; give up right away on anything but success
	const CInternetDownload::RESULTS status = m_Inet.GetFile(strMedia,&pData,cbData);
	if(status != CInternetDownload::SUCCESS)
		return FALSE;

	m_nTotalBytes += cbData;

	// Store the file
	m_Inet.SaveFile(strFileName,m_strDirectory,pData,cbData);
	return TRUE;
}
// Reports whether a page still has to be downloaded.
// The lookup key is the lower-cased URL up to (not including) the first '#'.
// Returns TRUE when the key is not in the "pages done" map. When it is found,
// FALSE is returned and pMapEntry receives the stored map entry.
BOOL CMyDoc::ShouldGetPage(CString& strPage, MAP_FILES*& pMapEntry)
{
	CString strKey(strPage);
	strKey.MakeLower();
	strKey = strKey.SpanExcluding("#");

	const BOOL bAlreadyDone = m_arrPagesDone.Lookup(strKey,(CObject *&) pMapEntry);
	return(!bAlreadyDone);
}
// Reports whether a media item still has to be downloaded.
// The lookup key is the lower-cased URL up to (not including) the first '#'.
// Returns TRUE when the key is not in the "media done" map. When it is found,
// FALSE is returned and pMapEntry receives the stored map entry.
BOOL CMyDoc::ShouldGetMedia(CString& strMedia, MAP_FILES*& pMapEntry)
{
	CString strKey(strMedia);
	strKey.MakeLower();
	strKey = strKey.SpanExcluding("#");

	const BOOL bAlreadyDone = m_arrMediaDone.Lookup(strKey,(CObject *&) pMapEntry);
	return(!bAlreadyDone);
}
// Decides whether strNewPage should be added to the download queue.
//  - Page already downloaded: TRUE is returned at once when a maximum depth is
//    configured and the current level is at or beyond the level recorded in
//    the page's map entry.
//  - Page not downloaded yet: FALSE is returned when the same URL is already
//    waiting in the link queue of any level below the current one.
//  - In all remaining cases the page is queued unless it is an offsite page
//    and offsite links are disabled in the options.
BOOL CMyDoc::ShouldQueuePage(CString& strNewPage, BOOL bOffsite)
{
	MAP_FILES* pEntry;
	const BOOL bNotFetchedYet = ShouldGetPage(strNewPage,pEntry);

	if(!bNotFetchedYet)
	{
		// Already downloaded: check the level recorded for it
		if(m_Options.nMaxDepth && m_nLevel >= pEntry->nMaxLevel)
			return(TRUE);
	}
	else
	{
		// Not downloaded: is it already waiting at a previous level?
		for(int nLvl = 0; nLvl < m_nLevel; nLvl++)
		{
			for(int nPos = 0; nPos < m_aLinks[nLvl].arrLinks.GetSize(); nPos++)
			{
				if(strNewPage == m_aLinks[nLvl].arrLinks.GetAt(nPos))
					return(FALSE);
			}
		}
	}

	// Offsite pages are accepted only when offsite links are enabled
	if(!bOffsite || m_Options.bOffsiteLinks)
		return(TRUE);

	return(FALSE);
}
// Initializes the specified link stack entry
void CMyDoc::ResetLink(int nLevel)
{
m_aLinks[nLevel].nIndex = 0;
m_aLinks[nLevel].arrLinks.SetSize(0,100);
m_aLinks[nLevel].arrMedia.SetSize(0,100);
m_aLinks[nLevel].arrOffsite.SetSize(0,100);
}
// The workhouse thread routine that recursively navigates linked web pages and
// retrieves each of them along with their multimedia files. This process is
// spawned indirectrly in RecursiveDownload() using the AfxBeginThread() call.
UINT CMyDoc::DownloadThread(LPVOID lpvData)
{
HTREEITEM htreePage;
// Static methods can't have a "this" pointer to get the parent class's
// pointer which the call passes as a parameter
CMyDoc *pThis = (CMyDoc *) lpvData;
int nMaxDepth = pThis->m_Options.nMaxDepth-1;
int nCount;
CString strPage = pThis->m_strStartPage;
CString strFileName;
CString strLogData;
CString strText;
POSITION pos = pThis->GetFirstViewPosition();
CMyView* pView = (CMyView *) pThis->GetNextView(pos);
BOOL bIsOffsite = FALSE;
// Establish the WinInet Session
try
{
pThis->m_Inet.OpenSession(pThis->m_Options.bUseProxy,pThis->m_Options.strProxyName);
}
catch(...)
{
}
// Create the log file
pThis->m_fileLog.Open(pThis->m_strDirectory+"sitesnag.log",
CFile::modeCreate|CFile::modeWrite);
// Create the table of contents file
if(pThis->m_Options.bContents)
{
pThis->m_fileContents.Open(pThis->m_strDirectory+"SnagCon1.htm",
CFile::modeCreate|CFile::modeWrite);
// Add the TOC to the list of downloaded files
pThis->SetPageCacheEntry("snagcon1.htm","SnagCon1.htm",0);
// Add the TOC to the tree control
CString strTitle = "Contents Page 1 (SnagCon1.htm)";
pView->AddTreeContent(strTitle);
// Write the beginning of the first TOC page
strText = "<HTML>\r\n<HEAD>\r\n<TITLE>SiteSnagger Contents</TITLE>\r\n";
strText += "</HEAD\r\n<BODY>\r\n";
strText += "<H1><center>SiteSnagger Table of Contents</center><br><br></H1>\r\n<UL>\r\n";
pThis->m_fileContents.Write(strText,strText.GetLength());
}
// Initialize the index for the first link level, start with the first level
pThis->m_nLevel = 0;
pThis->m_aLinks[0].nIndex = 0;
pThis->m_Inet.ResetUniqueCount();
// Recusively search web links until either we've searched them all (m_nLevel is
// -1 or if the user decides to abort
while(pThis->m_nLevel >= 0 )
{
// Get the name of a new page in a second dimension element
if(pThis->m_aLinks[pThis->m_nLevel].nIndex > 0)
{
// Save the URL and whether it's offsite
int nIndex = pThis->m_aLinks[pThis->m_nLevel].nIndex;
strPage = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetAt(nIndex);
bIsOffsite = pThis->m_aLinks[pThis->m_nLevel].arrOffsite.GetAt(nIndex);
// Bump to the next level so we can get the page's links
pThis->m_nLevel++;
}
// Generate a unique filename for this page
pThis->m_Inet.GenerateUniqueFileName(strPage,strFileName,
pThis->m_arrPagesDone,TRUE);
// Write a log entry for this page -- leave room for the result
strLogData.Format("[%02d] Getting page %s ",pThis->m_nLevel+1,strPage);
pThis->m_fileLog.Write(strLogData,strLogData.GetLength());
CString strOrigPage = strPage;
// Get the page from Inet or from local file
if(pThis->GetPage(strPage,strFileName,pThis->m_aLinks[pThis->m_nLevel]))
{
MAP_FILES *pMapEntry;
// Get the count of links
nCount = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetSize();
// Did we just download this new page??
if(pThis->ShouldGetPage(strPage,pMapEntry))
{
// Yes, add it to the list of retrieved pages
pThis->SetPageCacheEntry(strPage,strFileName,pThis->m_nLevel);
// If the page was redirected then add its original name too
if(strPage != strOrigPage && pThis->ShouldGetPage(strOrigPage,pMapEntry))
pThis->SetPageCacheEntry(strOrigPage,strFileName,pThis->m_nLevel);
// Prefix offsite pages with their URL (i.e. http://www.xxx.yyy)
if(bIsOffsite)
strText = strPage+" - ";
else strText.Empty();
// Add the page's title and local filename
strText += pThis->m_strPageTitle+" ("+
strFileName.SpanExcluding("#")+")";
htreePage = pView->AddTreePage(strText,bIsOffsite);
strText.Format("<a href=%s><li> %s (%s - %s)<br>\r\n",strFileName,
pThis->m_strPageTitle,
strFileName.SpanExcluding("#"),strPage);
pThis->m_fileContents.Write(strText,strText.GetLength());
// Update the statistics
pThis->m_nGottenPageCount++;
pThis->m_nGottenFileCount++;
}
else
{
// Set the new depth level if necessary
if(nMaxDepth)
{
// Have we gone to the max level yet???
if(pThis->m_nLevel >= pMapEntry->nMaxLevel)
nCount = 0;
else pMapEntry->nMaxLevel = pThis->m_nLevel;
}
}
// Log the results
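// [Listing truncated here: the rest of CMyDoc::DownloadThread and the
//  remainder of the file are missing from this copy. The text that followed
//  was keyboard-shortcut help from the web code viewer, not source code.]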