📄 mydocument.cpp

📁 Companion source code for the book "Visual C++网络通信编程实用案例精选" (Practical Visual C++ Network Communication Programming: Selected Examples). The CD contains the source code for every example covered in the book, together with the compiled applications. All programs have been tested.
💻 CPP
📖 Page 1 of 2
#include "stdafx.h"

#include "SiteDownload.h"
#include "MyDocument.h"
#include "MyView.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif


/////////////////////////////////////////////////////////////////////////////
// CMyDoc

IMPLEMENT_DYNCREATE(CMyDoc, CDocument)

BEGIN_MESSAGE_MAP(CMyDoc, CDocument)
	//{{AFX_MSG_MAP(CMyDoc)
	//}}AFX_MSG_MAP
END_MESSAGE_MAP()


/////////////////////////////////////////////////////////////////////////////
// CMyDoc construction/destruction

CMyDoc::CMyDoc()
{
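	// Default capture options (set below): follow links up to three levels deep,
	// no page-count limit (0 appears to mean "unlimited"), rewrite links for
	// local browsing, build a table-of-contents page, download multimedia files,
	// and follow offsite links.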
	m_Options.nMaxDepth = 3;
	m_Options.nMaxPages = 0;
	m_Options.bFixupLinks = TRUE;
	m_Options.bContents = TRUE;
	m_Options.bMultimedia = TRUE;
	m_Options.bOffsiteLinks = TRUE;

	// Set the hash table sizes
	m_arrPagesDone.InitHashTable(1200);
	m_arrMediaDone.InitHashTable(2400);
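	// Note: InitHashTable must be called before any elements are added to the
	// map; the MFC documentation recommends a bucket count (ideally a prime)
	// roughly 20% larger than the largest expected data set to keep lookups fast.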

	m_bProjectLoaded = FALSE;	// no project loaded yet
	m_bAutoMode = FALSE;

	m_nLevel = 0;	// start at level zero
}

CMyDoc::~CMyDoc()
{
	try
	{
		ClearCacheMaps();
	}
	catch(...)
	{
	}
}

// Handle a new document. The first-time branch runs only once, when the program starts up.
BOOL CMyDoc::OnNewDocument()
{

	static BOOL bFirstTime = TRUE;

	if(bFirstTime)
	{
		bFirstTime = FALSE;

		CString strDefName;
		strDefName.LoadString(IDS_NO_PROJECT);
		SetTitle(strDefName);

		if (!CDocument::OnNewDocument())
			return FALSE;
		return TRUE;
	}

	if (!CDocument::OnNewDocument())
		return FALSE;

	m_bProjectLoaded = TRUE;
	m_strStartPage.Empty();
	m_nGottenPageCount = 0;
	m_nGottenFileCount = 0;
	m_nQueuedPageCount = 0;
	m_nTotalBytes = 0;

	POSITION pos = GetFirstViewPosition();
	CMyView* pView = (CMyView *) GetNextView(pos);
	return TRUE;
}

// Open a file
BOOL CMyDoc::OnOpenDocument(LPCTSTR lpszPathName) 
{
	POSITION pos = GetFirstViewPosition();
	CMyView* pView = (CMyView *) GetNextView(pos);

	// Save the current project first
	SaveModified();
	// Clear the contents displayed in the tree control
	pView->ClearTree();

	if (!CDocument::OnOpenDocument(lpszPathName))
		return FALSE;

	SetPathName(lpszPathName);
	m_strDirectory = CInternetDownload::SplitFileName(lpszPathName,
				CInternetDownload::DRIVE|CInternetDownload::PATH|CInternetDownload::FNAME)+"\\";

	SetTitle(CInternetDownload::SplitFileName(lpszPathName,CInternetDownload::FNAME|CInternetDownload::EXT));
	m_bProjectLoaded = TRUE;

	SetModifiedFlag(TRUE);

	return TRUE;
}

// Save the modified information
BOOL CMyDoc::SaveModified() 
{
	if(IsModified())
		return CDocument::DoFileSave();
	return(TRUE);
}

// Determine whether the user can safely exit the program
BOOL CMyDoc::CanCloseFrame(CFrameWnd* pFrame) 
{
	POSITION pos = GetFirstViewPosition();
	CMyView* pView = (CMyView *) GetNextView(pos);

	return(!pView->GetSnagging());
}

// Reset to a new document: all file information and statistics must be reset
void CMyDoc::Reset(LPCTSTR lpszProjName)
{
	CString strNewProjName;
		
	if(lpszProjName)
		strNewProjName = lpszProjName;
	else
		strNewProjName.LoadString(IDS_NO_PROJECT);
	m_strPathName.Empty();
	m_strDirectory.Empty();
	m_bProjectLoaded = FALSE;
	SetModifiedFlag(FALSE);
	SetTitle(strNewProjName);

	m_strStartPage.Empty();
	m_nGottenPageCount = 0;
	m_nGottenFileCount = 0;
	m_nQueuedPageCount = 0;
	m_nTotalBytes = 0;

	POSITION pos = GetFirstViewPosition();
	CMyView* pView = (CMyView *) GetNextView(pos);
}

// Save, then close the document
void CMyDoc::OnCloseDocument() 
{
	SaveModified();
	CDocument::OnCloseDocument();
}

// Get the project's download configuration
void CMyDoc::GetOptions(CConfigure& Options)
{
	Options.nMaxDepth = m_Options.nMaxDepth;
	Options.nMaxPages = m_Options.nMaxPages;
	Options.bFixupLinks = m_Options.bFixupLinks;
	Options.bContents = m_Options.bContents;
	Options.bMultimedia = m_Options.bMultimedia;
	Options.bOffsiteLinks = m_Options.bOffsiteLinks;
}

// Set the project's download configuration
void CMyDoc::SetOptions(CConfigure& Options) 
{ 
	m_Options.nMaxDepth = Options.nMaxDepth;
	m_Options.nMaxPages = Options.nMaxPages;
	m_Options.bFixupLinks = Options.bFixupLinks;
	m_Options.bContents = Options.bContents;
	m_Options.bMultimedia = Options.bMultimedia;
	m_Options.bOffsiteLinks = Options.bOffsiteLinks;
}
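
// The CConfigure type is declared elsewhere (presumably in SiteDownload.h and
// not shown on this page).  Judging only from the members used in this file,
// its declaration is roughly the following sketch, not the actual header:
//
//   class CConfigure : public CObject
//   {
//   public:
//       int     nMaxDepth;      // maximum link depth to follow
//       int     nMaxPages;      // maximum number of pages (0 = no limit)
//       BOOL    bFixupLinks;    // rewrite links for local browsing
//       BOOL    bContents;      // generate a table-of-contents page
//       BOOL    bMultimedia;    // download multimedia files
//       BOOL    bOffsiteLinks;  // follow links to other sites
//       BOOL    bUseProxy;      // connect through a proxy server
//       CString strProxyName;   // proxy server name
//       void    Serialize(CArchive& ar);
//   };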

// Serialization of the document's information
void CMyDoc::Serialize(CArchive& ar)
{
	POSITION pos = GetFirstViewPosition();
	CMyView* pView = (CMyView *) GetNextView(pos);	

	if (ar.IsStoring())
	{
		ar << m_nGottenPageCount;
		ar << m_nGottenFileCount;
		ar << m_nTotalBytes;
	}
	else
	{
		ar >> m_nGottenPageCount;
		ar >> m_nGottenFileCount;
		ar >> m_nTotalBytes;
		m_nQueuedPageCount = 0;
	}

	m_Options.Serialize(ar);

	// The view's tree control data must also be serialized
	pView->SerializeTree(ar);

	m_nLevel = 0;
}


// Retrieve the requested page, either through the CInternetDownload class or directly from the hard disk.
// If the page came from the network, save it to disk and run the parser over it to extract the links and multimedia resources it contains.
BOOL CMyDoc::GetPage(CString& strPage, CString& strFileName, LINKS& linkEntry)
{
	BYTE *pbyBuffer = m_byBuffer;
	int nLen;
	BOOL bPageInCache = FALSE;
	BOOL bRet = FALSE;
	CInternetDownload::RESULTS ret;
	MAP_FILES* pMapEntry;

	// Initialize the link stack entry
	linkEntry.arrLinks.SetSize(0,100);
	linkEntry.arrMedia.SetSize(0,100);
	linkEntry.arrOffsite.SetSize(0,100);
	linkEntry.nIndex = 0;

	// Decide whether the page should be fetched from the network or from the local disk
	if(ShouldGetPage(strPage,pMapEntry))	// network resource
	{
		ret = m_Inet.GetPage(strPage,&pbyBuffer,nLen,TRUE);
		if(ret == CInternetDownload::SUCCESS)
		{
			bRet = TRUE;
			m_nTotalBytes += nLen;
		}
	}
	else	// local disk resource
	{
		CFile fileIn;
		CFileException ex;

		strFileName = pMapEntry->strFileName;
		CString strTempFileName = m_strDirectory+strFileName;

		if(fileIn.Open(strTempFileName,CFile::modeRead,&ex))
		{
			nLen = fileIn.Read(pbyBuffer,MAX_INET_BUFFER);
			fileIn.Close();
			bRet = TRUE;
		}

		// Note that this page was not fetched through CInternetDownload
		bPageInCache = TRUE;
	}

	if(bRet)
	{
		// Use the parser to extract the links and media references the page contains
		CHTMLFileParser Parser;
		Parser.SetPageURL(strPage);

		if(nLen > MAX_INET_BUFFER)
			nLen = MAX_INET_BUFFER;

		pbyBuffer = m_byBuffer;
		Parser.SetFixupMode(FALSE);
		Parser.ResetArrays();
		Parser.SetGetMedia(m_Options.bMultimedia);
		Parser.ParseText((char *)pbyBuffer,nLen);
		m_strPageTitle = Parser.GetTitle();

		// Save the newly downloaded page to disk
		if(!bPageInCache)
		{
			pbyBuffer = m_byBuffer;
			m_Inet.SaveFile(strFileName,m_strDirectory,pbyBuffer,nLen);
		}

		// Determine the number of links
		int nLinks;
		BOOL bOffsite;
		CString strNewPage;
		nLinks = Parser.GetLinks().GetSize();

		// Evaluate each link and decide whether to place it in the download queue
		for(int i = 0; i < nLinks; i++)
		{
			// Get this link's URL
			strNewPage = Parser.GetLinks().GetAt(i);

			// Get this link's offsite flag
			bOffsite = Parser.GetOffsiteFlags().GetAt(i);

			// Decide whether it should be placed in the download queue
			if(ShouldQueuePage(strNewPage,bOffsite)) 
			{
				linkEntry.arrLinks.Add(strNewPage);
				linkEntry.arrOffsite.Add(bOffsite);
			}
		}

		// Gather media only if this page was not already in the cache
		if(!bPageInCache)
		{
			int nMedia = Parser.GetMedia().GetSize();
			CString strMedia;
			for(i = 0; i < nMedia; i++)
			{
				strMedia = Parser.GetMedia().GetAt(i);
				if(ShouldGetMedia(strMedia,pMapEntry))
					linkEntry.arrMedia.Add(strMedia);
			}
		}

		bRet = TRUE;
	}

	return(bRet);
}
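
// The LINKS structure that GetPage fills in is declared in the header file,
// which is not shown on this page.  From the way it is used here and in
// DownloadThread below, it presumably looks something like the sketch below
// (member types are inferred, not confirmed):
//
//   struct LINKS
//   {
//       CStringArray arrLinks;    // URLs of pages queued at this level
//       CStringArray arrMedia;    // URLs of multimedia items to fetch
//       CByteArray   arrOffsite;  // offsite flag for each queued page
//       int          nIndex;      // index of the next link to process
//   };
//
// m_aLinks appears to be an array of these entries, one per recursion level.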

// Use the CInternetDownload class to fetch a multimedia item from a page and save it
BOOL CMyDoc::GetMedia(CString& strMedia, CString& strFileName)
{
	BYTE *pbyBuffer = m_byBuffer;
	int nLen;
	BOOL bRet = FALSE;

	CInternetDownload::RESULTS ret;

	// Get the file from the Internet session
	ret	= m_Inet.GetFile(strMedia,&pbyBuffer,nLen);

	if(ret == CInternetDownload::SUCCESS)
	{
		m_nTotalBytes += nLen;
		// Save the file
		m_Inet.SaveFile(strFileName,m_strDirectory,pbyBuffer,nLen);
		bRet = TRUE;
	}
	return bRet;
}

// Determine whether this page has already been downloaded via the Internet session;
// if it has, return a pointer to its entry in the downloaded-pages map
BOOL CMyDoc::ShouldGetPage(CString& strPage, MAP_FILES*& pMapEntry)
{
	CString strNewPage = strPage;
	strNewPage.MakeLower();
	strNewPage = strNewPage.SpanExcluding("#");
	return(!m_arrPagesDone.Lookup(strNewPage,(CObject *&) pMapEntry));
}

// Determine whether this multimedia item has already been downloaded via the Internet session;
// if it has, return a pointer to its entry in the downloaded-media map
BOOL CMyDoc::ShouldGetMedia(CString& strMedia, MAP_FILES*& pMapEntry)
{
	CString strNewMedia = strMedia;
	strNewMedia.MakeLower();
	strNewMedia = strNewMedia.SpanExcluding("#");
	return(!m_arrMediaDone.Lookup(strNewMedia,(CObject *&) pMapEntry));
}
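
// Both lookups fold the URL to lower case and strip any "#fragment" suffix with
// SpanExcluding("#"), so "Page.htm#top" and "page.htm" resolve to the same cache
// entry.  MAP_FILES itself is defined in the header; judging from the members
// referenced in this file, it is roughly (a sketch, not the actual declaration):
//
//   struct MAP_FILES        // stored (via cast) as a CObject* in CMapStringToOb
//   {
//       CString strFileName;  // local file name the URL was saved under
//       int     nMaxLevel;    // deepest level at which its links were followed
//   };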

// Returns TRUE if the specified page should be added to the download queue.
// This basically means that the page is not in the list of queued pages at a 
// lower level or that it hasn't been previously downloaded at a lower level.
// If the above criteria are met -- it then checks to see if this is an offsite
// page and determines whether it should be downloaded.

BOOL CMyDoc::ShouldQueuePage(CString& strNewPage, BOOL bOffsite)
{
	MAP_FILES* pMapEntry;

	// Have we downloaded this page yet???
	if(ShouldGetPage(strNewPage,pMapEntry))
	{
		// No...then look for it in the queue of waiting pages at previous
		// levels
		for(int i = 0; i < m_nLevel; i++)
		{
			for(int j = 0; j < m_aLinks[i].arrLinks.GetSize(); j++)
			{
				if(strNewPage == m_aLinks[i].arrLinks.GetAt(j))
					return(FALSE);
			}
		}
	}
	else
	{
		// Yes...did we follow its links all the way to the 
		// maximum level?
		if(m_Options.nMaxDepth && m_nLevel >= pMapEntry->nMaxLevel)
			return(TRUE);
	}

	// Make sure that we allow offsite links for offsite pages
	if(bOffsite && !m_Options.bOffsiteLinks)
		return(FALSE);

	return(TRUE);
}

// Initializes the specified link stack entry
void CMyDoc::ResetLink(int nLevel)
{
	m_aLinks[nLevel].nIndex = 0;
	m_aLinks[nLevel].arrLinks.SetSize(0,100);
	m_aLinks[nLevel].arrMedia.SetSize(0,100);
	m_aLinks[nLevel].arrOffsite.SetSize(0,100);
}



// The workhorse thread routine that recursively navigates linked web pages and
// retrieves each of them along with their multimedia files.  This process is
// spawned indirectly in RecursiveDownload() using the AfxBeginThread() call.
UINT CMyDoc::DownloadThread(LPVOID lpvData)
{
	HTREEITEM htreePage;

	// A static method has no "this" pointer, so the owning document's pointer
	// is passed in as the thread parameter instead
	CMyDoc *pThis = (CMyDoc *) lpvData;

	int nMaxDepth = pThis->m_Options.nMaxDepth-1;
	int nCount;
	CString strPage = pThis->m_strStartPage;
	CString strFileName;
	CString strLogData;
	CString strText;
	POSITION pos = pThis->GetFirstViewPosition();
	CMyView* pView = (CMyView *) pThis->GetNextView(pos);	
	BOOL bIsOffsite = FALSE;

	// Establish the WinInet Session
	try
	{
		pThis->m_Inet.OpenSession(pThis->m_Options.bUseProxy,pThis->m_Options.strProxyName);
	}
	catch(...)
	{
	}

	// Create the log file
	pThis->m_fileLog.Open(pThis->m_strDirectory+"sitesnag.log",
						CFile::modeCreate|CFile::modeWrite);

	// Create the table of contents file
	if(pThis->m_Options.bContents)
	{
		pThis->m_fileContents.Open(pThis->m_strDirectory+"SnagCon1.htm",
								CFile::modeCreate|CFile::modeWrite);

		// Add the TOC to the list of downloaded files 
		pThis->SetPageCacheEntry("snagcon1.htm","SnagCon1.htm",0);

		// Add the TOC to the tree control
		CString strTitle = "Contents Page 1 (SnagCon1.htm)";
		pView->AddTreeContent(strTitle);

		// Write the beginning of the first TOC page
		strText = "<HTML>\r\n<HEAD>\r\n<TITLE>SiteSnagger Contents</TITLE>\r\n";
		strText += "</HEAD\r\n<BODY>\r\n";
		strText += "<H1><center>SiteSnagger Table of Contents</center><br><br></H1>\r\n<UL>\r\n";
		pThis->m_fileContents.Write(strText,strText.GetLength());
	}

	// Initialize the index for the first link level, start with the first level
	pThis->m_nLevel = 0;
	pThis->m_aLinks[0].nIndex = 0;
	pThis->m_Inet.ResetUniqueCount();

	// Recursively search web links until either we've searched them all (m_nLevel
	// drops to -1) or the user decides to abort
	while(pThis->m_nLevel >= 0 )
	{
		// Get the name of the next page from the current level's link stack entry
		if(pThis->m_aLinks[pThis->m_nLevel].nIndex > 0)
		{
			// Save the URL and whether it's offsite
			int nIndex = pThis->m_aLinks[pThis->m_nLevel].nIndex;
			strPage = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetAt(nIndex);
			bIsOffsite = pThis->m_aLinks[pThis->m_nLevel].arrOffsite.GetAt(nIndex);

			// Bump to the next level so we can get the page's links
			pThis->m_nLevel++;
		}

		// Generate a unique filename for this page
		pThis->m_Inet.GenerateUniqueFileName(strPage,strFileName,
						pThis->m_arrPagesDone,TRUE);

		// Write a log entry for this page -- leave room for the result
		strLogData.Format("[%02d] Getting page %s ",pThis->m_nLevel+1,strPage);
		pThis->m_fileLog.Write(strLogData,strLogData.GetLength());

		CString strOrigPage = strPage;

		// Get the page from Inet or from local file
		if(pThis->GetPage(strPage,strFileName,pThis->m_aLinks[pThis->m_nLevel]))
		{
			MAP_FILES *pMapEntry;
		
			// Get the count of links
			nCount = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetSize();

			// Did we just download this new page??
			if(pThis->ShouldGetPage(strPage,pMapEntry))
			{
				// Yes, add it to the list of retrieved pages
				pThis->SetPageCacheEntry(strPage,strFileName,pThis->m_nLevel);

				// If the page was redirected then add its original name too
				if(strPage != strOrigPage && pThis->ShouldGetPage(strOrigPage,pMapEntry))
					pThis->SetPageCacheEntry(strOrigPage,strFileName,pThis->m_nLevel);

				// Prefix offsite pages with their URL (i.e. http://www.xxx.yyy)
				if(bIsOffsite)
					strText = strPage+" - ";
				else strText.Empty();

				// Add the page's title and local filename 
				strText += pThis->m_strPageTitle+"  ("+
							strFileName.SpanExcluding("#")+")";

				htreePage = pView->AddTreePage(strText,bIsOffsite);
				strText.Format("<a href=%s><li> %s (%s - %s)<br>\r\n",strFileName,
									pThis->m_strPageTitle,
									strFileName.SpanExcluding("#"),strPage);
				pThis->m_fileContents.Write(strText,strText.GetLength());

				// Update the statistics
				pThis->m_nGottenPageCount++;
				pThis->m_nGottenFileCount++;
			}
			else
			{
				// Set the new depth level if necessary
				if(nMaxDepth)
				{
					// Have we gone to the max level yet???
					if(pThis->m_nLevel >= pMapEntry->nMaxLevel)
						nCount = 0;
					else pMapEntry->nMaxLevel = pThis->m_nLevel;
				}
			}

			// Log the results
