📄 document.cpp
字号:
bRet = TRUE;
}
return(bRet);
}
// Fetches one media file and stores it locally.
//
// strMedia    - name/URL of the media resource, handed to m_Inet.GetFile.
// strFileName - file name handed to m_Inet.SaveFile together with
//               m_strDirectory.
//
// Returns TRUE when m_Inet.GetFile reports CInet::SUCCESS, FALSE otherwise.
// On success the received byte count is added to m_nTotalBytes and the
// progress window title is updated. The result of SaveFile is not examined.
BOOL CSnaggerDoc::GetMedia(CString& strMedia, CString& strFileName)
{
    // Start from the document's own buffer; the pointer is passed by address,
    // so GetFile may redirect it (presumably when the data does not fit --
    // TODO confirm against CInet::GetFile).
    BYTE *pbyData = m_byBuffer;
    int nBytes;

    // Nothing to save if the transfer did not succeed.
    if(m_Inet.GetFile(strMedia,&pbyData,nBytes) != CInet::SUCCESS)
        return(FALSE);

    // Account for the downloaded bytes in the running total.
    m_nTotalBytes += nBytes;

    // Tell the user what is happening, then write the data out.
    m_pProgress->SetActionTitle("Saving File: "+strMedia);
    m_Inet.SaveFile(strFileName,m_strDirectory,pbyData,nBytes);

    return(TRUE);
}
// Decides whether a page still needs to be downloaded.
//
// strPage   - page name as found in a link; it is not modified.
// pMapEntry - passed to m_arrPagesDone.Lookup as the output slot; callers in
//             this file read it only when the function returns FALSE.
//
// Returns TRUE when the normalized page name is NOT present in
// m_arrPagesDone (the page should be fetched) and FALSE when it is present.
BOOL CSnaggerDoc::ShouldGetPage(CString& strPage, MAP_FILES*& pMapEntry)
{
    // Build the lookup key: lower-case (page names are compared without
    // regard to case) and cut at the first '#'.
    CString strKey = strPage;
    strKey.MakeLower();
    strKey = strKey.SpanExcluding("#");

    // A name with no file extension that does not already end in '/'
    // gets a trailing '/' appended before the lookup.
    CString strSuffix = CInet::SplitFileName(strKey,CInet::EXT);
    if(strSuffix.IsEmpty() && strKey.Right(1) != "/")
        strKey += "/";

    // Found in the done-map means "do not get it again".
    const BOOL bAlreadyDone = m_arrPagesDone.Lookup(strKey,(CObject *&) pMapEntry);
    return(!bAlreadyDone);
}
// Decides whether a media file still needs to be downloaded.
//
// strMedia  - media name as found in a link; it is not modified.
// pMapEntry - passed to m_arrMediaDone.Lookup as the output slot.
//
// Returns TRUE when the normalized media name is NOT present in
// m_arrMediaDone (the file should be fetched) and FALSE when it is present.
BOOL CSnaggerDoc::ShouldGetMedia(CString& strMedia, MAP_FILES*& pMapEntry)
{
    // Lookup key: lower-cased name, cut at the first '#'.
    CString strKey = strMedia;
    strKey.MakeLower();
    strKey = strKey.SpanExcluding("#");

    // Already recorded in the media map -> no need to fetch it again.
    if(m_arrMediaDone.Lookup(strKey,(CObject *&) pMapEntry))
        return(FALSE);

    return(TRUE);
}
// Decides whether a newly discovered page link should be added to the
// download queue.
//
// strNewPage - page name taken from a link.
// bOffsite   - non-zero when the link points to another site.
//
// Decision order:
//   1. Page already downloaded (ShouldGetPage returned FALSE): returns TRUE
//      immediately when m_Options.nMaxDepth is non-zero and the current
//      level m_nLevel is >= the nMaxLevel stored in the page's map entry.
//   2. Page not yet downloaded: returns FALSE when strNewPage (compared as
//      given, not normalized) equals a link already queued at any level
//      below m_nLevel.
//   3. Otherwise: returns FALSE for an offsite link when
//      m_Options.bOffsiteLinks is off, TRUE in every other case.
BOOL CSnaggerDoc::ShouldQueuePage(CString& strNewPage, BOOL bOffsite)
{
    MAP_FILES* pDoneEntry;
    const BOOL bNotYetDownloaded = ShouldGetPage(strNewPage,pDoneEntry);

    if(!bNotYetDownloaded)
    {
        // Already downloaded: pDoneEntry refers to its map entry.
        if(m_Options.nMaxDepth && m_nLevel >= pDoneEntry->nMaxLevel)
            return(TRUE);
    }
    else
    {
        // Not downloaded yet: is it already waiting in a shallower level's
        // link list?
        for(int nLvl = 0; nLvl < m_nLevel; nLvl++)
        {
            for(int nLink = 0; nLink < m_aLinks[nLvl].arrLinks.GetSize(); nLink++)
            {
                if(strNewPage == m_aLinks[nLvl].arrLinks.GetAt(nLink))
                    return(FALSE);
            }
        }
    }

    // Offsite pages are queued only when the option allows them.
    if(!bOffsite || m_Options.bOffsiteLinks)
        return(TRUE);

    return(FALSE);
}
// Re-initializes the link-stack entry for one level.
//
// nLevel - index into m_aLinks; no range check is performed here.
//
// The offsite, media and link arrays of that entry are each reset with
// SetSize(0,100) (presumably "size 0, grow-by 100" as in the MFC array
// classes -- confirm against the declared array types), and the entry's
// current link index is set back to 0.
void CSnaggerDoc::ResetLink(int nLevel)
{
    // Drop whatever was collected for this level.
    m_aLinks[nLevel].arrOffsite.SetSize(0,100);
    m_aLinks[nLevel].arrMedia.SetSize(0,100);
    m_aLinks[nLevel].arrLinks.SetSize(0,100);

    // Start again at the first link.
    m_aLinks[nLevel].nIndex = 0;
}
// Pushes the current download counters to the progress/statistics window.
//
// Does nothing when m_pProgress is null. Otherwise forwards the queued page
// count, downloaded page count, downloaded file count, m_nTotalBytes and the
// current level (m_nLevel+1) to the window.
void CSnaggerDoc::UpdateStatus()
{
    // No statistics window -> nothing to refresh.
    if(!m_pProgress)
        return;

    m_pProgress->SetQueuedFiles(m_nQueuedPageCount);
    m_pProgress->SetDownloadedPages(m_nGottenPageCount);
    m_pProgress->SetDownloadedFiles(m_nGottenFileCount);
    // NOTE(review): m_nTotalBytes is passed as-is; whether the window
    // converts it to KB is not visible here.
    m_pProgress->SetKBDownloaded(m_nTotalBytes);
    // m_nLevel is zero-based internally; the window is given level+1.
    m_pProgress->SetLevel(m_nLevel+1);
}
// 现在网页以及网页中的多媒体元素
UINT CSnaggerDoc::DownloadThread(LPVOID lpvData)
{
HTREEITEM htreePage;
CSnaggerDoc *pThis = (CSnaggerDoc *) lpvData;
int nMaxDepth = pThis->m_Options.nMaxDepth-1;
int nCount;
CString strPage = pThis->m_strStartPage;
CString strFileName;
CString strLogData;
CString strText;
POSITION pos = pThis->GetFirstViewPosition();
CSnaggerView* pView = (CSnaggerView *) pThis->GetNextView(pos);
BOOL bIsOffsite = FALSE;
// 建立WinInet会话
try
{
pThis->m_Inet.OpenSession(pThis->m_Options.bUseProxy,pThis->m_Options.strProxyName);
}
catch(...)
{
}
// 创建日志文件
pThis->m_fileLog.Open(pThis->m_strDirectory+"sitesnag.log",
CFile::modeCreate|CFile::modeWrite);
// 创建内容列表文件
if(pThis->m_Options.bContents)
{
pThis->m_fileContents.Open(pThis->m_strDirectory+"SnagCon1.htm",
CFile::modeCreate|CFile::modeWrite);
// 将下载内容增加到下载文件列表中
pThis->SetPageCacheEntry("snagcon1.htm","SnagCon1.htm",0);
// 内容列表加入到树形控件中
CString strTitle = "Contents Page 1 (SnagCon1.htm)";
pView->AddTreeContent(strTitle);
// 写入第一个内容列表也的开始
strText = "<HTML>\r\n<HEAD>\r\n<TITLE>SiteSnagger Contents</TITLE>\r\n";
strText += "</HEAD\r\n<BODY>\r\n";
strText += "<H1><center>SiteSnagger Table of Contents</center><br><br></H1>\r\n<UL>\r\n";
pThis->m_fileContents.Write(strText,strText.GetLength());
}
// 初始化第一层文件链接索引
//从第一层开始
pThis->m_nLevel = 0;
pThis->m_aLinks[0].nIndex = 0;
pThis->m_Inet.ResetUniqueCount();
// 搜索网络链接知道已经完成所有搜索
//或者被人为停止
while(pThis->m_nLevel >= 0 && !pThis->m_pProgress->IsAborted())
{
// 获得第二级的新页面名
if(pThis->m_aLinks[pThis->m_nLevel].nIndex > 0)
{
// 保存本页面UR,兵判断是否是本站网页
int nIndex = pThis->m_aLinks[pThis->m_nLevel].nIndex;
strPage = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetAt(nIndex);
bIsOffsite = pThis->m_aLinks[pThis->m_nLevel].arrOffsite.GetAt(nIndex);
// 连接到下一层
pThis->m_nLevel++;
}
// 为该页生成一个唯一的名称
pThis->m_Inet.GenerateUniqueFileName(strPage,strFileName,
pThis->m_arrPagesDone,TRUE);
pThis->m_pProgress->SetActionTitle("Getting Page: "+strPage);
// 为该页写一个日志入口,方便写入结果
strLogData.Format("[%02d] Getting page %s ",pThis->m_nLevel+1,strPage);
pThis->m_fileLog.Write(strLogData,strLogData.GetLength());
CString strOrigPage = strPage;
// 获得该页从Inet本地文件
if(pThis->GetPage(strPage,strFileName,pThis->m_aLinks[pThis->m_nLevel]))
{
MAP_FILES *pMapEntry;
// 获得连接数量
nCount = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetSize();
// 是否只需要下载文件
if(pThis->ShouldGetPage(strPage,pMapEntry))
{
// 增加到已经接收页面的列表中
pThis->SetPageCacheEntry(strPage,strFileName,pThis->m_nLevel);
// 如果页面被重新定向,则
//也添加原始名字
if(strPage != strOrigPage && pThis->ShouldGetPage(strOrigPage,pMapEntry))
pThis->SetPageCacheEntry(strOrigPage,strFileName,pThis->m_nLevel);
// 如果是其他站点的网页,加上前缀,如主机域名
if(bIsOffsite)
strText = strPage+" - ";
else strText.Empty();
// 加入页面的名称和文件名
strText += pThis->m_strPageTitle+" ("+
strFileName.SpanExcluding("#")+")";
htreePage = pView->AddTreePage(strText,bIsOffsite);
strText.Format("<a href=%s><li> %s (%s - %s)<br>\r\n",strFileName,
pThis->m_strPageTitle,
strFileName.SpanExcluding("#"),strPage);
if(pThis->m_Options.bContents)
pThis->m_fileContents.Write(strText,strText.GetLength());
// 更新参数
pThis->m_nGottenPageCount++;
pThis->m_nGottenFileCount++;
pThis->UpdateStatus();
}
else
{
// 设置新的搜索层数
if(nMaxDepth)
{
// 是否已经到达最大层数
if(pThis->m_nLevel >= pMapEntry->nMaxLevel)
nCount = 0;
else pMapEntry->nMaxLevel = pThis->m_nLevel;
}
}
// 将结果写入日志文件
pThis->m_fileLog.Write("[OK]\n",5);
// 检查其他站点的连接
if(bIsOffsite)
nCount = 0;
// 是否下载多媒体文件
if(pThis->m_Options.bMultimedia)
{
// 分析多媒体文件链接列表
CString strMedia;
for(int j = 0; j < pThis->m_aLinks[pThis->m_nLevel].arrMedia.GetSize() &&
!pThis->m_pProgress->IsAborted(); j++)
{
strMedia = pThis->m_aLinks[pThis->m_nLevel].arrMedia.GetAt(j);
// 是否获取该文件
if(pThis->ShouldGetMedia(strMedia,pMapEntry))
{
// 保证是一个唯一的文件名
pThis->m_Inet.GenerateUniqueFileName(strMedia,
strFileName,pThis->m_arrMediaDone,FALSE);
pThis->m_pProgress->SetActionTitle("Getting File: "+strFileName);
// 写入日志文件
strLogData.Format("[%02d] Getting media %s ",pThis->m_nLevel,
strMedia);
pThis->m_fileLog.Write(strLogData,strLogData.GetLength());
// 下载EMAIL链接
BOOL bMail;
if(strMedia.Left(7) == "mailto:")
{
bMail = TRUE;
strFileName = strMedia;
}
else bMail = FALSE;
//如果成功
if(bMail || pThis->GetMedia(strMedia,strFileName))
{
//将文件增加到文件列表中
pThis->SetMediaCacheEntry(strMedia,strFileName);
pView->AddTreeMedia(strFileName.SpanExcluding("#"),
CTree::GetMediaType(strFileName));
// 将静态参数更新
if(!bMail)
pThis->m_nGottenFileCount++;
pThis->UpdateStatus();
//将结果写入日志
pThis->m_fileLog.Write("[OK]\n",5);
}
else
{
// 将失败结果写入日志
pThis->m_fileLog.Write("[FAILED] ",9);
// 写入详细的错误日志
CString strError = pThis->m_Inet.GetErrorText();
pThis->m_fileLog.Write(strError,strError.GetLength());
pThis->m_fileLog.Write("\n",1);
}
}
}
}
}
else
{
// 写入日志
pThis->m_fileLog.Write("[FAILED] ",9);
// 显示详细的错误
CString strError = pThis->m_Inet.GetErrorText();
pThis->m_fileLog.Write(strError,strError.GetLength());
pThis->m_fileLog.Write("\n",1);
nCount = 0;
}
// 更新参数
pThis->UpdateStatus();
// 如果到达了设定的最大页数,则退出
if(pThis->m_Options.nMaxPages > 0 &&
pThis->m_nGottenPageCount >= pThis->m_Options.nMaxPages)
break;
// 只要还有连接,则继续下载
if(pThis->m_nLevel < nMaxDepth && nCount > 0)
{
// 获得下一个页面
strPage = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetAt(0);
bIsOffsite = pThis->m_aLinks[pThis->m_nLevel].arrOffsite.GetAt(0);
// 进入下一层,并初始化
pThis->m_nLevel++;
pThis->ResetLink(pThis->m_nLevel);
// Queue the links
pThis->m_nQueuedPageCount += nCount;
continue;
}
// 完成所有链接,重置链接信息
pThis->ResetLink(pThis->m_nLevel);
// 移到前一层
pThis->m_nLevel--;
// 寻找下一页
if(pThis->m_nLevel >= 0)
{
int nMaxCount;
//
while(pThis->m_nLevel >= 0)
{
// 获得第二层的入口数量
nMaxCount = pThis->m_aLinks[pThis->m_nLevel].arrLinks.GetSize();
// 是否还有其他合法的页面在这一层
if(pThis->m_aLinks[pThis->m_nLevel].nIndex < nMaxCount-1)
{
// 获得下一个页面
pThis->m_aLinks[pThis->m_nLevel].nIndex++;
pThis->m_nQueuedPageCount--;
break;
}
else
{
// 在树形控件上回到前一层
pThis->m_nLevel--;
pThis->m_nQueuedPageCount--;
}
}
}
}
// Make sure the "stopping, please wait" message isn't displayed
pView->EndWait();
// 确定nNodeCount总是0
pThis->m_nLevel = pThis->m_nQueuedPageCount;
// 是否需要整理链接
if(pThis->m_Options.bFixupLinks)
{
// 用户取消下载
if(pThis->m_pProgress->IsAborted() &&
pView->ShowPrompt(IDS_FIXUP_PAGES,MB_ICONQUESTION|MB_YESNO) != IDYES)
;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -