📄 imagedoc.cpp
字号:
// imageDoc.cpp : implementation of the CImageDoc class
//
#include "stdafx.h"
#include "image.h"
#include "imageDoc.h"
#include "imageView.h"
#include "Progress.h"
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
#define SECTION "LOCALSET"
#define ENTRY "DIRECTORY"
/////////////////////////////////////////////////////////////////////////////
// CImageDoc
// Runtime-class registration: lets the framework create CImageDoc objects
// dynamically, with CDocument named as the base class.
IMPLEMENT_DYNCREATE(CImageDoc, CDocument)
// Message map for CImageDoc. No handlers are mapped at present; the region
// between the AFX_MSG_MAP markers is maintained by ClassWizard.
BEGIN_MESSAGE_MAP(CImageDoc, CDocument)
//{{AFX_MSG_MAP(CImageDoc)
// NOTE - the ClassWizard will add and remove mapping macros here.
// DO NOT EDIT what you see in these blocks of generated code!
//}}AFX_MSG_MAP
END_MESSAGE_MAP()
/////////////////////////////////////////////////////////////////////////////
// CImageDoc construction/destruction
// Constructs the document:
//  - resets the per-site download options in m_Option,
//  - loads the local settings (directory, minimum image size, thread count,
//    level) from the application profile,
//  - allocates one recordset object per table, all bound to theApp.m_db,
//  - copies every row of the log recordset into m_siteinfo.
// Database errors are reported with a message box; the document is still
// constructed, possibly with a partial or empty m_siteinfo.
CImageDoc::CImageDoc()
{
    // TODO: add one-time construction code here
    //m_Option.bUseProxy=false;
    m_Option.date=0;
    m_Option.image_type=0;
    m_Option.max_length=0;
    m_Option.max_level=-1;// no limit on the number of levels to download
    //m_Option.max_thread_num=-1;// no limit on the maximum number of threads
    m_Option.max_width=0;
    m_Option.min_length=0;
    m_Option.min_width=0;
    //m_Option.szProxy=_T("");
    localset.bProxy=FALSE;
    m_pProgress=NULL;
    m_pThread=NULL;

    // Local settings, with defaults used when the profile has no entry yet.
    localset.strDirectory=theApp.GetProfileString(SECTION,ENTRY,"d:\\zhang\\");
    localset.image_min_size=theApp.GetProfileInt(SECTION,_T("IMAGE_SIZE"),10);
    localset.max_thread=theApp.GetProfileInt(SECTION,_T("NUMOFTHREAD"),3);
    localset.unlike_level=theApp.GetProfileInt(SECTION,_T("LEVEL"),3);
    //localset.image_min_size=10;
    //localset.unlike_level=2;

    // One recordset object per table; they are released in the destructor.
    m_doneurl=new CDoneurl(&theApp.m_db);
    m_history=new CHistory(&theApp.m_db);
    m_todownloadurl=new CTodownloadurl(&theApp.m_db);
    m_log=new CLog(&theApp.m_db);
    m_save=new CSave(&theApp.m_db);
    m_exclusive=new CExclusive(&theApp.m_db);
    m_include=new CInclude(&theApp.m_db);

    // Open the log recordset. A failure is reported instead of escaping from
    // the constructor, and the rows are only read when the open succeeded.
    BOOL bLogOpened=FALSE;
    TRY
    {
        bLogOpened=m_log->Open();
    }
    CATCH(CDBException,e)
    {
        AfxMessageBox(e->m_strError);
    }
    END_CATCH

    if(bLogOpened)
    {
        if(!(m_log->IsEOF()&&m_log->IsBOF()))
        {
            SITEINFO site;
            // Set when MoveNext() fails. Without this the loop would keep
            // re-reading the same row, because IsEOF() may never become true
            // after a failed move.
            bool bMoveFailed=false;
            m_log->MoveFirst();
            while(!bMoveFailed&&!m_log->IsEOF())
            {
                site.bdownload=m_log->m_bdownload;
                site.imagetype=m_log->m_imagetype;
                site.lastnum=m_log->m_lastnum;
                site.lasttime=m_log->m_lasttime;
                site.max_height=m_log->m_max_height;
                site.max_width=m_log->m_max_width;
                site.min_width=m_log->m_min_width;
                site.min_height=m_log->m_min_height;
                site.priority=m_log->m_priority;
                site.sitename=m_log->m_sitename;
                site.status=m_log->m_status;
                site.m_exclusive_level=m_log->m_exclusive_level;
                site.url=m_log->m_url;
                site.max_level=m_log->m_max_level;
                m_siteinfo.Add(site);
                TRY
                {
                    m_log->MoveNext();
                }
                CATCH(CDBException,e)
                {
                    AfxMessageBox(e->m_strError);
                    bMoveFailed=true;
                }
                END_CATCH
            }
        }
    }
    // Same guard the other recordset users in this file apply before Close().
    if(m_log->IsOpen())
        m_log->Close();
}
// Writes the local settings back to the application profile, then releases
// the recordset objects allocated in the constructor.
CImageDoc::~CImageDoc()
{
    // Persist the settings under the same section/entry names the
    // constructor reads them from.
    theApp.WriteProfileString(SECTION, ENTRY, localset.strDirectory);
    theApp.WriteProfileInt(SECTION, _T("IMAGE_SIZE"), localset.image_min_size);
    theApp.WriteProfileInt(SECTION, _T("LEVEL"), localset.unlike_level);
    theApp.WriteProfileInt(SECTION, _T("NUMOFTHREAD"), localset.max_thread);

    // Free the per-table recordsets.
    delete m_doneurl;
    delete m_history;
    delete m_todownloadurl;
    delete m_log;
    delete m_save;
    delete m_exclusive;
    delete m_include;
}
// Called when a new document is requested. Only the base-class
// initialisation runs; no document-specific reinitialisation exists yet.
// Returns TRUE when CDocument::OnNewDocument() succeeds, FALSE otherwise.
BOOL CImageDoc::OnNewDocument()
{
    // Disabled experiment (translated note: "is this an empty project file?"):
    //   static bFirstTime=true;
    //   if(bFirstTime) { bFirstTime=false; CString strDefName; }

    // TODO: add reinitialization code here
    // (SDI documents will reuse this document)
    // Translated note: the relevant checks are to be added later.
    return CDocument::OnNewDocument() ? TRUE : FALSE;
}
/////////////////////////////////////////////////////////////////////////////
// CImageDoc serialization
// Serialization hook. Neither direction is implemented yet: the function
// only distinguishes storing from loading and does nothing in either case.
void CImageDoc::Serialize(CArchive& ar)
{
    if (!ar.IsStoring())
    {
        // TODO: add loading code here
        return;
    }

    // TODO: add storing code here
}
/////////////////////////////////////////////////////////////////////////////
// CImageDoc diagnostics
#ifdef _DEBUG
// Debug-build validity check (compiled only under _DEBUG); it adds no checks
// of its own and forwards to CDocument::AssertValid().
void CImageDoc::AssertValid() const
{
CDocument::AssertValid();
}
// Debug-build dump (compiled only under _DEBUG); writes nothing specific to
// CImageDoc and forwards to CDocument::Dump() with the given dump context.
void CImageDoc::Dump(CDumpContext& dc) const
{
CDocument::Dump(dc);
}
#endif //_DEBUG
/////////////////////////////////////////////////////////////////////////////
// CImageDoc commands
// Checks the doneurl table for strPage.
// Returns true when no row with that URL exists, false when a row exists
// or when the query could not be run (a database error is shown in a
// message box first).
// Translated original note: it is undecided whether this logic should be
// tied to ShouldQueuePage.
bool CImageDoc::ShouldGetLink(CString &strPage)
{
    // The URL becomes part of the SQL text, so single quotes are doubled to
    // keep a quote inside the URL from ending the string literal early.
    CString strEscaped=strPage;
    strEscaped.Replace(_T("'"),_T("''"));

    CString strQuery;
    strQuery.Format(_T("select * from doneurl where url='%s'"),(LPCTSTR)strEscaped);

    if(m_doneurl->IsOpen())
        m_doneurl->Close();

    BOOL bOpened=FALSE;
    TRY
    {
        bOpened=m_doneurl->Open(CRecordset::snapshot,strQuery,CRecordset::readOnly);
    }
    CATCH(CDBException,e)
    {
        AfxMessageBox(e->m_strError);
        bOpened=FALSE;
    }
    END_CATCH

    // Do not touch IsBOF()/IsEOF() on a recordset that failed to open.
    if(!bOpened)
    {
        if(m_doneurl->IsOpen())
            m_doneurl->Close();
        return false;
    }

    // BOF and EOF both set means the result set is empty: URL not in doneurl.
    const bool bNotFound=m_doneurl->IsBOF()&&m_doneurl->IsEOF();
    m_doneurl->Close();
    return bNotFound;
}
// Downloads the URL in strImage through m_Inet and, on success, saves the
// received bytes to strFilePath (the complete path the image is stored under).
// Returns true only when CInet::GetFile reports CInet::SUCCESS.
bool CImageDoc::GetImage(CString &strImage, CString &strFilePath)
{
    // Pass the progress object and the minimum-size setting on to m_Inet.
    m_Inet.m_pProgress = this->m_pProgress;
    m_Inet.m_minsize = localset.image_min_size;

    BYTE *pData = m_byBuffer;
    int nBytes;
    const CInet::RESULTS result = m_Inet.GetFile(strImage, &pData, nBytes, &m_Option.date);

    if (result != CInet::SUCCESS)
    {
        return false;
    }

    m_Inet.SaveFile(strFilePath, pData, nBytes);
    // Translated original notes: the download information still needs to be
    // saved to the database here; a successful download of a very large file
    // may still need several reads.
    return true;
}
// Downloads the URL in strPage and processes the result.
//  - If the download turns out to be an image (CInet::IMAGE_TYPE), it is
//    saved under m_Directory when the level and size conditions hold, and
//    the function returns false.
//  - If the download succeeds as a page, the page is parsed: image links
//    are appended to m_arrImage, and every link accepted by ShouldQueuePage
//    is inserted into the todownloadurl table. Returns true.
//  - Any other result returns false.
// level is the level of strPage; it is passed to the parser and used in the
// image-save condition.
// Translated original design note: image links collected in m_arrImage are
// downloaded after the page has been analysed; the function was meant to
// suspend when more than m_Option.max_thread_num image threads are running
// (not implemented in this function).
bool CImageDoc::GetPage(CString &strPage, int level)
{
    BYTE *pbyBuffer=m_byBuffer;
    int nLen;
    CInet::RESULTS ret;
    m_Inet.m_pProgress=this->m_pProgress;
    ret=m_Inet.GetPage(strPage,&pbyBuffer,nLen,true);
    if(ret==CInet::IMAGE_TYPE)
    {
        // The downloaded content is an image: no parsing, only saving.
        CString strDirectory=m_Directory;
        strDirectory+=CInet::SplitFileName(strPage,CInet::FNAME);
        strDirectory+=".jpg";
        // NOTE(review): with m_Option.max_level left at -1 this condition is
        // never true - confirm how max_level is set before this is reached.
        if((m_Option.max_level-m_Option.exclusive_level)>=(level+1))
            if(nLen>=localset.image_min_size*1000)
                m_Inet.SaveFile(strDirectory,pbyBuffer,nLen);
        return false;
    }
    // Download succeeded: analyse the page.
    if(ret==CInet::SUCCESS)
    {
        CHtmlParser Parser(m_Option,localset.unlike_level);
        Parser.SetLevel(level);
        Parser.SetPageURL(m_strServer,m_strObject);
        // m_pProgress is NULL until a progress object is attached (the
        // constructor initialises it to NULL), so guard the dereference.
        if(m_pProgress!=NULL)
            m_pProgress->SetActionTitle("正在分析网页:"+strPage);
        // Work out the directory part of the page URL (query string removed)
        // and store it in Parser.m_parenturl for the parser's URL
        // qualification (MakeQualifiedUrl, per the original note).
        Parser.m_parenturl=strPage.SpanExcluding("?");
        CString strServer,strObject,strUser,strPassword;
        INTERNET_PORT nPort;
        DWORD dwServiceType;
        char szOut[1000];
        unsigned long ulLen=sizeof(szOut);
        if(AfxParseURLEx(Parser.m_parenturl,dwServiceType,strServer,strObject,nPort,
            strUser,strPassword,ICU_NO_ENCODE))
        {
            // Keep the object path up to and including the last '/'.
            int kk=strObject.ReverseFind('/');
            if(kk!=-1)
                strObject=strObject.Left(kk+1);
            else
                strObject="/";
            strServer="http://"+strServer;
            // Only use szOut when the combine call succeeded; on failure the
            // buffer is not filled in and m_parenturl keeps its previous value.
            if(::InternetCombineUrl(strServer,strObject,szOut,&ulLen,ICU_NO_ENCODE))
                Parser.m_parenturl=szOut;
        }
        if(nLen>MAX_INET_BUFFER)
            nLen=MAX_INET_BUFFER;
        pbyBuffer=m_byBuffer;
        // Parse the downloaded text.
        Parser.ParseText((char *)pbyBuffer,nLen);
        if(m_pProgress!=NULL)
            m_pProgress->SetActionTitle("对分析出的链接进行处理");
        // Collect the image links found by the parser.
        m_arrImage.Append(Parser.GetImage());
        CString sql;
        for(int i=0;i<Parser.GetArrLink().GetSize();i++)
        {
            //URL_LEVEL er=Parser.m_url_level[i];
            MakeURLValid(Parser.GetArrLink()[i].m_url);
            if(ShouldQueuePage(Parser.GetArrLink()[i].m_url))
            {
                // The link comes from a downloaded page and becomes part of
                // the SQL text, so single quotes are doubled first.
                CString strUrl=Parser.GetArrLink()[i].m_url;
                strUrl.Replace("'","''");
                // NOTE(review): the date value is inserted without quotes -
                // confirm this matches the column type of todownloadurl.
                sql.Format("insert into todownloadurl values('%s',%d,%s)",(LPCTSTR)strUrl,Parser.GetArrLink()[i].level,"1990-12-16");
                TRY{
                    theApp.m_db.ExecuteSQL(sql);
                }
                CATCH(CDBException,e) {
                    // Insert failures are ignored, as in the original code.
                    // NOTE(review): presumably duplicates are expected here -
                    // confirm against the table definition.
                }
                END_CATCH
            }
        }
        return true;
    }// if(ret==CInet::SUCCESS)
    return false;
}
//本函数的实现需要使用数据库,即从数据库中查寻看是否有相同的文件有
bool CImageDoc::GenerateUniqueFileName(CString &strName)
{
//int i;
CString str;
//BOOL b;
//不确定是否周密
str.Format("select * from history where filename like '%s.%%'",strName);
if(m_history->IsOpen())
m_history->Close();
if(!m_history->Open(CRecordset::snapshot,str,CRecordset::readOnly))
return false;
//m_history->MoveFirst();
//b=m_history->IsEOF();
//b=m_history->IsBOF();
if(!(m_history->IsEOF()&&m_history->IsBOF()))
{
//srand((unsigned)time(NULL));
//i=rand();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -