📄 imagedoc.cpp

📁 网络图片收集软件
💻 CPP
📖 第 1 页 / 共 2 页
字号:
上一页 12
		CTime time=CTime::GetCurrentTime();
		str=time.Format("%Y%m%d%H%M%S");
		strName=strName+str;
	}
	m_history->Close();
	return true;
}

// Worker-thread entry point (started with AfxBeginThread from BeginToDownLoad).
//
// lpvData is the owning CImageDoc. For every entry of m_siteinfo that is
// flagged for download the thread:
//   1. builds and creates the per-site output directory,
//   2. copies the site's filter settings into m_Option,
//   3. loads the site's exclude / include URL fragments from the database,
//   4. processes the start page and then every page taken from the
//      todownloadurl table, downloading each page's images and recording
//      handled URLs in doneurl (and downloaded files in history),
//   5. updates the site's row in the log table.
// When the progress object reports an abort, the current and all remaining
// sites are written to the save table so the job can be resumed later, and
// the site loop ends.
//
// Database errors (CDBException) inside the TRY blocks are deliberately
// ignored: bookkeeping is best-effort and must not stop the download.
// Always returns 0.
UINT CImageDoc::DownloadThread(LPVOID lpvData)
{
	int nMaxLevel;
	CString strStartPage;
	CString strDirectory;
	CString strFileName;
	CString temp;

	CImageDoc *pThis=(CImageDoc *)lpvData;

	// Seed the generator once per thread. Re-seeding with time(NULL) before
	// every rand() call yields the same value for all calls made within the
	// same second, which defeats the purpose of the random file-name suffix.
	srand((unsigned)time(NULL));

	// Open the connection to the Internet. Failures are swallowed here, as in
	// the original code.
	try
	{
		pThis->m_Inet.OpenSession(pThis->localset.bProxy,pThis->localset.strProxy);
	}
	catch(...)
	{

	}

	for(int i=0;i < pThis->m_siteinfo.GetSize();i++)
	{
		// Skip sites that are not selected for download.
		if(!pThis->m_siteinfo[i].bdownload)
			continue;

		nMaxLevel=pThis->m_siteinfo[i].max_level;
		strStartPage=pThis->m_siteinfo[i].url;			// first URL to fetch for this site
		strDirectory=pThis->localset.strDirectory;		// root directory for saved images

		// Target directory: <root>\<sitename> <dd-mm-YYYY>\ 
		if(strDirectory.Right(1)!="\\")
			strDirectory+="\\";
		strDirectory=strDirectory+pThis->m_siteinfo[i].sitename;
		CTime t=CTime::GetCurrentTime();
		strDirectory=strDirectory+" "+t.Format("%d-%m-%Y")+"\\";
		pThis->m_Directory=strDirectory;
		CreateDirectory(strDirectory,NULL);

		// Copy this site's filter settings into the active options.
		pThis->m_Option.image_type=pThis->m_siteinfo[i].imagetype;
		pThis->m_Option.max_length=pThis->m_siteinfo[i].max_height;
		pThis->m_Option.min_length=pThis->m_siteinfo[i].min_height;
		pThis->m_Option.max_width=pThis->m_siteinfo[i].max_width;
		pThis->m_Option.min_width=pThis->m_siteinfo[i].min_width;
		pThis->m_Option.exclusive_level=pThis->m_siteinfo[i].m_exclusive_level;
		pThis->m_Option.date=pThis->m_siteinfo[i].lasttime;
		pThis->m_Option.max_level=pThis->m_siteinfo[i].max_level;

		// Check that the start page is a well-formed URL; otherwise ask the
		// user whether to go on with the next site or stop.
		if (!pThis->URLParser(strStartPage)) 
		{
			if(AfxMessageBox("当前下载的网站不是合法的url\n"+pThis->m_siteinfo[i].sitename+"\n请你查看本网站对应的url\n是否继续处理",MB_YESNO)==IDYES)
				continue;
			else
				break;
		}

		// Load the URL fragments to exclude for this site.
		// String arguments to Format are cast to LPCTSTR, as the CString
		// documentation requires for variadic calls.
		pThis->m_exclusiveList.RemoveAll();
		temp.Format(_T("select * from exclusive where sitename='%s'"),(LPCTSTR)pThis->m_siteinfo[i].sitename);
		pThis->m_exclusive->Open(CRecordset::snapshot,temp,CRecordset::readOnly);
		if(!pThis->m_exclusive->IsEOF()||!pThis->m_exclusive->IsBOF())
		{
			pThis->m_exclusive->MoveFirst();
			while(!pThis->m_exclusive->IsEOF())
			{	
				pThis->m_exclusiveList.Add(pThis->m_exclusive->m_url);
				pThis->m_exclusive->MoveNext();
			}
		}
		pThis->m_exclusive->Close();

		// Load the URL fragments to include for this site.
		pThis->m_includeList.RemoveAll();
		temp.Format(_T("select * from include where sitename='%s'"),(LPCTSTR)pThis->m_siteinfo[i].sitename);
		pThis->m_include->Open(CRecordset::snapshot,temp,CRecordset::readOnly);
		if(!pThis->m_include->IsEOF()||!pThis->m_include->IsBOF())
		{
			pThis->m_include->MoveFirst();
			while(!pThis->m_include->IsEOF())
			{
				pThis->m_includeList.Add(pThis->m_include->m_url);
				pThis->m_include->MoveNext();
			}
		}
		pThis->m_include->Close();

		pThis->m_count=0;

		// Page loop: runs until there is no further page to process. The abort
		// flag is checked inside the loop so that an abort reaches the resume
		// bookkeeping at the "abort" label.
		while(!strStartPage.IsEmpty())
		{
			if(pThis->m_pProgress->IsAborted())
				goto abort;

			pThis->MakeURLValid(strStartPage);
			pThis->m_pProgress->SetActionTitle("正在下载网页"+strStartPage);
			// Remember the requested URL; GetPage may change strStartPage, and
			// the two are compared below to detect a redirect.
			CString strOrigPage=strStartPage;

			// Clear the image and link lists before GetPage refills them.
			pThis->m_arrImage.RemoveAll();
			pThis->m_arrLink.RemoveAll();			

			if(pThis->GetPage(strStartPage,nMaxLevel))
			{
				for(int index=0;index<pThis->m_arrImage.GetSize();index++)
				{
					if(pThis->m_pProgress->IsAborted())
						goto abort;

					// Skip images that ShouldGetLink rejects.
					if(!pThis->ShouldGetLink(pThis->m_arrImage[index]))
						continue;

					// Build the local file name: directory + base name (+ suffix) + extension.
					strFileName=CInet::SplitFileName(pThis->m_arrImage[index],CInet::FNAME);
					strFileName=strDirectory+strFileName;
					if(!pThis->GenerateUniqueFileName(strFileName))
					{
						// Fall back to a random numeric suffix. The suffix is
						// formatted into a separate string because a CString
						// must not be passed as an argument to its own Format().
						CString strSuffix;
						strSuffix.Format("%d",rand());
						strFileName+=strSuffix;
					}
					temp=CInet::SplitFileName(pThis->m_arrImage[index],CInet::EXT);
					if(temp.IsEmpty())
						temp=".jpg";	// default extension when the URL has none
					strFileName+=temp;
					pThis->m_pProgress->SetActionTitle("下载图片"+pThis->m_arrImage[index]);
					if(pThis->GetImage(pThis->m_arrImage[index],strFileName))
					{
						TRY
						{
							temp.Format(_T("%s%s%s%s%s"),"insert into history values('",(LPCTSTR)pThis->m_arrImage[index],"','",(LPCTSTR)strFileName,"')");
							theApp.m_db.ExecuteSQL(temp);

						}
						CATCH(CDBException,e)
						{}
						END_CATCH
						// Count the downloaded image.
						pThis->m_count++;
					}

					// Record the image URL as handled, whether or not the
					// download succeeded.
					TRY{
						temp.Format(_T("%s%s%s"),"insert into doneurl values('",(LPCTSTR)pThis->m_arrImage[index],"')");
						theApp.m_db.ExecuteSQL(temp);
					}
					CATCH(CDBException,e)
					{}
					END_CATCH

				}// end for each image of the page

			}// end if GetPage succeeded

			// Record the page as handled. If GetPage changed the URL (redirect),
			// record the resulting URL as well.
			TRY
			{
				temp.Format(_T("%s%s%s"),"insert into doneurl values('",(LPCTSTR)strOrigPage,"')");
				theApp.m_db.ExecuteSQL(temp);
				if(strStartPage!=strOrigPage)
				{
					temp.Format(_T("%s%s%s"),"insert into doneurl values('",(LPCTSTR)strStartPage,"')");
					theApp.m_db.ExecuteSQL(temp);
				}				
			}
			CATCH(CDBException,e)
			{}
			END_CATCH

			// This page is finished; take the next queued page, if any.
			if(pThis->m_todownloadurl->IsOpen())
				pThis->m_todownloadurl->Close();
			TRY{
				temp="select * from todownloadurl";
				pThis->m_todownloadurl->Open(CRecordset::dynamic,temp,CRecordset::none);
			}
			CATCH(CDBException,e)
			{}
			END_CATCH			
			// If Open failed above, the recordset is not open and must not be
			// queried; treat that the same as an empty queue.
			if(!pThis->m_todownloadurl->IsOpen()||
				(pThis->m_todownloadurl->IsBOF()&&pThis->m_todownloadurl->IsEOF()))
				strStartPage="";
			else
			{
				strStartPage=pThis->m_todownloadurl->m_url;
				nMaxLevel=pThis->m_todownloadurl->m_level;
				// Remove the row that was just taken from the queue.
				TRY{
					pThis->m_todownloadurl->Delete();
					pThis->m_todownloadurl->MoveNext();
				}
				CATCH(CDBException,e) {
				}
				END_CATCH
			}
			if(pThis->m_todownloadurl->IsOpen())
				pThis->m_todownloadurl->Close();

		}// end while (page loop)

		// The site is finished: store the run date, image count and status.
		// The date is quoted; unquoted, "YYYY-MM-DD" would be evaluated by the
		// database as an integer subtraction.
		TRY{
			temp.Format(_T("update log set lasttime='%s',lastnum=%d,status=1 where sitename='%s'"),	(LPCTSTR)t.Format("%Y-%m-%d"),pThis->m_count,(LPCTSTR)pThis->m_siteinfo[i].sitename);
			theApp.m_db.ExecuteSQL(temp);			
		}
		CATCH(CDBException,e)
		{
		}
		END_CATCH

		// Resume support: reached by fall-through after a finished site and by
		// "goto abort" from the page / image loops.
abort:	if(pThis->m_pProgress->IsAborted())
		{
			// Save the current site and every remaining selected site so the
			// download can be continued later.
			for(int k=i;k<pThis->m_siteinfo.GetSize();k++)
			{
				if(pThis->m_siteinfo[k].bdownload)
				{
					TRY{
						temp.Format(_T("insert into save values('%s')"),(LPCTSTR)pThis->m_siteinfo[k].sitename);
						theApp.m_db.ExecuteSQL(temp);
						temp.Format(_T("update log set status=0,lastnum=%d where sitename='%s'"),pThis->m_count,(LPCTSTR)pThis->m_siteinfo[k].sitename);
						theApp.m_db.ExecuteSQL(temp);
					}CATCH(CDBException,e)
					{
					}
					END_CATCH
				}
			}
			break;	// leave the outer site loop
		}


	}// end for each site in m_siteinfo
	pThis->m_pProgress->SetActionTitle("现在已经完成任务线程将要退出");
	// Close the Inet session; errors are ignored.
	try 
	{
		pThis->m_Inet.CloseSession();
	}catch (...) {
	}
return 0;
}
// Decides whether the page URL strPage should be added to the download queue.
//
// Returns false when
//   - strPage contains any entry of m_exclusiveList,
//   - m_includeList is non-empty and strPage contains none of its entries,
//   - a row with this url already exists in todownloadurl or in doneurl,
//   - one of the two recordsets reports failure from Open().
// Returns true otherwise.
//
// NOTE(review): strPage is inserted into the SQL text as-is, so a URL that
// contains a single quote produces malformed SQL. Presumably callers strip
// quotes first (compare MakeURLValid) - confirm.
bool CImageDoc::ShouldQueuePage(CString &strPage)
{
	CString sql;
	int i;

	// Any matching exclusion fragment rejects the link.
	for(i=0;i<m_exclusiveList.GetSize();i++)
	{
		if(strPage.Find(m_exclusiveList[i],0)!=-1)
			return false;
	}

	// When an include list exists, at least one fragment has to match.
	if(m_includeList.GetSize()!=0)
	{
		bool bIncluded=false;
		for(i=0;i<m_includeList.GetSize()&&!bIncluded;i++)
			bIncluded=(strPage.Find(m_includeList[i],0)!=-1);
		if(!bIncluded)
			return false;
	}

	// Already waiting in the queue?
	sql.Format(_T("%s%s%s"),"select * from todownloadurl where url='",(LPCTSTR)strPage,"'");
	if(m_todownloadurl->IsOpen())
		m_todownloadurl->Close();
	if(!m_todownloadurl->Open(CRecordset::snapshot,sql,CRecordset::readOnly))
		return false;
	const bool bQueued=!(m_todownloadurl->IsBOF()&&m_todownloadurl->IsEOF());
	m_todownloadurl->Close();
	if(bQueued)
		return false;

	// Already processed? The original ran this identical doneurl query twice
	// in a row; one lookup gives the same answer.
	sql.Format(_T("%s%s%s"),"select * from doneurl where url='",(LPCTSTR)strPage,"'");
	if(m_doneurl->IsOpen())
		m_doneurl->Close();
	if(!m_doneurl->Open(CRecordset::snapshot,sql,CRecordset::readOnly))
		return false;
	const bool bDone=!(m_doneurl->IsBOF()&&m_doneurl->IsEOF());
	m_doneurl->Close();

	return !bDone;
}


//本函数要被视图事件处理函数调用
void CImageDoc::BeginToDownLoad()
{
	//在这里进行判断是否要进行下载。这里省去判断
	POSITION pos=GetFirstViewPosition();
	CImageView * pView =(CImageView *) GetNextView(pos);
	
	m_pProgress=pView->GetProgress();
	m_pProgress->SetActionTitle("新建文档");
	
	
	m_pThread=AfxBeginThread(DownloadThread,this);

}

// Splits lpszURL with AfxParseURLEx (flag ICU_NO_ENCODE).
// On success the server and object parts are stored in m_strServer and
// m_strObject and true is returned. On failure both members are left
// untouched and false is returned.
bool CImageDoc::URLParser(LPCTSTR lpszURL)
{
	DWORD serviceType;
	INTERNET_PORT port;
	CString server;
	CString object;
	CString user;
	CString password;

	const BOOL parsed=AfxParseURLEx(lpszURL,serviceType,server,object,port,
		user,password,ICU_NO_ENCODE);
	if(!parsed)
		return false;

	// Only the server and object parts are kept; the rest is discarded.
	m_strServer=server;
	m_strObject=object;
	return true;
}



// Removes every single-quote character (') from strURL in place.
// CString::Remove does this in one pass; the earlier loop rebuilt the whole
// string once per quote found.
void CImageDoc::MakeURLValid(CString &strURL)
{
	strURL.Remove('\'');
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -