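// webextract.cpp
// From the project "Extract content from htm/html web page files. Drag the
// web page file to be extracted into the window with the mouse."
// C++ code, 69 lines in the original listing.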
#include "StdAfx.h"
#include "stdio.h"
#include "Extract.h"
#include "time.h"
void funProcessDir(string strDir);
// Returns a copy of `path` with one extra backslash inserted in front of every
// backslash that was present in the input.
// After each insertion the scan resumes two characters further on, so the pair
// that was just produced is never examined again.
static string funDoubleBackslashes(string path)
{
    string::size_type nPos = path.find('\\');
    while (nPos != string::npos) {
        path.insert(nPos, 1, '\\');
        nPos = path.find('\\', nPos + 2);
    }
    return path;
}

// Program entry point.
//
// Prints a banner, reads one line from standard input (a file or folder name,
// optionally wrapped in double quotes), normalises it and hands it to Extract:
//   - a name containing '.' is passed to Extract::funProcessFile;
//   - any other name is treated as a folder and passed to
//     Extract::funProcessDirectory once for "htm" and once for "html".
// Finally the elapsed CPU time reported by clock() is printed and the program
// waits for two characters on standard input before exiting.
//
// argc / argv are not used.
// Returns 0 on the normal path, 1 when no name was entered.
int main(int argc,char *argv[])
{
    Extract extract;

    cout << "***********************************************" << endl ;
    cout << "§ Function: §" << endl ;
    cout << "§ Extract contents from htm/html files. §" << endl ;
    cout << "§ 网页内容提取 Version1.0 §" << endl ;
    cout << "***********************************************" << endl ;
    cout << "************************ ablenavy 2006-07-20***" << endl<<endl ;
    cout << " 请输入文件夹或文件名,按回车键。 " << endl ;
    cout << " =========支持鼠标拖拽==========" << endl ;

    // Read the whole line into a string: unlike gets() on a fixed char[100],
    // this cannot overflow however long the entered path is.
    string str;
    std::getline(std::cin, str);

    // Second argument of funProcessDirectory; presumably an output folder
    // (TODO confirm against Extract). Left empty by default.
    string strDest = "";
    //strDest="E:\aa";

    clock_t start, finish;
    start=clock();

    if (!strDest.empty())
    {
        // Double the separators and terminate the folder name with one more.
        strDest = funDoubleBackslashes(strDest) + "\\";
    }

    str = funDoubleBackslashes(str);

    // Strip a surrounding pair of double quotes only when both the opening
    // and the closing quote are really there, so no path character is lost.
    if (str.length() >= 2 && str[0] == '"' && str[str.length() - 1] == '"')
    {
        str = str.substr(1, str.length() - 2);
    }

    // Nothing usable was entered: do not fall through and process "\".
    if (str.empty())
    {
        cout << "No file or folder name was entered." << endl;
        getchar();
        return 1;
    }

    // NOTE(review): a '.' anywhere in the name selects the single-file path,
    // so a folder whose name contains a dot is handled as a file.
    if (str.find('.') != string::npos)
    {
        extract.funProcessFile(str);
    }
    else
    {
        str += "\\" ;
        //extract.funProcessDirectory(str,"","htm");
        //extract.funProcessDirectory(str,"","html");
        extract.funProcessDirectory(str,strDest,"htm");
        extract.funProcessDirectory(str,strDest,"html");
    }

    finish=clock();
    float nTime=static_cast<float>(finish-start)/CLOCKS_PER_SEC;
    cout << endl << "OK ! It took " << nTime << " seconds." << endl;

    // Wait for input before exiting (two reads, as in the original).
    getchar();
    getchar();
    return 0;
}
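// Keyboard shortcuts of the code-viewer page (not part of the program):
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full-screen mode:   F11
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -
//   Show shortcuts:     ?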