⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 parselink_d.cpp

📁 此源码功能是捕获网页中的链接并进行分析
💻 CPP
字号:
/***************************************************************************
                          ParseLink_d.cpp  -  description
                             -------------------
    begin                : April 3, 2005
    copyright            : (C) 2005 by Tianwang

    ParseLink_d: ParseLink links from raw data.
    usage: ParseLink_d DestDir SrcDir

    For every file in SrcDir whose name starts with "Tianwang.raw.", runs
        ParseLink SrcDir/<name> > DestDir/log.<name>
    in lexicographic order of file name.

    Exit status: 0 on success, -1 on bad usage, -2 if SrcDir cannot be
    opened, -3 if at least one ParseLink invocation failed.
 ***************************************************************************/

#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <dirent.h>

#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
#include <time.h>

using namespace std;

void NormallizeDir(string& strPathName);

string f_strSrcDir  = ".";
string f_strDestDir  = ".";

// Wraps strArg in single quotes for /bin/sh so that spaces and shell
// metacharacters in a path are passed through literally. An embedded single
// quote is emitted as '\'' (close quote, escaped quote, reopen quote).
static string ShellQuote(const string& strArg)
{
	string strQuoted = "'";
	for (string::size_type i = 0; i < strArg.length(); ++i) {
		if (strArg[i] == '\'') {
			strQuoted += "'\\''";
		} else {
			strQuoted += strArg[i];
		}
	}
	strQuoted += "'";
	return strQuoted;
}

// Joins a directory and a file name with exactly one '/' between them.
// A directory that already ends in '/' (i.e. the root "/") gets no extra one.
static string JoinPath(const string& strDir, const string& strName)
{
	if (!strDir.empty() && strDir[strDir.length() - 1] == '/') {
		return strDir + strName;
	}
	return strDir + "/" + strName;
}

// Entry point.
//   argv[1]: destination directory receiving the "log.<name>" files.
//   argv[2]: source directory scanned for "Tianwang.raw.*" files.
// Returns 0 on success, -1 on bad usage, -2 if the source directory cannot
// be opened, -3 if any ParseLink command returned a non-zero status.
int main(int argc, char* argv[])
{
	if (argc < 3) {
		cout << "ParseLink_d: Parse links from raw data." << endl;
		cout << "usage:\t ParseLink_d DestDir SrcDir" << endl;
		return -1;
	}

	f_strDestDir = argv[1];
	f_strSrcDir = argv[2];
	NormallizeDir(f_strSrcDir);
	NormallizeDir(f_strDestDir);

	DIR* pDir = opendir(f_strSrcDir.c_str());
	if (pDir == NULL) {
		std::cout << "Can't open directory \"" << f_strSrcDir << "\"" << std::endl;
		return -2;
	}

	// Collect only the raw-data files. The prefix test also rejects "." and
	// ".." and any name shorter than the prefix, so no separate checks are
	// needed for those.
	const string strRawPrefix = "Tianwang.raw.";
	vector<string> vstrRawFiles;
	dirent* pDirent;
	while ((pDirent = readdir(pDir)) != NULL) {
		if (strncmp(pDirent->d_name, strRawPrefix.c_str(), strRawPrefix.length()) == 0) {
			vstrRawFiles.push_back(pDirent->d_name);
		}
	}
	closedir(pDir);

	// readdir() order is unspecified; sort so files are processed in a
	// deterministic order.
	sort(vstrRawFiles.begin(), vstrRawFiles.end());

	bool bSuccess = true;
	vector<string>::const_iterator itr;
	for (itr = vstrRawFiles.begin(); itr != vstrRawFiles.end(); ++itr) {
		const string& strFileName = *itr;
		const string strSrcFile = JoinPath(f_strSrcDir, strFileName);
		const string strDestFile = JoinPath(f_strDestDir, "log." + strFileName);

		// Paths are quoted because the command line is interpreted by the shell.
		const string strCmd = "ParseLink " + ShellQuote(strSrcFile)
				+ " > " + ShellQuote(strDestFile);
		cout << strCmd << endl;

		int nRes1 = system(strCmd.c_str());
		if (nRes1 == 0) {
			cout << "Success to extract " << strFileName << "!" << endl;
		} else {
			bSuccess = false;
			cout << "Fail to extract " << strFileName << "!" << endl;
		}
	}

	// Report failures to the caller instead of always exiting with 0.
	return bSuccess ? 0 : -3;
}

// Strips trailing '/' characters from strPathName in place.
// An empty string is left untouched, and a path consisting only of slashes
// is reduced to "/" so that the root directory stays addressable.
void NormallizeDir(string& strPathName)
{
	while (strPathName.length() > 1
			&& strPathName[strPathName.length() - 1] == '/') {
		strPathName.erase(strPathName.length() - 1);
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -