// ⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
// ⭐ 虫虫下载站
//
// 📄 preprocessdlg.cpp
//
// 📁 此代码运行于visual c++ 6.0的环境下
// 💻 CPP
// 字号:
// preprocessDlg.cpp : implementation file
//

#include "stdafx.h"
#include "utility.h"
#include "preprocess.h"
#include "preprocessDlg.h"
#include "Data.h"
#include "BstTreeNode.h"
#include "BstTree.h"
#include "Hash.h"
#include "Timer.h"
#include "MergHash.h"
#include "MergeHashElem.h"
#include "Segmentation.h"
#include <direct.h>
#include <fstream>
#include <string>
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
using namespace std;
// Number of input files indexed per intermediate chunk: OnInverted writes the
// in-memory tree to a numbered file under "index" and empties the tree each
// time this many files have been processed.
const int MODOL = 2000;
/////////////////////////////////////////////////////////////////////////////
// CAboutDlg dialog used for App About

// Dialog class for the application's "About" box, built on the IDD_ABOUTBOX
// dialog resource. CPreprocessDlg::OnSysCommand shows it modally when the
// IDM_ABOUTBOX system-menu command arrives. The //{{AFX_...}} markers delimit
// regions maintained by ClassWizard and must be left in place.
class CAboutDlg : public CDialog
{
public:
	// Constructs the dialog bound to the IDD_ABOUTBOX template.
	CAboutDlg();

// Dialog Data
	//{{AFX_DATA(CAboutDlg)
	enum { IDD = IDD_ABOUTBOX };
	//}}AFX_DATA

	// ClassWizard generated virtual function overrides
	//{{AFX_VIRTUAL(CAboutDlg)
	protected:
	virtual void DoDataExchange(CDataExchange* pDX);    // DDX/DDV support
	//}}AFX_VIRTUAL

// Implementation
protected:
	// No message handlers are declared for this dialog.
	//{{AFX_MSG(CAboutDlg)
	//}}AFX_MSG
	DECLARE_MESSAGE_MAP()
};

// Constructs the About dialog from the IDD_ABOUTBOX resource. The class has
// no data members, so the ClassWizard initialisation region is empty.
CAboutDlg::CAboutDlg() : CDialog(CAboutDlg::IDD)
{
	//{{AFX_DATA_INIT(CAboutDlg)
	//}}AFX_DATA_INIT
}

// Dialog data exchange hook. Only the base-class exchange runs because this
// dialog binds no controls to member variables.
//   pDX - exchange context supplied by the framework.
void CAboutDlg::DoDataExchange(CDataExchange* pDX)
{
	CDialog::DoDataExchange(pDX);
	//{{AFX_DATA_MAP(CAboutDlg)
	//}}AFX_DATA_MAP
}

// Message map for CAboutDlg: it adds no entries of its own and chains to the
// CDialog map named as the base class.
BEGIN_MESSAGE_MAP(CAboutDlg, CDialog)
	//{{AFX_MSG_MAP(CAboutDlg)
		// No message handlers
	//}}AFX_MSG_MAP
END_MESSAGE_MAP()

/////////////////////////////////////////////////////////////////////////////
// CPreprocessDlg dialog

// Constructs the main dialog.
//   pParent - optional owner window, forwarded to CDialog.
// Clears the three text fields exchanged with the edit controls (see
// DoDataExchange) and loads the IDR_MAINFRAME icon used for the window.
CPreprocessDlg::CPreprocessDlg(CWnd* pParent /*=NULL*/)
	: CDialog(CPreprocessDlg::IDD, pParent)
{
	// ClassWizard-maintained initialisation of the DDX-bound members.
	//{{AFX_DATA_INIT(CPreprocessDlg)
	m_name = _T("");
	m_desname = _T("");
	m_srcname = _T("");
	//}}AFX_DATA_INIT
	// Note that LoadIcon does not require a subsequent DestroyIcon in Win32
	m_hIcon = AfxGetApp()->LoadIcon(IDR_MAINFRAME);
}

// Dialog data exchange: moves text between the edit controls and the member
// strings whenever UpdateData is called.
//   pDX - exchange context supplied by the framework.
// Bindings: IDC_EDIT2 <-> m_name, IDC_EDITDES <-> m_desname (destination
// folder used by OnInverted/OnSeganl), IDC_EDITSRC <-> m_srcname (source
// folder used by OnSeganl).
void CPreprocessDlg::DoDataExchange(CDataExchange* pDX)
{
	CDialog::DoDataExchange(pDX);
	//{{AFX_DATA_MAP(CPreprocessDlg)
	DDX_Text(pDX, IDC_EDIT2, m_name);
	DDX_Text(pDX, IDC_EDITDES, m_desname);
	DDX_Text(pDX, IDC_EDITSRC, m_srcname);
	//}}AFX_DATA_MAP
}

// Message map for CPreprocessDlg. Besides the standard system-command, paint
// and drag-icon messages, it routes these command ids to their handlers:
//   ID_HASH     -> OnHash      ID_INVERTED -> OnInverted
//   ID_SEGANL   -> OnSeganl    IDR_CANCEL  -> OnCancel
//   IDC_MERGE   -> OnMerge
BEGIN_MESSAGE_MAP(CPreprocessDlg, CDialog)
	//{{AFX_MSG_MAP(CPreprocessDlg)
	ON_WM_SYSCOMMAND()
	ON_WM_PAINT()
	ON_WM_QUERYDRAGICON()
	ON_COMMAND(ID_HASH, OnHash)
	ON_COMMAND(ID_INVERTED, OnInverted)
	ON_COMMAND(ID_SEGANL, OnSeganl)
	ON_COMMAND(IDR_CANCEL, OnCancel)
	ON_COMMAND(IDC_MERGE, OnMerge)
	//}}AFX_MSG_MAP
END_MESSAGE_MAP()

/////////////////////////////////////////////////////////////////////////////
// CPreprocessDlg message handlers

// Dialog initialisation. Registers the common controls, appends the
// "About..." entry to the system menu, installs the dialog icons and records
// the process working directory in m_homedir; the command handlers build
// their file paths from that directory.
// Returns TRUE so the framework assigns the default input focus.
BOOL CPreprocessDlg::OnInitDialog()
{
	CDialog::OnInitDialog();
	InitCommonControls();

	// Add "About..." menu item to system menu.
	// IDM_ABOUTBOX must be in the system command range.
	ASSERT((IDM_ABOUTBOX & 0xFFF0) == IDM_ABOUTBOX);
	ASSERT(IDM_ABOUTBOX < 0xF000);

	CMenu* pSysMenu = GetSystemMenu(FALSE);
	if (pSysMenu != NULL)
	{
		CString strAboutMenu;
		strAboutMenu.LoadString(IDS_ABOUTBOX);
		if (!strAboutMenu.IsEmpty())
		{
			pSysMenu->AppendMenu(MF_SEPARATOR);
			pSysMenu->AppendMenu(MF_STRING, IDM_ABOUTBOX, strAboutMenu);
		}
	}

	// Set the icon for this dialog.  The framework does this automatically
	//  when the application's main window is not a dialog
	SetIcon(m_hIcon, TRUE);			// Set big icon
	SetIcon(m_hIcon, FALSE);		// Set small icon

	// Remember the start-up directory. The buffer is MAX_PATH characters and
	// the result is only used when the call succeeded and the path fitted:
	// GetCurrentDirectory returns 0 on failure and the required size (larger
	// than the buffer) when the buffer is too small, leaving it unfilled.
	TCHAR szCurDir[MAX_PATH];
	const DWORD cchCopied = ::GetCurrentDirectory(MAX_PATH, szCurDir);
	if (cchCopied > 0 && cchCopied < MAX_PATH)
	{
		m_homedir = szCurDir;
	}
	else
	{
		TRACE0("OnInitDialog: GetCurrentDirectory failed; m_homedir left unchanged\n");
	}

	return TRUE;  // return TRUE  unless you set the focus to a control
}

// System-menu command handler.
//   nID    - system command id; it is masked with 0xFFF0 before comparison.
//   lParam - command-specific data, forwarded untouched to the base class.
// Shows the About box for IDM_ABOUTBOX and defers every other command to
// CDialog.
void CPreprocessDlg::OnSysCommand(UINT nID, LPARAM lParam)
{
	const bool bAboutRequested = ((nID & 0xFFF0) == IDM_ABOUTBOX);
	if (!bAboutRequested)
	{
		CDialog::OnSysCommand(nID, lParam);
		return;
	}

	CAboutDlg dlgAbout;
	dlgAbout.DoModal();
}

// If you add a minimize button to your dialog, you will need the code below
//  to draw the icon.  For MFC applications using the document/view model,
//  this is automatically done for you by the framework.

void CPreprocessDlg::OnPaint() 
{
	if (IsIconic())
	{
		CPaintDC dc(this); // device context for painting

		SendMessage(WM_ICONERASEBKGND, (WPARAM) dc.GetSafeHdc(), 0);

		// Center icon in client rectangle
		int cxIcon = GetSystemMetrics(SM_CXICON);
		int cyIcon = GetSystemMetrics(SM_CYICON);
		CRect rect;
		GetClientRect(&rect);
		int x = (rect.Width() - cxIcon + 1) / 2;
		int y = (rect.Height() - cyIcon + 1) / 2;

		// Draw the icon
		dc.DrawIcon(x, y, m_hIcon);
	}
	else
	{
		CDialog::OnPaint();
	}
}

// Called by the system to get the cursor shown while the user drags the
// minimized window; the dialog icon doubles as that cursor.
HCURSOR CPreprocessDlg::OnQueryDragIcon()
{
	HCURSOR hDragCursor = (HCURSOR) m_hIcon;
	return hDragCursor;
}

// Cancel handler, also reached through the IDR_CANCEL command entry in the
// message map. There is no dialog-specific cleanup, so the work is left
// entirely to the base class.
void CPreprocessDlg::OnCancel()
{
	CDialog::OnCancel();
}


//建立hash索引
void CPreprocessDlg::OnHash() 
{
	// TODO: Add your command handler code here
		Settime();
		CString status;
		status = _T("当前状态:建立hash");
		SetDlgItemText(IDC_STATICS,status);
		_chdir( (LPCTSTR)m_homedir);
		fstream hash("hash.txt",ios::out);
		CHash Hash;
		CHashElem elem;
		CData data;
		fstream index;
		index.open("BST.txt",ios::in);
		while( !index.eof() ){
			long num = 0;
			string str;
			char temp[100];
			memset( temp,'\0',sizeof(temp) );
			long pos = index.tellg();
			index>>temp;
			if( strlen(temp) == 0 )
				index>>temp;
			str = temp;
			index>>num;
			elem = CHashElem(str,pos);
			Hash.insert( elem );
			for( int i = 0; i < num; i++ )
				index>>data;

		}
		index.close();
		Hash.VisitAll( hash );
		hash.close();
		double time = Gettime();
		status.Format("时间为:%lf s",time);
		SetDlgItemText(IDC_STATICS,status);
}


//建立倒排索引
void CPreprocessDlg::OnInverted() 
{
	// TODO: Add your command handler code here
	_chdir((LPCTSTR)m_homedir);
	deldir("index");
	_mkdir("index");
	UpdateData(TRUE);
	if(m_desname == "" || _chdir((LPCTSTR)m_desname)){
		AfxMessageBox("找不到指定文件夹:"+m_desname);
		return;
	}
	UpdateData(FALSE);
	Settime();
	CString status = _T("当前状态:建立倒排文件……");
	SetDlgItemText(IDC_STATICS,status);
	CBstTree tree;
	CData data;	
	CBstTreeNode* node;
	WIN32_FIND_DATA stFindClientData;
	HANDLE hFindClient;
	hFindClient = FindFirstFile( "*.*",&stFindClientData);	
	int count=0;
	int now=0;
	while ( FindNextFile( hFindClient,& stFindClientData) ){
		if(strcmp( stFindClientData.cFileName,"..")!=0&&strcmp( stFindClientData.cFileName,".")!=0){//判断不是".",".."
			fstream file;
			now++;
			count++;
			file.open( stFindClientData.cFileName , ios::in );
			if( file.fail() )
				AfxMessageBox("Open file error!");
			char firstline[1000];
			file.getline(firstline,1000);
			while( !file.eof() ){
				string str;
				char read[100];
				long pos;
				pos = file.tellg();
				data = CData( pos,stFindClientData.cFileName );			
				file>>read;
				str = read;
				if( str.length() >= 4 ){
					CBstTreeNode * tempnode = tree.find( str );
					if( tempnode == NULL ){
						node = new CBstTreeNode( str);
						node->insert( data );
						tree.insert( *node );
						delete node;
					}
					else
						tempnode->insert( data );
				}
			}
			file.close();
		}
		if(!(count%MODOL) && count != 0){
				now = 0;
				fstream out;
				char name[200];
				sprintf(name,"..\\index\\%d.txt",count/MODOL);
				out.open(name,ios::out);
				tree.InOrder(tree.root,out);
				out.close();
				tree.RemoveAll();
		}
	}
	if(now != 0){
		char name[200];
		fstream out;
		sprintf(name,"..\\index\\%d.txt",count/MODOL+1);
		out.open(name,ios::out);
		tree.InOrder( tree.root ,out);
		out.close();
	}
	FindClose(hFindClient);
	double time = Gettime();
	CString str;
	status.Format("时间为:%lf s",time);
	SetDlgItemText(IDC_STATICS,status);
}

//分析网页和切分词语
void CPreprocessDlg::OnSeganl() 
{
	// TODO: Add your command handler code here
	int nFlag;
	CString name;
	UpdateData(TRUE);
	name = m_homedir+"\\"+m_desname;
	nFlag = _mkdir((LPCTSTR)name);
	if(nFlag !=0){
		AfxMessageBox("建立目的文件夹失败!");
		return;
	}
	name = m_homedir+ "\\"+ m_srcname;
	nFlag = _chdir((LPCTSTR)m_srcname);
	if(nFlag !=0){
		AfxMessageBox("不存在指定的文件夹!");
		return;
	}
	Settime();
	CString status = _T("当前状态:分析网页和切分词语");
	SetDlgItemText(IDC_STATICS,status);

	Segmentation SegFiles;
	ifstream Dictfin;
	char DictFileName[200] = "";
	strcat(DictFileName,(LPCTSTR)m_homedir);
	strcat(DictFileName,"\\字典.txt");
	Dictfin.open(DictFileName);
	if(Dictfin.fail ()){
		AfxMessageBox("打开字典失败!");
		return;
	}
	//建散列表,将词典散列到表中
	SegFiles.BuildHash(Dictfin);
	Dictfin.close ();

	WIN32_FIND_DATA stFindClientData;		
	HANDLE hFindClient;

	hFindClient = FindFirstFile( "*.*",&stFindClientData );

	while( hFindClient != INVALID_HANDLE_VALUE ){//当文件夹非空时
		if(!strcmp(stFindClientData.cFileName,".")||!strcmp(stFindClientData.cFileName,"..")){
			//是文件夹,且不是自身和父文件夹,就递归	
			FindNextFile(hFindClient,&stFindClientData);
			continue;
		}	   
		else{
			string profile;
			fstream file;
			fstream result;
			string read;
			string r;
			string cResult;

			file.open (stFindClientData.cFileName,ios::in);
			if(file.fail ()){
				AfxMessageBox("打开文件失败!");
				return;
			}

			CString ResFileName;
			ResFileName = m_homedir+"\\"+m_desname+"\\"+stFindClientData.cFileName;
			result.open(ResFileName,ios::out);
			if(result.fail ()){
				AfxMessageBox("创建文件失败!");
				return;
			}
			file >> read;

			//如果是链接,加上协议保留
			if(read.find ("http://") == -1)
				read = "http://" + read;
			result << read << "\n";

			while(!file.eof ()){
				file >> read;
				r+= read;
			}

			if(!GetHTMLText(r,profile)){
				result.close();
				file.close();
				return;
			}
			string t; 	
			t = ProcessingFile(profile);
			cResult = SegFiles.Segment(t);
			result << cResult;
			result.close ();
			file.close();
			if(!FindNextFile(hFindClient,&stFindClientData))
				break;
		}
	}
	//关闭句柄
	FindClose( hFindClient);
	status = _T("当前状态:网页分析切分词语结束");
	SetDlgItemText(IDC_STATICS,status);
	double time = Gettime();
	status.Format("时间为:%lf s",time);
	SetDlgItemText(IDC_STATICS,status);
	UpdateData(FALSE);
}

void CPreprocessDlg::OnMerge() 
{
	// TODO: Add your command handler code here
	CString dir;
	dir = m_homedir + _T("\\index");
	_chdir((LPCTSTR)dir);
	WIN32_FIND_DATA stFindClientData;
	HANDLE hFindClient;
	hFindClient = FindFirstFile( "*.*",&stFindClientData);
	CString status;
	CMergHash hash;
	while ( FindNextFile( hFindClient,& stFindClientData ) )
	{
		if( strcmp( stFindClientData.cFileName,"..")!=0
			&&strcmp( stFindClientData.cFileName,".")!=0)//判断不是".",".."//
		{
			fstream out;
			out.open(stFindClientData.cFileName,ios::in);
			status = _T("正在处理:")+CString(stFindClientData.cFileName);
			SetDlgItemText(IDC_STATICS,status);
			string name = stFindClientData.cFileName;
			while( !out.eof() )
			{
				CMergeHashElem elem;
				string key;
				char temp[100];
				long num,pos;
				pos = out.tellg();
				out>>temp;

				key = temp;
				out>>num;
				elem = CMergeHashElem( name,key.c_str(), pos );
				hash.insert( elem );
				for( int j = 0; j< num; j++ )
				{
					out>>temp;
					out>>pos;
				}
			}
			out.close();
		}
	}
	status = _T("正在写入倒排文件:BST.txt...");
	SetDlgItemText(IDC_STATICS,status);
	fstream file("..\\BST.txt",ios::out);
	hash.VisitAll(file);
	file.close();
	status = _T("处理结束。");
	SetDlgItemText(IDC_STATICS,status);
}

// ⌨️ 快捷键说明
//
// 复制代码 Ctrl + C
// 搜索代码 Ctrl + F
// 全屏模式 F11
// 切换主题 Ctrl + Shift + D
// 显示快捷键 ?
// 增大字号 Ctrl + =
// 减小字号 Ctrl + -