📄 preprocessdlg.cpp
字号:
// preprocessDlg.cpp : implementation file
//
#include "stdafx.h"
#include "utility.h"
#include "preprocess.h"
#include "preprocessDlg.h"
#include "Data.h"
#include "BstTreeNode.h"
#include "BstTree.h"
#include "Hash.h"
#include "Timer.h"
#include "MergHash.h"
#include "MergeHashElem.h"
#include "Segmentation.h"
#include <direct.h>
#include <fstream>
#include <string>
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
using namespace std;
// Number of source files folded into one partial index file: OnInverted
// writes its tree to index\<n>.txt and empties it every MODOL files.
const int MODOL = 2000;
/////////////////////////////////////////////////////////////////////////////
// CAboutDlg dialog used for App About
// Modal "About" dialog, shown by CPreprocessDlg::OnSysCommand.
// It declares no data members and no message handlers of its own; the
// //{{AFX_...}} markers are ClassWizard bookkeeping and must stay intact.
class CAboutDlg : public CDialog
{
public:
CAboutDlg();
// Dialog Data
//{{AFX_DATA(CAboutDlg)
enum { IDD = IDD_ABOUTBOX };
//}}AFX_DATA
// ClassWizard generated virtual function overrides
//{{AFX_VIRTUAL(CAboutDlg)
protected:
virtual void DoDataExchange(CDataExchange* pDX); // DDX/DDV support
//}}AFX_VIRTUAL
// Implementation
protected:
//{{AFX_MSG(CAboutDlg)
//}}AFX_MSG
DECLARE_MESSAGE_MAP()
};
// Constructs the About dialog from the IDD_ABOUTBOX template.
// There is no member data to initialise.
CAboutDlg::CAboutDlg() : CDialog(CAboutDlg::IDD)
{
//{{AFX_DATA_INIT(CAboutDlg)
//}}AFX_DATA_INIT
}
// Dialog data exchange for the About box. Only forwards to the base class;
// the dialog binds no controls to members.
void CAboutDlg::DoDataExchange(CDataExchange* pDX)
{
CDialog::DoDataExchange(pDX);
//{{AFX_DATA_MAP(CAboutDlg)
//}}AFX_DATA_MAP
}
// Message map for the About box: empty, every message goes to CDialog.
BEGIN_MESSAGE_MAP(CAboutDlg, CDialog)
//{{AFX_MSG_MAP(CAboutDlg)
// No message handlers
//}}AFX_MSG_MAP
END_MESSAGE_MAP()
/////////////////////////////////////////////////////////////////////////////
// CPreprocessDlg dialog
// Constructs the main dialog: clears the three edit-box backing strings
// (m_name, m_desname, m_srcname) and loads the application icon that
// OnInitDialog / OnPaint later use.
//   pParent - optional parent window, forwarded to CDialog.
CPreprocessDlg::CPreprocessDlg(CWnd* pParent /*=NULL*/)
: CDialog(CPreprocessDlg::IDD, pParent)
{
//{{AFX_DATA_INIT(CPreprocessDlg)
m_name = _T("");
m_desname = _T("");
m_srcname = _T("");
//}}AFX_DATA_INIT
// Note that LoadIcon does not require a subsequent DestroyIcon in Win32
m_hIcon = AfxGetApp()->LoadIcon(IDR_MAINFRAME);
}
// Dialog data exchange: binds the edit controls to their CString members.
//   IDC_EDIT2   <-> m_name
//   IDC_EDITDES <-> m_desname (folder created by OnSeganl, read by OnInverted)
//   IDC_EDITSRC <-> m_srcname (folder read by OnSeganl)
// The handlers call UpdateData() to trigger this transfer.
void CPreprocessDlg::DoDataExchange(CDataExchange* pDX)
{
CDialog::DoDataExchange(pDX);
//{{AFX_DATA_MAP(CPreprocessDlg)
DDX_Text(pDX, IDC_EDIT2, m_name);
DDX_Text(pDX, IDC_EDITDES, m_desname);
DDX_Text(pDX, IDC_EDITSRC, m_srcname);
//}}AFX_DATA_MAP
}
// Message map for the main dialog. Besides the standard system-command,
// paint and drag-icon messages it routes five command IDs to the handlers
// defined in this file: ID_HASH -> OnHash, ID_INVERTED -> OnInverted,
// ID_SEGANL -> OnSeganl, IDR_CANCEL -> OnCancel, IDC_MERGE -> OnMerge.
BEGIN_MESSAGE_MAP(CPreprocessDlg, CDialog)
//{{AFX_MSG_MAP(CPreprocessDlg)
ON_WM_SYSCOMMAND()
ON_WM_PAINT()
ON_WM_QUERYDRAGICON()
ON_COMMAND(ID_HASH, OnHash)
ON_COMMAND(ID_INVERTED, OnInverted)
ON_COMMAND(ID_SEGANL, OnSeganl)
ON_COMMAND(IDR_CANCEL, OnCancel)
ON_COMMAND(IDC_MERGE, OnMerge)
//}}AFX_MSG_MAP
END_MESSAGE_MAP()
/////////////////////////////////////////////////////////////////////////////
// CPreprocessDlg message handlers
// Dialog initialisation.
//  - Appends an "About..." entry to the system menu.
//  - Installs the big and small window icons.
//  - Records the process's current directory in m_homedir; the command
//    handlers use it as the root for BST.txt, hash.txt, the dictionary and
//    the index\ folder.
// Returns TRUE so the framework sets the default focus.
BOOL CPreprocessDlg::OnInitDialog()
{
    CDialog::OnInitDialog();
    InitCommonControls();

    // Add "About..." menu item to system menu.
    // IDM_ABOUTBOX must be in the system command range.
    ASSERT((IDM_ABOUTBOX & 0xFFF0) == IDM_ABOUTBOX);
    ASSERT(IDM_ABOUTBOX < 0xF000);

    CMenu* pSysMenu = GetSystemMenu(FALSE);
    if (pSysMenu != NULL)
    {
        CString strAboutMenu;
        strAboutMenu.LoadString(IDS_ABOUTBOX);
        if (!strAboutMenu.IsEmpty())
        {
            pSysMenu->AppendMenu(MF_SEPARATOR);
            pSysMenu->AppendMenu(MF_STRING, IDM_ABOUTBOX, strAboutMenu);
        }
    }

    // Set the icon for this dialog. The framework does this automatically
    // when the application's main window is not a dialog
    SetIcon(m_hIcon, TRUE);   // Set big icon
    SetIcon(m_hIcon, FALSE);  // Set small icon

    // Capture the start-up directory. Ask the API for the required size
    // first instead of guessing a fixed buffer length: a too-small buffer is
    // left unwritten by GetCurrentDirectory, which previously let m_homedir
    // be built from uninitialised memory.
    m_homedir.Empty();
    const DWORD needed = ::GetCurrentDirectory(0, NULL);  // includes the terminator
    if (needed > 0)
    {
        LPTSTR buffer = m_homedir.GetBuffer(static_cast<int>(needed));
        const DWORD written = ::GetCurrentDirectory(needed, buffer);
        // On success 'written' excludes the terminator and is < needed;
        // anything else means the call failed, so keep the string empty.
        m_homedir.ReleaseBuffer(written < needed ? static_cast<int>(written) : 0);
    }

    return TRUE;  // return TRUE unless you set the focus to a control
}
// System-menu dispatcher: the About entry opens the About box modally,
// every other system command is left to the default dialog handling.
//   nID    - system command identifier (low four bits are masked off
//            before the comparison).
//   lParam - forwarded untouched to the base class.
void CPreprocessDlg::OnSysCommand(UINT nID, LPARAM lParam)
{
    const bool aboutRequested = ((nID & 0xFFF0) == IDM_ABOUTBOX);
    if (!aboutRequested)
    {
        CDialog::OnSysCommand(nID, lParam);
        return;
    }

    CAboutDlg aboutBox;
    aboutBox.DoModal();
}
// If you add a minimize button to your dialog, you will need the code below
// to draw the icon. For MFC applications using the document/view model,
// this is automatically done for you by the framework.
void CPreprocessDlg::OnPaint()
{
if (IsIconic())
{
CPaintDC dc(this); // device context for painting
SendMessage(WM_ICONERASEBKGND, (WPARAM) dc.GetSafeHdc(), 0);
// Center icon in client rectangle
int cxIcon = GetSystemMetrics(SM_CXICON);
int cyIcon = GetSystemMetrics(SM_CYICON);
CRect rect;
GetClientRect(&rect);
int x = (rect.Width() - cxIcon + 1) / 2;
int y = (rect.Height() - cyIcon + 1) / 2;
// Draw the icon
dc.DrawIcon(x, y, m_hIcon);
}
else
{
CDialog::OnPaint();
}
}
// Called by the system to get the cursor shown while the user drags the
// minimised window; the dialog's own icon is handed back for that purpose.
HCURSOR CPreprocessDlg::OnQueryDragIcon()
{
    HCURSOR dragCursor = reinterpret_cast<HCURSOR>(m_hIcon);
    return dragCursor;
}
// Cancel handler, also reached through the IDR_CANCEL command in the
// message map. Performs no cleanup of its own and simply lets CDialog
// handle the cancel.
void CPreprocessDlg::OnCancel()
{
// TODO: Add extra cleanup here
CDialog::OnCancel();
}
//建立hash索引
void CPreprocessDlg::OnHash()
{
// TODO: Add your command handler code here
Settime();
CString status;
status = _T("当前状态:建立hash");
SetDlgItemText(IDC_STATICS,status);
_chdir( (LPCTSTR)m_homedir);
fstream hash("hash.txt",ios::out);
CHash Hash;
CHashElem elem;
CData data;
fstream index;
index.open("BST.txt",ios::in);
while( !index.eof() ){
long num = 0;
string str;
char temp[100];
memset( temp,'\0',sizeof(temp) );
long pos = index.tellg();
index>>temp;
if( strlen(temp) == 0 )
index>>temp;
str = temp;
index>>num;
elem = CHashElem(str,pos);
Hash.insert( elem );
for( int i = 0; i < num; i++ )
index>>data;
}
index.close();
Hash.VisitAll( hash );
hash.close();
double time = Gettime();
status.Format("时间为:%lf s",time);
SetDlgItemText(IDC_STATICS,status);
}
//建立倒排索引
void CPreprocessDlg::OnInverted()
{
// TODO: Add your command handler code here
_chdir((LPCTSTR)m_homedir);
deldir("index");
_mkdir("index");
UpdateData(TRUE);
if(m_desname == "" || _chdir((LPCTSTR)m_desname)){
AfxMessageBox("找不到指定文件夹:"+m_desname);
return;
}
UpdateData(FALSE);
Settime();
CString status = _T("当前状态:建立倒排文件……");
SetDlgItemText(IDC_STATICS,status);
CBstTree tree;
CData data;
CBstTreeNode* node;
WIN32_FIND_DATA stFindClientData;
HANDLE hFindClient;
hFindClient = FindFirstFile( "*.*",&stFindClientData);
int count=0;
int now=0;
while ( FindNextFile( hFindClient,& stFindClientData) ){
if(strcmp( stFindClientData.cFileName,"..")!=0&&strcmp( stFindClientData.cFileName,".")!=0){//判断不是".",".."
fstream file;
now++;
count++;
file.open( stFindClientData.cFileName , ios::in );
if( file.fail() )
AfxMessageBox("Open file error!");
char firstline[1000];
file.getline(firstline,1000);
while( !file.eof() ){
string str;
char read[100];
long pos;
pos = file.tellg();
data = CData( pos,stFindClientData.cFileName );
file>>read;
str = read;
if( str.length() >= 4 ){
CBstTreeNode * tempnode = tree.find( str );
if( tempnode == NULL ){
node = new CBstTreeNode( str);
node->insert( data );
tree.insert( *node );
delete node;
}
else
tempnode->insert( data );
}
}
file.close();
}
if(!(count%MODOL) && count != 0){
now = 0;
fstream out;
char name[200];
sprintf(name,"..\\index\\%d.txt",count/MODOL);
out.open(name,ios::out);
tree.InOrder(tree.root,out);
out.close();
tree.RemoveAll();
}
}
if(now != 0){
char name[200];
fstream out;
sprintf(name,"..\\index\\%d.txt",count/MODOL+1);
out.open(name,ios::out);
tree.InOrder( tree.root ,out);
out.close();
}
FindClose(hFindClient);
double time = Gettime();
CString str;
status.Format("时间为:%lf s",time);
SetDlgItemText(IDC_STATICS,status);
}
//分析网页和切分词语
void CPreprocessDlg::OnSeganl()
{
// TODO: Add your command handler code here
int nFlag;
CString name;
UpdateData(TRUE);
name = m_homedir+"\\"+m_desname;
nFlag = _mkdir((LPCTSTR)name);
if(nFlag !=0){
AfxMessageBox("建立目的文件夹失败!");
return;
}
name = m_homedir+ "\\"+ m_srcname;
nFlag = _chdir((LPCTSTR)m_srcname);
if(nFlag !=0){
AfxMessageBox("不存在指定的文件夹!");
return;
}
Settime();
CString status = _T("当前状态:分析网页和切分词语");
SetDlgItemText(IDC_STATICS,status);
Segmentation SegFiles;
ifstream Dictfin;
char DictFileName[200] = "";
strcat(DictFileName,(LPCTSTR)m_homedir);
strcat(DictFileName,"\\字典.txt");
Dictfin.open(DictFileName);
if(Dictfin.fail ()){
AfxMessageBox("打开字典失败!");
return;
}
//建散列表,将词典散列到表中
SegFiles.BuildHash(Dictfin);
Dictfin.close ();
WIN32_FIND_DATA stFindClientData;
HANDLE hFindClient;
hFindClient = FindFirstFile( "*.*",&stFindClientData );
while( hFindClient != INVALID_HANDLE_VALUE ){//当文件夹非空时
if(!strcmp(stFindClientData.cFileName,".")||!strcmp(stFindClientData.cFileName,"..")){
//是文件夹,且不是自身和父文件夹,就递归
FindNextFile(hFindClient,&stFindClientData);
continue;
}
else{
string profile;
fstream file;
fstream result;
string read;
string r;
string cResult;
file.open (stFindClientData.cFileName,ios::in);
if(file.fail ()){
AfxMessageBox("打开文件失败!");
return;
}
CString ResFileName;
ResFileName = m_homedir+"\\"+m_desname+"\\"+stFindClientData.cFileName;
result.open(ResFileName,ios::out);
if(result.fail ()){
AfxMessageBox("创建文件失败!");
return;
}
file >> read;
//如果是链接,加上协议保留
if(read.find ("http://") == -1)
read = "http://" + read;
result << read << "\n";
while(!file.eof ()){
file >> read;
r+= read;
}
if(!GetHTMLText(r,profile)){
result.close();
file.close();
return;
}
string t;
t = ProcessingFile(profile);
cResult = SegFiles.Segment(t);
result << cResult;
result.close ();
file.close();
if(!FindNextFile(hFindClient,&stFindClientData))
break;
}
}
//关闭句柄
FindClose( hFindClient);
status = _T("当前状态:网页分析切分词语结束");
SetDlgItemText(IDC_STATICS,status);
double time = Gettime();
status.Format("时间为:%lf s",time);
SetDlgItemText(IDC_STATICS,status);
UpdateData(FALSE);
}
void CPreprocessDlg::OnMerge()
{
// TODO: Add your command handler code here
CString dir;
dir = m_homedir + _T("\\index");
_chdir((LPCTSTR)dir);
WIN32_FIND_DATA stFindClientData;
HANDLE hFindClient;
hFindClient = FindFirstFile( "*.*",&stFindClientData);
CString status;
CMergHash hash;
while ( FindNextFile( hFindClient,& stFindClientData ) )
{
if( strcmp( stFindClientData.cFileName,"..")!=0
&&strcmp( stFindClientData.cFileName,".")!=0)//判断不是".",".."//
{
fstream out;
out.open(stFindClientData.cFileName,ios::in);
status = _T("正在处理:")+CString(stFindClientData.cFileName);
SetDlgItemText(IDC_STATICS,status);
string name = stFindClientData.cFileName;
while( !out.eof() )
{
CMergeHashElem elem;
string key;
char temp[100];
long num,pos;
pos = out.tellg();
out>>temp;
key = temp;
out>>num;
elem = CMergeHashElem( name,key.c_str(), pos );
hash.insert( elem );
for( int j = 0; j< num; j++ )
{
out>>temp;
out>>pos;
}
}
out.close();
}
}
status = _T("正在写入倒排文件:BST.txt...");
SetDlgItemText(IDC_STATICS,status);
fstream file("..\\BST.txt",ios::out);
hash.VisitAll(file);
file.close();
status = _T("处理结束。");
SetDlgItemText(IDC_STATICS,status);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -