⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 分词实验dlg.cpp

📁 分词实验,能够对一句话准确的分辨出中文单词,采用VC6开发
💻 CPP
字号:
// 分词实验Dlg.cpp : implementation file
//

#include "stdafx.h"
#include "分词实验.h"
#include "分词实验Dlg.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

/////////////////////////////////////////////////////////////////////////////
// CAboutDlg dialog used for App About

// Dialog class for the application's "About" box. It is bound to the
// IDD_ABOUTBOX dialog template and declares no data members or message
// handlers of its own; the //{{AFX_...}} comment pairs are ClassWizard
// markers and must be left intact.
class CAboutDlg : public CDialog
{
public:
	// Default constructor; defined below in this file.
	CAboutDlg();

// Dialog Data
	//{{AFX_DATA(CAboutDlg)
	enum { IDD = IDD_ABOUTBOX };
	//}}AFX_DATA

	// ClassWizard generated virtual function overrides
	//{{AFX_VIRTUAL(CAboutDlg)
	protected:
	virtual void DoDataExchange(CDataExchange* pDX);    // DDX/DDV support
	//}}AFX_VIRTUAL

// Implementation
protected:
	// No message handlers are declared between the AFX_MSG markers.
	//{{AFX_MSG(CAboutDlg)
	//}}AFX_MSG
	DECLARE_MESSAGE_MAP()
};

// Constructs the About dialog, handing the dialog template ID
// (CAboutDlg::IDD) to the CDialog base class. There are no members to
// initialise; the AFX_DATA_INIT region is intentionally empty.
CAboutDlg::CAboutDlg() : CDialog(CAboutDlg::IDD)
{
	//{{AFX_DATA_INIT(CAboutDlg)
	//}}AFX_DATA_INIT
}

// Dialog data exchange for the About box. Only the base-class exchange is
// performed; no controls are mapped inside the AFX_DATA_MAP region.
//   pDX - exchange context, forwarded unchanged to CDialog.
void CAboutDlg::DoDataExchange(CDataExchange* pDX)
{
	CDialog::DoDataExchange(pDX);
	//{{AFX_DATA_MAP(CAboutDlg)
	//}}AFX_DATA_MAP
}

// Message map for CAboutDlg. It contains no entries of its own, so the
// class adds no message handling beyond its CDialog base.
BEGIN_MESSAGE_MAP(CAboutDlg, CDialog)
	//{{AFX_MSG_MAP(CAboutDlg)
		// No message handlers
	//}}AFX_MSG_MAP
END_MESSAGE_MAP()

/////////////////////////////////////////////////////////////////////////////
// CMyDlg dialog

// Constructs the main dialog.
//   pParent - parent window passed through to CDialog together with the
//             dialog template ID CMyDlg::IDD.
// The four CString members that are exchanged with the dialog controls
// (input sentence, left-to-right result, right-to-left result, final result)
// start out empty, and the application icon is loaded for later use by
// OnInitDialog/OnPaint.
CMyDlg::CMyDlg(CWnd* pParent /*=NULL*/)
	: CDialog(CMyDlg::IDD, pParent)
{
	//{{AFX_DATA_INIT(CMyDlg)
	m_isentence = _T("");
	m_losentence = _T("");
	m_rosentence = _T("");
	m_osentence = _T("");
	//}}AFX_DATA_INIT
	// Note that LoadIcon does not require a subsequent DestroyIcon in Win32
	m_hIcon = AfxGetApp()->LoadIcon(IDR_MAINFRAME);
}

// Dialog data exchange for the main dialog. After the base-class exchange,
// each of the four text members is tied to its control:
//   IDC_INPUT     <-> m_isentence   (sentence typed by the user)
//   IDC_LOUTPUT   <-> m_losentence  (left-to-right segmentation)
//   IDC_ROUTPUT   <-> m_rosentence  (right-to-left segmentation)
//   IDC_OSENTENCE <-> m_osentence   (final result or ambiguity message)
//   pDX - exchange context supplied by the framework via UpdateData().
void CMyDlg::DoDataExchange(CDataExchange* pDX)
{
	CDialog::DoDataExchange(pDX);
	//{{AFX_DATA_MAP(CMyDlg)
	DDX_Text(pDX, IDC_INPUT, m_isentence);
	DDX_Text(pDX, IDC_LOUTPUT, m_losentence);
	DDX_Text(pDX, IDC_ROUTPUT, m_rosentence);
	DDX_Text(pDX, IDC_OSENTENCE, m_osentence);
	//}}AFX_DATA_MAP
}

// Message map for CMyDlg: system-command, paint and drag-icon messages go to
// the handlers defined in this file, a click on IDC_CLOSE calls OnClose and
// a click on IDC_SURE calls OnSure.
BEGIN_MESSAGE_MAP(CMyDlg, CDialog)
	//{{AFX_MSG_MAP(CMyDlg)
	ON_WM_SYSCOMMAND()
	ON_WM_PAINT()
	ON_WM_QUERYDRAGICON()
	ON_BN_CLICKED(IDC_CLOSE, OnClose)
	ON_BN_CLICKED(IDC_SURE, OnSure)
	//}}AFX_MSG_MAP
END_MESSAGE_MAP()

/////////////////////////////////////////////////////////////////////////////
// CMyDlg message handlers

// One-time dialog setup: runs the base-class initialisation, appends an
// "About..." entry to the system menu when both the menu and the
// IDS_ABOUTBOX string are available, and installs the dialog icons.
// Returns TRUE because no control is given the focus here.
BOOL CMyDlg::OnInitDialog()
{
	CDialog::OnInitDialog();

	// The About command ID has to be a valid system-command value: its low
	// four bits must be clear and it must lie below 0xF000.
	ASSERT((IDM_ABOUTBOX & 0xFFF0) == IDM_ABOUTBOX);
	ASSERT(IDM_ABOUTBOX < 0xF000);

	CMenu* pSystemMenu = GetSystemMenu(FALSE);
	if (NULL != pSystemMenu)
	{
		CString aboutCaption;
		aboutCaption.LoadString(IDS_ABOUTBOX);

		// Only extend the menu when a caption could actually be loaded.
		const BOOL haveCaption = !aboutCaption.IsEmpty();
		if (haveCaption)
		{
			pSystemMenu->AppendMenu(MF_SEPARATOR);
			pSystemMenu->AppendMenu(MF_STRING, IDM_ABOUTBOX, aboutCaption);
		}
	}

	// A dialog-based main window has to set its own icons.
	SetIcon(m_hIcon, TRUE);		// big icon
	SetIcon(m_hIcon, FALSE);	// small icon

	return TRUE;
}

// System-menu command handler. The custom IDM_ABOUTBOX command opens the
// About box modally; every other command is passed to CDialog.
//   nID    - command identifier; its low four bits are masked off before
//            the comparison.
//   lParam - forwarded untouched to the base class.
void CMyDlg::OnSysCommand(UINT nID, LPARAM lParam)
{
	const UINT command = nID & 0xFFF0;
	if (command != IDM_ABOUTBOX)
	{
		CDialog::OnSysCommand(nID, lParam);
		return;
	}

	CAboutDlg aboutBox;
	aboutBox.DoModal();
}

// If you add a minimize button to your dialog, you will need the code below
//  to draw the icon.  For MFC applications using the document/view model,
//  this is automatically done for you by the framework.

void CMyDlg::OnPaint() 
{
	if (IsIconic())
	{
		CPaintDC dc(this); // device context for painting

		SendMessage(WM_ICONERASEBKGND, (WPARAM) dc.GetSafeHdc(), 0);

		// Center icon in client rectangle
		int cxIcon = GetSystemMetrics(SM_CXICON);
		int cyIcon = GetSystemMetrics(SM_CYICON);
		CRect rect;
		GetClientRect(&rect);
		int x = (rect.Width() - cxIcon + 1) / 2;
		int y = (rect.Height() - cyIcon + 1) / 2;

		// Draw the icon
		dc.DrawIcon(x, y, m_hIcon);
	}
	else
	{
		CDialog::OnPaint();
	}
}

// Called by the system to obtain the cursor shown while the user drags the
// minimized window; the dialog's own icon handle is returned for it.
HCURSOR CMyDlg::OnQueryDragIcon()
{
	HCURSOR dragCursor = (HCURSOR) m_hIcon;
	return dragCursor;
}

void CMyDlg::OnClose() 
{
	// TODO: Add your control notification handler code here
	CDialog::OnOK();
}


//进行分词处理;
void CMyDlg::OnSure() 
{
	// TODO: Add your control notification handler code here
	int  t=1;
	MAX_LENGTH=10;
	UpdateData(TRUE);//从窗口读取输入;

    if(m_isentence.IsEmpty())//输入为空给出提示;
	{
		MessageBox("请输入分词语句!",NULL,MB_OK);
	}
	while(!m_isentence.IsEmpty())//输入不空则进行分词
	{
		m_losentence.Empty();//左匹配输出编辑框初始化;
	    m_rosentence.Empty();//右匹配输出编辑框初始化;
		m_osentence.Empty();//匹配结果输出编辑框初始化;
	    m_losentence=LtoRMatching(m_isentence);//从左至右进行分词;
	    m_losentence=m_losentence.Left(m_losentence.GetLength()-1);//将左分词结果末尾的空格去除;
	    
		m_rosentence=RtoLMatching(m_isentence);//从右至左进行分词;
        m_rosentence=m_rosentence.Mid(1);//将右分词的结果的开头的空格去除;

	    t=m_losentence.Compare(m_rosentence);//将左分词结果和右分词结果进行比较,相等则输出最终结果,不等则句子歧义;
	    if(t==0)  m_osentence=m_losentence;
	    else  m_osentence="该句有歧义,无法进行分词!";
		m_isentence.Empty();//将输入清控;
	}
	
	UpdateData(FALSE);
}




// Looks up one candidate word in the dictionary database.
//
// The query "SELECT WORD FROM List WHERE WORD='<ostr>'" is executed through
// ADO on the connection described by the data-link file conn.udl.
//
//   ostr - candidate word to look up.
//
// Returns 0 when the query produced at least one row and 1 when it produced
// none or when the lookup failed with an ADO/COM error. The same value is
// also stored in Isfind, which is not declared in this function (presumably
// a CMyDlg member - confirm against the class declaration).
int CMyDlg::Find(CString ostr)
{
	// The text comes straight from the user's sentence. Doubling embedded
	// single quotes keeps it inside the SQL string literal, so a quote in
	// the input can neither break nor extend the statement.
	ostr.Replace("'", "''");

	CString strsql = "SELECT WORD FROM List WHERE WORD='";
	strsql = strsql + ostr;
	strsql = strsql + "'";

	Isfind = 1;	// "not found" until a row has been seen

	// CoUninitialize may only balance a CoInitialize that succeeded.
	const BOOL comInitialized = SUCCEEDED(CoInitialize(NULL));

	try
	{
		// The smart pointers live inside this scope so that they are released
		// before COM is shut down, also when an exception is thrown.
		_ConnectionPtr pConn(__uuidof(Connection));
		pConn->ConnectionString = "File Name=conn.udl";
		pConn->Open("", "", "", -1);	// open the database

		_RecordsetPtr pRst = pConn->Execute((_bstr_t)strsql, NULL, adCmdText);
		Isfind = pRst->rsEOF ? 1 : 0;	// an empty record set means no match

		pRst->Close();	// close the record set
		pConn->Close();	// close the connection
	}
	catch (_com_error& e)
	{
		// The ADO wrapper classes report failures by throwing _com_error.
		// A failed lookup is reported to the caller as "not found".
		TRACE("CMyDlg::Find: dictionary lookup failed: %s\n", (LPCTSTR)e.ErrorMessage());
		UNUSED(e);	// 'e' is only referenced by TRACE, which is empty in release builds
		Isfind = 1;
	}

	if (comInitialized)
		CoUninitialize();
	return Isfind;
}


// Left-to-right (forward) maximum matching over a run of double-byte
// Chinese characters.
//
// Starting at the left end, the longest prefix of at most MAX_LENGTH bytes is
// looked up with Find(); on a miss the prefix is shortened by one character
// (two bytes) and tried again. When even a single character is not in the
// dictionary, that character is emitted as a token of its own.
//
//   ss1 - the run to segment.
//
// Returns the tokens in order, each one FOLLOWED by a single blank.
// NOTE(review): the two-byte stepping assumes MAX_LENGTH is even (OnSure sets
// it to 10).
CString CMyDlg::LtoRCheck(CString ss1)
{
	CString ss2 = "";	// accumulated tokens
	CString midstr;		// current candidate
	int length = ss1.GetLength();	// bytes still to be segmented

	while (!ss1.IsEmpty())	// keep cutting while input remains
	{
		int mem = length;		// bytes left once this round's token is removed
		int quertyresult = 1;	// 0 = found in the dictionary, 1 = not found

		// Every candidate length >= length selects the whole remaining string,
		// so looking each of them up would repeat the same database query and
		// produce a negative remainder. Begin at the smallest such length
		// instead; the parity of MAX_LENGTH is kept so the sequence of shorter
		// candidates is unchanged.
		int i = MAX_LENGTH;
		while (i - 2 >= length)
			i -= 2;

		while (i > 1 && quertyresult != 0)	// continue until something matches
		{
			midstr = ss1.Left(i);			// candidate taken from the left
			quertyresult = Find(midstr);	// dictionary lookup
			if (quertyresult == 0)			// hit: record the token
			{
				ss2 = ss2 + midstr;
				ss2 = ss2 + " ";
				mem = length - i;			// what is left after the match
			}
			// A miss on a single character drops i to 1 to request the
			// fallback below; otherwise shorten by one character.
			if (i == 2 && quertyresult != 0) i--;
			else i = i - 2;
		}

		// Fallback: not even the single leading character is in the
		// dictionary, so it becomes a token by itself.
		if (i == 1 || (i == 2 && quertyresult != 0))
		{
			midstr = ss1.Left(2);
			ss2 = ss2 + midstr;
			ss2 = ss2 + " ";
			mem = mem - 2;
		}

		ss1 = ss1.Right(mem);		// drop the consumed prefix
		length = ss1.GetLength();	// refresh the remaining length
	}
	return ss2;
}

// Segments a sentence from left to right.
//
// The sentence is consumed from the front and classified by the first byte
// of what remains:
//   < 128  - a run of consecutive bytes below 128 becomes one token;
//   < 176  - the next two bytes (Chinese punctuation and similar symbols)
//            become one token;
//   others - the following run of double-byte characters whose first byte
//            is >= 176 is handed to LtoRCheck() for dictionary matching.
//
//   s1 - sentence to segment.
//
// Returns the tokens in order, each one followed by a single blank.
CString CMyDlg::LtoRMatching(CString s1)
{
	CString tokens = "";

	while (!s1.IsEmpty())
	{
		const int total = s1.GetLength();
		const unsigned char first = (unsigned char) s1[0];
		int taken;	// number of bytes consumed in this round

		if (first >= 176)
		{
			// Chinese characters: extend two bytes at a time while the next
			// leading byte is still in the Chinese range.
			for (taken = 2; taken < total && (unsigned char) s1[taken] >= 176; taken += 2)
			{
			}
			tokens = tokens + LtoRCheck(s1.Left(taken));
			s1 = s1.Right(total - taken);
		}
		else
		{
			if (first < 128)
			{
				// Western text: take every following byte below 128 as well.
				for (taken = 1; taken < total && (unsigned char) s1[taken] < 128; ++taken)
				{
				}
			}
			else
			{
				// Chinese punctuation and similar: exactly one double-byte character.
				taken = 2;
			}
			tokens = tokens + s1.Left(taken);
			tokens = tokens + " ";
			s1 = s1.Mid(taken);
		}
	}
	return tokens;
}


// Segments a sentence from right to left.
//
// The sentence is consumed from the back, using the same classes as
// LtoRMatching():
//   last byte < 128        - the trailing run of bytes below 128 is one token;
//   leading byte < 176     - the last double-byte character (Chinese
//                            punctuation and similar) is one token;
//   otherwise              - the trailing run of Chinese characters is handed
//                            to RtoLCheck() for dictionary matching.
//
//   s1 - sentence to segment.
//
// Returns the tokens in sentence order, each one PRECEDED by a single blank.
//
// NOTE(review): deciding character boundaries from the right assumes an
// encoding in which both bytes of a double-byte character are >= 128
// (GB2312-style) - confirm for the intended input.
CString CMyDlg::RtoLMatching(CString s1)
{
	CString s2 = "";
	int i, length;

	while (!s1.IsEmpty())
	{
		length = s1.GetLength();
		const unsigned char ch = (unsigned char) s1[length-1];	// last byte

		if (ch < 128)	// western text
		{
			i = length - 1;
			while (i >= 0 && (unsigned char) s1[i] < 128) i--;
			s2 = s1.Right(length-i-1) + s2;
			s2 = " " + s2;
			s1 = s1.Left(i+1);
			continue;
		}

		// Leading byte of the last double-byte character. It is read only
		// when it exists: a one-byte string has no index length-2. A stray
		// single byte is given the value 0 so that it is emitted as a token
		// by the branch below.
		const unsigned char th = (length >= 2) ? (unsigned char) s1[length-2] : 0;

		// The class of a double-byte character is decided by its leading
		// byte alone, exactly as LtoRMatching() does; the trailing byte can
		// be above or below 176 for either class.
		if (th < 176)	// Chinese punctuation and similar
		{
			s2 = s1.Right(2) + s2;
			s2 = " " + s2;
			s1 = s1.Left(length-2);
			continue;
		}

		// Chinese characters: walk left one character at a time. The byte
		// after the inspected one must itself be >= 128, otherwise the pair
		// straddles a western character and the inspected byte is really the
		// trailing byte of an earlier character.
		i = length - 4;
		while (i >= 0 && (unsigned char) s1[i] >= 176 && (unsigned char) s1[i+1] >= 128)
			i -= 2;
		s2 = RtoLCheck(s1.Right(length-i-2)) + s2;
		s1 = s1.Left(i+2);
	}
	return s2;
}


// Right-to-left (backward) maximum matching over a run of double-byte
// Chinese characters.
//
// Starting at the right end, the longest suffix of at most MAX_LENGTH bytes
// is looked up with Find(); on a miss the suffix is shortened by one
// character (two bytes) and tried again. When even a single character is not
// in the dictionary, that character is emitted as a token of its own.
//
//   ss1 - the run to segment.
//
// Returns the tokens in sentence order, each one PRECEDED by a single blank.
// NOTE(review): the two-byte stepping assumes MAX_LENGTH is even (OnSure sets
// it to 10).
CString CMyDlg::RtoLCheck(CString ss1)
{
	CString ss2 = "";	// accumulated tokens
	CString midstr;		// current candidate
	int length = ss1.GetLength();	// bytes still to be segmented

	while (!ss1.IsEmpty())	// keep cutting while input remains
	{
		int mem = length;		// bytes left once this round's token is removed
		int quertyresult = 1;	// 0 = found in the dictionary, 1 = not found

		// Every candidate length >= length selects the whole remaining string,
		// so looking each of them up would repeat the same database query and
		// produce a negative remainder. Begin at the smallest such length
		// instead; the parity of MAX_LENGTH is kept so the sequence of shorter
		// candidates is unchanged.
		int i = MAX_LENGTH;
		while (i - 2 >= length)
			i -= 2;

		while (i > 1 && quertyresult != 0)	// continue until something matches
		{
			midstr = ss1.Right(i);			// candidate taken from the right
			quertyresult = Find(midstr);	// dictionary lookup
			if (quertyresult == 0)			// hit: record the token
			{
				ss2 = midstr + ss2;
				ss2 = " " + ss2;
				mem = length - i;			// what is left after the match
			}
			// A miss on a single character drops i to 1 to request the
			// fallback below; otherwise shorten by one character.
			if (i == 2 && quertyresult != 0) i--;
			else i = i - 2;
		}

		// Fallback: not even the single trailing character is in the
		// dictionary, so it becomes a token by itself.
		if (i == 1 || (i == 2 && quertyresult != 0))
		{
			midstr = ss1.Right(2);
			ss2 = midstr + ss2;
			ss2 = " " + ss2;
			mem = mem - 2;
		}

		ss1 = ss1.Left(mem);		// drop the consumed suffix
		length = ss1.GetLength();	// refresh the remaining length
	}
	return ss2;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -