⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mainfrm.cpp

📁 中文信息处理的源程序代码。VC++实现
💻 CPP
字号:
// MainFrm.cpp : CMainFrame 类的实现
//

#include "stdafx.h"
#include "现代汉语自动分析.h"

#include "MainFrm.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#endif


// CMainFrame

// Supplies MFC run-time class information for CMainFrame with CMDIFrameWnd as its base.
IMPLEMENT_DYNAMIC(CMainFrame, CMDIFrameWnd)

// Message map for the frame. The only entry visible here is WM_CREATE (handled by OnCreate).
// NOTE(review): the other On...() handlers defined in this file have no entries in this
// map — confirm whether their ON_COMMAND lines are missing or are supplied elsewhere.
BEGIN_MESSAGE_MAP(CMainFrame, CMDIFrameWnd)
	ON_WM_CREATE()
END_MESSAGE_MAP()

// Indicator IDs passed to m_wndStatusBar.SetIndicators() in OnCreate.
static UINT indicators[] =
{
	ID_SEPARATOR,           // status line indicator
	ID_INDICATOR_CAPS,
	ID_INDICATOR_NUM,
	ID_INDICATOR_SCRL,
};

// Forward declarations: both functions are defined further down in this file and are
// called from the CMainFrame constructor and destructor.
void LoadHZFreq(),StoreHZFreq();   // declare the two functions

// File-scope object used by OnGetTagset, TrainFile and OnTaggingFiles below.
CCoMatrix coMatrix;
// CMainFrame construction/destruction

// Constructs the frame and immediately calls LoadHZFreq() to read the stored
// frequency data.
CMainFrame::CMainFrame()
{
	// TODO: add member initialization code here
	LoadHZFreq();  // read in the character-frequency data
}

// Flag recording whether the frequency data changed during this session.
// OnHZPairs sets it to TRUE; the CMainFrame destructor calls StoreHZFreq() only when
// it is set. (The name previously read ADD_HZNZI, which did not match its users.)
BOOL ADD_HANZI=FALSE;

// Destructor: writes the frequency data back out via StoreHZFreq(), but only when
// the ADD_HANZI flag is set.
CMainFrame::~CMainFrame()
{
	if (ADD_HANZI != FALSE)
	{
		StoreHZFreq();    // write out the character-frequency data
	}
}


// WM_CREATE handler. Runs the base-class handler, then creates the toolbar and the
// status bar and docks the toolbar.
// Returns 0 on success, or -1 when the base class or either control bar fails.
int CMainFrame::OnCreate(LPCREATESTRUCT lpCreateStruct)
{
	const int baseResult = CMDIFrameWnd::OnCreate(lpCreateStruct);
	if (baseResult == -1)
	{
		return -1;
	}

	// Toolbar: create the window first, and only then load its buttons.
	const DWORD toolBarStyle = WS_CHILD | WS_VISIBLE | CBRS_TOP
		| CBRS_GRIPPER | CBRS_TOOLTIPS | CBRS_FLYBY | CBRS_SIZE_DYNAMIC;
	BOOL toolBarReady = m_wndToolBar.CreateEx(this, TBSTYLE_FLAT, toolBarStyle);
	if (toolBarReady)
	{
		toolBarReady = m_wndToolBar.LoadToolBar(IDR_MAINFRAME);
	}
	if (!toolBarReady)
	{
		TRACE0("未能创建工具栏\n");
		return -1;      // creation failed
	}

	// Status bar: create the window first, and only then install the indicator panes.
	BOOL statusBarReady = m_wndStatusBar.Create(this);
	if (statusBarReady)
	{
		statusBarReady = m_wndStatusBar.SetIndicators(indicators,
			sizeof(indicators)/sizeof(indicators[0]));
	}
	if (!statusBarReady)
	{
		TRACE0("未能创建状态栏\n");
		return -1;      // creation failed
	}

	// TODO: delete these three lines if the toolbar should not be dockable
	m_wndToolBar.EnableDocking(CBRS_ALIGN_ANY);
	EnableDocking(CBRS_ALIGN_ANY);
	DockControlBar(&m_wndToolBar);

	return 0;
}

// Called before the frame window is created. Delegates to CMDIFrameWnd and returns
// FALSE when the base class rejects the CREATESTRUCT, TRUE otherwise.
BOOL CMainFrame::PreCreateWindow(CREATESTRUCT& cs)
{
	const BOOL baseAccepted = CMDIFrameWnd::PreCreateWindow(cs);

	// TODO: change the window class or styles here by modifying
	//  CREATESTRUCT cs (only meaningful when the base call succeeded)

	return baseAccepted ? TRUE : FALSE;
}


// CMainFrame diagnostics

#ifdef _DEBUG
// Debug-build validity check; forwards directly to the CMDIFrameWnd implementation.
void CMainFrame::AssertValid() const
{
	CMDIFrameWnd::AssertValid();
}

// Debug-build dump; forwards directly to the CMDIFrameWnd implementation.
void CMainFrame::Dump(CDumpContext& dc) const
{
	CMDIFrameWnd::Dump(dc);
}

#endif //_DEBUG


// CMainFrame 消息处理程序
CMainFrame::OnGB2312()
{
	//产生国标字符集
	FILE *outFile;  //输出文件
	unsigned char i,j;
	outFile = fopen("gb2312-80.chr","wt");
	for (i=161;i<255;i++)
		for (j=161;j<255;j++)
			fprintf(ouFile,"%c%c,%d,%d\n",i,j,i,j);
	fclose(outFile);
	AfxMessageBox("已生成国标字符集文件:gb2312-80.chr!");
	return;
}

// Classifies the character that starts at s by looking only at its first byte:
//   0 - single-byte Western character (byte below 128)
//   1 - Chinese character             (byte 176 or above)
//   2 - any other double-byte code    (byte in 128..175)
// s must point to at least one readable byte.
int charType(unsigned char *s)
{
	const unsigned char lead = *s;
	if (lead >= 176) return 1;
	if (lead >= 128) return 2;
	return 0;
}


// Input/output functions for the character-frequency data

// The pair table is stored through CArchive serialization and the single-character
// counts as a raw int array, so the two cannot share one file: previously both used
// hzpairs.dat, and the raw write either failed or overwrote the archive. The counts
// now have a file of their own.
// NOTE(review): "hzfreq.dat" is a newly chosen name — confirm the intended file name.
// NOTE(review): "f:work\\..." has no backslash after the drive letter, which makes it
// relative to the current directory of drive F: — confirm whether "f:\\work\\..." was meant.
static const char HZPAIRS_FILE[] = "f:work\\现代汉语自动分析\\hzpairs.dat";
static const char HZFREQ_FILE[]  = "f:work\\现代汉语自动分析\\hzfreq.dat";

// Loads the pair table (through its Serialize member) and the HZFreq array from disk.
// A missing pair file leaves the pair table untouched. Any HZFreq entries that could
// not be read (missing or short file) are set to 0.
// NOTE(review): this function uses `hzPair`, while other handlers in this file use
// `hzPairs` — confirm which name is actually declared.
void LoadHZFreq()
{
	char buf[512];
	CFile inFile;
	if(inFile.Open(HZPAIRS_FILE,CFile::modeRead)){
		CArchive ar(&inFile, CArchive::load,512,buf);     // archive in load mode
		hzPair.Serialize(ar);    // load the pair table from the file
		ar.Close();              // detach the archive while the file is still open
		inFile.Close();
	}

	int loaded=0;                // number of HZFreq entries actually read
	FILE *in=fopen(HZFREQ_FILE,"rb");
	if(in){
		loaded=(int)fread(HZFreq,sizeof(int),HZ_NUM,in);   // load single-character counts
		fclose(in);
	}
	for(int i=loaded;i<HZ_NUM;i++) HZFreq[i]=0;   // zero whatever the file did not supply
}

// Writes the pair table (through its Serialize member) and the HZFreq array to disk.
// Either part is silently skipped when its file cannot be opened for writing.
void StoreHZFreq()
{
	char buf[512];
	CFile outFile;
	if(outFile.Open(HZPAIRS_FILE,CFile::modeWrite|CFile::modeCreate)){
		CArchive ar(&outFile, CArchive::store,512,buf);     // archive in store mode
		hzPair.Serialize(ar);    // write the pair table back to the file
		ar.Close();              // flush buffered data before the file is closed
		outFile.Close();
	}

	FILE *out=fopen(HZFREQ_FILE,"wb");
	if(out){
		fwrite(HZFreq,sizeof(int),HZ_NUM,out);   // write the single-character counts
		fclose(out);
	}
}

//统计双字字频的菜单项
void CMainFrame::OnHZPairs()
{
	int n=ProcessFiles("snt","*.snt",HZPairInFile);  //统计成批文件中的汉字
	if(n>0)  ADD_HANZI=TRUE;
}

void CMainFrame::OnHzReport()
{
	int hzCount=0,CorpusSize=0;
	for (int id=0;id<HZ_NUM;id++){
		if(HZFreq[id]>0)  {    //只报告出现过的汉字的信息
			hzCount++;
			CorpusSize+=HZFreq[id];
		}
	}
	CString msg;
	msg.Format("已统计语料共%d字;\n其中不同汉字%d个;\n不同双字 %d种",
		CorpusSize,hzCount,hzPairs.GetSize());
	AfxMessageBox(msg);
}

//单字字频和双字字频
void CMainFrame::OnSeekHz()
{
	CString key,msg;
	if(!GetData("输入要找的单字或双字:",key))  return;
	key.TrimRight();key.TrimLeft();
	int id, n=key.GetLength();
	if(n==2){            //如果输入单字
		id=HZ_ID((unsigned char)key[0],(unsigned char)key[1]);
        msg.Format("该汉字出现%d次",HZFreq[id]);
	}
	else {    //如果输入双字
		if(hzPairs.Search(const char*)key,id);{
		msg.Format("该双字出现%d次",hp->freq);
		}
	}
	if(msg.IsEmpty())  msg="找不到,或者输入错误";
	AfxMessageBox(msg);
}

CMyDictionary Dict;
void CMainFrame::OnBindDynamic()
{
	CString w,msg;
	CObArray a;
	if(!GetData("输入待查找的词:",w))   return;
	int i,n=Dict.GetFreq(w,a);
	if(n==0)  AfxMessageBox("词库中没有这个词");
	else {
		msg=w+":\n";
		CTagFreq *pt;
		for (i=0;i<n;i++){
			pt=(CTagFreq *)a.GetAt(i);
			w.Format("标记:%s;出现次数:%d\n",pt->Tag,pt->Freq);
			msg+=w;
		}
		AfxMessageBox(msg);
	}
}


//用DAO创建词库、表及关系
void CMainFrame::OnCreateDatabase()
{
	//该函数应该只调用一次
	CDaoDatabase myDatabase;
	myDatabase.Create("mydict");

	CDaoTableDef pTable;
	CDaoIndexFieldInfo fieldInfo;
	CDaoIndexInfo indexInfo;

	pTable = new CDaoTableDef(&myDatabase);
	pTable->eate("words");
	pTable->CreateField("word",dbText,10);
	pTable->CreateField("wfreq",dbLong,4);
	pTable->CreateField("wid",dbLong,4,dbAutoIncrField);

	fieldInfo.m_strName ="word";    //索引字段
	fieldInfo.m_bDescending=FALSE;  //升序
	indexInfo.m_strName="word";     //索引名
	indexInfo.m_pFieldInfos=&fieldInfo;  //索引字段信息
	indexInfo.m_nFields=1;          //索引字段数
	indexInfo.m_bPrimary=FALSE;     //不是主索引
	indexInfo.m_bUnique=TRUE;       //该索引无重复记录
	pTable->CreateIndex(indexInfo); //创建索引

	fieldInfo.m_strName ="wid";    //索引字段
	fieldInfo.m_bDescending=FALSE;  //升序
	indexInfo.m_strName="wid";     //索引名
	indexInfo.m_pFieldInfos=&fieldInfo;  //索引字段信息
	indexInfo.m_nFields=1;          //索引字段数
	indexInfo.m_bPrimary=TRUE;     //不是主索引
	indexInfo.m_bUnique=TRUE;       //该索引无重复记录
	pTable->CreateIndex(indexInfo); //创建索引

	pTable->Append();//将词表结构和索引存入词库
	pTable->Close();
	dalete pTable;
	pTable=new CDaoTableDef(&myDatabase);
	pTable->Create("poss");
	pTable->CreateField("pos",dbText,4);
    pTable->CreateField("pfreq",dbLong,4);
	pTable->CreateField("wid",dbText,4);
	pTable->CreateField("pid",dbText,4,dbAutoIncrField);

    fieldInfo.m_strName ="pid";    //索引字段
	fieldInfo.m_bDescending=FALSE;  //升序
	indexInfo.m_strName="pid";     //索引名
	indexInfo.m_pFieldInfos=&fieldInfo;  //索引字段信息
	indexInfo.m_nFields=1;          //索引字段数
	indexInfo.m_bPrimary=TRUE;     //不是主索引
	indexInfo.m_bUnique=TRUE;       //该索引无重复记录
	pTable->CreateIndex(indexInfo); //创建索引

    pTable->Append();//将词表结构和索引存入词库
	pTable->Close();
	dalete pTable;

	pTable=new CDaoTableDef(&myDatabase);
	pTable->Create("senses");
	pTable->CreateField("sense",dbText,50);
    pTable->CreateField("sfreq",dbLong,4);
	pTable->CreateField("sid",dbText,4);
	pTable->CreateField("pid",dbText,4);

	pTable->Append();//将义项表结构和索引存入词库
	pTable->Close();
	dalete pTable;

	myDatabase.CreateRelation("words_poss","words","poss",
		dbRelationUnique,"wid","wid");
	myDatabase.CreateRelation("poss_senses","poss","senses",
		dbRelationUnique,"pid","pid");
	myDatabase.Close();
	AfxMessageBox("已创建数据库mydict");
}



// Loads the part-of-speech tag set into coMatrix from a file the user picks.
//   - A file whose name ends in "@#$" is treated as binary and read with
//     coMatrix.Serialize through a CArchive.
//   - Any other file is opened as text and handed to coMatrix.Create; afterwards
//     coMatrix.FileName gets the "@#$" extension so later saves use the binary name.
void CMainFrame::OnGetTagset()
{
	CFileDialog dlg(TRUE);
	if(dlg.DoModal()!= IDOK)  return;
	coMatrix.FileName = dlg.GetPathName();
	if(coMatrix.FileName.Right(3) == "@#$") {   // binary file
		CFile tf; char buf[512];
		if(tf.Open((const char *)coMatrix.FileName,CFile::modeRead)){
			CArchive ar(&tf, CArchive::load,512,buf);
			coMatrix.Serialize(ar);   // read the data through serialization
		}
		coMatrix.Modified = FALSE;
	}
	else {     // text file
		FILE *in = fopen((const char *)dlg.GetPathName(),"rt");
		if(!in) {AfxMessageBox("无法打开词性标记集文件!");return;}
		CStdioFile inFile(in);
		coMatrix.Create(inFile);
		// A CStdioFile constructed from an existing FILE* does not close it in its
		// destructor, so close explicitly to avoid leaking the stream.
		inFile.Close();
		coMatrix.FileName = ChangeExt(coMatrix.FileName,"@#$");
		// with the extension changed, this name is used for saving from now on
	}
}


// Training
// Opens fileName as text and passes it to coMatrix.AddCorpus. Shows an error message
// and returns when the file cannot be opened.
void TrainFile(CString fileName)
{
	FILE *in = fopen((const char *)fileName,"rt");
	if(!in) {AfxMessageBox("无法打开语料文件!"+fileName);return;}
	CStdioFile inFile(in);
	coMatrix.AddCorpus(inFile);
	// A CStdioFile constructed from an existing FILE* does not close it in its
	// destructor; without this every trained file would leak a stream.
	inFile.Close();
}


// Trains on corpus files: makes sure pDict exists, then hands TrainFile to
// ProcessFiles for the "*.pos" files.
void CMainFrame::OnTrainFile()
{
	// bigram training over the corpus
	if (!pDict)
	{
		pDict = new CMyDictionary;
	}
	ProcessFiles("pos","*.pos",TrainFile);
}


// Tagging
// Tags one file: reads fileName line by line and writes the result to a file with
// the same name and the extension "pos". Each trimmed input line is consumed through
// a CSpan (GetFrom / Disamb / WriteTo / Reset) until it is empty, and a newline is
// written after every input line. Shows an error message and returns if either file
// cannot be opened.
void TaggingFile(CString fileName)
{
	FILE *in, *out;
	in = fopen((const char *)fileName,"rt");
	if(!in) {AfxMessageBox("无法打开语料文件!"+fileName);return;}
	out = fopen((const char *)ChangeExt(fileName,"pos"),"wt");
	if(!out) {AfxMessageBox("无法创建词性标注文件!");fclose(in);return;}
	CStdioFile inFile(in),outFile(out);
	CSpan span;
	char line[2000];
	while(inFile.ReadString(line,sizeof(line)))  {
		CString s(line);
		s.TrimLeft();     // was "s.TrimLeft;" — the call parentheses were missing
		s.TrimRight();
		// NOTE(review): this loop relies on span.GetFrom(s) consuming text from s.
		while(!s.IsEmpty())  {
			span.GetFrom(s);
			span.Disamb();
			span.WriteTo(out);
			span.Reset();
		}
		outFile.WriteString("\n");
	}
	// Explicit Close() is what releases the FILE* streams wrapped above.
	inFile.Close();
	outFile.Close();
}

// Tags a batch of files: makes sure pDict exists, calls OnGetDictionary() when
// coMatrix reports it is not ready, then hands TaggingFile to ProcessFiles for the
// "*.cut" files.
// NOTE(review): OnGetDictionary is not defined in the visible part of this file, while
// OnGetTagset is the handler here that fills coMatrix — confirm the intended call.
void CMainFrame::OnTaggingFiles()
{
	// bigram part-of-speech tagging
	if (!pDict)
	{
		pDict = new CMyDictionary;
	}
	if (!coMatrix.Ready())
	{
		OnGetDictionary();
	}
	ProcessFiles("cut","*.cut",TaggingFile);
}






⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -