📄 mainfrm.cpp
字号:
// MainFrm.cpp : CMainFrame 类的实现
//
#include "stdafx.h"
#include "现代汉语自动分析.h"
#include "MainFrm.h"
#ifdef _DEBUG
#define new DEBUG_NEW
#endif
// CMainFrame
// Runtime class information for CMainFrame, derived from CMDIFrameWnd.
IMPLEMENT_DYNAMIC(CMainFrame, CMDIFrameWnd)
// Message map: the only entry routes WM_CREATE to CMainFrame::OnCreate.
// NOTE(review): none of the On* command handlers defined later in this file
// has an entry here -- confirm where those handlers are wired to menu commands.
BEGIN_MESSAGE_MAP(CMainFrame, CMDIFrameWnd)
ON_WM_CREATE()
END_MESSAGE_MAP()
// Pane identifiers handed to m_wndStatusBar.SetIndicators() in OnCreate().
static UINT indicators[] =
{
ID_SEPARATOR, // status line indicator
ID_INDICATOR_CAPS,
ID_INDICATOR_NUM,
ID_INDICATOR_SCRL,
};
// Forward declarations of the frequency-table load/store routines defined
// further down in this file; the constructor and destructor call them.
void LoadHZFreq();
void StoreHZFreq();
// File-wide CCoMatrix instance shared by the tag-set, training and tagging handlers.
CCoMatrix coMatrix;
// CMainFrame 构造/析构
CMainFrame::CMainFrame()
{
	// Load the stored character-frequency data as soon as the frame object exists.
	LoadHZFreq();
	// TODO: add member initialization code here
}
// Flag recording whether the character-frequency data has changed; the
// destructor writes the data back only when it is TRUE.
// The definition used to be spelled ADD_HZNZI, which did not match the
// ADD_HANZI name every user of the flag in this file refers to.
BOOL ADD_HANZI=FALSE;
CMainFrame::~CMainFrame()
{
	// Nothing to persist unless the frequency data was modified.
	if (!ADD_HANZI)
		return;
	StoreHZFreq();
}
// WM_CREATE handler: creates the toolbar and status bar and docks the toolbar.
// Returns 0 on success, -1 when the base class or either control bar fails.
int CMainFrame::OnCreate(LPCREATESTRUCT lpCreateStruct)
{
	// The base class has to create the frame window first.
	if (CMDIFrameWnd::OnCreate(lpCreateStruct) == -1)
		return -1;

	// Toolbar: create the control, then load its buttons from the IDR_MAINFRAME resource.
	// LoadToolBar is only attempted when CreateEx succeeded.
	const DWORD toolBarStyle = WS_CHILD | WS_VISIBLE | CBRS_TOP
		| CBRS_GRIPPER | CBRS_TOOLTIPS | CBRS_FLYBY | CBRS_SIZE_DYNAMIC;
	const BOOL toolBarReady =
		m_wndToolBar.CreateEx(this, TBSTYLE_FLAT, toolBarStyle) &&
		m_wndToolBar.LoadToolBar(IDR_MAINFRAME);
	if (!toolBarReady)
	{
		TRACE0("未能创建工具栏\n");
		return -1; // creation failed
	}

	// Status bar: create it, then install the panes listed in `indicators`.
	const BOOL statusBarReady =
		m_wndStatusBar.Create(this) &&
		m_wndStatusBar.SetIndicators(indicators,
			sizeof(indicators)/sizeof(UINT));
	if (!statusBarReady)
	{
		TRACE0("未能创建状态栏\n");
		return -1; // creation failed
	}

	// TODO: delete these three lines if the toolbar should not be dockable
	m_wndToolBar.EnableDocking(CBRS_ALIGN_ANY);
	EnableDocking(CBRS_ALIGN_ANY);
	DockControlBar(&m_wndToolBar);

	return 0;
}
// Called before the window is created; defers entirely to the base class.
// Returns TRUE when CMDIFrameWnd accepts the creation parameters, FALSE otherwise.
BOOL CMainFrame::PreCreateWindow(CREATESTRUCT& cs)
{
	const BOOL baseAccepted = CMDIFrameWnd::PreCreateWindow(cs);
	// TODO: modify the window class or styles here by changing
	// the CREATESTRUCT cs
	return baseAccepted ? TRUE : FALSE;
}
// CMainFrame 诊断
#if defined(_DEBUG)
// Debug-only validity check; forwards to the base-class implementation.
void CMainFrame::AssertValid() const
{
	CMDIFrameWnd::AssertValid();
}

// Debug-only state dump; forwards to the base-class implementation.
void CMainFrame::Dump(CDumpContext& dc) const
{
	CMDIFrameWnd::Dump(dc);
}
#endif // _DEBUG
// CMainFrame 消息处理程序
CMainFrame::OnGB2312()
{
//产生国标字符集
FILE *outFile; //输出文件
unsigned char i,j;
outFile = fopen("gb2312-80.chr","wt");
for (i=161;i<255;i++)
for (j=161;j<255;j++)
fprintf(ouFile,"%c%c,%d,%d\n",i,j,i,j);
fclose(outFile);
AfxMessageBox("已生成国标字符集文件:gb2312-80.chr!");
return;
}
// Classifies the character starting at `s` by its first byte only.
// Returns 0 when the byte is below 128 (single-byte Western character),
// 1 when it is 176 or above (Chinese character), and 2 for the remaining
// lead bytes 128..175 (other double-byte characters).
// `s` must point to at least one readable byte.
int charType(unsigned char *s)
{
	const unsigned char lead = *s;
	if (lead >= 176)
		return 1;
	return (lead < 128) ? 0 : 2;
}
//字符数据的输入输出函数
// Data files used by LoadHZFreq()/StoreHZFreq().
// The two tables now live in separate files. Previously both were read from
// and written to hzpairs.dat, so the raw single-character table written second
// overwrote the serialized pair table written first.
// NOTE(review): "f:work\\..." is a drive-relative path (no backslash after the
// drive letter); confirm whether "f:\\work\\..." was intended.
// NOTE(review): the file name hzfreq.dat is new; an hzpairs.dat produced by the
// old code holds raw single-character counts, not an archive.
static const char HZ_PAIRS_FILE[] = "f:work\\现代汉语自动分析\\hzpairs.dat";
static const char HZ_FREQ_FILE[] = "f:work\\现代汉语自动分析\\hzfreq.dat";

// Loads the character-pair table (via hzPair.Serialize) and the
// single-character frequency table HZFreq from disk.
// Any HZFreq entry that could not be read -- file missing or shorter than
// HZ_NUM ints -- is set to 0.
// NOTE(review): other functions in this file refer to `hzPairs`; confirm
// whether `hzPair` here names the same object.
void LoadHZFreq()
{
	char buf[512];
	CFile inFile;
	if(inFile.Open(HZ_PAIRS_FILE,CFile::modeRead)){
		CArchive ar(&inFile, CArchive::load,512,buf); // archive in load mode
		hzPair.Serialize(ar); // read the pair table from the file
		ar.Close(); // detach the archive before its file is closed
		inFile.Close();
	}

	size_t loaded = 0;
	FILE *in = fopen(HZ_FREQ_FILE,"rb");
	if(in){
		loaded = fread(HZFreq,sizeof(int),HZ_NUM,in); // read single-character counts
		fclose(in);
	}
	// Zero whatever was not read (everything when the file is absent).
	for(size_t i=loaded;i<(size_t)HZ_NUM;i++) HZFreq[i]=0;
}

// Writes the character-pair table (via hzPair.Serialize) and the
// single-character frequency table HZFreq back to disk.
// A file that cannot be opened is skipped silently, as before.
void StoreHZFreq()
{
	char buf[512];
	CFile outFile;
	if(outFile.Open(HZ_PAIRS_FILE,CFile::modeWrite|CFile::modeCreate)){
		CArchive ar(&outFile, CArchive::store,512,buf); // archive in store mode
		hzPair.Serialize(ar); // write the pair table to the file
		ar.Close(); // flush buffered data before closing the file
		outFile.Close();
	}

	FILE *out = fopen(HZ_FREQ_FILE,"wb");
	if(out){
		fwrite(HZFreq,sizeof(int),HZ_NUM,out); // write single-character counts
		fclose(out);
	}
}
//统计双字字频的菜单项
void CMainFrame::OnHZPairs()
{
int n=ProcessFiles("snt","*.snt",HZPairInFile); //统计成批文件中的汉字
if(n>0) ADD_HANZI=TRUE;
}
void CMainFrame::OnHzReport()
{
int hzCount=0,CorpusSize=0;
for (int id=0;id<HZ_NUM;id++){
if(HZFreq[id]>0) { //只报告出现过的汉字的信息
hzCount++;
CorpusSize+=HZFreq[id];
}
}
CString msg;
msg.Format("已统计语料共%d字;\n其中不同汉字%d个;\n不同双字 %d种",
CorpusSize,hzCount,hzPairs.GetSize());
AfxMessageBox(msg);
}
//单字字频和双字字频
void CMainFrame::OnSeekHz()
{
CString key,msg;
if(!GetData("输入要找的单字或双字:",key)) return;
key.TrimRight();key.TrimLeft();
int id, n=key.GetLength();
if(n==2){ //如果输入单字
id=HZ_ID((unsigned char)key[0],(unsigned char)key[1]);
msg.Format("该汉字出现%d次",HZFreq[id]);
}
else { //如果输入双字
if(hzPairs.Search(const char*)key,id);{
msg.Format("该双字出现%d次",hp->freq);
}
}
if(msg.IsEmpty()) msg="找不到,或者输入错误";
AfxMessageBox(msg);
}
CMyDictionary Dict;
void CMainFrame::OnBindDynamic()
{
CString w,msg;
CObArray a;
if(!GetData("输入待查找的词:",w)) return;
int i,n=Dict.GetFreq(w,a);
if(n==0) AfxMessageBox("词库中没有这个词");
else {
msg=w+":\n";
CTagFreq *pt;
for (i=0;i<n;i++){
pt=(CTagFreq *)a.GetAt(i);
w.Format("标记:%s;出现次数:%d\n",pt->Tag,pt->Freq);
msg+=w;
}
AfxMessageBox(msg);
}
}
//用DAO创建词库、表及关系
void CMainFrame::OnCreateDatabase()
{
//该函数应该只调用一次
CDaoDatabase myDatabase;
myDatabase.Create("mydict");
CDaoTableDef pTable;
CDaoIndexFieldInfo fieldInfo;
CDaoIndexInfo indexInfo;
pTable = new CDaoTableDef(&myDatabase);
pTable->eate("words");
pTable->CreateField("word",dbText,10);
pTable->CreateField("wfreq",dbLong,4);
pTable->CreateField("wid",dbLong,4,dbAutoIncrField);
fieldInfo.m_strName ="word"; //索引字段
fieldInfo.m_bDescending=FALSE; //升序
indexInfo.m_strName="word"; //索引名
indexInfo.m_pFieldInfos=&fieldInfo; //索引字段信息
indexInfo.m_nFields=1; //索引字段数
indexInfo.m_bPrimary=FALSE; //不是主索引
indexInfo.m_bUnique=TRUE; //该索引无重复记录
pTable->CreateIndex(indexInfo); //创建索引
fieldInfo.m_strName ="wid"; //索引字段
fieldInfo.m_bDescending=FALSE; //升序
indexInfo.m_strName="wid"; //索引名
indexInfo.m_pFieldInfos=&fieldInfo; //索引字段信息
indexInfo.m_nFields=1; //索引字段数
indexInfo.m_bPrimary=TRUE; //不是主索引
indexInfo.m_bUnique=TRUE; //该索引无重复记录
pTable->CreateIndex(indexInfo); //创建索引
pTable->Append();//将词表结构和索引存入词库
pTable->Close();
dalete pTable;
pTable=new CDaoTableDef(&myDatabase);
pTable->Create("poss");
pTable->CreateField("pos",dbText,4);
pTable->CreateField("pfreq",dbLong,4);
pTable->CreateField("wid",dbText,4);
pTable->CreateField("pid",dbText,4,dbAutoIncrField);
fieldInfo.m_strName ="pid"; //索引字段
fieldInfo.m_bDescending=FALSE; //升序
indexInfo.m_strName="pid"; //索引名
indexInfo.m_pFieldInfos=&fieldInfo; //索引字段信息
indexInfo.m_nFields=1; //索引字段数
indexInfo.m_bPrimary=TRUE; //不是主索引
indexInfo.m_bUnique=TRUE; //该索引无重复记录
pTable->CreateIndex(indexInfo); //创建索引
pTable->Append();//将词表结构和索引存入词库
pTable->Close();
dalete pTable;
pTable=new CDaoTableDef(&myDatabase);
pTable->Create("senses");
pTable->CreateField("sense",dbText,50);
pTable->CreateField("sfreq",dbLong,4);
pTable->CreateField("sid",dbText,4);
pTable->CreateField("pid",dbText,4);
pTable->Append();//将义项表结构和索引存入词库
pTable->Close();
dalete pTable;
myDatabase.CreateRelation("words_poss","words","poss",
dbRelationUnique,"wid","wid");
myDatabase.CreateRelation("poss_senses","poss","senses",
dbRelationUnique,"pid","pid");
myDatabase.Close();
AfxMessageBox("已创建数据库mydict");
}
// Lets the user pick a tag-set file and loads it into coMatrix.
// A file whose name ends in "@#$" is read through coMatrix.Serialize; any other
// file is opened as text and passed to coMatrix.Create, after which
// coMatrix.FileName is switched to the "@#$" extension for later saving.
//
// Fixes over the previous version:
//  - the text stream opened with fopen() is now closed after use, as
//    TaggingFile in this file already does; a CStdioFile built from an
//    existing FILE* does not close it on destruction;
//  - a binary file that cannot be opened is reported instead of being ignored.
void CMainFrame::OnGetTagset()
{
	CFileDialog dlg(TRUE);
	if(dlg.DoModal()!= IDOK) return;

	coMatrix.FileName = dlg.GetPathName();
	if(coMatrix.FileName.Right(3) == "@#$") { // binary file
		CFile tf;
		char buf[512];
		if(tf.Open((const char *)coMatrix.FileName,CFile::modeRead)){
			CArchive ar(&tf, CArchive::load,512,buf);
			coMatrix.Serialize(ar); // read the data through serialization
		}
		else AfxMessageBox("无法打开词性标记集文件!");
		coMatrix.Modified = FALSE;
	}
	else { // text file
		FILE *in = fopen((const char *)dlg.GetPathName(),"rt");
		if(!in) {AfxMessageBox("无法打开词性标记集文件!");return;}
		CStdioFile inFile(in);
		coMatrix.Create(inFile);
		inFile.Close(); // releases the stream opened above
		// From now on the data is saved under the "@#$" extension.
		coMatrix.FileName = ChangeExt(coMatrix.FileName,"@#$");
	}
}
//训练
// Opens `fileName` as text and passes it to coMatrix.AddCorpus.
// Shows a message box and returns when the file cannot be opened.
// Fix: the stream is now closed after use; previously the FILE* from fopen()
// was never released.
void TrainFile(CString fileName)
{
	FILE *in = fopen((const char *)fileName,"rt");
	if(!in) {AfxMessageBox("无法打开语料文件!"+fileName);return;}
	CStdioFile inFile(in);
	coMatrix.AddCorpus(inFile);
	inFile.Close(); // releases the stream opened above
}
// Menu handler: trains on a batch of "*.pos" files by running TrainFile over
// them through ProcessFiles. Creates the CMyDictionary behind pDict first if
// it does not exist yet.
void CMainFrame::OnTrainFile()
{
	if (!pDict)
	{
		pDict = new CMyDictionary;
	}
	ProcessFiles("pos","*.pos",TrainFile);
}
//标注
// Tags one file: reads `fileName` line by line, feeds each trimmed line through
// a CSpan (GetFrom / Disamb / WriteTo / Reset) until the line is empty, and
// writes the result to the same file name with the extension "pos".
// Shows a message box and returns when either file cannot be opened.
//
// Fixes over the previous version: `s.TrimLeft;` lacked the call parentheses,
// and `WriteString("\n";)` had its semicolon inside the argument list.
void TaggingFile(CString fileName)
{
	FILE *in, *out;
	in = fopen((const char *)fileName,"rt");
	if(!in) {AfxMessageBox("无法打开语料文件!"+fileName);return;}
	out = fopen((const char *)ChangeExt(fileName,"pos"),"wt");
	if(!out) {AfxMessageBox("无法创建词性标注文件!");fclose(in);return;}

	CStdioFile inFile(in),outFile(out);
	CSpan span;
	char line[2000];
	while(inFile.ReadString(line,2000)) {
		CString s(line);
		s.TrimLeft();
		s.TrimRight();
		// NOTE(review): this loop ends only if span.GetFrom(s) shortens `s` on
		// every call -- confirm against CSpan.
		while(!s.IsEmpty()) {
			span.GetFrom(s);
			span.Disamb();
			span.WriteTo(out);
			span.Reset();
		}
		outFile.WriteString("\n");
	}
	inFile.Close();
	outFile.Close();
}
//标注一批文件
// Menu handler: tags a batch of "*.cut" files by running TaggingFile over them
// through ProcessFiles. Creates the CMyDictionary behind pDict if it does not
// exist yet, and calls OnGetDictionary() when coMatrix reports it is not ready.
// NOTE(review): OnGetDictionary is not defined in the visible part of this
// file; coMatrix is filled by OnGetTagset here -- confirm which handler is meant.
void CMainFrame::OnTaggingFiles()
{
	if (!pDict)
	{
		pDict = new CMyDictionary;
	}
	if (!coMatrix.Ready())
	{
		OnGetDictionary();
	}
	ProcessFiles("cut","*.cut",TaggingFile);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -