📄 word.cpp
字号:
#include "stdafx.h"
#include "word.h"
// Word matching between two segmentations of (presumably) the same text.
//
// Walks the evaluated segmentation `vr` and the reference segmentation `vo`
// in parallel and counts the positions where both contain the same word.
//
// Side effects:
//   * every mismatching pair is logged to the file named by `fileinfo` as
//     "<index in vr>\t<word in vr>\t<index in vo>\t<word in vo>";
//   * "分词错误排序.txt" receives each distinct mismatching pair together with
//     how often it occurred, most frequent first.
//
// Parameters:
//   vr       - words of the text under evaluation
//   vo       - words of the reference (standard) text
//   fileinfo - path of the mismatch log to create
//
// Returns the number of matching words.
int ComparePre(WordVector& vr, WordVector& vo, const char* fileinfo) // word matching
{
    std::ofstream mismatchLog(fileinfo);
    std::ofstream rankingLog("分词错误排序.txt");

    // Key: "<evaluated word>\t<reference word>", value: number of occurrences.
    std::map<std::string, int> mismatchFreq;

    int count = 0;
    std::size_t ir = 0; // cursor in the text under evaluation
    std::size_t io = 0; // cursor in the reference text

    while (ir < vr.size() && io < vo.size())
    {
        if (vr[ir].content != vo[io].content)
        {
            mismatchLog << ir << "\t" << vr[ir].content << "\t"
                        << io << "\t" << vo[io].content << "\n";
            std::string pairKey = vr[ir].content + "\t" + vo[io].content;
            ++mismatchFreq[pairKey];

            ++ir;
            ++io;

            // Re-synchronise: advance whichever side lags behind until both
            // cursors point at words with the same start position, or one
            // side is exhausted (which also terminates the outer loop).
            while (ir < vr.size() && io < vo.size()
                   && vr[ir].startPos != vo[io].startPos)
            {
                if (vr[ir].startPos > vo[io].startPos)
                    ++io;
                else
                    ++ir;
            }
        }
        else
        {
            ++count;
            ++ir;
            ++io;
        }
    }

    // Re-index the mismatch statistics by frequency so they can be ranked.
    typedef std::multimap<int, std::string> FrequencyIndex;
    FrequencyIndex byFrequency;
    for (std::map<std::string, int>::const_iterator it = mismatchFreq.begin();
         it != mismatchFreq.end(); ++it)
    {
        byFrequency.insert(FrequencyIndex::value_type(it->second, it->first));
    }

    // Reverse iterators give descending frequency without ever dereferencing
    // end(), and they include the first (least frequent) element as well.
    for (FrequencyIndex::const_reverse_iterator rit = byFrequency.rbegin();
         rit != byFrequency.rend(); ++rit)
    {
        rankingLog << rit->first << "\t" << rit->second << "\n";
    }

    // Both output streams are flushed and closed by their destructors.
    return count;
}
void Compare(const char* filepath_cmp, const char* filepath_std, const char* filename_cmp, const char* filename_std, const char* curDirectory)
{
ifstream fr(filepath_cmp);//评测文本
ifstream fo(filepath_std);//标准文本
string hispath = curDirectory;
hispath += "\\";
hispath += "history.txt";
ofstream historyfile(hispath.c_str(),iostream::app);
ofstream resultfile("result.txt");
string sr,so;
WordVector vr,vo;
int pos = 0; //记录实际 汉字的起始位置
while (fr>>sr){ //构造结果数组
WordInfo wi;
wi.startPos = pos;
if(sr[0] == '[')
{
sr = sr.substr(1);
}
int chineseLen = sr.find_first_of('/');
if (chineseLen == string::npos)
{
chineseLen = sr.length();
wi.content = sr;
}
else
{
string ssr = sr.substr(0,chineseLen);
wi.content = ssr;
}
pos += chineseLen;
vr.push_back(wi);
}
string fileinfo;
string filename1 = filename_cmp;
//pos = filename1.find_last_of("\\");
//fileinfo = filename1.substr(pos+1);
//pos = filename1.find_last_of(".");
fileinfo = filename1;//.substr(0,pos);
// pos = filename1.find_last_of(".");
//string filename2 = filename1.substr(0,pos);
//string filename3 = filename1.substr(pos);
filename1 = ".\\" + filename1 + "_word" + ".txt";
ofstream oof(filename1.c_str());
vector<WordInfo>::iterator itsv ;
for(itsv = vr.begin(); itsv != vr.end(); itsv++)
{
WordInfo temps = *itsv;
oof<<temps.content<<endl;
}
oof.close();
pos = 0; //记录实际 汉字的起始位置
while (fo>>so){ //构造对照数组
WordInfo wi;
//wi.content = so;
wi.startPos = pos;
if(so[0] == '[')
{
so = so.substr(1);
}
int chineseLen = so.find_first_of('/');
if (chineseLen == string::npos)
{
chineseLen = so.length();
wi.content = so;
}
else
{
string ssr = so.substr(0,chineseLen);
wi.content = ssr;
}
pos += chineseLen;
vo.push_back(wi);
}
string fileinfo1;
filename1 = filename_std;
//pos = filename1.find_last_of("\\");
//fileinfo1 = filename1.substr(pos+1);
//pos = filename1.find_last_of(".");
fileinfo1 = filename1;//.substr(0,pos);
fileinfo = ".\\" + fileinfo +"_" + fileinfo1 + ".txt";
//pos = filename1.find_last_of(".");
//filename2 = filename1.substr(0,pos);
//filename3 = filename1.substr(pos);
filename1 = ".\\" + filename1 + "_word" + ".txt";
ofstream oof1(filename1.c_str());
for(itsv = vo.begin(); itsv != vo.end(); itsv++)
{
WordInfo temps = *itsv;
oof1<<temps.content<<endl;
}
oof1.close();
int sc = ComparePre(vr,vo,fileinfo.c_str());
float s1 = (float)sc/(float)vr.size();
float s2 = (float)sc/(float)vo.size();
float s3 = 2*s1*s2/(s1+s2);
resultfile<<"\n\n"<<filepath_cmp<<endl<<filepath_std<<endl
<<"切分出的词语总数 "<<vr.size()<<endl
<<"标准结果中的词语总数 "<<vo.size()<<endl
<<"切分出的词语中出现在标准结果中的词语数 "<<sc<<endl
<<"分词正确率 "<<s1*100<<"%"<<endl
<<"分词召回率 "<<s2*100<<"%"<<endl
<<"分词F值 "<<s3*100<<"%"<<endl;
historyfile<<"\n\n"<<filepath_cmp<<endl<<filepath_std<<endl
<<"切分出的词语总数 "<<vr.size()<<endl
<<"标准结果中的词语总数 "<<vo.size()<<endl
<<"切分出的词语中出现在标准结果中的词语数 "<<sc<<endl
<<"分词正确率 "<<s1*100<<"%"<<endl
<<"分词召回率 "<<s2*100<<"%"<<endl
<<"分词F值 "<<s3*100<<"%"<<endl;
resultfile.close();
historyfile.close();
fr.close();
fo.close();
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -