📄 xiaoqichuli.cpp
字号:
// xiaoqichuli.cpp: implementation of the xiaoqichuli class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
//#include "DMBGT.h"
#include "xiaoqichuli.h"
#include <algorithm>
#include "math.h"
//#define LAM 0.00000001
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Constructor: loads the two lookup tables used by the rest of the class
// by running both table-building routines once.
xiaoqichuli::xiaoqichuli()
{
    this->CreateWordIDList();
    this->CreatePskList();
}
// Destructor: performs no explicit cleanup. The only statement is a
// commented-out debug message box.
xiaoqichuli::~xiaoqichuli()
{
// AfxMessageBox(" testing is over!");
}
////////////////////////////////////////////////////
void xiaoqichuli::CreateWordIDList()
{
ifstream finID("Cup_词ID词义ID表.txt");
wordID wordid;
string wordTmp;
string strCompare = "";
int ID;
//float P;
string P;
while(finID>>wordTmp>>ID>>P)
{
if(wordTmp != strCompare)
{
strCompare = wordTmp;
wordid.word = wordTmp;
wordid.IDw = ID;
vectWordID.push_back(wordid);
}
}
}
////////////////////////////////////////////////////
void xiaoqichuli::CreatePskList()
{
fstream finPsk("Psk-hebing.txt",ios::in);
wordPsk wordpsk;
int idw;
// int ids;
string ids;
float psk;
float P_LAM;
long B;
long E;
while(finPsk>>idw>>ids>>psk>>P_LAM>>B>>E)
{
wordpsk.IDw = idw;
wordpsk.IDs = ids;
wordpsk.Psk = psk;
wordpsk.P_LAM = P_LAM;
wordpsk.begin = B;
wordpsk.end = E;
vectWordPsk.push_back(wordpsk);
}
sort(vectWordPsk.begin(), vectWordPsk.end());
}
////////////////////////////////////////////////////
// Splits a tagged sentence into words, looks up and scores them, and writes
// the re-tagged sentence into the caller's buffer.
//
// Sentence   - input text. The text in front of each "/" is taken as a word;
//              scanning then resumes after the next blank that follows the
//              "/". The search for "/" starts at index 1, so a "/" in the
//              very first position is not treated as a separator.
// chSentence - output buffer. The result is copied with strcpy, which does
//              no bounds checking, so the buffer must be large enough for
//              the produced text plus its terminating NUL.
void xiaoqichuli::ReadCorpus(string Sentence, char * chSentence)
{
    string tagged;          // filled in by CountSore
    word_Tmp token;
    int slashPos = 0;
    int tokenStart = 0;

    for(;;)
    {
        slashPos = Sentence.find("/", slashPos + 1);
        if(slashPos == -1)
            break;          // no further "/" in the sentence

        token.word = Sentence.substr(tokenStart, slashPos - tokenStart);
        vectWordTmp.push_back(token);

        // The next word starts right after the blank that follows this "/".
        // When there is no such blank the position is -1 and the start
        // therefore falls back to 0.
        int blankPos = Sentence.find(" ", slashPos);
        tokenStart = blankPos + 1;
    }

    SearchIDw();
    CountSore(tagged);
    strcpy(chSentence, tagged.c_str());
}
/////////////////////////////////////////////////////
void xiaoqichuli::SearchIDw()
{
unsigned i = 0;
vector<wordID>::iterator pIt;
wordID ToSearch;
int IDTmp; //临时IDw
for(i; i < vectWordTmp.size(); i++)
{
ToSearch.word = vectWordTmp[i].word;
pIt = lower_bound(vectWordID.begin(),vectWordID.end(),ToSearch);
if(pIt != vectWordID.end() && pIt->word == vectWordTmp[i].word)
{
IDTmp = pIt->IDw;
}
else
{
IDTmp = -1; //找不到的令IDw为-1
}
vectIDTmp.push_back(IDTmp); //每句的词汇IDw压入临时表
}
}
////////////////////////////////////////////////////////////////////////////////////
// Reads the wordPvs records that belong to *pIt from the binary file
// "Pvs-hebing" and appends them to vectPvs.
//
// vectPvs - destination vector. NOTE: it is received by value, so the
//           records appended here live only in this function's private copy
//           and are discarded on return; the caller's vector is unchanged.
// pIt     - entry whose begin/end members give the byte range to read; the
//           record count is (end - begin) / sizeof(wordPvs).
//
// Neither the open nor the individual reads are checked for failure.
void xiaoqichuli::ReadPvs(vector<wordPvs> vectPvs, vector<wordPsk> ::iterator pIt)
{
    fstream pvsFile("Pvs-hebing", ios::in|ios::binary);
    pvsFile.seekg(pIt->begin);

    wordPvs raw;
    wordPvs item;
    int remaining = (pIt->end - pIt->begin)/sizeof(wordPvs);
    while(remaining > 0)
    {
        pvsFile.read((char *) & raw,sizeof(wordPvs));
        item.IDw = raw.IDw;
        item.Pvs = raw.Pvs;
        vectPvs.push_back(item);
        --remaining;
    }
}
// Reads the wordPvs records that belong to *pIt from the binary file
// "Pvs-hebing" and returns them.
//
// pIt - entry whose begin/end members give the byte range of its records
//       inside the file; the record count is (end - begin) / sizeof(wordPvs).
//
// Returns the records in file order. The result is empty when the range is
// empty or inverted, or when the file cannot be opened. When the file is
// shorter than the range claims, only the records that were read completely
// are returned, so stale or uninitialised data is never pushed.
vector<wordPvs> xiaoqichuli::_ReadPvs(vector<wordPsk> ::iterator pIt)
{
    vector<wordPvs> vectPvs;

    // An inverted range would turn into a huge unsigned count once divided
    // by sizeof, so reject it before doing any arithmetic.
    if(pIt->end <= pIt->begin)
        return vectPvs;

    const vector<wordPvs>::size_type recordCount =
        (pIt->end - pIt->begin) / sizeof(wordPvs);

    fstream finPvs("Pvs-hebing", ios::in|ios::binary);
    if(!finPvs)
        return vectPvs;

    finPvs.seekg(pIt->begin);

    wordPvs Els;
    wordPvs PvsTmp;
    for(vector<wordPvs>::size_type k = 0; k < recordCount; k++)
    {
        // A failed seek or a short read leaves the stream in a failed
        // state; stop instead of storing whatever Els happens to contain.
        if(!finPvs.read((char *) & Els, sizeof(wordPvs)))
            break;

        PvsTmp.IDw = Els.IDw;
        PvsTmp.Pvs = Els.Pvs;
        vectPvs.push_back(PvsTmp);
    }
    return vectPvs;
}
//////////////////////////////////////////////////////////////////////////////////////
void xiaoqichuli::CountPvs(int i, vector<wordPvs> vectPvsTmp, vector<wordPsk> ::iterator pIt)
{
vector<int> vectConID;
float Pvs;
vector<wordPvs> ::iterator pIt1;
pair<double,string> SoreTmp;
SoreTmp.first = 0;
SoreTmp.second = pIt->IDs;
wordPvs Tosearch;
vectConID = vectIDTmp;
vectConID.erase(vectConID.begin() + i); //确定上下文的IDw并放入向量中
int n = 0, m = vectConID.size();
int count = 0;
if(count ==0)
{
SoreTmp.first = log(pIt->Psk);
vectSore.push_back(SoreTmp); //将Sore值计入表中
}
else
{
SoreTmp.first += log(pIt->Psk); //乘上Psk并取对数
vectSore_T.push_back(SoreTmp); //将Score值计入表中
SoreTmp.first = log(pIt->Psk);
vectSore.push_back(SoreTmp); //只考虑发射概率的Score
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Chooses an IDs value for every word of the current sentence and renders
// the sentence as "word/IDs word/IDs ... " (each item followed by a blank).
//
// m_sentence - output; overwritten with the rendered sentence.
//
// Per word, using the IDw values stored in vectIDTmp:
//   * IDw == -1 (word not found in the ID table): IDs becomes "-1".
//   * IDw absent from vectWordPsk: a "Can't find" message box is shown and
//     IDs is left as it was.
//   * exactly one vectWordPsk entry with that IDw: its IDs is used.
//   * several adjacent entries with that IDw: each one is scored through
//     _ReadPvs/CountPvs and the entry with the highest score wins; on a tie
//     the entry that was scored first wins.
// vectSore and vectSore_T are emptied after each ambiguous word; vectIDTmp
// and vectWordTmp are emptied before returning.
//
// Compared with the previous version, the neighbour checks no longer step
// outside the table, and the backward scan now also scores the very first
// table entry. The best-score lookup is guarded against an empty score
// list, and a leftover Sleep(1) debugging hook is gone.
void xiaoqichuli::CountSore(string & m_sentence)
{
    vector<wordPvs> vectWordPvsTmp;
    wordPsk ToSearch;
    const int wordCount = vectWordTmp.size();

    for(int i = 0; i < wordCount; i++)
    {
        // Words that were not found in the ID table carry IDw == -1.
        if(vectIDTmp[i] == -1)
        {
            vectWordTmp[i].IDs = "-1";
            continue;
        }

        ToSearch.IDw = vectIDTmp[i];
        vector<wordPsk>::iterator pHit =
            lower_bound(vectWordPsk.begin(), vectWordPsk.end(), ToSearch);
        if(pHit == vectWordPsk.end() || pHit->IDw != vectIDTmp[i])
        {
            AfxMessageBox("Can't find");
            continue;
        }

        // Look at the neighbours only when they exist: pHit may be the
        // first or the last element of the table.
        const bool prevSame = pHit != vectWordPsk.begin()
                           && (pHit - 1)->IDw == pHit->IDw;
        const bool nextSame = (pHit + 1) != vectWordPsk.end()
                           && (pHit + 1)->IDw == pHit->IDw;
        if(!prevSame && !nextSame)
        {
            vectWordTmp[i].IDs = pHit->IDs;     // single entry: no ambiguity
            continue;
        }

        // Ambiguous word: score every adjacent entry with the same IDw.
        // Scan order (hit, then backwards, then forwards) is kept because
        // it decides which entry wins a tie.
        vector<wordPsk>::iterator pScan = pHit;
        while(pScan->IDw == vectIDTmp[i])
        {
            vectWordPvsTmp = _ReadPvs(pScan);
            CountPvs(i, vectWordPvsTmp, pScan);
            vectWordPvsTmp.clear();
            if(pScan == vectWordPsk.begin())
                break;                          // first entry has been scored too
            --pScan;
        }
        for(pScan = pHit + 1;
            pScan != vectWordPsk.end() && pScan->IDw == vectIDTmp[i];
            ++pScan)
        {
            vectWordPvsTmp = _ReadPvs(pScan);
            CountPvs(i, vectWordPvsTmp, pScan);
            vectWordPvsTmp.clear();
        }

        // Fallback in case no score was recorded at all.
        vectWordTmp[i].IDs = pHit->IDs;

        // Pick the highest score; vectSore_T takes precedence when it has
        // entries, otherwise vectSore is used.
        if(!vectSore_T.empty())
        {
            double best = vectSore_T[0].first;
            vectWordTmp[i].IDs = vectSore_T[0].second;
            for(unsigned int y = 1; y < vectSore_T.size(); y++)
            {
                if(vectSore_T[y].first > best)
                {
                    best = vectSore_T[y].first;
                    vectWordTmp[i].IDs = vectSore_T[y].second;
                }
            }
        }
        else if(!vectSore.empty())
        {
            double best = vectSore[0].first;
            vectWordTmp[i].IDs = vectSore[0].second;
            for(unsigned int x = 1; x < vectSore.size(); x++)
            {
                if(vectSore[x].first > best)
                {
                    best = vectSore[x].first;
                    vectWordTmp[i].IDs = vectSore[x].second;
                }
            }
        }

        vectSore.clear();
        vectSore_T.clear();
    }

    // Render the result; appending in place avoids rebuilding the whole
    // string for every word.
    m_sentence = "";
    for(unsigned int y = 0; y < vectWordTmp.size(); y++)
    {
        m_sentence += vectWordTmp[y].word;
        m_sentence += "/";
        m_sentence += vectWordTmp[y].IDs;
        m_sentence += " ";
    }

    vectIDTmp.clear();
    vectWordTmp.clear();
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -