📄 hypothesis.cpp
字号:
for(int i = 0; (i < stackIsize) && (POS != hypothesisStack[len].end()); i++, ++POS)
{//查看是否有可合并或减掉的假设
set<int>::iterator POSi = --(vecHypo[*POS].oldphrase).end();
set<int>::iterator newHypi = --((vecHypo[newHyp].oldphrase).end());
if((vecHypo[*POS].oldphrase == vecHypo[newHyp].oldphrase)
&&(vecHypo[*POS].lastEWI == vecHypo[newHyp].lastEWI) && (vecHypo[*POS].lastEWII == vecHypo[newHyp].lastEWII) \
&& (*POSi == *newHypi)
&& (vecHypo[*POS].totalScore - vecHypo[newHyp].totalScore < avs))
{
if(printmore) {
cout << "better path, overwriting exisiting hypothesis " << vecHypo[*POS].thisID << endl;
}
Arc arcTmp;
arcTmp.from = vecHypo[*POS].lastID;
if (len == stackSize) {
arcTmp.to = -1;//应该是最大假设的ID加1
}
else {
arcTmp.to = vecHypo[*POS].thisID;
}
arcTmp.diffCost = (vecHypo[*POS].totalScore - vecHypo[*POS].futureScore - vecHypo[*POS].baseScore) ;//- \
//(vecHypo[*POS].prev->totalScore - vecHypo[*POS].prev->futureScore - vecHypo[*POS].prev->baseScore);
arcTmp.tosPhrase = vecHypo[*POS].newPhrase;
arc.push_back(arcTmp);
vecHypo[*POS] = vecHypo[newHyp];
vecNotUsed.push_back(newHyp);
return ;
}
else if((vecHypo[*POS].oldphrase == vecHypo[newHyp].oldphrase)
&&(vecHypo[*POS].lastEWI == vecHypo[newHyp].lastEWI) && (vecHypo[*POS].lastEWII == vecHypo[newHyp].lastEWII) \
&& (*POSi == *newHypi)
&& (vecHypo[*POS].totalScore - vecHypo[newHyp].totalScore >= avs))
{
if(printmore) {
cout << "worse than existing path, discarding" << endl;
}
Arc arcTmp;
arcTmp.from = vecHypo[newHyp].lastID;
if (len == stackSize) {
arcTmp.to = -1;//应该是最大假设的ID加1
}
else {
arcTmp.to = vecHypo[*POS].thisID;
}
arcTmp.diffCost = (vecHypo[newHyp].totalScore - vecHypo[newHyp].futureScore - vecHypo[newHyp].baseScore);// - \
//(newHyp->prev->totalScore - newHyp->prev->futureScore - newHyp->prev->baseScore);
arcTmp.tosPhrase = vecHypo[newHyp].newPhrase;
arc.push_back(arcTmp);
vecNotUsed.push_back(newHyp);
return ;
}
}
hypothesisStack[len].push_back(newHyp);
if (hypothesisStack[len].size() >= 2 * eachStackSize) {
if(printmore) {
cout << "merged hypothesis on stack " << len << ", now size " << hypothesisStack[len].size() << " need cut!!!" << endl;
}
cutStack(hypothesisStack[len], eachStackSize, len);
}
if(printmore) {
cout << "merged hypothesis on stack " << len << ", now size " << hypothesisStack[len].size() << endl;
} //一般的假设就直接入栈
}
else
{
if(printmore) {
cout << "estimate below threshold, discarding" << endl;
}
vecNotUsed.push_back(newHyp);
}
}
else
{
THRESHOLD[len] = threshold + vecHypo[newHyp].totalScore;
hypothesisStack[len].push_back(newHyp);
if (hypothesisStack[len].size() >= 2 * eachStackSize) {
if(printmore) {
cout << "merged hypothesis on stack " << len << ", now size " << hypothesisStack[len].size() << " need cut!!!" << endl;
}
cutStack(hypothesisStack[len], eachStackSize, len);
}
if(printmore) {
cout << "new best estimate for this stack" << endl;
cout << "merged hypothesis on stack " << len << ", now size " << hypothesisStack[len].size() << endl;
}
}
}
// Erases the single character found at the start of every occurrence of `pattern` in `text`.
// Every pattern dealout() passes begins with a blank, so the effect is to delete the blank
// in front of the rest of the pattern. The search resumes at the erase position, so an
// occurrence that only appears once a character has been removed is handled too.
static void eraseFirstCharOfEach(std::string& text, const char* pattern)
{
    // size_type (not int) so the comparison with npos is exact for any string length.
    std::string::size_type pos = text.find(pattern);
    while (pos != std::string::npos)
    {
        text.erase(pos, 1);
        pos = text.find(pattern, pos);
    }
}

// Cleans up a decoded sentence in place:
//   1. upper-cases the first character when it is a lower-case letter;
//   2. runs a series of passes, each deleting the blank that starts one of the
//      patterns listed below (blank before "?", ".", "n't", ",", "'" and "!").
// str: sentence to rewrite; modified in place. An empty string is left untouched.
void Hypothesis::dealout(string& str)
{
    if (!str.empty()) {
        // The <cctype> classifiers are only defined for unsigned-char values (or EOF),
        // so go through unsigned char instead of passing a possibly negative char.
        const unsigned char first = static_cast<unsigned char>(str[0]);
        if (isalpha(first) && islower(first)) {
            str[0] = static_cast<char>(toupper(first));
        }
    }

    // NOTE(review): as written, the first pattern is a single blank, so the first pass
    // deletes every blank in the sentence and the passes after it can no longer match
    // anything. Presumably the literal was meant to be two blanks (collapsing doubled
    // spaces) -- confirm against the original file before changing it.
    static const char* const patterns[] = { " ", " ?", " .", " n't", " ,", " '", " !" };
    const int patternCount = sizeof(patterns) / sizeof(patterns[0]);
    for (int p = 0; p < patternCount; ++p)
    {
        // Each pass keeps searching for its own full pattern. The old " n't" pass
        // continued with " n" after the first hit and so also removed the blank in
        // front of any later word that merely started with 'n'.
        eraseFirstCharOfEach(str, patterns[p]);
    }
}
// Builds the output sentence for the best hypothesis on the final stack.
//
// Picks the element of hypothesisStack[stackSize] that max_element() selects under Less()
// (presumably the highest totalScore -- confirm against Less), follows the `prev` links
// back until an element whose prev is -1, and collects each visited element's newPhrase
// ids in front of the ones gathered so far, so the ids end up in sentence order. The ids
// are turned into text with enVcb->IDsTosen() and cleaned up with dealout().
//
// Returns the resulting sentence, or an empty string when the final stack is empty.
// In both cases cnVcb->mapClear() is called and Vocab::ID is reset to -1 before returning.
string Hypothesis::findBest()
{
    const int nfSize = hypothesisStack[stackSize].size();
    if(printmore) {
        cout << "final stack " << stackSize << " size is " << nfSize << endl;
        cout << "output decode result! " << endl;
    }

    string out;
    // max_element() returns end() for an empty range and dereferencing that is undefined,
    // so an empty final stack simply produces an empty sentence.
    if (!hypothesisStack[stackSize].empty())
    {
        int cur = *max_element(hypothesisStack[stackSize].begin(), hypothesisStack[stackSize].end(), Less());

        deque<int> sentence;
        // Walk the back-pointers by index; the element whose prev is -1 ends the walk and
        // its own phrase is not emitted. Working through a reference avoids copying a whole
        // HypothesisElement on every step.
        while (vecHypo[cur].prev != -1)
        {
            const HypothesisElement& hyp = vecHypo[cur];
            // Push the phrase back-to-front so it lands in order ahead of the later phrases.
            for (int i = static_cast<int>(hyp.newPhrase.size()) - 1; i >= 0; --i)
            {
                sentence.push_front(hyp.newPhrase[i]);
            }
            cur = hyp.prev;
        }

        out = enVcb->IDsTosen(sentence);
        dealout(out);
    }

    cnVcb->mapClear();
    Vocab::ID = -1;
    return out;
}
// Appends a textual dump of the hypothesis stacks and the recorded arcs to `outputFile`.
//
// Layout of what is written:
//   - one line holding HypothesisElement::baseID + 1;
//   - for every hypothesis on stacks 1 .. stackSize-1, one line
//       (lastID (thisID "words" exp(totalScore - futureScore - baseScore)))
//   - for every hypothesis on stack `stackSize`, the same line but with
//     HypothesisElement::baseID + 1 in place of thisID;
//   - for every entry of `arc`, one line (from (to "words" exp(diffCost))), where a `to`
//     of -1 is printed as HypothesisElement::baseID + 1.
// NOTE(review): this looks like a word-graph dump for an external n-best tool -- confirm.
//
// The file is opened in append mode. If it cannot be opened, a message is printed to cout
// and nothing is written.
void Hypothesis::findNBest(string outputFile)
{
    ofstream output(outputFile.c_str(), std::ios::out | std::ios::app);
    if (!output) {
        cout << "Open output file error!" << endl;
        return ;
    }

    output << HypothesisElement::baseID + 1 << endl;

    // Hypotheses on the intermediate stacks.
    for (int stackNo = 1; stackNo < stackSize; ++stackNo)
    {
        multisetHYE& entries = hypothesisStack[stackNo];
        const int entryCount = entries.size();
        for (int k = 0; k < entryCount; ++k)
        {
            HypothesisElement& hyp = vecHypo[entries[k]];
            output << "(" << hyp.lastID << " (" << hyp.thisID
                   << " \"" << enVcb->getWords(hyp.newPhrase) << "\" "
                   << exp(hyp.totalScore - hyp.futureScore - hyp.baseScore) << "))" << endl;
        }
    }

    // Hypotheses on the final stack all point at the same id, baseID + 1.
    multisetHYE& finalEntries = hypothesisStack[stackSize];
    const int finalCount = finalEntries.size();
    for (int k = 0; k < finalCount; ++k)
    {
        HypothesisElement& hyp = vecHypo[finalEntries[k]];
        output << "(" << hyp.lastID << " (" << HypothesisElement::baseID + 1
               << " \"" << enVcb->getWords(hyp.newPhrase) << "\" "
               << exp(hyp.totalScore - hyp.futureScore - hyp.baseScore) << "))" << endl;
    }

    // Entries stored in `arc`.
    const int arcCount = arc.size();
    for (int k = 0; k < arcCount; ++k)
    {
        Arc& edge = arc[k];
        output << "(" << edge.from << " (";
        if (edge.to != -1) {
            output << edge.to;
        }
        else {
            // -1 marks an arc whose target is printed as baseID + 1.
            output << HypothesisElement::baseID + 1;
        }
        output << " \"" << enVcb->getWords(edge.tosPhrase) << "\" "
               << exp(edge.diffCost) << "))" << endl;
    }

    output.clear();
    output.close();
}
// Destructor: deletes the objects that enVcb, cnVcb, lm and to point to, in that order.
// NOTE(review): this assumes all four were allocated with `new` and are owned solely by
// this object -- confirm against the constructor; the order is kept as-is in case the
// destructors of these objects depend on one another.
Hypothesis::~Hypothesis()
{
delete enVcb;
delete cnVcb;
delete lm;
delete to;
}
// Unary predicate over hypothesis indices, used with count_if() when a stack is pruned.
// An index `hyp` satisfies the predicate when vecHypo[hyp].totalScore exceeds the cut-off
// given at construction by at least `avs` (presumably a small tolerance -- confirm where
// avs is defined).
class THRES
{
private:
    // Score the hypothesis is measured against. Deliberately not called THRESHOLD, which
    // is also the name of the per-stack threshold array used by Hypothesis.
    double cutoff;
public:
    // explicit: a bare double should never silently turn into a predicate object.
    explicit THRES(double thre) : cutoff(thre)
    {
    }
    // const: evaluating the predicate does not modify it, so it can also be called
    // through a const THRES.
    bool operator() (int hyp) const
    {
        return vecHypo[hyp].totalScore - cutoff >= avs;
    }
};
/*
void Hypothesis::CutStack(multisetHYE& multisethye, int limit, int stackNO)
{
double MAX = vecHypo[*max_element(multisethye.begin(), multisethye.end(), Less())].totalScore;
double thresholdT;
if(printmore) {
cout << "\tmax value is " << MAX << endl;
}
multisetHYE multitmp = multisethye;
sort(multitmp.begin(), multitmp.end(), Less());
thresholdT = (multitmp[limit])->totalScore;
THRESHOLD[stackNO] = thresholdT;
multisetHYE::iterator posdel = multisethye.begin();
multisetHYE::iterator posdelnext = multisethye.begin();
while (posdelnext != multisethye.end())
{
if ((**posdelnext).totalScore - thresholdT <= avs)
{
posdel = posdelnext;
delete *posdel;
posdelnext = multisethye.erase(posdel);
continue;
}
++posdelnext;
}
}
*/
// Prunes one hypothesis stack by score.
//
// Searches for a score threshold such that `limit` entries pass THRES(threshold): it starts
// LN below the best totalScore on the stack, lowers the threshold by the current step while
// too few entries pass, and quarters the step and raises the threshold while too many pass.
// The search stops when exactly `limit` entries pass or when the step has shrunk to the
// STEP cut-off (step - STEP < avs). The threshold found is stored in THRESHOLD[stackNO];
// every entry whose totalScore is not more than `avs` above it is removed from the stack
// and its index appended to vecNotUsed. The surviving entries keep their relative order.
//
// multisethye: stack of indices into vecHypo; modified in place.
// limit:       number of entries the threshold search aims for.
// stackNO:     index into THRESHOLD that receives the threshold found.
//
// An empty stack is left untouched and THRESHOLD[stackNO] is not written.
void Hypothesis::cutStack(multisetHYE& multisethye, int limit, int stackNO) //
{
    // max_element() on an empty range returns end(); dereferencing it is undefined.
    if (multisethye.empty()) {
        return;
    }

    const int stackCount = multisethye.size();
    const double bestScore = vecHypo[*max_element(multisethye.begin(), multisethye.end(), Less())].totalScore;
    if(printmore) {
        cout << "\tmax value is " << bestScore << endl;
    }

    double step = LN;
    double thresholdT = bestScore - step;
    for (;;) {
        const int stacknum = count_if(multisethye.begin(), multisethye.end(), THRES(thresholdT));
        if(printmore) {
            cout << "\tthreshold " << thresholdT << ", count " << stacknum << ", step " << step << endl;
        }
        if ((stacknum == limit) || (step - STEP < avs)) {
            break;
        }
        if (stacknum > limit) {
            // Too many entries pass: refine the step and tighten the threshold.
            step = step / 4;
            thresholdT = thresholdT + step;
        }
        else {
            // Too few entries pass. If every entry already passes, lowering the threshold
            // cannot raise the count any further; without this check the loop would never
            // end for a stack holding fewer than `limit` entries.
            if (stacknum == stackCount) {
                break;
            }
            thresholdT = thresholdT - step;
        }
    }
    THRESHOLD[stackNO] = thresholdT;

    // Single pass: hand rejected indices back to vecNotUsed and shift the survivors to the
    // front, then drop the leftover tail in one erase (instead of one erase per rejected entry).
    multisetHYE::iterator keep = multisethye.begin();
    for (multisetHYE::iterator it = multisethye.begin(); it != multisethye.end(); ++it)
    {
        if (vecHypo[*it].totalScore - thresholdT <= avs)
        {
            vecNotUsed.push_back(*it);
        }
        else
        {
            *keep = *it;
            ++keep;
        }
    }
    multisethye.erase(keep, multisethye.end());
}
// Finds an unused position in the pool.
// Removes the front element of `vecnused` and returns it; returns -1 when `vecnused`
// holds no elements.
int Hypothesis::findProper(VECUNSED& vecnused)
{
    if (vecnused.empty())
    {
        return -1;
    }
    const int slot = vecnused.front();
    vecnused.pop_front();
    return slot;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -