⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hypothesis.cpp

📁 解码器是基于短语的统计机器翻译系统的核心模块
💻 CPP
📖 第 1 页 / 共 2 页
字号:
			for(int i = 0; (i < stackIsize) && (POS != hypothesisStack[len].end()); i++, ++POS)
			{//查看是否有可合并或减掉的假设
				set<int>::iterator POSi = --(vecHypo[*POS].oldphrase).end();
				set<int>::iterator newHypi = --((vecHypo[newHyp].oldphrase).end()); 
				if((vecHypo[*POS].oldphrase == vecHypo[newHyp].oldphrase)
					&&(vecHypo[*POS].lastEWI == vecHypo[newHyp].lastEWI) && (vecHypo[*POS].lastEWII == vecHypo[newHyp].lastEWII) \
					&& (*POSi == *newHypi)
					&& (vecHypo[*POS].totalScore - vecHypo[newHyp].totalScore < avs)) 
				{
					if(printmore) {
						cout << "better path, overwriting exisiting hypothesis " << vecHypo[*POS].thisID << endl;
						
					}
					Arc arcTmp;
					arcTmp.from = vecHypo[*POS].lastID;
					if (len == stackSize) {
						arcTmp.to = -1;//应该是最大假设的ID加1
					}
					else {
						arcTmp.to = vecHypo[*POS].thisID;
					}
					arcTmp.diffCost = (vecHypo[*POS].totalScore - vecHypo[*POS].futureScore - vecHypo[*POS].baseScore) ;//- \
					//(vecHypo[*POS].prev->totalScore - vecHypo[*POS].prev->futureScore - vecHypo[*POS].prev->baseScore);
					arcTmp.tosPhrase = vecHypo[*POS].newPhrase;
					arc.push_back(arcTmp);
					
					vecHypo[*POS] = vecHypo[newHyp];
					vecNotUsed.push_back(newHyp);
					return ;
				}
				else if((vecHypo[*POS].oldphrase == vecHypo[newHyp].oldphrase)
					&&(vecHypo[*POS].lastEWI == vecHypo[newHyp].lastEWI) && (vecHypo[*POS].lastEWII == vecHypo[newHyp].lastEWII) \
					&& (*POSi == *newHypi)
					&& (vecHypo[*POS].totalScore - vecHypo[newHyp].totalScore >= avs))
				{
					if(printmore) {
						cout << "worse than existing path, discarding" << endl;
					}
					Arc arcTmp;
					arcTmp.from = vecHypo[newHyp].lastID;
					if (len == stackSize) {
						arcTmp.to = -1;//应该是最大假设的ID加1
					}
					else {
						arcTmp.to = vecHypo[*POS].thisID;
					}
					
					arcTmp.diffCost = (vecHypo[newHyp].totalScore - vecHypo[newHyp].futureScore - vecHypo[newHyp].baseScore);// - \
					//(newHyp->prev->totalScore - newHyp->prev->futureScore - newHyp->prev->baseScore);
					arcTmp.tosPhrase = vecHypo[newHyp].newPhrase;
					arc.push_back(arcTmp);
					
					vecNotUsed.push_back(newHyp);
					return ;
				}
			}
			hypothesisStack[len].push_back(newHyp);
			if (hypothesisStack[len].size() >= 2 * eachStackSize) {
				if(printmore) {
					cout << "merged hypothesis on stack " << len << ", now size " << hypothesisStack[len].size() << " need cut!!!" << endl;
				}
				cutStack(hypothesisStack[len], eachStackSize, len);
			}
			if(printmore) {
				cout << "merged hypothesis on stack " << len << ", now size " << hypothesisStack[len].size() << endl;
			}  //一般的假设就直接入栈
			
		}
		else
		{
			if(printmore) {
				cout << "estimate below threshold, discarding" << endl;
			}
			vecNotUsed.push_back(newHyp);
		}
	}
	else
	{
		THRESHOLD[len] = threshold + vecHypo[newHyp].totalScore;
		hypothesisStack[len].push_back(newHyp);
		if (hypothesisStack[len].size() >= 2 * eachStackSize) {
			if(printmore) {
				cout << "merged hypothesis on stack " << len << ", now size " << hypothesisStack[len].size() << " need cut!!!" << endl;
			}
			cutStack(hypothesisStack[len], eachStackSize, len);
		}
		if(printmore) {
			cout << "new best estimate for this stack" << endl;
			cout << "merged hypothesis on stack " << len << ", now size " << hypothesisStack[len].size() << endl;
		}
		
	}
}

// Erases the first character (a space) of every occurrence of `pattern` in
// `text`. `pattern` is expected to start with a space. After each erase the
// search resumes at the same offset, so a run such as "   " collapses fully.
static void eraseSpaceBefore(std::string& text, const char* pattern)
{
	std::string::size_type pos = text.find(pattern, 0);
	while (pos != std::string::npos)
	{
		text.erase(pos, 1);
		pos = text.find(pattern, pos);
	}
}

// Tidies a decoded sentence in place.
//
// Steps, applied in this order, each over the whole string:
//   1. upper-case the first character when it is a lower-case letter;
//   2. collapse runs of spaces into a single space;
//   3. remove the space in front of "?", ".", "n't", ",", "'" and "!".
//
// str: sentence to rewrite; modified in place. An empty string is left as is.
void Hypothesis::dealout(string& str)
{
	if (!str.empty()) {
		// The <cctype> functions are only defined for values representable
		// as unsigned char (or EOF), so convert before classifying.
		const unsigned char first = static_cast<unsigned char>(str[0]);
		if (isalpha(first) && islower(first)) {
			str[0] = static_cast<char>(toupper(first));
		}
	}

	eraseSpaceBefore(str, "  ");
	eraseSpaceBefore(str, " ?");
	eraseSpaceBefore(str, " .");
	// The whole " n't" pattern is searched on every pass. Continuing with the
	// shorter " n" would glue unrelated words together ("go now" -> "gonow").
	eraseSpaceBefore(str, " n't");
	eraseSpaceBefore(str, " ,");
	eraseSpaceBefore(str, " '");
	eraseSpaceBefore(str, " !");
}

// Builds the output sentence for the best hypothesis on the final stack.
//
// The best hypothesis is the entry of hypothesisStack[stackSize] that
// max_element selects under the Less() comparator. Starting from it, the
// `prev` links are followed until a hypothesis with prev == -1 is reached;
// the newPhrase word IDs of every visited hypothesis (the terminating one
// excluded) are prepended so the sentence ends up in left-to-right order.
// The IDs are then converted with enVcb->IDsTosen() and cleaned by dealout().
//
// Before returning, cnVcb->mapClear() is called and Vocab::ID is reset to -1;
// this happens whether or not a hypothesis was found.
//
// Returns the cleaned sentence, or an empty string when the final stack holds
// no hypothesis.
string Hypothesis::findBest()
{
	const int nfSize = hypothesisStack[stackSize].size();

	if(printmore) {
		cout << "final stack " << stackSize << " size is " << nfSize << endl;
		cout << "output decode result! " << endl;
	}

	string out;
	// max_element returns end() for an empty range; dereferencing it would be
	// undefined, so an empty final stack yields an empty result instead.
	if (nfSize > 0) {
		int cur = *max_element(hypothesisStack[stackSize].begin(), hypothesisStack[stackSize].end(), Less());

		deque<int> sentence;
		// Walk the back-pointers by index; the elements are only read, so a
		// reference avoids copying a whole HypothesisElement per step.
		while (vecHypo[cur].prev != -1) {
			HypothesisElement& hyp = vecHypo[cur];
			int len = hyp.newPhrase.size();
			for(int i = len - 1; i >= 0; --i)
			{
				sentence.push_front(hyp.newPhrase[i]);
			}
			cur = hyp.prev;
		}
		out = enVcb->IDsTosen(sentence);
		dealout(out);
	}

	cnVcb->mapClear();
	Vocab::ID = -1;
	return out;
}
//

// Appends the search graph to `outputFile`.
//
// Layout of what is written:
//   - one line holding HypothesisElement::baseID + 1;
//   - one line per hypothesis on stacks 1 .. stackSize-1, in the form
//       (lastID (thisID "words" cost))
//   - one line per hypothesis on the final stack, with
//     HypothesisElement::baseID + 1 in place of thisID;
//   - one line per entry of `arc`, where a `to` of -1 is likewise written as
//     HypothesisElement::baseID + 1.
// "words" is enVcb->getWords() of the phrase and cost is
// exp(totalScore - futureScore - baseScore) (exp(diffCost) for arcs).
//
// If the file cannot be opened, an error is printed to cout and nothing is
// written.
void Hypothesis::findNBest(string outputFile)
{
	ofstream output;
	output.open(outputFile.c_str(), std::ios::out | std::ios::app);
	if (!output) {
		cout << "Open output file error!" << endl;
		return ;
	}

	output << HypothesisElement::baseID + 1<< endl;

	// Hypotheses on the intermediate stacks (stack 0 is not written).
	for (int level = 1; level < stackSize; ++level)
	{
		multisetHYE& bucket = hypothesisStack[level];
		for (multisetHYE::iterator it = bucket.begin(); it != bucket.end(); ++it)
		{
			HypothesisElement& hyp = vecHypo[*it];
			output << "(" << hyp.lastID << " (" << hyp.thisID;
			output << " \"" << enVcb->getWords(hyp.newPhrase) << "\" ";
			output << exp(hyp.totalScore - hyp.futureScore - hyp.baseScore) << "))" << endl;
		}
	}

	// Hypotheses on the final stack all point at the shared end node.
	multisetHYE& lastBucket = hypothesisStack[stackSize];
	for (multisetHYE::iterator it = lastBucket.begin(); it != lastBucket.end(); ++it)
	{
		HypothesisElement& hyp = vecHypo[*it];
		output << "(" << hyp.lastID << " (" << HypothesisElement::baseID + 1;
		output << " \"" << enVcb->getWords(hyp.newPhrase) << "\" ";
		output << exp(hyp.totalScore - hyp.futureScore - hyp.baseScore) << "))" << endl;
	}

	// Extra edges recorded in `arc`.
	const int arcCount = arc.size();
	for (int k = 0; k < arcCount; ++k)
	{
		Arc& edge = arc[k];
		output << "(" << edge.from << " (";
		if (edge.to != -1) {
			output << edge.to;
		}
		else {
			output << HypothesisElement::baseID + 1;
		}
		output << " \"" << enVcb->getWords(edge.tosPhrase) << "\" ";
		output << exp(edge.diffCost) << "))" << endl;
	}

	output.clear();
	output.close();
}


// Destructor: deletes the objects behind the enVcb, cnVcb, lm and to pointers,
// in that order.
// NOTE(review): assumes each pointer was allocated with scalar `new` and is
// owned exclusively by this object - confirm against the constructor.
Hypothesis::~Hypothesis()
{
	delete enVcb;
	delete cnVcb;
	delete lm;
	delete to;
}

// Unary predicate over hypothesis indices, used with count_if in cutStack.
//
// An index `hyp` satisfies the predicate when
//     vecHypo[hyp].totalScore - cutoff >= avs
// where `cutoff` is the value given at construction and `avs` is the
// file-level tolerance constant.
class THRES
{
private:
	double cutoff;	// score threshold the hypotheses are compared against
public:
	THRES(double thre):cutoff(thre)
	{
	}
	// Const-qualified: the call only reads state, so the predicate can also be
	// invoked through a const THRES.
	bool operator() (int hyp) const
	{
		return vecHypo[hyp].totalScore - cutoff >= avs;
	}
};
/*
void Hypothesis::CutStack(multisetHYE& multisethye, int limit, int stackNO)
{	
	double MAX = vecHypo[*max_element(multisethye.begin(), multisethye.end(), Less())].totalScore;
	double thresholdT;
	if(printmore) {
		cout << "\tmax value is " << MAX << endl;
	}
	multisetHYE multitmp = multisethye;
	sort(multitmp.begin(), multitmp.end(), Less());
	thresholdT = (multitmp[limit])->totalScore;
	THRESHOLD[stackNO] = thresholdT;
	multisetHYE::iterator posdel = multisethye.begin();
	multisetHYE::iterator posdelnext = multisethye.begin();
	while (posdelnext != multisethye.end())
	{
		if ((**posdelnext).totalScore - thresholdT <= avs) 
		{
			posdel = posdelnext;
			delete *posdel;
			posdelnext = multisethye.erase(posdel);
			continue;
		}
		++posdelnext;
	}
}

*/

// Prunes one hypothesis stack by score.
//
// A score threshold is searched for, starting at (best score - LN):
//   - when more than `limit` entries pass the THRES predicate, the step is
//     divided by 4 and the threshold is raised by the new step;
//   - when fewer pass, the threshold is lowered by the current step;
//   - the search stops when exactly `limit` entries pass, when
//     (step - STEP < avs), or when every entry already passes (lowering the
//     threshold further could not admit more, so continuing would never end).
// The threshold found is stored in THRESHOLD[stackNO]. Every entry whose
// totalScore - threshold <= avs is then removed from the stack and its index
// is appended to vecNotUsed; the surviving entries keep their relative order.
//
// multisethye: stack of indices into vecHypo; modified in place.
// limit:       target number of entries to keep.
// stackNO:     index into THRESHOLD to update.
//
// An empty stack is left untouched and THRESHOLD[stackNO] is not changed.
void Hypothesis::cutStack(multisetHYE& multisethye, int limit, int stackNO)  //
{
	// max_element returns end() on an empty range; dereferencing it would be
	// undefined, and there is nothing to prune anyway.
	if (multisethye.empty()) {
		return ;
	}

	const double MAX = vecHypo[*max_element(multisethye.begin(), multisethye.end(), Less())].totalScore;
	if(printmore) {
		cout << "\tmax value is " << MAX << endl;
	}

	const int total = static_cast<int>(multisethye.size());
	double step = LN;
	double thresholdT = MAX - step;

	for (;;) {
		const int stacknum = count_if(multisethye.begin(), multisethye.end(), THRES(thresholdT));
		if(printmore) {
			cout << "\tthreshold " << thresholdT << ", count " << stacknum << ", step " << step << endl;
		}
		if ((stacknum == limit) || (step - STEP < avs)) {
			break;
		}
		if (stacknum > limit) {
			// Too many survivors: refine the step and tighten the threshold.
			step = step / 4;
			thresholdT = thresholdT + step;
		}
		else {
			// Too few survivors. If every entry is already counted, a lower
			// threshold cannot add any; the step does not change in this
			// branch, so without this exit the loop would spin forever.
			if (stacknum == total) {
				break;
			}
			thresholdT = thresholdT - step;
		}
	}
	THRESHOLD[stackNO] = thresholdT;

	// Single compaction pass: discarded indices go to vecNotUsed, kept ones
	// are shifted forward, and the leftover tail is erased in one call.
	multisetHYE::iterator keep = multisethye.begin();
	for (multisetHYE::iterator it = multisethye.begin(); it != multisethye.end(); ++it)
	{
		if (vecHypo[*it].totalScore - thresholdT <= avs)
		{
			vecNotUsed.push_back(*it);
		}
		else
		{
			*keep = *it;
			++keep;
		}
	}
	multisethye.erase(keep, multisethye.end());
}

// Finds an unused position in the pool.
//
// Removes the front entry of `vecnused` and returns it. Returns -1 when
// `vecnused` is empty, in which case the container is left unchanged.
int Hypothesis::findProper(VECUNSED& vecnused)
{
	if (vecnused.empty())
	{
		return -1;
	}

	const int slot = vecnused.front();
	vecnused.pop_front();
	return slot;
}




⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -