hypothesiscollection.cpp.svn-base

来自「解码器是基于短语的统计机器翻译系统的核心模块」· SVN-BASE 代码 · 共 257 行

SVN-BASE
257
字号
// $Id$/***********************************************************************Moses - factored phrase-based language decoderCopyright (C) 2006 University of EdinburghThis library is free software; you can redistribute it and/ormodify it under the terms of the GNU Lesser General PublicLicense as published by the Free Software Foundation; eitherversion 2.1 of the License, or (at your option) any later version.This library is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNULesser General Public License for more details.You should have received a copy of the GNU Lesser General PublicLicense along with this library; if not, write to the Free SoftwareFoundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA***********************************************************************/#include <algorithm>#include <set>#include <queue>#include "HypothesisCollection.h"#include "TypeDef.h"#include "Util.h"#include "StaticData.h"using namespace std;HypothesisCollection::HypothesisCollection(){	m_nBestIsEnabled = StaticData::Instance()->IsNBestEnabled();	m_bestScore = -std::numeric_limits<float>::infinity();	m_worstScore = -std::numeric_limits<float>::infinity();}/** remove all hypotheses from the collection */void HypothesisCollection::RemoveAll(){	while (m_hypos.begin() != m_hypos.end())	{		Remove(m_hypos.begin());	}}/** add a hypothesis to the collection, prune if necessary */void HypothesisCollection::Add(Hypothesis *hypo){	AddNoPrune(hypo);	VERBOSE(3,"added hyp to stack");	// Update best score, if this hypothesis is new best	if (hypo->GetTotalScore() > m_bestScore)	{		VERBOSE(3,", best on stack");		m_bestScore = hypo->GetTotalScore();		// this may also affect the worst score        if ( m_bestScore + m_beamThreshold > m_worstScore )          m_worstScore = m_bestScore + m_beamThreshold;	}    // Prune only if stack is twice as big as needed (lazy pruning)	VERBOSE(3,", now size " << m_hypos.size());	if (m_hypos.size() > 2*m_maxHypoStackSize-1)	{		PruneToSize(m_maxHypoStackSize);	}	else {	  VERBOSE(3,std::endl);	}}void HypothesisCollection::AddPrune(Hypothesis *hypo){ 	if (hypo->GetTotalScore() < m_worstScore)	{ // really bad score. don't bother adding hypo into collection	  StaticData::Instance()->GetSentenceStats().AddDiscarded();	  VERBOSE(3,"discarded, too bad for stack" << std::endl);		ObjectPool<Hypothesis> &pool = Hypothesis::GetObjectPool();		pool.freeObject(hypo);				return;	}	// over threshold			// recombine if ngram-equivalent to another hypo	iterator iter = m_hypos.find(hypo);	if (iter == m_hypos.end())	{ // nothing found. add to collection		Add(hypo);		return;  }	StaticData::Instance()->GetSentenceStats().AddRecombination(*hypo, **iter);		// found existing hypo with same target ending.	// keep the best 1	Hypothesis *hypoExisting = *iter;	if (hypo->GetTotalScore() > hypoExisting->GetTotalScore())	{ // incoming hypo is better than the one we have		VERBOSE(3,"better than matching hyp " << hypoExisting->GetId() << ", recombining, ");		if (m_nBestIsEnabled) {			hypo->AddArc(hypoExisting);			Detach(iter);		} else {			Remove(iter);		}		Add(hypo);				return;	}	else	{ // already storing the best hypo. discard current hypo 	  VERBOSE(3,"worse than matching hyp " << hypoExisting->GetId() << ", recombining" << std::endl)		if (m_nBestIsEnabled) {			(*iter)->AddArc(hypo);		} else {			ObjectPool<Hypothesis> &pool = Hypothesis::GetObjectPool();			pool.freeObject(hypo);						}		return;	}}void HypothesisCollection::PruneToSize(size_t newSize){	if (m_hypos.size() > newSize) // ok, if not over the limit	{		priority_queue<float> bestScores;				// push all scores to a heap		// (but never push scores below m_bestScore+m_beamThreshold)		iterator iter = m_hypos.begin();		float score = 0;		while (iter != m_hypos.end())		{			Hypothesis *hypo = *iter;			score = hypo->GetTotalScore();			if (score > m_bestScore+m_beamThreshold) 			{				bestScores.push(score);			}			++iter;    }				// pop the top newSize scores (and ignore them, these are the scores of hyps that will remain)		//  ensure to never pop beyond heap size		size_t minNewSizeHeapSize = newSize > bestScores.size() ? bestScores.size() : newSize;		for (size_t i = 1 ; i < minNewSizeHeapSize ; i++)			bestScores.pop();						// and remember the threshold		float scoreThreshold = bestScores.top();		// TRACE_ERR( "threshold: " << scoreThreshold << endl);				// delete all hypos under score threshold		iter = m_hypos.begin();		while (iter != m_hypos.end())		{			Hypothesis *hypo = *iter;			float score = hypo->GetTotalScore();			if (score < scoreThreshold)				{					iterator iterRemove = iter++;					Remove(iterRemove);					StaticData::Instance()->GetSentenceStats().AddPruning();				}			else				{					++iter;				}		}		VERBOSE(3,", pruned to size " << size() << endl);				IFVERBOSE(3) 		{			TRACE_ERR("stack now contains: ");			for(iter = m_hypos.begin(); iter != m_hypos.end(); iter++) 			{				Hypothesis *hypo = *iter;				TRACE_ERR( hypo->GetId() << " (" << hypo->GetTotalScore() << ") ");			}			TRACE_ERR( endl);		}		// set the worstScore, so that newly generated hypotheses will not be added if worse than the worst in the stack		m_worstScore = scoreThreshold;	}}const Hypothesis *HypothesisCollection::GetBestHypothesis() const{	if (!m_hypos.empty())	{		const_iterator iter = m_hypos.begin();		Hypothesis *bestHypo = *iter;		while (++iter != m_hypos.end())		{			Hypothesis *hypo = *iter;			if (hypo->GetTotalScore() > bestHypo->GetTotalScore())				bestHypo = hypo;		}		return bestHypo;	}	return NULL;}// sorting helperstruct HypothesisSortDescending{	const bool operator()(const Hypothesis* hypo1, const Hypothesis* hypo2) const	{		return hypo1->GetTotalScore() > hypo2->GetTotalScore();	}};vector<const Hypothesis*> HypothesisCollection::GetSortedList() const{	vector<const Hypothesis*> ret; ret.reserve(m_hypos.size());	std::copy(m_hypos.begin(), m_hypos.end(), std::inserter(ret, ret.end()));	sort(ret.begin(), ret.end(), HypothesisSortDescending());	return ret;}void HypothesisCollection::InitializeArcs(){	// only necessary if n-best calculations are enabled	if (!m_nBestIsEnabled) return;	iterator iter;	for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter)	{		Hypothesis *mainHypo = *iter;		mainHypo->InitializeArcs();	}}TO_STRING_BODY(HypothesisCollection);// friendstd::ostream& operator<<(std::ostream& out, const HypothesisCollection& hypoColl){	HypothesisCollection::const_iterator iter;		for (iter = hypoColl.begin() ; iter != hypoColl.end() ; ++iter)	{		const Hypothesis &hypo = **iter;		out << hypo << endl;			}	return out;}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?