⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 manager.cpp.svn-base

📁 moses开源的机器翻译系统
💻 SVN-BASE
字号:
// $Id$// vim:tabstop=2/***********************************************************************Moses - factored phrase-based language decoderCopyright (C) 2006 University of EdinburghThis library is free software; you can redistribute it and/ormodify it under the terms of the GNU Lesser General PublicLicense as published by the Free Software Foundation; eitherversion 2.1 of the License, or (at your option) any later version.This library is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNULesser General Public License for more details.You should have received a copy of the GNU Lesser General PublicLicense along with this library; if not, write to the Free SoftwareFoundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA***********************************************************************/#ifdef WIN32#include <hash_set>#else#include <ext/hash_set>#endif#include <limits>#include <cmath>#include "Manager.h"#include "TypeDef.h"#include "Util.h"#include "TargetPhrase.h"#include "TrellisPath.h"#include "TrellisPathCollection.h"#include "TranslationOption.h"#include "LMList.h"#include "TranslationOptionCollection.h"using namespace std;#undef DEBUGLATTICE#ifdef DEBUGLATTICEstatic bool debug2 = false;#endifManager::Manager(InputType const& source):m_source(source),m_hypoStackColl(source.GetSize() + 1),m_transOptColl(source.CreateTranslationOptionCollection()),m_initialTargetPhrase(Output),m_start(clock()){	VERBOSE(1, "Translating: " << m_source << endl);	const StaticData &staticData = StaticData::Instance();	staticData.InitializeBeforeSentenceProcessing(source);	std::vector < HypothesisStack >::iterator iterStack;	for (iterStack = m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack)	{		HypothesisStack &sourceHypoColl = *iterStack;		sourceHypoColl.SetMaxHypoStackSize(staticData.GetMaxHypoStackSize());		sourceHypoColl.SetBeamWidth(staticData.GetBeamWidth());	}}Manager::~Manager() {  delete m_transOptColl;	StaticData::Instance().CleanUpAfterSentenceProcessing();      	clock_t end = clock();	float et = (end - m_start);	et /= (float)CLOCKS_PER_SEC;	VERBOSE(1, "Translation took " << et << " seconds" << endl);	VERBOSE(1, "Finished translating" << endl);}/** * Main decoder loop that translates a sentence by expanding * hypotheses stack by stack, until the end of the sentence. */void Manager::ProcessSentence(){		const StaticData &staticData = StaticData::Instance();	staticData.ResetSentenceStats(m_source);	const vector <DecodeGraph*>			&decodeStepVL = staticData.GetDecodeStepVL();		// create list of all possible translations	// this is only valid if:	//		1. generation of source sentence is not done 1st	//		2. initial hypothesis factors are given in the sentence	//CreateTranslationOptions(m_source, phraseDictionary, lmListInitial);	m_transOptColl->CreateTranslationOptions(decodeStepVL);	// initial seed hypothesis: nothing translated, no words produced	{		Hypothesis *hypo = Hypothesis::Create(m_source, m_initialTargetPhrase);		m_hypoStackColl[0].AddPrune(hypo);	}		// go through each stack	std::vector < HypothesisStack >::iterator iterStack;	for (iterStack = m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack)	{		HypothesisStack &sourceHypoColl = *iterStack;		// the stack is pruned before processing (lazy pruning):		VERBOSE(3,"processing hypothesis from next stack");		sourceHypoColl.PruneToSize(staticData.GetMaxHypoStackSize());		VERBOSE(3,std::endl);		sourceHypoColl.CleanupArcList();		// go through each hypothesis on the stack and try to expand it		HypothesisStack::const_iterator iterHypo;		for (iterHypo = sourceHypoColl.begin() ; iterHypo != sourceHypoColl.end() ; ++iterHypo)			{				Hypothesis &hypothesis = **iterHypo;				ProcessOneHypothesis(hypothesis); // expand the hypothesis			}		// some logging		IFVERBOSE(2) { OutputHypoStackSize(); }	}	// some more logging	VERBOSE(2, staticData.GetSentenceStats());}/** Find all translation options to expand one hypothesis, trigger expansion * this is mostly a check for overlap with already covered words, and for * violation of reordering limits.  * \param hypothesis hypothesis to be expanded upon */void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis){	// since we check for reordering limits, its good to have that limit handy	int maxDistortion = StaticData::Instance().GetMaxDistortion();	bool isWordLattice = StaticData::Instance().GetInputType() == WordLatticeInput;	// no limit of reordering: only check for overlap	if (maxDistortion < 0)	{			const WordsBitmap hypoBitmap	= hypothesis.GetWordsBitmap();		const size_t hypoFirstGapPos	= hypoBitmap.GetFirstGapPos()								, sourceSize			= m_source.GetSize();		for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos)		{      size_t maxSize = sourceSize - startPos;      size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();      maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;			for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos)			{				if (!hypoBitmap.Overlap(WordsRange(startPos, endPos)))				{					ExpandAllHypotheses(hypothesis												, m_transOptColl->GetTranslationOptionList(WordsRange(startPos, endPos)));				}			}		}		return; // done with special case (no reordering limit)	}	// if there are reordering limits, make sure it is not violated	// the coverage bitmap is handy here (and the position of the first gap)	const WordsBitmap hypoBitmap = hypothesis.GetWordsBitmap();	const size_t	hypoFirstGapPos	= hypoBitmap.GetFirstGapPos()							, sourceSize			= m_source.GetSize();		// MAIN LOOP. go through each possible hypo	for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos)	{    size_t maxSize = sourceSize - startPos;    size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();#ifdef DEBUGLATTICE		const int INTEREST = 114;#endif    maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;		if (isWordLattice) {			// first question: is there a path from the closest translated word to the left			// of the hypothesized extension to the start of the hypothesized extension?			size_t closestLeft = hypoBitmap.GetEdgeToTheLeftOf(startPos);			if (closestLeft != startPos && closestLeft != 0 && !m_source.CanIGetFromAToB(closestLeft+1, startPos+1)) {#ifdef DEBUGLATTICE			  if (startPos == INTEREST) {				  std::cerr << hypothesis <<"\n";				  std::cerr << "Die0: " << (closestLeft) << " " << startPos << "\n";				}#endif			  continue;			}		}		for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos)		{			// check for overlap		  WordsRange extRange(startPos, endPos);#ifdef DEBUGLATTICE	    bool debug = (startPos > (INTEREST-25) && hypoFirstGapPos > 0 && startPos <= INTEREST && endPos >=INTEREST && endPos < (INTEREST+150) && hypoFirstGapPos == INTEREST);	    debug2 = debug && (startPos==INTEREST && endPos >=INTEREST);			if (debug) { std::cerr << (startPos==INTEREST? "LOOK-->" : "") << "XP: " << hypothesis << "\next: " << extRange << "\n"; }#endif			if (hypoBitmap.Overlap(extRange) ||			      (isWordLattice && (!m_source.IsCoveragePossible(extRange) ||					                     !m_source.IsExtensionPossible(hypothesis.GetCurrSourceWordsRange(), extRange))					  )			   )		  {#ifdef DEBUGLATTICE			  if (debug) { std::cerr << "Die1\n"; }#endif			  continue;			}		  // TODO ask second question here			if (isWordLattice) {				size_t closestRight = hypoBitmap.GetEdgeToTheRightOf(endPos);				if (closestRight != endPos && closestRight != sourceSize && !m_source.CanIGetFromAToB(endPos, closestRight)) {#ifdef DEBUGLATTICE			    if (debug) { std::cerr << "Can't get to right edge\n"; }#endif				  continue;				}			}						bool leftMostEdge = (hypoFirstGapPos == startPos);						// any length extension is okay if starting at left-most edge			if (leftMostEdge)			{#ifdef DEBUGLATTICE			  size_t vl = StaticData::Instance().GetVerboseLevel();			  if (debug2) { std::cerr << "Ext!\n"; StaticData::Instance().SetVerboseLevel(4); }#endif				ExpandAllHypotheses(hypothesis							,m_transOptColl->GetTranslationOptionList(extRange));#ifdef DEBUGLATTICE			  StaticData::Instance().SetVerboseLevel(vl);#endif			}			// starting somewhere other than left-most edge, use caution			else			{				// the basic idea is this: we would like to translate a phrase starting				// from a position further right than the left-most open gap. The				// distortion penalty for the following phrase will be computed relative				// to the ending position of the current extension, so we ask now what				// its maximum value will be (which will always be the value of the				// hypothesis starting at the left-most edge).  If this vlaue is than				// the distortion limit, we don't allow this extension to be made.				WordsRange bestNextExtension(hypoFirstGapPos, hypoFirstGapPos);				int required_distortion =					m_source.ComputeDistortionDistance(extRange, bestNextExtension);				if (required_distortion <= maxDistortion) {					ExpandAllHypotheses(hypothesis								,m_transOptColl->GetTranslationOptionList(extRange));				}#ifdef DEBUGLATTICE				else			    if (debug) { std::cerr << "Distortion violation\n"; }#endif			}		}	}}/** * Expand a hypothesis given a list of translation options * \param hypothesis hypothesis to be expanded upon * \param transOptList list of translation options to be applied */void Manager::ExpandAllHypotheses(const Hypothesis &hypothesis,const TranslationOptionList &transOptList){	TranslationOptionList::const_iterator iter;	for (iter = transOptList.begin() ; iter != transOptList.end() ; ++iter)	{		ExpandHypothesis(hypothesis, **iter);	}}/** * Expand one hypothesis with a translation option. * this involves initial creation, scoring and adding it to the proper stack * \param hypothesis hypothesis to be expanded upon * \param transOpt translation option (phrase translation)  *        that is applied to create the new hypothesis */void Manager::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt) {	// create hypothesis and calculate all its scores#ifdef DEBUGLATTICE	if (debug2) { std::cerr << "::EXT: " << transOpt << "\n"; }#endif	Hypothesis *newHypo = hypothesis.CreateNext(transOpt);	newHypo->CalcScore(m_transOptColl->GetFutureScore());		// logging for the curious	IFVERBOSE(3) {		const StaticData &staticData = StaticData::Instance();	  newHypo->PrintHypothesis(m_source														, staticData.GetWeightDistortion()														, staticData.GetWeightWordPenalty());	}	// add to hypothesis stack	size_t wordsTranslated = newHypo->GetWordsBitmap().GetNumWordsCovered();		m_hypoStackColl[wordsTranslated].AddPrune(newHypo);}/** * Find best hypothesis on the last stack. * This is the end point of the best translation, which can be traced back from here */const Hypothesis *Manager::GetBestHypothesis() const{	const HypothesisStack &hypoColl = m_hypoStackColl.back();	return hypoColl.GetBestHypothesis();}/** * Logging of hypothesis stack sizes */void Manager::OutputHypoStackSize(){	std::vector < HypothesisStack >::const_iterator iterStack = m_hypoStackColl.begin();	TRACE_ERR( "Stack sizes: " << (int)iterStack->size());	for (++iterStack; iterStack != m_hypoStackColl.end() ; ++iterStack)	{		TRACE_ERR( ", " << (int)iterStack->size());	}	TRACE_ERR( endl);}/** * Logging of hypothesis stack contents * \param stack number of stack to be reported, report all stacks if 0  */void Manager::OutputHypoStack(int stack){	if (stack >= 0)	{		TRACE_ERR( "Stack " << stack << ": " << endl << m_hypoStackColl[stack] << endl);	}	else	{ // all stacks		int i = 0;		vector < HypothesisStack >::iterator iterStack;		for (iterStack = m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack)		{			HypothesisStack &hypoColl = *iterStack;			TRACE_ERR( "Stack " << i++ << ": " << endl << hypoColl << endl);		}	}}/** * After decoding, the hypotheses in the stacks and additional arcs * form a search graph that can be mined for n-best lists. * The heavy lifting is done in the TrellisPath and TrellisPathCollection * this function controls this for one sentence. * * \param count the number of n-best translations to produce * \param ret holds the n-best list that was calculated */void Manager::CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct) const{	if (count <= 0)		return;	vector<const Hypothesis*> sortedPureHypo = m_hypoStackColl.back().GetSortedList();	if (sortedPureHypo.size() == 0)		return;	TrellisPathCollection contenders;	set<Phrase> distinctHyps;	// add all pure paths	vector<const Hypothesis*>::const_iterator iterBestHypo;	for (iterBestHypo = sortedPureHypo.begin() 			; iterBestHypo != sortedPureHypo.end()			; ++iterBestHypo)	{		contenders.Add(new TrellisPath(*iterBestHypo));	}	// MAIN loop	for (size_t iteration = 0 ; (onlyDistinct ? distinctHyps.size() : ret.GetSize()) < count && contenders.GetSize() > 0 && (iteration < count * 20) ; iteration++)	{		// get next best from list of contenders		TrellisPath *path = contenders.pop();		assert(path);		if(onlyDistinct)		{			Phrase tgtPhrase = path->GetSurfacePhrase();			distinctHyps.insert(tgtPhrase).second;		}				ret.Add(path);		// create deviations from current best		path->CreateDeviantPaths(contenders);				if(onlyDistinct)		{			const size_t nBestFactor = StaticData::Instance().GetNBestFactor();			if (nBestFactor > 0)				contenders.Prune(count * nBestFactor);		}		else		{			contenders.Prune(count);		}	}}void Manager::CalcDecoderStatistics() const {  const Hypothesis *hypo = GetBestHypothesis();	if (hypo != NULL)  {		StaticData::Instance().GetSentenceStats().CalcFinalStats(*hypo);    IFVERBOSE(2) {		 	if (hypo != NULL) {		   	string buff;		  	string buff2;		   	TRACE_ERR( "Source and Target Units:"		 							<< *StaticData::Instance().GetInput());				buff2.insert(0,"] ");				buff2.insert(0,(hypo->GetCurrTargetPhrase()).ToString());				buff2.insert(0,":");				buff2.insert(0,(hypo->GetCurrSourceWordsRange()).ToString());				buff2.insert(0,"[");								hypo = hypo->GetPrevHypo();				while (hypo != NULL) {					//dont print out the empty final hypo				  buff.insert(0,buff2);				  buff2.clear();				  buff2.insert(0,"] ");				  buff2.insert(0,(hypo->GetCurrTargetPhrase()).ToString());				  buff2.insert(0,":");				  buff2.insert(0,(hypo->GetCurrSourceWordsRange()).ToString());				  buff2.insert(0,"[");				  hypo = hypo->GetPrevHypo();				}				TRACE_ERR( buff << endl);      }    }  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -