translationoptioncollection.cpp.svn-base

来自「moses开源的机器翻译系统」· SVN-BASE 代码 · 共 593 行 · 第 1/2 页

SVN-BASE
593
字号
    }	IFVERBOSE(3)	{		      int total = 0;      for(size_t row=0; row<size; row++)      {        size_t maxSize = size - row;        size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();        maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;        for(size_t col=row; col<row+maxSize; col++)        {        	int count = GetTranslationOptionList(row, col).size();	        TRACE_ERR( "translation options spanning from  "	        				<< row <<" to "<< col <<" is "	        				<< count <<endl);       		total += count;        }      }      TRACE_ERR( "translation options generated in total: "<< total << endl);      for(size_t row=0; row<size; row++)        for(size_t col=row; col<size; col++)					TRACE_ERR( "future cost from "<< row <<" to "<< col <<" is "<< m_futureScore.GetScore(row, col) <<endl);    }}/** Create all possible translations from the phrase tables * for a particular input sentence. This implies applying all * translation and generation steps. Also computes future cost matrix. * \param decodeStepList list of decoding steps * \param factorCollection input sentence with all factors */void TranslationOptionCollection::CreateTranslationOptions(const vector <DecodeGraph*> &decodeStepVL){		// loop over all substrings of the source sentence, look them up	// in the phraseDictionary (which is the- possibly filtered-- phrase	// table loaded on initialization), generate TranslationOption objects	// for all phrases	for (size_t startVL = 0 ; startVL < decodeStepVL.size() ; startVL++) 	{	  const DecodeGraph &decodeStepList = *decodeStepVL[startVL];		for (size_t startPos = 0 ; startPos < m_source.GetSize() ; startPos++)		{      size_t maxSize = m_source.GetSize() - startPos;      size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();      maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;			for (size_t endPos = startPos ; endPos < startPos + maxSize ; endPos++)			{				CreateTranslationOptionsForRange( decodeStepList, startPos, endPos, true);				 			}		}	}	VERBOSE(3,"Translation Option Collection\n " << *this << endl);		ProcessUnknownWord(decodeStepVL);		// Prune	Prune();	// future score matrix	CalcFutureScore();	// Cached lex reodering costs	CacheLexReordering();}/** create translation options that exactly cover a specific input span.  * Called by CreateTranslationOptions() and ProcessUnknownWord() * \param decodeStepList list of decoding steps * \param factorCollection input sentence with all factors * \param startPos first position in input sentence * \param lastPos last position in input sentence  * \param adhereTableLimit whether phrase & generation table limits are adhered to */void TranslationOptionCollection::CreateTranslationOptionsForRange(																													 const DecodeGraph &decodeStepList																													 , size_t startPos																													 , size_t endPos																													 , bool adhereTableLimit){		if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos))	{	  Phrase *sourcePhrase = NULL; // can't initialise with substring, in case it's confusion network		// consult persistent (cross-sentence) cache for stored translation options		bool skipTransOptCreation = false				, useCache = StaticData::Instance().GetUseTransOptCache();		if (useCache) 		{		  const WordsRange wordsRange(startPos, endPos);		  sourcePhrase = new Phrase(m_source.GetSubString(wordsRange));		  			const TranslationOptionList *transOptList = StaticData::Instance().FindTransOptListInCache(*sourcePhrase);			// is phrase in cache?			if (transOptList != NULL) {				skipTransOptCreation = true;		    TranslationOptionList::const_iterator iterTransOpt;		    for (iterTransOpt = transOptList->begin() ; iterTransOpt != transOptList->end() ; ++iterTransOpt)				{					TranslationOption *transOpt = new TranslationOption(**iterTransOpt, wordsRange);					Add(transOpt);				}			}		} // useCache		if (!skipTransOptCreation)		{			// partial trans opt stored in here			PartialTranslOptColl* oldPtoc = new PartialTranslOptColl;			size_t totalEarlyPruned = 0;						// initial translation step			list <const DecodeStep* >::const_iterator iterStep = decodeStepList.begin();			const DecodeStep &decodeStep = **iterStep;			static_cast<const DecodeStepTranslation&>(decodeStep).ProcessInitialTranslation																(m_source, *oldPtoc																, startPos, endPos, adhereTableLimit );			// do rest of decode steps			int indexStep = 0;			for (++iterStep ; iterStep != decodeStepList.end() ; ++iterStep) 			{				const DecodeStep &decodeStep = **iterStep;				PartialTranslOptColl* newPtoc = new PartialTranslOptColl;				// go thru each intermediate trans opt just created				const vector<TranslationOption*>& partTransOptList = oldPtoc->GetList();				vector<TranslationOption*>::const_iterator iterPartialTranslOpt;				for (iterPartialTranslOpt = partTransOptList.begin() ; iterPartialTranslOpt != partTransOptList.end() ; ++iterPartialTranslOpt)				{					TranslationOption &inputPartialTranslOpt = **iterPartialTranslOpt;					decodeStep.Process(inputPartialTranslOpt																		 , decodeStep																		 , *newPtoc																		 , this																		 , adhereTableLimit);				}				// last but 1 partial trans not required anymore				totalEarlyPruned += newPtoc->GetPrunedCount();				delete oldPtoc;				oldPtoc = newPtoc;				indexStep++;			} // for (++iterStep 				// add to fully formed translation option list			PartialTranslOptColl &lastPartialTranslOptColl	= *oldPtoc;			const vector<TranslationOption*>& partTransOptList = lastPartialTranslOptColl.GetList();			vector<TranslationOption*>::const_iterator iterColl;			for (iterColl = partTransOptList.begin() ; iterColl != partTransOptList.end() ; ++iterColl)			{				TranslationOption *transOpt = *iterColl;				transOpt->CalcScore();				Add(transOpt);			}			// storing translation options in persistent cache (kept across sentences) 			if (useCache) 			{				if (partTransOptList.size() > 0)				{					vector<TranslationOption*> cachedTransOptList = GetTranslationOptionList(startPos, endPos);					vector<TranslationOption*>::iterator iterList;					for (size_t i = 0 ; i < cachedTransOptList.size() ; ++i)					{						cachedTransOptList[i] = new TranslationOption(*cachedTransOptList[i]);					}						StaticData::Instance().AddTransOptListToCache(*sourcePhrase, cachedTransOptList);				}							}			lastPartialTranslOptColl.DetachAll();			totalEarlyPruned += oldPtoc->GetPrunedCount();			delete oldPtoc;			// TRACE_ERR( "Early translation options pruned: " << totalEarlyPruned << endl);		} // if (!skipTransOptCreation)		if (useCache) 			delete sourcePhrase;	} // if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos))	if ((StaticData::Instance().GetXmlInputType() != XmlPassThrough) && HasXmlOptionsOverlappingRange(startPos,endPos)) 	{		CreateXmlOptionsForRange(startPos, endPos);	} }	/** Check if this range overlaps with any XML options. This doesn't need to be an exact match, only an overlap.	 * by default, we don't support XML options. subclasses need to override this function.	 * called by CreateTranslationOptionsForRange()	 * \param startPos first position in input sentence	 * \param lastPos last position in input sentence 	 * \param adhereTableLimit whether phrase & generation table limits are adhered to	 */	bool TranslationOptionCollection::HasXmlOptionsOverlappingRange(size_t, size_t) const {		return false;		}		/** Populates the current Collection with XML options exactly covering the range specified. Default implementation does nothing.	 * called by CreateTranslationOptionsForRange()	 * \param startPos first position in input sentence	 * \param lastPos last position in input sentence 	 */	 void TranslationOptionCollection::CreateXmlOptionsForRange(size_t, size_t) {		//not implemented for base class	 };/** add translation option to the list * \param translationOption translation option to be added */void TranslationOptionCollection::Add(TranslationOption *translationOption){	const WordsRange &coverage = translationOption->GetSourceWordsRange();	m_collection[coverage.GetStartPos()][coverage.GetEndPos() - coverage.GetStartPos()].push_back(translationOption);}TO_STRING_BODY(TranslationOptionCollection);inline std::ostream& operator<<(std::ostream& out, const TranslationOptionCollection& coll){	size_t size = coll.GetSize();	for (size_t startPos = 0 ; startPos < size ; ++startPos)	{    size_t maxSize = size - startPos;    size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();    maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;		for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos)		{			TranslationOptionList fullList = coll.GetTranslationOptionList(startPos, endPos);			size_t sizeFull = fullList.size();		  for (size_t i = 0; i < sizeFull; i++) 			{			  out << *fullList[i] << std::endl;			}		}	}   //std::vector< std::vector< TranslationOptionList > >::const_iterator i = coll.m_collection.begin();	//size_t j = 0;	//for (; i!=coll.m_collection.end(); ++i) {    //out << "s[" << j++ << "].size=" << i->size() << std::endl;	//}	return out;}void TranslationOptionCollection::CacheLexReordering(){	const std::vector<LexicalReordering*> &lexReorderingModels = StaticData::Instance().GetReorderModels();	std::vector<LexicalReordering*>::const_iterator iterLexreordering;	for (iterLexreordering = lexReorderingModels.begin() ; iterLexreordering != lexReorderingModels.end() ; ++iterLexreordering)	{		LexicalReordering &lexreordering = **iterLexreordering;		for (size_t startPos = 0 ; startPos < m_source.GetSize() ; startPos++)		{      size_t maxSize =  m_source.GetSize() - startPos;      size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();      maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;			for (size_t endPos = startPos ; endPos < startPos + maxSize; endPos++)			{				TranslationOptionList &transOptList = GetTranslationOptionList( startPos, endPos);								TranslationOptionList::iterator iterTransOpt;				for(iterTransOpt = transOptList.begin() ; iterTransOpt != transOptList.end() ; ++iterTransOpt) 				{					TranslationOption &transOpt = **iterTransOpt;					const Phrase *sourcePhrase = transOpt.GetSourcePhrase();					if (sourcePhrase)					{						Score score = lexreordering.GetProb(*sourcePhrase																							, transOpt.GetTargetPhrase());						// TODO should have better handling of unknown reordering entries						if (!score.empty())							transOpt.CacheReorderingProb(lexreordering, score);					}				}			}		}	}}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?