translationoptioncollection.cpp.svn-base

来自「解码器是基于短语的统计机器翻译系统的核心模块」· SVN-BASE 代码 · 共 460 行 · 第 1/2 页

SVN-BASE
460
字号
	* This matrix used in search.	* Call this function once translation option collection has been filled with translation options*/void TranslationOptionCollection::CalcFutureScore(){  // setup the matrix (ignore lower triangle, set upper triangle to -inf  size_t size = m_source.GetSize(); // the width of the matrix  for(size_t row=0; row<size; row++) {    for(size_t col=row; col<size; col++) {      m_futureScore.SetScore(row, col, -numeric_limits<float>::infinity());    }  }  // walk all the translation options and record the cheapest option for each span	for (size_t startPos = 0 ; startPos < m_source.GetSize() ; ++startPos)	{		for (size_t endPos = startPos ; endPos < m_source.GetSize() ; ++endPos)		{			TranslationOptionList &transOptList = GetTranslationOptionList(startPos, endPos);			TranslationOptionList::const_iterator iterTransOpt;			for(iterTransOpt = transOptList.begin() ; iterTransOpt != transOptList.end() ; ++iterTransOpt) 			{				const TranslationOption &transOpt = **iterTransOpt;				float score = transOpt.GetFutureScore();				if (score > m_futureScore.GetScore(startPos, endPos))					m_futureScore.SetScore(startPos, endPos, score);			}		}	}  // now fill all the cells in the strictly upper triangle  //   there is no way to modify the diagonal now, in the case  //   where no translation option covers a single-word span,  //   we leave the +inf in the matrix  // like in chart parsing we want each cell to contain the highest score  // of the full-span trOpt or the sum of scores of joining two smaller spans	for(size_t colstart = 1; colstart < size ; colstart++) {		for(size_t diagshift = 0; diagshift < size-colstart ; diagshift++) {            size_t startPos = diagshift;            size_t endPos = colstart+diagshift;			for(size_t joinAt = startPos; joinAt < endPos ; joinAt++)  {              float joinedScore = m_futureScore.GetScore(startPos, joinAt)                                + m_futureScore.GetScore(joinAt+1, endPos);              /* // uncomment to see the cell filling scheme              TRACE_ERR( "[" <<startPos<<","<<endPos<<"] <-? ["<<startPos<<","<<joinAt<<"]+["<<joinAt+1<<","<<endPos                << "] (colstart: "<<colstart<<", diagshift: "<<diagshift<<")"<<endl);              */              if (joinedScore > m_futureScore.GetScore(startPos, endPos))                m_futureScore.SetScore(startPos, endPos, joinedScore);            }        }    }	IFVERBOSE(3)	{		      int total = 0;      for(size_t row=0; row<size; row++)      {        for(size_t col=row; col<size; col++)        {        	int count = GetTranslationOptionList(row, col).size();	        TRACE_ERR( "translation options spanning from  "	        				<< row <<" to "<< col <<" is "	        				<< count <<endl);       		total += count;        }      }      TRACE_ERR( "translation options generated in total: "<< total << endl);      for(size_t row=0; row<size; row++)        for(size_t col=row; col<size; col++)					TRACE_ERR( "future cost from "<< row <<" to "<< col <<" is "<< m_futureScore.GetScore(row, col) <<endl);    }}/** Create all possible translations from the phrase tables * for a particular input sentence. This implies applying all * translation and generation steps. Also computes future cost matrix. * \param decodeStepList list of decoding steps * \param factorCollection input sentence with all factors */void TranslationOptionCollection::CreateTranslationOptions(const list < DecodeStep* > &decodeStepList																													 , FactorCollection &factorCollection){	m_factorCollection = &factorCollection;		// loop over all substrings of the source sentence, look them up	// in the phraseDictionary (which is the- possibly filtered-- phrase	// table loaded on initialization), generate TranslationOption objects	// for all phrases		for (size_t startPos = 0 ; startPos < m_source.GetSize() ; startPos++)	{		for (size_t endPos = startPos ; endPos < m_source.GetSize() ; endPos++)		{			CreateTranslationOptionsForRange( decodeStepList, factorCollection, startPos, endPos, true);		}	}	ProcessUnknownWord(decodeStepList, factorCollection);		// Prune	Prune();	// future score matrix	CalcFutureScore();}/** create translation options that exactly cover a specific input span.  * Called by CreateTranslationOptions() and ProcessUnknownWord() * \param decodeStepList list of decoding steps * \param factorCollection input sentence with all factors * \param startPos first position in input sentence * \param lastPos last position in input sentence  * \param adhereTableLimit whether phrase & generation table limits are adhered to */void TranslationOptionCollection::CreateTranslationOptionsForRange(																													 const list < DecodeStep* > &decodeStepList																													 , FactorCollection &factorCollection																													 , size_t startPos																													 , size_t endPos																													 , bool adhereTableLimit){	// partial trans opt stored in here	PartialTranslOptColl* oldPtoc = new PartialTranslOptColl;		// initial translation step	list < DecodeStep* >::const_iterator iterStep = decodeStepList.begin();	const DecodeStep &decodeStep = **iterStep;	ProcessInitialTranslation(decodeStep, factorCollection														, *oldPtoc														, startPos, endPos, adhereTableLimit );	// do rest of decode steps	size_t totalEarlyPruned = 0;	int indexStep = 0;	for (++iterStep ; iterStep != decodeStepList.end() ; ++iterStep) 		{			const DecodeStep &decodeStep = **iterStep;			PartialTranslOptColl* newPtoc = new PartialTranslOptColl;			// go thru each intermediate trans opt just created			const vector<TranslationOption*>& partTransOptList = oldPtoc->GetList();			vector<TranslationOption*>::const_iterator iterPartialTranslOpt;			for (iterPartialTranslOpt = partTransOptList.begin() ; iterPartialTranslOpt != partTransOptList.end() ; ++iterPartialTranslOpt)			{				TranslationOption &inputPartialTranslOpt = **iterPartialTranslOpt;				decodeStep.Process(inputPartialTranslOpt																	 , decodeStep																	 , *newPtoc																	 , factorCollection																	 , this																	 , adhereTableLimit);			}			// last but 1 partial trans not required anymore			totalEarlyPruned += newPtoc->GetPrunedCount();			delete oldPtoc;			oldPtoc = newPtoc;			indexStep++;		} // for (++iterStep 	// add to fully formed translation option list	PartialTranslOptColl &lastPartialTranslOptColl	= *oldPtoc;	const vector<TranslationOption*>& partTransOptList = lastPartialTranslOptColl.GetList();	vector<TranslationOption*>::const_iterator iterColl;	for (iterColl = partTransOptList.begin() ; iterColl != partTransOptList.end() ; ++iterColl)		{			TranslationOption *transOpt = *iterColl;			transOpt->CalcScore();			Add(transOpt);		}	lastPartialTranslOptColl.DetachAll();	totalEarlyPruned += oldPtoc->GetPrunedCount();	delete oldPtoc;	// TRACE_ERR( "Early translation options pruned: " << totalEarlyPruned << endl);}/** initialize list of partial translation options by applying the first translation step 	* Ideally, this function should be in DecodeStepTranslation class	*/void TranslationOptionCollection::ProcessInitialTranslation(															const DecodeStep &decodeStep															, FactorCollection &factorCollection															, PartialTranslOptColl &outputPartialTranslOptColl															, size_t startPos															, size_t endPos															, bool adhereTableLimit){	const PhraseDictionary &phraseDictionary = decodeStep.GetPhraseDictionary();	const size_t tableLimit = phraseDictionary.GetTableLimit();	const WordsRange wordsRange(startPos, endPos);	const TargetPhraseCollection *phraseColl =	phraseDictionary.GetTargetPhraseCollection(m_source,wordsRange); 	if (phraseColl != NULL)	{		VERBOSE(3,"[" << m_source.GetSubString(wordsRange) << "; " << startPos << "-" << endPos << "]\n");					TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;		iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;				for (iterTargetPhrase = phraseColl->begin() ; iterTargetPhrase != iterEnd ; ++iterTargetPhrase)		{			const TargetPhrase	&targetPhrase = **iterTargetPhrase;			outputPartialTranslOptColl.Add ( new TranslationOption(wordsRange, targetPhrase) );						VERBOSE(3,"\t" << targetPhrase << "\n");		}		VERBOSE(3,endl);	}}/** add translation option to the list * \param translationOption translation option to be added */void TranslationOptionCollection::Add(const TranslationOption *translationOption){	const WordsRange &coverage = translationOption->GetSourceWordsRange();	m_collection[coverage.GetStartPos()][coverage.GetEndPos() - coverage.GetStartPos()].push_back(translationOption);}TO_STRING_BODY(TranslationOptionCollection);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?