translationoptioncollection.cpp.svn-base
来自「解码器是基于短语的统计机器翻译系统的核心模块」· SVN-BASE 代码 · 共 460 行 · 第 1/2 页
SVN-BASE
460 行
* This matrix used in search. * Call this function once translation option collection has been filled with translation options*/void TranslationOptionCollection::CalcFutureScore(){ // setup the matrix (ignore lower triangle, set upper triangle to -inf size_t size = m_source.GetSize(); // the width of the matrix for(size_t row=0; row<size; row++) { for(size_t col=row; col<size; col++) { m_futureScore.SetScore(row, col, -numeric_limits<float>::infinity()); } } // walk all the translation options and record the cheapest option for each span for (size_t startPos = 0 ; startPos < m_source.GetSize() ; ++startPos) { for (size_t endPos = startPos ; endPos < m_source.GetSize() ; ++endPos) { TranslationOptionList &transOptList = GetTranslationOptionList(startPos, endPos); TranslationOptionList::const_iterator iterTransOpt; for(iterTransOpt = transOptList.begin() ; iterTransOpt != transOptList.end() ; ++iterTransOpt) { const TranslationOption &transOpt = **iterTransOpt; float score = transOpt.GetFutureScore(); if (score > m_futureScore.GetScore(startPos, endPos)) m_futureScore.SetScore(startPos, endPos, score); } } } // now fill all the cells in the strictly upper triangle // there is no way to modify the diagonal now, in the case // where no translation option covers a single-word span, // we leave the +inf in the matrix // like in chart parsing we want each cell to contain the highest score // of the full-span trOpt or the sum of scores of joining two smaller spans for(size_t colstart = 1; colstart < size ; colstart++) { for(size_t diagshift = 0; diagshift < size-colstart ; diagshift++) { size_t startPos = diagshift; size_t endPos = colstart+diagshift; for(size_t joinAt = startPos; joinAt < endPos ; joinAt++) { float joinedScore = m_futureScore.GetScore(startPos, joinAt) + m_futureScore.GetScore(joinAt+1, endPos); /* // uncomment to see the cell filling scheme TRACE_ERR( "[" <<startPos<<","<<endPos<<"] <-? ["<<startPos<<","<<joinAt<<"]+["<<joinAt+1<<","<<endPos << "] (colstart: "<<colstart<<", diagshift: "<<diagshift<<")"<<endl); */ if (joinedScore > m_futureScore.GetScore(startPos, endPos)) m_futureScore.SetScore(startPos, endPos, joinedScore); } } } IFVERBOSE(3) { int total = 0; for(size_t row=0; row<size; row++) { for(size_t col=row; col<size; col++) { int count = GetTranslationOptionList(row, col).size(); TRACE_ERR( "translation options spanning from " << row <<" to "<< col <<" is " << count <<endl); total += count; } } TRACE_ERR( "translation options generated in total: "<< total << endl); for(size_t row=0; row<size; row++) for(size_t col=row; col<size; col++) TRACE_ERR( "future cost from "<< row <<" to "<< col <<" is "<< m_futureScore.GetScore(row, col) <<endl); }}/** Create all possible translations from the phrase tables * for a particular input sentence. This implies applying all * translation and generation steps. Also computes future cost matrix. * \param decodeStepList list of decoding steps * \param factorCollection input sentence with all factors */void TranslationOptionCollection::CreateTranslationOptions(const list < DecodeStep* > &decodeStepList , FactorCollection &factorCollection){ m_factorCollection = &factorCollection; // loop over all substrings of the source sentence, look them up // in the phraseDictionary (which is the- possibly filtered-- phrase // table loaded on initialization), generate TranslationOption objects // for all phrases for (size_t startPos = 0 ; startPos < m_source.GetSize() ; startPos++) { for (size_t endPos = startPos ; endPos < m_source.GetSize() ; endPos++) { CreateTranslationOptionsForRange( decodeStepList, factorCollection, startPos, endPos, true); } } ProcessUnknownWord(decodeStepList, factorCollection); // Prune Prune(); // future score matrix CalcFutureScore();}/** create translation options that exactly cover a specific input span. * Called by CreateTranslationOptions() and ProcessUnknownWord() * \param decodeStepList list of decoding steps * \param factorCollection input sentence with all factors * \param startPos first position in input sentence * \param lastPos last position in input sentence * \param adhereTableLimit whether phrase & generation table limits are adhered to */void TranslationOptionCollection::CreateTranslationOptionsForRange( const list < DecodeStep* > &decodeStepList , FactorCollection &factorCollection , size_t startPos , size_t endPos , bool adhereTableLimit){ // partial trans opt stored in here PartialTranslOptColl* oldPtoc = new PartialTranslOptColl; // initial translation step list < DecodeStep* >::const_iterator iterStep = decodeStepList.begin(); const DecodeStep &decodeStep = **iterStep; ProcessInitialTranslation(decodeStep, factorCollection , *oldPtoc , startPos, endPos, adhereTableLimit ); // do rest of decode steps size_t totalEarlyPruned = 0; int indexStep = 0; for (++iterStep ; iterStep != decodeStepList.end() ; ++iterStep) { const DecodeStep &decodeStep = **iterStep; PartialTranslOptColl* newPtoc = new PartialTranslOptColl; // go thru each intermediate trans opt just created const vector<TranslationOption*>& partTransOptList = oldPtoc->GetList(); vector<TranslationOption*>::const_iterator iterPartialTranslOpt; for (iterPartialTranslOpt = partTransOptList.begin() ; iterPartialTranslOpt != partTransOptList.end() ; ++iterPartialTranslOpt) { TranslationOption &inputPartialTranslOpt = **iterPartialTranslOpt; decodeStep.Process(inputPartialTranslOpt , decodeStep , *newPtoc , factorCollection , this , adhereTableLimit); } // last but 1 partial trans not required anymore totalEarlyPruned += newPtoc->GetPrunedCount(); delete oldPtoc; oldPtoc = newPtoc; indexStep++; } // for (++iterStep // add to fully formed translation option list PartialTranslOptColl &lastPartialTranslOptColl = *oldPtoc; const vector<TranslationOption*>& partTransOptList = lastPartialTranslOptColl.GetList(); vector<TranslationOption*>::const_iterator iterColl; for (iterColl = partTransOptList.begin() ; iterColl != partTransOptList.end() ; ++iterColl) { TranslationOption *transOpt = *iterColl; transOpt->CalcScore(); Add(transOpt); } lastPartialTranslOptColl.DetachAll(); totalEarlyPruned += oldPtoc->GetPrunedCount(); delete oldPtoc; // TRACE_ERR( "Early translation options pruned: " << totalEarlyPruned << endl);}/** initialize list of partial translation options by applying the first translation step * Ideally, this function should be in DecodeStepTranslation class */void TranslationOptionCollection::ProcessInitialTranslation( const DecodeStep &decodeStep , FactorCollection &factorCollection , PartialTranslOptColl &outputPartialTranslOptColl , size_t startPos , size_t endPos , bool adhereTableLimit){ const PhraseDictionary &phraseDictionary = decodeStep.GetPhraseDictionary(); const size_t tableLimit = phraseDictionary.GetTableLimit(); const WordsRange wordsRange(startPos, endPos); const TargetPhraseCollection *phraseColl = phraseDictionary.GetTargetPhraseCollection(m_source,wordsRange); if (phraseColl != NULL) { VERBOSE(3,"[" << m_source.GetSubString(wordsRange) << "; " << startPos << "-" << endPos << "]\n"); TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd; iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit; for (iterTargetPhrase = phraseColl->begin() ; iterTargetPhrase != iterEnd ; ++iterTargetPhrase) { const TargetPhrase &targetPhrase = **iterTargetPhrase; outputPartialTranslOptColl.Add ( new TranslationOption(wordsRange, targetPhrase) ); VERBOSE(3,"\t" << targetPhrase << "\n"); } VERBOSE(3,endl); }}/** add translation option to the list * \param translationOption translation option to be added */void TranslationOptionCollection::Add(const TranslationOption *translationOption){ const WordsRange &coverage = translationOption->GetSourceWordsRange(); m_collection[coverage.GetStartPos()][coverage.GetEndPos() - coverage.GetStartPos()].push_back(translationOption);}TO_STRING_BODY(TranslationOptionCollection);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?