translationoptioncollection.cpp.svn-base
来自「moses开源的机器翻译系统」· SVN-BASE 代码 · 共 593 行 · 第 1/2 页
SVN-BASE
593 行
} IFVERBOSE(3) { int total = 0; for(size_t row=0; row<size; row++) { size_t maxSize = size - row; size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength(); maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase; for(size_t col=row; col<row+maxSize; col++) { int count = GetTranslationOptionList(row, col).size(); TRACE_ERR( "translation options spanning from " << row <<" to "<< col <<" is " << count <<endl); total += count; } } TRACE_ERR( "translation options generated in total: "<< total << endl); for(size_t row=0; row<size; row++) for(size_t col=row; col<size; col++) TRACE_ERR( "future cost from "<< row <<" to "<< col <<" is "<< m_futureScore.GetScore(row, col) <<endl); }}/** Create all possible translations from the phrase tables * for a particular input sentence. This implies applying all * translation and generation steps. Also computes future cost matrix. * \param decodeStepList list of decoding steps * \param factorCollection input sentence with all factors */void TranslationOptionCollection::CreateTranslationOptions(const vector <DecodeGraph*> &decodeStepVL){ // loop over all substrings of the source sentence, look them up // in the phraseDictionary (which is the- possibly filtered-- phrase // table loaded on initialization), generate TranslationOption objects // for all phrases for (size_t startVL = 0 ; startVL < decodeStepVL.size() ; startVL++) { const DecodeGraph &decodeStepList = *decodeStepVL[startVL]; for (size_t startPos = 0 ; startPos < m_source.GetSize() ; startPos++) { size_t maxSize = m_source.GetSize() - startPos; size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength(); maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase; for (size_t endPos = startPos ; endPos < startPos + maxSize ; endPos++) { CreateTranslationOptionsForRange( decodeStepList, startPos, endPos, true); } } } VERBOSE(3,"Translation Option Collection\n " << *this << endl); ProcessUnknownWord(decodeStepVL); // Prune Prune(); // future score matrix CalcFutureScore(); // Cached lex reodering costs CacheLexReordering();}/** create translation options that exactly cover a specific input span. * Called by CreateTranslationOptions() and ProcessUnknownWord() * \param decodeStepList list of decoding steps * \param factorCollection input sentence with all factors * \param startPos first position in input sentence * \param lastPos last position in input sentence * \param adhereTableLimit whether phrase & generation table limits are adhered to */void TranslationOptionCollection::CreateTranslationOptionsForRange( const DecodeGraph &decodeStepList , size_t startPos , size_t endPos , bool adhereTableLimit){ if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos)) { Phrase *sourcePhrase = NULL; // can't initialise with substring, in case it's confusion network // consult persistent (cross-sentence) cache for stored translation options bool skipTransOptCreation = false , useCache = StaticData::Instance().GetUseTransOptCache(); if (useCache) { const WordsRange wordsRange(startPos, endPos); sourcePhrase = new Phrase(m_source.GetSubString(wordsRange)); const TranslationOptionList *transOptList = StaticData::Instance().FindTransOptListInCache(*sourcePhrase); // is phrase in cache? if (transOptList != NULL) { skipTransOptCreation = true; TranslationOptionList::const_iterator iterTransOpt; for (iterTransOpt = transOptList->begin() ; iterTransOpt != transOptList->end() ; ++iterTransOpt) { TranslationOption *transOpt = new TranslationOption(**iterTransOpt, wordsRange); Add(transOpt); } } } // useCache if (!skipTransOptCreation) { // partial trans opt stored in here PartialTranslOptColl* oldPtoc = new PartialTranslOptColl; size_t totalEarlyPruned = 0; // initial translation step list <const DecodeStep* >::const_iterator iterStep = decodeStepList.begin(); const DecodeStep &decodeStep = **iterStep; static_cast<const DecodeStepTranslation&>(decodeStep).ProcessInitialTranslation (m_source, *oldPtoc , startPos, endPos, adhereTableLimit ); // do rest of decode steps int indexStep = 0; for (++iterStep ; iterStep != decodeStepList.end() ; ++iterStep) { const DecodeStep &decodeStep = **iterStep; PartialTranslOptColl* newPtoc = new PartialTranslOptColl; // go thru each intermediate trans opt just created const vector<TranslationOption*>& partTransOptList = oldPtoc->GetList(); vector<TranslationOption*>::const_iterator iterPartialTranslOpt; for (iterPartialTranslOpt = partTransOptList.begin() ; iterPartialTranslOpt != partTransOptList.end() ; ++iterPartialTranslOpt) { TranslationOption &inputPartialTranslOpt = **iterPartialTranslOpt; decodeStep.Process(inputPartialTranslOpt , decodeStep , *newPtoc , this , adhereTableLimit); } // last but 1 partial trans not required anymore totalEarlyPruned += newPtoc->GetPrunedCount(); delete oldPtoc; oldPtoc = newPtoc; indexStep++; } // for (++iterStep // add to fully formed translation option list PartialTranslOptColl &lastPartialTranslOptColl = *oldPtoc; const vector<TranslationOption*>& partTransOptList = lastPartialTranslOptColl.GetList(); vector<TranslationOption*>::const_iterator iterColl; for (iterColl = partTransOptList.begin() ; iterColl != partTransOptList.end() ; ++iterColl) { TranslationOption *transOpt = *iterColl; transOpt->CalcScore(); Add(transOpt); } // storing translation options in persistent cache (kept across sentences) if (useCache) { if (partTransOptList.size() > 0) { vector<TranslationOption*> cachedTransOptList = GetTranslationOptionList(startPos, endPos); vector<TranslationOption*>::iterator iterList; for (size_t i = 0 ; i < cachedTransOptList.size() ; ++i) { cachedTransOptList[i] = new TranslationOption(*cachedTransOptList[i]); } StaticData::Instance().AddTransOptListToCache(*sourcePhrase, cachedTransOptList); } } lastPartialTranslOptColl.DetachAll(); totalEarlyPruned += oldPtoc->GetPrunedCount(); delete oldPtoc; // TRACE_ERR( "Early translation options pruned: " << totalEarlyPruned << endl); } // if (!skipTransOptCreation) if (useCache) delete sourcePhrase; } // if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos)) if ((StaticData::Instance().GetXmlInputType() != XmlPassThrough) && HasXmlOptionsOverlappingRange(startPos,endPos)) { CreateXmlOptionsForRange(startPos, endPos); } } /** Check if this range overlaps with any XML options. This doesn't need to be an exact match, only an overlap. * by default, we don't support XML options. subclasses need to override this function. * called by CreateTranslationOptionsForRange() * \param startPos first position in input sentence * \param lastPos last position in input sentence * \param adhereTableLimit whether phrase & generation table limits are adhered to */ bool TranslationOptionCollection::HasXmlOptionsOverlappingRange(size_t, size_t) const { return false; } /** Populates the current Collection with XML options exactly covering the range specified. Default implementation does nothing. * called by CreateTranslationOptionsForRange() * \param startPos first position in input sentence * \param lastPos last position in input sentence */ void TranslationOptionCollection::CreateXmlOptionsForRange(size_t, size_t) { //not implemented for base class };/** add translation option to the list * \param translationOption translation option to be added */void TranslationOptionCollection::Add(TranslationOption *translationOption){ const WordsRange &coverage = translationOption->GetSourceWordsRange(); m_collection[coverage.GetStartPos()][coverage.GetEndPos() - coverage.GetStartPos()].push_back(translationOption);}TO_STRING_BODY(TranslationOptionCollection);inline std::ostream& operator<<(std::ostream& out, const TranslationOptionCollection& coll){ size_t size = coll.GetSize(); for (size_t startPos = 0 ; startPos < size ; ++startPos) { size_t maxSize = size - startPos; size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength(); maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase; for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos) { TranslationOptionList fullList = coll.GetTranslationOptionList(startPos, endPos); size_t sizeFull = fullList.size(); for (size_t i = 0; i < sizeFull; i++) { out << *fullList[i] << std::endl; } } } //std::vector< std::vector< TranslationOptionList > >::const_iterator i = coll.m_collection.begin(); //size_t j = 0; //for (; i!=coll.m_collection.end(); ++i) { //out << "s[" << j++ << "].size=" << i->size() << std::endl; //} return out;}void TranslationOptionCollection::CacheLexReordering(){ const std::vector<LexicalReordering*> &lexReorderingModels = StaticData::Instance().GetReorderModels(); std::vector<LexicalReordering*>::const_iterator iterLexreordering; for (iterLexreordering = lexReorderingModels.begin() ; iterLexreordering != lexReorderingModels.end() ; ++iterLexreordering) { LexicalReordering &lexreordering = **iterLexreordering; for (size_t startPos = 0 ; startPos < m_source.GetSize() ; startPos++) { size_t maxSize = m_source.GetSize() - startPos; size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength(); maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase; for (size_t endPos = startPos ; endPos < startPos + maxSize; endPos++) { TranslationOptionList &transOptList = GetTranslationOptionList( startPos, endPos); TranslationOptionList::iterator iterTransOpt; for(iterTransOpt = transOptList.begin() ; iterTransOpt != transOptList.end() ; ++iterTransOpt) { TranslationOption &transOpt = **iterTransOpt; const Phrase *sourcePhrase = transOpt.GetSourcePhrase(); if (sourcePhrase) { Score score = lexreordering.GetProb(*sourcePhrase , transOpt.GetTargetPhrase()); // TODO should have better handling of unknown reordering entries if (!score.empty()) transOpt.CacheReorderingProb(lexreordering, score); } } } } }}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?