📄 hypothesis.cpp.svn-base
字号:
// $Id$// vim:tabstop=2/***********************************************************************Moses - factored phrase-based language decoderCopyright (C) 2006 University of EdinburghThis library is free software; you can redistribute it and/ormodify it under the terms of the GNU Lesser General PublicLicense as published by the Free Software Foundation; eitherversion 2.1 of the License, or (at your option) any later version.This library is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNULesser General Public License for more details.You should have received a copy of the GNU Lesser General PublicLicense along with this library; if not, write to the Free SoftwareFoundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA***********************************************************************/#include <cassert>#include <iostream>#include <limits>#include <vector>#include <algorithm>#include "TranslationOption.h"#include "TranslationOptionCollection.h"#include "DummyScoreProducers.h"#include "Hypothesis.h"#include "Util.h"#include "SquareMatrix.h"#include "LexicalReordering.h"#include "StaticData.h"#include "InputType.h"#include "LMList.h"#include "hash.h"using namespace std;unsigned int Hypothesis::s_HypothesesCreated = 0;#ifdef USE_HYPO_POOL ObjectPool<Hypothesis> Hypothesis::s_objectPool("Hypothesis", 300000);#endifHypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget) : m_prevHypo(NULL) , m_transOpt(NULL) , m_targetPhrase(emptyTarget) , m_sourcePhrase(0) , m_sourceCompleted(source.GetSize()) , m_sourceInput(source) , m_currSourceWordsRange(NOT_FOUND, NOT_FOUND) , m_currTargetWordsRange(NOT_FOUND, NOT_FOUND) , m_wordDeleted(false) , m_languageModelStates(StaticData::Instance().GetLMSize(), LanguageModelSingleFactor::UnknownState) , m_arcList(NULL) , m_id(0) , m_lmstats(NULL){ // used for initial seeding of trans process // initialize scores //_hash_computed = false; s_HypothesesCreated = 1; ResetScore(); }/*** * continue prevHypo by appending the phrases in transOpt */Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt) : m_prevHypo(&prevHypo) , m_transOpt(&transOpt) , m_targetPhrase(transOpt.GetTargetPhrase()) , m_sourcePhrase(transOpt.GetSourcePhrase()) , m_sourceCompleted (prevHypo.m_sourceCompleted ) , m_sourceInput (prevHypo.m_sourceInput) , m_currSourceWordsRange (transOpt.GetSourceWordsRange()) , m_currTargetWordsRange ( prevHypo.m_currTargetWordsRange.GetEndPos() + 1 ,prevHypo.m_currTargetWordsRange.GetEndPos() + transOpt.GetTargetPhrase().GetSize()) , m_wordDeleted(false) , m_totalScore(0.0f) , m_futureScore(0.0f) , m_scoreBreakdown (prevHypo.m_scoreBreakdown) , m_languageModelStates(prevHypo.m_languageModelStates) , m_arcList(NULL) , m_id(s_HypothesesCreated++) , m_lmstats(NULL){ // assert that we are not extending our hypothesis by retranslating something // that this hypothesis has already translated! assert(!m_sourceCompleted.Overlap(m_currSourceWordsRange)); //_hash_computed = false; m_sourceCompleted.SetValue(m_currSourceWordsRange.GetStartPos(), m_currSourceWordsRange.GetEndPos(), true); m_wordDeleted = transOpt.IsDeletionOption(); m_scoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown());}Hypothesis::~Hypothesis(){ if (m_arcList) { ArcList::iterator iter; for (iter = m_arcList->begin() ; iter != m_arcList->end() ; ++iter) { FREEHYPO(*iter); } m_arcList->clear(); delete m_arcList; m_arcList = NULL; delete m_lmstats; m_lmstats = NULL; }}void Hypothesis::AddArc(Hypothesis *loserHypo){ if (!m_arcList) { if (loserHypo->m_arcList) // we don't have an arcList, but loser does { this->m_arcList = loserHypo->m_arcList; // take ownership, we'll delete loserHypo->m_arcList = 0; // prevent a double deletion } else { this->m_arcList = new ArcList(); } } else { if (loserHypo->m_arcList) { // both have an arc list: merge. delete loser size_t my_size = m_arcList->size(); size_t add_size = loserHypo->m_arcList->size(); this->m_arcList->resize(my_size + add_size, 0); std::memcpy(&(*m_arcList)[0] + my_size, &(*m_arcList)[0], add_size * sizeof(Hypothesis *)); delete loserHypo->m_arcList; loserHypo->m_arcList = 0; } else { // loserHypo doesn't have any arcs // DO NOTHING } } m_arcList->push_back(loserHypo);}/*** * return the subclass of Hypothesis most appropriate to the given translation option */Hypothesis* Hypothesis::CreateNext(const TranslationOption &transOpt) const{ return Create(*this, transOpt);}/*** * return the subclass of Hypothesis most appropriate to the given translation option */Hypothesis* Hypothesis::Create(const Hypothesis &prevHypo, const TranslationOption &transOpt){#ifdef USE_HYPO_POOL Hypothesis *ptr = s_objectPool.getPtr(); return new(ptr) Hypothesis(prevHypo, transOpt);#else return new Hypothesis(prevHypo, transOpt);#endif}/*** * return the subclass of Hypothesis most appropriate to the given target phrase */Hypothesis* Hypothesis::Create(InputType const& m_source, const TargetPhrase &emptyTarget){#ifdef USE_HYPO_POOL Hypothesis *ptr = s_objectPool.getPtr(); return new(ptr) Hypothesis(m_source, emptyTarget);#else return new Hypothesis(m_source, emptyTarget);#endif}/** check, if two hypothesis can be recombined. this is actually a sorting function that allows us to keep an ordered list of hypotheses. This makes recombination much quicker. */int Hypothesis::NGramCompare(const Hypothesis &compare) const{ // -1 = this < compare // +1 = this > compare // 0 = this ==compare if (m_languageModelStates < compare.m_languageModelStates) return -1; if (m_languageModelStates > compare.m_languageModelStates) return 1; if (m_sourceCompleted.GetCompressedRepresentation() < compare.m_sourceCompleted.GetCompressedRepresentation()) return -1; if (m_sourceCompleted.GetCompressedRepresentation() > compare.m_sourceCompleted.GetCompressedRepresentation()) return 1; if (m_currSourceWordsRange.GetEndPos() < compare.m_currSourceWordsRange.GetEndPos()) return -1; if (m_currSourceWordsRange.GetEndPos() > compare.m_currSourceWordsRange.GetEndPos()) return 1; if (! StaticData::Instance().GetSourceStartPosMattersForRecombination()) return 0; if (m_currSourceWordsRange.GetStartPos() < compare.m_currSourceWordsRange.GetStartPos()) return -1; if (m_currSourceWordsRange.GetStartPos() > compare.m_currSourceWordsRange.GetStartPos()) return 1; return 0;}/** Calculates the overall language model score by combining the scores * of language models generated for each of the factors. Because the factors * represent a variety of tag sets, and because factors with smaller tag sets * (such as POS instead of words) allow us to calculate richer statistics, we * allow a different length of n-gram to be specified for each factor. * /param lmListInitial todo - describe this parameter * /param lmListEnd todo - describe this parameter */void Hypothesis::CalcLMScore(const LMList &languageModels){ const size_t startPos = m_currTargetWordsRange.GetStartPos(); LMList::const_iterator iterLM; // will be null if LM stats collection is disabled if (StaticData::Instance().IsComputeLMBackoffStats()) { m_lmstats = new vector<vector<unsigned int> >(languageModels.size(), vector<unsigned int>(0)); } size_t lmIdx = 0; // already have LM scores from previous and trigram score of poss trans. // just need trigram score of the words of the start of current phrase for (iterLM = languageModels.begin() ; iterLM != languageModels.end() ; ++iterLM,++lmIdx) { const LanguageModel &languageModel = **iterLM; size_t nGramOrder = languageModel.GetNGramOrder(); size_t currEndPos = m_currTargetWordsRange.GetEndPos(); float lmScore; size_t nLmCallCount = 0; if(m_currTargetWordsRange.GetNumWordsCovered() == 0) { lmScore = 0; //the score associated with dropping source words is not part of the language model } else { //non-empty target phrase if (m_lmstats) (*m_lmstats)[lmIdx].resize(m_currTargetWordsRange.GetNumWordsCovered(), 0); // 1st n-gram vector<const Word*> contextFactor(nGramOrder); size_t index = 0; for (int currPos = (int) startPos - (int) nGramOrder + 1 ; currPos <= (int) startPos ; currPos++) { if (currPos >= 0) contextFactor[index++] = &GetWord(currPos); else contextFactor[index++] = &languageModel.GetSentenceStartArray(); } lmScore = languageModel.GetValue(contextFactor); if (m_lmstats) { languageModel.GetState(contextFactor, &(*m_lmstats)[lmIdx][nLmCallCount++]); } //cout<<"context factor: "<<languageModel.GetValue(contextFactor)<<endl; // main loop size_t endPos = std::min(startPos + nGramOrder - 2 , currEndPos); for (size_t currPos = startPos + 1 ; currPos <= endPos ; currPos++) { // shift all args down 1 place for (size_t i = 0 ; i < nGramOrder - 1 ; i++) contextFactor[i] = contextFactor[i + 1]; // add last factor contextFactor.back() = &GetWord(currPos); lmScore += languageModel.GetValue(contextFactor); if (m_lmstats) languageModel.GetState(contextFactor, &(*m_lmstats)[lmIdx][nLmCallCount++]);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -