📄 hypothesis.cpp.svn-base

📁 moses开源的机器翻译系统
💻 SVN-BASE
📖 第 1 页 / 共 2 页
字号:
12 下一页
// $Id$// vim:tabstop=2/***********************************************************************Moses - factored phrase-based language decoderCopyright (C) 2006 University of EdinburghThis library is free software; you can redistribute it and/ormodify it under the terms of the GNU Lesser General PublicLicense as published by the Free Software Foundation; eitherversion 2.1 of the License, or (at your option) any later version.This library is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNULesser General Public License for more details.You should have received a copy of the GNU Lesser General PublicLicense along with this library; if not, write to the Free SoftwareFoundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA***********************************************************************/#include <cassert>#include <iostream>#include <limits>#include <vector>#include <algorithm>#include "TranslationOption.h"#include "TranslationOptionCollection.h"#include "DummyScoreProducers.h"#include "Hypothesis.h"#include "Util.h"#include "SquareMatrix.h"#include "LexicalReordering.h"#include "StaticData.h"#include "InputType.h"#include "LMList.h"#include "hash.h"using namespace std;unsigned int Hypothesis::s_HypothesesCreated = 0;#ifdef USE_HYPO_POOL	ObjectPool<Hypothesis> Hypothesis::s_objectPool("Hypothesis", 300000);#endifHypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)	: m_prevHypo(NULL)	, m_transOpt(NULL)	, m_targetPhrase(emptyTarget)	, m_sourcePhrase(0)	, m_sourceCompleted(source.GetSize())	, m_sourceInput(source)	, m_currSourceWordsRange(NOT_FOUND, NOT_FOUND)	, m_currTargetWordsRange(NOT_FOUND, NOT_FOUND)	, m_wordDeleted(false)	, m_languageModelStates(StaticData::Instance().GetLMSize(), LanguageModelSingleFactor::UnknownState)	, m_arcList(NULL)	, m_id(0)	, m_lmstats(NULL){	// used for initial seeding of trans process		// initialize scores	//_hash_computed = false;	s_HypothesesCreated = 1;	ResetScore();	}/*** * continue prevHypo by appending the phrases in transOpt */Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt)	: m_prevHypo(&prevHypo)	, m_transOpt(&transOpt)	, m_targetPhrase(transOpt.GetTargetPhrase())	, m_sourcePhrase(transOpt.GetSourcePhrase())	, m_sourceCompleted				(prevHypo.m_sourceCompleted )	, m_sourceInput						(prevHypo.m_sourceInput)	, m_currSourceWordsRange	(transOpt.GetSourceWordsRange())	, m_currTargetWordsRange	( prevHypo.m_currTargetWordsRange.GetEndPos() + 1														 ,prevHypo.m_currTargetWordsRange.GetEndPos() + transOpt.GetTargetPhrase().GetSize())	, m_wordDeleted(false)	,	m_totalScore(0.0f)	,	m_futureScore(0.0f)	, m_scoreBreakdown				(prevHypo.m_scoreBreakdown)	, m_languageModelStates(prevHypo.m_languageModelStates)	, m_arcList(NULL)	, m_id(s_HypothesesCreated++)	, m_lmstats(NULL){	// assert that we are not extending our hypothesis by retranslating something	// that this hypothesis has already translated!	assert(!m_sourceCompleted.Overlap(m_currSourceWordsRange));		//_hash_computed = false;  m_sourceCompleted.SetValue(m_currSourceWordsRange.GetStartPos(), m_currSourceWordsRange.GetEndPos(), true);  m_wordDeleted = transOpt.IsDeletionOption();	m_scoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown());}Hypothesis::~Hypothesis(){	if (m_arcList) 	{		ArcList::iterator iter;		for (iter = m_arcList->begin() ; iter != m_arcList->end() ; ++iter)		{			FREEHYPO(*iter);		}		m_arcList->clear();		delete m_arcList;		m_arcList = NULL;		delete m_lmstats; m_lmstats = NULL;	}}void Hypothesis::AddArc(Hypothesis *loserHypo){	if (!m_arcList) {		if (loserHypo->m_arcList)  // we don't have an arcList, but loser does		{			this->m_arcList = loserHypo->m_arcList;  // take ownership, we'll delete			loserHypo->m_arcList = 0;                // prevent a double deletion		}		else			{ this->m_arcList = new ArcList(); }	} else {		if (loserHypo->m_arcList) {  // both have an arc list: merge. delete loser			size_t my_size = m_arcList->size();			size_t add_size = loserHypo->m_arcList->size();			this->m_arcList->resize(my_size + add_size, 0);			std::memcpy(&(*m_arcList)[0] + my_size, &(*m_arcList)[0], add_size * sizeof(Hypothesis *));			delete loserHypo->m_arcList;			loserHypo->m_arcList = 0;		} else { // loserHypo doesn't have any arcs		  // DO NOTHING		}	}	m_arcList->push_back(loserHypo);}/*** * return the subclass of Hypothesis most appropriate to the given translation option */Hypothesis* Hypothesis::CreateNext(const TranslationOption &transOpt) const{	return Create(*this, transOpt);}/*** * return the subclass of Hypothesis most appropriate to the given translation option */Hypothesis* Hypothesis::Create(const Hypothesis &prevHypo, const TranslationOption &transOpt){#ifdef USE_HYPO_POOL	Hypothesis *ptr = s_objectPool.getPtr();	return new(ptr) Hypothesis(prevHypo, transOpt);#else	return new Hypothesis(prevHypo, transOpt);#endif}/*** * return the subclass of Hypothesis most appropriate to the given target phrase */Hypothesis* Hypothesis::Create(InputType const& m_source, const TargetPhrase &emptyTarget){#ifdef USE_HYPO_POOL	Hypothesis *ptr = s_objectPool.getPtr();	return new(ptr) Hypothesis(m_source, emptyTarget);#else	return new Hypothesis(m_source, emptyTarget);#endif}/** check, if two hypothesis can be recombined.    this is actually a sorting function that allows us to    keep an ordered list of hypotheses. This makes recombination    much quicker. */int Hypothesis::NGramCompare(const Hypothesis &compare) const{ // -1 = this < compare	// +1 = this > compare	// 0	= this ==compare	if (m_languageModelStates < compare.m_languageModelStates) return -1;	if (m_languageModelStates > compare.m_languageModelStates) return 1;	if (m_sourceCompleted.GetCompressedRepresentation() < compare.m_sourceCompleted.GetCompressedRepresentation()) return -1;	if (m_sourceCompleted.GetCompressedRepresentation() > compare.m_sourceCompleted.GetCompressedRepresentation()) return 1;	if (m_currSourceWordsRange.GetEndPos() < compare.m_currSourceWordsRange.GetEndPos()) return -1;	if (m_currSourceWordsRange.GetEndPos() > compare.m_currSourceWordsRange.GetEndPos()) return 1;	if (! StaticData::Instance().GetSourceStartPosMattersForRecombination()) return 0;	if (m_currSourceWordsRange.GetStartPos() < compare.m_currSourceWordsRange.GetStartPos()) return -1;	if (m_currSourceWordsRange.GetStartPos() > compare.m_currSourceWordsRange.GetStartPos()) return 1;	return 0;}/** Calculates the overall language model score by combining the scores * of language models generated for each of the factors.  Because the factors * represent a variety of tag sets, and because factors with smaller tag sets  * (such as POS instead of words) allow us to calculate richer statistics, we * allow a different length of n-gram to be specified for each factor. * /param lmListInitial todo - describe this parameter  * /param lmListEnd todo - describe this parameter */void Hypothesis::CalcLMScore(const LMList &languageModels){	const size_t startPos	= m_currTargetWordsRange.GetStartPos();	LMList::const_iterator iterLM;	// will be null if LM stats collection is disabled	if (StaticData::Instance().IsComputeLMBackoffStats()) {		m_lmstats = new vector<vector<unsigned int> >(languageModels.size(), vector<unsigned int>(0));	}	size_t lmIdx = 0;	// already have LM scores from previous and trigram score of poss trans.	// just need trigram score of the words of the start of current phrase		for (iterLM = languageModels.begin() ; iterLM != languageModels.end() ; ++iterLM,++lmIdx)	{		const LanguageModel &languageModel = **iterLM;		size_t nGramOrder			= languageModel.GetNGramOrder();		size_t currEndPos			= m_currTargetWordsRange.GetEndPos();		float lmScore;		size_t nLmCallCount = 0;		if(m_currTargetWordsRange.GetNumWordsCovered() == 0) {			lmScore = 0; //the score associated with dropping source words is not part of the language model		} else { //non-empty target phrase			if (m_lmstats)				(*m_lmstats)[lmIdx].resize(m_currTargetWordsRange.GetNumWordsCovered(), 0);			// 1st n-gram			vector<const Word*> contextFactor(nGramOrder);			size_t index = 0;			for (int currPos = (int) startPos - (int) nGramOrder + 1 ; currPos <= (int) startPos ; currPos++)			{				if (currPos >= 0)					contextFactor[index++] = &GetWord(currPos);				else								contextFactor[index++] = &languageModel.GetSentenceStartArray();			}			lmScore	= languageModel.GetValue(contextFactor);			if (m_lmstats) { languageModel.GetState(contextFactor, &(*m_lmstats)[lmIdx][nLmCallCount++]); }			//cout<<"context factor: "<<languageModel.GetValue(contextFactor)<<endl;			// main loop			size_t endPos = std::min(startPos + nGramOrder - 2															, currEndPos);			for (size_t currPos = startPos + 1 ; currPos <= endPos ; currPos++)			{				// shift all args down 1 place				for (size_t i = 0 ; i < nGramOrder - 1 ; i++)					contextFactor[i] = contextFactor[i + 1];					// add last factor				contextFactor.back() = &GetWord(currPos);				lmScore	+= languageModel.GetValue(contextFactor);				if (m_lmstats) 					languageModel.GetState(contextFactor, &(*m_lmstats)[lmIdx][nLmCallCount++]);
12 下一页
💿 文件大小 8836 K
👤 上传用户 myhpgnl
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#moses #开源 #机器翻译系统
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -