// languagemodelirst.cpp.svn-base
// NOTE(review): the two lines above this comment in the scraped page were
// code-viewer chrome (filename banner and a font-size label), not source.
// $Id$/***********************************************************************Moses - factored phrase-based language decoderCopyright (C) 2006 University of EdinburghThis library is free software; you can redistribute it and/ormodify it under the terms of the GNU Lesser General PublicLicense as published by the Free Software Foundation; eitherversion 2.1 of the License, or (at your option) any later version.This library is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNULesser General Public License for more details.You should have received a copy of the GNU Lesser General PublicLicense along with this library; if not, write to the Free SoftwareFoundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA***********************************************************************/#include <cassert>#include <limits>#include <iostream>#include <fstream>#include "dictionary.h"#include "n_gram.h"#include "lmtable.h"#include "lmmacro.h"#include "LanguageModelIRST.h"#include "TypeDef.h"#include "Util.h"#include "FactorCollection.h"#include "Phrase.h"#include "InputFileStream.h"#include "StaticData.h"using namespace std;LanguageModelIRST::LanguageModelIRST(bool registerScore, ScoreIndexManager &scoreIndexManager):LanguageModelSingleFactor(registerScore, scoreIndexManager),m_lmtb(0){}LanguageModelIRST::~LanguageModelIRST(){ delete m_lmtb; delete m_lmtb_ng;}bool LanguageModelIRST::Load(const std::string &filePath, FactorType factorType, float weight, size_t nGramOrder){ char *SepString = " \t\n"; cerr << "In LanguageModelIRST::Load: nGramOrder = " << nGramOrder << "\n"; FactorCollection &factorCollection = FactorCollection::Instance(); m_factorType = factorType; m_weight = weight; m_nGramOrder = nGramOrder; // get name of LM file and, if any, of the micro-macro map file char *filenames = strdup(filePath.c_str()); m_filePath = 
strsep(&filenames, SepString); // Open the input file (possibly gzipped) InputFileStream inp(m_filePath); if (filenames) { // case LMfile + MAPfile: create an object of lmmacro class and load both LM file and map cerr << "Loading LM file + MAP\n"; m_mapFilePath = strsep(&filenames, SepString); if (!FileExists(m_mapFilePath)) { cerr << "ERROR: Map file <" << m_mapFilePath << "> does not exist\n"; return false; } InputFileStream inpMap(m_mapFilePath); m_lmtb = new lmmacro(m_filePath, inp, inpMap); } else { // case (standard) LMfile only: create an object of lmtable cerr << "Loading LM file (no MAP)\n"; m_lmtb = (lmtable *)new lmtable; // Load the (possibly binary) model#ifdef WIN32 m_lmtb->load(inp); //don't use memory map#else if (m_filePath.compare(m_filePath.size()-3,3,".mm")==0) m_lmtb->load(inp,m_filePath.c_str(),NULL,1); else m_lmtb->load(inp,m_filePath.c_str(),NULL,0);#endif } m_lmtb_ng=new ngram(m_lmtb->getDict()); // ngram of words/micro tags m_lmtb_size=m_lmtb->maxlevel(); // LM can be ok, just outputs warnings // Mauro: in the original, the following two instructions are wrongly switched: m_unknownId = m_lmtb->getDict()->oovcode(); // at the level of micro tags CreateFactors(factorCollection); VERBOSE(1, "IRST: m_unknownId=" << m_unknownId << std::endl); //install caches m_lmtb->init_probcache(); m_lmtb->init_statecache(); m_lmtb->init_lmtcaches(m_lmtb->maxlevel()>2?m_lmtb->maxlevel()-1:2); return true;}void LanguageModelIRST::CreateFactors(FactorCollection &factorCollection){ // add factors which have srilm id // code copied & paste from SRI LM class. 
should do template function std::map<size_t, int> lmIdMap; size_t maxFactorId = 0; // to create lookup vector later on dict_entry *entry; dictionary_iter iter(m_lmtb->getDict()); // at the level of micro tags while ( (entry = iter.next()) != NULL) { size_t factorId = factorCollection.AddFactor(Output, m_factorType, entry->word)->GetId(); lmIdMap[factorId] = entry->code; maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId; } size_t factorId; m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_); factorId = m_sentenceStart->GetId(); m_lmtb_sentenceStart=lmIdMap[factorId] = GetLmID(BOS_); maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId; m_sentenceStartArray[m_factorType] = m_sentenceStart; m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_); factorId = m_sentenceEnd->GetId(); m_lmtb_sentenceEnd=lmIdMap[factorId] = GetLmID(EOS_); maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId; m_sentenceEndArray[m_factorType] = m_sentenceEnd; // add to lookup vector in object m_lmIdLookup.resize(maxFactorId+1); fill(m_lmIdLookup.begin(), m_lmIdLookup.end(), m_unknownId); map<size_t, int>::iterator iterMap; for (iterMap = lmIdMap.begin() ; iterMap != lmIdMap.end() ; ++iterMap) { m_lmIdLookup[iterMap->first] = iterMap->second; } }int LanguageModelIRST::GetLmID( const std::string &str ) const{ return m_lmtb->getDict()->encode( str.c_str() ); // at the level of micro tags}float LanguageModelIRST::GetValue(const vector<const Word*> &contextFactor, State* finalState, unsigned int* len) const{ unsigned int dummy; if (!len) { len = &dummy; } FactorType factorType = GetFactorType(); // set up context size_t count = contextFactor.size(); m_lmtb_ng->size=0; if (count< (size_t)(m_lmtb_size-1)) m_lmtb_ng->pushc(m_lmtb_sentenceEnd); if (count< (size_t)m_lmtb_size) m_lmtb_ng->pushc(m_lmtb_sentenceStart); for (size_t i = 0 ; i < count ; i++) { //int lmId = GetLmID((*contextFactor[i])[factorType]);#ifdef DEBUG cout << 
"i=" << i << " -> " << (*contextFactor[i])[factorType]->GetString() << "\n";#endif int lmId = GetLmID((*contextFactor[i])[factorType]->GetString()); m_lmtb_ng->pushc(lmId); } if (finalState){ *finalState=(State *)m_lmtb->cmaxsuffptr(*m_lmtb_ng); // back off stats not currently available *len = 0; } return TransformIRSTScore((float) m_lmtb->clprob(*m_lmtb_ng));}void LanguageModelIRST::CleanUpAfterSentenceProcessing(){ TRACE_ERR( "reset caches\n"); m_lmtb->reset_caches(); #ifndef WIN32 TRACE_ERR( "reset mmap\n"); m_lmtb->reset_mmap();#endif }void LanguageModelIRST::InitializeBeforeSentenceProcessing(){ //nothing to do#ifdef TRACE_CACHE m_lmtb->sentence_id++;#endif}
// NOTE(review): trailing hotkey-help text from the code-viewer page
// (copy/search/fullscreen/theme/font-size shortcuts) removed — not source.