phrasedictionarymemory.cpp.svn-base

来自「moses开源的机器翻译系统」· SVN-BASE 代码 · 共 201 行

SVN-BASE
201
字号
// $Id$// vim:tabstop=2/***********************************************************************Moses - factored phrase-based language decoderCopyright (C) 2006 University of EdinburghThis library is free software; you can redistribute it and/ormodify it under the terms of the GNU Lesser General PublicLicense as published by the Free Software Foundation; eitherversion 2.1 of the License, or (at your option) any later version.This library is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNULesser General Public License for more details.You should have received a copy of the GNU Lesser General PublicLicense along with this library; if not, write to the Free SoftwareFoundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA***********************************************************************/#include <fstream>#include <string>#include <iterator>#include <algorithm>#include <sys/stat.h>#include "PhraseDictionaryMemory.h"#include "FactorCollection.h"#include "Word.h"#include "Util.h"#include "InputFileStream.h"#include "StaticData.h"#include "WordsRange.h"#include "UserMessage.h"using namespace std;bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input																			, const std::vector<FactorType> &output																			, const string &filePath																			, const vector<float> &weight																			, size_t tableLimit																			, const LMList &languageModels														          , float weightWP){	const StaticData &staticData = StaticData::Instance();	m_tableLimit = tableLimit;	m_filePath = filePath;	//factors		m_inputFactors = FactorMask(input);	m_outputFactors = FactorMask(output);	VERBOSE(2,"PhraseDictionaryMemory: input=" << m_inputFactors << "  output=" << m_outputFactors << std::endl);	// data from file	InputFileStream inFile(filePath);	// create hash file if necessary	ofstream tempFile;	string tempFilePath;	vector< vector<string> >	phraseVector;	string line, prevSourcePhrase = "";	size_t count = 0;  size_t line_num = 0;  size_t numElement = NOT_FOUND; // 3=old format, 5=async format which include word alignment info  	while(getline(inFile, line)) 	{		++line_num;		vector<string> tokens = TokenizeMultiCharSeparator( line , "|||" );				if (numElement == NOT_FOUND) 		{ // init numElement			numElement = tokens.size();			assert(numElement == 3 || numElement == 5);		}			 		if (tokens.size() != numElement)		{			stringstream strme;			strme << "Syntax error at " << filePath << ":" << line_num;			UserMessage::Add(strme.str());			return false;		}		bool isLHSEmpty = (tokens[1].find_first_not_of(" \t", 0) == string::npos);		if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {			TRACE_ERR( filePath << ":" << line_num << ": pt entry contains empty target, skipping\n");			continue;		}		const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();		if (tokens[0] != prevSourcePhrase)			phraseVector = Phrase::Parse(tokens[0], input, factorDelimiter);		vector<float> scoreVector = Tokenize<float>(tokens[(numElement==3) ? 2 : 4]);		if (scoreVector.size() != m_numScoreComponent) 		{			stringstream strme;			strme << "Size of scoreVector != number (" <<scoreVector.size() << "!=" <<m_numScoreComponent<<") of score components on line " << line_num;			UserMessage::Add(strme.str());			return false;		}//		assert(scoreVector.size() == m_numScoreComponent);					// source		Phrase sourcePhrase(Input);		sourcePhrase.CreateFromString( input, phraseVector);		//target		TargetPhrase targetPhrase(Output);		targetPhrase.CreateFromString( output, tokens[1], factorDelimiter);		// component score, for n-best output		std::vector<float> scv(scoreVector.size());		std::transform(scoreVector.begin(),scoreVector.end(),scv.begin(),TransformScore);		std::transform(scv.begin(),scv.end(),scv.begin(),FloorScore);		targetPhrase.SetScore(this, scv, weight, weightWP, languageModels);		AddEquivPhrase(sourcePhrase, targetPhrase);		count++;	}	// sort each target phrase collection	m_collection.Sort(m_tableLimit);	return true;}TargetPhraseCollection *PhraseDictionaryMemory::CreateTargetPhraseCollection(const Phrase &source){	const size_t size = source.GetSize();		PhraseDictionaryNode *currNode = &m_collection;	for (size_t pos = 0 ; pos < size ; ++pos)	{		const Word& word = source.GetWord(pos);		currNode = currNode->GetOrCreateChild(word);		if (currNode == NULL)			return NULL;	}	return currNode->CreateTargetPhraseCollection();}void PhraseDictionaryMemory::AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase){	TargetPhraseCollection &phraseColl = *CreateTargetPhraseCollection(source);	phraseColl.Add(new TargetPhrase(targetPhrase));}const TargetPhraseCollection *PhraseDictionaryMemory::GetTargetPhraseCollection(const Phrase &source) const{ // exactly like CreateTargetPhraseCollection, but don't create	const size_t size = source.GetSize();		const PhraseDictionaryNode *currNode = &m_collection;	for (size_t pos = 0 ; pos < size ; ++pos)	{		const Word& word = source.GetWord(pos);		currNode = currNode->GetChild(word);		if (currNode == NULL)			return NULL;	}	return currNode->GetTargetPhraseCollection();}PhraseDictionaryMemory::~PhraseDictionaryMemory(){}void PhraseDictionaryMemory::SetWeightTransModel(const vector<float> &weightT){	PhraseDictionaryNode::iterator iterDict;	for (iterDict = m_collection.begin() ; iterDict != m_collection.end() ; ++iterDict)	{		PhraseDictionaryNode &phraseDictionaryNode = iterDict->second;		// recursively set weights in nodes		phraseDictionaryNode.SetWeightTransModel(this, weightT);	}}TO_STRING_BODY(PhraseDictionaryMemory);// friendostream& operator<<(ostream& out, const PhraseDictionaryMemory& phraseDict){	const PhraseDictionaryNode &coll = phraseDict.m_collection;	PhraseDictionaryNode::const_iterator iter;		for (iter = coll.begin() ; iter != coll.end() ; ++iter)	{		const Word &word = (*iter).first;		out << word;	}	return out;}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?