⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 phrase.cpp.svn-base

📁 moses开源的机器翻译系统
💻 SVN-BASE
字号:
 // $Id$// vim:tabstop=2/***********************************************************************Moses - factored phrase-based language decoderCopyright (C) 2006 University of EdinburghThis library is free software; you can redistribute it and/ormodify it under the terms of the GNU Lesser General PublicLicense as published by the Free Software Foundation; eitherversion 2.1 of the License, or (at your option) any later version.This library is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNULesser General Public License for more details.You should have received a copy of the GNU Lesser General PublicLicense along with this library; if not, write to the Free SoftwareFoundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA***********************************************************************/#include <cassert>#include <algorithm>#include <sstream>#include <string>#include "memory.h"#include "FactorCollection.h"#include "Phrase.h"#include "StaticData.h"  // GetMaxNumFactorsusing namespace std;Phrase::Phrase(const Phrase &copy):m_direction(copy.m_direction),m_phraseSize(copy.m_phraseSize),m_arraySize(copy.m_arraySize),m_words(copy.m_words){}Phrase& Phrase::operator=(const Phrase& x) {	if(this!=&x)		{			m_direction=x.m_direction;			m_phraseSize=x.m_phraseSize;			m_arraySize=x.m_arraySize;			m_words = x.m_words;		}	return *this;}Phrase::Phrase(FactorDirection direction)	: m_direction(direction)	, m_phraseSize(0)	, m_arraySize(ARRAY_SIZE_INCR)	, m_words(ARRAY_SIZE_INCR){}Phrase::Phrase(FactorDirection direction, const vector< const Word* > &mergeWords):m_direction(direction),m_phraseSize(0),m_arraySize(ARRAY_SIZE_INCR),m_words(ARRAY_SIZE_INCR){	for (size_t currPos = 0 ; currPos < mergeWords.size() ; currPos++)	{		AddWord(*mergeWords[currPos]);	}}Phrase::~Phrase(){}void Phrase::MergeFactors(const Phrase &copy){	assert(GetSize() == copy.GetSize());	size_t size = GetSize();	const size_t maxNumFactors = StaticData::Instance().GetMaxNumFactors(this->GetDirection());	for (size_t currPos = 0 ; currPos < size ; currPos++)	{		for (unsigned int currFactor = 0 ; currFactor < maxNumFactors ; currFactor++)		{			FactorType factorType = static_cast<FactorType>(currFactor);			const Factor *factor = copy.GetFactor(currPos, factorType);			if (factor != NULL)				SetFactor(currPos, factorType, factor);		}	}}void Phrase::MergeFactors(const Phrase &copy, FactorType factorType){	assert(GetSize() == copy.GetSize());	for (size_t currPos = 0 ; currPos < GetSize() ; currPos++)			SetFactor(currPos, factorType, copy.GetFactor(currPos, factorType));}void Phrase::MergeFactors(const Phrase &copy, const std::vector<FactorType>& factorVec){	assert(GetSize() == copy.GetSize());	for (size_t currPos = 0 ; currPos < GetSize() ; currPos++)		for (std::vector<FactorType>::const_iterator i = factorVec.begin();		     i != factorVec.end(); ++i)		{			SetFactor(currPos, *i, copy.GetFactor(currPos, *i));		}}Phrase Phrase::GetSubString(const WordsRange &wordsRange) const{	Phrase retPhrase(m_direction);	for (size_t currPos = wordsRange.GetStartPos() ; currPos <= wordsRange.GetEndPos() ; currPos++)	{		Word &word = retPhrase.AddWord();		word = GetWord(currPos);	}	return retPhrase;}std::string Phrase::GetStringRep(const vector<FactorType> factorsToPrint) const{	Phrase retPhrase(m_direction);	stringstream strme;	for (size_t pos = 0 ; pos < GetSize() ; pos++)	{		strme << GetWord(pos).GetString(factorsToPrint, true);	}	return strme.str();}Word &Phrase::AddWord(){	if ((m_phraseSize+1) % ARRAY_SIZE_INCR == 0)	{ // need to expand array		m_arraySize += ARRAY_SIZE_INCR;		m_words.resize(m_arraySize);	}	return m_words[m_phraseSize++];}void Phrase::Append(const Phrase &endPhrase){		for (size_t i = 0; i < endPhrase.GetSize();i++){		AddWord(endPhrase.GetWord(i));		}}vector< vector<string> > Phrase::Parse(const std::string &phraseString, const std::vector<FactorType> &factorOrder, const std::string& factorDelimiter){	bool isMultiCharDelimiter = factorDelimiter.size() > 1;	// parse	vector< vector<string> > phraseVector;	vector<string> annotatedWordVector = Tokenize(phraseString);	// KOMMA|none ART|Def.Z NN|Neut.NotGen.Sg VVFIN|none 	//		to	// "KOMMA|none" "ART|Def.Z" "NN|Neut.NotGen.Sg" "VVFIN|none"	for (size_t phrasePos = 0 ; phrasePos < annotatedWordVector.size() ; phrasePos++)	{		string &annotatedWord = annotatedWordVector[phrasePos];		vector<string> factorStrVector;		if (isMultiCharDelimiter) {			factorStrVector = TokenizeMultiCharSeparator(annotatedWord, factorDelimiter);		} else {			factorStrVector = Tokenize(annotatedWord, factorDelimiter);		}		// KOMMA|none		//    to		// "KOMMA" "none"		if (factorStrVector.size() != factorOrder.size()) {			TRACE_ERR( "[ERROR] Malformed input at " << /*StaticData::Instance().GetCurrentInputPosition() <<*/ std::endl			          << "  Expected input to have words composed of " << factorOrder.size() << " factor(s) (form FAC1|FAC2|...)" << std::endl								<< "  but instead received input with " << factorStrVector.size() << " factor(s).\n");			abort();		}		phraseVector.push_back(factorStrVector);	}	return phraseVector;}void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder															, const vector< vector<string> > &phraseVector){	FactorCollection &factorCollection = FactorCollection::Instance();	for (size_t phrasePos = 0 ; phrasePos < phraseVector.size() ; phrasePos++)	{		// add word this phrase		Word &word = AddWord();		for (size_t currFactorIndex= 0 ; currFactorIndex < factorOrder.size() ; currFactorIndex++)		{			FactorType factorType = factorOrder[currFactorIndex];			const string &factorStr = phraseVector[phrasePos][currFactorIndex];			const Factor *factor = factorCollection.AddFactor(m_direction, factorType, factorStr); 			word[factorType] = factor;		}	}}void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder															, const string &phraseString															, const string &factorDelimiter){	vector< vector<string> > phraseVector = Parse(phraseString, factorOrder, factorDelimiter);	CreateFromString(factorOrder, phraseVector);}bool Phrase::operator < (const Phrase &compare) const{	#ifdef min#undef min#endif	size_t thisSize			= GetSize()				,compareSize	= compare.GetSize();	// decide by using length. quick decision	if (thisSize != compareSize)	{		return thisSize < compareSize;	}	else	{		size_t minSize = std::min( thisSize , compareSize );		const size_t maxNumFactors = StaticData::Instance().GetMaxNumFactors(this->GetDirection());		// taken from word.Compare()		for (size_t i = 0 ; i < maxNumFactors ; i++)		{			FactorType factorType = static_cast<FactorType>(i);			for (size_t currPos = 0 ; currPos < minSize ; currPos++)			{				const Factor *thisFactor		= GetFactor(currPos, factorType)										,*compareFactor	= compare.GetFactor(currPos, factorType);				if (thisFactor != NULL && compareFactor != NULL)				{					const int result = thisFactor->Compare(*compareFactor);					if (result == 0)					{						continue;					}					else 					{						return (result < 0);					}				}			}		}		// identical		return false;	}}bool Phrase::Contains(const vector< vector<string> > &subPhraseVector										, const vector<FactorType> &inputFactor) const{	const size_t subSize = subPhraseVector.size()							,thisSize= GetSize();	if (subSize > thisSize)		return false;	// try to match word-for-word	for (size_t currStartPos = 0 ; currStartPos < (thisSize - subSize + 1) ; currStartPos++)	{		bool match = true;		for (size_t currFactorIndex = 0 ; currFactorIndex < inputFactor.size() ; currFactorIndex++)		{			FactorType factorType = inputFactor[currFactorIndex];			for (size_t currSubPos = 0 ; currSubPos < subSize ; currSubPos++)			{				size_t currThisPos = currSubPos + currStartPos;				const string &subStr	= subPhraseVector[currSubPos][currFactorIndex]										,&thisStr	= GetFactor(currThisPos, factorType)->GetString();				if (subStr != thisStr)				{					match = false;					break;				}			}			if (!match)				break;		}		if (match)			return true;	}	return false;}bool Phrase::IsCompatible(const Phrase &inputPhrase) const{	if (inputPhrase.GetSize() != GetSize())	{		return false;	}	const size_t size = GetSize();	const size_t maxNumFactors = StaticData::Instance().GetMaxNumFactors(this->GetDirection());	for (size_t currPos = 0 ; currPos < size ; currPos++)	{		for (unsigned int currFactor = 0 ; currFactor < maxNumFactors ; currFactor++)		{			FactorType factorType = static_cast<FactorType>(currFactor);			const Factor *thisFactor 		= GetFactor(currPos, factorType)									,*inputFactor	= inputPhrase.GetFactor(currPos, factorType);			if (thisFactor != NULL && inputFactor != NULL && thisFactor != inputFactor)				return false;		}	}	return true;}bool Phrase::IsCompatible(const Phrase &inputPhrase, FactorType factorType) const{	if (inputPhrase.GetSize() != GetSize())	{ return false;	}	for (size_t currPos = 0 ; currPos < GetSize() ; currPos++)	{		if (GetFactor(currPos, factorType) != inputPhrase.GetFactor(currPos, factorType))			return false;	}	return true;}bool Phrase::IsCompatible(const Phrase &inputPhrase, const std::vector<FactorType>& factorVec) const{	if (inputPhrase.GetSize() != GetSize())	{ return false;	}	for (size_t currPos = 0 ; currPos < GetSize() ; currPos++)	{		for (std::vector<FactorType>::const_iterator i = factorVec.begin();		     i != factorVec.end(); ++i)		{			if (GetFactor(currPos, *i) != inputPhrase.GetFactor(currPos, *i))				return false;		}	}	return true;}void Phrase::InitializeMemPool(){}void Phrase::FinalizeMemPool(){}TO_STRING_BODY(Phrase);// friendostream& operator<<(ostream& out, const Phrase& phrase){//	out << "(size " << phrase.GetSize() << ") ";	for (size_t pos = 0 ; pos < phrase.GetSize() ; pos++)	{		const Word &word = phrase.GetWord(pos);		out << word;	}	return out;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -