phrasedictionarymemory.cpp.svn-base
来自「moses开源的机器翻译系统」· SVN-BASE 代码 · 共 201 行
SVN-BASE
201 行
// $Id$// vim:tabstop=2/***********************************************************************Moses - factored phrase-based language decoderCopyright (C) 2006 University of EdinburghThis library is free software; you can redistribute it and/ormodify it under the terms of the GNU Lesser General PublicLicense as published by the Free Software Foundation; eitherversion 2.1 of the License, or (at your option) any later version.This library is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNULesser General Public License for more details.You should have received a copy of the GNU Lesser General PublicLicense along with this library; if not, write to the Free SoftwareFoundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA***********************************************************************/#include <fstream>#include <string>#include <iterator>#include <algorithm>#include <sys/stat.h>#include "PhraseDictionaryMemory.h"#include "FactorCollection.h"#include "Word.h"#include "Util.h"#include "InputFileStream.h"#include "StaticData.h"#include "WordsRange.h"#include "UserMessage.h"using namespace std;bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input , const std::vector<FactorType> &output , const string &filePath , const vector<float> &weight , size_t tableLimit , const LMList &languageModels , float weightWP){ const StaticData &staticData = StaticData::Instance(); m_tableLimit = tableLimit; m_filePath = filePath; //factors m_inputFactors = FactorMask(input); m_outputFactors = FactorMask(output); VERBOSE(2,"PhraseDictionaryMemory: input=" << m_inputFactors << " output=" << m_outputFactors << std::endl); // data from file InputFileStream inFile(filePath); // create hash file if necessary ofstream tempFile; string tempFilePath; vector< vector<string> > phraseVector; string line, prevSourcePhrase = ""; size_t count = 0; size_t line_num = 0; size_t numElement = NOT_FOUND; // 3=old format, 5=async format which include word alignment info while(getline(inFile, line)) { ++line_num; vector<string> tokens = TokenizeMultiCharSeparator( line , "|||" ); if (numElement == NOT_FOUND) { // init numElement numElement = tokens.size(); assert(numElement == 3 || numElement == 5); } if (tokens.size() != numElement) { stringstream strme; strme << "Syntax error at " << filePath << ":" << line_num; UserMessage::Add(strme.str()); return false; } bool isLHSEmpty = (tokens[1].find_first_not_of(" \t", 0) == string::npos); if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) { TRACE_ERR( filePath << ":" << line_num << ": pt entry contains empty target, skipping\n"); continue; } const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter(); if (tokens[0] != prevSourcePhrase) phraseVector = Phrase::Parse(tokens[0], input, factorDelimiter); vector<float> scoreVector = Tokenize<float>(tokens[(numElement==3) ? 2 : 4]); if (scoreVector.size() != m_numScoreComponent) { stringstream strme; strme << "Size of scoreVector != number (" <<scoreVector.size() << "!=" <<m_numScoreComponent<<") of score components on line " << line_num; UserMessage::Add(strme.str()); return false; }// assert(scoreVector.size() == m_numScoreComponent); // source Phrase sourcePhrase(Input); sourcePhrase.CreateFromString( input, phraseVector); //target TargetPhrase targetPhrase(Output); targetPhrase.CreateFromString( output, tokens[1], factorDelimiter); // component score, for n-best output std::vector<float> scv(scoreVector.size()); std::transform(scoreVector.begin(),scoreVector.end(),scv.begin(),TransformScore); std::transform(scv.begin(),scv.end(),scv.begin(),FloorScore); targetPhrase.SetScore(this, scv, weight, weightWP, languageModels); AddEquivPhrase(sourcePhrase, targetPhrase); count++; } // sort each target phrase collection m_collection.Sort(m_tableLimit); return true;}TargetPhraseCollection *PhraseDictionaryMemory::CreateTargetPhraseCollection(const Phrase &source){ const size_t size = source.GetSize(); PhraseDictionaryNode *currNode = &m_collection; for (size_t pos = 0 ; pos < size ; ++pos) { const Word& word = source.GetWord(pos); currNode = currNode->GetOrCreateChild(word); if (currNode == NULL) return NULL; } return currNode->CreateTargetPhraseCollection();}void PhraseDictionaryMemory::AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase){ TargetPhraseCollection &phraseColl = *CreateTargetPhraseCollection(source); phraseColl.Add(new TargetPhrase(targetPhrase));}const TargetPhraseCollection *PhraseDictionaryMemory::GetTargetPhraseCollection(const Phrase &source) const{ // exactly like CreateTargetPhraseCollection, but don't create const size_t size = source.GetSize(); const PhraseDictionaryNode *currNode = &m_collection; for (size_t pos = 0 ; pos < size ; ++pos) { const Word& word = source.GetWord(pos); currNode = currNode->GetChild(word); if (currNode == NULL) return NULL; } return currNode->GetTargetPhraseCollection();}PhraseDictionaryMemory::~PhraseDictionaryMemory(){}void PhraseDictionaryMemory::SetWeightTransModel(const vector<float> &weightT){ PhraseDictionaryNode::iterator iterDict; for (iterDict = m_collection.begin() ; iterDict != m_collection.end() ; ++iterDict) { PhraseDictionaryNode &phraseDictionaryNode = iterDict->second; // recursively set weights in nodes phraseDictionaryNode.SetWeightTransModel(this, weightT); }}TO_STRING_BODY(PhraseDictionaryMemory);// friendostream& operator<<(ostream& out, const PhraseDictionaryMemory& phraseDict){ const PhraseDictionaryNode &coll = phraseDict.m_collection; PhraseDictionaryNode::const_iterator iter; for (iter = coll.begin() ; iter != coll.end() ; ++iter) { const Word &word = (*iter).first; out << word; } return out;}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?