📄 pdtaimp.h.svn-base
字号:
// $Id$// vim:tabstop=2#pragma once#include "StaticData.h" // needed for factor splitterinline bool existsFile(const char* filePath) { struct stat mystat; return (stat(filePath,&mystat)==0);}double addLogScale(double x,double y) { if(x>y) return addLogScale(y,x); else return x+log(1.0+exp(y-x));}double Exp(double x){ return exp(x);}class PDTAimp { // only these classes are allowed to instantiate this class friend class PhraseDictionaryTreeAdaptor; protected: PDTAimp(PhraseDictionaryTreeAdaptor *p,unsigned nis) : m_languageModels(0),m_weightWP(0.0),m_dict(0), m_obj(p),useCache(1),m_numInputScores(nis),totalE(0),distinctE(0) {} public: std::vector<float> m_weights; LMList const* m_languageModels; float m_weightWP; std::vector<FactorType> m_input,m_output; PhraseDictionaryTree *m_dict; typedef std::vector<TargetPhraseCollection const*> vTPC; mutable vTPC m_tgtColls; typedef std::map<Phrase,TargetPhraseCollection const*> MapSrc2Tgt; mutable MapSrc2Tgt m_cache; PhraseDictionaryTreeAdaptor *m_obj; int useCache; std::vector<vTPC> m_rangeCache; unsigned m_numInputScores; UniqueObjectManager<Phrase> uniqSrcPhr; size_t totalE,distinctE; std::vector<size_t> path1Best,pathExplored; std::vector<double> pathCN; ~PDTAimp() { CleanUp(); delete m_dict; if (StaticData::Instance().GetVerboseLevel() >= 2) { TRACE_ERR("tgt candidates stats: total="<<totalE<<"; distinct=" <<distinctE<<" ("<<distinctE/(0.01*totalE)<<"); duplicates=" <<totalE-distinctE<<" ("<<(totalE-distinctE)/(0.01*totalE) <<")\n"); TRACE_ERR("\npath statistics\n"); if(path1Best.size()) { TRACE_ERR("1-best: "); std::copy(path1Best.begin()+1,path1Best.end(), std::ostream_iterator<size_t>(std::cerr," \t")); TRACE_ERR("\n"); } if(pathCN.size()) { TRACE_ERR("CN (full): "); std::transform(pathCN.begin()+1 ,pathCN.end() ,std::ostream_iterator<double>(std::cerr," \t") ,Exp); TRACE_ERR("\n"); } if(pathExplored.size()) { TRACE_ERR("CN (explored): "); std::copy(pathExplored.begin()+1,pathExplored.end(), std::ostream_iterator<size_t>(std::cerr," \t")); TRACE_ERR("\n"); } } } void Factors2String(Word const& w,std::string& s) const { s=w.GetString(m_input,false); } void CleanUp() { assert(m_dict); m_dict->FreeMemory(); for(size_t i=0;i<m_tgtColls.size();++i) delete m_tgtColls[i]; m_tgtColls.clear(); m_cache.clear(); m_rangeCache.clear(); uniqSrcPhr.clear(); } void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase) { assert(GetTargetPhraseCollection(source)==0); VERBOSE(2, "adding unk source phrase "<<source<<"\n"); std::pair<MapSrc2Tgt::iterator,bool> p =m_cache.insert(std::make_pair(source,static_cast<TargetPhraseCollection const*>(0))); if(p.second || p.first->second==0) { TargetPhraseCollection *ptr=new TargetPhraseCollection; ptr->Add(new TargetPhrase(targetPhrase)); p.first->second=ptr; m_tgtColls.push_back(ptr); } else VERBOSE(2, "WARNING: you added an already existing phrase!\n"); } TargetPhraseCollection const* GetTargetPhraseCollection(Phrase const &src) const { assert(m_dict); if(src.GetSize()==0) return 0; std::pair<MapSrc2Tgt::iterator,bool> piter; if(useCache) { piter=m_cache.insert(std::make_pair(src,static_cast<TargetPhraseCollection const*>(0))); if(!piter.second) return piter.first->second; } else if (m_cache.size()) { MapSrc2Tgt::const_iterator i=m_cache.find(src); return (i!=m_cache.end() ? i->second : 0); } std::vector<std::string> srcString(src.GetSize()); // convert source Phrase into vector of strings for(size_t i=0;i<srcString.size();++i) { Factors2String(src.GetWord(i),srcString[i]); } // get target phrases in string representation std::vector<StringTgtCand> cands; m_dict->GetTargetCandidates(srcString,cands); if(cands.empty()) { return 0; } std::vector<TargetPhrase> tCands;tCands.reserve(cands.size()); std::vector<std::pair<float,size_t> > costs;costs.reserve(cands.size()); // convert into TargetPhrases for(size_t i=0;i<cands.size();++i) { TargetPhrase targetPhrase(Output); StringTgtCand::first_type const& factorStrings=cands[i].first; StringTgtCand::second_type const& probVector=cands[i].second; std::vector<float> scoreVector(probVector.size()); std::transform(probVector.begin(),probVector.end(),scoreVector.begin(), TransformScore); std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(), FloorScore); CreateTargetPhrase(targetPhrase,factorStrings,scoreVector); costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(), tCands.size())); tCands.push_back(targetPhrase); } TargetPhraseCollection *rv=PruneTargetCandidates(tCands,costs); if(rv->IsEmpty()) { delete rv; return 0; } else { if(useCache) piter.first->second=rv; m_tgtColls.push_back(rv); return rv; } } void Create(const std::vector<FactorType> &input , const std::vector<FactorType> &output , const std::string &filePath , const std::vector<float> &weight , const LMList &languageModels , float weightWP ) { // set my members m_dict=new PhraseDictionaryTree(weight.size()-m_numInputScores); m_input=input; m_output=output; m_languageModels=&languageModels; m_weightWP=weightWP; m_weights=weight; std::string binFname=filePath+".binphr.idx"; if(!existsFile(binFname.c_str())) { TRACE_ERR( "bin ttable does not exist -> create it\n"); InputFileStream in(filePath); m_dict->Create(in,filePath); } TRACE_ERR( "reading bin ttable\n"); m_dict->Read(filePath); } typedef PhraseDictionaryTree::PrefixPtr PPtr; typedef unsigned short Position; typedef std::pair<Position,Position> Range; struct State { PPtr ptr; Range range; float score; Position realWords; Phrase src; State() : range(0,0),score(0.0),realWords(0),src(Input) {} State(Position b,Position e,const PPtr& v,float sc=0.0,Position rw=0) : ptr(v),range(b,e),score(sc),realWords(rw),src(Input) {} State(Range const& r,const PPtr& v,float sc=0.0,Position rw=0) : ptr(v),range(r),score(sc),realWords(rw),src(Input) {} Position begin() const {return range.first;} Position end() const {return range.second;} float GetScore() const {return score;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -