📄 lexicalreorderingtable.cpp.svn-base
字号:
// === NOTE(review) ==========================================================
// This chunk starts INSIDE the binary-table creation routine; the function
// signature, the per-line read loop header, and the declarations of line,
// lnc, numTokens, numKeyTokens, voc[], psa, currKey, currFirstWord, vo,
// cands, os, ot, ofi, ofsv and oftv all live above the visible region, so
// comments about them are inferred from usage -- TODO confirm against the
// full file.  The code parses one text line of the reordering table
// ("f ||| [e [||| c]] ||| scores"), inserts the numeric key into an on-disk
// prefix tree, and finally flushes everything and writes the vocabularies.
// ===========================================================================
  TRACE_ERR("."); }  // progress dot; the '}' closes a periodic-report 'if' opened above this chunk

  // --- parse the current line into "|||"-separated fields --------------------
  IPhrase key;   // key phrase(s) mapped to vocabulary ids
  Score score;   // raw scores parsed from the last field
  std::vector<std::string> tokens = TokenizeMultiCharSeparator(line, "|||");
  std::string w;
  if(1 == lnc){
    //do some init stuff in the first line
    numTokens = tokens.size();
    if(tokens.size() == 2){ //f ||| score
      numKeyTokens = 1;
      voc[0] = new WordVoc();
      voc[1] = 0;
    } else if(3 == tokens.size() || 4 == tokens.size()){ //either f ||| e ||| score or f ||| e ||| c ||| score
      numKeyTokens = 2;
      voc[0] = new WordVoc(); //f voc
      voc[1] = new WordVoc(); //e voc
      voc[2] = voc[1];        //c & e share voc
    }
  } else {
    //sanity check ALL lines must have same number of tokens
    assert(numTokens == tokens.size());
  }

  // Build the numeric key: f words, then (if conditioned on e) a MagicWord
  // separator followed by e words.
  int phrase = 0;
  for(; phrase < numKeyTokens; ++phrase){
    //conditioned on more than just f... need |||
    if(phrase >=1){
      key.push_back(PrefixTreeMap::MagicWord);
    }
    std::istringstream is(tokens[phrase]);
    while(is >> w) {
      key.push_back(voc[phrase]->add(w));
    }
  }

  //collect all non key phrases, i.e. c
  // NOTE(review): 'phrase' keeps advancing here so that c is converted through
  // voc[2] (which shares storage with the e vocabulary, see init above).
  std::vector<IPhrase> tgt_phrases;
  tgt_phrases.resize(numTokens - numKeyTokens - 1);
  for(int j = 0; j < tgt_phrases.size(); ++j, ++phrase){  // NOTE(review): signed/unsigned comparison
    std::istringstream is(tokens[numKeyTokens + j]);
    while(is >> w) {
      tgt_phrases[j].push_back(voc[phrase]->add(w));
    }
  }

  //last token is score
  std::istringstream is(tokens[numTokens-1]);
  while(is >> w) {
    score.push_back(atof(w.c_str()));
  }
  //transform score now...
  std::transform(score.begin(),score.end(),score.begin(),TransformScore);
  std::vector<Score> scores;
  scores.push_back(score);

  if(key.empty()) {
    TRACE_ERR("WARNING: empty source phrase in line '"<<line<<"'\n");
    continue;  // skip malformed line; we are inside the per-line read loop
  }

  //first time inits
  if(currFirstWord == InvalidLabelId){ currFirstWord = key[0]; }
  if(currKey.empty()){
    currKey = key;
    //insert key into tree
    assert(psa);
    PSA::Data& d = psa->insert(key);
    if(d == InvalidOffT) {
      d = fTell(ot);  // record where this key's candidate block starts in the data file
    } else {
      TRACE_ERR("ERROR: source phrase already inserted (A)!\nline(" << lnc << "): '" << line << "\n");
      return false;
    }
  }
  if(currKey != key){
    //ok new key
    currKey = key;
    //a) write cands for old key
    cands.writeBin(ot);
    cands.clear();
    //b) check if we need to move on to new tree root
    if(key[0] != currFirstWord){
      // write key prefix tree to file and clear
      PTF pf;
      if(currFirstWord >= vo.size()){
        vo.resize(currFirstWord+1,InvalidOffT);
      }
      vo[currFirstWord] = fTell(os);
      pf.create(*psa, os);
      // clear
      delete psa; psa = new PSA;
      currFirstWord = key[0];
    }
    //c) insert key into tree
    assert(psa);
    PSA::Data& d = psa->insert(key);
    if(d == InvalidOffT) {
      d = fTell(ot);
    } else {
      // NOTE(review): message says "(A)" exactly like the branch above;
      // presumably this one was meant to read "(B)" -- left unchanged here.
      TRACE_ERR("ERROR: source phrase already inserted (A)!\nline(" << lnc << "): '" << line << "\n");
      return false;
    }
  }
  cands.push_back(GenericCandidate(tgt_phrases, scores));
}  // end of per-line read loop (opened above this chunk)

//flush remainders
cands.writeBin(ot);
cands.clear();

//process last currFirstWord
PTF pf;
if(currFirstWord >= vo.size()) {
  vo.resize(currFirstWord+1,InvalidOffT);
}
vo[currFirstWord] = fTell(os);
pf.create(*psa,os);
delete psa; psa=0;

fClose(os);
fClose(ot);
/*
std::vector<size_t> inv;
for(size_t i = 0; i < vo.size(); ++i){
  if(vo[i] == InvalidOffT){
    inv.push_back(i);
  }
}
if(inv.size()) {
  TRACE_ERR("WARNING: there are src voc entries with no phrase "
            "translation: count "<<inv.size()<<"\n"
            "There exists phrase translations for "<<vo.size()-inv.size()
            <<" entries\n");
}
*/

// Persist the per-first-word subtree offsets, then the vocabularies.
FILE *oi = fOpen(ofi.c_str(),"wb");
fWriteVector(oi,vo);
fClose(oi);
if(voc[0]){
  voc[0]->Write(ofsv);
  delete voc[0];
}
if(voc[1]){
  voc[1]->Write(oftv);
  delete voc[1];
}
return true;
}

/**
 * Build the string key under which candidates for (f,e) are stored in
 * m_Cache: the f string representation, then "|||" and the e string
 * representation; either part is omitted when the corresponding factor
 * set is empty (i.e. the model is not conditioned on that side).
 */
std::string
LexicalReorderingTableTree::MakeCacheKey(const Phrase& f,
                                         const Phrase& e) const {
  std::string key;
  if(!m_FactorsF.empty()){
    key += auxClearString(f.GetStringRep(m_FactorsF));
  }
  if(!m_FactorsE.empty()){
    if(!key.empty()){
      key += "|||";
    }
    key += auxClearString(e.GetStringRep(m_FactorsE));
  }
  return key;
};  // NOTE(review): stray ';' after function body -- harmless but unconventional

/**
 * Build the numeric (vocabulary-id) key used to query m_Table for (f,e).
 * f words are converted through the source vocabulary, e words through the
 * target vocabulary, with PrefixTreeMap::MagicWord separating the two parts
 * (mirrors the key layout written by the table-creation code above).
 */
IPhrase
LexicalReorderingTableTree::MakeTableKey(const Phrase& f,
                                         const Phrase& e) const {
  IPhrase key;
  std::vector<std::string> keyPart;
  if(!m_FactorsF.empty()){
    for(int i = 0; i < f.GetSize(); ++i){  // NOTE(review): signed/unsigned comparison
      /* old code
      std::string s = f.GetWord(i).ToString(m_FactorsF);
      keyPart.push_back(s.substr(0,s.size()-1));
      */
      keyPart.push_back(f.GetWord(i).GetString(m_FactorsF, false));
    }
    auxAppend(key, m_Table.ConvertPhrase(keyPart, SourceVocId));
    keyPart.clear();
  }
  if(!m_FactorsE.empty()){
    if(!key.empty()){
      key.push_back(PrefixTreeMap::MagicWord);
    }
    for(int i = 0; i < e.GetSize(); ++i){
      /* old code
      std::string s = e.GetWord(i).ToString(m_FactorsE);
      keyPart.push_back(s.substr(0,s.size()-1));
      */
      keyPart.push_back(e.GetWord(i).GetString(m_FactorsE, false));
    }
    auxAppend(key, m_Table.ConvertPhrase(keyPart,TargetVocId));
    //keyPart.clear();
  }
  return key;
};

// Depth-first traversal frame used by auxCacheForSrcPhrase: a position in the
// prefix tree plus the target-word path (as text) that led there.
struct State {
  State(PPimp* t, const std::string& p) : pos(t), path(p){ }
  PPimp* pos;          // current position/child index in the prefix tree
  std::string path;    // space-prefixed target words accumulated so far
};

/**
 * Pre-fetch every table entry whose source side is exactly f into m_Cache.
 * If the model is conditioned only on f, a single lookup suffices; otherwise
 * the whole e-subtree under f is walked depth-first and each non-empty
 * candidate list is cached under "f|||<e words>".
 */
void LexicalReorderingTableTree::auxCacheForSrcPhrase(const Phrase& f){
  if(m_FactorsE.empty()){
    //f is all of key...
    Candidates cands;
    m_Table.GetCandidates(MakeTableKey(f,Phrase(Output)),&cands);
    m_Cache[MakeCacheKey(f,Phrase(Output))] = cands;
  } else {
    ObjectPool<PPimp> pool;  // owns all traversal positions created below
    PPimp* pPos = m_Table.GetRoot();
    //1) goto subtree for f
    for(int i = 0; i < f.GetSize() && 0 != pPos && pPos->isValid(); ++i){
      /* old code
      pPos = m_Table.Extend(pPos, auxClearString(f.GetWord(i).ToString(m_FactorsF)), SourceVocId);
      */
      pPos = m_Table.Extend(pPos, f.GetWord(i).GetString(m_FactorsF, false), SourceVocId);
    }
    // step over the f/e separator before descending into the e-subtree
    if(0 != pPos && pPos->isValid()){
      pPos = m_Table.Extend(pPos, PrefixTreeMap::MagicWord);
    }
    if(0 == pPos || !pPos->isValid()){
      return;  // f (or f+separator) not in the table -- nothing to cache
    }
    //2) explore whole subtree depth first & cache
    std::string cache_key = auxClearString(f.GetStringRep(m_FactorsF)) + "|||";
    std::vector<State> stack;  // explicit DFS stack instead of recursion
    stack.push_back(State(pool.get(PPimp(pPos->ptr()->getPtr(pPos->idx),0,0)),""));
    Candidates cands;
    while(!stack.empty()){
      if(stack.back().pos->isValid()){
        LabelId w = stack.back().pos->ptr()->getKey(stack.back().pos->idx);
        std::string next_path = stack.back().path + " " + m_Table.ConvertWord(w,TargetVocId);
        //cache this
        m_Table.GetCandidates(*stack.back().pos,&cands);
        if(!cands.empty()){
          m_Cache[cache_key + auxClearString(next_path)] = cands;
        }
        cands.clear();
        // descend into the child, then advance this level's index so the
        // sibling is visited after the child subtree is exhausted
        PPimp* next_pos = pool.get(PPimp(stack.back().pos->ptr()->getPtr(stack.back().pos->idx),0,0));
        ++stack.back().pos->idx;
        stack.push_back(State(next_pos,next_path));
      } else {
        stack.pop_back();  // level exhausted -- backtrack
      }
    }
  }
}

// Confusion-network input: pre-fetching is intentionally not implemented.
void LexicalReorderingTableTree::Cache(const ConfusionNet& input){
  return;
}

/**
 * Pre-fetch reordering candidates for every contiguous sub-span of the input
 * sentence into m_Cache, reporting how many new keys were added.
 */
void LexicalReorderingTableTree::Cache(const Sentence& input){
  //only works with sentences...
  int prev_cache_size = m_Cache.size();
  int max_phrase_length = input.GetSize();
  for(size_t len = 0; len <= max_phrase_length; ++len){  // NOTE(review): signed/unsigned comparison
    for(size_t start = 0; start+len <= input.GetSize(); ++start){
      Phrase f = input.GetSubString(WordsRange(start, start+len));
      auxCacheForSrcPhrase(f);
    }
  }
  std::cerr << "Cached " << m_Cache.size() - prev_cache_size << " new primary reordering table keys\n";
}

/*Pre fetching implementation using Phrase and Generation Dictionaries */
/*
void LexicalReorderingTableTree::Cache(const ConfusionNet& input){
  typedef TargetPhraseCollection::iterator Iter;
  typedef TargetPhraseCollection::const_iterator ConstIter;
  //not implemented for confusion networks...
  Sentence const* s = dynamic_cast<Sentence const*>(&input);
  if(!s){
    return;
  }
  int max_phrase_length = input.GetSize();
  std::vector<PhraseDictionaryBase*> PhraseTables = StaticData::Instance()->GetPhraseDictionaries();
  //new code:
  //std::vector<PhraseDictionary*> PhraseTables = StaticData::Instance()->GetPhraseDictionaries();
  std::vector<GenerationDictionary*> GenTables = StaticData::Instance()->GetGenerationDictionaries();
  for(size_t len = 1; len <= max_phrase_length; ++len){
    for(size_t start = 0; start+len <= input.GetSize(); ++start){
      Phrase f = s->GetSubString(WordsRange(start, start+len));
      //find all translations of f
      TargetPhraseCollection list;
      for(size_t t = 0; t < PhraseTables.size(); ++t){
        //if(doIntersect(PhraseTables[t]->GetOutputFactorMask(),FactorMask(m_FactorsE))){
        //this table gives us something we need
        const TargetPhraseCollection* new_list = PhraseTables[t]->GetTargetPhraseCollection(f);
        TargetPhraseCollection curr_list;
        for(ConstIter i = new_list->begin(); i != new_list->end(); ++i){
          for(Iter j = list.begin(); j != list.end(); ++j){
            curr_list.Add((*j)->MergeNext(*(*i)));
          }
        }
        if(list.IsEmpty()){
          list = *new_list;
        } else {
          list = curr_list;
        }
        //}
      }
      for(size_t g = 0; g < GenTables.size(); ++g){
        //if(doIntersect(GenTables[g]->GetOutputFactorMask(),FactorMask(m_FactorsE))){
        TargetPhraseCollection curr_list;
        for(Iter j = list.begin(); j != list.end(); ++j){
          for(size_t w = 0; w < (*j)->GetSize(); ++w){
            const OutputWordCollection* words = GenTables[g]->FindWord((*j)->GetWord(w));
            for(OutputWordCollection::const_iterator i = words->begin(); i != words->end(); ++i){
              TargetPhrase* p = new TargetPhrase(*(*j));
              Word& pw = p->GetWord(w);
              pw.Merge(i->first);
              curr_list.Add(p);
            }
          }
        }
        list = curr_list;
        //}
      }
      //cache for each translation
      for(Iter e = list.begin(); e < list.end(); ++e){
        Candidates cands;
        m_Table.GetCandidates(MakeTableKey(f,*(*e)), &cands);
        m_Cache.insert(std::make_pair(MakeCacheKey(f,*(*e)),cands));
      }
    }
  }
};
*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -