⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 prefixtreemap.cpp.svn-base

📁 moses开源的机器翻译系统
💻 SVN-BASE
字号:
#include "PrefixTreeMap.h"#include "TypeDef.h"void GenericCandidate::readBin(FILE* f){  m_PhraseList.clear();  m_ScoreList.clear();  UINT32 num_phrases;  // on older compilers, <stdint.h> may need to be included  fRead(f, num_phrases);  for(unsigned int i = 0; i < num_phrases; ++i){	IPhrase phrase;	fReadVector(f, phrase);	m_PhraseList.push_back(phrase);  };  UINT32 num_scores;  fRead(f, num_scores);  for(unsigned int j = 0; j < num_scores; ++j){	std::vector<float> score;	fReadVector(f, score);	m_ScoreList.push_back(score);  };};void GenericCandidate::writeBin(FILE* f) const {  // cast is necessary to ensure compatibility between 32- and 64-bit platforms  fWrite(f, static_cast<UINT32>(m_PhraseList.size()));  for(size_t i = 0; i < m_PhraseList.size(); ++i){	fWriteVector(f, m_PhraseList[i]);  }  fWrite(f, static_cast<UINT32>(m_ScoreList.size()));  for(size_t j = 0; j < m_ScoreList.size(); ++j){	fWriteVector(f, m_ScoreList[j]);  }};void Candidates::writeBin(FILE* f) const {  UINT32 s = this->size();  fWrite(f,s);  for(size_t i = 0; i < s; ++i) {	MyBase::operator[](i).writeBin(f);  }}void Candidates::readBin(FILE* f) {  UINT32 s;  fRead(f,s);  this->resize(s);  for(size_t i = 0; i<s; ++i) {	MyBase::operator[](i).readBin(f);  }}const LabelId PrefixTreeMap::MagicWord = InvalidLabelId - 1; void PrefixTreeMap::FreeMemory() {  for(Data::iterator i = m_Data.begin(); i != m_Data.end(); ++i){	i->free();  }  for(size_t i = 0; i < m_Voc.size(); ++i){	delete m_Voc[i];	m_Voc[i] = 0;  }  m_PtrPool.reset();}int PrefixTreeMap::Read(const std::string& fileNameStem, int numVocs){  std::string ifs(fileNameStem + ".srctree"),	ift(fileNameStem + ".tgtdata"),	ifi(fileNameStem + ".idx"),	ifv(fileNameStem + ".voc");    std::vector<OFF_T> srcOffsets;  FILE *ii=fOpen(ifi.c_str(),"rb");  fReadVector(ii,srcOffsets);  fClose(ii);   m_FileSrc = fOpen(ifs.c_str(),"rb");  m_FileTgt = fOpen(ift.c_str(),"rb");    m_Data.resize(srcOffsets.size());    for(size_t i = 0; i < m_Data.size(); ++i){	m_Data[i] = CPT(m_FileSrc, srcOffsets[i]);  }    if(-1 == numVocs){	char num[5];	numVocs = 0;	sprintf(num, "%d", numVocs);	while(FileExists(ifv + num)){	  ++numVocs;	  sprintf(num, "%d", numVocs);	}  }  char num[5];	m_Voc.resize(numVocs);	for(int i = 0; i < numVocs; ++i){	  sprintf(num, "%d", i);	  m_Voc[i] = new WordVoc();	  m_Voc[i]->Read(ifv + num);	}	TRACE_ERR("binary file loaded, default OFF_T: "<< PTF::getDefault()<<"\n");	return 1;};	void PrefixTreeMap::GetCandidates(const IPhrase& key, Candidates* cands) {  //check if key is valid  if(key.empty() || key[0] >= m_Data.size() || !m_Data[key[0]]){	return;  }  assert(m_Data[key[0]]->findKey(key[0])<m_Data[key[0]]->size());    OFF_T candOffset = m_Data[key[0]]->find(key);  if(candOffset == InvalidOffT){ 	return;  }  fSeek(m_FileTgt,candOffset);  cands->readBin(m_FileTgt);}void PrefixTreeMap::GetCandidates(const PPimp& p, Candidates* cands) {  assert(p.isValid());  if(p.isRoot()) { 	return;   };  OFF_T candOffset = p.ptr()->getData(p.idx);  if(candOffset == InvalidOffT) { 	return;  }  fSeek(m_FileTgt,candOffset);  cands->readBin(m_FileTgt);}std::vector< std::string const * > PrefixTreeMap::ConvertPhrase(const IPhrase& p, unsigned int voc) const{  assert(voc < m_Voc.size() && m_Voc[voc] != 0);  std::vector< std::string const * > result; result.reserve(p.size());  for(IPhrase::const_iterator i = p.begin(); i != p.end(); ++i){	result.push_back(&(m_Voc[voc]->symbol(*i)));  }  return result;}IPhrase PrefixTreeMap::ConvertPhrase(const std::vector< std::string >& p, unsigned int voc) const{  assert(voc < m_Voc.size() && m_Voc[voc] != 0);  IPhrase result;   result.reserve(p.size());  for(size_t i = 0; i < p.size(); ++i){	result.push_back(m_Voc[voc]->index(p[i]));  }  return result;}LabelId PrefixTreeMap::ConvertWord(const std::string& w, unsigned int voc) const {  assert(voc < m_Voc.size() && m_Voc[voc] != 0);  return m_Voc[voc]->index(w);}std::string PrefixTreeMap::ConvertWord(LabelId w, unsigned int voc) const {  assert(voc < m_Voc.size() && m_Voc[voc] != 0);  if(w == PrefixTreeMap::MagicWord){	return "|||";  } else if (w == InvalidLabelId){	return "<invalid>";  } else {	return m_Voc[voc]->symbol(w);  }}PPimp* PrefixTreeMap::GetRoot() {  return m_PtrPool.get(PPimp(0,0,1));} PPimp* PrefixTreeMap::Extend(PPimp* p, LabelId wi) {	  assert(p->isValid());    if(wi == InvalidLabelId) { 	return 0; // unknown word, return invalid pointer	  } else if(p->isRoot()) {	if(wi < m_Data.size() && m_Data[wi]){	  assert(m_Data[wi]->findKeyPtr(wi));	  return m_PtrPool.get(PPimp(m_Data[wi],m_Data[wi]->findKey(wi),0));	}  } else if(PTF const* nextP = p->ptr()->getPtr(p->idx)) {	return m_PtrPool.get(PPimp(nextP, nextP->findKey(wi),0));  }		  return 0; // should never get here, return invalid pointer   }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -