n_gram.cpp.svn-base
来自「解码器是基于短语的统计机器翻译系统的核心模块」· SVN-BASE 代码 · 共 215 行
SVN-BASE
215 行
/****************************************************************************** IrstLM: IRST Language Model Toolkit Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA******************************************************************************/#include <iomanip>#include <cassert>#include "mempool.h"#include "htable.h"#include "dictionary.h"#include "n_gram.h"#include "index.h"using namespace std;ngram::ngram(dictionary* d,int sz){ dict=d; size=sz; succ=0; freq=0; info=0; pinfo=0; link=NULL; isym=-1; memset(word,0,sizeof(int)*MAX_NGRAM); memset(midx,0,sizeof(int)*MAX_NGRAM);}ngram::ngram(ngram& ng){ size=ng.size; freq=ng.freq; succ=0; info=0; pinfo=0; link=NULL; isym=-1; dict=ng.dict; memcpy(word,ng.word,sizeof(int)*MAX_NGRAM); memcpy(midx,ng.word,sizeof(int)*MAX_NGRAM);}void ngram::trans (const ngram& ng){ size=ng.size; freq=ng.freq; if (dict == ng.dict){ info=ng.info; isym=ng.isym; memcpy(word,ng.word,sizeof(int)*MAX_NGRAM); memcpy(midx,ng.midx,sizeof(int)*MAX_NGRAM); } else{ info=0; memset(midx,0,sizeof(int)*MAX_NGRAM); isym=-1; for (int i=1;i<=size;i++) word[MAX_NGRAM-i]=dict->encode(ng.dict->decode(*ng.wordp(i))); }}ifstream& operator>> ( ifstream& fi , ngram& ng){ char w[MAX_WORD]; memset(w,0,MAX_WORD); w[0]='\0'; if (!(fi >> setw(MAX_WORD) >> w)) return fi; if (strlen(w)==(MAX_WORD-1)) cerr << "ngram: a too long word was read (" << w << ")\n"; if (ng.dict->intsymb() && (strlen(w)==1) && (index(ng.dict->intsymb(),w[0])!=NULL)){ ng.isym=(long)index(ng.dict->intsymb(),w[0]) - (long)ng.dict->intsymb(); ng.size=0; return fi; } int c=ng.dict->encode(w); if (c == -1 ){ cerr << "ngram: " << w << " is OOV \n"; exit(1); } memcpy(ng.word,ng.word+1,(MAX_NGRAM-1)*sizeof(int)); ng.word[MAX_NGRAM-1]=(int)c; ng.freq=1; if (ng.size<MAX_NGRAM) ng.size++; return fi;}int ngram::pushw(char* w){ assert(dict!=NULL); int c=dict->encode(w); if (c == -1 ){ cerr << "ngram: " << w << " is OOV \n"; exit(1); } pushc(c); return 1;}int ngram::pushc(int c){ int buff[MAX_NGRAM-1]; memcpy(buff,word+1,(MAX_NGRAM-1)*sizeof(int)); memcpy(word,buff,(MAX_NGRAM-1)*sizeof(int)); word[MAX_NGRAM-1]=(int)c; if (size<MAX_NGRAM) size++; return 1;}istream& operator>> ( istream& fi , ngram& ng){ char w[MAX_WORD]; memset(w,0,MAX_WORD); w[0]='\0'; assert(ng.dict != NULL); if (!(fi >> setw(MAX_WORD) >> w)) return fi; if (strlen(w)==(MAX_WORD-1)) cerr << "ngram: a too long word was read (" << w << ")\n"; if (ng.dict->intsymb() && (strlen(w)==1) && (index(ng.dict->intsymb(),w[0])!=NULL)){ ng.isym=(long)index(ng.dict->intsymb(),w[0])-(long)ng.dict->intsymb(); ng.size=0; return fi; } ng.pushw(w); ng.freq=1; return fi;}ofstream& operator<< (ofstream& fo,ngram& ng){ assert(ng.dict != NULL); for (int i=ng.size;i>0;i--) fo << ng.dict->decode(ng.word[MAX_NGRAM-i]) << " "; //fo << "[size " << ng.size << " freq " << ng.freq << "]"; fo << ng.freq; return fo;}ostream& operator<< (ostream& fo,ngram& ng){ assert(ng.dict != NULL); for (int i=ng.size;i>0;i--) fo << ng.dict->decode(ng.word[MAX_NGRAM-i]) << " "; //fo << "[size " << ng.size << " freq " << ng.freq << "]"; fo << ng.freq; return fo;}/*main(int argc, char** argv){ dictionary d(argv[1]); ifstream txt(argv[1]); ngram ng(&d); while (txt >> ng){ cout << ng << "\n"; } ngram ng2=ng; cerr << "copia l'ultimo =" << ng << "\n";}*/
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?