⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ngram_write_bin.c

📁 Julius version 4.12. About sound recognition.
💻 C
字号:
/**
 * @file   ngram_write_bin.c
 *
 * @brief  Write a whole N-gram data to a file in binary format
 *
 * From rev. 3.5, the internal format of binary N-gram has changed: data is
 * written in the machine-dependent natural byte order (previously fixed to
 * big endian) to speed up loading, indexes are stored in 24 bits, and the
 * 2-gram back-off data is compressed.  As a result, a binary N-gram
 * generated by mkbingram of 3.5 or later will not work on Julius 3.4.2 and
 * earlier (it is rejected at the header check).
 *
 * There is full upward- and cross-machine compatibility in 3.5 and later.
 * Old binary N-gram files can still be read directly, in which case the
 * conversion to 24bit index and the back-off compression are performed
 * each time the model is read.  The byte order is recorded in the header
 * and examined at load time, so a binary N-gram can still be shared among
 * machines with different byte order.
 *
 * @author Akinobu LEE
 * @date   Wed Feb 16 17:23:16 2005
 *
 * $Revision: 1.4 $
 *
 */
/*
 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <sent/stddefs.h>
#include <sent/ngram2.h>

static boolean need_swap; ///< TRUE if need byte swap

/**
 * Write via wrtfunc() and make the enclosing function return FALSE when
 * the write fails.  Wrapped in do/while(0) so that it behaves as a single
 * statement inside unbraced if/else bodies.
 */
#define wrt(A,B,C,D) do { if (wrtfunc(A,B,C,D) == FALSE) return FALSE; } while (0)

static unsigned int count;	///< Number of bytes written so far

/**
 * Reset the written-byte counter to zero.
 */
void
reset_wrt_counter()
{
  count = 0;
}

/**
 * Get the number of bytes written since the last counter reset.
 *
 * @return the current value of the written-byte counter.
 */
static unsigned int
get_wrt_counter()
{
  return count;
}

/**
 * Binary write function, with byte swapping if needed.
 *
 * When swapping is enabled and the unit is wider than one byte, the buffer
 * is passed to swap_bytes() before writing and once more afterwards, so
 * the caller's data is handed back in the state it was given, on both the
 * success and the failure path.
 *
 * @param fp [in] file pointer
 * @param buf [in] data buffer to write
 * @param unitbyte [in] unit size in bytes
 * @param unitnum [in] number of unit to write
 *
 * @return TRUE on success, FALSE if fewer than @a unitnum units were written.
 */
static boolean
wrtfunc(FILE *fp, void *buf, size_t unitbyte, size_t unitnum)
{
  boolean do_swap = (need_swap == TRUE && unitbyte != 1) ? TRUE : FALSE;
  boolean ok;

  if (do_swap == TRUE) {
    swap_bytes((char *)buf, unitbyte, unitnum);
  }
  ok = (myfwrite(buf, unitbyte, unitnum, fp) < unitnum) ? FALSE : TRUE;
  if (do_swap == TRUE) {
    /* undo the swap regardless of the write result */
    swap_bytes((char *)buf, unitbyte, unitnum);
  }
  if (ok == FALSE) {
    jlog("Error: write_ngram_bin: failed to write %lu bytes\n",
	 (unsigned long)(unitbyte * unitnum));
    return FALSE;
  }
  count += (unsigned int)(unitbyte * unitnum);
  return TRUE;
}

/**
 * Write header information, with identifier string.
 *
 * The header occupies exactly BINGRAM_HDSIZE bytes in the file.  It holds
 * the format identifier, the size string and the byte order string,
 * followed by the user string and a terminating NUL; the remaining bytes
 * keep the EOF fill value.  If the whole text does not fit, the tail of
 * the user string is dropped in the written header (the caller's string
 * itself is left untouched) and a warning is logged.
 *
 * @param fp [in] file pointer
 * @param str [in] user header string (any string within BINGRAM_HDSIZE
 * bytes is allowed)
 *
 * @return TRUE on success, FALSE on failure.
 */
static boolean
write_header(FILE *fp, char *str)
{
  char buf[BINGRAM_HDSIZE];
  size_t fixedlen, userlen;

  memset(buf, EOF, sizeof(buf));

  /* length of everything that precedes the user string */
  fixedlen = strlen(BINGRAM_IDSTR_V5) + 1
    + strlen(BINGRAM_SIZESTR_HEAD) + strlen(BINGRAM_SIZESTR_BODY) + 1
    + strlen(BINGRAM_BYTEORDER_HEAD) + strlen(BINGRAM_NATURAL_BYTEORDER) + 1;
  if (fixedlen >= sizeof(buf)) {
    jlog("Error: write_bingram: header area too small\n");
    return FALSE;
  }

  userlen = strlen(str);
  if (fixedlen + userlen >= sizeof(buf)) {
    jlog("Warning: write_bingram: header too long, last will be truncated\n");
    /* leave one byte for the terminating NUL written by sprintf() */
    userlen = sizeof(buf) - 1 - fixedlen;
  }

  /* "%.*s" bounds the user string, so at most sizeof(buf) bytes are written */
  sprintf(buf, "%s\n%s%s %s%s\n%.*s",
	  BINGRAM_IDSTR_V5,
	  BINGRAM_SIZESTR_HEAD, BINGRAM_SIZESTR_BODY,
	  BINGRAM_BYTEORDER_HEAD, BINGRAM_NATURAL_BYTEORDER,
	  (int)userlen, str);

  wrt(fp, buf, 1, BINGRAM_HDSIZE);

  return TRUE;
}

/**
 * Write a whole N-gram data in binary format.
 *
 * The data is written in this order: the fixed-size header, the N-gram
 * order / direction / index-reversed flags, the total number of entries
 * for each order, the word name table, the per-order tuple data, and
 * finally the optional additional LR 2-gram parameters.  Each optional
 * array is preceded by an int flag (1 = present, 0 = absent).
 *
 * @param fp [in] file pointer
 * @param ndata [in] N-gram data to write
 * @param headerstr [in] user header string
 *
 * @return TRUE on success, FALSE on failure
 */
boolean
ngram_write_bin(FILE *fp, NGRAM_INFO *ndata, char *headerstr)
{
  int i, n;
  unsigned int len;
  int wlen;
  NGRAM_TUPLE_INFO *t;

  reset_wrt_counter();

  /* write initial header */
  if (write_header(fp, headerstr) == FALSE) return FALSE;

  /* swap not needed any more */
  need_swap = FALSE;

  /* write some header info */
  wrt(fp, &(ndata->n), sizeof(int), 1);
  wrt(fp, &(ndata->dir), sizeof(int), 1);
  wrt(fp, &(ndata->bigram_index_reversed), sizeof(boolean), 1);

  /* write total info */
  for (n = 0; n < ndata->n; n++) {
    wrt(fp, &(ndata->d[n].totalnum), sizeof(NNID), 1);
  }

  /* unk_*, isopen, max_word_num are set after read, so need not save */

  /* write wname: total byte length first, then each name including \0 */
  wlen = 0;
  for (i = 0; i < ndata->max_word_num; i++) {
    wlen += strlen(ndata->wname[i]) + 1;
  }
  wrt(fp, &wlen, sizeof(int), 1);
  for (i = 0; i < ndata->max_word_num; i++) {
    wrt(fp, ndata->wname[i], 1, strlen(ndata->wname[i]) + 1); /* include \0 */
  }

  /* write N-gram */
  for (n = 0; n < ndata->n; n++) {
    t = &(ndata->d[n]);

    wrt(fp, &(t->is24bit), sizeof(boolean), 1);
    wrt(fp, &(t->ct_compaction), sizeof(boolean), 1);
    wrt(fp, &(t->bgnlistlen), sizeof(NNID), 1);
    wrt(fp, &(t->context_num), sizeof(NNID), 1);

    /* index arrays are written only for 2-gram and above */
    if (n > 0) {
      if (t->is24bit) {
	wrt(fp, t->bgn_upper, sizeof(NNID_UPPER), t->bgnlistlen);
	wrt(fp, t->bgn_lower, sizeof(NNID_LOWER), t->bgnlistlen);
      } else {
	wrt(fp, t->bgn, sizeof(NNID), t->bgnlistlen);
      }
      wrt(fp, t->num, sizeof(WORD_ID), t->bgnlistlen);
      wrt(fp, t->nnid2wid, sizeof(WORD_ID), t->totalnum);
    }

    wrt(fp, t->prob, sizeof(LOGPROB), t->totalnum);

    /* back-off weights (optional) */
    if (t->bo_wt) {
      i = 1;
      wrt(fp, &i, sizeof(int), 1);
      wrt(fp, t->bo_wt, sizeof(LOGPROB), t->context_num);
    } else {
      i = 0;
      wrt(fp, &i, sizeof(int), 1);
    }

    /* N-gram id to context id mapping (optional) */
    if (t->nnid2ctid_upper) {
      i = 1;
      wrt(fp, &i, sizeof(int), 1);
      wrt(fp, t->nnid2ctid_upper, sizeof(NNID_UPPER), t->totalnum);
      wrt(fp, t->nnid2ctid_lower, sizeof(NNID_LOWER), t->totalnum);
    } else {
      i = 0;
      wrt(fp, &i, sizeof(int), 1);
    }
  }

  /* write additional LR 2-gram */
  if (ndata->bo_wt_1) {
    i = 1;
    wrt(fp, &i, sizeof(int), 1);
    wrt(fp, ndata->bo_wt_1, sizeof(LOGPROB), ndata->d[0].context_num);
  } else {
    i = 0;
    wrt(fp, &i, sizeof(int), 1);
  }
  if (ndata->p_2) {
    i = 1;
    wrt(fp, &i, sizeof(int), 1);
    wrt(fp, ndata->p_2, sizeof(LOGPROB), ndata->d[1].totalnum);
  } else {
    i = 0;
    wrt(fp, &i, sizeof(int), 1);
  }

  len = get_wrt_counter();
  jlog("Stat: ngram_write_bin: wrote %lu bytes (%.1f MB)\n",
       (unsigned long)len, len / 1048576.0);

  return TRUE;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -