⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ngram_write_bin.c

📁 About sound recognition. I want to download.
💻 C
字号:
/**
 * @file   ngram_write_bin.c
 * @author Akinobu LEE
 * @date   Wed Feb 16 17:23:16 2005
 *
 * <EN>
 * @brief  Write a whole N-gram data to a file in binary format
 *
 * From 3.5, the internal format of binary N-gram has changed to use
 * machine-dependent natural byte order (previously fixed to big endian),
 * 24bit index and 2-gram backoff compression.  So, binary N-gram
 * generated by mkbingram of 3.5 and later will not work on 3.4.2 and
 * earlier versions.
 *
 * There is full upward- and cross-machine compatibility in 3.5.  Old
 * binary N-gram files can still be read directly, in which case the
 * conversion to 24bit index will be performed just after the model has
 * been read.  Byte order is also recorded in the header information, so
 * a binary N-gram can still be used among different machines.
 * </EN>
 *
 * $Revision: 1.4 $
 *
 */
/*
 * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <sent/stddefs.h>
#include <sent/ngram2.h>

static boolean need_swap; ///< TRUE if need byte swap

/**
 * Binary write function, with byte swapping if needed.
 *
 * When swapping is required, the buffer is byte-swapped in place before
 * the write and swapped again afterwards, which is intended to leave the
 * caller's data as it was.  Single-byte units are never swapped.
 *
 * On a short write, the system error is reported with perror() and
 * j_error() is called.
 *
 * @param fp [in] file pointer
 * @param buf [in] data buffer to write
 * @param unitbyte [in] unit size in bytes
 * @param unitnum [in] number of unit to write
 */
static void
wrt(FILE *fp, void *buf, size_t unitbyte, int unitnum)
{
  /* byte order is meaningless for 1-byte units, so skip the swap there */
  boolean do_swap = (need_swap == TRUE && unitbyte != 1) ? TRUE : FALSE;

  if (do_swap == TRUE) {
    swap_bytes((char *)buf, unitbyte, unitnum);
  }
  if (myfwrite(buf, unitbyte, unitnum, fp) < (size_t)unitnum) {
    perror("write_ngram_bin: wrt");
    j_error("write failed\n");
  }
  if (do_swap == TRUE) {
    /* swap back so that the in-memory data keeps its original order */
    swap_bytes((char *)buf, unitbyte, unitnum);
  }
}

/**
 * Write header information, with identifier string.
 *
 * The header is a fixed-size block of BINGRAM_HDSIZE bytes.  It holds a
 * NUL-terminated text made of the format identifier, the size
 * information, (for version 4) the byte order information, and the user
 * header string.  Bytes after the terminating NUL are filled with EOF.
 *
 * If the user string does not fit, a warning is printed and only the
 * leading part that fits (together with the terminating NUL) is
 * written.  The caller's string itself is not modified.
 *
 * @param fp [in] file pointer
 * @param str [in] user header string (any string within BINGRAM_HDSIZE
 * bytes is allowed); NULL is treated as an empty string
 * @param version [in] file format version id (3 or 4)
 */
static void
write_header(FILE *fp, char *str, int version)
{
  char buf[BINGRAM_HDSIZE];
  int i;
  int fixedlen;			/* length of the non-user part of the text */
  int maxuser;			/* user chars that fit, leaving room for NUL */
  int userlen;			/* user chars actually written */

  if (str == NULL) str = "";

  /* pad the whole header; the text written below overwrites the front */
  for (i = 0; i < BINGRAM_HDSIZE; i++) buf[i] = EOF;

  /* each "+ 1" counts one separator character in the format strings below */
  switch (version) {
  case 4:
    fixedlen = (int)(strlen(BINGRAM_IDSTR_V4) + 1
		     + strlen(BINGRAM_SIZESTR_HEAD) + strlen(BINGRAM_SIZESTR_BODY) + 1
		     + strlen(BINGRAM_BYTEORDER_HEAD) + strlen(BINGRAM_NATURAL_BYTEORDER) + 1);
    break;
  case 3:
    fixedlen = (int)(strlen(BINGRAM_IDSTR) + 1
		     + strlen(BINGRAM_SIZESTR_HEAD) + strlen(BINGRAM_SIZESTR_BODY) + 1);
    break;
  default:
    /* without a known version there is no defined header layout */
    j_error("write_header: unknown binary N-gram version\n");
    return;
  }

  /* one byte of the header must stay free for the terminating NUL */
  maxuser = BINGRAM_HDSIZE - 1 - fixedlen;
  if (maxuser < 0) {
    j_error("write_header: header area too small\n");
    return;
  }

  userlen = (int)strlen(str);
  if (userlen > maxuser) {
    j_printerr("Warning: user header too long, last will be truncated\n");
    userlen = maxuser;
  }

  /* "%.*s" bounds the user string, so at most BINGRAM_HDSIZE bytes
     (including the NUL) are ever stored into buf */
  switch (version) {
  case 4:
    sprintf(buf, "%s\n%s%s %s%s\n%.*s", BINGRAM_IDSTR_V4,
	    BINGRAM_SIZESTR_HEAD, BINGRAM_SIZESTR_BODY,
	    BINGRAM_BYTEORDER_HEAD, BINGRAM_NATURAL_BYTEORDER,
	    userlen, str);
    break;
  case 3:
    sprintf(buf, "%s\n%s%s\n%.*s", BINGRAM_IDSTR,
	    BINGRAM_SIZESTR_HEAD, BINGRAM_SIZESTR_BODY,
	    userlen, str);
    break;
  }

  wrt(fp, buf, 1, BINGRAM_HDSIZE);
}

/**
 * Write a whole N-gram data in binary format.
 *
 * The data is written in this order: header, N-gram counts (plus the
 * number of 2-gram backoff entries for version 4), word names, 1-gram
 * data, 2-gram data, 3-gram data.  Version 4 is written in the natural
 * byte order of this machine; version 3 is byte-swapped unless
 * WORDS_BIGENDIAN is defined.
 *
 * Write errors are handled inside wrt() via j_error().
 *
 * @param fp [in] file pointer
 * @param ndata [in] N-gram data to write
 * @param headerstr [in] user header string
 *
 * @return TRUE on success, FALSE on failure (unsupported format version)
 */
boolean
ngram_write_bin(FILE *fp, NGRAM_INFO *ndata, char *headerstr)
{
  int i, n, len;

  /* only format versions 3 and 4 have a defined layout here */
  if (ndata->version != 3 && ndata->version != 4) {
    j_printerr("Error: ngram_write_bin: unsupported binary N-gram version\n");
    return FALSE;
  }

  /* write initial header (byte data only, so need_swap does not matter yet) */
  write_header(fp, headerstr, ndata->version);

  /* set swap requirement */
  if (ndata->version == 4) {
    need_swap = FALSE;
  } else {
#ifdef WORDS_BIGENDIAN
    need_swap = FALSE;
#else
    need_swap = TRUE;
#endif
  }

  /* write total info */
  for (n = 0; n < MAX_N; n++) {
    wrt(fp, &(ndata->ngram_num[n]), sizeof(NNID), 1);
  }
  if (ndata->version == 4) {
    wrt(fp, &(ndata->bigram_bo_num), sizeof(NNID), 1);
  }
  j_printf("wrote total info\n");

  /* unk_*, isopen, max_word_num are set after read, so need not save */

  /* write wname: total byte length first, then each name with its '\0' */
  len = 0;
  for (i = 0; i < ndata->ngram_num[0]; i++) {
    len += strlen(ndata->wname[i]) + 1;
  }
  wrt(fp, &len, sizeof(int), 1);
  for (i = 0; i < ndata->ngram_num[0]; i++) {
    wrt(fp, ndata->wname[i], 1, strlen(ndata->wname[i]) + 1); /* include \0 */
  }
  /* sizeof yields size_t, so cast to match the "%d" conversion */
  j_printf("wrote wnames (%d bytes)\n", (int)(len + sizeof(int)));

  /* write 1-gram */
  wrt(fp, ndata->p, sizeof(LOGPROB), ndata->ngram_num[0]);
  wrt(fp, ndata->bo_wt_lr, sizeof(LOGPROB), ndata->ngram_num[0]);
  wrt(fp, ndata->bo_wt_rl, sizeof(LOGPROB), ndata->ngram_num[0]);
  wrt(fp, ndata->n2_bgn, sizeof(NNID), ndata->ngram_num[0]);
  wrt(fp, ndata->n2_num, sizeof(WORD_ID), ndata->ngram_num[0]);
  j_printf("wrote 1-gram (%d KB)\n",
	   (int)(((sizeof(LOGPROB)*3 + sizeof(NNID) + sizeof(WORD_ID)) * ndata->ngram_num[0]) / 1024));

  /* write 2-gram */
  wrt(fp, ndata->n2tonid, sizeof(WORD_ID), ndata->ngram_num[1]);
  wrt(fp, ndata->p_lr, sizeof(LOGPROB), ndata->ngram_num[1]);
  wrt(fp, ndata->p_rl, sizeof(LOGPROB), ndata->ngram_num[1]);
  switch (ndata->version) {
  case 4:
    /* per-2-gram index into the backoff entries, split into upper/lower parts */
    wrt(fp, ndata->n2bo_upper, sizeof(NNID_UPPER), ndata->ngram_num[1]);
    wrt(fp, ndata->n2bo_lower, sizeof(NNID_LOWER), ndata->ngram_num[1]);
    /* backoff data is stored only for bigram_bo_num entries */
    wrt(fp, ndata->bo_wt_rrl, sizeof(LOGPROB), ndata->bigram_bo_num);
    wrt(fp, ndata->n3_bgn_upper, sizeof(NNID_UPPER), ndata->bigram_bo_num);
    wrt(fp, ndata->n3_bgn_lower, sizeof(NNID_LOWER), ndata->bigram_bo_num);
    wrt(fp, ndata->n3_num, sizeof(WORD_ID), ndata->bigram_bo_num);
    j_printf("wrote 2-gram (%d KB)\n",
	     (int)(((sizeof(LOGPROB)*2 + sizeof(NNID_UPPER) + sizeof(NNID_LOWER) + sizeof(WORD_ID)) * ndata->ngram_num[1]
		    + (sizeof(LOGPROB) + sizeof(NNID_UPPER) + sizeof(NNID_LOWER) + sizeof(WORD_ID)) * ndata->bigram_bo_num) / 1024));
    break;
  case 3:
    /* backoff data is stored for every 2-gram */
    wrt(fp, ndata->bo_wt_rrl, sizeof(LOGPROB), ndata->ngram_num[1]);
    wrt(fp, ndata->n3_bgn, sizeof(NNID), ndata->ngram_num[1]);
    wrt(fp, ndata->n3_num, sizeof(WORD_ID), ndata->ngram_num[1]);
    j_printf("wrote 2-gram (%d KB)\n",
	     (int)(((sizeof(LOGPROB)*3 + sizeof(NNID) + sizeof(WORD_ID)*2) * ndata->ngram_num[1]) / 1024));
    break;
  }

  /* write 3-gram */
  wrt(fp, ndata->n3tonid, sizeof(WORD_ID), ndata->ngram_num[2]);
  wrt(fp, ndata->p_rrl, sizeof(LOGPROB), ndata->ngram_num[2]);
  j_printf("wrote 3-gram (%d KB)\n",
	   (int)(((sizeof(LOGPROB) + sizeof(WORD_ID)) * ndata->ngram_num[2]) / 1024));

  return TRUE;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -