⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ngram_read_bin.c

📁 Julius version 4.12. About sound recognition.
💻 C
📖 第 1 页 / 共 2 页
字号:
      rdn(fp, t->bo_wt, sizeof(LOGPROB), t->context_num);    } else {      t->bo_wt = NULL;    }    rdn(fp, &i, sizeof(int), 1);    if (i == 1) {      t->nnid2ctid_upper = (NNID_UPPER *)mymalloc_big(sizeof(NNID_UPPER), t->totalnum);      t->nnid2ctid_lower = (NNID_LOWER *)mymalloc_big(sizeof(NNID_LOWER), t->totalnum);      rdn(fp, t->nnid2ctid_upper, sizeof(NNID_UPPER), t->totalnum);      rdn(fp, t->nnid2ctid_lower, sizeof(NNID_LOWER), t->totalnum);    } else {      t->nnid2ctid_upper = NULL;      t->nnid2ctid_lower = NULL;    }  }  rdn(fp, &i, sizeof(int), 1);  if (i == 1) {    ndata->bo_wt_1 = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), ndata->d[0].context_num);    rdn(fp, ndata->bo_wt_1, sizeof(LOGPROB), ndata->d[0].context_num);  } else {    ndata->bo_wt_1 = NULL;  }  rdn(fp, &i, sizeof(int), 1);  if (i == 1) {    ndata->p_2 = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), ndata->d[1].totalnum);    rdn(fp, ndata->p_2, sizeof(LOGPROB), ndata->d[1].totalnum);  } else {    ndata->p_2 = NULL;  }  return TRUE;}static booleanngram_read_bin_compat(FILE *fp, NGRAM_INFO *ndata, int *retry_ret){  int i,n,len;  char *w, *p;  NNID *n3_bgn;  NNID d, ntmp;#ifdef WORDS_INT  unsigned short *buf;#endif  NGRAM_TUPLE_INFO *t, *tt, *ttt;  /* old binary N-gram assumes these types */  ndata->bigram_index_reversed = TRUE;  ndata->n = 3;  ndata->dir = DIR_RL;  /* read total info and set max_word_num */  for(n=0;n<ndata->n;n++) {    rdn(fp, &(ndata->d[n].totalnum), sizeof(NNID), 1);  }  ndata->max_word_num = ndata->d[0].totalnum;  if (file_version == 4) {    rdn(fp, &(ndata->d[1].context_num), sizeof(NNID), 1);  }  for(n=0;n<ndata->n;n++) {    if (n < 2) {      ndata->d[n].is24bit = FALSE;    } else {      if (ndata->d[n].totalnum >= NNID_MAX_24) {	jlog("Warning: ngram_read_bin_compat: num of %d-gram exceeds 24bit, now switch to %dbit index\n", n+1, sizeof(NNID) * 8);	ndata->d[n].is24bit = FALSE;      } else {	ndata->d[n].is24bit = TRUE;      }    }    ndata->d[n].nnid2ctid_upper = NULL;    
ndata->d[n].nnid2ctid_lower = NULL;  }  /* always do back-off compaction for 3-gram and up */  /* mark 2-gram and up */  ndata->d[0].ct_compaction = FALSE;  for(n=1;n<ndata->n;n++) {    ndata->d[n].ct_compaction = TRUE;  }  /* read wname */  rdn(fp, &len, sizeof(int), 1);  w = mymalloc(len);  rdn(fp, w, 1, len);  /* assign... */  ndata->wname = (char **)mymalloc(sizeof(char *) * ndata->max_word_num);  p = w; i = 0;  while (p < w + len) {    ndata->wname[i++] = p;    while(*p != '\0') p++;    p++;  }  if (i != ndata->max_word_num) {    jlog("Error: ngram_read_bin_compat: wname error??\n");    return FALSE;  }  /* malloc 1-gram */  t = &(ndata->d[0]);  tt = &(ndata->d[1]);  ttt = &(ndata->d[2]);  t->bgn_upper = NULL;  t->bgn_lower = NULL;  t->bgn = NULL;  t->num = NULL;  t->bgnlistlen = 0;  t->nnid2wid = NULL;  t->nnid2ctid_upper = NULL;  t->nnid2ctid_lower = NULL;  t->context_num = t->totalnum;  t->prob = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), t->totalnum);  ndata->bo_wt_1 = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), t->context_num);  t->bo_wt = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), t->context_num);  tt->bgnlistlen = t->context_num;  tt->bgn = (NNID *)mymalloc_big(sizeof(NNID), tt->bgnlistlen);  tt->num = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), tt->bgnlistlen);  /* read 1-gram */  jlog("stat: ngram_read_bin_compat: reading 1-gram\n");  rdn(fp, t->prob, sizeof(LOGPROB), t->totalnum);  rdn(fp, ndata->bo_wt_1, sizeof(LOGPROB), t->context_num);  rdn(fp, t->bo_wt, sizeof(LOGPROB), t->context_num);  rdn(fp, tt->bgn, sizeof(NNID), tt->bgnlistlen);#ifdef WORDS_INT  rdn_wordid(fp, tt->num, tt->bgnlistlen, need_conv);#else  rdn(fp, tt->num, sizeof(WORD_ID), tt->bgnlistlen);#endif#ifdef WORDS_INT  {    /* check if we are wrongly reading word_id=2byte bingram       (if bingram version >= 4, this should not be happen because        header correctly tells the word_id byte size.  
This will 	occur only if matches all the conditions below:	- you run Julius with --enable-words-int,	- you use old bingram of version <= 3, and	- you use bingram file converted without --enable-words-int     */    WORD_ID w;    for(w=0;w<ndata->max_word_num;w++) {      if (ndata->d[1].num[w] > ndata->max_word_num) {	if (words_int_retry) {	  jlog("Error: ngram_read_bin_compat: retry failed, wrong bingram format\n");	  return FALSE;	}	jlog("Warning: ngram_read_bin_compat: incorrect data, may be a 2-byte v3 bingram, retry with conversion\n");	free(ndata->wname[0]);	free(ndata->wname);	free(t->prob);	free(ndata->bo_wt_1);	free(t->bo_wt);	free(tt->bgn);	free(tt->num);	myfrewind(fp);	words_int_retry = TRUE;	*retry_ret = 1;	return FALSE;      }    }  }#endif  /* malloc the rest */  tt->nnid2wid = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), tt->totalnum);  tt->prob = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), tt->totalnum);  ndata->p_2 = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), tt->totalnum);  if (file_version == 4) {	/* context compaction and 24bit */    tt->nnid2ctid_upper = (NNID_UPPER *)mymalloc_big(sizeof(NNID_UPPER), tt->totalnum);    tt->nnid2ctid_lower = (NNID_LOWER *)mymalloc_big(sizeof(NNID_LOWER), tt->totalnum);    tt->bo_wt = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), tt->context_num);    ttt->bgnlistlen = tt->context_num;    ttt->bgn_upper = (NNID_UPPER *)mymalloc_big(sizeof(NNID_UPPER), ttt->bgnlistlen);    ttt->bgn_lower = (NNID_LOWER *)mymalloc_big(sizeof(NNID_LOWER), ttt->bgnlistlen);    ttt->num = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), ttt->bgnlistlen);  } else {    tt->context_num = tt->totalnum;    tt->bo_wt = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), tt->context_num);    ttt->bgnlistlen = tt->context_num;    ttt->num = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), ttt->bgnlistlen);    if (ttt->is24bit) {      ttt->bgn_upper = (NNID_UPPER *)mymalloc_big(sizeof(NNID_UPPER), ttt->bgnlistlen);      ttt->bgn_lower = (NNID_LOWER *)mymalloc_big(sizeof(NNID_LOWER), 
ttt->bgnlistlen);      n3_bgn = (NNID *)mymalloc_big(sizeof(NNID), ttt->bgnlistlen);    } else {      ttt->bgn = (NNID *)mymalloc_big(sizeof(NNID), ttt->bgnlistlen);    }  }        ttt->nnid2wid = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), ttt->totalnum);  ttt->prob = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), ttt->totalnum);  ttt->bo_wt = NULL;    /* read 2-gram*/  jlog("Stat: ngram_read_bin_compat: reading 2-gram\n");#ifdef WORDS_INT  rdn_wordid(fp, tt->nnid2wid, tt->totalnum, need_conv);#else  rdn(fp, tt->nnid2wid, sizeof(WORD_ID), tt->totalnum);#endif  rdn(fp, ndata->p_2, sizeof(LOGPROB), tt->totalnum);  rdn(fp, tt->prob, sizeof(LOGPROB), tt->totalnum);  if (file_version == 4) {    rdn(fp, tt->nnid2ctid_upper, sizeof(NNID_UPPER), tt->totalnum);    rdn(fp, tt->nnid2ctid_lower, sizeof(NNID_LOWER), tt->totalnum);    rdn(fp, tt->bo_wt, sizeof(LOGPROB), tt->context_num);    rdn(fp, ttt->bgn_upper, sizeof(NNID_UPPER), ttt->bgnlistlen);    rdn(fp, ttt->bgn_lower, sizeof(NNID_LOWER), ttt->bgnlistlen);#ifdef WORDS_INT    rdn_wordid(fp, ttt->num, ttt->bgnlistlen, need_conv);#else    rdn(fp, ttt->num, sizeof(WORD_ID), ttt->bgnlistlen);#endif  } else {    rdn(fp, tt->bo_wt, sizeof(LOGPROB), tt->context_num);    if (ttt->is24bit) {      rdn(fp, n3_bgn, sizeof(NNID), ttt->bgnlistlen);      for(d=0;d<ttt->bgnlistlen;d++) {	if (n3_bgn[d] == NNID_INVALID) {	  ttt->bgn_lower[d] = 0;	  ttt->bgn_upper[d] = NNID_INVALID_UPPER;	} else {	  ntmp = n3_bgn[d] & 0xffff;	  ttt->bgn_lower[d] = ntmp;	  ntmp = n3_bgn[d] >> 16;	  ttt->bgn_upper[d] = ntmp;	}      }    } else {      rdn(fp, ttt->bgn, sizeof(NNID), ttt->bgnlistlen);    }#ifdef WORDS_INT    rdn_wordid(fp, ttt->num, ttt->bgnlistlen, need_conv);#else    rdn(fp, ttt->num, sizeof(WORD_ID), ttt->bgnlistlen);#endif  }  /* read 3-gram*/  jlog("Stat: ngram_read_bin_compat: reading 3-gram\n");#ifdef WORDS_INT  rdn_wordid(fp, ttt->nnid2wid, ttt->totalnum, need_conv);#else  rdn(fp, ttt->nnid2wid, sizeof(WORD_ID), ttt->totalnum);#endif  rdn(fp, 
ttt->prob, sizeof(LOGPROB), ttt->totalnum);  /* compact the 2-gram back-off and 3-gram links */  if (file_version != 4) {    if (ttt->is24bit) {      free(n3_bgn);      if (ngram_compact_context(ndata, 2) == FALSE) return FALSE;    }  }    return TRUE;}/**  * Read a N-gram binary file and store to data. *  * @param fp [in] file pointer * @param ndata [out] N-gram data to store the read data *  * @return TRUE on success, FALSE on failure. */booleanngram_read_bin(FILE *fp, NGRAM_INFO *ndata){  int retry;#ifdef WORDS_INT  /* reset retry flag */  words_int_retry = FALSE;  /* when retrying, it restarts from here with words_int_retry = TRUE */ ngram_read_bin_start:#endif    ndata->from_bin = TRUE;  /* check initial header */  if (check_header(fp) == FALSE) return FALSE;  #ifdef WORDS_INT  /* in retry mode, force word_id conversion  */  if (words_int_retry) need_conv = TRUE;#endif  #ifdef WORDS_INT  if (need_conv) jlog("Stat: ngram_read_bin: word-id size conversion enabled\n");#endif  if (file_version <= 4) {    retry = 0;    if (ngram_read_bin_compat(fp, ndata, &retry) == FALSE) {#ifdef WORDS_INT      if (retry == 1) {	goto ngram_read_bin_start;      } else {	return FALSE;      }#else      return FALSE;#endif    }  } else {    if (ngram_read_bin_v5(fp, ndata) == FALSE) return FALSE;  }  /* make word search tree for later lookup */  jlog("Stat: ngram_read_bin: making entry name index\n");  ngram_make_lookup_tree(ndata);  bi_prob_func_set(ndata);  return TRUE;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -