main_align.c
来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 1,189 行 · 第 1/3 页
C
1,189 行
strcpy (buf, dir); buf[k-4] = '/'; strcpy (buf+k-3, ctlspec); } else strcpy (buf, ctlspec); } else { strcpy (buf, dir); buf[k] = '/'; strcpy (buf+k+1, uttid); }}/* * Write state segmentation in Sphinx-II format. (Must be written in BIG-ENDIAN * format!) */static void write_s2stseg (char *dir, align_stseg_t *stseg, char *uttid, char *ctlspec){ char filename[1024]; FILE *fp; align_stseg_t *tmp; int32 k; s3cipid_t ci[3]; word_posn_t wpos; int16 s2_info; char buf[8]; static int32 byterev = -1; /* Whether to byte reverse output data */ build_output_uttfile (filename, dir, uttid, ctlspec); strcat (filename, ".v8_seg"); /* .v8_seg for compatibility */ E_INFO("Writing Sphinx-II format state segmentation to: %s\n", filename); if ((fp = fopen (filename, "wb")) == NULL) { E_ERROR("fopen(%s,wb) failed\n", filename); return; } if (byterev < 0) { /* Byte ordering of host machine unknown; first figure it out */ k = (int32) BYTE_ORDER_MAGIC; if (fwrite (&k, sizeof(int32), 1, fp) != 1) goto write_error; fclose (fp); if ((fp = fopen (filename, "rb")) == NULL) { E_ERROR ("fopen(%s,rb) failed\n", filename); return; } if (fread (buf, 1, sizeof(int32), fp) != sizeof(int32)) { E_ERROR ("fread(%s) failed\n", filename); return; } fclose (fp); /* If buf[0] == lsB of BYTE_ORDER_MAGIC, we are little-endian. Need to byterev */ byterev = (buf[0] == (BYTE_ORDER_MAGIC & 0x000000ff)) ? 1 : 0; if ((fp = fopen (filename, "wb")) == NULL) { E_ERROR("fopen(%s,wb) failed\n", filename); return; } } /* Write #frames */ for (k = 0, tmp = stseg; tmp; k++, tmp = tmp->next); if (byterev) SWAP_INT32(&k); if (fwrite (&k, sizeof(int32), 1, fp) != 1) goto write_error; /* Write state info for each frame */ for (; stseg; stseg = stseg->next) { mdef_phone_components (mdef, stseg->pid, ci, &(ci[1]), &(ci[2]), &wpos); s2_info = ci[0] * mdef->n_emit_state + stseg->state; if (stseg->start) s2_info |= 0x8000; if (byterev) SWAP_INT16(&s2_info); if (fwrite (&s2_info, sizeof(int16), 1, fp) != 1) goto write_error; } fclose (fp); return; write_error: E_ERROR("fwrite(%s) failed\n", filename); fclose (fp);}/* Write state segmentation output file */static void write_stseg (char *dir, align_stseg_t *stseg, char *uttid, char *ctlspec){ char filename[1024]; FILE *fp; align_stseg_t *tmp; int32 i, k; s3cipid_t ci[3]; uint8 pos; char *str; word_posn_t wpos; build_output_uttfile (filename, dir, uttid, ctlspec); strcat (filename, ".stseg"); E_INFO("Writing state segmentation to: %s\n", filename); if ((fp = fopen (filename, "wb")) == NULL) { E_ERROR("fopen(%s,wb) failed\n", filename); return; } /* Write version no. */ if (fwrite ("0.1\n", sizeof(char), 4, fp) != 4) goto write_error; /* Write CI phone names */ for (k = 0; k < mdef->n_ciphone; k++) { const char *str = mdef_ciphone_str (mdef, k); if (fwrite (str, sizeof(char), strlen(str), fp) != strlen(str)) goto write_error; if (fwrite (" ", sizeof(char), 1, fp) != 1) goto write_error; } str = WPOS_NAME; if (fwrite (str, sizeof(char), strlen(str), fp) != strlen(str)) goto write_error; /* Write format "description" */ str = "\nCI.8 LC.8 RC.8 POS.3(HI)-ST.5(LO) SCR(32)\n"; if (fwrite (str, sizeof(char), strlen(str), fp) != strlen(str)) goto write_error; /* Write binary comment string */ if (fwrite ("*end_comment*\n", sizeof(char), 14, fp) != 14) goto write_error; /* Write byte-ordering magic number */ k = BYTE_ORDER_MAGIC; if (fwrite (&k, sizeof(int32), 1, fp) != 1) goto write_error; /* Write #frames */ for (k = 0, tmp = stseg; tmp; k++, tmp = tmp->next); if (fwrite (&k, sizeof(int32), 1, fp) != 1) goto write_error; /* Write state segmentation for each frame */ for (i = 0; stseg; i++, stseg = stseg->next) { mdef_phone_components (mdef, stseg->pid, ci, &(ci[1]), &(ci[2]), &wpos); assert ((wpos >= 0) && (wpos < 8)); assert ((stseg->state >= 0) && (stseg->state < 32)); if (fwrite (ci, sizeof(s3cipid_t), 3, fp) != 3) goto write_error; pos = (wpos << 5) | (stseg->state & 0x001f); if (fwrite (&pos, sizeof(uint8), 1, fp) != 1) goto write_error; k = stseg->score + senscale[i]; if (fwrite (&k, sizeof(int32), 1, fp) != 1) goto write_error; } fclose (fp); return; write_error: E_ERROR("fwrite(%s) failed\n", filename); fclose (fp);}/* Write phone segmentation output file */static void write_phseg (char *dir, align_phseg_t *phseg, char *uttid, char *ctlspec){ char str[1024]; FILE *fp; int32 uttscr, f, scale; /* Attempt to write segmentation for this utt to a separate file */ build_output_uttfile (str, dir, uttid, ctlspec); strcat (str, ".phseg"); E_INFO("Writing phone segmentation to: %s\n", str); if ((fp = fopen (str, "w")) == NULL) { E_ERROR("fopen(%s,w) failed\n", str); fp = stdout; /* Segmentations can be directed to stdout this way */ E_INFO ("Phone segmentation (%s):\n", uttid); dir = NULL; /* Flag to indicate fp shouldn't be closed at the end */ } if (! dir){ fprintf (fp, "PH:%s>", uttid); fflush(fp); } fprintf (fp, "\t%5s %5s %9s %s\n", "SFrm", "EFrm", "SegAScr", "Phone"); fflush(fp); uttscr = 0; for (; phseg; phseg = phseg->next) { mdef_phone_str (mdef, phseg->pid, str); /* Account for senone score scaling in each frame */ scale = 0; for (f = phseg->sf; f <= phseg->ef; f++){ scale += senscale[f]; } if (! dir){ fprintf (fp, "ph:%s>", uttid); fflush(fp); } fprintf (fp, "\t%5d %5d %9d %s\n", phseg->sf, phseg->ef, phseg->score + scale, str); fflush(fp); uttscr += (phseg->score + scale); } if (! dir){ fprintf (fp, "PH:%s>", uttid); fflush(fp); } fprintf (fp, " Total score: %11d\n", uttscr); fflush(fp); if (dir) fclose (fp); else{ fprintf (fp, "\n"); fflush(fp); }}/* Write word segmentation output file */static void write_wdseg (char *dir, align_wdseg_t *wdseg, char *uttid, char *ctlspec){ char str[1024]; FILE *fp; int32 uttscr, f, scale; /* Attempt to write segmentation for this utt to a separate file */ build_output_uttfile (str, dir, uttid, ctlspec); strcat (str, ".wdseg"); E_INFO("Writing word segmentation to: %s\n", str); if ((fp = fopen (str, "w")) == NULL) { E_ERROR("fopen(%s,w) failed\n", str); fp = stdout; /* Segmentations can be directed to stdout this way */ E_INFO ("Word segmentation (%s):\n", uttid); dir = NULL; /* Flag to indicate fp shouldn't be closed at the end */ } if (! dir){ fprintf (fp, "WD:%s>", uttid); fflush(fp); } fprintf (fp, "\t%5s %5s %10s %s\n", "SFrm", "EFrm", "SegAScr", "Word"); fflush(fp); uttscr = 0; for (; wdseg; wdseg = wdseg->next) { /* Account for senone score scaling in each frame */ scale = 0; for (f = wdseg->sf; f <= wdseg->ef; f++) scale += senscale[f]; if (! dir){ fprintf (fp, "wd:%s>", uttid); fflush(fp); } fprintf (fp, "\t%5d %5d %10d %s\n", wdseg->sf, wdseg->ef, wdseg->score + scale, dict_wordstr (dict, wdseg->wid)); fflush(fp); uttscr += (wdseg->score + scale); } if (! dir){ fprintf (fp, "WD:%s>", uttid); fflush(fp); } fprintf (fp, " Total score: %11d\n", uttscr); fflush(fp); if (dir) fclose (fp); else{ fprintf (fp, "\n"); fflush(fp); }}/* Write exact transcription (pronunciation and silence/noise words included) */static void write_outsent (FILE *fp, align_wdseg_t *wdseg, char *uttid){ for (; wdseg; wdseg = wdseg->next) fprintf (fp, "%s ", dict_wordstr (dict, wdseg->wid)); fprintf (fp, " (%s)\n", uttid); fflush (fp);}/* * Find Viterbi alignment. */static void align_utt (char *sent, /* In: Reference transcript */ int32 nfr, /* In: #frames of input */ char *ctlspec, /* In: Utt specifiction from control file */ char *uttid) /* In: Utterance id, for logging and other use */{ static int32 w; static int32 topn; static gauden_dist_t ***dist; static int32 *senscr = NULL; static s3senid_t *sen_active; static int8 *mgau_active; static char *s2stsegdir; static char *stsegdir; static char *phsegdir; static char *wdsegdir; int32 i, s, sid, gid, n_sen_active, best; char *arg; align_stseg_t *stseg; align_phseg_t *phseg; align_wdseg_t *wdseg; float32 **fv; if (! senscr) { /* One-time allocation of necessary intermediate variables */ /* Allocate space for top-N codeword density values in a codebook */ w = feat_window_size (fcb); /* #MFC vectors needed on either side of current frame to compute one feature vector */ topn = *((int32 *) cmd_ln_access("-topn")); if (topn > g->n_density) { E_ERROR("-topn argument (%d) > #density codewords (%d); set to latter\n", topn, g->n_density); topn = g->n_density; } dist = (gauden_dist_t ***) ckd_calloc_3d (g->n_mgau, g->n_feat, topn, sizeof(gauden_dist_t)); /* Space for one frame of senone scores, and per frame active flags */ senscr = (int32 *) ckd_calloc (sen->n_sen, sizeof(int32)); sen_active = (s3senid_t *) ckd_calloc (sen->n_sen, sizeof(s3senid_t)); mgau_active = (int8 *) ckd_calloc (g->n_mgau, sizeof(int8)); /* Note various output directories */ s2stsegdir = NULL; stsegdir = NULL; phsegdir = NULL; wdsegdir = NULL; if ((arg = (char *) cmd_ln_access ("-s2stsegdir")) != NULL) s2stsegdir = (char *) ckd_salloc (arg); if ((arg = (char *) cmd_ln_access ("-stsegdir")) != NULL) stsegdir = (char *) ckd_salloc (arg); if ((arg = (char *) cmd_ln_access ("-phsegdir")) != NULL) phsegdir = (char *) ckd_salloc (arg); if ((arg = (char *) cmd_ln_access ("-wdsegdir")) != NULL) wdsegdir = (char *) ckd_salloc (arg); } if (nfr <= (w<<1)) { E_ERROR("Utterance %s < %d frames (%d); ignored\n", uttid, (w<<1)+1, nfr); return; } ptmr_reset_all (timers); ptmr_reset (&tm_utt); ptmr_start (&tm_utt); ptmr_start (timers+tmr_utt); if (align_build_sent_hmm (sent) != 0) { align_destroy_sent_hmm (); ptmr_stop (timers+tmr_utt); E_ERROR("No sentence HMM; no alignment for %s\n", uttid); return; } align_start_utt (uttid); for (i = 0; i < nfr; i++) { ptmr_start (timers+tmr_utt); fv = feat[i]; /* * Evaluate gaussian density codebooks and senone scores for input codeword. * Evaluate only active codebooks and senones.
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?