main_allphone.c
来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 688 行 · 第 1/2 页
C
688 行
sprintf (str, "%s/%s.allp", dir, uttid); E_INFO("Writing phone segmentation to: %s\n", str); if ((fp = fopen (str, "w")) == NULL) { E_ERROR("fopen(%s,w) failed\n", str); dir = NULL; /* Flag to indicate fp shouldn't be closed at the end */ } } if (! dir) { fp = stdout; /* Segmentations can be directed to stdout this way */ E_INFO ("Phone segmentation (%s):\n", uttid); fprintf (fp, "PH:%s>", uttid); fflush(fp); } fprintf (fp, "\t%5s %5s %9s %s\n", "SFrm", "EFrm", "SegAScr", "Phone"); fflush(fp); uttscr = 0; for (; phseg; phseg = phseg->next) { /* Account for senone score scaling in each frame */ scale = 0; for (f = phseg->sf; f <= phseg->ef; f++) scale += senscale[f]; if (! dir){ fprintf (fp, "ph:%s>", uttid); fflush(fp); } fprintf (fp, "\t%5d %5d %9d %s\n", phseg->sf, phseg->ef, phseg->score + scale, mdef_ciphone_str (mdef, phseg->ci)); fflush(fp); uttscr += (phseg->score + scale); } if (! dir){ fprintf (fp, "PH:%s>", uttid); fflush(fp); } fprintf (fp, " Total score: %11d\n", uttscr); fflush(fp); if (dir) fclose (fp); else{ fprintf (fp, "\n"); fflush(fp); }}#define GAUDEN_EVAL_WINDOW 8/* Lists of senones sharing each mixture Gaussian codebook */typedef struct mgau2sen_s { s3senid_t sen; /* Senone shared by this mixture Gaussian */ struct mgau2sen_s *next; /* Next entry in list for this mixture Gaussian */} mgau2sen_t;/* * Find Viterbi allphone decoding. */static void allphone_utt (int32 nfr, char *uttid){ static int32 w; static int32 topn; static gauden_dist_t **dist; /* Density values for one mgau in one frame */ static int32 **senscr = NULL; /* Senone scores for window of frames */ static mgau2sen_t **mgau2sen; /* Senones sharing mixture Gaussian codebooks */ int32 i, j, k, s, gid, best; phseg_t *phseg; mgau2sen_t *m2s; float32 **fv; if (! senscr) { /* One-time allocation of necessary intermediate variables */ /* Allocate space for top-N codeword density values in a codebook */ w = feat_window_size (fcb); /* #MFC vectors needed on either side of current frame to compute one feature vector */ topn = *((int32 *) cmd_ln_access("-topn")); if (topn > g->n_density) { E_WARN("-topn argument (%d) > #density codewords (%d); set to latter\n", topn, g->n_density); topn = g->n_density; } dist = (gauden_dist_t **) ckd_calloc_2d (g->n_feat, topn, sizeof(gauden_dist_t)); /* Space for one frame of senone scores, and per frame active flags */ senscr = (int32 **) ckd_calloc_2d (GAUDEN_EVAL_WINDOW, sen->n_sen, sizeof(int32)); /* Initialize mapping from mixture Gaussian to senones */ mgau2sen = (mgau2sen_t **) ckd_calloc (g->n_mgau, sizeof(mgau2sen_t *)); for (s = 0; s < sen->n_sen; s++) { m2s = (mgau2sen_t *) listelem_alloc (sizeof(mgau2sen_t)); m2s->sen = s; m2s->next = mgau2sen[sen->mgau[s]]; mgau2sen[sen->mgau[s]] = m2s; } } ptmr_reset (&tm_utt); ptmr_reset (&tm_gausen); ptmr_reset (&tm_allphone); if (nfr <= (w<<1)) { E_ERROR("Utterance %s < %d frames (%d); ignored\n", uttid, (w<<1)+1, nfr); return; } ptmr_start (&tm_utt); allphone_start_utt (uttid); for (j = 0; j < nfr; j += GAUDEN_EVAL_WINDOW) { /* Compute Gaussian densities and senone scores for window of frames */ ptmr_start (&tm_gausen); for (gid = 0; gid < g->n_mgau; gid++) { for (i = j, k = 0; (k < GAUDEN_EVAL_WINDOW) && (i < nfr); i++, k++) { fv = feat[i]; /* Evaluate mixture Gaussian densities */ gauden_dist (g, gid, topn, fv, dist); /* Compute senone scores */ if (g->n_mgau > 1) { for (m2s = mgau2sen[gid]; m2s; m2s = m2s->next) { s = m2s->sen; senscr[k][s] = senone_eval (sen, s, dist, topn); } } else { /* Semi-continuous special case; single shared codebook */ senone_eval_all (sen, dist, topn, senscr[k]); } } } /* Find best phone scores for each frame in window */ for (i = j, k = 0; (k < GAUDEN_EVAL_WINDOW) && (i < nfr); i++, k++) {#ifdef INTERP /* Interpolate senones for each frame in window */ if (interp) interp_all (interp, senscr[k], mdef->cd2cisen, mdef->n_ci_sen);#endif /* Normalize senone scores */ best = (int32)0x80000000; for (s = 0; s < sen->n_sen; s++) if (best < senscr[k][s]) best = senscr[k][s]; for (s = 0; s < sen->n_sen; s++) senscr[k][s] -= best; senscale[i] = best; } ptmr_stop (&tm_gausen); /* Step search one frame forward */ ptmr_start (&tm_allphone); for (i = j, k = 0; (k < GAUDEN_EVAL_WINDOW) && (i < nfr); i++, k++) { allphone_frame (senscr[k]); if ((i%10) == 9) { printf ("."); fflush (stdout); } } ptmr_stop (&tm_allphone); } printf ("\n"); phseg = allphone_end_utt (uttid); write_phseg ((char *) cmd_ln_access ("-phsegdir"), uttid, phseg); ptmr_stop (&tm_utt); printf ("%s: TMR:[frm %5d]", uttid, nfr); printf ("[el %6.2fx]", tm_utt.t_elapsed * 100.0 / nfr); printf ("[cpu %6.2fx]", tm_utt.t_cpu * 100.0 / nfr); if (tm_utt.t_cpu > 0.0) { printf ("[gau+sen %6.2fx %2d%%]", tm_gausen.t_cpu * 100.0 / nfr, (int32) ((tm_gausen.t_cpu * 100.0) / tm_utt.t_cpu)); printf ("[srch %6.2fx %2d%%]", tm_allphone.t_cpu * 100.0 / nfr, (int32) ((tm_allphone.t_cpu * 100.0) / tm_utt.t_cpu)); } printf ("\n"); fflush (stdout);}/* Process utterances in the control file (-ctl argument) */static void process_ctlfile ( void ){ FILE *ctlfp; char *ctlfile, *cepdir, *cepext; char line[1024], ctlspec[1024]; int32 ctloffset, ctlcount, sf, ef, nfr; char uttid[1024]; int32 k,i; ctlfile = (char *) cmd_ln_access("-ctl"); if ((ctlfp = fopen (ctlfile, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", ctlfile); E_INFO("Processing ctl file %s\n", ctlfile); cepdir = (char *) cmd_ln_access("-cepdir"); cepext = (char *) cmd_ln_access("-cepext"); assert ((cepdir != NULL) && (cepext != NULL)); ctloffset = *((int32 *) cmd_ln_access("-ctloffset")); if (! cmd_ln_access("-ctlcount")) ctlcount = 0x7fffffff; /* All entries processed if no count specified */ else ctlcount = *((int32 *) cmd_ln_access("-ctlcount")); if (ctlcount == 0) { E_INFO("-ctlcount argument = 0!!\n"); fclose (ctlfp); return; } /* Skipping initial offset */ if (ctloffset > 0) E_INFO("Skipping %d utterances in the beginning of control file\n", ctloffset); while ((ctloffset > 0) && (fgets(line, sizeof(line), ctlfp) != NULL)) { if (sscanf (line, "%s", ctlspec) > 0) --ctloffset; } /* Process the specified number of utterance or until end of control file */ while ((ctlcount > 0) && (fgets(line, sizeof(line), ctlfp) != NULL)) { printf ("\n"); E_INFO("Utterance: %s", line); sf = 0; ef = (int32)0x7ffffff0; if ((k = sscanf (line, "%s %d %d %s", ctlspec, &sf, &ef, uttid)) <= 0) continue; /* Empty line */ if ((k == 2) || ( (k >= 3) && ((sf >= ef) || (sf < 0))) ) { E_ERROR("Error in ctlfile spec; skipped\n"); /* What happens to ctlcount??? */ continue; } if (k < 4) { /* Create utt-id from mfc-filename (and sf/ef if specified) */ for (i = strlen(ctlspec)-1; (i >= 0) && (ctlspec[i] != '/'); --i); if (k == 3) sprintf (uttid, "%s_%d_%d", ctlspec+i+1, sf, ef); else strcpy (uttid, ctlspec+i+1); } if (! feat) feat = feat_array_alloc (fcb, S3_MAX_FRAMES); nfr = feat_s2mfc2feat(fcb, ctlspec, cepdir, cepext, sf, ef, feat, S3_MAX_FRAMES); if (nfr <= 0){ E_ERROR("Utt %s: Input file read (%s) with dir (%s) and extension (%s) failed \n", uttid, ctlspec,cepdir, cepext); } else { E_INFO ("%s: %d input frames\n", uttid, nfr); allphone_utt (nfr, uttid); } --ctlcount; } printf ("\n"); while (fgets(line, sizeof(line), ctlfp) != NULL) { if (sscanf (line, "%s", ctlspec) > 0) { E_INFO("Skipping rest of control file beginning with:\n\t%s", line); break; } } fclose (ctlfp);}intmain (int32 argc, char *argv[]){ /* kb_t kb; ptmr_t tm;*/ print_appl_info(argv[0]); cmd_ln_appl_enter(argc,argv,"default.arg",defn); unlimit (); /* * Initialize log(S3-base). All scores (probs...) computed in log domain to avoid * underflow. At the same time, log base = 1.0001 (1+epsilon) to allow log values * to be maintained in int32 variables without significant loss of precision. */ { float32 logbase; logbase = *((float32 *) cmd_ln_access("-logbase")); if (logbase <= 1.0) E_FATAL("Illegal log-base: %e; must be > 1.0\n", logbase); if (logbase > 1.1) E_WARN("Logbase %e perhaps too large??\n", logbase); logs3_init ((float64) logbase); } /* Initialize feature stream type */ fcb = feat_init ( (char *) cmd_ln_access ("-feat"), (char *) cmd_ln_access ("-cmn"), (char *) cmd_ln_access ("-varnorm"), (char *) cmd_ln_access ("-agc")); /* Read in input databases */ models_init (); /* Senone scaling factor in each frame */ senscale = (int32 *) ckd_calloc (S3_MAX_FRAMES, sizeof(int32)); /* Initialize allphone decoder module */ allphone_init (mdef, tmat); printf ("\n"); tot_nfr = 0; process_ctlfile (); if (tot_nfr > 0) { printf ("\n"); printf("TOTAL FRAMES: %8d\n", tot_nfr); printf("TOTAL CPU TIME: %11.2f sec, %7.2f xRT\n", tm_utt.t_tot_cpu, tm_utt.t_tot_cpu/(tot_nfr*0.01)); printf("TOTAL ELAPSED TIME: %11.2f sec, %7.2f xRT\n", tm_utt.t_tot_elapsed, tm_utt.t_tot_elapsed/(tot_nfr*0.01)); }#if (! WIN32) system ("ps aguxwww | grep s3allphone");#endif cmd_ln_appl_exit(); return 0;}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?