main_align.c
来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 1,189 行 · 第 1/3 页
C
1,189 行
*/ /* Obtain active senone flags */ ptmr_start (timers+tmr_senone); align_sen_active (sen_active, sen->n_sen); /* Turn active flags into list (for faster access) */ if (interp) { for (s = 0; s < mdef->n_ci_sen; s++) sen_active[s] = 1; } n_sen_active = 0; for (s = 0; s < mdef->n_sen; s++) { if (sen_active[s]) sen_active[n_sen_active++] = s; } ptmr_stop (timers+tmr_senone); /* Flag all active mixture-gaussian codebooks */ ptmr_start (timers+tmr_gauden); for (gid = 0; gid < g->n_mgau; gid++) mgau_active[gid] = 0; for (s = 0; s < n_sen_active; s++) { sid = sen_active[s]; mgau_active[sen->mgau[sid]] = 1; } /* Compute topn gaussian density values (for active codebooks) */ for (gid = 0; gid < g->n_mgau; gid++) if (mgau_active[gid]) gauden_dist (g, gid, topn, fv, dist[gid]); ptmr_start (timers+tmr_gauden); /* Evaluate active senones */ ptmr_start (timers+tmr_senone); best = (int32) 0x80000000; for (s = 0; s < n_sen_active; s++) { sid = sen_active[s]; senscr[sid] = senone_eval (sen, sid, dist[sen->mgau[sid]], topn); if (best < senscr[sid]) best = senscr[sid]; } if (interp) { for (s = 0; s < n_sen_active; s++) { if ((sid = sen_active[s]) >= mdef->n_ci_sen) interp_cd_ci (interp, senscr, sid, mdef->cd2cisen[sid]); } } /* Normalize senone scores (interpolation above can only lower best score) */ for (s = 0; s < n_sen_active; s++) { sid = sen_active[s]; senscr[sid] -= best; } senscale[i] = best; ptmr_stop (timers+tmr_senone); /* Step alignment one frame forward */ ptmr_start (timers+tmr_align); align_frame (senscr); ptmr_stop (timers+tmr_align); ptmr_stop (timers+tmr_utt); } ptmr_stop (&tm_utt); printf ("\n"); /* Wind up alignment for this utterance */ if (align_end_utt (&stseg, &phseg, &wdseg) < 0) E_ERROR("Final state not reached; no alignment for %s\n\n", uttid); else { if (s2stsegdir) write_s2stseg (s2stsegdir, stseg, uttid, ctlspec); if (stsegdir) write_stseg (stsegdir, stseg, uttid, ctlspec); if (phsegdir) write_phseg (phsegdir, phseg, uttid, ctlspec); if (wdsegdir) write_wdseg (wdsegdir, wdseg, uttid, ctlspec); if (outsentfp) write_outsent (outsentfp, wdseg, uttid); } align_destroy_sent_hmm (); ptmr_print_all (stdout, timers, nfr*0.1); printf("EXECTIME: %5d frames, %7.2f sec CPU, %6.2f xRT; %7.2f sec elapsed, %6.2f xRT\n", nfr, tm_utt.t_cpu, tm_utt.t_cpu * 100.0 / nfr, tm_utt.t_elapsed, tm_utt.t_elapsed * 100.0 / nfr); tot_nfr += nfr;}#define UPPER_CASE(c) ((((c) >= 'a') && ((c) <= 'z')) ? (c-32) : c)/* Case insensitive string compare */static int32 id_cmp (char *str1, char *str2){ char c1, c2; for (;;) { c1 = *(str1++); c1 = UPPER_CASE(c1); c2 = *(str2++); c2 = UPPER_CASE(c2); if (c1 != c2) return (c1-c2); if (c1 == '\0') return 0; }}/* Process utterances in the control file (ctl argument) */static void process_ctlfile ( void ){ FILE *ctlfp, *sentfp, *mllrctlfp; char *ctlfile, *cepdir, *cepext, *sentfile, *outsentfile, *mllrctlfile; char line[1024], ctlspec[1024]; int32 ctloffset, ctlcount, sf, ef, nfr; char mllrfile[4096], prevmllr[4096], sent[16384]; char uttid[1024]; int32 i, k; ctlfile = (char *) cmd_ln_access("-ctl"); if ((ctlfp = fopen (ctlfile, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", ctlfile); if ((mllrctlfile = (char *) cmd_ln_access("-mllrctl")) != NULL) { if ((mllrctlfp = fopen (mllrctlfile, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", mllrctlfile); } else mllrctlfp = NULL; prevmllr[0] = '\0'; sentfile = (char *) cmd_ln_access("-insent"); if ((sentfp = fopen (sentfile, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", sentfile); if ((outsentfile = (char *) cmd_ln_access("-outsent")) != NULL) { if ((outsentfp = fopen (outsentfile, "w")) == NULL) E_FATAL("fopen(%s,r) failed\n", outsentfile); } E_INFO("Processing ctl file %s\n", ctlfile); cepdir = (char *) cmd_ln_access("-cepdir"); cepext = (char *) cmd_ln_access("-cepext"); assert ((cepdir != NULL) && (cepext != NULL)); ctloffset = *((int32 *) cmd_ln_access("-ctloffset")); if (! cmd_ln_access("-ctlcount")) ctlcount = 0x7fffffff; /* All entries processed if no count specified */ else ctlcount = *((int32 *) cmd_ln_access("-ctlcount")); if (ctlcount == 0) { E_INFO("-ctlcount argument = 0!!\n"); fclose (ctlfp); fclose (sentfp); if (outsentfp) fclose (outsentfp); return; } /* Skipping initial offset */ if (ctloffset > 0) E_INFO("Skipping %d utterances in the beginning of control file\n", ctloffset); while ((ctloffset > 0) && (fgets(line, sizeof(line), ctlfp) != NULL)) { if (sscanf (line, "%s", ctlspec) > 0) { if (fgets (sent, sizeof(sent), sentfp) == NULL) { E_ERROR("EOF(%s)\n", sentfile); ctlcount = 0; break; } if (mllrctlfp) { if (fscanf (mllrctlfp, "%s", mllrfile) != 1) E_FATAL ("Unexpected EOF(%s)\n", mllrctlfile); } --ctloffset; } } /* Process the specified number of utterance or until end of control file */ while ((ctlcount > 0) && (fgets(line, sizeof(line), ctlfp) != NULL)) { printf ("\n"); E_INFO("Utterance: %s", line); sf = 0; ef = (int32)0x7ffffff0; if ((k = sscanf (line, "%s %d %d %s", ctlspec, &sf, &ef, uttid)) <= 0) continue; /* Empty line */ if ((k == 2) || ( (k >= 3) && ((sf >= ef) || (sf < 0))) ) E_FATAL("Bad ctlfile line: %s\n", line); if (k < 4) { /* Create utt-id from mfc-filename (and sf/ef if specified) */ for (i = strlen(ctlspec)-1; (i >= 0) && (ctlspec[i] != '/'); --i); if (k == 3) sprintf (uttid, "%s_%d_%d", ctlspec+i+1, sf, ef); else strcpy (uttid, ctlspec+i+1); } if (mllrctlfp) { if (fscanf (mllrctlfp, "%s", mllrfile) != 1) E_FATAL ("Unexpected EOF(%s)\n", mllrctlfile); if (strcmp (prevmllr, mllrfile) != 0) { float32 ***A, **B; int32 gid, sid; uint8 *mgau_xform; gauden_mean_reload (g, (char *) cmd_ln_access("-mean")); if (ms_mllr_read_regmat (mllrfile, &A, &B, fcb->stream_len, feat_n_stream(fcb)) < 0) E_FATAL("ms_mllr_read_regmat failed\n"); mgau_xform = (uint8 *) ckd_calloc (g->n_mgau, sizeof(uint8)); /* Transform each non-CI mixture Gaussian */ for (sid = 0; sid < sen->n_sen; sid++) { if (mdef->cd2cisen[sid] != sid) { /* Otherwise it's a CI senone */ gid = sen->mgau[sid]; if (! mgau_xform[gid]) { ms_mllr_norm_mgau (g->mean[gid], g->n_density, A, B, fcb->stream_len, feat_n_stream(fcb)); mgau_xform[gid] = 1; } } } ckd_free (mgau_xform); ms_mllr_free_regmat (A, B, fcb->stream_len, feat_n_stream(fcb)); strcpy (prevmllr, mllrfile); } } /* Read utterance transcript */ if (fgets (sent, sizeof(sent), sentfp) == NULL) { E_ERROR("EOF(%s)\n", sentfile); break; } /* Strip utterance id from the end of the transcript */ for (k = strlen(sent) - 1; (k > 0) && ((sent[k] == '\n') || (sent[k] == '\t') || (sent[k] == ' ')); --k); if ((k > 0) && (sent[k] == ')')) { for (--k; (k >= 0) && (sent[k] != '('); --k); if ((k >= 0) && (sent[k] == '(')) { sent[k] = '\0'; /* Check that uttid in transcript and control file match */ for (i = ++k; sent[i] && (sent[i] != ')') && (sent[i] != '\n') && (sent[i] != '\t') && (sent[i] != ' '); i++); sent[i] = '\0'; if (id_cmp (sent+k, uttid) != 0) E_ERROR("Uttid mismatch: ctlfile = \"%s\"; transcript = \"%s\"\n", uttid, sent+k); } } if (! feat) feat = feat_array_alloc (fcb, S3_MAX_FRAMES); /* Read and process mfc/feature speech input file */ nfr = feat_s2mfc2feat(fcb, ctlspec, cepdir, cepext, sf, ef, feat, S3_MAX_FRAMES); if (nfr <= 0){ E_ERROR("Utt %s: Input file read (%s) with dir (%s) and extension (%s) failed \n", uttid, ctlspec,cepdir, cepext); } else { E_INFO ("%s: %d input frames\n", uttid, nfr); align_utt (sent, nfr, ctlspec, uttid); } --ctlcount; } printf ("\n"); while (fgets(line, sizeof(line), ctlfp) != NULL) { if (sscanf (line, "%s", ctlspec) > 0) { E_INFO("Skipping rest of control file beginning with:\n\t%s", line); break; } } fclose (ctlfp); fclose (sentfp); if (outsentfp) fclose (outsentfp); if (mllrctlfp) fclose (mllrctlfp);}intmain (int32 argc, char *argv[]){ /* kb_t kb; ptmr_t tm;*/ print_appl_info(argv[0]); cmd_ln_appl_enter(argc,argv,"default.arg",defn); unlimit(); if ((cmd_ln_access("-mdef") == NULL) || (cmd_ln_access("-mean") == NULL) || (cmd_ln_access("-var") == NULL) || (cmd_ln_access("-mixw") == NULL) || (cmd_ln_access("-tmat") == NULL) || (cmd_ln_access("-dict") == NULL)) E_FATAL("Missing -mdef, -mean, -var, -mixw, -tmat, or -dict argument\n"); if ((cmd_ln_access("-ctl") == NULL) || (cmd_ln_access("-insent") == NULL)) E_FATAL("Missing -ctl or -insent argument\n"); if ((cmd_ln_access ("-s2stsegdir") == NULL) && (cmd_ln_access ("-stsegdir") == NULL) && (cmd_ln_access ("-phsegdir") == NULL) && (cmd_ln_access ("-wdsegdir") == NULL) && (cmd_ln_access ("-outsent") == NULL)) E_FATAL("Missing output file/directory argument(s)\n"); /* * Initialize log(S3-base). All scores (probs...) computed in log domain to avoid * underflow. At the same time, log base = 1.0001 (1+epsilon) to allow log values * to be maintained in int32 variables without significant loss of precision. */ if (cmd_ln_access("-logbase") == NULL) logs3_init (1.0001); else { float32 logbase; logbase = *((float32 *) cmd_ln_access("-logbase")); if (logbase <= 1.0) E_FATAL("Illegal log-base: %e; must be > 1.0\n", logbase); if (logbase > 1.1) E_WARN("Logbase %e perhaps too large??\n", logbase); logs3_init ((float64) logbase); } /*E_INFO("Log value of 3.785471 is %d\n", log_to_logs3(3.785471)); */ /* Initialize feaure stream type */ fcb = feat_init ( (char *) cmd_ln_access ("-feat"), (char *) cmd_ln_access ("-cmn"), (char *) cmd_ln_access ("-varnorm"), (char *) cmd_ln_access ("-agc")); /* Read in input databases */ models_init (); senscale = (int32 *) ckd_calloc (S3_MAX_FRAMES, sizeof(int32)); timers[tmr_utt].name = "U"; timers[tmr_gauden].name = "G"; timers[tmr_senone].name = "S"; timers[tmr_align].name = "A"; /* Initialize align module */ align_init (mdef, tmat, dict); printf ("\n"); tot_nfr = 0; process_ctlfile (); if (tot_nfr > 0) { printf ("\n"); printf("TOTAL FRAMES: %8d\n", tot_nfr); printf("TOTAL CPU TIME: %11.2f sec, %7.2f xRT\n", tm_utt.t_tot_cpu, tm_utt.t_tot_cpu/(tot_nfr*0.01)); printf("TOTAL ELAPSED TIME: %11.2f sec, %7.2f xRT\n", tm_utt.t_tot_elapsed, tm_utt.t_tot_elapsed/(tot_nfr*0.01)); }#if (! WIN32) system ("ps aguxwww | grep s3align");#endif cmd_ln_appl_exit(); return 0;}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?