main_align.c

来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 1,189 行 · 第 1/3 页

C
1,189
字号
	 */	/* Obtain active senone flags */	ptmr_start (timers+tmr_senone);	align_sen_active (sen_active, sen->n_sen);	/* Turn active flags into list (for faster access) */	if (interp) {	    for (s = 0; s < mdef->n_ci_sen; s++)		sen_active[s] = 1;	}	n_sen_active = 0;	for (s = 0; s < mdef->n_sen; s++) {	    if (sen_active[s])		sen_active[n_sen_active++] = s;	}	ptmr_stop (timers+tmr_senone);		/* Flag all active mixture-gaussian codebooks */	ptmr_start (timers+tmr_gauden);	for (gid = 0; gid < g->n_mgau; gid++)	    mgau_active[gid] = 0;	for (s = 0; s < n_sen_active; s++) {	    sid = sen_active[s];	    mgau_active[sen->mgau[sid]] = 1;	}		/* Compute topn gaussian density values (for active codebooks) */	for (gid = 0; gid < g->n_mgau; gid++)	    if (mgau_active[gid])		gauden_dist (g, gid, topn, fv, dist[gid]);	ptmr_start (timers+tmr_gauden);		/* Evaluate active senones */	ptmr_start (timers+tmr_senone);	best = (int32) 0x80000000;	for (s = 0; s < n_sen_active; s++) {	    sid = sen_active[s];	    senscr[sid] = senone_eval (sen, sid, dist[sen->mgau[sid]], topn);	    if (best < senscr[sid])		best = senscr[sid];	}	if (interp) {	    for (s = 0; s < n_sen_active; s++) {		if ((sid = sen_active[s]) >= mdef->n_ci_sen)		    interp_cd_ci (interp, senscr, sid, mdef->cd2cisen[sid]);	    }	}	/* Normalize senone scores (interpolation above can only lower best score) */	for (s = 0; s < n_sen_active; s++) {	    sid = sen_active[s];	    senscr[sid] -= best;	}	senscale[i] = best;	ptmr_stop (timers+tmr_senone);		/* Step alignment one frame forward */	ptmr_start (timers+tmr_align);	align_frame (senscr);	ptmr_stop (timers+tmr_align);	ptmr_stop (timers+tmr_utt);    }    ptmr_stop (&tm_utt);    printf ("\n");    /* Wind up alignment for this utterance */    if (align_end_utt (&stseg, &phseg, &wdseg) < 0)	E_ERROR("Final state not reached; no alignment for %s\n\n", uttid);    else {	if (s2stsegdir)	    write_s2stseg (s2stsegdir, stseg, uttid, ctlspec);	if (stsegdir)	    write_stseg (stsegdir, stseg, uttid, ctlspec);	if (phsegdir)	    write_phseg (phsegdir, phseg, uttid, ctlspec);	if (wdsegdir)	    write_wdseg (wdsegdir, wdseg, uttid, ctlspec);	if (outsentfp)	    write_outsent (outsentfp, wdseg, uttid);    }        align_destroy_sent_hmm ();        ptmr_print_all (stdout, timers, nfr*0.1);    printf("EXECTIME: %5d frames, %7.2f sec CPU, %6.2f xRT; %7.2f sec elapsed, %6.2f xRT\n",	   nfr,	   tm_utt.t_cpu, tm_utt.t_cpu * 100.0 / nfr,	   tm_utt.t_elapsed, tm_utt.t_elapsed * 100.0 / nfr);    tot_nfr += nfr;}#define UPPER_CASE(c)   ((((c) >= 'a') && ((c) <= 'z')) ? (c-32) : c)/* Case insensitive string compare */static int32 id_cmp (char *str1, char *str2){    char c1, c2;        for (;;) {        c1 = *(str1++);        c1 = UPPER_CASE(c1);        c2 = *(str2++);        c2 = UPPER_CASE(c2);        if (c1 != c2)            return (c1-c2);        if (c1 == '\0')            return 0;    }}/* Process utterances in the control file (ctl argument) */static void process_ctlfile ( void ){    FILE *ctlfp, *sentfp, *mllrctlfp;    char *ctlfile, *cepdir, *cepext, *sentfile, *outsentfile, *mllrctlfile;    char line[1024], ctlspec[1024];    int32 ctloffset, ctlcount, sf, ef, nfr;    char mllrfile[4096], prevmllr[4096], sent[16384];    char uttid[1024];    int32 i, k;        ctlfile = (char *) cmd_ln_access("-ctl");    if ((ctlfp = fopen (ctlfile, "r")) == NULL)	E_FATAL("fopen(%s,r) failed\n", ctlfile);        if ((mllrctlfile = (char *) cmd_ln_access("-mllrctl")) != NULL) {	if ((mllrctlfp = fopen (mllrctlfile, "r")) == NULL)	    E_FATAL("fopen(%s,r) failed\n", mllrctlfile);    } else	mllrctlfp = NULL;    prevmllr[0] = '\0';        sentfile = (char *) cmd_ln_access("-insent");    if ((sentfp = fopen (sentfile, "r")) == NULL)	E_FATAL("fopen(%s,r) failed\n", sentfile);    if ((outsentfile = (char *) cmd_ln_access("-outsent")) != NULL) {	if ((outsentfp = fopen (outsentfile, "w")) == NULL)	    E_FATAL("fopen(%s,r) failed\n", outsentfile);    }        E_INFO("Processing ctl file %s\n", ctlfile);        cepdir = (char *) cmd_ln_access("-cepdir");    cepext = (char *) cmd_ln_access("-cepext");    assert ((cepdir != NULL) && (cepext != NULL));        ctloffset = *((int32 *) cmd_ln_access("-ctloffset"));    if (! cmd_ln_access("-ctlcount"))	ctlcount = 0x7fffffff;	/* All entries processed if no count specified */    else	ctlcount = *((int32 *) cmd_ln_access("-ctlcount"));    if (ctlcount == 0) {	E_INFO("-ctlcount argument = 0!!\n");	fclose (ctlfp);	fclose (sentfp);	if (outsentfp)	    fclose (outsentfp);		return;    }        /* Skipping initial offset */    if (ctloffset > 0)	E_INFO("Skipping %d utterances in the beginning of control file\n",	       ctloffset);    while ((ctloffset > 0) && (fgets(line, sizeof(line), ctlfp) != NULL)) {	if (sscanf (line, "%s", ctlspec) > 0) {	    if (fgets (sent, sizeof(sent), sentfp) == NULL) {		E_ERROR("EOF(%s)\n", sentfile);		ctlcount = 0;		break;	    }	    if (mllrctlfp) {		if (fscanf (mllrctlfp, "%s", mllrfile) != 1)		    E_FATAL ("Unexpected EOF(%s)\n", mllrctlfile);	    }	    --ctloffset;	}    }    /* Process the specified number of utterance or until end of control file */    while ((ctlcount > 0) && (fgets(line, sizeof(line), ctlfp) != NULL)) {	printf ("\n");	E_INFO("Utterance: %s", line);		sf = 0;	ef = (int32)0x7ffffff0;	if ((k = sscanf (line, "%s %d %d %s", ctlspec, &sf, &ef, uttid)) <= 0)	    continue;	    /* Empty line */	if ((k == 2) || ( (k >= 3) && ((sf >= ef) || (sf < 0))) )	    E_FATAL("Bad ctlfile line: %s\n", line);	if (k < 4) {	    /* Create utt-id from mfc-filename (and sf/ef if specified) */	    for (i = strlen(ctlspec)-1; (i >= 0) && (ctlspec[i] != '/'); --i);	    if (k == 3)		sprintf (uttid, "%s_%d_%d", ctlspec+i+1, sf, ef);	    else		strcpy (uttid, ctlspec+i+1);	}	if (mllrctlfp) {	    if (fscanf (mllrctlfp, "%s", mllrfile) != 1)		E_FATAL ("Unexpected EOF(%s)\n", mllrctlfile);	    	    if (strcmp (prevmllr, mllrfile) != 0) {		float32 ***A, **B;		int32 gid, sid;		uint8 *mgau_xform;				gauden_mean_reload (g, (char *) cmd_ln_access("-mean"));				if (ms_mllr_read_regmat (mllrfile, &A, &B,				      fcb->stream_len, feat_n_stream(fcb)) < 0)		    E_FATAL("ms_mllr_read_regmat failed\n");				mgau_xform = (uint8 *) ckd_calloc (g->n_mgau, sizeof(uint8));		/* Transform each non-CI mixture Gaussian */		for (sid = 0; sid < sen->n_sen; sid++) {		    if (mdef->cd2cisen[sid] != sid) {	/* Otherwise it's a CI senone */			gid = sen->mgau[sid];			if (! mgau_xform[gid]) {			    ms_mllr_norm_mgau (g->mean[gid], g->n_density, A, B,					    fcb->stream_len, feat_n_stream(fcb));			    mgau_xform[gid] = 1;			}		    }		}		ckd_free (mgau_xform);				ms_mllr_free_regmat (A, B, fcb->stream_len, feat_n_stream(fcb));		strcpy (prevmllr, mllrfile);	    }	}	/* Read utterance transcript */	if (fgets (sent, sizeof(sent), sentfp) == NULL) {	    E_ERROR("EOF(%s)\n", sentfile);	    break;	}	/* Strip utterance id from the end of the transcript */	for (k = strlen(sent) - 1;	     (k > 0) && ((sent[k] == '\n') || (sent[k] == '\t') || (sent[k] == ' '));	     --k);	if ((k > 0) && (sent[k] == ')')) {	    for (--k; (k >= 0) && (sent[k] != '('); --k);	    if ((k >= 0) && (sent[k] == '(')) {		sent[k] = '\0';		/* Check that uttid in transcript and control file match */		for (i = ++k;		     sent[i] && (sent[i] != ')') &&			 (sent[i] != '\n') && (sent[i] != '\t') && (sent[i] != ' ');		     i++);		sent[i] = '\0';		if (id_cmp (sent+k, uttid) != 0)		    E_ERROR("Uttid mismatch: ctlfile = \"%s\"; transcript = \"%s\"\n",			   uttid, sent+k);	    }	}		if (! feat)	    feat = feat_array_alloc (fcb, S3_MAX_FRAMES);		/* Read and process mfc/feature speech input file */	nfr = feat_s2mfc2feat(fcb, ctlspec, cepdir, cepext, sf, ef, feat, S3_MAX_FRAMES);		if (nfr <= 0){	  E_ERROR("Utt %s: Input file read (%s) with dir (%s) and extension (%s) failed \n", uttid, ctlspec,cepdir, cepext);	}	else {	    E_INFO ("%s: %d input frames\n", uttid, nfr);	    align_utt (sent, nfr, ctlspec, uttid);	}		--ctlcount;    }    printf ("\n");    while (fgets(line, sizeof(line), ctlfp) != NULL) {	if (sscanf (line, "%s", ctlspec) > 0) {	    E_INFO("Skipping rest of control file beginning with:\n\t%s", line);	    break;	}    }    fclose (ctlfp);    fclose (sentfp);    if (outsentfp)	fclose (outsentfp);    if (mllrctlfp)	fclose (mllrctlfp);}intmain (int32 argc, char *argv[]){  /*  kb_t kb;      ptmr_t tm;*/  print_appl_info(argv[0]);  cmd_ln_appl_enter(argc,argv,"default.arg",defn);      unlimit();    if ((cmd_ln_access("-mdef") == NULL) ||      (cmd_ln_access("-mean") == NULL) ||      (cmd_ln_access("-var") == NULL)  ||      (cmd_ln_access("-mixw") == NULL)  ||      (cmd_ln_access("-tmat") == NULL) ||      (cmd_ln_access("-dict") == NULL))    E_FATAL("Missing -mdef, -mean, -var, -mixw, -tmat, or -dict argument\n");        if ((cmd_ln_access("-ctl") == NULL) || (cmd_ln_access("-insent") == NULL))	E_FATAL("Missing -ctl or -insent argument\n");    if ((cmd_ln_access ("-s2stsegdir") == NULL) &&	(cmd_ln_access ("-stsegdir") == NULL) &&	(cmd_ln_access ("-phsegdir") == NULL) &&	(cmd_ln_access ("-wdsegdir") == NULL) &&	(cmd_ln_access ("-outsent") == NULL))	E_FATAL("Missing output file/directory argument(s)\n");        /*     * Initialize log(S3-base).  All scores (probs...) computed in log domain to avoid     * underflow.  At the same time, log base = 1.0001 (1+epsilon) to allow log values     * to be maintained in int32 variables without significant loss of precision.     */    if (cmd_ln_access("-logbase") == NULL)	logs3_init (1.0001);    else {	float32 logbase;	logbase = *((float32 *) cmd_ln_access("-logbase"));	if (logbase <= 1.0)	    E_FATAL("Illegal log-base: %e; must be > 1.0\n", logbase);	if (logbase > 1.1)	    E_WARN("Logbase %e perhaps too large??\n", logbase);	logs3_init ((float64) logbase);    }    /*E_INFO("Log value of 3.785471 is %d\n", log_to_logs3(3.785471)); */    /* Initialize feaure stream type */    fcb = feat_init ( (char *) cmd_ln_access ("-feat"),		      (char *) cmd_ln_access ("-cmn"),		      (char *) cmd_ln_access ("-varnorm"),		      (char *) cmd_ln_access ("-agc"));        /* Read in input databases */    models_init ();        senscale = (int32 *) ckd_calloc (S3_MAX_FRAMES, sizeof(int32));        timers[tmr_utt].name = "U";    timers[tmr_gauden].name = "G";    timers[tmr_senone].name = "S";    timers[tmr_align].name = "A";    /* Initialize align module */    align_init (mdef, tmat, dict);    printf ("\n");        tot_nfr = 0;        process_ctlfile ();    if (tot_nfr > 0) {	printf ("\n");	printf("TOTAL FRAMES:       %8d\n", tot_nfr);	printf("TOTAL CPU TIME:     %11.2f sec, %7.2f xRT\n",	       tm_utt.t_tot_cpu, tm_utt.t_tot_cpu/(tot_nfr*0.01));	printf("TOTAL ELAPSED TIME: %11.2f sec, %7.2f xRT\n",	       tm_utt.t_tot_elapsed, tm_utt.t_tot_elapsed/(tot_nfr*0.01));    }#if (! WIN32)    system ("ps aguxwww | grep s3align");#endif    cmd_ln_appl_exit();    return 0;}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?