main_allphone.c

来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 688 行 · 第 1/2 页

C
688
字号
	sprintf (str, "%s/%s.allp", dir, uttid);	E_INFO("Writing phone segmentation to: %s\n", str);	if ((fp = fopen (str, "w")) == NULL) {	    E_ERROR("fopen(%s,w) failed\n", str);	    dir = NULL;	/* Flag to indicate fp shouldn't be closed at the end */	}    }        if (! dir) {	fp = stdout;	/* Segmentations can be directed to stdout this way */	E_INFO ("Phone segmentation (%s):\n", uttid);	fprintf (fp, "PH:%s>", uttid);	fflush(fp);    }        fprintf (fp, "\t%5s %5s %9s %s\n",	     "SFrm", "EFrm", "SegAScr", "Phone");    fflush(fp);    uttscr = 0;    for (; phseg; phseg = phseg->next) {	/* Account for senone score scaling in each frame */	scale = 0;	for (f = phseg->sf; f <= phseg->ef; f++)	    scale += senscale[f];		if (! dir){	    fprintf (fp, "ph:%s>", uttid);	    fflush(fp);	}	fprintf (fp, "\t%5d %5d %9d %s\n",		 phseg->sf, phseg->ef, phseg->score + scale,		 mdef_ciphone_str (mdef, phseg->ci));	fflush(fp);	uttscr += (phseg->score + scale);    }    if (! dir){	fprintf (fp, "PH:%s>", uttid);	fflush(fp);    }    fprintf (fp, " Total score: %11d\n", uttscr);    fflush(fp);    if (dir)	fclose (fp);    else{	fprintf (fp, "\n");	fflush(fp);    }}#define GAUDEN_EVAL_WINDOW	8/* Lists of senones sharing each mixture Gaussian codebook */typedef struct mgau2sen_s {    s3senid_t sen;		/* Senone shared by this mixture Gaussian */    struct mgau2sen_s *next;	/* Next entry in list for this mixture Gaussian */} mgau2sen_t;/* * Find Viterbi allphone decoding. */static void allphone_utt (int32 nfr, char *uttid){    static int32 w;    static int32 topn;    static gauden_dist_t **dist;	/* Density values for one mgau in one frame */    static int32 **senscr = NULL;	/* Senone scores for window of frames */    static mgau2sen_t **mgau2sen;	/* Senones sharing mixture Gaussian codebooks */    int32 i, j, k, s, gid, best;    phseg_t *phseg;    mgau2sen_t *m2s;    float32 **fv;    if (! senscr) {	/* One-time allocation of necessary intermediate variables */		/* Allocate space for top-N codeword density values in a codebook */	w = feat_window_size (fcb);	/* #MFC vectors needed on either side of current					   frame to compute one feature vector */	topn = *((int32 *) cmd_ln_access("-topn"));	if (topn > g->n_density) {	    E_WARN("-topn argument (%d) > #density codewords (%d); set to latter\n",		   topn, g->n_density);	    topn = g->n_density;	}	dist = (gauden_dist_t **) ckd_calloc_2d (g->n_feat, topn, sizeof(gauden_dist_t));		/* Space for one frame of senone scores, and per frame active flags */	senscr = (int32 **) ckd_calloc_2d (GAUDEN_EVAL_WINDOW, sen->n_sen, sizeof(int32));		/* Initialize mapping from mixture Gaussian to senones */	mgau2sen = (mgau2sen_t **) ckd_calloc (g->n_mgau, sizeof(mgau2sen_t *));	for (s = 0; s < sen->n_sen; s++) {	    m2s = (mgau2sen_t *) listelem_alloc (sizeof(mgau2sen_t));	    m2s->sen = s;	    m2s->next = mgau2sen[sen->mgau[s]];	    mgau2sen[sen->mgau[s]] = m2s;	}    }    ptmr_reset (&tm_utt);    ptmr_reset (&tm_gausen);    ptmr_reset (&tm_allphone);    if (nfr <= (w<<1)) {	E_ERROR("Utterance %s < %d frames (%d); ignored\n", uttid, (w<<1)+1, nfr);	return;    }    ptmr_start (&tm_utt);    allphone_start_utt (uttid);    for (j = 0; j < nfr; j += GAUDEN_EVAL_WINDOW) {	/* Compute Gaussian densities and senone scores for window of frames */	ptmr_start (&tm_gausen);	for (gid = 0; gid < g->n_mgau; gid++) {	    for (i = j, k = 0; (k < GAUDEN_EVAL_WINDOW) && (i < nfr); i++, k++) {		fv = feat[i];				/* Evaluate mixture Gaussian densities */		gauden_dist (g, gid, topn, fv, dist);				/* Compute senone scores */		if (g->n_mgau > 1) {		    for (m2s = mgau2sen[gid]; m2s; m2s = m2s->next) {			s = m2s->sen;			senscr[k][s] = senone_eval (sen, s, dist, topn);		    }		} else {		    /* Semi-continuous special case; single shared codebook */		    senone_eval_all (sen, dist, topn, senscr[k]);		}	    }	}		/* Find best phone scores for each frame in window */	for (i = j, k = 0; (k < GAUDEN_EVAL_WINDOW) && (i < nfr); i++, k++) {#ifdef INTERP	  /* Interpolate senones for each frame in window */	  if (interp)	      interp_all (interp, senscr[k], mdef->cd2cisen, mdef->n_ci_sen);#endif	  /* Normalize senone scores */	  best = (int32)0x80000000;	  for (s = 0; s < sen->n_sen; s++)	      if (best < senscr[k][s])		  best = senscr[k][s];	  for (s = 0; s < sen->n_sen; s++)	      senscr[k][s] -= best;	  senscale[i] = best;      }      ptmr_stop (&tm_gausen);      /* Step search one frame forward */      ptmr_start (&tm_allphone);      for (i = j, k = 0; (k < GAUDEN_EVAL_WINDOW) && (i < nfr); i++, k++) {	  allphone_frame (senscr[k]);	  if ((i%10) == 9) {	      printf ("."); fflush (stdout);	  }      }      ptmr_stop (&tm_allphone);  }  printf ("\n");    phseg = allphone_end_utt (uttid);  write_phseg ((char *) cmd_ln_access ("-phsegdir"), uttid, phseg);    ptmr_stop (&tm_utt);    printf ("%s: TMR:[frm %5d]", uttid, nfr);  printf ("[el %6.2fx]", tm_utt.t_elapsed * 100.0 / nfr);  printf ("[cpu %6.2fx]", tm_utt.t_cpu * 100.0 / nfr);  if (tm_utt.t_cpu > 0.0) {      printf ("[gau+sen %6.2fx %2d%%]", tm_gausen.t_cpu * 100.0 / nfr,	      (int32) ((tm_gausen.t_cpu * 100.0) / tm_utt.t_cpu));      printf ("[srch %6.2fx %2d%%]", tm_allphone.t_cpu * 100.0 / nfr,	      (int32) ((tm_allphone.t_cpu * 100.0) / tm_utt.t_cpu));  }  printf ("\n");  fflush (stdout);}/* Process utterances in the control file (-ctl argument) */static void process_ctlfile ( void ){  FILE *ctlfp;  char *ctlfile, *cepdir, *cepext;  char line[1024], ctlspec[1024];  int32 ctloffset, ctlcount, sf, ef, nfr;  char uttid[1024];  int32 k,i;    ctlfile = (char *) cmd_ln_access("-ctl");  if ((ctlfp = fopen (ctlfile, "r")) == NULL)      E_FATAL("fopen(%s,r) failed\n", ctlfile);    E_INFO("Processing ctl file %s\n", ctlfile);    cepdir = (char *) cmd_ln_access("-cepdir");  cepext = (char *) cmd_ln_access("-cepext");  assert ((cepdir != NULL) && (cepext != NULL));    ctloffset = *((int32 *) cmd_ln_access("-ctloffset"));  if (! cmd_ln_access("-ctlcount"))      ctlcount = 0x7fffffff;	/* All entries processed if no count specified */  else      ctlcount = *((int32 *) cmd_ln_access("-ctlcount"));  if (ctlcount == 0) {      E_INFO("-ctlcount argument = 0!!\n");      fclose (ctlfp);      return;  }    /* Skipping initial offset */  if (ctloffset > 0)      E_INFO("Skipping %d utterances in the beginning of control file\n",	     ctloffset);  while ((ctloffset > 0) && (fgets(line, sizeof(line), ctlfp) != NULL)) {      if (sscanf (line, "%s", ctlspec) > 0)	  --ctloffset;  }  /* Process the specified number of utterance or until end of control file */  while ((ctlcount > 0) && (fgets(line, sizeof(line), ctlfp) != NULL)) {      printf ("\n");      E_INFO("Utterance: %s", line);            sf = 0;      ef = (int32)0x7ffffff0;      if ((k = sscanf (line, "%s %d %d %s", ctlspec, &sf, &ef, uttid)) <= 0)	  continue;	    /* Empty line */      if ((k == 2) || ( (k >= 3) && ((sf >= ef) || (sf < 0))) ) {	  E_ERROR("Error in ctlfile spec; skipped\n");	  /* What happens to ctlcount??? */	  continue;      }      if (k < 4) {	/* Create utt-id from mfc-filename (and sf/ef if specified) */	for (i = strlen(ctlspec)-1; (i >= 0) && (ctlspec[i] != '/'); --i);	if (k == 3)	  sprintf (uttid, "%s_%d_%d", ctlspec+i+1, sf, ef);	else	  strcpy (uttid, ctlspec+i+1);      }      if (! feat) 	  feat = feat_array_alloc (fcb, S3_MAX_FRAMES);      nfr = feat_s2mfc2feat(fcb, ctlspec, cepdir, cepext, sf, ef, feat, S3_MAX_FRAMES);      if (nfr <= 0){	  E_ERROR("Utt %s: Input file read (%s) with dir (%s) and extension (%s) failed \n", uttid, ctlspec,cepdir, cepext);      }      else {	  E_INFO ("%s: %d input frames\n", uttid, nfr);	  allphone_utt (nfr, uttid);      }	      --ctlcount;    }    printf ("\n");    while (fgets(line, sizeof(line), ctlfp) != NULL) {	if (sscanf (line, "%s", ctlspec) > 0) {	    E_INFO("Skipping rest of control file beginning with:\n\t%s", line);	    break;	}    }    fclose (ctlfp);}intmain (int32 argc, char *argv[]){    /*  kb_t kb;      ptmr_t tm;*/  print_appl_info(argv[0]);  cmd_ln_appl_enter(argc,argv,"default.arg",defn);  unlimit ();        /*     * Initialize log(S3-base).  All scores (probs...) computed in log domain to avoid     * underflow.  At the same time, log base = 1.0001 (1+epsilon) to allow log values     * to be maintained in int32 variables without significant loss of precision.     */    {	float32 logbase;    	logbase = *((float32 *) cmd_ln_access("-logbase"));	if (logbase <= 1.0)	    E_FATAL("Illegal log-base: %e; must be > 1.0\n", logbase);	if (logbase > 1.1)	    E_WARN("Logbase %e perhaps too large??\n", logbase);	logs3_init ((float64) logbase);    }    /* Initialize feature stream type */    fcb = feat_init ( (char *) cmd_ln_access ("-feat"),		      (char *) cmd_ln_access ("-cmn"),		      (char *) cmd_ln_access ("-varnorm"),		      (char *) cmd_ln_access ("-agc"));        /* Read in input databases */    models_init ();        /* Senone scaling factor in each frame */    senscale = (int32 *) ckd_calloc (S3_MAX_FRAMES, sizeof(int32));        /* Initialize allphone decoder module */    allphone_init (mdef, tmat);    printf ("\n");        tot_nfr = 0;        process_ctlfile ();    if (tot_nfr > 0) {	printf ("\n");	printf("TOTAL FRAMES:       %8d\n", tot_nfr);	printf("TOTAL CPU TIME:     %11.2f sec, %7.2f xRT\n",	       tm_utt.t_tot_cpu, tm_utt.t_tot_cpu/(tot_nfr*0.01));	printf("TOTAL ELAPSED TIME: %11.2f sec, %7.2f xRT\n",	       tm_utt.t_tot_elapsed, tm_utt.t_tot_elapsed/(tot_nfr*0.01));    }#if (! WIN32)    system ("ps aguxwww | grep s3allphone");#endif    cmd_ln_appl_exit();        return 0;}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?