main_dag.c

来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 664 行 · 第 1/2 页

C
664
字号
/* * Write exact hypothesis.  Format: *   <id> T <scr> A <ascr> L <lscr> {<sf> <wascr> <wlscr> <word>}... <ef> * where: *   scr = ascr + (lscr*lw+N*wip), where N = #words excluding <s> *   ascr = scaled acoustic score for entire utterance *   lscr = LM score (without lw or wip) for entire utterance *   sf = start frame for word *   wascr = scaled acoustic score for word *   wlscr = LM score (without lw or wip) for word *   ef = end frame for utterance. */static void log_hypseg (char *uttid,			FILE *fp,	/* Out: output file */			srch_hyp_t *hypptr,	/* In: Hypothesis */			int32 nfrm)	/* In: #frames in utterance */{    srch_hyp_t *h;    int32 ascr, lscr, tscr;        ascr = lscr = tscr = 0;    for (h = hypptr; h; h = h->next) {	ascr += h->ascr;	if (dict_basewid(dict,h->wid) != startwid) {	    lscr += lm_rawscore (lm,h->lscr, 1.0);	} else {	    assert (h->lscr == 0);	}	tscr += h->ascr + h->lscr;    }    fprintf (fp, "%s T %d A %d L %d", uttid, tscr, ascr, lscr);        if (! hypptr)	/* HACK!! */	fprintf (fp, " (null)\n");    else {	for (h = hypptr; h; h = h->next) {	    lscr = (dict_basewid(dict,h->wid) != startwid) ? lm_rawscore (lm,h->lscr, 1.0) : 0;	    fprintf (fp, " %d %d %d %s", h->sf, h->ascr, lscr, dict_wordstr (dict,h->wid));	}	fprintf (fp, " %d\n", nfrm);    }        fflush (fp);}/* Write hypothesis in old (pre-Nov95) NIST format */static void log_hypstr (FILE *fp, srch_hyp_t *hypptr, char *uttid, int32 scr){    srch_hyp_t *h;    s3wid_t w;    if (! hypptr)	/* HACK!! */	fprintf (fp, "(null)");        for (h = hypptr; h; h = h->next) {	w = dict_basewid (dict,h->wid);	if ((w != startwid) && (w != finishwid) && (! dict_filler_word (dict,w)))	    fprintf (fp, "%s ", dict_wordstr(dict,w));    }    if (scr != 0)	fprintf (fp, " (%s %d)\n", uttid, scr);    else	fprintf (fp, " (%s)\n", uttid);    fflush (fp);}/* Log hypothesis in detail with word segmentations, acoustic and LM scores  */static void log_hyp_detailed (FILE *fp, srch_hyp_t *hypptr, char *uttid, char *LBL, char *lbl){    srch_hyp_t *h;    int32 ascr_norm, lscr;    ascr_norm = 0;    lscr = 0;        fprintf (fp, "%s:%s> %20s %5s %5s %11s %10s\n", LBL, uttid,	     "WORD", "SFrm", "EFrm", "AScr(Norm)", "LMScore");        for (h = hypptr; h; h = h->next) {	fprintf (fp, "%s:%s> %20s %5d %5d %11d %10d\n", lbl, uttid,		 h->word, h->sf, h->ef, h->ascr, h->lscr);	ascr_norm += h->ascr;	lscr += h->lscr;    }    fprintf (fp, "%s:%s> %20s %5s %5s %11d %10d\n", LBL, uttid,	     "TOTAL", "", "", ascr_norm, lscr);}/* Decode the given mfc file and write result to matchfp and matchsegfp */static void decode_utt (char *uttid, FILE *matchfp, FILE *matchsegfp){    char dagfile[1024];    srch_hyp_t *h, *hyp;    char *latdir, *latext;    int32 nfrm, ascr, lscr;    ptmr_reset (&tm_utt);    ptmr_start (&tm_utt);        latdir = (char *) cmd_ln_access ("-inlatdir");    latext = (char *) cmd_ln_access ("-latext");    if (latdir)	sprintf (dagfile, "%s/%s.%s", latdir, uttid, latext);    else	sprintf (dagfile, "%s.%s", uttid, latext);        if ((nfrm = s3dag_dag_load (dagfile)) >= 0) {	hyp = s3dag_dag_search (uttid);	if(hyp!=NULL){	  if ( *((int32 *) cmd_ln_access("-backtrace")) )	    log_hyp_detailed (stdout, hyp, uttid, "BP", "bp");	  	  /* Total scaled acoustic score and LM score */	  ascr = lscr = 0;	  for (h = hyp; h; h = h->next) {	    ascr += h->ascr;	    lscr += h->lscr;	  }	  	  printf ("BSTPTH: ");	  log_hypstr (stdout, hyp, uttid, ascr+lscr);	  	  printf ("BSTXCT: ");	  log_hypseg (uttid, stdout, hyp, nfrm);	  	  lm_cache_stats_dump (lm);	  lm_cache_reset (lm);	}else{	  E_ERROR("DAG search (%s) failed\n", uttid);	  hyp = NULL;	}    } else {	E_ERROR("DAG search (%s) failed\n", uttid);	hyp = NULL;    }        /* Log recognition output to the standard match and matchseg files */    if (matchfp)	log_hypstr (matchfp, hyp, uttid, 0);    if (matchsegfp)	log_hypseg (uttid, matchsegfp, hyp, nfrm);        dag_destroy ();    ptmr_stop (&tm_utt);        printf ("%s: TMR: %5d Frm", uttid, nfrm);    if (nfrm > 0) {	printf (" %6.2f xEl", tm_utt.t_elapsed * 100.0 / nfrm);	printf (" %6.2f xCPU", tm_utt.t_cpu * 100.0 / nfrm);    }    printf ("\n");    fflush (stdout);    tot_nfr += nfrm;}/* Process utterances in the control file (-ctl argument) */static void process_ctlfile ( void ){    FILE *ctlfp, *matchfp, *matchsegfp;    char *ctlfile;    char *matchfile, *matchsegfile;    char line[1024], ctlspec[1024], uttid[1024];    int32 ctloffset, ctlcount;    int32 i, k, sf, ef;        if ((ctlfile = (char *) cmd_ln_access("-ctl")) == NULL)	E_FATAL("No -ctl argument\n");        E_INFO("Processing ctl file %s\n", ctlfile);        if ((ctlfp = fopen (ctlfile, "r")) == NULL)	E_FATAL("fopen(%s,r) failed\n", ctlfile);        if ((matchfile = (char *) cmd_ln_access("-match")) == NULL) {	E_WARN("No -match argument\n");	matchfp = NULL;    } else {	if ((matchfp = fopen (matchfile, "w")) == NULL)	    E_ERROR("fopen(%s,w) failed\n", matchfile);    }        if ((matchsegfile = (char *) cmd_ln_access("-matchseg")) == NULL) {	E_WARN("No -matchseg argument\n");	matchsegfp = NULL;    } else {	if ((matchsegfp = fopen (matchsegfile, "w")) == NULL)	    E_ERROR("fopen(%s,w) failed\n", matchsegfile);    }        ctloffset = *((int32 *) cmd_ln_access("-ctloffset"));    if (! cmd_ln_access("-ctlcount"))	ctlcount = 0x7fffffff;	/* All entries processed if no count specified */    else	ctlcount = *((int32 *) cmd_ln_access("-ctlcount"));    if (ctlcount == 0) {	E_INFO("-ctlcount argument = 0!!\n");	fclose (ctlfp);	return;    }    if (ctloffset > 0)	E_INFO("Skipping %d utterances in the beginning of control file\n",	       ctloffset);    while ((ctloffset > 0) && (fgets(line, sizeof(line), ctlfp) != NULL)) {	if (sscanf (line, "%s", ctlspec) > 0)	    --ctloffset;    }        while ((ctlcount > 0) && (fgets(line, sizeof(line), ctlfp) != NULL)) {	printf ("\n");	E_INFO("Utterance: %s", line);	sf = 0;	ef = (int32)0x7ffffff0;	if ((k = sscanf (line, "%s %d %d %s", ctlspec, &sf, &ef, uttid)) <= 0)	    continue;	    /* Empty line */	if ((k == 2) || ( (k >= 3) && ((sf >= ef) || (sf < 0))) ) {	    E_ERROR("Error in ctlfile spec; skipped\n");	    /* What happens to ctlcount??? */	    continue;	}	if (k < 4) {	    /* Create utt-id from mfc-filename (and sf/ef if specified) */	    for (i = strlen(ctlspec)-1; (i >= 0) && (ctlspec[i] != '/'); --i);	    if (k == 3)		sprintf (uttid, "%s_%d_%d", ctlspec+i+1, sf, ef);	    else		strcpy (uttid, ctlspec+i+1);	}	decode_utt (uttid, matchfp, matchsegfp);	--ctlcount;    }    printf ("\n");    if (fscanf (ctlfp, "%s", line) == 1)	E_INFO("Skipping rest of control file beginning with:\n\t%s\n", line);    if (matchfp)	fclose (matchfp);    if (matchsegfp)	fclose (matchsegfp);    fclose (ctlfp);}int main (int32 argc, char *argv[]){  /*  kb_t kb;      ptmr_t tm;*/  print_appl_info(argv[0]);  cmd_ln_appl_enter(argc,argv,"default.arg",defn);  unlimit ();      if ((cmd_ln_access("-mdef") == NULL) ||      (cmd_ln_access("-dict") == NULL) ||      (cmd_ln_access("-lm") == NULL))    E_FATAL("Missing -mdef, -dict, or -lm argument\n");    /*   * Initialize log(S3-base).  All scores (probs...) computed in log domain to avoid   * underflow.  At the same time, log base = 1.0001 (1+epsilon) to allow log values   * to be maintained in int32 variables without significant loss of precision.   */  if (cmd_ln_access("-logbase") == NULL)    logs3_init (1.0001);  else {    float32 logbase;        logbase = *((float32 *) cmd_ln_access("-logbase"));    if (logbase <= 1.0)      E_FATAL("Illegal log-base: %e; must be > 1.0\n", logbase);    if (logbase > 1.1)      E_WARN("Logbase %e perhaps too large??\n", logbase);    logs3_init ((float64) logbase);  }      /* Read in input databases */  models_init ();    /* Allocate timing object */  ptmr_init(&tm_utt);  tot_nfr = 0;      /* Initialize forward Viterbi search module */  dag_init (dict);  printf ("\n");    process_ctlfile ();  printf ("\n");  printf("TOTAL FRAMES:       %8d\n", tot_nfr);  if (tot_nfr > 0) {    printf("TOTAL CPU TIME:     %11.2f sec, %7.2f xRT\n",	   tm_utt.t_tot_cpu, tm_utt.t_tot_cpu/(tot_nfr*0.01));    printf("TOTAL ELAPSED TIME: %11.2f sec, %7.2f xRT\n",	   tm_utt.t_tot_elapsed, tm_utt.t_tot_elapsed/(tot_nfr*0.01));  }  fflush (stdout);#if (! WIN32)    system ("ps auxwww | grep s3dag");#endif    /* Hack!! To avoid hanging problem under Linux */    cmd_ln_appl_exit();        return 0;}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?