main_align.c

来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 1,189 行 · 第 1/3 页

C
1,189
字号
	    strcpy (buf, dir);	    buf[k-4] = '/';	    strcpy (buf+k-3, ctlspec);	} else	    strcpy (buf, ctlspec);    } else {	strcpy (buf, dir);	buf[k] = '/';	strcpy (buf+k+1, uttid);    }}/* * Write state segmentation in Sphinx-II format.  (Must be written in BIG-ENDIAN * format!) */static void write_s2stseg (char *dir, align_stseg_t *stseg, char *uttid, char *ctlspec){    char filename[1024];    FILE *fp;    align_stseg_t *tmp;    int32 k;    s3cipid_t ci[3];    word_posn_t wpos;    int16 s2_info;    char buf[8];    static int32 byterev = -1;	/* Whether to byte reverse output data */        build_output_uttfile (filename, dir, uttid, ctlspec);    strcat (filename, ".v8_seg");		/* .v8_seg for compatibility */    E_INFO("Writing Sphinx-II format state segmentation to: %s\n", filename);    if ((fp = fopen (filename, "wb")) == NULL) {	E_ERROR("fopen(%s,wb) failed\n", filename);	return;    }    if (byterev < 0) {	/* Byte ordering of host machine unknown; first figure it out */	k = (int32) BYTE_ORDER_MAGIC;	if (fwrite (&k, sizeof(int32), 1, fp) != 1)	    goto write_error;	fclose (fp);	if ((fp = fopen (filename, "rb")) == NULL) {	    E_ERROR ("fopen(%s,rb) failed\n", filename);	    return;	}	if (fread (buf, 1, sizeof(int32), fp) != sizeof(int32)) {	    E_ERROR ("fread(%s) failed\n", filename);	    return;	}	fclose (fp);		/* If buf[0] == lsB of BYTE_ORDER_MAGIC, we are little-endian.  Need to byterev */	byterev = (buf[0] == (BYTE_ORDER_MAGIC & 0x000000ff)) ? 1 : 0;	if ((fp = fopen (filename, "wb")) == NULL) {	    E_ERROR("fopen(%s,wb) failed\n", filename);	    return;	}    }        /* Write #frames */    for (k = 0, tmp = stseg; tmp; k++, tmp = tmp->next);    if (byterev)	SWAP_INT32(&k);    if (fwrite (&k, sizeof(int32), 1, fp) != 1)	goto write_error;        /* Write state info for each frame */    for (; stseg; stseg = stseg->next) {	mdef_phone_components (mdef, stseg->pid, ci, &(ci[1]), &(ci[2]), &wpos);	s2_info = ci[0] * mdef->n_emit_state + stseg->state;	if (stseg->start)	    s2_info |= 0x8000;	if (byterev)	    SWAP_INT16(&s2_info);		if (fwrite (&s2_info, sizeof(int16), 1, fp) != 1)	    goto write_error;    }        fclose (fp);    return;    write_error:    E_ERROR("fwrite(%s) failed\n", filename);    fclose (fp);}/* Write state segmentation output file */static void write_stseg (char *dir, align_stseg_t *stseg, char *uttid, char *ctlspec){    char filename[1024];    FILE *fp;    align_stseg_t *tmp;    int32 i, k;    s3cipid_t ci[3];    uint8 pos;    char *str;    word_posn_t wpos;        build_output_uttfile (filename, dir, uttid, ctlspec);    strcat (filename, ".stseg");    E_INFO("Writing state segmentation to: %s\n", filename);    if ((fp = fopen (filename, "wb")) == NULL) {	E_ERROR("fopen(%s,wb) failed\n", filename);	return;    }        /* Write version no. */    if (fwrite ("0.1\n", sizeof(char), 4, fp) != 4)	goto write_error;    /* Write CI phone names */    for (k = 0; k < mdef->n_ciphone; k++) {        const char *str = mdef_ciphone_str (mdef, k);	if (fwrite (str, sizeof(char), strlen(str), fp) != strlen(str))	    goto write_error;	if (fwrite (" ", sizeof(char), 1, fp) != 1)	    goto write_error;    }    str = WPOS_NAME;    if (fwrite (str, sizeof(char), strlen(str), fp) != strlen(str))	goto write_error;    /* Write format "description" */    str = "\nCI.8 LC.8 RC.8 POS.3(HI)-ST.5(LO) SCR(32)\n";    if (fwrite (str, sizeof(char), strlen(str), fp) != strlen(str))	goto write_error;    /* Write binary comment string */    if (fwrite ("*end_comment*\n", sizeof(char), 14, fp) != 14)	goto write_error;    /* Write byte-ordering magic number */    k = BYTE_ORDER_MAGIC;    if (fwrite (&k, sizeof(int32), 1, fp) != 1)	goto write_error;        /* Write #frames */    for (k = 0, tmp = stseg; tmp; k++, tmp = tmp->next);    if (fwrite (&k, sizeof(int32), 1, fp) != 1)	goto write_error;        /* Write state segmentation for each frame */    for (i = 0; stseg; i++, stseg = stseg->next) {	mdef_phone_components (mdef, stseg->pid, ci, &(ci[1]), &(ci[2]), &wpos);	assert ((wpos >= 0) && (wpos < 8));	assert ((stseg->state >= 0) && (stseg->state < 32));		if (fwrite (ci, sizeof(s3cipid_t), 3, fp) != 3)	    goto write_error;	pos = (wpos << 5) | (stseg->state & 0x001f);	if (fwrite (&pos, sizeof(uint8), 1, fp) != 1)	    goto write_error;	k = stseg->score + senscale[i];	if (fwrite (&k, sizeof(int32), 1, fp) != 1)	    goto write_error;    }        fclose (fp);    return;    write_error:    E_ERROR("fwrite(%s) failed\n", filename);    fclose (fp);}/* Write phone segmentation output file */static void write_phseg (char *dir, align_phseg_t *phseg, char *uttid, char *ctlspec){    char str[1024];    FILE *fp;    int32 uttscr, f, scale;        /* Attempt to write segmentation for this utt to a separate file */    build_output_uttfile (str, dir, uttid, ctlspec);    strcat (str, ".phseg");    E_INFO("Writing phone segmentation to: %s\n", str);    if ((fp = fopen (str, "w")) == NULL) {	E_ERROR("fopen(%s,w) failed\n", str);	fp = stdout;	/* Segmentations can be directed to stdout this way */	E_INFO ("Phone segmentation (%s):\n", uttid);	dir = NULL;	/* Flag to indicate fp shouldn't be closed at the end */    }        if (! dir){	fprintf (fp, "PH:%s>", uttid);	fflush(fp);    }    fprintf (fp, "\t%5s %5s %9s %s\n",	     "SFrm", "EFrm", "SegAScr", "Phone");    fflush(fp);    uttscr = 0;    for (; phseg; phseg = phseg->next) {	mdef_phone_str (mdef, phseg->pid, str);		/* Account for senone score scaling in each frame */	scale = 0;	for (f = phseg->sf; f <= phseg->ef; f++){	    scale += senscale[f];	}		if (! dir){	    fprintf (fp, "ph:%s>", uttid);	    fflush(fp);	}	fprintf (fp, "\t%5d %5d %9d %s\n",		 phseg->sf, phseg->ef, phseg->score + scale, str);	fflush(fp);	uttscr += (phseg->score + scale);    }    if (! dir){	fprintf (fp, "PH:%s>", uttid);	fflush(fp);    }    fprintf (fp, " Total score: %11d\n", uttscr);    fflush(fp);    if (dir)	fclose (fp);    else{	fprintf (fp, "\n");	fflush(fp);    }}/* Write word segmentation output file */static void write_wdseg (char *dir, align_wdseg_t *wdseg, char *uttid, char *ctlspec){    char str[1024];    FILE *fp;    int32 uttscr, f, scale;        /* Attempt to write segmentation for this utt to a separate file */    build_output_uttfile (str, dir, uttid, ctlspec);    strcat (str, ".wdseg");    E_INFO("Writing word segmentation to: %s\n", str);    if ((fp = fopen (str, "w")) == NULL) {	E_ERROR("fopen(%s,w) failed\n", str);	fp = stdout;	/* Segmentations can be directed to stdout this way */	E_INFO ("Word segmentation (%s):\n", uttid);	dir = NULL;	/* Flag to indicate fp shouldn't be closed at the end */    }        if (! dir){	fprintf (fp, "WD:%s>", uttid);	fflush(fp);    }    fprintf (fp, "\t%5s %5s %10s %s\n",	     "SFrm", "EFrm", "SegAScr", "Word");    fflush(fp);    uttscr = 0;    for (; wdseg; wdseg = wdseg->next) {	/* Account for senone score scaling in each frame */	scale = 0;	for (f = wdseg->sf; f <= wdseg->ef; f++)	    scale += senscale[f];	if (! dir){	    fprintf (fp, "wd:%s>", uttid);	    fflush(fp);	}	fprintf (fp, "\t%5d %5d %10d %s\n",		 wdseg->sf, wdseg->ef, wdseg->score + scale, dict_wordstr (dict, wdseg->wid));	fflush(fp);	uttscr += (wdseg->score + scale);    }    if (! dir){	fprintf (fp, "WD:%s>", uttid);	fflush(fp);    }    fprintf (fp, " Total score: %11d\n", uttscr);    fflush(fp);    if (dir)	fclose (fp);    else{	fprintf (fp, "\n");	fflush(fp);    }}/* Write exact transcription (pronunciation and silence/noise words included) */static void write_outsent (FILE *fp, align_wdseg_t *wdseg, char *uttid){    for (; wdseg; wdseg = wdseg->next)	fprintf (fp, "%s ", dict_wordstr (dict, wdseg->wid));    fprintf (fp, " (%s)\n", uttid);    fflush (fp);}/* * Find Viterbi alignment. */static void align_utt (char *sent,	/* In: Reference transcript */		       int32 nfr,	/* In: #frames of input */		       char *ctlspec,	/* In: Utt specifiction from control file */		       char *uttid)	/* In: Utterance id, for logging and other use */{    static int32 w;    static int32 topn;    static gauden_dist_t ***dist;    static int32 *senscr = NULL;    static s3senid_t *sen_active;    static int8 *mgau_active;    static char *s2stsegdir;    static char *stsegdir;    static char *phsegdir;    static char *wdsegdir;        int32 i, s, sid, gid, n_sen_active, best;    char *arg;    align_stseg_t *stseg;    align_phseg_t *phseg;    align_wdseg_t *wdseg;    float32 **fv;        if (! senscr) {	/* One-time allocation of necessary intermediate variables */	/* Allocate space for top-N codeword density values in a codebook */	w = feat_window_size (fcb);	/* #MFC vectors needed on either side of current					   frame to compute one feature vector */	topn = *((int32 *) cmd_ln_access("-topn"));	if (topn > g->n_density) {	    E_ERROR("-topn argument (%d) > #density codewords (%d); set to latter\n",		   topn, g->n_density);	    topn = g->n_density;	}	dist = (gauden_dist_t ***) ckd_calloc_3d (g->n_mgau, g->n_feat, topn,						  sizeof(gauden_dist_t));		/* Space for one frame of senone scores, and per frame active flags */	senscr = (int32 *) ckd_calloc (sen->n_sen, sizeof(int32));	sen_active = (s3senid_t *) ckd_calloc (sen->n_sen, sizeof(s3senid_t));	mgau_active = (int8 *) ckd_calloc (g->n_mgau, sizeof(int8));	/* Note various output directories */	s2stsegdir = NULL;	stsegdir = NULL;	phsegdir = NULL;	wdsegdir = NULL;	if ((arg = (char *) cmd_ln_access ("-s2stsegdir")) != NULL)	    s2stsegdir = (char *) ckd_salloc (arg);	if ((arg = (char *) cmd_ln_access ("-stsegdir")) != NULL)	    stsegdir = (char *) ckd_salloc (arg);	if ((arg = (char *) cmd_ln_access ("-phsegdir")) != NULL)	    phsegdir = (char *) ckd_salloc (arg);	if ((arg = (char *) cmd_ln_access ("-wdsegdir")) != NULL)	    wdsegdir = (char *) ckd_salloc (arg);    }        if (nfr <= (w<<1)) {	E_ERROR("Utterance %s < %d frames (%d); ignored\n", uttid, (w<<1)+1, nfr);	return;    }        ptmr_reset_all (timers);        ptmr_reset (&tm_utt);    ptmr_start (&tm_utt);    ptmr_start (timers+tmr_utt);    if (align_build_sent_hmm (sent) != 0) {	align_destroy_sent_hmm ();	ptmr_stop (timers+tmr_utt);	E_ERROR("No sentence HMM; no alignment for %s\n", uttid);		return;    }        align_start_utt (uttid);        for (i = 0; i < nfr; i++) {	ptmr_start (timers+tmr_utt);	fv = feat[i];		/*	 * Evaluate gaussian density codebooks and senone scores for input codeword.	 * Evaluate only active codebooks and senones.

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?