📄 s3_align.c

📁 CMU大名鼎鼎的SPHINX－3大词汇量连续语音识别系统
💻 C
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
#if _DEBUG_ALIGN_    dump_sent_hmm ();	/* For debugging */#endif    k = n_pnode * mdef->n_emit_state;    if (k > active_list_size) {	/* Need to grow active list arrays */	if (active_list_size > 0) {	    ckd_free (cur_active);	    ckd_free (next_active);	}	for (; active_list_size <= k; active_list_size += ACTIVE_LIST_SIZE_INCR);	cur_active = (snode_t **) ckd_calloc (active_list_size, sizeof(snode_t *));	next_active = (snode_t **) ckd_calloc (active_list_size, sizeof(snode_t *));    }        return 0;}int32 align_destroy_sent_hmm ( void ){    pnode_t *p;        destroy_state_dag ();        for (p = pnode_list; p; p = p->alloc_next) {	plinks_free (p->succlist);	plinks_free (p->predlist);    }    pnodes_free ();        plinks_free (phead.succlist);    plinks_free (ptail.predlist);        return 0;}static history_t *lat_entry (snode_t *s){    history_t *h;        h = (history_t *) listelem_alloc (sizeof(history_t));    h->snode = s;    h->score = s->newscore;    h->pred = s->newhist;        h->alloc_next = hist_head;    hist_head = h;        return h;}static void activate (snode_t *s, int32 frm){    if (s->active_frm != frm) {	assert (s->active_frm < frm);	s->active_frm = frm;	next_active[n_active++] = s;    }}/** * Flag the active senones. */void align_sen_active (s3senid_t *senlist, int32 n_sen){    int32 i, sen;        for (sen = 0; sen < n_sen; sen++)	senlist[sen] = 0;        for (i = 0; cur_active[i]; i++) {	assert (IS_S3SENID(cur_active[i]->sen));	senlist[cur_active[i]->sen] = 1;    }}/** * Start Viterbi alignment using the sentence HMM previously built. * Assumes that each utterance will only be aligned once; state member variables * initialized during sentence HMM building. */int32 align_start_utt (char *uttid){    slink_t *l;        curfrm = 0;    shead.score = 0;    shead.hist = NULL;    hist_head = NULL;        n_active = 0;    for (l = shead.succlist; l; l = l->next) {	assert (l->node->active_frm < 0);	l->node->active_frm = 0;	cur_active[n_active++] = l->node;    }    cur_active[n_active++] = NULL;        return 0;}/** * One frame of Viterbi time alignment. */int32 align_frame (int32 *senscr){    int32 i, scr, tmpbest, bestscore, nf, thresh;    snode_t *s, *ps;    slink_t *l;    history_t *tmphist = NULL;    snode_t **tmpswap;        nf = curfrm + 1;    n_active = 0;    /* For each active state update state score and history */    bestscore = (int32) 0x80000000;    for (i = 0; cur_active[i]; i++) {	s = cur_active[i];	assert (IS_S3SENID(s->sen));	tmpbest = (int32) 0x80000000;	for (l = s->predlist; l; l = l->next) {	    ps = l->node;	    if (ps->active_frm == curfrm) {		scr = ps->score + l->prob;				if (scr > tmpbest) {		    tmpbest = scr;		    tmphist = ps->hist;		}	    }	}	assert (tmpbest > (int32) 0x80000000);		s->newscore = tmpbest + senscr[s->sen];	s->newhist = tmphist;	if (s->newscore > bestscore)	    bestscore = s->newscore;    }        if (bestscore <= S3_LOGPROB_ZERO)	E_ERROR("Bestscore= %d in frame %d\n", bestscore, curfrm);    score_scale[curfrm] = bestscore;    thresh = bestscore + beam;        /* Update history lattice for each active state */    for (i = 0; cur_active[i]; i++) {	s = cur_active[i];	if (s->newscore >= thresh) {	    s->newscore -= bestscore;	/* Scale, to avoid underflow */	    s->score = s->newscore;	    s->hist = lat_entry (s);	    activate (s, nf);	    /* Also activate successor nodes of s as they are reachable next frame */	    for (l = s->succlist; l; l = l->next) {		if (IS_S3SENID(l->node->sen))		    activate (l->node, nf);	    }	} else {	    s->score = S3_LOGPROB_ZERO;	    s->hist = NULL;	}    }    /* Update active state list */    next_active[n_active] = NULL;    tmpswap = cur_active;    cur_active = next_active;    next_active = tmpswap;    curfrm = nf;    return 0;}static void build_stseg (history_t *rooth){    history_t *h, *prevh;    align_stseg_t *stseg, *tail = NULL;    int32 f, prevscr;        assert (align_stseg == NULL);    prevscr = 0;    prevh = NULL;    for (f = 0, h = rooth; h; h = h->pred, f++) {	stseg = (align_stseg_t *) listelem_alloc (sizeof(align_stseg_t));	if (! align_stseg)	    align_stseg = stseg;	else	    tail->next = stseg;	tail = stseg;	stseg->next = NULL;		stseg->pid = h->snode->pnode->pid;	stseg->state = h->snode->state;	stseg->start = ((! prevh) || (prevh->snode->pnode->id != h->snode->pnode->id));	stseg->score = h->score - prevscr + score_scale[f];	stseg->bsdiff = h->score;	prevscr = h->score;	prevh = h;    }}static void build_phseg (history_t *rooth){    history_t *h, *nh;    align_phseg_t *phseg, *tail = NULL;    int32 f, prevf, prevscr, scale, bsdiff;        assert (align_phseg == NULL);    prevscr = 0;    bsdiff = 0;    scale = 0;    prevf = -1;        for (f = 0, h = rooth; h; h = h->pred, f++) {	bsdiff += h->score;	scale += score_scale[f];		nh = h->pred;	if ((! nh) || (nh->snode->pnode->id != h->snode->pnode->id)) {	    phseg = (align_phseg_t *) listelem_alloc (sizeof(align_phseg_t));	    if (! align_phseg)		align_phseg = phseg;	    else		tail->next = phseg;	    tail = phseg;	    phseg->next = NULL;	    	    phseg->pid = h->snode->pnode->pid;	    phseg->sf = prevf+1;	    phseg->ef = f;	    phseg->score = h->score - prevscr + scale,	    phseg->bsdiff = bsdiff;	    	    bsdiff = 0;	    scale = 0;	    prevscr = h->score;	    prevf = f;	}    }}static void build_wdseg (history_t *rooth){    history_t *h, *nh;    align_wdseg_t *wdseg, *tail = NULL;    int32 f, prevf, prevscr, scale, bsdiff;        assert (align_wdseg == NULL);    prevscr = 0;    bsdiff = 0;    scale = 0;    prevf = -1;        for (f = 0, h = rooth; h; h = h->pred, f++) {	bsdiff += h->score;	scale += score_scale[f];		nh = h->pred;	if ((! nh) ||	    ((nh->snode->pnode->id != h->snode->pnode->id) &&	     (nh->snode->pnode->pos == 0))) {	/* End of current word */	    wdseg = (align_wdseg_t *) listelem_alloc (sizeof(align_wdseg_t));	    if (! align_wdseg)		align_wdseg = wdseg;	    else		tail->next = wdseg;	    tail = wdseg;	    wdseg->next = NULL;	    	    wdseg->wid = h->snode->pnode->wid;	    wdseg->sf = prevf+1;	    wdseg->ef = f;	    wdseg->score = h->score - prevscr + scale,	    wdseg->bsdiff = bsdiff;	    	    bsdiff = 0;	    scale = 0;	    prevscr = h->score;	    prevf = f;	}    }}/** * All frames consumed.  Trace back best Viterbi state sequence and dump it out. */int32 align_end_utt (align_stseg_t **stseg_out,		     align_phseg_t **phseg_out,		     align_wdseg_t **wdseg_out){    slink_t *l;    snode_t *s;    history_t *h, *ph, *nh;    align_stseg_t *stseg;    align_phseg_t *phseg;    align_wdseg_t *wdseg;        /* Free up previous result, if any */    while (align_stseg) {	stseg = align_stseg->next;	listelem_free ((char *) align_stseg, sizeof(align_stseg_t));	align_stseg = stseg;    }    while (align_phseg) {	phseg = align_phseg->next;	listelem_free ((char *) align_phseg, sizeof(align_phseg_t));	align_phseg = phseg;    }    while (align_wdseg) {	wdseg = align_wdseg->next;	listelem_free ((char *) align_wdseg, sizeof(align_wdseg_t));	align_wdseg = wdseg;    }        /* First find best ending history and link to stail */    stail.score = (int32)0x80000000;    stail.hist = NULL;    for (l = stail.predlist; l; l = l->next) {	s = l->node;	if ((s->active_frm == curfrm) && (s->score + l->prob > stail.score)) {	    stail.score = s->score + l->prob;	    stail.hist = s->hist;	}    }        if (stail.hist) {	/* Reverse the best Viterbi path (back trace) so it is forward in time */	nh = NULL;	for (h = stail.hist; h; h = ph) {	    ph = h->pred;	    h->pred = nh;	    nh = h;	}		/* Trace state, phone, and word segmentations */	build_stseg (nh);	build_phseg (nh);	build_wdseg (nh);    }        *stseg_out = align_stseg;    *phseg_out = align_phseg;    *wdseg_out = align_wdseg;        /* delete history list */    while (hist_head) {	h = hist_head->alloc_next;	listelem_free ((char *) hist_head, sizeof(history_t));	hist_head = h;    }    return (stail.hist ? 0 : -1);}int32 align_init ( mdef_t *_mdef, tmat_t *_tmat, dict_t *_dict){    int32 k;    s3wid_t w;    float64 *f64arg;        mdef = _mdef;    tmat = _tmat;    dict = _dict;        assert (mdef && tmat && dict);        startwid = dict_wordid (dict, S3_START_WORD);    finishwid = dict_wordid (dict, S3_FINISH_WORD);    silwid = dict_wordid (dict, S3_SILENCE_WORD);        if ((NOT_S3WID(startwid)) || (NOT_S3WID(finishwid)))	E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);    if (NOT_S3WID(silwid))	E_ERROR("%s not in dictionary; no optional silence inserted between words\n",	       S3_SILENCE_WORD);    /* Create list of optional filler words to be inserted between transcript words */    fillwid = (s3wid_t *) ckd_calloc ((dict->filler_end - dict->filler_start + 3),				      sizeof(s3wid_t));    k = 0;    if (IS_S3WID(silwid))	fillwid[k++] = silwid;    for (w = dict->filler_start; w <= dict->filler_end; w++) {	if ((dict_basewid (dict, w) == w) &&	    (w != silwid) && (w != startwid) && (w != finishwid))	    fillwid[k++] = w;    }    fillwid[k] = BAD_S3WID;    f64arg = (float64 *) cmd_ln_access ("-beam");    beam = logs3 (*f64arg);    E_INFO ("logs3(beam)= %d\n", beam);    score_scale = (int32 *) ckd_calloc (S3_MAX_FRAMES, sizeof(int32));        hist_head = NULL;        align_stseg = NULL;    align_phseg = NULL;    align_wdseg = NULL;    return 0;}
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -