s3_dag.c

来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 943 行 · 第 1/2 页

C
943
字号
    final = k;    /* Read bestsegscore entries */    if ((k = dag_param_read (fp, "BestSegAscr", &lineno)) < 0) {	E_ERROR("BestSegAscr parameter missing\n");	goto load_error;    }    lat = (struct lat_s *) ckd_calloc (k, sizeof(struct lat_s));    frm2lat = (int32 *) ckd_calloc (nfrm+1, sizeof(int32));        j = -1;    for (i = 0; i < k; i++) {	if (fgets (line, 1024, fp) == NULL) {	    E_ERROR("Premature EOF(%s)\n", line);	    goto load_error;	}		lineno++;	if (sscanf (line, "%d %d %d", &seqid, &ef, &ascr) != 3) {	    E_ERROR("Bad line: %s\n", line);	    goto load_error;	}		if ((seqid < 0) || (seqid >= nnode)) {	    E_ERROR("Seqno error: %s\n", line);	    goto load_error;	}		if (ef != j) {	    for (j++; j <= ef; j++)		frm2lat[j] = i;	    --j;	}	lat[i].node = darray[seqid];	lat[i].ef = ef;	lat[i].ascr = ascr;		if ((seqid == final) && (ef == dag.exit.node->lef))	    dag.exit.ascr = ascr;    }    for (j++; j <= nfrm; j++)	frm2lat[j] = k;        /* Read in edges */    while (fgets (line, 1024, fp) != NULL) {	lineno++;	if (line[0] == '#')	    continue;	if ((sscanf (line, "%s%d", wd,&k) == 1) && (strcmp (wd, "Edges") == 0))	    break;    }    k = 0;    while (fgets (line, 1024, fp) != NULL) {	lineno++;	if (sscanf (line, "%d %d %d", &from, &to, &ascr) != 3)	    break;	pd = darray[from];	if (pd->wid == finishwid)	    continue;	d = darray[to];	/* Skip short-lived nodes */	if ((pd == dag.entry.node) || (d == dag.exit.node) ||	    ((d->lef - d->fef >= min_ef_range-1) && (pd->lef - pd->fef >= min_ef_range-1))) {	    if (dag_link (pd, d, ascr, d->sf-1, NULL) < 0) {		E_ERROR ("%s: maxedge limit (%d) exceeded\n", file, maxedge);		goto load_error;	    }	    	    k++;	}    }    if (strcmp (line, "End\n") != 0) {	E_ERROR("Terminating 'End' missing\n");	goto load_error;    }    #if 0    /* Build edges from lattice end-frame scores if no edges input */    if (k == 0) {	E_INFO("No edges in dagfile; using lattice scores\n");	for (d = dag.list; d; d = d->alloc_next) {	    if (d->sf == 0)		assert (d->wid == startwid);	    else if ((d == dag.exit.node) || (d->lef - d->fef >= min_ef_range-1)) {		/* Link from all end points == d->sf-1 to d */		for (l = frm2lat[d->sf-1]; l < frm2lat[d->sf]; l++) {		    pd = lat[l].node;		/* Predecessor DAG node */		    if (pd->wid == finishwid)			continue;		    if ((pd == dag.entry.node) || (pd->lef - pd->fef >= min_ef_range-1)) {			dag_link (pd, d, lat[l].ascr, d->sf-1, NULL);			k++;		    }		}	    }	}    }#endif    fudge = *((int32 *) cmd_ln_access ("-dagfudge"));    if (fudge > 0) {	/* Add "illegal" links that are near misses */	for (d = dag.list; d; d = d->alloc_next) {	    if (d->lef - d->fef < min_ef_range-1)		continue;	    	    /* Links to d from nodes that first ended just when d started */	    for (l = frm2lat[d->sf]; l < frm2lat[d->sf+1]; l++) {		pd = lat[l].node;		/* Predecessor DAG node */		if ((pd->wid != finishwid) && (pd->fef == d->sf) &&		    (pd->lef - pd->fef >= min_ef_range-1)) {		    dag_link (pd, d, lat[l].ascr, d->sf-1, NULL);		    k++;		}	    }	    	    if (fudge < 2)		continue;	    	    /* Links to d from nodes that first ended just BEYOND when d started */	    for (l = frm2lat[d->sf+1]; l < frm2lat[d->sf+2]; l++) {		pd = lat[l].node;		/* Predecessor DAG node */		if ((pd->wid != finishwid) && (pd->fef == d->sf+1) &&		    (pd->lef - pd->fef >= min_ef_range-1)) {		    dag_link (pd, d, lat[l].ascr, d->sf-1, NULL);		    k++;		}	    }	}    }        fclose_comp (fp, ispipe);    ckd_free (darray);    ckd_free (lat);    ckd_free (frm2lat);        /*     * HACK!! Change dag.exit.node wid to finishwid if some other filler word,     * to avoid complications with LM scores at this point.     */    dag.orig_exitwid = dag.exit.node->wid;    if (filler_word(dag.exit.node->wid))	dag.exit.node->wid = finishwid;        /* Add links bypassing filler nodes */    if (dag_remove_filler_nodes () < 0) {	E_ERROR ("%s: maxedge limit (%d) exceeded\n", file, maxedge);	return -1;    }        /* Attach a dummy predecessor link from <<s>,0> to nowhere */    dag_link (NULL, dag.entry.node, 0, -1, NULL);        E_INFO("%5d frames, %6d nodes, %8d edges\n", nfrm, nnode, dag.nlink);        return dag.nfrm;load_error:    E_ERROR("Failed to load %s\n", file);    if (fp)	fclose_comp (fp, ispipe);    if (darray)	ckd_free (darray);    if (lat)	ckd_free (lat);    if (frm2lat)	ckd_free (frm2lat);    return -1;}int32 dag_destroy ( void ){    dagnode_t *d, *nd;    daglink_t *l, *nl;        for (d = dag.list; d; d = nd) {	nd = d->alloc_next;		for (l = d->succlist; l; l = nl) {	    nl = l->next;	    listelem_free ((char *)l, sizeof(daglink_t));	}	for (l = d->predlist; l; l = nl) {	    nl = l->next;	    listelem_free ((char *)l, sizeof(daglink_t));	}	listelem_free ((char *)d, sizeof(dagnode_t));    }    dag.list = NULL;    return 0;}/* * Recursive step in dag_search:  best backward path from src to root beginning with l. * Return value: 0 if successful, -1 otherwise. */static int32 dag_bestpath (			   daglink_t *l,	/* Backward link! */			   dagnode_t *src)	/* Source node for backward link l */{    dagnode_t *d, *pd;    daglink_t *pl;    int32 lscr, score;        assert (! l->pscr_valid);        if ((d = l->node) == NULL) {	/* If no destination at end of l, src is root node.  Recursion termination */	assert (dict_basewid(dict,src->wid) == startwid);	l->lscr = 0;	l->pscr = 0;	l->pscr_valid = 1;	l->history = NULL;		return 0;    }        /* Search all predecessor links of l */    for (pl = d->predlist; pl; pl = pl->next) {	pd = pl->node;	if (pd && filler_word (pd->wid))	/* Skip filler node */	    continue;	/* Evaluate best path along pl if not yet evaluated (recursive step) */	if (! pl->pscr_valid)	    if (dag_bestpath (pl, d) < 0)		return -1;		/* Accumulated path score along pl->l */	if (pl->pscr > (int32)0x80000000) {	    score = pl->pscr + l->ascr;	    if (score > l->pscr) {	/* rkm: Added 20-Nov-1996 */		if (pd)		    lscr = lm_tg_score (lm,					dict2lmwid[dict_basewid(dict,pd->wid)],					dict2lmwid[dict_basewid(dict,d->wid)],					dict2lmwid[dict_basewid(dict,src->wid)],					src->wid);		else		    lscr = lm_bg_score (lm,					dict2lmwid[dict_basewid(dict,d->wid)], 					dict2lmwid[dict_basewid(dict,src->wid)],					src->wid);		score += lscr;		if (lmop++ >= maxlmop)		    return -1;		/* Update best path and score beginning with l */		if (score > l->pscr) {		    l->lscr = lscr;		    l->pscr = score;		    l->history = pl;		}	    }	}    }    #if 0    printf ("%s,%d -> %s,%d = %d\n",	    dict_wordstr (dict,d->wid), d->sf,	    dict_wordstr (dict,src->wid), src->sf,	    l->pscr);#endif    l->pscr_valid = 1;    return 0;}/* * Recursive backtrace through DAG (from final node to root) using daglink_t.history. * Restore bypassed links during backtrace. */static srch_hyp_t *dag_backtrace (daglink_t *l){    srch_hyp_t *h, *hhead, *htail;    int32 pscr;    dagnode_t *src, *dst;    daglink_t *bl, *hist;        hyp = NULL;    dst = NULL;        for (; l; l = hist) {	hist = l->history;		if (hyp)	    hyp->lscr = l->lscr;	/* As lscr actually applies to successor node */		if (! l->node) {	    assert (! l->history);	    break;	}		if (! l->bypass) {	    /* Link did not bypass any filler node */	    h = (srch_hyp_t *) listelem_alloc (sizeof(srch_hyp_t));	    h->wid = l->node->wid;	    h->word = dict_wordstr (dict,h->wid);	    h->sf = l->node->sf;	    h->ef = l->ef;	    h->ascr = l->ascr;	    	    h->next = hyp;	    hyp = h;	} else {	    /* Link bypassed one or more filler nodes; restore bypassed link seq. */	    hhead = htail = NULL;	    	    src = l->node;	/* Note that l is a PREDECESSOR link */	    for (; l; l = l->bypass) {		h = (srch_hyp_t *) listelem_alloc (sizeof(srch_hyp_t));		h->wid = src->wid;		h->word = dict_wordstr (dict,h->wid);		h->sf = src->sf;		if (hhead)		    h->lscr = fillpen (fpen,dict_basewid (dict,src->wid));				if (l->bypass) {		    dst = l->bypass->src;		    assert (filler_word (dst->wid));		    bl = find_succlink (src, dst);		    assert (bl);		} else		    bl = l;		h->ef = bl->ef;		h->ascr = bl->ascr;		if (htail)		    htail->next = h;		else		    hhead = h;		htail = h;		src = dst;	    }	    htail->next = hyp;	    hyp = hhead;	}    }    /* Compute path score for each node in hypothesis */    pscr = 0;    for (h = hyp; h; h = h->next) {	pscr = pscr + h->lscr + h->ascr;	h->pscr = pscr;    }        return hyp;}static int32 dag_chk_linkscr (dag_t *dag){    dagnode_t *d;    daglink_t *l;        for (d = dag->list; d; d = d->alloc_next) {	for (l = d->succlist; l; l = l->next) {	  /*	  E_INFO("l->ascr %d\n",l->ascr); */	  /* 20040909: I change this from >= to > because s3.5 sometimes generate lattice which has 0 as the beginning node of the lattice.  This should be regarded as temporary change*/	  if (l->ascr > 0){		return -1;	  }	}    }    return 0;}/* * Final global best path through DAG constructed from the word lattice. * Assumes that the DAG has already been constructed and is consistent with the word * lattice. * The search uses a recursive algorithm to find the best (reverse) path from the final * DAG node to the root:  The best path from any node (beginning with a particular link L) * depends on a similar best path for all links leaving the endpoint of L.  (This is * sufficient to handle trigram LMs.) */srch_hyp_t *s3dag_dag_search (char *utt){    daglink_t *l, *bestl;    dagnode_t *d, *final;    int32 bestscore;    int32 k;        /* Free any old hypothesis */    hyp_free ();        /*     * Set limit on max LM ops allowed after which utterance is aborted.     * Limit is lesser of absolute max and per frame max.     */    maxlmop = *((int32 *) cmd_ln_access ("-maxlmop"));    k = *((int32 *) cmd_ln_access ("-maxlpf"));    k *= dag.nfrm;    if (maxlmop > k)	maxlmop = k;    lmop = 0;    /* Find the backward link from the final DAG node that has the best path to root */    final = dag.exit.node;    bestscore = (int32) 0x80000000;    bestl = NULL;    /* Check that all edge scores are -ve; error if not so. */    if (dag_chk_linkscr (&dag) < 0){      E_ERROR("Some edges are not negative\n");	return NULL;    }    for (l = final->predlist; l; l = l->next) {	d = l->node;	if (! filler_word (d->wid)) {	/* Ignore filler node */	    /* Best path to root beginning with l */	    if (dag_bestpath (l, final) < 0) {		E_ERROR("%s: Max LM ops (%d) exceeded\n", utt, maxlmop);		bestl = NULL;		break;	    }	    if (l->pscr > bestscore) {		bestscore = l->pscr;		bestl = l;	    }	}    }    if (! bestl) {	E_ERROR("Bestpath search failed for %s\n", utt);	return NULL;    }        /*     * At this point bestl is the best (reverse) link/path leaving the final node.  But     * this does not include the acoustic score for the final node itself.  Add it.     */    l = &(dag.exit);    l->history = bestl;    l->pscr += bestl->pscr + l->ascr;    l->ef = dag.nfrm - 1;        /* Backtrack through DAG for best path */    hyp = dag_backtrace (l);    assert(hyp);    if(hyp==NULL){      E_INFO("At this point hyp is NULL\n");    }else{      E_INFO("At this point hyp it is not NULL\n");    }    return (hyp);}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?