s3_astar.c

来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 1,267 行 · 第 1/3 页

C
1,267
字号
        E_INFO("Reading DAG file: %s\n", file);    if ((fp = fopen_compchk (file, &ispipe)) == NULL) {	E_ERROR("fopen_compchk (%s) failed\n", file);	return -1;    }        /* Read and verify logbase (ONE BIG HACK!!) */    if (fgets (line, sizeof(line), fp) == NULL) {	E_ERROR ("Premature EOF(%s)\n", file);	goto load_error;    }    if (strncmp (line, "# getcwd: ", 10) != 0) {	E_ERROR ("%s does not begin with '# getcwd: '\n", file);	goto load_error;    }    if (fgets (line, sizeof(line), fp) == NULL) {	E_ERROR ("Premature EOF(%s)\n", file);	goto load_error;    }    f32arg = *((float32 *) cmd_ln_access ("-logbase"));    if ((strncmp (line, "# -logbase ", 11) != 0) || (sscanf (line+11, "%f", &lb) != 1)) {	E_WARN ("%s: Cannot find -logbase in header\n", file);	lb = f32arg;    }        if ((lb <= 1.0) || (lb > 2.0) || (f32arg <= 1.0) || (f32arg > 2.0))	E_ERROR ("%s: logbases out of range; cannot be verified\n", file);    else {	int32 orig, this;	float64 diff;		orig = logs3 (lb - 1.0);	this = logs3 (f32arg - 1.0);	diff = ((orig - this) * 1000.0) / orig;	if (diff < 0)	    diff = -diff;	if (diff > 1.0)		/* Hack!! Hardwired tolerance limits on logbase */	    E_ERROR ("%s: logbase inconsistent: %e\n", file, lb);    }        /* Min. endframes value that a node must persist for it to be not ignored */    min_ef_range = *((int32 *) cmd_ln_access ("-min_endfr"));        /* Read Frames parameter */    nfrm = dag_param_read (fp, "Frames", &lineno);    if (nfrm <= 0) {	E_ERROR("Frames parameter missing or invalid\n");	goto load_error;    }    dag.nfrm = nfrm;        /* Read Nodes parameter */    lineno = 0;    nnode = dag_param_read (fp, "Nodes", &lineno);    if (nnode <= 0) {	E_ERROR("Nodes parameter missing or invalid\n");	goto load_error;    }    /* Read nodes */    darray = (dagnode_t **) ckd_calloc (nnode, sizeof(dagnode_t *));    for (i = 0; i < nnode; i++) {	if (fgets (line, 1024, fp) == NULL) {	    E_ERROR ("Premature EOF(%s) while loading Nodes\n", file);	    goto load_error;	}	lineno++;		if ((k = sscanf (line, "%d %s %d %d %d", &seqid, wd, &sf, &fef, &lef)) != 5) {	    E_ERROR("Cannot parse line: %s\n", line);	    goto load_error;	}		w = dict_wordid (dict, wd);	if (NOT_S3WID(w)) {	    E_ERROR("Unknown word in line: %s\n", line);	    goto load_error;	}		if (seqid != i) {	    E_ERROR("Seqno error: %s\n", line);	    goto load_error;	}		d = (dagnode_t *) listelem_alloc (sizeof(dagnode_t));	darray[i] = d;		d->wid = w;	d->seqid = seqid;	d->sf = sf;	d->fef = fef;	d->lef = lef;	d->reachable = 0;	d->succlist = NULL;	d->predlist = NULL;	d->alloc_next = NULL;		if (! dag.list)	    dag.list = d;	else	    tail->alloc_next = d;	tail = d;    }    /* Read initial node ID */    k = dag_param_read (fp, "Initial", &lineno);    if ((k < 0) || (k >= nnode)) {	E_ERROR("Initial node parameter missing or invalid\n");	goto load_error;    }    dag.entry.node = darray[k];    dag.entry.ascr = 0;    dag.entry.next = NULL;    dag.entry.pscr_valid = 0;    /* Read final node ID */    k = dag_param_read (fp, "Final", &lineno);    if ((k < 0) || (k >= nnode)) {	E_ERROR("Final node parameter missing or invalid\n");	goto load_error;    }    dag.exit.node = darray[k];    dag.exit.ascr = 0;    dag.exit.next = NULL;    dag.exit.pscr_valid = 0;    dag.exit.bypass = NULL;    final = k;    /* Read bestsegscore entries */    if ((k = dag_param_read (fp, "BestSegAscr", &lineno)) < 0) {	E_ERROR("BestSegAscr parameter missing\n");	goto load_error;    }    lat = (struct lat_s *) ckd_calloc (k, sizeof(struct lat_s));    frm2lat = (int32 *) ckd_calloc (nfrm+1, sizeof(int32));        j = -1;    for (i = 0; i < k; i++) {	if (fgets (line, 1024, fp) == NULL) {	    E_ERROR ("Premature EOF(%s) while loading BestSegAscr\n", file);	    goto load_error;	}		lineno++;	if (sscanf (line, "%d %d %d", &seqid, &ef, &ascr) != 3) {	    E_ERROR("Cannot parse line: %s\n", line);	    goto load_error;	}		if ((seqid < 0) || (seqid >= nnode)) {	    E_ERROR("Seqno error: %s\n", line);	    goto load_error;	}		if (ef != j) {	    for (j++; j <= ef; j++)		frm2lat[j] = i;	    --j;	}	lat[i].node = darray[seqid];	lat[i].ef = ef;	lat[i].ascr = ascr;		if ((seqid == final) && (ef == dag.exit.node->lef))	    dag.exit.ascr = ascr;    }    for (j++; j <= nfrm; j++)	frm2lat[j] = k;        /* Read in edges */    while (fgets (line, 1024, fp) != NULL) {	lineno++;	if (line[0] == '#')	    continue;	if ((sscanf (line, "%s%d", wd, &k) == 1) && (strcmp (wd, "Edges") == 0))	    break;    }    k = 0;    while (fgets (line, 1024, fp) != NULL) {	lineno++;	if (sscanf (line, "%d %d %d", &from, &to, &ascr) != 3)	    break;	pd = darray[from];	if (pd->wid == finishwid)	    continue;	d = darray[to];	/* Skip short-lived nodes */	if ((pd == dag.entry.node) || (d == dag.exit.node) ||	    ((d->lef - d->fef >= min_ef_range-1) && (pd->lef - pd->fef >= min_ef_range-1))) {	    if (dag_link (pd, d, ascr) < 0) {		E_ERROR ("%s: maxedge limit (%d) exceeded\n", file, maxedge);		goto load_error;	    }	    	    k++;	}    }    if (strcmp (line, "End\n") != 0) {	E_ERROR("Terminating End missing\n");	goto load_error;    }    #if 0    /* Build edges from lattice end-frame scores if no edges input */    if (k == 0) {	E_INFO("No edges in dagfile; using lattice scores\n");	for (d = dag.list; d; d = d->alloc_next) {	    if (d->sf == 0)		assert (d->wid == startwid);	    else if ((d == dag.exit.node) || (d->lef - d->fef >= min_ef_range-1)) {		/* Link from all end points == d->sf-1 to d */		for (l = frm2lat[d->sf-1]; l < frm2lat[d->sf]; l++) {		    pd = lat[l].node;		/* Predecessor DAG node */		    if (pd->wid == finishwid)			continue;		    if ((pd == dag.entry.node) || (pd->lef - pd->fef >= min_ef_range-1)) {			dag_link (pd, d, lat[l].ascr);			k++;		    }		}	    }	}    }#endif    fudge = *((int32 *) cmd_ln_access ("-dagfudge"));    if (fudge > 0) {	/* Add "illegal" links that are near misses */	for (d = dag.list; d; d = d->alloc_next) {	    if (d->lef - d->fef < min_ef_range-1)		continue;	    	    /* Links to d from nodes that first ended just when d started */	    for (l = frm2lat[d->sf]; l < frm2lat[d->sf+1]; l++) {		pd = lat[l].node;		/* Predecessor DAG node */		if ((pd->wid != finishwid) && (pd->fef == d->sf) &&		    (pd->sf < d->sf) &&		    (pd->lef - pd->fef >= min_ef_range-1)) {		    dag_link (pd, d, lat[l].ascr);		    k++;		}	    }	    	    if (fudge < 2)		continue;	    	    /* Links to d from nodes that first ended just BEYOND when d started */	    for (l = frm2lat[d->sf+1]; l < frm2lat[d->sf+2]; l++) {		pd = lat[l].node;		/* Predecessor DAG node */		if ((pd->wid != finishwid) && (pd->fef == d->sf+1) &&		    (pd->sf < d->sf) &&		    (pd->lef - pd->fef >= min_ef_range-1)) {		    dag_link (pd, d, lat[l].ascr);		    k++;		}	    }	}    }        fclose_comp (fp, ispipe);    ckd_free (darray);    ckd_free (lat);    ckd_free (frm2lat);        /*     * HACK!! Change dag.exit.node wid to finishwid if some other filler word,     * to avoid complications with LM scores at this point.     */    dag.orig_exitwid = dag.exit.node->wid;    if (filler_word(dag.exit.node->wid))	dag.exit.node->wid = finishwid;        /*     * Mark nodes from which final exit node is reachable.  Add links bypassing filler     * nodes, and compute heuristic score (hscr) from each node to end of utterance.     */    dag_mark_reachable (dag.exit.node);    dag_remove_unreachable ();    if (dag_bypass_filler_nodes () < 0) {	E_ERROR ("%s: maxedge limit (%d) exceeded\n", file, maxedge);	return -1;    }        dag_compute_hscr ();    dag_remove_bypass_links ();    E_INFO("%5d frames, %6d nodes, %8d edges, %8d bypass\n",	   nfrm, nnode, dag.nlink, dag.nbypass);        return dag.nfrm;load_error:    E_ERROR("Failed to load %s\n", file);    if (fp)	fclose_comp (fp, ispipe);    if (darray)	ckd_free (darray);    if (lat)	ckd_free (lat);    if (frm2lat)	ckd_free (frm2lat);    return -1;}/* ---------------------------- NBEST CODE ---------------------------- *//** * A node along each partial path.  Partial paths form a tree structure rooted at the * start node. */typedef struct ppath_s {    struct ppath_s *hist;	/** Immediately previous ppath node; NULL if none */    struct ppath_s *lmhist;	/** Previous LM (non-filler) ppath node; NULL if none */    dagnode_t *dagnode;		/** Dagnode (word,startfrm) represented by ppath node */    int32 lscr;		/** LM score for this node given past history */    int32 pscr;		/** Path score (from initial node) ending at this node startfrm */    int32 tscr;		/** pscr + heuristic score (this node startfrm -> end of utt) */    uint32 histhash;	/** Hash value of complete history, for spotting duplicates */    int32 pruned;	/** If superseded by another with same history and better score */    struct ppath_s *hashnext;	/** Next node with same hashmod value */    struct ppath_s *next;	/** Links all allocated nodes for reclamation at the end;				   NULL if last in list */} ppath_t;static ppath_t *ppath_list;	/** Complete list of allocated ppath nodes */static int32 n_ppath;		/** #Partial paths allocated (to control memory usage) */static int32 maxppath;		/** Max partial paths allowed before aborting *//** Heap (sorted) partial path nodes */typedef struct heap_s {    ppath_t *ppath;	/** Partial path node */    int32 nl, nr;	/** #left/right descendants from this node (for balancing tree) */    struct heap_s *left;	/** Root of left descendant heap */    struct heap_s *right;	/** Root of right descendant heap */} aheap_t;static aheap_t *heap_root;/** * For tracking ppath nodes with identical histories.  For rapid location of duplicates * keep a separate list of nodes for each (ppath_t.histhash % HISTHASH_MOD) value. * Two paths have identical histories (are duplicates) iff: * 	1. Their tail nodes have the same dagnode (same <wid,sf> value), and * 	2. Their LM histories are identical. */#define HISTHASH_MOD	200003	/* A prime */static ppath_t **hash_list;	/* A separate list for each hashmod value (see above) */#if 0static void heap_dump (aheap_t *top, int32 level){    int32 i;        if (! top)	return;        for (i = 0; i < level; i++)	printf ("  ");        printf ("%s %d %d %d %d\n", dict_wordstr(dict, top->nb->dagnode->wid),	    top->nb->dagnode->sf, top->nb->dagnode->fef, top->nb->dagnode->lef,	    top->nb->score);    heap_dump (top->left, level+1);    heap_dump (top->right, level+1);}#endif/** * Insert the given new ppath node in sorted (sub)heap rooted at the given heap node * Heap is sorted by tscr (i.e., total path score + heuristic score to end of utt). * Return the root of the new, updated (sub)heap. */static aheap_t *aheap_insert (aheap_t *root, ppath_t *new){    aheap_t *h;    ppath_t *pp;        if (! root) {	h = (aheap_t *) listelem_alloc (sizeof(aheap_t));	h->ppath = new;	h->left = h->right = NULL;	h->nl = h->nr = 0;	return h;    }    /* Root already exists; if new node better, replace root with it */    pp = root->ppath;    if (pp->tscr < new->tscr) {	root->ppath = new;	new = pp;    }    /* Insert new or old (replaced) node in right or left subtree; keep them balanced */    if (root->nl > root->nr) {	root->right = aheap_insert (root->right, new);	root->nr++;    } else {	root->left = aheap_insert (root->left, new);	root->nl++;    }    return root;}/** * Pop the top element off the heap and return root of adjust heap. * Root must be non-NULL when this function to be called. */static aheap_t *aheap_pop (aheap_t *root){    aheap_t *l, *r;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?