s3_astar.c
来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 1,267 行 · 第 1/3 页
C
1,267 行
E_INFO("Reading DAG file: %s\n", file); if ((fp = fopen_compchk (file, &ispipe)) == NULL) { E_ERROR("fopen_compchk (%s) failed\n", file); return -1; } /* Read and verify logbase (ONE BIG HACK!!) */ if (fgets (line, sizeof(line), fp) == NULL) { E_ERROR ("Premature EOF(%s)\n", file); goto load_error; } if (strncmp (line, "# getcwd: ", 10) != 0) { E_ERROR ("%s does not begin with '# getcwd: '\n", file); goto load_error; } if (fgets (line, sizeof(line), fp) == NULL) { E_ERROR ("Premature EOF(%s)\n", file); goto load_error; } f32arg = *((float32 *) cmd_ln_access ("-logbase")); if ((strncmp (line, "# -logbase ", 11) != 0) || (sscanf (line+11, "%f", &lb) != 1)) { E_WARN ("%s: Cannot find -logbase in header\n", file); lb = f32arg; } if ((lb <= 1.0) || (lb > 2.0) || (f32arg <= 1.0) || (f32arg > 2.0)) E_ERROR ("%s: logbases out of range; cannot be verified\n", file); else { int32 orig, this; float64 diff; orig = logs3 (lb - 1.0); this = logs3 (f32arg - 1.0); diff = ((orig - this) * 1000.0) / orig; if (diff < 0) diff = -diff; if (diff > 1.0) /* Hack!! Hardwired tolerance limits on logbase */ E_ERROR ("%s: logbase inconsistent: %e\n", file, lb); } /* Min. endframes value that a node must persist for it to be not ignored */ min_ef_range = *((int32 *) cmd_ln_access ("-min_endfr")); /* Read Frames parameter */ nfrm = dag_param_read (fp, "Frames", &lineno); if (nfrm <= 0) { E_ERROR("Frames parameter missing or invalid\n"); goto load_error; } dag.nfrm = nfrm; /* Read Nodes parameter */ lineno = 0; nnode = dag_param_read (fp, "Nodes", &lineno); if (nnode <= 0) { E_ERROR("Nodes parameter missing or invalid\n"); goto load_error; } /* Read nodes */ darray = (dagnode_t **) ckd_calloc (nnode, sizeof(dagnode_t *)); for (i = 0; i < nnode; i++) { if (fgets (line, 1024, fp) == NULL) { E_ERROR ("Premature EOF(%s) while loading Nodes\n", file); goto load_error; } lineno++; if ((k = sscanf (line, "%d %s %d %d %d", &seqid, wd, &sf, &fef, &lef)) != 5) { E_ERROR("Cannot parse line: %s\n", line); goto load_error; } w = dict_wordid (dict, wd); if (NOT_S3WID(w)) { E_ERROR("Unknown word in line: %s\n", line); goto load_error; } if (seqid != i) { E_ERROR("Seqno error: %s\n", line); goto load_error; } d = (dagnode_t *) listelem_alloc (sizeof(dagnode_t)); darray[i] = d; d->wid = w; d->seqid = seqid; d->sf = sf; d->fef = fef; d->lef = lef; d->reachable = 0; d->succlist = NULL; d->predlist = NULL; d->alloc_next = NULL; if (! dag.list) dag.list = d; else tail->alloc_next = d; tail = d; } /* Read initial node ID */ k = dag_param_read (fp, "Initial", &lineno); if ((k < 0) || (k >= nnode)) { E_ERROR("Initial node parameter missing or invalid\n"); goto load_error; } dag.entry.node = darray[k]; dag.entry.ascr = 0; dag.entry.next = NULL; dag.entry.pscr_valid = 0; /* Read final node ID */ k = dag_param_read (fp, "Final", &lineno); if ((k < 0) || (k >= nnode)) { E_ERROR("Final node parameter missing or invalid\n"); goto load_error; } dag.exit.node = darray[k]; dag.exit.ascr = 0; dag.exit.next = NULL; dag.exit.pscr_valid = 0; dag.exit.bypass = NULL; final = k; /* Read bestsegscore entries */ if ((k = dag_param_read (fp, "BestSegAscr", &lineno)) < 0) { E_ERROR("BestSegAscr parameter missing\n"); goto load_error; } lat = (struct lat_s *) ckd_calloc (k, sizeof(struct lat_s)); frm2lat = (int32 *) ckd_calloc (nfrm+1, sizeof(int32)); j = -1; for (i = 0; i < k; i++) { if (fgets (line, 1024, fp) == NULL) { E_ERROR ("Premature EOF(%s) while loading BestSegAscr\n", file); goto load_error; } lineno++; if (sscanf (line, "%d %d %d", &seqid, &ef, &ascr) != 3) { E_ERROR("Cannot parse line: %s\n", line); goto load_error; } if ((seqid < 0) || (seqid >= nnode)) { E_ERROR("Seqno error: %s\n", line); goto load_error; } if (ef != j) { for (j++; j <= ef; j++) frm2lat[j] = i; --j; } lat[i].node = darray[seqid]; lat[i].ef = ef; lat[i].ascr = ascr; if ((seqid == final) && (ef == dag.exit.node->lef)) dag.exit.ascr = ascr; } for (j++; j <= nfrm; j++) frm2lat[j] = k; /* Read in edges */ while (fgets (line, 1024, fp) != NULL) { lineno++; if (line[0] == '#') continue; if ((sscanf (line, "%s%d", wd, &k) == 1) && (strcmp (wd, "Edges") == 0)) break; } k = 0; while (fgets (line, 1024, fp) != NULL) { lineno++; if (sscanf (line, "%d %d %d", &from, &to, &ascr) != 3) break; pd = darray[from]; if (pd->wid == finishwid) continue; d = darray[to]; /* Skip short-lived nodes */ if ((pd == dag.entry.node) || (d == dag.exit.node) || ((d->lef - d->fef >= min_ef_range-1) && (pd->lef - pd->fef >= min_ef_range-1))) { if (dag_link (pd, d, ascr) < 0) { E_ERROR ("%s: maxedge limit (%d) exceeded\n", file, maxedge); goto load_error; } k++; } } if (strcmp (line, "End\n") != 0) { E_ERROR("Terminating End missing\n"); goto load_error; } #if 0 /* Build edges from lattice end-frame scores if no edges input */ if (k == 0) { E_INFO("No edges in dagfile; using lattice scores\n"); for (d = dag.list; d; d = d->alloc_next) { if (d->sf == 0) assert (d->wid == startwid); else if ((d == dag.exit.node) || (d->lef - d->fef >= min_ef_range-1)) { /* Link from all end points == d->sf-1 to d */ for (l = frm2lat[d->sf-1]; l < frm2lat[d->sf]; l++) { pd = lat[l].node; /* Predecessor DAG node */ if (pd->wid == finishwid) continue; if ((pd == dag.entry.node) || (pd->lef - pd->fef >= min_ef_range-1)) { dag_link (pd, d, lat[l].ascr); k++; } } } } }#endif fudge = *((int32 *) cmd_ln_access ("-dagfudge")); if (fudge > 0) { /* Add "illegal" links that are near misses */ for (d = dag.list; d; d = d->alloc_next) { if (d->lef - d->fef < min_ef_range-1) continue; /* Links to d from nodes that first ended just when d started */ for (l = frm2lat[d->sf]; l < frm2lat[d->sf+1]; l++) { pd = lat[l].node; /* Predecessor DAG node */ if ((pd->wid != finishwid) && (pd->fef == d->sf) && (pd->sf < d->sf) && (pd->lef - pd->fef >= min_ef_range-1)) { dag_link (pd, d, lat[l].ascr); k++; } } if (fudge < 2) continue; /* Links to d from nodes that first ended just BEYOND when d started */ for (l = frm2lat[d->sf+1]; l < frm2lat[d->sf+2]; l++) { pd = lat[l].node; /* Predecessor DAG node */ if ((pd->wid != finishwid) && (pd->fef == d->sf+1) && (pd->sf < d->sf) && (pd->lef - pd->fef >= min_ef_range-1)) { dag_link (pd, d, lat[l].ascr); k++; } } } } fclose_comp (fp, ispipe); ckd_free (darray); ckd_free (lat); ckd_free (frm2lat); /* * HACK!! Change dag.exit.node wid to finishwid if some other filler word, * to avoid complications with LM scores at this point. */ dag.orig_exitwid = dag.exit.node->wid; if (filler_word(dag.exit.node->wid)) dag.exit.node->wid = finishwid; /* * Mark nodes from which final exit node is reachable. Add links bypassing filler * nodes, and compute heuristic score (hscr) from each node to end of utterance. */ dag_mark_reachable (dag.exit.node); dag_remove_unreachable (); if (dag_bypass_filler_nodes () < 0) { E_ERROR ("%s: maxedge limit (%d) exceeded\n", file, maxedge); return -1; } dag_compute_hscr (); dag_remove_bypass_links (); E_INFO("%5d frames, %6d nodes, %8d edges, %8d bypass\n", nfrm, nnode, dag.nlink, dag.nbypass); return dag.nfrm;load_error: E_ERROR("Failed to load %s\n", file); if (fp) fclose_comp (fp, ispipe); if (darray) ckd_free (darray); if (lat) ckd_free (lat); if (frm2lat) ckd_free (frm2lat); return -1;}/* ---------------------------- NBEST CODE ---------------------------- *//** * A node along each partial path. Partial paths form a tree structure rooted at the * start node. */typedef struct ppath_s { struct ppath_s *hist; /** Immediately previous ppath node; NULL if none */ struct ppath_s *lmhist; /** Previous LM (non-filler) ppath node; NULL if none */ dagnode_t *dagnode; /** Dagnode (word,startfrm) represented by ppath node */ int32 lscr; /** LM score for this node given past history */ int32 pscr; /** Path score (from initial node) ending at this node startfrm */ int32 tscr; /** pscr + heuristic score (this node startfrm -> end of utt) */ uint32 histhash; /** Hash value of complete history, for spotting duplicates */ int32 pruned; /** If superseded by another with same history and better score */ struct ppath_s *hashnext; /** Next node with same hashmod value */ struct ppath_s *next; /** Links all allocated nodes for reclamation at the end; NULL if last in list */} ppath_t;static ppath_t *ppath_list; /** Complete list of allocated ppath nodes */static int32 n_ppath; /** #Partial paths allocated (to control memory usage) */static int32 maxppath; /** Max partial paths allowed before aborting *//** Heap (sorted) partial path nodes */typedef struct heap_s { ppath_t *ppath; /** Partial path node */ int32 nl, nr; /** #left/right descendants from this node (for balancing tree) */ struct heap_s *left; /** Root of left descendant heap */ struct heap_s *right; /** Root of right descendant heap */} aheap_t;static aheap_t *heap_root;/** * For tracking ppath nodes with identical histories. For rapid location of duplicates * keep a separate list of nodes for each (ppath_t.histhash % HISTHASH_MOD) value. * Two paths have identical histories (are duplicates) iff: * 1. Their tail nodes have the same dagnode (same <wid,sf> value), and * 2. Their LM histories are identical. */#define HISTHASH_MOD 200003 /* A prime */static ppath_t **hash_list; /* A separate list for each hashmod value (see above) */#if 0static void heap_dump (aheap_t *top, int32 level){ int32 i; if (! top) return; for (i = 0; i < level; i++) printf (" "); printf ("%s %d %d %d %d\n", dict_wordstr(dict, top->nb->dagnode->wid), top->nb->dagnode->sf, top->nb->dagnode->fef, top->nb->dagnode->lef, top->nb->score); heap_dump (top->left, level+1); heap_dump (top->right, level+1);}#endif/** * Insert the given new ppath node in sorted (sub)heap rooted at the given heap node * Heap is sorted by tscr (i.e., total path score + heuristic score to end of utt). * Return the root of the new, updated (sub)heap. */static aheap_t *aheap_insert (aheap_t *root, ppath_t *new){ aheap_t *h; ppath_t *pp; if (! root) { h = (aheap_t *) listelem_alloc (sizeof(aheap_t)); h->ppath = new; h->left = h->right = NULL; h->nl = h->nr = 0; return h; } /* Root already exists; if new node better, replace root with it */ pp = root->ppath; if (pp->tscr < new->tscr) { root->ppath = new; new = pp; } /* Insert new or old (replaced) node in right or left subtree; keep them balanced */ if (root->nl > root->nr) { root->right = aheap_insert (root->right, new); root->nr++; } else { root->left = aheap_insert (root->left, new); root->nl++; } return root;}/** * Pop the top element off the heap and return root of adjust heap. * Root must be non-NULL when this function to be called. */static aheap_t *aheap_pop (aheap_t *root){ aheap_t *l, *r;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?