📄 searchlat.c
字号:
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- *//* ==================================================================== * Copyright (c) 1999-2001 Carnegie Mellon University. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * This work was supported in part by funding from the Defense Advanced * Research Projects Agency and the National Science Foundation of the * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * *//* * searchlat.c -- construct word lattice and search all paths for best path. * * HISTORY * * * 12-Aug-2004 M K Ravishankar (rkm@cs) at Carnegie Mellon University * Bugfix: Obtained current start_wid at the start of lattice_rescore(). * * 03-Apr-98 M K Ravishankar (rkm@cs) at Carnegie Mellon University * Added searchlat_set_rescore_lm(). * * 24-Mar-98 M K Ravishankar (rkm@cs) at Carnegie Mellon University * Added phone perplexity measure into search_hyp_t structure hypothesis * for each utterance. * * 08-Mar-98 M K Ravishankar (rkm@cs) at Carnegie Mellon University * Added lattice density measure into search_hyp_t structure generated * as hypothesis for each utterance. * * 27-May-97 M K Ravishankar (rkm@cs) at Carnegie Mellon University * Added Bob Brennan's code for handling multiple saved lattices. * * 04-Apr-97 M K Ravishankar (rkm@cs) at Carnegie Mellon University * Added search_remove_context() call at the end of lattice_rescore. * * 30-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. * Added dump_lattice() similar to Sphinx-III format lattices (almost). * * 08-Dec-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. * Changed hyp[] result to contain actual frame ids instead of * post-silence-compression ids. * * 08-Dec-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. * In building the lattice, added a check to ignore BPTable words not * in the current LM. * * Revision 8.3 94/10/11 12:41:26 rkm * Print back trace conditionally, depending on -backtrace argument. * Use actual pronunciation in shortest-path search, depending on * -reportpron argument. * Added acoustic score for </s> to final path score. * * Revision 8.2 94/07/29 11:59:06 rkm * Added code to limit search for n-best alternatives. * Added search_save_lattice(). * * Revision 8.1 94/05/26 16:55:56 rkm * Non-recursive implementation of lattice rescoring using trigrams. * Created. * */#ifndef TRUE/* For those infuriating HPs */#define TRUE 1#define FALSE 0#endif#include <stdio.h>#include <string.h>#include <stdlib.h>#include <assert.h>#include "s2types.h"#include "ckd_alloc.h"#include "cmd_ln.h"#include "basic_types.h"#include "strfuncs.h"#include "linklist.h"#include "list.h"#include "err.h"#include "dict.h"#include "lm.h"#include "lmclass.h"#include "lm_3g.h"#include "search_const.h"#include "msd.h"#include "kb.h"#include "fbs.h"#include "search.h"#define QUIT(x) {fprintf x; exit(-1);}/* Externally obtained variables */static int32 finish_wid, start_wid, sil_wid, last_frame, sil_pen, filler_pen;static BPTBL_T *bptbl;static int32 *BScoreStack;static int32 **rc_fwdperm;static search_hyp_t *hyp;static int32 altpron = FALSE;extern BPTBL_T *search_get_bptable();extern int32 *search_get_bscorestack();extern lw_t search_get_lw();extern search_hyp_t *search_get_hyp();#if 0extern char *query_utt_id(void);#endifstatic latnode_t *latnode_list = NULL;/* All paths start and end between these two lattice nodes */static latnode_t *start_node, *final_node;int32 final_node_ascr; /* Acoustic score for the final </s> node *//* Structure to hold a lattice (semi-)permanently (for dictation/correction...) */static struct permanent_lattice_s { latnode_t *latnode_list; latnode_t *start_node, *final_node;} lattice;/* Structure added by Bob Brennan for maintaining several lattices at a time */#define MAX_LAT_QUEUE 20static struct lattice_queue_s { struct permanent_lattice_s lattice; char lmName[256]; char uttid[256]; int32 addIndex;} latQueue[MAX_LAT_QUEUE];static int32 latQueueInit = 0;static int32 latQueueAddIndex = 0;static char altLMName[256];static int32 n_node, n_link;#define ISA_FILLER_WORD(x) ((x)>=sil_wid)#define ISA_REAL_WORD(x) ((x)<finish_wid)static lw_t lw_factor; /* Lang-weight factor (lw(2nd pass)/lw(1st pass)) */static char *rescore_lmname = NULL;voidsearchlat_set_rescore_lm(char const *lmname){ if (rescore_lmname) free(rescore_lmname); rescore_lmname = ckd_salloc(lmname);}/* * Create a directed link between "from" and "to" nodes, but if a link already exists, * choose one with the best link_scr. */static voidlink_latnodes(latnode_t * from, latnode_t * to, int32 score, int32 ef){ latlink_t *link; assert(to->reachable); /* Look for an existing link between "from" and "to" nodes */ for (link = from->links; link && (link->to != to); link = link->next); if (!link) { /* No link between the two nodes; create a new one */ link = (latlink_t *) listelem_alloc(sizeof(latlink_t)); link->from = from; link->to = to; link->link_scr = score; link->ef = ef; link->best_prev = NULL; link->next = from->links; from->links = link; } else { /* Link already exists; just retain the best link_scr */ if (link->link_scr < score) { link->link_scr = score; link->ef = ef; } }}static voidbypass_filler_nodes(void){ latnode_t *node, *to, *from, *prev_node, *t_node; latlink_t *link, *f_link, *t_link, *prev_link; rev_latlink_t *revlink, *t_revlink; int32 score; /* Create reverse links for all links pointing to filler nodes */ for (node = latnode_list; node; node = node->next) { for (link = node->links; link; link = link->next) { to = link->to; if (ISA_FILLER_WORD(to->wid)) { revlink = (rev_latlink_t *) listelem_alloc(sizeof(rev_latlink_t)); revlink->link = link; revlink->next = to->revlinks; to->revlinks = revlink; } } } /* Bypass filler nodes */ for (node = latnode_list; node; node = node->next) { if (!ISA_FILLER_WORD(node->wid)) continue; /* Replace each link entering filler node with links to all its successors */ for (revlink = node->revlinks; revlink; revlink = revlink->next) { link = revlink->link; /* link entering filler node */ from = link->from; score = (node->wid == sil_wid) ? sil_pen : filler_pen; score += link->link_scr; /* * Make links from predecessor of filler (from) to successors of filler. * But if successor is a filler, it has already been eliminated since it * appears earlier in latnode_list (see build...). So it can be skipped. * Likewise, no reverse links needed for the new links; none of them * points to a filler node. */ for (f_link = node->links; f_link; f_link = f_link->next) { if (!ISA_FILLER_WORD(f_link->to->wid)) link_latnodes(from, f_link->to, score + f_link->link_scr, link->ef); } } } /* Delete filler nodes and all links and reverse links from it */ prev_node = NULL; for (node = latnode_list; node; node = t_node) { t_node = node->next; if (ISA_FILLER_WORD(node->wid)) { for (revlink = node->revlinks; revlink; revlink = t_revlink) { t_revlink = revlink->next; revlink->link->to = NULL; listelem_free(revlink, sizeof(rev_latlink_t)); } for (link = node->links; link; link = t_link) { t_link = link->next; listelem_free(link, sizeof(latlink_t)); } if (prev_node) prev_node->next = t_node; else latnode_list = t_node; listelem_free(node, sizeof(latnode_t)); } else prev_node = node; } /* Reclaim links pointing nowhere */ for (node = latnode_list; node; node = node->next) { prev_link = NULL; for (link = node->links; link; link = t_link) { t_link = link->next; if (link->to == NULL) { if (prev_link) prev_link->next = t_link; else node->links = t_link; listelem_free(link, sizeof(latlink_t)); } else prev_link = link; } }}static voidmark_reachable(latnode_t * from){ latlink_t *link; from->reachable = TRUE; for (link = from->links; link; link = link->next) { if (!link->to->reachable) mark_reachable(link->to); }}static voiddelete_unreachable(void){ latnode_t *node, *t_node, *prev_node; latlink_t *link, *t_link; prev_node = NULL; for (node = latnode_list; node; node = t_node) { t_node = node->next; if (!node->reachable) { /* Node and its links can be removed */ if (prev_node) prev_node->next = node->next; else latnode_list = node->next; for (link = node->links; link; link = t_link) { t_link = link->next; listelem_free(link, sizeof(latlink_t)); } listelem_free(node, sizeof(latnode_t)); } else prev_node = node; }}static int32latnode_seqid(latnode_t * target){ int32 i; latnode_t *d; for (i = 0, d = latnode_list; d && (d != target); d = d->next, i++); assert(d); return (i);}static int32dump_lattice(char *filename){ FILE *fp; int32 i; latnode_t *d, *initial, *final; /* char str[1024]; *//* !!! */ initial = start_node; final = final_node; E_INFO("Writing lattice file: %s\n", filename); if ((fp = fopen(filename, "w")) == NULL) { E_ERROR("fopen(%s,w) failed\n", filename); return -1; } fprintf(fp, "# Generated by FBS8 using Sphinx-II semicontinuous models\n"); fprintf(fp, "# -logbase %e\n", 1.0001); fprintf(fp, "#\n"); fprintf(fp, "Frames %d\n", last_frame + 1); fprintf(fp, "#\n"); for (i = 0, d = latnode_list; d; d = d->next, i++); fprintf(fp, "Nodes %d (NODEID WORD STARTFRAME FIRST-ENDFRAME LAST-ENDFRAME)\n", i); for (i = 0, d = latnode_list; d; d = d->next, i++) { fprintf(fp, "%d %s %d %d %d\n", i, kb_get_word_str(d->wid), d->sf, d->fef, d->lef); } fprintf(fp, "#\n"); fprintf(fp, "Initial %d\nFinal %d\n", latnode_seqid(initial), latnode_seqid(final)); fprintf(fp, "#\n"); /* Best score (i.e., regardless of Right Context) for word segments in word lattice */ fprintf(fp, "BestSegAscr %d (NODEID ENDFRAME ASCORE)\n", 0 /* #BPTable entries */ );#if 0 for (i = 0; i < n_lat_entry; i++) { int32 ascr, lscr; lat_seg_ascr_lscr(i, BAD_WID, &ascr, &lscr); fprintf(fp, "%d %d %d\n", (lattice[i].dagnode)->seqid, lattice[i].frm, ascr); }#endif fprintf(fp, "#\n"); fprintf(fp, "Edges (FROM-NODEID TO-NODEID ASCORE)\n");#if 0 for (d = latnode_list; d; d = d->next) { latlink_t *l; for (l = d->links; l; l = l->next) fprintf(fp, "%d %d %d\n", latnode_seqid(d), latnode_seqid(l->to), l->link_scr); }#endif fprintf(fp, "End\n"); fclose(fp); return 0;}/* * Build lattice from bptable and identify the start and final nodes. * Return 1 if successful, 0 otherwise. */static int32build_lattice(int32 bptbl_sz){ int32 i, sf, ef, lef, wid, score, bss_offset; BPTBL_T *bp_ptr; latnode_t *node, *from, *to; /* , *prev_node, *t_node; */ latlink_t *link; /* *t_link, *prev_link; */ int32 nn, nl; char const *dumplatdir; /* Create lattice nodes; not all these may be useful */ last_frame = searchFrame(); latnode_list = NULL; for (i = 0, bp_ptr = bptbl; i < bptbl_sz; i++, bp_ptr++) { if (!bp_ptr->valid) continue; sf = (bp_ptr->bp < 0) ? 0 : bptbl[bp_ptr->bp].frame + 1; ef = bp_ptr->frame; wid = bp_ptr->wid; /* Skip non-final </s> entries; note bptable entry for final </s> */ if ((wid == finish_wid) && (ef < last_frame)) continue; /* Skip if word not in LM */ if ((!ISA_FILLER_WORD(wid)) && (!dictwd_in_lm(word_dict->dict_list[wid]->fwid))) continue; /* See if bptbl entry <wid,sf> already in lattice */ for (node = latnode_list; node; node = node->next) { if ((node->wid == wid) && (node->sf == sf)) break; } /* For the moment, store bptbl indices in node.{fef,lef} */ if (node) node->lef = i; else { /* New node; link to head of list */ node = (latnode_t *) listelem_alloc(sizeof(latnode_t)); node->wid = wid; node->sf = sf; node->fef = node->lef = i; node->reachable = FALSE; node->links = NULL; node->revlinks = NULL; node->next = latnode_list; latnode_list = node; } } /* * At this point, latnode_list is ordered such that nodes earlier in the list * can follow (in time) those later in the list, but not vice versa. Now * create precedence links and simultanesously mark all nodes that can reach * the final </s> node. (All nodes are reached from <s>.0; no problem there.) */ /* Find start node <s>.0 */ for (node = latnode_list; node; node = node->next) { if ((node->wid == start_wid) && (node->sf == 0)) break;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -