📄 word_fsg.c
字号:
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- *//* ==================================================================== * Copyright (c) 1999-2004 Carnegie Mellon University. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * *//* * word_fsg.c -- Finite state LM handling * * ********************************************** * CMU ARPA Speech Project * * Copyright (c) 2003 Carnegie Mellon University. * ALL RIGHTS RESERVED. * ********************************************** * * HISTORY * * $Log: word_fsg.c,v $ * Revision 1.1.1.1 2006/05/23 18:45:02 dhuggins * re-importation * * Revision 1.2 2004/07/23 23:36:34 egouvea * Ravi's merge, with the latest fixes in the FSG code, and making the log files generated by FSG, LM, and allphone have the same 'look and feel', with the backtrace information presented consistently * * Revision 1.8 2004/07/20 20:48:40 rkm * Added uttproc_load_fsg(). * * Revision 1.7 2004/07/20 13:40:55 rkm * Added FSG get/set start/final state functions. * * Revision 1.1 2004/07/16 00:57:12 egouvea * Added Ravi's implementation of FSG support. * * Revision 1.6 2004/07/12 18:47:43 rkm * *** empty log message *** * * Revision 1.5 2004/06/21 18:16:12 rkm * Omitted noise words from FSG if noise penalty = 0 * * Revision 1.4 2004/06/21 18:12:19 rkm * *** empty log message *** * * Revision 1.3 2004/06/21 18:09:17 rkm * *** empty log message *** * * Revision 1.2 2004/05/27 14:22:57 rkm * FSG cross-word triphones completed (but for single-phone words) * * Revision 1.1.1.1 2004/03/01 14:30:30 rkm * * * Revision 1.8 2004/02/27 19:33:01 rkm * *** empty log message *** * * Revision 1.7 2004/02/27 16:15:13 rkm * Added FSG switching * * Revision 1.6 2004/02/27 15:05:21 rkm * *** empty log message *** * * Revision 1.5 2004/02/26 15:35:50 rkm * *** empty log message *** * * Revision 1.4 2004/02/26 01:14:48 rkm * *** empty log message *** * * Revision 1.3 2004/02/25 15:08:19 rkm * *** empty log message *** * * Revision 1.2 2004/02/24 18:13:05 rkm * Added NULL transition handling * * Revision 1.1 2004/02/23 15:53:45 rkm * Renamed from fst to fsg * * Revision 1.6 2004/02/19 21:16:54 rkm * Added fsg_search.{c,h} * * Revision 1.5 2004/02/17 21:11:49 rkm * *** empty log message *** * * Revision 1.4 2004/02/16 21:10:10 rkm * *** empty log message *** * * Revision 1.3 2004/02/09 21:19:18 rkm * *** empty log message *** * * Revision 1.2 2004/02/09 17:30:20 rkm * *** empty log message *** * * Revision 1.1 2004/02/03 21:08:05 rkm * *** empty log message *** * * * 13-Jan-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon * Started. */#include <stdio.h>#include <string.h>#include <assert.h>#include <time.h>#include <err.h>#include <ckd_alloc.h>#include <cmd_ln.h>#include "s2types.h"#include "str2words.h"#include "kb.h"#include "phone.h"#include "dict.h"#include "log.h"#include "word_fsg.h"#define __FSG_DBG__ 0#define WORD_FSG_MAX_WORDPTR 4096#define WORD_FSG_BEGIN_DECL "FSG_BEGIN"#define WORD_FST_BEGIN_DECL "FST_BEGIN"#define WORD_FSG_END_DECL "FSG_END"#define WORD_FST_END_DECL "FST_END"#define WORD_FSG_N_DECL "N"#define WORD_FSG_NUM_STATES_DECL "NUM_STATES"#define WORD_FSG_S_DECL "S"#define WORD_FSG_START_STATE_DECL "START_STATE"#define WORD_FSG_F_DECL "F"#define WORD_FSG_FINAL_STATE_DECL "FINAL_STATE"#define WORD_FSG_T_DECL "T"#define WORD_FSG_TRANSITION_DECL "TRANSITION"#define WORD_FSG_COMMENT_CHAR '#'static int32nextline_str2words(FILE * fp, int32 * lineno, char **wordptr, int32 max_ptr){ char line[16384]; int32 n; for (;;) { if (fgets(line, sizeof(line), fp) == NULL) return -1; (*lineno)++; if (line[0] != WORD_FSG_COMMENT_CHAR) { /* Skip comment lines */ if ((n = str2words(line, wordptr, max_ptr)) < 0) E_FATAL("Line[%d] too long\n", *lineno); if (n > 0) /* Skip blank lines */ break; } } return n;}/* * Add the given transition to the FSG transition matrix. Duplicates (i.e., * two transitions between the same states, with the same word label) are * flagged and only the highest prob retained. */static voidword_fsg_trans_add(word_fsg_t * fsg, int32 from, int32 to, int32 logp, int32 wid){ word_fsglink_t *link; gnode_t *gn; /* Check for duplicate link (i.e., link already exists with label=wid) */ for (gn = fsg->trans[from][to]; gn; gn = gnode_next(gn)) { link = (word_fsglink_t *) gnode_ptr(gn); if (link->wid == wid) {#if 0 E_WARN ("Duplicate transition %d -> %d ('%s'); highest prob kept\n", from, to, kb_get_word_str(wid));#endif if (link->logs2prob < logp) link->logs2prob = logp; return; } } /* Create transition object */ link = (word_fsglink_t *) ckd_calloc(1, sizeof(word_fsglink_t)); link->from_state = from; link->to_state = to; link->logs2prob = logp; link->wid = wid; fsg->trans[from][to] = glist_add_ptr(fsg->trans[from][to], (void *) link);}/* * Link word_fsg_trans_add, but for a null transition between the given * states. Also, there can be at most one null transition between the * given states; duplicates are flagged and only the best prob retained. * Transition probs must be <= 1 (i.e., logprob <= 0). * Return value: 1 if a new transition was added, 0 if the prob of an existing * transition was upgraded; -1 if nothing was changed. */static int32word_fsg_null_trans_add(word_fsg_t * fsg, int32 from, int32 to, int32 logp){ word_fsglink_t *link; /* Check for transition probability */ if (logp > 0) { E_FATAL("Null transition prob must be <= 1.0 (state %d -> %d)\n", from, to); } /* Self-loop null transitions (with prob <= 1.0) are redundant */ if (from == to) return -1; /* Check for a duplicate link; if found, keep the higher prob */ link = fsg->null_trans[from][to]; if (link) { assert(link->wid < 0);#if 0 E_WARN("Duplicate null transition %d -> %d; highest prob kept\n", from, to);#endif if (link->logs2prob < logp) { link->logs2prob = logp; return 0; } else return -1; } /* Create null transition object */ link = (word_fsglink_t *) ckd_calloc(1, sizeof(word_fsglink_t)); link->from_state = from; link->to_state = to; link->logs2prob = logp; link->wid = -1; fsg->null_trans[from][to] = link; return 1;}/* * Obtain transitive closure of NULL transitions in the given FSG. (Initial * list of such transitions is given.) * Return value: Updated list of null transitions. */static glist_tword_fsg_null_trans_closure(word_fsg_t * fsg, glist_t nulls){ gnode_t *gn1, *gn2; boolean updated; word_fsglink_t *tl1, *tl2; int32 k, n; E_INFO("Computing transitive closure for null transitions\n"); /* * Probably not the most efficient closure implementation, in general, but * probably reasonably efficient for a sparse null transition matrix. */ n = 0; do { updated = FALSE; for (gn1 = nulls; gn1; gn1 = gnode_next(gn1)) { tl1 = (word_fsglink_t *) gnode_ptr(gn1); assert(tl1->wid < 0); for (gn2 = nulls; gn2; gn2 = gnode_next(gn2)) { tl2 = (word_fsglink_t *) gnode_ptr(gn2); if (tl1->to_state == tl2->from_state) { k = word_fsg_null_trans_add(fsg, tl1->from_state, tl2->to_state, tl1->logs2prob + tl2->logs2prob); if (k >= 0) { updated = TRUE; if (k > 0) { nulls = glist_add_ptr(nulls, (void *) fsg-> null_trans[tl1-> from_state][tl2-> to_state]); n++; } } } } } } while (updated); E_INFO("%d null transitions added\n", n); return nulls;}/*
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -