📄 hparse.c
字号:
/* ----------------------------------------------------------- *//* *//* ___ *//* |_| | |_/ SPEECH *//* | | | | \ RECOGNITION *//* ========= SOFTWARE */ /* *//* *//* ----------------------------------------------------------- *//* Copyright: Microsoft Corporation *//* 1995-2000 Redmond, Washington USA *//* http://www.microsoft.com *//* *//* Use of this software is governed by a License Agreement *//* ** See the file License for the Conditions of Use ** *//* ** This banner notice must not be removed ** *//* *//* ----------------------------------------------------------- *//* File: HParse.c: HParse based word-network definition *//* ----------------------------------------------------------- */char *hparse_version = "!HVER!HParse: 3.3 [CUED 28/04/05]";char *hparse_vc_id = "$Id: HParse.c,v 1.2 2005/05/12 15:51:28 jal58 Exp $";/* The HParse program reads in a set of HTK HParse rewrite rules (as used in HTK V1.x) and writes out an HTK V2 lattice and if operating in V1.x compatability mode possibly a dictionary. In compatability mode the interpretation of the HParse network is that used by the HTK V1.5 HVite program i.e. the reserved node names WD_BEGIN and WD_END are used to delimit word boundaries - nodes between a WD_BEGIN/WD_END pair are called "word-internal" while all other nodes are "word-external". All WD_BEGIN/WD_END nodes must have an external name attached that denotes the word. The connectivity of the words is output in an HTK V2 word lattice format and the pronunciation information stored in an HTK V2 dictionary. Word-external nodes are treated as words and stored in the lattice with corresponding entries in the dictionary. When not operating in compatability mode all nodes are treated as words. Note that information regarding the "external name" as used in HTK V1.x is ignored when not operating in compatability mode. In compatability mode it is only allowed for a particular external name to be used for one WD_BEGIN/WD_END pair. For wdExternal nodes the external name is ignored. The definition of the rewrite rules for the textual definition are as follows:- name = char{char} -- any sequence of characters except the meta chars {}[]<>|=$();*\/ - the latter must be escaped using backslash model = name [ "%" ("%" | name)] -- option for external name variable = $ name variable = @ name factor = "(" expr ")" | -- simple factoring "{" expr "}" | -- 0 or more repetitions "<" expr ">" | -- 1 or more repetitions "[" expr "]" | -- 0 or 1 repetition ie optional "<<" expr ">>" | -- triphone loop model | -- the name of a HMM variable -- must be already defined sequence = factor {factor} -- sequences expr = sequence {"|" sequence} -- alternatives subnet = variable "=" expr ";" -- define a variable network = {subnet} "(" expr ")" -- the network itself All variables must be defined by a subnet definition before being used in an expression (this prohibits recursion and makes it easier to implement). C style comments may be placed anywhere in the text The network build process proceeds in 3 steps: 1) the parser builds a set of subnetworks for each defined in the textual representaion. 2) the subnetworks are (recursively) substituted to fully expand the network. Each sub-network (& many components thereof) use additional 'glue' nodes 3) all glue nodes are removed*//* Trace Flags */#define T_TOP 0001 /* Top Level tracing */#define T_HPNET 0002 /* print HParse final network */#define T_HPMEMSTAT 0004 /* print memory stacks after HP net built */#define T_HPREMGLUE 0010 /* print progress through Remove Glue */#include "HShell.h" /* HMM ToolKit Modules */#include "HMem.h"#include "HMath.h"#include "HSigP.h"#include "HAudio.h"#include "HWave.h"#include "HVQ.h"#include "HParm.h" #include "HLabel.h"#include "HModel.h"#include "HUtil.h" #include "HDict.h"#include "HNet.h"/* ------------ HParse network types and definitions -------------- */typedef struct _Node *Link;typedef struct { int nUse; /* num sharing this LinkSet */ short numLinks; /* number of links in set */ short maxLinks; /* max number of links */ Link *links; /* array[1..numLinks]of Link */ Ptr user; /* for attaching user defined data */} LinkSet;typedef struct _Node{ LabId modelName; /* name of node */ LabId extName; /* external name (used in compatability mode) */ LinkSet *succ; /* successors to this node */ LinkSet *pred; /* predecessors to this node */ Link chain; /* simple linked list of all nodes */ Ptr user; /* for attaching user defined data */} Node;typedef struct { Link entryNode; Link exitNode; Link chain;} HPNetwork;typedef struct _SubNetDef{ LabId netName; /* variable name (LHS of rule) */ HPNetwork network; /* sub-network (RHS of rule) */ struct _SubNetDef *next;} SubNetDef;/* ------ Network Node Labelling (used in conversion process) -------- */typedef enum {unknown, wdInternal, wdExternal, wdBegin, wdEnd, nullNode} NodeType;typedef struct _NodeInfo{ NodeType nType; /* the type of this node */ Boolean seen; /* flag used when scanning network */ Link history; /* used for word pronunciation expansion */ int nodeNum; /* store node numbers */}NodeInfo;static int trace = 0; /* Trace flags *//* ------------------- Global variables -------------------------- */static LabId enterId; /* LabId of the ENTER node name */static LabId exitId; /* LabId of the EXIT node name */static LabId wdBeginId; /* LabId of WD_BEGIN nodes */static LabId wdEndId; /* LabId of WD_END nodes */static int numWdBegin=0; /* number of WORD_BEGIN nodes */static int numWdEnd=0; /* number of WORD_END nodes */static Boolean v1Compat=FALSE; /* compatability mode? */static Boolean saveLatLM=FALSE; /* output lattice probabilities */static Boolean saveLatBin=FALSE; /* save lattice in binary */ /* ---------------- Configuration Parameters --------------------- */static ConfParam *cParm[MAXGLOBS];static int nParm = 0; /* total num params *//* ---------------- Process Command Line ------------------------- *//* SetConfParms: set conf parms relevant to this tool */void SetConfParms(void){ int i; Boolean b; nParm = GetConfig("HPARSE", TRUE, cParm, MAXGLOBS); if (nParm>0){ if (GetConfInt(cParm,nParm,"TRACE",&i)) trace = i; if (GetConfBool(cParm,nParm,"V1COMPAT",&b)) v1Compat = TRUE; }}void ReportUsage(void){ printf("\nUSAGE: HParse [options] netFile latFile\n\n"); printf(" Option Default\n\n"); printf(" -b output lattice in binary ascii\n"); printf(" -c set V1.x compatability mode off\n"); printf(" -d s output dictionary to file s none\n"); printf(" -l include LM log probs in lattice off\n"); PrintStdOpts(""); printf("\n\n");}static HPNetwork CreateHParseNetwork(char *fname);static void PrintHParseNetwork(HPNetwork *network);static void ConvertHParseNetwork(HPNetwork *network,char *latf,char *dictf);int main(int argc, char *argv[]){ char *netFn,*latFn,*dictFn=NULL; char *s; HPNetwork theNet; if(InitShell(argc,argv,hparse_version,hparse_vc_id)<SUCCESS) HError(3100,"HParse: InitShell failed"); InitMem(); InitLabel(); InitMath(); InitDict(); InitNet(); if (!InfoPrinted() && NumArgs() == 0) ReportUsage(); if (NumArgs() == 0) Exit(0); SetConfParms(); while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(3119,"HParse: Bad switch %s; must be single letter",s); switch(s[0]){ case 'b': saveLatBin=TRUE; break; case 'c': v1Compat=TRUE; break; case 'd': if (NextArg()!=STRINGARG) HError(3119,"HParse: Output dictionary name expected"); dictFn = GetStrArg(); break; case 'l': saveLatLM=TRUE; break; case 'T': trace = GetChkedInt(0,511,s); break; default: HError(3119,"HParse: Unknown switch %s",s); } } if (NextArg()!=STRINGARG) HError(3119,"HParse: network file name expected"); netFn = GetStrArg(); if (NextArg()!=STRINGARG) HError(3119,"HParse: output lattice file name expected"); latFn = GetStrArg(); if ((dictFn != NULL) && (!v1Compat)) HError(3119,"HParse: Dictionary only valid in compatability mode"); /* Set Up The Network using specification in 'netfn' */ exitId = GetLabId("$$$HPARSE_EXIT",TRUE); enterId = GetLabId("$$$HPARSE_ENTER",TRUE); wdBeginId = GetLabId("WD_BEGIN",TRUE); wdEndId = GetLabId("WD_END",TRUE); if (trace > 0) printf("Creating HParse net from file %s\n",netFn); theNet = CreateHParseNetwork(netFn); if (trace&T_HPNET) PrintHParseNetwork(&theNet); ConvertHParseNetwork(&theNet,latFn,dictFn); Exit(0); return (0); /* never reached -- make compiler happy */}/* --------------------- HParse network building ----------------------- */#define LINKCHUNKSIZE 3#define LINKEXTENTFACTOR 1.5#define MAXIDENT 80typedef char Ident[MAXIDENT+1];#define NODEBLOCK 512#define LSBLOCK 1024static long numLinkSets = 0; /* usage counters */static long numLinks = 0;static long numNodes = 0;static MemHeap nodeHeap;static MemHeap lsHeap;static MemHeap lsChunkHeap;static MemHeap lsLargeHeap;/* ---------------- Node and LinkSet Manipulation ---------------------- *//* CreateLinkSet: allocate space for a LinkSet with >= size slots */LinkSet *CreateLinkSet(int size){ LinkSet *p; Link *q; p = (LinkSet *) New(&lsHeap,sizeof(LinkSet)); if ( size <= LINKCHUNKSIZE) { size = LINKCHUNKSIZE; q = (Link *) New(&lsChunkHeap,LINKCHUNKSIZE*sizeof(Link)); } else /* create a new one */ q = (Link *) New(&lsLargeHeap,size*sizeof(Link)); p->links = q-1; p->user = NULL; p->nUse = 1; /* set usage counter to 1 */ p->numLinks = 0; p->maxLinks = size; ++numLinkSets; numLinks += size; /* update statistics */ return p;}/* FreeLinkSet: free the given linkset */static void FreeLinkSet(LinkSet *p){ Link *q; if (p == NULL) return; if (p->nUse >1 ) { --(p->nUse); return; } q = p->links+1; if (p->maxLinks == LINKCHUNKSIZE) /* give back to the block store */ Dispose(&lsChunkHeap,q); else Dispose(&lsLargeHeap,q); numLinks -= p->maxLinks; --numLinkSets; Dispose(&lsHeap,p);}/* ResizeLinkSet: change number of slots in ls to newSize slots */static void ResizeLinkSet(LinkSet *ls, int newSize){ Link *p; int oldSize; int oldEntries; int i; oldSize = ls->maxLinks; oldEntries = ls->numLinks; if (newSize==0){ numLinks -= ls->maxLinks; if (ls->maxLinks == LINKCHUNKSIZE) Dispose(&lsChunkHeap,ls->links+1); else Dispose(&lsLargeHeap,ls->links+1); ls->numLinks = 0; ls->maxLinks = 0; ls->links = NULL; }else if (ls->maxLinks != LINKCHUNKSIZE) { p = (Link *)New(&lsLargeHeap,newSize*sizeof(Link)); p--; for (i=1; (i<=oldEntries) && (i <= newSize); i++) p[i] = ls->links[i]; Dispose(&lsLargeHeap,ls->links+1); ls->links = p; ls->maxLinks = newSize; numLinks += newSize-oldSize; }else if (newSize > ls->maxLinks) { /* ls->maxLinks == LINKCHUNKSIZE */ p = (Link *)New(&lsLargeHeap,newSize*sizeof(Link)); p--; for (i=1; i<=ls->numLinks; i++) p[i] = ls->links[i]; Dispose(&lsChunkHeap,ls->links+1); ls->links = p; ls->maxLinks = newSize; numLinks += newSize-oldSize; }}/* PrModelName: print name of model p to stdout */static void PrModelName(Link p){ Ident name; if (p->modelName==NULL || p->modelName->name==NULL) strcpy(name,"****"); else if (strlen(p->modelName->name) > MAXIDENT) strcpy(name,"????"); else strcpy(name,p->modelName->name); printf("%s[%03d] ",name, ((int)p % 4000) / 4 );}/* PrintLinkSet: print first n slots of given LinkSet to stdout */static void PrintLinkSet(int n, LinkSet *p){ int i; if (p==NULL || n==0) printf(" ---"); else{ if (n > p->numLinks) n = p->numLinks; printf("[U:%d] ",p->nUse); for (i=1;i<=n;i++) PrModelName(p->links[i]); } printf("\n");}/* CreateNode: create node and link it into given chain */Link CreateNode(LabId name, Link *chain, int maxSucc, int maxPred){ Link p; p = (Link) New(&nodeHeap,sizeof(Node)); p->modelName=name; p->extName = name; p->succ = (maxSucc>0)?CreateLinkSet(maxSucc):NULL; p->pred = (maxPred>0)?CreateLinkSet(maxPred):NULL; p->chain = *chain; *chain=p; p->user = NULL; ++numNodes; return p;}/* FreeNode: release storage allocated to p */static void FreeNode(Link p){ FreeLinkSet(p->succ);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -