📄 hdman.c
字号:
/* ----------------------------------------------------------- *//* *//* ___ *//* |_| | |_/ SPEECH *//* | | | | \ RECOGNITION *//* ========= SOFTWARE */ /* *//* *//* ----------------------------------------------------------- *//* developed at: *//* *//* Speech Vision and Robotics group *//* Cambridge University Engineering Department *//* http://svr-www.eng.cam.ac.uk/ *//* *//* Entropic Cambridge Research Laboratory *//* (now part of Microsoft) *//* *//* ----------------------------------------------------------- *//* Copyright: Microsoft Corporation *//* 1995-2000 Redmond, Washington USA *//* http://www.microsoft.com *//* *//* 2001 Cambridge University *//* Engineering Department *//* *//* Use of this software is governed by a License Agreement *//* ** See the file License for the Conditions of Use ** *//* ** This banner notice must not be removed ** *//* *//* ----------------------------------------------------------- *//* File: HDMan: pronunciation dictionary manager *//* ----------------------------------------------------------- */char *hdman_version = "!HVER!HDMan: 3.3 [CUED 28/04/05]";char *hdman_vc_id = "$Id: HDMan.c,v 1.2 2005/05/12 15:51:28 jal58 Exp $";#include "HShell.h"#include "HMem.h"#include "HMath.h"#include "HWave.h"#include "HLabel.h"#include "HDict.h"/* -------------------------- Trace Flags & Vars ------------------------ */#define T_TOP 00001 /* basic progress reporting */ #define T_WBUF 00002 /* word buffer operations */ #define T_VALI 00004 /* show valid inputs */ #define T_EDW0 00010 /* word level editing */ #define T_EDW1 00020 /* word level editing in detail */ #define T_SCPT 00040 /* print edit scripts */ #define T_NPHN 00100 /* new phone recording */ #define T_DSOP 00200 /* pron deletions */ #define T_DWOP 00400 /* word deletions */ static int trace = 0; /* trace level */static ConfParam *cParm[MAXGLOBS]; /* configuration parameters */static int nParm = 0; /* total num params */#define MAXARGS 100 /* max args in any command *//* MAXPHONES (max phones in any pronunciation) is defined in HDict.h */#define MAXPRONS 100 /* max number of pronunciations per word */#define MAXDICTS 100 /* max number of source dictionaries */#define MAXCONS 20 /* max number of contexts per script */#define MAXPVOC 500 /* max num distinct phones *//* ---------------------- Global Data Structures ------------------------------ */typedef enum {UCWORD, UCPHONE, LCWORD, LCPHONE, DELETEW, DELDEF, FUNCW, DEFCON, REPLACEP, CONREPLACE, MERGEP, SPLITP, DELETEP, DELSOURCE, LCTXT, RCTXT, TCTXT, APPSIL, REMSTRESS, REPLACEW, RAWMODE, NOCMD} EdOp;static char *cmdMap[] = { "UW","UP","LW","LP","DW","DD","FW","DC", "RP","CR","MP","SP","DP","DS", "LC","RC", "TC","AS","RS","RW","IR"};static int nCmds = 21;typedef struct{ /* a single edit command */ EdOp op; short nArgs; LabId args[MAXARGS];}EditCmd;typedef struct _ScriptItem{ /* internal rep of a complete edit script */ EditCmd cmd; struct _ScriptItem *next;}ScriptItem;typedef struct { /* a single pronunciation */ short nPhone; float prob; LabId phone[MAXPHONES]; LabId source; /* name of source dict */}Pronunciation;typedef struct { LabId word; /* a word + its pronunciations */ LabId outsym; /* name of output symbol if any */ short nPron; Pronunciation pron[MAXPRONS];}WordBuf;typedef struct { Boolean rawMode; /* Raw input mode */ EdOp wop; /* Used to apply UCWORD/LCWORD before sorting inputs */ int numCons; /* number of context defs */ EditCmd contexts[MAXCONS]; /* array of context defs */ ScriptItem *script; /* edit script for this input dictionary */ int headSkip; /* num header lines to skip on input */ Source src; /* input file source */ Boolean isPipe; /* dictionary is input thru pipe */ char *name; /* full path of dictionary file */ LabId source; /* name of source dict */ WordBuf wbuf; /* current input word */ LabId nextWord; /* next input word - for lookahead */ LabId nextOutSym; /* next input output sym - for lookahead */ Pronunciation pbuf; /* and its pronunciation - for lookahead */ int totalWords; /* total words in this source */ int totalProns; /* total prons in this source */ int wordsUsed; /* num words actually used */ int pronsUsed; /* num prons actually used */}DBuffer;/* Global storage */static int nInputs = 0; /* number of input dictionaries */static DBuffer inbuf[MAXDICTS]; /* the input buffers and associated scripts */static DBuffer outbuf; /* the output buffer with its global script */static FILE *outfile = NULL; /* The output file */static int nWords = 0; /* number of words in word list */static LabId *wList = NULL; /* filter word list */static FILE *newPhones = NULL; /* file of newly created phones */static char *scriptDir = NULL; /* directory to look for scripts */static char *gScriptFN = NULL; /* name of global edit script */static char *wListFN = NULL; /* name of word list file */static char *pListFN = NULL; /* name of phone list file */static int numOut = 0; /* num words processed */static int numMissing = 0; /* num words not found */static int numActive; /* num active input dictionaries */static int widx = 0; /* next word to take from wordList */static LabId required; /* current required word */static MemHeap memStak; /* all storage allocated in this *//* Flags etc */static Boolean mergeProns = FALSE; /* merge prons from all sources */static Boolean nullOutput = FALSE; /* suppress generation of output dict */static Boolean incOutSyms = FALSE; /* write out extra field */static Boolean incProbs = FALSE; /* write out extra field */static Boolean tagSources = FALSE; /* tag output words with name of source dict */static char commentChars[10] = "#"; /* default dictionary comment char */static char wdBndSym[10] = "#"; /* word boundary symbol *//* Global names */static LabId asterix; /* LabId of a "*" */static LabId wdBnd; /* LabId of word boundary symbol */ static LabId cmuId; /* "cmu" *//* Log Information */static Boolean isLogging = FALSE;static FILE *logF = NULL; /* log file if any */static int nNewPhones = 0; /* num new phones encountered */static int nDefPhones = 0; /* num predefined phones */static LabId newList[MAXPVOC]; /* list of new phones encountered */static LabId defList[MAXPVOC]; /* list of predefined phones *//* ------------------ Process Command Line ------------------------- */void Summary(void){ printf("\nHDMan Command Summary\n\n"); printf("AS A B ... - append silence models A, B, etc to each pronunciation\n"); printf("CR X A Y B - replace phone Y in the context of A_B by X. Contexts\n"); printf(" may include '*' [any] or defined context set (see DC)\n"); printf("DC X A B ... - define set A B .... as context X\n"); printf("DD X A B ... - delete definition for word X starting with phones A B ...\n"); printf("DP A B C ... - delete any occurrences of phones A or B or C ...\n"); printf("DS A - delete pron from source A unless it is only one\n"); printf("DW X Y Z ... - delete words (& definitions) X,Y,Z\n"); printf("FW X Y Z ... - define X Y Z as function words and change\n"); printf(" each phone in the definition to a function word\n"); printf(" specific phone. In word W phone A becomes W.A etc.\n"); printf("IR - select raw input mode. Each input word is single white\n"); printf(" space delimited string (',\" and \\ not treated specially).\n"); printf("LC [X] - convert phones to Left-context dependent. If X given\n"); printf(" then 1st phone in word -> X-a otherwise it is unchanged\n"); printf("LP - convert all phones to lowercase\n"); printf("LW - convert all words to lowercase\n"); printf("MP X A B ... - merge any sequence of phones A B .. by X\n"); printf("RC [X] - convert phones to riGht-context dependent. If X given\n"); printf(" then last phone in word -> z+X otherwise it is unchanged\n"); printf("RP X A B ... - replace all occurrences of phones A or B .. by X\n"); printf("RS system - remove stress marking: system = cmu\n"); printf("RW X A B ... - replace all occurrences of word A or B .. by X\n"); printf("SP X A B ... - split phone X into sequence A B C ...\n"); printf("TC [X [Y]] - convert phones to Triphones. If X is given then 1st\n"); printf(" phone -> X-a+b otherwise it is unchanged. If Y is given\n"); printf(" last phone -> y-z+Y otherwise if X is given\n"); printf(" then it -> y-z+X otherwise it is unchanged.\n"); printf("UP - convert all phones to uppercase\n"); printf("UW - convert all words to uppercase\n\n"); Exit(0);}void ReportUsage(void){ printf("\nUSAGE: HDMan [options] newDict srcDict1 srcDict2 ... \n\n"); printf(" Option Default\n\n"); printf(" -a s chars in s start comment lines #\n"); printf(" -b s define word boundary symbol #\n"); printf(" -e dir look for edit scripts in dir\n"); printf(" -g f global dictionary is in file f global.ded\n"); printf(" -h i j skip 1st i lines of j'th dic file 0\n"); printf(" -i include output symbols off\n"); printf(" -j include pronunciation probabilities off\n"); printf(" -l s write log file in file s no logging\n"); printf(" -m merge prons from all sources first_only\n"); printf(" -n f output union of all phones to f off\n"); printf(" -o disable dictionary output enabled\n"); printf(" -p f load phone list stored in f\n"); printf(" -t tag output words with source off\n"); printf(" -w f load word list stored in f\n"); PrintStdOpts("Q"); printf("\n\n");}int main(int argc, char *argv[]){ char *s,*fn; int i,skip; void Initialise(void); void EditFile(char *labfn); void CreateBuffer(char *dName, Boolean isInput); void EditAndMerge(void); void LoadWordList(void); void LoadPhoneList(void); void PrintLog(void); if(InitShell(argc,argv,hdman_version,hdman_vc_id)<SUCCESS) HError(1400,"HDMan: InitShell failed"); InitMem(); InitMath(); InitWave(); InitLabel(); if (!InfoPrinted() && NumArgs() == 0) ReportUsage(); if (NumArgs() == 0) Exit(0); for (i=0; i<MAXDICTS; i++) inbuf[i].headSkip = 0; Initialise(); while (NextArg() == SWITCHARG) { s = GetSwtArg(); if (strlen(s)!=1) HError(1419,"HDMan: Bad switch %s; must be single letter",s); switch(s[0]){ case 'a': if (NextArg() != STRINGARG) HError(1419,"HDMan: String of comment chars expected"); strcpy(commentChars,GetStrArg()); break; case 'b': if (NextArg() != STRINGARG) HError(1419,"HDMan: Word boundary symbol expected"); strcpy(wdBndSym,GetStrArg()); break; case 'e': if (NextArg() != STRINGARG) HError(1419,"HDMan: edit script directory expected"); scriptDir = GetStrArg(); break; case 'g': if (NextArg() != STRINGARG) HError(1419,"HDMan: name of global edit script expected"); gScriptFN = GetStrArg(); break; case 'h': skip = GetChkedInt(0,1000,s); i = GetChkedInt(1,MAXDICTS,s); inbuf[i-1].headSkip = skip; break; case 'i': incOutSyms = TRUE; break; case 'j': incProbs = TRUE; break; case 'l': if (NextArg() != STRINGARG) HError(1419,"HDMan: Log file name expected"); fn = GetStrArg(); if ((logF = fopen(fn,"w")) == NULL) HError(1411,"HDMan: Cannot create log file %s",fn); isLogging = TRUE; break; case 'm': mergeProns = TRUE; break; case 'n': if (NextArg() != STRINGARG) HError(1419,"HDMan: New phone list file name expected"); fn = GetStrArg(); if ((newPhones = fopen(fn,"w")) == NULL) HError(1411,"HDMan: Cannot create new phone file %s",fn); break; case 'o': nullOutput = TRUE; break; case 'p': if (NextArg() != STRINGARG) HError(1419,"HDMan: name of phone list expected"); pListFN = GetStrArg(); break; case 't': tagSources = TRUE; break; case 'w': if (NextArg() != STRINGARG) HError(1419,"HDMan: name of word list expected"); wListFN = GetStrArg(); break; case 'Q': Summary(); break; case 'T': trace = GetChkedInt(0,01777,s); break; default: HError(1419,"HDMan: Unknown switch %s",s); } } wdBnd = GetLabId(wdBndSym,TRUE); if (NumArgs() < 2) ReportUsage(); if (NextArg() != STRINGARG) HError(1419,"HDMan: Output dictionary file name expected"); CreateBuffer(GetStrArg(),FALSE); i = 0; while (NumArgs()>0){ if (NextArg() != STRINGARG) HError(1419,"HDMan: Input dictionary file name expected"); if( ++i > MAXDICTS ) HError(1430,"HDMan: Number of srcDicts exceeded %d",MAXDICTS); CreateBuffer(GetStrArg(),TRUE); } if (wListFN != NULL) LoadWordList(); if (pListFN != NULL) LoadPhoneList(); EditAndMerge(); if (isLogging) PrintLog(); Exit(0); return (0); /* never reached -- make compiler happy */}/* --------------------- Initialisation ----------------------- *//* SetConfParms: set conf parms relevant to HLEd */void SetConfParms(void){ int i; nParm = GetConfig("HDMAN", TRUE, cParm, MAXGLOBS); if (nParm>0) { if (GetConfInt(cParm,nParm,"TRACE",&i)) trace = i; }}/* Initialise: confparms and globals */void Initialise(void){ SetConfParms(); asterix = GetLabId("*",TRUE); cmuId = GetLabId("cmu",TRUE); CreateHeap(&memStak, "memStak", MSTAK, 1, 1.4, 10000, 100000);}/* ------------------- New Phone Recording -------------------- *//* LoadPhoneList: load list of phones in pListFN */void LoadPhoneList(void){ Source src; char buf[MAXSTRLEN]; LabId id; if(InitSource(pListFN,&src,NoFilter)<SUCCESS) HError(1410,"LoadPhoneList: Can't open file %s", pListFN); if (trace&T_NPHN) printf("Loading predefined phones from file %s\n",pListFN); while(ReadString(&src,buf)) { if (nDefPhones == MAXPVOC) HError(1430,"LoadPhoneList: MAXPVOC exceeded"); id = defList[nDefPhones++] = GetLabId(buf,TRUE); SkipLine(&src); id->aux = (Ptr)-1; } CloseSource(&src);}/* PutPhone: if given output phone new then output it to newPhones *//* aux = 0, if undef phone, aux = -1 if defined *//* aux = -2, if undef and printed, aux = -3 if defd and printed */void PutPhone(LabId id){ char buf[80]; LabId baseId; if (((int)id->aux == 0 || (int)id->aux == -1) && newPhones != NULL) { fprintf(newPhones,"%s\n",ReWriteString(id->name,NULL,ESCAPE_CHAR)); /* avoid printing it again */ id->aux = (Ptr)((int)id->aux - 2); } strcpy(buf,id->name); TriStrip(buf); baseId=GetLabId(buf,TRUE); if ((int)baseId->aux <= 0 ) { /* not seen this label before */ if ((int)baseId->aux == 0 || (int)baseId->aux == -2){ if (nNewPhones == MAXPVOC) HError(1430,"PutPhone: MAXPVOC exceeded"); newList[nNewPhones++] = baseId; } baseId->aux = (Ptr)0; } baseId->aux = (Ptr)((int)baseId->aux + 1);}/* ListNewPhones: list new phones to log file along with counts */void ListNewPhones(void){ int i,c; if (nDefPhones>0){ fprintf(logF,"Def Phone Usage Counts\n"); fprintf(logF,"---------------------\n"); for (i=0; i<nDefPhones; i++) { c = (int)defList[i]->aux; if (c<0) c=0; fprintf(logF," %2d. %-5s : %5d\n",i+1,defList[i]->name,c); } } if (nNewPhones>0){ fprintf(logF,"New Phone Usage Counts\n"); fprintf(logF,"---------------------\n"); for (i=0; i<nNewPhones; i++){ c = (int)newList[i]->aux; if (c<0) c=0; fprintf(logF," %2d. %-5s : %5d\n",i+1, newList[i]->name,c); } }}/* ------------------- Load and Print Script ------------------ *//* PrintIdList: print list of ids */void PrintIdList(LabId *i){ while (*i != NULL) { printf(" %s",(*i)->name); ++i; }}/* PrintScript: prints the given script - for tracing only */void PrintScript(char *name, DBuffer *db){ ScriptItem *i; int j=0; LabId src;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -