⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hdman.c

📁 该压缩包为最新版htk的源代码,htk是现在比较流行的语音处理软件,请有兴趣的朋友下载使用
💻 C
📖 第 1 页 / 共 4 页
字号:
/* ----------------------------------------------------------- *//*                                                             *//*                          ___                                *//*                       |_| | |_/   SPEECH                    *//*                       | | | | \   RECOGNITION               *//*                       =========   SOFTWARE                  */ /*                                                             *//*                                                             *//* ----------------------------------------------------------- *//* developed at:                                               *//*                                                             *//*      Speech Vision and Robotics group                       *//*      Cambridge University Engineering Department            *//*      http://svr-www.eng.cam.ac.uk/                          *//*                                                             *//*      Entropic Cambridge Research Laboratory                 *//*      (now part of Microsoft)                                *//*                                                             *//* ----------------------------------------------------------- *//*         Copyright: Microsoft Corporation                    *//*          1995-2000 Redmond, Washington USA                  *//*                    http://www.microsoft.com                 *//*                                                             *//*              2001  Cambridge University                     *//*                    Engineering Department                   *//*                                                             *//*   Use of this software is governed by a License Agreement   *//*    ** See the file License for the Conditions of Use  **    *//*    **     This banner notice must not be removed      **    *//*                                                             *//* ----------------------------------------------------------- *//*    File: HDMan:   pronunciation dictionary manager          *//* ----------------------------------------------------------- */char *hdman_version = "!HVER!HDMan:   3.3 [CUED 28/04/05]";char *hdman_vc_id = "$Id: HDMan.c,v 1.2 2005/05/12 15:51:28 jal58 Exp $";#include "HShell.h"#include "HMem.h"#include "HMath.h"#include "HWave.h"#include "HLabel.h"#include "HDict.h"/* -------------------------- Trace Flags & Vars ------------------------ */#define T_TOP     00001           /* basic progress reporting */         #define T_WBUF    00002           /* word buffer operations */           #define T_VALI    00004           /* show valid inputs */                #define T_EDW0    00010           /* word level editing */               #define T_EDW1    00020           /* word level editing in detail */     #define T_SCPT    00040           /* print edit scripts */               #define T_NPHN    00100           /* new phone recording */              #define T_DSOP    00200           /* pron deletions */                   #define T_DWOP    00400           /* word deletions */                   static int  trace    = 0;           /* trace level */static ConfParam *cParm[MAXGLOBS];   /* configuration parameters */static int nParm = 0;               /* total num params */#define MAXARGS  100    /* max args in any command *//* MAXPHONES (max phones in any pronunciation) is defined in HDict.h */#define MAXPRONS 100     /* max number of pronunciations per word */#define MAXDICTS 100     /* max number of source dictionaries */#define MAXCONS  20     /* max number of contexts per script */#define MAXPVOC  500    /* max num distinct phones *//* ---------------------- Global Data Structures ------------------------------ */typedef enum {UCWORD, UCPHONE, LCWORD, LCPHONE, DELETEW, DELDEF, FUNCW, DEFCON,              REPLACEP, CONREPLACE, MERGEP, SPLITP, DELETEP, DELSOURCE,               LCTXT, RCTXT,              TCTXT, APPSIL, REMSTRESS, REPLACEW, RAWMODE,               NOCMD} EdOp;static char *cmdMap[] = { "UW","UP","LW","LP","DW","DD","FW","DC",                          "RP","CR","MP","SP","DP","DS",                          "LC","RC",                          "TC","AS","RS","RW","IR"};static int  nCmds = 21;typedef struct{                 /* a single edit command */   EdOp op;   short nArgs;   LabId args[MAXARGS];}EditCmd;typedef struct _ScriptItem{     /* internal rep of a complete edit script */   EditCmd cmd;   struct _ScriptItem *next;}ScriptItem;typedef struct {                /* a single pronunciation */   short nPhone;   float prob;   LabId phone[MAXPHONES];   LabId source;                /* name of source dict */}Pronunciation;typedef struct {   LabId word;                  /* a word + its pronunciations */   LabId outsym;                /* name of output symbol if any */   short nPron;   Pronunciation pron[MAXPRONS];}WordBuf;typedef struct {   Boolean rawMode;             /* Raw input mode */   EdOp wop;                    /* Used to apply UCWORD/LCWORD before sorting inputs */   int numCons;                 /* number of context defs */   EditCmd contexts[MAXCONS];   /* array of context defs */   ScriptItem *script;          /* edit script for this input dictionary */   int headSkip;                /* num header lines to skip on input */   Source src;                  /* input file source */   Boolean isPipe;              /* dictionary is input thru pipe */   char *name;                  /* full path of dictionary file */   LabId source;                /* name of source dict */   WordBuf wbuf;                /* current input word */   LabId nextWord;              /* next input word - for lookahead */   LabId nextOutSym;            /* next input output sym - for lookahead */   Pronunciation pbuf;          /* and its pronunciation - for lookahead */   int totalWords;              /* total words in this source */   int totalProns;              /* total prons in this source */   int wordsUsed;               /* num words actually used */   int pronsUsed;               /* num prons actually used */}DBuffer;/* Global storage */static int nInputs = 0;              /* number of input dictionaries */static DBuffer inbuf[MAXDICTS];      /* the input buffers and associated scripts */static DBuffer outbuf;               /* the output buffer with its global script */static FILE *outfile = NULL;         /* The output file */static int nWords = 0;               /* number of words in word list */static LabId *wList = NULL;          /* filter word list */static FILE *newPhones = NULL;       /* file of newly created phones */static char *scriptDir = NULL;       /* directory to look for scripts */static char *gScriptFN = NULL;       /* name of global edit script */static char *wListFN = NULL;         /* name of word list file */static char *pListFN = NULL;         /* name of phone list file */static int numOut = 0;               /* num words processed */static int numMissing = 0;           /* num words not found */static int numActive;                /* num active input dictionaries */static int widx = 0;                 /* next word to take from wordList */static LabId required;               /* current required word */static MemHeap memStak;              /* all storage allocated in this *//* Flags etc */static Boolean mergeProns = FALSE;   /* merge prons from all sources */static Boolean nullOutput = FALSE;   /* suppress generation of output dict */static Boolean incOutSyms = FALSE;   /* write out extra field */static Boolean incProbs = FALSE;     /* write out extra field */static Boolean tagSources = FALSE;   /* tag output words with name of source dict */static char commentChars[10] = "#";  /* default dictionary comment char */static char wdBndSym[10] = "#";      /* word boundary symbol *//* Global names */static LabId asterix;                /* LabId of a "*" */static LabId wdBnd;                  /* LabId of word boundary symbol  */   static LabId cmuId;                  /* "cmu" *//* Log Information */static Boolean isLogging = FALSE;static FILE *logF = NULL;            /* log file if any */static int nNewPhones = 0;               /* num new phones encountered */static int nDefPhones = 0;               /* num predefined phones */static LabId newList[MAXPVOC];       /* list of new phones encountered */static LabId defList[MAXPVOC];       /* list of predefined phones *//* ------------------ Process Command Line ------------------------- */void Summary(void){   printf("\nHDMan Command Summary\n\n");    printf("AS A B ...   - append silence models A, B, etc to each pronunciation\n");   printf("CR X A Y B   - replace phone Y in the context of A_B by X.  Contexts\n");   printf("               may include '*' [any] or defined context set (see DC)\n");   printf("DC X A B ... - define set A B .... as context X\n");   printf("DD X A B ... - delete definition for word X starting with phones A B ...\n");   printf("DP A B C ... - delete any occurrences of phones A or B or C ...\n");   printf("DS A         - delete pron from source A unless it is only one\n");   printf("DW X Y Z ... - delete words (& definitions) X,Y,Z\n");   printf("FW X Y Z ... - define X Y Z as function words and change\n");   printf("               each phone in the definition to a function word\n");   printf("               specific phone. In word W phone A becomes W.A etc.\n");   printf("IR           - select raw input mode.  Each input word is single white\n");   printf("               space delimited string (',\" and \\ not treated specially).\n");   printf("LC [X]       - convert phones to Left-context dependent. If X given\n");   printf("               then 1st phone in word -> X-a otherwise it is unchanged\n");   printf("LP           - convert all phones to lowercase\n");   printf("LW           - convert all words to lowercase\n");   printf("MP X A B ... - merge any sequence of phones A B .. by X\n");   printf("RC [X]       - convert phones to riGht-context dependent. If X given\n");   printf("               then last phone in word -> z+X otherwise it is unchanged\n");   printf("RP X A B ... - replace all occurrences of phones A or B .. by X\n");   printf("RS system    - remove stress marking: system = cmu\n");   printf("RW X A B ... - replace all occurrences of word A or B .. by X\n");   printf("SP X A B ... - split phone X into sequence A B C ...\n");   printf("TC [X [Y]]   - convert phones to Triphones. If X is given then 1st\n");   printf("               phone -> X-a+b otherwise it is unchanged. If Y is given\n");   printf("               last phone -> y-z+Y otherwise if X is given\n");   printf("               then it -> y-z+X otherwise it is unchanged.\n");   printf("UP           - convert all phones to uppercase\n");   printf("UW           - convert all words to uppercase\n\n");   Exit(0);}void ReportUsage(void){   printf("\nUSAGE: HDMan [options] newDict srcDict1 srcDict2 ... \n\n");   printf(" Option                                       Default\n\n");   printf(" -a s    chars in s start comment lines       #\n");   printf(" -b s    define word boundary symbol          #\n");   printf(" -e dir  look for edit scripts in dir\n");   printf(" -g f    global dictionary is in file f       global.ded\n");   printf(" -h i j  skip 1st i lines of j'th dic file    0\n");   printf(" -i      include output symbols               off\n");   printf(" -j      include pronunciation probabilities  off\n");   printf(" -l s    write log file in file s             no logging\n");   printf(" -m      merge prons from all sources         first_only\n");   printf(" -n f    output union of all phones to f      off\n");   printf(" -o      disable dictionary output            enabled\n");   printf(" -p f    load phone list stored in f\n");   printf(" -t      tag output words with source         off\n");   printf(" -w f    load word list stored in f\n");   PrintStdOpts("Q");   printf("\n\n");}int main(int argc, char *argv[]){   char *s,*fn;   int i,skip;   void Initialise(void);   void EditFile(char *labfn);   void CreateBuffer(char *dName, Boolean isInput);   void EditAndMerge(void);   void LoadWordList(void);   void LoadPhoneList(void);   void PrintLog(void);   if(InitShell(argc,argv,hdman_version,hdman_vc_id)<SUCCESS)      HError(1400,"HDMan: InitShell failed");   InitMem();   InitMath();   InitWave();  InitLabel();   if (!InfoPrinted() && NumArgs() == 0)      ReportUsage();   if (NumArgs() == 0) Exit(0);   for (i=0; i<MAXDICTS; i++)      inbuf[i].headSkip = 0;   Initialise();   while (NextArg() == SWITCHARG) {      s = GetSwtArg();      if (strlen(s)!=1)          HError(1419,"HDMan: Bad switch %s; must be single letter",s);      switch(s[0]){      case 'a':         if (NextArg() != STRINGARG)            HError(1419,"HDMan: String of comment chars expected");         strcpy(commentChars,GetStrArg());         break;      case 'b':         if (NextArg() != STRINGARG)            HError(1419,"HDMan: Word boundary symbol expected");         strcpy(wdBndSym,GetStrArg());         break;      case 'e':         if (NextArg() != STRINGARG)            HError(1419,"HDMan: edit script directory expected");         scriptDir = GetStrArg();          break;        case 'g':         if (NextArg() != STRINGARG)            HError(1419,"HDMan: name of global edit script expected");         gScriptFN = GetStrArg();          break;        case 'h':         skip = GetChkedInt(0,1000,s);          i = GetChkedInt(1,MAXDICTS,s);          inbuf[i-1].headSkip = skip;         break;      case 'i':         incOutSyms = TRUE; break;      case 'j':         incProbs = TRUE; break;      case 'l':         if (NextArg() != STRINGARG)            HError(1419,"HDMan: Log file name expected");         fn = GetStrArg();         if ((logF = fopen(fn,"w")) == NULL)            HError(1411,"HDMan: Cannot create log file %s",fn);         isLogging = TRUE;         break;      case 'm':          mergeProns = TRUE; break;      case 'n':         if (NextArg() != STRINGARG)            HError(1419,"HDMan: New phone list file name expected");         fn = GetStrArg();         if ((newPhones = fopen(fn,"w")) == NULL)            HError(1411,"HDMan: Cannot create new phone file %s",fn);         break;      case 'o':          nullOutput = TRUE; break;      case 'p':         if (NextArg() != STRINGARG)            HError(1419,"HDMan: name of phone list expected");         pListFN = GetStrArg();          break;        case 't':          tagSources = TRUE; break;      case 'w':         if (NextArg() != STRINGARG)            HError(1419,"HDMan: name of word list expected");         wListFN = GetStrArg();          break;        case 'Q':         Summary(); break;      case 'T':         trace = GetChkedInt(0,01777,s); break;      default:         HError(1419,"HDMan: Unknown switch %s",s);      }   }   wdBnd = GetLabId(wdBndSym,TRUE);   if (NumArgs() < 2)      ReportUsage();   if (NextArg() != STRINGARG)      HError(1419,"HDMan: Output dictionary file name expected");   CreateBuffer(GetStrArg(),FALSE);   i = 0;   while (NumArgs()>0){      if (NextArg() != STRINGARG)         HError(1419,"HDMan: Input dictionary file name expected");      if( ++i > MAXDICTS )         HError(1430,"HDMan: Number of srcDicts exceeded %d",MAXDICTS);      CreateBuffer(GetStrArg(),TRUE);   }   if (wListFN != NULL) LoadWordList();   if (pListFN != NULL) LoadPhoneList();   EditAndMerge();   if (isLogging)      PrintLog();   Exit(0);   return (0);          /* never reached -- make compiler happy */}/* --------------------- Initialisation ----------------------- *//* SetConfParms: set conf parms relevant to HLEd */void SetConfParms(void){   int i;      nParm = GetConfig("HDMAN", TRUE, cParm, MAXGLOBS);   if (nParm>0) {      if (GetConfInt(cParm,nParm,"TRACE",&i)) trace = i;   }}/* Initialise: confparms and globals */void Initialise(void){   SetConfParms();   asterix = GetLabId("*",TRUE);   cmuId = GetLabId("cmu",TRUE);   CreateHeap(&memStak, "memStak", MSTAK, 1, 1.4, 10000, 100000);}/* ------------------- New Phone Recording -------------------- *//* LoadPhoneList: load list of phones in pListFN */void LoadPhoneList(void){   Source src;   char buf[MAXSTRLEN];   LabId id;      if(InitSource(pListFN,&src,NoFilter)<SUCCESS)      HError(1410,"LoadPhoneList: Can't open file %s", pListFN);   if (trace&T_NPHN)      printf("Loading predefined phones from file %s\n",pListFN);   while(ReadString(&src,buf)) {      if (nDefPhones == MAXPVOC)         HError(1430,"LoadPhoneList: MAXPVOC exceeded");      id = defList[nDefPhones++] = GetLabId(buf,TRUE);      SkipLine(&src);      id->aux = (Ptr)-1;   }   CloseSource(&src);}/* PutPhone: if given output phone new then output it to newPhones *//*           aux = 0, if undef phone, aux = -1 if defined  *//*           aux = -2, if undef and printed, aux = -3 if defd and printed */void PutPhone(LabId id){   char buf[80];   LabId baseId;   if (((int)id->aux == 0 || (int)id->aux == -1) && newPhones != NULL) {      fprintf(newPhones,"%s\n",ReWriteString(id->name,NULL,ESCAPE_CHAR));      /* avoid printing it again */      id->aux = (Ptr)((int)id->aux - 2);   }   strcpy(buf,id->name);   TriStrip(buf);   baseId=GetLabId(buf,TRUE);   if ((int)baseId->aux <= 0 ) {  /* not seen this label before */      if ((int)baseId->aux == 0 || (int)baseId->aux == -2){         if (nNewPhones == MAXPVOC)            HError(1430,"PutPhone: MAXPVOC exceeded");         newList[nNewPhones++] = baseId;      }      baseId->aux = (Ptr)0;               }   baseId->aux = (Ptr)((int)baseId->aux + 1);}/* ListNewPhones: list new phones to log file along with counts */void ListNewPhones(void){   int i,c;   if (nDefPhones>0){      fprintf(logF,"Def Phone Usage Counts\n");      fprintf(logF,"---------------------\n");      for (i=0; i<nDefPhones; i++) {         c = (int)defList[i]->aux;         if (c<0) c=0;         fprintf(logF," %2d. %-5s : %5d\n",i+1,defList[i]->name,c);      }   }   if (nNewPhones>0){      fprintf(logF,"New Phone Usage Counts\n");      fprintf(logF,"---------------------\n");      for (i=0; i<nNewPhones; i++){         c = (int)newList[i]->aux;         if (c<0) c=0;         fprintf(logF," %2d. %-5s : %5d\n",i+1, newList[i]->name,c);      }   }}/* ------------------- Load and Print Script ------------------ *//* PrintIdList: print list of ids */void PrintIdList(LabId *i){   while (*i != NULL) {      printf(" %s",(*i)->name);      ++i;   }}/* PrintScript: prints the given script - for tracing only */void PrintScript(char *name, DBuffer *db){   ScriptItem *i;   int j=0;   LabId src;   

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -