⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ladapt.c

📁 该压缩包为最新版 HTK 的源代码,HTK 是现在比较流行的语音处理软件,请有兴趣的朋友下载使用
💻 C
📖 第 1 页 / 共 2 页
字号:
/* ----------------------------------------------------------- *//*                                                             *//*                          ___                                *//*                       |_| | |_/   SPEECH                    *//*                       | | | | \   RECOGNITION               *//*                       =========   SOFTWARE                  */ /*                                                             *//*                                                             *//* ----------------------------------------------------------- *//* developed at:                                               *//*                                                             *//*      Speech Vision and Robotics group                       *//*      Cambridge University Engineering Department            *//*      http://svr-www.eng.cam.ac.uk/                          *//*                                                             *//* main authors: Valtcho Valtchev, Steve Young,                *//*               Julian Odell, Gareth Moore                    *//* ----------------------------------------------------------- *//*         Copyright:                                          *//*                                                             *//*          1994-2002 Cambridge University                     *//*                    Engineering Department                   *//*                                                             *//*   Use of this software is governed by a License Agreement   *//*    ** See the file License for the Conditions of Use  **    *//*    **     This banner notice must not be removed      **    *//*                                                             *//* ----------------------------------------------------------- *//*         File: LAdapt.c - adapt LM with new text             *//* ----------------------------------------------------------- */char *ladapt_version = "!HVER!LAdapt:   3.3 
[CUED 28/04/05]";char *ladapt_vc_id = "$Id: LAdapt.c,v 1.1.1.1 2005/05/12 10:52:19 jal58 Exp $";#include "HShell.h"#include "HMem.h"#include "HMath.h"#include "HWave.h"#include "HLabel.h"#include "LWMap.h"#include "LCMap.h"#include "LGBase.h"#include "LUtil.h"#include "LModel.h"#include "LPCalc.h"#include "LPMerge.h"/*    This tool processes source texts and updates an existing LM.   Text passes through a window word by word and each n-gram is recorded.   The text in the window can also be modified by match and replace rules    and in this case the ngrams in the original matched text are stored    in a set of 'negative' gram files and the ngrams in the modified text    are stored in a set of 'positive' gram files.*/   /* -------------------------- Trace Flags ------------------------ */static int trace = 0;#define T_TOP  0001     /* Top Level tracing */#define T_SAV  0002     /* Monitor Buffer Saving */#define T_INP  0004     /* Trace word input stream */#define T_SHR  0010     /* Trace shift register input *//* ---------------- Configuration Parameters --------------------- */static ConfParam *cParm[MAXGLOBS];static int nParm = 0;   /* total num params *//* ------------------- Word Shift Registers ----------------------- */typedef struct {   int used;            /* actual words in register */   UInt ng[MAXNG+1];    /* ng[0] is oldest word */   NGBuffer *ngb;       /* output ngram buffer */} ShiftReg;/* ---------------------- Global Variables ----------------------- */static int nSize     = 3;           /* ngram size */static int ngbSize   = 2000000;     /* ngram buffer size */static int newWords  =  100000;     /* max new words to accommodate */static char *rootFN  = "gram";      /* gbase root file name */static char *outFN   = NULL;        /* output LM filename */static char *dbsDir  = NULL;        /* directory to store gbase files */static char *wlistFN = NULL;        /* file containing edit rules */static char *omapFN  = "wmap";      /* output word map file 
name */static char *txtSrc  = NULL;        /* gram file text source descriptor */static MemHeap langHeap;            /* memory for NGBuffers and LMs*/static BackOffLM *newLM;            /* the generated LM */static BackOffLM *adpLM;            /* the adapted final LM */static WordMap   *tgtVoc = NULL;    /* target vocabulary */          static WordMap   wlist;             /* restricting the word list */static ShiftReg  stdBuf;            /* used for normal N-gram processing */static Boolean pruneWords = FALSE;    /* prune input text according to word list */static Boolean saveFiles = TRUE;      /* save intermediate files */ static Boolean htkEscape = TRUE;      /* string escaping for output word map */static Boolean mapUpdated;            /* used optimise sort/saving */static Boolean processText = TRUE;    /* generate model from raw text data */static char *defMapName = "LAdapt";   /* map name */static LabId unkId = NULL;                   /* OOV marker */static char  unkStr[256] = DEF_UNKNOWNNAME;  /* OOV class string *//* This MAX_NGRAM_FILES limit is arbitrary and can be removed */#define MAX_NGRAM_FILES  4096static int     nLModel;                     /* number of loaded LMs */static LMInfo  lmInfo[MAX_LMODEL];          /* array of loaded LMs */static WordMap    wmap;              /* word map for this corpus */static NGInputSet inSet;             /* input set of files */static BuildInfo  binfo;             /* build parameters *//* ---------------- Process Command Line ------------------------- *//* SetConfParms: set conf parms relevant to this tool */void SetConfParms(void){   int i;   char s[256];      nParm = GetConfig("LADAPT", TRUE, cParm, MAXGLOBS);   if (nParm>0){      if (GetConfInt(cParm,nParm, "TRACE",&i))      trace = i;      if (GetConfStr(cParm,nParm, "UNKNOWNNAME",s)) strcpy(unkStr,s);   }   }char *ReturnLMName(int fmt){   switch(fmt) {      case LMF_TEXT:	 return LM_TXT_TEXT;      case LMF_BINARY:	 return LM_TXT_BINARY;      case LMF_ULTRA:	 
return LM_TXT_ULTRA;      default:	 return LM_TXT_OTHER;   }   }void ReportUsage(void){   printf("\nUSAGE: LAdapt [options] langModel txtfile ....\n\n");   printf(" Option                                       Default\n\n");   printf(" -a n    allow n new words in input text      %d\n", newWords);   printf(" -b n    set ngram buffer size                %d\n", ngbSize);   printf(" -c n c  set pruning for n-gram to c          %d\n", DEF_CUTOFF);   printf(" -d s    set root n-gram data file name       %s\n", rootFN);   printf(" -f s    set output LM format to s            %s\n", ReturnLMName(DEF_SAVEFMT));   printf(" -g      use existing n-gram files            off\n");   printf(" -i f s  interpolate with model s, weight f   off\n");   printf(" -j n c  set weighted discount pruning to c   off\n");   printf(" -n n    set n-gram size                      %d\n", nSize);#ifndef HTK_TRANSCRIBER   printf(" -s s    store s in gram header source flds   none\n");   printf(" -t      use Turing-Good discounting          off\n");#endif   printf(" -w fn   load word list from fn               none\n");#ifndef HTK_TRANSCRIBER   printf(" -x      save model with counts               off\n");#endif   PrintStdOpts("");   printf("\n\n");}int main(int argc, char *argv[]){   int i;   char *c,*s,*fn;   char sBuf[256],fmt[256];   void       Initialise(void);   void       ProcessText(char *fn,Boolean lastFile);   Boolean    Exists(char *fn);   BackOffLM *CombineModels(MemHeap *heap,LMInfo *lmi,int nLModel,int nSize,WordMap *wl) ;   InitShell(argc,argv,ladapt_version,ladapt_vc_id);   InitMem();   InitMath();   InitWave();   InitLabel();   InitLUtil();   InitWMap();   InitGBase();   InitLModel();   InitPCalc();   InitPMerge();   SetConfParms();   if (!InfoPrinted() && NumArgs() == 0)      ReportUsage();   if (NumArgs() == 0) Exit(EXIT_SUCCESS);   InitBuildInfo(&binfo);    binfo.dctype = DC_ABSOLUTE;   nLModel = 1;   while (NextArg() == SWITCHARG) {      s = GetSwtArg();      if (strlen(s)!=1)   
       HError(16419,"Bad switch %s; must be single letter",s);      switch(s[0]){         case 'a':            newWords = GetChkedInt(10,10000000,s); break;         case 'b':            ngbSize = GetChkedInt(10,10000000,s); break;         case 'c':            i = GetChkedInt(2,LM_NSIZE,s); 	    binfo.cutOff[i] = GetChkedInt(0,1000,s);	    break;         case 'd':            if (NextArg()!=STRINGARG)               HError(16419,"Gram base root file name expected");            rootFN = GetStrArg(); 	    break;         case 'f':	    strcpy(fmt, GetStrArg());	    for (c=fmt; *c; *c=toupper(*c), c++); /* To uppercase */	    if (strcmp(fmt, LM_TXT_TEXT)==0)	      binfo.saveFmt = LMF_TEXT;	    else if (strcmp(fmt, LM_TXT_BINARY)==0)	       binfo.saveFmt = LMF_BINARY;	    else if (strcmp(fmt, LM_TXT_ULTRA)==0)	       binfo.saveFmt = LMF_ULTRA;	    else	       HError(16419,"Unrecognised LM format, should be one of [%s, %s, %s]",		      LM_TXT_TEXT, LM_TXT_BINARY, LM_TXT_ULTRA);	    break;         case 'g':            processText = FALSE; break;	 case 'i':            if (NextArg()!=FLOATARG)	       HError(16419,"Interpolation weight expected");	    lmInfo[nLModel].weight = GetChkedFlt(0.0,1.0,s);            if (NextArg()!=STRINGARG)	       HError(16419,"Interpolation LM filename expected");	    lmInfo[nLModel].fn = GetStrArg();	    nLModel++;	    break;         case 'j':            i = GetChkedInt(2,LM_NSIZE,s); 	    binfo.wdThresh[i] = GetChkedFlt(0.0,1E10,s);	    break;         case 'n':            nSize = GetChkedInt(1, MAXNG, s); break;#ifdef HTK_TRANSCRIBER         case 's':            if (NextArg()!=STRINGARG)               HError(16419,"Gram file text source descriptor expected");            txtSrc = GetStrArg(); break;         case 't':	    binfo.dctype = DC_KATZ; break;#endif         case 'w':            if (NextArg()!=STRINGARG)               HError(16419,"Word list file name expected");            wlistFN = GetStrArg(); break;#ifndef HTK_TRANSCRIBER         case 
'x':            binfo.ptype = LMP_COUNT; break;#endif         case 'T':            trace = GetChkedInt(0,077,s); break;         default:            HError(16419,"LAdapt: Unknown switch %s",s);      }   }#ifdef HTK_TRANSCRIBER   if (nLModel==1) {  /* must interpolate with at least one model */      HError(16419,"LAdapt: at least one model must be specified with -i option");   }   if (binfo.saveFmt==LMF_TEXT) { /* save fomat cannot be TEXT */       binfo.saveFmt=LMF_BINARY;   }#endif   if (NextArg() != STRINGARG)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -