⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lmodel.c

📁 该压缩包为最新版htk的源代码,htk是现在比较流行的语音处理软件,请有兴趣的朋友下载使用
💻 C
📖 第 1 页 / 共 5 页
字号:
/* ----------------------------------------------------------- *//*                                                             *//*                          ___                                *//*                       |_| | |_/   SPEECH                    *//*                       | | | | \   RECOGNITION               *//*                       =========   SOFTWARE                  */ /*                                                             *//*                                                             *//* ----------------------------------------------------------- *//* developed at:                                               *//*                                                             *//*      Speech Vision and Robotics group                       *//*      Cambridge University Engineering Department            *//*      http://svr-www.eng.cam.ac.uk/                          *//*                                                             *//* main authors: Valtcho Valtchev, Steve Young,                *//*               Julian Odell, Gareth Moore                    *//* ----------------------------------------------------------- *//*         Copyright:                                          *//*                                                             *//*          1994-2002 Cambridge University                     *//*                    Engineering Department                   *//*                                                             *//*   Use of this software is governed by a License Agreement   *//*    ** See the file License for the Conditions of Use  **    *//*    **     This banner notice must not be removed      **    *//*                                                             *//* ----------------------------------------------------------- *//*      File: LModel:    ARPA style LM handling                *//* ----------------------------------------------------------- */char *lmodel_version = "!HVER!LModel:   3.3 [CUED 28/04/05]";char *lmodel_vc_id = "$Id: LModel.c,v 1.1.1.1 2005/05/12 10:52:18 jal58 Exp $";#include "HShell.h"     /* HMM ToolKit Modules */#include "HMem.h"#include "HMath.h"#include "HWave.h"#include "HLabel.h"#ifdef ULTRA_LM#include "HDict.h"#endif#include "LWMap.h"#include "LUtil.h"#include "LModel.h"#include "HLM.h"#define T_TOP    0001       /* top level tracing */#define T_LOAD   0002       /* loading of LMs */#define T_SAVE   0004       /* saving of LMs */#define T_MAPS   0010       /* word mappings */#define T_PROB   0020       /* n-gram lookup */static int trace = 0;typedef struct _AccessInfo{   int count;           /* count for access */   int nboff;           /* times computed using the back-off weight */   int nmiss;           /* times not available */   int nhits;           /* times available */   double prob;         /* sum of prob   returned */   double prob2;        /* sum of prob^2 returned */} accessinfo;static ConfParam *cParm[MAXGLOBS];      /* config parameters */static int nParm = 0;static char *nGramName[LM_NSIZE] = {   "NULLGRAM", "UNIGRAM",   "BIGRAM", "TRIGRAM",   "FOURGRAM", "PENTAGRAM",   "HEXAGRAM","SEPTAGRAM","OCTAGRAM",   "NONAGRAM","DECAGRAM","11-GRAM",   "12-GRAM","13-GRAM","14-GRAM","15-GRAM"};static char *dcTypeName[] = {   "Katz",   "Absolute",   "Linear"};static Boolean defIntID = FALSE;        /* Don't use 4-byte IDs */static Boolean htkEsc = FALSE;          /* Don't use HTK quoting and escapes */static Boolean natReadOrder = FALSE;    /* Preserve natural read byte order */static Boolean natWriteOrder = FALSE;   /* Preserve natural write byte order */extern Boolean vaxOrder;                /* True if byteswapping needed to preserve SUNSO */#ifdef ULTRA_LMstatic short   ultraKey[KEY_LENGTH];    /* Key used to identify ultra LMs */#endif/* EXPORT->InitLModel: initialise module */void InitLModel(void){   int i;   Boolean b;   Register(lmodel_version,lmodel_vc_id);   nParm = GetConfig("LMODEL", TRUE, cParm, MAXGLOBS);   if (nParm>0){      if (GetConfInt(cParm,nParm,"TRACE",&i)) trace = i;#ifdef HTK_TRANSCRIBER      if (trace&T_PROB) trace=trace^T_PROB;#endif      if (GetConfBool(cParm,nParm,"RAWMITFORMAT",&b)) htkEsc = !b;      if (GetConfBool(cParm,nParm,"USEINTID",&b)) defIntID = b;      if (GetConfBool(cParm,nParm,"NATURALREADORDER",&b)) natReadOrder = b;      if (GetConfBool(cParm,nParm,"NATURALWRITEORDER",&b)) natWriteOrder = b;   }#ifdef ULTRA_LM   COMPOSE_KEY(ultraKey);#endif}/*----------------------- Input scanner ------------------------*/#define MAXSYMLEN  2048/* GetInLine: read a complete line from source */static char *GetInLine(Source *src,char *buf){   int  i, c;   if ((c = GetCh(src))==EOF)      return NULL;   i = 0;   while (c!='\n' && i<MAXSYMLEN) {      buf[i++] = c;      c = GetCh(src);   }   buf[i] = '\0';   return buf;}/* SyncStr: read input until str found */void SyncStr(Source *src, char *str){   char buf[MAXSYMLEN];   do {      if (GetInLine(src,buf)==NULL)         HError(15450,"SyncStr: EOF searching for %s", str);   } while (strcmp(buf,str)!=0);}/*----------------------- Access statistics ------------------------*/void ResetAccessInfo(BackOffLM *lm){   int i;   NGramInfo *gi;   AccessInfo *ai;   for (gi=lm->gInfo+1,i=1; i<=lm->nSize; i++,gi++) {      if ((ai=gi->aInfo)==NULL)	 HError(15490,"ResetAccessInfo: Access info not present");      ai->count = 0;      ai->nboff = ai->nmiss = ai->nhits = 0;      ai->prob = ai->prob2 = 0.0;   }}/* EXPORT->AttachAccessInfo: attach and initialise access info */void AttachAccessInfo(BackOffLM *lm){   int i;   NGramInfo *gi;   for (gi=lm->gInfo+1,i=1; i<=lm->nSize; i++,gi++) {      if (gi->aInfo!=NULL)	 HError(15490,"AttachAccessInfo: Access info already present");      gi->aInfo = (AccessInfo *) New(lm->heap,sizeof(AccessInfo));   }   ResetAccessInfo(lm);}/* ShowStats: print back-off statistics */static void ShowStats(FILE *f, AccessInfo *acs, char *lmstr){   int count;   float f1, f2, f3;   double a, b, avg, stdev;   count = (acs->count>0) ? acs->count : 1;   a = acs->prob  / (double) count;   b = acs->prob2 / (double) count;   avg = a; stdev = sqrt(b - a*a);   f1 = 100.0 * (float) acs->nhits / (float) count;   f2 = 100.0 * (float) acs->nboff / (float) count;   f3 = 100.0 * (float) acs->nmiss / (float) count;   fprintf(f,"%10s %10d %5.1f%% %5.1f%% %5.1f%% %8.2f %8.2f\n",	   lmstr, acs->count, f1, f2, f3, avg, stdev);}/* EXPORT -> PrintTotalAccessStats: print access statistics */void PrintTotalAccessStats(FILE *f,BackOffLM *lm){   int i;   NGramInfo *gi;   static char *lmstr[] = {      "nullgram", "unigram", "bigram", "trigram", "fourgram", "pentagram",      "hexagram", "septagram", "octagram", "nonagram", "decagram"   };   static int max_text = 10; /* size of lmstr[] array */   char tmpstr[10];   fprintf(f,"%10s %10s %6s %6s %6s %8s %8s\n", "Lang model",	  "requested", "exact", "backed", "n/a", "mean", "stdev");   for (gi=lm->gInfo+2,i=2; i<=lm->nSize; i++, gi++)      ShowStats(f,gi->aInfo, i<=max_text?lmstr[i]:(sprintf(tmpstr, "%d", i), tmpstr));}/*----------------------- float compression ----------------------*/#define MIN_PROB -8.0#ifdef LM_COMPACTstatic UShort Prob2Shrt(float f){   if (f < MIN_PROB)      return USHRT_MAX;   return (f / MIN_PROB * (float) (USHRT_MAX-1));}static float Shrt2Prob(UShort s){   if (s == USHRT_MAX)      return LZERO;   return ((float) s / (float) (USHRT_MAX-1)) * MIN_PROB;}#endif/*-------------------------- LM access ----------------------------*//* EXPORT-> CmpSE: qsort comparison for short LM entries */int CmpSE(const void *p1, const void *p2){   if (((SMEntry *)p1)->ndx < ((SMEntry *)p2)->ndx)      return -1;   if (((SMEntry *)p1)->ndx > ((SMEntry *)p2)->ndx)      return +1;   return 0;}/* EXPORT-> CmpFE: qsort comparison for full LM entries */int CmpFE(const void *p1, const void *p2){   if (((FLEntry *)p1)->ndx < ((FLEntry *)p2)->ndx)      return -1;   if (((FLEntry *)p1)->ndx > ((FLEntry *)p2)->ndx)      return +1;   return 0;}/* EXPORT-> FindSE: find SEntry in a sorted list */SMEntry *FindSE(SMEntry *sptr, int lo, int hi, LM_Id key){   int cen;   LM_Id cmp;   if (sptr==NULL)      return NULL;   hi--;   if ((key < sptr[lo].ndx) || (key > sptr[hi].ndx))      return NULL;   do {      cen = (lo + hi) / 2;      cmp = sptr[cen].ndx;      if (key == cmp)	 return sptr+cen;      if (key > cmp)	 lo = cen+1;      else	 hi = cen-1;   } while (lo <= hi);   return NULL;}/* EXPORT-> FindFE: find FEntry in a sorted list */FLEntry *FindFE(FLEntry *fptr, int lo, int hi, LM_Id key){   int cen;   LM_Id cmp;   if (fptr==NULL)      return NULL;   hi--;   if ((key < fptr[lo].ndx) || (key > fptr[hi].ndx))      return NULL;   do {      cen = (lo + hi) / 2;      cmp = fptr[cen].ndx;      if (key == cmp)	 return fptr+cen;      if (key > cmp)	 lo = cen+1;      else	 hi = cen-1;   } while (lo <= hi);   return NULL;}/* FindSE1: find bigram entry in a sorted list, also return index */static SMEntry *FindSE1(SMEntry *sptr, int lo, int hi, LM_Id key, int *fcen){   int cen;   LM_Id cmp;   hi--;   if ((key < sptr[lo].ndx) || (key > sptr[hi].ndx))      return NULL;   do {      cen = (lo + hi) / 2;      cmp = sptr[cen].ndx;      if (key == cmp) {	 *fcen = cen; return sptr+cen;      }      if (key > cmp)	 lo = cen+1;      else	 hi = cen-1;   } while (lo <= hi);   return NULL;}/* -------------------- Ultra format I/O ---------------------- */#ifdef ULTRA_LMstatic CNEntry *qs_cneBuf;      /* global table of read CNEntry *//* FRead: fread spec function for Source src */static size_t FRead(void *ptr, size_t size, size_t nitems, Source *src){   int nr,i;   unsigned char *c;   nr = fread(ptr,size,nitems,src->f);#ifdef HTK_CRYPT   if (src->crypt!=NULL) {      for (c=ptr,i=0; i<size*nr; i++,c++)	 *c = DecryptChar(src->crypt,*c);   }#endif   src->chcount+=nr*size;   return nr;}#ifdef LMPROB_SHORT/*   The following compress/decompress LOG10 float to/from short.*/#define PROB_LOG_TO_SHORT(prob) \  ((int) (-prob/0.0002+0.5) > 65534 ? 65535 : (int) (-prob/0.0002+0.5))#define PROB_SHORT_TO_LOG(prob) \  (prob<=65534 ? -prob*0.0002 : LZERO)#define BOWT_LOG_TO_SHORT(bowt) \   ((floor(bowt/0.0002+0.5)>32766)?32767:\    (floor(bowt/0.0002+0.5)<-32767)?-32768:\    (int)(floor(bowt/0.0002+0.5)))#define BOWT_SHORT_TO_LOG(bowt) \   (bowt*0.0002)#else#define PROB_LOG_TO_SHORT(prob) (prob)#define PROB_SHORT_TO_LOG(prob) (prob)#define BOWT_LOG_TO_SHORT(bowt) (bowt)#define BOWT_SHORT_TO_LOG(bowt) (bowt)#endif#define CNE2FE(cndx,fe) {  \   CNEntry *cne = cneBuf + cndx; \   fe->nse  = cne->nse;  \   fe->sea  = smeTab[cndx]; \   fe->ndx  = cne->word[0]; \   bowt = BOWT_SHORT_TO_LOG(cne->bowt); \   fe->bowt = (ptype==LMP_FLOAT) ? LOG10_TO_FLT(bowt) : bowt*scale; \}#define INIT_CNE(cne) { \   int  i;         \   cne.nse = 0;    \   cne.bowt = 0.0; \   for (i=0; i<NSIZE-1; i++) cne.word[i]=0;   \}static int nep_cmp(const void *v1,const void *v2){   CNEntry *n1,*n2;   int res,i;   res = 0;   n1=qs_cneBuf + *((int *)v1);   n2=qs_cneBuf + *((int *)v2);   for(i=NSIZE-2;i>=0;i--)      if (n1->word[i]!=n2->word[i]) {	 res=(n1->word[i]-n2->word[i]);	 break;      }   return(res);}static void LoadUltraNGrams(Source *src, BackOffLM *lm){   float prob,bowt,scale;   Boolean newCTX;   LMProbType ptype;   int context[NSIZE+1];   int i,j,idx,cneCnt,seCnt;   SEntry se;   int *cneTab;   CNEntry *cne,*cneBuf;   FLEntry *cfe,*feBuf,*parent;   SMEntry *sme,*smeBuf,**smeTab;   Boolean mustSwap = (vaxOrder && !natReadOrder);   SyncStr(src,"\\N-grams:");   scale = lm->gScale*LN10;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -