⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hlm.c

📁 该压缩包为最新版htk的源代码,htk是现在比较流行的语音处理软件,请有兴趣的朋友下载使用
💻 C
📖 第 1 页 / 共 2 页
字号:
/* ----------------------------------------------------------- *//*                                                             *//*                          ___                                *//*                       |_| | |_/   SPEECH                    *//*                       | | | | \   RECOGNITION               *//*                       =========   SOFTWARE                  */ /*                                                             *//*                                                             *//* ----------------------------------------------------------- *//* developed at:                                               *//*                                                             *//*      Speech Vision and Robotics group                       *//*      Cambridge University Engineering Department            *//*      http://svr-www.eng.cam.ac.uk/                          *//*                                                             *//*      Entropic Cambridge Research Laboratory                 *//*      (now part of Microsoft)                                *//*                                                             *//* ----------------------------------------------------------- *//*         Copyright: Microsoft Corporation                    *//*          1995-2000 Redmond, Washington USA                  *//*                    http://www.microsoft.com                 *//*                                                             *//*          2001-2002 Cambridge University                     *//*                    Engineering Department                   *//*                                                             *//*   Use of this software is governed by a License Agreement   *//*    ** See the file License for the Conditions of Use  **    *//*    **     This banner notice must not be removed      **    *//*                                                             *//* ----------------------------------------------------------- *//*         File: HLM.c  language model handling                *//* ----------------------------------------------------------- */char *hlm_version = "!HVER!HLM:   3.3 [CUED 28/04/05]";char *hlm_vc_id = "$Id: HLM.c,v 1.1.1.1 2005/05/12 10:52:50 jal58 Exp $";#include "HShell.h"#include "HMem.h"#include "HMath.h"#include "HWave.h"#include "HLabel.h"#include "HLM.h"/* --------------------------- Trace Flags ------------------------- */#define T_TIO 1  /* Progress tracing whilst performing IO */static int trace=0;/* --------------------------- Initialisation ---------------------- */#define LN10 2.30258509299404568 /* Defined to save recalculating it */static Boolean rawMITFormat = FALSE;    /* Don't use HTK quoting and escapes */static ConfParam *cParm[MAXGLOBS];      /* config parameters */static int nParm = 0;/* EXPORT->InitLM: initialise configuration parameters */void InitLM(void){   Boolean b;   int i;   Register(hlm_version,hlm_vc_id);   nParm = GetConfig("HLM", TRUE, cParm, MAXGLOBS);   if (nParm>0){      if (GetConfInt(cParm,nParm,"TRACE",&i)) trace = i;      if (GetConfBool(cParm,nParm,"RAWMITFORMAT",&b)) rawMITFormat = b;   }}/*------------------------- Input Scanner ---------------------------*/static Source source;           /* input file *//* GetInLine: read a complete line from source */static char *GetInLine(char *buf){   int  i, c;   c = GetCh(&source);   if (c==EOF)      return NULL;   i = 0;   while (c!='\n' && i<MAXSTRLEN) {       buf[i++] = c;      c = GetCh(&source);   }    buf[i] = '\0';   return buf;}/* SyncStr: read input until str found */static void SyncStr(char *buf,char *str){   while (strcmp(buf, str)!=0) {      if (GetInLine(buf)==NULL)         HError(8150,"SyncStr: EOF searching for %s", str);   }}/* GetInt: read int from input stream */static int GetInt(void){   int x;   char buf[100];      if (!ReadInt(&source,&x,1,FALSE))      HError(8150,"GetInt: Int Expected at %s",SrcPosition(source,buf));   return x;}/* GetFLoat: read float from input stream */static float GetFloat(Boolean bin){   float x;   char buf[100];   if (!ReadFloat(&source,&x,1,bin))      HError(8150,"GetFloat: Float Expected at %s",SrcPosition(source,buf));   return x;}/* ReadLMWord: read a string from input stream */static char *ReadLMWord(char *buf){   int i, c;      if (rawMITFormat) {      while (isspace(c=GetCh(&source)));      i=0;      while (!isspace(c) && c!=EOF && i<MAXSTRLEN){         buf[i++] = c; c=GetCh(&source);      }      buf[i] = '\0';      UnGetCh(c,&source);      if (i>0)         return buf;      else         return NULL;   }   else {      if (ReadString(&source,buf))         return buf;      else         return NULL;   }}/*------------------------- NEntry handling ---------------------------*/static int hvs[]= { 165902236, 220889002, 32510287, 117809592,                    165902236, 220889002, 32510287, 117809592 };/* EXPORT->GetNEntry: Access specific NGram entry indexed by ndx */NEntry *GetNEntry(NGramLM *nglm,lmId ndx[NSIZE],Boolean create){   NEntry *ne;   unsigned int hash;   int i;   /* #define LM_HASH_CHECK */     hash=0;   for (i=0;i<NSIZE-1;i++)      hash=hash+(ndx[i]*hvs[i]);   hash=(hash>>7)&(nglm->hashsize-1);     for (ne=nglm->hashtab[hash]; ne!=NULL; ne=ne->link) {      if (ne->word[0]==ndx[0]#if NSIZE > 2          && ne->word[1]==ndx[1]#endif#if NSIZE > 3          && ne->word[2]==ndx[2]#endif#if NSIZE > 4          && ne->word[3]==ndx[3]#endif          )         break;   }   if (ne==NULL && create) {      ne=(NEntry *) New(nglm->heap,sizeof(NEntry));      nglm->counts[0]++;            for (i=0;i<NSIZE-1;i++)         ne->word[i]=ndx[i];      ne->user=0;      ne->nse=0;      ne->se=NULL;;      ne->bowt=0.0;      ne->link=nglm->hashtab[hash];      nglm->hashtab[hash]=ne;   }   return(ne);}static int se_cmp(const void *v1,const void *v2){   SEntry *s1,*s2;   s1=(SEntry*)v1;s2=(SEntry*)v2;   return((int)(s1->word-s2->word));}/*--------------------- ARPA-style NGrams ------------------------*/static int nep_cmp(const void *v1,const void *v2){   NEntry *n1,*n2;   int res,i;   res=0; n1=*((NEntry**)v1); n2=*((NEntry**)v2);   for(i=NSIZE-2;i>=0;i--)      if (n1->word[i]!=n2->word[i]) {         res=(n1->word[i]-n2->word[i]);         break;      }   return(res);}/* WriteNGram: Write n grams to file */static int WriteNGrams(FILE *file,NGramLM *nglm,int n,float scale){   NEntry *ne,*be,*ce,**neTab;   SEntry *se;   LogFloat prob;   lmId ndx[NSIZE+1];   int c,i,j,k,N,g=1,hash,neCnt,total;   if (trace&T_TIO)      printf("\nn%1d ",n),fflush(stdout);   fprintf(file,"\n\\%d-grams:\n",n);   N=VectorSize(nglm->unigrams);   neTab=(NEntry **) New(&gstack,sizeof(NEntry*)*nglm->counts[0]);   for (hash=neCnt=0;hash<nglm->hashsize;hash++)      for (ne=nglm->hashtab[hash]; ne!=NULL; ne=ne->link) {         for (i=1,ce=ne;i<n;i++)            if (ne->word[i-1]==0) {               ce=NULL;               break;            }         if (ce!=NULL)            for (i=n;i<NSIZE;i++)               if (ne->word[i-1]!=0) {                  ce=NULL;                  break;               }         if (ce!=NULL && ce->nse>0)            neTab[neCnt++]=ce;      }   qsort(neTab,neCnt,sizeof(NEntry*),nep_cmp);   total=0;   for (c=n;c<=NSIZE;c++) ndx[c]=0;   for (j=0;j<neCnt;j++) {      ne=neTab[j];      for (c=1;c<n;c++) ndx[c]=ne->word[c-1];      if (ne!=NULL && ne->nse>0) {         for (i=0,se=ne->se;i<ne->nse;i++,se++) {            if (trace&T_TIO) {               if ((g%25000)==0)                  printf(". "),fflush(stdout);               if ((g%800000)==0)                  printf("\n   "),fflush(stdout);               g++;            }            ndx[0]=se->word;            if (n<nglm->nsize) be=GetNEntry(nglm,ndx,FALSE);            else be=NULL;            if (be==NULL || be->nse==0) be=NULL;            total++;            if (n==1) prob=nglm->unigrams[se->word];            else prob=se->prob;            if (prob*scale<-99.999)               fprintf(file,"%+6.3f",-99.999);            else               fprintf(file,"%+6.4f",prob*scale);            c='\t';            for (k=n-1;k>=0;k--)               if (rawMITFormat)                  fprintf(file,"%c%s",c,nglm->wdlist[ndx[k]]->name),c=' ';               else                  fprintf(file,"%c%s",c,                          ReWriteString(nglm->wdlist[ndx[k]]->name,                                        NULL,ESCAPE_CHAR)),c=' ';            if (be!=NULL)               fprintf(file,"\t%+6.4f\n",be->bowt*scale);            else               fprintf(file,"\n");         }      }   }   Dispose(&gstack,neTab);   if (trace&T_TIO)      printf("\n"),fflush(stdout);   return(total);}#define PROGRESS(g) \   if (trace&T_TIO) { \      if ((g%25000)==0) \         printf(". "),fflush(stdout); \      if ((g%800000)==0) \         printf("\n   "),fflush(stdout); \   }#define NGHSIZE1 8192#define NGHSIZE2 32768#define NGHSIZE3 131072/* EXPORT->CreateBoNGram: Allocate and create basic NGram structures */NGramLM *CreateBoNGram(LModel *lm,int vocSize, int counts[NSIZE]){   lmId ndx[NSIZE];   int i,k;   NGramLM *nglm;   nglm = (NGramLM *) New(lm->heap, sizeof(NGramLM));   lm->data.ngram = nglm;   nglm->heap = lm->heap;   for (i=0;i<=NSIZE;i++) nglm->counts[i]=0;   for (i=1;i<=NSIZE;i++)      if (counts[i]==0) break;      else nglm->counts[i]=counts[i];   nglm->nsize=i-1;   /* Don't count final layer */   for (k=0,i=1;i<nglm->nsize;i++)       k+=nglm->counts[i];   /* Then use total to guess NEntry hash size */   if (k<25000)       nglm->hashsize=NGHSIZE1;   else if (k<250000)       nglm->hashsize=NGHSIZE2;   else       nglm->hashsize=NGHSIZE3;   nglm->hashtab=(NEntry **) New(lm->heap,sizeof(NEntry*)*nglm->hashsize);   for (i=0; i<nglm->hashsize; i++)       nglm->hashtab[i]=NULL;   nglm->vocSize = vocSize;   nglm->unigrams = CreateVector(lm->heap,nglm->vocSize);   nglm->wdlist = (LabId *) New(lm->heap,nglm->vocSize*sizeof(LabId)); nglm->wdlist--;   for (i=1;i<=nglm->vocSize;i++) nglm->wdlist[i]=NULL;   for (i=0;i<NSIZE;i++) ndx[i]=0;   GetNEntry(nglm,ndx,TRUE);   return(nglm);}   #define BIN_ARPA_HAS_BOWT 1#define BIN_ARPA_INT_LMID 2/* ReadNGrams: read n grams list from file */static int ReadNGrams(NGramLM *nglm,int n,int count, Boolean bin){   float prob;   LabId wdid;   SEntry *cse;   char wd[255];   lmId ndx[NSIZE+1];   NEntry *ne,*le=NULL;   int i, g, idx, total;   unsigned char size, flags=0;   cse = (SEntry *) New(nglm->heap,count*sizeof(SEntry));   for (i=1;i<=NSIZE;i++) ndx[i]=0;   if (trace&T_TIO)      printf("\nn%1d ",n),fflush(stdout);   total=0;   for (g=1; g<=count; g++){      PROGRESS(g);      if (bin) {         size = GetCh (&source);         flags = GetCh (&source);      }            prob = GetFloat(bin)*LN10;      if (n==1) { /* unigram treated as special */         ReadLMWord(wd);         wdid = GetLabId(wd, TRUE);         if (wdid->aux != NULL)            HError(8150,"ReadNGrams: Duplicate word (%s) in 1-gram list",                   wdid->name);         wdid->aux = (Ptr)g;         nglm->wdlist[g] = wdid;         nglm->unigrams[g] = prob;         ndx[0]=g;      } else {    /* bigram, trigram, etc. */         for (i=0;i<n;i++) {            if (bin) {               if (flags & BIN_ARPA_INT_LMID) {                  unsigned int ui;                  if (!ReadInt (&source, (int *) &ui, 1, bin))                     HError (9999, "ReadNGrams: failed reading int lm word id");                  idx = ui;               }               else {                  unsigned short us;                  if (!ReadShort (&source, (short *) &us, 1, bin))                     HError (9999, "ReadNGrams: failed reading short lm word id at");                  idx = us;               }            }            else {               ReadLMWord(wd);               wdid = GetLabId(wd, FALSE);               idx = (wdid==NULL?0:(int)wdid->aux);            }            if (idx<1 || idx>nglm->vocSize)               HError(8150,"ReadNGrams: Unseen word (%s) in %dGram",wd,n);            ndx[n-1-i]=idx;         }      }      total++;      ne = GetNEntry(nglm,ndx+1,FALSE);      if (ne == NULL)         HError(8150,"ReadNGrams: Backoff weight not seen for %dth %dGram",g,n);      if (ne!=le) {         if (le != NULL && ne->se != NULL)            HError(8150,"ReadNGrams: %dth %dGrams out of order",g,n);         if (le != NULL) {            if (le->nse==0) {               le->se=NULL;            } else {               qsort(le->se,le->nse,sizeof(SEntry),se_cmp);            }         }         ne->se = cse;         ne->nse = 0;         le = ne;      }      cse->prob = prob;      cse->word = ndx[0];      ne->nse++; cse++;      /* read back-off weight */      if (bin) {         if (flags & BIN_ARPA_HAS_BOWT) {            ne = GetNEntry(nglm,ndx,TRUE);            ne->bowt = GetFloat (TRUE)*LN10;         }      }      else {         SkipWhiteSpace(&source);         if (!source.wasNewline) {            ne=GetNEntry(nglm,ndx,TRUE);            ne->bowt = GetFloat(FALSE)*LN10;         }      }   }   /* deal with the last accumulated set */   if (le != NULL) {      if (le->nse==0) {         le->se=NULL;      } else {         qsort(le->se,le->nse,sizeof(SEntry),se_cmp);      }   }   if (trace&T_TIO)      printf("\n"),fflush(stdout);   return(total);}/* ReadBoNGram: read and store WSJ/DP format ngram */static void ReadBoNGram(LModel *lm,char *fn){   NGramLM *nglm;   int i,j,k,counts[NSIZE+1];   Boolean ngBin[NSIZE+1];   char buf[MAXSTRLEN+1],syc[64];   char ngFmtCh;   if (trace&T_TIO)      printf("\nBOffB "),fflush(stdout);   if(InitSource(fn,&source,LangModFilter)<SUCCESS)      HError(8110,"ReadBoNGram: Can't open file %s", fn);   GetInLine(buf);   SyncStr(buf,"\\data\\");   for (i=1;i<=NSIZE;i++) counts[i]=0;   for (i=1;i<=NSIZE;i++) {      GetInLine(buf);      if (sscanf(buf, "ngram %d%c%d", &j, &ngFmtCh, &k)!=3 && i>1)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -