⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lgbase.c

📁 该压缩包为最新版htk的源代码,htk是现在比较流行的语音处理软件,请有兴趣的朋友下载使用
💻 C
📖 第 1 页 / 共 2 页
字号:
/* ----------------------------------------------------------- *//*                                                             *//*                          ___                                *//*                       |_| | |_/   SPEECH                    *//*                       | | | | \   RECOGNITION               *//*                       =========   SOFTWARE                  */ /*                                                             *//*                                                             *//* ----------------------------------------------------------- *//* developed at:                                               *//*                                                             *//*      Speech Vision and Robotics group                       *//*      Cambridge University Engineering Department            *//*      http://svr-www.eng.cam.ac.uk/                          *//*                                                             *//* main authors: Valtcho Valtchev, Steve Young,                *//*               Julian Odell, Gareth Moore                    *//* ----------------------------------------------------------- *//*         Copyright:                                          *//*                                                             *//*          1994-2002 Cambridge University                     *//*                    Engineering Department                   *//*                                                             *//*   Use of this software is governed by a License Agreement   *//*    ** See the file License for the Conditions of Use  **    *//*    **     This banner notice must not be removed      **    *//*                                                             *//* ----------------------------------------------------------- *//*         File: LGBase: Gram File Database Routines           *//* ----------------------------------------------------------- */char *lgbase_version = "!HVER!LGBase:   3.3 [CUED 28/04/05]";char *lgbase_vc_id = "$Id: LGBase.c,v 1.1.1.1 2005/05/12 10:52:18 jal58 Exp $";#include "HShell.h"#include "HMem.h"#include "HMath.h"#include "HWave.h"#include "HLabel.h"#include "LUtil.h"#include "LWMap.h"#include "LGBase.h"/* ------------------------ Trace Flags --------------------- */static int trace = 0;#define T_TOP   0001       /* top level tracing */#define T_SQU   0002       /* trace squashing */#define T_SRT   0004       /* trace NG Buffer sorting */#define T_ITR   0010       /* print NG input set tree */#define T_MOP   0020       /* print max parallel input streams */#define T_IST   0040       /* trace parallel input streaming */#define T_FOF   0100       /* print info on FoF i/o *//* --------------------- Global Variables ------------------- */static ConfParam *cParm[MAXGLOBS];      /* config parameters */static int nParm = 0;static int sqOffset;                    /* squash offset, this depends on byte */static Boolean checkOrder = FALSE;      /* Check n-gram ordering */static Boolean natReadOrder = FALSE;    /* Preserve natural read byte order */static Boolean natWriteOrder = FALSE;   /* Preserve natural write byte order */extern Boolean vaxOrder;                /* True if byteswapping needed to preserve SUNSO *//* --------------------- Initialisation --------------------- *//* EXPORT -> InitGBase: initialise the module for n-grams */void InitGBase(void){   int i;   Boolean b;   Register(lgbase_version,lgbase_vc_id);   /* get config variables for this module */   nParm = GetConfig("LGBASE", TRUE, cParm, MAXGLOBS);   if (nParm>0){      if (GetConfInt(cParm,nParm,"TRACE",&i)) trace = i;      if (GetConfBool(cParm,nParm,"NATURALREADORDER",&b)) natReadOrder = b;      if (GetConfBool(cParm,nParm,"NATURALWRITEORDER",&b)) natWriteOrder = b;      if (GetConfBool(cParm,nParm,"CHECKORDER",&b)) checkOrder = b;   }   /* Set byte order */   sqOffset =  sizeof(UInt) - SQUASH;   if (trace&T_SQU)  printf("Squash offset is %d\n",sqOffset);}/* SetNGInfo: init info struct for given N-gram */static NGInfo SetNGInfo(int N){   NGInfo i;   i.N = N;   i.ng_size = N*SQUASH + 1;   i.ng_full = (N+1)*sizeof(UInt);   return i;}/* ------------------- Squashing routines ---------------------- *//* EXPORT->NGramSquash: compress each ngram to SQUASH bytes */void NGramSquash(int N, NGram ng, Byte *comp){   int i;   UInt b;   Byte *e,*c;   Boolean mustSwap = (vaxOrder && !natWriteOrder);   for (c = comp,i=0; i<N; i++, c+=SQUASH) {      b = ng[i]; e = (Byte *) &b;      if (mustSwap) SwapInt32((int *)&b);      memcpy(c,e+sqOffset,SQUASH);   }}/* EXPORT -> NGramExpand: expand ngrams from SQUASH num of bytes */void NGramExpand(int N, Byte *comp, NGram ng){   int i;   UInt b;   Byte *e,*c;   Boolean mustSwap = (vaxOrder && !natReadOrder);   for (c=comp,i=0; i<N; i++,c+=SQUASH){      e = (Byte *) &b;      memset(e,0x00,sizeof(UInt));      memcpy(e+sqOffset,c,SQUASH);      if (mustSwap) SwapInt32((int *)&b);      ng[i] = b;   }   ng[N] = 0;}/* EXPORT -> SameGrams: true if grams (ignoring counts) are equal */Boolean SameGrams(int N, NGram ng1, NGram ng2){   int i;   for (i=0; i<N; i++)      if (ng1[i] != ng2[i]) return FALSE;   return TRUE;}/* ------------------- NGram File Input/Output  --------------- *//* EXPORT->PrintNGram: print given N-gram */void PrintNGram(int N, NGram ng, WordMap *wm){   int i;   LabId id;   for (i=0; i<N; i++) {      id = WordLMName(ng[i],wm);      printf("%-12s",id->name);   }   printf(" : %d\n",ng[N]);}/* LoadHGram: read text N-gram from header */static void ReadHGram(char *name, LMFileHdr hdr, int N, LabId *ng, char *fn){   int i;   char *s,sbuf[MAXSTRLEN];   if ((s=GetLMHdrStr(name,hdr,FALSE)) == NULL)      HError(15350,"ReadHGram: No %s field in %s",name,fn);   strcpy(sbuf,s);   for (i=0; i<N; i++){      s = strtok((i==0)?sbuf:NULL," \t\r\n");      if (s==NULL)	 HError(15350,"ReadHGram: Missing Sep in %s in %s",name,fn);      ng[i] = GetLabId(s,TRUE);   }}/* WriteHGram: write text N-gram to header */static void WriteTxtHGram(FILE *f, char *name, int N, LabId *ng){   int i;   fprintf(f,"%s =",name);   for (i=0; i<N; i++) {      fprintf(f," %s",ng[i]->name);   }   fprintf(f,"\n");}/* WriteHGram: write a header for given NG Buffer */void WriteRawHGram(FILE *f, char *name, int N, NGram ng, WordMap *wm){   int i;   LabId id;   fprintf(f,"%s =",name);   for (i=0; i<N; i++) {      id = WordLMName(ng[i],wm);      fprintf(f," %s",id->name);   }   fprintf(f,"\n");}/* SameHGrams: compare raw and text N-grams */static Boolean SameHGrams(int N, NGram ng, LabId *tg){   int i,ndx;   for (i=0; i<N; i++) {      if ((ndx = WordLMIndex(tg[i]))!=-1 && ndx!=ng[i])	 return FALSE;   }   return TRUE;}/* CmpTxtNGram: compare two N-grams in text */static int CmpTxtNGram(int N, LabId *ng1, LabId *ng2){   int i,cmp;   for (i=0; i<N; i++) {      cmp = strcmp(ng1[i]->name,ng2[i]->name);      if (cmp != 0) return cmp;   }   return 0;}/* CompareMapNames: compare map name and n-gram file map name */static Boolean CompareMapNames(char *ngfMap, char *master){  char *s;  if (ngfMap==NULL || master==NULL)     return FALSE;  if ((s=strstr(master,ngfMap))==NULL)     return FALSE;  if (s!=master && *(s-1)!='%') /* not at the beginning and not preceeded by % */     return FALSE;  s += strlen(ngfMap);  if (*s!='\0' && *s!='%')      /* not at the end and not followed by % */     return FALSE;  return TRUE;}/* SetNext: initialise ngs->nxt array with the first N-gram with   all words in the map. */static void SetNext(NGSource *ngs, Byte ngRawBuf[GSIZE]){   UInt *gp;   int i, N, ng_size;   Boolean same, hasOOM;   N = ngs->info.N;   ng_size = ngs->info.ng_size;   while(ngs->nItems > 0) {      memcpy(ngs->buf,ngRawBuf,ng_size);      NGramExpand(N,ngs->buf,ngs->nxt);      hasOOM = FALSE;      for (gp=ngs->nxt,i=0; i<N; i++,gp++) {	 if (GetMEIndex(ngs->wm,*gp) < 0) {	    hasOOM = TRUE; break;	 }      }      if (hasOOM) {  /* skip remaining N-grams, same as ngs->buf */	 ngs->nItems--;	 do {	    if (fread(ngRawBuf,ng_size,1,ngs->src.f)==1) {	       same = memcmp(ngs->buf,ngRawBuf,ng_size-1) == 0;	    } else {	       same = FALSE;	    }	 } while(same);      } else {	 break;      }   }}/* EXPORT->OpenNGramFile: open an ngram file and init NGSource */void OpenNGramFile(NGSource *ngs, char *fn, WordMap *wm){   LMFileHdr hdr;   MemHeap mem;   int i,n,N;   char *s,buf[MAXSTRLEN];   Byte ngRawBuf[GSIZE];   UInt ngExpBuf[GSIZE];   /* Create and Load Header */   CreateHeap(&mem,"NGheader",MSTAK,1,0.0,1000,1000);   if (InitSource(fn, &(ngs->src), LGramFilter) == FAIL)      HError(15311,"OpenNGramFile: Can't open gram file '%s'", fn);   if (ReadLMHeader(&mem, &(ngs->src), LGramFilter, &hdr, &n) != GRAM_HDR)      HError(15350,"OpenNGramFile: Bad header in file %s",fn);   ngs->nItems = n;   /* Check Word map name and seqno */   if ((s=GetLMHdrStr("WMAP",hdr,FALSE)) == NULL)      HError(15350,"OpenNGramFile: No WMap field in %s",fn);   if (!CompareMapNames(s,wm->name))      HError(15330,"OpenNGramFile: Gram file map %s inconsistent with %s",         s,wm->name);   if (!GetLMHdrInt("SEQNO",&n,hdr))      HError(15350,"OpenNGramFile: No SeqNo field in %s",fn);   if (n > wm->seqno)      HError(15330,"OpenNGramFile: SeqNo of map file is too low [%d vs %d]",         n,wm->seqno);   /* Check map matches WMCHECK */   if ((s=GetLMHdrStr("WMCHECK",hdr,FALSE)) == NULL)      HError(15350,"OpenNGramFile: No WMCheck field in %s",fn);   strcpy(buf,s);   if ((s=strchr(buf,' ')) == NULL)      HError(15350,"OpenNGramFile: Missing Sep in WMCheck in %s",fn);   *s = '\0'; n = atoi(s+1);   if ((i=WordLMIndex(GetLabId(buf,FALSE)))!=-1 && i!=n)      HError(15330,"OpenNGramFile: WMCheck FAILURE in %s, %d vs %d",fn,i,n);   /* Ok, So Get Rest of Header Info */   if (!GetLMHdrInt("NGRAM",&N,hdr))      HError(15350,"OpenNGramFile: No Ngram field in %s",fn);   ngs->info = SetNGInfo(N);   s = GetLMHdrStr("SOURCE",hdr,FALSE);   if (s==NULL) ngs->txtsrc[0] = '\0'; else strcpy(ngs->txtsrc,s);   ReadHGram("GRAM1",hdr,N,ngs->firstGram,fn);   ReadHGram("GRAMN",hdr,N,ngs->lastGram,fn);   ngs->wm = wm;   if (trace&T_TOP) {      printf("Read Header for %s, [%d grams, size %d]\n",fn,ngs->nItems,N);      fflush(stdout);   }   /* initialise the source by reading the first gram */   if (fread(ngRawBuf,ngs->info.ng_size,1,ngs->src.f) !=1 )      HError(15350, "OpenNGramFile: Empty file %s\n", fn);   NGramExpand(N,ngRawBuf,ngExpBuf);   if (!SameHGrams(N,ngExpBuf,ngs->firstGram)) {      WriteTxtHGram(stdout,"Gram1",N,ngs->firstGram);      WriteRawHGram(stdout,"gram1",N,ngExpBuf,wm);      HError(15330, "OpenNGramFile: Header-specified 1st gram is not equal to the actual 1st gram in file %s\n", fn);   }   SetNext(ngs,ngRawBuf); /* This could well exhaust the file and reduce nItems to 0 */   DeleteHeap(&mem);}/* EXPORT->CloseNGramFile: close given ngram file source */void CloseNGramFile(NGSource *ngs){   CloseSource(&(ngs->src));}/* EXPORT->ReadNGram: read the next ngram from given source.   (The next ngram to read will already be in its buffer) */void ReadNGram(NGSource *ngs, NGram ng){   UInt a,oc,N,ng_size;   Byte c,b[GSIZE];   Boolean same;   if (ngs->nItems <= 0)      HError(15313,"ReadNGram: Gram file %s is empty",ngs->src.name);   ngs->nItems--;   oc = 0; a = 1; N = ngs->info.N;   ng_size = ngs->info.ng_size;   c = ngs->buf[ng_size-1];   do {      oc += a*c; a *= 256;      if (fread(b, ng_size, 1, ngs->src.f)==1) {         same = memcmp(ngs->buf, b, ng_size-1) == 0;         c = b[ng_size-1];      } else {         same = FALSE;      }   } while (same);   NGramExpand(N,ngs->buf,ng); ng[N] = oc;   SetNext(ngs,b);}/* EXPORT -> WriteNGram: write compressed nGram to file f */int WriteNGram(FILE *f, int N, NGram ng){   Byte b;   UInt a,c,bsize,count;   static Byte buf[GSIZE];   NGramSquash(N, ng,buf);   bsize = N*SQUASH;#ifdef LM_FLOAT_COUNT   count = (UInt) *((float *)(ng + N))#else   count = ng[N];#endif   for (a=count,c=0; a != 0; a = a / 256, c++) {      b = a % 256;      fwrite(buf, bsize, 1, f);      fwrite(&b, sizeof(Byte), 1, f);   }   return c;}/* --------------------- NGram Buffer Handling --------------- *//* EXPORT->CreateNGBuffer: Create an N-gram buffer with size slots */NGBuffer *CreateNGBuffer(MemHeap *mem, int N, int size, char *fn, WordMap *wm){   NGBuffer *ngb;   UInt poolbytes;   ngb = (NGBuffer *)New(mem,sizeof(NGBuffer));   ngb->info = SetNGInfo(N);   ngb->poolsize = size; ngb->wm = wm;   ngb->used = 0; ngb->fn = CopyString(mem,fn); ngb->fndx = 0;   poolbytes = ngb->info.ng_full*size;   ngb->next = ngb->pool = (UInt *) New(mem,poolbytes);   return ngb;}/* EXPORT->StoreNGram: store ngram in buf into ngb, return TRUE if ngb is full */Boolean StoreNGram(NGBuffer *ngb, NGram ng){   memcpy(ngb->next, ng, ngb->info.ng_full);   ngb->used++; ngb->next += ngb->info.N+1;   return (ngb->used==ngb->poolsize);}/* CmpNGram: compare N-grams ng1 and ng2 using word map wm */static int CmpNGram(WordMap *wm, int N, UInt *ng1, UInt *ng2){   int i1,i2,j,s1,s2;#ifdef SANITY   if (wm == NULL)      HError(15390,"WordLMCmp: Word map is NULL");   if (!wm->isSorted)      HError(15390,"WordLMCmp: Word map is not sorted");#endif    for (j=0; j<N; j++) {      if ((i1 = GetMEIndex(wm,ng1[j])) < 0)	 HError(15395,"WordLMCmp: Index %d not found in wordmap",ng1[j]);      if ((i2 = GetMEIndex(wm,ng2[j])) < 0)	 HError(15395,"WordLMCmp: Index %d not found in wordmap",ng2[j]);      s1 = wm->me[i1].sort; s2 = wm->me[i2].sort;      if (s1 < s2) return -1;      if (s1 > s2) return +1;   }   return 0;}static int        qs_cmpSize;   /* must set before using this routine */static WordMap    *qs_wmap;     /* word list to access mapentries */static NGInputSet *qs_inset;    /* input set *//* qs_CmpNGram: compare two N-grams, used in qsort */static int qs_CmpNGram(const void *p1, const void *p2){   return CmpNGram(qs_wmap,qs_cmpSize,(UInt *)p1,(UInt *)p2);}/* qs_CmpGFile: compare two NGSources on nxt field */static int qs_CmpGFile(const void *p1, const void *p2){   NGram p,q;   int *i1, *i2;   i1 = (int *)p1; i2 = (int *)p2;   p = qs_inset->ngs[*i1].nxt;   q = qs_inset->ngs[*i2].nxt;   return CmpNGram(qs_inset->wm,qs_inset->N,p,q);}/* EXPORT->SortNGBuffer: sort+uniqe N-grams in ngb  */void SortNGBuffer(NGBuffer *ngb){   int i, count, isize, N;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -