⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hlstats.c

📁 该压缩包为最新版htk的源代码,htk是现在比较流行的语音处理软件,请有兴趣的朋友下载使用
💻 C
📖 第 1 页 / 共 2 页
字号:
/* ----------------------------------------------------------- */
/*                                                             */
/*                          ___                                */
/*                       |_| | |_/   SPEECH                    */
/*                       | | | | \   RECOGNITION               */
/*                       =========   SOFTWARE                  */ 
/*                                                             */
/*                                                             */
/* ----------------------------------------------------------- */
/*         Copyright: Microsoft Corporation                    */
/*          1995-2000 Redmond, Washington USA                  */
/*                    http://www.microsoft.com                 */
/*                                                             */
/*   Use of this software is governed by a License Agreement   */
/*    ** See the file License for the Conditions of Use  **    */
/*    **     This banner notice must not be removed      **    */
/*                                                             */
/* ----------------------------------------------------------- */
/*    File: HLStats.c: gather statistics from transcriptions   */
/* ----------------------------------------------------------- */

/* Version and revision-control strings; both are passed to InitShell by main */
char *hlstats_version = "!HVER!HLStats:   3.3 [CUED 28/04/05]";
char *hlstats_vc_id = "$Id: HLStats.c,v 1.1.1.1 2005/05/12 10:52:54 jal58 Exp $";

#include "HShell.h"
#include "HMem.h"
#include "HMath.h"
#include "HSigP.h"
#include "HAudio.h"
#include "HWave.h"
#include "HVQ.h"
#include "HParm.h"
#include "HLabel.h"
#include "HModel.h"
#include "HDict.h"
#include "HLM.h"
#include "HUtil.h"

/*
   This program collects statistics (such as number of occurrences,
   min, max and average duration) from a set of label files.
   It is also able to generate simple backoff and matrix bigram
   language models.
*/

/* -------------------------- Trace Flags & Vars ------------------------ */

#define T_BAS 0x0001                /* Trace basic progress information */
#define T_MEM 0x0002                /* Trace memory usage */
#define T_BIG 0x0004                /* Trace bigram statistics */
#define T_FIL 0x0008                /* Trace each file name */

static int trace = 0;               /* trace level */

/* -------------------------- Global Variables etc ---------------------- */

static Boolean doBigram = FALSE;    /* do what? (set by -b: output bigram) */
static Boolean doDurs   = FALSE;    /* set by -d: compute duration statistics */
static Boolean doList   = FALSE;    /* set by -l: output covering list of models */
static Boolean doPCount = FALSE;    /* set by -p: count physical occurrences */
static Boolean doLCount = FALSE;    /* set by -c: count logical occurrences */
static Boolean doBOff   = FALSE;    /* set by -o: back-off files instead of matrix */
static char *listFile   = NULL;     /* file for label list */
static char *bigFile    = NULL;     /* file for bigram */
static float uniFloor   = 1.0;      /* min count for unigram probs */
static float bigFloor   = 0.0;      /* floor for matrix bigram probs */
static int bigThresh    = 0;        /* threshold for including bigram probs */
static int pCountLimit  = -1;       /* max occurrences to list for pCount */
static int lCountLimit  = -1;       /* max occurrences to list for lCount */
static int hSize = 0;               /* hash table size, small(0), med(1), large(2)  */
static LabId enterId;               /* id of ENTRY label in ngram */
static LabId exitId;                /* id of EXIT label in ngram */
static LabId nullId;                /* id of !NULL label in ngram */
static FileFormat ff=UNDEFF;        /* Label file format */
static MemHeap tmpHeap;             /* Temporary storage */
static MemHeap statHeap;            /* Permanent stats storage */

/* ---------------- Configuration Parameters --------------------- */

static ConfParam *cParm[MAXGLOBS];  /* config parameters filled in by GetConfig */
static int nParm = 0;               /* total num params */
static float disCount = 0.5;        /* discount for backoff */

/* ------------------ Process Command Line 
------------------------- *//* SetConfParms: set conf parms relevant to this tool */void SetConfParms(void){   double d;   int i;   nParm = GetConfig("HLSTATS", TRUE, cParm, MAXGLOBS);   if (nParm>0){      if (GetConfFlt(cParm,nParm,"DISCOUNT",&d)) disCount = d;      if (GetConfInt(cParm,nParm,"TRACE",&i)) trace = i;   }}void ReportUsage(void){   printf("\nUSAGE: HLStats [options] hmmList labFile...\n\n");   printf(" Option                                       Default\n\n");   printf(" -b fn    output bigram to file fn            off\n");   printf(" -c N     count num logical occs upto N       none\n");   printf(" -d       compute duration statistics         off\n");   printf(" -f f     set matrix bigram floor prob f      0.0\n");   printf(" -h N     set hashsize: medium(1), large(2)   small(0)\n");   printf(" -l s     output covering list of models to s off\n");   printf(" -o       generate wsj style back-off files   matrix\n");   printf(" -p N     count num physical occs upto N      none\n");   printf(" -s s1 s2 select start s1 and end s2 labels   !ENTER !EXIT\n");   printf(" -t n     set threshold for including bigram  0\n");   printf(" -u f     set back off unigram floor prob f   1.0\n");   PrintStdOpts("GIX");   printf("\n\n");}int main(int argc, char *argv[]){   char * labFn, *listfn, *s;   int i,fidx;   MLFEntry *me = NULL;   Transcription *t;   void InitStats(char *listfn);   void GatherStats(Transcription *t);   void OutputStats(void);   if(InitShell(argc,argv,hlstats_version,hlstats_vc_id)<SUCCESS)      HError(1300,"HLStats: InitShell failed");   InitMem();   InitMath();   InitWave();  InitLabel();   InitLM();   if (!InfoPrinted() && NumArgs() == 0)      ReportUsage();   if (NumArgs() == 0) Exit(0);   SetConfParms();      enterId=GetLabId("!ENTER",TRUE); /* All sentences should or are coerced */   exitId=GetLabId("!EXIT",TRUE);   /*  to start enterId and end exitId */   nullId=GetLabId("!NULL",TRUE);  /* Name for words not in list */   while (NextArg() 
== SWITCHARG) {      s = GetSwtArg();      if (strlen(s)!=1)          HError(1319,"HLStats: Bad switch %s; must be single letter",s);      switch(s[0]){      case 'b':         doBigram = TRUE;         if (NextArg() != STRINGARG)            HError(1319,"HLStats: Ngram output file name expected");         bigFile = GetStrArg();         break;      case 'c':         doLCount = TRUE;         lCountLimit = GetChkedInt(0,100000,s);         break;      case 'd':         doDurs = TRUE; break;      case 'f':         bigFloor = GetChkedFlt(0.0,1000.0,s);         break;      case 'h':         hSize =  GetChkedInt(1,2,s);         break;      case 'l':         doList = TRUE;          if (NextArg() != STRINGARG)            HError(1319,"HLStats: Output label list file name expected");         listFile = GetStrArg();         break;      case 'o':         doBOff = TRUE;         break;      case 'p':         doPCount = TRUE;         pCountLimit = GetChkedInt(0,100000,s);         break;      case 's':         if (NextArg() != STRINGARG)            HError(1319,"HLStats: ENTER label name expected");         enterId=GetLabId(GetStrArg(),TRUE);         if (NextArg() != STRINGARG)            HError(1319,"HLStats: EXIT label name expected");         exitId=GetLabId(GetStrArg(),TRUE);         break;      case 't':         bigThresh = GetChkedInt(0,100,s);         break;      case 'u':         uniFloor = GetChkedFlt(0.0,1000.0,s);         break;      case 'G':         if (NextArg() != STRINGARG)            HError(1319,"HLStats: Input label File format expected");         if((ff = Str2Format(GetStrArg())) == ALIEN)            HError(-1389,"HLStats: Warning ALIEN Label file format set");         break;      case 'I':         if (NextArg() != STRINGARG)            HError(1319,"HLStats: Input MLF file name expected");         LoadMasterFile(GetStrArg());         break;      case 'T':         if (NextArg() != INTARG)            HError(1319,"HLStats: Trace value expected");         trace = 
GetChkedInt(0,017,s); break;      default:         HError(1319,"HLStats: Unknown switch %s",s);      }   }   if (NextArg()!=STRINGARG)      HError(1319,"HLStats: Label list file name expected");   listfn = GetStrArg();   if (!(doDurs || doBigram || doList || doLCount || doPCount))      HError(1330,"HLStats: Nothing to do!");   InitStats(listfn);   i=0;   while (NumArgs()>0) {      if (NextArg()!=STRINGARG)         HError(1319,"HLStats: Input label file name expected");      labFn = GetStrArg();      if (IsMLFFile(labFn)) {         fidx = NumMLFFiles();         if ((me=GetMLFTable()) != NULL) {            while(me->next != NULL) me=me->next;            LoadMasterFile(labFn);            me=me->next;         }         else {            LoadMasterFile(labFn);            me=GetMLFTable();         }         while (me != NULL) {            if (me->type == MLF_IMMEDIATE && me->def.immed.fidx == fidx) {               if (trace&T_FIL) {                  printf("  Processing file %s\n",me->pattern); fflush(stdout);               }               t = LOpen(&tmpHeap,me->pattern,ff);               if (t->numLists<1)                  HError(-1330,"HLStats: Empty file %s",me->pattern);               else                  GatherStats(t),i++;               Dispose(&tmpHeap,t);            }            me = me->next;            if ((trace&T_BAS) && !(trace&T_FIL) &&                NumMLFEntries()>5000 && i%1000==0)                printf(". 
"),fflush(stdout);         }         if ((trace&T_BAS) && !(trace&T_FIL) && NumMLFEntries()>5000)            printf("\n");      } else {         if (trace&T_FIL) {            printf("  Processing file %s\n",labFn); fflush(stdout);         }         t = LOpen(&tmpHeap,labFn,ff);         if (t->numLists<1)            HError(-1330,"HLStats: Empty file %s",me->pattern);         else            GatherStats(t),i++;         Dispose(&tmpHeap,t);      }   }   if (trace&T_MEM)      PrintAllHeapStats();   OutputStats();   if (trace&T_MEM)      PrintAllHeapStats();   Exit(0);   return (0);          /* never reached -- make compiler happy */}/* PrintSettings: print info on stats requested */void PrintSettings(void){   if (doLCount || doPCount){      printf("Computing Label Occurrence Statistics\n");      if (doPCount)         printf("  upto %d physical\n",pCountLimit);      if (doLCount)         printf("  upto %d logical\n",lCountLimit);   }   if (doBigram) {      printf("Computing Bigram Statistics\n");      if (doBOff){         printf("  unifloor = %f\n",uniFloor);         printf("  bgthresh = %d\n",bigThresh);         printf("  discount = %f\n",disCount);      } else          printf("  bigfloor = %f\n",bigFloor);   }   if (doDurs)      printf("Computing Label Duration Statistics\n");   fflush(stdout);}/* -------------------- Gather Statistics -------------------- */typedef struct cntr{             /* Physical Label Occurrence Counters */   LabId name;                   /* Name */   int count;                    /* Times seen */} Cntr;typedef struct wordinfo{         /* Label Occurrence Counters */   LabId name;                   /* Name */   int count;                    /* Times seen */   Cntr *pCntr;                  /* Physical counter */   float minDur;                 /* Min duration */   float maxDur;                 /* Max duration */   float sumDur;                 /* Total duration */} WordInfo;#define ASIZE 2                  /* Need two words to id a bigram 
*/typedef struct aentry {          /* Storage for counts */   unsigned short word[ASIZE];   /* Bigram id */   int count;                    /* Count */   struct aentry *link;          /* Next entry in hash table */} AEntry;static int lSize;                /* Number of logical labels */static int pSize;                /* Number of physical labels */static WordInfo *lTab;           /* Table of logical counts/durations */static Cntr *pTab;               /* Table of physical counts */static AEntry **aetab;           /* Hash table for bigram accumulators  */static int aetabsize=0;          /* Size of hash table selected from .. */static int hashsizes[4]={ 87793, 188281, 715249 };static int nae=0;                /* Number of accumulators created *//* wd_cmp: word order relation used to sort lTab */static int wd_cmp(const void *v1,const void *v2){   WordInfo *w1,*w2;      w1=(WordInfo*)v1;w2=(WordInfo*)v2;   if (w1->name==enterId) return(-1);   else if (w2->name==enterId) return(1);   else if (w1->name==exitId) return(1);   else if (w2->name==exitId) return(-1);   return(strcmp(w1->name->name,w2->name->name));}/* InitWordInfo: Initialise contents of WordInfo rec */void InitWordInfo(WordInfo *w, LabId id, Cntr *pCntr){   w->name   = id;   w->pCntr  = pCntr;   w->minDur = 1E30;   w->maxDur = 0.0;   w->sumDur = 0.0;   w->count  = 0;}/* InitStats: Create and init all necessary global accumulators */void InitStats(char *listFn){   int h,p,l;   MLink q,hm;   HLink hmm;   HMMSet *hset;   CreateHeap(&tmpHeap,"TempHeap",MSTAK,1,1.0,8000,80000);   CreateHeap(&statHeap,"StatHeap",MSTAK,1,1.0,8000,240000);   hset=(HMMSet*)New(&tmpHeap,sizeof(HMMSet));   CreateHMMSet(hset,&tmpHeap,FALSE);   if(MakeHMMSet(hset,listFn)<SUCCESS)      HError(1328,"Initstats: MakeHMMSet failed");   /* Make sure we have entries for ENTER / EXIT labels */   if (FindMacroName(hset,'l',enterId)==NULL) {      hmm=(HMMDef*)New(&tmpHeap,sizeof(HMMDef));      NewMacro(hset,0,'l',enterId,hmm);      
NewMacro(hset,0,'h',enterId,hmm);   }   if (FindMacroName(hset,'l',exitId)==NULL) {      hmm=(HMMDef*)New(&tmpHeap,sizeof(HMMDef));      NewMacro(hset,0,'l',exitId,hmm);      NewMacro(hset,0,'h',exitId,hmm);   }   pSize=hset->numPhyHMM;   pTab=(Cntr*)New(&statHeap,(pSize+1)*sizeof(Cntr));      p=1;   pTab[0].name=nullId;   for (h=0; h<MACHASHSIZE; h++)      for (q=hset->mtab[h]; q!=NULL; q=q->next) {         if (q->type=='h') {            hmm=(HLink) q->structure;            hmm->hook=(Ptr)p;            pTab[p].name=q->id;            pTab[p].count=0;            p++;         }      }   lSize=hset->numLogHMM;   lTab=(WordInfo*)New(&statHeap,(lSize+1)*sizeof(WordInfo));   l=1;   InitWordInfo(lTab,nullId,pTab);   for (h=0; h<MACHASHSIZE; h++)      for (q=hset->mtab[h]; q!=NULL; q=q->next)         if (q->type=='l') {            hmm=(HLink) q->structure;            hm=FindMacroStruct(hset,'h',q->structure);            if (hm==NULL || hmm->hook==0)               HError(1390,"InitStats: No physical name found for %s",                      q->id->name);            InitWordInfo(lTab+l,q->id,pTab+(int)hmm->hook);            l++;         }   qsort(lTab+1,lSize,sizeof(WordInfo),wd_cmp);   for (l=1; l<=lSize; l++)      lTab[l].name->aux=(Ptr)l;   Dispose(&tmpHeap,hset);   if (doBigram) {   /* create aetab */      aetabsize=hashsizes[hSize];      aetab=(AEntry**)New(&statHeap,aetabsize*sizeof(AEntry*));      for (l=0;l<aetabsize;l++) aetab[l]=NULL;   }   if (trace&T_BAS) {      PrintSettings();      printf("\n\nRead Label list - %d/%d labels\n",lSize,pSize);   }}/* GetAEntry: find ngram in in aetab.  If not found and create   is set, then add new entry */AEntry *GetAEntry(int in[ASIZE],Boolean create){   AEntry *ae;   int i;   unsigned int hash;   hash=0;   for (i=0,hash=0;i<ASIZE;i++)      hash=((hash<<16)+in[i])%aetabsize;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -