lplex.c

来自「该压缩包为最新版htk的源代码,htk是现在比较流行的语音处理软件,请有兴趣的朋友下载使用」· C语言代码 · 共 935 行 · 第 1/2 页
935 行
/* ----------------------------------------------------------- *//*                                                             *//*                          ___                                *//*                       |_| | |_/   SPEECH                    *//*                       | | | | \   RECOGNITION               *//*                       =========   SOFTWARE                  */ /*                                                             *//*                                                             *//* ----------------------------------------------------------- *//* developed at:                                               *//*                                                             *//*      Speech Vision and Robotics group                       *//*      Cambridge University Engineering Department            *//*      http://svr-www.eng.cam.ac.uk/                          *//*                                                             *//* main authors: Valtcho Valtchev, Steve Young,                *//*               Julian Odell, Gareth Moore                    *//* ----------------------------------------------------------- *//*         Copyright:                                          *//*                                                             *//*          1994-2002 Cambridge University                     *//*                    Engineering Department                   *//*                                                             *//*   Use of this software is governed by a License Agreement   *//*    ** See the file License for the Conditions of Use  **    *//*    **     This banner notice must not be removed      **    *//*                                                             *//* ----------------------------------------------------------- *//*    File: LPlex:  compute perplexity                         *//* ----------------------------------------------------------- */char *lplex_version = "!HVER!LPlex:     3.3 
[CUED 28/04/05]";char *lplex_vc_id = "$Id: LPlex.c,v 1.1.1.1 2005/05/12 10:52:19 jal58 Exp $";#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include <math.h>#include <assert.h>#include "HShell.h"     /* HTK toolkit libraries */#include "HMem.h"#include "HMath.h"#include "HWave.h"#include "HLabel.h"#include "LWMap.h"      /* LM toolkit libraries */#include "LCMap.h"#include "LGBase.h"#include "LUtil.h"#include "LModel.h"#include "LPCalc.h"#include "LPMerge.h"#define VERSION   "3.2"#define T_TOP       0001#define T_SENT      0002#define T_OOV       0004#define T_PROB      0010#define T_SEL       0020#define MAX_OOV     500000#define MAX_LM      16#define MAX_TEST    16#define LBUF_SIZE   2048#define MAX_FILES   200000 typedef struct {   LabId wdid;   int count;} OOVEntry;typedef struct {   int nOOV;                /* number of OOVs */   int nTok;                /* total number of tokens */   int nUtt;                /* number of utterances */   int nWrd;                /* number of words predicted */   double logpp;            /* accumulated LM score */   double logpp2;           /* accumulated logp^2 score */   int uniqOOV;             /* number of unique oov's */   OOVEntry oov[MAX_OOV];   /* array of OOVs */} PStats;/* -------------------- Global variables ----------------------- */static int trace = 0;               /* trace level */static WordMap wList;               /* the word list */static char   *wlistFN = NULL;static int     nWords;              /* number of words in list */static int     nLModel;             /* number of loaded LMs */static LMInfo  lmInfo[MAX_LMODEL];  /* array of loaded LMs */static int     numTests;            /* number of tests to perform */static int     testInfo[MAX_TEST];  /* the array of test records */static PStats  sent;                /* per utterance accumulators */static PStats  totl;                /* global accumulator */static LabId sstId = NULL;          /* sentence start marker */static LabId 
senId = NULL;          /* sentence end marker */static LabId unkId = NULL;          /* sentence end marker */static LabId pLab[LBUF_SIZE];       /* label array */static Boolean skipOOV  = TRUE;     /* discard OOV in computation */static Boolean printOOV = FALSE;    /* print uniqe OOV's and their frequencies */static Boolean streamMode = FALSE;  /* stream mode */static FileFormat lff = UNDEFF;     /* label file format */static char   *nulName = "???";     /* name of null class */static LabId  nulClass;             /* Id of NULCLASS phone label */static int    unkEquiv = 0;         /* number of equivalent words outside the word list */static NameId **l2nId;              /* array of LabId -> NameId lookup tables */static LabId  *eqId;                /* label equivalence lookup table */static int cutOff[LM_NSIZE+1];      /* new cutoffs for COUNT-models */static float wdThresh[LM_NSIZE+1];  /* new wdThresh for COUNT-models */static char *outStreamFN = NULL;FILE *outStream;MemHeap tempHeap;                   /* Stores data valid only for file */MemHeap permHeap;                   /* Stores global stats *//* ---------------- Configuration Parameters --------------------- */static ConfParam *cParm[MAXGLOBS];static int nParm = 0;            /* total num params *//* ---------------- Static function prototypes required ---------- */static void Initialise(void);static void ProcessFiles(void);static void AddEquiv(char * cl, char * eq);/* ---------------- Process Command Line ------------------------- *//* SetConfParms: set conf parms relevant to this tool */void SetConfParms(void){   int i;   static char b[100];   nParm = GetConfig("LPLEX", TRUE, cParm, MAXGLOBS);   if (nParm>0){      if (GetConfInt(cParm,nParm,"TRACE",&i)) trace = i;     if (GetConfStr(cParm,nParm,"STARTWORD",b))   sstId = GetLabId(b, TRUE);      if (GetConfStr(cParm,nParm,"ENDWORD",b))     senId = GetLabId(b, TRUE);      if (GetConfStr(cParm,nParm,"UNKNOWNNAME",b)) unkId = GetLabId(b, TRUE);   }   if (!sstId) 
sstId = GetLabId(DEF_STARTWORD,TRUE);   if (!senId) senId = GetLabId(DEF_ENDWORD,TRUE);   if (!unkId) unkId = GetLabId(DEF_UNKNOWNNAME,TRUE);}void ReportUsage(void){   printf("\nUSAGE: LPlex [options] langmodel labelFiles...\n\n");   printf(" Option                                       Default\n\n");   printf(" -c n c  set pruning for n-gram to c          off\n");   printf(" -d n c  set weighted discount pruning to c   off\n");   printf(" -e s t  Label t is equivalent to s           off\n");   printf(" -i f s  interpolate with model s, weight f   off\n");   printf(" -n N    calculate N-gram perplexity          max in LM\n");   printf(" -o      print OOV word statistics            off\n");   printf(" -s fn   print prob stream to file fn         off\n");   printf(" -t      text stream mode                     off\n");   printf(" -u      use OOV words in context             off\n");   printf(" -w fn   use word list from fn                off\n");   printf(" -z s    Redefine null class name to s        ???\n");   PrintStdOpts("GIST");   printf("\n\n");}int main(int argc, char *argv[]){   int i;   char *s,*c,*e;   InitShell(argc,argv,lplex_version,lplex_vc_id);   InitMem();   InitMath();   InitWave();   InitLabel();   InitWMap();   InitCMap();   InitLUtil();   InitLModel();   InitPCalc();   InitPMerge();   SetConfParms();   if (!InfoPrinted() && NumArgs() == 0)      ReportUsage();   if (NumArgs() == 0) Exit(EXIT_SUCCESS);   nLModel = 1;   for (i=1; i<=LM_NSIZE; i++) cutOff[i] = 0, wdThresh[i] = 0.0;   CreateHeap(&permHeap, "permHeap", MSTAK, 1, 1.0, 4000, 20000);   CreateHeap(&tempHeap, "tempHeap", MSTAK, 1, 1.0, 8000, 40000);   while (NextArg() == SWITCHARG) {      s = GetSwtArg();      if (strlen(s)!=1)         HError(16619,"Bad switch %s; must be single letter",s);      switch(s[0]){         case 'c':            i = GetChkedInt(2,LM_NSIZE,s);	    cutOff[i] = GetChkedInt(1,1000,s);	    break;         case 'd':            i = GetChkedInt(2,LM_NSIZE,s);	    wdThresh[i] 
= GetChkedFlt(0.0,1E10,s);	    break;         case 'e':	    if (NextArg() != STRINGARG)	      HError(16619,"LPlex: Eq Class Name Expected");	    c = GetStrArg();	    if (NextArg() != STRINGARG)	      HError(16619,"LPlex: Eq Label Name Expected");	    e = GetStrArg();	    AddEquiv(c,e);	    break;	 case 'i':            if (NextArg()!=FLOATARG)	       HError(16619,"LPlex: Interpolation weight expected");	    lmInfo[nLModel].weight = GetChkedFlt(0.0,1.0,s);            if (NextArg()!=STRINGARG)	       HError(16619,"LPlex: Interpolation LM filename expected");	    lmInfo[nLModel].fn = GetStrArg();	    nLModel++;	    break;	 case 'n':	    testInfo[numTests++] = GetChkedInt(1, 10, s); break;	 case 'o':	    printOOV = TRUE; break;          case 's':	    if (NextArg() != STRINGARG)	       HError(16619,"LPlex: Prob Stream file name expected");	    outStreamFN = GetStrArg();	    break;	 case 't':	    streamMode = TRUE; break;	 case 'u':	    skipOOV = FALSE; break;         case 'w':	    if (NextArg() != STRINGARG)	       HError(16619,"LPlex: Word list file name expected");	    wlistFN = GetStrArg();	    break;         case 'z':	    if (NextArg() != STRINGARG)	       HError(16619,"LPlex: New null class name expected");	    nulName = GetStrArg();	    break;	 case 'G':	    if (NextArg() != STRINGARG)	       HError(16619,"Label File format expected");	    if((lff = Str2Format(GetStrArg())) == ALIEN)	       HError(16619,"Warning ALIEN Label file format set");	    break;	 case 'I':	    if (NextArg() != STRINGARG)	       HError(16619,"MLF file name expected");	    LoadMasterFile(GetStrArg()); break;	 case 'T':	    trace = GetChkedInt(0,077, s); break;         default:            HError(16619,"LPlex: Unknown switch %s",s);      }   }#ifdef HTK_TRANSCRIBER   if (trace&T_PROB) trace=trace^T_PROB;#endif   if (NextArg()!=STRINGARG)  /* load the language model */      HError(16619, "Language model filename expected");   lmInfo[0].fn = GetStrArg();   Initialise();   ProcessFiles();   
Exit(EXIT_SUCCESS);   return EXIT_SUCCESS; /* never reached -- make compiler happy */}/* -------------------- Label Equivalences ----------------------- */typedef struct _Equiv Equiv;          /* list of equivalent labels */struct _Equiv{   LabId classId;   LabId equivId;   Equiv *next;};static Equiv *eqList = NULL;          /* List of equivalent label ids *//* AddEquiv: Add the equivalent pair (cl,eq) to eqlist */static void AddEquiv(char * cl, char * eq){   Equiv *p;   p=(Equiv*) New(&permHeap,sizeof(Equiv));   p->classId = GetLabId(cl,TRUE);   p->equivId = GetLabId(eq,TRUE);   p->next = eqList; eqList = p;}/* NumEquiv: return the number of equivalence sets */static int NumEquiv(void){   Equiv *p;   int count = 0;   for (p=eqList; p!=NULL; p=p->next) count++;   return count;}/* NormaliseName: convert all equiv labels to class name and upper case if set */static void LinkEquiv(void){   Equiv *p;   LabId cl,eq;   for (p=eqList; p!=NULL; p=p->next) {      cl = p->classId; eq = p->equivId;      if (eq->aux==NULL) {	 eq->aux = (Ptr) (nWords + unkEquiv);	 unkEquiv++;      }      eqId[(int) eq->aux] = cl;   }}/* Initialise: perform global initialisations */static void Initialise(void){   int i,j,ndx;   float x;   LMInfo *li;   Boolean inLM;   LabId *wid,lab;   NameId *na,nid;   Boolean isPipe;   nulClass = GetLabId(nulName,TRUE);   /* normalise weights */   for (x=0.0, i=1; i<nLModel; i++)      x += lmInfo[i].weight;   lmInfo[0].weight = 1.0-x;   /* load all models */   for (li=lmInfo, i=0; i<nLModel; i++, li++) {      if (trace&T_TOP)	 printf("Loading language model from %s\n",li->fn);      li->lm = LoadLangModel(li->fn,NULL,1.0,LMP_LOG|LMP_COUNT,&permHeap);      if (li->lm->probType==LMP_COUNT)	 RebuildLM(li->lm,cutOff,wdThresh,LMP_LOG);      AttachAccessInfo(li->lm);   }   if (trace&T_TOP) {      printf("Using language model(s): \n");      for (li=lmInfo,i=0; i<nLModel; i++,li++)	 printf("  %d-gram %s, weight %.2f\n",li->lm->nSize,li->fn,li->weight);   }   if 
(numTests==0) {      numTests=1; testInfo[0] = lmInfo[0].lm->nSize;   }   /* load or create word list */   if (wlistFN!=NULL) {      /* load word list from file */      CreateWordList(wlistFN,&wList,nWords+10);      nWords = wList.used;      for (wid=wList.id, i=0; i<nWords; i++,wid++) /* assign lookup indices */	 (*wid)->aux = (Ptr) (i+1);   } else {      /* derive word list from LMs */      for (nWords=0,li=lmInfo, i=0; i<nLModel; i++, li++)      { 	 /* Obtain class-LM word list in a different way */	 if (li->lm->classLM)	 {	   na = li->lm->classBM;	   for (j=0; j<li->lm->classW; j++)	   {	     lab = GetLabId(na[j+1]->name, TRUE);	     if (lab->aux==NULL)	       lab->aux = (Ptr) (++nWords);	   }	 }	 else	 {	   na = li->lm->binMap;	   for (j=0; j<li->lm->vocSize; j++)	   {	     lab = GetLabId(na[j+1]->name,TRUE);	     if (lab->aux==NULL)	       lab->aux = (Ptr) (++nWords);	   }	 }      }      CreateWordList(NULL,&wList,nWords+10);      for (li=lmInfo, i=0; i<nLModel; i++, li++) {	/* Obtain class-LM word list in a different way */	if (li->lm->classLM)	{	  na = li->lm->classBM;	  for (j=0; j<li->lm->classW; j++)	  {	    lab = GetLabId(na[j+1]->name,TRUE);	    ndx = ((int) lab->aux) - 1;	    wList.id[ndx] = lab;	  }	}	else	{	  na = li->lm->binMap;	  for (j=0; j<li->lm->vocSize; j++)	  {	    lab = GetLabId(na[j+1]->name,TRUE);	    ndx = ((int) lab->aux) - 1;	    wList.id[ndx] = lab;	  }	}      }      wList.used = nWords;   }   if (trace&T_TOP) {      printf("Found %d unique words in %d model(s)\n",nWords,nLModel);      fflush(stdout);   }   if (unkId->aux==NULL && !skipOOV) {      HError(16620,"LPlex: OOV class symbol %s not in word list",unkId->name);   }   if (sstId->aux==NULL) {      HError(16620,"LPlex: sentence start symbol %s not in word list",sstId->name);   }   if (senId->aux==NULL) {      HError(16620,"LPlex: sentence end symbol %s not in word list",senId->name);   }   /* create lookup table */   l2nId = (NameId **) New(&permHeap,nLModel*sizeof(NameId *));   
/* create LabId -> NameId lookup arrays (one per LM) */   for (li=lmInfo, i=0; i<nLModel; i++, li++, na++) {      na = (NameId *) New(&permHeap,(nWords+2)*sizeof(NameId));      for (wid = wList.id, j=0; j<nWords; j++, wid++) {	if (li->lm->classLM)	{	  nid = na[(int) ((*wid)->aux)] = GetNameId(li->lm->classH, (*wid)->name, FALSE);	}	else	{	  nid = na[(int) ((*wid)->aux)] = GetNameId(li->lm->htab, (*wid)->name, FALSE);	}#ifdef SANITY
lplex.c - 源码说明

本页面展示了「该压缩包为最新版htk的源代码,htk是现在比较流行的语音处理软件,请有兴趣的朋友下载使用」中的 lplex.c 源码文件，采用 C语言编程语言编写，共 935 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与htk相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?