⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lplex.c

📁 隐马尔科夫模型工具箱
💻 C
📖 第 1 页 / 共 2 页
字号:
	 if (nid==NULL)	    HError(-16625,"Unable to find word %s in model %s\n",(*wid)->name,li->fn);#endif      }      l2nId[i] = na;   }   /* ensure words present at least in one model */   for (wid = wList.id, j=0; j<nWords; j++, wid++) {      for (inLM=FALSE,i=0; i<nLModel; i++, li++)	 if (l2nId[i][(int) ((*wid)->aux)]!=NULL)	    inLM = TRUE;      if (!inLM)	 HError(16625,"Unable to find word %s in any model\n",(*wid)->name);   }   /* create equivalence class lookup array */   eqId = (LabId *) New(&permHeap,(nWords+NumEquiv()+2)*sizeof(NameId));   for (wid = wList.id, i=0; i<nWords; i++, wid++) {      eqId[(int) ((*wid)->aux)] = NULL;   }   /* link equivalence classes */   LinkEquiv();   /* open output stream */   if (outStreamFN != NULL)     if ((outStream = FOpen(outStreamFN,NoOFilter,&isPipe)) == NULL)        HError(16610,"Initialise: unable to open output file %s",outStreamFN);}/* -------------------- OOV calculation/statistics ---------------------*//* CmpOOVE: qsort comparison for oov entries */static int CmpOOVE(const void *p1, const void *p2){   return strcmp(((OOVEntry *)p1)->wdid->name, ((OOVEntry *)p2)->wdid->name);}/* SortOOV: sort/unique OOV array of ps */static int SortOOV(PStats *ps){   int i, c;   OOVEntry *ove;   if (ps->uniqOOV==0)     return 0;   qsort(ps->oov, ps->uniqOOV, sizeof(OOVEntry), CmpOOVE);   c = 0; ove = ps->oov;   for (i=c+1; i<ps->uniqOOV; i++)     if (CmpOOVE(ove+c, ove+i)==0)       ove[c].count += ove[i].count;     else {	c++; ove[c] = ove[i];     }   return (ps->uniqOOV = c+1);}/* StoreOOV: store OOV wdid/count into ps */static void StoreOOV(PStats *ps, LabId wdid, int count){   int n;   ps->oov[ps->uniqOOV].wdid  = wdid;   ps->oov[ps->uniqOOV].count = count;   ps->uniqOOV++; ps->nOOV++;   if (ps->uniqOOV==MAX_OOV) {     n = SortOOV(ps);     printf("StoreOOV: sorting OOVs, compacting %d -> %d\n",MAX_OOV,n);     if (n==MAX_OOV)       HError(16630,"Maximum number of unique OOV's [%d] reached\n",MAX_OOV);  }}/* ZeroStats: zero counts in PStats */static void ZeroStats(PStats *ps){   ps->nOOV = 0;   ps->nUtt = 0;   ps->nWrd = 0;   ps->nTok = 0;   ps->uniqOOV = 0;   ps->logpp = ps->logpp2 = 0.0;}/* AddStats: add statistics from ps1 to ps2 */static void AddStats(PStats *ps1, PStats *ps2){   int i;   for (i=0; i<ps1->uniqOOV; i++)     StoreOOV(ps2, ps1->oov[i].wdid, ps1->oov[i].count);   ps2->nTok += ps1->nTok;   ps2->nUtt += ps1->nUtt;   ps2->nWrd += ps1->nWrd;   ps2->logpp += ps1->logpp;   ps2->logpp2 += ps1->logpp2;}/* PrintInfo: print statistics from ps */static void PrintInfo(PStats *ps, Boolean showOOV){   int i;   float ovr;   LMInfo *li;   OOVEntry *ove;   double a, b, ppl, stdev;   /* print perplexity measures */   a = (ps->logpp)/(double) (ps->nWrd); b = (ps->logpp2)/(double) (ps->nWrd);   ppl = exp(-a); stdev = b - a*a;   printf("perplexity %.4f, var %.4f, utterances %d, words predicted %d\n",	  ppl, stdev, ps->nUtt, ps->nWrd);   fflush(stdout);   /* calculate OOV rate and statistics */   ovr = ((float) (ps->nOOV) / (float) (ps->nTok - ps->nUtt))*100.0;   printf("num tokens %d, OOV %d, OOV rate %.2f%% (excl. %s)\n",	  ps->nTok, ps->nOOV, ovr, senId->name);   if (showOOV && (ps->uniqOOV > 0)) {      SortOOV(ps);      printf("unique OOVs [%d]\n", ps->uniqOOV);      for (ove=ps->oov, i=0; i<ps->uniqOOV; i++, ove++)	printf("%s \t%d\n", ove->wdid->name, ove->count);      fflush(stdout);   }   for (li=lmInfo, i=0; i<nLModel; i++, li++) {#ifndef HTK_TRANSCRIBER      printf("\nAccess statistics for %s:\n", li->fn);      PrintTotalAccessStats(stdout,li->lm);#endif      ResetAccessInfo(li->lm);   }}/*-------------------------- Perplexity calculation --------------------------*/#define IS_UNK(id)  (id==unkId || id->aux==NULL)#define IS_SST(id)  (id==sstId)#define IS_SEN(id)  (id==senId)static LabId GetEQLab(LabId id){   LabId cl;   if (id->aux==NULL)      return id;   if ((cl = (LabId) eqId[(int) (id->aux)])==NULL)      return id;   return cl;}/* GetProb: return nSize-gram probability for ngram in wlab */static double GetProb(LabId *wlab, int nSize){   /*      this routine will return the interpolated nSize-gram probability for      the words in wlab. Note that the context maybe shortened in the      case of multiple LMs and words which do not occur in some of them.   */   int i,j;   LMInfo *li;   Boolean inThisLM,inAnyLM;   double x,prob,psum;   NameId nGram[LM_NSIZE];   if (nLModel==1) {      inThisLM = TRUE;      for (j=0; j<nSize; j++) {	 if ((nGram[j] = l2nId[0][(int) (wlab[j]->aux)])==NULL)	    inThisLM = FALSE;      }      if (inThisLM) {         prob = GetNGramProb(lmInfo[0].lm, nGram, nSize);      }      else if (nSize > 1)         prob = GetProb(wlab+1,nSize-1);      else {         prob = LZERO;         HError(-16690,"GetProb: assigning zero probability");      }   } else {      psum = 0.0;      inAnyLM = FALSE;      for (li=lmInfo, i=0; i<nLModel; i++, li++) {	 for (inThisLM=TRUE, j=0; j<nSize; j++) {	    if ((nGram[j] = l2nId[i][(int) (wlab[j]->aux)])==NULL)	       inThisLM = FALSE;	 }	 if (!inThisLM)	    continue;         x = GetNGramProb(li->lm, nGram, nSize);#ifdef INTERPOLATE_MAX	 if ((x = exp(x)) > psum)	    psum = x;#else	 psum += li->weight*exp(x);#endif	 inAnyLM = TRUE;      }      if (inAnyLM)	 prob = log(psum);      else if (nSize > 1)	 prob = GetProb(wlab+1,nSize-1);      else {	 prob = LZERO;	 HError(-16690,"GetProb: assigning zero probability");      }   }   return prob;}/* CalcPerplexity: compute perplexity and other statistics */static void CalcPerplexity(PStats *sent, LabId *pLab, int numPLabs, int nSize){   int i,j;   LabId *p;   float prob;   Boolean hasOOV;   for (p=pLab, i=nSize-1; i<numPLabs; i++, p++)   {      if (pLab[i]==unkId)	 continue;	           /* cannot predict OOVs */      if (skipOOV)      {	 hasOOV = FALSE;	 for (j=1; j<nSize; j++)	 {	    if (pLab[i-j]==unkId)	    {	      hasOOV=TRUE;	      break;	    }	 }	 if (hasOOV) continue; /* skip to next label since context contains OOV */      }      prob = GetProb(p, nSize);      sent->nWrd++; sent->logpp += prob; sent->logpp2 += prob*prob;      if (outStreamFN != NULL)         fprintf(outStream,"%e\n",exp(prob));      if (trace&T_PROB)      {	 printf("logP(%s |", pLab[i]->name);	 for (j=1; j<nSize; j++)	 {	   printf(" %s%s", (j==1)?"":",", pLab[i-j]->name);	 }	 printf(") = %.4f\n", prob);	 /* if (trace&T_INST_INFO) PrintInstStats(nSize); */	 fflush(stdout);      }   }   if (trace&T_SENT)      PrintInfo(sent,FALSE);}/* ProcessLabelFile: compute perplexity and related statistics from labels */static void ProcessLabelFile(char *fn, int nSize){   LLink ll;   double ppl;   LabList *ref;   LabId lab;   Transcription *tr;   int i,numPLabs,nLabel;   tr = LOpen(&tempHeap, fn, lff);   if (tr->numLists < 1) {      HError(-16635,"ProcessLabelFile: transcription file %s is Empty",fn);      return;   }   ref = GetLabelList(tr, 1);   if (ref->head->succ == ref->tail) {      HError(-16635,"ProcessLabelFile: transcription file %s is Empty",fn);      return;   }   if (trace>0) {      printf("Processing label file: %s\n", fn);      fflush(stdout);   }   nLabel = CountLabs(ref);   ZeroStats(&sent);   sent.nTok = nLabel + 2; sent.nUtt = 1;   /* copy labels into pLab, mapping OOVs */   numPLabs = 0;   if (sstId!=NULL)             /* add sentence start marker(s) */      for (i=0; i<(nSize-1); i++) pLab[numPLabs++] = sstId;   for (i=0,ll=ref->head->succ; i<nLabel; i++,ll=ll->succ) {      lab = GetEQLab(ll->labid);      if ((i==0) && IS_SST(lab)) {	sent.nTok--; continue;      }      if ((i==(nLabel-1)) && IS_SEN(lab)) {	 sent.nTok--; continue;      }      if (IS_UNK(lab)) {	 if (trace&T_OOV)	    printf("mapping OOV: %s\n", lab->name);	 StoreOOV(&sent,lab,1); lab = unkId;      }      pLab[numPLabs++] = lab;      if (numPLabs>=LBUF_SIZE) {         HError(16650, "Maximum utterance length in a label file exceeded (limit is compiled to be %d tokens)",                LBUF_SIZE);      }   }   if (senId!=NULL)             /* add sentence end marker */     pLab[numPLabs++] = senId;   CalcPerplexity(&sent, pLab, numPLabs, nSize);   AddStats(&sent, &totl);   if (trace&T_SEL) {     /* compact info for sentence selection */      ppl = exp(-(sent.logpp)/(double) (sent.nWrd));      printf("#! %.4f", ppl);      for (i=0, ll=ref->head->succ; i<nLabel; i++, ll=ll->succ)	 printf(" %s", ll->labid->name);      printf("\n"); fflush(stdout);   }}/* PPlexStream: compute perplexity and related statistics */static void ProcessTextStream(char *fn, int nSize){   int i;   FILE *f;   LabId lab=0;   double ppl;   int numPLabs;   Boolean isPipe;   char word[256];   if (fn!=NULL) {      if ((f=FOpen(fn, LMTextFilter, &isPipe))==NULL)	 HError(16610,"ProcessTextStream: unable to open file %s", fn);   } else {      f = stdin;   }   if (trace>0) {      printf("Processing text stream: %s\n", (fn==NULL)?"<stdin>":fn);      fflush(stdout);   }   numPLabs = 0;   ZeroStats(&sent);   sent.nUtt = 1; sent.nTok = 0;   while ((fscanf(f, "%200s", word))==1) {      if (strlen(word)>=200)	 HError(-16640, "ProcessTextStream: word too long, will be split: %s\n", word);      lab = GetEQLab(GetLabId(word, TRUE));   if (IS_SST(lab)) {	 numPLabs = 0;	 for (i=0; i<(nSize-1); i++) pLab[numPLabs++] = sstId;	 ZeroStats(&sent);	 sent.nUtt = 1; sent.nTok = 1;	 continue;      }      if (IS_UNK(lab)) {	 if (trace&T_OOV)	    printf("mapping OOV: %s\n", lab->name);	 StoreOOV(&sent,lab,1); lab = unkId;      }      pLab[numPLabs++] = lab; sent.nTok++;      if (numPLabs>=LBUF_SIZE) {         HError(16645,"ProcessTextStream: word buffer size exceeded - too many words without a sentence end (%d)",LBUF_SIZE);	 CalcPerplexity(&sent,pLab,numPLabs,nSize);	 numPLabs = 0;      }      if (IS_SEN(lab)) {	 CalcPerplexity(&sent,pLab,numPLabs,nSize);	 AddStats(&sent, &totl);	 if (trace&T_SEL) {     /* compact info for sentence selection */	   ppl = exp(-(sent.logpp)/(double) (sent.nWrd));	   printf("#! %.4f", ppl);	   for (i=nSize-1; i<numPLabs; i++)	     printf(" %s", pLab[i]->name);	   printf("\n"); fflush(stdout);	 }	 ZeroStats(&sent);      }   }   AddStats(&sent,&totl);   if (fn!=NULL)      FClose(f,isPipe);}/* ProcessFiles: process label files */static void ProcessFiles(){   int nSize;   char *labFn;   MLFEntry *me;   char *inpfn[MAX_FILES];   int i,t,numFiles,fidx;   numFiles = 0;   while (NumArgs()>0){      if (NextArg()!=STRINGARG)	HError(16619,"ProcessFiles: label file (MLF) name expected");      inpfn[numFiles++] = CopyString(&gstack, GetStrArg());      if (numFiles == MAX_FILES) {	 HError(-16619,"Processing only the first %d files",MAX_FILES);      }   }   for (t=0; t<numTests; t++) {      ZeroStats(&totl);      nSize = testInfo[t];      printf("LPlex test #%d: %d-gram\n", t, nSize);      if (numFiles==0) {	 ProcessTextStream(NULL, nSize);	 continue;      }      for (i=0; i<numFiles; i++) {	 labFn = inpfn[i];	 if (streamMode) {	    ProcessTextStream(labFn,nSize);	 } else {	    if (IsMLFFile(labFn)) {	       if (trace>0) {		  printf("Processing MLF: %s\n", labFn);		  fflush(stdout);	       }	       fidx = NumMLFFiles();	       if ((me=GetMLFTable()) != NULL) {		  while(me->next != NULL) me=me->next;		  LoadMasterFile(labFn);		  me=me->next;	       }	       else{		  LoadMasterFile(labFn);		  me=GetMLFTable();	       }	       while (me != NULL) {		  if (me->type == MLF_IMMEDIATE && me->def.immed.fidx == fidx) {		     ProcessLabelFile(me->pattern,nSize);		  }		  me = me->next;	       }	    } else {	       ProcessLabelFile(labFn,nSize);	    }	 }      }      PrintInfo(&totl, printOOV);   }}/* --------------------- End of LPlex.c  ------------------------ */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -