
📄 HLStats.c

📁 This archive contains the source code of the latest version of HTK, a widely used speech processing toolkit. Anyone interested is welcome to download and try it.
💻 C
📖 Page 1 of 2
   for (ae=aetab[hash];ae!=NULL;ae=ae->link)
      if (ae->word[0]==in[0] && ae->word[1]==in[1])
         break;
   if (ae==NULL && create) {
      nae++;
      ae=(AEntry*)New(&statHeap,sizeof(AEntry));
      for (i=0;i<ASIZE;i++)
         ae->word[i]=in[i];
      ae->count=0;
      ae->link=aetab[hash];
      aetab[hash]=ae;
   }
   return(ae);
}

/* GatherStats: update stats using given label file */
void GatherStats(Transcription *t)
{
   LLink l;
   LabList *ll;
   WordInfo *lt;
   int i,j,st,en,lab,in[ASIZE];
   float dur;
   AEntry *ae;

   ll=GetLabelList(t,1);
   st=1;  en=CountLabs(ll);
   /* If first label is enterId then we need to skip it */
   l = GetLabN(ll,1);
   if (l->labid==enterId) st++;
   /* If the final label is exitId then it should be skipped */
   l = GetLabN(ll,en);
   if (l->labid==exitId) en--;
   /* Coerce previous labels to be enterId */
   for (i=0; i<ASIZE; i++) in[i]=(int)enterId->aux;
   lt = lTab+(int)enterId->aux; ++lt->count;

   /* Process actual labels in list */
   for (i=st; i<=en; i++) {
      l = GetLabN(ll,i);
      lab=(int)l->labid->aux;
      dur = (float)(l->end - l->start)/10000.0;
      lt=lTab+lab;
      /* increment stats */
      lt->count++;
      lt->sumDur += dur;
      if (dur < lt->minDur) lt->minDur=dur;
      if (dur > lt->maxDur) lt->maxDur=dur;
      lt->pCntr->count++;
      if (doBigram) {
         /* We ignore all transitions into enterId and exitId */
         /* May wish to warn user about badly formed sentences */
         if (!(lab==(int)enterId->aux || (lab==(int)exitId->aux))) {
            for (j=ASIZE-1;j>0;j--) in[j]=in[j-1];
            in[0]=lab;
            ae = GetAEntry(in,TRUE);
            ae->count++;
         }
      }
   }
   /* Deal with transition into EXIT */
   if (doBigram) {
      for (j=ASIZE-1;j>0;j--) in[j]=in[j-1];
      in[0]=(int)exitId->aux;
      ae = GetAEntry(in,TRUE);
      ae->count++;
   }
   lt = lTab+(int)exitId->aux; ++lt->count;
}

/* ----------------------- Output Results -------------------- */

/* CmpCntr: return sign(c1->count - c2->count) , if equal then
   use same ordering as in lTab */
int CmpCntr(const void *p1, const void *p2)
{
   Cntr *c1, *c2;
   int diff;

   c1=(Cntr *)p1; c2=(Cntr *)p2;
   diff=c1->count-c2->count;
   if (diff==0) return((int)c2->name->aux-(int)c1->name->aux);
   else return(diff);
}

/* CmpWordInfo: return sign(c1->count - c2->count) , if equal then
   use same ordering as in lTab */
int CmpWordInfo(const void *p1, const void *p2)
{
   WordInfo *c1, *c2;
   int diff;

   c1=(WordInfo *)p1; c2=(WordInfo *)p2;
   diff=c1->count-c2->count;
   if (diff==0) return((int)c2->name->aux-(int)c1->name->aux);
   else return(diff);
}

/* OutputCounts: output logical/physical counters */
void OutputCounts(void)
{
   int i;
   WordInfo *l;
   Cntr *p;

   if (doLCount){
      qsort(lTab+1,lSize,sizeof(WordInfo),CmpWordInfo);
      printf("\nLogical Model Counts:\n");
      printf("       Label   LCount   PCount\n");
      for (i=0,l=lTab+1; i<lSize; i++,l++){
         if (l->count > lCountLimit) break;
         printf("%12s %8d %8d\n",l->name->name,l->count,l->pCntr->count);
      }
   }
   if (doPCount){                      /* Breaks log->phy relation */
      qsort(pTab+1,pSize,sizeof(Cntr),CmpCntr);
      printf("\nPhysical Model Counts:\n");
      printf("       Label   PCount\n");
      for (i=0,p=pTab+1; i<pSize; i++,p++){
         if (p->count > pCountLimit) break;
         printf("%12s %8d\n",p->name->name,p->count);
      }
   }
   printf("\n");
   fflush(stdout);
}

/* OutputDurs: output duration stats */
void OutputDurs(void)
{
   int i;
   WordInfo *l;

   printf("\nDuration Statistics:\n");
   printf("       Label   Count  AveDur  MinDur  MaxDur\n");
   for (i=0,l=lTab+1; i<lSize; i++,l++){
      printf("%12s %7d",l->name->name,l->count);
      if (l->count>0 && l->name != enterId && l->name != exitId) {
         printf("%8.1f",l->sumDur/l->count);
         if (l->minDur < 1E30)
            printf("%8.1f",l->minDur);
         else
            printf("%8s","---");
         printf("%8.1f",l->maxDur);
      }
      printf("\n");
   }
   printf("\n"); fflush(stdout);
}

/* OutputList: output a list of all labels that occurred at least once */
void OutputList(void)
{
   int i;
   FILE *f;
   WordInfo *l;

   if ((f=fopen(listFile,"w"))==NULL)
      HError(1311,"OutputList: Cannot create label list file %s",listFile);
   for (i=0,l=lTab+1; i<lSize; i++,l++)
      if (l->count>0)
         fprintf(f,"%s\n",l->name->name);
   fclose(f);
}

/* ------------------- Bigram Handling ---------------------- */

#define log2(x) (log(x)/log(2.0))
#define ent2(x) ((x)>0.0?((x)*log2(x)):0.0)

/* RebuildAETab: rebuild the aetab in aelists such that all
   ngrams (n,x) are stored in the list aelists[n]. */
void RebuildAETab(AEntry **aelists)
{
   AEntry *ae,*nx;
   int h;

   for (h=0; h<aetabsize; h++) {
      for (ae=aetab[h]; ae!=NULL; ae=nx) {
         nx=ae->link;
         if (ae->word[1]==0) continue;
         ae->link=aelists[ae->word[1]];
         aelists[ae->word[1]]=ae;
      }
      aetab[h]=NULL;
   }
}

/* se_cmp: ordering relation for SEntrys based on word id */
int se_cmp(const void *v1,const void *v2)
{
   SEntry *s1,*s2;

   s1=(SEntry*)v1;  s2=(SEntry*)v2;
   return((int)(s1->word-s2->word));
}

/* Simple calculation of backoff weights - 0.5 subtracted from each count */
static float BuildNEntry(NEntry *ne,Vector boff,float bent)
{
   SEntry *cse;
   AEntry *ae;
   double bowt,bsum,cnt,tot,ent,prob;

   ne->nse=0;
   tot=cnt=0.0;
   bsum=1.0;
   if (ne->word[0]!=(int)exitId->aux)
      for (ae=(AEntry *) ne->user; ae!=NULL; ae=ae->link) {
         tot+=ae->count;
         if (ae->word[0]!=0 && ae->word[0]!=(int)enterId->aux &&
             ae->count>bigThresh)
            cnt+=(ae->count-disCount),ne->nse++,bsum-=boff[ae->word[0]];
      }
   if (ne->nse==0) {
      ne->se=NULL;
      ne->bowt=0.0;
      ent=bent;
   }
   else {
      ne->se=(SEntry*)New(&statHeap,sizeof(SEntry)*ne->nse);
      bowt = (bsum>0.0) ? (1.0-cnt/tot)/bsum : 0.0;
      ent  = (bowt>0.0) ? bowt*(bent-log2(bowt)) : 0.0;
      for (cse=ne->se,ae=(AEntry *) ne->user; ae!=NULL; ae=ae->link)
         if (ae->word[0]!=0 && ae->word[0]!=(int)enterId->aux &&
             ae->count>bigThresh) {
            prob=((double)ae->count-disCount)/tot;
            cse->word=ae->word[0];
            cse->prob=log(prob);
            ent -= ent2(prob);
            prob = bowt*boff[cse->word];
            ent += ent2(prob);
            cse++;
         }
      if (bowt>0.0) ne->bowt=log(bowt);
      else ne->bowt=LZERO;
      qsort(ne->se,ne->nse,sizeof(SEntry),se_cmp);
   }
   return(ent);
}

/* OutputBoBigram: output ARPA/MIL-LL style back off bigram */
void OutputBoBigram(void)
{
   LModel lm;
   NGramLM *nglm;
   NEntry *ne;
   SEntry *se;
   AEntry **aelists;
   lmId ndx[NSIZE];
   int i,tot,counts[NSIZE+1];
   double uent,ent,bent;

   lm.heap=&statHeap;
   lm.type=boNGram;
   counts[1]=lSize; counts[2]=nae;
   for(i=3;i<NSIZE+1;i++)
      counts[i]=0;
   nglm=CreateBoNGram(&lm,lSize,counts);  /* Give max size at creation */
   for (i=1;i<=lSize;i++)
      nglm->wdlist[i]=lTab[i].name;
   aelists=(AEntry**)New(&tmpHeap,sizeof(AEntry*)*(lSize+1));
   for (i=1;i<=lSize;i++) aelists[i]=NULL;
   RebuildAETab(aelists);          /* Un-hash hashtable */

   for (i=1,tot=0.0;i<=lSize;i++) {    /* Calculate unigrams first */
      if (i==(int)enterId->aux)
         nglm->unigrams[i]=0.0;
      else if (lTab[i].count<uniFloor)
         nglm->unigrams[i]=uniFloor;
      else
         nglm->unigrams[i]=lTab[i].count;
      tot+=nglm->unigrams[i];
   }
   for (i=1,uent=0.0;i<=lSize;i++,se++) {
      nglm->unigrams[i]=nglm->unigrams[i]/tot;
      uent-=ent2(nglm->unigrams[i]);
   }

   nglm->counts[1]=lSize;           /* Calculate real sizes during build */
   nglm->counts[2]=0;
   for (i=0; i<NSIZE; i++) ndx[i]=0;
   if (trace&T_BIG) {
      printf("\n  UNIGRAM NEntry        - %4d foll, ent %.3f [= %.3f]\n\n",
             lSize,uent,pow(2.0,uent));
      printf("  BIGRAMS NEntries\n");
      fflush(stdout);
   }
   for (i=1,bent=0.0;i<=lSize;i++) {
      ndx[0]=i;
      ne=GetNEntry(nglm,ndx,TRUE);
      ne->user=aelists[i];
      ent = BuildNEntry(ne,nglm->unigrams,uent);
      nglm->counts[2]+=ne->nse;
      if (trace&T_BIG)
         if (i!=(int)exitId->aux){
            if (i==(int)enterId->aux)
               bent+=nglm->unigrams[(int)exitId->aux]*ent;
            else
               bent+=nglm->unigrams[i]*ent;
            printf("   %-20s - %4d foll, ent %6.3f [= %6.2f]\n",
                   lTab[i].name->name,ne->nse,ent,pow(2.0,ent));
            fflush(stdout);
         }
   }
   Dispose(&tmpHeap,aelists);

   if (trace&T_BIG) {
      printf("\n  BIGRAM: training data entropy %.3f (perplexity %.2f)\n",
             bent,pow(2.0,bent));
      fflush(stdout);
   }
   ndx[0]=0;                        /* Set up unigram nentry separately */
   ne=GetNEntry(nglm,ndx,TRUE);
   ne->nse=lSize;
   se=ne->se=(SEntry*)New(nglm->heap,sizeof(SEntry)*lSize);
   for (i=1;i<=lSize;i++,se++) {
      se->word=i;
      if (nglm->unigrams[i]>0)
         se->prob=nglm->unigrams[i]=log(nglm->unigrams[i]);
      else
         se->prob=nglm->unigrams[i]=LZERO;
   }
   lm.name=CopyString(lm.heap,bigFile); /* Name and write to disk */
   WriteLModel(&lm,bigFile,0);
}

/* OutputMatBigram: output matrix style bigram */
void OutputMatBigram(void)
{
   LModel lm;
   MatBiLM *matbi;
   AEntry **aelists,*ae;
   Vector vec;
   double vsum,fsum,tot,scale;
   double ent,bent,prob,fent;
   int i,j,nf,tf=0,nu,tu=0,np,tp=0,tn=0;

   lm.heap=&statHeap;
   lm.type=matBigram;
   matbi=CreateMatBigram(&lm,lSize);
   for (i=1;i<=lSize;i++)
      matbi->wdlist[i]=lTab[i].name;
   aelists=(AEntry**)New(&tmpHeap,sizeof(AEntry*)*(lSize+1));
   for (i=1;i<=lSize;i++) aelists[i]=NULL;
   RebuildAETab(aelists);          /* Un-hash hashtable */

   if (trace&T_BIG) {
      printf("\n  BIGRAMS from MatBigram\n");
      fflush(stdout);
   }
   bent=0.0;
   fent = ent2(bigFloor);
   for (i=1;i<=lSize;i++) {
      vec=matbi->bigMat[i];
      for (ae=aelists[i],tot=0.0; ae!=NULL; ae=ae->link)
         if (ae->word[0]!=0) tot += ae->count;
      fsum = (lSize-1)*bigFloor; vsum=0.0;
      for (ae=aelists[i];ae!=NULL;ae=ae->link)
         if (ae->count/tot > bigFloor && ae->word[0]!=0)
            fsum -= bigFloor, vsum += ae->count;
         else
            ae->count=0;
      scale = (1.0 - fsum) / vsum;
      for (j=1;j<=lSize;j++) {
         if (j==(int)enterId->aux) vec[j]=0.0;
         else if (tot==0.0) vec[j]=1.0/(lSize-1);
         else vec[j]=bigFloor;
      }
      for (ae=aelists[i];ae!=NULL;ae=ae->link)
         if (ae->count>0)
            vec[ae->word[0]]=ae->count*scale;

      if (trace&T_BIG) {
         nf=nu=np=0;
         if (tot==0.0)
            ent=-log2(1.0/(lSize-1)),prob=1.0,nu=lSize-1;
         else
            ent=-(lSize-1)*fent,
               prob=bigFloor*(lSize-1),
               nf+=lSize-1;
         for (ae=aelists[i];ae!=NULL;ae=ae->link)
            if (ae->count>0) {
               prob += vec[ae->word[0]]-bigFloor;
               ent -= ent2(vec[ae->word[0]]);
               ent += fent;
               nf--;  np++;
            }
         if (i!=(int)exitId->aux){
            j=lTab[i].count;
            bent+=j*ent; tn+=j;
            if (tot==0.0)
               printf("   %-20s - %4d unis, ent %6.3f [= %6.2f] (P=%7.5f)\n",
                      lTab[i].name->name,nu,ent,pow(2.0,ent),prob);
            else
               printf("   %-20s - %4d foll, ent %6.3f [= %6.2f] (P=%7.5f)\n",
                      lTab[i].name->name,np,ent,pow(2.0,ent),prob);
            fflush(stdout);
         }
         tf+=nf; tu+=nu; tp+=np;
      }
   }
   if (trace&T_BIG) {
      bent/=tn;
      printf("\n  BIGRAM: training data entropy %.3f (perplexity %.2f)\n",
             bent,pow(2.0,bent));
      printf("         Estimated %d, floored %d, unigrammed %d for %d\n",
             tp,tf,tu,lSize);
      fflush(stdout);
   }
   Dispose(&tmpHeap,aelists);

   /* convert probabilities to logs */
   for (i=1;i<=matbi->numWords;i++) {
      vec = matbi->bigMat[i];
      for (j=1; j<=matbi->numWords; j++){
         vec[j] = ((vec[j]<MINLARG)?LZERO:log(vec[j]));
      }
   }
   lm.name=CopyString(lm.heap,bigFile); /* Name and write to disk */
   WriteLModel(&lm,bigFile,0);
}

/* OutputStats: print desired stats on standard output */
void OutputStats(void)
{
   if (doDurs) OutputDurs();
   if (doBigram) {
      if (doBOff)
         OutputBoBigram();
      else
         OutputMatBigram();
   }
   if (doList) OutputList();
   if (doPCount || doLCount)
      OutputCounts(); /* Breaks log->phy links */
}

/* ------------------------------------------------------------ */
/*                      END:  HLStats.c                          */
/* ------------------------------------------------------------ */
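For readers unfamiliar with back-off bigrams: BuildNEntry stores an explicit log probability only for successors whose count exceeds bigThresh (the SEntry list), plus a single back-off weight per context; any unseen successor is scored via that weight and the scaled unigram. The standalone sketch below illustrates how such a model is typically evaluated at lookup time. It is not part of HLStats.c, and the struct and function names (BigramEntry, BigramContext, BigramLogProb) are illustrative, not HTK API.

/* Sketch only: ARPA-style back-off bigram lookup, assuming natural-log probs
   as written by OutputBoBigram. Compile with: cc sketch.c -lm */
#include <stdio.h>
#include <math.h>

#define LZERO_SKETCH (-1.0E10)   /* stand-in for HTK's LZERO log-zero value */

typedef struct {                  /* one stored successor (cf. SEntry) */
   int word;                      /* successor word id */
   double logprob;                /* log P(word | predecessor) */
} BigramEntry;

typedef struct {                  /* one predecessor context (cf. NEntry) */
   int nse;                       /* number of explicitly stored successors */
   BigramEntry *se;               /* successors, sorted by word id */
   double bowt;                   /* log back-off weight for this context */
} BigramContext;

/* Return log P(w2 | w1): use the stored bigram if present,
   otherwise back off to bowt(w1) + log P(w2). */
double BigramLogProb(const BigramContext *ctx, int w2, const double *uniLogProb)
{
   int lo = 0, hi = ctx->nse - 1;
   while (lo <= hi) {             /* binary search over sorted successors */
      int mid = (lo + hi) / 2;
      if (ctx->se[mid].word == w2) return ctx->se[mid].logprob;
      if (ctx->se[mid].word < w2) lo = mid + 1; else hi = mid - 1;
   }
   return ctx->bowt + uniLogProb[w2];   /* back-off path */
}

int main(void)
{
   /* Toy context: one stored successor (word 3, prob 0.5), back-off weight 0.7 */
   BigramEntry se[1] = { { 3, log(0.5) } };
   double uni[4] = { LZERO_SKETCH, log(0.2), log(0.3), log(0.5) };
   BigramContext ctx = { 1, se, log(0.7) };

   printf("seen   bigram: %.4f\n", BigramLogProb(&ctx, 3, uni));
   printf("unseen bigram: %.4f\n", BigramLogProb(&ctx, 2, uni));
   return 0;
}

Keeping only the frequent successors explicit and routing everything else through one back-off weight is what keeps the model written by OutputBoBigram compact compared with the full matrix produced by OutputMatBigram.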
