⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lpcalc.c

📁 该压缩包为最新版 HTK 的源代码。HTK 是目前比较流行的语音处理软件,欢迎有兴趣的朋友下载使用。
💻 C
📖 第 1 页 / 共 3 页
字号:
      case DC_ABSOLUTE:	 uMass = ApplyABS(boi,tgtFE,tMass);	 break;      default :         HError(15590,"CalcNGramProbs: Unsupported LM type (%d)",boi->dcType);	 break;      }      /* calculate sum of (n-1)-gram probs for unseen entries */      boSum = 0.0;      GetNGramProbs(lm, feId+1, nSize-1, se_perm, nse);      if (boi->wdThresh>0) {         for (bo_se=se_perm,se=tgtFE->sea,fe=tgtFE->fea,i=j=0; i<tgtFE->nse; i++,bo_se++,se++){            prob=se->prob/tMass;            if (fabs(se->prob*(log(prob)-log(bo_se->prob)))<boi->wdThresh &&                (fe==NULL || j>=tgtFE->nfe || fe->ndx!=se->ndx)) {               uMass+=se->prob,se->prob=0.0; /* oh my goodness - who wrote that?! */               if (j<tgtFE->nfe && fe->ndx==se->ndx) /* PRUNE FE AS WELL */                  fe->nse=0,fe->nfe=0;            }            if (j<tgtFE->nfe && fe->ndx<=se->ndx) fe++,j++;         }      }      for (bo_se=se_perm,se=tgtFE->sea,i=0; i<tgtFE->nse; i++,bo_se++,se++)         if (se->prob > 0.0) boSum += bo_se->prob;      boSum = 1.0 - boSum;   }      nItem = 0;   if (uMass!=tMass) {  /* some real n-grams still left after discounting */      if (ptype == LMP_COUNT) {	 tMass = 1.0;      }      tse = se_perm;      for (se=tgtFE->sea,i=0; i<tgtFE->nse; i++,se++) {	 if (se->prob>0.0) {	    tse->prob = se->prob / tMass;	    tse->ndx  = se->ndx;	    tse++; nItem++;	 }      }      tgtFE->sea = se_perm;      tgtFE->nse = nItem;      tgtFE->ndx = feId[nSize-2];      tgtFE->bowt = (boSum <= 0.0) ? 
MIN_BOWT : (uMass / (tMass * boSum));      if (!rebuild) {	 tgtFE->fea = NULL;	 tgtFE->nfe = 0;      }   }   else {      tgtFE->sea = se_perm;      tgtFE->nse = nItem;      tgtFE->nfe = 0;   }   return nItem;}#define GRAM2TEXT() { \   for (s = sbuf, *sbuf='\0', j=0; j<nSize; j++) { \      sprintf(s," %s",wmap->id[gramKey[j]]->name); s+=strlen(s); \   } \}/* EXPORT CalculateNGram: calculate nSize-grams from gram files in inSet */static int CalculateNGram(BackOffLM *lm, NGInputSet *inSet, int nSize){   float count;   WordMap *wmap;   char *s, sbuf[256];   int i, j, nse, nfe, nItem;   SMEntry *se, *se_buff;   FLEntry *fe, *fe_buff, *feptr;   UInt *ge, gram[LM_NSIZE];   UInt gramKey[LM_NSIZE];   Boolean newCX1, newCX2;   if ((se = se_buff = lm->se_buff)==NULL)      HError(15590,"CalculateNGram: se_buff not initialised");   if ((fe = fe_buff = lm->fe_buff)==NULL)      HError(15590,"CalculateNGram: fe_buff not initialised");   if ((wmap = inSet->wm)==NULL)      HError(15590,"CalculateNGram: Word map not set");   if (nSize < 1 || nSize > inSet->N)      HError(15590,"CalculateNGram: Invalid nSize (%d)",nSize);   nse = 0; nfe = 0; nItem = 0;   OpenInputSet(inSet);   if (!FilterNGram(inSet,gram,&count,nSize))      HError(15513,"CalculateNGram: Unable to read first n-gram");   memcpy(gramKey,gram,nSize*sizeof(UInt));   do {#ifdef SANITY      for (i=0; i<nSize; i++)	 if (gram[i] < 1 || gram[i] > lm->vocSize)	    HError(15590,"CalculateNGram: LM index out of range (%d)",gram[i]);#endif      for (newCX1=FALSE, ge=gram, i=0; i<nSize-2; i++, ge++)	 if (gramKey[i]!=*ge) { 	    newCX1 = TRUE; break; 	 }      newCX2 = (nSize==1) ? 
newCX1 : newCX1 || (gramKey[nSize-2]!=gram[nSize-2]);      if (newCX2) {	 fe->nse = nse;	 fe->sea = se_buff;	 if ((nse = CalcNGramProbs(lm,gramKey,nSize,fe,FALSE)) > 0) {	    fe->fea = NULL; fe->nfe = 0;	    nItem += nse; fe++; nfe++;	 }	 if (newCX1) {	    if (nfe>0) {	       for (feptr=&lm->root, i=0; i<nSize-2; i++) {                  FLEntry *feptr2;		  if ((feptr2 = FindFE(feptr->fea,0,feptr->nfe,gramKey[i]))==NULL) {		     GRAM2TEXT();		     HError(15520,"CalculateNGram: Unable to find FLEntry to attach (%s)",sbuf);		  }                  feptr=feptr2;	       }	       if (feptr->nfe > 0 || feptr->fea!=NULL) {		  GRAM2TEXT();		  HError(15525,"CalculateNGram: Attempt to overwrite entries when attaching (%s)",sbuf);	       }	       feptr->fea = fe_buff; feptr->nfe = nfe; StoreFEA(feptr,lm->heap);	    }	    fe = fe_buff; nfe = 0;	    for (ge=gram,i=0; i<nSize-2; i++,ge++) gramKey[i] = *ge;	 }	 gramKey[nSize-2] = gram[nSize-2];	 se = se_buff; nse=0;       }      se->ndx = gram[nSize-1]; se->prob = count;      se++; nse++;#ifdef SANITY            if (nse>lm->vocSize)	 HError(15590,"CalculateNGram: SE buffer limit reached (%d)",nse);#endif   } while(FilterNGram(inSet,gram,&count,nSize));   /* finish off the remaining n-grams accumulated */   if (nSize > 1) {  /* (n>1)-grams */      fe->nse = nse;      fe->sea = se_buff;      if ((nse = CalcNGramProbs(lm,gramKey,nSize,fe,FALSE)) > 0) {	 fe->fea = NULL; fe->nfe = 0;	 nItem += nse; fe++; nfe++;      }      if (nfe > 0) {	 for (feptr=&lm->root, i=0; i<nSize-2; i++) {	    if ((feptr = FindFE(feptr->fea,0,feptr->nfe,gramKey[i]))==NULL) {	       GRAM2TEXT();	       HError(15520,"CalculateNGram: Unable to find FLEntry for (%s)",sbuf);	    }	 }	 if (feptr->nfe > 0 || feptr->fea!=NULL) {	    GRAM2TEXT();	    HError(15525,"CalculateNGram: Attempt to ovewrite entries when attaching (%s)",sbuf);	 }	 feptr->fea = fe_buff; feptr->nfe = nfe; StoreFEA(feptr,lm->heap);      }   } else {  /* unigrams */      lm->root.nse = nse;      
lm->root.sea = se_buff;      nItem = CalcUniProbs(lm,&lm->root,FALSE);   }   CloseInputSet(inSet);   return nItem;}#define DEF_ABS_COEF 0.5static double CalcABSCoef(int nSize, FoFTab *ftab) {  UInt **fof;  double coef;  fof = ftab->fof;  if (fof[1][nSize]==0 || fof[2][nSize]==0)      coef = DEF_ABS_COEF;  else     coef = (double) fof[1][nSize] / (double) (fof[1][nSize] + 2.0*fof[2][nSize]);  if (trace&T_TOP)    printf("Absolute discounting term %e\n",coef);  return coef;}#define DEF_TG_COEF 0.99static void CalcTGCoefs(MemHeap *heap, BackOffInfo *boi, int nSize, FoFTab *ftab) {   int r,K;   UInt **fof;   double kTerm,gTerm;   TuringGoodInfo *tgi;   Boolean ok,allPositive;      fof = ftab->fof;    tgi = &boi->dcInfo.tgInfo;    K = tgi->kRange;   tgi->coef = (float *) New(heap,(K+1)*sizeof(float));   for (r=0; r<=K; r++) tgi->coef[r] = 0.0;      /* check for singularities */   for (ok = (fof[1][nSize]>0),r=1; ok && r<K; r++)       ok = ok && (fof[r][nSize]>0);   if (ok) {      do {         if (K <= 1) {               HError(-15560, "CalcTGCoefs: Invalid K=%d - setting default K and coefficients", K);               K = tgi->kRange;               for (r=1; r <=K; r++)                  tgi->coef[r] = (r <= boi->cutOff) ? 
0.0 : DEF_TG_COEF;               return;         }         kTerm = (double) ((K+1) * fof[K+1][nSize]) / (double) fof[1][nSize];         /*         if (kTerm>DEF_TG_COEF) {            kTerm = DEF_TG_COEF;            if (trace&T_TOP)               printf("CalcTGCoefs: clamping kTerm to %f\n", DEF_TG_COEF);         }*/         /* Further check that kTerm > (r+1).c[r+1]/c[r] for 1<=r<k */         allPositive = TRUE;         for (r=(boi->cutOff?boi->cutOff:1); r<K; r++) {            gTerm = (double) ((r+1) * fof[r+1][nSize])/(double) (r*fof[r][nSize]);            if (((kTerm<=1.0) && (kTerm>=gTerm)) || ((kTerm>1.0) && (kTerm<gTerm)))               allPositive = FALSE;            printf("g[%d]=%f\n", r, gTerm);         }         if (allPositive)            break;         K--;         if (trace&T_TOP)            printf("CalcTGCoefs: lowering K to %d\n", K);      } while(TRUE);      for (r=1; r<=K; r++) {         gTerm = (double) ((r+1) * fof[r+1][nSize])/(double) (r*fof[r][nSize]);         if (r <= boi->cutOff) {            tgi->coef[r] = 0.0;         }         else {            tgi->coef[r] = ((gTerm - kTerm) / (1.0 - kTerm));            if (tgi->coef[r] < 1E-03) {                HError(-15560, "CalcTGCoefs: Invalid coefficient detected in Turing-Good discounting (%f) - clamped to 1E-03 [gTerm=%f, kTerm=%f, r=%d, cutoff=%d]", tgi->coef[r], gTerm, kTerm, r, boi->cutOff);               tgi->coef[r] = 1E-03;            }         }      }      if (trace&T_TOP)	 printf("%d-gram coefs:\n",nSize);   } else {      for (r=1; r <=K; r++)	 tgi->coef[r] = (r <= boi->cutOff) ? 
0.0 : DEF_TG_COEF;   }   for (r=1; r<=K; r++) {      if (trace&T_TOP) printf("coef[%d]=%e",r,tgi->coef[r]);      if (tgi->coef[r]>1.0) {	 tgi->coef[r] = DEF_TG_COEF; 	 if (trace&T_TOP) printf(", clamped to %.4f",tgi->coef[r]);      }      if (trace&T_TOP) printf("\n");   }   tgi->kRange = K;}/* EXPORT->CalcDiscountCoefs: calculate discount coefs from fof table */static void CalcDiscountCoefs(BackOffLM *lm, FoFTab *ftab){   int ns;   BackOffInfo *boi;   for (ns=2; ns<=lm->nSize; ns++) {      if ((boi = lm->gInfo[ns].boInfo)==NULL)	 HError(15590,"CalcDiscountCoefs: Back-off info not available for %d-gram",ns);      switch (boi->dcType) {      case DC_KATZ:	CalcTGCoefs(lm->heap,boi,ns,ftab);	break;      case DC_ABSOLUTE:	boi->dcInfo.bCoef = CalcABSCoef(ns,ftab);	break;      default:	HError(15590,"CalcDiscountCoefs: Unsupported LM type (%d)",boi->dcType);      }   }}/* CheckCutoffs: check n-gram cutoffs and discounting range */static void CheckCutoffs(BackOffLM *lm){   BackOffInfo *boi;   int ns,kRange,lastCutOff;      lastCutOff=0;   for (ns=2; ns<=lm->nSize; ns++) {      if ((boi = lm->gInfo[ns].boInfo)==NULL)	 HError(15590,"CheckCutoffs: Back-off info not available for %d-gram",ns);      if (boi->cutOff < lastCutOff) {	 HError(15540,"CheckCutoffs: %d-gram cutoff = %d, %d-gram cutoff = %d",		ns,boi->cutOff,ns-1,lastCutOff);      }      if (boi->dcType!=DC_KATZ && boi->dcType!=DC_ABSOLUTE)	 HError(15590,"CheckCutoffs: Unsupported LM type (%d)",boi->dcType);      if (boi->dcType==DC_KATZ) {	 kRange = boi->dcInfo.tgInfo.kRange;	 if (boi->cutOff > kRange)	    HError(-15540,"CheckCutoffs: %d-gram cutoff out of range (%d)",ns,boi->cutOff);      }      lastCutOff = boi->cutOff;   }}/* InitTargetModel: initialise target LM structure */static BackOffLM *InitTargetModel(MemHeap *heap, BuildInfo *bi){   int i,ndx,N;   NameId nId;   BackOffLM *lm;   BackOffInfo *boi;   if (bi->nSize<1)      HError(15590,"GenerateLM: Invalid n-gram size (%d)",bi->nSize);   if 
(bi->ptype!=LMP_FLOAT && bi->ptype!=LMP_COUNT)      HError(15590,"GenerateLM: Invalid probability kind (%d)",bi->ptype);   lm = (BackOffLM *) New(heap,sizeof(BackOffLM));   lm->heap = heap;   lm->gScale = 1.0;   lm->nSize = bi->nSize;   lm->probType = bi->ptype;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -