lpcalc.c

来自「该压缩包为最新版htk的源代码,htk是现在比较流行的语音处理软件,请有兴趣的朋友下载使用」· C语言代码 · 共 992 行 · 第 1/3 页
992 行
/* ----------------------------------------------------------- *//*                                                             *//*                          ___                                *//*                       |_| | |_/   SPEECH                    *//*                       | | | | \   RECOGNITION               *//*                       =========   SOFTWARE                  */ /*                                                             *//*                                                             *//* ----------------------------------------------------------- *//* developed at:                                               *//*                                                             *//*      Speech Vision and Robotics group                       *//*      Cambridge University Engineering Department            *//*      http://svr-www.eng.cam.ac.uk/                          *//*                                                             *//* main authors: Valtcho Valtchev, Steve Young,                *//*               Julian Odell, Gareth Moore                    *//* ----------------------------------------------------------- *//*         Copyright:                                          *//*                                                             *//*          1994-2002 Cambridge University                     *//*                    Engineering Department                   *//*                                                             *//*   Use of this software is governed by a License Agreement   *//*    ** See the file License for the Conditions of Use  **    *//*    **     This banner notice must not be removed      **    *//*                                                             *//* ----------------------------------------------------------- *//*      File: LPCalc: probability calculation                  *//* ----------------------------------------------------------- */char *lpcalc_version = "!HVER!LPCalc:   3.3 
[CUED 28/04/05]";char *lpcalc_vc_id = "$Id: LPCalc.c,v 1.1.1.1 2005/05/12 10:52:18 jal58 Exp $";#include "HShell.h"     /* HMM ToolKit Modules */#include "HMem.h"#include "HMath.h"#include "HWave.h"#include "HLabel.h"#include "LWMap.h"      #include "LGBase.h"     /* LM ToolKit Modules */#include "LUtil.h"#include "LModel.h"#include "LPCalc.h"#define T_TOP    0001       /* top level tracing */#define T_FOF    0002       /* FoF table tracing *//* -------------------------- Trace Flags ------------------------ */static int trace = 0;/* ---------------- Configuration Parameters --------------------- */static ConfParam *cParm[MAXGLOBS];static int nParm = 0;            /* total num params *//* ---------------------- Global Variables ----------------------- */static LabId  sstId;                          /* sentence start marker */static char   sstStr[256] = DEF_STARTWORD;    /* sentence start marker */static float  uniFloor = 1.0;                 /* unigram floor *//* SetConfParms: set conf parms relevant to this tool */void InitPCalc(void){   int i;   char s[256];   Register(lpcalc_version,lpcalc_vc_id);   nParm = GetConfig("LPCALC", TRUE, cParm, MAXGLOBS);   if (nParm>0){#ifndef HTK_TRANSCRIBER      if (GetConfInt(cParm,nParm, "TRACE",&i))    trace = i;#endif      if (GetConfStr(cParm,nParm, "STARTWORD",s)) strcpy(sstStr,s);   }   sstId = GetLabId(sstStr,TRUE);}/* EXPORT->InitBuildInfo: initialise build parameters */void InitBuildInfo(BuildInfo *bi){   int i, j;    /* Temporary values */   char s[256]; /* Temporary string */   bi->nSize = 0;   bi->ftab  = NULL;   bi->saveFmt  = DEF_SAVEFMT;   bi->ptype    = DEF_LMPTYPE;   bi->uniFloor = DEF_UNIFLOOR;   bi->kRange   = DEF_KRANGE;   bi->dctype   = DEF_DCTYPE;   for (i=1; i<=LM_NSIZE; i++)       bi->cutOff[i] = DEF_CUTOFF;   bi->wmap = NULL;   bi->inSet = NULL;   if (GetConfInt(cParm,nParm, "UNIFLOOR",&i)) bi->uniFloor = i;   if (GetConfInt(cParm,nParm, "KRANGE",&i))   bi->kRange = i;   if (GetConfStr(cParm,nParm, 
"DCTYPE",s)) {     if (!strcmp(s,"TG"))        bi->dctype = DC_KATZ;     else if (!strcmp(s,"ABS"))        bi->dctype = DC_ABSOLUTE;     else if (!strcmp(s,"LIN"))        bi->dctype = DC_LINEAR;   }   /* See if any config file settings for n-gram cut-offs */   for (i=2; i<=LM_NSIZE; i++) {      sprintf(s,"%dG_CUTOFF",i);      if (GetConfInt(cParm,nParm,s,&j)) bi->cutOff[i] = j;   }}#define LMNDX(wm,i) wm->me[i].sort+1/* EXPORT->FilterNGram: read n-grams and map them to LM IDs */Boolean FilterNGram(NGInputSet *inSet, UInt *gram, float *count, int nSize){   int i;   UInt gbuf[LM_NSIZE];   if (!GetNextNGram(inSet,gbuf,count,nSize))      return FALSE;#ifdef SANITY      for (i=0; i<nSize; i++) {      if (GetMEIndex(inSet->wm,gbuf[i]) < 0) {	 HError(15590,"FilterNGram: Read n-gram contains out of map words");      }   }#endif   for (i=0; i<nSize; i++) {      gram[i] = LMNDX(inSet->wm,GetMEIndex(inSet->wm,gbuf[i]));   }   return TRUE;}/* EXPORT->CalcUniProbs: calculate unigram */static int CalcUniProbs(BackOffLM *lm, FLEntry *tgtFE, Boolean rebuild){   NameId nid;   double tMass;   SMEntry *se,*unigram;   int i, numFloored;   if (rebuild) {      memcpy(lm->se_buff,tgtFE->sea,tgtFE->nse*sizeof(SMEntry));      unigram = tgtFE->sea; tgtFE->sea = lm->se_buff;   } else {      unigram = (SMEntry *) New(lm->heap,lm->vocSize*sizeof(SMEntry));   }   for (se=unigram, i=0; i<lm->vocSize; i++,se++) {    /* initialise array */      se->prob = 0.0; se->ndx=i+1;   }   for (se=tgtFE->sea,i=0; i<tgtFE->nse; i++,se++) {   /* copy all entries across */      unigram[se->ndx-1].prob = se->prob;   }   if (tgtFE->nse!=lm->vocSize) {      printf("%d distinct unigrams found in data, %d in word list\n",tgtFE->nse,lm->vocSize);      fflush(stdout);   }   tMass = 0.0;   numFloored = 0;   for (se=unigram, i=0; i<lm->vocSize; i++,se++) {       if (se->prob < uniFloor) {	 se->prob = uniFloor;	 numFloored++;      }      tMass += se->prob;   }   if (numFloored>0) {      printf("%d unigrams floored to 
%.1f\n",numFloored,uniFloor);	       fflush(stdout);   }   if (lm->probType!=LMP_COUNT) {      /* clamp sentence start symbol prob */      if ((nid = GetNameId(lm->htab,sstStr,FALSE))!=NULL) {	 if ((se = FindSE(unigram,0,lm->vocSize,LM_INDEX(nid)))!=NULL) {	    tMass = tMass - se->prob; 	    se->prob = 0.0;	 }      }      for (se=unigram, i=0; i<lm->vocSize; i++, se++) {	 se->prob = se->prob/tMass;      }   }   tgtFE->sea = unigram;   tgtFE->nse = lm->vocSize;   tgtFE->ndx = 0;   tgtFE->bowt = 0.0;   if (!rebuild) {          /* initialise root FE if building from scratch */      tgtFE->fea = NULL;             tgtFE->nfe = 0;   }   return lm->vocSize;}   static double ApplyTG(BackOffInfo *boi, FLEntry *tgtFE, double tMass, int nSize){   int i,k,r;   SMEntry *se;   double uMass;   TuringGoodInfo *tgi;      tgi = &boi->dcInfo.tgInfo;      /* apply TG discounting */   for (se=tgtFE->sea,i=0; i<tgtFE->nse; i++,se++) {      if ((r = (int) se->prob) <= tgi->kRange) {         se->prob = tgi->coef[r] * se->prob;      }   }      /* accumulate unseen probability mass */   uMass = 0.0;   for (se=tgtFE->sea,i=0; i<tgtFE->nse; i++,se++)     uMass += se->prob;   uMass = tMass - uMass;       if (uMass==0.0) {  /* unable to accumulate unseen count, try alternative */     k = boi->cutOff+1;     for (se=tgtFE->sea,i=0; i<tgtFE->nse; i++,se++) {       uMass += (1.0 - tgi->coef[k]) * se->prob;             se->prob *= tgi->coef[k];       if ((k++)==tgi->kRange) break;     }   }   return uMass;}static double ApplyABS(BackOffInfo *boi, FLEntry *tgtFE, double tMass) {  int i;  SMEntry *se;  double b,uMass;    /* apply Absolute discounting */  b = boi->dcInfo.bCoef;  for (se=tgtFE->sea,i=0; i<tgtFE->nse; i++,se++) {    se->prob = se->prob - b;    if (se->prob < 0.0) se->prob = 0.0;  }    /* accumulate unseen probability mass */  uMass = 0.0;  for (se=tgtFE->sea,i=0; i<tgtFE->nse; i++,se++)    uMass += se->prob;  uMass = tMass - uMass;   return uMass;}/*    EXPORT->CalcNGramProbs: calculate 
and write n-gram entries   lm     - target language model (1..nSize-1)-grams should be in place   feId   - array[0..nSize-2] of LM IDs representing context   nSize  - n-gram to calculate   tgtFE  - target FLEntry   rebuld - TRUE if converting LMP_COUNT -> LMP_FLOAT */static int CalcNGramProbs(BackOffLM *lm, UInt *feId, int nSize, FLEntry *tgtFE, Boolean rebuild){   int i, j, r;   int nse, nItem;   double uMass=0, tMass, boSum, prob;   LMProbType ptype;   BackOffInfo *boi;   SMEntry *se,*bo_se,*se_perm,*tse;   FLEntry *fe;   /* se_perm -> permanent SE storage, tgtFE->sea -> lm->se_buff */   if (nSize==1) {      return CalcUniProbs(lm,tgtFE,rebuild);   }         if ((ptype = lm->probType)==LMP_LOG)      HError(15590,"CalcNGramProbs: Incompatible prob kind (%d)",ptype);   if ((boi = lm->gInfo[nSize].boInfo)==NULL)      HError(15590,"CalcNGramProbs: Back-off info not present for %d grams",nSize);   if (boi->dcType!=DC_KATZ && boi->dcType!=DC_ABSOLUTE)      HError(15590,"CalcNGramProbs: Unsupported LM type (%d)",boi->dcType);   if (rebuild) {             /* rebuilding model - no need to allocate storage */      se_perm = lm->se_buff;      memcpy(lm->se_buff,tgtFE->sea,tgtFE->nse*sizeof(SMEntry));      se_perm = tgtFE->sea; tgtFE->sea = lm->se_buff;  /* swap them round */      tMass = tgtFE->bowt;   } else {      se_perm = NULL;      tMass = 0.0;   }   /* first, accumulate total count and apply cutoff */   nse = 0;   for (se=tgtFE->sea,i=0; i<tgtFE->nse; i++,se++) {       tMass += se->prob;      if ((r = (int) se->prob) <= boi->cutOff)	 se->prob = 0.0;      if (se->prob > 0.0) nse++;   }   if (se_perm==NULL) /* allocate permanent SE storage */      se_perm = (SMEntry *) New(lm->heap,nse*sizeof(SMEntry));   /* copy entries with non-zero probabilities to se_perm */   for (tse=se_perm,se=tgtFE->sea,i=0; i<tgtFE->nse; i++,se++)      if (se->prob>0.0) *tse++=*se;   /* then copy back to tgtFE->sea */   memcpy(tgtFE->sea,se_perm,nse*sizeof(SMEntry));   tgtFE->nse = nse;   
qsort(tgtFE->sea,tgtFE->nse,sizeof(SMEntry),CmpSE);     if (ptype==LMP_COUNT) {  /* building COUNT model */      /* accumulate unseen probability mass */      uMass = 0.0;      for (se=tgtFE->sea,i=0; i<tgtFE->nse; i++,se++)	 uMass += se->prob;      uMass = tMass - uMass;       boSum = 1.0;         } else {    /* building probabilistic model */        switch(boi->dcType) {      case DC_KATZ:	 uMass = ApplyTG(boi,tgtFE,tMass,nSize);	 break;
lpcalc.c - 源码说明

本页面展示了「该压缩包为最新版htk的源代码,htk是现在比较流行的语音处理软件,请有兴趣的朋友下载使用」中的 lpcalc.c 源码文件，采用 C语言编程语言编写，共 992 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与htk相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?