⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hparm.h

📁 该压缩包为最新版htk的源代码,htk是现在比较流行的语音处理软件,请有兴趣的朋友下载使用
💻 H
📖 第 1 页 / 共 2 页
字号:
/* ----------------------------------------------------------- *//*                                                             *//*                          ___                                *//*                       |_| | |_/   SPEECH                    *//*                       | | | | \   RECOGNITION               *//*                       =========   SOFTWARE                  */ /*                                                             *//*                                                             *//* ----------------------------------------------------------- *//* developed at:                                               *//*                                                             *//*      Speech Vision and Robotics group                       *//*      Cambridge University Engineering Department            *//*      http://svr-www.eng.cam.ac.uk/                          *//*                                                             *//*      Entropic Cambridge Research Laboratory                 *//*      (now part of Microsoft)                                *//*                                                             *//* ----------------------------------------------------------- *//*         Copyright: Microsoft Corporation                    *//*          1995-2000 Redmond, Washington USA                  *//*                    http://www.microsoft.com                 *//*                                                             *//*              2001  Cambridge University                     *//*                    Engineering Department                   *//*                                                             *//*   Use of this software is governed by a License Agreement   *//*    ** See the file License for the Conditions of Use  **    *//*    **     This banner notice must not be removed      **    *//*                                                             *//* 
----------------------------------------------------------- *//*         File: HParm.h:   Speech Parameter Input/Output      *//* ----------------------------------------------------------- *//* !HVER!HParm:   3.3 [CUED 28/04/05] */#ifndef _HPARM_H_#define _HPARM_H_#ifdef __cplusplusextern "C" {#endifenum _BaseParmKind{      WAVEFORM,            /* Raw speech waveform (handled by HWave) */      LPC,LPREFC,LPCEPSTRA,LPDELCEP,   /* LP-based Coefficients */      IREFC,                           /* Ref Coef in 16 bit form */      MFCC,                            /* Mel-Freq Cepstra */      FBANK,                           /* Log Filter Bank */      MELSPEC,                         /* Mel-Freq Spectrum (Linear) */      USER,                            /* Arbitrary user specified data */      DISCRETE,                        /* Discrete VQ symbols (shorts) */      PLP,                             /* Standard PLP coefficients */      ANON};      typedef short ParmKind;          /* BaseParmKind + Qualifiers */                                 #define HASENERGY  0100       /* _E log energy included */#define HASNULLE   0200       /* _N absolute energy suppressed */#define HASDELTA   0400       /* _D delta coef appended */#define HASACCS   01000       /* _A acceleration coefs appended */#define HASCOMPX  02000       /* _C is compressed */#define HASZEROM  04000       /* _Z zero meaned */#define HASCRCC  010000       /* _K has CRC check */#define HASZEROC 020000       /* _0 0'th Cepstra included */#define HASVQ    040000       /* _V has VQ index attached */#define HASTHIRD 0100000       /* _T has Delta-Delta-Delta index attached */#define BASEMASK  077         /* Mask to remove qualifiers *//*   An observation contains one or more stream values each of which    is either a vector of continuous values and/or a single   discrete symbol.  The discrete vq symbol is included if the   target kind is DISCRETE or the continuous parameter has the   HASVQ qualifier. 
Observations are input via buffers or tables.  A   buffer is a FIFO structure of potentially infinite length and it is   always sourced via HAudio.  A table is a random access array of   observations and it is sourced from a file possibly via HWave.   Buffers are input only, a table can be input and output.   Too allow discrete systems to be used directly from continuous   data the observation also holds a separate parm kind for the   parm buffer and routines which supply observations use this to   determine stream widths when the observation kind is DISCRETE.*/typedef enum {   FALSE_dup=FALSE, /*  0 */   TRUE_dup=TRUE,   /*  1 */   TRI_UNDEF=-1     /* -1 */}TriState;typedef struct {   Boolean eSep;         /* Energy is in separate stream */   short swidth[SMAX];   /* [0]=num streams,[i]=width of stream i */   ParmKind bk;          /* parm kind of the parm buffer */   ParmKind pk;          /* parm kind of this obs (bk or DISCRETE) */   short vq[SMAX];       /* array[1..swidth[0]] of VQ index */   Vector fv[SMAX];      /* array[1..swidth[0]] of Vector */} Observation;/*   A ParmBuf holds either a static table of parameter frames   loaded from a file or a potentially infinite sequence   of frames from an audio source. The key information relating    to the speech data in a buffer or table can be obtained via    a BufferInfo Record.  
A static table behaves like a stopped   buffer.*/typedef enum {    PB_INIT,     /* Buffer is initialised and empty */   PB_WAITING,  /* Buffer is waiting for speech */   PB_STOPPING, /* Buffer is waiting for silence */   PB_FILLING,  /* Buffer is filling */   PB_STOPPED,  /* Buffer has stopped but not yet empty */   PB_CLEARED   /* Buffer has been emptied */} PBStatus;typedef struct _ParmBuf  *ParmBuf;typedef struct {   ParmKind srcPK;            /* Source ParmKind */    FileFormat srcFF;          /* Source File format */    HTime srcSampRate;         /* Source Sample Rate */    int srcVecSize;            /* Size of source vector */   long nSamples;             /* Number of source samples */   int frSize;                /* Number of source samples in each frame */   int frRate;                /* Number of source samples forward each frame */   int nObs;                  /* Number of table observations */   ParmKind tgtPK;            /* Target ParmKind */    FileFormat tgtFF;          /* Target File format */    HTime tgtSampRate;         /* Target Sample Rate */    int tgtVecSize;            /* Size of target vector */   AudioIn a;                 /* the audio source - if any */   Wave w;                    /* the wave input - if any */   Ptr i;                     /* the other input - if any */   Boolean useSilDet;         /* Use Silence Detector */   int audSignal;             /* Signal Number for Audio Control */   char *vqTabFN;             /* Name of VQ Table Defn File */   Boolean saveCompressed;    /* Save in compressed format */   Boolean saveWithCRC;       /* Save with CRC check added */   Boolean spDetParmsSet;     /* Parameters set for sp/sil detector */   float spDetSil;            /* Silence level for channel */   float chPeak;              /* Peak-to-peak input level for channel */   float spDetSp;             /* Speech level for channel */   float spDetSNR;            /* Speech/noise ratio for channel */   float spDetThresh;         /* Silence/speech 
level threshold */   float curVol;              /* Volume level of last frame (0.0-100.0dB) */   int spDetSt;               /* Frame number of first frame of buffer */   int spDetEn;               /* Frame number of last frame of buffer */   char *matTranFN;           /* Matrix transformation name */}BufferInfo;/*   External source definition structure*/typedef struct hparmsrcdef *HParmSrcDef;/* -------------------- Initialisation ------------------ */ReturnStatus InitParm(void);/*   Initialise the module*//* -------------------- Channel functions ------------------ */ReturnStatus SetChannel(char *chanName);/*    Set the current channel to use config parameters from chanName.*/void ResetChannelSession(char *chanName);/*    Reset the session for the specified channel (NULL indicates default)*//*    The next two functions have been kept to allow for backwards    compatibility.*/void SetNewConfig(char * libmod);void ResetCurCepMean(void);/* ---------------- Buffer Input Routines ------------------ */ParmBuf OpenBuffer(MemHeap *x, char *fn, int maxObs, FileFormat ff, 		   TriState enSpeechDet, TriState silMeasure);/*   Open and return a ParmBuf object connected to the current channel.   If maxObs==0 blocks and reads whole of file/audio into memory and   returns with status==PB_STOPPED ready for table access.  All    parameters, associated with the loading and conversion of the   source are defined using configuration parameters.   If maxObs!=0 buffer may be read as a stream.  In this case reading   should be via ReadAsBuffer calls which should continue until either   ReadAsBuffer returns FALSE or buffer status >= PB_STOPPED.  Note    that for some types of input (eg pipes) end of data can only be    determined by a failed attempt to read the final frame.   
If the speech detector is enabled (either by configuration or
   by explicit parameter in call) then silence measurement can be
   forced/prevented by setting silMeasure to TRUE/FALSE (if TRI_UNDEF,
   measurement is performed only if it is needed by config).

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -