📄 hparm.h
字号:
/* ----------------------------------------------------------- *//* *//* ___ *//* |_| | |_/ SPEECH *//* | | | | \ RECOGNITION *//* ========= SOFTWARE */ /* *//* *//* ----------------------------------------------------------- *//* developed at: *//* *//* Speech Vision and Robotics group *//* Cambridge University Engineering Department *//* http://svr-www.eng.cam.ac.uk/ *//* *//* Entropic Cambridge Research Laboratory *//* (now part of Microsoft) *//* *//* ----------------------------------------------------------- *//* Copyright: Microsoft Corporation *//* 1995-2000 Redmond, Washington USA *//* http://www.microsoft.com *//* *//* 2001 Cambridge University *//* Engineering Department *//* *//* Use of this software is governed by a License Agreement *//* ** See the file License for the Conditions of Use ** *//* ** This banner notice must not be removed ** *//* *//* ----------------------------------------------------------- *//* File: HParm.h: Speech Parameter Input/Output *//* ----------------------------------------------------------- *//* !HVER!HParm: 3.3 [CUED 28/04/05] */#ifndef _HPARM_H_#define _HPARM_H_#ifdef __cplusplusextern "C" {#endifenum _BaseParmKind{ WAVEFORM, /* Raw speech waveform (handled by HWave) */ LPC,LPREFC,LPCEPSTRA,LPDELCEP, /* LP-based Coefficients */ IREFC, /* Ref Coef in 16 bit form */ MFCC, /* Mel-Freq Cepstra */ FBANK, /* Log Filter Bank */ MELSPEC, /* Mel-Freq Spectrum (Linear) */ USER, /* Arbitrary user specified data */ DISCRETE, /* Discrete VQ symbols (shorts) */ PLP, /* Standard PLP coefficients */ ANON}; typedef short ParmKind; /* BaseParmKind + Qualifiers */ #define HASENERGY 0100 /* _E log energy included */#define HASNULLE 0200 /* _N absolute energy suppressed */#define HASDELTA 0400 /* _D delta coef appended */#define HASACCS 01000 /* _A acceleration coefs appended */#define HASCOMPX 02000 /* _C is compressed */#define HASZEROM 04000 /* _Z zero meaned */#define HASCRCC 010000 /* _K has CRC check */#define HASZEROC 020000 /* _0 0'th Cepstra included */#define HASVQ 040000 /* _V has VQ index attached */#define HASTHIRD 0100000 /* _T has Delta-Delta-Delta index attached */#define BASEMASK 077 /* Mask to remove qualifiers *//* An observation contains one or more stream values each of which is either a vector of continuous values and/or a single discrete symbol. The discrete vq symbol is included if the target kind is DISCRETE or the continuous parameter has the HASVQ qualifier. Observations are input via buffers or tables. A buffer is a FIFO structure of potentially infinite length and it is always sourced via HAudio. A table is a random access array of observations and it is sourced from a file possibly via HWave. Buffers are input only, a table can be input and output. Too allow discrete systems to be used directly from continuous data the observation also holds a separate parm kind for the parm buffer and routines which supply observations use this to determine stream widths when the observation kind is DISCRETE.*/typedef enum { FALSE_dup=FALSE, /* 0 */ TRUE_dup=TRUE, /* 1 */ TRI_UNDEF=-1 /* -1 */}TriState;typedef struct { Boolean eSep; /* Energy is in separate stream */ short swidth[SMAX]; /* [0]=num streams,[i]=width of stream i */ ParmKind bk; /* parm kind of the parm buffer */ ParmKind pk; /* parm kind of this obs (bk or DISCRETE) */ short vq[SMAX]; /* array[1..swidth[0]] of VQ index */ Vector fv[SMAX]; /* array[1..swidth[0]] of Vector */} Observation;/* A ParmBuf holds either a static table of parameter frames loaded from a file or a potentially infinite sequence of frames from an audio source. The key information relating to the speech data in a buffer or table can be obtained via a BufferInfo Record. A static table behaves like a stopped buffer.*/typedef enum { PB_INIT, /* Buffer is initialised and empty */ PB_WAITING, /* Buffer is waiting for speech */ PB_STOPPING, /* Buffer is waiting for silence */ PB_FILLING, /* Buffer is filling */ PB_STOPPED, /* Buffer has stopped but not yet empty */ PB_CLEARED /* Buffer has been emptied */} PBStatus;typedef struct _ParmBuf *ParmBuf;typedef struct { ParmKind srcPK; /* Source ParmKind */ FileFormat srcFF; /* Source File format */ HTime srcSampRate; /* Source Sample Rate */ int srcVecSize; /* Size of source vector */ long nSamples; /* Number of source samples */ int frSize; /* Number of source samples in each frame */ int frRate; /* Number of source samples forward each frame */ int nObs; /* Number of table observations */ ParmKind tgtPK; /* Target ParmKind */ FileFormat tgtFF; /* Target File format */ HTime tgtSampRate; /* Target Sample Rate */ int tgtVecSize; /* Size of target vector */ AudioIn a; /* the audio source - if any */ Wave w; /* the wave input - if any */ Ptr i; /* the other input - if any */ Boolean useSilDet; /* Use Silence Detector */ int audSignal; /* Signal Number for Audio Control */ char *vqTabFN; /* Name of VQ Table Defn File */ Boolean saveCompressed; /* Save in compressed format */ Boolean saveWithCRC; /* Save with CRC check added */ Boolean spDetParmsSet; /* Parameters set for sp/sil detector */ float spDetSil; /* Silence level for channel */ float chPeak; /* Peak-to-peak input level for channel */ float spDetSp; /* Speech level for channel */ float spDetSNR; /* Speech/noise ratio for channel */ float spDetThresh; /* Silence/speech level threshold */ float curVol; /* Volume level of last frame (0.0-100.0dB) */ int spDetSt; /* Frame number of first frame of buffer */ int spDetEn; /* Frame number of last frame of buffer */ char *matTranFN; /* Matrix transformation name */}BufferInfo;/* External source definition structure*/typedef struct hparmsrcdef *HParmSrcDef;/* -------------------- Initialisation ------------------ */ReturnStatus InitParm(void);/* Initialise the module*//* -------------------- Channel functions ------------------ */ReturnStatus SetChannel(char *chanName);/* Set the current channel to use config parameters from chanName.*/void ResetChannelSession(char *chanName);/* Reset the session for the specified channel (NULL indicates default)*//* The next two functions have been kept to allow for backwards compatibility.*/void SetNewConfig(char * libmod);void ResetCurCepMean(void);/* ---------------- Buffer Input Routines ------------------ */ParmBuf OpenBuffer(MemHeap *x, char *fn, int maxObs, FileFormat ff, TriState enSpeechDet, TriState silMeasure);/* Open and return a ParmBuf object connected to the current channel. If maxObs==0 blocks and reads whole of file/audio into memory and returns with status==PB_STOPPED ready for table access. All parameters, associated with the loading and conversion of the source are defined using configuration parameters. If maxObs!=0 buffer may be read as a stream. In this case reading should be via ReadAsBuffer calls which should continue until either ReadAsBuffer returns FALSE or buffer status >= PB_STOPPED. Note that for some types of input (eg pipes) end of data can only be determined by a failed attempt to read the final frame. If the speech detector is enabled (either by configuration or by explicit parameter in call) then silence measurement can be forced/prevented by setting silMeasure to TRUE/FALSE (if UNDEF will perform measurement if it is needed by config).
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -