📄 htk.c
字号:
/*** Copyright (C) 2002-2004 Erik de Castro Lopo <erikd@mega-nerd.com>**** This program is free software; you can redistribute it and/or modify** it under the terms of the GNU Lesser General Public License as published by** the Free Software Foundation; either version 2.1 of the License, or** (at your option) any later version.**** This program is distributed in the hope that it will be useful,** but WITHOUT ANY WARRANTY; without even the implied warranty of** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the** GNU Lesser General Public License for more details.**** You should have received a copy of the GNU Lesser General Public License** along with this program; if not, write to the Free Software** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.*/#include <stdio.h>#include <fcntl.h>#include <string.h>#include <ctype.h>#include "sndfile.h"#include "config.h"#include "sfendian.h"#include "common.h"/*------------------------------------------------------------------------------** Macros to handle big/little endian issues.*/#define SFE_HTK_BAD_FILE_LEN 1666#define SFE_HTK_NOT_WAVEFORM 1667/*------------------------------------------------------------------------------** Private static functions.*/static int htk_close (SF_PRIVATE *psf) ;static int htk_write_header (SF_PRIVATE *psf, int calc_length) ;static int htk_read_header (SF_PRIVATE *psf) ;/*------------------------------------------------------------------------------** Public function.*/inthtk_open (SF_PRIVATE *psf){ int subformat ; int error = 0 ; if (psf->is_pipe) return SFE_HTK_NO_PIPE ; if (psf->mode == SFM_READ || (psf->mode == SFM_RDWR && psf->filelength > 0)) { if ((error = htk_read_header (psf))) return error ; } ; subformat = psf->sf.format & SF_FORMAT_SUBMASK ; if (psf->mode == SFM_WRITE || psf->mode == SFM_RDWR) { if ((psf->sf.format & SF_FORMAT_TYPEMASK) != SF_FORMAT_HTK) return SFE_BAD_OPEN_FORMAT ; psf->endian = SF_ENDIAN_BIG ; if (htk_write_header (psf, SF_FALSE)) return psf->error ; psf->write_header = htk_write_header ; } ; psf->close = htk_close ; psf->blockwidth = psf->bytewidth * psf->sf.channels ; switch (subformat) { case SF_FORMAT_PCM_16 : /* 16-bit linear PCM. */ error = pcm_init (psf) ; break ; default : break ; } ; return error ;} /* htk_open *//*------------------------------------------------------------------------------*/static inthtk_close (SF_PRIVATE *psf){ if (psf->mode == SFM_WRITE || psf->mode == SFM_RDWR) htk_write_header (psf, SF_TRUE) ; return 0 ;} /* htk_close */static inthtk_write_header (SF_PRIVATE *psf, int calc_length){ sf_count_t current ; int sample_count, sample_period ; current = psf_ftell (psf) ; if (calc_length) psf->filelength = psf_get_filelen (psf) ; /* Reset the current header length to zero. */ psf->header [0] = 0 ; psf->headindex = 0 ; psf_fseek (psf, 0, SEEK_SET) ; if (psf->filelength > 12) sample_count = (psf->filelength - 12) / 2 ; else sample_count = 0 ; sample_period = 10000000 / psf->sf.samplerate ; psf_binheader_writef (psf, "E444", sample_count, sample_period, 0x20000) ; /* Header construction complete so write it out. */ psf_fwrite (psf->header, psf->headindex, 1, psf) ; if (psf->error) return psf->error ; psf->dataoffset = psf->headindex ; if (current > 0) psf_fseek (psf, current, SEEK_SET) ; return psf->error ;} /* htk_write_header *//*** Found the following info in a comment block within Bill Schottstaedt's** sndlib library.**** HTK format files consist of a contiguous sequence of samples preceded by a** header. Each sample is a vector of either 2-byte integers or 4-byte floats.** 2-byte integers are used for compressed forms as described below and for** vector quantised data as described later in section 5.11. HTK format data** files can also be used to store speech waveforms as described in section 5.8.**** The HTK file format header is 12 bytes long and contains the following data** nSamples -- number of samples in file (4-byte integer)** sampPeriod -- sample period in 100ns units (4-byte integer)** sampSize -- number of bytes per sample (2-byte integer)** parmKind -- a code indicating the sample kind (2-byte integer)**** The parameter kind consists of a 6 bit code representing the basic** parameter kind plus additional bits for each of the possible qualifiers.** The basic parameter kind codes are**** 0 WAVEFORM sampled waveform** 1 LPC linear prediction filter coefficients** 2 LPREFC linear prediction reflection coefficients** 3 LPCEPSTRA LPC cepstral coefficients** 4 LPDELCEP LPC cepstra plus delta coefficients** 5 IREFC LPC reflection coef in 16 bit integer format** 6 MFCC mel-frequency cepstral coefficients** 7 FBANK log mel-filter bank channel outputs** 8 MELSPEC linear mel-filter bank channel outputs** 9 USER user defined sample kind** 10 DISCRETE vector quantised data**** and the bit-encoding for the qualifiers (in octal) is** _E 000100 has energy** _N 000200 absolute energy suppressed** _D 000400 has delta coefficients** _A 001000 has acceleration coefficients** _C 002000 is compressed** _Z 004000 has zero mean static coef.** _K 010000 has CRC checksum** _O 020000 has 0'th cepstral coef.*/static inthtk_read_header (SF_PRIVATE *psf){ int sample_count, sample_period, marker ; psf_binheader_readf (psf, "pE444", 0, &sample_count, &sample_period, &marker) ; if (2 * sample_count + 12 != psf->filelength) return SFE_HTK_BAD_FILE_LEN ; if (marker != 0x20000) return SFE_HTK_NOT_WAVEFORM ; psf->sf.channels = 1 ; psf->sf.samplerate = 10000000 / sample_period ; psf_log_printf (psf, "HTK Waveform file\n Sample Count : %d\n Sample Period : %d => %d Hz\n", sample_count, sample_period, psf->sf.samplerate) ; psf->sf.format = SF_FORMAT_HTK | SF_FORMAT_PCM_16 ; psf->bytewidth = 2 ; /* HTK always has a 12 byte header. */ psf->dataoffset = 12 ; psf->endian = SF_ENDIAN_BIG ; psf->datalength = psf->filelength - psf->dataoffset ; psf->close = htk_close ; psf->blockwidth = psf->sf.channels * psf->bytewidth ; if (! psf->sf.frames && psf->blockwidth) psf->sf.frames = (psf->filelength - psf->dataoffset) / psf->blockwidth ; return 0 ;} /* htk_read_header *//*** Do not edit or modify anything in this comment block.** The arch-tag line is a file identity tag for the GNU Arch ** revision control system.**** arch-tag: c350e972-082e-4c20-8934-03391a723560*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -