📄 cont_fileseg.c

📁 WinCE平台上的语音识别程序
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- *//* ==================================================================== * Copyright (c) 1999-2001 Carnegie Mellon University.  All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright *    notice, this list of conditions and the following disclaimer.  * * 2. Redistributions in binary form must reproduce the above copyright *    notice, this list of conditions and the following disclaimer in *    the documentation and/or other materials provided with the *    distribution. * * This work was supported in part by funding from the Defense Advanced  * Research Projects Agency and the National Science Foundation of the  * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * *//* * cont_fileseg.c -- Read input file, filter silence regions, and segment into utterances. *  * HISTORY *  * $Log: cont_fileseg.c,v $ * Revision 1.1.1.1  2006/05/23 18:45:02  dhuggins * re-importation * * Revision 1.13  2005/06/30 00:28:46  rkm * Kept within-utterance silences in rawmode * *  * 28-Jun-2005	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Modified to use new state variables in cont_ad_t. *  * Revision 1.12  2005/05/31 15:54:38  rkm * *** empty log message *** * * Revision 1.11  2005/05/24 20:56:58  rkm * Added min/max-noise parameters to cont_fileseg * * Revision 1.10  2005/05/13 23:28:43  egouvea * Changed null device to system dependent one: NUL for windows, /dev/null for everything else *  * $Log: cont_fileseg.c,v $ * Revision 1.1.1.1  2006/05/23 18:45:02  dhuggins * re-importation * * Revision 1.13  2005/06/30 00:28:46  rkm * Kept within-utterance silences in rawmode * * Revision 1.12  2005/05/31 15:54:38  rkm * *** empty log message *** * * Revision 1.11  2005/05/24 20:56:58  rkm * Added min/max-noise parameters to cont_fileseg * * Revision 1.9  2005/02/13 01:29:48  rkm * Fixed cont_ad_read to never cross sil/speech boundary, and rawmode * * Revision 1.8  2005/02/01 22:21:13  rkm * Added raw data logging, and raw data pass-through mode to cont_ad * * Revision 1.7  2004/07/16 00:57:11  egouvea * Added Ravi's implementation of FSG support. * * Revision 1.3  2004/06/25 14:58:05  rkm * *** empty log message *** * * Revision 1.2  2004/06/23 20:32:08  rkm * Exposed several cont_ad config parameters * *  * 27-Jun-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Created. */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <assert.h>#include <math.h>#include "s2types.h"#include "ad.h"#include "cont_ad.h"#include "err.h"static FILE *infp;              /* File being segmented */static int32 swap;/* Max size read by file_ad_read function on each invocation, for debugging */static int32 max_ad_read_size;#if defined(WIN32) && !defined(GNUWINCE)#define NULL_DEVICE "NUL"#else#define NULL_DEVICE "/dev/null"#endif/* * Need to provide cont_ad_init with a read function to read the input file. * This is it.  The ad_rec_t *r argument is ignored since there is no A/D * device involved. */static int32file_ad_read(ad_rec_t * r, int16 * buf, int32 max){    int32 i, k;    if (max > max_ad_read_size)        max = max_ad_read_size;    k = fread(buf, sizeof(int16), max, infp);    if (swap) {        for (i = 0; i < k; i++) {            buf[i] = ((buf[i] >> 8) & 0x00ff) | ((buf[i] << 8) & 0xff00);        }    }    return ((k > 0) ? k : -1);}static voidusagemsg(char *pgm){    E_INFO("Usage: %s \\\n", pgm);    E_INFOCONT("\t[-? | -h] \\\n");    E_INFOCONT("\t[-d | -debug] \\\n");    E_INFOCONT("\t[-sps <sampling-rate> (16000)] \\\n");    E_INFOCONT("\t[-b | -byteswap] \\\n");    E_INFOCONT        ("\t[{-s | -silsep} <length-silence-separator(sec) (0.5)]> \\\n");    E_INFOCONT("\t[-w | -writeseg] \\\n");    E_INFOCONT("\t[-min-noise <min-noise>] \\\n");    E_INFOCONT("\t[-max-noise <max-noise>] \\\n");    E_INFOCONT("\t[-delta-sil <delta-sil>] \\\n");    E_INFOCONT("\t[-delta-speech <delta-speech>] \\\n");    E_INFOCONT("\t[-sil-onset <sil-onset>] \\\n");    E_INFOCONT("\t[-speech-onset <speech-onset>] \\\n");    E_INFOCONT("\t[-adapt-rate <adapt-rate>] \\\n");    E_INFOCONT("\t[-max-adreadsize <ad_read_blksize>] \\\n");    E_INFOCONT("\t[-c <copy-input-file>] \\\n");    E_INFOCONT("\t[-r | -rawmode] \\\n");    E_INFOCONT("\t-i <input-file>\n");    exit(0);}/* * Read specified input file, segment it into utterances wherever a silence segment of * a given minimum duration is encountered.  Filter out long silences. * Utterances are written to files named 00000000.raw, 00000001.raw, 00000002.raw, etc. */intmain(int32 argc, char **argv){    cont_ad_t *cont;    int32 uttid, uttlen, starttime, siltime, sps, debug, writeseg, rawmode;    int16 buf[4096];    char *infile, *copyfile, segfile[1024];    FILE *fp;    float endsil;    ad_rec_t ad;    int32 i, k;    int32 winsize, leader, trailer;    int32 orig_min_noise, orig_max_noise;    int32 orig_delta_sil, orig_delta_speech;    int32 orig_speech_onset, orig_sil_onset;    int32 min_noise, max_noise;    int32 delta_sil, delta_speech;    int32 sil_onset, speech_onset;    float32 orig_adapt_rate;    float32 adapt_rate;    int32 total_speech_samples;    float32 total_speech_sec;    FILE *rawfp;    /* Set argument defaults */    cont = NULL;    sps = 16000;    swap = 0;    endsil = 0.5;    writeseg = 0;    min_noise = max_noise = -1;    delta_sil = delta_speech = -1;    sil_onset = speech_onset = -1;    adapt_rate = -1.0;    max_ad_read_size = (int32) 0x7ffffff0;    debug = 0;    infile = NULL;    copyfile = NULL;    rawfp = NULL;    rawmode = 0;    /* Parse arguments */    for (i = 1; i < argc; i++) {        if ((strcmp(argv[i], "-help") == 0)            || (strcmp(argv[i], "-h") == 0)            || (strcmp(argv[i], "-?") == 0)) {            usagemsg(argv[0]);        }        else if ((strcmp(argv[i], "-debug") == 0)                 || (strcmp(argv[i], "-d") == 0)) {            debug = 1;        }        else if (strcmp(argv[i], "-sps") == 0) {            i++;            if ((i == argc)                || (sscanf(argv[i], "%d", &sps) != 1)                || (sps <= 0)) {                E_ERROR("Invalid -sps argument\n");                usagemsg(argv[0]);            }        }        else if ((strcmp(argv[i], "-byteswap") == 0)                 || (strcmp(argv[i], "-b") == 0)) {            swap = 1;        }        else if ((strcmp(argv[i], "-silsep") == 0)                 || (strcmp(argv[i], "-s") == 0)) {            i++;            if ((i == argc)                || (sscanf(argv[i], "%f", &endsil) != 1)                || (endsil <= 0.0)) {                E_ERROR("Invalid -silsep argument\n");                usagemsg(argv[0]);            }        }        else if ((strcmp(argv[i], "-writeseg") == 0)                 || (strcmp(argv[i], "-w") == 0)) {            writeseg = 1;        }        else if (strcmp(argv[i], "-min-noise") == 0) {            i++;            if ((i == argc) ||                (sscanf(argv[i], "%d", &min_noise) != 1) ||                (min_noise < 0)) {                E_ERROR("Invalid -min-noise argument\n");                usagemsg(argv[0]);            }        }        else if (strcmp(argv[i], "-max-noise") == 0) {            i++;            if ((i == argc) ||                (sscanf(argv[i], "%d", &max_noise) != 1) ||                (max_noise < 0)) {                E_ERROR("Invalid -max-noise argument\n");                usagemsg(argv[0]);            }        }        else if (strcmp(argv[i], "-delta-sil") == 0) {            i++;
12 下一页
💿 文件大小 5925 K
👤 上传用户 flashlee2003200
📂 所属分类多媒体处理
🏷️ 相关标签

#WinCE #语音识别 #程序
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -