📄 cont_fileseg.c
字号:
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- *//* ==================================================================== * Copyright (c) 1999-2001 Carnegie Mellon University. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * This work was supported in part by funding from the Defense Advanced * Research Projects Agency and the National Science Foundation of the * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * *//* * cont_fileseg.c -- Read input file, filter silence regions, and segment into utterances. * * HISTORY * * $Log: cont_fileseg.c,v $ * Revision 1.1.1.1 2006/05/23 18:45:02 dhuggins * re-importation * * Revision 1.13 2005/06/30 00:28:46 rkm * Kept within-utterance silences in rawmode * * * 28-Jun-2005 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Modified to use new state variables in cont_ad_t. * * Revision 1.12 2005/05/31 15:54:38 rkm * *** empty log message *** * * Revision 1.11 2005/05/24 20:56:58 rkm * Added min/max-noise parameters to cont_fileseg * * Revision 1.10 2005/05/13 23:28:43 egouvea * Changed null device to system dependent one: NUL for windows, /dev/null for everything else * * $Log: cont_fileseg.c,v $ * Revision 1.1.1.1 2006/05/23 18:45:02 dhuggins * re-importation * * Revision 1.13 2005/06/30 00:28:46 rkm * Kept within-utterance silences in rawmode * * Revision 1.12 2005/05/31 15:54:38 rkm * *** empty log message *** * * Revision 1.11 2005/05/24 20:56:58 rkm * Added min/max-noise parameters to cont_fileseg * * Revision 1.9 2005/02/13 01:29:48 rkm * Fixed cont_ad_read to never cross sil/speech boundary, and rawmode * * Revision 1.8 2005/02/01 22:21:13 rkm * Added raw data logging, and raw data pass-through mode to cont_ad * * Revision 1.7 2004/07/16 00:57:11 egouvea * Added Ravi's implementation of FSG support. * * Revision 1.3 2004/06/25 14:58:05 rkm * *** empty log message *** * * Revision 1.2 2004/06/23 20:32:08 rkm * Exposed several cont_ad config parameters * * * 27-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Created. */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <assert.h>#include <math.h>#include "s2types.h"#include "ad.h"#include "cont_ad.h"#include "err.h"static FILE *infp; /* File being segmented */static int32 swap;/* Max size read by file_ad_read function on each invocation, for debugging */static int32 max_ad_read_size;#if defined(WIN32) && !defined(GNUWINCE)#define NULL_DEVICE "NUL"#else#define NULL_DEVICE "/dev/null"#endif/* * Need to provide cont_ad_init with a read function to read the input file. * This is it. The ad_rec_t *r argument is ignored since there is no A/D * device involved. */static int32file_ad_read(ad_rec_t * r, int16 * buf, int32 max){ int32 i, k; if (max > max_ad_read_size) max = max_ad_read_size; k = fread(buf, sizeof(int16), max, infp); if (swap) { for (i = 0; i < k; i++) { buf[i] = ((buf[i] >> 8) & 0x00ff) | ((buf[i] << 8) & 0xff00); } } return ((k > 0) ? k : -1);}static voidusagemsg(char *pgm){ E_INFO("Usage: %s \\\n", pgm); E_INFOCONT("\t[-? | -h] \\\n"); E_INFOCONT("\t[-d | -debug] \\\n"); E_INFOCONT("\t[-sps <sampling-rate> (16000)] \\\n"); E_INFOCONT("\t[-b | -byteswap] \\\n"); E_INFOCONT ("\t[{-s | -silsep} <length-silence-separator(sec) (0.5)]> \\\n"); E_INFOCONT("\t[-w | -writeseg] \\\n"); E_INFOCONT("\t[-min-noise <min-noise>] \\\n"); E_INFOCONT("\t[-max-noise <max-noise>] \\\n"); E_INFOCONT("\t[-delta-sil <delta-sil>] \\\n"); E_INFOCONT("\t[-delta-speech <delta-speech>] \\\n"); E_INFOCONT("\t[-sil-onset <sil-onset>] \\\n"); E_INFOCONT("\t[-speech-onset <speech-onset>] \\\n"); E_INFOCONT("\t[-adapt-rate <adapt-rate>] \\\n"); E_INFOCONT("\t[-max-adreadsize <ad_read_blksize>] \\\n"); E_INFOCONT("\t[-c <copy-input-file>] \\\n"); E_INFOCONT("\t[-r | -rawmode] \\\n"); E_INFOCONT("\t-i <input-file>\n"); exit(0);}/* * Read specified input file, segment it into utterances wherever a silence segment of * a given minimum duration is encountered. Filter out long silences. * Utterances are written to files named 00000000.raw, 00000001.raw, 00000002.raw, etc. */intmain(int32 argc, char **argv){ cont_ad_t *cont; int32 uttid, uttlen, starttime, siltime, sps, debug, writeseg, rawmode; int16 buf[4096]; char *infile, *copyfile, segfile[1024]; FILE *fp; float endsil; ad_rec_t ad; int32 i, k; int32 winsize, leader, trailer; int32 orig_min_noise, orig_max_noise; int32 orig_delta_sil, orig_delta_speech; int32 orig_speech_onset, orig_sil_onset; int32 min_noise, max_noise; int32 delta_sil, delta_speech; int32 sil_onset, speech_onset; float32 orig_adapt_rate; float32 adapt_rate; int32 total_speech_samples; float32 total_speech_sec; FILE *rawfp; /* Set argument defaults */ cont = NULL; sps = 16000; swap = 0; endsil = 0.5; writeseg = 0; min_noise = max_noise = -1; delta_sil = delta_speech = -1; sil_onset = speech_onset = -1; adapt_rate = -1.0; max_ad_read_size = (int32) 0x7ffffff0; debug = 0; infile = NULL; copyfile = NULL; rawfp = NULL; rawmode = 0; /* Parse arguments */ for (i = 1; i < argc; i++) { if ((strcmp(argv[i], "-help") == 0) || (strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "-?") == 0)) { usagemsg(argv[0]); } else if ((strcmp(argv[i], "-debug") == 0) || (strcmp(argv[i], "-d") == 0)) { debug = 1; } else if (strcmp(argv[i], "-sps") == 0) { i++; if ((i == argc) || (sscanf(argv[i], "%d", &sps) != 1) || (sps <= 0)) { E_ERROR("Invalid -sps argument\n"); usagemsg(argv[0]); } } else if ((strcmp(argv[i], "-byteswap") == 0) || (strcmp(argv[i], "-b") == 0)) { swap = 1; } else if ((strcmp(argv[i], "-silsep") == 0) || (strcmp(argv[i], "-s") == 0)) { i++; if ((i == argc) || (sscanf(argv[i], "%f", &endsil) != 1) || (endsil <= 0.0)) { E_ERROR("Invalid -silsep argument\n"); usagemsg(argv[0]); } } else if ((strcmp(argv[i], "-writeseg") == 0) || (strcmp(argv[i], "-w") == 0)) { writeseg = 1; } else if (strcmp(argv[i], "-min-noise") == 0) { i++; if ((i == argc) || (sscanf(argv[i], "%d", &min_noise) != 1) || (min_noise < 0)) { E_ERROR("Invalid -min-noise argument\n"); usagemsg(argv[0]); } } else if (strcmp(argv[i], "-max-noise") == 0) { i++; if ((i == argc) || (sscanf(argv[i], "%d", &max_noise) != 1) || (max_noise < 0)) { E_ERROR("Invalid -max-noise argument\n"); usagemsg(argv[0]); } } else if (strcmp(argv[i], "-delta-sil") == 0) { i++;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -