📄 adin-cut.c
字号:
/** * @file adin-cut.c * * <JA> * @brief 不兰キャプチャおよび铜不惰粗浮叫 * * 不兰掐蜗デバイスからの不兰デ〖タの艰り哈み·および * 不の赂哼する惰粗の浮叫を乖ないます. * * 铜不惰粗の浮叫は·慷升レベルと雾蛤汗眶を脱いて乖ないます. * 掐蜗们室ごとに·レベルしきい猛を臂える慷升について雾蛤汗眶をカウントし· * それが回年した眶笆惧になれば·不の惰粗倡幌浮叫として * 艰り哈みを倡幌します. 艰り哈み面に雾蛤汗眶が回年眶笆布になれば· * 艰り哈みを匿贿します. 悸狠には磋夫に磊り叫しを乖なうため·倡幌婶と * 匿贿婶の涟稿にマ〖ジンを积たせて磊り叫します. * * また·オプション回年 (-zmean)により DC offset の近殿をここで乖ないます. * offset は呵介の @a ZMEANSAMPLES 改のサンプルの士堆から纷换されます. * * 不兰デ〖タの艰り哈みと事乖して掐蜗不兰の借妄を乖ないます. このため· * 艰り哈んだ不兰デ〖タはその艰り哈み帽疤∈live掐蜗では办年箕粗·不兰ファイル * ではバッファサイズ∷ごとに·それらを苞眶としてコ〖ルバック簇眶が钙ばれます. * このコ〖ルバック簇眶としてデ〖タの瘦赂や泼魔翁藐叫· * ∈フレ〖ム票袋の∷千急借妄を渴める簇眶を回年します. * * マイク掐蜗や NetAudio 掐蜗などの Live 掐蜗では· * コ〖ルバック柒の借妄が脚く借妄が掐蜗の庐刨に纳い烧かないと· * デバイスのバッファが邦れ·掐蜗们室がロストする眷圭があります. * このエラ〖を松ぐため·悸乖茨董で pthread が蝗脱材墙である眷圭· * 不兰艰り哈みˇ惰粗浮叫婶は塑挛と迫惟したスレッドで瓢侯します. * この眷圭·このスレッドは塑スレッドとバッファ @a speech を拆して * 笆布のように定拇瓢侯します. * * - Thread 1: 不兰艰り哈みˇ不惰粗浮叫スレッド * - デバイスから不兰デ〖タを粕み哈みながら不惰粗浮叫を乖なう. * 浮叫した不惰粗のサンプルはバッファ @a speech の琐萨に绵肌 * 纳裁される. * - このスレッドは弹瓢箕から塑スレッドから迫惟して瓢侯し· * 惧淡の瓢侯を乖ない鲁ける. * - Thread 2: 不兰借妄ˇ千急借妄を乖なう塑スレッド * - バッファ @a speech を办年箕粗ごとに雌浑し·糠たなサンプルが * Thread 1 によって纳裁されたらそれらを借妄し·借妄が姜位した * 尸バッファを低める. * * </JA> * <EN> * @brief Capture audio and detect sound trigger * * This file contains functions to get waveform from an audio device * and detect speech/sound input segment * * Sound detection at this stage is based on level threshold and zero * cross count. The number of zero cross are counted for each * incoming sound fragment. If the number becomes larger than * specified threshold, the fragment is treated as a beginning of * sound/speech input (trigger on). If the number goes below the threshold, * the fragment will be treated as an end of input (trigger * off). In actual detection, margins are considered on the beginning * and ending point, which will be treated as head and tail silence * part. DC offset normalization will be also performed if configured * so (-zmean). 
 *
 * The triggered input data should be processed concurrently with the
 * detection for real-time recognition.  For this purpose, after the
 * beginning of input has been detected, the following triggered input
 * fragments (samples of a certain period in live input, or buffer size in
 * file input) are passed sequentially, in turn, to a callback function.
 * The callback function should be specified by the caller, typically to
 * store the recorded data, or to feed them into a frame-synchronous
 * recognition process.
 *
 * When the source is a live input such as a microphone, the device buffer
 * will overflow if the processing callback is slow.  In that case, some
 * input fragments may be lost.  To prevent this, the A/D-in part together
 * with sound detection will become an independent thread if @em pthread
 * functions are supported.  The A/D-in and detection thread will cooperate
 * with the original main thread through the @a speech buffer, as follows:
 *
 * - Thread 1: A/D-in and speech detection thread
 *    - reads audio input from the source device and performs sound
 *      detection.  The detected fragments are immediately appended
 *      to the @a speech buffer.
 *    - will be detached after creation, and runs forever until the main
 *      thread dies.
 * - Thread 2: Main thread
 *    - performs input processing and recognition.
 *    - watches the @a speech buffer, and if it detects new samples
 *      appended by Thread 1, processes the appended samples
 *      and purges the finished samples from the @a speech buffer.
* * </EN> * * @sa adin.c * * @author Akinobu LEE * @date Sat Feb 12 13:20:53 2005 * * $Revision: 1.10 $ * *//* * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology * All rights reserved */#include <julius/julius.h>#ifdef HAVE_PTHREAD#include <pthread.h>#endif/// Define this if you want to output a debug message for threading#undef THREAD_DEBUG/// Enable some fixes relating adinnet+module#define TMP_FIX_200602 /** * <EN> * @brief Set up parameters for A/D-in and input detection. * * Set variables in work area according to the configuration values. * * </EN> * <JA> * @brief 不兰磊り叫し脱称硷パラメ〖タをセット * * 肋年を傅に磊り叫し脱のパラメ〖タを纷换し·ワ〖クエリアにセットします. * * </JA> * @param adin [in] AD-in work area * @param jconf [in] configuration data * * @callergraph * @callgraph */booleanadin_setup_param(ADIn *adin, Jconf *jconf){ float samples_in_msec; int freq; if (jconf->input.sfreq <= 0) { jlog("ERROR: adin_setup_param: going to set smpfreq to %d\n", jconf->input.sfreq); return FALSE; } if (jconf->detect.silence_cut < 2) { adin->adin_cut_on = (jconf->detect.silence_cut == 1) ? TRUE : FALSE; } else { adin->adin_cut_on = adin->silence_cut_default; } adin->strip_flag = jconf->preprocess.strip_zero_sample; adin->thres = jconf->detect.level_thres;#ifdef HAVE_PTHREAD if (adin->enable_thread && jconf->decodeopt.segment) { adin->ignore_speech_while_recog = FALSE; } else { adin->ignore_speech_while_recog = TRUE; }#endif adin->need_zmean = jconf->preprocess.use_zmean; /* calc & set internal parameter from configuration */ freq = jconf->input.sfreq; samples_in_msec = (float) freq / (float)1000.0; /* cycle buffer length = head margin length */ adin->c_length = (int)((float)jconf->detect.head_margin_msec * samples_in_msec); /* in msec. 
*/ /* compute zerocross trigger count threshold in the cycle buffer */ adin->noise_zerocross = jconf->detect.zero_cross_num * adin->c_length / freq; /* variables that comes from the tail margin length (in wstep) */ adin->nc_max = (int)((float)(jconf->detect.tail_margin_msec * samples_in_msec / (float)DEFAULT_WSTEP)) + 2; adin->sbsize = jconf->detect.tail_margin_msec * samples_in_msec + (adin->c_length * jconf->detect.zero_cross_num / 200); adin->c_offset = 0;#ifdef HAVE_PTHREAD adin->transfer_online = FALSE; adin->speech = NULL;#endif /**********************/ /* initialize buffers */ /**********************/ adin->buffer = (SP16 *)mymalloc(sizeof(SP16) * MAXSPEECHLEN); adin->cbuf = (SP16 *)mymalloc(sizeof(SP16) * adin->c_length); adin->swapbuf = (SP16 *)mymalloc(sizeof(SP16) * adin->sbsize); if (adin->down_sample) { adin->io_rate = 3; /* 48 / 16 (fixed) */ adin->buffer48 = (SP16 *)mymalloc(sizeof(SP16) * MAXSPEECHLEN * adin->io_rate); } if (adin->adin_cut_on) { init_count_zc_e(&(adin->zc), adin->c_length); } adin->need_init = TRUE; adin->rehash = FALSE; adin->total_captured_len = 0; return TRUE;}/** * <EN> * Purge samples already processed in the temporary buffer. * </EN> * <JA> * テンポラリバッファにある借妄されたサンプルをパ〖ジする. * </JA> * * @param a [in] AD-in work area * @param from [in] Purge samples in range [0..from-1]. * */static voidadin_purge(ADIn *a, int from){ if (from > 0 && a->current_len - from > 0) { memmove(a->buffer, &(a->buffer[from]), (a->current_len - from) * sizeof(SP16)); } a->bp = a->current_len - from;}/** * <EN> * @brief Main A/D-in and sound detection function * * This function read inputs from device and do sound detection * (both up trigger and down trigger) until end of device. * * In threaded mode, this function will detach and loop forever as * ad-in thread, (adin_thread_create()) storing triggered samples in * speech[], and telling the status to another process thread via @a * transfer_online in work area. 
The process thread, called from * adin_go(), polls the length of speech[] and transfer_online in work area * and process them if new samples has been stored. * * In non-threaded mode, this function will be called directly from * adin_go(), and triggered samples are immediately processed within here. * * Threaded mode should be used for "live" input such as microphone input * where input is infinite and capture delay is severe. For file input, * adinnet input and other "buffered" input, non-threaded mode will be used. * * Argument "ad_process()" should be a function to process the triggered * input samples. On real-time recognition, a frame-synchronous search * function for the first pass will be specified by the caller. The current * input will be segmented if it returns 1, and will be terminated as error * if it returns -1. * * When the argument "ad_check()" specified, it will be called periodically. * When it returns less than 0, this function will be terminated. * * </EN> * <JA> * @brief 不兰掐蜗と不浮叫を乖うメイン簇眶 * * ここでは不兰掐蜗の艰り哈み·不惰粗の倡幌ˇ姜位の浮叫を乖います. * * スレッドモ〖ド箕·この簇眶は迫惟したAD-inスレッドとしてデタッチされます. * (adin_thread_create()), 不掐蜗を浮梦するとこの簇眶はワ〖クエリア柒の * speech[] にトリガしたサンプルを淡峡し·かつ transfer_online を TRUE に * セットします. Julius のメイン借妄スレッド (adin_go()) は * adin_thread_process() に败乖し·そこで transfer_online 箕に speech[] を * 徊救しながら千急借妄を乖います. * * 润スレッドモ〖ド箕は·メイン借妄簇眶 adin_go() は木儡この簇眶を钙び· * 千急借妄はこの柒婶で木儡乖われます. * * スレッドモ〖ドはマイク掐蜗など·掐蜗が痰嘎で借妄の觅变がデ〖タの * 艰りこぼしを痉くような live input で脱いられます. 办数·ファイル掐蜗 * やadinnet 掐蜗のような buffered input では润スレッドモ〖ドが脱いられます. * * 苞眶の ad_process は·艰り哈んだサンプルに滦して借妄を乖う簇眶を * 回年します. リアルタイム千急を乖う眷圭は·ここに妈1パスの千急借妄を * 乖う簇眶が回年されます. 手り猛が 1 であれば·掐蜗をここで惰磊ります. * -1 であればエラ〖姜位します. * * 苞眶の ad_check は办年借妄ごとに帆り手し钙ばれる簇眶を回年します. この * 簇眶の手り猛が 0 笆布だった眷圭·掐蜗を篓箕面们して簇眶を姜位します. * </JA> * * @param ad_process [in] function to process triggerted input. * @param ad_check [in] function to be called periodically. 
* @param recog [in] engine instance * * @return 2 when input termination requested by ad_process(), 1 when * if detect end of an input segment (down trigger detected after up * trigger), 0 when reached end of input device, -1 on error, -2 when * input termination requested by ad_check(). * * @callergraph * @callgraph * */static intadin_cut(int (*ad_process)(SP16 *, int, Recog *), int (*ad_check)(Recog *), Recog *recog){ ADIn *a; int i; int ad_process_ret; int imax, len, cnt; int wstep; int end_status; /* return value */ boolean transfer_online_local; /* local repository of transfer_online */ int zc; /* count of zero cross */ a = recog->adin; /* * there are 3 buffers: * temporary storage queue: buffer[] * cycle buffer for zero-cross counting: (in zc_e) * swap buffer for re-starting after short tail silence * * Each samples are first read to buffer[], then passed to count_zc_e() * to find trigger. Samples between trigger and end of speech are * passed to (*ad_process) with pointer to the first sample and its length. 
* */ if (a->need_init) { a->bpmax = MAXSPEECHLEN; a->bp = 0; a->is_valid_data = FALSE; /* reset zero-cross status */ if (a->adin_cut_on) { reset_count_zc_e(&(a->zc), a->thres, a->c_length, a->c_offset); } a->end_of_stream = FALSE; a->nc = 0; a->sblen = 0; a->need_init = FALSE; /* for next call */ } /****************/ /* resume input */ /****************/ // if (!a->adin_cut_on && a->is_valid_data == TRUE) { // callback_exec(CALLBACK_EVENT_SPEECH_START, recog); // } /*************/ /* main loop */ /*************/ for (;;) { /****************************/ /* read in new speech input */ /****************************/ if (a->end_of_stream) { /* already reaches end of stream, just process the rest */ a->current_len = a->bp; } else { /*****************************************************/ /* get samples from input device to temporary buffer */ /*****************************************************/ /* buffer[0..bp] is the current remaining samples */ /* mic input - samples exist in a device buffer tcpip input - samples exist in a socket file input - samples in a file Return value is the number of read samples. If no data exists in the device (in case of mic input), ad_read() will return 0. If reached end of stream (in case end of file or receive end ack from tcpip client), it will return -1. If error, returns -2. 
*/ if (a->down_sample) { /* get 48kHz samples to temporal buffer */ cnt = (*(a->ad_read))(a->buffer48, (a->bpmax - a->bp) * a->io_rate); } else { cnt = (*(a->ad_read))(&(a->buffer[a->bp]), a->bpmax - a->bp); } if (cnt < 0) { /* end of stream / segment or error */ /* set the end status */ switch(cnt) { case -1: /* end of stream */ a->input_side_segment = FALSE; end_status = 0; break; case -2: a->input_side_segment = FALSE; end_status = -1; break; case -3: a->input_side_segment = TRUE; end_status = 0; } /* now the input has been ended, we should not get further speech input in the next loop, instead just process the samples in the temporary buffer until the entire data is processed. */ a->end_of_stream = TRUE; cnt = 0; /* no new input */ /* in case the first trial of ad_read() fails, exit this loop */ if (a->bp == 0) break; } if (a->down_sample && cnt != 0) { /* convert to 16kHz */ cnt = ds48to16(&(a->buffer[a->bp]), a->buffer48, cnt, a->bpmax - a->bp, a->ds); if (cnt < 0) { /* conversion error */ jlog("ERROR: adin_cut: error in down sampling\n"); end_status = -1; a->end_of_stream = TRUE; cnt = 0; if (a->bp == 0) break; } } /*************************************************/ /* execute callback here for incoming raw data stream.*/ /* the content of buffer[bp...bp+cnt-1] or the */ /* length can be modified in the functions. 
*/ /*************************************************/ if (cnt > 0) {#ifdef ENABLE_PLUGIN plugin_exec_adin_captured(&(a->buffer[a->bp]), cnt);#endif callback_exec_adin(CALLBACK_ADIN_CAPTURED, recog, &(a->buffer[a->bp]), cnt); /* record total number of captured samples */ a->total_captured_len += cnt; } /*************************************************/ /* some speech processing for the incoming input */ /*************************************************/ if (cnt > 0) { if (a->strip_flag) { /* strip off successive zero samples */ len = strip_zero(&(a->buffer[a->bp]), cnt); if (len != cnt) cnt = len; } if (a->need_zmean) { /* remove DC offset */ sub_zmean(&(a->buffer[a->bp]), cnt); } } /* current len = current samples in buffer */ a->current_len = a->bp + cnt; }#ifdef THREAD_DEBUG if (a->end_of_stream) { jlog("DEBUG: adin_cut: stream already ended\n"); } if (cnt > 0) { jlog("DEBUG: adin_cut: get %d samples [%d-%d]\n", a->current_len - a->bp, a->bp, a->current_len); }#endif /**************************************************/ /* call the periodic callback (non threaded mode) */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -