📄 speechhandler.cxx
字号:
/* ==================================================================== * The Vovida Software License, Version 1.0 * * Copyright (c) 2000 Vovida Networks, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The names "VOCAL", "Vovida Open Communication Application Library", * and "Vovida Open Communication Application Library (VOCAL)" must * not be used to endorse or promote products derived from this * software without prior written permission. For written * permission, please contact vocal@vovida.org. * * 4. Products derived from this software may not be called "VOCAL", nor * may "VOCAL" appear in their name, without prior written * permission of Vovida Networks, Inc. * * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL VOVIDA * NETWORKS, INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT DAMAGES * IN EXCESS OF $1,000, NOR FOR ANY INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * ==================================================================== * * This software consists of voluntary contributions made by Vovida * Networks, Inc. and many individuals on behalf of Vovida Networks, * Inc. For more information on Vovida Networks, Inc., please see * <http://www.vovida.org/>. * *//* ==================================================================== * Copyright (c) 1999-2001 Carnegie Mellon University. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * This work was supported in part by funding from the Defense Advanced * Research Projects Agency and the National Science Foundation of the * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== */static const char* const SpeechHandler_cxx_Version = "$Id: SpeechHandler.cxx,v 1.1 2002/05/23 23:07:21 sprajpat Exp $";#ifdef WIN32#include <posixwin32.h>#else#include <unistd.h>#include <sys/time.h>#endif#include <stdio.h>extern "C" {#include "s2types.h"#include "basic_types.h"#include "CM_macros.h"#include "err.h"#include "ad.h"#include "cont_ad.h"#include "search_const.h"#include "msd.h"#include "list.h"#include "hash.h"#include "lmclass.h"#include "lm_3g.h"#include "dict.h"#include "kb.h"#include "fbs.h" unsigned char linear2ulaw( int pcm_val ); int ulaw2linear( unsigned char u_val );};#include "SpeechHandler.hxx"#include "cpLog.h"#define MIN_ENDSIL 5000 //samples of silence to declare end utterance#define MAX_ALT 30int G_tCount=0;int G_tCount2=0;using namespace Vocal;voidSpeechHandler::initialize(const Data dataDir, const Data modelName){ myInitFlag = true; myModelDataDir = dataDir; myModelDataDir += "/lm/"; myModelDataDir += modelName; myModelDataDir += "/"; myModelName = modelName; myHMMDir = dataDir; myHMMDir += "/hmm/6k/"; Data filePrefix(myModelDataDir); filePrefix += modelName;#if 0 /* Open audio device and calibrate for background noise level */ if ((myAd = ad_open ()) == 0) { cpLog(LOG_ERR, "Failed to open the audio device"); return; } if ((myCont = cont_ad_init (myAd, ad_read)) == 0) { cpLog(LOG_ERR, "cont_ad_init failed"); return; } //Opened the device successfully, calibrate calibrateDevice();#endif //make the argument list for fbs_init char** argBuf = new char*[30]; int argCount = 0; for(int i =0; i < 30; i++) argBuf[i] = new char[128]; sprintf(argBuf[argCount], "-kbdumpdir"); argCount++; sprintf(argBuf[argCount], "%s", myModelDataDir.logData()); argCount++; sprintf(argBuf[argCount], "-lmfn"); argCount++; sprintf(argBuf[argCount], "%s", Data( filePrefix+ ".lm").logData()); argCount++; sprintf(argBuf[argCount], "-dictfn"); argCount++; sprintf(argBuf[argCount], "%s", Data( filePrefix+ ".dic").logData()); argCount++; sprintf(argBuf[argCount], "-noisedict"); argCount++; sprintf(argBuf[argCount], "%s", Data( myHMMDir+ "noisedic").logData()); argCount++; sprintf(argBuf[argCount], "-phnfn"); argCount++; sprintf(argBuf[argCount], "%s", Data( myHMMDir+ "phone").logData()); argCount++; sprintf(argBuf[argCount], "-mapfn"); argCount++; sprintf(argBuf[argCount], "%s", Data( myHMMDir+ "map").logData()); argCount++; sprintf(argBuf[argCount], "-hmmdir"); argCount++; sprintf(argBuf[argCount], "%s", myHMMDir.logData()); argCount++; sprintf(argBuf[argCount], "-hmmdirlist"); argCount++; sprintf(argBuf[argCount], "%s", myHMMDir.logData()); argCount++; sprintf(argBuf[argCount], "-8bsen"); argCount++; sprintf(argBuf[argCount], "TRUE"); argCount++; sprintf(argBuf[argCount], "-sendumpfn"); argCount++; sprintf(argBuf[argCount], "%s", Data( myHMMDir + "sendump").logData()); argCount++; sprintf(argBuf[argCount], "-cbdir"); argCount++; sprintf(argBuf[argCount], "%s", myHMMDir.logData()); argCount++; sprintf(argBuf[argCount], "-verbose"); argCount++; sprintf(argBuf[argCount], "7"); argCount++;// sprintf(argBuf[argCount], "-agcnoise"); argCount++;// sprintf(argBuf[argCount], "TRUE"); argCount++;// sprintf(argBuf[argCount], "-samp"); argCount++;// sprintf(argBuf[argCount], "8000"); argCount++; /* Initialize recognition engine */ fbs_init (argCount, argBuf); delete []argBuf; myStartwid = kb_get_word_id ("<s>"); cpLog(LOG_DEBUG, "Done initializing SpeechHandler");}voidSpeechHandler::calibrateDevice(){ cpLog (LOG_INFO, "Calibrating background noise level..."); ad_start_rec (myAd); cont_ad_calib (myCont); ad_stop_rec (myAd); cpLog (LOG_INFO, "done.");}SpeechHandler::~SpeechHandler(){ shutdown(); /* Close recognition engine */ fbs_end ();}voidSpeechHandler::thread(){ ///Listen for utterences int16 adbuf[4096]; int32 n_alt=0; int32 k, fr, ts, rem; char *hyp; char word1[1024], word2[1024], word3[1024]; search_hyp_t **alt; for (;;) { if(isShutdown()) break; // Resume A/D recording for next utterance if (ad_start_rec (myAd) < 0) { cpLog(LOG_ERR, "Failed to read the audio device"); return; } // Await beginning of next utterance while ((k = cont_ad_read (myCont, adbuf, 4096)) == 0) usleep(100000); if (k < 0) { cpLog(LOG_ERR, "Continuos read failed"); return; } // Non-zero amount of data received; start recognition of new utterance. // NULL argument to uttproc_begin_utt => automatic generation of // utterance-id. if (uttproc_begin_utt (0) < 0) { cpLog(LOG_ERR, "uttproc_begin_utt() failed"); return; } uttproc_rawdata (adbuf, k, 0); cpLog(LOG_DEBUG, "Listening..."); // Note timestamp for this first block of data ts = myCont->read_ts; // Decode utterance until end (marked by a "long" silence, >1sec) for (;;) { // Read non-silence audio data, if any, from continuous // listening module if ((k = cont_ad_read (myCont, adbuf, 4096)) < 0) { cpLog(LOG_ERR, "cont_ad_read failed"); break; } if (k == 0) { // No speech data available; check current timestamp // with most recent speech to see if more than min_endsil. // If so, end of utterance. if ((myCont->read_ts - ts) > MIN_ENDSIL) break; } else { // New speech data received; note current timestamp ts = myCont->read_ts; } // Decode whatever data was read above. NOTE: Non-blocking mode!! // rem = #frames remaining to be decoded upon return from the // function. rem = uttproc_rawdata (adbuf, k, 0); // If no work to be done, sleep a bit if ((rem == 0) && (k == 0)) usleep (100000); } // Utterance ended; flush any accumulated, unprocessed A/D data and stop // listening until current utterance completely decoded ad_stop_rec (myAd); while (ad_read (myAd, adbuf, 4096) >= 0); cont_ad_reset (myCont); cpLog(LOG_DEBUG, "Stopped listening"); // Finish decoding, obtain result uttproc_end_utt (); if (uttproc_result (&fr, &hyp, 1) < 0) { cpLog(LOG_ERR, "uttproc_result failed"); } cpLog(LOG_INFO, "Result %d: %s\n", fr, hyp); k = sscanf (hyp, "%s %s %s", word1, word2, word3); if (k > 0) { search_save_lattice (); n_alt = search_get_alt (MAX_ALT, 0, fr-1, -1, myStartwid, &alt); if (!playWhatYouGot(hyp, alt, n_alt)) { cpLog(LOG_ERR, "Failed to play"); return; } } // Exit if the utterance was "BYE or QUIT" if ((k == 1) && ((strcmp (word1, "bye") == 0) || (strcmp (word1, "BYE") == 0)) && ((strcmp (word2, "quit") == 0) || (strcmp (word2, "QUIT") == 0))) return; } return;}boolSpeechHandler::playWhatYouGot(char* best, search_hyp_t **alt, int32 n_alt){ //Play back using TEXT -> Speech //TODO //For debug perposes, just dump the words also // best result first search_hyp_t *h; cpLog(LOG_DEBUG, "Best result: %s", best); // Compose strings for and print each of the additional hypotheses cpLog(LOG_DEBUG, "Additional hypotheses..."); for (int i = 0; i < n_alt; i++) { for (h = alt[i]; h; h = h->next) { if (h->wid != myStartwid) { // Filter out the startword <s> cpLog(LOG_DEBUG, "%d. %s", h->word); } } } cpLog(LOG_DEBUG, "Additional hypotheses done."); return true;}voidSpeechHandler::recognize(unsigned char* buf, int size){ if(myBuf == 0) { myBuf = new unsigned char[25000]; myPktSize = 0; } //Check for noise/silence packet bool silence = true; for(int i = 55; i < 63; i++) { unsigned char c = *(buf+i); c &= 0xf0; if(!(!(c ^ 0x70) || !(c ^ 0xf0) )) { silence = false; //break; } else { silence = true; } } if(!silence) { cpLog(LOG_DEBUG, "Not silence"); if(!myStartUtterance) { myStartUtterance = true; G_tCount = 0; G_tCount2 = 0; myPktSize = 0; } memcpy(myBuf+(myPktSize), buf, size); myPktSize += size; G_tCount2++; } else { cpLog(LOG_DEBUG, "Filtering silence"); G_tCount++; if(myStartUtterance && (G_tCount > 2 )) { //Give data for recognition if(G_tCount2 > 5) { recognize_impl(myBuf, myPktSize); } myPktSize = 0; G_tCount = 0; myStartUtterance = false ; } else if((G_tCount < 2) && G_tCount2 > 0) { memcpy(myBuf+(myPktSize), buf, size); myPktSize += size; } }}DataSpeechHandler::recognize_impl(unsigned char* buf, int size){ cpLog(LOG_INFO, "In SpeechHandler::recognize_impl: %d", size); Data retData; int16* adbuf = new int16[size]; int32 n_alt=0; int32 k, fr, rem; char *hyp; char word1[1024], word2[1024], word3[1024]; search_hyp_t **alt; if (uttproc_begin_utt (0) < 0) { cpLog(LOG_ERR, "uttproc_begin_utt() failed"); return retData; } ad_mu2li(adbuf, buf, size); uttproc_rawdata (adbuf, size, 0); // Finish decoding, obtain result uttproc_end_utt (); if (uttproc_result (&fr, &hyp, 1) < 0) { cpLog(LOG_ERR, "uttproc_result failed"); return retData; } cpLog(LOG_INFO, "Result %d: %s\n", fr, hyp); k = sscanf (hyp, "%s %s %s", word1, word2, word3); if (k > 0) { search_save_lattice (); n_alt = search_get_alt (MAX_ALT, 0, fr-1, -1, myStartwid, &alt); retData = hyp; } delete []adbuf; return retData;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -