// vxirec_utils.cpp
// (source-viewer page label: "Font size:")
/****************License************************************************
* Vocalocity OpenVXI
* Copyright (C) 2004-2005 by Vocalocity, Inc. All Rights Reserved.
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
* Vocalocity, the Vocalocity logo, and VocalOS are trademarks or
* registered trademarks of Vocalocity, Inc.
* OpenVXI is a trademark of Scansoft, Inc. and used under license
* by Vocalocity.
***********************************************************************/
#include <iostream>
#include <sstream>
#include <VXIvalue.h>
#include <VXItrd.h>
#include "VXIrec_utils.h"
#include "XMLChConverter.hpp"
#include "LogBlock.hpp"
// Definition of the static member VXIrecData::diagLogBase. It starts at 0 and
// is overwritten with the caller-supplied diagnostic base in
// VXIrecData::Initialize(); the SAX handler adds it to its diagnostic tags.
VXIunsigned VXIrecData::diagLogBase = 0;
/* Locale-independent whitespace test (avoids the locale-dependent ctype.h
 * macros). Only space, tab, line feed and carriage return count as space. */
static inline bool VXIrecIsSpace(const wchar_t c)
{
  switch (c) {
    case L' ':
    case L'\t':
    case L'\n':
    case L'\r':
      return true;
    default:
      return false;
  }
}
/******************************************
* VXIrecWordList : A list of words
******************************************/
class VXIrecWordList : public VXIrecGrammar {
public:
  // Kind of grammar held by this word list.
  enum GTYPE {
    GTYPE_NONE,
    GTYPE_DTMF,
    GTYPE_SPEECH
  };

  // Data members are public and are filled in directly by the code that
  // builds the word list.
  GRAMMARINFOLIST * grammarInfoList;  // deleted by ~VXIrecWordList()
  bool enabled;                       // value reported by IsEnabled()
  GTYPE gtype;                        // GTYPE_DTMF makes IsDtmf() return true

  // VXIrecGrammar functions...
  virtual void SetEnabled(bool e) { enabled = e; }
  virtual bool IsEnabled() const { return enabled; }
  virtual GRAMMARINFOLIST * GetGrammarInfoList() const { return grammarInfoList; }
  // Looks up the entry whose word equals `input`; defined out of line.
  virtual bool GetGrammarInfo(const VXIchar* input,
                              VXIrecGrammarInfo ** gramInfo) const;
  virtual bool IsDtmf() const { return gtype == GTYPE_DTMF; }

  VXIrecWordList();
  virtual ~VXIrecWordList();
};
// Creates a disabled word list with no grammar type and no grammar-info list.
// The initializers are written in member declaration order (grammarInfoList,
// enabled, gtype), which is the order C++ executes them in regardless of how
// they are listed; this avoids a misleading list and -Wreorder warnings.
VXIrecWordList::VXIrecWordList()
  : grammarInfoList(NULL),
    enabled(false),
    gtype(VXIrecWordList::GTYPE_NONE)
{ }
// Frees the grammar-info list held by this word list (deleting a NULL
// pointer is a no-op, so a list that was never assigned is fine).
VXIrecWordList::~VXIrecWordList()
{
  delete grammarInfoList;
}
bool VXIrecWordList::GetGrammarInfo(const VXIchar* input,
VXIrecGrammarInfo ** gramInfo) const
{
for( GRAMMARINFOLIST::iterator i = grammarInfoList->begin();
i != grammarInfoList->end(); ++i ) {
if( input == (*i).word ) {
*gramInfo = &(*i);
return true;
}
}
return false;
}
/******************************************
* GrammarSaxHandler : The grammar sax parser
******************************************/
//////////////////////////////////////////////////////
// File-local state shared by the functions in this translation unit.
// Mutex created on first VXIrecData::Initialize() and destroyed in ShutDown().
static VXItrdMutex* gblMutex = NULL;
// Diagnostic tag base passed to VXIrecData::Initialize().
static VXIunsigned gblDiagBase = 0;
// Log interface passed to VXIrecData::Initialize().
static VXIlogInterface* gblLog = NULL;
// NOTE(review): the next two are not referenced in the visible part of this
// file; presumably used to build unique grammar-root names - confirm.
static VXIulong GRAM_ROOT_COUNTER = 0;
static const VXIchar * const GRAM_ROOT_PREFIX = L"_GRAMROOT_";
// Replaces every occurrence of sstr in modstr with rstr, starting the search
// at offset pos0. Text inserted by a replacement is never rescanned, so a
// replacement that itself contains sstr cannot loop.
//
// modstr - string edited in place.
// sstr   - text to search for; an empty search string is a no-op (it would
//          otherwise match at every position and never terminate).
// rstr   - replacement text.
// pos0   - offset at which to start searching.
//
// Always returns vxistring::npos (no further match remains).
static vxistring::size_type ReplaceChar(vxistring &modstr,
                                        const vxistring &sstr,
                                        const vxistring &rstr,
                                        vxistring::size_type pos0)
{
  if (sstr.empty()) return vxistring::npos;

  // Iterative rather than recursive so the stack depth does not grow with
  // the number of matches.
  vxistring::size_type pos = modstr.find(sstr, pos0);
  while (pos != vxistring::npos) {
    modstr.replace(pos, sstr.length(), rstr);
    pos = modstr.find(sstr, pos + rstr.length());
  }
  return vxistring::npos;
}
// Rewrites XML-special characters in modstr as plain words: '&' becomes
// "and", '<' becomes "less than" and '>' becomes "greater than". Both the
// escaped entity spellings and the bare characters are handled.
//
// The entities are listed before the bare characters on purpose: if '&' were
// replaced first, "&lt;" would be broken up into "andlt;" and never matched.
static void FixEscapeChar(vxistring & modstr)
{
  const vxistring a_sym[] = { L"&amp;", L"&lt;", L"&gt;", L"&", L"<", L">" };
  const vxistring r_sym[] = { L"and", L"less than", L"greater than",
                              L"and", L"less than", L"greater than" };
  const unsigned int count = sizeof(a_sym) / sizeof(a_sym[0]);

  for (unsigned int i = 0; i < count; ++i)
    ReplaceChar(modstr, a_sym[i], r_sym[i], 0);
}
// Normalizes whitespace in str, in place:
//   - carriage return, line feed and tab are treated as spaces,
//   - every run of spaces collapses to a single space,
//   - leading and trailing spaces are removed.
// A string made only of whitespace becomes empty.
//
// Done as one compacting pass (read index `in`, write index `out`) instead of
// erasing one character at a time, so it is linear in the string length, uses
// size_type indices throughout, and never indexes an emptied string.
static void PruneWhitespace(vxistring & str)
{
  const vxistring::size_type len = str.length();
  vxistring::size_type out = 0;
  bool pendingSpace = false;   // a separator is owed before the next word

  for (vxistring::size_type in = 0; in < len; ++in) {
    const vxistring::value_type c = str[in];
    if (c == L' ' || c == L'\r' || c == L'\n' || c == L'\t') {
      // Whitespace before the first kept character is dropped entirely.
      pendingSpace = (out > 0);
      continue;
    }
    // out never overtakes in: a pending space implies at least one input
    // character was skipped, so both writes land on already-read positions.
    if (pendingSpace) {
      str[out++] = L' ';
      pendingSpace = false;
    }
    str[out++] = c;
  }

  // Drop the tail; a trailing pending space is simply never written.
  str.erase(out);
}
// Creates a handler that logs through `l`. It starts with no grammar-info
// list attached, outside any <tag> element, with an unknown current node
// type, and with the DTMF flag cleared.
GrammarSaxHandler::GrammarSaxHandler(VXIlogInterface *l)
  : log(l),
    grammarInfoList(NULL),
    processTag(false),
    nodeType(UNKNOWN_NODE),
    isDTMFGram(false)
{
}
// Nothing to release here: the destructor does not free the log interface
// or the grammar-info list.
GrammarSaxHandler::~GrammarSaxHandler()
{
}
void GrammarSaxHandler::startElement(const XMLCh* const name,
AttributeList& attributes)
{
const VXIchar* fnname = L"startElement";
LogBlock logger(log, VXIrecData::diagLogBase, fnname, VXIREC_MODULE);
XMLChToVXIchar gName(name);
logger.logDiag(DIAG_TAG_PARSE, L"%s%s", L"Element: ", gName.c_str());
// Show attributes
if( logger.isEnabled(VXIrecData::diagLogBase+DIAG_TAG_PARSE) )
{
for(unsigned int i = 0; i < attributes.getLength(); i++)
{
XMLChToVXIchar gAttr(attributes.getName(i));
XMLChToVXIchar gAttrVal(attributes.getValue(i));
logger.logDiag(DIAG_TAG_PARSE, L"%s%s%s", gAttr.c_str(),
L" = ", gAttrVal.c_str());
}
}
if( wcscmp(GRAMMAR, gName.c_str()) == 0 ) {
nodeType = GRAMMAR_NODE;
// Retrieve grammar attributes
for(unsigned int i = 0; i < attributes.getLength(); i++)
{
XMLChToVXIchar gAttr(attributes.getName(i));
XMLChToVXIchar gAttrVal(attributes.getValue(i));
// Got DTMF grammar
if( wcscmp(L"mode", gAttr.c_str()) == 0 &&
wcscmp(L"dtmf", gAttrVal.c_str()) == 0 )
{
isDTMFGram = true;
}
}
}
else if( wcscmp(ITEM, gName.c_str()) == 0 ) {
// <item>
nodeType = ITEM_NODE;
}
else if( wcscmp(TAG, gName.c_str()) == 0 ) {
// <tag>
nodeType = ITEM_NODE;
processTag = true;
}
else if( wcscmp(META, gName.c_str()) == 0 ) {
// <meta>
nodeType = META_NODE;
// Retrieve semantic interpretation
for(unsigned int i = 0; i < attributes.getLength(); i++)
{
XMLChToVXIchar gAttr(attributes.getName(i));
if( wcscmp(NAME, gAttr.c_str()) == 0 )
{
XMLChToVXIchar gAttrVal(attributes.getValue(i));
if( wcscmp(L"swirec_simple_result_key", gAttrVal.c_str()) )
break; // only know how to process SSFT's semantic interp.
}
else if( wcscmp(CONTENT, gAttr.c_str()) == 0 ) {
// Copy the semantic meaning
grammarInfo.semantic = XMLChToVXIchar(attributes.getValue(i)).c_str();
}
}
}
}
void GrammarSaxHandler::characters(const XMLCh* const chars,
const unsigned int length)
{
const VXIchar* fnname = L"characters";
LogBlock logger(log, VXIrecData::diagLogBase, fnname, VXIREC_MODULE);
XMLChToVXIchar gChars(chars);
logger.logDiag(DIAG_TAG_PARSE, L"%s", gChars.c_str());
switch( nodeType ) {
case ITEM_NODE: {
if( processTag ) {
grammarInfo.tag = gChars.c_str();
}
else grammarInfo.word = gChars.c_str();
} break;
}
}
void GrammarSaxHandler::endElement(const XMLCh* const name)
{
const VXIchar* fnname = L"endElement";
LogBlock logger(log, VXIrecData::diagLogBase, fnname, VXIREC_MODULE);
XMLChToVXIchar gName(name);
if( wcscmp(ITEM, gName.c_str()) == 0 ) {
// <item>
// prune white spaces
PruneWhitespace(grammarInfo.word);
PruneWhitespace(grammarInfo.tag);
PruneWhitespace(grammarInfo.semantic);
logger.logDiag(DIAG_TAG_PARSE, L"%s%s%s%s%s%s",
L"word: ", grammarInfo.word.c_str(),
L", tag: ", grammarInfo.tag.c_str(),
L", semantic: ", grammarInfo.semantic.c_str());
// store this item
grammarInfoList->push_back(grammarInfo);
// clear this item for next processing
grammarInfo.tag = L"";
grammarInfo.word = L"";
// don't clear the semantic, it will remain the same for the rest of grammar
}
else if( wcscmp(TAG, gName.c_str()) == 0 ) {
// <tag>
processTag = false;
}
}
void GrammarSaxHandler::processError(const SAXParseException& exception,
const VXIchar* errType)
{
LogError(352, L"%s%s%s%s%s%s%s%u%s%u",
L"errType", errType,
L"errMsg", XMLChToVXIchar(exception.getMessage()).c_str(),
L"file", XMLChToVXIchar(exception.getSystemId()).c_str(),
L"line", exception.getLineNumber(),
L"column", exception.getColumnNumber());
}
// Logs an error under the module name COMPANY_DOMAIN L".VXIrec".
//
// errorID - error number to report.
// format  - optional format string describing the variadic arguments; when
//           NULL the error is logged without arguments.
//
// Returns the log interface's result, or VXIlog_RESULT_NON_FATAL_ERROR when
// the handler has no log interface.
VXIlogResult GrammarSaxHandler::LogError(VXIunsigned errorID,
                                         const VXIchar *format, ...) const
{
  if (!log)
    return VXIlog_RESULT_NON_FATAL_ERROR;

  // No format: plain error entry, nothing variadic to forward.
  if (!format)
    return (*log->Error)(log, COMPANY_DOMAIN L".VXIrec", errorID, NULL);

  va_list arguments;
  va_start(arguments, format);
  const VXIlogResult result =
    (*log->VError)(log, COMPANY_DOMAIN L".VXIrec", errorID, format, arguments);
  va_end(arguments);
  return result;
}
// Logs a diagnostic message.
//
// offset - tag offset; VXIrecData::diagLogBase is added to form the tag.
// subtag - subtag passed through to the log interface.
// format - optional format string describing the variadic arguments; when
//          NULL the diagnostic is logged without arguments.
//
// Returns the log interface's result, or VXIlog_RESULT_NON_FATAL_ERROR when
// the handler has no log interface.
VXIlogResult GrammarSaxHandler::LogDiag(VXIunsigned offset, const VXIchar *subtag,
                                        const VXIchar *format, ...) const
{
  if (!log)
    return VXIlog_RESULT_NON_FATAL_ERROR;

  const VXIunsigned diagTag = offset + VXIrecData::diagLogBase;

  // No format: plain diagnostic entry, nothing variadic to forward.
  if (!format)
    return (*log->Diagnostic)(log, diagTag, subtag, NULL);

  va_list arguments;
  va_start(arguments, format);
  const VXIlogResult result =
    (*log->VDiagnostic)(log, diagTag, subtag, format, arguments);
  va_end(arguments);
  return result;
}
/******************************************
* VXIrecData : The grammar container
******************************************/
// Initialize & Shutdown
// One-time module initialization.
//
// log      - log interface remembered for later use; may be NULL, in which
//            case an XML platform failure is returned but not logged.
// diagBase - base value for diagnostic tags, stored in both the file-level
//            global and VXIrecData::diagLogBase.
//
// Creates the module mutex if it does not exist yet and initializes the
// Xerces XML platform.
//
// Returns 0 on success, or VXIlog_RESULT_FAILURE when the XML platform
// fails to initialize (error 353 is logged in that case).
int VXIrecData::Initialize(VXIlogInterface* log, VXIunsigned diagBase)
{
  gblLog = log;
  gblDiagBase = diagBase;
  diagLogBase = diagBase;
  if( gblMutex == NULL ) {
    VXItrdMutexCreate(&gblMutex);
  }

  // Initialize the SAX parser
  try {
    XMLPlatformUtils::Initialize();
  }
  catch (const XMLException& exception) {
    // Use the same module name as GrammarSaxHandler::LogError so that the
    // error is attributed to the same module as every other VXIrec error.
    if( log != NULL ) {
      log->Error(log, COMPANY_DOMAIN L".VXIrec", 353, L"%s%s",
                 L"grammar parser exception",
                 XMLChToVXIchar(exception.getMessage()).c_str());
    }
    return VXIlog_RESULT_FAILURE;
  }
  return 0;
}
// Module shutdown: destroys the module mutex if one exists and terminates
// the Xerces XML platform. Always returns 0.
int VXIrecData::ShutDown()
{
  if (gblMutex) {
    VXItrdMutexDestroy(&gblMutex);
    gblMutex = NULL;
  }

  // A failure while terminating the XML platform is deliberately ignored.
  try {
    XMLPlatformUtils::Terminate();
  }
  catch (const XMLException &) {
  }

  return 0;
}
// C'ctor
// Builds a grammar container.
//
// l - log interface, also handed to the SAX handler.
// i - inet interface stored for later use.
//
// Creates a non-validating, non-namespace-aware SAX parser and registers a
// GrammarSaxHandler as both its document handler and its error handler.
// If any allocation or setup step throws, whatever was already created is
// released before the exception propagates (the destructor does not run for
// an object whose constructor threw).
VXIrecData::VXIrecData(VXIlogInterface *l,
                       VXIinetInterface *i)
  : log(l), inet(i), grammars(),
    parser(NULL), xmlHandler(NULL)
{
  try {
    parser = new SAXParser();
    parser->setDoValidation(false);
    parser->setDoNamespaces(false);

    // Register our own handler class (callback). static_cast keeps the
    // base-class pointer adjustment compiler-checked, unlike a C-style cast.
    xmlHandler = new GrammarSaxHandler(l);
    parser->setDocumentHandler(static_cast<DocumentHandler*>(xmlHandler));
    parser->setErrorHandler(static_cast<ErrorHandler*>(xmlHandler));
  }
  catch (...) {
    // Parser first: it may already hold a pointer to the handler.
    delete parser;
    delete xmlHandler;
    parser = NULL;
    xmlHandler = NULL;
    throw;
  }
}
// D'ctor
// Deletes every grammar held in the container, then the parser and finally
// the SAX handler that was registered with it.
VXIrecData::~VXIrecData()
{
  GRAMMARS::iterator g = grammars.begin();
  while (g != grammars.end()) {
    delete *g;
    ++g;
  }

  // Deleting a NULL pointer is a no-op, so no explicit checks are needed.
  delete parser;
  delete xmlHandler;
}
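// --- Source-viewer page residue (not part of the program) ---
// Keyboard shortcuts:
//   Copy code:            Ctrl + C
//   Search code:          Ctrl + F
//   Full-screen mode:     F11
//   Switch theme:         Ctrl + Shift + D
//   Show shortcuts:       ?
//   Increase font size:   Ctrl + =
//   Decrease font size:   Ctrl + -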