📄 answerparser.cpp
字号:
/****************License************************************************
*
* Copyright 2000-2003. ScanSoft, Inc.
*
* Use of this software is subject to notices and obligations set forth
* in the SpeechWorks Public License - Software Version 1.2 which is
* included with this software.
*
* ScanSoft is a registered trademark of ScanSoft, Inc., and OpenSpeech,
* SpeechWorks and the SpeechWorks logo are registered trademarks or
* trademarks of SpeechWorks International, Inc. in the United States
* and other countries.
*
***********************************************************************/
#include <vxibuildopts.h>
#if P_VXI
#include "GrammarManager.hpp"
#include "AnswerParser.hpp"
#include "CommonExceptions.hpp"
#include "SimpleLogger.hpp"
#include "vxi/VXIrec.h" // for a single type name (eliminate?)
#include "XMLChConverter.hpp" // for xmlcharstring
#include <string>
#include <sstream>
#include <map>
#include <list>
#include <vector>
#include "vxibuildopts.h"
#include <xercesc/framework/MemBufInputSource.hpp>
#include <xercesc/sax/ErrorHandler.hpp> // by XMLErrorReporter
#include <xercesc/dom/DOM.hpp>
#include <xercesc/sax/SAXParseException.hpp>
#include <xercesc/util/XMLChar.hpp>
using namespace xercesc;
#ifndef WIN32
// Returns true when the two XMLCh strings compare equal according to
// XMLString::compareString.  This definition is only compiled for
// non-WIN32 builds (see the surrounding #ifndef).
inline bool Compare(const XMLCh * a, const XMLCh * b)
{
  const int ordering = XMLString::compareString(a, b);
  return ordering == 0;
}
#endif
// ---------------------------------------------------------------------------
// Value object describing one recognition answer: the utterance text, the
// input mode / grammar id / confidence strings, a numeric grammar handle and
// the <instance> payloads attached to it (as DOM elements and/or strings).
//
// The inputmode, grammarid and confidence pointers are stored as given, not
// copied, so the pointed-to strings must outlive the holder.
class AnswerHolder {
public:
  // ut  - utterance text (copied into an xmlcharstring)
  // im  - input mode string (pointer stored as-is)
  // gid - grammar id string (pointer stored as-is)
  // cf  - confidence string (pointer stored as-is)
  // gm  - grammar manager used by the comparison operators
  // grammarInfo starts at 0 and is expected to be filled in by the caller.
  AnswerHolder(const XMLCh* ut, const XMLCh* im,
               const XMLCh* gid, const XMLCh* cf,
               GrammarManager & gm)
    : utterance(ut), inputmode(im), grammarid(gid), confidence(cf),
      grammarInfo(0), gramMgr(gm) { }

  // Appends an <instance> payload; despite the name, each call adds to the
  // corresponding list rather than replacing earlier entries.
  void SetInstance(DOMElement *is) { instanceDomList.push_back(is); }
  void SetInstance(const xmlcharstring & is){ instanceStrList.push_back(is); }

  // Ordering based on GrammarManager::CompareGrammar applied to the two
  // grammarInfo handles: false only when CompareGrammar reports -1, true for
  // every other result.
  // NOTE(review): because every non -1 result yields true, this presumably
  // also returns true for equal grammars; confirm before relying on it where
  // a strict weak ordering is required (e.g. std::sort).
  // Const-qualified so it can be used on const objects / const references.
  bool operator<(const AnswerHolder & x) const
  {
    switch(gramMgr.CompareGrammar(grammarInfo, x.grammarInfo)) {
      case -1: return false;
      default: return true;
    }
  }

  // Exact logical negation of operator<.
  bool operator>(const AnswerHolder & x) const
  {
    return !operator<(x);
  }

  // Copies every data member except gramMgr: it is a reference and cannot be
  // reseated, so the target keeps the grammar manager it was built with.
  AnswerHolder & operator=(const AnswerHolder &x)
  {
    if (this != &x) {
      utterance = x.utterance;
      inputmode = x.inputmode;
      grammarid = x.grammarid;
      grammarInfo = x.grammarInfo;
      confidence = x.confidence;
      instanceDomList = x.instanceDomList;
      instanceStrList = x.instanceStrList;
    }
    return *this;
  }

  // NOTE: member order matters; it fixes the constructor initialization order.
  xmlcharstring utterance;
  const XMLCh* inputmode;
  const XMLCh* grammarid;
  const XMLCh* confidence;
  unsigned long grammarInfo;     // handle passed to CompareGrammar
  GrammarManager & gramMgr;

  typedef std::list<DOMElement*> DOMLIST;
  DOMLIST instanceDomList;       // <instance> payloads kept as DOM elements

  typedef std::list<xmlcharstring> STRLIST;
  STRLIST instanceStrList;       // <instance> payloads kept as strings
};
typedef std::vector<AnswerHolder > ANSWERHOLDERVECTOR;
typedef std::map<VXIflt32, ANSWERHOLDERVECTOR, std::greater<VXIflt32> > ANSWERHOLDERMAP;
class AnswerParserErrorReporter : public ErrorHandler {
public:
AnswerParserErrorReporter() { }
~AnswerParserErrorReporter() { }
void warning(const SAXParseException& toCatch) { /* Ignore */ }
void fatalError(const SAXParseException& toCatch) { error(toCatch); }
void resetErrors() { }
void error(const SAXParseException & toCatch)
{ throw SAXParseException(toCatch); }
private:
AnswerParserErrorReporter(const AnswerParserErrorReporter &);
void operator=(const AnswerParserErrorReporter &);
};
// ---------------------------------------------------------------------------
// Creates the Xerces DOM parser used for NLSML results, switches off
// validation / namespace / schema processing and entity reference nodes, and
// installs an AnswerParserErrorReporter as its error handler.
//
// Throws VXIException::OutOfMemory when an allocation yields NULL.  Any
// exception raised after the parser has been allocated (including one thrown
// by operator new for the reporter) is propagated unchanged, but the parser
// is released first: the destructor does not run for a partially constructed
// object, so without this cleanup the parser would leak.
AnswerParser::AnswerParser()
{
  nlsmlParser = new XercesDOMParser();
  if (nlsmlParser == NULL)
    throw VXIException::OutOfMemory();

  try {
    nlsmlParser->setValidationScheme( XercesDOMParser::Val_Never);
    nlsmlParser->setDoNamespaces(false);
    nlsmlParser->setDoSchema(false);
    nlsmlParser->setValidationSchemaFullChecking(false);
    nlsmlParser->setCreateEntityReferenceNodes(false);
    // #pragma message("JC: Look into this")
    // nlsmlParser->setToCreateXMLDeclTypeNode(true);

    ErrorHandler *errReporter = new AnswerParserErrorReporter();
    // The NULL test only matters for allocators that return NULL instead of
    // throwing; it is kept for such build environments.
    if (errReporter == NULL)
      throw VXIException::OutOfMemory();

    // The reporter is released again in the destructor.
    nlsmlParser->setErrorHandler(errReporter);
  }
  catch (...) {
    // Release the parser, then let the original exception continue.
    delete nlsmlParser;
    nlsmlParser = NULL;
    throw;
  }
}
// Releases the error reporter installed by the constructor and then the
// parser itself.  The handler is detached from the parser before it is
// deleted so the parser never holds a dangling handler pointer.
AnswerParser::~AnswerParser()
{
  if (nlsmlParser == NULL) return;

  const ErrorHandler * const handler = nlsmlParser->getErrorHandler();
  nlsmlParser->setErrorHandler(NULL);
  delete handler;

  delete nlsmlParser;
  nlsmlParser = NULL;
}
inline bool AnswerParser::IsAllWhiteSpace(const XMLCh* str)
{
if( !str ) return true;
while( *str != 0 ) {
if( *str != '\r' && *str != '\n' && *str != '\t' && *str != ' ')
return false;
++str;
}
return true;
}
// Parses a recognition result document and hands it to ProcessNLSML.
//
// log, translator, gm, maxnbest and grammar are forwarded to ProcessNLSML;
// document supplies the MIME type and raw bytes of the result.
//
// Returns:
//   -1  document is NULL, its content could not be read, or it is empty
//   -2  the content type is not VXIREC_MIMETYPE_XMLRESULT
//   -3  XML parsing failed, or ProcessNLSML returned -1
//    1, 2  passed through unchanged from ProcessNLSML
//    0  ProcessNLSML returned 0 or any other value
int AnswerParser::Parse(SimpleLogger & log, AnswerTranslator & translator,
                        GrammarManager & gm, int maxnbest,
                        VXIContent * document, vxistring & grammar)
{
  if (document == NULL) return -1;

  // (1) Get document contents.
  const VXIchar * docType;
  const VXIbyte * docContent;
  VXIulong docContentSize = 0;
  const bool fetched =
    (VXIContentValue(document, &docType, &docContent, &docContentSize) ==
     VXIvalue_RESULT_SUCCESS);
  // docContent is only inspected once the fetch is known to have succeeded.
  if (!fetched || docContent == NULL || docContentSize == 0) return -1;

  // Optionally record the raw content and log the returned key/value pair.
  if (log.IsLogging(3)) {
    VXIString *key = NULL, *value = NULL;
    // NOTE(review): key and value are not released in this function; confirm
    // whether LogContent hands ownership of them to the caller.
    if (log.LogContent(docType, docContent, docContentSize, &key, &value)) {
      vxistring message(VXIStringCStr(key));
      message += L": ";
      message += VXIStringCStr(value);
      log.LogDiagnostic(3, message.c_str());
    }
  }

  // (2) Only the NLSML result format is handled; everything else is rejected.
  const vxistring type(docType);
  if (!(type == VXIREC_MIMETYPE_XMLRESULT)) return -2;

  // (3) Parse the NLSML document from the in-memory buffer.
  try {
    VXIcharToXMLCh membufURL(L"nlsml recognition result");
    MemBufInputSource source(docContent, docContentSize,
                             membufURL.c_str(), false);
    nlsmlParser->parse(source);
  }
  catch (const XMLException & exception) {
    if (log.IsLogging(0)) {
      log.StartDiagnostic(0) << L"AnswerParser::Parse - XML parsing "
        L"error from DOM: " << XMLChToVXIchar(exception.getMessage());
      log.EndDiagnostic();
    }
    return -3;
  }
  catch (const SAXParseException & exception) {
    if (log.IsLogging(0)) {
      log.StartDiagnostic(0) << L"AnswerParser::Parse - Parse error at line "
                             << exception.getLineNumber()
                             << L", column " << exception.getColumnNumber()
                             << L" - "
                             << XMLChToVXIchar(exception.getMessage());
      log.EndDiagnostic();
    }
    return -3;
  }

  // (4) Translate the ProcessNLSML status into this function's return codes.
  const int outcome = ProcessNLSML(log, translator, gm, maxnbest, grammar);
  if (outcome == -1) return -3;
  if (outcome == 1 || outcome == 2) return outcome;
  return 0;
}
// ---------------------------------------------------------------------------
// NLSML Parser
// ---------------------------------------------------------------------------
// Names used by the NLSML processing code, spelled out character by
// character as NUL-terminated XMLCh arrays.

// Node names.
static const XMLCh NODE_RESULT[] =
  { 'r','e','s','u','l','t','\0' };
static const XMLCh NODE_INTERPRETATION[] =
  { 'i','n','t','e','r','p','r','e','t','a','t','i','o','n','\0' };
static const XMLCh NODE_INPUT[] =
  { 'i','n','p','u','t','\0' };
static const XMLCh NODE_INSTANCE[] =
  { 'i','n','s','t','a','n','c','e','\0' };
static const XMLCh NODE_NOINPUT[] =
  { 'n','o','i','n','p','u','t','\0' };
static const XMLCh NODE_NOMATCH[] =
  { 'n','o','m','a','t','c','h','\0' };

// Attribute names.
static const XMLCh ATTR_GRAMMAR[] =
  { 'g','r','a','m','m','a','r','\0' };
static const XMLCh ATTR_CONFIDENCE[] =
  { 'c','o','n','f','i','d','e','n','c','e','\0' };
static const XMLCh ATTR_MODE[] =
  { 'm','o','d','e','\0' };

// Default values.  NLSMLSetSomeVars divides the confidence by 100 and maps
// a mode equal to DEFAULT_MODE to the inputmode "voice".
static const XMLCh DEFAULT_CONFIDENCE[] =
  { '1','0','0','\0' };
static const XMLCh DEFAULT_MODE[] =
  { 's','p','e','e','c','h','\0' };
// Shared helper for the NLSMLSetVars functions: fills in every property of
// the result object named by 'variable' except 'interpretation'.
//
//   variable   - script expression naming the result object
//   translator - receives the generated assignments
//   confidence - confidence text; inserted verbatim into a script expression
//                and divided by 100
//   utterance  - stored as <variable>.utterance
//   mode       - stored as <variable>.inputmode; a value equal to
//                DEFAULT_MODE is reported as "voice"
//
void NLSMLSetSomeVars(vxistring variable, AnswerTranslator & translator,
                      const XMLCh * confidence, const XMLCh *utterance,
                      const XMLCh * mode)
{
  // (1) 'confidence' is assigned through an evaluated expression.
  vxistring confidenceExpr(variable);
  confidenceExpr += L".confidence = ";
  confidenceExpr += XMLChToVXIchar(confidence).c_str();
  confidenceExpr += L" / 100;";
  translator.EvaluateExpression(confidenceExpr);

  // (2) 'utterance' is stored as a plain string.
  const vxistring utteranceKey = variable + L".utterance";
  translator.SetString(utteranceKey, XMLChToVXIchar(utterance).c_str());

  // (3) 'inputmode': any mode other than the default is passed through.
  const vxistring modeKey = variable + L".inputmode";
  if (!Compare(mode, DEFAULT_MODE)) {
    translator.SetString(modeKey, XMLChToVXIchar(mode).c_str());
    return;
  }
  translator.SetString(modeKey, L"voice");
}
// Populates application.lastresult$[nbest] for an interpretation that is a
// simple value: creates the entry as a new Object, fills in confidence,
// utterance and inputmode via NLSMLSetSomeVars, and stores 'interp' as the
// 'interpretation' string.  Always returns true.
//
bool NLSMLSetVars(AnswerTranslator & translator, int nbest,
                  const XMLCh * confidence, const XMLCh * utterance,
                  const XMLCh * mode, const XMLCh * interp)
{
  // Build the name of the array slot once and reuse it below.
  std::basic_ostringstream<VXIchar> builder;
  builder << L"application.lastresult$[" << nbest << L"]";
  const std::basic_string<VXIchar> entry = builder.str();

  translator.EvaluateExpression(entry + L" = new Object();");

  // Everything except the interpretation.
  NLSMLSetSomeVars(entry, translator, confidence, utterance, mode);

  // The interpretation itself.
  translator.SetString(entry + L".interpretation",
                       XMLChToVXIchar(interp).c_str());
  return true;
}
// This function recursively processes the data within an <instance>.
//
bool NLSMLSetInterp(AnswerTranslator & translator, SimpleLogger & log,
vxistring & path, const DOMNode * interp)
{
bool foundText = false;
// Look for text first.
DOMNode * temp = NULL;
vxistring textValue(L"");
for (temp = interp->getFirstChild(); temp != NULL;
temp = temp->getNextSibling())
{
if (temp->getNodeType() != DOMNode::TEXT_NODE &&
temp->getNodeType() != DOMNode::CDATA_SECTION_NODE ) continue;
foundText = true;
textValue += XMLChToVXIchar(temp->getNodeValue()).c_str();
}
if( foundText )
translator.SetString(path, textValue.c_str());
bool foundElement = false;
// Now try for elements.
for (temp = interp->getFirstChild(); temp != NULL;
temp = temp->getNextSibling())
{
if (temp->getNodeType() != DOMNode::ELEMENT_NODE) continue;
if (foundText) {
log.StartDiagnostic(0)
<< L"AnswerParser::ProcessNLSML - malformed <instance> at "
<< path << L"; mixed text and elements";
log.EndDiagnostic();
return false;
}
if (!foundElement) {
translator.EvaluateExpression(path + L" = new Object();");
foundElement = true;
}
// Although ECMAScript arrays are treated like properties, calling
// x.1 = 'val' is invalid. Instead, x[1] = 'val' must be used.
XMLChToVXIchar name(temp->getNodeName());
VXIchar first = *(name.c_str());
bool isArray = (first=='0' || first=='1' || first=='2' || first=='3' ||
first=='4' || first=='5' || first=='6' || first=='7' ||
first=='8' || first=='9');
vxistring newPath = path;
if (isArray)
newPath += L"[";
else
newPath += L".";
newPath += name.c_str();
if (isArray)
newPath += L"]";
NLSMLSetInterp(translator, log, newPath, temp);
}
if (!foundElement && !foundText) {
log.StartDiagnostic(0)
<< L"AnswerParser::ProcessNLSML - malformed <instance> at "
<< path << L"; no data found";
log.EndDiagnostic();
return false;
}
return true;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -