📄 answerparser.cpp
字号:
/****************License************************************************
 *
 * Copyright 2000-2003.  ScanSoft, Inc.
 *
 * Use of this software is subject to notices and obligations set forth
 * in the SpeechWorks Public License - Software Version 1.2 which is
 * included with this software.
 *
 * ScanSoft is a registered trademark of ScanSoft, Inc., and OpenSpeech,
 * SpeechWorks and the SpeechWorks logo are registered trademarks or
 * trademarks of SpeechWorks International, Inc. in the United States
 * and other countries.
 *
 ***********************************************************************/

#include "GrammarManager.hpp"
#include "AnswerParser.hpp"
#include "CommonExceptions.hpp"
#include "SimpleLogger.hpp"
#include "VXIrec.h"                   // for a single type name (eliminate?)
#include "XMLChConverter.hpp"         // for xmlcharstring
#include <string>
#include <sstream>
#include <map>
#include <list>
#include <vector>

#include <framework/MemBufInputSource.hpp>
#include <sax/ErrorHandler.hpp>         // by XMLErrorReporter
#include <dom/DOM.hpp>
#include <sax/SAXParseException.hpp>
#include <util/XMLChar.hpp>
using namespace xercesc;

#ifndef WIN32
// Returns true when the two XMLCh strings compare equal according to
// XMLString::compareString.
inline bool Compare(const XMLCh * a, const XMLCh * b)
{
  return XMLString::compareString(a, b) == 0;
}
#endif

// ---------------------------------------------------------------------------

// Holds the data extracted for one recognition answer: the utterance text,
// input mode, grammar id, confidence, a numeric grammar handle and the
// <instance> payloads (kept either as DOM elements or as strings).
//
// The XMLCh pointers are stored as-is (not copied), so they must outlive the
// holder.
class AnswerHolder {
public:
  AnswerHolder(const XMLCh* ut, const XMLCh* im,
               const XMLCh* gid, const XMLCh* cf,
               GrammarManager & gm)
    : utterance(ut), inputmode(im), grammarid(gid), confidence(cf),
      grammarInfo(0), gramMgr(gm) { }

  // Appends an <instance> given as a DOM element.
  void SetInstance(DOMElement *is) { instanceDomList.push_back(is); }

  // Appends an <instance> given as a string.
  void SetInstance(const xmlcharstring & is){ instanceStrList.push_back(is); }

  // Ordering is delegated to GrammarManager::CompareGrammar on the two
  // grammarInfo values: the result is false only when that call returns -1,
  // and true for every other return value.
  bool operator<(const AnswerHolder & x)
  {
    switch(gramMgr.CompareGrammar(grammarInfo, x.grammarInfo)) {
      case -1: return false;
      default: return true;
    }
  }

  // Defined as the exact negation of operator<.
  bool operator>(const AnswerHolder & x)
  {
    return !operator<(x);
  }

  // Copies every data member except gramMgr, which is a reference and
  // therefore keeps referring to the manager given at construction.
  AnswerHolder & operator=(const AnswerHolder &x)
  {
    if (this != &x) {
      utterance = x.utterance;
      inputmode = x.inputmode;
      grammarid = x.grammarid;
      grammarInfo = x.grammarInfo;
      confidence = x.confidence;
      instanceDomList = x.instanceDomList;
      instanceStrList = x.instanceStrList;
    }
    return *this;
  }

  xmlcharstring utterance;
  const XMLCh* inputmode;
  const XMLCh* grammarid;
  const XMLCh* confidence;
  unsigned long grammarInfo;
  GrammarManager & gramMgr;

  typedef std::list<DOMElement*> DOMLIST;
  DOMLIST instanceDomList;

  typedef std::list<xmlcharstring> STRLIST;
  STRLIST instanceStrList;
};

typedef std::vector<AnswerHolder > ANSWERHOLDERVECTOR;

// Keyed by a VXIflt32 value and ordered with std::greater, so iteration
// visits the largest key first.
typedef std::map<VXIflt32, ANSWERHOLDERVECTOR, std::greater<VXIflt32> > ANSWERHOLDERMAP;

// Xerces error handler used by the NLSML parser: warnings are dropped, while
// errors and fatal errors are rethrown as SAXParseException so that
// AnswerParser::Parse can catch and report them.
class AnswerParserErrorReporter : public ErrorHandler {
public:
  AnswerParserErrorReporter()  { }
  ~AnswerParserErrorReporter() { }

  void warning(const SAXParseException& toCatch)     { /* Ignore */ }
  void fatalError(const SAXParseException& toCatch)  { error(toCatch); }
  void resetErrors() { }

  void error(const SAXParseException & toCatch)
  { throw SAXParseException(toCatch); }

private:
  // Not copyable.
  AnswerParserErrorReporter(const AnswerParserErrorReporter &);
  void operator=(const AnswerParserErrorReporter &);
};

// ---------------------------------------------------------------------------

// Creates the DOM parser with validation, namespace and schema processing
// switched off and installs an AnswerParserErrorReporter on it.
// Throws VXIException::OutOfMemory if either allocation yields NULL.
AnswerParser::AnswerParser()
{
  nlsmlParser = new XercesDOMParser();
  if (nlsmlParser == NULL)
    throw VXIException::OutOfMemory();

  nlsmlParser->setValidationScheme( XercesDOMParser::Val_Never);
  nlsmlParser->setDoNamespaces(false);
  nlsmlParser->setDoSchema(false);
  nlsmlParser->setValidationSchemaFullChecking(false);
  nlsmlParser->setCreateEntityReferenceNodes(false);
#pragma message("JC: Look into this")
  // nlsmlParser->setToCreateXMLDeclTypeNode(true);

  ErrorHandler *errReporter = new AnswerParserErrorReporter();
  if (errReporter == NULL) {
    delete nlsmlParser;
    throw VXIException::OutOfMemory();
  }

  nlsmlParser->setErrorHandler(errReporter);
}


// Detaches the error handler from the parser before deleting both, so the
// parser never holds a pointer to a destroyed handler.
AnswerParser::~AnswerParser()
{
  if (nlsmlParser != NULL) {
    const ErrorHandler * reporter = nlsmlParser->getErrorHandler();
    nlsmlParser->setErrorHandler(NULL);
    delete reporter;
    delete nlsmlParser;
    nlsmlParser = NULL;
  }
}


// Returns true if 'str' is NULL, empty, or consists solely of carriage
// return, line feed, tab and space characters.
inline bool AnswerParser::IsAllWhiteSpace(const XMLCh* str)
{
  if( !str ) return true;
  while( *str != 0 ) {
    if( *str != '\r' && *str != '\n' && *str != '\t' && *str != ' ')
      return false;
    ++str;
  }
  return true;
}


// Parses a recognition result document and hands it to ProcessNLSML.
//
// Return values, as established by the code below:
//   -1  'document' is NULL, or its type/content could not be obtained or is
//       empty.
//   -2  The content type is not VXIREC_MIMETYPE_XMLRESULT.
//   -3  The XML could not be parsed, or ProcessNLSML returned -1.
//    0, 1, 2  Passed through unchanged from ProcessNLSML.
//    0  Also returned for any other ProcessNLSML result.
int AnswerParser::Parse(SimpleLogger & log, AnswerTranslator & translator,
                        GrammarManager & gm, int maxnbest,
                        VXIContent * document, vxistring & grammar)
{
  if (document == NULL) return -1;

  // (1) Get document contents.
  const VXIchar * docType = NULL;
  const VXIbyte * docContent = NULL;
  VXIulong docContentSize = 0;

  if (VXIContentValue(document, &docType, &docContent, &docContentSize)
      != VXIvalue_RESULT_SUCCESS) return -1;
  if (docContent == NULL || docContentSize == 0) return -1;

  // The type is used to build a vxistring below; constructing a string from
  // a NULL pointer is undefined behaviour, so reject it here.
  if (docType == NULL) return -1;

  if (log.IsLogging(3)) {
    VXIString *key = NULL, *value = NULL;
    if (log.LogContent(docType, docContent, docContentSize, &key, &value)
        && key != NULL && value != NULL)
    {
      vxistring temp = VXIStringCStr(key);
      temp += L": ";
      temp += VXIStringCStr(value);
      log.LogDiagnostic(3, temp.c_str());
    }

    // The key/value strings are handed to the caller, so release them here
    // instead of leaking one pair per logged result.
    if (key != NULL) VXIStringDestroy(&key);
    if (value != NULL) VXIStringDestroy(&value);
  }

  // (2) Parse NLSML format.
  vxistring type(docType);
  if (type == VXIREC_MIMETYPE_XMLRESULT) {
    try {
      VXIcharToXMLCh membufURL(L"nlsml recognition result");
      nlsmlParser->parse(MemBufInputSource(docContent, docContentSize,
                                           membufURL.c_str(), false));
    }
    catch (const XMLException & exception) {
      if (log.IsLogging(0)) {
        log.StartDiagnostic(0) << L"AnswerParser::Parse - XML parsing "
          L"error from DOM: " << XMLChToVXIchar(exception.getMessage());
        log.EndDiagnostic();
      }
      return -3;
    }
    catch (const SAXParseException & exception) {
      if (log.IsLogging(0)) {
        log.StartDiagnostic(0) << L"AnswerParser::Parse - Parse error at line "
                               << exception.getLineNumber()
                               << L", column " << exception.getColumnNumber()
                               << L" - "
                               << XMLChToVXIchar(exception.getMessage());
        log.EndDiagnostic();
      }
      return -3;
    }

    switch (ProcessNLSML(log, translator, gm, maxnbest, grammar)) {
    case -1:
      return -3;
    case 0:
      return 0;
    case 1:
      return 1;
    case 2:
      return 2;
    default:
      break;
    }
  }

  // (3) Parse other types.
  else {
    return -2;
  }

  return 0;
}

// ---------------------------------------------------------------------------
// NLSML Parser
// ---------------------------------------------------------------------------

static const XMLCh NODE_RESULT[]
    = { 'r','e','s','u','l','t','\0' };
static const XMLCh NODE_INTERPRETATION[]
    = { 'i','n','t','e','r','p','r','e','t','a','t','i','o','n','\0' };
static const XMLCh NODE_INPUT[]
    = { 'i','n','p','u','t','\0' };
static const XMLCh NODE_INSTANCE[]
    = { 'i','n','s','t','a','n','c','e','\0' };
static const XMLCh NODE_NOINPUT[]
    = { 'n','o','i','n','p','u','t','\0' };
static const XMLCh NODE_NOMATCH[]
    = { 'n','o','m','a','t','c','h','\0' };

static const XMLCh ATTR_GRAMMAR[]
    = { 'g','r','a','m','m','a','r','\0' };
static const XMLCh ATTR_CONFIDENCE[]
    = { 'c','o','n','f','i','d','e','n','c','e','\0' };
static const XMLCh ATTR_MODE[]
    = { 'm','o','d','e','\0' };

static const XMLCh DEFAULT_CONFIDENCE[]
    = { '1','0','0','\0' };
static const XMLCh DEFAULT_MODE[]
    = { 's','p','e','e','c','h','\0' };

// This function is used by the other NLSMLSetVars functions to set everything
// but the 'interpretation'.
//
// 'variable' is the ECMAScript expression naming the object to populate.
// The confidence text is pasted verbatim into an expression that divides it
// by 100; a mode equal to DEFAULT_MODE ("speech") is stored as "voice", any
// other mode is stored unchanged.
//
void NLSMLSetSomeVars(vxistring variable, AnswerTranslator & translator,
                      const XMLCh * confidence, const XMLCh *utterance,
                      const XMLCh * mode)
{
  // (1) Set the 'confidence'.
  vxistring expr = variable + L".confidence = ";
  expr += XMLChToVXIchar(confidence).c_str();
  expr += L" / 100;";
  translator.EvaluateExpression(expr);

  // (2) Set the 'utterance'
  translator.SetString(variable + L".utterance",
                       XMLChToVXIchar(utterance).c_str());

  // (3) Set the 'inputmode'
  if (Compare(mode, DEFAULT_MODE))
    translator.SetString(variable + L".inputmode", L"voice");
  else
    translator.SetString(variable + L".inputmode",
                         XMLChToVXIchar(mode).c_str());
}

// This sets all of the application.lastresult$ array when the interpretation
// is a simple result.
// bool NLSMLSetVars(AnswerTranslator & translator, int nbest, const XMLCh * confidence, const XMLCh * utterance, const XMLCh * mode, const XMLCh * interp) { std::basic_ostringstream<VXIchar> out; out << L"application.lastresult$[" << nbest << L"]"; translator.EvaluateExpression(out.str() + L" = new Object();"); // Set everything but interpretation. NLSMLSetSomeVars(out.str(), translator, confidence, utterance, mode); // Set 'interpretation'. translator.SetString(out.str() + L".interpretation", XMLChToVXIchar(interp).c_str()); return true; } // This function recursively processes the data within an <instance>. // bool NLSMLSetInterp(AnswerTranslator & translator, SimpleLogger & log, vxistring & path, const DOMNode * interp) { bool foundText = false; // Look for text first. DOMNode * temp = NULL; vxistring textValue(L""); for (temp = interp->getFirstChild(); temp != NULL; temp = temp->getNextSibling()) { if (temp->getNodeType() != DOMNode::TEXT_NODE && temp->getNodeType() != DOMNode::CDATA_SECTION_NODE ) continue; foundText = true; textValue += XMLChToVXIchar(temp->getNodeValue()).c_str(); } if( foundText ) translator.SetString(path, textValue.c_str()); bool foundElement = false; // Now try for elements. for (temp = interp->getFirstChild(); temp != NULL; temp = temp->getNextSibling()) { if (temp->getNodeType() != DOMNode::ELEMENT_NODE) continue; if (foundText) { log.StartDiagnostic(0) << L"AnswerParser::ProcessNLSML - malformed <instance> at " << path << L"; mixed text and elements"; log.EndDiagnostic(); return false; } if (!foundElement) { translator.EvaluateExpression(path + L" = new Object();"); foundElement = true; } // Although ECMAScript arrays are treated like properties, calling // x.1 = 'val' is invalid. Instead, x[1] = 'val' must be used. 
XMLChToVXIchar name(temp->getNodeName()); VXIchar first = *(name.c_str()); bool isArray = (first=='0' || first=='1' || first=='2' || first=='3' || first=='4' || first=='5' || first=='6' || first=='7' || first=='8' || first=='9'); vxistring newPath = path; if (isArray) newPath += L"["; else newPath += L"."; newPath += name.c_str(); if (isArray) newPath += L"]"; NLSMLSetInterp(translator, log, newPath, temp); } if (!foundElement && !foundText) { log.StartDiagnostic(0) << L"AnswerParser::ProcessNLSML - malformed <instance> at " << path << L"; no data found"; log.EndDiagnostic(); return false; } return true; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -