📄 documentparser.cpp
字号:
/****************License************************************************
 *
 * Copyright 2000-2003.  ScanSoft, Inc.
 *
 * Use of this software is subject to notices and obligations set forth
 * in the SpeechWorks Public License - Software Version 1.2 which is
 * included with this software.
 *
 * ScanSoft is a registered trademark of ScanSoft, Inc., and OpenSpeech,
 * SpeechWorks and the SpeechWorks logo are registered trademarks or
 * trademarks of SpeechWorks International, Inc. in the United States
 * and other countries.
 *
 ***********************************************************************/

#include "DocumentParser.hpp"
#include "DocumentStorage.hpp"
#include "CommonExceptions.hpp"
#include "SimpleLogger.hpp"
#include "VXML.h"                    // for attribute names
#include "DocumentConverter.hpp"     // for DocumentConverter
#include "PropertyList.hpp"
#include "XMLChConverter.hpp"
#include "ValueLogging.hpp"          // for VXIValue dumping
#include "VXIinet.h"

// Internal documents
#include "Schema.hpp"

// Xerces related
#ifndef HAVE_XERCES
#error Need Apache Xerces to build the VoiceXML interpreter
#endif

#include <util/PlatformUtils.hpp>
#include <util/TransService.hpp>
#include <sax2/XMLReaderFactory.hpp>
#include <framework/MemBufInputSource.hpp>
#include <sax/SAXParseException.hpp> // by DOMTreeErrorReporter
#include <sax/EntityResolver.hpp>    // by DTDResolver
#include <sax/ErrorHandler.hpp>      // by DOMTreeErrorReporter
#include <validators/common/Grammar.hpp>
using namespace xercesc;

//#############################################################################
// Utilities - these are specific to Xerces
//#############################################################################

// Error handler handed to the SAX parser.  Warnings are dropped; errors and
// fatal errors are both reported by throwing a copy of the SAXParseException
// that was passed in.
class DOMTreeErrorReporter : public ErrorHandler {
public:
  DOMTreeErrorReporter()  { }
  ~DOMTreeErrorReporter() { }

  void resetErrors() { }

  void warning(const SAXParseException & toCatch)
  { /* Ignore */ }

  void error(const SAXParseException & toCatch)
  { throw SAXParseException(toCatch); }

  // Fatal errors take exactly the same path as ordinary errors.
  void fatalError(const SAXParseException & toCatch)
  { error(toCatch); }

private:
  // Copying is disabled: both members are declared private and given no body
  // here.
  DOMTreeErrorReporter(const DOMTreeErrorReporter &);
  void operator=(const DOMTreeErrorReporter &);
};


// Entity resolver that answers requests for the known DTD / schema documents
// from the VALIDATOR_DATA buffer (see Schema.hpp) instead of letting the
// parser fetch them.  Ids that are not recognized yield NULL.
class DTDResolver : public EntityResolver {
public:
  DTDResolver() { }
  virtual ~DTDResolver() { }

  // Returns a newly allocated MemBufInputSource for the first entry whose
  // public or system id matches, or NULL when nothing matches.  The order of
  // the checks is the order in which ids are tested.
  virtual InputSource * resolveEntity(const XMLCh * const publicId,
                                      const XMLCh * const systemId)
  {
    if (Compare(publicId, L"SB_Defaults"))
      return Embedded(VALIDATOR_DATA + VALIDATOR_DEFAULTS_DTD,
                      VALIDATOR_DEFAULTS_DTD_SIZE,
                      L"VXML Defaults DTD (for SB 1.0)");

    if (Compare(systemId, L"http://www.w3.org/TR/voicexml20/vxml.xsd"))
      return Embedded(VALIDATOR_DATA + VALIDATOR_VXML,
                      VALIDATOR_VXML_SIZE,
                      L"http://www.w3.org/TR/voicexml20/vxml.xsd (SB)");

    if (Compare(systemId, L"vxml-datatypes.xsd"))
      return Embedded(VALIDATOR_DATA + VALIDATOR_VXML_DATA,
                      VALIDATOR_VXML_DATA_SIZE,
                      L"vxml-datatypes.xsd (SB)");

    if (Compare(systemId, L"vxml-attribs.xsd"))
      return Embedded(VALIDATOR_DATA + VALIDATOR_VXML_ATTR,
                      VALIDATOR_VXML_ATTR_SIZE,
                      L"vxml-attribs.xsd (SB)");

    if (Compare(systemId, L"vxml-grammar-extension.xsd"))
      return Embedded(VALIDATOR_DATA + VALIDATOR_SRGF_EXTN,
                      VALIDATOR_SRGF_EXTN_SIZE,
                      L"vxml-grammar-extension.xsd (SB)");

    if (Compare(systemId, L"vxml-grammar-restriction.xsd"))
      return Embedded(VALIDATOR_DATA + VALIDATOR_SRGF_RSTR,
                      VALIDATOR_SRGF_RSTR_SIZE,
                      L"vxml-grammar-restriction.xsd (SB)");

    if (Compare(systemId, L"vxml-synthesis-extension.xsd"))
      return Embedded(VALIDATOR_DATA + VALIDATOR_SSML_EXTN,
                      VALIDATOR_SSML_EXTN_SIZE,
                      L"vxml-synthesis-extension.xsd (SB)");

    if (Compare(systemId, L"vxml-synthesis-restriction.xsd"))
      return Embedded(VALIDATOR_DATA + VALIDATOR_SSML_RSTR,
                      VALIDATOR_SSML_RSTR_SIZE,
                      L"vxml-synthesis-restriction.xsd (SB)");

    if (Compare(systemId, L"synthesis-core.xsd"))
      return Embedded(VALIDATOR_DATA + VALIDATOR_SSML_CORE,
                      VALIDATOR_SSML_CORE_SIZE,
                      L"synthesis-core.xsd (SB)");

    if (Compare(systemId, L"grammar-core.xsd"))
      return Embedded(VALIDATOR_DATA + VALIDATOR_SRGF_CORE,
                      VALIDATOR_SRGF_CORE_SIZE,
                      L"grammar-core.xsd (SB)");

    if (Compare(systemId, L"http://www.w3.org/2001/xml.xsd"))
      return Embedded(VALIDATOR_DATA + VALIDATOR_XML,
                      VALIDATOR_XML_SIZE,
                      L"http://www.w3.org/2001/xml.xsd (SB)");

    if (Compare(systemId, L"XMLSchema.dtd"))
      return Embedded(VALIDATOR_DATA + VALIDATOR_SCHEMA_DTD,
                      VALIDATOR_SCHEMA_DTD_SIZE,
                      L"XMLSchema.dtd (SB)");

    if (Compare(systemId, L"datatypes.dtd"))
      return Embedded(VALIDATOR_DATA + VALIDATOR_DATATYPE_DTD,
                      VALIDATOR_DATATYPE_DTD_SIZE,
                      L"datatypes.dtd (SB)");

    // The VoiceXML 2.0 DTD is recognized by either its public or system id.
    if (Compare(publicId, L"-//W3C//DTD VOICEXML 2.0//EN") ||
        Compare(systemId, L"http://www.w3.org/TR/voicexml20/vxml.dtd"))
      return Embedded(VALIDATOR_DATA + VALIDATOR_VXML_DTD,
                      VALIDATOR_VXML_DTD_SIZE,
                      L"VXML DTD (SB)");

    /*
    VXIcharToXMLCh name(L"VXML DTD (for SB 1.0)");
    return new MemBufInputSource(VALIDATOR_DATA + VALIDATOR_VXML_DTD,
                                 VALIDATOR_VXML_DTD_SIZE,
                                 name.c_str(), false);
    */

    return NULL;
  }

private:
  // Wraps `byteCount` bytes starting at `bytes` in a new MemBufInputSource
  // whose buffer id is `label` converted to XMLCh.  The trailing `false` is
  // forwarded exactly as every call site originally passed it (presumably the
  // Xerces "adopt buffer" flag - confirm against MemBufInputSource).
  static InputSource * Embedded(const XMLByte * bytes,
                                unsigned int byteCount,
                                const VXIchar * label)
  {
    VXIcharToXMLCh bufId(label);
    return new MemBufInputSource(bytes, byteCount, bufId.c_str(), false);
  }
};

//#############################################################################
// Document Parser
//#############################################################################

// xerces crashes when on multi-thread app.
that simultaneously load schema // grammar therefore use a global mutext to restrict access to only thread // at a time static VXItrdMutex* gblXMLGrammarMutex = NULL; bool DocumentParser::Initialize(unsigned int cacheSize) { try { XMLPlatformUtils::Initialize(); if (!VXMLDocumentModel::Initialize()) return false; DocumentConverter::Initialize(); VXItrdMutexCreate(&gblXMLGrammarMutex); } catch (const XMLException &) { return false; } DocumentStorageSingleton::Initialize(cacheSize); return true; } void DocumentParser::Deinitialize() { DocumentStorageSingleton::Deinitialize(); try { VXItrdMutexDestroy(&gblXMLGrammarMutex); DocumentConverter::Deinitialize(); VXMLDocumentModel::Deinitialize(); XMLPlatformUtils::Terminate(); } catch (const XMLException &) { // do nothing } } static void LockLoadGrammar(void) { if( gblXMLGrammarMutex ) VXItrdMutexLock(gblXMLGrammarMutex); } static void UnlockLoadGrammar(void) { if( gblXMLGrammarMutex ) VXItrdMutexUnlock(gblXMLGrammarMutex); } DocumentParser::DocumentParser() : parser(NULL), converter(NULL), loadedVXML20(false) { converter = new DocumentConverter(); if (converter == NULL) throw VXIException::OutOfMemory(); parser = XMLReaderFactory::createXMLReader(); if (parser == NULL) { delete converter; throw VXIException::OutOfMemory(); } DTDResolver * dtd = new DTDResolver(); if (dtd == NULL) { delete converter; delete parser; throw VXIException::OutOfMemory(); } parser->setEntityResolver(dtd); // These settings below should not change the Xerces defaults. Their // presence makes the defaults explicit. 
parser->setFeature(XMLUni::fgSAX2CoreNameSpaces, true); parser->setFeature(XMLUni::fgSAX2CoreValidation, true); parser->setFeature(XMLUni::fgXercesDynamic, false); parser->setFeature(XMLUni::fgXercesSchema, true); parser->setFeature(XMLUni::fgXercesSchemaFullChecking, true); parser->setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, false); ErrorHandler *errReporter = new DOMTreeErrorReporter(); parser->setErrorHandler(errReporter); parser->setContentHandler(converter); } DocumentParser::~DocumentParser() { if (parser != NULL) { const ErrorHandler * reporter = parser->getErrorHandler(); delete reporter; const EntityResolver * resolver = parser->getEntityResolver(); delete resolver; delete parser; delete converter; parser = NULL; } } //**************************************************************************** // FetchBuffer //**************************************************************************** // 1: Invalid parameter // 2: Unable to open URL // 3: Unable to read from URL int DocumentParser::FetchBuffer(const VXIchar * url, const VXIMapHolder & properties, VXIMapHolder & streamInfo, VXIinetInterface * inet, SimpleLogger & log, const VXIbyte * & result, VXIulong & read, vxistring & docURL) { if (log.IsLogging(2)) { log.StartDiagnostic(2) << L"DocumentParser::FetchBuffer(" << url << L", " << properties.GetValue() << L")"; log.EndDiagnostic(); } if (inet == NULL || url == NULL || wcslen(url) == 0) return 1; // (1) Open URL VXIinetStream * stream; // VXIMapHolder streamInfo; if (streamInfo.GetValue() == NULL) { return -1; } if (inet->Open(inet, L"vxi", url, INET_MODE_READ, 0, properties.GetValue(), streamInfo.GetValue(), &stream) != 0) { if (log.IsLogging(0)) { log.StartDiagnostic(0) << L"DocumentParser::FetchBuffer - could not " L"open URL: " << url; log.EndDiagnostic(); } return 2; } // (2) Determine document size & read into local memory buffer. 
const VXIValue * tempURL = NULL; tempURL = VXIMapGetProperty(streamInfo.GetValue(), INET_INFO_ABSOLUTE_NAME); if (tempURL == NULL || VXIValueGetType(tempURL) != VALUE_STRING) { inet->Close(inet, &stream); if (log.IsLogging(0)) { log.StartDiagnostic(0) << L"DocumentParser::FetchBuffer - could not " L"retrieve absolute path of document at URL: " << url; log.EndDiagnostic(); } return 2; } docURL = VXIStringCStr(reinterpret_cast<const VXIString *>(tempURL)); const VXIValue * tempSize = NULL; tempSize = VXIMapGetProperty(streamInfo.GetValue(), INET_INFO_SIZE_BYTES); if (tempSize == NULL || VXIValueGetType(tempSize) != VALUE_INTEGER) { inet->Close(inet, &stream); if (log.IsLogging(0)) { log.StartDiagnostic(0) << L"DocumentParser::FetchBuffer - could not " L"retrieve size of document at URL: " << url; log.EndDiagnostic(); } return 2; } VXIint32 bufSize = VXIIntegerValue(reinterpret_cast<const VXIInteger *>(tempSize));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -