📄 documentconverter.cpp
字号:
/****************License************************************************
*
* Copyright 2000-2003. ScanSoft, Inc.
*
* Use of this software is subject to notices and obligations set forth
* in the SpeechWorks Public License - Software Version 1.2 which is
* included with this software.
*
* ScanSoft is a registered trademark of ScanSoft, Inc., and OpenSpeech,
* SpeechWorks and the SpeechWorks logo are registered trademarks or
* trademarks of SpeechWorks International, Inc. in the United States
* and other countries.
*
***********************************************************************/
/***********************************************************************
* This class performs several jobs. Among them:
*
* 1) <grammar> contents are collapsed and blindly copied.
*
* 2) Explicit and implicit prompts are converted into <prompt> elements with
* full SSML headers and trailers.
* NOTE: <value>, <enumerate>, and <audio> don't contribute to the copy
* depth as they require special handling.
*
* 3) PCDATA consisting only of whitespace is discarded.
*
* 4) <error>, <help>, <noinput> and <nomatch> are converted into a <catch>
* with the appropriate event name.
*
* 5) <choice> elements may be assigned dtmf values.
*
* 6) Attribute conflicts are detected whenever possible.
*
* 7) <metadata> is ignored (except in <grammar> or <prompt>).
*
* 8) Assign xml:base and xml:lang to <grammar>, <prompt>, <field>, and <menu>.
*
* NOTE: By performing these duties, the DocumentConverter greatly simplifies
* the rest of the interpreter. Unfortunately, this means that this
* code is tricky to write and maintain. PLEASE BE VERY CAREFUL THAT
* YOU UNDERSTAND WHAT IS HAPPENING HERE BEFORE MAKING ANY CHANGES.
*
***********************************************************************
*
* 1 Grammar processing
*
* The content of <grammar> elements is blindly copied. This is controlled
* by 'copyDepth' and 'inGrammar'. This is possible because VoiceXML
* grammars are entirely static. Reducing them down to PCDATA at this stage
* allows the interpreter to treat the entire grammar as a black box without
* regard to supported types.
*
***********************************************************************
*
* 2 Prompt processing
*
* A frequently noted defect of VoiceXML is that prompts may be either explicitly
* declared (i.e. within <prompt>) or implicitly declared as raw CDATA. In
* this code, we convert all implicit prompts into explicit ones and collapse
* most of the SSML into PCDATA. At the end, prompts are composed only of
* those elements requiring special handling (i.e. <audio>, <enumerate>, and
* <value>) and PCDATA.
*
***********************************************************************
*
* 3 Whitespace stripping
*
* Because of implicit prompts, the XML parser is required to treat most
* whitespace as significant information. These 'empty prompts' are stripped
* at this level to simplify later processing.
*
* Likewise, <choice> and <option> content is whitespace stripped (though this
* task must be performed by the VXMLDocumentRep since Xerces may split PCDATA
* across multiple calls to DocumentConverter::characters).
*
***********************************************************************
*
* 4 Event collapsing
*
* VoiceXML defines <error>, <help>, <noinput> and <nomatch> as aliases
* for <catch event="...">. These are converted at this layer (see
* startElement) to simplify later processing.
*
***********************************************************************
*
* 5 Menu choice processing
*
* DTMF values may be assigned to <choice>s if necessary.
*
***********************************************************************
*
* 6 Attribute conflict detection
*
* There are many cases in the VoiceXML specification where conflicting
* attributes result in error.badfetch. These should (in future) be
* detected by the XML schema and are caught at this level.
*
* One difficult case appears inside of <grammar> and <script> where either
* the 'src' attribute or content is legal, but not both. As grammars may
* not appear inside scripts and vice versa, this is detected with a simple
* scheme using the variables 'contentForbidden' and 'hasContent'.
*
***********************************************************************
*
* 7 <metadata> stripped
*
* This content has no runtime impact. It is stripped at this level
* UNLESS it is inside a <grammar> or <prompt> tag.
*
***********************************************************************
*
* 8 <prompt> & <grammar> always have xml:base and xml:lang attributes.
* <field> and <menu> always have xml:lang
*
* In an effort to keep these elements independent from the rest of VXML
* this attribute is always set. This allows the SRGS or SSML to
* incorporate an xml:base in the root element which might be useful if
* the contents are being sent to a separate server.
*
* Likewise, a language is required for the type attribute on fields (used
* for builtins) and for generating the CDATA grammars in menu choices.
*
***********************************************************************/
#include <vxibuildopts.h>
#if P_VXI
#include "DocumentConverter.hpp"
#include "vxibuildopts.h"
#include <xercesc/sax2/Attributes.hpp>
#include <xercesc/sax/SAXParseException.hpp>
#include <xercesc/sax/Locator.hpp>
#include <vector>
#include <algorithm>
#include <sstream> // by ProcessNodeFinal
#include "VXMLDocumentRep.hpp"
#include "XMLChConverter.hpp"
#include <iostream>
//#############################################################################
#ifdef _MSC_VER
#pragma warning( disable:4062 )
#pragma warning( disable:4061 )
#endif
// Element codes that are private to this file.  Numbering begins at 0x200
// and every following enumerator takes the next consecutive value.
// NOTE(review): 0x200 is presumably chosen to stay clear of the public
// element codes (NODE_*, DEFAULTS_*) - confirm against their definition.
enum {
  PRIV_ELEM_RangeStart = 0x200,   // Must remain the first entry.

  // Event shorthand elements.
  PRIV_ELEM_ERROR,
  PRIV_ELEM_HELP,
  PRIV_ELEM_NOINPUT,
  PRIV_ELEM_NOMATCH,

  PRIV_ELEM_METADATA,

  // SSML elements.
  PRIV_ELEM_BREAK,
  PRIV_ELEM_DESC,
  PRIV_ELEM_EMPHASIS,
  PRIV_ELEM_MARK,
  PRIV_ELEM_PARAGRAPH,
  PRIV_ELEM_PHONEME,
  PRIV_ELEM_PROSODY,
  PRIV_ELEM_SAYAS,
  PRIV_ELEM_SENTENCE,
  PRIV_ELEM_SUB,
  PRIV_ELEM_VOICE
};
// Attribute codes that are private to this file.  Numbering begins at 0x200
// and every following enumerator takes the next consecutive value.
// NOTE(review): 0x200 is presumably chosen to stay clear of the public
// ATTRIBUTE_* codes - confirm against their definition.
enum {
  PRIV_ATTRIB_RangeStart = 0x200,   // Must remain the first entry.

  PRIV_ATTRIB_SCHEMALOC
};
// Table entry pairing an element name with its integer element code.
//
//   key   - element name.  Only the pointer is stored; the string is not
//           copied, so it must remain valid for as long as the entry is used.
//   value - integer code for the element (InitializeTables stores NODE_*,
//           DEFAULTS_* and PRIV_ELEM_* values here).
//
// No copy constructor or copy assignment operator is declared: the
// compiler-generated ones perform exactly the member-wise copy that is needed.
struct VXMLElementInfo {
  const VXIchar * key;
  int value;

  VXMLElementInfo(const VXIchar * k, int v)
    : key(k), value(v) { }
};
// Orders element entries by name using wcscmp.
//
// Returns true only when both keys are non-NULL and x.key sorts strictly
// before y.key.  If either key is NULL the result is false, so an entry with
// a NULL key compares equivalent to every other entry.
// NOTE(review): that makes this a consistent ordering for sorting/searching
// only while no entry being compared has a NULL key - confirm with callers.
bool operator< (const VXMLElementInfo & x, const VXMLElementInfo & y)
{
  const bool bothKeysPresent = (x.key != NULL) && (y.key != NULL);
  return bothKeysPresent && (wcscmp(x.key, y.key) < 0);
}
// Table entry pairing an attribute name with its integer attribute code.
//
//   key   - attribute name.  Only the pointer is stored; the string is not
//           copied, so it must remain valid for as long as the entry is used.
//   value - integer code for the attribute (InitializeTables stores
//           ATTRIBUTE_* values here).
//
// No copy constructor or copy assignment operator is declared: the
// compiler-generated ones perform exactly the member-wise copy that is needed.
struct VXMLAttribute {
  const VXIchar * key;
  int value;

  VXMLAttribute(const VXIchar * k, int v)
    : key(k), value(v) { }
};
// Orders attribute entries by name using wcscmp.
//
// Returns true only when both keys are non-NULL and x.key sorts strictly
// before y.key.  If either key is NULL the result is false, so an entry with
// a NULL key compares equivalent to every other entry.
// NOTE(review): that makes this a consistent ordering for sorting/searching
// only while no entry being compared has a NULL key - confirm with callers.
bool operator< (const VXMLAttribute & x, const VXMLAttribute & y)
{
  const bool bothKeysPresent = (x.key != NULL) && (y.key != NULL);
  return bothKeysPresent && (wcscmp(x.key, y.key) < 0);
}
// Container types for the attribute-name and element-name tables.
typedef std::vector<VXMLAttribute> TABLE_ATTRS;
typedef std::vector<VXMLElementInfo> TABLE_ELEMS;
// The tables themselves; InitializeTables() appends the entries.
// NOTE(review): these are declared without 'static', so as written they are
// visible to the linker under the generic names 'attrs' and 'elems' -
// confirm whether that is intended.
TABLE_ATTRS attrs;
TABLE_ELEMS elems;
//#############################################################################
static void InitializeTables()
{
// (1) Elements
// (1.1) VXML
elems.push_back(VXMLElementInfo(L"assign" , NODE_ASSIGN));
elems.push_back(VXMLElementInfo(L"audio" , NODE_AUDIO));
elems.push_back(VXMLElementInfo(L"block" , NODE_BLOCK));
elems.push_back(VXMLElementInfo(L"cancel" , NODE_CANCEL));
elems.push_back(VXMLElementInfo(L"catch" , NODE_CATCH));
elems.push_back(VXMLElementInfo(L"choice" , NODE_CHOICE));
elems.push_back(VXMLElementInfo(L"clear" , NODE_CLEAR));
elems.push_back(VXMLElementInfo(L"disconnect" , NODE_DISCONNECT));
elems.push_back(VXMLElementInfo(L"else" , NODE_ELSE));
elems.push_back(VXMLElementInfo(L"elseif" , NODE_ELSEIF));
elems.push_back(VXMLElementInfo(L"enumerate" , NODE_ENUMERATE));
elems.push_back(VXMLElementInfo(L"exit" , NODE_EXIT));
elems.push_back(VXMLElementInfo(L"field" , NODE_FIELD));
elems.push_back(VXMLElementInfo(L"filled" , NODE_FILLED));
elems.push_back(VXMLElementInfo(L"form" , NODE_FORM));
elems.push_back(VXMLElementInfo(L"goto" , NODE_GOTO));
elems.push_back(VXMLElementInfo(L"grammar" , NODE_GRAMMAR));
elems.push_back(VXMLElementInfo(L"if" , NODE_IF));
elems.push_back(VXMLElementInfo(L"initial" , NODE_INITIAL));
elems.push_back(VXMLElementInfo(L"link" , NODE_LINK));
elems.push_back(VXMLElementInfo(L"log" , NODE_LOG));
elems.push_back(VXMLElementInfo(L"menu" , NODE_MENU));
elems.push_back(VXMLElementInfo(L"meta" , NODE_META));
elems.push_back(VXMLElementInfo(L"object" , NODE_OBJECT));
elems.push_back(VXMLElementInfo(L"option" , NODE_OPTION));
elems.push_back(VXMLElementInfo(L"param" , NODE_PARAM));
elems.push_back(VXMLElementInfo(L"prompt" , NODE_PROMPT));
elems.push_back(VXMLElementInfo(L"property" , NODE_PROPERTY));
elems.push_back(VXMLElementInfo(L"record" , NODE_RECORD));
elems.push_back(VXMLElementInfo(L"return" , NODE_RETURN));
elems.push_back(VXMLElementInfo(L"reprompt" , NODE_REPROMPT));
elems.push_back(VXMLElementInfo(L"script" , NODE_SCRIPT));
elems.push_back(VXMLElementInfo(L"subdialog" , NODE_SUBDIALOG));
elems.push_back(VXMLElementInfo(L"submit" , NODE_SUBMIT));
elems.push_back(VXMLElementInfo(L"throw" , NODE_THROW));
elems.push_back(VXMLElementInfo(L"transfer" , NODE_TRANSFER));
elems.push_back(VXMLElementInfo(L"value" , NODE_VALUE));
elems.push_back(VXMLElementInfo(L"var" , NODE_VAR));
elems.push_back(VXMLElementInfo(L"vxml" , NODE_VXML));
// (1.2) from Defaults document
elems.push_back(VXMLElementInfo(L"defaults" , DEFAULTS_ROOT));
elems.push_back(VXMLElementInfo(L"language" , DEFAULTS_LANGUAGE));
// (1.3) Internals elements (these are converted to others)
elems.push_back(VXMLElementInfo(L"error" , PRIV_ELEM_ERROR));
elems.push_back(VXMLElementInfo(L"help" , PRIV_ELEM_HELP));
elems.push_back(VXMLElementInfo(L"noinput" , PRIV_ELEM_NOINPUT));
elems.push_back(VXMLElementInfo(L"nomatch" , PRIV_ELEM_NOMATCH));
elems.push_back(VXMLElementInfo(L"metadata" , PRIV_ELEM_METADATA));
// (1.4) SSML
elems.push_back(VXMLElementInfo(L"break" , PRIV_ELEM_BREAK));
elems.push_back(VXMLElementInfo(L"desc" , PRIV_ELEM_DESC));
elems.push_back(VXMLElementInfo(L"emphasis" , PRIV_ELEM_EMPHASIS));
elems.push_back(VXMLElementInfo(L"mark" , PRIV_ELEM_MARK));
elems.push_back(VXMLElementInfo(L"p" , PRIV_ELEM_PARAGRAPH));
elems.push_back(VXMLElementInfo(L"paragraph" , PRIV_ELEM_PARAGRAPH));
elems.push_back(VXMLElementInfo(L"phoneme" , PRIV_ELEM_PHONEME));
elems.push_back(VXMLElementInfo(L"prosody" , PRIV_ELEM_PROSODY));
elems.push_back(VXMLElementInfo(L"s" , PRIV_ELEM_SENTENCE));
elems.push_back(VXMLElementInfo(L"say-as" , PRIV_ELEM_SAYAS));
elems.push_back(VXMLElementInfo(L"sentence" , PRIV_ELEM_SENTENCE));
elems.push_back(VXMLElementInfo(L"sub" , PRIV_ELEM_SUB));
elems.push_back(VXMLElementInfo(L"voice" , PRIV_ELEM_VOICE));
// (2) Element attributes
// (2.1) VXML 1.0
attrs.push_back(VXMLAttribute(L"_itemname" , ATTRIBUTE__ITEMNAME));
attrs.push_back(VXMLAttribute(L"aai" , ATTRIBUTE_AAI));
attrs.push_back(VXMLAttribute(L"aaiexpr" , ATTRIBUTE_AAIEXPR));
attrs.push_back(VXMLAttribute(L"accept" , ATTRIBUTE_ACCEPT));
attrs.push_back(VXMLAttribute(L"application" , ATTRIBUTE_APPLICATION));
attrs.push_back(VXMLAttribute(L"archive" , ATTRIBUTE_ARCHIVE));
attrs.push_back(VXMLAttribute(L"bargein" , ATTRIBUTE_BARGEIN));
attrs.push_back(VXMLAttribute(L"bargeintype" , ATTRIBUTE_BARGEINTYPE));
attrs.push_back(VXMLAttribute(L"base" , ATTRIBUTE_BASE));
attrs.push_back(VXMLAttribute(L"beep" , ATTRIBUTE_BEEP));
attrs.push_back(VXMLAttribute(L"bridge" , ATTRIBUTE_BRIDGE));
attrs.push_back(VXMLAttribute(L"charset" , ATTRIBUTE_CHARSET));
attrs.push_back(VXMLAttribute(L"classid" , ATTRIBUTE_CLASSID));
attrs.push_back(VXMLAttribute(L"codebase" , ATTRIBUTE_CODEBASE));
attrs.push_back(VXMLAttribute(L"codetype" , ATTRIBUTE_CODETYPE));
attrs.push_back(VXMLAttribute(L"cond" , ATTRIBUTE_COND));
attrs.push_back(VXMLAttribute(L"connecttimeout", ATTRIBUTE_CONNECTTIME));
attrs.push_back(VXMLAttribute(L"content" , ATTRIBUTE_CONTENT));
attrs.push_back(VXMLAttribute(L"count" , ATTRIBUTE_COUNT));
attrs.push_back(VXMLAttribute(L"data" , ATTRIBUTE_DATA));
attrs.push_back(VXMLAttribute(L"dest" , ATTRIBUTE_DEST));
attrs.push_back(VXMLAttribute(L"destexpr" , ATTRIBUTE_DESTEXPR));
attrs.push_back(VXMLAttribute(L"dtmf" , ATTRIBUTE_DTMF));
attrs.push_back(VXMLAttribute(L"dtmfterm" , ATTRIBUTE_DTMFTERM));
attrs.push_back(VXMLAttribute(L"enctype" , ATTRIBUTE_ENCTYPE));
attrs.push_back(VXMLAttribute(L"event" , ATTRIBUTE_EVENT));
attrs.push_back(VXMLAttribute(L"eventexpr" , ATTRIBUTE_EVENTEXPR));
attrs.push_back(VXMLAttribute(L"expr" , ATTRIBUTE_EXPR));
attrs.push_back(VXMLAttribute(L"expritem" , ATTRIBUTE_EXPRITEM));
attrs.push_back(VXMLAttribute(L"fetchaudio" , ATTRIBUTE_FETCHAUDIO));
attrs.push_back(VXMLAttribute(L"fetchhint" , ATTRIBUTE_FETCHHINT));
attrs.push_back(VXMLAttribute(L"fetchtimeout" , ATTRIBUTE_FETCHTIMEOUT));
attrs.push_back(VXMLAttribute(L"finalsilence" , ATTRIBUTE_FINALSILENCE));
attrs.push_back(VXMLAttribute(L"http-equiv" , ATTRIBUTE_HTTP_EQUIV));
attrs.push_back(VXMLAttribute(L"id" , ATTRIBUTE_ID));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -