📄 adsapien.pas
字号:
(***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is TurboPower Async Professional
*
* The Initial Developer of the Original Code is
* TurboPower Software
*
* Portions created by the Initial Developer are Copyright (C) 1991-2002
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* ***** END LICENSE BLOCK ***** *)
{*********************************************************}
{* ADSAPIEN.PAS 4.06 *}
{*********************************************************}
{* TApdSAPIEngine component *}
{*********************************************************}
{
provides Text-to-Speech and Speech-to-Text using SAPI 4. SAPI 4 is available
for Win9x/ME/2K/XP. SAPI 5 is a newer, more flexible API, but it was not
functional when APRO 4 was released. You'll probably need to install SAPI 4
since it is not installed by default.
The SAPI4 SDK can be downloaded from
http://www.microsoft.com/downloads/release.asp?ReleaseID=26299. The
Spchapi.exe download from that site is 848K and contains just the
supporting API; it does not contain any speech recognition or synthesis
engines. The SAPI4SDK.exe (8,023k) and SAPI4SDKSuite.exe (40,001K)
downloads contain the API binaries as well as recognition/synthesis engines
and the SDK examples and help. The MS-supplied speech recognition and
speech synthesis engines only support English, but other third-party
providers have engines tailored for different languages. There are several
third-party speech engine providers that can be used with the Speech API.
Here are a few links:
http://www.lhsl.com/default2.htm
http://www.att.com/aspg/
http://www.lucent.com/products/solution/0,,CTID+2002-STID+10054-SOID+851-LOCL+1,00.html
http://www.dragonsys.com/products/dev_main.html
http://www-4.ibm.com/software/speech/
A www.google.com search for "speech engine" came up with a bunch of
other sources also.
}
{Global defines potentially affecting this unit}
{$I AWDEFINE.INC}
unit AdSapiEn;
interface
uses
Windows,
ActiveX,
Classes,
Graphics,
{$IFDEF Delphi5}
OleServer,
{$ENDIF}
OleCtrls,
StdVCL,
SysUtils,
Controls,
OoMisc,
AdISapi,
AdSapiGr,
{$IFDEF Delphi5}
Contnrs,
{$ENDIF}
Messages,
Forms,
Dialogs,
AdExcept,
ComObj;
const
{ Constants from the Speech API }
{ Every name in this section carries the Apd prefix. Groups whose values are }
{ distinct powers of two are single-bit masks; the other groups are plain    }
{ ordinal codes.                                                             }
{ TTS Character Sets }
{ Consecutive codes 0..2 (not bit masks). }
ApdCHARSET_TEXT = $00;
ApdCHARSET_IPAPHONETIC = $01;
ApdCHARSET_ENGINEPHONETIC = $02;
{ TTS Options }
{ Only one option value is declared in this unit. }
ApdTTSDATAFLAG_TAGGED = $01;
{ TTS Ages }
{ NOTE(review): the values look like representative ages in years - confirm }
{ against the SAPI 4 SDK before relying on that interpretation.             }
ApdTTSAGE_BABY = 1;
ApdTTSAGE_TODDLER = 3;
ApdTTSAGE_CHILD = 6;
ApdTTSAGE_ADOLESCENT = 14;
ApdTTSAGE_ADULT = 30;
ApdTTSAGE_ELDERLY = 70;
{ TTS Features }
{ One bit per feature: $00001 (bit 0) through $10000 (bit 16). }
ApdTTSFEATURE_ANYWORD = $00001;
ApdTTSFEATURE_VOLUME = $00002;
ApdTTSFEATURE_SPEED = $00004;
ApdTTSFEATURE_PITCH = $00008;
ApdTTSFEATURE_TAGGED = $00010;
ApdTTSFEATURE_IPAUNICODE = $00020;
ApdTTSFEATURE_VISUAL = $00040;
ApdTTSFEATURE_WORDPOSITION = $00080;
ApdTTSFEATURE_PCOPTIMIZED = $00100;
ApdTTSFEATURE_PHONEOPTIMIZED = $00200;
ApdTTSFEATURE_FIXEDAUDIO = $00400;
ApdTTSFEATURE_SINGLEINSTANCE = $00800;
ApdTTSFEATURE_THREADSAFE = $01000;
ApdTTSFEATURE_IPATEXTDATA = $02000;
ApdTTSFEATURE_PREFERRED = $04000;
ApdTTSFEATURE_TRANSPLANTED = $08000;
ApdTTSFEATURE_SAPI4 = $10000;
{ TTS Genders }
{ Consecutive codes 0..2 (not bit masks). }
ApdGENDER_NEUTRAL = 0;
ApdGENDER_FEMALE = 1;
ApdGENDER_MALE = 2;
{ TTS Interfaces }
{ One bit per interface. ApdTTSI_ATTRIBUTES and ApdTTSI_IATTRIBUTES are two }
{ names for the same bit ($10), so a mask cannot tell them apart.           }
ApdTTSI_ILEXPRONOUNCE = $01;
ApdTTSI_ITTSATTRIBUTES = $02;
ApdTTSI_ITTSCENTRAL = $04;
ApdTTSI_ITTSDIALOGS = $08;
ApdTTSI_ATTRIBUTES = $10;
ApdTTSI_IATTRIBUTES = $10;
ApdTTSI_ILEXPRONOUNCE2 = $20;
{ SR Features }
{ One bit per feature: $00001 (bit 0) through $20000 (bit 17). }
ApdSRFEATURE_INDEPSPEAKER = $00001;
ApdSRFEATURE_INDEPMICROPHONE = $00002;
ApdSRFEATURE_TRAINWORD = $00004;
ApdSRFEATURE_TRAINPHONETIC = $00008;
ApdSRFEATURE_WILDCARD = $00010;
ApdSRFEATURE_ANYWORD = $00020;
ApdSRFEATURE_PCOPTIMIZED = $00040;
ApdSRFEATURE_PHONEOPTIMIZED = $00080;
ApdSRFEATURE_GRAMLIST = $00100;
ApdSRFEATURE_GRAMLINK = $00200;
ApdSRFEATURE_MULTILINGUAL = $00400;
ApdSRFEATURE_GRAMRECURSIVE = $00800;
ApdSRFEATURE_IPAUNICODE = $01000;
ApdSRFEATURE_SINGLEINSTANCE = $02000;
ApdSRFEATURE_THREADSAFE = $04000;
ApdSRFEATURE_FIXEDAUDIO = $08000;
ApdSRFEATURE_IPAWORD = $10000;
ApdSRFEATURE_SAPI4 = $20000;
{ SR Supported grammar types }
{ One bit per grammar type (bits 0..2). }
ApdSRGRAM_CFG = $1;
ApdSRGRAM_DICTATION = $2;
ApdSRGRAM_LIMITEDDOMAIN = $4;
{ SR Interfaces }
{ One bit per interface: $0000001 (bit 0) through $2000000 (bit 25). }
{ Note the bit order here: ISRDIALOGS is bit 3, ISRGRAMCOMMON is bit 4. }
ApdSRI_ILEXPRONOUNCE = $0000001;
ApdSRI_ISRATTRIBUTES = $0000002;
ApdSRI_ISRCENTRAL = $0000004;
ApdSRI_ISRDIALOGS = $0000008;
ApdSRI_ISRGRAMCOMMON = $0000010;
ApdSRI_ISRGRAMCFG = $0000020;
ApdSRI_ISRGRAMDICTATION = $0000040;
ApdSRI_ISRGRAMINSERTIONGUI = $0000080;
ApdSRI_ISRESBASIC = $0000100;
ApdSRI_ISRESMERGE = $0000200;
ApdSRI_ISRESAUDIO = $0000400;
ApdSRI_ISRESCORRECTION = $0000800;
ApdSRI_ISRESEVAL = $0001000;
ApdSRI_ISRESGRAPH = $0002000;
ApdSRI_ISRESMEMORY = $0004000;
ApdSRI_ISRESMODIFYGUI = $0008000;
ApdSRI_ISRESSPEAKER = $0010000;
ApdSRI_ISRSPEAKER = $0020000;
ApdSRI_ISRESSCORES = $0040000;
ApdSRI_ISRESAUDIOEX = $0080000;
ApdSRI_ISRGRAMLEXPRON = $0100000;
ApdSRI_ISRRESGRAPHEX = $0200000;
ApdSRI_ILEXPRONOUNCE2 = $0400000;
ApdSRI_IATTRIBUTES = $0800000;
ApdSRI_ISRSPEAKER2 = $1000000;
ApdSRI_ISRDIALOGS2 = $2000000;
{ SR Sequences }
{ Consecutive codes 0..3 (not bit masks). }
ApdSRSEQUENCE_DISCRETE = 0;
ApdSRSEQUENCE_CONTINUOUS = 1;
ApdSRSEQUENCE_WORDSPOT = 2;
ApdSRSEQUENCE_CONTCFGDISCDICT = 3;
{ SR Interference Types }
{ Consecutive codes 1..8. These are NOT bit masks (e.g. TOOLOUD is $0003), }
{ so they must be compared for equality rather than tested with "and".     }
ApdSRMSGINT_NOISE = $0001;
ApdSRMSGINT_NOSIGNAL = $0002;
ApdSRMSGINT_TOOLOUD = $0003;
ApdSRMSGINT_TOOQUIET = $0004;
ApdSRMSGINT_AUDIODATA_STOPPED = $0005;
ApdSRMSGINT_AUDIODATA_STARTED = $0006;
ApdSRMSGINT_IAUDIO_STARTED = $0007;
ApdSRMSGINT_IAUDIO_STOPPED = $0008;
{ SR Training Requests }
{ One bit per request kind (bits 0..2). }
ApdSRGNSTRAIN_GENERAL = $01;
ApdSRGNSTRAIN_GRAMMAR = $02;
ApdSRGNSTRAIN_MICROPHONE = $04;
{ SS Error codes }
{ Speech-synthesis (ApdTTSERR_) result codes. Apart from _NONE (zero) every  }
{ value has its top bit set ($8xxxxxxx).                                     }
{ NOTE(review): these literals are above High(Longint); confirm that any     }
{ comparison against a signed HRESULT value behaves as intended on the       }
{ compilers this unit targets.                                               }
ApdTTSERR_NONE = $00000000;
ApdTTSERR_INVALIDINTERFACE = $80004002;
ApdTTSERR_OUTOFDISK = $80040205;
ApdTTSERR_NOTSUPPORTED = $80004001;
ApdTTSERR_VALUEOUTOFRANGE = $8000FFFF;
ApdTTSERR_INVALIDWINDOW = $8004000F;
ApdTTSERR_INVALIDPARAM = $80070057;
ApdTTSERR_INVALIDMODE = $80040206;
ApdTTSERR_INVALIDKEY = $80040209;
ApdTTSERR_WAVEFORMATNOTSUPPORTED = $80040202;
ApdTTSERR_INVALIDCHAR = $80040208;
ApdTTSERR_QUEUEFULL = $8004020A;
ApdTTSERR_WAVEDEVICEBUSY = $80040203;
ApdTTSERR_NOTPAUSED = $80040501;
ApdTTSERR_ALREADYPAUSED = $80040502;
{ SR Error Codes }
{ Speech-recognition (ApdSRERR_) result codes. Many share their numeric      }
{ value with the ApdTTSERR_ constant of the same suffix (e.g. OUTOFDISK,     }
{ INVALIDPARAM), so a raw code alone does not identify which side raised it. }
{ ApdSRERR_GRAMMARTOOCOMPLEX and ApdSRERR_GRAMTOOCOMPLEX are two names for   }
{ the same value ($80040406).                                                }
ApdSRERR_NONE = $00000000;
ApdSRERR_OUTOFDISK = $80040205;
ApdSRERR_NOTSUPPORTED = $80004001;
ApdSRERR_NOTENOUGHDATA = $80040201;
ApdSRERR_VALUEOUTOFRANGE = $8000FFFF;
ApdSRERR_GRAMMARTOOCOMPLEX = $80040406;
ApdSRERR_GRAMMARWRONGTYPE = $80040407;
ApdSRERR_INVALIDWINDOW = $8004000F;
ApdSRERR_INVALIDPARAM = $80070057;
ApdSRERR_INVALIDMODE = $80040206;
ApdSRERR_TOOMANYGRAMMARS = $8004040B;
ApdSRERR_INVALIDLIST = $80040207;
ApdSRERR_WAVEDEVICEBUSY = $80040203;
ApdSRERR_WAVEFORMATNOTSUPPORTED = $80040202;
ApdSRERR_INVALIDCHAR = $80040208;
ApdSRERR_GRAMTOOCOMPLEX = $80040406;
ApdSRERR_GRAMTOOLARGE = $80040411;
ApdSRERR_INVALIDINTERFACE = $80004002;
ApdSRERR_INVALIDKEY = $80040209;
ApdSRERR_INVALIDFLAG = $80040204;
ApdSRERR_GRAMMARERROR = $80040416;
ApdSRERR_INVALIDRULE = $80040417;
ApdSRERR_RULEALREADYACTIVE = $80040418;
ApdSRERR_RULENOTACTIVE = $80040419;
ApdSRERR_NOUSERSELECTED = $8004041A;
ApdSRERR_BAD_PRONUNCIATION = $8004041B;
ApdSRERR_DATAFILEERROR = $8004041C;
ApdSRERR_GRAMMARALREADYACTIVE = $8004041D;
ApdSRERR_GRAMMARNOTACTIVE = $8004041E;
ApdSRERR_GLOBALGRAMMARALREADYACTIVE = $8004041F;
ApdSRERR_LANGUAGEMISMATCH = $80040420;
ApdSRERR_MULTIPLELANG = $80040421;
ApdSRERR_LDGRAMMARNOWORDS = $80040422;
ApdSRERR_NOLEXICON = $80040423;
ApdSRERR_SPEAKEREXISTS = $80040424;
ApdSRERR_GRAMMARENGINEMISMATCH = $80040425;
ApdSRERR_BOOKMARKEXISTS = $80040426;
ApdSRERR_BOOKMARKDOESNOTEXIST = $80040427;
ApdSRERR_MICWIZARDCANCELED = $80040428;
ApdSRERR_WORDTOOLONG = $80040429;
ApdSRERR_BAD_WORD = $8004042A;
{ Codes without a TTS/SR prefix. }
ApdE_WRONGTYPE = $8004020C;
ApdE_BUFFERTOOSMALL = $8004020D;
type
{ Pascal enumerations and sets declared alongside the Apd* numeric constants. }
{ Member ordinals do NOT generally equal the numeric constants (for example   }
{ tsBaby has ordinal 0 while ApdTTSAGE_BABY is 1), so converting between the  }
{ two needs an explicit mapping rather than a typecast.                       }

{ Duplex setting: full, half, or half with a delay. }
TApdSapiDuplex = (sdFull, sdHalf, sdHalfDelayed);
{ Wait state: none, waiting on speaking, waiting on listening, or restoring }
{ listening.                                                                }
TApdSapiWaitMode = (wmNone, wmWaitSpeaking, wmWaitListening,
wmRestoreListen);
{ Character set; members are in the same order as the ApdCHARSET_ constants. }
TApdCharacterSet = (csText, csIPAPhonetic, csEnginePhonetic);
{ TTS option set; toTagged is the only member. }
TApdTTSOptions = set of (toTagged);
{ Age category, with tsUnknown as an extra member that has no ApdTTSAGE_ }
{ counterpart.                                                          }
TApdTTSAge = (tsBaby, tsToddler, tsChild, tsAdolescent, tsAdult, tsElderly,
tsUnknown);
{ TTS feature set; members are listed in the same order as the bits of the }
{ ApdTTSFEATURE_ constants.                                                }
TApdTTSFeatures = set of (tfAnyWord, tfVolume, tfSpeed, tfPitch, tfTagged,
tfIPAUnicode, tfVisual, tfWordPosition,
tfPCOptimized, tfPhoneOptimized, tfFixedAudio,
tfSingleInstance, tfThreadSafe, tfIPATextData,
tfPreferred, tfTransplanted, tfSAPI4);
{ Gender, with tgUnknown as an extra member beyond the ApdGENDER_ constants. }
TApdTTSGender = (tgNeutral, tgFemale, tgMale, tgUnknown);
{ TTS interface set. tiAttributes and tiIAttributes are separate members  }
{ even though ApdTTSI_ATTRIBUTES and ApdTTSI_IATTRIBUTES are the same bit. }
TApdTTSInterfaces = set of (tiLexPronounce, tiTTSAttributes, tiTTSCentral,
tiTTSDialogs, tiAttributes, tiIAttributes,
tiLexPronounce2);
{ SR feature set; members are listed in the same order as the bits of the }
{ ApdSRFEATURE_ constants.                                                }
TApdSRFeatures = set of (sfIndepSpeaker, sfIndepMicrophone, sfTrainWord,
sfTrainPhonetic, sfWildcard, sfAnyWord,
sfPCOptimized, sfPhoneOptimized, sfGramList,
sfGramLink, sfMultiLingual, sfGramRecursive,
sfIPAUnicode, sfSingleInstance, sfThreadSafe,
sfFixedAudio, sfIPAWord, sfSAPI4);
{ Supported grammar kinds; same order as the ApdSRGRAM_ bits. }
TApdSRGrammars = set of (sgCFG, sgDictation, sgLimitedDomain);
{ SR interface set. Member order follows the ApdSRI_ bits except that      }
{ siSRGramCommon (ordinal 3) precedes siSRDialogs (ordinal 4), whereas     }
{ ApdSRI_ISRDIALOGS is bit 3 and ApdSRI_ISRGRAMCOMMON is bit 4.            }
{ NOTE(review): harmless if conversion is done flag by flag; verify that   }
{ no code casts the raw mask to this set.                                  }
TApdSRInterfaces = set of (siLexPronounce, siSRAttributes, siSRCentral,
siSRGramCommon, siSRDialogs, siSRGramCFG,
siSRGramDictation, siSRGramInsertionGui,
siSREsBasic, siSREsMerge, siSREsAudio,
siSREsCorrection, siSREsEval, siSREsGraph,
siSREsMemory, siSREsModifyGui, siSREsSpeaker,
siSRSpeaker, siSREsScores, siSREsAudioEx,
siSRGramLexPron, siSREsGraphEx, siLexPronounce2,
siAttributes, siSRSpeaker2, siSRDialogs2);
{ Sequence kind, with ssUnknown as an extra member beyond the }
{ ApdSRSEQUENCE_ constants.                                   }
TApdSRSequences = (ssDiscrete, ssContinuous, ssWordSpot, ssContCFGDiscDict,
ssUnknown);
{ Interference kind. The members do not line up one-to-one with the       }
{ ApdSRMSGINT_ constants (different order, and no member named after      }
{ NOSIGNAL), so the translation between them lives elsewhere in the unit. }
TApdSRInterferenceType = (itAudioStarted, itAudioStopped, itDeviceOpened,
itDeviceClosed, itNoise, itTooLoud, itTooQuiet,
itUnknown);
{ Training request set. Members are in the reverse order of the     }
{ ApdSRGNSTRAIN_ bits (GENERAL=$01, GRAMMAR=$02, MICROPHONE=$04).   }
TApdSRTrainingType = set of (ttCurrentMic, ttCurrentGrammar, ttGeneral);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -