⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 umainform.pas

📁 delphi基于MS SPEECH 5.1的语音识别演示
💻 PAS
字号:
unit uMainForm;

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, ActiveX, OleServer, SpeechLib_TLB, StdCtrls, ComCtrls,
  IdAntiFreezeBase, IdAntiFreeze, IdBaseComponent, IdComponent, IdTCPConnection,
  IdTCPClient, ExtCtrls, IdGlobal;

const
  // Speech API UI-type identifiers. TfrmMain.InvokeUI passes one of these
  // strings to the recognizer's IsUISupported / DisplayUI calls to open the
  // corresponding engine dialog.
  // NOTE(review): only SPDUI_MicTraining, SPDUI_UserTraining and
  // SPDUI_RecoProfileProperties are referenced in the visible part of this
  // unit; the remaining identifiers are presumably kept for completeness.
  SPDUI_EngineProperties = 'EngineProperties';
  SPDUI_AddRemoveWord = 'AddRemoveWord';
  SPDUI_UserTraining = 'UserTraining';
  SPDUI_MicTraining = 'MicTraining';
  SPDUI_RecoProfileProperties = 'RecoProfileProperties';
  SPDUI_AudioProperties = 'AudioProperties';
  SPDUI_AudioVolume = 'AudioVolume';

type
  // Robot status record. TfrmMain.Timer1Timer reads SizeOf(TROBORIINFO) raw
  // bytes from the TCP connection and copies them straight into this record,
  // so the byte layout is part of the wire protocol: packed, no padding.
  TROBORIINFO = packed record
    // One-byte record header. Declared as AnsiChar (not Char) so the field
    // stays exactly one byte on Unicode Delphi versions, where Char is a
    // two-byte WideChar and would shift every following field.
    chHead: AnsiChar;
    m_Vel: Integer;                     // linear velocity
    m_RVel: Integer;                    // angular velocity
    m_RobPosX: Integer;                 // odometry X coordinate
    m_RobPosY: Integer;                 // odometry Y coordinate
    m_RobPosA: Integer;                 // odometry heading angle
    m_CurState: Integer;                // current state
    m_Object: Integer;                  // target visible
    m_ObjDist: Integer;                 // target distance
    m_ObjAng: Integer;                  // target angle
    // NOTE(review): presumably eight infrared sensor readings - confirm
    // against the robot control program; not displayed by this unit.
    m_IR: array[0..7] of Integer;
  end;

  // Main form: hosts a shared SAPI recognition context, logs recognition
  // events to a memo, forwards recognized commands over TCPClient and shows
  // the robot status received on the same connection.
  TfrmMain = class(TForm)
    // Designer-managed components and event handlers (streamed from the .dfm).
    pbAudioLevel: TProgressBar;
    lbAudioLevel: TLabel;
    lbMin: TLabel;
    lbMax: TLabel;
    mmLog: TMemo;
    lbControlLogs: TLabel;
    btnMicrophone: TButton;
    btnSpeaker: TButton;
    btnUser: TButton;
    btnClose: TButton;
    TCPClient: TIdTCPClient;
    IdAntiFreeze: TIdAntiFreeze;
    gbRobInfo: TGroupBox;
    lbRobInfo: TLabel;
    Timer1: TTimer;
    procedure FormCreate(Sender: TObject);
    procedure btnMicrophoneClick(Sender: TObject);
    procedure btnSpeakerClick(Sender: TObject);
    procedure btnUserClick(Sender: TObject);
    procedure btnCloseClick(Sender: TObject);
    procedure Timer1Timer(Sender: TObject);
  private
    { Private declarations }
    // Shared recognition context, created in FormCreate and owned by the form.
    SSRContext: TSpSharedRecoContext;
    // Grammar created from SSRContext and loaded from Grammar.xml in FormCreate.
    SRGrammar: ISpeechRecoGrammar;
  public
    { Public declarations }
    // Handlers for the SSRContext events; FormCreate assigns each one to the
    // matching On* property of the recognition context.
    procedure AudioLevel(Sender: TObject; StreamNumber: Integer; StreamPosition:
      OleVariant; AudioLevel: Integer);
    procedure FalseRecognition(ASender: TObject; StreamNumber: Integer;
      StreamPosition: OleVariant; const Result: ISpeechRecoResult);
    procedure Hypothesis(ASender: TObject; StreamNumber: Integer;
      StreamPosition: OleVariant; const Result: ISpeechRecoResult);
    procedure Interference(Sender: TObject; StreamNumber: Integer;
      StreamPosition: OleVariant; Interference: TOleEnum);
    procedure PhraseStart(Sender: TObject; StreamNumber: Integer;
      StreamPosition: OleVariant);
    procedure Recognition(ASender: TObject; StreamNumber: Integer;
      StreamPosition: OleVariant; RecognitionType: TOleEnum; const Result:
      ISpeechRecoResult);
    procedure RecognitionForOtherContext(Sender: TObject; StreamNumber: Integer;
      StreamPosition: OleVariant);
    procedure RequestUI(ASender: TObject; StreamNumber: Integer; StreamPosition:
      OleVariant; const UIType: WideString);
    procedure SoundStart(Sender: TObject; StreamNumber: Integer; StreamPosition:
      OleVariant);
    procedure SoundEnd(Sender: TObject; StreamNumber: Integer; StreamPosition:
      OleVariant);
    procedure StartStream(Sender: TObject; StreamNumber: Integer;
      StreamPosition: OleVariant);
    procedure EndStream(Sender: TObject; StreamNumber: Integer; StreamPosition:
      OleVariant; StreamReleased: WordBool);
    procedure Adaptation(Sender: TObject; StreamNumber: Integer; StreamPosition:
      OleVariant);
    procedure Bookmark(Sender: TObject; StreamNumber: Integer; StreamPosition,
      BookmarkId: OleVariant; Options: TOleEnum);
    procedure EnginePrivate(Sender: TObject; StreamNumber: Integer;
      StreamPosition, EngineData: OleVariant);
    procedure PropertyNumberChange(ASender: TObject; StreamNumber: Integer;
      StreamPosition: OleVariant; const PropertyName: WideString;
      NewNumberValue:
      Integer);
    procedure PropertyStringChange(ASender: TObject; StreamNumber: Integer;
      StreamPosition: OleVariant; const PropertyName: WideString; const
      NewStringValue: WideString);
    procedure RecognizerStateChange(Sender: TObject; StreamNumber: Integer;
      StreamPosition: OleVariant; NewState: TOleEnum);
    // Opens one of the recognizer's built-in dialogs, if the engine supports it.
    procedure InvokeUI(const TypeOfUI, Caption: WideString);
    // Append a timestamped line to mmLog; the first overload formats Msg with
    // Args via Format. Both swallow any exception raised while logging.
    procedure Log(const Msg: string; const Args: array of const); overload;
    procedure Log(const Msg: string); overload;
  end;

var
  // Global reference to the main form instance.
  // NOTE(review): presumably assigned by Application.CreateForm in the
  // project file, which is not visible from this unit - confirm.
  frmMain           : TfrmMain;

implementation

{$R *.dfm}

{ Searches one level of a phrase-property collection for a property whose
  name matches Name (case-insensitive, via CompareText).

  Props - collection to search; may be nil, in which case nothing is found.
  Name  - property name to look for.

  Returns the first matching property, or nil when Props is nil or no
  member matches. }
function GetProp(Props: ISpeechPhraseProperties;
  const Name: string): ISpeechPhraseProperty; overload;
var
  I                 : Integer;
  Prop              : ISpeechPhraseProperty;
begin
  Result := nil;
  // Guard against a missing collection: calling Count on a nil interface
  // would raise an access violation.
  if not Assigned(Props) then
    Exit;
  for I := 0 to Props.Count - 1 do
  begin
    Prop := Props.Item(I);
    // Skip nil items defensively before reading their Name.
    if Assigned(Prop) and (CompareText(Prop.Name, Name) = 0) then
    begin
      Result := Prop;
      Break;
    end;
  end;
end;

{ Resolves a (possibly nested) semantic property of a recognition result and
  returns its value.

  SRResult - recognition result whose PhraseInfo.Properties are searched.
  Path     - property names from the top level downwards; Path[0] is looked
             up in the phrase's properties, each further element in the
             Children of the property found before it.

  Returns the Value of the property named by the last path element, or
  Unassigned when SRResult is nil, Path is empty, a level has no property
  collection, or any element of the path is not found. }
function GetPropValue(SRResult: ISpeechRecoResult;
  const Path: array of string): OleVariant;
var
  Level             : ISpeechPhraseProperties;
  Prop              : ISpeechPhraseProperty;
  Depth             : Integer;
begin
  Result := Unassigned;
  // An empty path would leave Prop nil and crash on Prop.Value below.
  if (not Assigned(SRResult)) or (Length(Path) = 0) then
    Exit;
  Level := SRResult.PhraseInfo.Properties;
  for Depth := Low(Path) to High(Path) do
  begin
    // No collection at this level means the path cannot be resolved.
    if not Assigned(Level) then
      Exit;
    Prop := GetProp(Level, Path[Depth]);
    if not Assigned(Prop) then
      Exit;
    // Only descend when there is another path element to resolve.
    if Depth < High(Path) then
      Level := Prop.Children;
  end;
  Result := Prop.Value;
end;

// Handler assigned to SSRContext.OnAdaptation in FormCreate; deliberately empty.
procedure TfrmMain.Adaptation(Sender: TObject; StreamNumber: Integer;
  StreamPosition: OleVariant);
begin
  // Intentionally not handled.
end;

// Audio-level event handler: mirrors the reported input level on the
// progress bar and on the caption next to it.
procedure TfrmMain.AudioLevel(Sender: TObject; StreamNumber: Integer;
  StreamPosition: OleVariant; AudioLevel: Integer);
var
  LevelCaption      : string;
begin
  // Build the caption first, then update both controls.
  LevelCaption := '音量:' + IntToStr(AudioLevel);
  pbAudioLevel.Position := AudioLevel;
  lbAudioLevel.Caption := LevelCaption;
end;

// Handler assigned to SSRContext.OnBookmark in FormCreate; deliberately empty.
procedure TfrmMain.Bookmark(Sender: TObject; StreamNumber: Integer;
  StreamPosition, BookmarkId: OleVariant; Options: TOleEnum);
begin
  //Log('OnBookmark: %d', [Integer(BookmarkId)]);
  // Intentionally not handled (the logging call above is disabled).
end;

// Close button: closes the main form.
procedure TfrmMain.btnCloseClick(Sender: TObject);
begin
  Self.Close;
end;

// Microphone button: asks the recognizer for its microphone-training dialog.
procedure TfrmMain.btnMicrophoneClick(Sender: TObject);
begin
  Self.InvokeUI(SPDUI_MicTraining, 'Microphone Setup');
end;

// Speaker button: asks the recognizer for its user (voice) training dialog.
procedure TfrmMain.btnSpeakerClick(Sender: TObject);
begin
  Self.InvokeUI(SPDUI_UserTraining, 'Speaker Training');
end;

// User button: asks the recognizer for its recognition-profile properties dialog.
procedure TfrmMain.btnUserClick(Sender: TObject);
begin
  Self.InvokeUI(SPDUI_RecoProfileProperties, 'User Settings');
end;

// Handler assigned to SSRContext.OnEndStream in FormCreate; deliberately empty.
procedure TfrmMain.EndStream(Sender: TObject; StreamNumber: Integer;
  StreamPosition: OleVariant; StreamReleased: WordBool);
begin
  //Log('OnEndStream: stream %d', [StreamNumber]);
  // Intentionally not handled (the logging call above is disabled).
end;

// Handler assigned to SSRContext.OnEnginePrivate in FormCreate; deliberately empty.
procedure TfrmMain.EnginePrivate(Sender: TObject; StreamNumber: Integer;
  StreamPosition, EngineData: OleVariant);
begin
  //Log('OnEnginePrivate');
  // Intentionally not handled (the logging call above is disabled).
end;

// Handler assigned to SSRContext.OnFalseRecognition in FormCreate; deliberately empty.
procedure TfrmMain.FalseRecognition(ASender: TObject; StreamNumber: Integer;
  StreamPosition: OleVariant; const Result: ISpeechRecoResult);
begin
  // Intentionally not handled.
end;

// Form initialisation:
//   1. creates the shared recognition context and wires every event handler,
//   2. loads and activates the command grammar from Grammar.xml,
//   3. tries to connect to the robot control program on 127.0.0.1:6000,
//   4. initialises the robot status caption and the log.
// A failed TCP connect is logged and otherwise tolerated. Errors raised by
// the speech API calls are not caught here and propagate to the caller.
procedure TfrmMain.FormCreate(Sender: TObject);
var
  GrammarFile       : string;
begin
  SSRContext := TSpSharedRecoContext.Create(Self);
  SSRContext.OnStartStream := StartStream;
  SSRContext.OnEndStream := EndStream;
  SSRContext.OnBookmark := Bookmark;
  SSRContext.OnSoundStart := SoundStart;
  SSRContext.OnSoundEnd := SoundEnd;
  SSRContext.OnPhraseStart := PhraseStart;
  SSRContext.OnRecognition := Recognition;
  SSRContext.OnHypothesis := Hypothesis;
  SSRContext.OnPropertyNumberChange := PropertyNumberChange;
  SSRContext.OnPropertyStringChange := PropertyStringChange;
  SSRContext.OnFalseRecognition := FalseRecognition;
  SSRContext.OnInterference := Interference;
  SSRContext.OnRequestUI := RequestUI;
  SSRContext.OnRecognizerStateChange := RecognizerStateChange;
  SSRContext.OnAdaptation := Adaptation;
  SSRContext.OnRecognitionForOtherContext := RecognitionForOtherContext;
  SSRContext.OnAudioLevel := AudioLevel;
  SSRContext.OnEnginePrivate := EnginePrivate;
  // Subscribe to every event type (original note: this is what enables
  // OnAudioLevel).
  SSRContext.EventInterests := SREAllEvents;

  // Prefer the grammar file next to the executable so loading does not
  // depend on the process's current directory; fall back to the original
  // relative name when it is not found there.
  GrammarFile := ExtractFilePath(Application.ExeName) + 'Grammar.xml';
  if not FileExists(GrammarFile) then
    GrammarFile := 'Grammar.xml';
  SRGrammar := SSRContext.CreateGrammar(0);
  SRGrammar.CmdLoadFromFile(GrammarFile, SLODynamic);
  SRGrammar.CmdSetRuleIdState(0, SGDSActive);

  mmLog.Lines.Add('');
  mmLog.Lines.Add('------------------------------------');

  TCPClient.Host := '127.0.0.1';
  TCPClient.Port := 6000;

  try
    TCPClient.Connect;
    Log('已连接机器人控制程序');
  except
    // Connection is best-effort: keep the form usable, but record why the
    // connect failed instead of discarding the reason.
    on E: Exception do
      Log('连接机器人控制程序失败: %s', [E.Message]);
  else
    Log('连接机器人控制程序失败');
  end;
  lbRobInfo.Caption :=
    '线速度:'#13'角速度:'#13'里程X坐标:'#13'里程Y坐标:'#13'里程朝向角:'#13'当前状态:'#13'目标可见:'#13'目标距离:'#13'目标角度:';
  Log('开始监听语音');
end;

// Hypothesis event handler: logs the text of the interim (not yet final)
// recognition. Any exception raised while reading or logging the text is
// swallowed, so a failing hypothesis never interrupts recognition.
procedure TfrmMain.Hypothesis(ASender: TObject; StreamNumber: Integer;
  StreamPosition: OleVariant; const Result: ISpeechRecoResult);
begin
  try
    // Whole phrase (start 0, all elements), with replacements applied.
    Log('预识别为: %s', [Result.PhraseInfo.GetText(0, -1, True)]);
  except
    // Deliberately ignored: logging a hypothesis is best-effort only.
  end;
end;

// Interference event handler: logs a human-readable description of the
// audio problem reported by the recognizer.
//
// The original mapping reported "unrecognizable sound" for too loud, too
// quiet and too fast, and "too quiet" for too slow; each interference kind
// now gets the message that matches its constant. Values outside the known
// set are logged with their numeric code instead of an empty line.
procedure TfrmMain.Interference(Sender: TObject; StreamNumber: Integer;
  StreamPosition: OleVariant; Interference: TOleEnum);
var
  S                 : string;
begin
  case Interference of
    SINone: S := '无';
    SINoise: S := '噪声';
    SINoSignal: S := '无信号';
    SITooLoud: S := '声音太响';
    SITooQuiet: S := '声音太轻';
    SITooFast: S := '语速太快';
    SITooSlow: S := '语速太慢';
  else
    // Unknown interference code: keep it visible in the log.
    S := '未知干扰: ' + IntToStr(Integer(Interference));
  end;
  Log('%s', [S]);
end;

// Shows one of the speech engine's own configuration dialogs.
//   TypeOfUI - UI type identifier (one of the SPDUI_* constants).
//   Caption  - title passed to the dialog.
// Does nothing when the recognizer reports the UI type as unsupported.
procedure TfrmMain.InvokeUI(const TypeOfUI, Caption: WideString);
var
  ExtraData         : OleVariant;
begin
  // No extra data is supplied to the engine.
  ExtraData := Unassigned;
  if not SSRContext.Recognizer.IsUISupported(TypeOfUI, ExtraData) then
    Exit;
  SSRContext.Recognizer.DisplayUI(Handle, Caption, TypeOfUI, ExtraData);
end;

// Appends Msg to the log memo, prefixed with the current date/time in
// square brackets. Exceptions raised while logging are swallowed.
procedure TfrmMain.Log(const Msg: string);
var
  Stamp             : string;
begin
  try
    Stamp := '[' + DateTimeToStr(Now) + ']';
    mmLog.Lines.Add(Stamp + Msg);
  except
    // Logging must never disturb the caller.
  end;
end;

// Formats Msg with Args (SysUtils.Format) and appends the result to the log
// memo, prefixed with the current date/time in square brackets. Exceptions
// raised while formatting or logging are swallowed.
procedure TfrmMain.Log(const Msg: string; const Args: array of const);
var
  Stamp             : string;
  Line              : string;
begin
  try
    Stamp := '[' + DateTimeToStr(Now) + ']';
    Line := Stamp + Format(Msg, Args);
    mmLog.Lines.Add(Line);
  except
    // Logging must never disturb the caller.
  end;
end;

// Handler assigned to SSRContext.OnPhraseStart in FormCreate; deliberately empty.
procedure TfrmMain.PhraseStart(Sender: TObject; StreamNumber: Integer;
  StreamPosition: OleVariant);
begin
  //Log('OnPhraseStart');
  // Intentionally not handled (the logging call above is disabled).
end;

// Handler assigned to SSRContext.OnPropertyNumberChange in FormCreate;
// deliberately empty.
procedure TfrmMain.PropertyNumberChange(ASender: TObject; StreamNumber: Integer;
  StreamPosition: OleVariant; const PropertyName: WideString;
  NewNumberValue: Integer);
begin
  //Log('OnPropertyNumberChange: %s=%d', [PropertyName, NewNumberValue]);
  // Intentionally not handled (the logging call above is disabled).
end;

// Handler assigned to SSRContext.OnPropertyStringChange in FormCreate;
// deliberately empty.
procedure TfrmMain.PropertyStringChange(ASender: TObject; StreamNumber: Integer;
  StreamPosition: OleVariant; const PropertyName, NewStringValue: WideString);
begin
  //Log('OnPropertyStringChange: %s=%s', [PropertyName, NewStringValue]);
  // Intentionally not handled (the logging call above is disabled).
end;

// Recognition event handler: logs the recognized phrase and forwards the
// matched command number to the robot control program.
//
// The command is the value of the nested grammar property
// chosencontrol/controlvalue. Values 1..5 are sent as a text line ('1'..'5')
// over TCPClient; any other value, or a missing property, sends nothing.
// If the TCP connection is not open the command is dropped and a log line
// says so, instead of failing inside the write. Any exception is caught and
// its message is logged.
procedure TfrmMain.Recognition(ASender: TObject; StreamNumber: Integer;
  StreamPosition: OleVariant; RecognitionType: TOleEnum;
  const Result: ISpeechRecoResult);
var
  ControlValue      : OleVariant;
  Command           : Integer;
begin
  try
    Log('识别为: %s', [Result.PhraseInfo.GetText(0, -1, True)]);

    ControlValue := GetPropValue(Result, ['chosencontrol', 'controlvalue']);
    // GetPropValue yields Unassigned when the phrase carries no such property.
    if VarIsEmpty(ControlValue) or VarIsNull(ControlValue) then
      Exit;

    // Explicit variant-to-integer conversion; a non-numeric value raises
    // here and is reported by the handler below.
    Command := ControlValue;
    if (Command < 1) or (Command > 5) then
      Exit;

    // FormCreate tolerates a failed connect, so the connection may be closed.
    if not TCPClient.Connected then
    begin
      Log('未连接机器人控制程序, 指令未发送');
      Exit;
    end;
    TCPClient.IOHandler.WriteLn(IntToStr(Command));
  except
    on E: Exception do
      Log(E.Message);
  end;
end;

// Handler assigned to SSRContext.OnRecognitionForOtherContext in FormCreate;
// deliberately empty.
procedure TfrmMain.RecognitionForOtherContext(Sender: TObject;
  StreamNumber: Integer; StreamPosition: OleVariant);
begin
  //Log('OnRecognitionForOtherContext')
  // Intentionally not handled (the logging call above is disabled).
end;

// Recognizer-state event handler: logs the new listening state as text.
// A state value outside the four known constants is logged with an empty
// description.
procedure TfrmMain.RecognizerStateChange(Sender: TObject; StreamNumber: Integer;
  StreamPosition: OleVariant; NewState: TOleEnum);
var
  StateName         : string;
begin
  if NewState = SRSInactive then
    StateName := '禁用'
  else if NewState = SRSActive then
    StateName := '启用'
  else if NewState = SRSActiveAlways then
    StateName := '始终启用'
  else if NewState = SRSInactiveWithPurge then
    StateName := '始终禁用'
  else
    StateName := '';
  Log('语音监听状态:%s', [StateName]);
end;

// Handler assigned to SSRContext.OnRequestUI in FormCreate; deliberately empty.
procedure TfrmMain.RequestUI(ASender: TObject; StreamNumber: Integer;
  StreamPosition: OleVariant; const UIType: WideString);
begin
  //Log('OnRequestUI: %s', [UIType]);
  // Intentionally not handled (the logging call above is disabled).
end;

// Sound-end event handler: records in the log that the sound has ended.
procedure TfrmMain.SoundEnd(Sender: TObject; StreamNumber: Integer;
  StreamPosition: OleVariant);
begin
  Self.Log('语音结束');
end;

// Sound-start event handler: records in the log that sound has started.
procedure TfrmMain.SoundStart(Sender: TObject; StreamNumber: Integer;
  StreamPosition: OleVariant);
begin
  Self.Log('语音开始');
end;

// Handler assigned to SSRContext.OnStartStream in FormCreate; deliberately empty.
procedure TfrmMain.StartStream(Sender: TObject; StreamNumber: Integer;
  StreamPosition: OleVariant);
begin
  //Log('OnStartStream: stream %d', [StreamNumber]);
  // Intentionally not handled (the logging call above is disabled).
end;

// Timer tick: if a complete robot status record has arrived on the TCP
// connection, reads it and shows its fields in lbRobInfo.
//
// At most one TROBORIINFO record is consumed per tick. The record is read
// only when at least SizeOf(TROBORIINFO) bytes are already buffered, so the
// read cannot block inside the timer handler while waiting for the rest of
// a partially received record. On any error the reason is logged and the
// connection is closed.
procedure TfrmMain.Timer1Timer(Sender: TObject);
var
  aBytes            : TIdBytes;
  ROBORIINFO        : TROBORIINFO;
begin
  try
    if not TCPClient.Connected then
      Exit;
    // NOTE(review): like the original InputBufferIsEmpty check, this relies
    // on the Connected call above having moved pending socket data into the
    // input buffer - confirm for the Indy version in use.
    if TCPClient.IOHandler.InputBuffer.Size < SizeOf(TROBORIINFO) then
      Exit;

    TCPClient.IOHandler.ReadBytes(aBytes, SizeOf(TROBORIINFO));
    // Copy the raw bytes over the packed record (layout = wire format).
    BytesToRaw(aBytes, ROBORIINFO, SizeOf(TROBORIINFO));
    lbRobInfo.Caption :=
      Format('线速度:%d'#13'角速度:%d'#13'里程X坐标:%d'#13'里程Y坐标:%d'#13'里程朝向角:%d'#13'当前状态:%d'#13'目标可见:%d'#13'目标距离:%d'#13'目标角度:%d',
      [ROBORIINFO.m_Vel, ROBORIINFO.m_RVel, ROBORIINFO.m_RobPosX,
      ROBORIINFO.m_RobPosY, ROBORIINFO.m_RobPosA, ROBORIINFO.m_CurState,
      ROBORIINFO.m_Object, ROBORIINFO.m_ObjDist, ROBORIINFO.m_ObjAng]);
  except
    on E: Exception do
    begin
      // Record why the connection is being dropped before closing it.
      Log('读取机器人状态失败: %s', [E.Message]);
      TCPClient.Disconnect;
    end;
  end;
end;

end.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -