⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bp.pas

📁 解码器是基于短语的统计机器翻译系统的核心模块
💻 PAS
字号:
(*
 * BP.PAS  -  Bilingual Phrases Translation Table
 *
 * Copyright (C) 2006 by Yidong Chen <ydchen@xmu.edu.cn>
 * Institute of Artificial Intelligence, Xiamen University
 *
 * Begin       : 09/18/2006
 * Last Change : 09/18/2006
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *)
UNIT BP;

INTERFACE

USES SysUtils, Classes, COMMON;

(* Loads the phrase table stored in the text file strPBFile into the unit's
   hash table. ptType selects whether source-side tokens are joined with a
   blank (pteWithBlank) or with nothing; init_bUseNeon is remembered and
   consulted by LookUp for phrases that are not in the table. *)
PROCEDURE Init(strPBFile: STRING; ptType: TPhraseType; init_bUseNeon: Boolean);
(* Appends to lstEs the TPTranslation pointers known for source phrase strF.
   When strF is not in the table and IsAWord is True, fallback entries are
   created (owned by this unit, released by ClearTempWords) and appended
   instead; when IsAWord is False nothing is appended. *)
PROCEDURE LookUp(strF: STRING; lstEs: TList; IsAWord: Boolean);
(* Disposes every fallback entry created by LookUp and empties the list that
   tracks them. Pointers previously handed out for those entries become
   invalid. *)
PROCEDURE ClearTempWords;
(* Frees the phrase-table hash and the fallback-entry list created by Init. *)
PROCEDURE CleanUp;

IMPLEMENTATION

USES Math, HASH, NEON;

VAR
  (* Phrase table: maps a source phrase string to a TList of TPTranslation
     records. Created in Init with FreeWordProbs as the value destructor. *)
  hashBP: THash;
  (* Fallback TPTranslation records allocated by LookUp for phrases missing
     from hashBP; disposed by ClearTempWords. *)
  lstTempWords: TList;
  (* Copy of Init's init_bUseNeon argument; when True, LookUp asks the NEON
     unit for translations of unknown words. *)
  bUseNeon: Boolean;

(*
 * Value destructor handed to THash.Create in Init.
 *
 * Value is treated as a TList of TPTranslation pointers: every record in the
 * list is disposed (last to first) and then the list object itself is freed.
 * A NIL Value is accepted and results in no work.
 *)
PROCEDURE FreeWordProbs(Value: Pointer);
VAR lstEntries: TList; iIdx: Integer; ptEntry: TPTranslation;
BEGIN
  (* Nothing to release; Free on a NIL reference would be a no-op anyway. *)
  IF Value=NIL THEN Exit;

  lstEntries:=TList(Value);
  iIdx:=lstEntries.Count;
  WHILE iIdx>0 DO
    BEGIN
      Dec(iIdx);
      ptEntry:=lstEntries[iIdx];
      Dispose(ptEntry);
    END;
  lstEntries.Free;
END;

(*
 * Counts the tokens in strInput.
 *
 * The input is trimmed, then ReadTrunc is called repeatedly on the remaining
 * text until it is empty; the number of calls is the result. An empty or
 * all-blank input yields 0.
 * NOTE(review): relies on ReadTrunc (declared outside this unit) consuming
 * one token from its argument on every call - confirm against COMMON.
 *)
FUNCTION WordLen(strInput: STRING): Integer;
VAR strRest: STRING; iCount: Integer;
BEGIN
  strRest:=Trim(strInput);
  iCount:=0;
  WHILE Length(strRest)>0 DO
    BEGIN
      ReadTrunc(strRest);   (* returned token is not needed, only the count *)
      Inc(iCount);
    END;
  Result:=iCount;
END;

(*
 * Loads the bilingual phrase table from strPBFile into hashBP.
 *
 * Parameters:
 *   strPBFile     - path of the phrase-table text file, one entry per line.
 *   ptType        - pteWithBlank joins source-side tokens with a single
 *                   blank; any other value concatenates them directly.
 *   init_bUseNeon - stored in bUseNeon for later use by LookUp.
 *
 * Each line is read as: source tokens, SEPARATOR, target tokens, SEPARATOR,
 * followed by four numbers (rProbCE, rLexCE, rProbEC, rLexEC). Lines with an
 * empty side, a missing field, or a non-positive number are skipped. The
 * natural logarithm of each number is stored in a new TPTranslation record,
 * which is appended to the TList kept in hashBP under the source phrase.
 *
 * The file is closed even if an exception is raised while reading it.
 *)
PROCEDURE Init(strPBFile: STRING; ptType: TPhraseType; init_bUseNeon: Boolean);
VAR txtfTemp: TextFile;
    strTemp, strPair: STRING; rProbCE, rLexCE, rProbEC, rLexEC: Real;
    strF, strE: STRING;
    ptTemp: TPTranslation;
    lstTemp: TList;
    iPos: Integer;
    strBlank: STRING;
BEGIN
  IF ptType=pteWithBlank THEN strBlank:=' ' ELSE strBlank:='';

  bUseNeon:=init_bUseNeon;

  hashBP:=THash.Create(1048576, @FreeWordProbs);
  AssignFile(txtfTemp, strPBFile);
  ReSet(txtfTemp);
  TRY
    WHILE NOT Eof(txtfTemp) DO
      BEGIN
        ReadLn(txtfTemp, strTemp);

        (* Source side: tokens up to the first SEPARATOR. *)
        strF:='';
        WHILE strTemp<>'' DO
          BEGIN
            strPair:=ReadTrunc(strTemp);
            IF strPair=SEPARATOR THEN Break;
            strF:=strF+strPair+strBlank;
          END;
        strF:=Trim(strF);
        IF (strF='') OR (strTemp='') THEN Continue;

        (* Target side: tokens up to the next SEPARATOR, blank-joined. *)
        strE:='';
        WHILE strTemp<>'' DO
          BEGIN
            strPair:=ReadTrunc(strTemp);
            IF strPair=SEPARATOR THEN Break;
            strE:=strE+strPair+' ';
          END;
        strE:=Trim(strE);
        IF (strE='') OR (strTemp='') THEN Continue;

        (* All four scores go through Ln below, so each must be strictly
           positive; a missing or malformed field parses as -1 and is
           rejected by the same test. *)
        rProbCE:=StrToFloatDef(ReadTrunc(strTemp), -1);
        IF rProbCE<=0 THEN Continue;
        rLexCE:=StrToFloatDef(ReadTrunc(strTemp), -1);
        IF rLexCE<=0 THEN Continue;
        rProbEC:=StrToFloatDef(ReadTrunc(strTemp), -1);
        IF rProbEC<=0 THEN Continue;
        rLexEC:=StrToFloatDef(ReadTrunc(strTemp), -1);
        IF rLexEC<=0 THEN Continue;

        New(ptTemp);
        ptTemp.strWord:=strE;
        ptTemp.rProbCE:=Ln(rProbCE);
        ptTemp.rLexCE:=Ln(rLexCE);
        ptTemp.rProbEC:=Ln(rProbEC);
        ptTemp.rLexEC:=Ln(rLexEC);
        ptTemp.rELen:=WordLen(ptTemp.strWord);
        ptTemp.rLMScore:=-1;

        (* First translation for this source phrase: create its list at the
           slot position reported by Search. *)
        IF hashBP.Search(strF, Pointer(lstTemp), iPos)<>HASH_OK THEN
          BEGIN
            lstTemp:=TList.Create; hashBP.Insert(strF, lstTemp, iPos);
          END;
        lstTemp.Add(ptTemp);
      END;
  FINALLY
    CloseFile(txtfTemp);
  END;

  lstTempWords:=TList.Create;
END;

(*
 * Private helper: allocates a TPTranslation for strWord whose four scores
 * are all Ln(SMOOTH_PROB), with rELen = iELen and rLMScore = -1, registers
 * it in lstTempWords (so ClearTempWords can dispose it) and returns it.
 *)
FUNCTION NewSmoothedTranslation(CONST strWord: STRING; iELen: Integer): TPTranslation;
VAR ptTemp: TPTranslation;
BEGIN
  New(ptTemp);
  ptTemp.strWord:=strWord;
  ptTemp.rProbCE:=Ln(SMOOTH_PROB);
  ptTemp.rLexCE:=Ln(SMOOTH_PROB);
  ptTemp.rProbEC:=Ln(SMOOTH_PROB);
  ptTemp.rLexEC:=Ln(SMOOTH_PROB);
  ptTemp.rELen:=iELen;
  ptTemp.rLMScore:=-1;
  lstTempWords.Add(ptTemp);
  Result:=ptTemp;
END;

(*
 * Appends the translation candidates for source phrase strF to lstEs.
 *
 * Parameters:
 *   strF    - source phrase used as the key into hashBP.
 *   lstEs   - receives TPTranslation pointers; existing items are kept.
 *   IsAWord - when strF is not in hashBP, fallback entries are produced only
 *             if this is True; otherwise lstEs is left unchanged.
 *
 * Fallback when strF is unknown and IsAWord is True:
 *   - bUseNeon False: one entry that copies strF unchanged, with rELen = 1.
 *   - bUseNeon True : one entry per string returned by NEON.LoopUp; if that
 *     call returns False, a single entry holding NEON.TranSent of the
 *     post-processed strF (or the post-processed strF itself when TranSent
 *     returns an empty string).
 * Fallback entries are owned by lstTempWords, not by the caller.
 *
 * The temporary string list is released even if a NEON call raises, and the
 * record is allocated only after the calls that may raise have completed.
 *)
PROCEDURE LookUp(strF: STRING; lstEs: TList; IsAWord: Boolean);
VAR lstTemp: TList; iLooper: Integer;
    strlTemp: TStringList;
    strE: STRING;
BEGIN
  (* Known phrase: hand out the table-owned records directly. *)
  IF hashBP.Search(strF, Pointer(lstTemp))=HASH_OK THEN
    BEGIN
      FOR iLooper:=0 TO lstTemp.Count-1 DO lstEs.Add(lstTemp[iLooper]);
      Exit;
    END;

  IF NOT IsAWord THEN Exit;

  IF NOT bUseNeon THEN
    BEGIN
      (* Pass the unknown word through untranslated. *)
      lstEs.Add(NewSmoothedTranslation(strF, 1));
      Exit;
    END;

  strlTemp:=TStringList.Create;
  TRY
    (* "LoopUp" is the routine's actual name in the NEON unit. *)
    IF NEON.LoopUp(strF, strlTemp) THEN
      BEGIN
        FOR iLooper:=0 TO strlTemp.Count-1 DO
          BEGIN
            strE:=strlTemp[iLooper];
            lstEs.Add(NewSmoothedTranslation(strE, WordLen(strE)));
          END;
      END
    ELSE
      BEGIN
        strF:=PostProcess(strF);
        strE:=NEON.TranSent(strF);
        IF strE='' THEN strE:=strF;
        lstEs.Add(NewSmoothedTranslation(strE, WordLen(strE)));
      END;
  FINALLY
    strlTemp.Free;
  END;
END;

(*
 * Disposes every TPTranslation record tracked in lstTempWords (in list
 * order) and then empties the list. The list object itself stays allocated.
 *)
PROCEDURE ClearTempWords;
VAR iIdx: Integer;
    ptEntry: TPTranslation;
BEGIN
  iIdx:=0;
  WHILE iIdx<lstTempWords.Count DO
    BEGIN
      ptEntry:=lstTempWords[iIdx];
      Dispose(ptEntry);
      Inc(iIdx);
    END;
  lstTempWords.Clear;
END;

(*
 * Releases everything Init created.
 *
 * Frees hashBP, disposes any fallback records still tracked in lstTempWords,
 * then frees that list. Both globals are reset to NIL, so calling CleanUp
 * again, or calling it when Init never ran, does nothing.
 *)
PROCEDURE CleanUp;
BEGIN
  FreeAndNil(hashBP);
  IF lstTempWords<>NIL THEN
    BEGIN
      (* Freeing the list alone would leak the records it still points to. *)
      ClearTempWords;
      FreeAndNil(lstTempWords);
    END;
END;

END.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -