⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 preproc.pas

📁 解码器是基于短语的统计机器翻译系统的核心模块
💻 PAS
字号:
(*
 * PREPROC.PAS  -  PreProcessing
 *
 * Copyright (C) 2006 by Yidong Chen <ydchen@xmu.edu.cn>
 * Institute of Artificial Intelligence, Xiamen University
 *
 * Begin       : 09/18/2006
 * Last Change : 09/18/2006
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *)
UNIT PREPROC;

INTERFACE

USES COMMON, CONFFILE;

PROCEDURE Init(cfConfig: TConfFile; init_stType: TSegType);
FUNCTION PreProcess(strInput: STRING): STRING;
PROCEDURE CleanUp;

IMPLEMENTATION

USES
  Windows, SysUtils, Classes, SEGTAG, ICTCLAS;

VAR stType: TSegType;

(*
 * Init - remember the requested segmenter type and start the matching
 * segmentation backend.
 *
 *   cfConfig    : configuration object supplying the backend paths
 *                 (SegTagPath for steMandel, ICTCLASPath for steICT).
 *   init_stType : segmenter selection; it is stored in the unit-level
 *                 stType variable. For any value other than steMandel or
 *                 steICT no backend is initialized.
 *)
PROCEDURE Init(cfConfig: TConfFile; init_stType: TSegType);
BEGIN
  stType:=init_stType;

  IF init_stType=steMandel THEN
    SEGTAG.Init(cfConfig.SegTagPath, False)
  ELSE IF init_stType=steICT THEN
    ICTCLAS.Init(cfConfig.ICTCLASPath);
END;

(*
 * PreProcess - segment and normalize one input sentence.
 *
 * Steps, in order:
 *   1. Segment strInput with the backend selected by the unit-level stType
 *      (SEGTAG for steMandel, ICTCLAS for steICT); for any other value the
 *      input is used as-is. The result is trimmed in every case.
 *   2. Replace every character found in wstrSrc by the character at the
 *      same position in wstrTgt.
 *   3. Split the text into tokens with ReadTrunc.
 *   4. Replace the ellipsis variants listed in the code below by a single
 *      ellipsis token.
 *   5. Join the tokens with single spaces and trim the result.
 *
 *   strInput : the raw sentence.
 *   Returns  : the segmented, normalized, space-separated sentence.
 *)
FUNCTION PreProcess(strInput: STRING): STRING;
CONST
  // Position-wise character mapping: wstrSrc[i] is rewritten to wstrTgt[i].
  // Both strings must keep the same length.
  wstrSrc: WideString='1234567890~!@#$%^&*()[]{}<>?/,。:;「」『』';
  wstrTgt: WideString='1234567890~!@#$%^&*()[]{}《》?/,.:;‘’“”';

  // Returns the 1-based position of wcInput in wstrSrc, or 0 when absent.
  FUNCTION Idx(wcInput: WideChar): Integer;
  VAR iLooper: Integer;
  BEGIN
    Result:=0;
    FOR iLooper:=1 TO Length(wstrSrc) DO
      IF wstrSrc[iLooper]=wcInput THEN BEGIN Result:=iLooper; Exit; END;
  END;

VAR wstrTemp: WideString; iLooper, iTemp: Integer; strTemp, strToken: STRING;
    strlTemp: TStringList;
BEGIN
  // Step 1: segmentation by the configured backend.
  CASE stType OF
    steMandel:
      Result:=Trim(SEGTAG.SegSent(strInput));
    steICT:
      Result:=Trim(ICTCLAS.SegSent(strInput));
    ELSE Result:=Trim(strInput);
  END;

  // Step 2: per-character mapping, done on a WideString so that each
  // element is one WideChar.
  wstrTemp:=Result;
  FOR iLooper:=1 TO Length(wstrTemp) DO
    BEGIN
      iTemp:=Idx(wstrTemp[iLooper]);
      IF iTemp>0 THEN wstrTemp[iLooper]:=wstrTgt[iTemp];
    END;
  strTemp:=wstrTemp;

  strlTemp:=TStringList.Create;
  TRY
    // Step 3: tokenize.
    // NOTE(review): ReadTrunc is declared outside this unit; this loop
    // assumes it removes the returned token from strTemp - confirm.
    WHILE strTemp<>'' DO strlTemp.Add(ReadTrunc(strTemp));

    // Steps 4 and 5: normalize ellipsis tokens and join with spaces.
    Result:='';
    FOR iLooper:=0 TO strlTemp.Count-1 DO
      BEGIN
        strToken:=strlTemp[iLooper];
        IF (strToken='……') OR (strToken='...') OR
           (strToken='....') OR (strToken='.....') THEN
          strToken:='…';
        Result:=Result+strToken+' ';
      END;
  FINALLY
    // Release the list even when tokenizing or joining raises.
    strlTemp.Free;
  END;

  Result:=Trim(Result);
END;

(*
 * CleanUp - shut down the segmentation backend that matches the
 * unit-level stType (SEGTAG for steMandel, ICTCLAS for steICT).
 * For any other stType value nothing is called.
 *)
PROCEDURE CleanUp;
BEGIN
  IF stType=steMandel THEN
    SEGTAG.CleanUp
  ELSE IF stType=steICT THEN
    ICTCLAS.CleanUp;
END;

END.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -