📄 preproc.pas
字号:
(*
 * PREPROC.PAS - PreProcessing
 *
 * Copyright (C) 2006 by Yidong Chen <ydchen@xmu.edu.cn>
 * Institute of Artificial Intelligence, Xiamen University
 * Begin       : 09/18/2006
 * Last Change : 09/18/2006
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *)
UNIT PREPROC;
INTERFACE
USES COMMON, CONFFILE;
PROCEDURE Init(cfConfig: TConfFile; init_stType: TSegType);
FUNCTION PreProcess(strInput: STRING): STRING;
PROCEDURE CleanUp;
IMPLEMENTATION
USES
Windows, SysUtils, Classes, SEGTAG, ICTCLAS;
VAR stType: TSegType;
(*
 * Init - record the selected segmenter type and initialise its backend.
 *
 *   cfConfig    : configuration object; its SegTagPath or ICTCLASPath
 *                 property is handed to the chosen backend.
 *   init_stType : segmenter type; stored in the unit-level stType so that
 *                 PreProcess and CleanUp act on the same backend.
 *
 * For steMandel, SEGTAG.Init is called; for steICT, ICTCLAS.Init is called.
 * Any other value only updates stType and initialises nothing.
 *)
PROCEDURE Init(cfConfig: TConfFile; init_stType: TSegType);
BEGIN
  stType := init_stType;
  IF init_stType = steMandel THEN
    SEGTAG.Init(cfConfig.SegTagPath, False)
  ELSE IF init_stType = steICT THEN
    ICTCLAS.Init(cfConfig.ICTCLASPath);
END;
(*
 * PreProcess - segment a sentence and normalise its punctuation.
 *
 *   strInput : the raw input sentence.
 *   Returns  : the processed sentence, tokens separated by single spaces,
 *              with no leading or trailing whitespace.
 *
 * Steps, in order:
 *   1. Segment strInput with the backend selected by the unit-level stType
 *      (SEGTAG for steMandel, ICTCLAS for steICT); for any other value the
 *      input is only trimmed.
 *   2. Replace every character found in wstrSrc by the character at the
 *      same position in wstrTgt.
 *   3. Split the text into tokens with ReadTrunc, which is declared outside
 *      this unit (presumably it removes and returns the next token from
 *      its argument - confirm against its definition).
 *   4. Replace any token that is exactly one of the known ellipsis
 *      spellings by a single ellipsis character.
 *   5. Join the tokens with single spaces and trim the result.
 *
 * The token list is released in a FINALLY section so that it is not leaked
 * when ReadTrunc or a string operation raises an exception.
 *)
FUNCTION PreProcess(strInput: STRING): STRING;
CONST
  (* Character mapping table: wstrSrc[i] is rewritten to wstrTgt[i].
     Both strings must have the same length.
     NOTE(review): in this copy of the file many entries (for example the
     digits) map a character to itself, so only a few entries actually
     change the text. Possibly the source side once held full-width forms
     that were lost in an encoding conversion - confirm against the
     original file. *)
  wstrSrc: WideString='1234567890~!@#$%^&*()[]{}<>?/,。:;「」『』';
  wstrTgt: WideString='1234567890~!@#$%^&*()[]{}《》?/,.:;‘’“”';

  (* Returns the 1-based position of wcInput in wstrSrc, or 0 if absent. *)
  FUNCTION Idx(wcInput: WideChar): Integer;
  VAR iLooper: Integer;
  BEGIN
    Result:=0;
    FOR iLooper:=1 TO Length(wstrSrc) DO
      IF wstrSrc[iLooper]=wcInput THEN
      BEGIN
        Result:=iLooper;
        Exit;
      END;
  END;

VAR
  wstrTemp: WideString;
  iLooper, iTemp: Integer;
  strTemp, strToken: STRING;
  strlTemp: TStringList;
BEGIN
  (* Step 1: word segmentation by the configured backend. *)
  CASE stType OF
    steMandel:
      Result:=Trim(SEGTAG.SegSent(strInput));
    steICT:
      Result:=Trim(ICTCLAS.SegSent(strInput));
    ELSE Result:=Trim(strInput);
  END;

  (* Step 2: per-character mapping, done on a WideString so that each
     index addresses one WideChar. *)
  wstrTemp:=Result;
  FOR iLooper:=1 TO Length(wstrTemp) DO
  BEGIN
    iTemp:=Idx(wstrTemp[iLooper]);
    IF iTemp>0 THEN wstrTemp[iLooper]:=wstrTgt[iTemp];
  END;
  strTemp:=wstrTemp;

  strlTemp:=TStringList.Create;
  TRY
    (* Step 3: tokenise; the loop ends once strTemp has been consumed. *)
    WHILE strTemp<>'' DO strlTemp.Add(ReadTrunc(strTemp));

    (* Step 4: unify the different ellipsis spellings. *)
    FOR iLooper:=0 TO strlTemp.Count-1 DO
    BEGIN
      strToken:=strlTemp[iLooper];
      IF (strToken='……') OR (strToken='...') OR
         (strToken='....') OR (strToken='.....') THEN
        strlTemp[iLooper]:='…';
    END;

    (* Step 5: rebuild the sentence with single-space separators. *)
    Result:='';
    FOR iLooper:=0 TO strlTemp.Count-1 DO
      Result:=Result+strlTemp[iLooper]+' ';
    Result:=Trim(Result);
  FINALLY
    strlTemp.Free;
  END;
END;
(*
 * CleanUp - shut down the segmentation backend selected by the unit-level
 * stType: SEGTAG.CleanUp for steMandel, ICTCLAS.CleanUp for steICT.
 * Does nothing for any other value of stType.
 *)
PROCEDURE CleanUp;
BEGIN
  IF stType = steMandel THEN
    SEGTAG.CleanUp
  ELSE IF stType = steICT THEN
    ICTCLAS.CleanUp;
END;
END.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -