📄 dpdriver.pas
字号:
(*
* DPDRIVER.PAS - A phrase-based decoder implemented using DP** Copyright (C) 2006 by Yidong Chen <ydchen@xmu.edu.cn>Institute of Artificial Intelligence, Xiamen University* Begin : 09/18/2006* Last Change : 09/18/2006** This program is free software; you can redistribute it and/or* modify it under the terms of the GNU Lesser General Public* License as published by the Free Software Foundation; either* version 2.1 of the License, or (at your option) any later version.** This program is distributed in the hope that it will be useful,* but WITHOUT ANY WARRANTY; without even the implied warranty of* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the* GNU General Public License for more details.** You should have received a copy of the GNU Lesser General Public* License along with this program; if not, write to the Free Software* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.*)
UNIT DPDRIVER;
INTERFACE
USES SysUtils, Windows, Classes, COMMON, CONFFILE;
FUNCTION Init(init_cfConfig: TConfFile;
init_rtType: TResultType;
init_stType: TSegType;
init_ptType: TPhraseType): Real;
FUNCTION TranslateIt(strInput: STRING): STRING;
FUNCTION Translate863File(strSrcFile, strTgtFile: STRING): Real;
PROCEDURE CleanUp;
IMPLEMENTATION
USES Math, PERIOD, HASH, FSTRLIST, LM, BP, PREPROC, NEON;
TYPE
TPAQuantity=^TAQuantity;
TAQuantity=RECORD
strE: STRING;
rProb: Real;
BackPointer: TPAQuantity;
END;
VAR
cfConfig: TConfFile;
strlSents: TStringList;
rtType: TResultType;
ptType: TPhraseType;
FUNCTION Init(init_cfConfig: TConfFile;
init_rtType: TResultType;
init_stType: TSegType;
init_ptType: TPhraseType): Real;
VAR pTimer: TPeriod;
BEGIN
pTimer:=TPeriod.Create;
pTimer.Start;
cfConfig:=init_cfConfig;
rtType:=init_rtType;
ptType:=init_ptType;
CASE rtType OF
rtePurePb, rtePbNeonWord:
BEGIN
IF (rtType<>rtePurePb) THEN
BEGIN
Write('Loading Neon...');
NEON.Init(cfConfig.CEMTPath, False);
WriteLn(#13, 'Neon has been Loaded OK.');
END;
IF init_stType<>steNone THEN
BEGIN
Write('Loading PreProcessor...');
PREPROC.Init(cfConfig, init_stType);
WriteLn(#13, 'PreProcessor has been Loaded OK.');
END;
Write('Loading ', cfConfig.LanguageModel, '...');
LM.Init(cfConfig.LMPath, cfConfig.LanguageModel);
WriteLn(#13, cfConfig.LanguageModel, ' has been Loaded OK.');
Write('Loading ', cfConfig.BilingualPhrases, '...');
BP.Init(cfConfig.BilingualPhrases, ptType, rtType<>rtePurePb);
WriteLn(#13, cfConfig.BilingualPhrases, ' has been Loaded OK.');
WriteLn('Tp = ', FloatToStr(cfConfig.rTp));
WriteLn('Np = ', cfConfig.iNp);
WriteLn('*** parameters begin ***');
WriteLn('p(c|e) = ', FloatToStr(cfConfig.rLPce));
WriteLn('lex(c|e) = ', FloatToStr(cfConfig.rLLce));
WriteLn('p(e|c) = ', FloatToStr(cfConfig.rLPec));
WriteLn('lex(e|c) = ', FloatToStr(cfConfig.rLLec));
WriteLn('word_penalty = ', FloatToStr(cfConfig.rLLenP));
WriteLn('language_model = ', FloatToStr(cfConfig.rLLm));
WriteLn('**** parameters end ****');
strlSents:=TStringList.Create;
END;
END;
pTimer.Stop;
Result:=pTimer.TimeElapsed;
pTimer.Free;
END;
FUNCTION GetSource(strlSentence: TStringList; iLow, iHigh: Integer): STRING;
VAR iLooper: Integer; strTemp: STRING;
BEGIN
Result:='';
Dec(iLow);
Dec(iHigh);
IF ptType=pteWithBlank THEN strTemp:=' ' ELSE strTemp:='';
FOR iLooper:=iLow TO iHigh DO Result:=Result+strlSentence[iLooper]+strTemp;
Result:=Trim(Result);
END;
FUNCTION FirstWord(strEP: STRING): STRING;
BEGIN
Result:=ReadTrunc(strEP);
END;
FUNCTION SecondWord(strEP: STRING): STRING;
BEGIN
ReadTrunc(strEP);
Result:=ReadTrunc(strEP);
END;
FUNCTION LastWord(strEP: STRING): STRING;
BEGIN
Result:=ReadTruncBack(strEP);
END;
FUNCTION LastButOneWord(strEP: STRING): STRING;
BEGIN
ReadTruncBack(strEP);
Result:=ReadTruncBack(strEP);
END;
FUNCTION TheComparer(Item1, Item2: Pointer): Integer;
VAR paqTemp1, paqTemp2: TPAQuantity;
BEGIN
paqTemp1:=Item1; paqTemp2:=Item2;
IF paqTemp2.rProb - paqTemp1.rProb>0 THEN Result:=1
ELSE IF paqTemp2.rProb - paqTemp1.rProb=0 THEN Result:=0
ELSE Result:=-1;
END;
PROCEDURE DoPruning(lstInput: TList; rQ: Real; iMaxCount: Integer);
VAR iLooper: Integer; paqTemp: TPAQuantity; rNowMax: Real; iUpper: Integer;
BEGIN
IF (lstInput.Count=0) OR (iMaxCount<=0) OR (rQ<=0) THEN Exit;
lstInput.Sort(@TheComparer);
IF iMaxCount>0 THEN
FOR iLooper:=lstInput.Count-1 DOWNTO iMaxCount DO
BEGIN
paqTemp:=lstInput[iLooper];
Dispose(paqTemp);
lstInput.Delete(iLooper);
END;
IF rQ>0 THEN
BEGIN
paqTemp:=lstInput[0];
rNowMax:=paqTemp.rProb+Ln(rQ);
IF iMaxCount>0 THEN iUpper:=Min(iMaxCount-1, lstInput.Count-1)
ELSE iUpper:=lstInput.Count-1;
FOR iLooper:=iUpper DOWNTO 1 DO
BEGIN
paqTemp:=lstInput[iLooper];
IF rNowMax>paqTemp.rProb THEN
BEGIN
Dispose(paqTemp);
lstInput.Delete(iLooper);
END;
END;
END;
END;
FUNCTION TranslateSent(strInput: STRING): STRING;
VAR strlSentence: TStringList;
Q: ARRAY OF Pointer;
J: Integer;
jLooper, iLooper, kLooper, mLooper: Integer;
lstTemp: TList;
rMax, rTemp: Real;
paqMax, paqTemp, paqLooper: TPAQuantity;
strF: STRING;
lstEs: TList;
ptTemp: TPTranslation;
iTemp: Integer;
strTemp: STRING;
strFirstWord, strLastWord, strLastButOneWord, strSecondWord, strContext: STRING;
BEGIN
Result:='';
IF strInput='' THEN Exit;
strlSentence:=TStringList.Create;
WHILE strInput<>'' DO strlSentence.Add(ReadTrunc(strInput));
J:=strlSentence.Count;
SetLength(Q, J+2);
FOR iLooper:=0 TO J+1 DO Q[iLooper]:=TList.Create;
lstTemp:=Q[0];
New(paqTemp);
paqTemp.strE:=SOS;
paqTemp.rProb:=0;
paqTemp.BackPointer:=NIL;
lstTemp.Add(paqTemp);
lstEs:=TList.Create;
FOR jLooper:=1 TO J DO
BEGIN
FOR iLooper:=0 TO jLooper-1 DO
BEGIN
lstTemp:=Q[iLooper];
strF:=GetSource(strlSentence, iLooper+1, jLooper);
BP.LookUp(strF, lstEs, iLooper+1=jLooper);
IF lstEs.Count=0 THEN Continue;
IF lstTemp.Count=0 THEN Continue;
FOR kLooper:=0 TO lstEs.Count-1 DO
BEGIN
ptTemp:=lstEs[kLooper];
paqMax:=lstTemp[0];
strFirstWord:=FirstWord(ptTemp.strWord);
strLastWord:=LastWord(paqMax.strE);
strLastButOneWord:=LastButOneWord(paqMax.strE);
strSecondWord:=SecondWord(ptTemp.strWord);
IF strLastButOneWord='' THEN strContext:=strLastWord
ELSE strContext:=strLastButOneWord+' '+strLastWord;
rMax:=paqMax.rProb+
cfConfig.rLPce*ptTemp.rProbCE+
cfConfig.rLLce*ptTemp.rLexCE+
cfConfig.rLPec*ptTemp.rProbEC+
cfConfig.rLLec*ptTemp.rLexEC+
cfConfig.rLLm*LM.P(strFirstWord, strContext)+
cfConfig.rLLenP*ptTemp.rELen;
IF strSecondWord<>'' THEN
rMax:=rMax+cfConfig.rLLm*LM.P(strSecondWord, strLastWord+' '+strFirstWord);
IF ptTemp.rLMScore<0 THEN ptTemp.rLMScore:=LM.PPhrase(ptTemp.strWord);
rMax:=rMax+cfConfig.rLLm*ptTemp.rLMScore;
FOR mLooper:=1 TO lstTemp.Count-1 DO
BEGIN
paqTemp:=lstTemp[mLooper];
strLastWord:=LastWord(paqTemp.strE);
strLastButOneWord:=LastButOneWord(paqTemp.strE);
IF strLastButOneWord='' THEN strContext:=strLastWord
ELSE strContext:=strLastButOneWord+' '+strLastWord;
rTemp:=paqTemp.rProb+
cfConfig.rLPce*ptTemp.rProbCE+
cfConfig.rLLce*ptTemp.rLexCE+
cfConfig.rLPec*ptTemp.rProbEC+
cfConfig.rLLec*ptTemp.rLexEC+
cfConfig.rLLm*LM.P(strFirstWord, strContext)+
cfConfig.rLLenP*ptTemp.rELen;
IF strSecondWord<>'' THEN
rTemp:=rTemp+cfConfig.rLLm*LM.P(strSecondWord, strLastWord+' '+strFirstWord);
IF ptTemp.rLMScore<0 THEN ptTemp.rLMScore:=LM.PPhrase(ptTemp.strWord);
rTemp:=rTemp+cfConfig.rLLm*ptTemp.rLMScore;
IF rTemp>rMax THEN
BEGIN
rMax:=rTemp;
paqMax:=paqTemp;
END;
END;
New(paqTemp);
paqTemp.strE:=ptTemp.strWord;
paqTemp.rProb:=rMax;
paqTemp.BackPointer:=paqMax;
TList(Q[jLooper]).Add(paqTemp);
END;
lstEs.Clear;
END;
DoPruning(Q[jLooper], cfConfig.rTp, cfConfig.iNp);
END;
lstEs.Free;
lstTemp:=Q[J];
IF lstTemp.Count=0 THEN Exit; (*...*)
paqMax:=lstTemp[0];
rMax:=paqMax.rProb+
cfConfig.rLLm*LM.P(EOS, LastWord(paqMax.strE));
FOR mLooper:=1 TO lstTemp.Count-1 DO
BEGIN
paqTemp:=lstTemp[mLooper];
rTemp:=paqTemp.rProb+
cfConfig.rLLm*LM.P(EOS, LastWord(paqTemp.strE));
IF rTemp>rMax THEN
BEGIN
rMax:=rTemp;
paqMax:=paqTemp;
END;
END;
New(paqTemp);
paqTemp.strE:=EOS;
paqTemp.rProb:=rMax;
paqTemp.BackPointer:=paqMax;
TList(Q[J+1]).Add(paqTemp);
paqLooper:=paqMax;
Result:='';
WHILE paqLooper<>NIL DO
BEGIN
IF (paqLooper.BackPointer<>NIL) AND (paqLooper.BackPointer.BackPointer=NIL) THEN
BEGIN
strTemp:=paqLooper.strE;
IF (strTemp<>'') AND (strTemp[1] IN ['a'..'z']) THEN
BEGIN
strTemp[1]:=UpCase(strTemp[1]);
paqLooper.strE:=strTemp;
END;
END;
IF paqLooper.BackPointer<>NIL THEN Result:=paqLooper.strE+' '+Result;
paqLooper:=paqLooper.BackPointer;
END;
Result:=PostProcess(Result);
FOR iLooper:=0 TO J+1 DO
BEGIN
lstTemp:=Q[iLooper];
FOR jLooper:=lstTemp.Count-1 DOWNTO 0 DO
BEGIN
paqTemp:=lstTemp[jLooper];
Dispose(paqTemp);
lstTemp.Delete(jLooper);
END;
lstTemp.Free;
END;
strlSentence.Free;
BP.ClearTempWords;
END;
PROCEDURE SplitSent(strInput: STRING; strlOutput: TStringList);
VAR strTemp, strSub: STRING;
BEGIN
IF strlOutput=NIL THEN Exit;
strlOutput.Clear;
strSub:='';
WHILE strInput<>'' DO
BEGIN
strTemp:=ReadTrunc(strInput);
IF strSub='' THEN strSub:=strTemp ELSE strSub:=strSub+' '+strTemp;
IF (strTemp='.') OR (strTemp='。') OR
(strTemp='?') OR (strTemp='?') OR
(strTemp='!') OR (strTemp='!') OR
(strTemp=';') OR (strTemp=';') OR
(strTemp='...') OR (strTemp='……') OR (strTemp='…') {OR
(strTemp=',') OR (strTemp=',') OR
(strTemp=':') OR (strTemp=':') OR
(strTemp='-') OR (strTemp='--') OR (strTemp='————') OR (strTemp='——')} THEN
BEGIN
strlOutput.Add(strSub);
strSub:='';
END;
END;
IF strSub<>'' THEN strlOutput.Add(strSub);
END;
FUNCTION TranslateIt(strInput: STRING): STRING;
VAR iLooper: Integer;
BEGIN
CASE rtType OF
rtePurePb, rtePbNeonWord:
BEGIN
Result:='';
strInput:=PREPROC.PreProcess(Trim(strInput));
IF strInput='' THEN Exit;
SplitSent(strInput, strlSents);
FOR iLooper:=0 TO strlSents.Count-1 DO
Result:=Result+TranslateSent(strlSents[iLooper]);
Result:=Trim(Result);
END;
END;
END;
PROCEDURE SplitALine(strInput: STRING; VAR strST: STRING; VAR strContent: STRING; VAR strET: STRING);
BEGIN
strST:='';
strET:='';
strContent:='';
strST:=ReadTruncEx(strInput, '>'); strST:=strST+'>';
strContent:=ReadTruncEx(strInput, '<');
strET:='<'+strInput;
END;
FUNCTION ProcessA863Line(strInput: STRING): STRING;
VAR strTemp, strLine, strST, strET: STRING;
BEGIN
strLine:=Trim(strInput);
strTemp:=ReadTrunc(strLine);
IF strTemp='<srcset' THEN Result:='<tstset '+strLine
ELSE IF strTemp='<doc' THEN
BEGIN
strLine[Length(strLine)]:=' ';
Result:=strTemp+' '+strLine+'lang="en" site="厦门大学">';
END
ELSE IF strTemp='<s' THEN
BEGIN
SplitALine(strTemp+' '+strLine, strST, strTemp, strET);
Result:=strST+TranslateIt(strTemp)+strET;
END
ELSE IF strTemp='</srcset>' THEN Result:='</tstset>'
ELSE Result:=strTemp+' '+strLine;
END;
FUNCTION Translate863File(strSrcFile, strTgtFile: STRING): Real;
VAR pTimer: TPeriod;
fstrlInput: TFStringList;
iLooper: Integer;
BEGIN
pTimer:=TPeriod.Create;
pTimer.Start;
fstrlInput:=TFStringList.Create;
fstrlInput.LoadXML(strSrcFile);
WriteLn('Totally ', fstrlInput.Count, ' lines...');
FOR iLooper:=0 TO fstrlInput.Count-1 DO
BEGIN
Write(#13, 'Line', iLooper+1);
fstrlInput[iLooper]:=ProcessA863Line(fstrlInput[iLooper]);
END;
WriteLn;
fstrlInput.Save(strTgtFile);
fstrlInput.Free;
pTimer.Stop;
Result:=pTimer.TimeElapsed;
pTimer.Free;
END;
PROCEDURE CleanUp;
BEGIN
strlSents.Free;
IF rtType<>rteNone THEN BP.CleanUp;
IF rtType<>rteNone THEN LM.CleanUp;
PREPROC.CleanUp;
IF (rtType<>rtePurePb) THEN NEON.CleanUp;
END;
END.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -