⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 moses-parallel.pl.svn-base

📁 解码器是基于短语的统计机器翻译系统的核心模块
💻 SVN-BASE
📖 第 1 页 / 共 2 页
字号:
#! /usr/bin/perl

#######################
# moses-parallel.pl
#
# Splits an input file (plain text or confusion networks) into chunks and
# prepares one decoder job per chunk for submission through a queue (qsub).
#
# Revision history
#
# 02 Aug 2006 added strict requirement
# 01 Aug 2006 fix bug about inputfile parameter
#             fix bug about suffix index generation
# 31 Jul 2006 added parameter for reading queue parameters
# 29 Jul 2006 added code to handling confusion networks
# 28 Jul 2006 added a better policy for removing jobs from the queue in case of killing signal (CTRL-C)
#             added the parameter -qsub-prefix which sets the prefix for the name of submitted jobs
# 27 Jul 2006 added safesystem() function and other checks to handle with process failure
#             added checks for existence of decoder and configuration file
# 26 Jul 2006 fix a bug related to the use of absolute path for srcfile and nbestfile

use strict;
use File::Basename qw(basename);
use Getopt::Long qw(:config pass_through no_ignore_case);

#######################
# Customizable parameters

# parameters for submitting processes through SGE
# NOTE: group name is ws06ossmt (with 2 's') and not ws06osmt (with 1 's')
my $queueparameters="-l ws06ossmt=true -l mem_free=0.5G -hard";

# look for the correct pwdcmd
my $pwdcmd = getPwdCmd();

my $workingdir = `$pwdcmd`; chomp $workingdir;
my $tmpdir="$workingdir/tmp$$";
my $splitpfx="split$$";

$SIG{'INT'} = \&kill_all_and_quit; # catch exception for CTRL-C

#######################
# Default parameters
my $jobscript="$workingdir/job$$";
my $qsubout="$workingdir/out.job$$";
my $qsuberr="$workingdir/err.job$$";

my $mosesparameters="";
my $cfgfile=""; #configuration file

my $version=undef;
my $help=0;
my $dbg=0;
my $jobs=4;
my $mosescmd="$ENV{MOSESBIN}/moses"; #decoder in use
my $orifile=undef;
my $testfile=undef;
my $nbestfile=undef;
my $orinbestfile=undef;
my $nbest=undef;
my $nbestflag=0;
my $orilogfile="";
my $logflag="";
my $qsubname="MOSES";
my $inputtype=0;
my $old_sge = 0; # assume old Sun Grid Engine (<6.0) where qsub does not
                 # implement -sync and -b

#######################
# Command line options processing
#
# Parses the parallel-specific options into the file-level variables above.
# Unrecognised options are left in @ARGV (pass_through) and appended to
# $mosesparameters together with -config and -inputtype.
sub init(){
  GetOptions('version'=>\$version,
             'help'=>\$help,
             'debug'=>\$dbg,
             'jobs=i'=>\$jobs,
             'decoder=s'=> \$mosescmd,
             'decoder-parameters=s'=> \$mosesparameters,
             'logfile=s'=> \$orilogfile,
             'i|inputfile|input-file=s'=> \$orifile,
             'n-best-file=s'=> \$orinbestfile,
             'n-best-size=i'=> \$nbest,
             'qsub-prefix=s'=> \$qsubname,
             'queue-parameters=s'=> \$queueparameters,
             'inputtype=i'=> \$inputtype,
             'config=s'=>\$cfgfile,
             'old-sge' => \$old_sge,
            ) or exit(1);

  # File::Basename instead of `basename ...`: no shell involved, so file
  # names containing blanks or shell metacharacters are handled correctly.
  $testfile = basename($orifile) if defined $orifile;

  $mosesparameters.="@ARGV -config $cfgfile -inputtype $inputtype";
  getNbestParameters();
  getLogParameters();

  # Computed only after getNbestParameters(), so that $nbestfile is also set
  # when the n-best file name was read from the configuration file.
  $nbestfile = basename($orinbestfile) if defined $orinbestfile;
}

#######################
# print version (to STDERR) and exit with status 1
sub version(){
#    print STDERR "version 1.0 (15-07-2006)\n";
#    print STDERR "version 1.1 (17-07-2006)\n";
#    print STDERR "version 1.2 (18-07-2006)\n";
#    print STDERR "version 1.3 (21-07-2006)\n";
#    print STDERR "version 1.4 (26-07-2006)\n";
#    print STDERR "version 1.5 (27-07-2006)\n";
#    print STDERR "version 1.6 (28-07-2006)\n";
#    print STDERR "version 1.7 (29-07-2006)\n";
#    print STDERR "version 1.8 (31-07-2006)\n";
#    print STDERR "version 1.9 (01-08-2006)\n";
#    print STDERR "version 1.10 (02-08-2006)\n";
#    print STDERR "version 1.11 (10-10-2006)\n";
#    print STDERR "version 1.12 (27-12-2006)\n";
  print STDERR "version 1.13 (29-12-2006)\n";
  exit(1);
}

# usage: print the help text (to STDERR) and exit with status 1
sub usage(){
  print STDERR "moses-parallel.pl [parallel-options]  [moses-options]\n";
  print STDERR "Options marked (*) are required.\n";
  print STDERR "Parallel options:\n";
  print STDERR "*  -decoder <file> Moses decoder to use\n";
  print STDERR "*  -i|inputfile|input-file <file>   the input text to translate\n";
  print STDERR "*  -jobs <N> number of required jobs\n";
  print STDERR "   -qsub-prefix <string> name for sumbitte jobs\n";
  print STDERR "   -queue-parameters <string> specific requirements for queue\n";
  print STDERR "   -old-sge Assume Sun Grid Engine < 6.0\n";
  print STDERR "   -debug debug\n";
  print STDERR "   -version print version of the script\n";
  print STDERR "   -help this help\n";
  print STDERR "Moses options:\n";
  print STDERR "   -inputtype <0|1> 0 for text, 1 for confusion networks\n";
  print STDERR "*  -config <cfgfile> configuration file\n";
  print STDERR "   -decoder-parameters <string> specific parameters for the decoder\n";
  print STDERR "All other options are passed to Moses\n";
  print STDERR "  (This way to pass parameters is maintained for back compatibility\n";
  print STDERR "   but preferably use -decoder-parameters)\n";
  exit(1);
}

# print_parameters: report the effective settings on STDERR
sub print_parameters(){
  print STDERR "Inputfile: $orifile\n";
  print STDERR "Logfile: $orilogfile\n";
  print STDERR "Configuration file: $cfgfile\n";
  print STDERR "Decoder in use: $mosescmd\n";
  if ($nbestflag) {
    print STDERR "Nbest file: $orinbestfile\n";
    print STDERR "Nbest size: $nbest\n";
  }
  print STDERR "Number of jobs:$jobs\n";
  print STDERR "Qsub name: $qsubname\n";
  print STDERR "Queue parameters: $queueparameters\n";
  print STDERR "Inputtype: text\n" if $inputtype == 0;
  print STDERR "Inputtype: confusion network\n" if $inputtype == 1;

  print STDERR "parameters directly passed to Moses: $mosesparameters\n";
}

# get parameters for log file: $logflag is set when -logfile was given
sub getLogParameters(){
  $logflag=1 if $orilogfile;
}

# get parameters for nbest computation from configuration file
#
# If an n-best file was given on the command line only $nbestflag is set.
# Otherwise the configuration file is scanned for a "[n-best-list]" line;
# the two lines following it are taken as n-best file name and n-best size.
sub getNbestParameters(){
  if ($orinbestfile) {
    $nbestflag=1;
    return;
  }

  # Three-argument open on a lexical handle. A configuration file that cannot
  # be opened is deliberately not an error here: the main script checks for
  # its existence afterwards and reports the problem to the user.
  open(my $cfg_fh, '<', $cfgfile) or return;

  # Test definedness of the line rather than the return value of chomp(),
  # so a final line without trailing newline is still examined.
  while (defined(my $line = <$cfg_fh>)) {
    next unless $line =~ /^\[n-best-list\]/;
    chomp($orinbestfile = <$cfg_fh>);
    chomp($nbest = <$cfg_fh>);
    $nbestflag=1;
    last;
  }
  close($cfg_fh);
}

#######################
# Script starts here

init();

version() if $version;
usage() if $help;

# $mosescmd and $cfgfile are initialised to strings, so a plain defined()
# test would never detect a missing option; check for empty values as well.
if (!defined $orifile
    || !defined $mosescmd || $mosescmd eq ""
    || !defined $cfgfile  || $cfgfile eq "") {
  print STDERR "Please specify -input-file, -decoder and -config\n";
  usage();
}

# checking if inputfile exists
if (! -e ${orifile} ){
  print STDERR "Inputfile ($orifile) does not exists\n";
  usage();
}

# checking if decoder exists
if (! -e $mosescmd) {
  print STDERR "Decoder ($mosescmd) does not exists\n";
  usage();
}

# checking if configfile exists
if (! -e $cfgfile) {
  print STDERR "Configuration file ($cfgfile) does not exists\n";
  usage();
}

# a job count below 1 would lead to a modulus/division by zero further down
if ($jobs < 1) {
  print STDERR "Number of jobs (-jobs) must be at least 1\n";
  usage();
}

print_parameters(); # so that people know
exit(1) if $dbg; # debug mode: just print and do not run

# splitting test file in several parts
#$decimal="-d"; #split does not accept this options (on MAC OS)
my $decimal="";

my $cmd;
my $sentenceN;
my $splitN;

my @idxlist=();

if ($inputtype==0){ #text input
  # getting the number of input sentences
  chomp($sentenceN=`wc -l ${orifile} | awk '{print \$1}' `);

  # an empty input would set $jobs to 0 and crash on the modulus below
  die "Inputfile ($orifile) contains no sentences to translate\n"
    if $sentenceN < 1;

  # Reducing the number of jobs if less sentences to translate
  if ($jobs>$sentenceN){ $jobs=$sentenceN; }

  # Computing the number of sentences for each files (rounded up)
  if ($sentenceN % $jobs == 0){ $splitN=int($sentenceN / $jobs); }
  else{ $splitN=int($sentenceN /$jobs) + 1; }

  if ($dbg){
    print STDERR "There are $sentenceN sentences to translate\n";
    print STDERR "There are at most $splitN sentences per job\n";
  }

  $cmd="split $decimal -a 2 -l $splitN $orifile ${testfile}.$splitpfx-";
  safesystem("$cmd") or die;
}
else{ #confusion network input
  my $tmpfile="/tmp/cnsplit$$";

  # Put every confusion network on a single line (line breaks replaced by
  # the _CNendline_ marker) so that split -l can work on whole networks.
  $cmd="cat $orifile | perl -pe 's/\\n/ _CNendline_ /g;' | perl -pe 's/_CNendline_  _CNendline_ /_CNendline_\\n/g;' > $tmpfile";
  safesystem("$cmd") or die;

  # getting the number of input CNs
  chomp($sentenceN=`wc -l $tmpfile | awk '{print \$1}' `);

  # an empty input would set $jobs to 0 and crash on the modulus below
  if ($sentenceN < 1) {
    unlink $tmpfile;
    die "Inputfile ($orifile) contains no confusion networks to translate\n";
  }

  # Reducing the number of jobs if less CNs to translate
  if ($jobs>$sentenceN){ $jobs=$sentenceN; }

  # Computing the number of CNs for each files (rounded up)
  if ($sentenceN % $jobs == 0){ $splitN=int($sentenceN / $jobs); }
  else{ $splitN=int($sentenceN /$jobs) + 1; }

  if ($dbg){
    print STDERR "There are $sentenceN confusion networks to translate\n";
    print STDERR "There are at most $splitN sentences per job\n";
  }

  $cmd="split $decimal -a 2 -l $splitN $tmpfile $tmpfile-";
  safesystem("$cmd") or die;

  # suffixes ("-aa", "-ab", ...) of the pieces produced by split
  my @cn_suffixes=();
  chomp(@cn_suffixes=`ls $tmpfile-*`);
  s/.+(\-\S+)$/$1/ for @cn_suffixes;

  # restore the original line structure of every piece
  foreach my $idx (@cn_suffixes){
    $cmd="perl -pe 's/ _CNendline_ /\\n/g;s/ _CNendline_/\\n/g;'";
    safesystem("cat $tmpfile$idx | $cmd > ${testfile}.$splitpfx$idx ; rm $tmpfile$idx;");
  }
}

# collect the suffixes of all split files; one job is run per suffix
chomp(@idxlist=`ls ${testfile}.$splitpfx-*`);
s/.+(\-\S+)$/$1/ for @idxlist;

safesystem("mkdir -p $tmpdir") or die;

preparing_script();

# launching process through the queue
my @sgepids =();

my $failure=0;

foreach my $idx (@idxlist){

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -