📄 moses-parallel.pl.svn-base
字号:
#! /usr/bin/perl

#######################
# Revision history
#
# 02 Aug 2006 added strict requirement
# 01 Aug 2006 fix bug about inputfile parameter
#             fix bug about suffix index generation
# 31 Jul 2006 added parameter for reading queue parameters
# 29 Jul 2006 added code to handle confusion networks
# 28 Jul 2006 added a better policy for removing jobs from the queue in case of killing signal (CTRL-C)
#             added the parameter -qsub-prefix which sets the prefix for the name of submitted jobs
# 27 Jul 2006 added safesystem() function and other checks to handle process failure
#             added checks for existence of decoder and configuration file
# 26 Jul 2006 fix a bug related to the use of absolute path for srcfile and nbestfile

use strict;
use File::Basename ();   # called fully qualified, so nothing is imported into this namespace

#######################
# Customizable parameters

# parameters for submitting processes through SGE
# NOTE: group name is ws06ossmt (with 2 's') and not ws06osmt (with 1 's')
my $queueparameters="-l ws06ossmt=true -l mem_free=0.5G -hard";

# look for the correct pwdcmd
my $pwdcmd = getPwdCmd();

my $workingdir = `$pwdcmd`; chomp $workingdir;
my $tmpdir="$workingdir/tmp$$";
my $splitpfx="split$$";

$SIG{'INT'} = \&kill_all_and_quit; # catch exception for CTRL-C

#######################
# Default parameters
my $jobscript="$workingdir/job$$";
my $qsubout="$workingdir/out.job$$";
my $qsuberr="$workingdir/err.job$$";

my $mosesparameters="";
my $cfgfile=""; #configuration file

my $version=undef;
my $help=0;
my $dbg=0;
my $jobs=4;

my $mosescmd="$ENV{MOSESBIN}/moses"; #decoder in use

my $orifile=undef;
my $testfile=undef;
my $nbestfile=undef;
my $orinbestfile=undef;
my $nbest=undef;
my $nbestflag=0;
my $orilogfile="";
my $logflag="";
my $qsubname="MOSES";
my $inputtype=0;
my $old_sge = 0; # assume old Sun Grid Engine (<6.0) where qsub does not
                 # implement -sync and -b

#######################
# Command line options processing
#
# Parses the parallel-specific options into the file-level variables above.
# Options not recognised here are left in @ARGV (pass_through) and are
# appended to the decoder parameters. Exits with status 1 if GetOptions
# reports an error.
sub init(){
  use Getopt::Long qw(:config pass_through no_ignore_case);
  GetOptions('version'=>\$version,
             'help'=>\$help,
             'debug'=>\$dbg,
             'jobs=i'=>\$jobs,
             'decoder=s'=> \$mosescmd,
             'decoder-parameters=s'=> \$mosesparameters,
             'logfile=s'=> \$orilogfile,
             'i|inputfile|input-file=s'=> \$orifile,
             'n-best-file=s'=> \$orinbestfile,
             'n-best-size=i'=> \$nbest,
             'qsub-prefix=s'=> \$qsubname,
             'queue-parameters=s'=> \$queueparameters,
             'inputtype=i'=> \$inputtype,
             'config=s'=>\$cfgfile,
             'old-sge' => \$old_sge,
            ) or exit(1);

  # Strip the directory part without going through a shell, so that names
  # containing spaces or shell metacharacters are handled correctly.
  $nbestfile = File::Basename::basename($orinbestfile) if defined $orinbestfile;
  $testfile  = File::Basename::basename($orifile)      if defined $orifile;

  # Join with explicit separators: plain concatenation glued the value of
  # -decoder-parameters onto the first pass-through option.
  $mosesparameters = join(" ",
                          grep { length }
                          $mosesparameters,
                          "@ARGV",
                          "-config $cfgfile",
                          "-inputtype $inputtype");

  getNbestParameters();
  getLogParameters();
}

#######################
# print version
#
# Prints the current version string to STDERR and exits with status 1.
sub version(){
#  print STDERR "version 1.0 (15-07-2006)\n";
#  print STDERR "version 1.1 (17-07-2006)\n";
#  print STDERR "version 1.2 (18-07-2006)\n";
#  print STDERR "version 1.3 (21-07-2006)\n";
#  print STDERR "version 1.4 (26-07-2006)\n";
#  print STDERR "version 1.5 (27-07-2006)\n";
#  print STDERR "version 1.6 (28-07-2006)\n";
#  print STDERR "version 1.7 (29-07-2006)\n";
#  print STDERR "version 1.8 (31-07-2006)\n";
#  print STDERR "version 1.9 (01-08-2006)\n";
#  print STDERR "version 1.10 (02-08-2006)\n";
#  print STDERR "version 1.11 (10-10-2006)\n";
#  print STDERR "version 1.12 (27-12-2006)\n";
  print STDERR "version 1.13 (29-12-2006)\n";
  exit(1);
}

# usage
#
# Prints the help text to STDERR and exits with status 1. It is used both
# for -help and after a command-line validation error.
sub usage(){
  print STDERR "moses-parallel.pl [parallel-options] [moses-options]\n";
  print STDERR "Options marked (*) are required.\n";
  print STDERR "Parallel options:\n";
  print STDERR "* -decoder <file> Moses decoder to use\n";
  print STDERR "* -i|inputfile|input-file <file> the input text to translate\n";
  print STDERR "* -jobs <N> number of required jobs\n";
  print STDERR " -qsub-prefix <string> name for sumbitte jobs\n";
  print STDERR " -queue-parameters <string> specific requirements for queue\n";
  print STDERR " -old-sge Assume Sun Grid Engine < 6.0\n";
  print STDERR " -debug debug\n";
  print STDERR " -version print version of the script\n";
  print STDERR " -help this help\n";
  print STDERR "Moses options:\n";
  print STDERR " -inputtype <0|1> 0 for text, 1 for confusion networks\n";
  print STDERR "* -config <cfgfile> configuration file\n";
  print STDERR " -decoder-parameters <string> specific parameters for the decoder\n";
  print STDERR "All other options are passed to Moses\n";
  print STDERR " (This way to pass parameters is maintained for back compatibility\n";
  print STDERR " but preferably use -decoder-parameters)\n";
  exit(1);
}

# printparameters
#
# Reports the effective settings on STDERR. The n-best lines are printed
# only when n-best generation has been enabled ($nbestflag).
sub print_parameters(){
  print STDERR "Inputfile: $orifile\n";
  print STDERR "Logfile: $orilogfile\n";
  print STDERR "Configuration file: $cfgfile\n";
  print STDERR "Decoder in use: $mosescmd\n";
  if ($nbestflag) {
    print STDERR "Nbest file: $orinbestfile\n";
    print STDERR "Nbest size: $nbest\n";
  }
  print STDERR "Number of jobs:$jobs\n";
  print STDERR "Qsub name: $qsubname\n";
  print STDERR "Queue parameters: $queueparameters\n";
  print STDERR "Inputtype: text\n" if $inputtype == 0;
  print STDERR "Inputtype: confusion network\n" if $inputtype == 1;

  print STDERR "parameters directly passed to Moses: $mosesparameters\n";
}

# get parameters for log file
#
# Sets $logflag when a log file was requested with -logfile.
sub getLogParameters(){
  $logflag=1 if $orilogfile;
}

# get parameters for nbest computation from configuration file
#
# If -n-best-file was given on the command line, only $nbestflag is set.
# Otherwise the configuration file is scanned for a "[n-best-list]" section
# header; the two lines following it are taken as the n-best file name and
# the n-best size.
#
# NOTE(review): $nbestfile (the basename) is derived in init() only from the
# command-line value, so it stays undefined when the n-best file comes from
# the configuration file -- confirm that later code does not rely on it.
sub getNbestParameters(){
  if ($orinbestfile) {
    $nbestflag=1;
    return;
  }

  # A missing or unreadable configuration file is reported by the main
  # script once -help/-version have been honoured, so stay silent here.
  open(my $cfg_fh, '<', $cfgfile) or return;

  while (defined(my $line = <$cfg_fh>)) {
    next unless $line =~ /^\[n-best-list\]/;

    my $file_line = <$cfg_fh>;
    my $size_line = <$cfg_fh>;

    # Enable n-best output only when both values are really there; a
    # section header at the very end of the file carries no information.
    if (defined $file_line && defined $size_line) {
      chomp($file_line, $size_line);
      $orinbestfile = $file_line;
      $nbest        = $size_line;
      $nbestflag    = 1;
    }
    last;
  }
  close($cfg_fh);
}

#######################
# Script starts here

init();

version() if $version;
usage() if $help;

# $cfgfile defaults to the empty string, so it has to be tested for
# emptiness as well as definedness.
if (!defined $orifile || !defined $mosescmd || !defined $cfgfile || $cfgfile eq "") {
  print STDERR "Please specify -input-file, -decoder and -config\n";
  usage();
}

#checking if inputfile exists
if (! -e ${orifile} ){
  print STDERR "Inputfile ($orifile) does not exists\n";
  usage();
}

#checking if decoder exists
if (! -e $mosescmd) {
  print STDERR "Decoder ($mosescmd) does not exists\n";
  usage();
}

#checking if configfile exists
if (! -e $cfgfile) {
  print STDERR "Configuration file ($cfgfile) does not exists\n";
  usage();
}

print_parameters(); # so that people know
exit(1) if $dbg; # debug mode: just print and do not run

# A job count below 1 would otherwise end in a division by zero (or a
# negative split size) further down.
if ($jobs < 1) {
  print STDERR "Number of jobs ($jobs) must be at least 1\n";
  usage();
}

#splitting test file in several parts
#$decimal="-d"; #split does not accept this options (on MAC OS)
my $decimal="";

my $cmd;
my $sentenceN;
my $splitN;

my @idxlist=();

# NOTE: the "if ($dbg)" reports below cannot fire at present, because debug
# mode exits right after print_parameters() above.

if ($inputtype==0){ #text input
  #getting the number of input sentences
  chomp($sentenceN=`wc -l ${orifile} | awk '{print \$1}' `);

  # Nothing to split: stop here instead of failing on "% 0" below.
  die "Inputfile ($orifile) contains no sentences to translate\n"
    unless $sentenceN && $sentenceN > 0;

  #Reducing the number of jobs if less sentences to translate
  if ($jobs>$sentenceN){ $jobs=$sentenceN; }

  #Computing the number of sentences for each files
  if ($sentenceN % $jobs == 0){ $splitN=int($sentenceN / $jobs); }
  else{ $splitN=int($sentenceN /$jobs) + 1; }

  if ($dbg){
    print STDERR "There are $sentenceN sentences to translate\n";
    print STDERR "There are at most $splitN sentences per job\n";
  }

  $cmd="split $decimal -a 2 -l $splitN $orifile ${testfile}.$splitpfx-";
  safesystem("$cmd") or die;
}
else{ #confusion network input
  # Each confusion network is first folded onto a single line so that
  # "split -l" can cut on network boundaries; the pieces are unfolded again
  # afterwards.
  my $tmpfile="/tmp/cnsplit$$";
  $cmd="cat $orifile | perl -pe 's/\\n/ _CNendline_ /g;' | perl -pe 's/_CNendline_ _CNendline_ /_CNendline_\\n/g;' > $tmpfile";
  safesystem("$cmd") or die;

  #getting the number of input CNs
  chomp($sentenceN=`wc -l $tmpfile | awk '{print \$1}' `);

  # Nothing to split: clean up and stop instead of failing on "% 0" below.
  unless ($sentenceN && $sentenceN > 0) {
    unlink $tmpfile;
    die "Inputfile ($orifile) contains no confusion networks to translate\n";
  }

  #Reducing the number of jobs if less CNs to translate
  if ($jobs>$sentenceN){ $jobs=$sentenceN; }

  #Computing the number of CNs for each files
  if ($sentenceN % $jobs == 0){ $splitN=int($sentenceN / $jobs); }
  else{ $splitN=int($sentenceN /$jobs) + 1; }

  if ($dbg){
    print STDERR "There are $sentenceN confusion networks to translate\n";
    print STDERR "There are at most $splitN sentences per job\n";
  }

  $cmd="split $decimal -a 2 -l $splitN $tmpfile $tmpfile-";
  safesystem("$cmd") or die;

  # Suffixes ("-aa", "-ab", ...) of the pieces produced by split.
  chomp(my @cnidxlist=`ls $tmpfile-*`);
  s/.+(\-\S+)$/$1/ for @cnidxlist;

  foreach my $idx (@cnidxlist){
    $cmd="perl -pe 's/ _CNendline_ /\\n/g;s/ _CNendline_/\\n/g;'";
    safesystem("cat $tmpfile$idx | $cmd > ${testfile}.$splitpfx$idx ; rm $tmpfile$idx;") or die;
  }

  # The folded copy of the input is no longer needed once it has been split.
  unlink $tmpfile;
}

# Collect the suffixes of the split input files; one job is run per suffix.
chomp(@idxlist=`ls ${testfile}.$splitpfx-*`);
s/.+(\-\S+)$/$1/ for @idxlist;

safesystem("mkdir -p $tmpdir") or die;

preparing_script();

#launching process through the queue
my @sgepids =();

my $failure=0;
foreach my $idx (@idxlist){
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -