📄 moses-parallel.pl.svn-base
字号:
# Tail of the per-job submission block (its opening lies above this section):
# submit the job script for $idx through qsub and remember the SGE job id.
    print STDERR "qsub $queueparameters -b no -j yes -o $qsubout$idx -e $qsuberr$idx -N $qsubname$idx ${jobscript}${idx}.bash\n" if $dbg;

    # "> file 2>&1" replaces the csh-style ">& file": system() hands the
    # command to /bin/sh, which is not required to understand ">&".
    $cmd="qsub $queueparameters -b no -j yes -o $qsubout$idx -e $qsuberr$idx -N $qsubname$idx ${jobscript}${idx}.bash > ${jobscript}${idx}.log 2>&1";

    safesystem($cmd) or die;

    my ($res,$id);

    open (IN,"${jobscript}${idx}.log") or die "Can't read id of job ${jobscript}${idx}.log";
    chomp($res=<IN>);
    # The job id is the third whitespace-separated field of qsub's reply.
    # Taken from an explicit list slice: split() in void context no longer
    # fills @_ on current perls, so the old "$id=$_[2]" stayed undefined.
    $id=(split(/\s+/,$res))[2];
    close(IN);

    push @sgepids, $id;
}

#waiting until all jobs have finished
my $hj = "-hold_jid " . join(" -hold_jid ", @sgepids);

if ($old_sge) {
    # we need to implement our own waiting script: a job that is held until
    # all translation jobs are done and whose stdout (-o) is the checkpoint
    # file, so the appearance of that file signals the end of the jobs
    safesystem("echo 'date' > sync_workaround_script.sh") or kill_all_and_quit();

    my $pwd = `$pwdcmd`;
    chomp $pwd;

    my $checkpointfile = "sync_workaround_checkpoint";

    # delete previous checkpoint, if left from previous runs
    safesystem("rm -f $checkpointfile") or kill_all_and_quit();

    # start the 'hold' job, i.e. the job that will wait
    $cmd="qsub -cwd $queueparameters $hj -o $checkpointfile -e /dev/null -N $qsubname.W $pwd/sync_workaround_script.sh > $qsubname.W.log 2>&1";
    safesystem($cmd) or kill_all_and_quit();

    # and wait for checkpoint file to appear
    my $nr = 0;
    while (!-e $checkpointfile) {
        sleep(10);
        $nr++;
        print STDERR "w" if $nr % 3 == 0;
    }
    print STDERR "End of waiting.\n";
    safesystem("rm -f $checkpointfile sync_workaround_script.sh") or kill_all_and_quit();

    # Deliberately no "my" on $failure: it must be the same variable that is
    # tested after this if/else (and assigned in the else branch). A lexical
    # declared here would hide a failure that survives the extra waits.
    $failure = 1;
    $nr = 0;
    while ($nr < 60 && $failure) {
        $nr++;
        $failure = check_exit_status();
        if (!$failure) {
            $failure = !check_translation();
        }
        last if !$failure;
        print STDERR "Extra wait ($nr) for possibly unfinished processes.\n";
        sleep 10;
    }
}
else {
    # use the -sync option for qsub: a trivial job held on all translation
    # jobs, for which qsub itself blocks until completion
    $cmd="qsub $queueparameters -sync y $hj -j y -o /dev/null -e /dev/null -N $qsubname.W -b y /bin/ls > $qsubname.W.log 2>&1";
    safesystem($cmd) or kill_all_and_quit();

    $failure = check_exit_status();
}

kill_all_and_quit() if $failure;

# An incomplete translation is a failure in both waiting modes; the verdict of
# this check used to be discarded, letting truncated output be concatenated.
check_translation() or kill_all_and_quit();

#concatenating translations and removing temporary files
concatenate_1best();
concatenate_logs() if $logflag;
concatenate_nbest() if $nbestflag;

remove_temporary_files();


#script creation
# Writes one executable bash script "${jobscript}${idx}.bash" per entry of
# @idxlist. Each script changes to $workingdir, runs the decoder on the split
# input, moves the results from $tmpdir to the current directory and echoes
# "exit status N" after every step so that check_exit_status() can inspect
# the job log. Dies if a script cannot be written.
sub preparing_script {
    foreach my $idx (@idxlist) {
        my $scriptname = "${jobscript}${idx}.bash";
        my $tmptrans   = "$tmpdir/${testfile}.$splitpfx$idx.trans";

        open(my $out, '>', $scriptname)
            or die "Can't write job script $scriptname: $!";

        print $out "#! /bin/bash\n\n";
        print $out "uname -a\n\n";
        print $out "cd $workingdir\n\n";

        if ($nbestflag) {
            print $out "$mosescmd $mosesparameters -n-best-list $tmpdir/${nbestfile}.$splitpfx$idx $nbest -input-file ${testfile}.$splitpfx$idx > $tmptrans\n\n";
            print $out "echo exit status \$?\n\n";
            print $out "mv $tmpdir/${nbestfile}.$splitpfx$idx .\n\n";
            print $out "echo exit status \$?\n\n";
        }
        else {
            print $out "$mosescmd $mosesparameters -input-file ${testfile}.$splitpfx$idx > $tmptrans\n\n";
            # report the decoder's own status here as well; without this line
            # only the status of the following "mv" reached the job log
            print $out "echo exit status \$?\n\n";
        }

        print $out "mv $tmptrans .\n\n";
        print $out "echo exit status \$?\n\n";

        close($out) or die "Can't finish writing job script $scriptname: $!";

        #setting permissions of each script
        chmod(0755, $scriptname);
    }
}

# Merges the per-split n-best lists into $orinbestfile, renumbering the
# sentence codes so that they keep increasing across splits. Sentences with no
# entry at all get one placeholder entry with an empty translation and
# all-zero scores. Dies if a required file cannot be opened.
sub concatenate_nbest {
    # code of the last sentence written so far; -1 means "none yet", so that a
    # missing sentence 0 is filled too (an empty string counted as 0 here)
    my $oldcode = -1;
    my $offset  = 0;

    # get the list of feature and set a fictitious string with zero scores
    my $firstfile = "${nbestfile}.${splitpfx}$idxlist[0]";
    open(my $first, '<', $firstfile)
        or die "Can't read n-best file $firstfile: $!";
    my $str = <$first>;
    close($first);
    $str = "" unless defined $str;
    chomp($str);

    # fields of an entry: code ||| translation ||| feature scores ||| total
    my (undef, undef, $featurescores) = split(/\|\|\|/, $str);
    $featurescores = "" unless defined $featurescores;

    my $emptytrans         = " ";
    my $emptyglobalscore   = " 0.0";
    my $emptyfeaturescores = $featurescores;
    $emptyfeaturescores =~ s/[-0-9\.]+/0/g;

    open(my $out, '>', $orinbestfile)
        or die "Can't write n-best file $orinbestfile: $!";

    # writes placeholder entries for every sentence code strictly between the
    # last one written and $upto
    my $fill_gap = sub {
        my ($upto) = @_;
        while ($upto - $oldcode > 1) {
            $oldcode++;
            print $out join("|||", ($oldcode, $emptytrans, $emptyfeaturescores, $emptyglobalscore)), "\n";
        }
    };

    foreach my $idx (@idxlist) {
        #computing the length of each input file
        # (taken as the number of lines of the split's translation file)
        my $transfile = "${testfile}.${splitpfx}${idx}.trans";
        open(my $trans, '<', $transfile)
            or die "Can't read translation file $transfile: $!";
        my $inplength = 0;
        while (defined(my $ignored = <$trans>)) {
            $inplength++;
        }
        close($trans);

        my $splitnbest = "${nbestfile}.${splitpfx}${idx}";
        open(my $in, '<', $splitnbest)
            or die "Can't read n-best file $splitnbest: $!";
        while (my $entry = <$in>) {
            my ($code, @extra) = split(/\|\|\|/, $entry);
            $code += $offset;

            # if there is a jump between two consecutive codes
            # it means that an input sentence is not translated
            # fill this hole with a "fictitious" list of translation
            # comprising just one "empty translation" with zero scores
            $fill_gap->($code);

            $oldcode = $code;
            print $out join("|||", ($oldcode, @extra));
        }
        close($in);

        # sentences missing at the end of this split
        $offset += $inplength;
        $fill_gap->($offset);
    }

    close($out) or die "Can't finish writing n-best file $orinbestfile: $!";
}

# Prints the translation of every split to STDOUT, in @idxlist order.
# Lines are copied one by one: interpolating the whole array into a string
# ("@in") joins the lines with the list separator and would put a space in
# front of every line but the first.
sub concatenate_1best {
    foreach my $idx (@idxlist) {
        my $transfile = "${testfile}.${splitpfx}${idx}.trans";
        open(my $in, '<', $transfile)
            or die "Can't read translation file $transfile: $!";
        while (my $line = <$in>) {
            print STDOUT $line;
        }
        close($in);
    }
}

# Copies the log of every job ("$qsubout$idx") into $orilogfile, in @idxlist
# order. A job log that cannot be read is reported on STDERR and skipped.
sub concatenate_logs {
    open(my $out, '>', $orilogfile)
        or die "Can't write log file $orilogfile: $!";
    foreach my $idx (@idxlist) {
        my $joblog = "$qsubout$idx";
        my $in;
        unless (open($in, '<', $joblog)) {
            print STDERR "Can't read job log $joblog: $!\n";
            next;
        }
        while (my $line = <$in>) {
            print $out $line;
        }
        close($in);
    }
    close($out) or die "Can't finish writing log file $orilogfile: $!";
}

# Scans every job log for the "exit status N" lines echoed by the job scripts.
# Returns 1 if any of them reports a non-zero status, 0 otherwise. A log that
# cannot be read is reported on STDERR but, as before, not counted as failure.
sub check_exit_status {
    print STDERR "check_exit_status\n";
    my $failure = 0;
    foreach my $idx (@idxlist) {
        print STDERR "check_exit_status of job $idx\n";
        my $joblog = "$qsubout$idx";
        my $in;
        unless (open($in, '<', $joblog)) {
            print STDERR "Can't read job log $joblog: $!\n";
            next;
        }
        while (my $line = <$in>) {
            # any non-zero status is a failure, not only those starting with 1
            $failure = 1 if $line =~ /exit status (\d+)/ && $1 != 0;
        }
        close($in);
    }
    return $failure;
}

# Calls qdel for every recorded SGE job id, explains the situation on STDERR
# and terminates the program with exit code 1. Never returns.
sub kill_all_and_quit {
    print STDERR "Got interrupt or something failed.\n";
    print STDERR "kill_all_and_quit\n";
    foreach my $id (@sgepids) {
        # an id that could not be parsed would only yield a bare "qdel"
        next unless defined $id && length $id;
        print STDERR "qdel $id\n";
        safesystem("qdel $id");
    }
    print STDERR "Translation was not performed correctly\n";
    print STDERR "or some of the submitted jobs died.\n";
    print STDERR "qdel function was called for all submitted jobs\n";
    exit(1);
}

# Checks, split by split, that the number of translated lines equals the
# number of input items (lines for text input, i.e. $inputtype == 0;
# confusion networks otherwise). Returns 1 if every split is complete; prints
# a diagnostic on STDERR and returns 0 at the first incomplete or unreadable
# split.
sub check_translation {
    #checking if all sentences were translated
    foreach my $idx (@idxlist) {
        my $inputfile  = "${testfile}.$splitpfx$idx";
        my $outputfile = "${testfile}.$splitpfx$idx.trans";

        my $inputN;
        if ($inputtype == 0) {    #text input
            # a final input line without newline is still a sentence
            $inputN = _count_lines($inputfile, 1);
        }
        else {
            $inputN = _count_confusion_networks($inputfile);
        }
        # only completely written (newline-terminated) output lines count
        my $outputN = _count_lines($outputfile, 0);

        if (!defined $inputN || !defined $outputN || $inputN != $outputN) {
            $inputN  = "unreadable" unless defined $inputN;
            $outputN = "unreadable" unless defined $outputN;
            print STDERR "Split ($idx) were not entirely translated\n";
            print STDERR "outputN=$outputN inputN=$inputN\n";
            print STDERR "outputfile=${testfile}.$splitpfx$idx.trans inputfile=${testfile}.$splitpfx$idx\n";
            return 0;
        }
    }
    return 1;
}

# Returns the number of lines of $file, or undef if it cannot be opened.
# A last line lacking its newline is counted only if $count_unterminated is
# true. Done in Perl because the column layout of "wc -l" output differs
# between systems, which made the former "wc -l | cut" unreliable.
sub _count_lines {
    my ($file, $count_unterminated) = @_;
    open(my $fh, '<', $file) or return;
    my $count = 0;
    while (my $line = <$fh>) {
        $count++ if $count_unterminated || $line =~ /\n\z/;
    }
    close($fh);
    return $count;
}

# Returns the number of confusion networks in $file, or undef if it cannot be
# opened. Mirrors the former shell pipeline: every newline becomes the marker
# " _CNendline_ " and each pair of directly adjacent markers (an empty line
# following another line) is counted once.
# NOTE(review): this presumes every network is terminated by an empty line -
# confirm against the confusion-network input format.
sub _count_confusion_networks {
    my ($file) = @_;
    open(my $fh, '<', $file) or return;
    my $text = do { local $/; <$fh> };
    close($fh);
    $text = "" unless defined $text;

    $text =~ s/\n/ _CNendline_ /g;
    # Two spaces separate adjacent markers (the trailing one of the first and
    # the leading one of the second); written as " {2}" so the count cannot be
    # lost to whitespace normalisation. A single-space pattern never matches.
    my $count = () = $text =~ /_CNendline_ {2}_CNendline_ /g;
    return $count;
}

# Deletes the per-split working files (input split, translation, n-best list,
# job script, submission log, job stdout/stderr), the log of the waiting job,
# and finally $tmpdir, which rmdir only removes if it is empty.
sub remove_temporary_files {
    #removing temporary files
    foreach my $idx (@idxlist) {
        unlink("${testfile}.${splitpfx}${idx}.trans");
        unlink("${testfile}.${splitpfx}${idx}");
        if ($nbestflag) {
            unlink("${nbestfile}.${splitpfx}${idx}");
        }
        unlink("${jobscript}${idx}.bash");
        unlink("${jobscript}${idx}.log");
        unlink("$qsubout$idx");
        unlink("$qsuberr$idx");
    }
    # not tied to a particular split, so done once after the loop
    unlink("$qsubname.W.log");
    rmdir("$tmpdir");
}

# Runs a command through system() and logs it on STDERR.
# Arguments: the command, in any form accepted by system().
# Returns true if the command exited with status 0 and false otherwise (the
# exit code is then printed). Terminates the program with exit code 1 if the
# command could not be started or was killed by a signal.
sub safesystem {
    print STDERR "Executing: @_\n";
    system(@_);
    if ($? == -1) {
        print STDERR "Failed to execute: @_\n $!\n";
        exit(1);
    }
    elsif ($? & 127) {
        # the command text is passed as an argument, not as part of the
        # format, so a '%' inside it cannot be taken for a conversion
        printf STDERR "Execution of: %s\n died with signal %d, %s coredump\n",
            "@_", ($? & 127), ($? & 128) ? 'with' : 'without';
        exit 1;
    }
    else {
        my $exitcode = $? >> 8;
        print STDERR "Exit code: $exitcode\n" if $exitcode;
        return !$exitcode;
    }
}

# look for the correct pwdcmd (pwd by default, pawd if it exists)
# I assume that pwd always exists
# Returns the path reported by "which pawd" if that path exists, else "pwd".
sub getPwdCmd {
    my $pwdcmd = "pwd";

    # First whitespace-separated field of the first line printed by "which".
    # Extracted in Perl: inside backticks the "$1" of an awk program would be
    # interpolated by perl before awk ever saw it.
    my @which_output = `which pawd`;
    my ($candidate) = @which_output ? split(' ', $which_output[0]) : ();

    if ($candidate && -e $candidate) {
        $pwdcmd = $candidate;
    }
    return $pwdcmd;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -