📄 mert-moses.pl.svn-base
字号:
# NOTE(review): the next statements are the tail of a block that opens before
# this excerpt; the three closing braces balance constructs not visible here.
die "No default weights defined for -$name"
  if !defined $default_triples->{$name};
# XXX here was a deadly bug: we need a deep copy of the default values
my @copy = ();
foreach my $triple (@{$default_triples->{$name}}) {
  my @copy_triple = @$triple;
  push @copy, [ @copy_triple ];
}
push @{$use_triples->{$name}}, @copy;
}
}
}

# moses should use our config: refuse decoder flags that would override the
# model files listed in the config file.
if ($___DECODER_FLAGS =~ /(^|\s)-(config|f) /
  || $___DECODER_FLAGS =~ /(^|\s)-(ttable-file|t) /
  || $___DECODER_FLAGS =~ /(^|\s)-(distortion-file) /
  || $___DECODER_FLAGS =~ /(^|\s)-(generation-file) /
  || $___DECODER_FLAGS =~ /(^|\s)-(lmodel-file) /
) {
  die "It is forbidden to supply any of -config, -ttable-file, -distortion-file, -generation-file or -lmodel-file in the --decoder-flags.\nPlease use only the --config option to give the config file that lists all the supplementary files.";
}

# walk through all lambdas the user wishes to optimize and check
# if the number of lambdas matches
foreach my $name (keys %$use_triples) {
  my $expected_lambdas = $lambdas_per_model->{$name};
  $expected_lambdas = 0 if !defined $expected_lambdas;
  my $got_lambdas = defined $use_triples->{$name} ? scalar @{$use_triples->{$name}} : 0;

  if ($got_lambdas != $expected_lambdas) {
    if ($allow_unknown_lambdas && $expected_lambdas == 0) {
      print STDERR "Allowing to optimize $name, although I have no idea what it is.\n";
    } else {
      # The line breaks inside this message are written as explicit \n so
      # that the sentences cannot fuse together.
      print STDERR "Wrong number of lambdas for $name. Expected (given the config file): $expected_lambdas, got: $got_lambdas.\nUse --allow-unknown-lambdas to optimize lambdas that you are just introducing\nand I cannot validate against the models mentioned in moses.ini.\n";
      exit 1;
    }
  }
}

# as weights are normalized in the next steps (by cmert)
# normalize initial LAMBDAs, too
my $need_to_normalize = 1;

my @order_of_lambdas_from_decoder = ();
# this will store the labels of scores coming out of the decoder (and hence the order of lambdas coming out of mert)
# we will use the array to interpret the lambdas
# the array gets filled with labels only after first nbestlist was generated

# store current directory and create the working directory (if needed)
my $cwd = `pawd 2>/dev/null`;
if(!$cwd){$cwd = `pwd`;}
chomp($cwd);

safesystem("mkdir -p $___WORKING_DIR") or die "Can't mkdir $___WORKING_DIR";

{
# open local scope

# chdir to the working directory
chdir($___WORKING_DIR) or die "Can't chdir to $___WORKING_DIR";

# set start run
my $start_run = 1;
if ($continue) {
  # need to load last best values
  print STDERR "Trying to continue an interrupted optimization.\n";
  open my $step_fh, '<', "finished_step.txt"
    or die "Failed to find the step number, failed to read finished_step.txt";
  my $step = <$step_fh>;
  close $step_fh;
  # An empty file gives undef; fail with the same message as a missing file.
  die "Failed to find the step number, failed to read finished_step.txt"
    if !defined $step;
  chomp $step;
  $step++;

  if (! -e "run$step.best$___N_BEST_LIST_SIZE.out.gz") {
    # allow stepping one extra iteration back
    $step--;
    die "Can't start from step $step, because run$step.best$___N_BEST_LIST_SIZE.out.gz was not found!"
      if ! -e "run$step.best$___N_BEST_LIST_SIZE.out.gz";
  }
  $start_run = $step +1;

  print STDERR "Reading last cached lambda values (result from step $step)\n";
  @order_of_lambdas_from_decoder = get_order_of_scores_from_nbestlist("gunzip -c < run$step.best$___N_BEST_LIST_SIZE.out.gz |");

  open my $weights_fh, '<', "weights.txt" or die "Can't read weights.txt";
  my $newweights = <$weights_fh>;
  close $weights_fh;
  die "Can't read weights.txt" if !defined $newweights;
  chomp $newweights;
  # split ' ' skips leading whitespace; /\s+/ would yield an empty first
  # field and shift every weight by one position.
  my @newweights = split ' ', $newweights;

  # dump_triples($use_triples);
  $use_triples = store_new_lambda_values($use_triples, \@order_of_lambdas_from_decoder, \@newweights);
  # dump_triples($use_triples);
}

if ($___FILTER_PHRASE_TABLE){
  # filter the phrase tables with respect to input, use --decoder-flags
  print "filtering the phrase tables... ".`date`;
  my $cmd = "$filtercmd ./filtered $___CONFIG $___DEV_F";
  if (defined $___JOBS) {
    safesystem("$qsubwrapper $pass_old_sge -command='$cmd' -queue-parameter=\"$queue_flags\" -stdout=filterphrases.out -stderr=filterphrases.err" )
      or die "Failed to submit filtering of tables to the queue (via $qsubwrapper)";
  } else {
    safesystem($cmd) or die "Failed to filter the tables.";
  }

  # the decoder should now use the filtered model
  $___CONFIG = "filtered/moses.ini";
}
else{
  # do not filter phrase tables (useful if binary phrase tables are available)
  # use the original configuration file
}

my $PARAMETERS;
$PARAMETERS = $___DECODER_FLAGS . " -config $___CONFIG -inputtype $___INPUTTYPE";

my $devbleu = undef;
my $bestpoint = undef;
my $run=$start_run-1;

my $prev_size = -1;
# Main optimization loop: decode, score the n-best lists, run cmert, and
# repeat until no new hypotheses appear or the weights stop changing.
while(1) {
  $run++;
  # run beamdecoder with option to output nbestlists
  # the end result should be (1) @NBEST_LIST, a list of lists; (2) @SCORE, a list of lists of lists

  print "run $run start at ".`date`;

  # In case something dies later, we might wish to have a copy
  create_config($___CONFIG, "./run$run.moses.ini", $use_triples, $run, (defined$devbleu?$devbleu:"--not-estimated--"));

  # skip if the user wanted
  if (!$skip_decoder) {
    print "($run) run decoder to produce n-best lists\n";
    @order_of_lambdas_from_decoder = run_decoder($use_triples, $PARAMETERS, $run, \@order_of_lambdas_from_decoder, $need_to_normalize);
    $need_to_normalize = 0;
    safesystem("gzip -f run*out") or die "Failed to gzip run*out";
  }
  else {
    print "skipped decoder run\n";
    if (0 == scalar @order_of_lambdas_from_decoder) {
      @order_of_lambdas_from_decoder = get_order_of_scores_from_nbestlist("gunzip -dc run*.best*.out.gz | head -1 |");
    }
    # Only the first iteration is skipped; later ones decode again.
    $skip_decoder = 0;
    $need_to_normalize = 0;
  }

  my $EFF_REF_LEN = "";
  if ($___AVERAGE) {
    $EFF_REF_LEN = "-a";
  }
  elsif ($___CLOSEST){
    $EFF_REF_LEN = "-e";
  }

  my $EFF_NORM = "";
  if ($___NONORM) {
    $EFF_NORM = "-n";
  }

  # To be sure that scoring script produces these fresh:
  safesystem("rm -f cands.opt feats.opt") or die;

  # convert n-best list into a numberized format with error scores
  print STDERR "Scoring the nbestlist.\n";
  my $cmd = "export PYTHONPATH=$pythonpath ; gunzip -dc run*.best*.out.gz | sort -n -t \"|\" -k 1,1 | $SCORENBESTCMD $EFF_NORM $EFF_REF_LEN ".join(" ", @references)." ./";
  if (defined $___JOBS) {
    safesystem("$qsubwrapper $pass_old_sge -command='$cmd' -queue-parameter=\"$queue_flags\" -stdout=scorenbest.out -stderr=scorenbest.err")
      or die "Failed to submit scoring nbestlist to queue (via $qsubwrapper)";
  } else {
    safesystem($cmd) or die "Failed to score nbestlist";
  }

  print STDERR "Hoping that scoring succeeded. We'll see if we can read the output files now.\n";

  # keep a count of lines in nbests lists (all together)
  # if it did not increase since last iteration, we are DONE
  open my $cands_fh, '<', "cands.opt" or die "Can't read cands.opt";
  my $size=0;
  while (my $line = <$cands_fh>) {
    chomp $line;
    my @flds = split / /, $line;
    $size += $flds[1];
  }
  close $cands_fh;
  print "$size accumulated translations\n";
  print "prev accumulated translations was : $prev_size\n";
  if ($size <= $prev_size){
    print STDERR "No new hypotheses in nbest list. Stopping.\n";
    last;
  }
  $prev_size = $size;

  # run cmert
  # cmert reads in the file init.opt containing three lines:
  #   minimum values
  #   maximum values
  #   current values
  # We need to prepare the files and **the order of the lambdas must
  # correspond to the order @order_of_lambdas_from_decoder

  my @MIN = ();   # lower bounds
  my @MAX = ();   # upper bounds
  my @CURR = ();  # the starting values
  my @NAME = ();  # to which model does the lambda belong

  # walk in order of @order_of_lambdas_from_decoder and collect the min,max,val
  my %visited = ();
  foreach my $name (@order_of_lambdas_from_decoder) {
    next if $visited{$name};
    $visited{$name} = 1;
    die "The decoder produced also some '$name' scores, but we do not know the ranges for them, no way to optimize them\n"
      if !defined $use_triples->{$name};
    foreach my $feature (@{$use_triples->{$name}}) {
      my ($val, $min, $max) = @$feature;
      push @CURR, $val;
      push @MIN, $min;
      push @MAX, $max;
      push @NAME, $name;
    }
  }

  open my $init_fh, '>', "init.opt" or die "Can't write init.opt (WD now $___WORKING_DIR)";
  print {$init_fh} join(" ", @MIN)."\n";
  print {$init_fh} join(" ", @MAX)."\n";
  print {$init_fh} join(" ", @CURR)."\n";
  # Buffered write errors surface at close, so check it.
  close $init_fh or die "Can't write init.opt (WD now $___WORKING_DIR)";

  #just for brevity
  open my $names_fh, '>', "names.txt" or die "Can't write names.txt (WD now $___WORKING_DIR)";
  print {$names_fh} join(" ", @NAME)."\n";
  close $names_fh or die "Can't write names.txt (WD now $___WORKING_DIR)";

  # make a backup copy labelled with this run number
  safesystem("cp init.opt run$run.init.opt") or die;

  my $DIM = scalar(@CURR); # number of lambdas
  $cmd="$cmertcmd -d $DIM";

  print STDERR "Starting cmert.\n";
  if (defined $___JOBS) {
    safesystem("$qsubwrapper $pass_old_sge -command='$cmd' -stderr=cmert.log -queue-parameter=\"$queue_flags\"")
      or die "Failed to start cmert (via qsubwrapper $qsubwrapper)";
  } else {
    safesystem("$cmd 2> cmert.log") or die "Failed to run cmert";
  }

  die "Optimization failed, file weights.txt does not exist or is empty"
    if ! -s "weights.txt";

  # backup copies
  safesystem ("cp cmert.log run$run.cmert.log") or die;
  safesystem ("cp weights.txt run$run.weights.txt") or die; # this one is needed for restarts, too
  print "run $run end at ".`date`;

  $bestpoint = undef;
  $devbleu = undef;
  open my $log_fh, '<', "cmert.log" or die "Can't open cmert.log";
  while (my $line = <$log_fh>) {
    # Accept exponent notation (e.g. 1e-05) in the weights and a signed or
    # exponent-formatted score; the plain-digit classes used to reject them.
    if ($line =~ /Best point:\s*([\s\d\.\-eE+]+?)\s*=> ([\-\d\.eE+]+)/) {
      $bestpoint = $1;
      $devbleu = $2;
      last;
    }
  }
  close $log_fh;
  die "Failed to parse cmert.log, missed Best point there."
    if !defined $bestpoint || !defined $devbleu;
  print "($run) BEST at $run: $bestpoint => $devbleu at ".`date`;

  my @newweights = split /\s+/, $bestpoint;

  # update my cache of lambda values
  $use_triples = store_new_lambda_values($use_triples, \@order_of_lambdas_from_decoder, \@newweights);

  ## additional stopping criterion: weights have not changed
  my $shouldstop = 1;
  for(my $i=0; $i<@CURR; $i++) {
    die "Lost weight! cmert reported fewer weights (@newweights) than we gave it (@CURR)"
      if !defined $newweights[$i];
    if (abs($CURR[$i] - $newweights[$i]) >= $minimum_required_change_in_weights) {
      $shouldstop = 0;
      last;
    }
  }

  # Record the finished step so that an interrupted run can be continued.
  open my $done_fh, '>', "finished_step.txt" or die "Can't mark finished step";
  print {$done_fh} $run."\n";
  close $done_fh or die "Can't mark finished step";

  if ($shouldstop) {
    print STDERR "None of the weights changed more than $minimum_required_change_in_weights. Stopping.\n";
    last;
  }
}
print "Training finished at ".`date`;

safesystem("cp init.opt run$run.init.opt") or die;
safesystem ("cp cmert.log run$run.cmert.log") or die;

create_config($___CONFIG, "./moses.ini", $use_triples, $run, $devbleu);

# just to be sure that we have the really last finished step marked
open my $final_fh, '>', "finished_step.txt" or die "Can't mark finished step";
print {$final_fh} $run."\n";
close $final_fh or die "Can't mark finished step";

#chdir back to the original directory # useless, just to remind we were not there
chdir($cwd);

} # end of local scope

# NOTE(review): this sub continues past the end of this excerpt; only the
# visible opening is reproduced here, unchanged.
sub store_new_lambda_values {
  # given new lambda values (in given order), replace the 'val' element in our triples
  my $triples = shift;
  my $names = shift;
  my $values = shift;

  my %idx = ();
  foreach my $i (0..scalar(@$values)-1) {
    my $name = $names->[$i];
    die "Missed name for lambda $values->[$i] (in @$values; names: @$names)"
      if !defined $name;
    if (!defined $idx{$name}) {
      $idx{$name} = 0;
    } else {
      $idx{$name}++;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -