📄 mert-moses.pl.svn-base
字号:
# NOTE(review): the statements down to the first two closing braces are the
# tail of a sub whose beginning lies above this excerpt.  They are kept as
# they were, except that the "+1" is now parenthesised: "." and "+" have the
# same precedence, so the unparenthesised form numified the whole message.
        die "Moses gave us too many lambdas for '$name', we had "
            . scalar(@{$triples->{$name}})
            . " but we got at least " . ($idx{$name} + 1)
            if !defined $triples->{$name}->[$idx{$name}];

        # set the corresponding field in triples
        # print STDERR "Storing $i-th score as $name: $idx{$name}: $values->[$i]\n";
        $triples->{$name}->[$idx{$name}]->[0] = $values->[$i];
    }
}

# Debugging aid: print every triple of every weight name to STDERR.
#
#   $triples - hashref mapping a weight name to an arrayref of
#              [value, min, max] arrayrefs
sub dump_triples {
    my $triples = shift;

    foreach my $name (keys %$triples) {
        foreach my $triple (@{$triples->{$name}}) {
            my ($val, $min, $max) = @$triple;
            print STDERR "Triples: $name\t$val\t$min\t$max ($triple)\n";
        }
    }
}

# Run the decoder once with the current weights and report the order in which
# it emits its scores.
#
#   $triples                 - hashref: weight name => arrayref of
#                              [value, min, max]; only the value is used here
#   $parameters              - extra decoder switches, passed through verbatim
#   $run                     - run number, used in the output file names
#   $output_order_of_lambdas - arrayref with the already known score order;
#                              when empty, the order is read from the n-best
#                              list that this run produces
#   $need_to_normalize       - when true, weights are scaled so that their
#                              absolute values sum to one
#
# Returns the list of score labels in decoder output order.
# Dies when the decoder command fails or when normalization is requested for
# weights that are all zero.
sub run_decoder {
    my ($triples, $parameters, $run, $output_order_of_lambdas, $need_to_normalize) = @_;
    my $filename_template = "run%d.best$___N_BEST_LIST_SIZE.out";
    my $filename = sprintf($filename_template, $run);

    print "params = $parameters\n";

    # prepare the decoder config: one "-name" switch per weight name followed
    # by one "%.6f" placeholder per weight; the values are filled in below
    my $decoder_config = "";
    my @vals = ();
    foreach my $name (keys %$triples) {
        $decoder_config .= "-$name ";
        foreach my $triple (@{$triples->{$name}}) {
            $decoder_config .= "%.6f ";
            push @vals, $triple->[0];
        }
    }

    if ($need_to_normalize) {
        print STDERR "Normalizing lambdas: @vals\n";
        my $totlambda = 0;
        $totlambda += abs($_) foreach @vals;
        if (@vals) {
            # a zero sum used to surface as "Illegal division by zero"
            die "Cannot normalize lambdas: the sum of their absolute values is zero"
                if $totlambda == 0;
            $_ /= $totlambda foreach @vals;
        }
    }

    print STDERR "DECODER_CFG = $decoder_config\n";
    print STDERR " values = @vals\n";
    $decoder_config = sprintf($decoder_config, @vals);
    print "decoder_config = $decoder_config\n";

    # run the decoder
    my $decoder_cmd;
    if (defined $___JOBS) {
        $decoder_cmd = "$moses_parallel_cmd $pass_old_sge"
            . " -config $___CONFIG -inputtype $___INPUTTYPE"
            . " -qsub-prefix mert$run -queue-parameters \"$queue_flags\""
            . " -decoder-parameters \"$parameters $decoder_config\""
            . " -n-best-file $filename -n-best-size $___N_BEST_LIST_SIZE"
            . " -input-file $___DEV_F -jobs $___JOBS -decoder $___DECODER"
            . " > run$run.out";
    }
    else {
        # The file name and the list size must stay on one shell line: a
        # newline between them would terminate the command early.
        $decoder_cmd = "$___DECODER $parameters"
            . " -config $___CONFIG -inputtype $___INPUTTYPE $decoder_config"
            . " -n-best-list $filename $___N_BEST_LIST_SIZE"
            . " -i $___DEV_F > run$run.out";
    }

    safesystem($decoder_cmd) or die "The decoder died. CONFIG WAS $decoder_config \n";

    if (0 == scalar @$output_order_of_lambdas) {
        # we have to peek at the nbestlist
        return get_order_of_scores_from_nbestlist($filename);
    }

    # we have checked the nbestlist already, we trust the order of output
    # scores does not change
    return @$output_order_of_lambdas;
}

# Read the first line of an n-best list and interpret its
#   ||| label: num num num label2: num |||
# column.
#
#   $fname_or_source - what to open for reading
#
# Returns the score labels in order, each label repeated once per score that
# follows it.  Dies when the source cannot be opened, yields no line, has no
# score column, or contains a token that is neither a label nor a number.
sub get_order_of_scores_from_nbestlist {
    my $fname_or_source = shift;

    print STDERR "Peeking at the beginning of nbestlist to get order of scores: $fname_or_source\n";

    # NOTE(review): the two-argument form of open is kept on purpose; the
    # parameter name suggests callers may pass a pipe such as "cmd |" --
    # confirm before switching to the three-argument form.
    open(my $in, $fname_or_source)
        or die "Failed to get order of scores from nbestlist '$fname_or_source'";
    my $line = <$in>;
    close $in;
    die "Line empty in nbestlist '$fname_or_source'" if !defined $line;

    # the columns are: sentence id, hypothesis, scores, total
    my (undef, undef, $scores) = split /\|\|\|/, $line;
    $scores = "" if !defined $scores;    # line without any ||| separators
    $scores =~ s/^\s*|\s*$//g;
    die "No scores in line: $line" if $scores eq "";

    my @order = ();
    my $label = undef;
    foreach my $tok (split /\s+/, $scores) {
        if ($tok =~ /^([a-z][0-9a-z]*):/i) {
            $label = $1;
        }
        elsif ($tok =~ /^-?[-0-9.e]+$/) {
            # a score found, remember it
            die "Found a score but no label before it! Bad nbestlist '$fname_or_source'!"
                if !defined $label;
            push @order, $label;
        }
        else {
            die "Not a label, not a score '$tok'. "
                . "Failed to parse the scores string: '$scores' of nbestlist '$fname_or_source'";
        }
    }
    print STDERR "The decoder returns the scores in this order: @order\n";
    return @order;
}

# Write a new decoder config by cloning an existing one and overriding some
# of its sections.
#
#   $infn          - source config
#   $outfn         - where to save the config
#   $triples       - the lambdas we should write (its keys select the weight
#                    sections that get replaced)
#   $iteration     - just for verbosity
#   $bleu_achieved - just for verbosity
#
# Overrides come from $___DECODER_FLAGS (which is whitespace-normalized in
# place) and from the current weight values.  Sections of the source config
# that are overridden keep only their comment and blank lines; overrides with
# no matching section are appended at the end.  A [config] override is never
# written.  Dies when either file cannot be opened or written.
sub create_config {
    my $infn          = shift;
    my $outfn         = shift;
    my $triples       = shift;
    my $iteration     = shift;
    my $bleu_achieved = shift;

    my %P;    # the hash of all parameters we wish to override

    # first convert the command line parameters to the hash
    {
        my $parameter = undef;
        print "Parsing --decoder-flags: |$___DECODER_FLAGS|\n";
        # /g is required on both: without it only the leading whitespace was
        # trimmed and only the first run of whitespace collapsed, so the
        # split below produced empty tokens
        $___DECODER_FLAGS =~ s/^\s+|\s+$//g;
        $___DECODER_FLAGS =~ s/\s+/ /g;
        foreach my $tok (split(/ /, $___DECODER_FLAGS)) {
            if ($tok =~ /^\-([^\d].*)$/) {
                # "-name"; a dash followed by a digit is a negative value
                $parameter = $1;
                $parameter = $ABBR2FULL{$parameter} if defined($ABBR2FULL{$parameter});
            }
            else {
                die "Found value with no -paramname before it: $tok" if !defined $parameter;
                push @{$P{$parameter}}, $tok;
            }
        }
    }

    # Convert weights to elements in P
    foreach my $abbr (keys %$triples) {
        my $name = defined $ABBR2FULL{$abbr} ? $ABBR2FULL{$abbr} : $abbr;

        # First delete all weights params from the input, in short or
        # long-named version
        delete($P{$abbr});
        delete($P{$name});

        # Then feed P with the current values.
        # NOTE(review): the values are read from the file-level %used_triples
        # rather than from $triples -- confirm that callers always pass a
        # reference to %used_triples.
        foreach my $feature (@{$used_triples{$abbr}}) {
            push @{$P{$name}}, $feature->[0];
        }
    }

    # create new moses.ini decoder config file by cloning and overriding the
    # original one
    open(my $ini, '<', $infn) or die "Can't read $infn: $!";
    delete($P{"config"});    # never output
    print "Saving new config to: $outfn\n";
    open(my $out, '>', $outfn) or die "Can't write $outfn: $!";
    print {$out} "# MERT optimized configuration\n";
    print {$out} "# decoder $___DECODER\n";
    print {$out} "# BLEU $bleu_achieved on dev $___DEV_F\n";
    print {$out} "# We were before running iteration $iteration\n";
    print {$out} "# finished " . `date`;

    my $line = <$ini>;
    while (1) {
        last unless defined $line;

        # skip until hit [parameter]; the line just examined is dropped and
        # only a following comment or blank line is echoed
        my ($parameter) = $line =~ /^\[(.+)\]\s*$/;
        if (!defined $parameter) {
            $line = <$ini>;
            print {$out} $line
                if defined $line && ($line =~ /^\#/ || $line =~ /^\s+$/);
            next;
        }

        # parameter name
        $parameter = $ABBR2FULL{$parameter} if defined($ABBR2FULL{$parameter});
        print {$out} "[$parameter]\n";

        # change parameter, if new values
        if (defined($P{$parameter})) {
            # write new values
            print {$out} "$_\n" foreach @{$P{$parameter}};
            delete($P{$parameter});

            # skip until new parameter, only write comments
            while (defined($line = <$ini>)) {
                print {$out} $line if $line =~ /^\#/ || $line =~ /^\s+$/;
                last if $line =~ /^\[/;
            }
            next;
        }

        # unchanged parameter, write old
        while (defined($line = <$ini>)) {
            last if $line =~ /^\[/;
            print {$out} $line;
        }
    }

    # write all additional parameters
    foreach my $parameter (keys %P) {
        print {$out} "\n[$parameter]\n";
        print {$out} "$_\n" foreach @{$P{$parameter}};
    }

    close($ini);
    close($out) or die "Can't write $outfn: $!";
    print STDERR "Saved: $outfn\n";
}

# Run a command with system() and log what happens to STDERR.
#
#   @_ - the command, handed to system() unchanged
#
# Returns true when the command exits with status 0, false otherwise.
# Terminates the whole script with exit(1) when the command cannot be started
# or is killed by a signal.
sub safesystem {
    print STDERR "Executing: @_\n";
    system(@_);

    if ($? == -1) {
        print STDERR "Failed to execute: @_\n $!\n";
        exit(1);
    }
    elsif ($? & 127) {
        # the command text goes through %s so that a '%' inside it cannot be
        # mistaken for a printf directive
        printf STDERR "Execution of: %s\n died with signal %d, %s coredump\n",
            "@_", ($? & 127), ($? & 128) ? 'with' : 'without';
        exit(1);
    }

    my $exitcode = $? >> 8;
    print STDERR "Exit code: $exitcode\n" if $exitcode;
    return !$exitcode;
}

# Turn a path into an absolute one and tidy it up textually.
#
#   $PATH (first argument) - the path to process
#
# The first "/nfsmnt" is removed.  A path that then starts with "/" is
# returned right away.  Otherwise the output of `pawd` (or `pwd` when that
# yields nothing) is prepended, "/./" and repeated slashes are collapsed,
# "dir/.." pairs are resolved in at most ten passes, trailing slashes are
# dropped and "/nfsmnt" is removed once more.
sub ensure_full_path {
    my $path = shift;

    $path =~ s{/nfsmnt}{};
    return $path if $path =~ m{^/};

    my $dir = `pawd 2>/dev/null`;
    $dir = `pwd` if !$dir;
    chomp($dir);

    $path = $dir . "/" . $path;
    $path =~ s{[\r\n]}{}g;
    $path =~ s{/\./}{/}g;
    $path =~ s{/+}{/}g;

    # bounded so that a path which never loses its "/../" cannot loop forever
    my $sanity = 0;
    while ($path =~ m{/\.\./} && $sanity++ < 10) {
        $path =~ s{/+}{/}g;
        $path =~ s{/[^/]+/\.\./}{/}g;
    }

    $path =~ s{/[^/]+/\.\.$}{};
    $path =~ s{/+$}{};
    $path =~ s{/nfsmnt}{};
    return $path;
}

# Walk through a decoder config file, check the model files it names and
# record in the file-level %used_triples which weights the configuration
# needs.
#
#   $ini - path of the config file
#
# Returns a reference to a hash: short section name => number of files
# defined for it.  All problems found are reported on STDERR and the script
# then ends with exit(1).  Dies when the file cannot be opened, is empty, or
# a relevant section has no short name in %TABLECONFIG2ABBR.
sub scan_config {
    my $ini = shift;
    my $inishortname = $ini;
    $inishortname =~ s/^.*\///;    # for error reporting

    # we get a pre-filled counts, because some lambdas are always needed
    # (word penalty, for instance); as we walk through the ini file, we
    # record how many extra lambdas we need and finally report it

    # in which field (counting from zero) is the filename to check?
    my %where_is_filename = (
        "ttable-file"     => 3,
        "generation-file" => 3,
        "lmodel-file"     => 3,
        "distortion-file" => 3,
    );

    # by default, each line of each section means one lambda, but some
    # sections explicitly state a custom number of lambdas
    my %where_is_lambda_count = (
        "ttable-file"     => 2,
        "generation-file" => 2,
        "distortion-file" => 2,
    );

    open(my $ini_fh, '<', $ini) or die "Can't read $ini";

    my $section   = undef;    # name of the section we are reading
    my $shortname = undef;    # the corresponding short name
    my $nr    = 0;
    my $error = 0;
    my %defined_files;
    my %defined_steps;

    # check the ini file for compatible mapping steps and actually defined
    # files
    while (my $line = <$ini_fh>) {
        $nr++;
        next if $line =~ /^\s*#/;    # skip comments

        if ($line =~ /^\[([^\]]*)\]\s*$/) {
            $section   = $1;
            $shortname = $TABLECONFIG2ABBR{$section};
            next;
        }
        next unless defined $section;

        if ($section eq "mapping") {
            # keep track of mapping steps used
            $defined_steps{$1}++ if $line =~ /^([TG])/ || $line =~ /^\d+ ([TG])/;
        }

        # only sections that name a model file are relevant to lambdas
        next unless defined $where_is_filename{$section};

        chomp $line;
        my @flds = split / +/, $line;
        my $fn = $flds[$where_is_filename{$section}];
        next unless defined $fn && $fn !~ /^\s+$/;

        print "checking weight-count for $section\n";

        # this is a filename! check it
        if ($fn !~ /^\//) {
            $error = 1;
            print STDERR "$inishortname:$nr:Filename not absolute: $fn\n";
        }
        if (! -s $fn && ! -s "$fn.gz") {
            $error = 1;
            print STDERR "$inishortname:$nr:File does not exist or empty: $fn\n";
        }

        # remember the number of files used, to know how many lambdas do we
        # need
        die "No short name was defined for section $section!" if !defined $shortname;

        # how many lambdas does this model need?
        # either specified explicitly, or the default, i.e. one
        my $needlambdas = defined $where_is_lambda_count{$section}
            ? $flds[$where_is_lambda_count{$section}]
            : 1;
        print STDERR "Config needs $needlambdas lambdas for $section (i.e. $shortname)\n"
            if $verbose;

        # counted up front so that the error message below never has to
        # dereference a missing entry
        my $have_defaults = defined $additional_triples->{$shortname};
        my $available     = $have_defaults ? scalar(@{$additional_triples->{$shortname}}) : 0;

        if (!defined $___LAMBDA && (!$have_defaults || $available < $needlambdas)) {
            print STDERR "$inishortname:$nr:Your model $shortname needs $needlambdas weights but we define the default ranges for only "
                . $available
                . " weights. Cannot use the default, you must supply lambdas by hand.\n";
            $error = 1;
        }
        else {
            # note: table may use less parameters than the maximum number
            # of triples
            for (my $lambda = 0; $lambda < $needlambdas; $lambda++) {
                my ($start, $min, $max) = @{${$additional_triples->{$shortname}}[$lambda]};
                push @{$used_triples{$shortname}}, [$start, $min, $max];
            }
        }
        $defined_files{$shortname}++;
    }
    die "$inishortname: File was empty!" if !$nr;
    close $ini_fh;

    # the number of model files must agree with the steps used in [mapping]
    for my $pair (qw/T=tm=translation G=g=generation/) {
        my ($tg, $abbr, $label) = split /=/, $pair;
        $defined_files{$abbr} = 0 if !defined $defined_files{$abbr};
        $defined_steps{$tg}   = 0 if !defined $defined_steps{$tg};

        if ($defined_files{$abbr} != $defined_steps{$tg}) {
            print STDERR "$inishortname: You defined $defined_files{$abbr} files for $label but use $defined_steps{$tg} in [mapping]!\n";
            $error = 1;
        }
    }

    # distance-based distortion
    if ($___ASYNC == 1) {
        print STDERR "ASYNC distortion & word penalty";

        # one distortion and one word-penalty triple per translation step
        push @{$used_triples{"d"}}, map { [ 1.0, 0.0, 2.0 ] } 1 .. $defined_steps{"T"};
        push @{$used_triples{"w"}}, map { [ 0.5, -1.0, 1.0 ] } 1 .. $defined_steps{"T"};

        # debug print
        print "distortion:";
        foreach my $subarray (@{$used_triples{"d"}}) {
            print @$subarray, "\n";
        }
        #exit 1;
    }
    else {
        print STDERR "SYNC distortion";
        push @{$used_triples{"d"}}, [1.0, 0.0, 2.0];
    }

    exit(1) if $error;
    return (\%defined_files);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -