⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mert-moses.pl.svn-base

📁 moses开源的机器翻译系统
💻 SVN-BASE
📖 第 1 页 / 共 3 页
字号:
    # NOTE: this is the tail of a sub whose beginning lies outside this chunk.
    # The '+1' is parenthesised on purpose: '.' and '+' have equal precedence
    # and associate left, so without the parentheses the whole message string
    # would be numified and the diagnostic lost.
    die "Moses gave us too many lambdas for '$name', we had ".scalar(@{$triples->{$name}})
      ." but we got at least ".($idx{$name}+1)
      if !defined $triples->{$name}->[$idx{$name}];
    # set the corresponding field in triples
    # print STDERR "Storing $i-th score as $name: $idx{$name}: $values->[$i]\n";
    $triples->{$name}->[$idx{$name}]->[0] = $values->[$i];
  }
}

# Print every triple of every weight name to STDERR (debugging aid).
#   $triples - hashref: weight name => arrayref of [value, min, max] triples
sub dump_triples {
    my $triples = shift;
    foreach my $name (keys %$triples) {
        foreach my $triple (@{$triples->{$name}}) {
            my ($val, $min, $max) = @$triple;
            print STDERR "Triples:  $name\t$val\t$min\t$max    ($triple)\n";
        }
    }
}

# Run the decoder once with the current lambdas, writing an n-best list to
# run<RUN>.best<N_BEST_LIST_SIZE>.out and the decoder's stdout to run<RUN>.out.
#   $triples                 - hashref: weight name => arrayref of [value, min, max]
#   $parameters              - extra decoder parameters, passed through verbatim
#   $run                     - run number, used in the output file names
#   $output_order_of_lambdas - arrayref of score labels already known (may be empty)
#   $need_to_normalize       - if true, scale lambdas so their absolute values sum to 1
# Returns the list of score labels in decoder output order: the given list if
# it is non-empty, otherwise the order read from the fresh n-best list.
# Dies if the decoder fails or if normalization is requested for all-zero lambdas.
sub run_decoder {
    my ($triples, $parameters, $run, $output_order_of_lambdas, $need_to_normalize) = @_;
    my $filename = sprintf("run%d.best$___N_BEST_LIST_SIZE.out", $run);

    print "params = $parameters\n";

    # prepare the decoder config: a sprintf template plus the values to fill in
    my $decoder_config = "";
    my @vals = ();
    foreach my $name (keys %$triples) {
        $decoder_config .= "-$name ";
        foreach my $triple (@{$triples->{$name}}) {
            my ($val, $min, $max) = @$triple;
            $decoder_config .= "%.6f ";
            push @vals, $val;
        }
    }

    if ($need_to_normalize) {
        print STDERR "Normalizing lambdas: @vals\n";
        my $totlambda = 0;
        $totlambda += abs($_) for @vals;
        # an all-zero weight vector cannot be scaled; say so explicitly instead
        # of failing with a bare "Illegal division by zero"
        die "Cannot normalize lambdas: their absolute values sum to zero\n"
          if @vals && $totlambda == 0;
        $_ /= $totlambda for @vals;
    }

    print STDERR "DECODER_CFG = $decoder_config\n";
    print STDERR "     values = @vals\n";
    $decoder_config = sprintf($decoder_config, @vals);
    print "decoder_config = $decoder_config\n";

    # run the decoder, either through the parallel wrapper or directly
    my $decoder_cmd;
    if (defined $___JOBS) {
        $decoder_cmd = "$moses_parallel_cmd $pass_old_sge -config $___CONFIG -inputtype $___INPUTTYPE -qsub-prefix mert$run -queue-parameters \"$queue_flags\" -decoder-parameters \"$parameters $decoder_config\" -n-best-file $filename -n-best-size $___N_BEST_LIST_SIZE -input-file $___DEV_F -jobs $___JOBS -decoder $___DECODER > run$run.out";
    } else {
        $decoder_cmd = "$___DECODER $parameters  -config $___CONFIG -inputtype $___INPUTTYPE $decoder_config -n-best-list $filename $___N_BEST_LIST_SIZE -i $___DEV_F > run$run.out";
    }

    safesystem($decoder_cmd) or die "The decoder died. CONFIG WAS $decoder_config \n";

    # we have checked the nbestlist already, we trust the order of output scores does not change
    return @$output_order_of_lambdas if scalar @$output_order_of_lambdas;

    # otherwise we have to peek at the nbestlist
    return get_order_of_scores_from_nbestlist($filename);
}

# Read the first line of an n-best list and interpret its third |||-separated
# column, which looks like "label: num num num label2: num".
#   $fname_or_source - what to open to read the n-best list
# Returns the score labels in order, one entry per score (a label followed by
# three numbers appears three times).
# Dies if the source cannot be opened, is empty, or the column cannot be parsed.
sub get_order_of_scores_from_nbestlist {
    my $fname_or_source = shift;
    print STDERR "Peeking at the beginning of nbestlist to get order of scores: $fname_or_source\n";

    # NOTE(review): kept as a 2-argument open because the parameter name
    # suggests that a command pipe may be passed instead of a plain file name;
    # confirm against the callers before switching to the 3-argument form.
    open my $in, $fname_or_source or die "Failed to get order of scores from nbestlist '$fname_or_source'";
    my $line = <$in>;
    close $in;
    die "Line empty in nbestlist '$fname_or_source'" if !defined $line;

    my ($sent, $hypo, $scores, $total) = split /\|\|\|/, $line;
    # a line with fewer than three columns has no scores column at all
    die "No scores in line: $line" if !defined $scores;
    $scores =~ s/^\s+|\s+$//g;
    die "No scores in line: $line" if $scores eq "";

    my @order = ();
    my $label = undef;
    foreach my $tok (split /\s+/, $scores) {
        if ($tok =~ /^([a-z][0-9a-z]*):/i) {
            $label = $1;
        } elsif ($tok =~ /^-?[-+0-9.e]+$/i) {
            # a score found, remember it ('+' and 'E' are accepted so that
            # exponents such as 1e+06 do not abort the run)
            die "Found a score but no label before it! Bad nbestlist '$fname_or_source'!"
              if !defined $label;
            push @order, $label;
        } else {
            die "Not a label, not a score '$tok'. Failed to parse the scores string: '$scores' of nbestlist '$fname_or_source'";
        }
    }
    print STDERR "The decoder returns the scores in this order: @order\n";
    return @order;
}

# Write a new decoder config by cloning an existing one and overriding
# sections with the --decoder-flags values and the current weights.
#   $infn          - source config
#   $outfn         - where to save the config
#   $triples       - the lambdas we should write (its keys select the weights)
#   $iteration     - just for verbosity
#   $bleu_achieved - just for verbosity
# Side effects: normalizes whitespace in the global $___DECODER_FLAGS.
# Dies if a flag value has no parameter name before it or a file cannot be
# read or written.
sub create_config {
    my $infn = shift;
    my $outfn = shift;
    my $triples = shift;
    my $iteration = shift;
    my $bleu_achieved = shift;

    my %P; # the hash of all parameters we wish to override

    # first convert the command line parameters to the hash
    {
        my $parameter = undef;
        print "Parsing --decoder-flags: |$___DECODER_FLAGS|\n";
        # /g is required on both: without it only the leading whitespace was
        # trimmed and only the first run of spaces collapsed, so split / /
        # produced empty tokens that were stored as parameter values
        $___DECODER_FLAGS =~ s/^\s+|\s+$//g;
        $___DECODER_FLAGS =~ s/\s+/ /g;
        foreach my $tok (split(/ /, $___DECODER_FLAGS)) {
            if ($tok =~ /^\-([^\d].*)$/) {
                # a dash followed by a non-digit starts a new parameter
                # (a dash followed by a digit is a negative value)
                $parameter = $1;
                $parameter = $ABBR2FULL{$parameter} if defined($ABBR2FULL{$parameter});
            }
            else {
                die "Found value with no -paramname before it: $tok"
                  if !defined $parameter;
                push @{$P{$parameter}}, $tok;
            }
        }
    }

    # Convert weights to elements in P
    foreach my $abbr (keys %$triples) {
        my $name = defined $ABBR2FULL{$abbr} ? $ABBR2FULL{$abbr} : $abbr;
        # First delete all weights params from the input, in short or long-named version
        delete($P{$abbr});
        delete($P{$ABBR2FULL{$abbr}}) if defined $ABBR2FULL{$abbr};
        # Then feed P with the current values
        # NOTE(review): values are read from the global %used_triples, not from
        # $triples; presumably callers pass \%used_triples -- confirm.
        foreach my $feature (@{$used_triples{$abbr}}) {
            my ($val, $min, $max) = @$feature;
            push @{$P{$name}}, $val;
        }
    }

    # create new moses.ini decoder config file by cloning and overriding the original one
    open(my $ini_fh, '<', $infn) or die "Can't read $infn";
    delete($P{"config"}); # never output
    print "Saving new config to: $outfn\n";
    open(my $out_fh, '>', $outfn) or die "Can't write $outfn";
    print {$out_fh} "# MERT optimized configuration\n";
    print {$out_fh} "# decoder $___DECODER\n";
    print {$out_fh} "# BLEU $bleu_achieved on dev $___DEV_F\n";
    print {$out_fh} "# We were before running iteration $iteration\n";
    print {$out_fh} "# finished ".`date`;

    my $line = <$ini_fh>;
    while (defined $line) {
        my ($parameter) = $line =~ /^\[(.+)\]\s*$/;

        # not a [parameter] header: keep comments and blank lines, drop the rest.
        # The current line is examined before advancing so that a comment on
        # the very first line of the file is preserved.
        if (!defined $parameter) {
            print {$out_fh} $line if $line =~ /^\#/ || $line =~ /^\s+$/;
            $line = <$ini_fh>;
            next;
        }

        # parameter name
        $parameter = $ABBR2FULL{$parameter} if defined($ABBR2FULL{$parameter});
        print {$out_fh} "[$parameter]\n";

        if (defined($P{$parameter})) {
            # changed parameter: write new values
            print {$out_fh} $_."\n" foreach @{$P{$parameter}};
            delete($P{$parameter});
            # skip until new parameter, only write comments
            while (defined($line = <$ini_fh>)) {
                last if $line =~ /^\[/;
                print {$out_fh} $line if $line =~ /^\#/ || $line =~ /^\s+$/;
            }
            next;
        }

        # unchanged parameter, write old
        while (defined($line = <$ini_fh>)) {
            last if $line =~ /^\[/;
            print {$out_fh} $line;
        }
    }

    # write all additional parameters
    foreach my $parameter (keys %P) {
        print {$out_fh} "\n[$parameter]\n";
        print {$out_fh} $_."\n" foreach @{$P{$parameter}};
    }

    close($ini_fh);
    # buffered write errors only surface at close
    close($out_fh) or die "Can't write $outfn";
    print STDERR "Saved: $outfn\n";
}

# Run a command via system(), logging it to STDERR.
# Returns true if the command exited with status 0, false otherwise.
# Exits the whole script with status 1 if the command could not be started
# or was killed by a signal.
sub safesystem {
    print STDERR "Executing: @_\n";
    system(@_);
    if ($? == -1) {
        print STDERR "Failed to execute: @_\n  $!\n";
        exit(1);
    }
    elsif ($? & 127) {
        # the command goes in as a %s argument, never into the format string:
        # a '%' in it (e.g. from a sprintf template) would corrupt the message
        printf STDERR "Execution of: %s\n  died with signal %d, %s coredump\n",
            "@_", ($? & 127), ($? & 128) ? 'with' : 'without';
        exit(1);
    }
    else {
        my $exitcode = $? >> 8;
        print STDERR "Exit code: $exitcode\n" if $exitcode;
        return ! $exitcode;
    }
}

# Turn a possibly relative path into an absolute one without touching the
# file system: prefixes the current directory (from `pawd`, falling back to
# `pwd`), collapses "/./", repeated slashes and "dir/.." pairs, and strips
# the first "/nfsmnt" component.
#   $PATH - the path to normalize
# Returns the normalized path; a path that is already absolute is returned
# with only "/nfsmnt" removed.
sub ensure_full_path {
    my $PATH = shift;
    $PATH =~ s{/nfsmnt}{};
    return $PATH if $PATH =~ m{^/};

    my $dir = `pawd 2>/dev/null`;
    $dir = `pwd` if !$dir;
    chomp($dir);

    $PATH = $dir."/".$PATH;
    $PATH =~ s{[\r\n]}{}g;
    $PATH =~ s{/\./}{/}g;
    $PATH =~ s{/+}{/}g;

    # resolve "dir/../" pairs; the counter bounds the loop for paths that
    # cannot be reduced any further
    my $sanity = 0;
    while ($PATH =~ m{/\.\./} && $sanity++ < 10) {
        $PATH =~ s{/+}{/}g;
        $PATH =~ s{/[^/]+/\.\./}{/}g;
    }
    $PATH =~ s{/[^/]+/\.\.$}{};
    $PATH =~ s{/+$}{};
    $PATH =~ s{/nfsmnt}{};
    return $PATH;
}

# Walk a moses ini file, check the model files it names and record how many
# lambdas each model needs.
#   $ini - path of the ini file
# Returns a hashref: short model name => number of files defined for it.
# Side effects: appends [start, min, max] triples to the global %used_triples
# (taken from $additional_triples, plus distortion and, in ASYNC mode, word
# penalty triples) and prints diagnostics.
# Dies if the file cannot be read, is empty, or a section has no short name;
# calls exit(1) after reporting any other problem found.
sub scan_config {
    my $ini = shift;
    my $inishortname = $ini;
    $inishortname =~ s/^.*\///; # for error reporting

    # in which field (counting from zero) is the filename to check?
    my %where_is_filename = (
        "ttable-file" => 3,
        "generation-file" => 3,
        "lmodel-file" => 3,
        "distortion-file" => 3,
    );
    # by default, each line of each section means one lambda, but some sections
    # explicitly state a custom number of lambdas
    my %where_is_lambda_count = (
        "ttable-file" => 2,
        "generation-file" => 2,
        "distortion-file" => 2,
    );

    open(my $ini_fh, '<', $ini) or die "Can't read $ini";
    my $section = undef;   # name of the section we are reading
    my $shortname = undef; # the corresponding short name
    my $nr = 0;
    my $error = 0;
    my %defined_files;
    my %defined_steps;

    # check the ini file for compatible mapping steps and actually defined files
    while (my $line = <$ini_fh>) {
        $nr++;
        next if $line =~ /^\s*#/; # skip comments

        if ($line =~ /^\[([^\]]*)\]\s*$/) {
            $section = $1;
            $shortname = $TABLECONFIG2ABBR{$section};
            next;
        }
        next if !defined $section;

        if ($section eq "mapping") {
            # keep track of mapping steps used
            if ($line =~ /^([TG])/ || $line =~ /^\d+ ([TG])/) {
                $defined_steps{$1}++;
            }
        }

        # only sections that name model files are relevant to lambdas
        next if !defined $where_is_filename{$section};

        chomp $line;
        my @flds = split / +/, $line;
        my $fn = $flds[$where_is_filename{$section}];
        next if !defined $fn || $fn =~ /^\s+$/;

        print "checking weight-count for $section\n";
        # this is a filename! check it
        if ($fn !~ /^\//) {
            $error = 1;
            print STDERR "$inishortname:$nr:Filename not absolute: $fn\n";
        }
        if (! -s $fn && ! -s "$fn.gz") {
            $error = 1;
            print STDERR "$inishortname:$nr:File does not exist or empty: $fn\n";
        }

        # remember the number of files used, to know how many lambdas do we need
        die "No short name was defined for section $section!"
          if ! defined $shortname;

        # how many lambdas does this model need?
        # either specified explicitly, or the default, i.e. one
        my $needlambdas = defined $where_is_lambda_count{$section}
                        ? $flds[$where_is_lambda_count{$section}]
                        : 1;
        print STDERR "Config needs $needlambdas lambdas for $section (i.e. $shortname)\n" if $verbose;

        # count the default triples without dereferencing a missing entry, so
        # the error message below can be printed even when there are none
        my $have_defaults = defined $additional_triples->{$shortname};
        my $default_count = $have_defaults ? scalar(@{$additional_triples->{$shortname}}) : 0;

        if (!defined $___LAMBDA && (!$have_defaults || $default_count < $needlambdas)) {
            print STDERR "$inishortname:$nr:Your model $shortname needs $needlambdas weights but we define the default ranges for only "
              .$default_count." weights. Cannot use the default, you must supply lambdas by hand.\n";
            $error = 1;
        }
        else {
            # note: table may use less parameters than the maximum number
            # of triples
            for (my $lambda = 0; $lambda < $needlambdas; $lambda++) {
                my ($start, $min, $max)
                    = @{${$additional_triples->{$shortname}}[$lambda]};
                push @{$used_triples{$shortname}}, [$start, $min, $max];
            }
        }
        $defined_files{$shortname}++;
    }
    die "$inishortname: File was empty!" if !$nr;
    close $ini_fh;

    # the number of model files must match the steps used in [mapping]
    for my $pair (qw/T=tm=translation G=g=generation/) {
        my ($tg, $shortname, $label) = split /=/, $pair;
        $defined_files{$shortname} = 0 if ! defined $defined_files{$shortname};
        $defined_steps{$tg} = 0 if ! defined $defined_steps{$tg};
        if ($defined_files{$shortname} != $defined_steps{$tg}) {
            print STDERR "$inishortname: You defined $defined_files{$shortname} files for $label but use $defined_steps{$tg} in [mapping]!\n";
            $error = 1;
        }
    }

    # distance-based distortion
    if ($___ASYNC == 1) {
        print STDERR "ASYNC distortion & word penalty";
        # one distortion and one word-penalty triple per translation step
        push @{$used_triples{"d"}}, map { [ 1.0, 0.0, 2.0 ] } 1 .. $defined_steps{"T"};
        push @{$used_triples{"w"}}, map { [ 0.5, -1.0, 1.0 ] } 1 .. $defined_steps{"T"};

        # debug print
        print "distortion:";
        foreach my $subarray (@{$used_triples{"d"}}) {
            print @$subarray, "\n";
        }
    }
    else {
        print STDERR "SYNC distortion";
        push @{$used_triples{"d"}}, [1.0, 0.0, 2.0];
    }

    exit(1) if $error;
    return (\%defined_files);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -