⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mert-moses.pl.svn-base

📁 解码器是基于短语的统计机器翻译系统的核心模块
💻 SVN-BASE
📖 第 1 页 / 共 3 页
字号:
      die "No default weights defined for -$name"        if !defined $default_triples->{$name};      # XXX here was a deadly bug: we need a deep copy of the default values      my @copy = ();      foreach my $triple (@{$default_triples->{$name}}) {        my @copy_triple = @$triple;        push @copy, [ @copy_triple ];      }      push @{$use_triples->{$name}}, @copy;    }  }}# moses should use our configif ($___DECODER_FLAGS =~ /(^|\s)-(config|f) /|| $___DECODER_FLAGS =~ /(^|\s)-(ttable-file|t) /|| $___DECODER_FLAGS =~ /(^|\s)-(distortion-file) /|| $___DECODER_FLAGS =~ /(^|\s)-(generation-file) /|| $___DECODER_FLAGS =~ /(^|\s)-(lmodel-file) /) {  die "It is forbidden to supply any of -config, -ttable-file, -distortion-file, -generation-file or -lmodel-file in the --decoder-flags.\nPlease use only the --config option to give the config file that lists all the supplementary files.";}# walk through all lambdas the user wishes to optimize and check# if the number of lambdas matchesforeach my $name (keys %$use_triples) {  my $expected_lambdas = $lambdas_per_model->{$name};  $expected_lambdas = 0 if !defined $expected_lambdas;  my $got_lambdas = defined $use_triples->{$name} ? scalar @{$use_triples->{$name}}  : 0;  if ($got_lambdas != $expected_lambdas) {    if ($allow_unknown_lambdas && $expected_lambdas == 0) {      print STDERR "Allowing to optimize $name, although I have no idea what it is.\n";    } else {      print STDERR "Wrong number of lambdas for $name. Expected (given the config file): $expected_lambdas, got: $got_lambdas.Use --allow-unknown-lambdas to optimize lambdas that you are just introducingand I cannot validate against the models mentioned in moses.ini.\n";      exit 1;    }  }}# as weights are normalized in the next steps (by cmert)# normalize initial LAMBDAs, toomy $need_to_normalize = 1;my @order_of_lambdas_from_decoder = ();# this will store the labels of scores coming out of the decoder (and hence the order of lambdas coming out of mert)# we will use the array to interpret the lambdas# the array gets filled with labels only after first nbestlist was generated#store current directory and create the working directory (if needed)my $cwd = `pawd 2>/dev/null`; if(!$cwd){$cwd = `pwd`;}chomp($cwd);safesystem("mkdir -p $___WORKING_DIR") or die "Can't mkdir $___WORKING_DIR";{# open local scope#chdir to the working directorychdir($___WORKING_DIR) or die "Can't chdir to $___WORKING_DIR";# set start runmy $start_run = 1;if ($continue) {  # need to load last best values  print STDERR "Trying to continue an interrupted optimization.\n";  open IN, "finished_step.txt" or die "Failed to find the step number, failed to read finished_step.txt";  my $step = <IN>;  chomp $step;  $step++;  close IN;  if (! -e "run$step.best$___N_BEST_LIST_SIZE.out.gz") {    # allow stepping one extra iteration back    $step--;    die "Can't start from step $step, because run$step.best$___N_BEST_LIST_SIZE.out.gz was not found!"      if ! -e "run$step.best$___N_BEST_LIST_SIZE.out.gz";  }  $start_run = $step +1;  print STDERR "Reading last cached lambda values (result from step $step)\n";  @order_of_lambdas_from_decoder = get_order_of_scores_from_nbestlist("gunzip -c < run$step.best$___N_BEST_LIST_SIZE.out.gz |");  open IN, "weights.txt" or die "Can't read weights.txt";  my $newweights = <IN>;  chomp $newweights;  close IN;  my @newweights = split /\s+/, $newweights;  # dump_triples($use_triples);  $use_triples = store_new_lambda_values($use_triples, \@order_of_lambdas_from_decoder, \@newweights);  # dump_triples($use_triples);}if ($___FILTER_PHRASE_TABLE){  # filter the phrase tables wih respect to input, use --decoder-flags  print "filtering the phrase tables... ".`date`;  my $cmd = "$filtercmd ./filtered $___CONFIG $___DEV_F";  if (defined $___JOBS) {    safesystem("$qsubwrapper $pass_old_sge -command='$cmd' -queue-parameter=\"$queue_flags\" -stdout=filterphrases.out -stderr=filterphrases.err" )      or die "Failed to submit filtering of tables to the queue (via $qsubwrapper)";  } else {    safesystem($cmd) or die "Failed to filter the tables.";  }  # the decoder should now use the filtered model  $___CONFIG = "filtered/moses.ini";}else{  # do not filter phrase tables (useful if binary phrase tables are available)  # use the original configuration file}my $PARAMETERS;$PARAMETERS = $___DECODER_FLAGS . " -config $___CONFIG -inputtype $___INPUTTYPE";my $devbleu = undef;my $bestpoint = undef;my $run=$start_run-1;my $prev_size = -1;while(1) {  $run++;  # run beamdecoder with option to output nbestlists  # the end result should be (1) @NBEST_LIST, a list of lists; (2) @SCORE, a list of lists of lists  print "run $run start at ".`date`;  # In case something dies later, we might wish to have a copy  create_config($___CONFIG, "./run$run.moses.ini", $use_triples, $run, (defined$devbleu?$devbleu:"--not-estimated--"));  # skip if the user wanted  if (!$skip_decoder) {      print "($run) run decoder to produce n-best lists\n";      @order_of_lambdas_from_decoder = run_decoder($use_triples, $PARAMETERS, $run, \@order_of_lambdas_from_decoder, $need_to_normalize);      $need_to_normalize = 0;      safesystem("gzip -f run*out") or die "Failed to gzip run*out";  }  else {      print "skipped decoder run\n";      if (0 == scalar @order_of_lambdas_from_decoder) {        @order_of_lambdas_from_decoder = get_order_of_scores_from_nbestlist("gunzip -dc run*.best*.out.gz | head -1 |");      }      $skip_decoder = 0;      $need_to_normalize = 0;  }  my $EFF_REF_LEN = "";  if ($___AVERAGE) {     $EFF_REF_LEN = "-a";  }elsif ($___CLOSEST){     $EFF_REF_LEN = "-e";  }     my $EFF_NORM = "";  if ($___NONORM) {     $EFF_NORM = "-n";  }     # To be sure that scoring script produses these fresh:  safesystem("rm -f cands.opt feats.opt") or die;    # convert n-best list into a numberized format with error scores  print STDERR "Scoring the nbestlist.\n";  my $cmd = "export PYTHONPATH=$pythonpath ; gunzip -dc run*.best*.out.gz | sort -n -t \"|\" -k 1,1 | $SCORENBESTCMD $EFF_NORM $EFF_REF_LEN ".join(" ", @references)." ./";  if (defined $___JOBS) {    safesystem("$qsubwrapper $pass_old_sge -command='$cmd' -queue-parameter=\"$queue_flags\" -stdout=scorenbest.out -stderr=scorenbest.err") or die "Failed to submit scoring nbestlist to queue (via $qsubwrapper)";  } else {    safesystem($cmd) or die "Failed to score nbestlist";  }  print STDERR "Hoping that scoring succeeded. We'll see if we can read the output files now.\n";  # keep a count of lines in nbests lists (alltogether)  # if it did not increase since last iteration, we are DONE  open(IN,"cands.opt") or die "Can't read cands.opt";  my $size=0;  while (<IN>) {    chomp;    my @flds = split / /;    $size += $flds[1];  }  close(IN);  print "$size accumulated translations\n";  print "prev accumulated translations was : $prev_size\n";  if ($size <= $prev_size){     print STDERR "No new hypotheses in nbest list. Stopping.\n";     last;  }  $prev_size = $size;  # run cmert  # cmert reads in the file init.opt containing three lines:  #  minimum values  #  maximum values  #  current values  # We need to prepare the files and **the order of the lambdas must  # correspond to the order @order_of_lambdas_from_decoder  my @MIN = ();   # lower bounds  my @MAX = ();   # upper bounds  my @CURR = ();   # the starting values  my @NAME = ();  # to which model does the lambda belong    # walk in order of @order_of_lambdas_from_decoder and collect the min,max,val  my %visited = ();  foreach my $name (@order_of_lambdas_from_decoder) {    next if $visited{$name};    $visited{$name} = 1;    die "The decoder produced also some '$name' scores, but we do not know the ranges for them, no way to optimize them\n"      if !defined $use_triples->{$name};    foreach my $feature (@{$use_triples->{$name}}) {      my ($val, $min, $max) = @$feature;      push @CURR, $val;      push @MIN, $min;      push @MAX, $max;      push @NAME, $name;    }  }  open(OUT,"> init.opt") or die "Can't write init.opt (WD now $___WORKING_DIR)";  print OUT join(" ", @MIN)."\n";  print OUT join(" ", @MAX)."\n";  print OUT join(" ", @CURR)."\n";  close(OUT);  #just for brevity  open(OUT,"> names.txt") or die "Can't write names.txt (WD now $___WORKING_DIR)";  print OUT join(" ", @NAME)."\n";  close(OUT);  # make a backup copy labelled with this run number  safesystem("cp init.opt run$run.init.opt") or die;  my $DIM = scalar(@CURR); # number of lambdas  $cmd="$cmertcmd -d $DIM";   print STDERR "Starting cmert.\n";  if (defined $___JOBS) {    safesystem("$qsubwrapper $pass_old_sge -command='$cmd' -stderr=cmert.log -queue-parameter=\"$queue_flags\"") or die "Failed to start cmert (via qsubwrapper $qsubwrapper)";  } else {    safesystem("$cmd 2> cmert.log") or die "Failed to run cmert";  }  die "Optimization failed, file weights.txt does not exist or is empty"    if ! -s "weights.txt";  # backup copies  safesystem ("cp cmert.log run$run.cmert.log") or die;  safesystem ("cp weights.txt run$run.weights.txt") or die; # this one is needed for restarts, too  print "run $run end at ".`date`;  $bestpoint = undef;  $devbleu = undef;  open(IN,"cmert.log") or die "Can't open cmert.log";  while (<IN>) {    if (/Best point:\s*([\s\d\.\-]+?)\s*=> ([\d\.]+)/) {      $bestpoint = $1;      $devbleu = $2;      last;    }  }  close IN;  die "Failed to parse cmert.log, missed Best point there."    if !defined $bestpoint || !defined $devbleu;  print "($run) BEST at $run: $bestpoint => $devbleu at ".`date`;  my @newweights = split /\s+/, $bestpoint;  # update my cache of lambda values  $use_triples = store_new_lambda_values($use_triples, \@order_of_lambdas_from_decoder, \@newweights);  ## additional stopping criterion: weights have not changed  my $shouldstop = 1;  for(my $i=0; $i<@CURR; $i++) {    die "Lost weight! cmert reported fewer weights (@newweights) than we gave it (@CURR)"      if !defined $newweights[$i];    if (abs($CURR[$i] - $newweights[$i]) >= $minimum_required_change_in_weights) {      $shouldstop = 0;      last;    }  }  open F, "> finished_step.txt" or die "Can't mark finished step";  print F $run."\n";  close F;  if ($shouldstop) {    print STDERR "None of the weights changed more than $minimum_required_change_in_weights. Stopping.\n";    last;  }}print "Training finished at ".`date`;safesystem("cp init.opt run$run.init.opt") or die;safesystem ("cp cmert.log run$run.cmert.log") or die;create_config($___CONFIG, "./moses.ini", $use_triples, $run, $devbleu);# just to be sure that we have the really last finished step markedopen F, "> finished_step.txt" or die "Can't mark finished step";print F $run."\n";close F;#chdir back to the original directory # useless, just to remind we were not therechdir($cwd);} # end of local scopesub store_new_lambda_values {  # given new lambda values (in given order), replace the 'val' element in our triples  my $triples = shift;  my $names = shift;  my $values = shift;  my %idx = ();  foreach my $i (0..scalar(@$values)-1) {    my $name = $names->[$i];    die "Missed name for lambda $values->[$i] (in @$values; names: @$names)"      if !defined $name;    if (!defined $idx{$name}) {      $idx{$name} = 0;    } else {      $idx{$name}++;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -