⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 train-factored-phrase-model.perl.svn-base

📁 解码器是基于短语的统计机器翻译系统的核心模块
💻 SVN-BASE
📖 第 1 页 / 共 4 页
字号:
	elsif ($o_following eq 'swap') {  $swap_following_f++;  $swap_following_fe++; }
	elsif ($o_following eq 'other'){ $other_following_f++; $other_following_fe++; }
	else { print STDERR "buggy line (o_following:$o_following): $_\n"; }
    }
    if (defined($REORDERING_MODEL{"f"})) {
	&store_reordering_f();
    }
    if (defined($REORDERING_MODEL{"fe"})) {
	&store_reordering_fe();
    }
    if (! $debug) { safesystem("rm $___EXTRACT_FILE.$factor.o.sorted") or die;}
}

# Write the reordering probabilities conditioned on the source phrase only.
#
# Reads the file-level counters $mono/$swap/$other_{previous,following}_f and
# the current source phrase $f_current, and prints one line to each of the
# output handles OF, OBF, MF, MBF whose model name is present in
# %REORDERING_MODEL.  None of these are declared here; they are maintained
# and opened elsewhere in the file.
#
# NOTE(review): the two totals are used as divisors; this assumes the caller
# guarantees they are non-zero -- confirm against the counter initialisation.
sub store_reordering_f {
    my $total_previous_f = $mono_previous_f+$swap_previous_f+$other_previous_f;
    my $total_following_f = $mono_following_f+$swap_following_f+$other_following_f;

    # three-way orientation (mono / swap / other) w.r.t. the previous phrase
    if(defined($REORDERING_MODEL{"orientation-f"})) {
        printf OF ("%s ||| %.5f %.5f %.5f\n",
                   $f_current,
                   $mono_previous_f/$total_previous_f,
                   $swap_previous_f/$total_previous_f,
                   $other_previous_f/$total_previous_f);
    }

    # three-way orientation w.r.t. both the previous and the following phrase
    if(defined($REORDERING_MODEL{"orientation-bidirectional-f"})) {
        printf OBF ("%s ||| %.5f %.5f %.5f %.5f %.5f %.5f\n",
                    $f_current,
                    $mono_previous_f/$total_previous_f,
                    $swap_previous_f/$total_previous_f,
                    $other_previous_f/$total_previous_f,
                    $mono_following_f/$total_following_f,
                    $swap_following_f/$total_following_f,
                    $other_following_f/$total_following_f);
    }

    # two-way: monotone vs. everything else (swap and other are pooled)
    if(defined($REORDERING_MODEL{"monotonicity-f"})) {
        printf MF ("%s ||| %.5f %.5f\n",
                   $f_current,
                   $mono_previous_f/$total_previous_f,
                   ($swap_previous_f+$other_previous_f)/$total_previous_f);
    }

    # two-way, for both the previous and the following phrase
    if(defined($REORDERING_MODEL{"monotonicity-bidirectional-f"})) {
        printf MBF ("%s ||| %.5f %.5f %.5f %.5f\n",
                    $f_current,
                    $mono_previous_f/$total_previous_f,
                    ($swap_previous_f+$other_previous_f)/$total_previous_f,
                    $mono_following_f/$total_following_f,
                    ($swap_following_f+$other_following_f)/$total_following_f);
    }
}

# Write the reordering probabilities conditioned on the phrase pair.
#
# Same layout as store_reordering_f, but uses the *_fe counters, prints both
# $f_current and $e_current as the key, and writes to the handles OFE, OBFE,
# MFE, MBFE.  Counters, phrases and handles are file-level and set elsewhere.
#
# NOTE(review): the two totals are used as divisors; this assumes the caller
# guarantees they are non-zero -- confirm against the counter initialisation.
sub store_reordering_fe {
    my $total_previous_fe = $mono_previous_fe+$swap_previous_fe+$other_previous_fe;
    my $total_following_fe = $mono_following_fe+$swap_following_fe+$other_following_fe;

    # three-way orientation (mono / swap / other) w.r.t. the previous phrase
    if(defined($REORDERING_MODEL{"orientation-fe"})) {
        printf OFE ("%s ||| %s ||| %.5f %.5f %.5f\n",
                    $f_current, $e_current,
                    $mono_previous_fe/$total_previous_fe,
                    $swap_previous_fe/$total_previous_fe,
                    $other_previous_fe/$total_previous_fe);
    }

    # three-way orientation w.r.t. both the previous and the following phrase
    if(defined($REORDERING_MODEL{"orientation-bidirectional-fe"})) {
        printf OBFE ("%s ||| %s ||| %.5f %.5f %.5f %.5f %.5f %.5f\n",
                     $f_current, $e_current,
                     $mono_previous_fe/$total_previous_fe,
                     $swap_previous_fe/$total_previous_fe,
                     $other_previous_fe/$total_previous_fe,
                     $mono_following_fe/$total_following_fe,
                     $swap_following_fe/$total_following_fe,
                     $other_following_fe/$total_following_fe);
    }

    # two-way: monotone vs. everything else (swap and other are pooled)
    if(defined($REORDERING_MODEL{"monotonicity-fe"})) {
        printf MFE ("%s ||| %s ||| %.5f %.5f\n",
                    $f_current, $e_current,
                    $mono_previous_fe/$total_previous_fe,
                    ($swap_previous_fe+$other_previous_fe)/$total_previous_fe);
    }

    # two-way, for both the previous and the following phrase
    if(defined($REORDERING_MODEL{"monotonicity-bidirectional-fe"})) {
        printf MBFE ("%s ||| %s ||| %.5f %.5f %.5f %.5f\n",
                     $f_current, $e_current,
                     $mono_previous_fe/$total_previous_fe,
                     ($swap_previous_fe+$other_previous_fe)/$total_previous_fe,
                     $mono_following_fe/$total_following_fe,
                     ($swap_following_fe+$other_following_fe)/$total_following_fe);
    }
}

### (8) LEARN GENERATION MODEL

my $factor_e_source;

# Build one generation table per entry of $___GENERATION_FACTORS.
#
# The setting is a '+'-separated list of "SOURCE-TARGET" factor specs.  For
# each spec the file-level $factor, $factor_e_source and $factor_e are set
# and get_generation() is called.  If the setting is undefined the step is
# skipped with a note on STDERR.
sub get_generation_factored {
    print STDERR "(8) learn generation model @ ".`date`;
    if (defined $___GENERATION_FACTORS) {
        foreach my $f (split(/\+/,$___GENERATION_FACTORS)) {
            $factor = $f;
            ($factor_e_source,$factor_e) = split(/\-/,$factor);
            &get_generation();
        }
    } else {
        print STDERR "  no generation model requested, skipping step\n";
    }
}

# Count source->target factor co-occurrences in the corpus file
# "$___CORPUS.$___E" and write them, with relative frequencies, to
# "$___MODEL_DIR/generation.$factor", which is then gzipped.
#
# Reads the file-level $factor, $factor_e_source and $factor_e (comma
# separated factor indices).  Each whitespace-separated token of the corpus
# is split on '|' and the selected factors are re-joined with '|'.
#
# Output line: "SOURCE TARGET count/total(SOURCE) count/total(TARGET)".
# Dies if a file cannot be opened or closed, or if rm/gzip fail.
sub get_generation {
    print STDERR "(8) [$factor] generate generation table @ ".`date`;

    # the factor lists do not change per token, so split them only once
    my @source_factors = split(/,/,$factor_e_source);
    my @target_factors = split(/,/,$factor_e);

    my (%GENERATION,%GENERATION_TOTAL_SOURCE,%GENERATION_TOTAL_TARGET);
    open(E, '<', $___CORPUS.".".$___E) or die "Can't read ".$___CORPUS.".".$___E;
    $alignment_id=0;    # file-level variable, reset here as in the original
    while(<E>) {
        chomp;
        foreach (split) {
            my @FACTOR = split(/\|/);
            my $source = join("|", @FACTOR[@source_factors]);
            my $target = join("|", @FACTOR[@target_factors]);

            $GENERATION{$source}{$target}++;
            $GENERATION_TOTAL_SOURCE{$source}++;
            $GENERATION_TOTAL_TARGET{$target}++;
        }
    }
    close(E);

    open(GEN, '>', "$___MODEL_DIR/generation.$factor") or die "Can't write $___MODEL_DIR/generation.$factor";
    foreach my $source (keys %GENERATION) {
        foreach my $target (keys %{$GENERATION{$source}}) {
            printf GEN ("%s %s %.7f %.7f\n",$source,$target,
                        $GENERATION{$source}{$target}/$GENERATION_TOTAL_SOURCE{$source},
                        $GENERATION{$source}{$target}/$GENERATION_TOTAL_TARGET{$target});
        }
    }
    # buffered write errors only surface at close; do not gzip a truncated table
    close(GEN) or die "Can't close $___MODEL_DIR/generation.$factor: $!";

    safesystem("rm -f $___MODEL_DIR/generation.$factor.gz") or die;
    safesystem("gzip $___MODEL_DIR/generation.$factor") or die;
}

### (9) CREATE CONFIGURATION FILE

# Write "$___MODEL_DIR/moses.ini".
#
# Makes $___MODEL_DIR, $___VCB_E and $___VCB_F absolute via full_path(), then
# emits the sections [input-factors], [mapping], [ttable-file],
# [generation-file] (optional), [lmodel-file], [ttable-limit],
# [distortion-file] (unless $___REORDERING is "distance"), [weight-d],
# [weight-l], [weight-t], [weight-generation] (optional), [weight-w],
# [distortion-limit] and, for a non-'|' delimiter, [factor-delimiter].
#
# Every section header and every value is written on a line of its own.
#
# Dies if no translation factors are defined or the file cannot be written;
# exits with status 1 if fewer translation/generation tables are defined than
# the decoding steps refer to.
sub create_ini {
    print STDERR "(9) create moses.ini @ ".`date`;

    &full_path(\$___MODEL_DIR);
    &full_path(\$___VCB_E);
    &full_path(\$___VCB_F);
    open(INI, '>', "$___MODEL_DIR/moses.ini") or die "Can't write $___MODEL_DIR/moses.ini";
    print INI "############################ MOSES CONFIG FILE ############################\n";

    if (defined $___TRANSLATION_FACTORS) {
        print INI "# input factors\n";
        print INI "[input-factors]\n";
        # list every input factor index from 0 up to the largest one used
        my $INPUT_FACTOR_MAX = 0;
        foreach my $table (split /\+/, $___TRANSLATION_FACTORS) {
            my ($factor_list, $output) = split /-+/, $table;
            foreach (split(/,/,$factor_list)) {
                $INPUT_FACTOR_MAX = $_ if $_>$INPUT_FACTOR_MAX;
            }
        }
        for (my $c = 0; $c <= $INPUT_FACTOR_MAX; $c++) { print INI "$c\n"; }
    } else {
        die "No translation steps defined, cannot prepare [input-factors] section\n";
    }

    # %stepsused maps step type ("T"/"G") to highest referenced id + 1
    my %stepsused;
    print INI "\n# mapping steps\n[mapping]\n";
    foreach (split(/,/,$___DECODING_STEPS)) {
        # "t0" -> "T 0", "g1" -> "G 1"
        s/t/T /g;
        s/g/G /g;
        my ($type, $num) = split /\s+/;
        $stepsused{$type} = $num+1 if !defined $stepsused{$type} || $stepsused{$type} < $num+1;
        print INI $_."\n";
    }

    print INI "\n# translation tables: source-factors, target-factors, number of scores, file\n[ttable-file]\n";
    my $num_of_ttables = 0;
    foreach my $f (split(/\+/,$___TRANSLATION_FACTORS)) {
        $num_of_ttables++;
        my $ff = $f;
        $ff =~ s/\-/ /;
        print INI "$ff 5 $___MODEL_DIR/phrase-table.$f.gz\n";
    }
    if ($num_of_ttables != $stepsused{"T"}) {
        print STDERR "WARNING: Your [mapping-steps] require translation steps up to id $stepsused{T} but you defined translation steps 0..$num_of_ttables\n";
        exit 1 if $num_of_ttables < $stepsused{"T"}; # fatal to define less
    }

    my $weights_per_generation_model = 2;
    if (defined $___GENERATION_FACTORS) {
        print INI "\n# generation models: source-factors, target-factors, number-of-weights, filename\n";
        print INI "[generation-file]\n";
        my $cnt = 0;
        foreach my $f (split(/\+/,$___GENERATION_FACTORS)) {
            $cnt++;
            my $ff = $f;
            $ff =~ s/\-/ /;
            print INI "$ff $weights_per_generation_model $___MODEL_DIR/generation.$f.gz\n";
        }
        if ($cnt != $stepsused{"G"}) {
            print STDERR "WARNING: Your [mapping-steps] require generation steps up to id $stepsused{G} but you defined generation steps 0..$cnt\n";
            exit 1 if $cnt < $stepsused{"G"}; # fatal to define less
        }
    } else {
        print INI "\n# no generation models, no generation-file section\n";
    }

    print INI "\n# language models: type(srilm/irstlm), factors, order, file\n[lmodel-file]\n";
    foreach my $lm (@___LM) {
        my ($f, $o, $fn) = @$lm;
        my $type = 0; # default to srilm
        print INI "$type $f $o $fn\n";
    }

    print INI "\n\n# limit on how many phrase translations e for each phrase f are loaded\n# 0 = all elements loaded\n[ttable-limit]\n20\n";
    foreach(1..$num_of_ttables) {
        print INI "0\n";
    }

    my $weight_d_count = 0;
    if ($___REORDERING ne "distance") {
        my $file = "# distortion (reordering) files\n[distortion-file]\n";
        my $factor_i = 0;
        foreach my $factor (split(/\+/,$___REORDERING_FACTORS)) {
            foreach my $r (keys %REORDERING_MODEL) {
                next if $r eq "fe" || $r eq "f";
                # the distance model is counted once, not once per factor
                next if $r eq "distance" && $factor_i>0;
                if ($r eq "distance") { $weight_d_count++; }
                else {
                    my $type = $r;
                    $type =~ s/orientation/msd/;
                    # turn the model name into the table file name
                    $r =~ s/-bidirectional/.bi/;
                    $r =~ s/-f/.f/;
                    $r =~ s/orientation/orientation-table.$factor/;
                    $r =~ s/monotonicity/monotonicity-table.$factor/;

                    # 3 weights for orientation, 1 for monotonicity,
                    # doubled for bidirectional models
                    my $w;
                    if ($r =~ /orient/) { $w = 3; } else { $w = 1; }
                    if ($r =~ /bi/) { $w *= 2; }
                    $weight_d_count += $w;
                    $file .= "$factor $type $w $___MODEL_DIR/$r.$___REORDERING_SMOOTH.gz\n";
                }
            }
            $factor_i++;
        }
        print INI $file."\n";
    }
    else {
        $weight_d_count = 1;
    }

    print INI "# distortion (reordering) weight\n[weight-d]\n";
    for(my $i=0;$i<$weight_d_count;$i++) {
        print INI "".(0.6/(scalar keys %REORDERING_MODEL))."\n";
    }

    print INI "\n# language model weights\n[weight-l]\n";
    my $lmweighttotal = 0.5;
    foreach(1..scalar @___LM) {
        printf INI "%.4f\n", $lmweighttotal / scalar @___LM;
    }

    print INI "\n\n# translation model weights\n[weight-t]\n";
    foreach my $f (split(/\+/,$___TRANSLATION_FACTORS)) {
        # five scores per table, matching the "5" written in [ttable-file]
        print INI "0.2\n0.2\n0.2\n0.2\n0.2\n";
    }

    if (defined $___GENERATION_FACTORS) {
        print INI "\n# generation model weights, for each model $weights_per_generation_model weights\n";
        print INI "[weight-generation]\n";
        foreach my $f (split(/\+/,$___GENERATION_FACTORS)) {
            print INI "0.3\n0\n";
        }
    } else {
        print INI "\n# no generation models, no weight-generation section\n";
    }

    print INI "\n# word penalty\n[weight-w]\n-1\n\n[distortion-limit]\n6\n";

    # only set the factor delimiter if it is non-standard
    unless ($___FACTOR_DELIMITER eq '|') {
        print INI "\n# delimiter between factors in input\n[factor-delimiter]\n$___FACTOR_DELIMITER\n\n"
    }

    # buffered write errors only surface at close
    close(INI) or die "Can't close $___MODEL_DIR/moses.ini: $!";
}

# Turn the path referenced by $PATH (a scalar reference) into an absolute,
# tidied path, in place.
#
# Paths that already start with '/' are left untouched.  Otherwise the output
# of `pawd` (falling back to `pwd` when that yields nothing) is prepended,
# then "/./" segments, repeated slashes, "dir/.." pairs and trailing slashes
# are removed textually.  No file-system lookups are made.
sub full_path {
    my ($PATH) = @_;
    return if $$PATH =~ /^\//;
    my $dir = `pawd 2>/dev/null`;
    if(!$dir){$dir = `pwd`;}
    chomp $dir;
    $$PATH = $dir."/".$$PATH;
    $$PATH =~ s/[\r\n]//g;
    $$PATH =~ s/\/\.\//\//g;
    $$PATH =~ s/\/+/\//g;
    # collapse "x/../" pairs; the counter bounds the loop to 10 passes
    my $sanity = 0;
    while($$PATH =~ /\/\.\.\// && $sanity++<10) {
        $$PATH =~ s/\/+/\//g;
        $$PATH =~ s/\/[^\/]+\/\.\.\//\//g;
    }
    $$PATH =~ s/\/[^\/]+\/\.\.$//;
    $$PATH =~ s/\/+$//;
}

# Run a command through system(), logging it to STDERR first.
#
# Arguments are passed to system() unchanged.  Returns true when the command
# exits with status 0 and false for any other exit status (which is also
# logged).  Exits the script with status 1 if the command could not be
# started or was terminated by a signal.
sub safesystem {
    print STDERR "Executing: @_\n";
    system(@_);
    if ($? == -1) {
        print STDERR "Failed to execute: @_\n  $!\n";
        exit(1);
    }
    elsif ($? & 127) {
        printf STDERR "Execution of: @_\n  died with signal %d, %s coredump\n",
            ($? & 127),  ($? & 128) ? 'with' : 'without';
        exit(1);
    }
    else {
        my $exitcode = $? >> 8;
        print STDERR "Exit code: $exitcode\n" if $exitcode;
        return ! $exitcode;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -