⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 train-factored-phrase-model.perl.svn-base

📁 moses开源的机器翻译系统
💻 SVN-BASE
📖 第 1 页 / 共 4 页
字号:
	# update counts
	if    ($o_previous eq 'mono') {  $mono_previous_f++;  $mono_previous_fe++; }
	elsif ($o_previous eq 'swap') {  $swap_previous_f++;  $swap_previous_fe++; }
	elsif ($o_previous eq 'other'){ $other_previous_f++; $other_previous_fe++; }
	else { print STDERR "buggy line (o_previous:$o_previous): $_\n"; }

	if    ($o_following eq 'mono') {  $mono_following_f++;  $mono_following_fe++; }
	elsif ($o_following eq 'swap') {  $swap_following_f++;  $swap_following_fe++; }
	elsif ($o_following eq 'other'){ $other_following_f++; $other_following_fe++; }
	else { print STDERR "buggy line (o_following:$o_following): $_\n"; }
    }
    if (defined($REORDERING_MODEL{"f"})) {
	&store_reordering_f();
    }
    if (defined($REORDERING_MODEL{"fe"})) {
	&store_reordering_fe();
    }
    if (! $debug) { safesystem("rm $___EXTRACT_FILE.$factor.o.sorted") or die;}
}

# Write one line of reordering probabilities, keyed on the current source
# phrase ($f_current), to every source-only table enabled in
# %REORDERING_MODEL.  Probabilities are the relative frequencies of the
# mono/swap/other counters accumulated outside this sub.
#
# Output handles OF, OBF, MF and MBF are opened elsewhere in the file.
# NOTE(review): the totals are assumed to be non-zero (presumably the
# counters are pre-loaded with a smoothing value) -- confirm against the
# caller, otherwise the divisions below die.
sub store_reordering_f {
    my $total_previous_f  = $mono_previous_f+$swap_previous_f+$other_previous_f;
    my $total_following_f = $mono_following_f+$swap_following_f+$other_following_f;

    if (defined($REORDERING_MODEL{"msd-f"})) {
        printf OF ("%s ||| %.5f %.5f %.5f\n",
                   $f_current,
                   $mono_previous_f/$total_previous_f,
                   $swap_previous_f/$total_previous_f,
                   $other_previous_f/$total_previous_f);
    }
    if (defined($REORDERING_MODEL{"msd-bidirectional-f"})) {
        printf OBF ("%s ||| %.5f %.5f %.5f %.5f %.5f %.5f\n",
                    $f_current,
                    $mono_previous_f/$total_previous_f,
                    $swap_previous_f/$total_previous_f,
                    $other_previous_f/$total_previous_f,
                    $mono_following_f/$total_following_f,
                    $swap_following_f/$total_following_f,
                    $other_following_f/$total_following_f);
    }
    if (defined($REORDERING_MODEL{"monotonicity-f"})) {
        # monotonicity models collapse swap and other into one class
        printf MF ("%s ||| %.5f %.5f\n",
                   $f_current,
                   $mono_previous_f/$total_previous_f,
                   ($swap_previous_f+$other_previous_f)/$total_previous_f);
    }
    if (defined($REORDERING_MODEL{"monotonicity-bidirectional-f"})) {
        printf MBF ("%s ||| %.5f %.5f %.5f %.5f\n",
                    $f_current,
                    $mono_previous_f/$total_previous_f,
                    ($swap_previous_f+$other_previous_f)/$total_previous_f,
                    $mono_following_f/$total_following_f,
                    ($swap_following_f+$other_following_f)/$total_following_f);
    }
}

# Same as store_reordering_f, but keyed on the phrase pair
# ($f_current, $e_current) and using the "_fe" counters.
#
# Output handles OFE, OBFE, MFE and MBFE are opened elsewhere in the file.
# NOTE(review): totals are assumed to be non-zero -- confirm against caller.
sub store_reordering_fe {
    my $total_previous_fe  = $mono_previous_fe+$swap_previous_fe+$other_previous_fe;
    my $total_following_fe = $mono_following_fe+$swap_following_fe+$other_following_fe;

    if (defined($REORDERING_MODEL{"msd-fe"})) {
        printf OFE ("%s ||| %s ||| %.5f %.5f %.5f\n",
                    $f_current, $e_current,
                    $mono_previous_fe/$total_previous_fe,
                    $swap_previous_fe/$total_previous_fe,
                    $other_previous_fe/$total_previous_fe);
    }
    if (defined($REORDERING_MODEL{"msd-bidirectional-fe"})) {
        printf OBFE ("%s ||| %s ||| %.5f %.5f %.5f %.5f %.5f %.5f\n",
                     $f_current, $e_current,
                     $mono_previous_fe/$total_previous_fe,
                     $swap_previous_fe/$total_previous_fe,
                     $other_previous_fe/$total_previous_fe,
                     $mono_following_fe/$total_following_fe,
                     $swap_following_fe/$total_following_fe,
                     $other_following_fe/$total_following_fe);
    }
    if (defined($REORDERING_MODEL{"monotonicity-fe"})) {
        printf MFE ("%s ||| %s ||| %.5f %.5f\n",
                    $f_current, $e_current,
                    $mono_previous_fe/$total_previous_fe,
                    ($swap_previous_fe+$other_previous_fe)/$total_previous_fe);
    }
    if (defined($REORDERING_MODEL{"monotonicity-bidirectional-fe"})) {
        printf MBFE ("%s ||| %s ||| %.5f %.5f %.5f %.5f\n",
                     $f_current, $e_current,
                     $mono_previous_fe/$total_previous_fe,
                     ($swap_previous_fe+$other_previous_fe)/$total_previous_fe,
                     $mono_following_fe/$total_following_fe,
                     ($swap_following_fe+$other_following_fe)/$total_following_fe);
    }
}

### (8) LEARN GENERATION MODEL

my $factor_e_source;

# Build one generation table per "+"-separated entry of
# $___GENERATION_FACTORS.  Each entry has the form "<source>-<target>";
# the two halves are stored in $factor_e_source / $factor_e and the whole
# entry in $factor before get_generation() is called.
#
# Output file names and table types are consumed in order from
# @_GENERATION_TABLE and @_GENERATION_TYPE; when those run out the file
# name is left undefined (get_generation picks a default) and the type
# defaults to "double".
sub get_generation_factored {
    print STDERR "(8) learn generation model @ ".`date`;
    if (defined $___GENERATION_FACTORS) {
        my @SPECIFIED_TABLE = @_GENERATION_TABLE;
        my @TYPE = @_GENERATION_TYPE;
        foreach my $f (split(/\+/,$___GENERATION_FACTORS)) {
            $factor = $f;
            ($factor_e_source,$factor_e) = split(/\-/,$factor);
            # Declare first, assign conditionally: "my $x = ... if COND"
            # has undefined behaviour and can leak the previous iteration's
            # value when COND is false.
            my $file;
            $file = shift @SPECIFIED_TABLE if scalar(@SPECIFIED_TABLE);
            my $type = "double";
            $type = shift @TYPE if scalar @TYPE;
            get_generation($file,$type);
        }
    } else {
        print STDERR "  no generation model requested, skipping step\n";
    }
}

# Count (source-factors, target-factors) co-occurrences over every token of
# the target-side corpus "$___CORPUS.$___E" and write them as a generation
# table, which is then gzip-compressed.
#
# Parameters:
#   $file - output path; defaults to "$___MODEL_DIR/generation.$factor"
#   $type - 'single' writes only count/total(source); any other value also
#           writes count/total(target)
#
# Reads $factor, $factor_e_source and $factor_e, which are set by
# get_generation_factored().  Dies if the corpus cannot be read or the
# table cannot be written.
sub get_generation {
    print STDERR "(8) [$factor] generate generation table @ ".`date`;
    my ($file,$type) = @_;
    $file = "$___MODEL_DIR/generation.$factor" unless $file;

    # factor indices that make up the source and target side of each entry
    my @source_factors = split(/,/,$factor_e_source);
    my @target_factors = split(/,/,$factor_e);

    my (%GENERATION,%GENERATION_TOTAL_SOURCE,%GENERATION_TOTAL_TARGET);
    my $corpus_file = $___CORPUS.".".$___E;
    open(my $corpus_fh, '<', $corpus_file) or die "Can't read $corpus_file: $!";
    # NOTE(review): $alignment_id is declared outside this sub and is not
    # used here; the reset is kept so callers see the same state as before.
    $alignment_id=0;
    while (my $line = <$corpus_fh>) {
        chomp $line;
        foreach my $token (split(' ', $line)) {
            # factors inside a token are separated by a literal "|"
            my @FACTOR = split(/\|/, $token);
            my $source = join("|", map { $FACTOR[$_] } @source_factors);
            my $target = join("|", map { $FACTOR[$_] } @target_factors);

            $GENERATION{$source}{$target}++;
            $GENERATION_TOTAL_SOURCE{$source}++;
            $GENERATION_TOTAL_TARGET{$target}++;
        }
    }
    close($corpus_fh);

    safesystem("mkdir -p $___MODEL_DIR") or die;
    open(my $gen_fh, '>', $file) or die "Can't write $file: $!";
    foreach my $source (keys %GENERATION) {
        foreach my $target (keys %{$GENERATION{$source}}) {
            printf $gen_fh ("%s %s %.7f ",$source,$target,
                            $GENERATION{$source}{$target}/$GENERATION_TOTAL_SOURCE{$source});
            printf $gen_fh (" %.7f",
                            $GENERATION{$source}{$target}/$GENERATION_TOTAL_TARGET{$target})
                unless $type eq 'single';
            print $gen_fh "\n";
        }
    }
    # Buffered write errors only surface at close; catch them before the
    # (possibly truncated) table is compressed.
    close($gen_fh) or die "Can't write $file: $!";
    safesystem("rm -f $file.gz") or die;
    safesystem("gzip $file") or die;
}

### (9) CREATE CONFIGURATION FILE

# Write the decoder configuration to $___CONFIG.
#
# Sections are emitted in this order: [input-factors], [mapping],
# [ttable-file], [generation-file] (optional), [lmodel-file],
# [ttable-limit], [distortion-file] (only for non-"distance" reordering),
# [weight-d], [weight-l], [weight-t], [weight-generation] (optional),
# [weight-w], [distortion-limit] and, for a non-default delimiter,
# [factor-delimiter].
#
# Every section header is written on its own line; a header that shares a
# line with a "#" comment would be part of the comment.
#
# Dies if $___TRANSLATION_FACTORS is undefined or the file cannot be
# written; exits with status 1 if fewer translation/generation tables are
# defined than the decoding steps refer to.
sub create_ini {
    print STDERR "(9) create moses.ini @ ".`date`;

    full_path(\$___MODEL_DIR);
    full_path(\$___VCB_E);
    full_path(\$___VCB_F);
    `mkdir -p $___MODEL_DIR`;
    open(INI, '>', $___CONFIG) or die("Can't write $___CONFIG");
    print INI "############################ MOSES CONFIG FILE ############################\n";

    if (defined $___TRANSLATION_FACTORS) {
        print INI "# input factors\n";
        print INI "[input-factors]\n";
        # highest input factor index referenced by any translation table
        my $INPUT_FACTOR_MAX = 0;
        foreach my $table (split /\+/, $___TRANSLATION_FACTORS) {
            my ($factor_list) = split /-+/, $table;
            foreach my $input_factor (split(/,/,$factor_list)) {
                $INPUT_FACTOR_MAX = $input_factor if $input_factor>$INPUT_FACTOR_MAX;
            }
        }
        $INPUT_FACTOR_MAX = $_INPUT_FACTOR_MAX if $_INPUT_FACTOR_MAX; # use specified, if exists
        for (my $c = 0; $c <= $INPUT_FACTOR_MAX; $c++) { print INI "$c\n"; }
    } else {
        die "No translation steps defined, cannot prepare [input-factors] section\n";
    }

    # $stepsused{T} / $stepsused{G}: highest step id used per type, plus one
    my %stepsused;
    print INI "\n# mapping steps\n[mapping]\n";
    my $steplist = 0;
    foreach my $list (split(/:/,$___DECODING_STEPS)) {
        foreach my $step (split(/,/,$list)) {
            # "t0" -> "T 0", "g1" -> "G 1"
            $step =~ s/t/T /g;
            $step =~ s/g/G /g;
            my ($type, $num) = split /\s+/, $step;
            $stepsused{$type} = $num+1 if !defined $stepsused{$type} || $stepsused{$type} < $num+1;
            print INI $steplist," ",$step,"\n";
        }
        $steplist++;
    }

    print INI "\n# translation tables: source-factors, target-factors, number of scores, file \n[ttable-file]\n";
    my $num_of_ttables = 0;
    my @SPECIFIED_TABLE = @_PHRASE_TABLE;
    foreach my $f (split(/\+/,$___TRANSLATION_FACTORS)) {
        $num_of_ttables++;
        my $ff = $f;
        $ff =~ s/\-/ /;
        my $file = "$___MODEL_DIR/phrase-table.$f.gz";
        $file = shift @SPECIFIED_TABLE if scalar(@SPECIFIED_TABLE);
        print INI "$ff 5 $file\n";
    }
    if ($num_of_ttables != $stepsused{"T"}) {
        print STDERR "WARNING: Your [mapping-steps] require translation steps up to id $stepsused{T} but you defined translation steps 0..$num_of_ttables\n";
        exit 1 if $num_of_ttables < $stepsused{"T"}; # fatal to define less
    }

    if (defined $___GENERATION_FACTORS) {
        my @TYPE = @_GENERATION_TYPE;
        print INI "\n# generation models: source-factors, target-factors, number-of-weights, filename\n";
        print INI "[generation-file]\n";
        my $cnt = 0;
        my @SPECIFIED_TABLE = @_GENERATION_TABLE;
        foreach my $f (split(/\+/,$___GENERATION_FACTORS)) {
            # a missing type means "double", as in get_generation_factored
            my $type = "double";
            $type = shift @TYPE if scalar @TYPE;
            my $weights_per_generation_model = 2;
            $weights_per_generation_model = 1 if $type eq 'single';
            $cnt++;
            my $ff = $f;
            $ff =~ s/\-/ /;
            my $file = "$___MODEL_DIR/generation.$f";
            $file = shift @SPECIFIED_TABLE if scalar(@SPECIFIED_TABLE);
            print INI "$ff $weights_per_generation_model $file\n";
        }
        if ($cnt != $stepsused{"G"}) {
            print STDERR "WARNING: Your [mapping-steps] require generation steps up to id $stepsused{G} but you defined generation steps 0..$cnt\n";
            exit 1 if $cnt < $stepsused{"G"}; # fatal to define less
        }
    } else {
        print INI "\n# no generation models, no generation-file section\n";
    }

    print INI "\n# language models: type(srilm/irstlm), factors, order, file\n[lmodel-file]\n";
    foreach my $lm (@___LM) {
        my ($f, $o, $fn, $type) = @{$lm};
        print INI "$type $f $o $fn\n";
    }

    print INI "\n\n# limit on how many phrase translations e for each phrase f are loaded\n# 0 = all elements loaded\n[ttable-limit]\n20\n";
    foreach (1..$num_of_ttables) {
        print INI "0\n";
    }

    my $weight_d_count = 1;
    if ($___REORDERING ne "distance") {
        my $file = "# distortion (reordering) files\n\[distortion-file]\n";
        my $factor_i = 0;
        my @SPECIFIED_TABLE = @_REORDERING_TABLE;
        foreach my $factor (split(/\+/,$___REORDERING_FACTORS)) {
            foreach my $r (keys %REORDERING_MODEL) {
                next if $r eq "fe" || $r eq "f";
                next if $r eq "distance" && $factor_i>0;
                if ($r eq "distance") { $weight_d_count++; }
                else {
                    my $type = $r;
                    $r =~ s/-bidirectional/.bi/;
                    $r =~ s/-f/.f/;
                    $r =~ s/msd/msd-table.$factor/;
                    $r =~ s/monotonicity/monotonicity-table.$factor/;

                    # msd models carry 3 weights, monotonicity models 1;
                    # bidirectional variants carry twice as many
                    my $w;
                    if ($r =~ /msd/) { $w = 3; } else { $w = 1; }
                    if ($r =~ /bi/) { $w *= 2; }
                    $weight_d_count += $w;

                    my $table_file = "$___MODEL_DIR/reordering-table.$type.$___REORDERING_SMOOTH.$factor.gz";
                    $table_file = shift @SPECIFIED_TABLE if scalar(@SPECIFIED_TABLE);
                    $file .= "$factor $type $w $table_file\n";
                }
            }
            $factor_i++;
        }
        print INI $file."\n";
    }
    else {
        $weight_d_count = 1;
    }

    print INI "# distortion (reordering) weight\n[weight-d]\n";
    # $weight_d_count is always >= 1, so this division ran at least once in
    # the loop anyway; computing it once up front changes nothing observable
    my $weight_d = 0.6/(scalar keys %REORDERING_MODEL);
    for (my $i=0;$i<$weight_d_count;$i++) {
        print INI "".$weight_d."\n";
    }

    print INI "\n# language model weights\n[weight-l]\n";
    my $lmweighttotal = 0.5;
    foreach (1..scalar @___LM) {
        printf INI "%.4f\n", $lmweighttotal / scalar @___LM;
    }

    print INI "\n\n# translation model weights\n[weight-t]\n";
    foreach my $f (split(/\+/,$___TRANSLATION_FACTORS)) {
        # five scores per phrase table, matching the "5" in [ttable-file]
        print INI "0.2\n0.2\n0.2\n0.2\n0.2\n";
    }

    if (defined $___GENERATION_FACTORS) {
        print INI "\n# generation model weights\n";
        print INI "[weight-generation]\n";
        my @TYPE = @_GENERATION_TYPE;
        foreach my $f (split(/\+/,$___GENERATION_FACTORS)) {
            my $type = "double";
            $type = shift @TYPE if scalar @TYPE;
            print INI "0.3\n";
            print INI "0\n" unless $type eq 'single';
        }
    } else {
        print INI "\n# no generation models, no weight-generation section\n";
    }

    print INI "\n# word penalty\n[weight-w]\n-1\n\n[distortion-limit]\n6\n";

    # only set the factor delimiter if it is non-standard
    unless ($___FACTOR_DELIMITER eq '|') {
        print INI "\n# delimiter between factors in input\n[factor-delimiter]\n$___FACTOR_DELIMITER\n\n"
    }

    close(INI) or die("Can't write $___CONFIG");
}

# Turn the path behind the scalar reference $PATH into a normalised
# absolute path, in place.
#
# A "/nfsmnt" component is removed first.  A path that is then already
# absolute is left as is; otherwise the current directory (`pawd`, falling
# back to `pwd`) is prepended and "/./", duplicate slashes, "dir/.."
# pairs and trailing slashes are collapsed.
#
# Parameters:
#   $PATH - reference to the scalar holding the path; modified in place
sub full_path {
    my ($PATH) = @_;
    $$PATH =~ s/\/nfsmnt//;
    return if $$PATH =~ /^\//;

    my $dir = `pawd 2>/dev/null`;
    if (!$dir) { $dir = `pwd`; }
    # The original applied this substitution to $PATH, i.e. to the
    # reference itself, where it could never do anything useful.  Strip the
    # prefix from the working directory instead, mirroring what is done to
    # the path above.
    $dir =~ s/\/nfsmnt//;
    chomp $dir;
    $$PATH = $dir."/".$$PATH;
    $$PATH =~ s/[\r\n]//g;
    $$PATH =~ s/\/\.\//\//g;
    $$PATH =~ s/\/+/\//g;
    # resolve "dir/../" pairs; bounded so a malformed path cannot loop forever
    my $sanity = 0;
    while ($$PATH =~ /\/\.\.\// && $sanity++<10) {
        $$PATH =~ s/\/+/\//g;
        $$PATH =~ s/\/[^\/]+\/\.\.\//\//g;
    }
    $$PATH =~ s/\/[^\/]+\/\.\.$//;
    $$PATH =~ s/\/+$//;
}

# Run a command via system(), logging it to STDERR.
#
# Parameters:
#   @_ - passed unchanged to system()
#
# Returns true if the command exited with status 0 and false otherwise
# (the non-zero exit code is logged).  Terminates the whole script with
# exit(1) if the command could not be started or was killed by a signal.
sub safesystem {
    print STDERR "Executing: @_\n";
    system(@_);
    if ($? == -1) {
        print STDERR "Failed to execute: @_\n  $!\n";
        exit(1);
    }
    elsif ($? & 127) {
        # The command goes in as a %s argument: interpolating it into the
        # format string would misparse any "%" it contains.
        printf STDERR "Execution of: %s\n  died with signal %d, %s coredump\n",
            "@_", ($? & 127), ($? & 128) ? 'with' : 'without';
        exit(1);
    }
    else {
        my $exitcode = $? >> 8;
        print STDERR "Exit code: $exitcode\n" if $exitcode;
        return ! $exitcode;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -