📄 newsmtgui.cgi.svn-base

📁 解码器是基于短语的统计机器翻译系统的核心模块
💻 SVN-BASE
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
	      $REF_NGRAM{$ngram} < $REF_NGRAM_N{$ngram}) {
            $REF_NGRAM{$ngram} = $REF_NGRAM_N{$ngram};
          }
        }
      }
    }
    $length_translation += $length_translation_this_sentence;
    $length_reference += $closest_length;
    for(my $n=1;$n<=4;$n++) {
      my %T_NGRAM = ();
      for(my $start=0;$start<=$#WORD-($n-1);$start++) {
        my $ngram = "$n";
        for(my $w=0;$w<$n;$w++) {
          $ngram .= " ".$WORD[$start+$w];
        }
        $T_NGRAM{$ngram}++;
      }
      foreach my $ngram (keys %T_NGRAM) {
        my $n = 0+$ngram;
#       print "$i e $ngram $T_NGRAM{$ngram}<BR>\n";
        $TOTAL[$n] += $T_NGRAM{$ngram};
        if (defined($REF_NGRAM{$ngram})) {
          if ($REF_NGRAM{$ngram} >= $T_NGRAM{$ngram}) {
            $CORRECT[$n] += $T_NGRAM{$ngram};
#           print "$i e correct1 $T_NGRAM{$ngram}<BR>\n";
          }
          else {
            $CORRECT[$n] += $REF_NGRAM{$ngram};
#           print "$i e correct2 $REF_NGRAM{$ngram}<BR>\n";
          }
        }
      }
    }
  }
  # The brevity penalty only applies when the accumulated translation length
  # is shorter than the accumulated closest-reference length.
  # NOTE(review): the start of this subroutine is outside this chunk, so its
  # logic is left untouched. The divisions below die with "Illegal division by
  # zero" if $length_translation or any $TOTAL[n] is 0 -- confirm callers never
  # pass an empty translation file.
  my $brevity_penalty = 1;
  if ($length_translation<$length_reference) {
    $brevity_penalty = exp(1-$length_reference/$length_translation);
  }
  my $bleu = $brevity_penalty * exp((my_log( $CORRECT[1]/$TOTAL[1] ) +
                                     my_log( $CORRECT[2]/$TOTAL[2] ) +
                                     my_log( $CORRECT[3]/$TOTAL[3] ) +
                                     my_log( $CORRECT[4]/$TOTAL[4] ) ) / 4);

  open(BLEU,">>mbleu-memory.dat");
  @STAT = stat($translation_file);
  printf BLEU "$translation_file $STAT[9] %f %f %f %f %f %f\n",$bleu,$CORRECT[1]/$TOTAL[1],$CORRECT[2]/$TOTAL[2],$CORRECT[3]/$TOTAL[3],$CORRECT[4]/$TOTAL[4],$brevity_penalty;
  close(BLEU);

  return ($bleu,
          100*$CORRECT[1]/$TOTAL[1],
          100*$CORRECT[2]/$TOTAL[2],
          100*$CORRECT[3]/$TOTAL[3],
          100*$CORRECT[4]/$TOTAL[4],
          $brevity_penalty);
}

# Logarithm that tolerates a zero (or otherwise false) argument: instead of
# dying on log(0) it returns a very large negative sentinel.
sub my_log {
  return -9999999999 unless $_[0];
  return log($_[0]);
}

###### HELPERS FOR THE SUBS BELOW

# Escape a value for safe inclusion in HTML text or a double-quoted attribute.
# An undefined value is rendered as the empty string.
sub _html_escape {
  my ($text) = @_;
  return '' unless defined $text;
  $text =~ s/&/&amp;/g;
  $text =~ s/</&lt;/g;
  $text =~ s/>/&gt;/g;
  $text =~ s/"/&quot;/g;
  return $text;
}

# Read a text file and return its lines without trailing newlines.
# Returns an empty list when the file cannot be opened. The file is opened
# directly (three-argument open) rather than through a shell command, so
# characters in the path are never interpreted by a shell.
sub _read_file_lines {
  my ($path) = @_;
  open(my $fh, '<', $path) or return ();
  my @lines = <$fh>;
  close($fh);
  chomp(@lines);
  return @lines;
}

###### SCORE TRANSLATIONS

################################ IN PROGRESS ###############################
# Print the comparison page for the corpus named by the CORPUS parameter,
# delegating the page body to Corpus::writeComparisonPage.
sub compare2
{
  htmlhead("Compare Translations");
  print "<A HREF=\"?ACTION=VIEW_CORPUS&CORPUS=".CGI::escape($in{CORPUS})."\">View Corpus "._html_escape($in{CORPUS})."</A><P>\n";
  print "<FORM ACTION=\"\" METHOD=POST>\n";
  # Request parameters are untrusted: escape them before echoing into HTML.
  print "<INPUT TYPE=HIDDEN NAME=ACTION VALUE=\""._html_escape($in{ACTION})."\">\n";
  print "<INPUT TYPE=HIDDEN NAME=CORPUS VALUE=\""._html_escape($in{CORPUS})."\">\n";

  my $corpus = Corpus->new('-name' => "$in{CORPUS}", '-descriptions' => \%FILEDESC, '-info_line' => $factorData{$in{CORPUS}});
  # NOTE(review): /^.*$/ is evaluated here as a match against $_ in list
  # context, so the callee receives the match result, not a pattern. This
  # looks like it was meant to be qr/^.*$/ -- confirm against
  # Corpus::writeComparisonPage before changing it.
  $corpus->writeComparisonPage(\*STDOUT, /^.*$/);
  print "</FORM>\n";
}

# Print a page that lists, sentence by sentence, the system outputs selected
# through FILE_<name> request parameters, showing only those sentences where
# the systems differ (by surface form if SURFACE is set, otherwise by the
# stored manual evaluation). Submitted SYN_SCORE_*/SEM_SCORE_* pairs are
# written to the evaluation memory first.
sub compare {
  htmlhead("Compare Translations");
  print "<A HREF=\"?ACTION=VIEW_CORPUS&CORPUS=".CGI::escape($in{CORPUS})."\">View Corpus "._html_escape($in{CORPUS})."</A><P>\n";
  print "<FORM ACTION=\"\" METHOD=POST>\n";
  # Request parameters are untrusted: escape them before echoing into HTML.
  print "<INPUT TYPE=HIDDEN NAME=ACTION VALUE=\""._html_escape($in{ACTION})."\">\n";
  print "<INPUT TYPE=HIDDEN NAME=CORPUS VALUE=\""._html_escape($in{CORPUS})."\">\n";

  # get sentences
  my %SENTENCES;
  my $sentence_count = 0;
  foreach my $key (keys %in) {
    next unless $key =~ /^FILE_(.+)$/;
    my $file = $1;
    print "<INPUT TYPE=HIDDEN NAME=\""._html_escape($file)."\" VALUE=1>\n";

    # Files are read directly instead of via `grep`/`cat` in a shell, because
    # both the corpus name and the file suffix come from the request.
    my @lines = _read_file_lines("$in{CORPUS}.$file");
    if ($file =~ /\.sgm$/) {
      # Keep only segment lines and strip the <seg ...> wrapper from every
      # one of them, including the last.
      @lines = grep { /<seg/ } @lines;
      foreach my $line (@lines) {
        $line =~ s/^<seg[^>]+> *(\S.+\S) *<\/seg> *$/$1/;
      }
    }
    $SENTENCES{$file} = \@lines;
    $sentence_count = scalar @lines;
  }

  my %REFERENCE;
  foreach my $type (@SHOW) {
    next unless -e "$in{CORPUS}.$type";
    $REFERENCE{$type} = [ _read_file_lines("$in{CORPUS}.$type") ];
  }

  # update memory
  foreach my $key (keys %in) {
    next unless $key =~ /^SYN_SCORE_(.+)_(\d+)$/;
    # Copy the captures immediately; later matches would overwrite $1/$2.
    my ($file, $index) = ($1, $2);
    my $sem_score = $in{"SEM_SCORE_${file}_$index"};
    next unless $sem_score;
    store_in_memory($REFERENCE{$FOREIGN}[$index],
                    $SENTENCES{$file}[$index],
                    "syn_".$in{$key}." sem_".$sem_score);
  }

  # display sentences
  for(my $i=0;$i<$sentence_count;$i++)
  {
    my $evaluation = "";
    my $show = 0;
    my $surface = "";
    foreach my $file (keys %SENTENCES) {
      if ($in{SURFACE}) {
        # Show the sentence if any system's output differs from the first one.
        $SENTENCES{$file}[$i] =~ s/ *$//;
        $surface = $SENTENCES{$file}[$i] if ($surface eq '');
        $show = 1 if ($SENTENCES{$file}[$i] ne $surface);
      }
      else {
        # Show the sentence if the stored evaluations differ between systems;
        # an unjudged output counts as wrong on both axes.
        my $this_ev = get_from_memory($REFERENCE{$FOREIGN}[$i],$SENTENCES{$file}[$i]);
        $this_ev = "syn_wrong sem_wrong" unless $this_ev;
        $evaluation = $this_ev if ($evaluation eq '');
        $show = 1 if ($evaluation ne $this_ev);
      }
    }
    next unless $show;
    print "<HR>Sentence ".($i+1).":<BR>\n";
    foreach my $ref (@SHOW) {
      if (-e "$in{CORPUS}.$ref") {
        print "<FONT COLOR=$SHOW_COLOR{$ref}>".$REFERENCE{$ref}[$i]."</FONT> (".$FILETYPE{$ref}.")<BR>\n";
      }
    }
    foreach my $file (keys %SENTENCES) {
      print "<B>$SENTENCES{$file}[$i]</B> ($file)<BR>\n";
      color_highlight_ngrams($i,nist_normalize_text($SENTENCES{$file}[$i]),$REFERENCE{"e"}[$i]);
      # The manual-evaluation form is deliberately disabled by the leading 0.
      if (0 && $in{WITH_EVAL}) {
        $evaluation = get_from_memory($REFERENCE{$FOREIGN}[$i],$SENTENCES{$file}[$i]);
        print "<INPUT TYPE=RADIO NAME=SYN_SCORE_$file"."_$i VALUE=correct";
        print " CHECKED" if ($evaluation =~ /syn_correct/);
        print "> perfect English\n";
        print "<INPUT TYPE=RADIO NAME=SYN_SCORE_$file"."_$i VALUE=wrong";
        print " CHECKED" if ($evaluation =~ /syn_wrong/);
        print "> imperfect English<BR>\n";
        print "<INPUT TYPE=RADIO NAME=SEM_SCORE_$file"."_$i VALUE=correct";
        print " CHECKED" if ($evaluation =~ /sem_correct/);
        print "> correct meaning\n";
        print "<INPUT TYPE=RADIO NAME=SEM_SCORE_$file"."_$i VALUE=wrong";
        print " CHECKED" if ($evaluation =~ /sem_wrong/);
        print "> incorrect meaning<BR>\n";
      }
    }
  }
  print "<P><INPUT TYPE=SUBMIT VALUE=\"Add evaluation\">\n";
  print "</FORM>\n";
}

###### MEMORY SUBS

# Load 'evaluation-memory.dat' into %MEMORY. Each line holds three fields
# separated by " .o0O0o. ": foreign sentence, translation, evaluation.
# A missing file is not an error; the memory simply stays empty.
sub load_memory {
  open(my $fh, '<', "evaluation-memory.dat") or return;
  while (my $line = <$fh>) {
    chomp($line);
    my($foreign,$translation,$evaluation) = split(/ \.o0O0o\. /, $line);
    # Skip malformed lines instead of storing entries under undefined keys.
    next unless defined $evaluation;
    # Legacy single-word judgement maps to "correct on both axes".
    $evaluation = 'syn_correct sem_correct' if ($evaluation eq 'correct');
    $MEMORY{"$foreign .o0O0o. $translation"} = $evaluation;
  }
  close($fh);
}

# Tally the stored evaluations for a pair of line-aligned files.
# Arguments: foreign file name, translation file name.
# Returns: ($correct, $just_syn, $just_sem, $wrong, $unknown) counts, where
# unknown covers both unjudged pairs and unrecognised evaluation strings.
sub get_score_from_memory {
  my($foreign_file,$translation_file) = @_;
  my $unknown=0;
  my $correct=0;
  my $just_syn=0;
  my $just_sem=0;
  my $wrong=0;
  # Read the files directly rather than through `cat` in a shell.
  my @FOREIGN = _read_file_lines($foreign_file);
  my @TRANSLATION = _read_file_lines($translation_file);
  for(my $i=0;$i<=$#FOREIGN;$i++) {
    if (my $evaluation = get_from_memory($FOREIGN[$i],$TRANSLATION[$i])) {
      if ($evaluation eq 'syn_correct sem_correct') { $correct++ }
      elsif ($evaluation eq 'syn_correct sem_wrong') { $just_syn++ }
      elsif ($evaluation eq 'syn_wrong sem_correct') { $just_sem++ }
      elsif ($evaluation eq 'syn_wrong sem_wrong') { $wrong++ }
      else { $unknown++; }
    }
    else { $unknown++; }
  }
  return($correct,$just_syn,$just_sem,$wrong,$unknown);
}

# Record an evaluation for a (foreign, translation) pair in %MEMORY and append
# it to 'evaluation-memory.dat'. Does nothing if the same evaluation is
# already stored. Dies if the file cannot be opened for append.
sub store_in_memory {
  my($foreign,$translation,$evaluation) = @_;
  trim(\$translation);
  my $key = "$foreign .o0O0o. $translation";
  return if defined $MEMORY{$key} && $MEMORY{$key} eq $evaluation;
  $MEMORY{$key} = $evaluation;
  open(my $fh, '>>', "evaluation-memory.dat") or die "store_in_memory(): couldn't open 'evaluation-memory.dat' for append\n";
  print {$fh} "$foreign .o0O0o. $translation .o0O0o. $evaluation\n";
  close($fh);
}

# Look up the stored evaluation for a (foreign, translation) pair.
# Returns the evaluation string, or undef if the pair has not been judged.
sub get_from_memory {
  my($foreign,$translation) = @_;
  trim(\$translation);
  return $MEMORY{"$foreign .o0O0o. $translation"};
}

# Normalise spaces in place: collapse runs of spaces to one and strip leading
# and trailing spaces. Takes a reference to the string to modify.
sub trim {
  my($translation) = @_;
  $$translation =~ s/ +/ /g;
  $$translation =~ s/^ +//;
  $$translation =~ s/ +$//;
}

# Load 'file-descriptions' into %FILEDESC. Each line is a file name followed
# by whitespace and a free-text description. Dies if the file is unreadable.
sub load_descriptions {
  open(my $fh, '<', "file-descriptions") or die "load_descriptions(): couldn't open 'file-descriptions' for read\n";
  while (my $line = <$fh>) {
    chomp($line);
    my($file,$description) = split(/\s+/,$line,2);
    # Blank lines yield no file name; skip them.
    next unless defined $file && length $file;
    $FILEDESC{$file} = $description;
  }
  close($fh);
}

#read config file giving various corpus config info
#arguments: filename to read
#return: hash of corpus names to strings containing formatted info
sub loadFactorData
{
  my $filename = shift;
  my %data = ();
  open(my $fh, '<', $filename) or die "loadFactorData(): couldn't open '$filename' for read\n";
  while(my $line = <$fh>)
  {
    next if $line =~ /^\#/; #skip comment lines
    # Only use the captures when the line actually matched; otherwise $1/$2
    # would still hold values from an earlier line (or be undefined).
    next unless $line =~ /^\s*(\S+)\s*:\s*(\S.*\S)\s*$/;
    $data{$1} = $2;
  }
  close($fh);
  return %data;
}

###### SUBS

# Print the CGI header and the opening HTML of a page.
# Argument: the page title, used in <TITLE> and in the <H2> heading.
sub htmlhead {
  print <<"___ENDHTML";
Content-type: text/html

<HTML><HEAD>
<TITLE>MTEval: $_[0]</TITLE>
<SCRIPT LANGUAGE="JavaScript">

<!-- hide from old browsers

function FieldInfo(field,description) {
  popup = window.open("","popDialog","height=500,width=600,scrollbars=yes,resizable=yes");
  popup.document.write("<HTML><HEAD><TITLE>"+field+"</TITLE></HEAD><BODY BGCOLOR=#FFFFCC><CENTER><B>"+field+"</B><HR SIZE=2 NOSHADE></CENTER><PRE>"+description+"</PRE><CENTER><FORM><INPUT TYPE='BUTTON' VALUE='Okay' onClick='self.close()'></FORM><CENTER></BODY></HTML>");
  popup.focus();
  popup.document.close();
}

<!-- done hiding -->

</SCRIPT>
</HEAD>
<BODY BGCOLOR=white>
<H2>Evaluation Tool for Machine Translation<BR>$_[0]</H2>
___ENDHTML
}


############################# parts of cgi-lib.pl

# Parse the CGI request (query string for GET, request body for POST) into
# the global %in. Repeated keys are joined with "\0".
# Returns the number of key/value pairs found.
sub ReadParse {
  # Read in text
  my $in = '';
  if (MethGet()) {
    $in = $ENV{'QUERY_STRING'};
  } elsif (MethPost()) {
    read(STDIN,$in,$ENV{'CONTENT_LENGTH'} || 0);
  }
  # Neither method (e.g. run from a shell) or a missing variable: no input.
  $in = '' unless defined $in;

  my @in = split(/[&;]/,$in);

  foreach my $pair (@in) {
    # Convert plus's to spaces
    $pair =~ s/\+/ /g;

    # Split into key and value.
    my ($key, $val) = split(/=/,$pair,2); # splits on the first =.
    $key = '' unless defined $key;
    $val = '' unless defined $val;        # "key" with no "=value"

    # Convert %XX from hex numbers to alphanumeric. Only real hex escapes are
    # decoded, and as unsigned bytes so values above 0x7F do not wrap.
    $key =~ s/%([0-9A-Fa-f]{2})/pack("C",hex($1))/ge;
    $val =~ s/%([0-9A-Fa-f]{2})/pack("C",hex($1))/ge;

    # Associate key and value
    $in{$key} .= "\0" if (defined($in{$key})); # \0 is the multiple separator
    $in{$key} .= $val;
  }

  return scalar(@in);
}

# True if the current request method is GET (false if it is unset).
sub MethGet {
  return (defined $ENV{'REQUEST_METHOD'} && $ENV{'REQUEST_METHOD'} eq "GET");
}

# True if the current request method is POST (false if it is unset).
sub MethPost {
  return (defined $ENV{'REQUEST_METHOD'} && $ENV{'REQUEST_METHOD'} eq "POST");
}
上一页 1 23
💿 文件大小 5827 K
👤 上传用户 lyyfengyutongzh
📂 所属分类多国语言处理
🏷️ 相关标签

#解码器 #机器翻译系统 #核心 #模块
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -