📄 newsmtgui.cgi.svn-base
字号:
$REF_NGRAM{$ngram} < $REF_NGRAM_N{$ngram}) {
                        $REF_NGRAM{$ngram} = $REF_NGRAM_N{$ngram};
                    }
                }
            }
        }
        # Remainder of a scoring sub whose opening lies earlier in the file.
        $length_translation += $length_translation_this_sentence;
        $length_reference += $closest_length;
        for(my $n=1;$n<=4;$n++) {
            my %T_NGRAM = ();
            for(my $start=0;$start<=$#WORD-($n-1);$start++) {
                my $ngram = "$n";
                for(my $w=0;$w<$n;$w++) {
                    $ngram .= " ".$WORD[$start+$w];
                }
                $T_NGRAM{$ngram}++;
            }
            foreach my $ngram (keys %T_NGRAM) {
                my $n = 0+$ngram;
#               print "$i e $ngram $T_NGRAM{$ngram}<BR>\n";
                $TOTAL[$n] += $T_NGRAM{$ngram};
                if (defined($REF_NGRAM{$ngram})) {
                    if ($REF_NGRAM{$ngram} >= $T_NGRAM{$ngram}) {
                        $CORRECT[$n] += $T_NGRAM{$ngram};
#                       print "$i e correct1 $T_NGRAM{$ngram}<BR>\n";
                    }
                    else {
                        $CORRECT[$n] += $REF_NGRAM{$ngram};
#                       print "$i e correct2 $REF_NGRAM{$ngram}<BR>\n";
                    }
                }
            }
        }
    }
    my $brevity_penalty = 1;
    if ($length_translation<$length_reference) {
        $brevity_penalty = exp(1-$length_reference/$length_translation);
    }
    my $bleu = $brevity_penalty * exp((my_log( $CORRECT[1]/$TOTAL[1] ) +
                                       my_log( $CORRECT[2]/$TOTAL[2] ) +
                                       my_log( $CORRECT[3]/$TOTAL[3] ) +
                                       my_log( $CORRECT[4]/$TOTAL[4] ) ) / 4);

    open(BLEU,">>mbleu-memory.dat");
    @STAT = stat($translation_file);
    printf BLEU "$translation_file $STAT[9] %f %f %f %f %f %f\n",$bleu,$CORRECT[1]/$TOTAL[1],$CORRECT[2]/$TOTAL[2],$CORRECT[3]/$TOTAL[3],$CORRECT[4]/$TOTAL[4],$brevity_penalty;
    close(BLEU);

    return ($bleu,
            100*$CORRECT[1]/$TOTAL[1],
            100*$CORRECT[2]/$TOTAL[2],
            100*$CORRECT[3]/$TOTAL[3],
            100*$CORRECT[4]/$TOTAL[4],
            $brevity_penalty);
}

# Logarithm that tolerates a zero (or otherwise false) argument.
# arguments: the value to take the natural log of
# return: log(value), or -9999999999 when the value is false, so that a
#         zero n-gram precision does not abort the geometric mean above
sub my_log {
    return -9999999999 unless $_[0];
    return log($_[0]);
}

###### SCORE TRANSLATIONS

################################ IN PROGRESS ###############################

# Escape a string for safe inclusion in HTML text or a quoted attribute.
# arguments: the raw string (undef is treated as the empty string)
# return: the string with & < > " ' replaced by character references
sub _html_escape {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&#39;/g;
    return $text;
}

# Read a text file without going through the shell.
# arguments: path of the file to read
# return: list of lines with the trailing newline removed; the empty list
#         when the file cannot be opened
sub _read_lines {
    my ($path) = @_;
    open(my $fh, '<', $path) or return;
    my @lines = <$fh>;
    close($fh);
    chomp(@lines);
    return @lines;
}

# Print the page head, the "View Corpus" link and the opening of the POST
# form shared by compare2() and compare().  The values come from the request
# (%in), so they are HTML-escaped and the attribute values are quoted.
sub _print_compare_form_head {
    htmlhead("Compare Translations");
    my $corpus_html = _html_escape($in{CORPUS});
    print "<A HREF=\"?ACTION=VIEW_CORPUS&CORPUS=".CGI::escape($in{CORPUS})."\">View Corpus $corpus_html</A><P>\n";
    print "<FORM ACTION=\"\" METHOD=POST>\n";
    print "<INPUT TYPE=HIDDEN NAME=ACTION VALUE=\""._html_escape($in{ACTION})."\">\n";
    print "<INPUT TYPE=HIDDEN NAME=CORPUS VALUE=\"$corpus_html\">\n";
    return;
}

# Print a comparison page by building a Corpus object for the corpus named in
# the request and asking it to write the page to STDOUT.
sub compare2
{
    _print_compare_form_head();
    my $corpus = Corpus->new('-name' => "$in{CORPUS}",
                             '-descriptions' => \%FILEDESC,
                             '-info_line' => $factorData{$in{CORPUS}});
    # NOTE(review): the second argument is the *result* of matching $_ against
    # /^.*$/, not a compiled pattern; qr/^.*$/ may have been intended --
    # confirm against Corpus::writeComparisonPage before changing it.
    $corpus->writeComparisonPage(\*STDOUT, /^.*$/);
    print "</FORM>\n";
}

# Print, for every sentence on which the selected translation files differ,
# the reference lines and each translation with n-gram highlighting.
# Reads the request from %in:
#   CORPUS               prefix of all file names ("<CORPUS>.<file>")
#   FILE_<file>          selects a translation file to compare
#   SURFACE              if true, compare the sentence text itself; otherwise
#                        compare the evaluations stored in memory
#   SYN_SCORE_<file>_<i> / SEM_SCORE_<file>_<i>
#                        submitted judgements, stored before display
# NOTE(review): file names are still built from the CORPUS request parameter;
# reads no longer go through the shell, but the value is not validated here.
sub compare {
    _print_compare_form_head();

    # get sentences
    my %SENTENCES;
    my $sentence_count = 0;
    foreach my $param (keys %in) {
        next unless $param =~ /^FILE_(.+)$/;
        my $file = $1;
        # NOTE(review): the hidden field is named "<file>", not "FILE_<file>",
        # so a re-submitted form will not match the pattern above -- confirm
        # whether that is intended.
        print "<INPUT TYPE=HIDDEN NAME=\""._html_escape($file)."\" VALUE=1>\n";
        my @lines = _read_lines("$in{CORPUS}.$file");
        if ($file =~ /\.sgm$/) {
            # keep only the segment lines and strip the <seg ...> wrapper from
            # every one of them, including the last
            @lines = grep { /<seg/ } @lines;
            foreach my $segment (@lines) {
                $segment =~ s/^<seg[^>]+> *(\S(?:.*\S)?) *<\/seg> *$/$1/;
            }
        }
        $SENTENCES{$file} = \@lines;
        $sentence_count = scalar @lines;
    }

    my %REFERENCE;
    foreach my $ref (@SHOW) {
        next unless -e "$in{CORPUS}.$ref";
        $REFERENCE{$ref} = [ _read_lines("$in{CORPUS}.$ref") ];
    }

    # update memory
    foreach my $param (keys %in) {
        next unless $param =~ /^SYN_SCORE_(.+)_(\d+)$/;
        my ($file, $index) = ($1, $2);
        next unless $in{"SEM_SCORE_${file}_$index"};
        store_in_memory($REFERENCE{$FOREIGN}[$index],
                        $SENTENCES{$file}[$index],
                        "syn_".$in{"SYN_SCORE_${file}_$index"}." sem_".$in{"SEM_SCORE_${file}_$index"});
    }

    # display sentences
    for(my $i=0;$i<$sentence_count;$i++) {
        my $evaluation = "";
        my $show = 0;
        my $surface = "";
        # a sentence is shown only if the files disagree on it
        foreach my $file (keys %SENTENCES) {
            if ($in{SURFACE}) {
                $SENTENCES{$file}[$i] =~ s/ *$//;
                $surface = $SENTENCES{$file}[$i] if ($surface eq '');
                $show = 1 if ($SENTENCES{$file}[$i] ne $surface);
            }
            else {
                my $this_ev = get_from_memory($REFERENCE{$FOREIGN}[$i],$SENTENCES{$file}[$i]);
                $this_ev = "syn_wrong sem_wrong" unless $this_ev;
                $evaluation = $this_ev if ($evaluation eq '');
                $show = 1 if ($evaluation ne $this_ev);
            }
        }
        next unless $show;
        print "<HR>Sentence ".($i+1).":<BR>\n";
        foreach my $ref (@SHOW) {
            if (-e "$in{CORPUS}.$ref") {
                print "<FONT COLOR=$SHOW_COLOR{$ref}>".$REFERENCE{$ref}[$i]."</FONT> (".$FILETYPE{$ref}.")<BR>\n";
            }
        }
        foreach my $file (keys %SENTENCES) {
            print "<B>$SENTENCES{$file}[$i]</B> ("._html_escape($file).")<BR>\n";
            color_highlight_ngrams($i,nist_normalize_text($SENTENCES{$file}[$i]),$REFERENCE{"e"}[$i]);
            # evaluation radio buttons; switched off by the constant 0
            if (0 && $in{WITH_EVAL}) {
                $evaluation = get_from_memory($REFERENCE{$FOREIGN}[$i],$SENTENCES{$file}[$i]);
                print "<INPUT TYPE=RADIO NAME=SYN_SCORE_$file"."_$i VALUE=correct";
                print " CHECKED" if ($evaluation =~ /syn_correct/);
                print "> perfect English\n";
                print "<INPUT TYPE=RADIO NAME=SYN_SCORE_$file"."_$i VALUE=wrong";
                print " CHECKED" if ($evaluation =~ /syn_wrong/);
                print "> imperfect English<BR>\n";
                print "<INPUT TYPE=RADIO NAME=SEM_SCORE_$file"."_$i VALUE=correct";
                print " CHECKED" if ($evaluation =~ /sem_correct/);
                print "> correct meaning\n";
                print "<INPUT TYPE=RADIO NAME=SEM_SCORE_$file"."_$i VALUE=wrong";
                print " CHECKED" if ($evaluation =~ /sem_wrong/);
                print "> incorrect meaning<BR>\n";
            }
        }
    }
    print "<P><INPUT TYPE=SUBMIT VALUE=\"Add evaluation\">\n";
    print "</FORM>\n";
}

###### MEMORY SUBS

# Load 'evaluation-memory.dat' into %MEMORY.  Each record is
# "foreign .o0O0o. translation .o0O0o. evaluation"; the key is
# "foreign .o0O0o. translation".  The legacy evaluation 'correct' is mapped
# to 'syn_correct sem_correct'.  Returns silently if the file cannot be
# opened; records without all three fields are skipped.
sub load_memory {
    open(my $memory_fh, '<', 'evaluation-memory.dat') or return;
    while (my $line = <$memory_fh>) {
        chomp $line;
        my($foreign,$translation,$evaluation) = split(/ \.o0O0o\. /, $line);
        next unless defined $evaluation;
        $evaluation = 'syn_correct sem_correct' if ($evaluation eq 'correct');
        $MEMORY{"$foreign .o0O0o. $translation"} = $evaluation;
    }
    close($memory_fh);
}

# Count the stored evaluations for a pair of line-aligned files.
# arguments: foreign file name, translation file name
# return: (correct, just_syn, just_sem, wrong, unknown) -- one count per
#         foreign line; lines with no or an unrecognised evaluation are
#         'unknown'
sub get_score_from_memory {
    my($foreign_file,$translation_file) = @_;
    my %count = ('syn_correct sem_correct' => 0,
                 'syn_correct sem_wrong'   => 0,
                 'syn_wrong sem_correct'   => 0,
                 'syn_wrong sem_wrong'     => 0);
    my $unknown = 0;
    my @FOREIGN = _read_lines($foreign_file);
    my @TRANSLATION = _read_lines($translation_file);
    for my $i (0 .. $#FOREIGN) {
        # a translation file shorter than the foreign file counts as unknown
        my $translation = defined $TRANSLATION[$i] ? $TRANSLATION[$i] : '';
        my $evaluation = get_from_memory($FOREIGN[$i],$translation);
        if ($evaluation && exists $count{$evaluation}) {
            $count{$evaluation}++;
        }
        else {
            $unknown++;
        }
    }
    return($count{'syn_correct sem_correct'},
           $count{'syn_correct sem_wrong'},
           $count{'syn_wrong sem_correct'},
           $count{'syn_wrong sem_wrong'},
           $unknown);
}

# Record an evaluation in %MEMORY and append it to 'evaluation-memory.dat'.
# arguments: foreign sentence, translation (whitespace-normalised by trim),
#            evaluation string
# Does nothing when the same evaluation is already stored.  Dies if the file
# cannot be opened for append.
sub store_in_memory {
    my($foreign,$translation,$evaluation) = @_;
    trim(\$translation);
    my $key = "$foreign .o0O0o. $translation";
    return if defined $MEMORY{$key} && $MEMORY{$key} eq $evaluation;
    $MEMORY{$key} = $evaluation;
    open(my $memory_fh, '>>', 'evaluation-memory.dat') or die "store_in_memory(): couldn't open 'evaluation-memory.dat' for append\n";
    print {$memory_fh} "$key .o0O0o. $evaluation\n";
    # buffered write errors only surface at close
    close($memory_fh) or warn "store_in_memory(): couldn't close 'evaluation-memory.dat': $!\n";
}

# Look up the stored evaluation for a sentence pair.
# arguments: foreign sentence, translation (whitespace-normalised by trim)
# return: the evaluation string from %MEMORY, or undef if none is stored
sub get_from_memory {
    my($foreign,$translation) = @_;
    trim(\$translation);
    return $MEMORY{"$foreign .o0O0o. $translation"};
}

# Normalise spaces in place: collapse runs of spaces to one and strip leading
# and trailing spaces.
# arguments: reference to the string to modify
sub trim {
    my($translation) = @_;
    return unless defined $$translation;
    $$translation =~ s/ +/ /g;
    $$translation =~ s/^ +//;
    $$translation =~ s/ +$//;
}

# Load 'file-descriptions' into %FILEDESC.  Each line is a file name followed
# by whitespace and a free-text description; blank lines are skipped.
# Dies if the file cannot be opened.
sub load_descriptions {
    open(my $fd, '<', 'file-descriptions') or die "load_descriptions(): couldn't open 'file-descriptions' for read\n";
    while (my $line = <$fd>) {
        chomp $line;
        next unless $line =~ /\S/;
        my($file,$description) = split(/\s+/,$line,2);
        $FILEDESC{$file} = $description;
    }
    close($fd);
}

#read config file giving various corpus config info
#arguments: filename to read
#return: hash of corpus names to strings containing formatted info
#lines starting with '#' and lines that are not of the form "name : info"
#are ignored; dies if the file cannot be opened
sub loadFactorData
{
    my $filename = shift;
    my %data = ();
    open(my $infile, '<', $filename) or die "loadFactorData(): couldn't open '$filename' for read\n";
    while(my $line = <$infile>)
    {
        next if $line =~ /^\#/; #skip comment lines
        # only use the captures when the match succeeded; a failed match
        # leaves $1/$2 holding values from an earlier line
        next unless $line =~ /^\s*(\S+)\s*:\s*(\S(?:.*\S)?)\s*$/;
        my ($corpusName, $info) = ($1, $2);
        $data{$corpusName} = $info;
    }
    close($infile);
    return %data;
}

###### SUBS

# Print the HTTP content-type header and the opening HTML of a page.
# arguments: page title, inserted as-is into <TITLE> and the <H2> heading
sub htmlhead {
    print <<"___ENDHTML";
Content-type: text/html

<HTML><HEAD>
<TITLE>MTEval: $_[0]</TITLE>
<SCRIPT LANGUAGE="JavaScript">

<!-- hide from old browsers

function FieldInfo(field,description) {
  popup = window.open("","popDialog","height=500,width=600,scrollbars=yes,resizable=yes");
  popup.document.write("<HTML><HEAD><TITLE>"+field+"</TITLE></HEAD><BODY BGCOLOR=#FFFFCC><CENTER><B>"+field+"</B><HR SIZE=2 NOSHADE></CENTER><PRE>"+description+"</PRE><CENTER><FORM><INPUT TYPE='BUTTON' VALUE='Okay' onClick='self.close()'></FORM><CENTER></BODY></HTML>");
  popup.focus();
  popup.document.close();
}

<!-- done hiding -->

</SCRIPT>
</HEAD>
<BODY BGCOLOR=white>
<H2>Evaluation Tool for Machine Translation<BR>$_[0]</H2>
___ENDHTML
}

############################# parts of cgi-lib.pl

# Parse the CGI request into the global hash %in.
# GET requests are read from QUERY_STRING, POST requests from STDIN
# (CONTENT_LENGTH bytes).  Pairs are separated by '&' or ';'; '+' becomes a
# space and %XX escapes are decoded.  Repeated keys are joined with "\0".
# return: the number of key/value pairs found
sub ReadParse {
    # Read in text
    my $query = '';
    if (MethGet()) {
        $query = $ENV{'QUERY_STRING'} if defined $ENV{'QUERY_STRING'};
    }
    elsif (MethPost()) {
        my $length = $ENV{'CONTENT_LENGTH'} || 0;
        read(STDIN,$query,$length);
        $query = '' unless defined $query;
    }

    my @pairs = split(/[&;]/,$query);

    foreach my $pair (@pairs) {
        # Convert plus's to spaces
        $pair =~ s/\+/ /g;

        # Split into key and value.
        my ($key, $val) = split(/=/,$pair,2); # splits on the first =.
        $key = '' unless defined $key;
        $val = '' unless defined $val;

        # Convert %XX from hex numbers to alphanumeric.  Only real hex digits
        # are decoded: anything else would turn into a NUL byte, which is the
        # multiple-value separator below.
        $key =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/ge;
        $val =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/ge;

        # Associate key and value
        $in{$key} .= "\0" if (defined($in{$key})); # \0 is the multiple separator
        $in{$key} .= $val;
    }

    return scalar(@pairs);
}

# return: true if the request method is GET
sub MethGet {
    return (defined $ENV{'REQUEST_METHOD'} && $ENV{'REQUEST_METHOD'} eq "GET");
}

# return: true if the request method is POST
sub MethPost {
    return (defined $ENV{'REQUEST_METHOD'} && $ENV{'REQUEST_METHOD'} eq "POST");
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -