📄 newsmtgui.cgi
字号:
if (!defined($REF_NGRAM{$ngram}) ||
$REF_NGRAM{$ngram} < $REF_NGRAM_N{$ngram}) {
$REF_NGRAM{$ngram} = $REF_NGRAM_N{$ngram};
}
}
}
}
$length_translation += $length_translation_this_sentence;
$length_reference += $closest_length;
for(my $n=1;$n<=4;$n++) {
my %T_NGRAM = ();
for(my $start=0;$start<=$#WORD-($n-1);$start++) {
my $ngram = "$n";
for(my $w=0;$w<$n;$w++) {
$ngram .= " ".$WORD[$start+$w];
}
$T_NGRAM{$ngram}++;
}
foreach my $ngram (keys %T_NGRAM) {
my $n = 0+$ngram;
# print "$i e $ngram $T_NGRAM{$ngram}<BR>\n";
$TOTAL[$n] += $T_NGRAM{$ngram};
if (defined($REF_NGRAM{$ngram})) {
if ($REF_NGRAM{$ngram} >= $T_NGRAM{$ngram}) {
$CORRECT[$n] += $T_NGRAM{$ngram};
# print "$i e correct1 $T_NGRAM{$ngram}<BR>\n";
}
else {
$CORRECT[$n] += $REF_NGRAM{$ngram};
# print "$i e correct2 $REF_NGRAM{$ngram}<BR>\n";
}
}
}
}
}
my $brevity_penalty = 1;
if ($length_translation<$length_reference) {
$brevity_penalty = exp(1-$length_reference/$length_translation);
}
my $bleu = $brevity_penalty * exp((my_log( $CORRECT[1]/$TOTAL[1] ) +
my_log( $CORRECT[2]/$TOTAL[2] ) +
my_log( $CORRECT[3]/$TOTAL[3] ) +
my_log( $CORRECT[4]/$TOTAL[4] ) ) / 4);
open(BLEU,">>mbleu-memory.dat");
@STAT = stat($translation_file);
printf BLEU "$translation_file $STAT[9] %f %f %f %f %f %f\n",$bleu,$CORRECT[1]/$TOTAL[1],$CORRECT[2]/$TOTAL[2],$CORRECT[3]/$TOTAL[3],$CORRECT[4]/$TOTAL[4],$brevity_penalty;
close(BLEU);
return ($bleu,
100*$CORRECT[1]/$TOTAL[1],
100*$CORRECT[2]/$TOTAL[2],
100*$CORRECT[3]/$TOTAL[3],
100*$CORRECT[4]/$TOTAL[4],
$brevity_penalty);
}
# Natural logarithm with a guard for empty input.
# Arguments: the value to take the logarithm of.
# Returns: log(value), or the large negative sentinel -9999999999 when the
#          value is false (0, '', undef), so log() is never called on zero.
sub my_log {
    my ($value) = @_;
    return $value ? log($value) : -9999999999;
}
###### SCORE TRANSLATIONS
################################ IN PROGRESS ###############################
# Render the "Compare Translations" page for the corpus named in $in{CORPUS}.
# The page header and the surrounding form are printed here; the body of the
# form is written by the Corpus object's writeComparisonPage method.
# Arguments: none. Reads the global CGI parameter hash %in as well as the
#            globals %FILEDESC and %factorData.
sub compare2
{
    # CGI parameters are untrusted: escape them before echoing them into HTML.
    my $escape_html = sub {
        my ($text) = @_;
        $text = '' unless defined $text;
        $text =~ s/&/&amp;/g;
        $text =~ s/</&lt;/g;
        $text =~ s/>/&gt;/g;
        $text =~ s/"/&quot;/g;
        return $text;
    };
    my $corpus_html = $escape_html->($in{CORPUS});
    my $action_html = $escape_html->($in{ACTION});

    &htmlhead("Compare Translations");
    print "<A HREF=\"?ACTION=VIEW_CORPUS&CORPUS=".CGI::escape($in{CORPUS})."\">View Corpus $corpus_html</A><P>\n";
    print "<FORM ACTION=\"\" METHOD=POST>\n";
    print "<INPUT TYPE=HIDDEN NAME=ACTION VALUE=\"$action_html\">\n";
    print "<INPUT TYPE=HIDDEN NAME=CORPUS VALUE=\"$corpus_html\">\n";
    my $corpus = Corpus->new('-name' => "$in{CORPUS}", '-descriptions' => \%FILEDESC, '-info_line' => $factorData{$in{CORPUS}});
    # Pass a compiled pattern. A bare /^.*$/ in an argument list is executed
    # as a match against $_ and passes its result (1, or nothing at all when
    # the match fails) instead of the pattern itself.
    # NOTE(review): assumes writeComparisonPage takes a filter pattern as its
    # second argument -- confirm against Corpus.pm.
    $corpus->writeComparisonPage(\*STDOUT, qr/^.*$/);
    print "</FORM>\n";
}
# Render the "Compare Translations" page: show, side by side, the sentences
# on which the selected system outputs disagree.
#
# Inputs (all taken from the global CGI parameter hash %in):
#   CORPUS          corpus name; files are looked up as "<CORPUS>.<extension>"
#   ACTION          echoed back as a hidden form field
#   FILE_<ext>      one key per system output selected for comparison
#   SURFACE         if true, a sentence is shown when the outputs differ as
#                   text; otherwise when their stored evaluations differ
#   SYN_SCORE_<ext>_<n> / SEM_SCORE_<ext>_<n>
#                   submitted judgements, appended to the evaluation memory
# Also reads the globals @SHOW, $FOREIGN, %SHOW_COLOR and %FILETYPE.
# Arguments: none.
sub compare {
    # CGI parameters are untrusted: escape them before echoing them into HTML.
    my $escape_html = sub {
        my ($text) = @_;
        $text = '' unless defined $text;
        $text =~ s/&/&amp;/g;
        $text =~ s/</&lt;/g;
        $text =~ s/>/&gt;/g;
        $text =~ s/"/&quot;/g;
        return $text;
    };
    # Read a file into a list of lines (newlines kept); an unreadable file
    # yields an empty list. Reading directly instead of shelling out to
    # cat/grep keeps CGI-supplied names away from the shell.
    my $read_lines = sub {
        my ($path) = @_;
        open(my $fh, '<', $path) or return ();
        my @lines = <$fh>;
        close($fh);
        return @lines;
    };

    &htmlhead("Compare Translations");
    my $corpus_html = $escape_html->($in{CORPUS});
    print "<A HREF=\"?ACTION=VIEW_CORPUS&CORPUS=".CGI::escape($in{CORPUS})."\">View Corpus $corpus_html</A><P>\n";
    print "<FORM ACTION=\"\" METHOD=POST>\n";
    print "<INPUT TYPE=HIDDEN NAME=ACTION VALUE=\"".$escape_html->($in{ACTION})."\">\n";
    print "<INPUT TYPE=HIDDEN NAME=CORPUS VALUE=\"$corpus_html\">\n";

    # get sentences
    my %SENTENCES;
    my $sentence_count;
    foreach my $param (keys %in) {
        next unless $param =~ /^FILE_(.+)$/;
        my $file = $1;
        # NOTE(review): the hidden field is named after the bare extension,
        # without the FILE_ prefix this loop looks for -- confirm that the
        # selection is meant to survive a re-submit of this form.
        print "<INPUT TYPE=HIDDEN NAME=\"".$escape_html->($file)."\" VALUE=1>\n";
        if ($file =~ /\.sgm$/) {
            # SGML output: keep only the <seg> lines and strip the markup.
            # These lines keep their trailing newline, as before.
            @{$SENTENCES{$file}} = grep { /<seg/ } $read_lines->("$in{CORPUS}.$file");
            # <= so that the last segment is stripped as well
            for(my $i=0;$i<=$#{$SENTENCES{$file}};$i++) {
                $SENTENCES{$file}[$i] =~ s/^<seg[^>]+> *(\S.+\S) *<\/seg> *$/$1/;
            }
        }
        else {
            @{$SENTENCES{$file}} = $read_lines->("$in{CORPUS}.$file");
            # chomp, not chop: a final line without a newline keeps its last character
            chomp(@{$SENTENCES{$file}});
        }
        # The count of whichever file is processed last drives the display loop.
        $sentence_count = scalar @{$SENTENCES{$file}};
    }

    my %REFERENCE;
    foreach my $ref (@SHOW) {
        next unless -e "$in{CORPUS}.$ref";
        @{$REFERENCE{$ref}} = $read_lines->("$in{CORPUS}.$ref");
        chomp(@{$REFERENCE{$ref}});
    }

    # update memory
    foreach my $param (keys %in) {
        next unless $param =~ /^SYN_SCORE_(.+)_(\d+)$/;
        my ($file, $index) = ($1, $2);
        # a judgement is only stored when both scores were submitted
        next unless $in{"SEM_SCORE_${file}_$index"};
        &store_in_memory($REFERENCE{$FOREIGN}[$index],
                         $SENTENCES{$file}[$index],
                         "syn_".$in{"SYN_SCORE_${file}_$index"}." sem_".$in{"SEM_SCORE_${file}_$index"});
    }

    # display sentences
    for(my $i=0;$i<$sentence_count;$i++)
    {
        my $evaluation = "";
        my $show = 0;
        my $surface = "";
        # Decide whether the outputs disagree on sentence $i.
        foreach my $file (keys %SENTENCES)
        {
            if ($in{SURFACE}) {
                $SENTENCES{$file}[$i] =~ s/ *$//;
                $surface = $SENTENCES{$file}[$i] if ($surface eq '');
                $show = 1 if ($SENTENCES{$file}[$i] ne $surface);
            }
            else {
                my $this_ev = &get_from_memory($REFERENCE{$FOREIGN}[$i],$SENTENCES{$file}[$i]);
                # sentences without a stored judgement count as fully wrong
                $this_ev = "syn_wrong sem_wrong" unless $this_ev;
                $evaluation = $this_ev if ($evaluation eq '');
                $show = 1 if ($evaluation ne $this_ev);
            }
        }
        next unless $show;
        print "<HR>Sentence ".($i+1).":<BR>\n";
        foreach my $ref (@SHOW) {
            if (-e "$in{CORPUS}.$ref") {
                print "<FONT COLOR=$SHOW_COLOR{$ref}>".$REFERENCE{$ref}[$i]."</FONT> (".$FILETYPE{$ref}.")<BR>\n";
            }
        }
        foreach my $file (keys %SENTENCES) {
            print "<B>$SENTENCES{$file}[$i]</B> ($file)<BR>\n";
            # NOTE(review): the reference is hard-coded to extension "e" here -- confirm.
            &color_highlight_ngrams($i,&nist_normalize_text($SENTENCES{$file}[$i]),$REFERENCE{"e"}[$i]);
            # The manual evaluation widgets are switched off by the constant 0.
            if (0 && $in{WITH_EVAL}) {
                $evaluation = &get_from_memory($REFERENCE{$FOREIGN}[$i],$SENTENCES{$file}[$i]);
                print "<INPUT TYPE=RADIO NAME=SYN_SCORE_$file"."_$i VALUE=correct";
                print " CHECKED" if ($evaluation =~ /syn_correct/);
                print "> perfect English\n";
                print "<INPUT TYPE=RADIO NAME=SYN_SCORE_$file"."_$i VALUE=wrong";
                print " CHECKED" if ($evaluation =~ /syn_wrong/);
                print "> imperfect English<BR>\n";
                print "<INPUT TYPE=RADIO NAME=SEM_SCORE_$file"."_$i VALUE=correct";
                print " CHECKED" if ($evaluation =~ /sem_correct/);
                print "> correct meaning\n";
                print "<INPUT TYPE=RADIO NAME=SEM_SCORE_$file"."_$i VALUE=wrong";
                print " CHECKED" if ($evaluation =~ /sem_wrong/);
                print "> incorrect meaning<BR>\n";
            }
        }
    }
    print "<P><INPUT TYPE=SUBMIT VALUE=\"Add evaluation\">\n";
    print "</FORM>\n";
}
###### MEMORY SUBS
# Load previously stored manual evaluations from 'evaluation-memory.dat'
# (in the current directory) into the global hash %MEMORY.
# Each line has the form "<foreign> .o0O0o. <translation> .o0O0o. <evaluation>";
# the hash key is "<foreign> .o0O0o. <translation>". The legacy evaluation
# value 'correct' is upgraded to 'syn_correct sem_correct'.
# Arguments: none.
# Returns: nothing if the file cannot be opened (no memory yet).
sub load_memory {
    open(my $memory_fh, '<', "evaluation-memory.dat") or return;
    while (my $line = <$memory_fh>) {
        # chomp, not chop: a last line without a newline must keep its final character
        chomp($line);
        my($foreign,$translation,$evaluation) = split(/ \.o0O0o\. /, $line);
        # malformed lines carry no evaluation; nothing useful to remember
        next unless defined $evaluation;
        $evaluation = 'syn_correct sem_correct' if ($evaluation eq 'correct');
        $MEMORY{"$foreign .o0O0o. $translation"} = $evaluation;
    }
    close($memory_fh);
}
# Tally the stored manual evaluations for a whole translation file.
# The two files are read line by line in parallel; each (foreign, translation)
# pair is looked up with get_from_memory.
# Arguments: path of the foreign (source) file, path of the translation file.
# Returns: the list ($correct, $just_syn, $just_sem, $wrong, $unknown), where
#          $unknown counts pairs with no stored or an unrecognised evaluation.
sub get_score_from_memory {
    my($foreign_file,$translation_file) = @_;

    # Read a file into a list of newline-free lines; an unreadable file yields
    # an empty list. Reading directly avoids passing the path through a shell.
    my $read_lines = sub {
        my ($path) = @_;
        open(my $fh, '<', $path) or return ();
        my @lines = <$fh>;
        close($fh);
        # chomp, not chop: a final line without a newline keeps its last character
        chomp(@lines);
        return @lines;
    };

    my @categories = ('syn_correct sem_correct',
                      'syn_correct sem_wrong',
                      'syn_wrong sem_correct',
                      'syn_wrong sem_wrong');
    my %count = map { $_ => 0 } @categories;
    my $unknown = 0;

    my @FOREIGN = $read_lines->($foreign_file);
    my @TRANSLATION = $read_lines->($translation_file);
    for my $i (0 .. $#FOREIGN) {
        my $evaluation = &get_from_memory($FOREIGN[$i],$TRANSLATION[$i]);
        if (defined($evaluation) && exists $count{$evaluation}) {
            $count{$evaluation}++;
        }
        else {
            $unknown++;
        }
    }
    return(@count{@categories},$unknown);
}
# Remember a manual evaluation, both in the global hash %MEMORY and by
# appending a line to 'evaluation-memory.dat' in the current directory.
# Arguments: foreign sentence, translation (space-normalised with trim before
#            use), evaluation string.
# Nothing is written when the same evaluation is already stored for the pair.
# Dies if the memory file cannot be opened for appending.
sub store_in_memory {
    my($foreign,$translation,$evaluation) = @_;
    &trim(\$translation);
    my $key = "$foreign .o0O0o. $translation";
    # the defined() guard avoids comparing an absent entry with eq
    return if defined($MEMORY{$key}) && $MEMORY{$key} eq $evaluation;
    $MEMORY{$key} = $evaluation;
    open(my $memory_fh, '>>', "evaluation-memory.dat") or die "store_in_memory(): couldn't open 'evaluation-memory.dat' for append\n";
    print $memory_fh "$key .o0O0o. $evaluation\n";
    # buffered write errors only surface at close, so report them
    close($memory_fh) or warn "store_in_memory(): couldn't close 'evaluation-memory.dat': $!\n";
}
# Look up the stored evaluation for a (foreign, translation) sentence pair.
# Arguments: foreign sentence, translation (space-normalised with trim
#            before the lookup; the foreign sentence is used as given).
# Returns: the entry of the global hash %MEMORY for the pair, or undef if
#          there is none.
sub get_from_memory {
    my $foreign     = shift;
    my $translation = shift;
    &trim(\$translation);
    my $memory_key = "$foreign .o0O0o. $translation";
    return $MEMORY{$memory_key};
}
# Normalise the spaces of a string in place.
# Arguments: a reference to the string to modify.
# Effect: runs of spaces are collapsed to a single space, then leading and
#         trailing spaces are removed. Only the space character is touched,
#         not tabs or newlines.
sub trim {
    my $text_ref = shift;
    ${$text_ref} =~ s/ +/ /g;
    ${$text_ref} =~ s/^ +//;
    return ${$text_ref} =~ s/ +$//;
}
# Load the file 'file-descriptions' (in the current directory) into the
# global hash %FILEDESC.
# Each line is "<file> <description>": the first whitespace-separated token
# is the key, the rest of the line is the value.
# Arguments: none.
# Dies if the file cannot be opened.
sub load_descriptions {
    open(my $desc_fh, '<', "file-descriptions") or die "load_descriptions(): couldn't open 'file-descriptions' for read\n";
    while (my $line = <$desc_fh>) {
        chomp($line);
        # blank lines would otherwise create a junk entry under the key ''
        next unless $line =~ /\S/;
        my($file,$description) = split(/\s+/,$line,2);
        $FILEDESC{$file} = $description;
    }
    close($desc_fh);
}
#read config file giving various corpus config info
#each useful line has the form "<corpus name> : <info>"; lines starting
#with '#' are comments, and lines that do not have that form are ignored
#arguments: filename to read
#return: hash of corpus names to strings containing formatted info
#dies if the file cannot be opened
sub loadFactorData
{
    my $filename = shift;
    my %data = ();
    open(my $infile, '<', $filename) or die "loadFactorData(): couldn't open '$filename' for read\n";
    while(my $line = <$infile>)
    {
        next if $line =~ /^\#/; #skip comment lines
        #only use the captures when the match succeeded; after a failed match
        #$1 and $2 do not describe the current line
        next unless $line =~ /^\s*(\S+)\s*:\s*(\S.*\S)\s*$/;
        $data{$1} = $2;
    }
    close($infile);
    return %data;
}
###### SUBS
# Print the CGI response header and the opening part of every page: the
# <HEAD> (with the FieldInfo popup helper script) and the page heading.
# Arguments: the page title; it is interpolated as given into the <TITLE>
#            and the <H2> heading.
# Output goes to the currently selected filehandle.
sub htmlhead {
    # The empty line after Content-type is required: it ends the CGI header
    # block, and everything after it is the response body.
    print <<"___ENDHTML";
Content-type: text/html

<HTML><HEAD>
<TITLE>MTEval: $_[0]</TITLE>
<SCRIPT LANGUAGE="JavaScript">
<!-- hide from old browsers
function FieldInfo(field,description) {
popup = window.open("","popDialog","height=500,width=600,scrollbars=yes,resizable=yes");
popup.document.write("<HTML><HEAD><TITLE>"+field+"</TITLE></HEAD><BODY BGCOLOR=#FFFFCC><CENTER><B>"+field+"</B><HR SIZE=2 NOSHADE></CENTER><PRE>"+description+"</PRE><CENTER><FORM><INPUT TYPE='BUTTON' VALUE='Okay' onClick='self.close()'></FORM><CENTER></BODY></HTML>");
popup.focus();
popup.document.close();
}
<!-- done hiding -->
</SCRIPT>
</HEAD>
<BODY BGCOLOR=white>
<H2>Evaluation Tool for Machine Translation<BR>$_[0]</H2>
___ENDHTML
}
############################# parts of cgi-lib.pl
# Parse the CGI request parameters into the global hash %in
# (adapted from cgi-lib.pl).
# For GET the parameters come from $ENV{QUERY_STRING}; for POST,
# $ENV{CONTENT_LENGTH} bytes are read from STDIN. Pairs are separated by
# '&' or ';'. When a key occurs several times its values are joined with "\0".
# Arguments: none.
# Returns: the number of key/value pairs found in the request.
sub ReadParse {
    # Read in text
    my $query = '';
    if (&MethGet()) {
        $query = $ENV{'QUERY_STRING'};
    } elsif (&MethPost()) {
        read(STDIN,$query,$ENV{'CONTENT_LENGTH'} || 0);
    }
    # no query string / failed read: treat as "no parameters"
    $query = '' unless defined $query;
    my @pairs = split(/[&;]/,$query);
    foreach my $pair (@pairs) {
        # Convert plus's to spaces
        $pair =~ s/\+/ /g;
        # Split into key and value.
        my ($key, $val) = split(/=/,$pair,2); # splits on the first =.
        # a pair without '=' (or an empty pair) has no value (or no key)
        $key = '' unless defined $key;
        $val = '' unless defined $val;
        # Convert %XX from hex numbers to the corresponding byte. Only real
        # hex digits are decoded: a malformed escape such as "%zz" used to
        # turn into a NUL byte, which is also the multiple-value separator.
        s/%([0-9A-Fa-f]{2})/chr(hex($1))/ge for ($key, $val);
        # Associate key and value
        $in{$key} .= "\0" if (defined($in{$key})); # \0 is the multiple separator
        $in{$key} .= $val;
    }
    return scalar(@pairs);
}
# Tell whether the current CGI request uses the GET method.
# Returns: true if $ENV{REQUEST_METHOD} is exactly "GET", false otherwise.
sub MethGet {
    my $request_method = $ENV{'REQUEST_METHOD'};
    return ($request_method eq "GET");
}
# Tell whether the current CGI request uses the POST method.
# Returns: true if $ENV{REQUEST_METHOD} is exactly "POST", false otherwise.
sub MethPost {
    my $request_method = $ENV{'REQUEST_METHOD'};
    return ($request_method eq "POST");
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -