📄 newsmtgui.cgi

📁 moses开源的机器翻译系统
💻 CGI
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
	  if (!defined($REF_NGRAM{$ngram}) || 
	      $REF_NGRAM{$ngram} < $REF_NGRAM_N{$ngram}) {
	    $REF_NGRAM{$ngram} = $REF_NGRAM_N{$ngram};
	  }
	}
      }
    }
    $length_translation += $length_translation_this_sentence;
    $length_reference += $closest_length;
    for(my $n=1;$n<=4;$n++) {
      my %T_NGRAM = ();
      for(my $start=0;$start<=$#WORD-($n-1);$start++) {
	my $ngram = "$n";
	for(my $w=0;$w<$n;$w++) {
	  $ngram .= " ".$WORD[$start+$w];
	}
	$T_NGRAM{$ngram}++;
      }
      foreach my $ngram (keys %T_NGRAM) {
	my $n = 0+$ngram;
#	print "$i e $ngram $T_NGRAM{$ngram}<BR>\n";
	$TOTAL[$n] += $T_NGRAM{$ngram};
	if (defined($REF_NGRAM{$ngram})) {
	  if ($REF_NGRAM{$ngram} >= $T_NGRAM{$ngram}) {
	    $CORRECT[$n] += $T_NGRAM{$ngram};
#	    print "$i e correct1 $T_NGRAM{$ngram}<BR>\n";
	  }
	  else {
	    $CORRECT[$n] += $REF_NGRAM{$ngram};
#	    print "$i e correct2 $REF_NGRAM{$ngram}<BR>\n";
	  }
	}
      }
    }
  }
  my $brevity_penalty = 1;
  if ($length_translation<$length_reference) {
    $brevity_penalty = exp(1-$length_reference/$length_translation);
  }
  my $bleu = $brevity_penalty * exp((my_log( $CORRECT[1]/$TOTAL[1] ) +
				     my_log( $CORRECT[2]/$TOTAL[2] ) +
				     my_log( $CORRECT[3]/$TOTAL[3] ) +
				     my_log( $CORRECT[4]/$TOTAL[4] ) ) / 4);

  open(BLEU,">>mbleu-memory.dat");
  @STAT = stat($translation_file);
  printf BLEU "$translation_file $STAT[9] %f %f %f %f %f %f\n",$bleu,$CORRECT[1]/$TOTAL[1],$CORRECT[2]/$TOTAL[2],$CORRECT[3]/$TOTAL[3],$CORRECT[4]/$TOTAL[4],$brevity_penalty;
  close(BLEU);
  
  return ($bleu,
	  100*$CORRECT[1]/$TOTAL[1],
	  100*$CORRECT[2]/$TOTAL[2],
	  100*$CORRECT[3]/$TOTAL[3],
	  100*$CORRECT[4]/$TOTAL[4],
	  $brevity_penalty);
}

# Logarithm that tolerates a zero argument.
#   argument: a number
#   return:   log(argument); for a false argument (0, '' or undef) the large
#             negative sentinel -9999999999 is returned instead of dying
sub my_log {
  my ($value) = @_;
  if ($value) {
    return log($value);
  }
  return -9999999999;
}


###### SCORE TRANSLATIONS

################################ IN PROGRESS ###############################
# Work-in-progress variant of compare(): prints the page header and the
# form's hidden fields, then lets a Corpus object write the comparison page
# to STDOUT. Reads the CGI parameters ACTION and CORPUS from the global %in
# and uses the globals %FILEDESC and %factorData to construct the Corpus.
sub compare2
{
	# Escape text for use in HTML content and double-quoted attribute values,
	# so request parameters cannot inject markup into the page.
	my $escape_html = sub {
		my ($text) = @_;
		$text = '' unless defined $text;
		$text =~ s/&/&amp;/g;
		$text =~ s/</&lt;/g;
		$text =~ s/>/&gt;/g;
		$text =~ s/"/&quot;/g;
		return $text;
	};

	&htmlhead("Compare Translations");
	print "<A HREF=\"?ACTION=VIEW_CORPUS&CORPUS=".CGI::escape($in{CORPUS})."\">View Corpus ".$escape_html->($in{CORPUS})."</A><P>\n";
	print "<FORM ACTION=\"\" METHOD=POST>\n";
	print "<INPUT TYPE=HIDDEN NAME=ACTION VALUE=\"".$escape_html->($in{ACTION})."\">\n";
	print "<INPUT TYPE=HIDDEN NAME=CORPUS VALUE=\"".$escape_html->($in{CORPUS})."\">\n";
	my $corpus = Corpus->new('-name' => "$in{CORPUS}", '-descriptions' => \%FILEDESC, '-info_line' => $factorData{$in{CORPUS}});
	# A bare /^.*$/ in an argument list is executed as a match against $_ and
	# passes its result (1, or nothing at all) instead of a pattern; qr//
	# passes the match-everything pattern itself.
	# NOTE(review): assumes writeComparisonPage() takes a filter regex as its
	# second argument -- confirm against Corpus.pm.
	$corpus->writeComparisonPage(\*STDOUT, qr/^.*$/);
	print "</FORM>\n";
}

# Print an HTML page listing the sentences on which the selected translation
# files differ, next to the reference files named in the global @SHOW, and
# store any judgements submitted with the request.
#
# Input is taken from the global CGI parameter hash %in:
#   CORPUS                corpus name; files are read from "<CORPUS>.<ext>"
#   ACTION                echoed back as a hidden form field
#   FILE_<ext>            selects the translation file "<CORPUS>.<ext>"
#   SURFACE               if true, compare surface strings rather than
#                         stored judgements
#   SYN_SCORE_<ext>_<i>,
#   SEM_SCORE_<ext>_<i>   judgements to store for sentence <i> of file <ext>
# Other globals read: @SHOW, %SHOW_COLOR, %FILETYPE, $FOREIGN.
# Output: HTML on STDOUT. No meaningful return value.
sub compare {
  # Escape text for use in HTML content and double-quoted attribute values,
  # so request parameters cannot inject markup into the page.
  my $escape_html = sub {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
  };

  # Return all lines of a file (empty list if it cannot be opened). Reading
  # the file directly, rather than through `cat`/`grep` in backticks, keeps
  # the request parameters away from the shell.
  my $read_lines = sub {
    my ($path) = @_;
    open(my $fh, '<', $path) or return ();
    my @lines = <$fh>;
    close($fh);
    return @lines;
  };

  &htmlhead("Compare Translations");
  print "<A HREF=\"?ACTION=VIEW_CORPUS&CORPUS=".CGI::escape($in{CORPUS})."\">View Corpus ".$escape_html->($in{CORPUS})."</A><P>\n";
  print "<FORM ACTION=\"\" METHOD=POST>\n";
  print "<INPUT TYPE=HIDDEN NAME=ACTION VALUE=\"".$escape_html->($in{ACTION})."\">\n";
  print "<INPUT TYPE=HIDDEN NAME=CORPUS VALUE=\"".$escape_html->($in{CORPUS})."\">\n";

  # get sentences
  my %SENTENCES;
  # Holds the line count of whichever selected file was processed last.
  my $sentence_count = 0;
  foreach my $param (keys %in) {
    next unless $param =~ /^FILE_(.+)$/;
    my $file = $1;
    # NOTE(review): the hidden field is named "<ext>", not "FILE_<ext>", so a
    # re-submitted form does not re-select the file -- confirm this is intended.
    print "<INPUT TYPE=HIDDEN NAME=\"".$escape_html->($file)."\" VALUE=1>\n";
    my @lines = $read_lines->("$in{CORPUS}.$file");
    if ($file =~ /\.sgm$/) {
      # Keep only the <seg> lines and strip the markup from every one of
      # them, including the last.
      # NOTE(review): these lines keep their trailing newline; only the
      # plain-text branch below removes it.
      @lines = grep { /<seg/ } @lines;
      foreach my $line (@lines) {
        $line =~ s/^<seg[^>]+> *(\S.+\S) *<\/seg> *$/$1/;
      }
    }
    else {
      # chomp, not chop: a final line without a newline must stay intact.
      chomp(@lines);
    }
    $SENTENCES{$file} = \@lines;
    $sentence_count = scalar @lines;
  }

  # Load each reference file listed in @SHOW that exists for this corpus.
  my %REFERENCE;
  foreach my $ref (@SHOW) {
    my $path = "$in{CORPUS}.$ref";
    next unless -e $path;
    my @lines = $read_lines->($path);
    chomp(@lines);
    $REFERENCE{$ref} = \@lines;
  }

  # update memory
  # A judgement is stored only when both the syntactic and the semantic score
  # were submitted for the same file and sentence.
  foreach my $param (keys %in) {
    next unless $param =~ /^SYN_SCORE_(.+)_(\d+)$/;
    my ($file, $index) = ($1, $2);
    my $sem_score = $in{"SEM_SCORE_${file}_$index"};
    next unless $sem_score;
    &store_in_memory($REFERENCE{$FOREIGN}[$index],
                     $SENTENCES{$file}[$index],
                     "syn_".$in{"SYN_SCORE_${file}_$index"}." sem_".$sem_score);
  }

  # display sentences
  for(my $i=0;$i<$sentence_count;$i++)
  {
    my $evaluation = "";
    my $show = 0;
    my $surface = "";
    # A sentence is shown only if the selected files disagree on it.
    foreach my $file (keys %SENTENCES)
    {
      if ($in{SURFACE}) {
        # Disagreement = different text once trailing spaces are removed.
        $SENTENCES{$file}[$i] =~ s/ *$//;
        $surface = $SENTENCES{$file}[$i] if ($surface eq '');
        $show = 1 if ($SENTENCES{$file}[$i] ne $surface);
      }
      else {
        # Disagreement = different stored judgement; a sentence without a
        # stored judgement counts as "syn_wrong sem_wrong".
        my $this_ev = &get_from_memory($REFERENCE{$FOREIGN}[$i],$SENTENCES{$file}[$i]);
        $this_ev = "syn_wrong sem_wrong" unless $this_ev;
        $evaluation = $this_ev if ($evaluation eq '');
        $show = 1 if ($evaluation ne $this_ev);
      }
    }
    next unless $show;
    print "<HR>Sentence ".($i+1).":<BR>\n";
    foreach my $ref (@SHOW) {
      if (-e "$in{CORPUS}.$ref") {
        print "<FONT COLOR=$SHOW_COLOR{$ref}>".$REFERENCE{$ref}[$i]."</FONT> (".$FILETYPE{$ref}.")<BR>\n";
      }
    }
    foreach my $file (keys %SENTENCES) {
      print "<B>$SENTENCES{$file}[$i]</B> ($file)<BR>\n";
      # NOTE(review): the reference extension "e" is hard-coded here.
      &color_highlight_ngrams($i,&nist_normalize_text($SENTENCES{$file}[$i]),$REFERENCE{"e"}[$i]);
      # The judgement radio buttons are switched off by the constant 0.
      if (0 && $in{WITH_EVAL}) {
        $evaluation = &get_from_memory($REFERENCE{$FOREIGN}[$i],$SENTENCES{$file}[$i]);
        print "<INPUT TYPE=RADIO NAME=SYN_SCORE_$file"."_$i VALUE=correct";
        print " CHECKED" if ($evaluation =~ /syn_correct/);
        print "> perfect English\n";
        print "<INPUT TYPE=RADIO NAME=SYN_SCORE_$file"."_$i VALUE=wrong";
        print " CHECKED" if ($evaluation =~ /syn_wrong/);
        print "> imperfect English<BR>\n";
        print "<INPUT TYPE=RADIO NAME=SEM_SCORE_$file"."_$i VALUE=correct";
        print " CHECKED" if ($evaluation =~ /sem_correct/);
        print "> correct meaning\n";
        print "<INPUT TYPE=RADIO NAME=SEM_SCORE_$file"."_$i VALUE=wrong";
        print " CHECKED" if ($evaluation =~ /sem_wrong/);
        print "> incorrect meaning<BR>\n";
      }
    }
  }
  print "<P><INPUT TYPE=SUBMIT VALUE=\"Add evaluation\">\n";
  print "</FORM>\n";
}

###### MEMORY SUBS

# Load stored judgements from 'evaluation-memory.dat' (in the current
# directory) into the global %MEMORY.
#
# Each line has the form
#   <foreign> .o0O0o. <translation> .o0O0o. <evaluation>
# and is stored as $MEMORY{"<foreign> .o0O0o. <translation>"} = <evaluation>.
# The legacy evaluation 'correct' is upgraded to 'syn_correct sem_correct'.
# Returns silently (empty return) when the file cannot be opened.
sub load_memory {
  open(my $memory_fh, '<', "evaluation-memory.dat") or return;
  # A lexical line variable is used so the caller's $_ is left alone.
  while (my $line = <$memory_fh>) {
    # chomp, not chop: a final record without a newline must not lose its
    # last character.
    chomp $line;
    my($foreign,$translation,$evaluation) = split(/ \.o0O0o\. /, $line);
    # Skip blank/malformed lines that do not even carry a translation field.
    next unless defined $translation;
    $evaluation = 'syn_correct sem_correct' if (defined($evaluation) && $evaluation eq 'correct');
    $MEMORY{"$foreign .o0O0o. $translation"} = $evaluation;
  }
  close($memory_fh);
}

# Count the stored judgements for a translation file.
#   arguments: path of the foreign (source) file, path of the translation
#              file; the two are paired line by line
#   return:    list ($correct, $just_syn, $just_sem, $wrong, $unknown), where
#              $unknown counts sentence pairs with no stored judgement or
#              with a judgement other than the four known combinations
# One pair is counted per line of the foreign file.
sub get_score_from_memory {
  my($foreign_file,$translation_file) = @_;

  # Read a file's lines without their newlines. The file is opened directly
  # instead of through `cat` in backticks, so names containing spaces or
  # shell metacharacters are handled correctly; an unreadable file yields an
  # empty list and a warning.
  my $read_lines = sub {
    my ($path) = @_;
    my $fh;
    unless (open($fh, '<', $path)) {
      warn "get_score_from_memory(): couldn't open '$path' for read: $!\n";
      return ();
    }
    my @lines = <$fh>;
    close($fh);
    # chomp, not chop: a final line without a newline must stay intact.
    chomp(@lines);
    return @lines;
  };

  my @FOREIGN = $read_lines->($foreign_file);
  my @TRANSLATION = $read_lines->($translation_file);

  # One counter per known judgement; everything else is "unknown".
  my %tally = (
    'syn_correct sem_correct' => 0,
    'syn_correct sem_wrong'   => 0,
    'syn_wrong sem_correct'   => 0,
    'syn_wrong sem_wrong'     => 0,
  );
  my $unknown = 0;
  for my $i (0 .. $#FOREIGN) {
    my $evaluation = &get_from_memory($FOREIGN[$i],$TRANSLATION[$i]);
    if ($evaluation && exists $tally{$evaluation}) {
      $tally{$evaluation}++;
    }
    else {
      $unknown++;
    }
  }
  return($tally{'syn_correct sem_correct'},
         $tally{'syn_correct sem_wrong'},
         $tally{'syn_wrong sem_correct'},
         $tally{'syn_wrong sem_wrong'},
         $unknown);
}

# Remember a judgement for a (foreign sentence, translation) pair.
#   arguments: foreign sentence, translation, evaluation string
# The translation is whitespace-normalised with trim() first. If the global
# %MEMORY already holds exactly this evaluation for the pair nothing happens;
# otherwise %MEMORY is updated and a record is appended to
# 'evaluation-memory.dat'. Dies if that file cannot be opened for append.
sub store_in_memory {
  my ($source_sentence, $output, $judgement) = @_;
  &trim(\$output);
  my $memory_key = "$source_sentence .o0O0o. $output";

  # Already stored with the same judgement: no need to write it again.
  if ($MEMORY{$memory_key} eq $judgement) {
    return;
  }

  $MEMORY{$memory_key} = $judgement;
  open(my $memory_fh, '>>', "evaluation-memory.dat") or die "store_in_memory(): couldn't open 'evaluation-memory.dat' for append\n";
  print $memory_fh "$memory_key .o0O0o. $judgement\n";
  close($memory_fh);
}

# Look up the stored judgement for a (foreign sentence, translation) pair.
#   arguments: foreign sentence, translation
#   return:    the evaluation string held in the global %MEMORY, or undef if
#              the pair has not been judged
# The translation is whitespace-normalised with trim() before the lookup.
sub get_from_memory {
  my $source_sentence = shift;
  my $output = shift;
  &trim(\$output);
  my $memory_key = "$source_sentence .o0O0o. $output";
  return $MEMORY{$memory_key};
}

# Normalise the spaces of a string in place.
#   argument: reference to the string to modify
#   return:   result of the final substitution (true if trailing spaces
#             were removed)
# Runs of spaces are collapsed to a single space, then leading and trailing
# spaces are removed. Only the space character is treated as whitespace.
sub trim {
  my $text_ref = shift;
  my $text = ${$text_ref};
  $text =~ s/ +/ /g;
  $text =~ s/^ +//;
  my $stripped_tail = ($text =~ s/ +$//);
  ${$text_ref} = $text;
  return $stripped_tail;
}

# Fill the global %FILEDESC from the file 'file-descriptions' in the current
# directory. Each line is split at its first run of whitespace into a file
# name and a description: $FILEDESC{<name>} = <description>.
# Dies if the file cannot be opened.
sub load_descriptions {
  open(my $desc_fh, '<', "file-descriptions") or die "load_descriptions(): couldn't open 'file-descriptions' for read\n";
  while (defined(my $entry = <$desc_fh>)) {
    chomp $entry;
    # Limit of 2: everything after the first whitespace run is the description.
    my @fields = split(/\s+/, $entry, 2);
    $FILEDESC{$fields[0]} = $fields[1];
  }
  close($desc_fh);
}

#read config file giving various corpus config info
#each useful line has the form "<corpus name> : <info>"; lines starting with
#'#' are comments, and lines that do not have that form are ignored
#arguments: filename to read
#return: hash of corpus names to strings containing formatted info
#dies if the file cannot be opened
sub loadFactorData
{
	my $filename = shift;
	my %data = ();
	# three-argument open: the filename is never interpreted as part of the mode
	open(my $infile, '<', $filename) or die "loadFactorData(): couldn't open '$filename' for read\n";
	while(my $line = <$infile>)
	{
		next if $line =~ /^\#/; #skip comment lines
		# Only use $1/$2 when the match succeeded; after a failed match they
		# are stale or undefined and would create junk entries.
		# The value may be a single non-space character or longer.
		next unless $line =~ /^\s*(\S+)\s*:\s*(\S(?:.*\S)?)\s*$/;
		$data{$1} = $2;
	}
	close($infile);
	return %data;
}

###### SUBS

# Print the HTTP header and the opening part of the HTML page.
#   argument: page title; it is placed in the <TITLE> element (after
#             "MTEval: ") and in the <H2> heading
# The page head also defines the JavaScript function FieldInfo(), which
# opens a popup window showing a field name and its description.
sub htmlhead {
  my ($title) = @_;
  my $page_head = <<"___ENDHTML";
Content-type: text/html

<HTML><HEAD>
<TITLE>MTEval: $title</TITLE>
<SCRIPT LANGUAGE="JavaScript">

<!-- hide from old browsers

function FieldInfo(field,description) {
  popup = window.open("","popDialog","height=500,width=600,scrollbars=yes,resizable=yes");
  popup.document.write("<HTML><HEAD><TITLE>"+field+"</TITLE></HEAD><BODY BGCOLOR=#FFFFCC><CENTER><B>"+field+"</B><HR SIZE=2 NOSHADE></CENTER><PRE>"+description+"</PRE><CENTER><FORM><INPUT TYPE='BUTTON' VALUE='Okay' onClick='self.close()'></FORM><CENTER></BODY></HTML>");
  popup.focus();
  popup.document.close();
}

<!-- done hiding -->

</SCRIPT>
</HEAD>
<BODY BGCOLOR=white>
<H2>Evaluation Tool for Machine Translation<BR>$title</H2>
___ENDHTML
  print $page_head;
}


############################# parts of cgi-lib.pl


# Parse the CGI request parameters into the global hash %in.
#
# The raw data is QUERY_STRING for GET requests, or CONTENT_LENGTH bytes
# read from STDIN for POST requests; for any other request method nothing is
# parsed. Pairs are separated by '&' or ';'. '+' is converted to a space and
# %XX escapes are decoded in both key and value. When a key occurs more than
# once its values are joined with "\0". A pair without '=' gets an empty
# value.
#   return: number of key/value pairs found
sub ReadParse {
  # Read in text
  my $query;
  if (MethGet()) {
    $query = $ENV{'QUERY_STRING'};
  } elsif (MethPost()) {
    read(STDIN,$query,$ENV{'CONTENT_LENGTH'} || 0);
  }
  $query = '' unless defined $query;

  my @pairs = split(/[&;]/,$query);

  foreach my $pair (@pairs) {
    # Convert plus's to spaces (before %XX decoding, so an encoded %2B
    # survives as a literal '+')
    (my $text = $pair) =~ s/\+/ /g;

    # Split into key and value.
    my ($key, $val) = split(/=/,$text,2); # splits on the first =.
    $key = '' unless defined $key;
    $val = '' unless defined $val;

    # Convert %XX from hex numbers to the corresponding byte. Only real hex
    # digits are decoded: a stray "%zz" stays literal instead of turning
    # into "\0", which would be mistaken for the multiple-value separator.
    # pack "C" (unsigned) is used because bytes above 0x7F do not fit the
    # signed "c" template.
    $key =~ s/%([0-9A-Fa-f]{2})/pack("C",hex($1))/ge;
    $val =~ s/%([0-9A-Fa-f]{2})/pack("C",hex($1))/ge;

    # Associate key and value
    $in{$key} .= "\0" if (defined($in{$key})); # \0 is the multiple separator
    $in{$key} .= $val;
  }

  return scalar(@pairs);
}

# True if the REQUEST_METHOD environment variable is exactly "GET".
sub MethGet {
  my $request_method = $ENV{'REQUEST_METHOD'};
  return $request_method eq "GET";
}

# True if the REQUEST_METHOD environment variable is exactly "POST".
sub MethPost {
  my $request_method = $ENV{'REQUEST_METHOD'};
  return $request_method eq "POST";
}
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -