📄 rouge.java
字号:
package dragon.ir.summarize;
import dragon.nlp.*;
import dragon.nlp.compare.*;
import dragon.nlp.extract.*;
import dragon.nlp.tool.*;
import dragon.util.*;
import java.util.*;
import java.util.ArrayList;
/**
* <p>A Program for Summarization Evaluation</p>
* <p>We only implemented and tested the ROUGE-N metric so far</p>
* <p>Copyright: Copyright (c) 2006</p>
* <p>Company: Drexel University</p>
* @author Xiaodan Zhang, Davis Zhou
* @version 1.0
*/
public class ROUGE {
// Metric selectors: one of these is stored in the 'metric' field and dispatched on by evaluate().
// Note: evaluate() has no branch for ROUGE_W, so selecting it makes evaluate() return false.
public static final int ROUGE_N=1;
public static final int ROUGE_L=2;
public static final int ROUGE_W=3;
public static final int ROUGE_S=4;
public static final int ROUGE_SU=5;
// Aggregation modes over the per-reference scores; consumed by getEvaResult()
// (maximum, minimum or average across all reference summaries).
public static final int MULTIPLE_MAX=1;
public static final int MULTIPLE_MIN=2;
public static final int MULTIPLE_AVG=3;
// Default stopword list; setStopwordOption(true) resolves it against EnvVariable.getDragonHome().
private static final String stopwordFile="nlpdata/rouge/rouge.stopword";
//private static final String wordDelimitor=" \r\n\t_-;,?/\"'`:(){}!+[]><=%$#*@&^~|\\";
// Tokenizer that also carries the lemmatiser and stopword-filter settings configured through the setters.
private TokenExtractor tokenExtractor;
private double beta; //control the f-score
// One row per reference summary; columns are 0=recall, 1=precision, 2=F-score
// (see getRecall(), getPrecision(), getFScore() and printResult()).
private double[][] evaStat; //store evaluation result
private int metric; //ROUGE metrics
// One of the MULTIPLE_* constants. It is not assigned by the constructor, so it stays 0
// (matching none of them) and getEvaResult() returns -1 until setMultipleReferenceMode() is called.
private int multipleMode;
private int gram; //used for ROUGE-N metric
private int maxSkip; //used for ROUGE-S metric
// Set through setCaseOption(); NOTE(review): the code that reads it is outside the visible part of the file.
private boolean caseSensitive;
/**
 * Creates an evaluator with the default configuration: ROUGE-N with bigrams
 * (gram = 2), beta = 1.0, case-insensitive, and a BasicTokenExtractor built
 * with a null argument.
 * <p>The multiple-reference mode and maxSkip are not assigned here and keep
 * their default value of 0; call setMultipleReferenceMode(int) before
 * reading precision, recall or F-score.</p>
 */
public ROUGE() {
    this.metric=ROUGE_N;
    this.gram=2;
    this.beta=1.0;
    this.caseSensitive=false;
    this.tokenExtractor=new BasicTokenExtractor(null);
}
/**
 * Sets the beta parameter of the F-score.
 * Values that are not strictly positive (including NaN) are ignored and the
 * previous beta is kept.
 * @param beta the new beta value
 */
public void setBeta(double beta){
    // Written as a negated "> 0" so that NaN is rejected as well.
    if(!(beta>0)){
        return;
    }
    this.beta=beta;
}
/**
 * @return the beta value currently stored for the F-score
 */
public double getBeta(){
    return this.beta;
}
/**
 * Hands the given lemmatiser to the underlying token extractor.
 * @param lemmatiser the lemmatiser to forward to the token extractor
 */
public void setLemmatiser(Lemmatiser lemmatiser){
    this.tokenExtractor.setLemmatiser(lemmatiser);
}
/**
 * @return the lemmatiser reported by the underlying token extractor
 */
public Lemmatiser getLemmatiser(){
    return this.tokenExtractor.getLemmatiser();
}
/**
 * Switches lemmatisation on or off.
 * @param option true installs a new PorterStemmer on the token extractor;
 *               false sets the token extractor's lemmatiser to null
 */
public void setLemmatiserOption(boolean option){
    Lemmatiser chosen=null;
    if(option){
        chosen=new PorterStemmer();
    }
    tokenExtractor.setLemmatiser(chosen);
}
/**
 * @return true if the token extractor currently has a non-null lemmatiser
 */
public boolean getLemmatiserOption(){
    Lemmatiser current=tokenExtractor.getLemmatiser();
    return current!=null;
}
/**
 * Chooses how scores from several reference summaries are combined.
 * The value is stored without validation; anything other than MULTIPLE_MAX,
 * MULTIPLE_MIN or MULTIPLE_AVG makes the score getters return -1.
 * @param mode one of the MULTIPLE_* constants
 */
public void setMultipleReferenceMode(int mode){
    multipleMode=mode;
}
/**
 * Enables or disables stopword handling on the token extractor.
 * @param option true installs a BasicConceptFilter built from the default
 *               stopword file under the Dragon home directory; false turns
 *               the extractor's filtering option off
 */
public void setStopwordOption(boolean option){
    if(!option){
        tokenExtractor.setFilteringOption(false);
        return;
    }
    String defaultPath=EnvVariable.getDragonHome()+"/"+stopwordFile;
    tokenExtractor.setConceptFilter(new BasicConceptFilter(defaultPath));
}
/**
 * @return the filtering option reported by the underlying token extractor
 */
public boolean getStopwordOption(){
    return this.tokenExtractor.getFilteringOption();
}
/**
 * Installs a BasicConceptFilter built from the given stopword file on the
 * token extractor. The path is passed to BasicConceptFilter unchanged.
 * @param stopwordFile the stopword file to load (shadows the class default)
 */
public void setStopwordFile(String stopwordFile){
    BasicConceptFilter customFilter=new BasicConceptFilter(stopwordFile);
    tokenExtractor.setConceptFilter(customFilter);
}
/**
 * Stores the case-sensitivity flag.
 * @param sensitive the new value of the flag
 */
public void setCaseOption(boolean sensitive){
    this.caseSensitive=sensitive;
}
/**
 * @return the stored case-sensitivity flag
 */
public boolean getCaseOption(){
    return this.caseSensitive;
}
/**
 * Selects the ROUGE-N metric for subsequent evaluate() calls.
 * The n-gram size is stored without validation.
 * @param gram the n-gram size to use
 */
public void useRougeN(int gram){
    metric=ROUGE_N;
    this.gram=gram;
}
/**
 * @return the n-gram size stored for the ROUGE-N metric
 */
public int getGram(){
    return this.gram;
}
/**
 * Selects the ROUGE-S metric with the skip limit set to Integer.MAX_VALUE.
 */
public void useRougeS(){
    metric=ROUGE_S;
    maxSkip=Integer.MAX_VALUE;
}
/**
 * Selects the ROUGE-S metric with the given skip limit.
 * The limit is stored without validation.
 * @param maxSkip the skip limit passed on to the skip-bigram helpers
 */
public void useRougeS(int maxSkip){
    metric=ROUGE_S;
    this.maxSkip=maxSkip;
}
/**
 * @return the precision of the last evaluation (column 1 of the result
 *         table), combined across references by getEvaResult
 */
public double getPrecision(){
    final int precisionColumn=1;
    return getEvaResult(precisionColumn);
}
/**
 * @return the recall of the last evaluation (column 0 of the result
 *         table), combined across references by getEvaResult
 */
public double getRecall(){
    final int recallColumn=0;
    return getEvaResult(recallColumn);
}
/**
 * @return the F-score of the last evaluation (column 2 of the result
 *         table), combined across references by getEvaResult
 */
public double getFScore(){
    final int fScoreColumn=2;
    return getEvaResult(fScoreColumn);
}
/**
 * Collapses one column of the per-reference result table into a single score.
 * Requires that an evaluation has already filled evaStat; otherwise the
 * access to evaStat fails with a NullPointerException.
 * @param dimension column to read: 0 = recall, 1 = precision, 2 = F-score
 * @return the maximum, average or minimum of that column according to the
 *         multiple-reference mode, or -1 if the mode is none of the
 *         MULTIPLE_* constants
 */
private double getEvaResult(int dimension){
    double[] column=new double[evaStat.length];
    int row=0;
    while(row<column.length){
        column[row]=evaStat[row][dimension];
        row++;
    }

    switch(multipleMode){
        case MULTIPLE_MAX:
            return MathUtil.max(column);
        case MULTIPLE_AVG:
            return MathUtil.average(column);
        case MULTIPLE_MIN:
            return MathUtil.min(column);
        default:
            return -1;
    }
}
/**
 * Scores a candidate summary against the reference summaries with the
 * currently selected metric; the results are stored in the result table.
 * @param testSummary the summary to evaluate
 * @param refSummaries the reference summaries to compare against
 * @return true if the selected metric is ROUGE_N, ROUGE_S, ROUGE_L or
 *         ROUGE_SU and its computation was run; false for any other metric
 *         value (including ROUGE_W), in which case nothing is computed
 */
public synchronized boolean evaluate(String testSummary, String[] refSummaries){
    switch(metric){
        case ROUGE_N:
            computeRougeN(testSummary,refSummaries);
            return true;
        case ROUGE_S:
            computeRougeS(testSummary,refSummaries);
            return true;
        case ROUGE_L:
            computeRougeL(testSummary,refSummaries);
            return true;
        case ROUGE_SU:
            computeRougeSU(testSummary,refSummaries);
            return true;
        default:
            return false;
    }
}
/**
 * Prints the result table to standard output: a 50-dash rule, then for each
 * reference summary its 1-based number with recall, precision and F-score
 * followed by a blank line, then a closing 50-dash rule.
 * Requires that an evaluation has already filled evaStat.
 */
public void printResult() {
    StringBuffer dashes=new StringBuffer(50);
    while(dashes.length()<50){
        dashes.append("-");
    }
    String rule=dashes.toString();

    System.out.println(rule);
    for (int ref = 0; ref < evaStat.length; ref++) {
        double[] scores=evaStat[ref];
        System.out.println("ReferenceModel: " + (ref + 1));
        System.out.println("Average_R: " + scores[0]);
        System.out.println("Average_P: " + scores[1]);
        System.out.println("Average_F: " + scores[2]);
        System.out.println();
    }
    System.out.println(rule);
}
/**
 * Computes ROUGE-N for the candidate summary against every reference and
 * stores recall, precision and F-score in evaStat (one row per reference).
 * <p>The n-gram total of a token list is size - gram + 1. Recall is
 * match / reference total and precision is match / candidate total; either
 * is set to 0 when its total is not positive.</p>
 * @param testSummary the candidate summary
 * @param refSummaries the reference summaries
 */
private void computeRougeN(String testSummary, String[] refSummaries) {
    ArrayList candidateTokens, refTokens;
    HashMap candidateGrams, refGrams;
    int candidateTotal, refTotal, overlap, r;
    double recall, precision;

    candidateTokens=tokenize(testSummary);
    evaStat=new double[refSummaries.length][3];
    candidateGrams=computeNgrams(candidateTokens,gram);
    candidateTotal=candidateTokens.size()-gram+1;

    r=0;
    while(r<refSummaries.length){
        refTokens=tokenize(refSummaries[r]);
        refGrams=computeNgrams(refTokens,gram);
        overlap=matchNgrams(candidateGrams,refGrams);
        refTotal=refTokens.size()-gram+1;

        // A summary shorter than the n-gram size has no n-grams: score 0.
        recall=(refTotal<=0) ? 0 : (double)overlap/refTotal;
        precision=(candidateTotal<=0) ? 0 : overlap/(double)candidateTotal;

        evaStat[r][0]=recall;
        evaStat[r][1]=precision;
        evaStat[r][2]=computeFScore(precision,recall);
        r++;
    }
}
/**
 * Computes ROUGE-S (skip-bigram co-occurrence) for the candidate summary
 * against every reference and stores recall, precision and F-score in
 * evaStat (one row per reference).
 * <p>Recall is match / number of skip-bigrams in the reference and
 * precision is match / number of skip-bigrams in the candidate; either is
 * set to 0 when its denominator is not positive.</p>
 * @param testSummary the candidate summary
 * @param refSummaries the reference summaries
 */
private void computeRougeS(String testSummary, String[] refSummaries) {
    HashSet hashGrams;
    SimpleElementList keyList;
    ArrayList testList, referenceList;
    int match, reference, test, j;

    // One key list is passed to index() for the candidate and every reference;
    // presumably this keeps token indices comparable across them (index() is not visible here).
    keyList=new SimpleElementList();
    testList=index(tokenize(testSummary),keyList);
    test=countSkipBigram(testList.size(),maxSkip);
    evaStat = new double[refSummaries.length][3];
    for (j = 0; j < refSummaries.length; j++) {
        referenceList=index(tokenize(refSummaries[j]),keyList);
        hashGrams = computeSkipBigram(referenceList, maxSkip);
        match = matchSkipBigram(testList,maxSkip, hashGrams);
        // Recall denominator must come from the reference length. The previous code
        // used testList.size() here, which made recall always equal to precision.
        reference=countSkipBigram(referenceList.size(),maxSkip);
        if (reference<=0)
            evaStat[j][0] = 0;
        else
            evaStat[j][0] = (double) match / reference;
        if(test<=0)
            evaStat[j][1] =0;
        else
            evaStat[j][1] =match/(double)test;
        evaStat[j][2] =computeFScore(evaStat[j][1],evaStat[j][0]);
    }
}
private void computeRougeSU(String testSummary, String[] refSummaries) {
HashSet hashGrams;
SimpleElementList keyList;
ArrayList testList, referenceList;
int match, reference, test, j;
keyList=new SimpleElementList();
testList=index(tokenize(testSummary),keyList);
test=countSkipBigram(testList.size(),maxSkip)+testList.size();
evaStat = new double[refSummaries.length][3];
for (j = 0; j < refSummaries.length; j++) {
referenceList=index(tokenize(refSummaries[j]),keyList);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -