⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rouge.java

📁 dragontoolkit用于机器学习
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
package dragon.ir.summarize;

import dragon.nlp.*;
import dragon.nlp.compare.*;
import dragon.nlp.extract.*;
import dragon.nlp.tool.*;
import dragon.util.*;
import java.util.*;
import java.util.ArrayList;

/**
 * <p>A Program for Summarization Evaluation</p>
 * <p>We only implemented and tested the ROUGE-N metric so far</p>
 * <p>Copyright: Copyright (c) 2006</p>
 * <p>Company: Drexel University</p>
 * @author Xiaodan Zhang, Davis Zhou
 * @version 1.0
 */

public class ROUGE {
    // Identifiers of the ROUGE variants; exactly one of them is stored in the metric field.
    public static final int ROUGE_N=1;
    public static final int ROUGE_L=2;
    public static final int ROUGE_W=3;
    public static final int ROUGE_S=4;
    public static final int ROUGE_SU=5;
    // Ways of folding the per-reference scores into a single value (consumed by getEvaResult).
    public static final int MULTIPLE_MAX=1;
    public static final int MULTIPLE_MIN=2;
    public static final int MULTIPLE_AVG=3;
    // Stopword list location; setStopwordOption resolves it against EnvVariable.getDragonHome().
    private static final String stopwordFile="nlpdata/rouge/rouge.stopword";
    //private static final String wordDelimitor=" \r\n\t_-;,?/\"'`:(){}!+[]><=%$#*@&^~|\\";

    private TokenExtractor tokenExtractor; // carries the lemmatiser and stopword filter settings
    private double beta; // controls the F-score; only positive values are accepted by setBeta
    private double[][] evaStat; // one row per reference summary: [0]=recall, [1]=precision, [2]=F-score
    private int metric; // selected ROUGE variant (one of the ROUGE_* constants)
    private int multipleMode; // selected aggregation (one of the MULTIPLE_* constants)
    private int gram; // n-gram length, used by the ROUGE-N metric
    private int maxSkip; // skip limit, used by the ROUGE-S metric
    private boolean caseSensitive; // case option exposed through setCaseOption/getCaseOption

    /**
     * Creates an evaluator with the default configuration: case-insensitive,
     * a basic token extractor, beta of 1.0, the ROUGE-N metric with bigrams,
     * and the maximum over references as the multi-reference score.
     */
    public ROUGE() {
        caseSensitive=false;
        tokenExtractor=new BasicTokenExtractor(null);
        beta=1.0;
        metric=ROUGE_N;
        gram=2;
        // Without an explicit mode the field would stay 0, which matches none of the
        // MULTIPLE_* constants and makes every result getter return -1.
        multipleMode=MULTIPLE_MAX;
    }

    /**
     * Updates the beta value that controls the F-score.
     * Values that are not strictly positive are ignored and the previous beta is kept.
     *
     * @param beta the new beta; must be greater than zero to take effect
     */
    public void setBeta(double beta){
        if (beta > 0) {
            this.beta = beta;
        }
    }

    /**
     * @return the beta value currently in effect
     */
    public double getBeta(){
        return this.beta;
    }

    /**
     * Hands the given lemmatiser to the underlying token extractor.
     *
     * @param lemmatiser the lemmatiser to forward to the token extractor
     */
    public void setLemmatiser(Lemmatiser lemmatiser){
        this.tokenExtractor.setLemmatiser(lemmatiser);
    }

    /**
     * @return the lemmatiser reported by the underlying token extractor
     */
    public Lemmatiser getLemmatiser(){
        return this.tokenExtractor.getLemmatiser();
    }

    /**
     * Switches lemmatisation on or off.
     *
     * @param option {@code true} installs a new {@code PorterStemmer} on the token
     *               extractor; {@code false} sets the extractor's lemmatiser to null
     */
    public void setLemmatiserOption(boolean option){
        tokenExtractor.setLemmatiser(option ? new PorterStemmer() : null);
    }

    /**
     * @return {@code true} when the token extractor currently holds a lemmatiser
     */
    public boolean getLemmatiserOption(){
        Lemmatiser current;

        current=tokenExtractor.getLemmatiser();
        return current!=null;
    }

    /**
     * Chooses how the per-reference scores are combined into one value.
     * The value is stored as given, without validation.
     *
     * @param mode one of MULTIPLE_MAX, MULTIPLE_MIN or MULTIPLE_AVG
     */
    public void setMultipleReferenceMode(int mode){
        multipleMode = mode;
    }

    /**
     * Turns stopword handling on or off.
     *
     * @param option {@code true} installs a concept filter built from the default
     *               stopword file under the Dragon home directory; {@code false}
     *               turns the token extractor's filtering option off
     */
    public void setStopwordOption(boolean option){
        String stopwordPath;

        if (!option) {
            tokenExtractor.setFilteringOption(false);
            return;
        }
        stopwordPath = EnvVariable.getDragonHome()+"/"+stopwordFile;
        tokenExtractor.setConceptFilter(new BasicConceptFilter(stopwordPath));
    }

    /**
     * @return the filtering option reported by the underlying token extractor
     */
    public boolean getStopwordOption(){
        return this.tokenExtractor.getFilteringOption();
    }

    /**
     * Installs a concept filter built from a caller-supplied stopword file.
     *
     * @param stopwordFile path passed to the {@code BasicConceptFilter} constructor
     *                     (the parameter shadows the class-level default path)
     */
    public void setStopwordFile(String stopwordFile){
        BasicConceptFilter customFilter;

        customFilter = new BasicConceptFilter(stopwordFile);
        tokenExtractor.setConceptFilter(customFilter);
    }

    /**
     * Stores the case-sensitivity option.
     *
     * @param sensitive the new value of the case option
     */
    public void setCaseOption(boolean sensitive){
        this.caseSensitive = sensitive;
    }

    /**
     * @return the case-sensitivity option currently stored
     */
    public boolean getCaseOption(){
        return this.caseSensitive;
    }

    /**
     * Selects the ROUGE-N metric for subsequent evaluations.
     * The n-gram length is stored as given, without validation.
     *
     * @param gram the n-gram length to use
     */
    public void useRougeN(int gram){
        metric = ROUGE_N;
        this.gram = gram;
    }

    /**
     * @return the n-gram length configured for the ROUGE-N metric
     */
    public int getGram(){
        return this.gram;
    }

    /**
     * Selects the ROUGE-S metric with the skip limit set to
     * {@code Integer.MAX_VALUE}.
     */
    public void useRougeS(){
        metric = ROUGE_S;
        maxSkip = Integer.MAX_VALUE;
    }

    /**
     * Selects the ROUGE-S metric with a caller-supplied skip limit.
     * The limit is stored as given, without validation.
     *
     * @param maxSkip the skip limit to store for the ROUGE-S metric
     */
    public void useRougeS(int maxSkip){
        metric = ROUGE_S;
        this.maxSkip = maxSkip;
    }

    /**
     * @return the precision of the last evaluation, combined over all references
     *         according to the multiple-reference mode
     */
    public double getPrecision(){
        final int precisionColumn = 1; // column of evaStat that holds precision

        return getEvaResult(precisionColumn);
    }

    /**
     * @return the recall of the last evaluation, combined over all references
     *         according to the multiple-reference mode
     */
    public double getRecall(){
        final int recallColumn = 0; // column of evaStat that holds recall

        return getEvaResult(recallColumn);
    }

    /**
     * @return the F-score of the last evaluation, combined over all references
     *         according to the multiple-reference mode
     */
    public double getFScore(){
        final int fScoreColumn = 2; // column of evaStat that holds the F-score

        return getEvaResult(fScoreColumn);
    }

    /**
     * Combines one column of the evaluation table across all reference summaries.
     *
     * @param dimension column of evaStat to read: 0 for recall, 1 for precision,
     *                  2 for F-score
     * @return the maximum, average or minimum of that column, depending on the
     *         multiple-reference mode; -1 when no evaluation result is available
     *         (evaluate has not run yet or there were no references) or when the
     *         mode is not one of the MULTIPLE_* constants
     */
    private double getEvaResult(int dimension){
        double[] results;
        int i;

        // Nothing to aggregate: answer with the same sentinel used for an unknown mode
        // instead of failing with a NullPointerException or aggregating an empty array.
        if(evaStat==null || evaStat.length==0)
            return -1;

        results=new double[evaStat.length];
        for(i=0;i<results.length;i++)
            results[i]=evaStat[i][dimension];
        if(multipleMode==MULTIPLE_MAX)
            return MathUtil.max(results);
        else if(multipleMode==MULTIPLE_AVG)
            return MathUtil.average(results);
        else if(multipleMode==MULTIPLE_MIN)
            return MathUtil.min(results);
        else
            return -1;
    }

    /**
     * Scores a candidate summary against the reference summaries with the
     * currently selected metric.
     *
     * @param testSummary  the summary under evaluation
     * @param refSummaries the reference summaries to compare against
     * @return {@code true} when the selected metric is ROUGE-N, ROUGE-S, ROUGE-L or
     *         ROUGE-SU and its computation was invoked; {@code false} for any other
     *         metric value (including ROUGE_W), in which case nothing is computed
     */
    public synchronized boolean evaluate(String testSummary, String[] refSummaries){
        switch(metric){
            case ROUGE_N:
                computeRougeN(testSummary,refSummaries);
                return true;
            case ROUGE_S:
                computeRougeS(testSummary,refSummaries);
                return true;
            case ROUGE_L:
                computeRougeL(testSummary,refSummaries);
                return true;
            case ROUGE_SU:
                computeRougeSU(testSummary,refSummaries);
                return true;
            default:
                return false;
        }
    }

    /**
     * Writes the per-reference recall, precision and F-score of the last evaluation
     * to standard output, framed by two 50-character dashed rules.
     */
    public void printResult() {
        char[] rule;
        int ref;

        rule = new char[50];
        Arrays.fill(rule, '-');

        System.out.println(rule);
        ref = 0;
        while (ref < evaStat.length) {
            double[] row = evaStat[ref];
            System.out.println("ReferenceModel: " + (ref + 1));
            System.out.println("Average_R: " + row[0]);
            System.out.println("Average_P: " + row[1]);
            System.out.println("Average_F: " + row[2]);
            System.out.println();
            ref++;
        }
        System.out.println(rule);
    }

    /**
     * Fills evaStat with ROUGE-N recall, precision and F-score, one row per
     * reference summary.
     *
     * @param testSummary  the summary under evaluation
     * @param refSummaries the reference summaries to compare against
     */
    private void computeRougeN(String testSummary, String[] refSummaries) {
        ArrayList candidateTokens, refTokens;
        HashMap candidateGrams, refGrams;
        int candidateTotal, refTotal, hits, r;
        double recall, precision;

        candidateTokens=tokenize(testSummary);
        evaStat = new double[refSummaries.length][3];
        candidateGrams=computeNgrams(candidateTokens,gram);
        // number of n-gram positions in the candidate; non-positive when it is shorter than n
        candidateTotal=candidateTokens.size()-gram+1;

        for (r = 0; r < refSummaries.length; r++) {
            refTokens=tokenize(refSummaries[r]);
            refGrams=computeNgrams(refTokens,gram);
            hits=matchNgrams(candidateGrams,refGrams);
            refTotal=refTokens.size()-gram+1;

            // a side with no n-gram positions scores zero rather than dividing by it
            recall = refTotal>0 ? (double)hits/refTotal : 0;
            evaStat[r][0]=recall;

            precision = candidateTotal>0 ? hits/(double)candidateTotal : 0;
            evaStat[r][1]=precision;

            evaStat[r][2]=computeFScore(precision,recall);
        }
    }

    /**
     * Fills evaStat with ROUGE-S (skip-bigram) recall, precision and F-score,
     * one row per reference summary.
     *
     * @param testSummary  the summary under evaluation
     * @param refSummaries the reference summaries to compare against
     */
    private void computeRougeS(String testSummary, String[] refSummaries) {
        HashSet hashGrams;
        SimpleElementList keyList;
        ArrayList testList, referenceList;
        int match, reference, test, j;

        // the same key list is used to index the candidate and every reference
        keyList=new SimpleElementList();
        testList=index(tokenize(testSummary),keyList);
        test=countSkipBigram(testList.size(),maxSkip);
        evaStat = new double[refSummaries.length][3];

        for (j = 0; j < refSummaries.length; j++) {
            referenceList=index(tokenize(refSummaries[j]),keyList);
            hashGrams = computeSkipBigram(referenceList, maxSkip);
            match = matchSkipBigram(testList,maxSkip, hashGrams);
            // Recall is normalised by the reference's own skip-bigram count; sizing it
            // from the candidate would make recall identical to precision.
            reference=countSkipBigram(referenceList.size(),maxSkip);

            if (reference<=0)
                evaStat[j][0] = 0;
            else
                evaStat[j][0] = (double) match / reference;

            if(test<=0)
                evaStat[j][1] =0;
            else
                evaStat[j][1] =match/(double)test;

            evaStat[j][2] =computeFScore(evaStat[j][1],evaStat[j][0]);
        }
    }

    private void computeRougeSU(String testSummary, String[] refSummaries) {
        HashSet hashGrams;
        SimpleElementList keyList;
        ArrayList testList, referenceList;
        int match, reference, test, j;

        keyList=new SimpleElementList();
        testList=index(tokenize(testSummary),keyList);
        test=countSkipBigram(testList.size(),maxSkip)+testList.size();
        evaStat = new double[refSummaries.length][3];

        for (j = 0; j < refSummaries.length; j++) {
            referenceList=index(tokenize(refSummaries[j]),keyList);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -