⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 semanticranksummarizer.java

📁 dragontoolkit用于机器学习
💻 JAVA
字号:
package dragon.ir.summarize;

import dragon.ir.clustering.docdistance.*;
import dragon.ir.index.*;
import dragon.ir.index.sentence.*;
import dragon.ir.kngbase.*;
import dragon.matrix.DoubleSparseMatrix;
import dragon.nlp.SimpleElementList;

/**
 * <p>Semantic based ranking summarizer which can supports differnt semantic similarity measure such as ontology based
 * and model based.</p>
 * <p></p>
 * <p>Copyright: Copyright (c) 2005</p>
 * <p>Company: IST, Drexel University</p>
 * @author Davis Zhou
 * @version 1.0
 */

public class SemanticRankSummarizer extends LexRankSummarizer{
    private OnlineSentenceIndexer phraseIndexer;
    private KnowledgeBase kngBase;
    private double transCoefficient, bkgCoefficient;
    private double maxKLDivDistance;

    public SemanticRankSummarizer(OnlineSentenceIndexer tokenIndexer,KnowledgeBase kngBase) {
        this(tokenIndexer,null,kngBase);
    }

    public SemanticRankSummarizer(OnlineSentenceIndexer tokenIndexer, OnlineSentenceIndexer phraseIndexer, KnowledgeBase kngBase) {
        super(tokenIndexer);
        this.phraseIndexer =phraseIndexer;
        if(phraseIndexer!=null)
            phraseIndexer.setMinSentenceLength(tokenIndexer.getMinSentenceLength());
        this.kngBase =kngBase;
        this.transCoefficient =0.3;
        this.bkgCoefficient =0.4;
        this.maxKLDivDistance=10;
    }

    public void setTranslationCoefficient(double coefficient){
        this.transCoefficient =coefficient;
    }

    public void setBackgroundCoefficient(double coefficient){
        this.bkgCoefficient =coefficient;
    }

    public void setMaxKLDivDistance(double distance){
        this.maxKLDivDistance =distance;
    }

    private DoubleSparseMatrix computeTransDocTermMatrix(){
        DocRepresentation docRep;
        DoubleSparseMatrix transMatrix;
        OnlineSentenceIndexReader phraseIndexReader;
        int[] termMap, phraseMap;

        if(phraseIndexer!=null){
            collectionReader.restart();
            phraseIndexReader = new OnlineSentenceIndexReader(phraseIndexer, collectionReader);
            phraseIndexReader.initialize();
            phraseMap = getTermMap(phraseIndexReader, kngBase.getRowKeyList());
            termMap = getTermMap(indexReader, kngBase.getColumnKeyList());
            docRep = new DocRepresentation(indexReader, termMap);
            transMatrix = docRep.genModelMatrix(phraseIndexReader, phraseMap, kngBase.getKnowledgeMatrix(),
                                                transCoefficient, bkgCoefficient, true, 0.001);
            phraseIndexReader.close();
        }
        else{
            termMap = getTermMap(indexReader, kngBase.getColumnKeyList());
            docRep = new DocRepresentation(indexReader, termMap);
            transMatrix = docRep.genModelMatrix(indexReader, termMap, kngBase.getKnowledgeMatrix(),
                                                transCoefficient, bkgCoefficient, true, 0.001);
        }
        return transMatrix;
    }

    private int[] getTermMap(IndexReader reader, SimpleElementList newTermList){
        String curKey;
        int[] termMap;
        int i, newIndex, newTermNum;

        termMap=new int[reader.getCollection().getTermNum()];
        newTermNum=newTermList.size();
        for(i=0;i<termMap.length;i++){
            curKey=reader.getTermKey(i);
            newIndex=newTermList.search(curKey);
            if(newIndex>=0)
                termMap[i]=newIndex;
            else
                termMap[i]=newTermNum++;
        }
        return termMap;
    }

    protected double computeSimilarity(IRDoc firstSent, IRDoc secondSent){
        if(distanceMetric==null){
            distanceMetric=new KLDivDocDistance(computeTransDocTermMatrix(),maxKLDivDistance);
        }
        return Math.max(1-distanceMetric.getDistance(firstSent,secondSent),1-distanceMetric.getDistance(secondSent,firstSent));
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -