📄 abstractsentencesum.java
字号:
package dragon.ir.summarize;import dragon.ir.clustering.*;import dragon.ir.index.*;import dragon.matrix.vector.DoubleVector;import dragon.nlp.compare.*;import dragon.onlinedb.Article;import dragon.util.SortedArray;import java.util.*;/** * <p>The class implement basic function of building summary given a sentence set. </p> * <p></p> * <p>Copyright: Copyright (c) 2005</p> * <p>Company: IST, Drexel University</p> * @author Davis Zhou * @version 1.0 */public abstract class AbstractSentenceSum { protected String buildSummary(IndexReader indexReader, ArrayList sentSet, int summaryLength, DoubleVector weightVector){ ArrayList list; IRDoc curDoc; Article article; TopicSummary summary; StringBuffer autoSum; String curSentence; int i, curLength; curLength=0; summary=new TopicSummary(TextUnit.UNIT_SENTENCE); list=new ArrayList(sentSet.size()); for(i=0;i<sentSet.size();i++){ curDoc=(IRDoc)sentSet.get(i); curDoc.setWeight(weightVector.get(i)); list.add(curDoc); } Collections.sort(list,new WeightComparator(true)); for(i=0;i<list.size() && curLength<summaryLength; i++){ curDoc=(IRDoc)list.get(i); article=indexReader.getOriginalDoc(curDoc.getIndex()); if(article==null || (curSentence=article.getTitle())==null) continue; if(summary.contains(new TextUnit(curSentence))) continue; if(curLength<summaryLength){ summary.addText(new TextUnit(curSentence,curDoc.getIndex(),curDoc.getWeight())); curLength+=curSentence.length(); } } summary.sortByWegiht(); if(summary.size()==0) return null; autoSum = new StringBuffer(summary.getTextUnit(0).getText()); for (i = 1; i < summary.size(); i++) { autoSum.append("\n"); autoSum.append(summary.getTextUnit(i).getText()); } if(autoSum.length()<=summaryLength) return autoSum.toString(); else return autoSum.substring(0,summaryLength); } protected String buildSummary(IndexReader indexReader, ArrayList sentSet, int summaryLength, DoubleVector weightVector, DocClusterSet clusters){ SortedArray list; IRDoc curDoc; Article article; TopicSummary summary; DocCluster curCluster; StringBuffer autoSum; String curSentence; boolean[] usedDoc, usedCluster; int i, j, pos,curLength; list=new SortedArray(sentSet.size(), new IndexComparator()); for(i=0;i<sentSet.size();i++){ curDoc=(IRDoc)sentSet.get(i); curDoc.setWeight(weightVector.get(i)); list.add(curDoc); } for(i=0;i<clusters.getClusterNum();i++){ curCluster=clusters.getDocCluster(i); for(j=0;j<curCluster.getDocNum();j++){ curDoc=curCluster.getDoc(j); pos=list.binarySearch(curDoc); if(pos<0) continue; curDoc=(IRDoc)list.get(pos); curDoc.setCategory(i); } } list.setComparator(new WeightComparator(true)); usedDoc=new boolean[list.size()]; usedCluster=new boolean[clusters.getClusterNum()]; curLength=0; summary=new TopicSummary(TextUnit.UNIT_SENTENCE); //extract one sentence with highest score from each cluster for(i=0;i<list.size() && curLength<summaryLength; i++){ curDoc=(IRDoc)list.get(i); if(usedCluster[curDoc.getCategory()]) continue; article=indexReader.getOriginalDoc(curDoc.getIndex()); if(article==null || (curSentence=article.getTitle())==null) continue; if(summary.contains(new TextUnit(curSentence))) continue; if(curLength<summaryLength){ summary.addText(new TextUnit(curSentence,curDoc.getIndex(),curDoc.getWeight())); curLength+=curSentence.length(); usedCluster[curDoc.getCategory()]=true; usedDoc[i]=true; } } //extract remaining sentences for(i=0;i<list.size() && curLength<summaryLength; i++){ if(usedDoc[i]) continue; curDoc=(IRDoc)list.get(i); article=indexReader.getOriginalDoc(curDoc.getIndex()); if(article==null || (curSentence=article.getTitle())==null) continue; if(summary.contains(new TextUnit(curSentence))) continue; if(curLength<summaryLength){ summary.addText(new TextUnit(curSentence,curDoc.getIndex(),curDoc.getWeight())); curLength+=curSentence.length(); usedDoc[i]=true; } } summary.sortByWegiht(); if(summary.size()==0) return null; autoSum = new StringBuffer(summary.getTextUnit(0).getText()); for (i = 1; i < summary.size(); i++) { autoSum.append("\n"); autoSum.append(summary.getTextUnit(i).getText()); } if(autoSum.length()<=summaryLength) return autoSum.toString(); else return autoSum.substring(0,summaryLength); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -