fullranksearcher.java

来自「dragontoolkit用于机器学习」· Java 代码 · 共 132 行

JAVA

132 行

package dragon.ir.search;

import dragon.ir.index.*;
import dragon.ir.query.*;
import dragon.ir.search.smooth.Smoother;
import dragon.nlp.compare.WeightComparator;
import java.util.*;

/**
 * <p>Full rank searcher </p>
 * <p>Scores every document of the collection (document indices 0 to docNum-1) against a
 * relevance simple query and stores the scored documents, sorted, in the inherited
 * <code>hitlist</code>. A document's score is the sum, over the usable term predicates of
 * the query, of the value returned by the smoother's <code>getSmoothedProb</code>; the
 * constructors call <code>setLogLikelihoodOption(true)</code> on the smoother first.</p>
 * <p>Two equivalent traversal orders are offered: document by document
 * ({@link #breadthFirstSearch(IRQuery)}) and query term by query term
 * ({@link #depthFirstSearch(IRQuery)}).</p>
 * <p>Copyright: Copyright (c) 2005</p>
 * <p>Company: IST, Drexel University</p>
 * @author Davis Zhou
 * @version 1.0
 */

public class FullRankSearcher extends AbstractSearcher{
    /** True when {@link #search(IRQuery)} should iterate documents in the outer loop. */
    private final boolean docFirst;

    /**
     * Creates a searcher that picks the traversal order from the smoother: the
     * term-first order is used when <code>smoother.isQueryTermFirstOptimal()</code> is true,
     * the document-first order otherwise.
     *
     * @param indexReader the index to search
     * @param smoother the smoother supplying per-term scores; its log-likelihood option is
     *        switched on by this constructor
     */
    public FullRankSearcher(IndexReader indexReader, Smoother smoother) {
        super(indexReader,smoother);
        smoother.setLogLikelihoodOption(true);
        // Asked after the option is set, exactly as before, in case the answer depends on it.
        this.docFirst = !smoother.isQueryTermFirstOptimal();
    }

    /**
     * Creates a searcher with an explicitly chosen traversal order.
     *
     * @param indexReader the index to search
     * @param smoother the smoother supplying per-term scores; its log-likelihood option is
     *        switched on by this constructor
     * @param docFirst true to iterate documents in the outer loop, false to iterate query
     *        terms in the outer loop
     */
    public FullRankSearcher(IndexReader indexReader, Smoother smoother, boolean docFirst) {
        super(indexReader,smoother);
        smoother.setLogLikelihoodOption(true);
        this.docFirst = docFirst;
    }

    /**
     * Remembers the query and ranks the whole collection using the traversal order chosen
     * at construction time.
     *
     * @param query the query to evaluate
     * @return the number of documents placed in the hit list, or 0 when the query cannot
     *         be evaluated (the hit list is then null)
     */
    public int search(IRQuery query){
        this.query = query;
        if(docFirst){
            return breadthFirstSearch(query);
        }
        return depthFirstSearch(query);
    }

    /**
     * Ranks the collection one document at a time: for each document the smoother is
     * pointed at that document and every usable query term contributes its smoothed
     * value, computed with the term's frequency in the document when the term occurs
     * there and without a frequency otherwise. The weight of each IRDoc is overwritten.
     *
     * @param query the query to evaluate
     * @return the number of documents placed in the hit list, or 0 when the query is
     *         null, is not a relevance simple query, or has no usable term predicate
     *         (the hit list is then null)
     */
    public int breadthFirstSearch(IRQuery query){
        SimpleTermPredicate[] arrPredicate;
        IRTerm curTerm;
        IRDoc curDoc;
        int docNum,i,j;
        double weight;

        arrPredicate=usableTermPredicates(query);
        if(arrPredicate==null){
            hitlist=null;
            return 0;
        }

        // Allocate the result list only once the query is known to be usable.
        docNum=indexReader.getCollection().getDocNum();
        hitlist=new ArrayList(docNum);

        for(i=0;i<docNum;i++){
            curDoc=indexReader.getDoc(i);
            smoother.setDoc(curDoc);
            weight=0;
            for(j=0;j<arrPredicate.length;j++){
                // Look the query term up in document i; null means it does not occur there.
                curTerm=indexReader.getIRTerm(arrPredicate[j].getIndex(),i);
                if(curTerm!=null){
                    weight += smoother.getSmoothedProb(arrPredicate[j],curTerm.getFrequency());
                }
                else{
                    weight += smoother.getSmoothedProb(arrPredicate[j]);
                }
            }
            curDoc.setWeight(weight);
            hitlist.add(curDoc);
        }
        Collections.sort(hitlist, new WeightComparator(true));
        return hitlist.size();
    }

    /**
     * Ranks the collection one query term at a time: all documents start at weight 0,
     * then for each usable query term the smoother is pointed at that term and the term's
     * posting list is merged against the document range 0 to docNum-1. Documents in the
     * posting list receive the smoothed value computed with their term frequency; all
     * other documents receive the smoothed value computed without a frequency.
     *
     * @param query the query to evaluate
     * @return the number of documents placed in the hit list, or 0 when the query is
     *         null, is not a relevance simple query, or has no usable term predicate
     *         (the hit list is then null)
     */
    public int depthFirstSearch(IRQuery query){
        SimpleTermPredicate[] arrPredicate;
        IRDoc arrDoc[];
        IRDoc doc;
        int[] arrIndex, arrFreq;
        int docNum, postingNum, i,j,k;

        arrPredicate=usableTermPredicates(query);
        if(arrPredicate==null){
            hitlist=null;
            return 0;
        }

        // Allocate the result list only once the query is known to be usable.
        docNum=indexReader.getCollection().getDocNum();
        hitlist=new ArrayList(docNum);

        arrDoc=new IRDoc[docNum];
        for(i=0;i<docNum;i++){
            arrDoc[i]=indexReader.getDoc(i);
            arrDoc[i].setWeight(0);
        }

        for(i=0;i<arrPredicate.length;i++){
            smoother.setQueryTerm(arrPredicate[i]);

            arrIndex=indexReader.getTermDocIndexList(arrPredicate[i].getIndex());
            arrFreq=indexReader.getTermDocFrequencyList(arrPredicate[i].getIndex());
            // NOTE(review): it is not visible here whether the reader can return null for a
            // term without postings; if it does, score the term as absent from every document
            // instead of failing on arrIndex.length.
            postingNum=(arrIndex==null)?0:arrIndex.length;

            // k walks all documents; the loop relies on arrIndex being in ascending order.
            k=0;
            for(j=0;j<postingNum;j++){
                // Documents before the next posting do not contain the term.
                while(k<arrIndex[j]){
                    doc=arrDoc[k];
                    doc.setWeight(doc.getWeight() + smoother.getSmoothedProb(doc));
                    k++;
                }
                // Document arrIndex[j] contains the term arrFreq[j] times.
                doc=arrDoc[k];
                doc.setWeight(doc.getWeight() + smoother.getSmoothedProb(doc,arrFreq[j]));
                k++;
            }
            // Documents after the last posting do not contain the term either.
            while(k<docNum){
                doc=arrDoc[k];
                doc.setWeight(doc.getWeight() + smoother.getSmoothedProb(doc));
                k++;
            }
        }

        for(i=0;i<docNum;i++){
            hitlist.add(arrDoc[i]);
        }
        Collections.sort(hitlist, new WeightComparator(true));
        return hitlist.size();
    }

    /**
     * Validates the query and extracts the term predicates both traversal orders score.
     *
     * @param query the query to inspect, may be null
     * @return the predicates returned by <code>checkSimpleTermQuery</code>, or null when
     *         the query is null, is not a relevance simple query, or yields no predicate
     */
    private SimpleTermPredicate[] usableTermPredicates(IRQuery query){
        SimpleTermPredicate[] arrPredicate;

        if(query==null || !query.isRelSimpleQuery()){
            return null;
        }
        arrPredicate=checkSimpleTermQuery((RelSimpleQuery)query);
        if(arrPredicate==null || arrPredicate.length==0){
            return null;
        }
        return arrPredicate;
    }

}

fullranksearcher.java - 源码说明

本页面展示了「dragontoolkit用于机器学习」中的 fullranksearcher.java 源码文件，采用 Java 编程语言编写，共 132 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。

虫虫下载站收录了大量与dragontoolkit相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。

⌨️ 快捷键说明

复制代码Ctrl + C

搜索代码Ctrl + F

全屏模式F11

增大字号Ctrl + =

减小字号Ctrl + -

显示快捷键?