okapismoother.java

来自「dragontoolkit用于机器学习」· Java 代码 · 共 72 行

JAVA
72
字号
package dragon.ir.search.smooth;import dragon.ir.index.*;import dragon.ir.query.SimpleTermPredicate;/** * <p>Okapi Smoother</p> * <p></p> * <p>Copyright: Copyright (c) 2005</p> * <p>Company: IST, Drexel University</p> * @author Davis Zhou * @version 1.0 */public class OkapiSmoother extends AbstractSmoother{    private int docNum;    private double curTermIDF, curDocLengthRatio;    private double avgDocLength;    private double bm25k1, bm25b;    private double param1, param2;    public OkapiSmoother(IRCollection collection) {        docNum=collection.getDocNum();        avgDocLength=collection.getTermCount()*1.0/docNum;        this.bm25b=0.75;        this.bm25k1 =2;        this.useLog=false;        this.docFirstOptimal=true;        this.querytermFirstOptimal=true;        param1=bm25k1*(1-bm25b);        param2=bm25k1*bm25b;    }    public OkapiSmoother(IRCollection collection, double bm25k1, double bm25b) {        docNum=collection.getDocNum();        avgDocLength=collection.getTermCount()*1.0/docNum;        this.bm25b=bm25b;        this.bm25k1 =bm25k1;        this.useLog=false;        this.docFirstOptimal=true;        this.querytermFirstOptimal=true;        param1=bm25k1*(1-bm25b);        param2=bm25k1*bm25b;    }    public boolean setParameters(double[] params){        if(params!=null && params.length>=2)        {            this.bm25k1 =params[0];            this.bm25b =params[1];            param1=bm25k1*(1-bm25b);            param2=bm25k1*bm25b;            return true;        }        else            return false;    }    public void setQueryTerm(SimpleTermPredicate queryTerm){        this.queryWeight =queryTerm.getWeight();        curTermIDF=Math.log((docNum-queryTerm.getDocFrequency()+0.5)/(queryTerm.getDocFrequency()+0.5));    }    public void setDoc(IRDoc doc){        curDocLengthRatio=param2*doc.getTermCount()/avgDocLength;    }    protected double computeSmoothedProb(int termFrequency){        //we did not use bm25k3 in this implementation, ie., (s3*qtf/(k3+qtf) is implemented as qtf own.        //query weight is equivalent to query term frequency (qtf);        return queryWeight*termFrequency*curTermIDF/(param1+curDocLengthRatio+termFrequency);    }}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?