📄 catdist.java

📁 java数据挖掘算法
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
package shared;
import java.lang.*;
import java.util.*;

/** The CatDist class is for representing a distribution of categories. A
 * CatDist object is produced by a categorizer during the scoring process.
 * A loss function may optionally be applied to the CatDist.                <P>
 * It is assumed the distribution is normalized. This is done automatically
 * on construction. The internal array dist should be indexed by category
 * number, starting with UNKNOWN_CATEGORY_VAL.
 *
 * @author James Louis	2/25/2001	Ported to Java.
 * @author Dan Sommerfield	2/10/97	Initial revision.
 */
public class CatDist {
    //CorrectionType ENUM
    /** None Correction Type value.**/
    static public final int none = 0;
    /** Laplace Correction Type value.**/
    static public final int laplace = 1;
    /** Evidence Correction Type value.**/
    static public final int evidence = 2;
    //END CorrectionType ENUM
    
    /** The Schema for the data for which distribution is to be
        calculated. **/
    private Schema schema;
    
    /** The distribution of categories **/
    private double[] dist;
    
    /** The tie-breaking order used in the event that two categories have
        the same distribution. **/
    private int[] tiebreakingOrder;
    
    /** The options for logging displays. **/
    public static LogOptions logOptions = new LogOptions();
    
    /** Constructor. It builds a distribution based on a single category, with a
     * 1.0 probability given to this category, and 0.0 to all others.
     * @param aSchema	The Schema for the data in this distribution.
     * @param aug		The AugCategory with information on the category on which
     * this distribution is built.
     */
    public CatDist(Schema aSchema, AugCategory aug) {
        schema = aSchema;
        dist = new double[aSchema.num_label_values() +1];
        tiebreakingOrder = new int[aSchema.num_label_values() + 1];
        MLJArray.init_values(-1,tiebreakingOrder);
        set_scores(aug.num());
    }
    
    /** Constructor. It builds an all-or-nothing distribution based on a single
     * category, with a 1.0 probability given to this category, and 0.0 to all
     * others.
     * @param aSchema	The Schema for the data in this distribution.
     * @param singleCat	The specific category on which this distribution is built.
     */
    public CatDist(Schema aSchema, int singleCat) {
        schema = aSchema;
        dist = new double[aSchema.num_label_values() + 1];
        tiebreakingOrder = new int[aSchema.num_label_values() + 1];
        MLJArray.init_values(-1,tiebreakingOrder);
        set_scores(singleCat);
    }
    
    /** Constructor.
     * @param aSchema	The Schema for the data in this distribution.
     * @param fCounts	The frequency count of categories found as labels.
     * @param cType	Type of correction to perform. Range is CatDist.none,
     * CatDist.laplace, CatDist.evidence.
     */
    public CatDist(Schema aSchema, double[] fCounts, int cType) {
        // Same construction sequence as the four-argument constructor, with
        // the correction parameter fixed at 1.0.
        this(aSchema, fCounts, cType, 1.0);
    }
    
    /** Constructor.
     * @param aSchema	The Schema for the data in this distribution.
     * @param fCounts	The frequency count of categories found as labels.
     * @param cType	Type of correction to perform. Range is CatDist.none,
     * CatDist.laplace, CatDist.evidence.
     * @param cParam	Correction parameter. Must be equal to or greater than 0.
     */
    public CatDist(Schema aSchema, double[] fCounts, int cType, double cParam) {
        this.schema = aSchema;

        // One score slot per label value plus one extra slot.
        this.dist = new double[aSchema.num_label_values() + 1];

        // Parallel array of tie-breaking ranks, handed to init_values with -1
        // before the default order is installed at the end.
        this.tiebreakingOrder = new int[this.dist.length];
        MLJArray.init_values(-1, this.tiebreakingOrder);

        // The call order below is kept exactly as in the original port:
        // preferred category, then scores, then default tie-breaking.
        set_preferred_category(0);
        set_scores(fCounts, cType, cParam);
        set_default_tiebreaking();
    }
    
    /** Constructor.
     * @param aSchema	The Schema for the data in this distribution.
     * @param unknownProb	The desired probability weight for the unknown
     * category.
     * @param aDist	A weight distribution for this CatDist object.
     */
    public CatDist(Schema aSchema, DoubleRef unknownProb,
    double[] aDist) {
        this.schema = aSchema;

        // One score slot per label value plus one extra slot.
        this.dist = new double[aSchema.num_label_values() + 1];

        // Parallel array of tie-breaking ranks, handed to init_values with -1
        // before the default order is installed at the end.
        this.tiebreakingOrder = new int[this.dist.length];
        MLJArray.init_values(-1, this.tiebreakingOrder);

        // The call order below is kept exactly as in the original port:
        // preferred category, then scores, then default tie-breaking.
        set_preferred_category(0);
        set_scores(unknownProb, aDist);
        set_default_tiebreaking();
    }
    
    /** Copy constructor.
     * @param cDist	The CatDist object to be copied.
     */
    public CatDist(CatDist cDist) {
        schema = cDist.schema;
        dist =(double[]) cDist.dist.clone();
        tiebreakingOrder =(int[]) cDist.tiebreakingOrder.clone();
    }
    
    
    /** Converts the distribution scores to a String.
     * @return A String containing information about the scores.
     */
    private String scoresToString() {
        // NOTE(review): every score is truncated to an int before it is
        // printed, so fractional weights lose their fraction -- confirm that
        // this is the intended display format.
        final int lastPos = dist.length - 1;
        StringBuilder text = new StringBuilder();

        // All entries except the last are followed by a ", " separator.
        for (int pos = 0; pos < lastPos; pos++) {
            text.append((int) dist[pos]).append(", ");
        }

        // The final entry is written without a trailing separator.
        text.append((int) dist[lastPos]);
        return text.toString();
    }
    
    /** Merges the tie breaking order with the given weight distribution.
     * @return The tie breaking order.
     * @param weightDistribution	The given weight distribution of categories.
     */
    public static int[] merge_tiebreaking_order(double[] weightDistribution) {
        // Work on a private copy: entries are overwritten with -1 as they are
        // consumed, and the caller's array must stay untouched.
        final double[] remaining = (double[]) weightDistribution.clone();

        // ranks[i] will receive the position of slot i in the ordering
        // (0 = chosen first). Handed to init_values with Integer.MAX_VALUE.
        final int[] ranks = new int[remaining.length];
        MLJArray.init_values(Integer.MAX_VALUE, ranks);

        // NOTE(review): the first operand compares the *weight* in slot 0 with
        // the category constant UNKNOWN_CATEGORY_VAL. This looks like it may
        // have been meant as a test on the array's first index rather than
        // its content -- confirm against Globals before changing it.
        final boolean demoteFirstSlot =
                remaining[0] == Globals.UNKNOWN_CATEGORY_VAL
                && MLJ.approx_equal(remaining[0], 0.0);
        if (demoteFirstSlot) {
            remaining[0] = -1;
        }

        int nextRank = 0;
        for (int pass = 0; pass < ranks.length; pass++) {
            // Ask MLJArray.max for the position of the largest remaining
            // weight; a fresh IntRef is used on every pass.
            IntRef heaviest = new IntRef(0);
            MLJArray.max(heaviest, remaining);

            // NOTE(review): nothing here checks that this slot has not been
            // ranked already; with -1 used both above and as the "consumed"
            // marker, that presumably relies on how MLJArray.max resolves
            // ties -- verify.
            ranks[heaviest.value] = nextRank++;
            remaining[heaviest.value] = -1;
        }
        MLJ.ASSERT(nextRank == ranks.length, "CatDist::merge_tiebreaking_order: nextIndex == order.length");
        return ranks;
    }
    
    /** Finds the majority category in the given weight distribution, using the
     * given tie breaking order.
     * @return The category which appears the most among the labelled instances.
     * @param weightDistribution	The weight sums for each category found.
     * @param tieBreakingOrder		The order of choices in the event that a tie
     * occurs between categories.
     */
    public static int majority_category(double[] weightDistribution, int[] tieBreakingOrder) {
        // MLJArray.max returns the largest weight and reports its position
        // through the IntRef.
        IntRef winner = new IntRef(0);
        final double topWeight = MLJArray.max(winner, weightDistribution);

        // Scan onward from just past the reported position for further
        // entries holding the same weight; find signals "no more" with -1.
        for (int rival = MLJArray.find(topWeight, winner.value + 1, weightDistribution);
                rival != -1;
                rival = MLJArray.find(topWeight, rival + 1, weightDistribution)) {
            // Among tied entries, the one with the smaller tie-breaking
            // value wins.
            if (tieBreakingOrder[rival] < tieBreakingOrder[winner.value]) {
                winner.value = rival;
            }
        }

        // Convert the array position into a category number by adding the
        // UNKNOWN_CATEGORY_VAL offset.
        return winner.value + Globals.UNKNOWN_CATEGORY_VAL;
    }
    
    /** Merges a given tie breaking order with the given weight distribution.
     * @return The tie breaking order.
     * @param tieBreakingOrder		The order for choices in the event that a tie
     * occurs between categories.
     * @param weightDistribution	The given weight distribution of categories.
     */
    static public int[] merge_tiebreaking_order(int[] tieBreakingOrder,
    double[] weightDistribution) {
        // Work on a private copy: entries are overwritten with -1 as they are
        // consumed, and the caller's array must stay untouched.
        final double[] remaining = (double[]) weightDistribution.clone();

        // ranks[i] will receive the position of slot i in the merged ordering
        // (0 = chosen first). Handed to init_values with Integer.MAX_VALUE.
        final int[] ranks = new int[remaining.length];
        MLJArray.init_values(Integer.MAX_VALUE, ranks);

        // A single IntRef is reused across passes; MLJArray.max reports the
        // position of the maximum through it.
        IntRef pick = new IntRef(0);
        int nextRank = 0;

        for (int pass = 0; pass < ranks.length; pass++) {
            final double topWeight = MLJArray.max(pick, remaining);

            // Look past the reported position for other entries with the same
            // weight; the one with the smaller incoming tie-breaking value
            // takes precedence. find signals "no more" with -1.
            for (int rival = MLJArray.find(topWeight, pick.value + 1, remaining);
                    rival != -1;
                    rival = MLJArray.find(topWeight, rival + 1, remaining)) {
                if (tieBreakingOrder[rival] < tieBreakingOrder[pick.value]) {
                    pick.value = rival;
                }
            }

            // Record the rank and mark the slot as consumed.
            // NOTE(review): -1 as the consumed marker presumably assumes all
            // input weights are non-negative -- confirm with callers.
            ranks[pick.value] = nextRank++;
            remaining[pick.value] = -1;
        }
        MLJ.ASSERT(nextRank == ranks.length, "CatDist::merge_tiebreaking_order: ordering == order.length");

        return ranks;
    }
    
    /** Returns the Schema stored in this CatDist object.
     * @return The Schema for data on which this CatDist object contains
     * information.
     */
    public Schema get_schema() {
        // Hands back the stored reference itself; no copy is made.
        return this.schema;
    }
    
    /** Allows the results stored in and returned by a CatDist to be changed.
     * This method takes a single category index and builds an all-or-nothing
     * distribution around it. 1.0 probability mass is given to the single category
     * and 0.0 is given to all others.
     * @param singleCat	The index for the category that should have a 1.0
     * probability mass.
     */
    public void set_scores(int singleCat) {
        for(int i = 0 ; i<dist.length ; i++)
            dist[i] = 0.0;
        dist[singleCat] = 1.0;
12 3 下一页
💿 文件大小 441 K
👤 上传用户 l2335800
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#java #数据挖掘算法
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -