📄 catdist.java
字号:
package shared;
import java.lang.*;
import java.util.*;
/** The CatDist class is for representing a distribution of categories. A
* CatDist object is produced by a categorizer during the scoring process.
* A loss function may optionally be applied to the CatDist. <P>
* It is assumed the distribution is normalized. This is done automatically
* on construction. The internal array dist should be indexed by category
* number, starting with UNKNOWN_CATEGORY_VAL.
*
* @author James Louis 2/25/2001 Ported to Java.
* @author Dan Sommerfield 2/10/97 Initial revision.
*/
public class CatDist {
// CorrectionType ENUM: integer codes accepted by the cType constructor
// parameter.
/** Correction type code: no correction. */
public static final int none = 0;
/** Correction type code: Laplace correction. */
public static final int laplace = 1;
/** Correction type code: evidence correction. */
public static final int evidence = 2;
// END CorrectionType ENUM

/** Schema of the data for which the distribution is calculated. */
private Schema schema;
/** The distribution over categories. */
private double[] dist;
/** Order used to choose between two categories that have the same
 *  distribution value. */
private int[] tiebreakingOrder;
/** Options for logging displays. */
public static LogOptions logOptions = new LogOptions();
/** Constructor. It builds a distribution based on a single category, with a
* 1.0 probability given to this category, and 0.0 to all others.
* @param aSchema The Schema for the data in this distribution.
* @param aug The AugCategory with information on the category on which
* this distribution is built.
*/
public CatDist(Schema aSchema, AugCategory aug) {
schema = aSchema;
dist = new double[aSchema.num_label_values() +1];
tiebreakingOrder = new int[aSchema.num_label_values() + 1];
MLJArray.init_values(-1,tiebreakingOrder);
set_scores(aug.num());
}
/** Constructor. It builds an all-or-nothing distribution based on a single
* category, with a 1.0 probability given to this category, and 0.0 to all
* others.
* @param aSchema The Schema for the data in this distribution.
* @param singleCat The specific category on which this distribution is built.
*/
public CatDist(Schema aSchema, int singleCat) {
schema = aSchema;
dist = new double[aSchema.num_label_values() + 1];
tiebreakingOrder = new int[aSchema.num_label_values() + 1];
MLJArray.init_values(-1,tiebreakingOrder);
set_scores(singleCat);
}
/** Constructor.
* @param aSchema The Schema for the data in this distribution.
* @param fCounts The frequency count of categories found as labels.
* @param cType Type of correction to perform. Range is CatDist.none,
* CatDist.laplace, CatDist.evidence.
*/
public CatDist(Schema aSchema, double[] fCounts, int cType) {
schema = aSchema;
dist = new double[aSchema.num_label_values() + 1];
tiebreakingOrder =new int[aSchema.num_label_values() + 1];
MLJArray.init_values(-1,tiebreakingOrder);
set_preferred_category(0);
set_scores(fCounts, cType, 1.0);
set_default_tiebreaking();
}
/** Constructor.
* @param aSchema The Schema for the data in this distribution.
* @param fCounts The frequency count of categories found as labels.
* @param cType Type of correction to perform. Range is CatDist.none,
* CatDist.laplace, CatDist.evidence.
* @param cParam Correction parameter. Must be equal to or greater than 0.
*/
public CatDist(Schema aSchema, double[] fCounts, int cType, double cParam) {
schema = aSchema;
dist = new double[aSchema.num_label_values() + 1];
tiebreakingOrder =new int[aSchema.num_label_values() + 1];
MLJArray.init_values(-1,tiebreakingOrder);
set_preferred_category(0);
set_scores(fCounts, cType, cParam);
set_default_tiebreaking();
}
/** Constructor.
* @param aSchema The Schema for the data in this distribution.
* @param unknownProb The desired probability weight for the unknown
* category.
* @param aDist A weight distribution for this CatDist object.
*/
public CatDist(Schema aSchema, DoubleRef unknownProb,
double[] aDist) {
schema = aSchema;
dist = new double[aSchema.num_label_values() + 1];
tiebreakingOrder =new int[aSchema.num_label_values() + 1];
MLJArray.init_values(-1,tiebreakingOrder);
set_preferred_category(0);
set_scores(unknownProb, aDist);
set_default_tiebreaking();
}
/**
 * Copy constructor. The Schema reference is shared with the source object;
 * the distribution and tie-breaking arrays are duplicated so the two
 * objects do not share array storage.
 *
 * @param cDist The CatDist object to be copied.
 */
public CatDist(CatDist cDist) {
    this.schema = cDist.schema;
    this.dist = Arrays.copyOf(cDist.dist, cDist.dist.length);
    this.tiebreakingOrder =
            Arrays.copyOf(cDist.tiebreakingOrder, cDist.tiebreakingOrder.length);
}
/**
 * Converts the distribution scores to a String. Each score is cast to
 * {@code int} and the values are joined with {@code ", "}. An empty
 * distribution yields an empty String.
 *
 * @return A String containing information about the scores.
 */
private String scoresToString() {
    // StringBuilder avoids re-copying the accumulated text on every
    // iteration, and the separator-first loop needs no special handling of
    // the last element, so an empty array no longer indexes out of bounds.
    StringBuilder text = new StringBuilder();
    for (int i = 0; i < dist.length; i++) {
        if (i > 0) {
            text.append(", ");
        }
        // NOTE(review): the cast truncates toward zero, so fractional
        // probabilities print as 0 -- confirm this is the intended output.
        text.append((int) dist[i]);
    }
    return text.toString();
}
/**
 * Derives a tie breaking order from the given weight distribution. The
 * index reported by MLJArray.max is repeatedly assigned the next rank
 * (starting at 0) and then removed from consideration, so entries are
 * ranked by descending weight. An unknown category (index 0) whose weight
 * is approximately zero is pushed to the bottom of the ranking.
 *
 * @param weightDistribution The given weight distribution of categories.
 * Weights are expected to be non-negative; the debug assertion that checked
 * this in the original code is disabled. The array is not modified.
 * @return The tie breaking order, one rank per entry of
 * weightDistribution; an empty array when weightDistribution is empty.
 */
public static int[] merge_tiebreaking_order(double[] weightDistribution) {
    // Nothing to rank; also avoids reading dist[0] of an empty array.
    if (weightDistribution.length == 0) {
        return new int[0];
    }
    // Work on a copy: ranked entries are overwritten with -1 below.
    double[] dist = (double[]) weightDistribution.clone();
    int[] order = new int[dist.length];
    MLJArray.init_values(Integer.MAX_VALUE, order);
    // Index 0 holds the unknown category. The previous guard also required
    // dist[0] == Globals.UNKNOWN_CATEGORY_VAL, which compared a weight with a
    // category number and defeated the tolerance of approx_equal; only the
    // approximate-zero test is kept.
    if (MLJ.approx_equal(dist[0], 0.0)) {
        dist[0] = -1;
    }
    int nextIndex = 0;
    for (int i = 0; i < order.length; i++) {
        IntRef highestIndex = new IntRef(0);
        MLJArray.max(highestIndex, dist);
        order[highestIndex.value] = nextIndex++;
        // Mark as consumed so the next pass selects a different entry.
        dist[highestIndex.value] = -1;
    }
    MLJ.ASSERT(nextIndex == order.length, "CatDist::merge_tiebreaking_order: nextIndex == order.length");
    return order;
}
/**
 * Finds the majority category in the given weight distribution. When
 * several entries share the highest weight, the one with the smallest
 * value in the tie breaking order wins.
 *
 * @param weightDistribution The weight sums for each category found.
 * @param tieBreakingOrder The order of choices in the event that a tie
 * occurs between categories.
 * @return The category which appears the most among the labelled
 * instances, expressed as the winning index offset by
 * Globals.UNKNOWN_CATEGORY_VAL.
 */
public static int majority_category(double[] weightDistribution, int[] tieBreakingOrder) {
    IntRef winner = new IntRef(0);
    double topWeight = MLJArray.max(winner, weightDistribution);
    // Visit each later entry that MLJArray.find reports for the top weight
    // and keep whichever one ranks earliest in the tie breaking order.
    for (int tied = MLJArray.find(topWeight, winner.value + 1, weightDistribution);
            tied != -1;
            tied = MLJArray.find(topWeight, tied + 1, weightDistribution)) {
        if (tieBreakingOrder[tied] < tieBreakingOrder[winner.value]) {
            winner.value = tied;
        }
    }
    return winner.value + Globals.UNKNOWN_CATEGORY_VAL;
}
/**
 * Merges a given tie breaking order with the given weight distribution.
 * Entries are ranked by descending weight (rank 0 first); entries with
 * equal weight are ranked according to the supplied tie breaking order.
 *
 * @param tieBreakingOrder The order for choices in the event that a tie
 * occurs between categories.
 * @param weightDistribution The given weight distribution of categories.
 * The array is not modified.
 * @return The tie breaking order, one rank per entry of
 * weightDistribution.
 */
static public int[] merge_tiebreaking_order(int[] tieBreakingOrder,
        double[] weightDistribution) {
    // Work on a copy: ranked entries are overwritten with -1 below.
    double[] remaining = (double[]) weightDistribution.clone();
    int[] ranks = new int[remaining.length];
    MLJArray.init_values(Integer.MAX_VALUE, ranks);
    IntRef pick = new IntRef(0);
    int nextRank = 0;
    for (int round = 0; round < ranks.length; round++) {
        double topWeight = MLJArray.max(pick, remaining);
        // Among the entries sharing the top weight, prefer the one that
        // comes first in the supplied tie breaking order.
        for (int tied = MLJArray.find(topWeight, pick.value + 1, remaining);
                tied != -1;
                tied = MLJArray.find(topWeight, tied + 1, remaining)) {
            if (tieBreakingOrder[tied] < tieBreakingOrder[pick.value]) {
                pick.value = tied;
            }
        }
        ranks[pick.value] = nextRank;
        nextRank++;
        // Mark as consumed so the next round selects a different entry.
        remaining[pick.value] = -1;
    }
    MLJ.ASSERT(nextRank == ranks.length, "CatDist::merge_tiebreaking_order: ordering == order.length");
    return ranks;
}
/**
 * Accessor for the Schema held by this CatDist object.
 *
 * @return The Schema for the data on which this CatDist object contains
 * information. The stored reference itself is returned, not a copy.
 */
public Schema get_schema() {
    return this.schema;
}
/** Allows the results stored in and returned by a CatDist to be changed.
* This method takes a single category index and builds an all-or-nothing
* distribution around it. 1.0 probability mass is given to the single category
* and 0.0 is given to all others.
* @param singleCat The index for the category that should have a 1.0
* probability mass.
*/
public void set_scores(int singleCat) {
for(int i = 0 ; i<dist.length ; i++)
dist[i] = 0.0;
dist[singleCat] = 1.0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -