📄 kstarnumericattribute.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//** * KStarNumericAttribute.java * Copyright (c) 1995-97 by Len Trigg (trigg@cs.waikato.ac.nz). * Java port to Weka by Abdelaziz Mahoui (am14@cs.waikato.ac.nz). * */package weka.classifiers.kstar;import java.io.*;import java.util.*;import weka.core.*;import weka.classifiers.*;/** * A custom class which provides the environment for computing the * transformation probability of a specified test instance numeric * attribute to a specified train instance numeric attribute. * * @author Len Trigg (len@intelligenesis.net) * @author Abdelaziz Mahoui (am14@cs.waikato.ac.nz) * @version $Revision 1.0 $ */public class KStarNumericAttribute implements KStarConstants { /** The training instances used for classification. 
*/ protected Instances m_TrainSet; /** The test instance */ protected Instance m_Test; /** The train instance */ protected Instance m_Train; /** The index of the attribute in the test and train instances */ protected int m_AttrIndex; /** The scale parameter */ protected double m_Scale = 1.0; /** Probability of test attribute transforming into train attribute with missing value */ protected double m_MissingProb = 1.0; /** Average probability of test attribute transforming into train attribute */ protected double m_AverageProb = 1.0; /** Smallest probability of test attribute transforming into train attribute */ protected double m_SmallestProb = 1.0; /** The set of disctances from the test attribute to the set of train attributes */ protected double [] m_Distances; /** Set of colomns: each colomn representing a randomised version of the train dataset class colomn */ protected int [][] m_RandClassCols; /** The number of train instances with no missing attribute values */ protected int m_ActualCount = 0; /** A cache for storing attribute values and their corresponding scale parameters */ protected KStarCache m_Cache; /** The number of instances in the dataset */ protected int m_NumInstances; /** The number of class values */ protected int m_NumClasses; /** The number of attributes */ protected int m_NumAttributes; /** The class attribute type */ protected int m_ClassType; /** missing value treatment */ protected int m_MissingMode = M_AVERAGE; /** 0 = use specified blend, 1 = entropic blend setting */ protected int m_BlendMethod = B_SPHERE ; /** default sphere of influence blend setting */ protected int m_BlendFactor = 20; /** * Constructor */ public KStarNumericAttribute(Instance test, Instance train, int attrIndex, Instances trainSet, int [][] randClassCols, KStarCache cache) { m_Test = test; m_Train = train; m_AttrIndex = attrIndex; m_TrainSet = trainSet; m_RandClassCols = randClassCols; m_Cache = cache; init(); } /** * Initializes the m_Attributes of the class. 
*/ private void init() { try { m_NumInstances = m_TrainSet.numInstances(); m_NumClasses = m_TrainSet.numClasses(); m_NumAttributes = m_TrainSet.numAttributes(); m_ClassType = m_TrainSet.classAttribute().type(); } catch(Exception e) { e.printStackTrace(); } } /** * Calculates the transformation probability of the attribute indexed * "m_AttrIndex" in test instance "m_Test" to the same attribute in * the train instance "m_Train". * * @return the probability value */ public double transProb() { String debug = "(KStarNumericAttribute.transProb) "; double transProb, distance, scale; // check if the attribute value has been encountred before // in which case it should be in the numeric cache if ( m_Cache.containsKey(m_Test.value(m_AttrIndex))) { KStarCache.TableEntry te = m_Cache.getCacheValues( m_Test.value(m_AttrIndex) ); m_Scale = te.value; m_MissingProb = te.pmiss; } else { if (m_BlendMethod == B_ENTROPY) { m_Scale = scaleFactorUsingEntropy(); } else { // default is B_SPHERE m_Scale = scaleFactorUsingBlend(); } m_Cache.store( m_Test.value(m_AttrIndex), m_Scale, m_MissingProb ); } // now what??? if (m_Train.isMissing(m_AttrIndex)) { transProb = m_MissingProb; } else { distance = Math.abs( m_Test.value(m_AttrIndex) - m_Train.value(m_AttrIndex) ); transProb = PStar( distance, m_Scale ); } return transProb; } /** * Calculates the scale factor for the attribute indexed * "m_AttrIndex" in test instance "m_Test" using a global * blending factor (default value is 20%). 
*
   * <p>Side effects: rebuilds {@code m_Distances} (with -1.0 marking train
   * instances whose attribute value is missing), recounts
   * {@code m_ActualCount}, and sets {@code m_SmallestProb} and
   * {@code m_AverageProb}. Unless all non-missing distances are the same,
   * it also sets {@code m_MissingProb} according to {@code m_MissingMode}.
   *
   * @return the scale factor value
   */
  private double scaleFactorUsingBlend() {
    // The test value does not change while scanning the training set.
    final double testValue = m_Test.value(m_AttrIndex);

    double lowest = -1.0;      // smallest distance seen so far (-1 = none yet)
    double nextLowest = -1.0;  // second smallest distinct distance (-1 = none yet)
    int lowestCount = 0;       // number of train instances at the smallest distance (n0)

    // Start the count from zero so that calling this method more than once
    // on the same object does not accumulate counts from earlier calls.
    m_ActualCount = 0;
    m_Distances = new double[m_NumInstances];
    for (int j = 0; j < m_NumInstances; j++) {
      if (m_TrainSet.instance(j).isMissing(m_AttrIndex)) {
        // mark the train instance with a missing value by setting
        // the distance to -1.0
        m_Distances[j] = -1.0;
        continue;
      }
      m_Distances[j] = Math.abs(m_TrainSet.instance(j).value(m_AttrIndex) - testValue);
      if ((m_Distances[j] + 1e-5) < nextLowest || nextLowest == -1.0) {
        if ((m_Distances[j] + 1e-5) < lowest || lowest == -1.0) {
          nextLowest = lowest;
          lowest = m_Distances[j];
          lowestCount = 1;
        } else if (Math.abs(m_Distances[j] - lowest) < 1e-5) {
          // record the number of training instances (number n0) at
          // the smallest distance from the test instance
          lowestCount++;
        } else {
          nextLowest = m_Distances[j];
        }
      }
      // records the actual number of instances with no missing value
      m_ActualCount++;
    }

    if (nextLowest == -1 || lowest == -1) {
      // Data values are all the same.
      // NOTE(review): m_MissingProb is left untouched on this path, as in
      // the original code - confirm that is intended for every m_MissingMode.
      m_SmallestProb = m_AverageProb = 1.0;
      return 1.0;
    }

    // starting point for root
    double root = 1.0 / (nextLowest - lowest);

    // given the expression: n0 <= E(scale) <= N
    // E(scale) = (N - n0) * b + n0  with blending factor: 0 <= b <= 1
    // aimFor   = (N - n0) * b + n0
    double aimFor = (m_ActualCount - lowestCount) * (double) m_BlendFactor / 100.0 + lowestCount;
    if (m_BlendFactor == 0) {
      aimFor += 1.0;
    }

    // root is bracketed in interval [bot, up]
    double bot = ROOT_FINDER_ACCURACY / 2.0;
    double up = root * 16; // heuristic upper bound ("bodgy" in the original)

    // E(bot) - aimFor
    KStarWrapper botVals = new KStarWrapper();
    calculateSphereSize(bot, botVals);
    botVals.sphere -= aimFor;

    // E(up) - aimFor
    KStarWrapper upVals = new KStarWrapper();
    calculateSphereSize(up, upVals);
    upVals.sphere -= aimFor;

    double minPos = 0.0;   // scale with the smallest |E(scale) - aimFor| found
    double avgProb = 0.0;
    double minProb = 0.0;

    if (botVals.sphere < 0) {
      // Couldn't include that many instances - going for max possible
      minPos = bot;
      avgProb = botVals.avgProb;
      minProb = botVals.minProb;
    } else if (upVals.sphere > 0) {
      // Couldn't include that few, going for min possible
      minPos = up;
      avgProb = upVals.avgProb;
      minProb = upVals.minProb;
    } else {
      // Root finding by repeated halving of the bracket
      KStarWrapper vals = new KStarWrapper();
      double minVal = 9e300;
      int iterations = 0;
      for (;;) {
        calculateSphereSize(root, vals);
        vals.sphere -= aimFor;
        if (Math.abs(vals.sphere) < minVal) {
          // best candidate so far
          minVal = Math.abs(vals.sphere);
          minPos = root;
          avgProb = vals.avgProb;
          minProb = vals.minProb;
        }
        if (Math.abs(vals.sphere) <= ROOT_FINDER_ACCURACY) {
          break; // converged to a solution, done!
        }
        if (vals.sphere > 0.0) {
          double mid = (root + up) / 2.0;
          bot = root;
          root = mid;
        } else {
          double mid = (root + bot) / 2.0;
          up = root;
          root = mid;
        }
        iterations++;
        if (iterations > ROOT_FINDER_MAX_ITER) {
          // Iteration budget exhausted: keep the best candidate in minPos.
          break;
        }
      }
    }

    m_SmallestProb = minProb;
    m_AverageProb = avgProb;

    // Set the probability of transforming to a missing value
    switch (m_MissingMode) {
      case M_DELETE:
        m_MissingProb = 0.0;
        break;
      case M_NORMAL:
        m_MissingProb = 1.0;
        break;
      case M_MAXDIFF:
        m_MissingProb = m_SmallestProb;
        break;
      case M_AVERAGE:
        m_MissingProb = m_AverageProb;
        break;
    }

    // the scale factor is the best position found
    return minPos;
  }

  /**
   * Calculates the size of the "sphere of influence" defined as:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -