📄 kstarnominalattribute.java
字号:
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/**
 * KStarNominalAttribute.java
 * Copyright (c) 1995-97 by Len Trigg (trigg@cs.waikato.ac.nz).
 * Java port to Weka by Abdelaziz Mahoui (am14@cs.waikato.ac.nz).
 */

package weka.classifiers.kstar;

import java.io.*;
import java.util.*;
import weka.core.*;
import weka.classifiers.*;

/**
 * A custom class which provides the environment for computing the
 * transformation probability of a specified test instance nominal
 * attribute to a specified train instance nominal attribute.
 *
 * @author Len Trigg (len@intelligenesis.net)
 * @author Abdelaziz Mahoui (am14@cs.waikato.ac.nz)
 * @version $Revision 1.0 $
 */
public class KStarNominalAttribute implements KStarConstants {

  /** The training instances used for classification. */
  protected Instances m_TrainSet;

  /** The test instance. */
  protected Instance m_Test;

  /** The train instance. */
  protected Instance m_Train;

  /** The index of the nominal attribute in the test and train instances. */
  protected int m_AttrIndex;

  /** The stop parameter (probability mass assigned to an exact value match). */
  protected double m_Stop = 1.0;

  /**
   * Probability of the test attribute transforming into a train attribute
   * with a missing value.
   */
  protected double m_MissingProb = 1.0;

  /** Average probability of the test attribute transforming into a train attribute. */
  protected double m_AverageProb = 1.0;

  /** Smallest probability of the test attribute transforming into a train attribute. */
  protected double m_SmallestProb = 1.0;

  /** Number of training instances with no missing attribute values. */
  protected int m_TotalCount;

  /** Distribution of the attribute value in the train dataset. */
  protected int [] m_Distribution;

  /**
   * Set of columns: each column representing a randomised version of
   * the train dataset class column.
   */
  protected int [][] m_RandClassCols;

  /** A cache for storing attribute values and their corresponding stop parameters. */
  protected KStarCache m_Cache;

  // KStar Global settings

  /** The number of instances in the dataset. */
  protected int m_NumInstances;

  /** The number of class values. */
  protected int m_NumClasses;

  /** The number of attributes. */
  protected int m_NumAttributes;

  /** The class attribute type. */
  protected int m_ClassType;

  /** Missing value treatment (one of the M_* constants from KStarConstants). */
  protected int m_MissingMode = M_AVERAGE;

  /** B_SPHERE = use specified blend, B_ENTROPY = entropic blend setting. */
  protected int m_BlendMethod = B_SPHERE ;

  /** Default sphere of influence blend setting. */
  protected int m_BlendFactor = 20;

  /**
   * Constructor. Stores the test/train instance pair, the attribute index
   * being compared, and shared helper structures, then caches the dataset
   * dimensions via init().
   *
   * @param test the test instance
   * @param train the train instance
   * @param attrIndex the index of the nominal attribute in both instances
   * @param trainSet the training instances
   * @param randClassCol randomised versions of the train-set class column
   * @param cache cache of previously computed stop parameters per attribute value
   */
  public KStarNominalAttribute(Instance test, Instance train, int attrIndex,
                               Instances trainSet, int [][] randClassCol,
                               KStarCache cache) {
    m_Test = test;
    m_Train = train;
    m_AttrIndex = attrIndex;
    m_TrainSet = trainSet;
    m_RandClassCols = randClassCol;
    m_Cache = cache;
    init();
  }

  /**
   * Initializes the m_Attributes of the class: caches the number of
   * instances, classes and attributes of the training set, and the class
   * attribute type. Any exception is printed and swallowed, leaving the
   * fields at their defaults.
   */
  private void init() {
    try {
      m_NumInstances = m_TrainSet.numInstances();
      m_NumClasses = m_TrainSet.numClasses();
      m_NumAttributes = m_TrainSet.numAttributes();
      m_ClassType = m_TrainSet.classAttribute().type();
    } catch(Exception e) {
      // best-effort: on failure the counts stay at their default (0) values
      e.printStackTrace();
    }
  }

  /**
   * Calculates the probability of the indexed nominal attribute of the test
   * instance transforming into the indexed nominal attribute of the training
   * instance.
   *
   * @return the value of the transformation probability.
   */
  public double transProb() {
    String debug = "(KStarNominalAttribute.transProb) "; // unused; appears intended as a debug/log prefix
    double transProb = 0.0;
    // check if the attribute value has been encountered before,
    // in which case it should be in the nominal cache
    if (m_Cache.containsKey(m_Test.value(m_AttrIndex))) {
      KStarCache.TableEntry te = m_Cache.getCacheValues(m_Test.value(m_AttrIndex));
      m_Stop = te.value;
      m_MissingProb = te.pmiss;
    } else {
      // not cached: we have to compute the parameters
      generateAttrDistribution();
      if (m_BlendMethod == B_ENTROPY) {
        m_Stop = stopProbUsingEntropy();
      } else { // default is B_SPHERE
        m_Stop = stopProbUsingBlend();
      }
      // store the values in cache for subsequent instances with this value
      m_Cache.store( m_Test.value(m_AttrIndex), m_Stop, m_MissingProb );
    }
    // we've got our m_Stop, then what?
    if (m_Train.isMissing(m_AttrIndex)) {
      transProb = m_MissingProb;
    } else {
      try {
        // base probability: remaining mass spread uniformly over all values,
        // plus the stop parameter when the values match exactly
        transProb = (1.0 - m_Stop) / m_Test.attribute(m_AttrIndex).numValues();
        if ( (int)m_Test.value(m_AttrIndex) == (int)m_Train.value(m_AttrIndex) ) {
          transProb += m_Stop;
        }
      } catch (Exception e) {
        e.printStackTrace();
      }
    }
    return transProb;
  }

  /**
   * Calculates the "stop parameter" for this attribute using
   * the entropy method: the value is computed using a root finder
   * algorithm. The method takes advantage of the calculation to
   * compute the smallest and average transformation probabilities
   * once the stop factor is obtained. It also sets the transformation
   * probability to an attribute with a missing value.
   *
   * @return the value of the stop parameter.
   */
  private double stopProbUsingEntropy() {
    String debug = "(KStarNominalAttribute.stopProbUsingEntropy)";
    if ( m_ClassType != Attribute.NOMINAL ) {
      // hard exit: the entropic blend is only defined for nominal classes
      System.err.println("Error: "+debug+" attribute class must be nominal!");
      System.exit(1);
    }
    int itcount = 0;
    double stopProb;
    double lower, upper, pstop;
    double bestminprob = 0.0, bestpsum = 0.0;
    double bestdiff = 0.0, bestpstop = 0.0;
    double currentdiff, lastdiff, stepsize, delta;
    KStarWrapper botvals = new KStarWrapper();
    KStarWrapper upvals = new KStarWrapper();
    KStarWrapper vals = new KStarWrapper();

    // Initial values for root finder
    lower = 0.0 + ROOT_FINDER_ACCURACY/2.0;
    upper = 1.0 - ROOT_FINDER_ACCURACY/2.0;

    // Find (approx) entropy ranges
    calculateEntropy(upper, upvals);
    calculateEntropy(lower, botvals);

    if (upvals.avgProb == 0) {
      // When there are no training instances with the test value:
      // doesn't matter what exact value we use for pstop, just acts as
      // a constant scale factor in this case.
      calculateEntropy(lower, vals);
    } else {
      // Optimise the scale factor: start from whichever end of the range
      // has the larger (random - actual) entropy gap and step toward the other
      if ( (upvals.randEntropy - upvals.actEntropy < botvals.randEntropy - botvals.actEntropy) &&
           (botvals.randEntropy - botvals.actEntropy > FLOOR) ) {
        bestpstop = pstop = lower;
        stepsize = INITIAL_STEP;
        bestminprob = botvals.minProb;
        bestpsum = botvals.avgProb;
      } else {
        bestpstop = pstop = upper;
        stepsize = -INITIAL_STEP;
        bestminprob = upvals.minProb;
        bestpsum = upvals.avgProb;
      }
      bestdiff = currentdiff = FLOOR;
      itcount = 0;
      /* Enter the root finder */
      while (true) {
        itcount++;
        lastdiff = currentdiff;
        pstop += stepsize;
        if (pstop <= lower) {
          // clamped at the lower bound; force a direction reversal below
          pstop = lower;
          currentdiff = 0.0;
          delta = -1.0;
        } else if (pstop >= upper) {
          // clamped at the upper bound; force a direction reversal below
          pstop = upper;
          currentdiff = 0.0;
          delta = -1.0;
        } else {
          calculateEntropy(pstop, vals);
          currentdiff = vals.randEntropy - vals.actEntropy;
          if (currentdiff < FLOOR) {
            currentdiff = FLOOR;
            if ((Math.abs(stepsize) < INITIAL_STEP) && (bestdiff == FLOOR)) {
              // no improvement ever found at fine step size: give up and
              // fall back to the lower-bound values
              bestpstop = lower;
              bestminprob = botvals.minProb;
              bestpsum = botvals.avgProb;
              break;
            }
          }
          delta = currentdiff - lastdiff;
        }
        if (currentdiff > bestdiff) {
          // track the best entropy gap seen so far
          bestdiff = currentdiff;
          bestpstop = pstop;
          bestminprob = vals.minProb;
          bestpsum = vals.avgProb;
        }
        if (delta < 0) {
          // overshot the optimum: halve the step and reverse direction,
          // stopping once the step is below the required accuracy
          if (Math.abs(stepsize) < ROOT_FINDER_ACCURACY) {
            break;
          } else {
            stepsize /= -2.0;
          }
        }
        if (itcount > ROOT_FINDER_MAX_ITER) {
          break;
        }
      }
    }
    m_SmallestProb = bestminprob;
    m_AverageProb = bestpsum;
    // Set the probability of transforming to a missing value
    switch ( m_MissingMode ) {
    case M_DELETE:
      m_MissingProb = 0.0;
      break;
// NOTE(review): the source chunk ends here — the remaining cases of this
// switch, the end of stopProbUsingEntropy(), and the rest of the class are
// not visible in this chunk.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -