📄 entropydiscretizer.java
字号:
/** * JBNC - Bayesian Network Classifiers Toolbox <p> * * Latest release available at http://sourceforge.net/projects/jbnc/ <p> * * Copyright (C) 1999-2003 Jarek Sacha <p> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. <p> * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. <p> * * You should have received a copy of the GNU General Public License along with * this program; if not, write to the Free Software Foundation, Inc., 59 Temple * Place - Suite 330, Boston, MA 02111-1307, USA. <br> * http://www.fsf.org/licenses/gpl.txt */package jbnc.discretize;import java.io.Serializable;import java.util.ArrayList;import java.util.Arrays;import java.util.Comparator;/** * Implements entropy based discretisation algotithm by Fayyad and Irani.<p> * <p/> * U.M. Fayyad and K.B Irani "Multi-interval discretizationn of contnous-valued attributes for classification learning", * In <i>Proc. Thirtheenth International Joint Conference on Artificial Intelligence</i>, Chambery, France. Morgan * Kauffman, pp. 1022-1027, 1993. <p> * <p/> * Discretization process maps a continuous value into a fixed set of partitions. Partitioning is uniquely described by * a set of partition boundaries. A first partition corresponds to interval from negative infinity to first partition * point (inclusively). Second partition streaches from first boundary point (exclusively) to second boundary point * (inclusively). And so on. Last partition streaches from last boundary point (exclusively) to positive infinity.<p> * <p/> * This disretization algorithm is intended for discetization of attributes as a preprocessing step for a * classification. It requires knowledge of a class for each case (nstance) used to build discretizer. A discretizer is * build for each attribute separately.<p> * <p/> * To create a discretizer use method <code>buildDiscretizer()</code>. To applay the discretizer use method * <code>discretize()</code>. */public final class EntropyDiscretizer implements Serializable { /** * Represent partition boundaries for discretization. */ private double[] partitionBoundaries; /** * Partition names. Number of elements is one more that in <code>partitionBoundaries</code>. */ private String[] partitionNames; private int nbClasses = -1; /** * Performs construction of the discretizer for a set of cases (class + continous attribute pairs). * * @param cases training data for creation of a discretizer. */ public void buildDiscretizer(final Case[] cases) { if (cases == null) { throw new IllegalArgumentException("Argument cases cannot be null."); } if (cases.length <= 1) { throw new IllegalArgumentException("Number of elements cases in argument cases must be greater than 1."); } final Case[] cases_ = new Case[cases.length]; System.arraycopy(cases, 0, cases_, 0, cases_.length); // Find number of classes final int n = maxClassId(cases_) + 1; if (n <= 1) { throw new IllegalArgumentException("Number of classes in argument cases must be greater than 1."); } nbClasses = n; final int[] classCount = classCount(cases_); // Sort cases by attribute Arrays.sort(cases_, new ValueClassCaseComparator()); // Recursively partition s final Subset s = new Subset(cases_, classCount); final ArrayList p = s.partitions(); partitionBoundaries = new double[p.size()]; for (int i = 0; i < partitionBoundaries.length; i++) { partitionBoundaries[i] = ((Double) p.get(i)).doubleValue(); } // Create partition names partitionNames = new String[partitionBoundaries.length + 1]; if (partitionNames.length == 1) { partitionNames[0] = "(-inf,+inf)"; } else { partitionNames[0] = "(-inf," + partitionBoundaries[0] + "]"; for (int i = 1; i < partitionBoundaries.length; i++) { partitionNames[i] = "(" + partitionBoundaries[i - 1] + "-" + +partitionBoundaries[i] + "]"; } partitionNames[partitionBoundaries.length] = "(" + partitionBoundaries[partitionBoundaries.length - 1] + ",+inf)"; } } /** * Discretize single continous value. A discretizer must be build before calling this method. * * @param v value to discretize. * @return label of a discretizer value. */ public int discretize(final double v) { for (int i = 0; i < partitionBoundaries.length; i++) { if (v < partitionBoundaries[i]) { return i; } } return partitionBoundaries.length; } /** * Return array representing partition boundaries. The first partition corresponds to interval from negative * infinity to first partition point (inclusively). The second partition streaches from first boundary point * (exclusively) to second boundary point (inclusively). And so on. The last partition streaches from last boundary * point (exclusively) to positive infinity. * * @return Array representing partition boundaries. */ public double[] getPartitionBoundaries() { final double[] p = new double[partitionBoundaries.length]; System.arraycopy(partitionBoundaries, 0, p, 0, p.length); return p; } /** * Return symbolic names describing discretization partitions. Number of partitions is one more than number of * partition boundaries. * * @return Names describing discretization partitions. * @see #getPartitionBoundaries() */ public String[] getPartitionNames() { final String[] names = new String[partitionNames.length]; for (int i = 0; i < names.length; i++) { names[i] = partitionNames[i]; } return names; } /** * Largest class ID in case set <code>s</code>. Class IDs must be greater or equal 0. * * @param s * @return Largest class ID. */ private static int maxClassId(final Case[] s) { int maxClassId = -1; for (int i = 0; i < s.length; i++) { final int classId = s[i].classId; if (classId < 0) { throw new IllegalArgumentException("Class ID for case " + i + " is less then zero (" + classId + ")."); } if (classId > maxClassId) { maxClassId = classId; } } return maxClassId; } /* * */ private int[] classCount(final Case[] s) { final int[] classCount = new int[nbClasses]; for (int i = 0; i < s.length; i++) { ++classCount[s[i].classId]; } return classCount; } /** * Enables ordering of cases by attribute value. If two attributeValues are equal cases are ordered by class * indexes. */ private static final class ValueClassCaseComparator implements Comparator { public int compare(final Object o1, final Object o2) { final Case c1 = (Case) o1; final Case c2 = (Case) o2; if (c1.attributeValue > c2.attributeValue) { return 1; } else if (c1.attributeValue < c2.attributeValue) { return -1; } else if (c1.classId > c1.classId) { return 1; } else if (c1.classId < c1.classId) { return -1; } else { return 0; } } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -