📄 ibk.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * IBk.java * Copyright (C) 1999 Stuart Inglis,Len Trigg,Eibe Frank * */package weka.classifiers.lazy;import weka.classifiers.Classifier;import weka.classifiers.DistributionClassifier;import weka.classifiers.Evaluation;import weka.classifiers.UpdateableClassifier;import java.io.*;import java.util.*;import weka.core.KDTree;import weka.core.DistanceFunction;import weka.core.EuclideanDistance;import weka.core.Utils;import weka.core.Attribute;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.SelectedTag;import weka.core.Tag;import weka.core.Option;import weka.core.OptionHandler;import weka.core.UnsupportedAttributeTypeException;import weka.core.WeightedInstancesHandler;/** * <i>K</i>-nearest neighbour classifier. For more information, see <p> * * Aha, D., and D. Kibler (1991) "Instance-based learning algorithms", * <i>Machine Learning</i>, vol.6, pp. 37-66.<p> * * Valid options are:<p> * * -K num <br> * Set the number of nearest neighbours to use in prediction * (default 1) <p> * * -W num <br> * Set a fixed window size for incremental train/testing. As * new training instances are added, oldest instances are removed * to maintain the number of training instances at this size. * (default no window) <p> * * -D <br> * Neighbours will be weighted by the inverse of their distance * when voting. (default equal weighting) <p> * * -F <br> * Neighbours will be weighted by their similarity when voting. * (default equal weighting) <p> * * -X <br> * Selects the number of neighbours to use by hold-one-out cross * validation, with an upper limit given by the -K option. <p> * * -S <br> * When k is selected by cross-validation for numeric class attributes, * minimize mean-squared error. (default mean absolute error) <p> * * -N <br> * Turns off normalization. <p> * * -E <kdtree class><br> * KDTrees class and its options (can only use the same distance function * as XMeans).<p> * * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz) * @author Stuart Inglis (singlis@cs.waikato.ac.nz) * @author Len Trigg (trigg@cs.waikato.ac.nz) * @author Eibe Frank (eibe@cs.waikato.ac.nz) * @version $Revision: 1.1.1.1 $ */public class IBk extends DistributionClassifier implements OptionHandler, UpdateableClassifier, WeightedInstancesHandler { /* * A class for storing data about a neighbouring instance */ private class NeighbourNode { /** The neighbour instance */ private Instance m_Instance; /** The distance from the current instance to this neighbour */ private double m_Distance; /** A link to the next neighbour instance */ private NeighbourNode m_Next; /** * Create a new neighbour node. * * @param distance the distance to the neighbour * @param instance the neighbour instance * @param next the next neighbour node */ public NeighbourNode(double distance, Instance instance, NeighbourNode next) { m_Distance = distance; m_Instance = instance; m_Next = next; } /** * Create a new neighbour node that doesn't link to any other nodes. * @param distance the distance to the neighbour * @param instance the neighbour instance */ public NeighbourNode(double distance, Instance instance) { this(distance, instance, null); } } /* * A class for a linked list to store the nearest k neighbours * to an instance. We use a list so that we can take care of * cases where multiple neighbours are the same distance away. * i.e. the minimum length of the list is k. */ private class NeighbourList { /** The first node in the list */ private NeighbourNode m_First; /** The last node in the list */ private NeighbourNode m_Last; /** The number of nodes to attempt to maintain in the list */ private int m_Length = 1; /** * Creates the neighbourlist with a desired length * * @param length the length of list to attempt to maintain */ public NeighbourList(int length) { m_Length = length; } /** * Gets whether the list is empty. * @return true if so */ public boolean isEmpty() { return (m_First == null); } /** * Gets the current length of the list. * @return the current length of the list */ public int currentLength() { int i = 0; NeighbourNode current = m_First; while (current != null) { i++; current = current.m_Next; } return i; } /** * Inserts an instance neighbour into the list, maintaining the list * sorted by distance. * * @param distance the distance to the instance * @param instance the neighbouring instance */ public void insertSorted(double distance, Instance instance) { if (isEmpty()) { m_First = m_Last = new NeighbourNode(distance, instance); } else { NeighbourNode current = m_First; if (distance < m_First.m_Distance) {// Insert at head m_First = new NeighbourNode(distance, instance, m_First); } else { // Insert further down the list for( ;(current.m_Next != null) && (current.m_Next.m_Distance < distance); current = current.m_Next); current.m_Next = new NeighbourNode(distance, instance, current.m_Next); if (current.equals(m_Last)) { m_Last = current.m_Next; } } // Trip down the list until we've got k list elements (or more if the // distance to the last elements is the same). int valcount = 0; for(current = m_First; current.m_Next != null; current = current.m_Next) { valcount++; if ((valcount >= m_Length) && (current.m_Distance != current.m_Next.m_Distance)) { m_Last = current; current.m_Next = null; break; } } } } /** * Prunes the list to contain the k nearest neighbours. If there are * multiple neighbours at the k'th distance, all will be kept. * * @param k the number of neighbours to keep in the list. */ public void pruneToK(int k) { if (isEmpty()) { return; } if (k < 1) { k = 1; } int currentK = 0; double currentDist = m_First.m_Distance; NeighbourNode current = m_First; for(; current.m_Next != null; current = current.m_Next) { currentK++; currentDist = current.m_Distance; if ((currentK >= k) && (currentDist != current.m_Next.m_Distance)) { m_Last = current; current.m_Next = null; break; } } } /** * Prints out the contents of the neighbourlist */ public void printList() { if (isEmpty()) { System.out.println("Empty list"); } else { NeighbourNode current = m_First; //System.out.print("Node:"); while (current != null) { System.out.println("Node: instance " + current.m_Instance + ", distance " + current.m_Distance); //System.out.print("distance " + current.m_Distance); current = current.m_Next; } System.out.println(); } } } /** KDTrees class if KDTrees are used */ private KDTree m_KDTree = null; /** The training instances used for classification. */ protected Instances m_Train; /** The number of class values (or 1 if predicting numeric) */ protected int m_NumClasses; /** The class attribute type */ protected int m_ClassType; /** The number of neighbours to use for classification (currently) */ protected int m_kNN; /** Distance functions */ protected DistanceFunction m_DistanceF = null; /** * The value of kNN provided by the user. This may differ from * m_kNN if cross-validation is being used */ protected int m_kNNUpper; /** * Whether the value of k selected by cross validation has * been invalidated by a change in the training instances */ protected boolean m_kNNValid; /** * The maximum number of training instances allowed. When * this limit is reached, old training instances are removed, * so the training data is "windowed". Set to 0 for unlimited * numbers of instances. */ protected int m_WindowSize; /** Whether the neighbours should be distance-weighted */ protected int m_DistanceWeighting; /** Whether to select k by cross validation */ protected boolean m_CrossValidate; /** * Whether to minimise mean squared error rather than mean absolute * error when cross-validating on numeric prediction tasks */ protected boolean m_MeanSquared; /** True if debugging output should be printed */ boolean m_Debug; /** True if normalization is turned off */ protected boolean m_DontNormalize; /* Define possible instance weighting methods */ public static final int WEIGHT_NONE = 1; public static final int WEIGHT_INVERSE = 2; public static final int WEIGHT_SIMILARITY = 4; public static final Tag [] TAGS_WEIGHTING = { new Tag(WEIGHT_NONE, "No distance weighting"), new Tag(WEIGHT_INVERSE, "Weight by 1/distance"), new Tag(WEIGHT_SIMILARITY, "Weight by 1-distance") }; /** The number of attributes the contribute to a prediction */ protected double m_NumAttributesUsed; /** Ranges of the universe of data, lowest value, highest value and width */ protected double [][] m_Ranges; /** Index in ranges for LOW and HIGH and WIDTH */ protected static int R_MIN = 0; protected static int R_MAX = 1; protected static int R_WIDTH = 2; /** * IBk classifier. Simple instance-based learner that uses the class * of the nearest k training instances for the class of the test * instances. * * @param k the number of nearest neighbours to use for prediction */ public IBk(int k) { init(); setKNN(k); } /** * IB1 classifer. Instance-based learner. Predicts the class of the * single nearest training instance for each test instance. */ public IBk() { init(); } /** * Get the value of Debug. * @return Value of Debug. */ public boolean getDebug() { return m_Debug; } /** * Set the value of Debug. * @param newDebug Value to assign to Debug. */ public void setDebug(boolean newDebug) { m_Debug = newDebug; } /** * Sets the KDTree class. * @param k a KDTree object with all options set */ public void setKDTree(KDTree k) { m_KDTree = k; } /** * Gets the KDTree class. * @return flag if KDTrees are used */ public KDTree getKDTree() { return m_KDTree; } /** * Gets the KDTree specification string, which contains the class name of * the KDTree class and any options to the KDTree * @return the KDTree string. */ protected String getKDTreeSpec() { KDTree c = getKDTree(); if (c instanceof OptionHandler) { return c.getClass().getName() + " " + Utils.joinOptions(((OptionHandler)c).getOptions()); } return c.getClass().getName(); } /** * Set the number of neighbours the learner is to use. * @param k the number of neighbours. */ public void setKNN(int k) { m_kNN = k; m_kNNUpper = k; m_kNNValid = false; } /** * Gets the number of neighbours the learner will use. * * @return the number of neighbours */ public int getKNN() { return m_kNN; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -