/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* IBk.java
* Copyright (C) 1999 Stuart Inglis, Len Trigg, Eibe Frank
*
*/
package weka.classifiers.lazy;
import java.util.Enumeration;
import java.util.Vector;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.UpdateableClassifier;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.UnsupportedAttributeTypeException;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
/**
* <i>K</i>-nearest neighbours classifier. For more information, see <p>
*
* Aha, D., and D. Kibler (1991) "Instance-based learning algorithms",
* <i>Machine Learning</i>, vol.6, pp. 37-66.<p>
*
* Valid options are:<p>
*
* -K num <br>
* Set the number of nearest neighbors to use in prediction
* (default 1) <p>
*
* -W num <br>
* Set a fixed window size for incremental train/testing. As
* new training instances are added, oldest instances are removed
* to maintain the number of training instances at this size.
* (default no window) <p>
*
* -I <br>
* Neighbors will be weighted by the inverse of their distance
* when voting. (default equal weighting) <p>
*
* -F <br>
* Neighbors will be weighted by their similarity when voting.
* (default equal weighting) <p>
*
* -X <br>
* Select the number of neighbors to use by hold-one-out cross
* validation, with an upper limit given by the -K option. <p>
*
* -E <br>
* When k is selected by cross-validation for numeric class attributes,
* minimize mean-squared error. (default mean absolute error) <p>
*
* -N <br>
* Turns off normalization. <p>
*
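* A minimal usage sketch is given below. The ARFF file name is a hypothetical
* placeholder, buildClassifier and classifyInstance are inherited from the
* standard weka.classifiers.Classifier API rather than shown in this excerpt,
* and exception handling is omitted: <p>
*
* <pre>
* Instances data = new Instances(new java.io.FileReader("some-data.arff"));
* data.setClassIndex(data.numAttributes() - 1);
*
* IBk knn = new IBk(3);           // use the three nearest neighbours
* knn.buildClassifier(data);
* double prediction = knn.classifyInstance(data.instance(0));
* </pre>
*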
* @author Stuart Inglis (singlis@cs.waikato.ac.nz)
* @author Len Trigg (trigg@cs.waikato.ac.nz)
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
* @version $Revision$
*/
public class IBk extends Classifier implements
OptionHandler, UpdateableClassifier, WeightedInstancesHandler {
/**
* Returns a string describing this classifier
* @return a description suitable for
* displaying in the explorer/experimenter gui
*/
public String globalInfo() {
return "K-nearest neighbours classifier. Normalizes attributes by default. Can "
+ "select appropriate value of K based on cross-validation. Can also do "
+ "distance weighting. For more information, see\n\n"
+ "Aha, D., and D. Kibler (1991) \"Instance-based learning algorithms\", "
+ "Machine Learning, vol.6, pp. 37-66.";
}
/*
* A class for storing data about a neighboring instance
*/
private class NeighborNode {
/** The neighbor instance */
private Instance m_Instance;
/** The distance from the current instance to this neighbor */
private double m_Distance;
/** A link to the next neighbor instance */
private NeighborNode m_Next;
/**
* Create a new neighbor node.
*
* @param distance the distance to the neighbor
* @param instance the neighbor instance
* @param next the next neighbor node
*/
public NeighborNode(double distance, Instance instance, NeighborNode next){
m_Distance = distance;
m_Instance = instance;
m_Next = next;
}
/**
* Create a new neighbor node that doesn't link to any other nodes.
*
* @param distance the distance to the neighbor
* @param instance the neighbor instance
*/
public NeighborNode(double distance, Instance instance) {
this(distance, instance, null);
}
}
/*
* A class for a linked list to store the k nearest neighbours
* to an instance. A list (rather than a fixed-size array) lets us
* keep every neighbour that ties with the k'th nearest one, so k is
* only a lower bound on the length of the list.
*/
private class NeighborList {
/** The first node in the list */
private NeighborNode m_First;
/** The last node in the list */
private NeighborNode m_Last;
/** The number of nodes to attempt to maintain in the list */
private int m_Length = 1;
/**
* Creates the neighborlist with a desired length
*
* @param length the length of list to attempt to maintain
*/
public NeighborList(int length) {
m_Length = length;
}
/**
* Gets whether the list is empty.
*
* @return true if so
*/
public boolean isEmpty() {
return (m_First == null);
}
/**
* Gets the current length of the list.
*
* @return the current length of the list
*/
public int currentLength() {
int i = 0;
NeighborNode current = m_First;
while (current != null) {
i++;
current = current.m_Next;
}
return i;
}
/**
* Inserts an instance neighbor into the list, maintaining the list
* sorted by distance.
*
* @param distance the distance to the instance
* @param instance the neighboring instance
*/
public void insertSorted(double distance, Instance instance) {
if (isEmpty()) {
m_First = m_Last = new NeighborNode(distance, instance);
} else {
NeighborNode current = m_First;
if (distance < m_First.m_Distance) {// Insert at head
m_First = new NeighborNode(distance, instance, m_First);
} else { // Insert further down the list
for( ;(current.m_Next != null) &&
(current.m_Next.m_Distance < distance);
current = current.m_Next);
current.m_Next = new NeighborNode(distance, instance,
current.m_Next);
if (current.equals(m_Last)) {
m_Last = current.m_Next;
}
}
// Trim the list back down to k elements (or more, if the distance to
// the last elements is the same).
int valcount = 0;
for(current = m_First; current.m_Next != null;
current = current.m_Next) {
valcount++;
if ((valcount >= m_Length) && (current.m_Distance !=
current.m_Next.m_Distance)) {
m_Last = current;
current.m_Next = null;
break;
}
}
}
}
/**
* Prunes the list to contain the k nearest neighbors. If there are
* multiple neighbors at the k'th distance, all will be kept.
*
* @param k the number of neighbors to keep in the list.
*/
public void pruneToK(int k) {
if (isEmpty()) {
return;
}
if (k < 1) {
k = 1;
}
int currentK = 0;
double currentDist = m_First.m_Distance;
NeighborNode current = m_First;
for(; current.m_Next != null; current = current.m_Next) {
currentK++;
currentDist = current.m_Distance;
if ((currentK >= k) && (currentDist != current.m_Next.m_Distance)) {
m_Last = current;
current.m_Next = null;
break;
}
}
}
/**
* Prints out the contents of the neighborlist
*/
public void printList() {
if (isEmpty()) {
System.out.println("Empty list");
} else {
NeighborNode current = m_First;
while (current != null) {
System.out.println("Node: instance " + current.m_Instance
+ ", distance " + current.m_Distance);
current = current.m_Next;
}
System.out.println();
}
}
}
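// Illustrative trace (not part of the original source) of how NeighborList keeps
// ties at the k'th distance: with m_Length = 2 and insertSorted called with
// distances 0.2, 0.5, 0.5, 0.5, the list ends up as
//   0.2 -> 0.5 -> 0.5 -> 0.5
// because trimming only cuts where the next distance differs, so every neighbour
// tied at the 2nd-nearest distance is kept; pruneToK(2) would likewise leave all
// four nodes in place.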
/** The training instances used for classification. */
protected Instances m_Train;
/** The number of class values (or 1 if predicting numeric) */
protected int m_NumClasses;
/** The class attribute type */
protected int m_ClassType;
/** The minimum values for numeric attributes. */
protected double [] m_Min;
/** The maximum values for numeric attributes. */
protected double [] m_Max;
/** The number of neighbours to use for classification (currently) */
protected int m_kNN;
/**
* The value of kNN provided by the user. This may differ from
* m_kNN if cross-validation is being used
*/
protected int m_kNNUpper;
/**
* Whether the value of k selected by cross validation has
* been invalidated by a change in the training instances
*/
protected boolean m_kNNValid;
/**
* The maximum number of training instances allowed. When
* this limit is reached, old training instances are removed,
* so the training data is "windowed". Set to 0 for unlimited
* numbers of instances.
*/
protected int m_WindowSize;
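// Windowed incremental training, as a hedged sketch (not code from this file):
// updateClassifier(Instance) comes from the UpdateableClassifier interface imported
// above, while setWindowSize is the setter name implied by windowSizeTipText()
// further down and is assumed here; initialData and newInstance are hypothetical.
//
//   IBk knn = new IBk(1);
//   knn.setWindowSize(100);             // keep at most 100 training instances
//   knn.buildClassifier(initialData);
//   knn.updateClassifier(newInstance);  // once full, the oldest instance is dropped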
/** The distance weighting method to use (WEIGHT_NONE, WEIGHT_INVERSE or WEIGHT_SIMILARITY) */
protected int m_DistanceWeighting;
/** Whether to select k by cross validation */
protected boolean m_CrossValidate;
/**
* Whether to minimise mean squared error rather than mean absolute
* error when cross-validating on numeric prediction tasks
*/
protected boolean m_MeanSquared;
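// For a numeric class, hold-one-out cross-validation compares candidate values of k
// using one of two error measures (a summary of the -E option above, not code from
// this file), over the n held-out predictions yhat_i of the true values y_i:
//   mean absolute error: MAE = (1/n) * sum_i |y_i - yhat_i|       (default)
//   mean squared error:  MSE = (1/n) * sum_i (y_i - yhat_i)^2     (when m_MeanSquared is true)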
/** True if normalization is turned off */
protected boolean m_DontNormalize;
/* Define possible instance weighting methods */
public static final int WEIGHT_NONE = 1;
public static final int WEIGHT_INVERSE = 2;
public static final int WEIGHT_SIMILARITY = 4;
public static final Tag [] TAGS_WEIGHTING = {
new Tag(WEIGHT_NONE, "No distance weighting"),
new Tag(WEIGHT_INVERSE, "Weight by 1/distance"),
new Tag(WEIGHT_SIMILARITY, "Weight by 1-distance")
};
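// Hedged usage sketch (not code from this excerpt): a weighting mode is normally
// chosen through a SelectedTag built from the constants above; the
// setDistanceWeighting(SelectedTag) setter named here is assumed, as it is not
// shown in this portion of the class.
//
//   IBk knn = new IBk(5);
//   knn.setDistanceWeighting(new SelectedTag(WEIGHT_INVERSE, TAGS_WEIGHTING));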
/** The number of attributes that contribute to a prediction */
protected double m_NumAttributesUsed;
/**
* IBk classifier. Simple instance-based learner that uses the class
* of the nearest k training instances for the class of the test
* instances.
*
* @param k the number of nearest neighbors to use for prediction
*/
public IBk(int k) {
init();
setKNN(k);
}
/**
* IB1 classifier. Instance-based learner. Predicts the class of the
* single nearest training instance for each test instance.
*/
public IBk() {
init();
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String KNNTipText() {
return "The number of neighbours to use.";
}
/**
* Set the number of neighbours the learner is to use.
*
* @param k the number of neighbours.
*/
public void setKNN(int k) {
m_kNN = k;
m_kNNUpper = k;
m_kNNValid = false;
}
/**
* Gets the number of neighbours the learner will use.
*
* @return the number of neighbours.
*/
public int getKNN() {
return m_kNN;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String windowSizeTipText() {
return "Gets the maximum number of instances allowed in the training " +
"pool. The addition of new instances above this value will result " +
"in old instances being removed. A value of 0 signifies no limit " +
"to the number of training instances.";
}
/**
* Gets the maximum number of instances allowed in the training
* pool. The addition of new instances above this value will result
* in old instances being removed. A value of 0 signifies no limit
* to the number of training instances.
*
* @return the window size
*/
public int getWindowSize() {
return m_WindowSize;
}