📄 simplekmeans.java
字号:
second.valueSparse(p2));
p1++; p2++;
} else if (firstI > secondI) {
diff = difference(secondI,
0, second.valueSparse(p2));
p2++;
} else {
diff = difference(firstI,
first.valueSparse(p1), 0);
p1++;
}
distance += diff * diff;
}
//return Math.sqrt(distance / m_ClusterCentroids.numAttributes());
return distance;
}
/**
 * Computes the difference between two values of the attribute at the
 * given index. The distance computation squares and sums these
 * per-attribute differences.
 *
 * Nominal attributes: 1 if either value is missing or the two values,
 * truncated to int, differ; 0 otherwise.
 * Numeric attributes: 1 if both values are missing; if exactly one is
 * missing, the known value is normalized to d and the result is d when
 * d is at least 0.5 and 1 - d otherwise; if both are present, the signed
 * difference of the two normalized values.
 * Any other attribute type yields 0.
 *
 * @param index the index of the attribute the values belong to
 * @param val1 the first value
 * @param val2 the second value
 * @return the difference as described above
 */
private double difference(int index, double val1, double val2) {
  final int attributeType = m_ClusterCentroids.attribute(index).type();

  if (attributeType == Attribute.NOMINAL) {
    // Equal only when both values are present and share the same label index.
    final boolean sameLabel = !Instance.isMissingValue(val1)
      && !Instance.isMissingValue(val2)
      && ((int) val1 == (int) val2);
    return sameLabel ? 0 : 1;
  }

  if (attributeType != Attribute.NUMERIC) {
    // Attribute types other than nominal/numeric do not contribute.
    return 0;
  }

  final boolean firstMissing = Instance.isMissingValue(val1);
  final boolean secondMissing = Instance.isMissingValue(val2);

  if (!firstMissing && !secondMissing) {
    return norm(val1, index) - norm(val2, index);
  }
  if (firstMissing && secondMissing) {
    return 1;
  }

  // Exactly one value is missing: normalize the one that is present.
  final double known = secondMissing ? norm(val1, index) : norm(val2, index);
  // Return the larger of the distances from the known value to 0 and to 1.
  return (known < 0.5) ? 1.0 - known : known;
}
/**
 * Rescales a numeric attribute value using the minimum and maximum
 * recorded for that attribute in m_Min and m_Max.
 *
 * @param x the value to be normalized
 * @param i the attribute's index
 * @return (x - min) / (max - min), or 0 when the recorded minimum is NaN
 * or Utils.eq reports the maximum and minimum as equal
 */
private double norm(double x, int i) {
  final double lower = m_Min[i];
  if (!Double.isNaN(lower)) {
    final double upper = m_Max[i];
    if (!Utils.eq(upper, lower)) {
      return (x - lower) / (upper - lower);
    }
  }
  // No usable range for this attribute.
  return 0;
}
/**
 * Folds one instance into the per-attribute minimum and maximum arrays
 * (m_Min and m_Max). Attributes whose value is missing in the instance
 * are left untouched.
 *
 * @param instance the new instance
 */
private void updateMinMax(Instance instance) {
  final int attributeCount = m_ClusterCentroids.numAttributes();
  for (int attr = 0; attr < attributeCount; attr++) {
    if (instance.isMissing(attr)) {
      continue;
    }
    final double value = instance.value(attr);
    if (Double.isNaN(m_Min[attr])) {
      // A NaN minimum marks an attribute with no value recorded yet:
      // the first value becomes both bounds.
      m_Min[attr] = value;
      m_Max[attr] = value;
    } else if (value < m_Min[attr]) {
      m_Min[attr] = value;
    } else if (value > m_Max[attr]) {
      m_Max[attr] = value;
    }
  }
}
/**
 * Reports how many clusters this clusterer is configured with, i.e. the
 * current value of m_NumClusters.
 *
 * @return the number of clusters
 * @exception Exception declared by the signature; this implementation
 * itself does not throw
 */
public int numberOfClusters() throws Exception {
  return this.m_NumClusters;
}
/**
 * Returns an enumeration describing the available options. <p>
 *
 * Valid options are:<p>
 *
 * -N &lt;num&gt; <br>
 * Specify the number of clusters to generate (the option text
 * advertises a default of 2). <p>
 *
 * -S &lt;num&gt; <br>
 * Specify the random number seed (the option text advertises a
 * default of 10). <p>
 *
 * @return an enumeration of all the available options.
 */
public Enumeration listOptions () {
  Option clusterCountOption = new Option("\tnumber of clusters. (default = 2)."
    , "N", 1, "-N <num>");
  Option seedOption = new Option("\trandom number seed.\n (default 10)"
    , "S", 1, "-S <num>");

  Vector available = new Vector(2);
  available.addElement(clusterCountOption);
  available.addElement(seedOption);
  return available.elements();
}
/**
 * Supplies the tip text for the number-of-clusters property.
 *
 * @return a short description of the property, suitable for
 * displaying in the explorer/experimenter gui
 */
public String numClustersTipText() {
  final String tip = "set number of clusters";
  return tip;
}
/**
 * Sets the number of clusters to generate.
 *
 * @param n the number of clusters to generate; must be positive
 * @exception Exception if n is zero or negative, in which case the
 * current setting is left unchanged
 */
public void setNumClusters(int n) throws Exception {
  final boolean positive = n > 0;
  if (!positive) {
    throw new Exception("Number of clusters must be > 0");
  }
  this.m_NumClusters = n;
}
/**
 * Returns the configured number of clusters to generate.
 *
 * @return the current value of the number-of-clusters setting
 */
public int getNumClusters() {
  return this.m_NumClusters;
}
/**
 * Supplies the tip text for the seed property.
 *
 * @return a short description of the property, suitable for
 * displaying in the explorer/experimenter gui
 */
public String seedTipText() {
  final String tip = "random number seed";
  return tip;
}
/**
 * Stores the random number seed. No validation is applied to the value.
 *
 * @param s the seed
 */
public void setSeed (int s) {
  this.m_Seed = s;
}
/**
 * Returns the random number seed currently stored.
 *
 * @return the seed
 */
public int getSeed () {
  return this.m_Seed;
}
/**
 * Parses a given list of options. The values of -N (number of clusters)
 * and -S (random seed) are looked up via Utils.getOption; each one that
 * comes back non-empty is parsed as an integer and applied through the
 * corresponding setter. Options that come back empty leave the current
 * setting unchanged.
 *
 * @param options the list of options as an array of strings
 * @exception Exception if option lookup fails, a value is not a valid
 * integer, or the number of clusters is rejected by setNumClusters
 */
public void setOptions (String[] options)
  throws Exception {
  final String clustersArg = Utils.getOption('N', options);
  if (clustersArg.length() > 0) {
    setNumClusters(Integer.parseInt(clustersArg));
  }

  final String seedArg = Utils.getOption('S', options);
  if (seedArg.length() > 0) {
    setSeed(Integer.parseInt(seedArg));
  }
}
/**
 * Gets the current settings of SimpleKMeans as a four-element array:
 * "-N", the number of clusters, "-S", the seed.
 *
 * @return an array of strings suitable for passing to setOptions()
 */
public String[] getOptions () {
  return new String[] {
    "-N", Integer.toString(getNumClusters()),
    "-S", Integer.toString(getSeed())
  };
}
/**
 * Returns a string describing this clusterer: the number of iterations,
 * the within-cluster sum of squared errors, and for every cluster the
 * centroid (label for nominal attributes, value for the others) followed
 * by the standard deviations (numeric attributes only, "N/A" otherwise).
 *
 * If no cluster centroids are available (m_ClusterCentroids is null) a
 * short message is returned instead of failing with a
 * NullPointerException.
 *
 * @return a description of the clusterer as a string
 */
public String toString() {
  // toString() must be safe to call at any time (debuggers, logging, GUIs);
  // without centroids there is nothing to report.
  if (m_ClusterCentroids == null) {
    return "No clusterer built yet!";
  }

  final int numAttributes = m_ClusterCentroids.numAttributes();

  // Largest number of integer digits among the numeric centroid values;
  // it determines the column width used for every number below.
  int maxWidth = 0;
  for (int i = 0; i < m_NumClusters; i++) {
    for (int j = 0; j < numAttributes; j++) {
      if (m_ClusterCentroids.attribute(j).isNumeric()) {
        double width = Math.log(Math.abs(m_ClusterCentroids.instance(i).value(j))) /
          Math.log(10.0);
        width += 1.0;
        // For a value of 0 the logarithm is -Infinity and for NaN it is NaN;
        // neither cast exceeds maxWidth, so such values are ignored here.
        if ((int)width > maxWidth) {
          maxWidth = (int)width;
        }
      }
    }
  }

  // "N/A" padded with maxWidth + 2 spaces, used in the Std Devs row for
  // attributes that are not numeric.
  StringBuffer naBuffer = new StringBuffer("N/A");
  for (int i = 0; i < maxWidth+2; i++) {
    naBuffer.append(' ');
  }
  final String naString = naBuffer.toString();

  StringBuffer temp = new StringBuffer();
  temp.append("\nkMeans\n======\n");
  temp.append("\nNumber of iterations: " + m_Iterations+"\n");
  temp.append("Within cluster sum of squared errors: " + Utils.sum(m_squaredErrors));
  temp.append("\n\nCluster centroids:\n");

  for (int i = 0; i < m_NumClusters; i++) {
    temp.append("\nCluster "+i+"\n\t");
    temp.append("Mean/Mode: ");
    for (int j = 0; j < numAttributes; j++) {
      if (m_ClusterCentroids.attribute(j).isNominal()) {
        // The centroid stores the index of the label; print the label itself.
        temp.append(" "+m_ClusterCentroids.attribute(j).
          value((int)m_ClusterCentroids.instance(i).value(j)));
      } else {
        temp.append(" "+Utils.doubleToString(m_ClusterCentroids.instance(i).value(j),
          maxWidth+5, 4));
      }
    }
    temp.append("\n\tStd Devs: ");
    for (int j = 0; j < m_ClusterStdDevs.numAttributes(); j++) {
      if (m_ClusterStdDevs.attribute(j).isNumeric()) {
        temp.append(" "+Utils.doubleToString(m_ClusterStdDevs.instance(i).value(j),
          maxWidth+5, 4));
      } else {
        temp.append(" "+naString);
      }
    }
  }
  temp.append("\n\n");
  return temp.toString();
}
/**
 * Returns the cluster centroids held by this clusterer. The internal
 * object is handed out directly; no copy is made.
 *
 * @return the value of m_ClusterCentroids
 */
public Instances getClusterCentroids() {
  return this.m_ClusterCentroids;
}
/**
 * Returns the per-cluster standard deviations held by this clusterer.
 * The internal object is handed out directly; no copy is made.
 *
 * @return the value of m_ClusterStdDevs
 */
public Instances getClusterStandardDevs() {
  return this.m_ClusterStdDevs;
}
/**
 * Returns the nominal-count table held by this clusterer. The internal
 * array is handed out directly; no copy is made.
 * NOTE(review): the meaning of the three indices is not visible here,
 * presumably [cluster][attribute][label]; confirm where the table is
 * filled.
 *
 * @return the value of m_ClusterNominalCounts
 */
public int [][][] getClusterNominalCounts() {
  return this.m_ClusterNominalCounts;
}
/**
 * Returns the total squared error, computed as the sum of the entries of
 * m_squaredErrors. This is the same figure that toString() reports as the
 * "Within cluster sum of squared errors".
 *
 * @return the sum over m_squaredErrors
 */
public double getSquaredError() {
  final double total = Utils.sum(m_squaredErrors);
  return total;
}
/**
 * Returns the cluster sizes held by this clusterer. The internal array
 * is handed out directly; no copy is made.
 *
 * @return the value of m_ClusterSizes
 */
public int [] getClusterSizes() {
  return this.m_ClusterSizes;
}
/**
 * Main method for testing this class. Hands a fresh SimpleKMeans and the
 * command-line arguments to ClusterEvaluation.evaluateClusterer and
 * prints the result to standard output. If anything fails, the exception
 * message is printed to standard output followed by the stack trace.
 *
 * @param argv should contain the following arguments: <p>
 * -t training file [-N number of clusters]
 */
public static void main (String[] argv) {
  try {
    SimpleKMeans clusterer = new SimpleKMeans();
    System.out.println(ClusterEvaluation.evaluateClusterer(clusterer, argv));
  } catch (Exception ex) {
    System.out.println(ex.getMessage());
    ex.printStackTrace();
  }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -