📄 kwmeans.txt
字号:
firstI = first.index(p1);
}
if (p2 >= second.numValues()) {
secondI = m_ClusterCentroids.numAttributes();
} else {
secondI = second.index(p2);
}
/* if (firstI == m_ClusterCentroids.classIndex()) {
p1++; continue;
}
if (secondI == m_ClusterCentroids.classIndex()) {
p2++; continue;
} */
double diff;
if (firstI == secondI) {
diff = difference(firstI,
first.valueSparse(p1),
second.valueSparse(p2));
p1++; p2++;
} else if (firstI > secondI) {
diff = difference(secondI,
0, second.valueSparse(p2));
p2++;
} else {
diff = difference(firstI,
first.valueSparse(p1), 0);
p1++;
}
distance += diff * diff;
}
//return Math.sqrt(distance / m_ClusterCentroids.numAttributes());
return distance;
}
/**
 * Computes the per-attribute difference between two values of the
 * attribute at the given index.
 * <p>
 * Nominal attributes yield 0 when both values are present and equal
 * after truncation to int, and 1 otherwise. Numeric attributes yield the
 * difference of the two normalized values when both are present, 1 when
 * both are missing, and otherwise the normalized present value (or one
 * minus it when that value is below 0.5). Any other attribute type
 * yields 0.
 *
 * @param index the attribute index
 * @param val1 the first value
 * @param val2 the second value
 * @return the difference
 */
private double difference(int index, double val1, double val2) {
  final int type = m_ClusterCentroids.attribute(index).type();

  if (type == Attribute.NOMINAL) {
    // Equal only if neither value is missing and both map to the same label index.
    final boolean same = !Instance.isMissingValue(val1)
        && !Instance.isMissingValue(val2)
        && ((int) val1 == (int) val2);
    return same ? 0 : 1;
  }

  if (type == Attribute.NUMERIC) {
    final boolean firstMissing = Instance.isMissingValue(val1);
    final boolean secondMissing = Instance.isMissingValue(val2);

    if (!firstMissing && !secondMissing) {
      return norm(val1, index) - norm(val2, index);
    }
    if (firstMissing && secondMissing) {
      return 1;
    }

    // Exactly one value is missing: work from the one that is present.
    double gap = secondMissing ? norm(val1, index) : norm(val2, index);
    if (gap < 0.5) {
      gap = 1.0 - gap;
    }
    return gap;
  }

  return 0;
}
/**
 * Re-weights the first {@code num} instances of the given dataset.
 * <p>
 * For every instance the mean distance to the first {@code num}
 * instances (itself included) is computed with {@code distance}. Each
 * instance then receives the weight
 * {@code (sum of all mean distances) / (its own mean distance)}, so an
 * instance with a smaller mean distance gets a larger weight.
 * <p>
 * An instance whose mean distance is not strictly positive keeps its
 * current weight, because the quotient would otherwise be infinite or
 * NaN.
 *
 * @param instances the dataset whose instances are re-weighted in place
 * @param num the number of leading instances to process
 */
protected void setWeight(Instances instances,int num){
  double[] meanDistance = new double[num];
  double meanDistanceTotal = 0.0;

  for (int j = 0; j < num; j++) {
    // The sum must restart for every instance; carrying it over would make
    // each entry a running total over all previously visited instances.
    double distanceSum = 0.0;
    for (int i = 0; i < num; i++) {
      distanceSum += distance(instances.instance(i), instances.instance(j));
    }
    meanDistance[j] = distanceSum / num;
    meanDistanceTotal += meanDistance[j];
  }

  for (int i = 0; i < num; i++) {
    // Guard against division by zero (e.g. a single instance or all duplicates).
    if (meanDistance[i] > 0.0) {
      instances.instance(i).setWeight(meanDistanceTotal / meanDistance[i]);
    }
  }
}
/**
 * Computes the weighted mean (numeric attribute) or the weighted mode
 * (nominal attribute) of one attribute over all instances of a dataset,
 * using each instance's weight. Instances whose value for the attribute
 * is missing are ignored.
 *
 * @param instance the dataset to summarize
 * @param index the index of the attribute to summarize
 * @return for a numeric attribute, the weighted mean, or 0 when the
 *         usable instances carry no positive total weight; for a nominal
 *         attribute, the index of the value with the largest weight sum
 *         (as a double); 0 for any other attribute type
 */
protected double wMean(Instances instance,int index){
  if (instance.attribute(index).isNumeric()) {
    double weightedSum = 0.0;
    double weightTotal = 0.0;
    for (int i = 0; i < instance.numInstances(); i++) {
      Instance current = instance.instance(i);
      // A missing value would otherwise poison the whole mean.
      if (current.isMissing(index)) {
        continue;
      }
      weightTotal += current.weight();
      weightedSum += current.weight() * current.value(index);
    }
    // Divide once, and never by a zero total.
    return (weightTotal > 0.0) ? weightedSum / weightTotal : 0.0;
  }

  if (instance.attribute(index).isNominal()) {
    // Weights are fractional, so they must be accumulated as doubles;
    // an int accumulator would truncate on every addition.
    double[] weightPerValue = new double[instance.attribute(index).numValues()];
    for (int j = 0; j < instance.numInstances(); j++) {
      Instance current = instance.instance(j);
      if (!current.isMissing(index)) {
        weightPerValue[(int) current.value(index)] += current.weight();
      }
    }
    // The mode is the value index carrying the largest total weight.
    return (double) Utils.maxIndex(weightPerValue);
  }

  return 0.0;
}
/**
 * Rescales a numeric attribute value using the minimum and maximum
 * recorded for that attribute.
 * <p>
 * Returns 0 when the recorded minimum is NaN or when the recorded
 * minimum and maximum are equal according to {@code Utils.eq}.
 *
 * @param x the value to be normalized
 * @param i the attribute's index
 * @return the normalized value
 */
private double norm(double x, int i) {
  final double lowest = m_Min[i];
  final boolean noUsableRange = Double.isNaN(lowest) || Utils.eq(m_Max[i], lowest);
  return noUsableRange ? 0 : (x - lowest) / (m_Max[i] - lowest);
}
/**
 * Widens the recorded per-attribute minimum and maximum so that they
 * cover the values of the given instance. Missing values are skipped;
 * an attribute whose minimum is still NaN takes the instance's value as
 * both its minimum and its maximum.
 *
 * @param instance the new instance
 */
private void updateMinMax(Instance instance) {
  final int attributeCount = m_ClusterCentroids.numAttributes();
  for (int att = 0; att < attributeCount; att++) {
    if (instance.isMissing(att)) {
      continue;
    }
    final double observed = instance.value(att);
    if (Double.isNaN(m_Min[att])) {
      // First value seen for this attribute.
      m_Min[att] = observed;
      m_Max[att] = observed;
    } else if (observed < m_Min[att]) {
      m_Min[att] = observed;
    } else if (observed > m_Max[att]) {
      m_Max[att] = observed;
    }
  }
}
/**
 * Reports the number of clusters held by this clusterer.
 *
 * @return the current value of the cluster-count field
 * @throws Exception declared by the signature; this implementation
 *         itself does not throw
 */
public int numberOfClusters() throws Exception {
  return this.m_NumClusters;
}
/**
 * Lists the command-line options understood by this clusterer: the
 * {@code -N} option first, followed by every option reported by the
 * superclass.
 *
 * @return an enumeration of all the available options.
 */
public Enumeration listOptions () {
  Vector options = new Vector();

  Option clusterCountOption = new Option(
      "\tnumber of clusters.\n"
      + "\t(default 2).",
      "N", 1, "-N <num>");
  options.addElement(clusterCountOption);

  // Append whatever the superclass advertises, preserving its order.
  for (Enumeration inherited = super.listOptions(); inherited.hasMoreElements(); ) {
    options.addElement(inherited.nextElement());
  }

  return options.elements();
}
/**
 * Supplies the short help text for the number-of-clusters property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String numClustersTipText() {
  final String tip = "set number of clusters";
  return tip;
}
/**
 * Sets the number of clusters to generate.
 *
 * @param n the number of clusters to generate; must be at least 1
 * @throws Exception if {@code n} is zero or negative
 */
public void setNumClusters(int n) throws Exception {
  final boolean positive = n > 0;
  if (!positive) {
    throw new Exception("Number of clusters must be > 0");
  }
  m_NumClusters = n;
}
/**
 * Returns the configured number of clusters.
 *
 * @return the number of clusters to generate
 */
public int getNumClusters() {
  return this.m_NumClusters;
}
/**
 * Applies a list of command-line options. The {@code -N} value, when
 * present and non-empty, is parsed as an integer and passed to
 * {@code setNumClusters}; the option array is then handed to the
 * superclass. <p/>
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -N <num>
 *  number of clusters.
 *  (default 2).</pre>
 *
 * <pre> -S <num>
 *  Random number seed.
 *  (default 10)</pre>
 *
 <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions (String[] options)
  throws Exception {
  final String clusterCountText = Utils.getOption('N', options);
  if (clusterCountText.length() > 0) {
    final int requested = Integer.parseInt(clusterCountText);
    setNumClusters(requested);
  }

  super.setOptions(options);
}
/**
 * Gets the current settings of this clusterer: {@code -N} and the
 * cluster count, followed by the options reported by the superclass.
 *
 * @return an array of strings suitable for passing to setOptions()
 */
public String[] getOptions () {
  final String clusterCount = "" + getNumClusters();
  final String[] inherited = super.getOptions();

  // Two leading slots for "-N <count>", then the inherited options in order.
  final String[] merged = new String[inherited.length + 2];
  merged[0] = "-N";
  merged[1] = clusterCount;
  System.arraycopy(inherited, 0, merged, 2, inherited.length);
  return merged;
}
/**
 * Returns a textual report of this clusterer: the iteration count, the
 * summed squared errors, and for every cluster the centroid values
 * ("Mean/Mode") and the standard deviations of the numeric attributes.
 * <p>
 * When no centroids are available yet, a short placeholder message is
 * returned instead of failing with a NullPointerException.
 *
 * @return a description of the clusterer as a string
 */
public String toString() {
  if (m_ClusterCentroids == null) {
    return "No clusterer built yet!";
  }

  // Width (in digits before the decimal point) of the largest numeric
  // centroid value; used to align the numeric columns.
  final double logOfTen = Math.log(10.0);
  int maxWidth = 0;
  for (int i = 0; i < m_NumClusters; i++) {
    for (int j = 0; j < m_ClusterCentroids.numAttributes(); j++) {
      if (m_ClusterCentroids.attribute(j).isNumeric()) {
        double width = Math.log(Math.abs(m_ClusterCentroids.instance(i).value(j)))
            / logOfTen;
        width += 1.0;
        if ((int) width > maxWidth) {
          maxWidth = (int) width;
        }
      }
    }
  }

  // Placeholder printed in the std-dev row for non-numeric attributes,
  // padded to line up with the numeric columns.
  StringBuffer padded = new StringBuffer("N/A");
  for (int i = 0; i < maxWidth + 2; i++) {
    padded.append(' ');
  }
  String naString = padded.toString();

  StringBuffer temp = new StringBuffer();
  temp.append("\nkMeans\n======\n");
  temp.append("\nNumber of iterations: " + m_Iterations + "\n");
  temp.append("Within cluster sum of squared errors: " + Utils.sum(m_squaredErrors));

  temp.append("\n\nCluster centroids:\n");
  for (int i = 0; i < m_NumClusters; i++) {
    temp.append("\nCluster " + i + "\n\t");
    temp.append("Mean/Mode: ");
    for (int j = 0; j < m_ClusterCentroids.numAttributes(); j++) {
      if (m_ClusterCentroids.attribute(j).isNominal()) {
        // Nominal centroids store a value index; print its label.
        temp.append(" " + m_ClusterCentroids.attribute(j).
            value((int) m_ClusterCentroids.instance(i).value(j)));
      } else {
        temp.append(" " + Utils.doubleToString(m_ClusterCentroids.instance(i).value(j),
            maxWidth + 5, 4));
      }
    }
    temp.append("\n\tStd Devs:  ");
    for (int j = 0; j < m_ClusterStdDevs.numAttributes(); j++) {
      if (m_ClusterStdDevs.attribute(j).isNumeric()) {
        temp.append(" " + Utils.doubleToString(m_ClusterStdDevs.instance(i).value(j),
            maxWidth + 5, 4));
      } else {
        temp.append(" " + naString);
      }
    }
  }
  temp.append("\n\n");
  return temp.toString();
}
/**
 * Returns the cluster centroids held by this clusterer. The stored
 * object itself is returned, not a copy.
 *
 * @return the cluster centroids
 */
public Instances getClusterCentroids() {
  return this.m_ClusterCentroids;
}
/**
 * Returns the per-cluster standard deviations of the numeric attributes.
 * The stored object itself is returned, not a copy.
 *
 * @return the standard deviations of the numeric attributes
 * in each cluster
 */
public Instances getClusterStandardDevs() {
  return this.m_ClusterStdDevs;
}
/**
 * Returns, for each cluster, the frequency counts for the values of each
 * nominal attribute. The stored array itself is returned, not a copy.
 *
 * @return the counts
 */
public int [][][] getClusterNominalCounts() {
  return this.m_ClusterNominalCounts;
}
/**
 * Returns the total squared error, obtained by summing the stored
 * squared-error array with {@code Utils.sum}.
 *
 * @return the squared error
 */
public double getSquaredError() {
  final double total = Utils.sum(m_squaredErrors);
  return total;
}
/**
 * Returns the number of instances in each cluster. The stored array
 * itself is returned, not a copy.
 *
 * @return The number of instances in each cluster
 */
public int [] getClusterSizes() {
  return this.m_ClusterSizes;
}
/**
 * Command-line entry point: creates a {@code KwMeans} instance and
 * passes it, together with the arguments, to {@code runClusterer}.
 *
 * @param argv should contain the following arguments: <p>
 * -t training file [-N number of clusters]
 */
public static void main (String[] argv) {
  final KwMeans clusterer = new KwMeans();
  runClusterer(clusterer, argv);
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -