📄 hac.java
字号:
*/ public ArrayList getIntClusters() throws Exception { m_clusters = new ArrayList(); Cluster [] clusterArray = new Cluster[m_numCurrentClusters]; if (m_verbose) { System.out.println("Cluster assignments: "); for (int i=0; i < m_clusterAssignments.length; i++) { System.out.print(i + ":" + m_clusterAssignments[i] + " "); } } for (int i=0; i < m_instances.numInstances(); i++) { Instance inst = m_instances.instance(i); if(clusterArray[m_clusterAssignments[i]] == null) clusterArray[m_clusterAssignments[i]] = new Cluster(m_clusterID++); clusterArray[m_clusterAssignments[i]].add(new Integer(i), 1); // System.out.println("Adding: " + i + " to cluster: " + m_clusterAssignments[i]); } for (int j =0; j< m_numCurrentClusters; j++) { if (clusterArray[j] == null) { System.out.println("Empty cluster: " + j); // printIntClusters(); setVerbose(true); m_numCurrentClusters--; m_numClusters--; } else { m_clusters.add(clusterArray[j]); String labelString = ""; for (int i = 0; i < clusterArray[j].size(); i++){ Instance inst = m_instances.instance(((Integer) (clusterArray[j].get(i))).intValue()); labelString = labelString + printInstance(inst) + "\\n"; } m_dotWriter.println("node" + clusterArray[j].clusterID + "[label = \"" + labelString + "\"]"); } } // printIntClusters(); return m_clusters; } /** * Computes the final clusters from the cluster assignments, for external access * * @exception Exception if clusters could not be computed successfully */ public ArrayList getClusters() throws Exception { ArrayList finalClusters = new ArrayList(); Cluster [] clusterArray = new Cluster[m_numClusters]; for (int i=0; i < m_instances.numInstances(); i++) { Instance inst = m_instances.instance(i); if(clusterArray[m_clusterAssignments[i]] == null) clusterArray[m_clusterAssignments[i]] = new Cluster(); clusterArray[m_clusterAssignments[i]].add(inst, 1); } for (int j =0; j< m_numClusters; j++) finalClusters.add(clusterArray[j]); return finalClusters; } /** * internal method that returns the distance between two clusters */ protected double clusterDistance(Cluster cluster1, Cluster cluster2) { if (cluster2 == null || cluster1 == null) { System.err.println("PANIC! clusterDistance called with null argument(s)"); try{ printIntClusters(); } catch(Exception e){} } int i1 = ((Integer) cluster1.get(0)).intValue(); int i2 = ((Integer) cluster2.get(0)).intValue(); return m_distanceMatrix[i1][i2]; } protected void checkClusters() { } /** Internal method to merge two clusters and update distances */ protected Cluster mergeClusters (int cluster1Idx, int cluster2Idx) throws Exception { Cluster newCluster = new Cluster(m_clusterID++); Cluster cluster1 = (Cluster) m_clusters.get(cluster1Idx); Cluster cluster2 = (Cluster) m_clusters.get(cluster2Idx); int cluster1FirstIdx =((Integer) cluster1.get(0)).intValue(); int cluster2FirstIdx =((Integer) cluster2.get(0)).intValue(); newCluster.copyElements(cluster1); newCluster.copyElements(cluster2); checkClusters(); // Update the distance matrix depending on the linkage type switch (m_linkingType) { case SINGLE_LINK: // go through all clusters and update the distance from first element // to the first element of the new cluster for (int i = 0; i < m_clusters.size(); i++){ if (i != cluster1Idx && i != cluster2Idx) { // skip these clusters themselves Cluster currentCluster = (Cluster) m_clusters.get(i); int currClusterFirstIdx = ((Integer) currentCluster.get(0)).intValue(); if (m_distanceMatrix[cluster1FirstIdx][currClusterFirstIdx] < m_distanceMatrix[cluster2FirstIdx][currClusterFirstIdx]) { // first cluster is closer, no need to update } else { // second cluster is closer, must update distance between the first representative m_distanceMatrix[cluster1FirstIdx][currClusterFirstIdx] = m_distanceMatrix[currClusterFirstIdx][cluster1FirstIdx] = m_distanceMatrix[cluster2FirstIdx][currClusterFirstIdx]; } // check for infinity links if (m_distanceMatrix[cluster2FirstIdx][currClusterFirstIdx] == Double.POSITIVE_INFINITY) { m_distanceMatrix[cluster1FirstIdx][currClusterFirstIdx] = m_distanceMatrix[currClusterFirstIdx][cluster1FirstIdx] = Double.POSITIVE_INFINITY; } if (m_distanceMatrix[cluster1FirstIdx][currClusterFirstIdx] == Double.POSITIVE_INFINITY) { m_distanceMatrix[cluster2FirstIdx][currClusterFirstIdx] = m_distanceMatrix[currClusterFirstIdx][cluster2FirstIdx] = Double.POSITIVE_INFINITY; } } } break; case COMPLETE_LINK: // go through all clusters and update the distance from first element // to the first element of the new cluster for (int i = 0; i < m_clusters.size(); i++){ if (i != cluster1Idx && i != cluster2Idx) { // skip these clusters themselves Cluster currentCluster = (Cluster) m_clusters.get(i); int currClusterFirstIdx = ((Integer) currentCluster.get(0)).intValue(); if (m_distanceMatrix[cluster1FirstIdx][currClusterFirstIdx] > m_distanceMatrix[cluster2FirstIdx][currClusterFirstIdx]) { // first cluster is closer, no need to update } else { // second cluster is closer, must update distance between the first representative m_distanceMatrix[cluster1FirstIdx][currClusterFirstIdx] = m_distanceMatrix[currClusterFirstIdx][cluster1FirstIdx] = m_distanceMatrix[cluster2FirstIdx][currClusterFirstIdx]; } } } break; case GROUP_AVERAGE: // go through all clusters and update the distance from first element // to the first element of the new cluster for (int i = 0; i < m_clusters.size(); i++){ if (i != cluster1Idx && i != cluster2Idx) { // skip these clusters themselves Cluster currentCluster = (Cluster) m_clusters.get(i); int currClusterFirstIdx = ((Integer) currentCluster.get(0)).intValue(); int cluster1Size = cluster1.size(); int cluster2Size = cluster2.size(); // must update distance between the first representative m_distanceMatrix[cluster1FirstIdx][currClusterFirstIdx] = m_distanceMatrix[currClusterFirstIdx][cluster1FirstIdx] = (m_distanceMatrix[cluster1FirstIdx][currClusterFirstIdx] * cluster1Size + m_distanceMatrix[cluster2FirstIdx][currClusterFirstIdx] * cluster2Size) / (cluster1Size + cluster2Size); } } } String labelString = ""; for (int i = 0; i < newCluster.size(); i++){ Instance inst = m_instances.instance(((Integer) (newCluster.get(i))).intValue()); labelString = labelString + printInstance(inst) + "\\n"; } m_dotWriter.println("node" + newCluster.clusterID + "[label = \"" + labelString + "\"]"); m_dotWriter.println("node" + newCluster.clusterID + "->node" + cluster1.clusterID); m_dotWriter.println("node" + newCluster.clusterID + "->node" + cluster2.clusterID); return newCluster; } /** Print an instance for the dot file */ String printInstance(Instance instance) { String stringToPrint; int[] ascendingSortIndicesOfAttributes = Utils.sort(instance.toDoubleArray()); if (m_descrInstances == null) { stringToPrint = instance.toString(); } else { int idx = ((Integer) m_reverseInstancesHash.get(instance)).intValue(); stringToPrint = (m_descrInstances.instance(idx)).toString() + ": "; } DecimalFormat fmt = new DecimalFormat("0.000"); for (int i = 0; i < 5; i++) { Attribute attrib = m_instances.attribute(ascendingSortIndicesOfAttributes[m_instances.numAttributes()-i-1]); if (instance.value(attrib) > 0) { stringToPrint = stringToPrint + attrib.name() + ": " + fmt.format(instance.value(attrib)) + "\t"; } } return stringToPrint; } /** Update the clusterAssignments for all points in two clusters that are about to be merged */ protected void initClusterAssignments() { m_clusterAssignments = new int[m_instances.numInstances()]; for (int i = 0; i < m_clusters.size(); i++) { Cluster cluster = (Cluster) m_clusters.get(i); for (int j = 0; j < cluster.size(); j++) { Integer idx = (Integer) cluster.get(j); // System.out.println("Instance number: " + idx + " has cluster id: " + i); m_clusterAssignments[idx.intValue()] = i; } } } /** Outputs the current clustering * * @exception Exception if something goes wrong */ public void printClusters() throws Exception { if (m_clusters == null) throw new Exception ("Clusters were not created"); for (int i = 0; i < m_clusters.size(); i++) { System.out.println ("Cluster " + i); printCluster(i); } } /** Outputs the specified cluster * * @exception Exception if something goes wrong */ public void printCluster(int i) throws Exception { if (m_clusters == null) throw new Exception ("Clusters were not created"); Cluster cluster = (Cluster) m_clusters.get(i); for (int j = 0; j < cluster.size(); j++) { // Instance instance = (Instance) m_instancesHash.get((Integer) cluster.elementAt(j)); Object o = cluster.get(j); Instance instance = (o instanceof Instance) ? (Instance)o : m_instances.instance(((Integer)o).intValue()); if (m_descrInstances == null) { System.out.print("\t" + instance); } else { System.out.print("\t"); System.out.println(printInstance(instance)); } } } /** Outputs the current clustering * * @exception Exception if something goes wrong */ public void printIntClusters() throws Exception { if (m_clusters == null) throw new Exception ("Clusters were not created"); for (int i = 0; i < m_clusters.size(); i++) { Cluster cluster = (Cluster) m_clusters.get(i); System.out.println ("Cluster " + i + " consists of " + cluster.size() + " elements"); for (int j = 0; j < cluster.size(); j++) { // Instance instance = (Instance) m_instancesHash.get((Integer) cluster.elementAt(j)); Integer idx = (Integer) cluster.get(j); Instance instance = (Instance) m_instancesHash.get(idx); System.out.println("\t\t" + instance); } } } /** * Clusters an instance. * * @param instance the instance to cluster. * @exception Exception if something goes wrong. */ public int clusterInstance(Instance instance) throws Exception { double bestDistance = Double.MAX_VALUE; int instanceIdx = 0; // if (m_reverseInstancesHash.containsKey(instance)) { // instanceIdx = ((Integer) m_reverseInstancesHash.get(instance)).intValue(); // System.out.println("Located index in m_reverseInstancesHash"); // return m_clusterAssignments[instanceIdx]; // } else { double [] values = instance.toDoubleArray(); double checksum = 0; for (int i = 0; i < values.length; i++) { checksum += m_checksumPerturb[i] * values[i]; } Double checksumIdx = new Double(checksum); if (m_checksumHash.containsKey(checksumIdx)) { Object obj = m_checksumHash.get(checksumIdx); if (obj instanceof Integer) { int idx = ((Integer) obj).intValue(); return m_clusterAssignments[idx]; } else { // instanceof Arraylist ArrayList chain = (ArrayList) obj; for (int i = 0; i < chain.size(); i++) { Integer idx = (Integer) chain.get(i); Instance clusteredInstance = (Instance) m_instancesHash.get(idx); if (matchInstance(instance, clusteredInstance)) { return m_clusterAssignments[idx.intValue()]; } } throw new Exception("UNKNOWN INSTANCE!!!!"); } } else { // unknown checksum throw new Exception("UNKNOWN CHECKSUM!!!!"); // ArrayList candidateClusterList = new ArrayList();// for (int i = 0; i < m_numClusters; i++) {// Cluster thisCluster = (Cluster) m_clusters.get(i);// double thisDistance = distance (instance, thisCluster);// if (thisDistance < bestDistance) {// candidateClusterList.clear();// candidateClusterList.add (new Integer(i));// bestDistance = thisDistance;// } else if (thisDistance == bestDistance) {// candidateClusterList.add (new Integer(i));// }// }// // randomly pick a candidate// int i = (int) (candidateClusterList.size() * Math.random());// int clusterIdx = ((Integer) candidateClusterList.get(i)).intValue();// if (clusterIdx != m_clusterAssignments[instanceIdx]) {// System.out.println("Mismatch: idx=" + clusterIdx + " assigned=" + m_clusterAssignments[instanceIdx]);// }// return clusterIdx; } } /** Internal method: check if two instances match on their attribute values */ protected boolean matchInstance(Instance instance1, Instance instance2) { double [] values1 = instance1.toDoubleArray(); double [] values2 = instance2.toDoubleArray(); for (int i = 0; i < values1.length; i++) { if (values1[i] != values2[i]) { return false; } } return true; } /** * internal method that returns the distance between an instance and a cluster */ protected double distance (Instance instance, Cluster cluster) throws Exception { Integer idx; double distance = 0; switch (m_linkingType) { case SINGLE_LINK: double minDistance = Double.MAX_VALUE; for (int i = 0; i < cluster.size(); i++) { Instance clusterInstance = (Instance) cluster.get(i); double currDistance = m_metric.distance(instance, clusterInstance); if (currDistance < minDistance) minDistance = currDistance; } distance = minDistance; break; case COMPLETE_LINK: double maxDistance = Double.MIN_VALUE; for (int i = 0; i < cluster.size(); i++) { Instance clusterInstance = (Instance) cluster.get(i); double currDistance = m_metric.distance(instance, clusterInstance); if (currDistance > maxDistance) maxDistance = currDistance; } distance = maxDistance; break; case GROUP_AVERAGE: double avgDistance = 0; for (int i = 0; i < cluster.size(); i++) { Instance clusterInstance = (Instance) cluster.get(i); avgDistance += m_metric.distance(instance, clusterInstance); } distance = avgDistance/cluster.size(); break;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -