⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hac.java

📁 wekaUT是 university texas austin 开发的基于weka的半指导学习(semi supervised learning)的分类器
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
   */      public ArrayList getIntClusters() throws Exception {    m_clusters = new ArrayList();    Cluster [] clusterArray = new Cluster[m_numCurrentClusters];    if (m_verbose) {      System.out.println("Cluster assignments: ");      for (int i=0; i < m_clusterAssignments.length; i++) {	System.out.print(i + ":" + m_clusterAssignments[i] + "  ");      }    }    for (int i=0; i < m_instances.numInstances(); i++) {	Instance inst = m_instances.instance(i);	if(clusterArray[m_clusterAssignments[i]] == null)	   clusterArray[m_clusterAssignments[i]] = new Cluster(m_clusterID++);	clusterArray[m_clusterAssignments[i]].add(new Integer(i), 1);	//	System.out.println("Adding: " + i + " to cluster: " + m_clusterAssignments[i]);     }    for (int j =0; j< m_numCurrentClusters; j++) {      if (clusterArray[j] == null) {	System.out.println("Empty cluster: " + j);	//	printIntClusters();	setVerbose(true);	m_numCurrentClusters--;	m_numClusters--;      } else {	m_clusters.add(clusterArray[j]);	String labelString = "";	for (int i = 0; i < clusterArray[j].size(); i++){    	  Instance inst = m_instances.instance(((Integer) (clusterArray[j].get(i))).intValue());	  labelString = labelString + printInstance(inst) + "\\n";	}	m_dotWriter.println("node" + clusterArray[j].clusterID + "[label = \"" + labelString + "\"]");      }    }    //    printIntClusters();        return m_clusters;  }  /**   * Computes the final clusters from the cluster assignments, for external access   *    * @exception Exception if clusters could not be computed successfully   */      public ArrayList getClusters() throws Exception {    ArrayList finalClusters = new ArrayList();    Cluster [] clusterArray = new Cluster[m_numClusters];    for (int i=0; i < m_instances.numInstances(); i++) {	Instance inst = m_instances.instance(i);	if(clusterArray[m_clusterAssignments[i]] == null)	   clusterArray[m_clusterAssignments[i]] = new Cluster();	clusterArray[m_clusterAssignments[i]].add(inst, 1);    }    for (int j =0; j< m_numClusters; j++)       finalClusters.add(clusterArray[j]);    return finalClusters;  }  /**   * internal method that returns the distance between two clusters   */  protected double clusterDistance(Cluster cluster1, Cluster cluster2) {    if (cluster2 == null || cluster1 == null) {      System.err.println("PANIC!  clusterDistance called with null argument(s)");      try{	printIntClusters();      } catch(Exception e){}    }	    int i1 = ((Integer) cluster1.get(0)).intValue();    int i2 = ((Integer) cluster2.get(0)).intValue();    return m_distanceMatrix[i1][i2];  }  protected void checkClusters() {  }    /** Internal method to merge two clusters and update distances   */  protected Cluster mergeClusters (int cluster1Idx, int cluster2Idx) throws Exception {    Cluster newCluster = new Cluster(m_clusterID++);    Cluster cluster1 = (Cluster) m_clusters.get(cluster1Idx);    Cluster cluster2 = (Cluster) m_clusters.get(cluster2Idx);    int cluster1FirstIdx =((Integer) cluster1.get(0)).intValue();    int cluster2FirstIdx =((Integer) cluster2.get(0)).intValue();     newCluster.copyElements(cluster1);    newCluster.copyElements(cluster2);    checkClusters();        // Update the distance matrix depending on the linkage type    switch (m_linkingType) {    case SINGLE_LINK:      // go through all clusters and update the distance from first element      // to the first element of the new cluster      for (int i = 0; i < m_clusters.size(); i++){	if (i != cluster1Idx && i != cluster2Idx) { // skip these clusters themselves	  Cluster currentCluster = (Cluster) m_clusters.get(i);	  int currClusterFirstIdx = ((Integer) currentCluster.get(0)).intValue();	  if (m_distanceMatrix[cluster1FirstIdx][currClusterFirstIdx] <	      m_distanceMatrix[cluster2FirstIdx][currClusterFirstIdx]) {	    // first cluster is closer, no need to update	  } else {	    // second cluster is closer, must update distance between the first representative	    m_distanceMatrix[cluster1FirstIdx][currClusterFirstIdx] =	      m_distanceMatrix[currClusterFirstIdx][cluster1FirstIdx] =	      m_distanceMatrix[cluster2FirstIdx][currClusterFirstIdx];	  }	  // check for infinity links	  if (m_distanceMatrix[cluster2FirstIdx][currClusterFirstIdx] == Double.POSITIVE_INFINITY) {	    m_distanceMatrix[cluster1FirstIdx][currClusterFirstIdx] =	      m_distanceMatrix[currClusterFirstIdx][cluster1FirstIdx] = Double.POSITIVE_INFINITY;	  }	  if (m_distanceMatrix[cluster1FirstIdx][currClusterFirstIdx] == Double.POSITIVE_INFINITY) {	    m_distanceMatrix[cluster2FirstIdx][currClusterFirstIdx] =	      m_distanceMatrix[currClusterFirstIdx][cluster2FirstIdx] = Double.POSITIVE_INFINITY;	  }	}      }      break;    case COMPLETE_LINK:      // go through all clusters and update the distance from first element      // to the first element of the new cluster      for (int i = 0; i < m_clusters.size(); i++){	if (i != cluster1Idx && i != cluster2Idx) { // skip these clusters themselves	  Cluster currentCluster = (Cluster) m_clusters.get(i);	  int currClusterFirstIdx = ((Integer) currentCluster.get(0)).intValue();	  if (m_distanceMatrix[cluster1FirstIdx][currClusterFirstIdx] >	      m_distanceMatrix[cluster2FirstIdx][currClusterFirstIdx]) {	    // first cluster is closer, no need to update	  } else {	    // second cluster is closer, must update distance between the first representative	    m_distanceMatrix[cluster1FirstIdx][currClusterFirstIdx] =	      m_distanceMatrix[currClusterFirstIdx][cluster1FirstIdx] =	      m_distanceMatrix[cluster2FirstIdx][currClusterFirstIdx];	  }	}      }      break;    case GROUP_AVERAGE:      // go through all clusters and update the distance from first element      // to the first element of the new cluster      for (int i = 0; i < m_clusters.size(); i++){	if (i != cluster1Idx && i != cluster2Idx) { // skip these clusters themselves	  Cluster currentCluster = (Cluster) m_clusters.get(i);	  int currClusterFirstIdx = ((Integer) currentCluster.get(0)).intValue();	  int cluster1Size = cluster1.size();	  int cluster2Size = cluster2.size();	  // must update distance between the first representative	    m_distanceMatrix[cluster1FirstIdx][currClusterFirstIdx] =	      m_distanceMatrix[currClusterFirstIdx][cluster1FirstIdx] =	      (m_distanceMatrix[cluster1FirstIdx][currClusterFirstIdx] * cluster1Size +	       m_distanceMatrix[cluster2FirstIdx][currClusterFirstIdx] * cluster2Size) /	      (cluster1Size + cluster2Size);	}      }    }     String labelString = "";    for (int i = 0; i < newCluster.size(); i++){          Instance inst = m_instances.instance(((Integer) (newCluster.get(i))).intValue());      labelString = labelString + printInstance(inst) + "\\n";    }    m_dotWriter.println("node" + newCluster.clusterID + "[label = \"" + labelString + "\"]");    m_dotWriter.println("node" + newCluster.clusterID + "->node" + cluster1.clusterID);    m_dotWriter.println("node" + newCluster.clusterID + "->node" + cluster2.clusterID);    return newCluster;  }  /** Print an instance for the dot file */  String printInstance(Instance instance) {    String stringToPrint;    int[] ascendingSortIndicesOfAttributes = Utils.sort(instance.toDoubleArray());    if (m_descrInstances == null) {       stringToPrint = instance.toString();    } else {      int idx = ((Integer) m_reverseInstancesHash.get(instance)).intValue();      stringToPrint = (m_descrInstances.instance(idx)).toString() + ":  ";    }    DecimalFormat fmt = new DecimalFormat("0.000");    for (int i = 0; i < 5; i++) {       Attribute attrib = m_instances.attribute(ascendingSortIndicesOfAttributes[m_instances.numAttributes()-i-1]);      if (instance.value(attrib) > 0) { 	stringToPrint = stringToPrint + attrib.name() + ": " + fmt.format(instance.value(attrib)) + "\t";      }    }    return stringToPrint;  }  /** Update the clusterAssignments for all points in two clusters that are about to be merged   */  protected void initClusterAssignments() {    m_clusterAssignments = new int[m_instances.numInstances()];        for (int i = 0; i < m_clusters.size(); i++) {      Cluster cluster = (Cluster) m_clusters.get(i);      for (int j = 0; j < cluster.size(); j++) {	Integer idx = (Integer) cluster.get(j);	//	System.out.println("Instance number: " + idx + " has cluster id: " + i);	m_clusterAssignments[idx.intValue()] = i;      }    }  }      /** Outputs the current clustering   *   * @exception Exception if something goes wrong   */  public void printClusters() throws Exception {    if (m_clusters == null)      throw new Exception ("Clusters were not created");    for (int i = 0; i < m_clusters.size(); i++) {      System.out.println ("Cluster " + i);      printCluster(i);    }  }  /** Outputs the specified cluster   *   * @exception Exception if something goes wrong   */  public void printCluster(int i) throws Exception {    if (m_clusters == null)      throw new Exception ("Clusters were not created");    Cluster cluster = (Cluster) m_clusters.get(i);    for (int j = 0; j < cluster.size(); j++) {      //		Instance instance = (Instance) m_instancesHash.get((Integer) cluster.elementAt(j));      Object o = cluster.get(j);      Instance instance = (o instanceof Instance) ? (Instance)o : m_instances.instance(((Integer)o).intValue());      if (m_descrInstances == null) { 	System.out.print("\t" + instance);      } else {	System.out.print("\t");	System.out.println(printInstance(instance));      }     }  }  /** Outputs the current clustering   *   * @exception Exception if something goes wrong   */  public void printIntClusters() throws Exception {    if (m_clusters == null)      throw new Exception ("Clusters were not created");    for (int i = 0; i < m_clusters.size(); i++) {      Cluster cluster = (Cluster) m_clusters.get(i);      System.out.println ("Cluster " + i + " consists of " + cluster.size() + " elements");      for (int j = 0; j < cluster.size(); j++) {	//		Instance instance = (Instance) m_instancesHash.get((Integer) cluster.elementAt(j));	Integer idx = (Integer) cluster.get(j);	Instance instance = (Instance) m_instancesHash.get(idx);	System.out.println("\t\t" + instance);      }    }  }  /**   * Clusters an instance.   *   * @param instance the instance to cluster.   * @exception Exception if something goes wrong.   */  public int clusterInstance(Instance instance) throws Exception {    double bestDistance = Double.MAX_VALUE;    int instanceIdx = 0;    //  if (m_reverseInstancesHash.containsKey(instance)) {    //        instanceIdx = ((Integer) m_reverseInstancesHash.get(instance)).intValue();    //        System.out.println("Located index in m_reverseInstancesHash");    //        return m_clusterAssignments[instanceIdx];    //      } else {        double [] values = instance.toDoubleArray();    double checksum = 0;     for (int i = 0; i < values.length; i++) {      checksum += m_checksumPerturb[i] * values[i];    }    Double checksumIdx = new Double(checksum);    if (m_checksumHash.containsKey(checksumIdx)) {      Object obj = m_checksumHash.get(checksumIdx);      if (obj instanceof Integer) {	int idx = ((Integer) obj).intValue();	return m_clusterAssignments[idx];      } else { // instanceof Arraylist	ArrayList chain = (ArrayList) obj;	for (int i = 0; i < chain.size(); i++) {	  Integer idx = (Integer) chain.get(i);	  Instance clusteredInstance = (Instance) m_instancesHash.get(idx);	  if (matchInstance(instance, clusteredInstance)) {	    return m_clusterAssignments[idx.intValue()];	  }	}	throw new Exception("UNKNOWN INSTANCE!!!!");      }	    } else {   // unknown checksum	throw new Exception("UNKNOWN CHECKSUM!!!!");           //   ArrayList candidateClusterList = new ArrayList();//        for (int i = 0; i < m_numClusters; i++) {//  	Cluster thisCluster = (Cluster) m_clusters.get(i);//  	double thisDistance = distance (instance, thisCluster);//  	if (thisDistance < bestDistance) {//  	  candidateClusterList.clear();//  	  candidateClusterList.add (new Integer(i));//  	  bestDistance = thisDistance;//  	} else if (thisDistance == bestDistance) {//  	  candidateClusterList.add (new Integer(i));//  	}//        }//        // randomly pick a candidate//        int i = (int) (candidateClusterList.size() * Math.random());//        int clusterIdx = ((Integer) candidateClusterList.get(i)).intValue();//        if (clusterIdx != m_clusterAssignments[instanceIdx]) {//  	System.out.println("Mismatch: idx=" + clusterIdx + " assigned=" + m_clusterAssignments[instanceIdx]);//        }//        return clusterIdx;    }  }    /** Internal method:  check if two instances match on their attribute values   */  protected boolean matchInstance(Instance instance1, Instance instance2) {    double [] values1 = instance1.toDoubleArray();    double [] values2 = instance2.toDoubleArray();    for (int i = 0; i < values1.length; i++) {      if (values1[i] != values2[i]) {	return false;      }    }    return true;  }     /**   * internal method that returns the distance between an instance and a cluster   */  protected double distance (Instance instance, Cluster cluster) throws Exception {    Integer idx;    double distance = 0;        switch (m_linkingType) {    case SINGLE_LINK:      double minDistance = Double.MAX_VALUE;      for (int i = 0; i < cluster.size(); i++) {	Instance clusterInstance = (Instance) cluster.get(i);	double currDistance = m_metric.distance(instance, clusterInstance);	if (currDistance < minDistance)	  minDistance = currDistance;      }      distance = minDistance;      break;    case COMPLETE_LINK:      double maxDistance = Double.MIN_VALUE;      for (int i = 0; i < cluster.size(); i++) {	Instance clusterInstance = (Instance) cluster.get(i);	double currDistance = m_metric.distance(instance, clusterInstance);	if (currDistance > maxDistance)	  maxDistance = currDistance;      }      distance = maxDistance;      break;    case GROUP_AVERAGE:      double avgDistance = 0;      for (int i = 0; i < cluster.size(); i++) {	Instance clusterInstance = (Instance) cluster.get(i);	avgDistance += m_metric.distance(instance, clusterInstance);      }      distance = avgDistance/cluster.size();      break;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -