⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pckmeans.java

📁 wekaUT是 university texas austin 开发的基于weka的半指导学习(semi supervised learning)的分类器
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
  }  /** Updates the clusterAssignments for all points after clustering.   *  Map assignments from [0,numInstances-1] to [0,numClusters-1]   *  i.e. from [0 2 2 0 6 6 2] -> [0 1 1 0 2 2 0]    *  **** NOTE: THIS FUNCTION IS NO LONGER USED!!! ****   */  protected void updateClusterAssignments() throws Exception {    //  **** DEPRECATED: THIS FUNCTION IS NO LONGER USED!!! ****    int numInstances = m_Instances.numInstances();    HashMap clusterNumberHash = new HashMap((int) (m_NumClusters/0.75+10));    int clusterNumber = 0;    if (m_verbose) {      System.out.println("Mapping cluster assignments. Initial cluster assignments:");      for (int i=0; i<numInstances; i++) {	System.out.print(m_ClusterAssignments[i] + " ");      }      System.out.println();    }        for (int i=0; i<numInstances; i++) {      if (m_ClusterAssignments[i]!=-1) {	Integer clusterNum = new Integer(m_ClusterAssignments[i]);	if (!clusterNumberHash.containsKey(clusterNum)) {	  clusterNumberHash.put(clusterNum, new Integer(clusterNumber));	  clusterNumber++;	}      }    }    if (clusterNumber != m_NumClusters) {      throw new Exception("Number of clusters do not match");    }    for (int i=0; i<numInstances; i++) {      if (m_ClusterAssignments[i]!=-1) {	int newCluster = ((Integer) clusterNumberHash.get(new Integer(m_ClusterAssignments[i]))).intValue();	m_ClusterAssignments[i] = newCluster;      }    }    if (m_verbose) {      System.out.println("Done updating cluster assignments. New cluster assignments:");      for (int i=0; i<numInstances; i++) {	System.out.print(m_ClusterAssignments[i] + " ");      }      System.out.println();    }    clusterNumberHash.clear(); clusterNumberHash = null; //free memory  }  /** Outputs the current clustering   *   * @exception Exception if something goes wrong   */  public void printIndexClusters() throws Exception {    if (m_IndexClusters == null)      throw new Exception ("Clusters were not created");    for (int i = 0; i < m_NumClusters; i++) {      HashSet cluster = m_IndexClusters[i];      if (cluster == null) {	System.out.println("Cluster " + i + " is null");      }      else {	System.out.println ("Cluster " + i + " consists of " + cluster.size() + " elements");	Iterator iter = cluster.iterator();	while(iter.hasNext()) {	  int idx = ((Integer) iter.next()).intValue();	  System.out.println("\t\t" + idx);	}      }    }  }  /** E-step of the KMeans clustering algorithm -- find best cluster assignments   */  protected void findBestAssignments() throws Exception{    int moved = 0;    int numInstances = m_Instances.numInstances();    int [] indices = new int[numInstances];    for (int i=0; i<numInstances; i++) {      indices[i] = i; // initialize    }        if (m_InstanceOrdering == ORDERING_DEFAULT) {      for (int i=0; i<numInstances; i++) {	try {	  // Update number of points moved	  moved += assignInstanceToClusterWithConstraints(i);	}	catch (Exception e) {	  System.out.println("Could not find distance. Exception: " + e);	  e.printStackTrace();	}      }      if (m_MovePointsTillAssignmentStabilizes) {	int newMoved = -1;	for (int t=0; t<100 && newMoved != 0; t++) { // move points till assignment stabilizes	  newMoved = 0;	  for (int i=0; i<numInstances; i++) {	    newMoved += assignInstanceToClusterWithConstraints(i);	  }	  if (newMoved > 0) {	    System.out.println(newMoved + " points moved on changing order in t=" + t);	  } 	}      }    } else if (m_InstanceOrdering == ORDERING_RANDOM) { // randomize instance ordering      m_RandomNumberGenerator = new Random(m_RandomSeed); // initialize random number generator again      for (int i = numInstances - 1; i > 0; i--) { 	int indexToSwap = m_RandomNumberGenerator.nextInt(i+1);	int temp = indices[i]; // swap	indices[i] = indices[indexToSwap];	indices[indexToSwap] = temp;      }      for (int i=0; i<numInstances; i++) {	try {	  // Update number of points moved	  moved += assignInstanceToClusterWithConstraints(indices[i]);	}	catch (Exception e) {	  System.out.println("Could not find distance. Exception: " + e);	  e.printStackTrace();	}      }      if (m_MovePointsTillAssignmentStabilizes) {	int newMoved = -1;	for (int t=0; t<100 && newMoved != 0; t++) { // move points till assignment stabilizes	  newMoved = 0;	  for (int i=0; i<numInstances; i++) {	    newMoved += assignInstanceToClusterWithConstraints(indices[i]);	  }	  if (newMoved > 0) {	    System.out.println(newMoved + " points moved on changing order in t=" + t);	  } 	}      }    } else if (m_InstanceOrdering == ORDERING_SORTED) {            int [] sortedIndices = null;      double bestSquareDistance = Integer.MAX_VALUE;      double bestSimilarity = Integer.MIN_VALUE;      double [] distances = new double[numInstances];      // find closest cluster centroid for each instance      for (int i = 0; i < numInstances; i++) {	for (int j = 0; j < m_NumClusters; j++) {	  double squareDistance = 0, similarity = 0;	  if (!m_objFunDecreasing) {	    similarity = similarityInPottsModel(i,j);	    if (similarity > bestSimilarity) {	      bestSimilarity = similarity;	      distances[i] = -similarity; // hacked distance conversion for sorting	    }	  } else {	    squareDistance = squareDistanceInPottsModel(i,j);	    if (squareDistance < bestSquareDistance) {	      bestSquareDistance = squareDistance;	      distances[i] = squareDistance;	    }	  }	}      }      sortedIndices = Utils.sort(distances); // sort in ascending order            for (int i=0; i<numInstances; i++) {	try {	  // Update number of points moved	  moved += assignInstanceToClusterWithConstraints(sortedIndices[i]);	}	catch (Exception e) {	  System.out.println("Could not find distance. Exception: " + e);	  e.printStackTrace();	}      }      if (m_MovePointsTillAssignmentStabilizes) {	int newMoved = -1;	for (int t=0; t<100 && newMoved != 0; t++) { // move points till assignment stabilizes	  newMoved = 0;	  for (int i=0; i<numInstances; i++) {	    newMoved += assignInstanceToClusterWithConstraints(sortedIndices[i]);	  }	  if (newMoved > 0) {	    System.out.println(newMoved + " points moved on changing order in t=" + t);	  } 	}      }    } else {      throw new Exception ("Unknown instance ordering!!");    }        System.out.println("\t" + moved + " points moved in this E-step");  }  /**   * Classifies the instance using the current clustering considering   * constraints, updates cluster assignments   *   * @param instance the instance to be assigned to a cluster   * @return 1 if the point is moved, 0 otherwise   * @exception Exception if instance could not be classified   * successfully */  public int assignInstanceToClusterWithConstraints(int instIdx) throws Exception {    int bestCluster = 0;    double bestSquareDistance = Integer.MAX_VALUE;    double bestSimilarity = Integer.MIN_VALUE;    int moved = 0;        for (int i = 0; i < m_NumClusters; i++) {      double squareDistance = 0, similarity = 0;      if (!m_objFunDecreasing) {	similarity = similarityInPottsModel(instIdx, i);	//	System.out.println("Sim between instance " + instIdx + " and cluster " + i + " = " + similarity);	if (similarity > bestSimilarity) {	  bestSimilarity = similarity;	  bestCluster = i;	}      } else {	squareDistance = squareDistanceInPottsModel(instIdx, i);	if (squareDistance < bestSquareDistance) {	  bestSquareDistance = squareDistance;	  bestCluster = i;	}      }    }    if (m_ClusterAssignments[instIdx] != bestCluster) {      if (m_verbose) {	System.out.println("Moving instance " + instIdx + " from cluster " + m_ClusterAssignments[instIdx] + " to cluster " + bestCluster);      }      moved = 1;//        // remove instIdx from old cluster//        if (m_ClusterAssignments[instIdx] < m_NumClusters && m_ClusterAssignments[instIdx] != -1 && m_IndexClusters[m_ClusterAssignments[instIdx]] != null ) {//  	m_IndexClusters[m_ClusterAssignments[instIdx]].remove(new Integer(instIdx));//        }//        // add instIdx to new cluster//        if (m_IndexClusters[bestCluster] == null) {//  	m_IndexClusters[bestCluster] = new HashSet();//        }//        m_IndexClusters[bestCluster].add(new Integer (instIdx));      // updates cluster Assignments      m_ClusterAssignments[instIdx] = bestCluster;     }    if (m_verbose) {      System.out.println("Assigning instance " + instIdx + " to cluster " + bestCluster);    }    return moved;  }  /** finds similarity between instance and centroid in Potts Model   */  double similarityInPottsModel(int instIdx, int centroidIdx) throws Exception{    double sim = m_metric.similarity(m_Instances.instance(instIdx), m_ClusterCentroids.instance(centroidIdx));    Object list =  m_instanceConstraintHash.get(new Integer(instIdx));    if (list != null) {   // there are constraints associated with this instance      ArrayList constraintList = (ArrayList) list;      for (int i = 0; i < constraintList.size(); i++) {	InstancePair pair = (InstancePair) constraintList.get(i);	int firstIdx = pair.first;	int secondIdx = pair.second;	Instance instance1 = m_Instances.instance(firstIdx);	Instance instance2 = m_Instances.instance(secondIdx);	int otherIdx = (firstIdx == instIdx) ? m_ClusterAssignments[secondIdx] : m_ClusterAssignments[firstIdx];		// check whether the constraint is violated	if (otherIdx != -1) { 	  if (otherIdx != centroidIdx && pair.linkType == InstancePair.MUST_LINK) { 	    sim -= m_MustLinkWeight;	  } else if (otherIdx == centroidIdx && pair.linkType == InstancePair.CANNOT_LINK) { 	    sim -= m_CannotLinkWeight;	  }	}      }    }    if(m_verbose) {      System.out.println("Final similarity between instance " + instIdx + " and centroid " + centroidIdx + " is: " + sim);    }    return sim;  }  /** finds squaredistance between instance and centroid in Potts Model   */  double squareDistanceInPottsModel(int instIdx, int centroidIdx) throws Exception{    double dist = m_metric.distance(m_Instances.instance(instIdx), m_ClusterCentroids.instance(centroidIdx));    dist *= dist; // doing the squaring here itself    if(m_verbose) {      System.out.println("Unconstrained distance between instance " + instIdx + " and centroid " + centroidIdx + " is: " + dist);    }    Object list =  m_instanceConstraintHash.get(new Integer(instIdx));    if (list != null) {   // there are constraints associated with this instance      ArrayList constraintList = (ArrayList) list;      for (int i = 0; i < constraintList.size(); i++) {	InstancePair pair = (InstancePair) constraintList.get(i);	int firstIdx = pair.first;	int secondIdx = pair.second;	Instance instance1 = m_Instances.instance(firstIdx);	Instance instance2 = m_Instances.instance(secondIdx);	int otherIdx = (firstIdx == instIdx) ? m_ClusterAssignments[secondIdx] : m_ClusterAssignments[firstIdx];		// check whether the constraint is violated	if (otherIdx != -1) { 	  if (otherIdx != centroidIdx && pair.linkType == InstancePair.MUST_LINK) { 	    dist += m_MustLinkWeight;	  } else if (otherIdx == centroidIdx && pair.linkType == InstancePair.CANNOT_LINK) { 	    dist += m_CannotLinkWeight;	  }	}      }    }    if(m_verbose) {      System.out.println("Final distance between instance " + instIdx + " and centroid " + centroidIdx + " is: " + dist);    }    return dist;  }  /** M-step of the KMeans clustering algorithm -- updates cluster centroids   */  protected void updateClusterCentroids() throws Exception {    // M-step: update cluster centroids    Instances [] tempI = new Instances[m_NumClusters];    m_ClusterCentroids = new Instances(m_Instances, m_NumClusters);        for (int i = 0; i < m_NumClusters; i++) {      tempI[i] = new Instances(m_Instances, 0); // tempI[i] stores the cluster instances for cluster i    }    for (int i = 0; i < m_Instances.numInstances(); i++) {      tempI[m_ClusterAssignments[i]].add(m_Instances.instance(i));    }        // Calculates cluster centroids    for (int i = 0; i < m_NumClusters; i++) {      double [] values = new double[m_Instances.numAttributes()];      if (m_isSparseInstance) {	values = meanOrMode(tempI[i]); // uses fast meanOrMode      }      else {	for (int j = 0; j < m_Instances.numAttributes(); j++) {	  values[j] = tempI[i].meanOrMode(j); // uses usual meanOrMode	}      }            // cluster centroids are dense in SPKMeans      m_ClusterCentroids.add(new Instance(1.0, values));      if (m_Algorithm == ALGORITHM_SPHERICAL) {	try {	  normalize(m_ClusterCentroids.instance(i));	}	catch (Exception e) {	  e.printStackTrace();	}      }    }    for (int i = 0; i < m_NumClusters; i++) {      tempI[i] = null; // free memory for garbage collector to pick up    }  }    /** calculates objective function */  protected void calculateObjectiveFunction() throws Exception {    if (m_verbose) {      System.out.println("Calculating objective function ...");    }    m_Objective = 0;    double tempML = m_MustLinkWeight;    double tempCL = m_CannotLinkWeight;    m_MustLinkWeight = tempML/2;    m_CannotLinkWeight = tempCL/2; // adjust weights to take care of double counting of constraints     if (m_verbose) {      System.out.println("Must link weight: " + m_MustLinkWeight);      System.out.println("Cannot link weight: " + m_CannotLinkWeight);        }    for (int i=0; i<m_Instances.num

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -