📄 pckmeans.java
字号:
*/ public void buildClusterer(Instances data) throws Exception { System.out.println("Must link weight: " + m_MustLinkWeight); System.out.println("Cannot link weight: " + m_CannotLinkWeight); m_RandomNumberGenerator = new Random(m_RandomSeed); setInstances(data); // Don't rebuild the metric if it was already trained if (!m_metricBuilt) { m_metric.buildMetric(data.numAttributes()); } m_ClusterCentroids = new Instances(m_Instances, m_NumClusters); m_ClusterAssignments = new int [m_Instances.numInstances()]; if (m_Instances.checkForNominalAttributes() && m_Instances.checkForStringAttributes()) { throw new UnsupportedAttributeTypeException("Cannot handle nominal attributes\n"); } System.out.println("Initializing clustering ... "); if (m_Active) { bestPairsForActiveLearning(m_NumActive); } else { nonActivePairwiseInit(); } System.out.println("Done initializing clustering ..."); if (m_verbose) { if (m_Seedable) { System.out.println("Initial assignments of seed points:"); getIndexClusters(); printIndexClusters(); } for (int i=0; i<m_NumClusters; i++) { System.out.println("Centroid " + i + ": " + m_ClusterCentroids.instance(i)); } } runKMeans(); } /** * Reset all values that have been learned */ public void resetClusterer() throws Exception{ if (m_metric instanceof LearnableMetric) { ((LearnableMetric)m_metric).resetMetric(); } m_SeedHash = null; m_ConstraintsHash = null; m_instanceConstraintHash = null; } /** Set default perturbation value * @param p perturbation fraction */ public void setDefaultPerturb(double p) { m_DefaultPerturb = p; } /** Get default perturbation value * @return perturbation fraction */ public double getDefaultPerturb(){ return m_DefaultPerturb; } /** Turn seeding on and off * @param seedable should seeding be done? */ public void setSeedable(boolean seedable) { m_Seedable = seedable; } /** Is seeding performed? * @return is seeding being done? 
*/
public boolean getSeedable() {
  return m_Seedable;
}

/**
 * We can have clusterers that don't utilize seeding
 * @return whether seeding is being done
 */
public boolean seedable() {
  return m_Seedable;
}

/**
 * Phase 1 ("Explore") code for active learning.
 *
 * Grows up to m_NumClusters disjoint neighborhoods by repeatedly taking the
 * point farthest from everything assigned so far and querying the oracle
 * against one representative of each existing neighborhood until a must-link
 * is found (or the point is cannot-linked to all of them, starting a new
 * neighborhood). Updates m_NeighborSets, m_SumOfClusterInstances,
 * m_ClusterAssignments, m_AssignedSet and m_NumCurrentClusters.
 *
 * @param numQueries maximum number of oracle queries allowed in this phase
 * @return the number of oracle queries actually used
 * @throws Exception if something goes wrong
 */
protected int activePhaseOne(int numQueries) throws Exception {
  int numInstances = m_Instances.numInstances();
  int X, Y, Z;
  int query, lambda, Label, CLcount;
  boolean MLmode = true;  // NOTE(review): never read in this method
  System.out.println("In Explore phase, with numqueries: " + numQueries);

  // these are the main data-structures to be updated here
  m_NeighborSets = new HashSet[m_NumClusters];  // set of points in each cluster neighborhood
  m_SumOfClusterInstances = new Instance[m_NumClusters];  // running attribute sums per neighborhood
  m_ClusterAssignments = new int[numInstances];
  // set of points that have already been assigned to some neighborhood
  // (capacity sized for a default-load-factor HashSet holding ~numQueries points)
  m_AssignedSet = new HashSet((int) (numQueries/0.75+10));
  for (int i=0; i<m_Instances.numInstances(); i++) {
    m_ClusterAssignments[i] = -1;  // -1 == unassigned
  }

  query = 0;    // current num queries
  lambda = -1;  // current number of disjoint neighborhoods (highest index in use)
  X = 0;        // current point under investigation

  while( query < numQueries ){
    if( m_NeighborSets[0] == null ){
      // start the first neighborhood from the first point; costs no query
      lambda++;
      if (m_verbose) System.out.println("Setting cluster of " + X + " to " + lambda);
      // update data structures
      m_NeighborSets[lambda] = new HashSet();
      m_NeighborSets[lambda].add(new Integer(X));
      m_SumOfClusterInstances[lambda] = sumWithInstance(m_SumOfClusterInstances[lambda],m_Instances.instance(X));
      m_ClusterAssignments[X] = lambda;
      m_AssignedSet.add(new Integer(X));
    } else if( lambda == m_NumClusters-1 && !m_AllExplore) {
      // NOTE: this condition is fired only if we are doing 2 phase (Explore + Consolidate):
      // once one neighborhood per cluster exists, stop exploring early
      System.out.println("Explore phase over after " + query + " queries");
      m_NumCurrentClusters = lambda+1;
      return query;
    } else {
      // pick the point farthest from everything assigned so far
      Z = (int) farthestFromSet(m_AssignedSet, null);
      CLcount = -1;  // number of neighborhoods Z is cannot-linked to, minus one
      for( int h = 0; h <= lambda; h++ ){
        if (m_verbose) System.out.println("Starting for loop CLcount: " + CLcount);
        Iterator NbrIt = null;
        if (m_NeighborSets[h] != null) {
          NbrIt = m_NeighborSets[h].iterator();
        }
        if( NbrIt != null && NbrIt.hasNext() ){
          // query Z against a single representative of neighborhood h —
          // transitivity of must-links makes one representative sufficient
          X = ((Integer) NbrIt.next()).intValue();
          if (m_verbose) System.out.println("Inside iterator next ... X: " + X);
          Label = askOracle(X,Z);
          query++;
          System.out.println("Making query: " + query);
          if( Label == InstancePair.CANNOT_LINK ){
            // Cannot-link, update CLcount
            CLcount++;
          } else{
            // Must-link, add to neighborset
            // update data structures
            m_NeighborSets[h].add(new Integer(Z));
            m_SumOfClusterInstances[h] = sumWithInstance(m_SumOfClusterInstances[h],m_Instances.instance(Z));
            m_ClusterAssignments[Z] = h;
            m_AssignedSet.add(new Integer(Z));
            break; // get out of for loop
          }
          if(query >= numQueries){
            // query budget exhausted mid-scan
            if (m_verbose) System.out.println("Run out of queries");
            m_NumCurrentClusters = lambda+1;
            return query;
          }
        }
      }
      if (m_verbose) {
        System.out.println("Out of for loop");
      }
      if( CLcount == lambda ){
        // found a point cannot-linked to all current clusters: start a new neighborhood
        lambda++;
        // update data structures
        m_NeighborSets[lambda] = new HashSet();
        m_NeighborSets[lambda].add(new Integer(Z));
        m_SumOfClusterInstances[lambda] = sumWithInstance(m_SumOfClusterInstances[lambda],m_Instances.instance(Z));
        m_ClusterAssignments[Z] = lambda;
        m_AssignedSet.add(new Integer(Z));
      }
    } // close else
  } // close while

  if (m_verbose) System.out.println("Number of queries: " + query);
  m_NumCurrentClusters = lambda+1;
  return query;
}

/**
 * Phase 2 ("Consolidate") code for active learning, with round robin.
 *
 * Repeatedly picks the point nearest to the centroid of the smallest
 * neighborhood and queries it against each neighborhood (smallest first,
 * then the rest in descending similarity to the point) until a must-link
 * places it. Finishes by building global centroids and taking the
 * ML/CL transitive closure.
 *
 * @param numQueries maximum number of oracle queries allowed in this phase
 * @throws Exception if a test point is selected (sanity check) or on failure
 */
protected void activePhaseTwoRoundRobin(int numQueries) throws Exception {
  int numInstances = m_Instances.numInstances();
  int X,Y;
  int query = 0, Label;
  System.out.println("In Consolidate phase, with numqueries: " + numQueries);

  while( query < numQueries ){
    if (m_verbose) System.out.println("Starting round robin");
    // starting round robin
    Instance[] clusterCentroids = new Instance[m_NumClusters];

    // find cluster with smallest size
    int smallestSize = Integer.MAX_VALUE, smallestCluster = -1;
    for (int i=0; i<m_NumClusters; i++) {
      if (m_NeighborSets[i].size() < smallestSize) {
        smallestSize = m_NeighborSets[i].size();
        smallestCluster = i;
      }
    }

    // compute centroid for smallest cluster from its running attribute sum
    if (m_isSparseInstance) {
      clusterCentroids[smallestCluster] = new SparseInstance(m_SumOfClusterInstances[smallestCluster]);
    } else {
      clusterCentroids[smallestCluster] = new Instance(m_SumOfClusterInstances[smallestCluster]);
    }
    clusterCentroids[smallestCluster].setDataset(m_Instances);
    if (!m_objFunDecreasing) {
      normalize(clusterCentroids[smallestCluster]);
    } else {
      normalizeByWeight(clusterCentroids[smallestCluster]);
    }

    // find next point, closest to centroid of smallest cluster
    X = nearestFromPoint(clusterCentroids[smallestCluster], m_AssignedSet);
    if (X == -1) {
      if (m_verbose) System.out.println("No more points left unassigned, we are DONE!!");
      createGlobalCentroids();
      addMLAndCLTransitiveClosure(null);
      return;
    }
    if (m_verbose) System.out.println("Nearest point is " + X);
    if (X >= m_StartingIndexOfTest) { // Sanity Check
      throw new Exception ("Test point selected, something went wrong!");
    }

    Iterator NbrIt = m_NeighborSets[smallestCluster].iterator();
    Y = ((Integer) NbrIt.next()).intValue(); // get any point from the smallest neighborhood
    Label = askOracle(X,Y);
    query++;
    System.out.println("Making query:" + query);
    if (m_verbose) System.out.println("Number of queries: " + query);

    if( Label == InstancePair.MUST_LINK ){
      // update data structures
      m_NeighborSets[smallestCluster].add(new Integer(X));
      m_SumOfClusterInstances[smallestCluster] = sumWithInstance(m_SumOfClusterInstances[smallestCluster], m_Instances.instance(X));
      m_ClusterAssignments[X] = smallestCluster;
      if (m_verbose) System.out.println("Adding " + X + " to cluster: " + smallestCluster);
      m_AssignedSet.add(new Integer(X));
      if( query >= numQueries ){
        if (m_verbose) System.out.println("Ran out of queries");
        System.out.println("Consolidate phase over after " + query + " queries");
        createGlobalCentroids();
        addMLAndCLTransitiveClosure(null);
        return;
      }
    } else {
      // must-link not found with smallest neighborhood, process other neighborhoods now
      if (m_verbose) System.out.println("Processing other centroids now");

      // compute centroids of other clusters
      for (int i=0; i<m_NumClusters; i++) {
        if (i != smallestCluster) { // already made query for smallest cluster
          if (m_isSparseInstance) {
            clusterCentroids[i] = new SparseInstance(m_SumOfClusterInstances[i]);
          } else {
            clusterCentroids[i] = new Instance(m_SumOfClusterInstances[i]);
          }
          clusterCentroids[i].setDataset(m_Instances);
          if (!m_objFunDecreasing) {
            normalize(clusterCentroids[i]);
          } else {
            normalizeByWeight(clusterCentroids[i]);
          }
        }
      }

      double[] similaritiesToCentroids = new double[m_NumClusters];
      for (int i=0; i<m_NumClusters; i++) {
        if (i != smallestCluster) { // already made query for smallestCluster
          similaritiesToCentroids[i] = m_metric.similarity(clusterCentroids[i], m_Instances.instance(X));
        }
      } // handles both Euclidean and WeightedDotP

      if (m_verbose) {
        System.out.println("Before sort");
        for (int i=0; i<m_NumClusters; i++) {
          System.out.println(similaritiesToCentroids[i]);
        }
      }
      int[] indices = Utils.sort(similaritiesToCentroids); // sorts in ascending order of similarity
      if (m_verbose) {
        System.out.println("After sort");
        for (int i=0; i<m_NumClusters; i++) {
          System.out.println(indices[i]);
        }
      }

      // walk neighborhoods in descending similarity to X
      for(int h = m_NumClusters-1; h >=0; h-- ){ // since sort is ascending, and we want descending sort of similarity values
        int index = indices[h];
        if (index != smallestCluster) { // already made query for smallest cluster
          NbrIt = m_NeighborSets[index].iterator();
          Y = ((Integer) NbrIt.next()).intValue(); // get any point from the neighborhood
          Label = askOracle(X,Y);
          query++;
          System.out.println("Making query:" + query);
          if (m_verbose) System.out.println("Number of queries: " + query);
          if( Label == InstancePair.MUST_LINK ){
            // update data structures
            m_NeighborSets[index].add(new Integer(X));
            m_SumOfClusterInstances[index] = sumWithInstance(m_SumOfClusterInstances[index], m_Instances.instance(X));
            m_ClusterAssignments[X] = index;
            if (m_verbose) System.out.println("Adding " + X + " to cluster: " + index);
            m_AssignedSet.add(new Integer(X));
            if (m_verbose) System.out.println("Exiting phase 2 for loop");
            break; // exit from for
          }
          if( query >= numQueries ){
            if (m_verbose) System.out.println("Ran out of queries");
            createGlobalCentroids();
            addMLAndCLTransitiveClosure(null);
            return;
          }
        }
      } // end reverse for
    } // end else
  } // end while

  createGlobalCentroids();
  addMLAndCLTransitiveClosure(null);
  return;
}

/**
 * Phase 2 code for active learning, random variant: queries randomly chosen
 * unassigned points against the neighborhood centroids.
 * NOTE(review): method is truncated here — the rest of the source was lost;
 * body below is the visible fragment only.
 *
 * @param numQueries maximum number of oracle queries allowed in this phase
 * @throws Exception if something goes wrong
 */
protected void activePhaseTwoRandom(int numQueries) throws Exception {
  int numInstances = m_Instances.numInstances();
  int X,Y;
  int query = 0, Label;
  System.out.println("In Phase 2 with random, with numqueries: " + numQueries);

  while( query < numQueries ){
    if (m_verbose) System.out.println("Starting phase 2");
    Instance[] clusterCentroids = new Instance[m_NumClusters];
    if (m_AssignedSet.size() == m_StartingIndexOfTest) {
      if (m_verbose) System.out.println("No more points left unassigned, we are DONE!!");
      createGlobalCentroids();
      addMLAndCLTransitiveClosure(null);
      return;
    }

    // find next point at random (resample until an unassigned point is found)
    X = m_RandomNumberGenerator.nextInt(m_StartingIndexOfTest);
    while (m_AssignedSet != null && m_AssignedSet.contains(new Integer(X))) {
      X = m_RandomNumberGenerator.nextInt(m_StartingIndexOfTest);
    }
    if (m_verbose) System.out.println("X = " + X + ", finding distances to centroids now");

    // compute centroids of other clusters
    for (int i=0; i<m_NumClusters; i++) {
      if (m_isSparseInstance) {
        clusterCentroids[i] = new SparseInstance(m_SumOfClusterInstances[i]);
      } else {
        clusterCentroids[i] = new Instance(m_SumOfClusterInstances[i]);
      }
      clusterCentroids[i].setDataset(m_Instances);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -