⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xmeans.java

📁 wekaUT是 university texas austin 开发的基于weka的半指导学习(semi supervised learning)的分类器
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
    Instances children;     // builds up a KDTree    if (m_KDTree != null) {      m_KDTree.buildKDTree(m_Instances, m_Ranges);      //PFD(D_KDTREE, tree.toString());    }      // loop counter of main loop    m_IterationCount = 0;    // is true for the beginning of first loop,     // might false because ass. is already done in the following loops    boolean firstAssignment = true;    /**     * "finished" does get true as soon as:     * 1. number of clusters gets >= m_MaxClusters,      * 2. in the last round, none of the centers have been split     *      * if number of clusters is already >= m_MaxClusters      * part 1 (= Improve-Params) is done at least once.     */    while (!finished &&           !stopIteration(m_IterationCount, m_MaxIterations)) {            /* ====================================================================       * 1. Improve-Params                         *    conventional K-means       */      PFD(D_FOLLOWSPLIT, "\nBeginning of main loop - centers:");      PrCentersFD(D_FOLLOWSPLIT);      PFD(D_ITERCOUNT, "\n*** 1. Improve-Params " + m_IterationCount + 	  ". time");      m_IterationCount++;      // prepare to converge      boolean converged = false;      // initialize assignments to -1      m_ClusterAssignments = initAssignments(m_Instances.numInstances());      // stores a list of indexes of instances belonging to each center      int [][] instOfCent = new int[m_ClusterCenters.numInstances()][];      // KMeans loop counter      int kMeansIteration = 0;      // converge in conventional K-means ----------------------------------      PFD(D_FOLLOWSPLIT, "\nConverge in K-Means:");      while (!converged && 	     !stopKMeansIteration(kMeansIteration, m_MaxKMeans)) {		kMeansIteration++;	converged = true;	        // assign instances to centers -------------------------------------        converged = assignToCenters(m_KDTree,				    m_ClusterCenters, 				    instOfCent,				    allInstList, 				    m_ClusterAssignments,				    kMeansIteration);	        /*gabifor (int l = 0; l < instOfCent.length; l++) {	  System.out.println(" " + instOfCent[l].length + "+++");          for (int m = 0; m < instOfCent[l].length; m++) {	  OOPS(" " + instOfCent[l][m]);	  }	  }*/	PFD(D_FOLLOWSPLIT, "\nMain loop - Assign - centers:");	PrCentersFD(D_FOLLOWSPLIT);	//System.out.println(" ");	// compute new centers = centers of mass of points        converged = recomputeCenters(m_ClusterCenters, // clusters				     instOfCent,       // their instances				     m_Model);         // model information      PFD(D_FOLLOWSPLIT, "\nMain loop - Recompute - centers:");      PrCentersFD(D_FOLLOWSPLIT);      }      PFD(D_FOLLOWSPLIT, "");      PFD(D_FOLLOWSPLIT, "End of Part: 1. Improve-Params - conventional K-means");      /*      for (int m = 0; m < instOfCent.length; m++) {        System.out.println("Center "+m+" : "+ m_ClusterCenters.instance(m));        for (int n = 0; n < instOfCent[m].length; n++) {	  System.out.print(instOfCent[m][n]+", ");	}	PFD(D_FOLLOWSPLIT, "");	}*/          /** =====================================================================       * 2. Improve-Structur       */      // BIC before split distortioning the centres      m_Mle = distortion(instOfCent, m_ClusterCenters);      m_Bic = calculateBIC(instOfCent, m_ClusterCenters, m_Mle);      PFD(D_FOLLOWSPLIT, "m_Bic " + m_Bic);      int currNumCent = m_ClusterCenters.numInstances();      Instances splitCenters = new Instances(m_ClusterCenters, 					     currNumCent * 2);            // store BIC values of parent and children      double [] pbic = new double [currNumCent];      double [] cbic = new double [currNumCent];                  // split each center      for (int i = 0; i < currNumCent 	   // this could help to optimize the algorithm	   //	     && currNumCent + numSplits <= m_MaxNumClusters           ; 	   i++) {		PFD(D_FOLLOWSPLIT, "\nsplit center " + i +		      " " + m_ClusterCenters.instance(i));	Instance currCenter = m_ClusterCenters.instance(i);	int [] currInstList = instOfCent[i];	int currNumInst = instOfCent[i].length;	double currMLE = m_Mle[i];		// not enough instances; than continue with next	if (currNumInst <= 2) {	  pbic[i] = Double.MAX_VALUE;	  cbic[i] = 0.0;	  // add center itself as dummy	  splitCenters.add(currCenter);	  splitCenters.add(currCenter);	  continue;	}		// split centers  ----------------------------------------------	double variance = m_Mle[i] / (double)currNumInst;	children = splitCenter(random0, currCenter, variance, m_Model);		// initialize assignments to -1	int[] oneCentAssignments = initAssignments(currNumInst);	int[][] instOfChCent = new int [2][]; // todo maybe split didn't work		// converge the children  --------------------------------------	converged = false;	int kMeansForChildrenIteration = 0;	PFD(D_FOLLOWSPLIT, "\nConverge, K-Means for children: " + i);	while (!converged &&           !stopKMeansIteration(kMeansForChildrenIteration, 			       m_MaxKMeansForChildren)) {	  kMeansForChildrenIteration++;	  	  converged =	    assignToCenters(children, instOfChCent,			    currInstList, oneCentAssignments);	  if (!converged) {       	    recomputeCentersFast(children, instOfChCent, m_Model);	  }	} 	// store new centers for later decision if they are taken	splitCenters.add(children.instance(0));	splitCenters.add(children.instance(1));	PFD(D_FOLLOWSPLIT, "\nconverged cildren ");	PFD(D_FOLLOWSPLIT, " " + children.instance(0));	PFD(D_FOLLOWSPLIT, " " + children.instance(1));	// compare parent and children model by their BIC-value	pbic[i] = calculateBIC(currInstList, currCenter,  m_Mle[i], m_Model);	double [] chMLE = distortion(instOfChCent, children);	cbic[i] = calculateBIC(instOfChCent, children, chMLE);      } // end of loop over clusters      // decide which one to split and make new list of cluster centers      Instances newClusterCenters = null;      newClusterCenters = newCentersAfterSplit(pbic, cbic, m_CutOffFactor,                                                 splitCenters);      /**       * Compare with before Improve-Structure       */      int newNumClusters = newClusterCenters.numInstances();      if (newNumClusters != m_NumClusters) {		PFD(D_FOLLOWSPLIT, "Compare with non-split");	// initialize assignments to -1	int [] newClusterAssignments = 	  initAssignments(m_Instances.numInstances());		// stores a list of indexes of instances belonging to each center	int [][] newInstOfCent = new int[newClusterCenters.numInstances()][];		// assign instances to centers -------------------------------------	converged = assignToCenters(m_KDTree,				    newClusterCenters, 				    newInstOfCent,				    allInstList, 				    newClusterAssignments,				    m_IterationCount);		double [] newMle = distortion(newInstOfCent, newClusterCenters);	double newBic = calculateBIC(newInstOfCent, newClusterCenters, newMle);	PFD(D_FOLLOWSPLIT, "newBic " + newBic);	if (newBic > m_Bic) {          PFD(D_FOLLOWSPLIT, "*** decide for new clusters");	  m_Bic = newBic;	  m_ClusterCenters = newClusterCenters;	  m_ClusterAssignments = newClusterAssignments;	} else {          PFD(D_FOLLOWSPLIT, "*** keep old clusters");        }      }      newNumClusters = m_ClusterCenters.numInstances();      // decide if finished: max num cluster reached       // or last centers where not split at all       if ((newNumClusters >= m_MaxNumClusters) 	  || (newNumClusters == m_NumClusters)) {	finished = true;      }      m_NumClusters = newNumClusters;    }  }  /**   * Checks for nominal attributes in the dataset.   * Class attribute is ignored.   * @param data    * @return false if no nominal attributes are present   */  public boolean checkForNominalAttributes(Instances data) {    int i = 0;    while (i < data.numAttributes()) {      if ((i != data.classIndex()) && data.attribute(i++).isNominal()) {	return true;      }    }    return false;  }  /**   * Set array of int, used to store assignments, to -1.   * @param ass integer array used for storing assignments   * @return integer array used for storing assignments   */  private int [] initAssignments(int [] ass) {    for (int i = 0; i < ass.length; i++)      ass[i] = -1;    return ass;  }       /**   * Creates and initializes integer array, used to store assignments.   * @param numInstances length of array used for assignments   * @return integer array used for storing assignments   */  private int [] initAssignments(int numInstances) {    int [] ass = new int[numInstances];    for (int i = 0; i < numInstances; i++)      ass[i] = -1;    return ass;  }        /**   * Creates and initializes boolean array.   * @param len length of new array   * @return the new array   */  boolean [] initBoolArray(int len) {    boolean[] boolArray = new boolean [len];    for (int i = 0; i < len; i++) {      boolArray[i] = false;    }    return boolArray;  }  /**   * Returns new center list.   *   * The following steps 1. and 2. both take care that the number of centers   * does not exceed maxCenters.   *   * 1. Compare BIC values of parent and children and takes the one as   * new centers which do win (= BIC-value is smaller).   *   * 2. If in 1. none of the children are chosen    *    && and cutoff factor is > 0   * cutoff factor is taken as the percentage of "best" centers that are   * still taken.   * @param pbic array of parents BIC-values   * @param cbic array of childrens BIC-values   * @param cutoffFactor cutoff factor    * @param splitCenters all children    * @return the new centers   */  private Instances newCentersAfterSplit(double [] pbic, 					 double [] cbic,					 double cutoffFactor,					 Instances splitCenters) {    // store if split won    boolean splitPerCutoff = false;    boolean takeSomeAway = false;    boolean [] splitWon = initBoolArray(m_ClusterCenters.numInstances());    int numToSplit = 0;    Instances newCenters = null;        // how many would be split, because the children have a better bic value    for (int i = 0; i < cbic.length; i++) {      if (cbic[i] > pbic[i]) {	// decide for splitting ----------------------------------------	splitWon[i] = true; numToSplit++;	PFD(D_FOLLOWSPLIT, "Center " + i + " decide for children");      }      else {	// decide for parents and finished stays true  -----------------	PFD(D_FOLLOWSPLIT, "Center " + i + " decide for parent");      }    }    // no splits yet so split per cutoff factor    if ((numToSplit == 0) && (cutoffFactor > 0)) {      splitPerCutoff = true;            // how many to split per cutoff factor      numToSplit = (int)         ((double) m_ClusterCenters.numInstances() * m_CutOffFactor);     }    // prepare indexes of values in ascending order      double [] diff = new double [m_NumClusters];    for (int j = 0; j < diff.length; j++) {      diff[j] = pbic[j] - cbic[j];    }        int [] sortOrder = Utils.sort(diff);        // check if maxNumClusters would be exceeded    int possibleToSplit = m_MaxNumClusters - m_NumClusters;     if (possibleToSplit > numToSplit) {      // still enough possible, do the whole amount      possibleToSplit = numToSplit;    }    else      takeSomeAway = true;    // prepare for splitting the one that are supposed to be split    if (splitPerCutoff) {      for (int j = 0; (j < possibleToSplit) && (cbic[sortOrder[j]] > 0.0);	   j++) {	splitWon[sortOrder[j]] = true;      }      m_NumSplitsStillDone += possibleToSplit;    }     else {      // take some splits away if max number of clusters would be exceeded      if (takeSomeAway) {	int count = 0;	int j = 0;	for (;j < splitWon.length && count < possibleToSplit; j++){	  if (splitWon[sortOrder[j]] == true) count++;	}		while (j < splitWon.length) {	  splitWon[sortOrder[j]] = false;	  j++;	}      }    }       // finally split    if (possibleToSplit > 0)       newCenters = newCentersAfterSplit(splitWon, splitCenters);    else      newCenters = m_ClusterCenters;    return newCenters;  }  /**   * Returns new centers.   * Depending on splitWon: if true takes children, if false   * takes parent = current center.   * @param splitWon array of boolean to indicate to take split or not   * @param splitCenters list of splitted centers   * @return the new centers   */  private Instances newCentersAfterSplit(boolean [] splitWon,                                          Instances splitCenters) {    Instances newCenters = new Instances(splitCenters, 0);    int sIndex = 0;    for (int i = 0; i < splitWon.length; i++) {      if (splitWon[i]) {        m_NumSplitsDone++;	newCenters.add(splitCenters.instance(sIndex++));	newCenters.add(splitCenters.instance(sIndex++));      } else {	sIndex++; sIndex++;        newCenters.add(m_ClusterCenters.instance(i));      }    }    return newCenters;  }  /**   * Controls that counter does not exceed max iteration value.   * Special function for kmeans iterations.   * @param iterationCount current value of counter   * @param max maximum value for counter   * @return true if iteration should be stopped   */   private boolean stopKMeansIteration(int iterationCount, int max) {    boolean stopIterate = false;    if (max >= 0)       stopIterate = (iterationCount >= max);    if (stopIterate)       m_KMeansStopped++;    return stopIterate;  }  /**   * Checks if iterationCount has to be checked and if yes   * (this means m_MaxIterations is > 0) compares it with   * m_MaxIteration   */   private boolean stopIteration(int iterationCount, int max) {    boolean stopIterate = false;    if (max >= 0)       stopIterate = (iterationCount >= max);    return stopIterate;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -