⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mpckmeans.java

📁 wekaUT 是 University of Texas at Austin 开发的基于 weka 的半监督学习 (semi-supervised learning) 分类器
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
	  // --- tail of the enclosing clustering loop (method header is above this chunk) ---
	  calculateObjectiveFunction(true);
	  System.out.println((float)m_Objective + " - Objective function after metric update");
	  System.out.println("\tvar=" + ((float)m_objVariance) + "\tC=" + ((float)m_objCannotLinks) +
			     "\tM=" + ((float)m_objMustLinks)  + "\tLOG=" + ((float)m_objNormalizer) +
			     "\tREG=" + ((float)m_objRegularizer));
	}
	// the cannot-link penalty ceiling depends on the (possibly just updated) metric,
	// so it is recomputed whenever constraints are present
	if (m_ConstraintsHash.size() > 0) {
	  m_maxCLPenalties = calculateMaxCLPenalties();
	}
      }
      if (fincoh != null) {
	  printConstraintIncoherence(fincoh);
      }
      converged = convergenceCheck(m_OldObjective, m_Objective);
      m_Iterations++;
    }
    if (fincoh != null) {
      fincoh.close();
    }
    System.out.println("Converged!");
    System.err.print("Its\t" + m_Iterations + "\t");
    if (m_verbose) {
      // dump the 5 highest-valued attributes of each centroid (TreeMap with
      // reverse order keeps the largest values first)
      System.out.println("Done clustering; top cluster features: ");
      for (int i = 0; i < m_NumClusters; i++){
	System.out.println("Centroid " + i);
	TreeMap map = new TreeMap(Collections.reverseOrder());
	Instance centroid= m_ClusterCentroids.instance(i);
	for (int j = 0; j < centroid.numValues(); j++) {
	  Attribute attr = centroid.attributeSparse(j);
	  // NOTE(review): keying the map on the attribute VALUE means attributes
	  // with equal values overwrite each other -- confirm this is acceptable
	  map.put(new Double(centroid.value(attr)), attr.name());
	}
	Iterator it = map.entrySet().iterator();
	for (int j=0; j < 5 && it.hasNext(); j++) {
	  Map.Entry entry = (Map.Entry) it.next();
	  System.out.println("\t" + entry.getKey() + "\t" + entry.getValue());
	}
      }
    }
  }

  /**
   * Measures the "incoherence" of the current metric w.r.t. the constraint set:
   * for every (must-link, cannot-link) pair of constraints, a violation is
   * counted when the must-link pair is farther apart than the cannot-link pair
   * under the current metric.  Side effect: resets and updates m_numViolations.
   *
   * @param fincoh stream the incoherence ratio is logged to; when null, a
   *               violation summary is printed to stdout instead
   * @exception Exception if the metric distance computation fails
   */
  public void printConstraintIncoherence(PrintStream fincoh) throws Exception {
      Object[] array = m_ConstraintsHash.entrySet().toArray();
      int numML = 0, numCL = 0;
      double incoh = 0;
      m_numViolations = 0;
      System.out.println("NumConstraints: " + array.length);
      // O(n^2) pass over all unordered pairs of constraints
      for (int i=0; i < array.length; i++) {
	  Map.Entry con1 = (Map.Entry) array[i];
	  InstancePair pair1 = (InstancePair) con1.getKey();
	  int link1 = ((Integer) con1.getValue()).intValue();
	  double dist1 =  m_metric.distance(m_Instances.instance(pair1.first),
					    m_Instances.instance(pair1.second));
	  if (link1 == InstancePair.MUST_LINK) {
	      numML++;
	  } else if (link1 == InstancePair.CANNOT_LINK) {
	      numCL++;
	  }
	  for (int j=i+1; j < array.length; j++) {
	      Map.Entry con2 = (Map.Entry) array[j];
	      InstancePair pair2 = (InstancePair) con2.getKey();
	      int link2 = ((Integer) con2.getValue()).intValue();
	      double dist2 =  m_metric.distance(m_Instances.instance(pair2.first),
						m_Instances.instance(pair2.second));
	      // a violation: an ML pair farther apart than a CL pair (or vice versa)
	      if (link1 == InstancePair.MUST_LINK) {
		  if (link2 == InstancePair.CANNOT_LINK) {
		      if (dist1 > dist2) {
			  m_numViolations++;
			  //			  System.out.println("(" + pair1.first + "," + pair1.second + "): " + link1 + ":" + dist1);
			  //			  System.out.println("(" + pair2.first + "," + pair2.second + "): " + link2 + ":" + dist2);
			  //			  System.out.println("Violations: " + m_numViolations);
		      }
		  }
	      } else if (link1 == InstancePair.CANNOT_LINK) {
		  if (link2 == InstancePair.MUST_LINK) {
		      if (dist1 < dist2) {
			  m_numViolations++;
			  //			  System.out.println("(" + pair1.first + "," + pair1.second + "): " + link1 + ":" + dist1);
			  //			  System.out.println("(" + pair2.first + "," + pair2.second + "): " + link2 + ":" + dist2);
			  //			  System.out.println("Violations: " + m_numViolations);
		      }
		  }
	      }
	  }
      }
      // NOTE(review): when numCL or numML is 0 this divides by zero and incoh
      // becomes NaN -- confirm callers guarantee both constraint kinds exist
      incoh = (m_numViolations * 1.0) / (numCL * numML);
      if (fincoh != null) {
	  //	  fincoh.println((m_Iterations+1)  + "\tNumViolations\t" + m_numViolations + "\tNumTotalCL\t" + numCL + "\tNumTotalML\t" + numML);
	  fincoh.println("Iterations\t" + (m_Iterations+1)  + "\tIncoh\t" + incoh);
      } else {
	  System.out.println((m_Iterations+1) + "\tNumViolations\t" + m_numViolations + "\tNumTotalCL\t" + numCL + "\tNumTotalML\t" + numML);
      }
  }

  /** Reset the value of the objective function and all of its components. */
  public void resetObjective() {
    m_Objective = 0;
    m_objVariance = 0;
    m_objCannotLinks = 0;
    m_objMustLinks = 0;
    m_objNormalizer = 0;
    m_objRegularizer = 0;
  }

  /** Go through the cannot-link constraints and find the current maximum distance
   * @return an array of maximum weighted distances.  If a single metric is used, maximum distance
   * is calculated over the entire dataset */
  // TODO:  non-datasetWide case is not debugged currently!!!
  protected double[] calculateMaxCLPenalties() throws Exception {
    double [] maxPenalties = null;
    double [][] minValues = null;
    double [][] maxValues = null;
    int[] attrIdxs = null;
    maxPenalties = new double[m_NumClusters];
    // per-cluster pair of extreme points and their difference instance
    m_maxCLPoints = new Instance[m_NumClusters][2];
    m_maxCLDiffInstances = new Instance[m_NumClusters];
    for (int i = 0; i < m_NumClusters; i++) {
      m_maxCLPoints[i][0] = new Instance(m_Instances.numAttributes());
      m_maxCLPoints[i][1] = new Instance(m_Instances.numAttributes());
      m_maxCLPoints[i][0].setDataset(m_Instances);
      m_maxCLPoints[i][1].setDataset(m_Instances);
      m_maxCLDiffInstances[i] = new Instance(m_Instances.numAttributes());
      m_maxCLDiffInstances[i].setDataset(m_Instances);
    }
    // TEMPORARY PLUG:  this was supposed to take care of WeightedDotp,
    // but it turns out that with weighting similarity can be > 1.
//      if (m_metric.m_fixedMaxDistance) {
//        for (int i = 0; i < m_NumClusters; i++) {
//  	maxPenalties[i] = m_metric.getMaxDistance();
//        }
//        return maxPenalties;
//      }
    // NOTE(review): Java zero-initializes these arrays, so for all-positive data
    // the running minimum can never rise above 0 (and for all-negative data the
    // maximum never drops below 0) -- confirm this is the intended behavior
    minValues = new double[m_NumClusters][m_metrics[0].getNumAttributes()];
    maxValues = new double[m_NumClusters][m_metrics[0].getNumAttributes()];
    attrIdxs = m_metrics[0].getAttrIndxs();
    // temporary plug:  if this if the first iteration when no instances were assigned to clusters,
    // dataset-wide (not cluster-wide!) minimum and maximum are used even for the case with
    // multiple metrics
    boolean datasetWide = true;
    if (m_useMultipleMetrics && m_Iterations > 0) {
      datasetWide = false;
    }
    // TODO:  Mahalanobis - check with getMaxPoints
    // go through all points
    if (m_metric instanceof WeightedMahalanobis) {
      if (m_useMultipleMetrics) {
	for (int i = 0; i < m_metrics.length; i++) {
	  double[][] maxPoints = ((WeightedMahalanobis)m_metrics[i]).getMaxPoints(m_ConstraintsHash, m_Instances);
	  minValues[i] = maxPoints[0];
	  maxValues[i] = maxPoints[1];
	  //  	  System.out.println("Max points " + i);
	  //  	  for (int j = 0; j < maxPoints[0].length; j++) { System.out.println(maxPoints[0][j] + " - " + maxPoints[1][j]);}
	}
      } else {
	double[][] maxPoints = ((WeightedMahalanobis)m_metric).getMaxPoints(m_ConstraintsHash, m_Instances);
	// NOTE(review): the two assignments below are redundant -- the following
	// loop starts at i=0 and overwrites slot 0 with the same values
	minValues[0] = maxPoints[0];
	maxValues[0] = maxPoints[1];
	for (int i = 0; i < m_metrics.length; i++) {
	  minValues[i] = maxPoints[0];
	  maxValues[i] = maxPoints[1];
	}
	//  	System.out.println("Max points:");
	//  	for (int i = 0; i < maxPoints[0].length; i++) { System.out.println(maxPoints[0][i] + " - " + maxPoints[1][i]);}
      }
    } else { // find the enclosing hypercube for WeightedEuclidean etc.
      for (int i = 0; i < m_Instances.numInstances(); i++) {
	Instance instance = m_Instances.instance(i);
	for (int j = 0; j < attrIdxs.length; j++) {
	  double val = instance.value(attrIdxs[j]);
	  if (datasetWide) {
	    if (val < minValues[0][j]) {
	      minValues[0][j] = val;
	    }
	    if (val > maxValues[0][j]) {
	      maxValues[0][j] = val;
	    }
	  } else { // cluster-specific min's and max's  are needed
	    if (val < minValues[m_ClusterAssignments[i]][j]) {
	      minValues[m_ClusterAssignments[i]][j] = val;
	    }
	    if (val > maxValues[m_ClusterAssignments[i]][j]) {
	      maxValues[m_ClusterAssignments[i]][j] = val;
	    }
	  }
	}
      }
    }
    // get the max/min points
    if (datasetWide) {
      for (int i = 0; i < attrIdxs.length; i++) {
	m_maxCLPoints[0][0].setValue(attrIdxs[i], minValues[0][i]);
	m_maxCLPoints[0][1].setValue(attrIdxs[i], maxValues[0][i]);
      }
      // must copy these over all clusters - just for the first iteration
      for (int j = 1; j < m_NumClusters; j++) {
	for (int i = 0; i < attrIdxs.length; i++) {
	  m_maxCLPoints[j][0].setValue(attrIdxs[i], minValues[0][i]);
	  m_maxCLPoints[j][1].setValue(attrIdxs[i], maxValues[0][i]);
	}
      }
    } else { // cluster-specific
      for (int j = 0; j < m_NumClusters; j++) {
	for (int i = 0; i < attrIdxs.length; i++) {
	  m_maxCLPoints[j][0].setValue(attrIdxs[i], minValues[j][i]);
	  m_maxCLPoints[j][1].setValue(attrIdxs[i], maxValues[j][i]);
	}
      }
    }
    // calculate the distances
    if (datasetWide) {
      maxPenalties[0] = m_metrics[0].penaltySymmetric(m_maxCLPoints[0][0],
						      m_maxCLPoints[0][1]);
      m_maxCLDiffInstances[0] = m_metrics[0].createDiffInstance(m_maxCLPoints[0][0],
								 m_maxCLPoints[0][1]);
      for (int i = 1; i < maxPenalties.length; i++) {
	maxPenalties[i] = maxPenalties[0];
	m_maxCLDiffInstances[i] = m_maxCLDiffInstances[0];
      }
    } else { // cluster-specific - SHOULD BE FIXED!!!!
      // NOTE(review): this accumulates the same penalty attrIdxs.length times per
      // cluster and uses m_metrics[0] (not m_metrics[j]) for the diff instance --
      // likely the inner loop should not exist; flagged "SHOULD BE FIXED" above
      for (int j = 0; j < m_NumClusters; j++) {
	for (int i = 0; i < attrIdxs.length; i++) {
	  maxPenalties[j] += m_metrics[j].penaltySymmetric(m_maxCLPoints[j][0],
							   m_maxCLPoints[j][1]);
	  m_maxCLDiffInstances[j] = m_metrics[0].createDiffInstance(m_maxCLPoints[j][0],
								    m_maxCLPoints[j][1]);
	}
      }
    }
    System.out.println("Recomputed max CL penalties");
    return maxPenalties;
  }

  /**
   * Checks if instance has to be normalized and classifies the
   * instance using the current clustering
   *
   * @param instance the instance to be assigned to a cluster
   * @return the number of the assigned cluster as an integer
   * if the class is enumerated, otherwise the predicted value
   * @exception Exception if instance could not be classified
   * successfully */
  public int clusterInstance(Instance instance) throws Exception {
    return assignInstanceToCluster(instance);
  }

  /** Lookup the instance in the checksum hash, assuming transductive clustering.
   * The checksum is a weighted sum of the non-class attribute values; instances
   * with an equal checksum are then compared value-by-value (at float precision).
   * @param instance instance to be looked up
   * @return the index of the cluster to which the instance was assigned, -1 if the instance has not been clustered
   */
  protected int lookupInstanceCluster(Instance instance) throws Exception {
    int classIdx = instance.classIndex();
    double checksum = 0;
    // need to normalize using original metric, since cluster data is normalized similarly
    if (m_metric.doesNormalizeData()) {
      if (m_Trainable == TRAINING_INTERNAL) {
	m_metric.resetMetric();
      }
      m_metric.normalizeInstanceWeighted(instance);
    }
    double[] values1 = instance.toDoubleArray();
    for (int i = 0; i < values1.length; i++) {
      if (i != classIdx) {
	checksum += m_checksumCoeffs[i] * values1[i];
      }
    }
    // checksum is truncated to float so hashing tolerates small double drift
    Object list = m_checksumHash.get(new Double((float)checksum));
    if (list != null) {
      // go through the list of instances with the same checksum and find the one that is equivalent
      ArrayList checksumList = (ArrayList) list;
      for (int i = 0; i < checksumList.size(); i++) {
	int instanceIdx = ((Integer) checksumList.get(i)).intValue();
	Instance listInstance = m_Instances.instance(instanceIdx);
	double[] values2 = listInstance.toDoubleArray();
	boolean equal = true;
	for (int j = 0; j < values1.length && equal == true; j++) {
	  if (j != classIdx) {
	    if ((float)values1[j] != (float)values2[j]) {
	      equal = false;
	    }
	  }
	}
	if (equal == true) {
	  return m_ClusterAssignments[instanceIdx];
	}
      }
    }
    return -1;
  }

  /**
   * Assigns every training instance to its closest centroid under the
   * current metric, without considering constraints (Xing's approach,
   * intended for offline metrics such as BarHillelMetric/XingMetric).
   *
   * @return the number of instances whose cluster assignment changed
   * @exception Exception if a distance computation fails
   */
  public int assignAllInstancesToClusters() throws Exception {
    int numInstances = m_Instances.numInstances();
    boolean [] instanceAlreadyAssigned = new boolean[numInstances];
    int moved = 0;
    if (!m_isOfflineMetric) {
      System.err.println("WARNING!!!\n\nThis code should not be called if metric is not a BarHillelMetric or XingMetric!!!!\n\n");
    }
    for (int i=0; i<numInstances; i++) {
      instanceAlreadyAssigned[i] = false;
    }
    // now process points not in ML neighborhood sets
    // NOTE(review): nothing in this method marks instances as assigned before the
    // loop, so the "ML neighborhood" skip below never triggers here -- confirm
    // whether neighborhood pre-assignment was removed intentionally
    for (int instIdx = 0; instIdx < numInstances; instIdx++) {
      if (instanceAlreadyAssigned[instIdx]) {
	continue; // was already in some ML neighborhood
      }
      int bestCluster = 0;
      double bestDistance = Double.POSITIVE_INFINITY;
      for (int centroidIdx = 0; centroidIdx < m_NumClusters; centroidIdx++) {
	double sqDistance = m_metric.distance(m_Instances.instance(instIdx), m_ClusterCentroids.instance(centroidIdx));
	if (sqDistance < bestDistance) {
	  bestDistance = sqDistance;
	  bestCluster = centroidIdx;
	}
      }
      // accumulate objective function value
      //      m_Objective += bestDistance;
      // do we need to reassign the point?
      if (m_ClusterAssignments[instIdx] != bestCluster) {
	m_ClusterAssignments[instIdx] = bestCluster;
	instanceAlreadyAssigned[instIdx] = true;
	moved++;
      }
    }
    return moved;
  }

  /**
   * Classifies the instance using the current clustering, without considering constraints.
   * Only performs a transductive lookup via lookupInstanceCluster; throws when the
   * instance was not part of the clustered data.
   *
   * @param instance the instance to be assigned to a cluster
   * @return the number of the assigned cluster as an integer
   * if the class is enumerated, otherwise the predicted value
   * @exception Exception if instance could not be classified
   * successfully */
  public int assignInstanceToCluster(Instance instance) throws Exception {
    // NOTE(review): bestCluster/bestDistance/bestSimilarity are never used --
    // leftovers from a nearest-centroid fallback that no longer exists here
    int bestCluster = 0;
    double bestDistance = Double.POSITIVE_INFINITY;
    double bestSimilarity = Double.NEGATIVE_INFINITY;
    int lookupCluster;
    if (m_metric instanceof InstanceConverter) {
      Instance newInstance = ((InstanceConverter)m_metric).convertInstance(instance);
      lookupCluster = lookupInstanceCluster(newInstance);
    } else {
      lookupCluster = lookupInstanceCluster(instance);
    }
    if (lookupCluster >= 0) {
      return lookupCluster;
    }
    throw new Exception ("ACHTUNG!!!\n\nCouldn't lookup the instance!!! Size of hash = " + m_checksumHash.size());
  }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -