
File: .#mpckmeans.java.1.110

wekaUT is a Weka-based semi-supervised learning classifier package developed at the University of Texas at Austin.
Page 1 of 5
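Before the listing, a minimal usage sketch of the MPCKMeans clusterer that this file implements. The setters and the clusterInstance() call below appear in the source on this page; the package path, the default constructor, and the buildClusterer(Instances) entry point are assumed from the standard Weka Clusterer convention and may differ in wekaUT.

// Minimal usage sketch (assumptions: package weka.clusterers, default constructor,
// and the standard Weka buildClusterer(Instances) entry point).
import java.io.FileReader;
import weka.core.Instances;
import weka.clusterers.MPCKMeans;

public class MPCKMeansDemo {
  public static void main(String[] args) throws Exception {
    Instances data = new Instances(new FileReader("iris.arff"));
    data.setClassIndex(data.numAttributes() - 1);

    MPCKMeans clusterer = new MPCKMeans();
    clusterer.setNumClusters(3);          // k, see setNumClusters() below
    clusterer.setMustLinkWeight(1.0);     // weight of must-link constraint violations
    clusterer.setCannotLinkWeight(1.0);   // weight of cannot-link constraint violations
    clusterer.setMaxIterations(200);
    clusterer.setRandomSeed(42);

    clusterer.buildClusterer(data);       // assumed Weka entry point; constraint setup is not shown on this page

    // Transductive lookup: training instances are resolved via the checksum hash (see lookupInstanceCluster)
    for (int i = 0; i < data.numInstances(); i++) {
      System.out.println(i + " -> cluster " + clusterer.clusterInstance(data.instance(i)));
    }
  }
}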
//     double[][] maxPoints = ((WeightedMahalanobis)m_metric).getMaxPoints(m_ConstraintsHash, m_Instances);
//     minValues[0] = maxPoints[0];
//     maxValues[0] = maxPoints[1];
//     for (int i = 0; i < m_metrics.length; i++) {
//       minValues[i] = maxPoints[0];
//       maxValues[i] = maxPoints[1];
//     }
//
//     System.out.println("Max points:");
//
//     for (int i = 0; i < maxPoints[0].length; i++) { System.out.println(maxPoints[0][i] + " - " + maxPoints[1][i]); }
//   }
// } else { // find the enclosing hypercube for WeightedEuclidean etc.
    for (int i = 0; i < m_Instances.numInstances(); i++) {
      Instance instance = m_Instances.instance(i);
      for (int j = 0; j < attrIdxs.length; j++) {
        double val = instance.value(attrIdxs[j]);
        if (datasetWide) {
          if (val < minValues[0][j]) {
            minValues[0][j] = val;
          }
          if (val > maxValues[0][j]) {
            maxValues[0][j] = val;
          }
        } else { // cluster-specific min's and max's are needed
          if (val < minValues[m_ClusterAssignments[i]][j]) {
            minValues[m_ClusterAssignments[i]][j] = val;
          }
          if (val > maxValues[m_ClusterAssignments[i]][j]) {
            maxValues[m_ClusterAssignments[i]][j] = val;
          }
        }
      }
    }

    // get the max/min points
    if (datasetWide) {
      for (int i = 0; i < attrIdxs.length; i++) {
        m_maxCLPoints[0][0].setValue(attrIdxs[i], minValues[0][i]);
        m_maxCLPoints[0][1].setValue(attrIdxs[i], maxValues[0][i]);
      }
      // must copy these over all clusters - just for the first iteration
      for (int j = 1; j < m_NumClusters; j++) {
        for (int i = 0; i < attrIdxs.length; i++) {
          m_maxCLPoints[j][0].setValue(attrIdxs[i], minValues[0][i]);
          m_maxCLPoints[j][1].setValue(attrIdxs[i], maxValues[0][i]);
        }
      }
    } else { // cluster-specific
      for (int j = 0; j < m_NumClusters; j++) {
        for (int i = 0; i < attrIdxs.length; i++) {
          m_maxCLPoints[j][0].setValue(attrIdxs[i], minValues[j][i]);
          m_maxCLPoints[j][1].setValue(attrIdxs[i], maxValues[j][i]);
        }
      }
    }

    // calculate the distances
    if (datasetWide) {
      maxPenalties[0] = m_metrics[0].penaltySymmetric(m_maxCLPoints[0][0],
                                                      m_maxCLPoints[0][1]);
      m_maxCLDiffInstances[0] = m_metrics[0].createDiffInstance(m_maxCLPoints[0][0],
                                                                m_maxCLPoints[0][1]);
      for (int i = 1; i < maxPenalties.length; i++) {
        maxPenalties[i] = maxPenalties[0];
        m_maxCLDiffInstances[i] = m_maxCLDiffInstances[0];
      }
    } else { // cluster-specific - SHOULD BE FIXED!!!!
      for (int j = 0; j < m_NumClusters; j++) {
        for (int i = 0; i < attrIdxs.length; i++) {
          maxPenalties[j] += m_metrics[j].penaltySymmetric(m_maxCLPoints[j][0],
                                                           m_maxCLPoints[j][1]);
          m_maxCLDiffInstances[j] = m_metrics[0].createDiffInstance(m_maxCLPoints[j][0],
                                                                    m_maxCLPoints[j][1]);
        }
      }
    }
    return maxPenalties;
  }

  /**
   * Checks if instance has to be normalized and classifies the
   * instance using the current clustering
   *
   * @param instance the instance to be assigned to a cluster
   * @return the number of the assigned cluster as an integer
   * if the class is enumerated, otherwise the predicted value
   * @exception Exception if instance could not be classified
   * successfully */
  public int clusterInstance(Instance instance) throws Exception {
    return assignInstanceToCluster(instance);
  }

  /** lookup the instance in the checksum hash, assuming transductive clustering
   * @param instance instance to be looked up
   * @return the index of the cluster to which the instance was assigned, -1 if the instance has not been clustered
   */
  protected int lookupInstanceCluster(Instance instance) throws Exception {
    int classIdx = instance.classIndex();
    double checksum = 0;

    // need to normalize using original metric, since cluster data is normalized similarly
    if (m_metric.doesNormalizeData()) {
      if (m_Trainable == TRAINING_INTERNAL) {
        m_metric.resetMetric();
      }
      m_metric.normalizeInstanceWeighted(instance);
    }

    double[] values1 = instance.toDoubleArray();
    for (int i = 0; i < values1.length; i++) {
      if (i != classIdx) {
        checksum += m_checksumCoeffs[i] * values1[i];
      }
    }

    Object list = m_checksumHash.get(new Double((float)checksum));
    if (list != null) {
      // go through the list of instances with the same checksum and find the one that is equivalent
      ArrayList checksumList = (ArrayList) list;
      for (int i = 0; i < checksumList.size(); i++) {
        int instanceIdx = ((Integer) checksumList.get(i)).intValue();
        Instance listInstance = m_Instances.instance(instanceIdx);
        double[] values2 = listInstance.toDoubleArray();
        boolean equal = true;
        for (int j = 0; j < values1.length && equal == true; j++) {
          if (j != classIdx) {
            if ((float)values1[j] != (float)values2[j]) {
              equal = false;
            }
          }
        }
        if (equal == true) {
          return m_ClusterAssignments[instanceIdx];
        }
      }
    }
    return -1;
  }

  /**
   * Classifies the instances using the current clustering, moves
   * must-linked points together (Xing's approach)
   *
   * @return the number of points that were reassigned
   * @exception Exception if instances could not be classified
   * successfully */
  public int assignAllInstancesToClusters() throws Exception {
    int numInstances = m_Instances.numInstances();
    boolean[] instanceAlreadyAssigned = new boolean[numInstances];
    int moved = 0;

    if (!m_isOfflineMetric) {
      System.err.println("WARNING!!!\n\nThis code should not be called if metric is not a BarHillelMetric or XingMetric!!!!\n\n");
    }

    for (int i = 0; i < numInstances; i++) {
      instanceAlreadyAssigned[i] = false;
    }

    // now process points not in ML neighborhood sets
    for (int instIdx = 0; instIdx < numInstances; instIdx++) {
      if (instanceAlreadyAssigned[instIdx]) {
        continue; // was already in some ML neighborhood
      }

      int bestCluster = 0;
      double bestDistance = Double.POSITIVE_INFINITY;
      for (int centroidIdx = 0; centroidIdx < m_NumClusters; centroidIdx++) {
        double sqDistance = m_metric.distance(m_Instances.instance(instIdx),
                                              m_ClusterCentroids.instance(centroidIdx));
        if (sqDistance < bestDistance) {
          bestDistance = sqDistance;
          bestCluster = centroidIdx;
        }
      }

      // accumulate objective function value
      //      m_Objective += bestDistance;

      // do we need to reassign the point?
      if (m_ClusterAssignments[instIdx] != bestCluster) {
        m_ClusterAssignments[instIdx] = bestCluster;
        instanceAlreadyAssigned[instIdx] = true;
        moved++;
      }
    }
    return moved;
  }

  /**
   * Classifies the instance using the current clustering, without considering constraints
   *
   * @param instance the instance to be assigned to a cluster
   * @return the number of the assigned cluster as an integer
   * if the class is enumerated, otherwise the predicted value
   * @exception Exception if instance could not be classified
   * successfully */
  public int assignInstanceToCluster(Instance instance) throws Exception {
    int bestCluster = 0;
    double bestDistance = Double.POSITIVE_INFINITY;
    double bestSimilarity = Double.NEGATIVE_INFINITY;
    int lookupCluster;

    if (m_metric instanceof InstanceConverter) {
      Instance newInstance = ((InstanceConverter)m_metric).convertInstance(instance);
      lookupCluster = lookupInstanceCluster(newInstance);
    } else {
      lookupCluster = lookupInstanceCluster(instance);
    }
    if (lookupCluster >= 0) {
      return lookupCluster;
    }
    throw new Exception("ACHTUNG!!!\n\nCouldn't lookup the instance!!! Size of hash = " + m_checksumHash.size());
  }

  /** Set the cannot link constraint weight */
  public void setCannotLinkWeight(double w) {
    m_CLweight = w;
  }

  /** Return the cannot link constraint weight */
  public double getCannotLinkWeight() {
    return m_CLweight;
  }

  /** Set the must link constraint weight */
  public void setMustLinkWeight(double w) {
    m_MLweight = w;
  }

  /** Return the must link constraint weight */
  public double getMustLinkWeight() {
    return m_MLweight;
  }

  /** Return the number of clusters */
  public int getNumClusters() {
    return m_NumClusters;
  }

  /** A duplicate function to conform to Clusterer abstract class.
   * @returns the number of clusters
   */
  public int numberOfClusters() {
    return getNumClusters();
  }

  /** Set the m_SeedHash */
  public void setSeedHash(HashMap seedhash) {
    System.err.println("Not implemented here");
  }

  /**
   * Set the random number seed
   * @param s the seed
   */
  public void setRandomSeed(int s) {
    m_RandomSeed = s;
  }

  /** Return the random number seed */
  public int getRandomSeed() {
    return m_RandomSeed;
  }

  /** Set the maximum number of iterations */
  public void setMaxIterations(int maxIterations) {
    m_maxIterations = maxIterations;
  }

  /** Get the maximum number of iterations */
  public int getMaxIterations() {
    return m_maxIterations;
  }

  /** Set the maximum number of blank iterations (those where no points are moved) */
  public void setMaxBlankIterations(int maxBlankIterations) {
    m_maxBlankIterations = maxBlankIterations;
  }

  /** Get the maximum number of blank iterations */
  public int getMaxBlankIterations() {
    return m_maxBlankIterations;
  }

  /**
   * Set the minimum value of the objective function difference required for convergence
   * @param objFunConvergenceDifference the minimum value of the objective function difference required for convergence
   */
  public void setObjFunConvergenceDifference(double objFunConvergenceDifference) {
    m_ObjFunConvergenceDifference = objFunConvergenceDifference;
  }

  /**
   * Get the minimum value of the objective function difference required for convergence
   * @returns the minimum value of the objective function difference required for convergence
   */
  public double getObjFunConvergenceDifference() {
    return m_ObjFunConvergenceDifference;
  }

  /** Sets training instances */
  public void setInstances(Instances instances) {
    m_Instances = instances;

    // create the checksum coefficients
    m_checksumCoeffs = new double[instances.numAttributes()];
    for (int i = 0; i < m_checksumCoeffs.length; i++) {
      m_checksumCoeffs[i] = m_RandomNumberGenerator.nextDouble();
    }

    // hash the instance checksums
    m_checksumHash = new HashMap(instances.numInstances());
    int classIdx = instances.classIndex();
    for (int i = 0; i < instances.numInstances(); i++) {
      Instance instance = instances.instance(i);
      double[] values = instance.toDoubleArray();
      double checksum = 0;

      for (int j = 0; j < values.length; j++) {
        if (j != classIdx) {
          checksum += m_checksumCoeffs[j] * values[j];
        }
      }

      // take care of chaining
      Object list = m_checksumHash.get(new Double((float)checksum));
      ArrayList idxList = null;
      if (list == null) {
        idxList = new ArrayList();
        m_checksumHash.put(new Double((float)checksum), idxList);
      } else { // chaining
        idxList = (ArrayList) list;
      }
      idxList.add(new Integer(i));
    }
  }

  /** Return training instances */
  public Instances getInstances() {
    return m_Instances;
  }

  /**
   * Set the number of clusters to generate
   *
   * @param n the number of clusters to generate
   */
  public void setNumClusters(int n) {
    m_NumClusters = n;
    if (m_verbose) {
      System.out.println("Number of clusters: " + n);
    }
  }

  /** Is the objective function decreasing or increasing?
   */
  public boolean isObjFunDecreasing() {
    return m_objFunDecreasing;
  }

  /**
   * Set the distance metric
   *
   * @param m the metric
   */
  public void setMetric(LearnableMetric m) {
    String metricName = m.getClass().getName();
    m_metric = m;
    m_metricLearner.setMetric(m_metric);
    m_metricLearner.setClusterer(this);
  }

  /**
   * get the distance metric
   * @returns the distance metric used
   */
  public LearnableMetric getMetric() {
    return m_metric;
  }

  /**
   * get the array of metrics
   */
  public LearnableMetric[] getMetrics() {
    return m_metrics;
  }

  /** Set/get the metric learner */
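The lookupInstanceCluster() and setInstances() methods above implement a transductive lookup: each training instance is reduced to a pseudo-random weighted checksum of its non-class attribute values, the checksum is truncated to float precision and used as a hash key, colliding instances are chained in a list of indices, and a query is resolved by an exact per-attribute comparison at float precision against the chained candidates. A self-contained sketch of that technique follows; the class and method names here are illustrative, not part of wekaUT.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Random;

/** Illustrative sketch of the checksum-hash lookup used above (not part of wekaUT). */
public class ChecksumIndex {
  private final double[] coeffs;    // one random coefficient per attribute, as in setInstances()
  private final double[][] data;    // stored rows, needed for exact verification on lookup
  private final HashMap<Float, ArrayList<Integer>> table = new HashMap<Float, ArrayList<Integer>>();

  public ChecksumIndex(double[][] rows, long seed) {
    data = rows;
    Random rnd = new Random(seed);
    coeffs = new double[rows[0].length];
    for (int j = 0; j < coeffs.length; j++) {
      coeffs[j] = rnd.nextDouble();
    }
    for (int i = 0; i < rows.length; i++) {
      float key = checksum(rows[i]);
      ArrayList<Integer> chain = table.get(key);   // chaining, as in setInstances()
      if (chain == null) {
        chain = new ArrayList<Integer>();
        table.put(key, chain);
      }
      chain.add(i);
    }
  }

  /** Weighted sum of the attribute values, truncated to float precision. */
  private float checksum(double[] row) {
    double sum = 0;
    for (int j = 0; j < row.length; j++) {
      sum += coeffs[j] * row[j];
    }
    return (float) sum;
  }

  /** Returns the stored row index matching the query exactly at float precision, or -1. */
  public int lookup(double[] query) {
    ArrayList<Integer> chain = table.get(checksum(query));
    if (chain == null) return -1;
    for (int idx : chain) {
      boolean equal = true;
      for (int j = 0; j < query.length && equal; j++) {
        if ((float) query[j] != (float) data[idx][j]) equal = false;
      }
      if (equal) return idx;
    }
    return -1;
  }
}

In the real class the stored value is the cluster assignment of the matched training instance; an unseen instance falls through to -1, which is why assignInstanceToCluster() throws when the lookup fails.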
