pcsoftkmeans.java

来自「wekaUT是 university texas austin 开发的基于wek」· Java 代码 · 共 1,952 行 · 第 1/5 页

JAVA
1,952
字号
  /** Dummy: not implemented for PCSoftKMeans */  public int[] bestInstancesForActiveLearning(int numActive) throws Exception{    throw new Exception("Not implemented for PCSoftKMeans");  }  /** Dummy: not implemented for PCSoftKMeans */  public InstancePair[] bestPairsForActiveLearning(int numActive) throws Exception{    throw new Exception("Not implemented for PCSoftKMeans");  }  /**   * Checks if instance has to be normalized and returns the   * distribution of the instance using the current clustering   *   * @param instance the instance under consideration   * @return an array containing the estimated membership    * probabilities of the test instance in each cluster (this    * should sum to at most 1)   * @exception Exception if distribution could not be    * computed successfully   */  public double[] distributionForInstance(Instance instance) throws Exception {    if (m_Algorithm == ALGORITHM_SPHERICAL) { // check here, since evaluateModel calls this function on test data      normalize(instance);    }    return null;  }  /**   * Computes the density for a given instance.   *    * @param inst the instance to compute the density for   * @return the density.   * @exception Exception if the density could not be computed   * successfully   */  public double densityForInstance(Instance inst) throws Exception {    throw new Exception("Not implemented for PCSoftKMeans, since posterior probs directly computed and density weights not stored!!");  }  /** lookup the instance in the checksum hash   * @param instance instance to be looked up   * @return the index of the cluster to which the instance was assigned, -1 if the instance has not bee clustered   */  protected int lookupInstanceCluster(Instance instance) {    int classIdx = instance.classIndex();    double[] values1 = instance.toDoubleArray();    double checksum = 0;     for (int i = 0; i < values1.length; i++) {      if (i != classIdx) {	checksum += m_checksumCoeffs[i] * values1[i];       }     }    Object list = m_checksumHash.get(new Double(checksum));    if (list != null) {      // go through the list of instances with the same checksum and find the one that is equivalent      ArrayList checksumList = (ArrayList) list;      for (int i = 0; i < checksumList.size(); i++) {	int instanceIdx = ((Integer) checksumList.get(i)).intValue();	Instance listInstance = m_Instances.instance(instanceIdx);	double[] values2 = listInstance.toDoubleArray();	boolean equal = true; 	for (int j = 0; j < values1.length && equal == true; j++) {	  if (j != classIdx) {	    if (values1[j] != values2[j]) {	      equal = false;	    }	  } 	}	if (equal == true) {	  return m_ClusterAssignments[instanceIdx]; 	}      }     }     return -1;   }  /**   * Classifies the instance using the current clustering, without considering constraints   *   * @param instance the instance to be assigned to a cluster   * @return the number of the assigned cluster as an integer   * if the class is enumerated, otherwise the predicted value   * @exception Exception if instance could not be classified   * successfully */  public int assignInstanceToCluster(Instance instance) throws Exception {    int bestCluster = 0;    double bestDistance = Double.POSITIVE_INFINITY;    double bestSimilarity = Double.NEGATIVE_INFINITY;    // lookup the cluster assignment of the instance    int lookupCluster = lookupInstanceCluster(instance);    if (lookupCluster >= 0) {      return lookupCluster;    }    System.err.println("\n\nWARNING!!! Couldn't lookup instance - it wasn't in the clustering!!!\n\n");    System.err.println("\n\n Assuming user wants prediction of new test instance based on clustering model ...\nDoing instance assignment without constraints!!!\n\n");    for (int i = 0; i < m_NumClusters; i++) {      double distance = 0, similarity = 0;      if (!m_objFunDecreasing) {	similarity = m_metric.similarity(instance, m_ClusterCentroids.instance(i));	if (similarity > bestSimilarity) {	  bestSimilarity = similarity;	  bestCluster = i;	}      } else {	distance = m_metric.distance(instance, m_ClusterCentroids.instance(i));	if (distance < bestDistance) {	  bestDistance = distance;	  bestCluster = i;	}      }    }    return bestCluster;  }    /** Set the cannot link constraint weight */  public void setCannotLinkWeight(double w) {    m_CannotLinkWeight = w;  }  /** Return the cannot link constraint weight */  public double getCannotLinkWeight() {    return m_CannotLinkWeight;  }  /** Set the must link constraint weight */  public void setMustLinkWeight(double w) {    m_MustLinkWeight = w;  }  /** Return the must link constraint weight */  public double getMustLinkWeight() {    return m_MustLinkWeight;  }  /** Return the number of clusters */  public int getNumClusters() {    return m_NumClusters;  }  /** A duplicate function to conform to Clusterer abstract class.   * @returns the number of clusters   */  public int numberOfClusters() {    return getNumClusters();  }     /** Set the m_SeedHash */  public void setSeedHash(HashMap seedhash) {    System.err.println("Not implemented here");  }     /**   * Set the random number seed   * @param s the seed   */  public void setRandomSeed (int s) {    m_RandomSeed = s;  }      /** Return the random number seed */  public int getRandomSeed () {    return  m_RandomSeed;  }  /**   * Set the minimum value of the objective function difference required for convergence   * @param objFunConvergenceDifference the minimum value of the objective function difference required for convergence   */  public void setObjFunConvergenceDifference(double objFunConvergenceDifference) {    m_ObjFunConvergenceDifference = objFunConvergenceDifference;  }    /**   * Get the minimum value of the objective function difference required for convergence   * @returns the minimum value of the objective function difference required for convergence   */  public double getObjFunConvergenceDifference() {    return m_ObjFunConvergenceDifference;  }     /** Sets training instances */  public void setInstances(Instances instances) {    m_Instances = instances;    // create the checksum coefficients    m_checksumCoeffs = new double[instances.numAttributes()];    Random r = new Random(instances.numInstances());     for (int i = 0; i < m_checksumCoeffs.length; i++) {      m_checksumCoeffs[i] = r.nextDouble();    }    // hash the instance checksums    m_checksumHash = new HashMap(instances.numInstances());    int classIdx = instances.classIndex();    for (int i = 0; i < instances.numInstances(); i++) {      Instance instance = instances.instance(i);      double[] values = instance.toDoubleArray();      double checksum = 0;      for (int j = 0; j < values.length; j++) {	if (j != classIdx) {	  checksum += m_checksumCoeffs[j] * values[j]; 	}       }      // take care of chaining      Object list = m_checksumHash.get(new Double(checksum));      ArrayList idxList = null;       if (list == null) {	idxList = new ArrayList();	m_checksumHash.put(new Double(checksum), idxList);      } else { // chaining	idxList = (ArrayList) list;      }      idxList.add(new Integer(i));    }   }  /** Return training instances */  public Instances getInstances() {    return m_Instances;  }  /**   * Set the number of clusters to generate   *   * @param n the number of clusters to generate   */  public void setNumClusters(int n) {    m_NumClusters = n;    if (m_verbose) {      System.out.println("Number of clusters: " + n);    }  }  /**   * Set the distance metric   *   * @param s the metric   */  public void setMetric (LearnableMetric m) {    m_metric = m;    String metricName = m_metric.getClass().getName();    System.out.println("Setting m_metric to " + metricName);    m_objFunDecreasing = m.isDistanceBased();  }  /**   * Get the distance metric   *   * @returns the distance metric used   */  public Metric getMetric () {    return m_metric;  }  /**   * Set the KMeans algorithm.  Values other than   * ALGORITHM_SIMPLE or ALGORITHM_SPHERICAL will be ignored   *   * @param algo algorithm type   */  public void setAlgorithm (SelectedTag algo)  {    if (algo.getTags() == TAGS_ALGORITHM) {      if (m_verbose) {	System.out.println("Algorithm: " + algo.getSelectedTag().getReadable());      }      m_Algorithm = algo.getSelectedTag().getID();    }  }  /**   * Get the KMeans algorithm type. Will be one of   * ALGORITHM_SIMPLE or ALGORITHM_SPHERICAL   *   * @returns algorithm type   */  public SelectedTag getAlgorithm ()  {    return new SelectedTag(m_Algorithm, TAGS_ALGORITHM);  }      /** Read the seeds from a hastable, where every key is an instance and every value is:   * the cluster assignment of that instance    * seedVector vector containing seeds   */    public void seedClusterer(HashMap seedHash) {    System.err.println("Not implemented here");  }   /** Prints clusters */  public void printClusters () throws Exception{    ArrayList clusters = getClusters();    for (int i=0; i<clusters.size(); i++) {      Cluster currentCluster = (Cluster) clusters.get(i);      System.out.println("\nCluster " + i + ": " + currentCluster.size() + " instances");      if (currentCluster == null) {	System.out.println("(empty)");      }      else {	for (int j=0; j<currentCluster.size(); j++) {	  Instance instance = (Instance) currentCluster.get(j);		  System.out.println("Instance: " + instance);	}      }    }  }  /**   * Computes the clusters from the cluster assignments, for external access   *    * @exception Exception if clusters could not be computed successfully   */      public ArrayList getClusters() throws Exception {    m_Clusters = new ArrayList();    Cluster [] clusterArray = new Cluster[m_NumClusters];    for (int i=0; i < m_Instances.numInstances(); i++) {	Instance inst = m_Instances.instance(i);	if(clusterArray[m_ClusterAssignments[i]] == null)	   clusterArray[m_ClusterAssignments[i]] = new Cluster();	clusterArray[m_ClusterAssignments[i]].add(inst, 1);    }    for (int j =0; j< m_NumClusters; j++)       m_Clusters.add(clusterArray[j]);    return m_Clusters;  }  /**   * Computes the clusters from the cluster assignments, for external access   *    * @exception Exception if clusters could not be computed successfully   */      public HashMap[] getIndexClusters() throws Exception {//      m_IndexClusters = new HashMap[m_NumClusters];//      for (int j=0; j < m_NumClusters; j++) { //        m_IndexClusters[j] = new HashMap();//        for (int i=0; i < m_Instances.numInstances(); i++) {//  	m_IndexClusters[j].put(new Integer(i),new Double(m_ClusterDistribution[i][j]));//        }//      }//      return m_IndexClusters;    return null;  }  public Enumeration listOptions () {    Vector newVector = new Vector(10);     newVector.addElement(new Option("\tnumber of clusters (default = 3)." 				    , "N", 1, "-N <num>"));     newVector.addElement(new Option("\trandom number seed (default 1)"				     , "R", 1, "-R <num>"));     newVector.addElement(new Option("\tperform no seeding (default false)"				     , "NS", 1, "-NS"));     newVector.addElement(new Option("\tperform active learning (default false)"				     , "A", 1, "-A"));     newVector.addElement(new Option("\tphase two of active learning is random (default false)"				     , "P2", 1, "-P2"));     newVector.addElement(new Option("\tdo only Explore phase in active learning (default false)"				     , "E", 1, "-E"));     newVector.addElement(new Option("\tmetric type (default WeightedEuclidean)"				     , "M", 1, "-M <string> (WeightedEuclidean or WeightedDotP)"));          newVector.addElement(new Option("\tconstraints file"				     , "C", 1, "-C <string> (each line is of the form \"firstID\\tsecondID\\t<+1/-1>\", where +1=>must-link, -1=>cannot-link)"));     newVector.addElement(new Option("\tmust link weight (default 1)"				     , "ML", 1, "-ML <double>"));     newVector.addElement(new Option("\tcannot link weight (default 1)"				     , "CL", 1, "-CL <double>"));     newVector.addElement(new Option("\talgorithm type (default Simple)"				     , "A", 1, "-A <string> (Simple => Simple-KMeans, Spherical => Spherical-KMeans)"));

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?