pcsoftkmeans.java

来自「wekaUT（University of Texas at Austin 开发的基于 weka 的扩展工具包）」· Java 代码 · 共 1,952 行 · 第 1/5 页

JAVA
1,952
字号
     return  newVector.elements();  }  public String [] getOptions ()  {    String[] options = new String[80];    int current = 0;        if (!m_Seedable) {      options[current++] = "-NS";    }        options[current++] = "-A";    options[current++] = "" + getAlgorithm().getSelectedTag().getID();    options[current++] = "-N";    options[current++] = "" + getNumClusters();    options[current++] = "-R";    options[current++] = "" + getRandomSeed();    options[current++] = "-ML";    options[current++] = "" + m_MustLinkWeight;    options[current++] = "-CL";    options[current++] = "" + m_CannotLinkWeight;        options[current++] = "-M";    options[current++] = Utils.removeSubstring(m_metric.getClass().getName(), "weka.core.metrics.");    if (m_metric instanceof OptionHandler) {      String[] metricOptions = ((OptionHandler)m_metric).getOptions();      for (int i = 0; i < metricOptions.length; i++) {	options[current++] = metricOptions[i];      }    }         while (current < options.length) {      options[current++] = "";    }        return  options;  }    /**   * Parses a given list of options.   
* @param options the list of options as an array of strings   * @exception Exception if an option is not supported   *   **/  public void setOptions (String[] options)    throws Exception {    String optionString = Utils.getOption('N', options);    if (optionString.length() != 0) {      setNumClusters(Integer.parseInt(optionString));    }    optionString = Utils.getOption('R', options);    if (optionString.length() != 0) {      setRandomSeed(Integer.parseInt(optionString));    }    optionString = Utils.getOption('A', options);    if (optionString.length() != 0) {      setAlgorithm(new SelectedTag(Integer.parseInt(optionString), TAGS_ALGORITHM));    }    optionString = Utils.getOption('M', options);    if (optionString.length() != 0) {      String[] metricSpec = Utils.splitOptions(optionString);      String metricName = metricSpec[0];       metricSpec[0] = "";      setMetric((LearnableMetric) LearnableMetric.forName(metricName, metricSpec));    }  }  /**      * return a string describing this clusterer   *   * @return a description of the clusterer as a string   */  public String toString() {    StringBuffer temp = new StringBuffer();    temp.append("\nkMeans\n======\n");    temp.append("\nNumber of iterations: " + m_Iterations+"\n");//      temp.append("\nCluster centroids:\n");//      for (int i = 0; i < m_NumClusters; i++) {//        temp.append("\nCluster "+i+"\n\t");//      }//      temp.append("\n");    return temp.toString();  }  /**   * set the verbosity level of the clusterer   * @param verbose messages on(true) or off (false)   */  public void setVerbose (boolean verbose) {    m_verbose = verbose;  }  /**   * get the verbosity level of the clusterer   * @return messages on(true) or off (false)   */  public boolean getVerbose () {    return m_verbose;  }       /**   * Train the clusterer using specified parameters   *   * @param instances Instances to be used for training   */  public void trainClusterer (Instances instances) throws Exception {    if 
(m_metric instanceof LearnableMetric) {      if (((LearnableMetric)m_metric).getTrainable()) {	((LearnableMetric)m_metric).learnMetric(instances);      }      else {	throw new Exception ("Metric is not trainable");      }    }    else {      throw new Exception ("Metric is not trainable");    }  }  /** Normalizes Instance or SparseInstance   *   * @author Sugato Basu   * @param inst Instance to be normalized   */  public void normalize(Instance inst) throws Exception {    if (inst instanceof SparseInstance) {      normalizeSparseInstance(inst);    }    else {      normalizeInstance(inst);    }  }  /** Normalizes the values of a normal Instance in L2 norm   *   * @author Sugato Basu   * @param inst Instance to be normalized   */  public void normalizeInstance(Instance inst) throws Exception{    double norm = 0;    double values [] = inst.toDoubleArray();    if (inst instanceof SparseInstance) {      System.err.println("Is SparseInstance, using normalizeSparseInstance function instead");      normalizeSparseInstance(inst);    }        for (int i=0; i<values.length; i++) {      if (i != inst.classIndex()) { // don't normalize the class index 	norm += values[i] * values[i];      }    }    norm = Math.sqrt(norm);    for (int i=0; i<values.length; i++) {      if (i != inst.classIndex()) { // don't normalize the class index 	values[i] /= norm;      }    }    inst.setValueArray(values);  }  /** Normalizes the values of a SparseInstance in L2 norm   *   * @author Sugato Basu   * @param inst SparseInstance to be normalized   */  public void normalizeSparseInstance(Instance inst) throws Exception{    double norm=0;    int length = inst.numValues();    if (!(inst instanceof SparseInstance)) {      System.err.println("Not SparseInstance, using normalizeInstance function instead");      normalizeInstance(inst);    }    for (int i=0; i<length; i++) {      if (inst.index(i) != inst.classIndex()) { // don't normalize the class index	norm += inst.valueSparse(i) * 
inst.valueSparse(i);      }    }    norm = Math.sqrt(norm);    for (int i=0; i<length; i++) { // don't normalize the class index      if (inst.index(i) != inst.classIndex()) {	inst.setValueSparse(i, inst.valueSparse(i)/norm);      }    }  }    /** Fast version of meanOrMode - streamlined from Instances.meanOrMode for efficiency    *  Does not check for missing attributes, assumes numeric attributes, assumes Sparse instances   */  protected double[] meanOrMode(Instances insts) {    int numAttributes = insts.numAttributes();    double [] value = new double[numAttributes];    double weight = 0;        for (int i=0; i<numAttributes; i++) {      value[i] = 0;    }    for (int j=0; j<insts.numInstances(); j++) {      SparseInstance inst = (SparseInstance) (insts.instance(j));      weight += inst.weight();      for (int i=0; i<inst.numValues(); i++) {	int indexOfIndex = inst.index(i);	value[indexOfIndex]  += inst.weight() * inst.valueSparse(i);      }    }        if (Utils.eq(weight, 0)) {      for (int k=0; k<numAttributes; k++) {	value[k] = 0;      }    }    else {      for (int k=0; k<numAttributes; k++) {	value[k] = value[k] / weight;      }    }    return value;  }  /**   * Gets a Double representing the current date and time.   * eg: 1:46pm on 20/5/1999 -> 19990520.1346   *   * @return a value of type Double   */  public static Double getTimeStamp() {    Calendar now = Calendar.getInstance(TimeZone.getTimeZone("UTC"));    double timestamp = now.getTimeInMillis();    return new Double(timestamp);  }  /**   * Main method for testing this class.   
*   */  public static void main (String[] args) {    try {          testCase();      //System.out.println(ClusterEvaluation.evaluateClusterer(new PCSoftKMeans(), args));    }    catch (Exception e) {      System.out.println(e.getMessage());      e.printStackTrace();    }  }  protected static void testCase() {    try {      //String dataset = new String("lowd");      String dataset = new String("highd");      if (dataset.equals("lowd")) {	//////// Low-D data	String datafile = "/u/ml/software/weka-latest/data/iris.arff";		// set up the data	FileReader reader = new FileReader (datafile);	Instances data = new Instances (reader);		// Make the last attribute be the class 	int classIndex = data.numAttributes()-1;	data.setClassIndex(classIndex); // starts with 0	System.out.println("ClassIndex is: " + classIndex);		// Remove the class labels before clustering	Instances clusterData = new Instances(data);	clusterData.deleteClassAttribute();			// create random constraints from the labeled training data	int numPairs = 10, num=0;	ArrayList labeledPair = new ArrayList(numPairs);	Random rand = new Random(42);	System.out.println("Initializing constraint matrix:");	while (num < numPairs) {	  int i = (int) (data.numInstances()*rand.nextFloat());		  int j = (int) (data.numInstances()*rand.nextFloat());	  int first = (i<j)? i:j;	  int second = (i>=j)? i:j;	  int linkType = (data.instance(first).classValue() == 			  data.instance(second).classValue())? 	    
InstancePair.MUST_LINK:InstancePair.CANNOT_LINK;	  InstancePair pair = new InstancePair(first, second, linkType);	  if (first!=second && !labeledPair.contains(pair)) {	    labeledPair.add(pair);	    num++;	  }	}	System.out.println("Finished initializing constraints");		// create clusterer	PCSoftKMeans pckmeans = new PCSoftKMeans();	System.out.println("\nClustering the iris data using PCKmeans...\n");	pckmeans.setAlgorithm(new SelectedTag(ALGORITHM_SIMPLE, TAGS_ALGORITHM));	WeightedEuclidean euclidean = new WeightedEuclidean();	euclidean.setExternal(false);	pckmeans.setMetric(euclidean);	pckmeans.setVerbose(false);	pckmeans.setSeedable(false);	pckmeans.setNumClusters(data.numClasses());	// do clustering	pckmeans.buildClusterer(labeledPair, clusterData, data, data.numInstances());	pckmeans.printIndexClusters();      }      else if (dataset.equals("highd")) {	//////// Newsgroup data	String datafile = "/u/ml/data/CCSfiles/arffFromCCS/different-100_fromCCS.arff";		// set up the data	FileReader reader = new FileReader (datafile);	Instances data = new Instances (reader);		// Make the last attribute be the class 	int classIndex = data.numAttributes()-1;	data.setClassIndex(classIndex); // starts with 0	System.out.println("ClassIndex is: " + classIndex);		// Remove the class labels before clustering	Instances clusterData = new Instances(data);	clusterData.deleteClassAttribute();		// create random constraints from the labeled training data	int numPairs = 100, num=0;	ArrayList labeledPair = new ArrayList(numPairs);	Random rand = new Random(42); 	System.out.println("Initializing constraint matrix:");	while (num < numPairs) {	  int i = (int) (data.numInstances()*rand.nextFloat());		  int j = (int) (data.numInstances()*rand.nextFloat());	  int first = (i<j)? i:j;	  int second = (i>=j)? i:j;	  int linkType = (data.instance(first).classValue() == 			  data.instance(second).classValue())? 	    
InstancePair.MUST_LINK:InstancePair.CANNOT_LINK;	  InstancePair pair = new InstancePair(first, second, linkType);	  if (first!=second && !labeledPair.contains(pair)) {	    labeledPair.add(pair);	    num++;	  }	}	System.out.println("Finished initializing constraints");		// create clusterer	PCSoftKMeans pckmeans = new PCSoftKMeans();	System.out.println("\nClustering the news data using PCKmeans...\n");	pckmeans.resetClusterer();	pckmeans.setAlgorithm(new SelectedTag(ALGORITHM_SPHERICAL, TAGS_ALGORITHM));	WeightedDotP dotp = new WeightedDotP();	dotp.setExternal(false);	dotp.setLengthNormalized(true);	pckmeans.setMetric(dotp);	pckmeans.setVerbose(false);	pckmeans.setSeedable(true);	pckmeans.setNumClusters(data.numClasses());	// do clustering	pckmeans.buildClusterer(labeledPair, clusterData, data, clusterData.numInstances());	pckmeans.printIndexClusters();      }    }    catch (Exception e) {      e.printStackTrace();    }  }}// TODO: Add init using farthest first

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?