📄 clusterevaluation.java

📁 一个数据挖掘系统的源码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34

      for (int j = 0; j < test.numInstances(); j++) {
	try {
	  double temp = ((DistributionClusterer)clusterer).
	    densityForInstance(test.instance(j));
	  //	double temp = Utils.sum(tempDist);

	  if (temp > 0) {
	    foldAv += Math.log(temp);
	  }
	} catch (Exception ex) {
	  // unclustered instances
	}
      }

      CvAv += (foldAv/test.numInstances());
    }

    CvAv /= numFolds;
    CvString.append("\n" + numFolds
		    + " fold CV Log Likelihood: "
		    + Utils.doubleToString(CvAv, 6, 4)
		    + "\n");
    return  CvString.toString();
  }


  // ===============
  // Private methods
  // ===============
  /**
   * Print the cluster statistics for either the training
   * or the testing data.
   *
   * @param clusterer the clusterer to use for generating statistics.
   * @return a string containing cluster statistics.
   * @exception if statistics can't be generated.
   */
  private static String printClusterStats (Clusterer clusterer,
					   String fileName)
    throws Exception
  {
    StringBuffer text = new StringBuffer();
    int i = 0;
    int cnum;
    double loglk = 0.0;
    double[] dist;
    double temp;
    int cc = clusterer.numberOfClusters();
    double[] instanceStats = new double[cc];
    int unclusteredInstances = 0;

    if (fileName.length() != 0) {
      BufferedReader inStream = null;

      try {
	inStream = new BufferedReader(new FileReader(fileName));
      }
      catch (Exception e) {
	throw  new Exception("Can't open file " + e.getMessage() + '.');
      }

      Instances inst = new Instances(inStream, 1);

      while (inst.readInstance(inStream)) {
	try {
	  cnum = clusterer.clusterInstance(inst.instance(0));

	  if (clusterer instanceof DistributionClusterer) {
	    temp = ((DistributionClusterer)clusterer).
	      densityForInstance(inst.instance(0));
	    //	    temp = Utils.sum(dist);

	    if (temp > 0) {
	      loglk += Math.log(temp);
	    }
	  }
	  instanceStats[cnum]++;
	}
	catch (Exception e) {
	  unclusteredInstances++;
	}
	inst.delete(0);
	i++;
      }

      /*
      // count the actual number of used clusters
      int count = 0;
      for (i = 0; i < cc; i++) {
	if (instanceStats[i] > 0) {
	  count++;
	}
      }
      if (count > 0) {
	double [] tempStats = new double [count];
	count=0;
	for (i=0;i<cc;i++) {
	  if (instanceStats[i] > 0) {
	    tempStats[count++] = instanceStats[i];
	}
	}
	instanceStats = tempStats;
	cc = instanceStats.length;
	} */

      int clustFieldWidth = (int)((Math.log(cc)/Math.log(10))+1);
      int numInstFieldWidth = (int)((Math.log(i)/Math.log(10))+1);
      double sum = Utils.sum(instanceStats);
      loglk /= sum;
      text.append("Clustered Instances\n");

      for (i = 0; i < cc; i++) {
	if (instanceStats[i] > 0) {
	  text.append(Utils.doubleToString((double)i,
					   clustFieldWidth, 0)
		      + "      "
		      + Utils.doubleToString(instanceStats[i],
					     numInstFieldWidth, 0)
		      + " ("
		    + Utils.doubleToString((instanceStats[i]/sum*100.0)
					   , 3, 0) + "%)\n");
	}
      }
      if (unclusteredInstances > 0) {
	text.append("\nUnclustered Instances : "+unclusteredInstances);
      }

      if (clusterer instanceof DistributionClusterer) {
	text.append("\n\nLog likelihood: "
		    + Utils.doubleToString(loglk, 1, 5)
		    + "\n");
      }
    }

    return  text.toString();
  }



  /**
   * Print the cluster statistics for either the training
   * or the testing data.
   *
   * @param clusterer the clusterer to use for generating statistics.
   * @return a string containing cluster statistics.
   * @exception if statistics can't be generated.
   */
  private static String printClusterStats (Clusterer clusterer, Instances data)
  throws Exception {
    StringBuffer text = new StringBuffer();
    int i = 0;
    int cnum;
    double loglk = 0.0;
    double[] dist;
    double temp;
    int cc = clusterer.numberOfClusters();
    double[] instanceStats = new double[cc];
    int unclusteredInstances = 0;

      Instances inst = data;
      Enumeration instEnumeration = inst.enumerateInstances();
      while (instEnumeration.hasMoreElements()) {
	try {
	  cnum = clusterer.clusterInstance(inst.instance(0));

	  if (clusterer instanceof DistributionClusterer) {
	    temp = ((DistributionClusterer)clusterer).
	      densityForInstance(inst.instance(0));
	    //	    temp = Utils.sum(dist);

	    if (temp > 0) {
	      loglk += Math.log(temp);
	    }
	  }
	  instanceStats[cnum]++;
	}
	catch (Exception e) {
	  unclusteredInstances++;
	}
	inst.delete(0);
	i++;
      }

      /*
      // count the actual number of used clusters
      int count = 0;
      for (i = 0; i < cc; i++) {
	if (instanceStats[i] > 0) {
	  count++;
	}
      }
      if (count > 0) {
	double [] tempStats = new double [count];
	count=0;
	for (i=0;i<cc;i++) {
	  if (instanceStats[i] > 0) {
	    tempStats[count++] = instanceStats[i];
	}
	}
	instanceStats = tempStats;
	cc = instanceStats.length;
	} */

      int clustFieldWidth = (int)((Math.log(cc)/Math.log(10))+1);
      int numInstFieldWidth = (int)((Math.log(i)/Math.log(10))+1);
      double sum = Utils.sum(instanceStats);
      loglk /= sum;
      text.append("Clustered Instances\n");

      for (i = 0; i < cc; i++) {
	if (instanceStats[i] > 0) {
	  text.append(Utils.doubleToString((double)i,
					   clustFieldWidth, 0)
		      + "      "
		      + Utils.doubleToString(instanceStats[i],
					     numInstFieldWidth, 0)
		      + " ("
		    + Utils.doubleToString((instanceStats[i]/sum*100.0)
					   , 3, 0) + "%)\n");
	}
      }
      if (unclusteredInstances > 0) {
	text.append("\nUnclustered Instances : "+unclusteredInstances);
      }

      if (clusterer instanceof DistributionClusterer) {
	text.append("\n\nLog likelihood: "
		    + Utils.doubleToString(loglk, 1, 5)
		    + "\n");
      }
    return  text.toString();
  }


  /**
   * Print the cluster assignments for either the training
   * or the testing data.
   *
   * @param clusterer the clusterer to use for cluster assignments
   * @return a string containing the instance indexes and cluster assigns.
   * @exception if cluster assignments can't be printed
   */
  private static String printClusterings (Clusterer clusterer, Instances train,
					  String testFileName, Range attributesToOutput)
    throws Exception
  {
    StringBuffer text = new StringBuffer();
    int i = 0;
    int cnum;

    if (testFileName.length() != 0) {
      BufferedReader testStream = null;

      try {
	testStream = new BufferedReader(new FileReader(testFileName));
      }
      catch (Exception e) {
	throw  new Exception("Can't open file " + e.getMessage() + '.');
      }

      Instances test = new Instances(testStream, 1);

      while (test.readInstance(testStream)) {
	try {
	  cnum = clusterer.clusterInstance(test.instance(0));

	  text.append(i + " " + cnum + " "
		      + attributeValuesString(test.instance(0), attributesToOutput) + "\n");
	}
	catch (Exception e) {
	  /*	  throw  new Exception('\n' + "Unable to cluster instance\n"
		  + e.getMessage()); */
	  text.append(i + " Unclustered "
		      + attributeValuesString(test.instance(0), attributesToOutput) + "\n");
	}
	test.delete(0);
	i++;
      }
    }
    else// output for training data
      {
	for (i = 0; i < train.numInstances(); i++) {
	  try {
	    cnum = clusterer.clusterInstance(train.instance(i));

	    text.append(i + " " + cnum + " "
			+ attributeValuesString(train.instance(i), attributesToOutput)
			+ "\n");
	  }
	  catch (Exception e) {
	    /*  throw  new Exception('\n'
				 + "Unable to cluster instance\n"
				 + e.getMessage()); */
	    text.append(i + " Unclustered "
			+ attributeValuesString(train.instance(i), attributesToOutput)
			+ "\n");
	  }
	}
      }

    return  text.toString();
  }

  /**
   * Builds a string listing the attribute values in a specified range of indices,
   * separated by commas and enclosed in brackets.
   *
   * @param instance the instance to print the values from
   * @param attributes the range of the attributes to list
   * @return a string listing values of the attributes in the range
   */
  private static String attributeValuesString(Instance instance, Range attRange) {
    StringBuffer text = new StringBuffer();
    if (attRange != null) {
      boolean firstOutput = true;
      attRange.setUpper(instance.numAttributes() - 1);
      for (int i=0; i<instance.numAttributes(); i++)
	if (attRange.isInRange(i)) {
	  if (firstOutput) text.append("(");
	  else text.append(",");
	  text.append(instance.toString(i));
	  firstOutput = false;
	}
      if (!firstOutput) text.append(")");
    }
    return text.toString();
  }

  /**
   * Make up the help string giving all the command line options
   *
   * @param clusterer the clusterer to include options for
   * @return a string detailing the valid command line options
   */
  private static String makeOptionString (Clusterer clusterer) {
    StringBuffer optionsText = new StringBuffer("");
    // General options
    optionsText.append("\n\nGeneral options:\n\n");
    optionsText.append("-t <name of training file>\n");
    optionsText.append("\tSets training file.\n");
    optionsText.append("-T <name of test file>\n");
    optionsText.append("-l <name of input file>\n");
    optionsText.append("\tSets model input file.\n");
    optionsText.append("-d <name of output file>\n");
    optionsText.append("\tSets model output file.\n");
    optionsText.append("-p <attribute range>\n");
    optionsText.append("\tOutput predictions. Predictions are for "
		       + "training file"
		       + "\n\tif only training file is specified,"
		       + "\n\totherwise predictions are for the test file."
		       + "\n\tThe range specifies attribute values to be output"
		       + "\n\twith the predictions. Use '-p 0' for none.\n");
    optionsText.append("-x <number of folds>\n");
    optionsText.append("\tOnly Distribution Clusterers can be cross "
		       + "validated.\n");
    optionsText.append("-s <random number seed>\n");
    optionsText.append("-c <class index>\n");
    optionsText.append("\tSet class attribute. If supplied, class is ignored");
    optionsText.append("\n\tduring clustering but is used in a classes to");
    optionsText.append("\n\tclusters evaluation.\n");

    // Get scheme-specific options
    if (clusterer instanceof OptionHandler) {
      optionsText.append("\nOptions specific to "
			 + clusterer.getClass().getName() + ":\n\n");
      Enumeration enum = ((OptionHandler)clusterer).listOptions();

      while (enum.hasMoreElements()) {
	Option option = (Option)enum.nextElement();
	optionsText.append(option.synopsis() + '\n');
	optionsText.append(option.description() + "\n");
      }
    }

    return  optionsText.toString();
  }


  /**
   * Main method for testing this class.
   *
   * @param args the options
   */
  public static void main (String[] args) {
    try {
      if (args.length == 0) {
	throw  new Exception("The first argument must be the name of a "
			     + "clusterer");
      }

      String ClustererString = args[0];
      args[0] = "";
      Clusterer newClusterer = Clusterer.forName(ClustererString, null);
      System.out.println(evaluateClusterer(newClusterer, args));
    }
    catch (Exception e) {
      log.error(e.getMessage());
    }
  }

}
上一页 1 2 34
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -