⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 checkclassifier.java

📁 :<<数据挖掘--实用机器学习技术及java实现>>一书的配套源程序
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
			      numericPredictor ? 1 : 0, 			      numClasses, 			      numericClass);      test = makeTestDataset(24, numTest,			     nominalPredictor ? 2 : 0,			     numericPredictor ? 1 : 0, 			     numClasses, 			     numericClass);      if (nominalPredictor) {	train.deleteAttributeAt(0);	test.deleteAttributeAt(0);      }      if (missingLevel > 0) {	addMissing(train, missingLevel, predictorMissing, classMissing);	addMissing(test, Math.min(missingLevel, 50), predictorMissing, 		   classMissing);      }      classifier = Classifier.makeCopies(getClassifier(), 1)[0];      evaluation = new Evaluation(train);    } catch (Exception ex) {      throw new Error("Error setting up for tests: " + ex.getMessage());    }    try {      Instances trainCopy = new Instances(train);      Instances testCopy = new Instances(test);      classifier.buildClassifier(trainCopy);      compareDatasets(train, trainCopy);      built = true;      testWRTZeroR(classifier, evaluation, trainCopy, testCopy);      compareDatasets(test, testCopy);      System.out.println("yes");      return true;    } catch (Exception ex) {      System.out.println("no");      if (m_Debug) {	System.out.println("\n=== Full Report ===");	System.out.print("Problem during");	if (built) {	  System.out.print(" testing");	} else {	  System.out.print(" training");	}	System.out.println(": " + ex.getMessage() + "\n");	System.out.println("Here are the datasets:\n");	System.out.println("=== Train Dataset ===\n"			   + train.toString() + "\n");	System.out.println("=== Test Dataset ===\n"			   + test.toString() + "\n\n");      }    }    return false;  }  /**   * Runs a text on the datasets with the given characteristics.   */  protected boolean runBasicTest(boolean nominalPredictor,				 boolean numericPredictor, 				 boolean numericClass,				 int missingLevel,				 boolean predictorMissing,				 boolean classMissing,				 int numTrain,				 int numTest,				 int numClasses,				 FastVector accepts) {    Instances train = null;    Instances test = null;    Classifier classifier = null;    Evaluation evaluation = null;    boolean built = false;    try {      train = makeTestDataset(42, numTrain, 			      nominalPredictor ? 2 : 0,			      numericPredictor ? 1 : 0, 			      numClasses, 			      numericClass);      test = makeTestDataset(24, numTest,			     nominalPredictor ? 2 : 0,			     numericPredictor ? 1 : 0, 			     numClasses, 			     numericClass);      if (nominalPredictor) {	train.deleteAttributeAt(0);	test.deleteAttributeAt(0);      }      if (missingLevel > 0) {	addMissing(train, missingLevel, predictorMissing, classMissing);	addMissing(test, Math.min(missingLevel, 50), predictorMissing, 		   classMissing);      }      classifier = Classifier.makeCopies(getClassifier(), 1)[0];      evaluation = new Evaluation(train);    } catch (Exception ex) {      throw new Error("Error setting up for tests: " + ex.getMessage());    }    try {      classifier.buildClassifier(train);      built = true;      if (!testWRTZeroR(classifier, evaluation, train, test)) {	throw new Exception("Scheme performs worse than ZeroR");      }      System.out.println("yes");      return true;    } catch (Exception ex) {      boolean acceptable = false;      String msg = ex.getMessage().toLowerCase();      if (msg.indexOf("worse than zeror") >= 0) {	System.out.println("warning: performs worse than ZeroR");      } else {	for (int i = 0; i < accepts.size(); i++) {	  if (msg.indexOf((String)accepts.elementAt(i)) >= 0) {	    acceptable = true;	  }	}	System.out.println("no" + (acceptable ? " (OK error message)" : ""));      }      if (m_Debug) {	System.out.println("\n=== Full Report ===");	System.out.print("Problem during");	if (built) {	  System.out.print(" testing");	} else {	  System.out.print(" training");	}	System.out.println(": " + ex.getMessage() + "\n");	if (!acceptable) {	  if (accepts.size() > 0) {	    System.out.print("Error message doesn't mention ");	    for (int i = 0; i < accepts.size(); i++) {	      if (i != 0) {		System.out.print(" or ");	      }	      System.out.print('"' + (String)accepts.elementAt(i) + '"');	    }	  }	  System.out.println("here are the datasets:\n");	  System.out.println("=== Train Dataset ===\n"			     + train.toString() + "\n");	  System.out.println("=== Test Dataset ===\n"			     + test.toString() + "\n\n");	}      }    }    return false;  }  /**   * Determine whether the scheme performs worse than ZeroR during testing   *   * @param classifier the pre-trained classifier   * @param evaluation the classifier evaluation object   * @param train the training data   * @param test the test data   * @return true if the scheme performs better than ZeroR   * @exception Exception if there was a problem during the scheme's testing   */  protected boolean testWRTZeroR(Classifier classifier,				 Evaluation evaluation,				 Instances train, Instances test)     throws Exception {	     evaluation.evaluateModel(classifier, test);    try {      // Tested OK, compare with ZeroR      Classifier zeroR = new weka.classifiers.ZeroR();      zeroR.buildClassifier(train);      Evaluation zeroREval = new Evaluation(train);      zeroREval.evaluateModel(zeroR, test);      return Utils.grOrEq(zeroREval.errorRate(), evaluation.errorRate());    } catch (Exception ex) {      throw new Error("Problem determining ZeroR performance: "		      + ex.getMessage());    }  }  /**   * Compare two datasets to see if they differ.   *   * @param data1 one set of instances   * @param data2 the other set of instances   * @exception Exception if the datasets differ   */  protected void compareDatasets(Instances data1, Instances data2)    throws Exception {    if (!data2.equalHeaders(data1)) {      throw new Exception("header has been modified");    }    if (!(data2.numInstances() == data1.numInstances())) {      throw new Exception("number of instances has changed");    }    for (int i = 0; i < data2.numInstances(); i++) {      Instance orig = data1.instance(i);      Instance copy = data2.instance(i);      for (int j = 0; j < orig.numAttributes(); j++) {	if (orig.isMissing(j)) {	  if (!copy.isMissing(j)) {	    throw new Exception("instances have changed");	  }	} else if (orig.value(j) != copy.value(j)) {	    throw new Exception("instances have changed");	}	if (orig.weight() != copy.weight()) {	  throw new Exception("instance weights have changed");	}	        }    }  }  /**   * Add missing values to a dataset.   *   * @param data the instances to add missing values to   * @param level the level of missing values to add (if positive, this   * is the probability that a value will be set to missing, if negative   * all but one value will be set to missing (not yet implemented))   * @param predictorMissing if true, predictor attributes will be modified   * @param classMissing if true, the class attribute will be modified   */  protected void addMissing(Instances data, int level,			    boolean predictorMissing, boolean classMissing) {    int classIndex = data.classIndex();    Random random = new Random(1);    for (int i = 0; i < data.numInstances(); i++) {      Instance current = data.instance(i);      for (int j = 0; j < data.numAttributes(); j++) {	if (((j == classIndex) && classMissing) ||	    ((j != classIndex) && predictorMissing)) {	  if (Math.abs(random.nextInt()) % 100 < level)	    current.setMissing(j);	}      }    }  }  /**   * Make a simple set of instances, which can later be modified   * for use in specific tests.   *   * @param seed the random number seed   * @param numInstances the number of instances to generate   * @param numNominal the number of nominal attributes   * @param numNumeric the number of numeric attributes   * @param numClasses the number of classes (if nominal class)   * @param numericClass true if the class attribute should be numeric   * @return the test dataset   * @exception Exception if the dataset couldn't be generated   */  protected Instances makeTestDataset(int seed, int numInstances, 				      int numNominal, int numNumeric, 				      int numClasses, boolean numericClass)    throws Exception {    int numAttributes = numNominal + numNumeric + 1;    Random random = new Random(seed);    FastVector attributes = new FastVector(numAttributes);    // Add Nominal attributes    for (int i = 0; i < numNominal; i++) {      FastVector nomStrings = new FastVector(i + 1);      for(int j = 0; j <= i; j++) {	nomStrings.addElement("a" + (i + 1) + "l" + (j + 1));      }      attributes.addElement(new Attribute("Nominal" + (i + 1), nomStrings));    }    // Add Numeric attributes    for (int i = 0; i < numNumeric; i++) {      attributes.addElement(new Attribute("Numeric" + (i + 1)));    }    // TODO: Add some String attributes...    // Add class attribute    if (numericClass) {      attributes.addElement(new Attribute("Class"));    } else {      FastVector nomStrings = new FastVector();      for(int j = 0; j <numClasses; j++) {	nomStrings.addElement("cl" + (j + 1));      }      attributes.addElement(new Attribute("Class",nomStrings));    }        Instances data = new Instances("CheckSet", attributes, numInstances);    data.setClassIndex(data.numAttributes() - 1);    // Generate the instances    for (int i = 0; i < numInstances; i++) {      Instance current = new Instance(numAttributes);      current.setDataset(data);      if (numericClass) {		current.setClassValue(random.nextFloat() * 0.25			      + Math.abs(random.nextInt())			      % Math.max(2, numNominal));      } else {	current.setClassValue(Math.abs(random.nextInt()) % data.numClasses());      }      double classVal = current.classValue();      double newVal = 0;      for (int j = 0; j < numAttributes - 1; j++) {	switch (data.attribute(j).type()) {	case Attribute.NUMERIC:	  newVal = classVal * 4 + random.nextFloat() * 1 - 0.5;	  current.setValue(j, newVal);	  break;	case Attribute.NOMINAL:	  if (random.nextFloat() < 0.2) {	    newVal = Math.abs(random.nextInt())	      % data.attribute(j).numValues();	  } else {	    newVal = ((int)classVal) % data.attribute(j).numValues();	  }	  current.setValue(j, newVal);	  break;	case Attribute.STRING:	  System.err.println("Huh? this bit isn't implemented yet");	  break;	}      }      data.add(current);    }    return data;  }  /**   * Print out a short summary string for the dataset characteristics   *   * @param nominalPredictor true if nominal predictor attributes are present   * @param numericPredictor true if numeric predictor attributes are present   * @param numericClass true if the class attribute is numeric   */  protected void printAttributeSummary(boolean nominalPredictor, 				       boolean numericPredictor, 				       boolean numericClass) {        if (numericClass) {      System.out.print(" (numeric class,");    } else {      System.out.print(" (nominal class,");    }    if (numericPredictor) {      System.out.print(" numeric");      if (nominalPredictor) {	System.out.print(" &");      }    }    if (nominalPredictor) {      System.out.print(" nominal");    }    System.out.print(" predictors)");  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -