
checkclassifier.java

Java source code for the complete workflow of the data mining software ALPHAMINER
Language: Java
Page 1 of 4
			     numericPredictor ? 1 : 0, 
			     numClasses, 
			     numericClass);
      if (nominalPredictor) {
	train.deleteAttributeAt(0);
	test.deleteAttributeAt(0);
      }
      if (missingLevel > 0) {
	addMissing(train, missingLevel, predictorMissing, classMissing);
	addMissing(test, Math.min(missingLevel, 50), predictorMissing, 
		   classMissing);
      }
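      // Work on a fresh copy of the classifier so that state left over from
      // earlier test runs cannot affect this one.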
      classifier = Classifier.makeCopies(getClassifier(), 1)[0];
      evaluation = new Evaluation(train);
    } catch (Exception ex) {
      throw new Error("Error setting up for tests: " + ex.getMessage());
    }
    try {
      Instances trainCopy = new Instances(train);
      Instances testCopy = new Instances(test);
      classifier.buildClassifier(trainCopy);
      compareDatasets(train, trainCopy);
      built = true;
      testWRTZeroR(classifier, evaluation, trainCopy, testCopy);
      compareDatasets(test, testCopy);
      System.out.println("yes");
      return true;
    } catch (Exception ex) {
      System.out.println("no");
      if (m_Debug) {
	System.out.println("\n=== Full Report ===");
	System.out.print("Problem during");
	if (built) {
	  System.out.print(" testing");
	} else {
	  System.out.print(" training");
	}
	System.out.println(": " + ex.getMessage() + "\n");
	System.out.println("Here are the datasets:\n");
	System.out.println("=== Train Dataset ===\n"
			   + train.toString() + "\n");
	System.out.println("=== Test Dataset ===\n"
			   + test.toString() + "\n\n");
      }
    }
    return false;
  }


  /**
   * Runs a test on datasets with the given characteristics: builds the
   * classifier on a generated training set, evaluates it on a generated
   * test set, and checks that the scheme performs no worse than ZeroR.
   */
  protected boolean runBasicTest(boolean nominalPredictor,
				 boolean numericPredictor, 
				 boolean numericClass,
				 int missingLevel,
				 boolean predictorMissing,
				 boolean classMissing,
				 int numTrain,
				 int numTest,
				 int numClasses,
				 FastVector accepts) {

    Instances train = null;
    Instances test = null;
    Classifier classifier = null;
    Evaluation evaluation = null;
    boolean built = false;
    try {
      train = makeTestDataset(42, numTrain, 
			      nominalPredictor ? 2 : 0,
			      numericPredictor ? 1 : 0, 
			      numClasses, 
			      numericClass);
      test = makeTestDataset(24, numTest,
			     nominalPredictor ? 2 : 0,
			     numericPredictor ? 1 : 0, 
			     numClasses, 
			     numericClass);
      if (nominalPredictor) {
	train.deleteAttributeAt(0);
	test.deleteAttributeAt(0);
      }
      if (missingLevel > 0) {
	addMissing(train, missingLevel, predictorMissing, classMissing);
	addMissing(test, Math.min(missingLevel, 50), predictorMissing, 
		   classMissing);
      }
      classifier = Classifier.makeCopies(getClassifier(), 1)[0];
      evaluation = new Evaluation(train);
    } catch (Exception ex) {
      throw new Error("Error setting up for tests: " + ex.getMessage());
    }
    try {
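      // Train on the generated data, then check that the scheme does at
      // least as well as ZeroR on the generated test set.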
      classifier.buildClassifier(train);
      built = true;
      if (!testWRTZeroR(classifier, evaluation, train, test)) {
	throw new Exception("Scheme performs worse than ZeroR");
      }
      System.out.println("yes");
      return true;
    } catch (Exception ex) {
      boolean acceptable = false;
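      // A failure is still considered reasonable if the exception message
      // contains one of the accepted phrases passed in via 'accepts'.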
      String msg = ex.getMessage().toLowerCase();
      if (msg.indexOf("worse than zeror") >= 0) {
	System.out.println("warning: performs worse than ZeroR");
      } else {
	for (int i = 0; i < accepts.size(); i++) {
	  if (msg.indexOf((String)accepts.elementAt(i)) >= 0) {
	    acceptable = true;
	  }
	}
	System.out.println("no" + (acceptable ? " (OK error message)" : ""));
      }
      if (m_Debug) {
	System.out.println("\n=== Full Report ===");
	System.out.print("Problem during");
	if (built) {
	  System.out.print(" testing");
	} else {
	  System.out.print(" training");
	}
	System.out.println(": " + ex.getMessage() + "\n");
	if (!acceptable) {
	  if (accepts.size() > 0) {
	    System.out.print("Error message doesn't mention ");
	    for (int i = 0; i < accepts.size(); i++) {
	      if (i != 0) {
		System.out.print(" or ");
	      }
	      System.out.print('"' + (String)accepts.elementAt(i) + '"');
	    }
	  }
	  System.out.println("here are the datasets:\n");
	  System.out.println("=== Train Dataset ===\n"
			     + train.toString() + "\n");
	  System.out.println("=== Test Dataset ===\n"
			     + test.toString() + "\n\n");
	}
      }
    }
    return false;
  }

  /**
   * Determine whether the scheme performs worse than ZeroR during testing
   *
   * @param classifier the pre-trained classifier
   * @param evaluation the classifier evaluation object
   * @param train the training data
   * @param test the test data
   * @return true if the scheme performs no worse than ZeroR
   * @exception Exception if there was a problem during the scheme's testing
   */
  protected boolean testWRTZeroR(Classifier classifier,
				 Evaluation evaluation,
				 Instances train, Instances test) 
    throws Exception {
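    // Evaluate the scheme itself first; an exception here propagates to the
    // caller as a genuine test failure.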
	 
    evaluation.evaluateModel(classifier, test);
    try {

      // Tested OK, compare with ZeroR
      Classifier zeroR = new weka.classifiers.rules.ZeroR();
      zeroR.buildClassifier(train);
      Evaluation zeroREval = new Evaluation(train);
      zeroREval.evaluateModel(zeroR, test);
      return Utils.grOrEq(zeroREval.errorRate(), evaluation.errorRate());
    } catch (Exception ex) {
      throw new Error("Problem determining ZeroR performance: "
		      + ex.getMessage());
    }
  }

  /**
   * Compare two datasets to see if they differ.
   *
   * @param data1 one set of instances
   * @param data2 the other set of instances
   * @exception Exception if the datasets differ
   */
  protected void compareDatasets(Instances data1, Instances data2)
    throws Exception {
    if (!data2.equalHeaders(data1)) {
      throw new Exception("header has been modified");
    }
    if (!(data2.numInstances() == data1.numInstances())) {
      throw new Exception("number of instances has changed");
    }
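    // Compare every attribute value (and the instance weight) pair by pair.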
    for (int i = 0; i < data2.numInstances(); i++) {
      Instance orig = data1.instance(i);
      Instance copy = data2.instance(i);
      for (int j = 0; j < orig.numAttributes(); j++) {
	if (orig.isMissing(j)) {
	  if (!copy.isMissing(j)) {
	    throw new Exception("instances have changed");
	  }
	} else if (orig.value(j) != copy.value(j)) {
	    throw new Exception("instances have changed");
	}
	if (orig.weight() != copy.weight()) {
	  throw new Exception("instance weights have changed");
	}	  
      }
    }
  }

  /**
   * Add missing values to a dataset.
   *
   * @param data the instances to add missing values to
   * @param level the level of missing values to add (if positive, this
   * is the probability that a value will be set to missing, if negative
   * all but one value will be set to missing (not yet implemented))
   * @param predictorMissing if true, predictor attributes will be modified
   * @param classMissing if true, the class attribute will be modified
   */
  protected void addMissing(Instances data, int level,
			    boolean predictorMissing, boolean classMissing) {

    int classIndex = data.classIndex();
    Random random = new Random(1);
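    // 'level' is interpreted as a percentage: each eligible value is set to
    // missing with probability level/100.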
    for (int i = 0; i < data.numInstances(); i++) {
      Instance current = data.instance(i);
      for (int j = 0; j < data.numAttributes(); j++) {
	if (((j == classIndex) && classMissing) ||
	    ((j != classIndex) && predictorMissing)) {
	  if (Math.abs(random.nextInt()) % 100 < level)
	    current.setMissing(j);
	}
      }
    }
  }

  /**
   * Make a simple set of instances, which can later be modified
   * for use in specific tests.
   *
   * @param seed the random number seed
   * @param numInstances the number of instances to generate
   * @param numNominal the number of nominal attributes
   * @param numNumeric the number of numeric attributes
   * @param numClasses the number of classes (if nominal class)
   * @param numericClass true if the class attribute should be numeric
   * @return the test dataset
   * @exception Exception if the dataset couldn't be generated
   */
  protected Instances makeTestDataset(int seed, int numInstances, 
				      int numNominal, int numNumeric, 
				      int numClasses, boolean numericClass)
    throws Exception {

    int numAttributes = numNominal + numNumeric + 1;
    Random random = new Random(seed);
    FastVector attributes = new FastVector(numAttributes);

    // Add Nominal attributes
    for (int i = 0; i < numNominal; i++) {
      FastVector nomStrings = new FastVector(i + 1);
      for(int j = 0; j <= i; j++) {
	nomStrings.addElement("a" + (i + 1) + "l" + (j + 1));
      }
      attributes.addElement(new Attribute("Nominal" + (i + 1), nomStrings));
    }

    // Add Numeric attributes
    for (int i = 0; i < numNumeric; i++) {
      attributes.addElement(new Attribute("Numeric" + (i + 1)));
    }

    // TODO: Add some String attributes...

    // Add class attribute
    if (numericClass) {
      attributes.addElement(new Attribute("Class"));
    } else {
      FastVector nomStrings = new FastVector();
      for(int j = 0; j <numClasses; j++) {
	nomStrings.addElement("cl" + (j + 1));
      }
      attributes.addElement(new Attribute("Class",nomStrings));
    }    

    Instances data = new Instances("CheckSet", attributes, numInstances);
    data.setClassIndex(data.numAttributes() - 1);

    // Generate the instances
    for (int i = 0; i < numInstances; i++) {
      Instance current = new Instance(numAttributes);
      current.setDataset(data);
      if (numericClass) {
	
	current.setClassValue(random.nextFloat() * 0.25
			      + Math.abs(random.nextInt())
			      % Math.max(2, numNominal));
      } else {
	current.setClassValue(Math.abs(random.nextInt()) % data.numClasses());
      }
      double classVal = current.classValue();
      double newVal = 0;
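      // Derive the predictor values from the class value (plus some noise)
      // so that the generated dataset is actually learnable.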
      for (int j = 0; j < numAttributes - 1; j++) {
	switch (data.attribute(j).type()) {
	case Attribute.NUMERIC:
	  newVal = classVal * 4 + random.nextFloat() * 1 - 0.5;
	  current.setValue(j, newVal);
	  break;
	case Attribute.NOMINAL:
	  if (random.nextFloat() < 0.2) {
	    newVal = Math.abs(random.nextInt())
	      % data.attribute(j).numValues();
	  } else {
	    newVal = ((int)classVal) % data.attribute(j).numValues();
	  }
	  current.setValue(j, newVal);
	  break;
	case Attribute.STRING:
	  System.err.println("Huh? this bit isn't implemented yet");
	  break;
	}
      }
      data.add(current);
    }
    return data;
  }

  /**
   * Print out a short summary string for the dataset characteristics
   *
   * @param nominalPredictor true if nominal predictor attributes are present
   * @param numericPredictor true if numeric predictor attributes are present
   * @param numericClass true if the class attribute is numeric
   */
  protected void printAttributeSummary(boolean nominalPredictor, 
				       boolean numericPredictor, 
				       boolean numericClass) {
    
    if (numericClass) {
      System.out.print(" (numeric class,");
    } else {
      System.out.print(" (nominal class,");
    }
    if (numericPredictor) {
      System.out.print(" numeric");
      if (nominalPredictor) {
	System.out.print(" &");
      }
    }
    if (nominalPredictor) {
      System.out.print(" nominal");
    }
    System.out.print(" predictors)");
  }
}
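
// Usage note: the methods above are protected helpers; the checks are driven by
// the enclosing class. A minimal sketch, assuming this file is WEKA's
// weka.classifiers.CheckClassifier and that it exposes the usual setter and
// driver methods alongside the getClassifier() call used above:
//
//   From the command line (-W names the scheme to check):
//     java weka.classifiers.CheckClassifier -W weka.classifiers.trees.J48
//
//   Programmatically (setClassifier(...) and doTests() are assumed here):
//     CheckClassifier check = new CheckClassifier();
//     check.setClassifier(new weka.classifiers.trees.J48());
//     check.doTests();   // runs runBasicTest(...) and friends over many dataset variants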

