📄 bootstrap.java

📁 java数据挖掘算法
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12

	 saveDribble = GlobalOptions.dribble;
	 GlobalOptions.dribble = false;
//obs	 train_and_test(inducer, trainList, testList, "-" + MString(time, 0),
//obs			perfData);
	 train_and_test(inducer, trainList, testList, "-" + time,perfData);
	 GlobalOptions.dribble = saveDribble;
      }
//obs      delete trainList;
      trainList = null;
   }

   logOptions.DRIBBLE("\n");
   logOptions.LOG(2, "Error: "+this+"\n");

   perfData.insert_cost(numTimes);
   return error();
}


/*****************************************************************************
  Trains and tests the inducer using files. Uses the files fileStem.names,
fileStem-T.data, and fileStem-T.test, where T is an integer in the
range [0, numTimes-1]. Fractional bootstrap also uses the files
fileStem-A.data and fileStem-A.test to compute the apparent error. Most of
the work is done before the files are dumped.
*****************************************************************************/
/**
 * File-based performance estimate: runs the inducer against data sets that
 * are read from files named after {@code fileStem}.
 *
 * @param inducer  inducer handed to the file-based train/test helpers
 * @param fileStem common stem of the input file names
 * @return the value reported by {@code error()} once the run has finished
 */
public double estimate_performance(BaseInducer inducer, String fileStem) {
   // Drop the reference to any earlier results, then start from a fresh
   // container.
   perfData = null;
   perfData = new PerfData();

   // Every bootstrap type other than "simple" needs the apparent error,
   // which is measured first, from the stem extended by BOOTSTRAP_AA_SUFFIX.
   boolean needsApparentError = (bsType != simple);
   if (needsApparentError) {
      String apparentStem = fileStem + BOOTSTRAP_AA_SUFFIX;
      apparentError = single_file_performance(inducer, apparentStem, null);
   }

   // Run the file-based estimation numTimes times, passing perfData along.
   estimate_file_performance(inducer, numTimes, fileStem, perfData);

   String summary = "Error: " + this + "\n";
   logOptions.LOG(2, summary);

   perfData.insert_cost(numTimes);
   return error();
}

//Test functions and variables
/*
private String FILE_STEM  = "t_lensesBS";
private String NAMES_FILE = "t_Bootstrap.names";
private String DATA_FILE  = "t_Bootstrap.data";
private String BS_STEM = "t_Bootstrap";

private int NUM_TIMES = 20;
private int RAND_SEED = 7258789;

void cleanup()
{
   // Create a file so the asterisk won't fail.
   MLCOStream dummy(BS_STEM + "-dummy.names");
   MLCOStream dummy2(BS_STEM + "-dummy.data");
   MLCOStream dummy3(BS_STEM + "-dummy.test");
   dummy.close();
   dummy2.close();
   dummy3.close();

   // Without a shell this doesn't work because the asterisk isn't expanded
   // The input from /dev/null ensures yes to all questions
   // Note that this call DOES expand to -A-*.names, etc as well, because
   // -* can expand to -A-*.
   remove_wildcard_files(BS_STEM + "-*.names");
   remove_wildcard_files(BS_STEM + "-*.data");
   remove_wildcard_files(BS_STEM + "-*.test");

   // repeat the cleanup with the other FILE_STEM
   MLCOStream dummy4(FILE_STEM + "-dummy.names");
   MLCOStream dummy5(FILE_STEM + "-dummy.data");
   MLCOStream dummy6(FILE_STEM + "-dummy.test");
   dummy4.close();
   dummy5.close();
   dummy6.close();
   remove_wildcard_files( FILE_STEM + "-*.names");
   remove_wildcard_files( FILE_STEM + "-*.data");
   remove_wildcard_files( FILE_STEM + "-*.test");
   
}

public static void main(String[] args) {
       Mcout << "t_Bootstrap executing." << endl;
   cleanup();

   const int NUM_BS_TYPES = 3;
   for(int i=0; i<NUM_BS_TYPES; i++) {
      Real bsFract;
    
      // based on the loop index, select a bootstrap type.
      Bootstrap::BootstrapType bsType;
      switch(i) {
	 case 0:
	    bsType = Bootstrap::simple;
	    bsFract = 0.632; break;
	 case 1: 
	    bsType = Bootstrap::fractional;
	    bsFract = 0.632; break;
	 case 2: 
	    bsType = Bootstrap::fractional;
	    bsFract = 0.5; break;
	 default:
	    ASSERT(FALSE);
      }
      Mcout << "t_Bootstrap: type = " << bsType << endl;

      InstanceList instList(EMPTY_STRING, NAMES_FILE, DATA_FILE);
      MString prefix("t_Bootstrap.out");
      MString outName = prefix  + MString(i+1, 1);
      MLCOStream out1(outName);
      Bootstrap bootstrap;

      if (!memCheck) {
	 TEST_ERROR("Bootstrap::set_times: illegal number",
		    bootstrap.set_times(-1));
	 TEST_ERROR("Bootstrap::set_type: illegal type",
		    bootstrap.set_type((Bootstrap::BootstrapType)666));
      }

      bootstrap.set_type(bsType);
      bootstrap.set_times(NUM_TIMES);
      if(bsType == Bootstrap::fractional)
	 bootstrap.set_fraction(bsFract);

      bootstrap.set_log_level(2);
      bootstrap.set_log_stream(out1);
      bootstrap.init_rand_num_gen(RAND_SEED);

      if (!memCheck) {
	 TEST_ERROR("PerfEstimator::check_error_data: Must be called",
		    bootstrap.error());
	 TEST_ERROR("PerfEstimator::check_error_data: Must be called",
		    bootstrap.error_std_dev());
      }
      
      Mcout << bootstrap << endl;
      out1 << bootstrap << endl;

      bootstrap.dump_files(instList, FILE_STEM);
      out1 << "Finished dump" << endl;

      Bootstrap bs2(NUM_TIMES, bsType, bsFract);
      bs2.set_log_level(2);
      bs2.set_log_stream(out1);
      ID3Inducer id3Inducer("t_Bootstrap id3 inducer");
      bs2.init_rand_num_gen(RAND_SEED);
      Real acc2 = bs2.estimate_performance(id3Inducer, FILE_STEM);
      ASSERT(acc2 == bs2.error());

      out1  << "ID3 categorizer bootstrap from " << FILE_STEM;
      bs2.display_error_data(out1);
      out1 << endl;
      Mcout << "ID3 categorizer bootstrap from "  << FILE_STEM;
      bs2.display_error_data(Mcout);
      Mcout << endl;

      Bootstrap bs(NUM_TIMES, bsType, bsFract);
      bs.set_log_level(3);
      bs.set_log_stream(out1);
      bs.rand_num_gen().init(RAND_SEED);
      Real accBS = bs.estimate_performance(id3Inducer, instList);
      out1  << "ID3 bs in memory " << bs << endl;
      Mcout << "ID3 bs in memory " << bs << endl;   

      ASSERT(accBS == bs.error());
      ASSERT(accBS == acc2);

      out1 << "ID3 categorizer bootstrap from Instance List: " << endl
	   << bs << endl;

   }

   // Extra bootstrap tests.  These will only be done on the default
   // (.632) bootstrap.
   Mcout << "Extra tests:" << endl;
   InstanceList instList(EMPTY_STRING, NAMES_FILE, DATA_FILE);
   MLCOStream out1("t_Bootstrap.out4");
    
   Bootstrap bs(NUM_TIMES, Bootstrap::fractional);
   bs.set_log_level(3);
   bs.set_log_stream(out1);
   bs.rand_num_gen().init(RAND_SEED);

   ID3Inducer id3Inducer("t_Bootstrap id3 inducer");

   // Test for errors.
   bs.set_times(1);
   bs.estimate_performance(id3Inducer, instList);
   bs.set_fraction(0.5);

   if (!memCheck) {
      TEST_ERROR("Bootstrap::set_fraction: Illegal value",
		 bs.set_fraction(-0.5));
      TEST_ERROR("Bootstrap::set_fraction: Illegal value",
		 bs.set_fraction(1.5));
   }

   // Try everything with a C45 inducer, so we know there's no problem with
   // external inducers.
   cleanup();
   C45Inducer c45Inducer("my c45 inducer");
   Bootstrap C45BS;
   C45BS.set_times(2);
   C45BS.set_log_level(3);
   C45BS.init_rand_num_gen(RAND_SEED);
   Real bserr = Mround(C45BS.estimate_performance(c45Inducer, instList), 4);
   Mcout << "c4.5 error is " << bserr << endl;

   // Dump files.
   C45BS.init_rand_num_gen(RAND_SEED);
   C45BS.set_log_level(0);
   C45BS.dump_files(instList, "t_Bootstrap");

   // Run C4.5 "manually," i.e., without using Bootstrap
   InstanceList train0("t_Bootstrap-0");
   InstanceList test0 (train0.get_schema(),
		       train0.get_original_schema(),
		       "t_Bootstrap-0.test");
   InstanceList train1("t_Bootstrap-1");
   InstanceList test1 (train1.get_schema(),
		       train1.get_original_schema(),
		       "t_Bootstrap-1.test");
   InstanceList train2("t_Bootstrap-A");
   InstanceList test2 (train2.get_schema(),
		       train2.get_original_schema(),
		       "t_Bootstrap-A.test");
   
   Real pruneErr, noPruneErr, estimateErr;
   int noPruneSize, pruneSize;
   
   MString c45Pgm = C45Inducer::defaultPgmName + C45Inducer::defaultPgmFlags;
   LogOptions c45Log;
  
   run_c45(c45Log, c45Pgm, train0, test0, pruneErr, noPruneErr, estimateErr, 
	   noPruneSize, pruneSize);
   Real err0 = pruneErr;

   run_c45(c45Log, c45Pgm, train1, test1, pruneErr, noPruneErr, estimateErr, 
	   noPruneSize, pruneSize);
   Real err1 = pruneErr;
   
   run_c45(c45Log, c45Pgm, train2, test2, pruneErr, noPruneErr, estimateErr, 
	   noPruneSize, pruneSize);
   Real errApp = pruneErr;

   Real avgErr = Mround((err0 + err1) / 2 * .632 + errApp*.368, 4);
   Mcout << "Running C4.5 manually yields " << err0 << ", " << err1 <<
      " and apparent " << errApp << " with the average " << avgErr << endl;

   if(!mlc.approx_equal(avgErr, bserr))
      err << "C45 manual error (" << avgErr << ") does not match "
	 "bootstrap error (" << bserr << ")" << fatal_error;
      
   cleanup();

   System.exit(0); // return success to shell

}
*/
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -