📄 bootstrap.java
字号:
saveDribble = GlobalOptions.dribble;
GlobalOptions.dribble = false;
//obs train_and_test(inducer, trainList, testList, "-" + MString(time, 0),
//obs perfData);
train_and_test(inducer, trainList, testList, "-" + time,perfData);
GlobalOptions.dribble = saveDribble;
}
//obs delete trainList;
trainList = null;
}
logOptions.DRIBBLE("\n");
logOptions.LOG(2, "Error: "+this+"\n");
perfData.insert_cost(numTimes);
return error();
}
/*****************************************************************************
Trains and tests the inducer using files. Uses the files fileStem.names,
fileStem-T.data, and fileStem-T.test, where T is an integer in the
range [0, numTimes-1]. Fractional bootstrap also uses the files
fileStem-A.data and fileStem-A.test to compute the apparent error. Most of the
work is done before the files are dumped.
*****************************************************************************/
public double estimate_performance(BaseInducer inducer, String fileStem) {
    // Drop whatever statistics were held before and start from a fresh PerfData.
    perfData = null;
    perfData = new PerfData();

    // Every bootstrap type other than simple needs the apparent error, which
    // is measured up front on the files named fileStem + BOOTSTRAP_AA_SUFFIX.
    final boolean needsApparentError = (bsType != simple);
    if (needsApparentError) {
        final String apparentStem = fileStem + BOOTSTRAP_AA_SUFFIX;
        apparentError = single_file_performance(inducer, apparentStem, null);
    }

    // Run the numTimes file-based train/test rounds, collecting into perfData.
    estimate_file_performance(inducer, numTimes, fileStem, perfData);

    logOptions.LOG(2, "Error: " + this + "\n");
    perfData.insert_cost(numTimes);

    // The reported estimate is whatever error() yields.
    return error();
}
//Test functions and variables
/*
private String FILE_STEM = "t_lensesBS";
private String NAMES_FILE = "t_Bootstrap.names";
private String DATA_FILE = "t_Bootstrap.data";
private String BS_STEM = "t_Bootstrap";
private int NUM_TIMES = 20;
private int RAND_SEED = 7258789;
void cleanup()
{
// Create a file so the asterisk won't fail.
MLCOStream dummy(BS_STEM + "-dummy.names");
MLCOStream dummy2(BS_STEM + "-dummy.data");
MLCOStream dummy3(BS_STEM + "-dummy.test");
dummy.close();
dummy2.close();
dummy3.close();
// Without a shell this doesn't work because the asterisk isn't expanded
// The input from /dev/null ensures yes to all questions
// Note that this call DOES expand to -A-*.names, etc as well, because
// -* can expand to -A-*.
remove_wildcard_files(BS_STEM + "-*.names");
remove_wildcard_files(BS_STEM + "-*.data");
remove_wildcard_files(BS_STEM + "-*.test");
// repeat the cleanup with the other FILE_STEM
MLCOStream dummy4(FILE_STEM + "-dummy.names");
MLCOStream dummy5(FILE_STEM + "-dummy.data");
MLCOStream dummy6(FILE_STEM + "-dummy.test");
dummy4.close();
dummy5.close();
dummy6.close();
remove_wildcard_files( FILE_STEM + "-*.names");
remove_wildcard_files( FILE_STEM + "-*.data");
remove_wildcard_files( FILE_STEM + "-*.test");
}
public static void main(String[] args) {
Mcout << "t_Bootstrap executing." << endl;
cleanup();
const int NUM_BS_TYPES = 3;
for(int i=0; i<NUM_BS_TYPES; i++) {
Real bsFract;
// based on the loop index, select a bootstrap type.
Bootstrap::BootstrapType bsType;
switch(i) {
case 0:
bsType = Bootstrap::simple;
bsFract = 0.632; break;
case 1:
bsType = Bootstrap::fractional;
bsFract = 0.632; break;
case 2:
bsType = Bootstrap::fractional;
bsFract = 0.5; break;
default:
ASSERT(FALSE);
}
Mcout << "t_Bootstrap: type = " << bsType << endl;
InstanceList instList(EMPTY_STRING, NAMES_FILE, DATA_FILE);
MString prefix("t_Bootstrap.out");
MString outName = prefix + MString(i+1, 1);
MLCOStream out1(outName);
Bootstrap bootstrap;
if (!memCheck) {
TEST_ERROR("Bootstrap::set_times: illegal number",
bootstrap.set_times(-1));
TEST_ERROR("Bootstrap::set_type: illegal type",
bootstrap.set_type((Bootstrap::BootstrapType)666));
}
bootstrap.set_type(bsType);
bootstrap.set_times(NUM_TIMES);
if(bsType == Bootstrap::fractional)
bootstrap.set_fraction(bsFract);
bootstrap.set_log_level(2);
bootstrap.set_log_stream(out1);
bootstrap.init_rand_num_gen(RAND_SEED);
if (!memCheck) {
TEST_ERROR("PerfEstimator::check_error_data: Must be called",
bootstrap.error());
TEST_ERROR("PerfEstimator::check_error_data: Must be called",
bootstrap.error_std_dev());
}
Mcout << bootstrap << endl;
out1 << bootstrap << endl;
bootstrap.dump_files(instList, FILE_STEM);
out1 << "Finished dump" << endl;
Bootstrap bs2(NUM_TIMES, bsType, bsFract);
bs2.set_log_level(2);
bs2.set_log_stream(out1);
ID3Inducer id3Inducer("t_Bootstrap id3 inducer");
bs2.init_rand_num_gen(RAND_SEED);
Real acc2 = bs2.estimate_performance(id3Inducer, FILE_STEM);
ASSERT(acc2 == bs2.error());
out1 << "ID3 categorizer bootstrap from " << FILE_STEM;
bs2.display_error_data(out1);
out1 << endl;
Mcout << "ID3 categorizer bootstrap from " << FILE_STEM;
bs2.display_error_data(Mcout);
Mcout << endl;
Bootstrap bs(NUM_TIMES, bsType, bsFract);
bs.set_log_level(3);
bs.set_log_stream(out1);
bs.rand_num_gen().init(RAND_SEED);
Real accBS = bs.estimate_performance(id3Inducer, instList);
out1 << "ID3 bs in memory " << bs << endl;
Mcout << "ID3 bs in memory " << bs << endl;
ASSERT(accBS == bs.error());
ASSERT(accBS == acc2);
out1 << "ID3 categorizer bootstrap from Instance List: " << endl
<< bs << endl;
}
// Extra bootstrap tests. These will only be done on the default
// (.632) bootstrap.
Mcout << "Extra tests:" << endl;
InstanceList instList(EMPTY_STRING, NAMES_FILE, DATA_FILE);
MLCOStream out1("t_Bootstrap.out4");
Bootstrap bs(NUM_TIMES, Bootstrap::fractional);
bs.set_log_level(3);
bs.set_log_stream(out1);
bs.rand_num_gen().init(RAND_SEED);
ID3Inducer id3Inducer("t_Bootstrap id3 inducer");
// Test for errors.
bs.set_times(1);
bs.estimate_performance(id3Inducer, instList);
bs.set_fraction(0.5);
if (!memCheck) {
TEST_ERROR("Bootstrap::set_fraction: Illegal value",
bs.set_fraction(-0.5));
TEST_ERROR("Bootstrap::set_fraction: Illegal value",
bs.set_fraction(1.5));
}
// Try everything with a C45 inducer, so we know there's no problem with
// external inducers.
cleanup();
C45Inducer c45Inducer("my c45 inducer");
Bootstrap C45BS;
C45BS.set_times(2);
C45BS.set_log_level(3);
C45BS.init_rand_num_gen(RAND_SEED);
Real bserr = Mround(C45BS.estimate_performance(c45Inducer, instList), 4);
Mcout << "c4.5 error is " << bserr << endl;
// Dump files.
C45BS.init_rand_num_gen(RAND_SEED);
C45BS.set_log_level(0);
C45BS.dump_files(instList, "t_Bootstrap");
// Run C4.5 "manually," i.e., without using Bootstrap
InstanceList train0("t_Bootstrap-0");
InstanceList test0 (train0.get_schema(),
train0.get_original_schema(),
"t_Bootstrap-0.test");
InstanceList train1("t_Bootstrap-1");
InstanceList test1 (train1.get_schema(),
train1.get_original_schema(),
"t_Bootstrap-1.test");
InstanceList train2("t_Bootstrap-A");
InstanceList test2 (train2.get_schema(),
train2.get_original_schema(),
"t_Bootstrap-A.test");
Real pruneErr, noPruneErr, estimateErr;
int noPruneSize, pruneSize;
MString c45Pgm = C45Inducer::defaultPgmName + C45Inducer::defaultPgmFlags;
LogOptions c45Log;
run_c45(c45Log, c45Pgm, train0, test0, pruneErr, noPruneErr, estimateErr,
noPruneSize, pruneSize);
Real err0 = pruneErr;
run_c45(c45Log, c45Pgm, train1, test1, pruneErr, noPruneErr, estimateErr,
noPruneSize, pruneSize);
Real err1 = pruneErr;
run_c45(c45Log, c45Pgm, train2, test2, pruneErr, noPruneErr, estimateErr,
noPruneSize, pruneSize);
Real errApp = pruneErr;
Real avgErr = Mround((err0 + err1) / 2 * .632 + errApp*.368, 4);
Mcout << "Running C4.5 manually yields " << err0 << ", " << err1 <<
" and apparent " << errApp << " with the average " << avgErr << endl;
if(!mlc.approx_equal(avgErr, bserr))
err << "C45 manual error (" << avgErr << ") does not match "
"bootstrap error (" << bserr << ")" << fatal_error;
cleanup();
System.exit(0); // return success to shell
}
*/
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -