📄 vfdt.c
字号:
}
}
} else {
sprintf(fileNames, "%s/%s.test", gSourceDirectory, gFileStem);
exampleIn = fopen(fileNames, "r");
DebugError(exampleIn == 0, "Unable to open the .test file");
if(gMessageLevel >= 1) {
printf("opened test file, starting scan...\n");
}
e = ExampleRead(exampleIn, es);
while(e != 0) {
if(!ExampleIsClassUnknown(e)) {
tested++;
if(ExampleGetClass(e) != DecisionTreeClassify(dt, e)) {
errors++;
}
}
ExampleFree(e);
e = ExampleRead(exampleIn, es);
}
fclose(exampleIn);
}
if(finalOutput) {
if(gMessageLevel >= 1) {
printf("Tested %ld examples made %ld errors\n", (long)tested,
(long)errors);
}
printf("%.4f\t%ld\n", ((float)errors/(float)tested) * 100,
(long)DecisionTreeCountNodes(dt));
} else {
printf(">> %ld\t%.4f\t%ld\t%ld\t%.2lf\t%.2f\t%ld\n",
learnCount,
((float)errors/(float)tested) * 100,
(long)DecisionTreeCountNodes(dt),
growingNodes,
((double)learnTime) / 100,
((double)allocation) / (1024 * 1024),
numBoundsUsed);
}
fflush(stdout);
if(gOutputTree) {
sprintf(fileNames, "%s-%lu.tree", gFileStem, learnCount);
treeOut = fopen(fileNames, "w");
DecisionTreeWrite(dt, treeOut);
fclose(treeOut);
}
MSetActivePool(oldPool);
}
/*
 * Builds "<gSourceDirectory>/<gFileStem><extension>" into dest.
 *
 * dest      - buffer that receives the NUL-terminated path
 * destSize  - size of dest in bytes
 * extension - suffix including the leading dot, e.g. ".data"
 *
 * The write is bounded by destSize; a result that does not fit is reported
 * through DebugError instead of overrunning the buffer.
 */
static void _buildSourcePath(char *dest, size_t destSize,
                             const char *extension) {
   int written = snprintf(dest, destSize, "%s/%s%s",
                          gSourceDirectory, gFileStem, extension);

   DebugError(written < 0 || (size_t)written >= destSize,
              "File name is too long for the path buffer");
}

/*
 * Program entry point.
 *
 * Steps, in order:
 *   1. Parse the command line (_processArgs) and read the .names file.
 *   2. Create a VFDT learner and configure it from the g* option globals.
 *   3. If reduced-error pruning is requested (gREPrune), set up a prune set
 *      that is either cached in memory or written to a .prunedata file.
 *   4. Feed the training examples (from stdin or the .data file) to the
 *      learner, either in batch mode or one example at a time, optionally
 *      running incremental tests/reports and scheduled tests.
 *   5. Fetch the learned tree and test it, report on it, or print it.
 *
 * Returns 0.
 *
 * NOTE(review): learnTime is accumulated in tms_utime ticks and divided by
 * 100 when printed, which assumes 100 clock ticks per second -- confirm
 * against sysconf(_SC_CLK_TCK) on the target platform.
 */
int main(int argc, char *argv[]) {
   char fileNames[255];
   FILE *exampleIn = 0, *pruneSet = 0;
   ExampleSpecPtr es;
   ExamplePtr e;
   VFDTPtr vfdt;
   DecisionTreePtr dt;
   long seen = 0;
   long learnTime, allocation;
   int iteration;
   struct tms starttime;
   struct tms endtime;

   _processArgs(argc, argv);

   _buildSourcePath(fileNames, sizeof(fileNames), ".names");

   if(gInitialPause) {
      sleep(5);
   }

   es = ExampleSpecRead(fileNames);
   DebugError(es == 0, "Unable to open the .names file");

   RandomInit();

   /* initialize the vfdt */
   vfdt = VFDTNew(es, gSplitConfidence, gTieConfidence);
   VFDTSetUseGini(vfdt, gUseGini);
   VFDTSetProcessChunkSize(vfdt, gChunk);
   VFDTSetMaxAllocationMegs(vfdt, gGrowMegs);
   VFDTSetMessageLevel(vfdt, gMessageLevel);
   DebugSetMessageLevel(gMessageLevel);
   VFDTSetPrePruneTau(vfdt, gPrePruneTau);
   VFDTSetLaplace(vfdt, gLaplace);
   VFDTSetDoBonferonni(vfdt, gDoBonferonni);

   /* these two settings are only touched when the option is turned off */
   if(!gRestartLeaves) {
      VFDTSetRestartLeaves(vfdt, 0);
   }
   if(!gCacheTrainingExamples) {
      VFDTSetCacheTrainingExamples(vfdt, 0);
   }

   if(gMessageLevel >= 1) {
      printf("allocation %ld\n", MGetTotalAllocation());
   }

   /* set up the prune set: in memory, or in a freshly truncated file */
   if(gREPrune) {
      if(gCachePruneSet) {
         gPruneSet = VALNew();
         gPruneCacheInited = 1;
      } else {
         _buildSourcePath(fileNames, sizeof(fileNames), ".prunedata");
         pruneSet = fopen(fileNames, "w");
         DebugError(pruneSet == 0, "Unable to open the .prunedata file");
      }
   }

   /* stdin is read in a single pass */
   if(gStdin) {
      gRescans = 1;
   }

   learnTime = 0;
   times(&starttime);

   if(gDoBatch) {
      if(gStdin) {
         exampleIn = stdin;
      } else {
         _buildSourcePath(fileNames, sizeof(fileNames), ".data");
         exampleIn = fopen(fileNames, "r");
         DebugError(exampleIn == 0, "Unable to open the data file");
      }

      if(gREPrune) {
         /* divert a random sample into the prune set, learn from the rest */
         e = ExampleRead(exampleIn, es);
         while(e != 0) {
            seen++;
            if(RandomDouble() < gPruneSetPercent &&
                        gPruneSetCurrentSize < gPruneSetMaxSize) {
               gPruneSetCurrentSize++;
               if(gCachePruneSet) {
                  VALAppend(gPruneSet, e);
               } else {
                  ExampleWrite(e, pruneSet);
                  ExampleFree(e);
               }
            } else {
               VFDTProcessExampleBatch(vfdt, e);
            }
            e = ExampleRead(exampleIn, es);
         }
         VFDTBatchExamplesDone(vfdt);
      } else {
         VFDTProcessExamplesBatch(vfdt, exampleIn);
      }

      if(!gStdin) {
         fclose(exampleIn);
      }
   } else { /* not in batch mode */
      seen = 0;
      for(iteration = 0 ; iteration < gRescans ; iteration++) {
         if(gStdin) {
            exampleIn = stdin;
         } else {
            _buildSourcePath(fileNames, sizeof(fileNames), ".data");
            exampleIn = fopen(fileNames, "r");
            DebugError(exampleIn == 0, "Unable to open the .data file");
         }

         e = ExampleRead(exampleIn, es);
         while(e != 0) {
            seen++;

            if(gIncrementalReporting) {
               /* stop the learning clock while testing and reporting */
               times(&endtime);
               learnTime += endtime.tms_utime - starttime.tms_utime;

               _doIncrementalTest(vfdt, es, e);
               if(seen % 1000 == 0) {
                  _doIncrementalReport();
               }

               times(&starttime);
            }

            if(gREPrune && gPruneSetCurrentSize < gPruneSetMaxSize &&
                        (RandomDouble() < gPruneSetPercent)) {
               gPruneSetCurrentSize++;
               if(gCachePruneSet) {
                  VALAppend(gPruneSet, e);
               } else {
                  ExampleWrite(e, pruneSet);
                  ExampleFree(e);
               }
            } else {
               /* e is deliberately not freed here: the original author
                  notes it must stay alive when an example cache is in use
                  (presumably the learner keeps the pointer -- confirm) */
               VFDTProcessExample(vfdt, e);
            }

            e = ExampleRead(exampleIn, es);

            /* check to see if it's time to run tests */
            if(gUseSchedule && seen == gScheduleCount) {
               gScheduleCount *= gScheduleMult;
               allocation = MGetTotalAllocation();

               times(&endtime);
               learnTime += endtime.tms_utime - starttime.tms_utime;

               dt = VFDTGetLearnedTree(vfdt);

               /* flush and close the prune file before the tests run */
               if(gREPrune && !gCachePruneSet) {
                  fflush(pruneSet);
                  fsync(fileno(pruneSet));
                  fclose(pruneSet);
               }

               _doTests(es, dt, VFDTGetNumBoundsUsed(vfdt),
                        VFDTGetNumGrowing(vfdt),
                        seen, learnTime, allocation, 0);

               /* reopen in append mode so later prune examples are kept */
               if(gREPrune && !gCachePruneSet) {
                  _buildSourcePath(fileNames, sizeof(fileNames),
                                   ".prunedata");
                  pruneSet = fopen(fileNames, "a");
                  DebugError(pruneSet == 0,
                             "Unable to open the .prunedata file");
               }

               DecisionTreeFree(dt);
               times(&starttime);
            }
         }

         if(!gStdin) {
            fclose(exampleIn);
         }
      }
   }

   times(&endtime);
   learnTime += endtime.tms_utime - starttime.tms_utime;

   if(gMessageLevel >= 1) {
      printf("done learning...\n");
      printf("   allocation %ld\n", MGetTotalAllocation());
      printf("time %.2lfs\n", ((double)learnTime) / 100);
   }

   allocation = MGetTotalAllocation();
   dt = VFDTGetLearnedTree(vfdt);

   /* flush and close the prune file before the final evaluation */
   if(gREPrune && !gCachePruneSet) {
      fflush(pruneSet);
      fsync(fileno(pruneSet));
      fclose(pruneSet);
   }

   /* the last argument to _doTests is 1 only for the final-output run */
   if(gDoTests) {
      _doTests(es, dt, VFDTGetNumBoundsUsed(vfdt), VFDTGetNumGrowing(vfdt),
               seen, learnTime, allocation, 1);
   } else if(gUseSchedule) {
      _doTests(es, dt, VFDTGetNumBoundsUsed(vfdt), VFDTGetNumGrowing(vfdt),
               seen, learnTime, allocation, 0);
   } else if(gIncrementalReporting) {
      _doIncrementalReport();
   } else {
      DecisionTreePrint(dt, stdout);
   }

   if(gMessageLevel >= 1) {
      printf("allocation %ld\n", MGetTotalAllocation());
      printf("time %.2lfs\n", learnTime / (float)100);
      /* use the accessor, as the rest of this function does */
      printf("num bounds used %ld\n", (long)VFDTGetNumBoundsUsed(vfdt));
      printf("induced leaves by level: ");
      DecisionTreePrintStats(dt, stdout);
      printf("DONE\n");
   }

   return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -