📄 vfdt.c
字号:
}
}
}
} else {
sprintf(fileNames, "%s/%s.test", gSourceDirectory, gFileStem);
exampleIn = fopen(fileNames, "r");
DebugError(exampleIn == 0, "Unable to open the .test file");
if(gMessageLevel >= 1) {
printf("opened test file, starting scan...\n");
}
e = ExampleRead(exampleIn, es);
while(e != 0) {
if(!ExampleIsClassUnknown(e)) {
tested++;
if(ExampleGetClass(e) != DecisionTreeClassify(dt, e)) {
errors++;
}
}
ExampleFree(e);
e = ExampleRead(exampleIn, es);
}
fclose(exampleIn);
}
if(finalOutput) {
if(gMessageLevel >= 1) {
printf("Tested %ld examples made %ld errors\n", (long)tested,
(long)errors);
}
printf("%.4f\t%ld\n", ((float)errors/(float)tested) * 100,
(long)DecisionTreeCountNodes(dt));
} else {
printf("%ld\t%.4f\t%ld\t%ld\t%.2lf\t%.2f\n",
learnCount,
((float)errors/(float)tested) * 100,
(long)DecisionTreeCountNodes(dt),
growingNodes,
((double)learnTime) / 100,
((double)allocation) / (1024 * 1024));
}
fflush(stdout);
if(gOutputTree) {
sprintf(fileNames, "%s-%lu.tree", gFileStem, learnCount);
treeOut = fopen(fileNames, "w");
DecisionTreeWrite(dt, treeOut);
fclose(treeOut);
}
MSetActivePool(oldPool);
}
/*
 * Program entry point for the VFDT learner.
 *
 * Steps, in order:
 *   1. Parse the command line (_processArgs) and read the example spec from
 *      <gSourceDirectory>/<gFileStem>.names.
 *   2. Create a VFDT and configure it from the g* option globals.
 *   3. If reduced-error pruning is requested (gREPrune), set up the prune
 *      set: an in-memory list when gCachePruneSet is set, otherwise the file
 *      <gSourceDirectory>/<gFileStem>.prune.
 *   4. Learn from stdin (gStdin) or from the .data file, either in batch
 *      mode (gDoBatch) or one example at a time for gRescans passes.  In the
 *      incremental mode, when gUseSchedule is set, _doTests is run every
 *      time `seen` reaches gScheduleCount, which is then multiplied by
 *      gScheduleMult.
 *   5. Fetch the learned tree and either test it (gDoTests / gUseSchedule)
 *      or print it to stdout.
 *
 * Times are accumulated from tms_utime and divided by 100 when printed.
 * NOTE(review): this assumes 100 clock ticks per second -- confirm against
 * sysconf(_SC_CLK_TCK) on the target platform.
 *
 * Always returns 0.
 */
int main(int argc, char *argv[]) {
   char fileNames[255];
   FILE *exampleIn, *pruneSet = NULL;
   ExampleSpecPtr es;
   ExamplePtr e;
   VFDTPtr vfdt;
   DecisionTreePtr dt;
   /* Initialised here because the batch branch below never assigned them
    * before they were incremented / accumulated / passed to _doTests. */
   long seen = 0;
   long learnTime = 0;
   long allocation;
   int iteration;
   struct tms starttime;
   struct tms endtime;

   _processArgs(argc, argv);

   /* snprintf keeps every path write inside fileNames */
   snprintf(fileNames, sizeof(fileNames), "%s/%s.names",
            gSourceDirectory, gFileStem);
   es = ExampleSpecRead(fileNames);
   DebugError(es == 0, "Unable to open the .names file");

   RandomInit();

   /* initialize the vfdt */
   vfdt = VFDTNew(es, gSplitConfidence, gTieConfidence);
   VFDTSetUseGini(vfdt, gUseGini);
   VFDTSetProcessChunkSize(vfdt, gChunk);
   VFDTSetMaxAllocationMegs(vfdt, gGrowMegs);
   VFDTSetMessageLevel(vfdt, gMessageLevel);
   VFDTSetPrePrune(vfdt, gPrePrune);
   VFDTSetAdaptiveDelta(vfdt, gAdaptiveDelta);
   if(!gRestartLeaves) {
      VFDTSetRestartLeaves(vfdt, 0);
   }
   if(!gCacheTrainingExamples) {
      VFDTSetCacheTrainingExamples(vfdt, 0);
   }

   if(gMessageLevel >= 1) {
      printf("allocation %ld\n", MGetTotalAllocation());
   }

   /* set up the prune set: in memory, or spilled to the .prune file */
   if(gREPrune) {
      if(gCachePruneSet) {
         gPruneSet = VALNew();
         gPruneCacheInited = 1;
      } else {
         snprintf(fileNames, sizeof(fileNames), "%s/%s.prune",
                  gSourceDirectory, gFileStem);
         pruneSet = fopen(fileNames, "w");
         DebugError(pruneSet == 0, "Unable to open the .prune file");
      }
   }

   snprintf(fileNames, sizeof(fileNames), "%s/%s.data",
            gSourceDirectory, gFileStem);

   /* input coming from stdin is limited to a single pass */
   if(gStdin) {
      gRescans = 1;
   }

   times(&starttime);

   if(gDoBatch) {
      if(gStdin) {
         exampleIn = stdin;
      } else {
         exampleIn = fopen(fileNames, "r");
         DebugError(exampleIn == 0, "Unable to open the data file");
      }

      if(gREPrune) {
         /* route each example either to the prune set or to the learner */
         e = ExampleRead(exampleIn, es);
         while(e != 0) {
            seen++;
            if(RandomDouble() < gPruneSetPercent) {
               if(gCachePruneSet) {
                  VALAppend(gPruneSet, e);
               } else {
                  ExampleWrite(e, pruneSet);
                  ExampleFree(e);
               }
            } else {
               VFDTProcessExampleBatch(vfdt, e);
            }
            e = ExampleRead(exampleIn, es);
         }
         VFDTBatchExamplesDone(vfdt);
      } else {
         VFDTProcessExamplesBatch(vfdt, exampleIn);
      }

      if(!gStdin) {
         fclose(exampleIn);
      }
   } else {
      for(iteration = 0 ; iteration < gRescans ; iteration++) {
         if(gStdin) {
            exampleIn = stdin;
         } else {
            exampleIn = fopen(fileNames, "r");
            DebugError(exampleIn == 0, "Unable to open the .data file");
         }

         e = ExampleRead(exampleIn, es);
         while(e != 0) {
            seen++;
            if(gREPrune && (RandomDouble() < gPruneSetPercent)) {
               if(gCachePruneSet) {
                  VALAppend(gPruneSet, e);
               } else {
                  ExampleWrite(e, pruneSet);
                  ExampleFree(e);
               }
            } else {
               VFDTProcessExample(vfdt, e);
            }

            /* NOTE: e is deliberately not freed here.  Per the original
             * author's note, freeing it is wrong when the example cache is
             * in use (presumably the VFDT keeps the pointer -- confirm
             * before adding an ExampleFree). */
            e = ExampleRead(exampleIn, es);

            /* check to see if it's time to run tests */
            if(gUseSchedule && seen == gScheduleCount) {
               gScheduleCount *= gScheduleMult;
               allocation = MGetTotalAllocation();

               /* stop the learning clock while the tests run */
               times(&endtime);
               learnTime += endtime.tms_utime - starttime.tms_utime;

               dt = VFDTGetLearnedTree(vfdt);

               /* close the prune file around the tests, then reopen it in
                * append mode so later examples are added after the old ones */
               if(gREPrune && !gCachePruneSet) {
                  fflush(pruneSet);
                  fclose(pruneSet);
               }

               _doTests(es, dt, VFDTGetNumGrowing(vfdt),
                        seen, learnTime, allocation, 0);

               if(gREPrune && !gCachePruneSet) {
                  snprintf(fileNames, sizeof(fileNames), "%s/%s.prune",
                           gSourceDirectory, gFileStem);
                  pruneSet = fopen(fileNames, "a");
                  DebugError(pruneSet == 0, "Unable to open the .prune file");
               }

               DecisionTreeFree(dt);

               /* restart the learning clock */
               times(&starttime);
            }
         }

         if(!gStdin) {
            fclose(exampleIn);
         }
      }
   }

   times(&endtime);
   learnTime += endtime.tms_utime - starttime.tms_utime;

   if(gMessageLevel >= 1) {
      printf("done learning...\n");
      printf(" allocation %ld\n", MGetTotalAllocation());
      printf("time %.2lfs\n", ((double)learnTime) / 100);
   }

   allocation = MGetTotalAllocation();
   dt = VFDTGetLearnedTree(vfdt);

   /* Flush and close the on-disk prune set before the final tests, the same
    * way the scheduled-test path does before it calls _doTests (presumably
    * so the whole prune set is on disk when it is read back). */
   if(gREPrune && !gCachePruneSet) {
      fclose(pruneSet);
      pruneSet = NULL;
   }

   if(gDoTests) {
      times(&starttime);
      _doTests(es, dt, VFDTGetNumGrowing(vfdt), seen, learnTime, allocation, 1);
   } else if(gUseSchedule) {
      times(&starttime);
      _doTests(es, dt, VFDTGetNumGrowing(vfdt), seen, learnTime, allocation, 0);
   } else {
      DecisionTreePrint(dt, stdout);
   }

   if(gMessageLevel >= 1) {
      printf("allocation %ld\n", MGetTotalAllocation());
      times(&endtime);
      printf("time %.2lfs\n", ((double)(endtime.tms_utime) -
                               (double)(starttime.tms_utime)) / 100);
   }

   return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -