⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 vfdt.c

📁 此算法是数据挖掘中的聚类算法
💻 C
📖 第 1 页 / 共 2 页
字号:
         }
      }

   } else {
      sprintf(fileNames, "%s/%s.test", gSourceDirectory, gFileStem);
      exampleIn = fopen(fileNames, "r");
      DebugError(exampleIn == 0, "Unable to open the .test file");
      
      if(gMessageLevel >= 1) {
         printf("opened test file, starting scan...\n");
      }

      e = ExampleRead(exampleIn, es);
      while(e != 0) {
         if(!ExampleIsClassUnknown(e)) {
            tested++;
            if(ExampleGetClass(e) != DecisionTreeClassify(dt, e)) {
               errors++;
            }
         }
         ExampleFree(e);
         e = ExampleRead(exampleIn, es);
      }
      fclose(exampleIn);
   }

   if(finalOutput) {
      if(gMessageLevel >= 1) {
         printf("Tested %ld examples made %ld errors\n", (long)tested,
               (long)errors);
      }
      printf("%.4f\t%ld\n", ((float)errors/(float)tested) * 100,
                (long)DecisionTreeCountNodes(dt));
   } else {
      printf(">> %ld\t%.4f\t%ld\t%ld\t%.2lf\t%.2f\t%ld\n",
                learnCount,
                ((float)errors/(float)tested) * 100,
                (long)DecisionTreeCountNodes(dt),
                growingNodes,
                ((double)learnTime) / 100,
                ((double)allocation) / (1024 * 1024),
                numBoundsUsed);
   }
   fflush(stdout); 

   if(gOutputTree) {
      sprintf(fileNames, "%s-%lu.tree", gFileStem, learnCount);
      treeOut = fopen(fileNames, "w");
      DecisionTreeWrite(dt, treeOut);
      fclose(treeOut);
   }

   MSetActivePool(oldPool);
}

/*
 * Formats "<gSourceDirectory>/<gFileStem><suffix>" into dst, never writing
 * more than dstSize bytes (the result is always NUL-terminated when
 * dstSize > 0).  If the full name does not fit, the truncation is reported
 * through DebugError instead of silently overrunning the buffer.
 */
static void _buildDataFileName(char *dst, size_t dstSize, const char *suffix) {
   int needed = snprintf(dst, dstSize, "%s/%s%s",
                         gSourceDirectory, gFileStem, suffix);

   DebugError(needed < 0 || (size_t)needed >= dstSize,
              "File name too long for the file name buffer");
}

/*
 * Program entry point.
 *
 * Calls _processArgs(argc, argv) (which presumably fills in the g* option
 * globals used below), reads the example spec from
 * <gSourceDirectory>/<gFileStem>.names, creates and configures a VFDT
 * learner, and feeds it training examples read from
 * <gSourceDirectory>/<gFileStem>.data or from stdin (gStdin).
 *
 * When gREPrune is set, some examples are diverted into a prune set that is
 * either kept in memory (gCachePruneSet) or written to the .prunedata file.
 * After learning, the tree obtained from VFDTGetLearnedTree is tested,
 * reported on, or printed, depending on gDoTests / gUseSchedule /
 * gIncrementalReporting.
 *
 * learnTime accumulates differences of tms_utime values returned by
 * times(), so it is measured in clock ticks.
 * NOTE(review): the "/ 100" conversions to seconds presumably assume 100
 * clock ticks per second -- confirm against sysconf(_SC_CLK_TCK).
 *
 * Returns 0 in all cases.
 */
int main(int argc, char *argv[]) {
   char fileNames[255];

   FILE *exampleIn, *pruneSet = 0;
   ExampleSpecPtr es;
   ExamplePtr e;

   VFDTPtr vfdt;
   DecisionTreePtr dt;

   long seen = 0;

   long learnTime, allocation;

   int iteration;

   struct tms starttime;
   struct tms endtime;

   _processArgs(argc, argv);

   _buildDataFileName(fileNames, sizeof(fileNames), ".names");

   if(gInitialPause) {
      sleep(5);
   }

   es = ExampleSpecRead(fileNames);
   DebugError(es == 0, "Unable to open the .names file");

   RandomInit();

   /* initialize the vfdt */
   vfdt = VFDTNew(es, gSplitConfidence, gTieConfidence);
   VFDTSetUseGini(vfdt, gUseGini);
   VFDTSetProcessChunkSize(vfdt, gChunk);
   VFDTSetMaxAllocationMegs(vfdt, gGrowMegs);
   VFDTSetMessageLevel(vfdt, gMessageLevel);
   DebugSetMessageLevel(gMessageLevel);
   VFDTSetPrePruneTau(vfdt, gPrePruneTau);
   VFDTSetLaplace(vfdt, gLaplace);
   VFDTSetDoBonferonni(vfdt, gDoBonferonni);
   /* these two settings are only touched when the option is switched off */
   if(!gRestartLeaves) {
      VFDTSetRestartLeaves(vfdt, 0);
   }
   if(!gCacheTrainingExamples) {
      VFDTSetCacheTrainingExamples(vfdt, 0);
   }

   if(gMessageLevel >= 1) {
      printf("allocation %ld\n", MGetTotalAllocation());
   }

   /* set up the destination of the prune set: memory list or file */
   if(gREPrune) {
      if(gCachePruneSet) {
         gPruneSet = VALNew();
         gPruneCacheInited = 1;
      } else {
         _buildDataFileName(fileNames, sizeof(fileNames), ".prunedata");
         pruneSet = fopen(fileNames, "w");
         DebugError(pruneSet == 0, "Unable to open the .prunedata file");
      }
   }

   /* input from stdin is limited to a single pass over the data */
   if(gStdin) {
      gRescans = 1;
   }

   learnTime = 0;
   times(&starttime);

   if(gDoBatch) {
      if(gStdin) {
         exampleIn = stdin;
      } else {
         _buildDataFileName(fileNames, sizeof(fileNames), ".data");
         exampleIn = fopen(fileNames, "r");
         DebugError(exampleIn == 0, "Unable to open the data file");
      }

      if(gREPrune) {
         e = ExampleRead(exampleIn, es);
         while(e != 0) {
            seen++;
            /* divert the example to the prune set when the random draw is
             * below gPruneSetPercent and the prune set is not yet full */
            if(RandomDouble() < gPruneSetPercent && 
                  gPruneSetCurrentSize < gPruneSetMaxSize) {
               gPruneSetCurrentSize++;
               if(gCachePruneSet) {
                  /* the in-memory prune set keeps the example */
                  VALAppend(gPruneSet, e);
               } else {
                  ExampleWrite(e, pruneSet);
                  ExampleFree(e);
               }
            } else {
               VFDTProcessExampleBatch(vfdt, e);
            }
            e = ExampleRead(exampleIn, es);
         }
         VFDTBatchExamplesDone(vfdt);

      } else {
         VFDTProcessExamplesBatch(vfdt, exampleIn);
      }

      if(!gStdin) {
         fclose(exampleIn);
      }

   } else { /* not in batch mode */
      seen = 0;
      for(iteration = 0 ; iteration < gRescans ; iteration++) {
         if(gStdin) {
            exampleIn = stdin;
         } else {
            _buildDataFileName(fileNames, sizeof(fileNames), ".data");
            exampleIn = fopen(fileNames, "r");
            DebugError(exampleIn == 0, "Unable to open the .data file");
         }

         e = ExampleRead(exampleIn, es);

         while(e != 0) {
            seen++;

            if(gIncrementalReporting) {
               /* stop the learning clock while the incremental test and
                * report run, then restart it */
               times(&endtime);
               learnTime += endtime.tms_utime - starttime.tms_utime;
   
               _doIncrementalTest(vfdt, es, e);               

               if(seen % 1000 == 0) {
                  _doIncrementalReport();
               }

               times(&starttime);
            }

            /* same prune-set diversion as in batch mode; here the random
             * draw only happens while the prune set still has room */
            if(gREPrune && gPruneSetCurrentSize < gPruneSetMaxSize && 
                                    (RandomDouble() < gPruneSetPercent)) {
               gPruneSetCurrentSize++;
               if(gCachePruneSet) {
                  VALAppend(gPruneSet, e);
               } else {
                  ExampleWrite(e, pruneSet);
                  ExampleFree(e);
               }
            } else {
               VFDTProcessExample(vfdt, e);
            }
            /* e is deliberately not freed here: per the original author's
             * note, ExampleFree(e) must stay disabled when an example cache
             * is in use */

            e = ExampleRead(exampleIn, es);

            /* check to see if it's time to run tests */
            if(gUseSchedule && seen == gScheduleCount) {
               gScheduleCount *= gScheduleMult;
               allocation = MGetTotalAllocation();
               times(&endtime);
               learnTime += endtime.tms_utime - starttime.tms_utime;
               dt = VFDTGetLearnedTree(vfdt);

               /* flush and close the prune file before testing
                * (presumably because _doTests reads it back -- confirm) */
               if(gREPrune && !gCachePruneSet) {
                  fflush(pruneSet);
                  fsync(fileno(pruneSet));
                  fclose(pruneSet);
               }
               _doTests(es, dt, VFDTGetNumBoundsUsed(vfdt), 
                      VFDTGetNumGrowing(vfdt),
                      seen, learnTime, allocation, 0);

               /* reopen in append mode so later prune examples are added
                * after the ones already written */
               if(gREPrune && !gCachePruneSet) {
                  _buildDataFileName(fileNames, sizeof(fileNames),
                                     ".prunedata");
                  pruneSet = fopen(fileNames, "a");
                  DebugError(pruneSet == 0, "Unable to open the .prunedata file");
               }

               DecisionTreeFree(dt);
               times(&starttime);
            }
         }

         if(!gStdin) {
            fclose(exampleIn);
         }
      }
   }

   times(&endtime);
   learnTime += endtime.tms_utime - starttime.tms_utime;

   if(gMessageLevel >= 1) {
      printf("done learning...\n");

      printf("   allocation %ld\n", MGetTotalAllocation());

      printf("time %.2lfs\n", ((double)learnTime) / 100);
   }

   allocation = MGetTotalAllocation();
   dt = VFDTGetLearnedTree(vfdt);


   /* finish the on-disk prune set before the final tests/report */
   if(gREPrune && !gCachePruneSet) {
      fflush(pruneSet);
      fsync(fileno(pruneSet));
      fclose(pruneSet);
   }


   /* final output: the last argument of _doTests is 1 only for gDoTests */
   if(gDoTests) {
      _doTests(es, dt, VFDTGetNumBoundsUsed(vfdt), VFDTGetNumGrowing(vfdt), seen, learnTime, allocation, 1);
   } else if(gUseSchedule) {
      _doTests(es, dt, VFDTGetNumBoundsUsed(vfdt), VFDTGetNumGrowing(vfdt), seen, learnTime, allocation, 0);
   } else if(gIncrementalReporting) {
      _doIncrementalReport();
   } else  {
      DecisionTreePrint(dt, stdout);
   }

   if(gMessageLevel >= 1) {
      printf("allocation %ld\n", MGetTotalAllocation());

      printf("time %.2lfs\n", learnTime / (float)100);

      printf("num bounds used %ld\n", vfdt->numBoundsUsed);

      printf("induced leaves by level: ");
      DecisionTreePrintStats(dt, stdout);

      printf("DONE\n");
   }

   return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -