⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cvfdt.c

📁 数据挖掘方面的源码
💻 C
📖 第 1 页 / 共 4 页
字号:
	  && i != firstAttrib && i != secondAttrib) {	 ExampleGroupStatsIgnoreAttribute(egs, i);	 if(gMessageLevel >= 3) {	   printf("started ignoring an attrib %d index %f, best %f based on %ld examples\n", i, allIndexes[i], firstIndex, ExampleGroupStatsNumExamplesSeen(egs));	 }       }     }   }   MFreePtr(allIndexes);}static void _AddExampleToNode(CVFDTPtr root,                               CVFDTPtr currentNode,			      int level,                              CVFDTExamplePtr ce,			      int count);static void _RebuildSubtree(CVFDTPtr subtreeRoot, CVFDTPtr root, CVFDTExamplePtr ce,			    int count) {   int foundNode = 0;   int modifyNode = 0;   CVFDTPtr currentNode = root;   int level = 0;   while(!foundNode && currentNode != 0) {     if ( currentNode == subtreeRoot )       modifyNode = 1;     if ( modifyNode )       _AddExampleToNode(root, currentNode, level, ce, count);     if(DecisionTreeIsLeaf(currentNode->dtreeNode ))        foundNode = 1;		     else if(DecisionTreeIsNodeGrowing(currentNode->dtreeNode ))        foundNode = 1;          if(!foundNode) {       currentNode = CVFDTOneStepClassify(currentNode, ce->example );       level++;     }   }}void _CVFDTAltInfoFree( CVFDTAltInfoPtr altInfo, int freeSubtree );void CVFDTFree( CVFDTPtr cvfdt ){  int i;//  long nodeid = cvfdt->uid;  VFDTGrowingDataPtr gd;  if ( cvfdt->children != 0 ) {    for ( i = 0; i < VALLength( cvfdt->children ); i++ ) {      CVFDTPtr curChild = VALIndex( cvfdt->children, i );      if ( curChild != 0 )	CVFDTFree( curChild );    }  }  else {    cvfdt->vfdt->numGrowing--;  }    if ( cvfdt->altSubtreeInfos != 0 ) {    CVFDTAltInfoPtr oldTree;    CVFDTAltInfoPtr altTree = VALIndex( cvfdt->altSubtreeInfos, 1 );    _CVFDTAltInfoFree( altTree, 1 );    oldTree = VALIndex( cvfdt->altSubtreeInfos, 0 );    _CVFDTAltInfoFree( oldTree, 0 );    VALFree( cvfdt->altSubtreeInfos );  }    gd = (VFDTGrowingDataPtr)DecisionTreeGetGrowingData( cvfdt->dtreeNode );  ExampleGroupStatsFree(gd->egs);  MFreePtr( (VFDTGrowingDataPtr)gd );  MFreePtr( cvfdt->dtreeNode );  if (cvfdt->children != 0)    VALFree( cvfdt->children );  MFreePtr( cvfdt );}void _CVFDTExampleFree( CVFDTExamplePtr ce ) {  ExampleFree( ce->example );  MFreePtr( ce );}void _CVFDTExampleWrite( FILE* file, CVFDTExamplePtr ce ) {  ExampleWrite( ce->example, file );  fprintf( file, "%ul", ce->oldestNode );}CVFDTAltInfoPtr _CVFDTAltInfoNew( CVFDTPtr subtree ) {  CVFDTAltInfoPtr catp = MNewPtr(sizeof(CVFDTAltInfo));  catp->status = 0;  catp->exampleCount = 0;  catp->numTested = 0;  catp->numErrors = 0;  catp->subtree = subtree;  catp->closestErr = 100.0;  return catp;}void _CVFDTAltInfoFree( CVFDTAltInfoPtr altInfo, int freeSubtree ) {  if ( freeSubtree )    CVFDTFree( altInfo->subtree );  MFreePtr(altInfo);}CVFDTExamplePtr _CVFDTExampleNew( ExamplePtr e ) {  CVFDTExamplePtr ce = MNewPtr(sizeof(CVFDTExample));  ce->example = e;  ce->oldestNode = 0;  ce->inQNode = 0;  return ce;}CVFDTExamplePtr _CVFDTExampleRead( FILE* file, ExampleSpecPtr es  ) {  ExamplePtr e = ExampleRead( file, es );  CVFDTExamplePtr ce = _CVFDTExampleNew( e );  int oldestNode;  fscanf( file, "%ul", &oldestNode );  ce->oldestNode = oldestNode;  return ce;}void _PrimeAlternateTree( CVFDTPtr root, CVFDTPtr curNode, int count ) {  int i, j, numExamples;  CVFDTExamplePtr ce;  FILE* curFile;  char* fileName;  int numCacheFiles = VALLength( gCacheFiles );  for ( j = 0; j < gCurCachePos; j++ ) {    ce = VALIndex( gCache, j );    _RebuildSubtree(curNode, root, ce, count + j);  }  if (gLastFilledCacheIndex >= 0) {    int fileIndex;    if (gCurCacheIndex == 0) {      if (gLastFilledCacheIndex < 4)	return;      fileIndex = gLastFilledCacheIndex;    }          else {      fileIndex = (gCurCacheIndex - 5 + numCacheFiles)%numCacheFiles;    }    for ( i = 0; i < 5; i++) {      fileName = VALIndex( gCacheFiles, fileIndex );      curFile = fopen( fileName, "r" );      fscanf( curFile, "%d ", &numExamples );      for ( j = 0; j < numExamples; j++ ) {	ce = _CVFDTExampleRead( curFile, curNode->vfdt->spec );	_RebuildSubtree( curNode, root, ce, count + j );	_CVFDTExampleFree( ce );      }      fileIndex++;      fileIndex = fileIndex % numCacheFiles;      fclose( curFile );    }  }    }void _StartAlternateTree( CVFDTPtr root, CVFDTPtr parent, int nodeIndex, CVFDTPtr curNode, int attNum) {    //int i;  VFDTGrowingDataPtr parentGD;  AttributeTrackerPtr at;  AttributeTrackerPtr newAT;  VFDTGrowingDataPtr newGD;  int parentClass;  DecisionTreePtr dt;  CVFDTAltInfoPtr newAltInfo;  CVFDTPtr newRoot;  CVFDTAltInfoPtr oldAltInfo;  if ( parent->altSubtreeInfos != 0 ) {    return;  }  // Place current tree in parent's alt tree info  gNumNewAlts++;  parent->altSubtreeInfos = VALNew();  oldAltInfo =_CVFDTAltInfoNew( curNode );  VALAppend( parent->altSubtreeInfos, oldAltInfo );  // Create alternate tree  if(gMessageLevel >= 1) {	    printf("Starting alternate tree at %ld\n", curNode->uid );  }      parentGD = (VFDTGrowingDataPtr)(DecisionTreeGetGrowingData( parent->dtreeNode ));  at = ExampleGroupStatsGetAttributeTracker( parentGD->egs );  newAT = AttributeTrackerClone(at);  newGD = MNewPtr(sizeof(VFDTGrowingData));  parentClass = ExampleGroupStatsGetMostCommonClass( parentGD->egs );  dt = DecisionTreeNew( parent->dtreeNode->spec );  newRoot = CVFDTNew( root->vfdt, dt, curNode->uid );  newAltInfo = _CVFDTAltInfoNew( newRoot );  newAltInfo->subtree->nodeId = curNode->nodeId;  newGD->egs = ExampleGroupStatsNew( root->vfdt->spec, newAT );  newGD->seenExamplesCount = 0;  newGD->seenSinceLastProcess = 0;  DecisionTreeSetGrowingData( newAltInfo->subtree->dtreeNode, newGD );  VALAppend( parent->altSubtreeInfos, newAltInfo );    // Cleanup old  /*  if ( curNode->children != 0 ) {    for ( i = 0; i < VALLength( curNode->children ); i++ ) {      CVFDTFree( VALIndex( curNode->children, i ));	    }    VALFree( curNode->children );    curNode->children = 0;  }  */    // Perform first split on the new tree  if (ExampleSpecIsAttributeDiscrete(root->vfdt->spec, attNum))    _DoDiscreteSplit( root, newAltInfo->subtree, attNum );  else {    float indexOne, threshOne, indexTwo, threshTwo;    ExampleGroupStatsEntropyContinuousAttributeSplit(newGD->egs, attNum,						     &indexOne, &threshOne,						     &indexTwo, &threshTwo);    _DoContinuousSplit( root, newAltInfo->subtree, attNum, threshOne );  }  if(gMessageLevel >= 3) {    CVFDTPrint( root, stdout );  }      //_PrimeAlternateTree( root, newAltInfo->subtree );    /*  for ( i = 0; i < VALLength( gCacheFiles ) - 1; i++ ) {    FILE* curFile;    char* fileName;    int numExamples;    int j;    if ( i == gCurCacheIndex )      continue;    if ( i > gLastFilledCacheIndex )      break;    fileName = VALIndex( gCacheFiles, i );    curFile = fopen( fileName, "r" );    fscanf( curFile, "%d ", &numExamples );    for ( j = 0; j < numExamples; j++ ) {      ce = _CVFDTExampleRead( curFile, curNode->vfdt->spec );      _RebuildSubtree( curNode, root, ce );      _CVFDTExampleFree( ce );    }    fclose( curFile );   }*/  }void _CVFDTCheckForQuestionableNodes( CVFDTPtr root, CVFDTPtr curParent){  int i;  CVFDTPtr curNode;  int bestSplitAtt;    // Don't traverse any further in depth, this branch hasn't changed  // or we are at the end of the branch  if ( !curParent->hasChanged ||  ( curParent->children == 0 )) {    return;  }    // For each child node  for ( i = 0; i < VALLength( curParent->children ); i++ ) {    bestSplitAtt = -1;    curNode = VALIndex( curParent->children, i );    bestSplitAtt = _ValidateSplitEntropy(root, curNode );          if ( bestSplitAtt != curNode->dtreeNode->splitAttribute ) {      _StartAlternateTree( root, curParent, i, curNode, bestSplitAtt);    }        _CVFDTCheckForQuestionableNodes( root, curNode );    curParent->hasChanged = 0;  }}static void _ProcessExample(CVFDTPtr cvfdt, CVFDTExamplePtr ce, int count) {  int foundNode = 0;  CVFDTPtr currentNode = cvfdt;  int level = 0;  while(!foundNode && currentNode != 0) {    _AddExampleToNode( cvfdt, currentNode, level, ce, count);    if(DecisionTreeIsLeaf(currentNode->dtreeNode ))       foundNode = 1;    else if(DecisionTreeIsNodeGrowing(currentNode->dtreeNode ))       foundNode = 1;        if(!foundNode) {      currentNode = CVFDTOneStepClassify(currentNode, ce->example);      level++;    }  }}static void _AddExampleToNode(CVFDTPtr root,                              CVFDTPtr currentNode,			      int level,                              CVFDTExamplePtr ce,			      int count ) {  VFDTGrowingDataPtr gd     = (VFDTGrowingDataPtr)DecisionTreeGetGrowingData(currentNode->dtreeNode);  ExampleGroupStatsPtr egs = gd->egs;  // Check for alternate trees  if (currentNode->altSubtreeInfos != 0) {    CVFDTAltInfoPtr oldInfo = VALIndex(currentNode->altSubtreeInfos, 0);    CVFDTAltInfoPtr newInfo = VALIndex(currentNode->altSubtreeInfos, 1);    ce->inQNode = 1;    // Increment counts    oldInfo->exampleCount++;    newInfo->exampleCount++;    // Check accuracy if testing    if ( oldInfo->status == 1) {      oldInfo->numTested++;      if(ExampleGetClass(ce->example) 	 != DecisionTreeClassify(currentNode->dtreeNode, ce->example)) {	oldInfo->numErrors++;      }      newInfo->numTested++;      if(ExampleGetClass(ce->example)	 != DecisionTreeClassify(newInfo->subtree->dtreeNode, ce->example))	newInfo->numErrors++;    }    // Now use example to build tree    // HERE Geoff removed this else...why not, as long as the test is before     //  the example is used for learning    //    else    _ProcessExample(newInfo->subtree, ce, count);    // If it is time to switch between testing and building    if ((oldInfo->exampleCount == gAltTestNum) 	|| (oldInfo->exampleCount == floor(0.9 * (float)gAltTestNum))) {      // Calculate error rate if testing      if (oldInfo->status == 1) {	float oldErrorRate = (float)oldInfo->numErrors / (float)oldInfo->numTested;	float newErrorRate = (float)newInfo->numErrors / (float)newInfo->numTested; 	float errorDiff = newErrorRate - oldErrorRate;	//int j;	//ExampleGroupStatsPtr egs;	if ( gMessageLevel >= 1 ) {	  printf("Node %ld: old %.3f on %d, new %.3f on %d\n",                 oldInfo->subtree->uid,                oldErrorRate, oldInfo->numTested,                newErrorRate, newInfo->numTested);	}	// Replace tree if new tree is better	if (newErrorRate < oldErrorRate) {	  int k;	  int childCount;	  int found = 0;	  if ( gMessageLevel >= 1 ) {	    printf("Switching trees at node %ld\n", currentNode->uid);	  }	  childCount = VALLength( currentNode->children );	  for (k = 0; k < childCount; k++ ) {	    void* cur = VALIndex( currentNode->children, k );	    if (cur == oldInfo->subtree) {	      found = 1;	      break;	    }	  }	  DebugError( found == 0, "Alt subtree not found" );	  //VALRemove( currentNode->children, k );	  VALSet( currentNode->children, k, newInfo->subtree);	  	  found = 0;	  childCount = DecisionTreeGetChildCount( currentNode->dtreeNode );	  for (k = 0; k < childCount; k++ ) {	    DecisionTreePtr childDt 	      = DecisionTreeGetChild( currentNode->dtreeNode, k );	    if (childDt == oldInfo->subtree->dtreeNode) {	      found = 1;	      break;	    }	  }	  DebugError( found == 0, "Alt dtree not found" );	  //VALRemove( currentNode->dtreeNode->children, k );	  VALSet( currentNode->dtreeNode->children, k,		     newInfo->subtree->dtreeNode);	  VALFree( currentNode->altSubtreeInfos );	  currentNode->altSubtreeInfos = 0;	  _CVFDTAltInfoFree( oldInfo, 1 );	  _CVFDTAltInfoFree( newInfo, 0 );	  gSplitLevels[level + 1]++; 	  gNumSwitches++;	}	else if (( newInfo->closestErr + 0.001 < errorDiff) 		 || (oldErrorRate == 0)		 || (newInfo->closestErr == 0 && errorDiff == 0)) {	  if ( gMessageLevel >= 1 ) {	    printf("Pruning alt tree at node %ld\n", currentNode->uid );	  }	  _CVFDTAltInfoFree( oldInfo, 0 );	  _CVFDTAltInfoFree( newInfo, 1 );	  VALFree( currentNode->altSubtreeInfos );	  currentNode->altSubtreeInfos = 0;	  gNumPrunes++;	}	else {	  newInfo->closestErr = errorDiff;	}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -