📄 cvfdt.c
字号:
&& i != firstAttrib && i != secondAttrib) { ExampleGroupStatsIgnoreAttribute(egs, i); if(gMessageLevel >= 3) { printf("started ignoring an attrib %d index %f, best %f based on %ld examples\n", i, allIndexes[i], firstIndex, ExampleGroupStatsNumExamplesSeen(egs)); } } } } MFreePtr(allIndexes);}static void _AddExampleToNode(CVFDTPtr root, CVFDTPtr currentNode, int level, CVFDTExamplePtr ce, int count);static void _RebuildSubtree(CVFDTPtr subtreeRoot, CVFDTPtr root, CVFDTExamplePtr ce, int count) { int foundNode = 0; int modifyNode = 0; CVFDTPtr currentNode = root; int level = 0; while(!foundNode && currentNode != 0) { if ( currentNode == subtreeRoot ) modifyNode = 1; if ( modifyNode ) _AddExampleToNode(root, currentNode, level, ce, count); if(DecisionTreeIsLeaf(currentNode->dtreeNode )) foundNode = 1; else if(DecisionTreeIsNodeGrowing(currentNode->dtreeNode )) foundNode = 1; if(!foundNode) { currentNode = CVFDTOneStepClassify(currentNode, ce->example ); level++; } }}void _CVFDTAltInfoFree( CVFDTAltInfoPtr altInfo, int freeSubtree );void CVFDTFree( CVFDTPtr cvfdt ){ int i;// long nodeid = cvfdt->uid; VFDTGrowingDataPtr gd; if ( cvfdt->children != 0 ) { for ( i = 0; i < VALLength( cvfdt->children ); i++ ) { CVFDTPtr curChild = VALIndex( cvfdt->children, i ); if ( curChild != 0 ) CVFDTFree( curChild ); } } else { cvfdt->vfdt->numGrowing--; } if ( cvfdt->altSubtreeInfos != 0 ) { CVFDTAltInfoPtr oldTree; CVFDTAltInfoPtr altTree = VALIndex( cvfdt->altSubtreeInfos, 1 ); _CVFDTAltInfoFree( altTree, 1 ); oldTree = VALIndex( cvfdt->altSubtreeInfos, 0 ); _CVFDTAltInfoFree( oldTree, 0 ); VALFree( cvfdt->altSubtreeInfos ); } gd = (VFDTGrowingDataPtr)DecisionTreeGetGrowingData( cvfdt->dtreeNode ); ExampleGroupStatsFree(gd->egs); MFreePtr( (VFDTGrowingDataPtr)gd ); MFreePtr( cvfdt->dtreeNode ); if (cvfdt->children != 0) VALFree( cvfdt->children ); MFreePtr( cvfdt );}void _CVFDTExampleFree( CVFDTExamplePtr ce ) { ExampleFree( ce->example ); MFreePtr( ce );}void _CVFDTExampleWrite( FILE* file, CVFDTExamplePtr ce ) { ExampleWrite( ce->example, file ); fprintf( file, "%ul", ce->oldestNode );}CVFDTAltInfoPtr _CVFDTAltInfoNew( CVFDTPtr subtree ) { CVFDTAltInfoPtr catp = MNewPtr(sizeof(CVFDTAltInfo)); catp->status = 0; catp->exampleCount = 0; catp->numTested = 0; catp->numErrors = 0; catp->subtree = subtree; catp->closestErr = 100.0; return catp;}void _CVFDTAltInfoFree( CVFDTAltInfoPtr altInfo, int freeSubtree ) { if ( freeSubtree ) CVFDTFree( altInfo->subtree ); MFreePtr(altInfo);}CVFDTExamplePtr _CVFDTExampleNew( ExamplePtr e ) { CVFDTExamplePtr ce = MNewPtr(sizeof(CVFDTExample)); ce->example = e; ce->oldestNode = 0; ce->inQNode = 0; return ce;}CVFDTExamplePtr _CVFDTExampleRead( FILE* file, ExampleSpecPtr es ) { ExamplePtr e = ExampleRead( file, es ); CVFDTExamplePtr ce = _CVFDTExampleNew( e ); int oldestNode; fscanf( file, "%ul", &oldestNode ); ce->oldestNode = oldestNode; return ce;}void _PrimeAlternateTree( CVFDTPtr root, CVFDTPtr curNode, int count ) { int i, j, numExamples; CVFDTExamplePtr ce; FILE* curFile; char* fileName; int numCacheFiles = VALLength( gCacheFiles ); for ( j = 0; j < gCurCachePos; j++ ) { ce = VALIndex( gCache, j ); _RebuildSubtree(curNode, root, ce, count + j); } if (gLastFilledCacheIndex >= 0) { int fileIndex; if (gCurCacheIndex == 0) { if (gLastFilledCacheIndex < 4) return; fileIndex = gLastFilledCacheIndex; } else { fileIndex = (gCurCacheIndex - 5 + numCacheFiles)%numCacheFiles; } for ( i = 0; i < 5; i++) { fileName = VALIndex( gCacheFiles, fileIndex ); curFile = fopen( fileName, "r" ); fscanf( curFile, "%d ", &numExamples ); for ( j = 0; j < numExamples; j++ ) { ce = _CVFDTExampleRead( curFile, curNode->vfdt->spec ); _RebuildSubtree( curNode, root, ce, count + j ); _CVFDTExampleFree( ce ); } fileIndex++; fileIndex = fileIndex % numCacheFiles; fclose( curFile ); } } }void _StartAlternateTree( CVFDTPtr root, CVFDTPtr parent, int nodeIndex, CVFDTPtr curNode, int attNum) { //int i; VFDTGrowingDataPtr parentGD; AttributeTrackerPtr at; AttributeTrackerPtr newAT; VFDTGrowingDataPtr newGD; int parentClass; DecisionTreePtr dt; CVFDTAltInfoPtr newAltInfo; CVFDTPtr newRoot; CVFDTAltInfoPtr oldAltInfo; if ( parent->altSubtreeInfos != 0 ) { return; } // Place current tree in parent's alt tree info gNumNewAlts++; parent->altSubtreeInfos = VALNew(); oldAltInfo =_CVFDTAltInfoNew( curNode ); VALAppend( parent->altSubtreeInfos, oldAltInfo ); // Create alternate tree if(gMessageLevel >= 1) { printf("Starting alternate tree at %ld\n", curNode->uid ); } parentGD = (VFDTGrowingDataPtr)(DecisionTreeGetGrowingData( parent->dtreeNode )); at = ExampleGroupStatsGetAttributeTracker( parentGD->egs ); newAT = AttributeTrackerClone(at); newGD = MNewPtr(sizeof(VFDTGrowingData)); parentClass = ExampleGroupStatsGetMostCommonClass( parentGD->egs ); dt = DecisionTreeNew( parent->dtreeNode->spec ); newRoot = CVFDTNew( root->vfdt, dt, curNode->uid ); newAltInfo = _CVFDTAltInfoNew( newRoot ); newAltInfo->subtree->nodeId = curNode->nodeId; newGD->egs = ExampleGroupStatsNew( root->vfdt->spec, newAT ); newGD->seenExamplesCount = 0; newGD->seenSinceLastProcess = 0; DecisionTreeSetGrowingData( newAltInfo->subtree->dtreeNode, newGD ); VALAppend( parent->altSubtreeInfos, newAltInfo ); // Cleanup old /* if ( curNode->children != 0 ) { for ( i = 0; i < VALLength( curNode->children ); i++ ) { CVFDTFree( VALIndex( curNode->children, i )); } VALFree( curNode->children ); curNode->children = 0; } */ // Perform first split on the new tree if (ExampleSpecIsAttributeDiscrete(root->vfdt->spec, attNum)) _DoDiscreteSplit( root, newAltInfo->subtree, attNum ); else { float indexOne, threshOne, indexTwo, threshTwo; ExampleGroupStatsEntropyContinuousAttributeSplit(newGD->egs, attNum, &indexOne, &threshOne, &indexTwo, &threshTwo); _DoContinuousSplit( root, newAltInfo->subtree, attNum, threshOne ); } if(gMessageLevel >= 3) { CVFDTPrint( root, stdout ); } //_PrimeAlternateTree( root, newAltInfo->subtree ); /* for ( i = 0; i < VALLength( gCacheFiles ) - 1; i++ ) { FILE* curFile; char* fileName; int numExamples; int j; if ( i == gCurCacheIndex ) continue; if ( i > gLastFilledCacheIndex ) break; fileName = VALIndex( gCacheFiles, i ); curFile = fopen( fileName, "r" ); fscanf( curFile, "%d ", &numExamples ); for ( j = 0; j < numExamples; j++ ) { ce = _CVFDTExampleRead( curFile, curNode->vfdt->spec ); _RebuildSubtree( curNode, root, ce ); _CVFDTExampleFree( ce ); } fclose( curFile ); }*/ }void _CVFDTCheckForQuestionableNodes( CVFDTPtr root, CVFDTPtr curParent){ int i; CVFDTPtr curNode; int bestSplitAtt; // Don't traverse any further in depth, this branch hasn't changed // or we are at the end of the branch if ( !curParent->hasChanged || ( curParent->children == 0 )) { return; } // For each child node for ( i = 0; i < VALLength( curParent->children ); i++ ) { bestSplitAtt = -1; curNode = VALIndex( curParent->children, i ); bestSplitAtt = _ValidateSplitEntropy(root, curNode ); if ( bestSplitAtt != curNode->dtreeNode->splitAttribute ) { _StartAlternateTree( root, curParent, i, curNode, bestSplitAtt); } _CVFDTCheckForQuestionableNodes( root, curNode ); curParent->hasChanged = 0; }}static void _ProcessExample(CVFDTPtr cvfdt, CVFDTExamplePtr ce, int count) { int foundNode = 0; CVFDTPtr currentNode = cvfdt; int level = 0; while(!foundNode && currentNode != 0) { _AddExampleToNode( cvfdt, currentNode, level, ce, count); if(DecisionTreeIsLeaf(currentNode->dtreeNode )) foundNode = 1; else if(DecisionTreeIsNodeGrowing(currentNode->dtreeNode )) foundNode = 1; if(!foundNode) { currentNode = CVFDTOneStepClassify(currentNode, ce->example); level++; } }}static void _AddExampleToNode(CVFDTPtr root, CVFDTPtr currentNode, int level, CVFDTExamplePtr ce, int count ) { VFDTGrowingDataPtr gd = (VFDTGrowingDataPtr)DecisionTreeGetGrowingData(currentNode->dtreeNode); ExampleGroupStatsPtr egs = gd->egs; // Check for alternate trees if (currentNode->altSubtreeInfos != 0) { CVFDTAltInfoPtr oldInfo = VALIndex(currentNode->altSubtreeInfos, 0); CVFDTAltInfoPtr newInfo = VALIndex(currentNode->altSubtreeInfos, 1); ce->inQNode = 1; // Increment counts oldInfo->exampleCount++; newInfo->exampleCount++; // Check accuracy if testing if ( oldInfo->status == 1) { oldInfo->numTested++; if(ExampleGetClass(ce->example) != DecisionTreeClassify(currentNode->dtreeNode, ce->example)) { oldInfo->numErrors++; } newInfo->numTested++; if(ExampleGetClass(ce->example) != DecisionTreeClassify(newInfo->subtree->dtreeNode, ce->example)) newInfo->numErrors++; } // Now use example to build tree // HERE Geoff removed this else...why not, as long as the test is before // the example is used for learning // else _ProcessExample(newInfo->subtree, ce, count); // If it is time to switch between testing and building if ((oldInfo->exampleCount == gAltTestNum) || (oldInfo->exampleCount == floor(0.9 * (float)gAltTestNum))) { // Calculate error rate if testing if (oldInfo->status == 1) { float oldErrorRate = (float)oldInfo->numErrors / (float)oldInfo->numTested; float newErrorRate = (float)newInfo->numErrors / (float)newInfo->numTested; float errorDiff = newErrorRate - oldErrorRate; //int j; //ExampleGroupStatsPtr egs; if ( gMessageLevel >= 1 ) { printf("Node %ld: old %.3f on %d, new %.3f on %d\n", oldInfo->subtree->uid, oldErrorRate, oldInfo->numTested, newErrorRate, newInfo->numTested); } // Replace tree if new tree is better if (newErrorRate < oldErrorRate) { int k; int childCount; int found = 0; if ( gMessageLevel >= 1 ) { printf("Switching trees at node %ld\n", currentNode->uid); } childCount = VALLength( currentNode->children ); for (k = 0; k < childCount; k++ ) { void* cur = VALIndex( currentNode->children, k ); if (cur == oldInfo->subtree) { found = 1; break; } } DebugError( found == 0, "Alt subtree not found" ); //VALRemove( currentNode->children, k ); VALSet( currentNode->children, k, newInfo->subtree); found = 0; childCount = DecisionTreeGetChildCount( currentNode->dtreeNode ); for (k = 0; k < childCount; k++ ) { DecisionTreePtr childDt = DecisionTreeGetChild( currentNode->dtreeNode, k ); if (childDt == oldInfo->subtree->dtreeNode) { found = 1; break; } } DebugError( found == 0, "Alt dtree not found" ); //VALRemove( currentNode->dtreeNode->children, k ); VALSet( currentNode->dtreeNode->children, k, newInfo->subtree->dtreeNode); VALFree( currentNode->altSubtreeInfos ); currentNode->altSubtreeInfos = 0; _CVFDTAltInfoFree( oldInfo, 1 ); _CVFDTAltInfoFree( newInfo, 0 ); gSplitLevels[level + 1]++; gNumSwitches++; } else if (( newInfo->closestErr + 0.001 < errorDiff) || (oldErrorRate == 0) || (newInfo->closestErr == 0 && errorDiff == 0)) { if ( gMessageLevel >= 1 ) { printf("Pruning alt tree at node %ld\n", currentNode->uid ); } _CVFDTAltInfoFree( oldInfo, 0 ); _CVFDTAltInfoFree( newInfo, 1 ); VALFree( currentNode->altSubtreeInfos ); currentNode->altSubtreeInfos = 0; gNumPrunes++; } else { newInfo->closestErr = errorDiff; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -