📄 mafia.cpp
字号:
MFIDepth += C->Name->_count; MFISize++;}//////////////////////////////////////////////////////////////////////// Add this node's name bitmap to the MFI list////// @param C the current node/////////////////////////////////////////////////////////////////////void AddToMFI(TreeNode *C) { // copy name bitmap BaseBitmap *name = new BaseBitmap(*C->Name); // add to MFI MFI.push_back(name); // update the end of the relevant C->rEnd++; // Update stat variables MFIDepth += C->Name->_count; MFISize++; SupportCountList.push_back(C->Trans->_count); MFIBySizes[name->_count]++; if (name->_count > maxItemsetSize) maxItemsetSize = name->_count;}//////////////////////////////////////////////////////////////////////// Add this node's name bitmap to the FCI list////// @param C the current node/////////////////////////////////////////////////////////////////////void AddToFCI(TreeNode *C) { // Search HT HashTable::iterator h = HT.find(C->Trans->_count); // If the support of node C is NOT in HashSup if (h == HT.end()) { // Add a new list to the HashSup ItemSet* newList = new ItemSet(); newList->reserve(500); newList->push_back(MFI.size()); HT.insert(HashTable::value_type(C->Trans->_count, newList)); // Else add pointer to last item in iName to HT entry } else { for (ItemSet::reverse_iterator goli = (*h).second->rbegin(); goli != (*h).second->rend(); goli++) if (MFI[*goli]->Superset(C->Name)) return ; // search the table (*h).second->push_back(MFI.size()); } // copy name bitmap BaseBitmap *name = new BaseBitmap(*C->Name); // add to MFI MFI.push_back(name); // Update stat variables MFIDepth += C->Name->_count; MFISize++; SupportCountList.push_back(C->Trans->_count); MFIBySizes[name->_count]++; if (name->_count > maxItemsetSize) maxItemsetSize = name->_count;}int SortLMFI(int rBegin, int rEnd, int sortBy) { int left = rBegin; int right = rEnd - 1; while (left <= right) { while (left < rEnd && !MFI[left]->CheckPosition(sortBy, CountCheckPosition)) left++; while (right >= rBegin && MFI[right]->CheckPosition(sortBy, CountCheckPosition)) right--; if (left < right) { // we are now at a point where MFI[left] is relevant // and MFI[right] is not since left < right, we swap the two BaseBitmap* tempBitmap = MFI[left]; MFI[left] = MFI[right]; MFI[right] = tempBitmap; tempBitmap = NULL; int tempSupport = SupportCountList[left]; SupportCountList[left] = SupportCountList[right]; SupportCountList[right] = tempSupport; left++; right--; } } // the first relevant one for the next node is left return left;}//////////////////////////////////////////////////////////////////////// Determine whether a HUTMFI is true./// - if HUT is in MFI, then HUT is frequent/// and the subtree rooted at this node can be pruned////// @return True if HUT is in the MFI/////////////////////////////////////////////////////////////////////bool CheckHUTMFI(TreeNode *C, int iTAIL) { // for each element i in the tail form {head} U {i} and check for a // superset in the MFI int rBegin = C->rBegin; int rEnd = C->rEnd; for (; iTAIL < C->tEnd; iTAIL++) { rBegin = SortLMFI(rBegin, rEnd, F1[gTail[iTAIL].Item]->Prefix); if (rEnd <= rBegin) return false; } return true;}//////////////////////////////////////////////////////////////////////// Dynamically reorder the elements in the tail by increasing support/// - Expand all children and sort by increasing support/// - Remove infrequent children////// @param C current node/// @param iTAIL index into tail of current node/// @param useComp whether compressed bitmaps should be used/// @param NoChild whether C has any frequent children/// @param AllFreq whether all children are frequent/////////////////////////////////////////////////////////////////////void ReorderTail( TreeNode *C, int &iTAIL, bool useComp, bool &NoChild, bool &AllFreq) { int tailIndex = 0; int lol = 0; // for each tail element for (lol = iTAIL; lol < C->tEnd; lol++) { Bitmap *trans = TransBuffy[C->Depth]; int theCount = 0; // Compress the bitmaps if (useComp && (F1[gTail[lol].Item]->compID != C->compID)) { F1[gTail[lol].Item]->Trans->BuildRelComp( *TransBuffy[projectDepth]); F1[gTail[lol].Item]->compID = C->compID; CountRebuilds++; } // Use the compressed bitmaps if (useComp) { // AND the compressed bitmaps and count the result trans->AndCompOnly( *C->Trans, *F1[gTail[lol].Item]->Trans, CountSmallAnds); theCount = trans->SmallCount(CountCounts); // use the full bitmaps } else { // AND & count the bitmaps trans->AndOnly(*C->Trans, *F1[gTail[lol].Item]->Trans, CountAnds); theCount = trans->Count(CountCounts); } // If the results is frequent if (theCount >= MS) { // if PEP pruning holds if (PEPrune && (trans->_count == C->Trans->_count)) { // Move tail element from tail to head C->Name->Or(*C->Name, *F1[gTail[lol].Item]->Name); CountPEPrunes++; // add tail element to reordered tail } else { NoChild = false; // create new tail element TailBuffy[tailIndex].Count = theCount; TailBuffy[tailIndex].Item = gTail[lol].Item; tailIndex++; } } else AllFreq = false; } sort(TailBuffy, TailBuffy + tailIndex); // Set the begin and end values of the new tail iTAIL = C->tEnd; C->tEnd = iTAIL + tailIndex; C->tBegin = iTAIL; int r = 0; // Copy new tail into next slots for (lol = iTAIL; lol < C->tEnd; lol++) { gTail[lol] = TailBuffy[r]; r++; }}//////////////////////////////////////////////////////////////////////// Simply copy over the tail without expanding any of the children/// for pure DFS (no expansion of all children)////// @param C current node/// @param iTAIL index into tail of current node/////////////////////////////////////////////////////////////////////void NoorderTail( TreeNode *C, int &iTAIL) { // set begin and end tail pointers iTAIL = C->tEnd; C->tEnd = C->tEnd - C->tBegin + C->tEnd; C->tBegin = iTAIL; int r = 0; // copy over old tail to new tail for (int lol = iTAIL; lol < C->tEnd; lol++) { gTail[lol] = gTail[lol - C->tEnd + C->tBegin]; r++; }}//////////////////////////////////////////////////////////////////////// The main MAFIA algorithm function////// @param C the current node/// @param HUT whether this is a HUT check (left most branch)/// @param FHUT [output] whether the HUT is frequent/// @param useComp if compression has been switched on/////////////////////////////////////////////////////////////////////void MAFIA(TreeNode *C, bool HUT, bool &FHUT, bool useComp) { CountNodes++; int iTAIL = C->tBegin; // index into the tail bool NoChild = true; // whether the node has any frequent children bool AllFreq = true; // whether all the children are frequent FHUT = false; // whether this is a FHUT int beforeCountNodes = CountNodes; int frequentTailSize = C->tEnd - iTAIL; if (iTAIL < C->tEnd) { if (C != Root) { if (Reorder) { ReorderTail(C, iTAIL, useComp, NoChild, AllFreq); } else { NoorderTail(C, iTAIL); } } frequentTailSize = C->tEnd - iTAIL; int estimateTail = (int)(frequentTailSize / (double)EstimateDiv); if (estimateTail > 0 && C->Trans->_count != TransCount) { double estimateSubTree = EstimateBuffy[estimateTail].Sum / (double)EstimateBuffy[estimateTail].Count; double support = C->Trans->_count / (double) TransCount; double factor = 11.597 - 29.914 * (support - .52392) * (support - .52392); double cost = abs(factor * frequentTailSize / (1 - 1.2 * support)); //double cost = 5 * frequentTailSize / (1 - support); // check if relative comp should be performed if ((!useComp) && (estimateSubTree > cost)) { // build the relative bitmap for source [node bitmap] (all 1's) C->Trans->BuildSource(); // remember the depth of the FULL bitmap your projecting from projectDepth = C->Depth - 1; // increment the ID C->compID = MAX_compID; MAX_compID++; useComp = true; } } // Candidate generation - extend the Head with the tail elements // We start from the end of the tail and move backwards // Therefore the tail is iterated through in increasing support, // but is stored in decreasing support. while (iTAIL < C->tEnd) { // form a one-extension Bitmap *trans = TransBuffy[C->Depth]; BaseBitmap *name = NameBuffy[C->Depth]; TreeNode *newNode = NodeBuffy[C->Depth]; // create name for the new node name->Or(*C->Name, *F1[gTail[iTAIL].Item]->Name); // compress the bitmaps if warranted if (useComp && (F1[gTail[iTAIL].Item]->compID != C->compID)) { // build the relative for this node F1[gTail[iTAIL].Item]->Trans->BuildRelComp( *TransBuffy[projectDepth]); F1[gTail[iTAIL].Item]->compID = C->compID; CountRebuilds++; } int theCount = 0; // use the compressed bitmaps for ANDing and counting if (useComp) { // AND and count small bitmaps trans->AndCompOnly( *C->Trans, *F1[gTail[iTAIL].Item]->Trans, CountSmallAnds); if (Reorder) trans->_count = gTail[iTAIL].Count; else theCount = trans->SmallCount(CountCounts); } else { // AND and count the full bitmaps trans->AndOnly( *C->Trans, *F1[gTail[iTAIL].Item]->Trans, CountAnds); if (Reorder) trans->_count = gTail[iTAIL].Count; else theCount = trans->Count(CountCounts); } if (!Reorder && PEPrune && (theCount == C->Trans->_count)) { CountPEPrunes++; C->Name->Or(*C->Name, *F1[gTail[iTAIL].Item]->Name); iTAIL++; continue; } // Determine whether this candidate will be a HUT // Conceptually the leftmost branch of the tree is a HUT check if ((iTAIL != C->tBegin) && (C != Root)) HUT = 0; else HUT = 1; if (!AllFreq) HUT = 0; if (Reorder || (theCount >= MS)) { // form the 1-extension node newNode->setTreeNode(name, trans, C->Depth + 1, C->compID, F1[gTail[iTAIL].Item]->Prefix, iTAIL + 1, C->tEnd); // setup the LMFI for the next level; it contains all // itemsets in LMFI for this level that also include the // one we're extending the node with. We do sort of a // quicksort thing to move the relevant itemsets for the // next node to the end of the portion of the MFI relevant // to this node newNode->rEnd = C->rEnd; newNode->rBegin = SortLMFI(C->rBegin, C->rEnd, newNode->Prefix); // Check for HUT in MFI for remaining tail if (HUTMFI && newNode->tBegin != newNode->tEnd && !HUT) { CountHUTMFI++; if (CheckHUTMFI(newNode, newNode->tBegin)) { // stop generation of extensions CountHUTMFISuccess++; AllFreq = false; break; } } NoChild = false; // recurse down the tree MAFIA(newNode, HUT, FHUT, useComp); // Add those discovered from lower levels to the current LMFI // LMFI_l = LMFI_l \union LMFI_{l+1} // all we need to do is to update the end pointer C->rEnd = newNode->rEnd; } else AllFreq = false; // if this was a successful HUT check if (FHUT) { // keep going up the tree if (HUT) { return ; // reached start of HUT, so stop generation of subtree // rooted at this node } else { FHUT = false; break; } } // Move on the next tail element iTAIL++; } } // if this is a FHUT if (GoFHUT && HUT && AllFreq) { FHUT = true; CountFHUT++; } // if this node is childless and not in MFI if (MethodIsFI) AddToFI(C); else if (MethodIsFCI && C != Root) AddToFCI(C); else if (NoChild && !LMFISuperSet(C)) { AddToMFI(C); } int subtreeSize = CountNodes - beforeCountNodes + 1; int estimateTail = (int)(frequentTailSize / (double)EstimateDiv); if (estimateTail > 0 && C->Trans->_count != TransCount) { EstimateBuffy[estimateTail].Count++; EstimateBuffy[estimateTail].Sum += subtreeSize; }}//////////////////////////////////////////////////////////////////////// Merge repeated itemsets into one combined itemset/// - e.g. if (transaction set of item 4) AND
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -