📄 mafia.cpp
字号:
/// (transaction set item 5) = (transaction set item 5),/// then item 5 is a duplicate of item 4/// due to increasing support/////////////////////////////////////////////////////////////////////void MergeRepeatedItemsets() { NodeList::iterator bali = F1.begin(); Bitmap out(*(*bali)->Trans); int blah = 0; // for each frequent 1-itemset while (bali != F1.end()) { out.FillOnes(); NodeList::iterator noli = bali; noli++; // search for a copy of the itemset's transaction set while (noli != F1.end()) { // stop when count is no longer the same if ((*bali)->Trans->_count != (*noli)->Trans->_count) break; else { // AND itemsets with the same count out.AndOnly(*(*bali)->Trans, *(*noli)->Trans, CountAnds); out.Count(blah); // check for a duplicate if (out._count == (*noli)->Trans->_count) { (*bali)->Name->Or(*(*bali)->Name, *(*noli)->Name); F1.erase(noli); } else noli++; } } bali++; }}/** @} *//********************************************************************* MAIN*********************************************************************//// @defgroup MainFunction Main Function/// Main function for running the program/** @{ *//////////////////////////////////////////////////////////////////////// main function for the program/////////////////////////////////////////////////////////////////////int main(int argc, char **argv) { // Check parameters if (argc < 5) { cerr << "Usage: " << argv[0] << " [-mfi/-fci/-fi] [min sup (percent)] " << endl; cerr << "\t[-ascii/-binary] [input filename] " << endl; cerr << "\t[output filename (optional)]" << endl; cerr << "Ex: " << argv[0] << " -mfi .5 -ascii connect4.ascii mfi.txt" << endl; cerr << "Ex: " << argv[0] << " -mfi .3 -binary chess.binary" << endl; exit(0); } // time hook time(&total_start); // get the algorithm type method = argv[1]; // Minimum support as a fraction MSF = atof(argv[2]); if (strcmp(argv[3], "-ascii") == 0) F1FromFile(argv[4], true); else if (strcmp(argv[3], "-binary") == 0) F1FromFile(argv[4], false); else { cerr << "File format must be -ascii or -binary" << endl; exit(1); } if (argc == 6) { outputMFI = true; outFilename = argv[5]; } // Create a null node to begin the DFS tree NullTrans = new Bitmap(TransCount); NullTrans->FillOnes(); NullTrans->_count = TransCount; ItemSet NullList; MFIBySizes = new int[MAX_ITEMSET_SIZE]; for (int h = 0; h < MAX_ITEMSET_SIZE; h++) { MFIBySizes[h] = 0; } // Set size of F1 FullF1size = F1size = F1.size(); // if F1 is not empty if (FullF1size != 0) { BaseBitmap *NullName = new BaseBitmap(FullF1size); MFI.reserve(100000); int p = 0; ItemMap = new int[FullF1size]; ItemsetBuffy = new int[FullF1size]; // Rename items in F1 for (NodeList::iterator nli = F1.begin(); nli != F1.end(); nli++) { // store old itemid ItemMap[p] = (*nli)->Prefix; // assign new itemid (*nli)->Prefix = p; // assign name bitmaps (*nli)->Name = new BaseBitmap(FullF1size); (*nli)->Name->FillEmptyPosition(p); (*nli)->Name->_count = 1; p++; } // don't merge equivalent items for FI output if (method.compare("-fi") != 0) { MergeRepeatedItemsets(); } F1size = F1.size(); // Create global tail maxtail = F1size * (F1size + 1) / 2; gTail = new TailElement[maxtail]; // Create buffer for sorting TailBuffy = new TailElement[F1size]; // Create buffer for estimating size of each subtree EstimateSize = (int)ceil(F1size / (double)EstimateDiv); EstimateBuffy = new SubtreeEstimate[EstimateSize]; for (int estimateIndex = 0; estimateIndex < EstimateSize; estimateIndex++) { EstimateBuffy[estimateIndex].Count = 1; EstimateBuffy[estimateIndex].Sum = estimateIndex * EstimateDiv * estimateIndex * EstimateDiv / 2; } // Initialize global tail int uu; for (uu = 0; uu < maxtail; uu++) { gTail[uu].Item = -1; gTail[uu].Count = 0; } // Fill global tail for (uu = 0; uu < F1size; uu++) { gTail[uu].Item = uu; gTail[uu].Count = F1[uu]->Trans->_count; // assign tail index F1[uu]->tBegin = uu + 1; if (uu == F1size - 1) F1[uu]->tBegin = -1; TempName = new BaseBitmap(FullF1size); // add a buffer element for each item in F1 BaseBitmap *name = new BaseBitmap(FullF1size); NameBuffy.push_back(name); Bitmap *buff = new Bitmap(TransCount); TransBuffy.push_back(buff); TreeNode *newNode = new TreeNode(); NodeBuffy.push_back(newNode); } srand(666); bool FHUT; // start algorithm timer clock_t start, finish; double duration = -1; start = clock(); time(&algorithm_start); // create root node and its associated tail Root = new TreeNode(NullName, NullTrans, 0, 0, -1, 0, F1size); //Nothing is in MFI, so nothing is relevant Root->rBegin = 0; Root->rEnd = 0; // run the appropriate algorithm if (method.compare("-fci") == 0) { //cout << "running closure (FCI) algorithm..." << endl; GoFHUT = false; // FHUT flag HUTMFI = false; // HUTMFI flag PEPrune = true; // PEPrune flag Reorder = true; // Reorder flag MethodIsFCI = true; MAFIA(Root, false, FHUT, false); } else if (method.compare("-mfi") == 0) { //cout << "running MFI algorithm..." << endl; GoFHUT = true; // FHUT flag HUTMFI = true; // HUTMFI flag PEPrune = true; // PEPrune flag Reorder = true; // Reorder flag MAFIA(Root, true, FHUT, false); } else if (method.compare("-fi") == 0) { if (outputMFI) { // open output file outFile = new ItemsetOutput(outFilename); if (!outFile->isOpen()) { cerr << "Output file not open!" << endl; exit(1); } } //cout << "running FI algorithm..." << endl; GoFHUT = false; // FHUT flag HUTMFI = false; // HUTMFI flag PEPrune = false; // PEPrune flag Reorder = false; // Reorder flag MethodIsFI = true; MAFIA(Root, false, FHUT, false); if (outputMFI) { delete outFile; } } else { cerr << "Invalid algorithm option!" << endl; exit(0); } finish = clock(); duration = (finish - start) / (double)CLOCKS_PER_SEC; //printf( "Algorithm CPU time: %.3f seconds.\n", duration ); time(&algorithm_finish); algorithm_time = difftime(algorithm_finish, algorithm_start); printf("Algorithm time: %.2f seconds.\n", algorithm_time); } else { MFIBySizes[0]++; } /* // Print out MFI length distribution for (int sizeIndex = 0; sizeIndex <= maxItemsetSize; sizeIndex++) { cout << sizeIndex << " " << MFIBySizes[sizeIndex] << endl; } */ if (outputMFI && !MethodIsFI) { // PrintMFI to a file //cout << "Printing out the mfi..." << endl; time(&print_start); PrintMFI(); time(&print_finish); print_time = difftime(print_finish, print_start); printf("Printing output time: %.2f seconds.\n", print_time); } time(&total_finish); total_time = difftime(total_finish, total_start); printf("Total time: %.2f seconds.\n\n", total_time); // output stat data cout << "MinSup: " << MS << endl; cout << "TransCount: " << TransCount << endl; cout << "F1size: " << FullF1size << endl; if (!MethodIsFI) cout << "MFISize: " << MFI.size() << endl; else cout << "MFISize: " << MFISize << endl; cout << "MFIAvg: " << MFIDepth / (double) MFISize << endl << endl; #ifdef DEBUG cout << "CountNodes: " << CountNodes << endl; cout << "CountAnds: " << CountAnds << endl; cout << "CountSmallAnds: " << CountSmallAnds << endl; cout << "CountRebuilds: " << CountRebuilds << endl; cout << "CountCounts: " << CountCounts << endl << endl; cout << "CountFHUT: " << CountFHUT << endl; cout << "CountHUTMFI: " << CountHUTMFI << endl; cout << "CountHUTMFISuccess: " << CountHUTMFISuccess << endl; cout << "CountCheckPosition: " << CountCheckPosition << endl; cout << "CountPEPrunes: " << CountPEPrunes << endl << endl; #endif return 0;}/** @} *///////////////////////////////////////////////////////////////////////// @mainpage MAFIA Code Documentation/// \image html MafiaLogo.jpg////// \section ContactSection Contact/// - Manuel Calimlim (calimlim@cs.cornell.edu)/// - Johannes Gehrke (johannes@cs.cornell.edu)////// \section DownloadSection Download/// - Main MAFIA webpage/// http://himalaya-tools.sourceforge.net/Mafia////// \section LinuxCompilationSection Linux Compilation/// -# ./configure/// -# make/// - executable is created as 'src/mafia'////// \section WindowsCompilationSection Windows Compilation/// -# Use cygwin and follow the Linux instructions/// or/// -# Use a Windows Compiler or IDE (such as Visual Studio)/// to compile the source code.////// \section DirectoryStructureSection Directory Structure/// <table border=1 cellspacing=5 cellpadding=5>/// <tr>/// <td>admin/</td>/// <td>Contains config files for compiling. Should/// not be altered.</td>/// </tr>/// <tr>/// <td>src/</td>/// <td>Contains all of source code for MAFIA</td>/// </tr>/// <tr>/// <td>src/Transaction.h/// <br>src/Transaction.cpp</td>/// <td>Class for reading transactions from ASCII datasets</td>/// </tr>/// <tr>/// <td>src/ItemsetOutput.h/// <br>src/ItemsetOutput.cpp</td>/// <td>Class for writing itemsets to file output</td>/// </tr>/// <tr>/// <td>src/BaseBitmap.h/// <br>src/BaseBitmap.cpp</td>/// <td>Simple bitmap class for name bitmaps</td>/// </tr>/// <tr>/// <td>src/Bitmap.h/// <br>src/Bitmap.cpp</td>/// <td>Main bitmap class for transaction bitmaps</td>/// </tr>/// <tr>/// <td>src/Mafia.cpp</td>/// <td>Main class file with most of the MAFIA code</td>/// </tr>/// <tr>/// <td>src/Tables.h</td>/// <td>Stores precomputed lookup tables (not included in/// documentation due to very large tables)</td>/// </tr>/// <tr>/// <td>src/TreeNode.h</td>/// <td>Class for representing nodes in the search tree</td>/// </tr>/// <tr>/// <td>INSTALL</td>/// <td>Generic installation instructions for Linux/Unix</td>/// </tr>/// <tr>/// <td>mafia.{kdevprj,kdevses}</td>/// <td>KDevelop project files for Linux</td>/// </tr>/// <tr>/// <td>README</td>/// <td>Pointer this page</td>/// </tr>/// </table>////// \section UsageSection Program Usage/// <pre>/// Usage: mafia [-mfi/-fci/-fi] [min sup (percent)]/// [-ascii/-binary] [input filename]/// [output filename (optional)]/// Ex: mafia -mfi .5 -ascii connect4.ascii mfi.txt/// Ex: mafia -mfi .3 -binary chess.binary/// </pre>////// \section InputSection File Input/// Datasets can be in ASCII or binary format./// For ASCII files, the file format must be:/// <pre>/// [item_id_1] [item_id_2] ... [item_id_n]/// </pre>////// Items do not have to be sorted within each transaction./// Items are separated by spaces and each transaction/// should end with a newline, e.g./// <pre>/// 1 4 2/// 2 8 9 4/// 2 5/// </pre>////// For binary files, the file format must be:/// <pre>/// [custid][transid][number of items][itemid_1][itemid_2] ... [itemid_n]/// </pre>////// The custid and transid numbers are ignored at this time./// Since the file is in binary format, all numbers are read/// as integers.////// \section DatasetsSection Datasets/// Download <a href="http://sourceforge.net/project/showfiles.php?group_id=72667">Datasets-ascii.tar.gz</a>/// or <a href="http://sourceforge.net/project/showfiles.php?group_id=72667">Datasets-binary.tar.gz</a>/// for the full set of datasets used for testing:/// - chess.{ascii,binary}/// - connect4.{ascii,binary}/// - mushroom.{ascii,binary}/// - pumsb.{ascii,binary}/// - pumsb_star.{ascii,binary}////// \section OutputSection Program Output/// The frequent itemsets outputted by the program are/// in ASCII form with the following format:/// <pre>/// [item_id_1] [item_id_2] ... [item_id_n] [(support)]/// </pre>/// Ex:/// <pre>/// 28 64 42 60 40 29 52 58 (966)/// 46 64 42 3 25 9 5 48 66 56 34 62 7 36 60 40 29 52 58 (962)/// 39 36 40 29 52 58 (960)/// </pre>/////////////////////////////////////////////////////////////////////
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -