⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mafia.cpp

📁 关联规则中的频繁项集生成算法Mafia1.4版本
💻 CPP
📖 第 1 页 / 共 3 页
字号:
///    (transaction set item 5) = (transaction set item 5),///    then item 5 is a duplicate of item 4///    due to increasing support/////////////////////////////////////////////////////////////////////void MergeRepeatedItemsets() {    NodeList::iterator bali = F1.begin();    Bitmap out(*(*bali)->Trans);    int blah = 0;    // for each frequent 1-itemset    while (bali != F1.end()) {        out.FillOnes();        NodeList::iterator noli = bali;        noli++;        // search for a copy of the itemset's transaction set        while (noli != F1.end()) {            // stop when count is no longer the same            if ((*bali)->Trans->_count != (*noli)->Trans->_count)                break;            else {                // AND itemsets with the same count                out.AndOnly(*(*bali)->Trans, *(*noli)->Trans, CountAnds);                out.Count(blah);                // check for a duplicate                if (out._count == (*noli)->Trans->_count) {                    (*bali)->Name->Or(*(*bali)->Name, *(*noli)->Name);                    F1.erase(noli);                } else                    noli++;            }        }        bali++;    }}/** @} *//********************************************************************* MAIN*********************************************************************//// @defgroup MainFunction Main Function/// Main function for running the program/** @{ *//////////////////////////////////////////////////////////////////////// main function for the program/////////////////////////////////////////////////////////////////////int main(int argc, char **argv) {    // Check parameters    if (argc < 5) {        cerr << "Usage: " << argv[0] << " [-mfi/-fci/-fi] [min sup (percent)] " << endl;        cerr << "\t[-ascii/-binary] [input filename] " << endl;        cerr << "\t[output filename (optional)]" << endl;        cerr << "Ex: " << argv[0] << " -mfi .5 -ascii connect4.ascii mfi.txt" << endl;        cerr << "Ex: " << argv[0] << " -mfi .3 -binary chess.binary" << endl;        exit(0);    }    // time hook    time(&total_start);    // get the algorithm type    method = argv[1];    // Minimum support as a fraction    MSF = atof(argv[2]);        if (strcmp(argv[3], "-ascii") == 0)        F1FromFile(argv[4], true);    else if (strcmp(argv[3], "-binary") == 0)        F1FromFile(argv[4], false);    else {        cerr << "File format must be -ascii or -binary" << endl;        exit(1);    }    if (argc == 6) {        outputMFI = true;        outFilename = argv[5];    }        // Create a null node to begin the DFS tree    NullTrans = new Bitmap(TransCount);    NullTrans->FillOnes();    NullTrans->_count = TransCount;    ItemSet NullList;    MFIBySizes = new int[MAX_ITEMSET_SIZE];    for (int h = 0; h < MAX_ITEMSET_SIZE; h++) {        MFIBySizes[h] = 0;    }    // Set size of F1    FullF1size = F1size = F1.size();    // if F1 is not empty    if (FullF1size != 0) {        BaseBitmap *NullName = new BaseBitmap(FullF1size);        MFI.reserve(100000);        int p = 0;        ItemMap = new int[FullF1size];        ItemsetBuffy = new int[FullF1size];        // Rename items in F1        for (NodeList::iterator nli = F1.begin(); nli != F1.end(); nli++) {            // store old itemid            ItemMap[p] = (*nli)->Prefix;            // assign new itemid            (*nli)->Prefix = p;            // assign name bitmaps            (*nli)->Name = new BaseBitmap(FullF1size);            (*nli)->Name->FillEmptyPosition(p);            (*nli)->Name->_count = 1;            p++;        }        // don't merge equivalent items for FI output        if (method.compare("-fi") != 0) {           MergeRepeatedItemsets();        }        F1size = F1.size();        // Create global tail        maxtail = F1size * (F1size + 1) / 2;        gTail = new TailElement[maxtail];        // Create buffer for sorting        TailBuffy = new TailElement[F1size];        // Create buffer for estimating size of each subtree        EstimateSize = (int)ceil(F1size / (double)EstimateDiv);        EstimateBuffy = new SubtreeEstimate[EstimateSize];        for (int estimateIndex = 0; estimateIndex < EstimateSize; estimateIndex++) {            EstimateBuffy[estimateIndex].Count = 1;            EstimateBuffy[estimateIndex].Sum = estimateIndex * EstimateDiv * estimateIndex * EstimateDiv / 2;        }        // Initialize global tail        int uu;        for (uu = 0; uu < maxtail; uu++) {            gTail[uu].Item = -1;            gTail[uu].Count = 0;        }        // Fill global tail        for (uu = 0; uu < F1size; uu++) {            gTail[uu].Item = uu;            gTail[uu].Count = F1[uu]->Trans->_count;            // assign tail index            F1[uu]->tBegin = uu + 1;            if (uu == F1size - 1)                F1[uu]->tBegin = -1;            TempName = new BaseBitmap(FullF1size);            // add a buffer element for each item in F1            BaseBitmap *name = new BaseBitmap(FullF1size);            NameBuffy.push_back(name);            Bitmap *buff = new Bitmap(TransCount);            TransBuffy.push_back(buff);            TreeNode *newNode = new TreeNode();            NodeBuffy.push_back(newNode);        }        srand(666);        bool FHUT;        // start algorithm timer        clock_t start, finish;        double duration = -1;        start = clock();        time(&algorithm_start);        // create root node and its associated tail        Root = new TreeNode(NullName, NullTrans, 0, 0, -1, 0, F1size);        //Nothing is in MFI, so nothing is relevant        Root->rBegin = 0;        Root->rEnd = 0;        // run the appropriate algorithm        if (method.compare("-fci") == 0) {            //cout << "running closure (FCI) algorithm..." << endl;            GoFHUT = false;      // FHUT flag            HUTMFI = false;      // HUTMFI flag            PEPrune = true;      // PEPrune flag            Reorder = true;      // Reorder flag            MethodIsFCI = true;            MAFIA(Root, false, FHUT, false);        } else if (method.compare("-mfi") == 0) {            //cout << "running MFI algorithm..." << endl;            GoFHUT = true;      // FHUT flag            HUTMFI = true;      // HUTMFI flag            PEPrune = true;     // PEPrune flag            Reorder = true;     // Reorder flag            MAFIA(Root, true, FHUT, false);        } else if (method.compare("-fi") == 0) {            if (outputMFI) {                // open output file                outFile = new ItemsetOutput(outFilename);                if (!outFile->isOpen()) {                    cerr << "Output file not open!" << endl;                    exit(1);                }            }            //cout << "running FI algorithm..." << endl;            GoFHUT = false;      // FHUT flag            HUTMFI = false;      // HUTMFI flag            PEPrune = false;     // PEPrune flag            Reorder = false;     // Reorder flag            MethodIsFI = true;            MAFIA(Root, false, FHUT, false);            if (outputMFI) {                delete outFile;            }        } else {            cerr << "Invalid algorithm option!" << endl;            exit(0);        }        finish = clock();        duration = (finish - start) / (double)CLOCKS_PER_SEC;        //printf( "Algorithm CPU time: %.3f seconds.\n", duration );        time(&algorithm_finish);        algorithm_time = difftime(algorithm_finish, algorithm_start);        printf("Algorithm time:        %.2f seconds.\n", algorithm_time);    } else {        MFIBySizes[0]++;    }        /*    // Print out MFI length distribution    for (int sizeIndex = 0; sizeIndex <= maxItemsetSize; sizeIndex++) {        cout << sizeIndex << " " << MFIBySizes[sizeIndex] << endl;    }    */        if (outputMFI && !MethodIsFI) {        // PrintMFI to a file        //cout << "Printing out the mfi..." << endl;        time(&print_start);        PrintMFI();        time(&print_finish);        print_time = difftime(print_finish, print_start);        printf("Printing output time:  %.2f seconds.\n", print_time);    }    time(&total_finish);    total_time = difftime(total_finish, total_start);    printf("Total time:            %.2f seconds.\n\n", total_time);    // output stat data    cout << "MinSup:      " << MS << endl;    cout << "TransCount:  " << TransCount << endl;    cout << "F1size:      " << FullF1size << endl;    if (!MethodIsFI)        cout << "MFISize:     " << MFI.size() << endl;    else        cout << "MFISize:     " << MFISize << endl;            cout << "MFIAvg:      " << MFIDepth / (double) MFISize << endl << endl;    #ifdef DEBUG    cout << "CountNodes: " << CountNodes << endl;    cout << "CountAnds: " << CountAnds << endl;    cout << "CountSmallAnds: " << CountSmallAnds << endl;    cout << "CountRebuilds: " << CountRebuilds << endl;    cout << "CountCounts: " << CountCounts << endl << endl;    cout << "CountFHUT:   " << CountFHUT << endl;    cout << "CountHUTMFI: " << CountHUTMFI << endl;    cout << "CountHUTMFISuccess: " << CountHUTMFISuccess << endl;    cout << "CountCheckPosition:   " << CountCheckPosition << endl;    cout << "CountPEPrunes:        " << CountPEPrunes << endl << endl;    #endif    return 0;}/** @} *///////////////////////////////////////////////////////////////////////// @mainpage MAFIA Code Documentation/// \image html MafiaLogo.jpg////// \section ContactSection Contact/// - Manuel Calimlim (calimlim@cs.cornell.edu)/// - Johannes Gehrke (johannes@cs.cornell.edu)////// \section DownloadSection Download/// - Main MAFIA webpage///   http://himalaya-tools.sourceforge.net/Mafia////// \section LinuxCompilationSection Linux Compilation/// -# ./configure/// -# make///     - executable is created as 'src/mafia'////// \section WindowsCompilationSection Windows Compilation/// -# Use cygwin and follow the Linux instructions/// or/// -# Use a Windows Compiler or IDE (such as Visual Studio)/// to compile the source code.////// \section DirectoryStructureSection Directory Structure/// <table border=1 cellspacing=5 cellpadding=5>/// <tr>/// <td>admin/</td>/// <td>Contains config files for compiling.  Should///     not be altered.</td>/// </tr>/// <tr>/// <td>src/</td>/// <td>Contains all of source code for MAFIA</td>/// </tr>/// <tr>/// <td>src/Transaction.h/// <br>src/Transaction.cpp</td>/// <td>Class for reading transactions from ASCII datasets</td>/// </tr>/// <tr>/// <td>src/ItemsetOutput.h/// <br>src/ItemsetOutput.cpp</td>/// <td>Class for writing itemsets to file output</td>/// </tr>/// <tr>/// <td>src/BaseBitmap.h/// <br>src/BaseBitmap.cpp</td>/// <td>Simple bitmap class for name bitmaps</td>/// </tr>/// <tr>/// <td>src/Bitmap.h/// <br>src/Bitmap.cpp</td>/// <td>Main bitmap class for transaction bitmaps</td>/// </tr>/// <tr>/// <td>src/Mafia.cpp</td>/// <td>Main class file with most of the MAFIA code</td>/// </tr>/// <tr>/// <td>src/Tables.h</td>/// <td>Stores precomputed lookup tables (not included in///    documentation due to very large tables)</td>/// </tr>/// <tr>/// <td>src/TreeNode.h</td>/// <td>Class for representing nodes in the search tree</td>/// </tr>/// <tr>/// <td>INSTALL</td>/// <td>Generic installation instructions for Linux/Unix</td>/// </tr>/// <tr>/// <td>mafia.{kdevprj,kdevses}</td>/// <td>KDevelop project files for Linux</td>/// </tr>/// <tr>/// <td>README</td>/// <td>Pointer this page</td>/// </tr>/// </table>////// \section UsageSection Program Usage/// <pre>/// Usage: mafia [-mfi/-fci/-fi] [min sup (percent)]///         [-ascii/-binary] [input filename]///         [output filename (optional)]/// Ex: mafia -mfi .5 -ascii connect4.ascii mfi.txt/// Ex: mafia -mfi .3 -binary chess.binary/// </pre>////// \section InputSection File Input/// Datasets can be in ASCII or binary format./// For ASCII files, the file format must be:/// <pre>/// [item_id_1] [item_id_2] ... [item_id_n]/// </pre>////// Items do not have to be sorted within each transaction./// Items are separated by spaces and each transaction/// should end with a newline, e.g./// <pre>/// 1 4 2/// 2 8 9 4/// 2 5/// </pre>////// For binary files, the file format must be:/// <pre>/// [custid][transid][number of items][itemid_1][itemid_2] ... [itemid_n]/// </pre>////// The custid and transid numbers are ignored at this time./// Since the file is in binary format, all numbers are read/// as integers.////// \section DatasetsSection Datasets/// Download <a href="http://sourceforge.net/project/showfiles.php?group_id=72667">Datasets-ascii.tar.gz</a>/// or <a href="http://sourceforge.net/project/showfiles.php?group_id=72667">Datasets-binary.tar.gz</a>/// for the full set of datasets used for testing:/// - chess.{ascii,binary}/// - connect4.{ascii,binary}/// - mushroom.{ascii,binary}/// - pumsb.{ascii,binary}/// - pumsb_star.{ascii,binary}////// \section OutputSection Program Output/// The frequent itemsets outputted by the program are/// in ASCII form with the following format:/// <pre>/// [item_id_1] [item_id_2] ... [item_id_n] [(support)]/// </pre>/// Ex:/// <pre>/// 28 64 42 60 40 29 52 58 (966)/// 46 64 42 3 25 9 5 48 66 56 34 62 7 36 60 40 29 52 58 (962)/// 39 36 40 29 52 58 (960)/// </pre>/////////////////////////////////////////////////////////////////////

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -