⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 apriori.cpp

📁 Aprioriall的频繁序列发觉源码
💻 CPP
字号:
// Apriori.cpp: implementation of the CApriori class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"

#include "Apriori.h"
#include "HashTree.h"

#include "itemSet.h"
#include "List.h"

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////


//---------------------------------------------------------------------------
//    Class Apriori

// Method that finds all large itemsets for the given set of instances.
void CApriori::FindLargeItemSets(List *instances)
{
    List *kMinusOneSets, *KSets;
    itemSet *current;

    int i = 0, j;

    CHashTree *tree;

    m_samples = instances;
    m_sampleNum = (long)m_samples->size();//数据集中项集个数

    necSupport = (long)(m_minSupport*(double)m_sampleNum + 0.5);

    KSets = singletons();
    if (KSets->size() == 0)
        return;

	if(m_samples->size() <= 0)
		return;

    // Find k>=2 large itemsets
    do
    {
        m_Ls->add(KSets);
        kMinusOneSets = KSets;
        
        KSets = selfjoin(kMinusOneSets, i);
		delete kMinusOneSets;

        tree = (CHashTree *)new CHashTree();

        for(j = 0; j < KSets->size(); j++)
        {
            current = (itemSet *)KSets->get(j);
            tree->insert(&(tree->root), (itemSet *)current->clone(), 0);
//            delete current;
        }
        delete KSets;

        j= 0;
        while(j < m_samples->size())
        {
            current = (itemSet *)m_samples->get(j);
            if(current->size() < i+2)
            {
                m_samples->remove(j);
            }
            else
            {
                tree->subset(tree->root, current, 0);
                j++;
            }

//            delete current;
        }
        
        KSets = (List *)new List();
        tree->scan(tree->root, KSets, necSupport);

        delete tree;

        i++;

    } while (KSets->size() > 0);

    return;
}


// find 1 itemset
List * CApriori::singletons()
{
    List *setOfItemSets = (List *)new List();
    itemSet *current, *train;
    long necSupport;
    int i, j;
    int *svector;
    int itemid;


    svector = (int *)new int[pagenum];
    for(i = 0; i < pagenum; i++)
        svector[i] = 0;

    for(i = 0; i < m_samples->size(); i++)
    {
        train = (itemSet *)m_samples->get(i);

        for(j = 0; j < train->size(); j++)
        {
            itemid = train->get(j);

            svector[itemid] += 1;
        }
    }

    necSupport = (long)(m_minSupport*(double)(m_samples->size())+0.5);

    for(i = 0; i < pagenum; i++)
    {
        if((double)svector[i]/(double)(m_samples->size()) >= m_minSupport)
        {
            current = new itemSet();
            current->add(i);
            current->support(svector[i]);
    
            setOfItemSets->add(current);
//            delete current;
        }
    }

    /* delete all the items that are not in 1-large itemset */

    i = 0;
    while (i < m_samples->size())
    {
        train = (itemSet *)m_samples->get(i);

        j = 0;
        while (j < train->size())
        {
            itemid = train->get(j);

            if(svector[itemid] < necSupport)
                train->remove(j);
            else
                j++;
        }

        if(train->size() == 0)
            m_samples->remove(i);
        else
            i++;
    }

    delete svector;

    return setOfItemSets;
}


List * CApriori::selfjoin(List *in, int size)
{
    List *newVector = new List();
    itemSet *first;
    itemSet *second;
    itemSet *result;
    int i, j;
    
    for (i = 0; i < in->size(); i++)
    {
        first = (itemSet *)in->get(i);
        
        for (j = i+1; j < in->size(); j++)
        {
            second = (itemSet *)in->get(j);

            if((result = join(first, second, size)) != NULL)
            {
                if(prune(in, result, size))
                {
                    result->support(0);
                    newVector->add(result);
                }
				else
	                delete result;
            }

//            delete second;
        }

//        delete first;
    }
    
    return(newVector);
}

bool CApriori::prune(List *in, itemSet *attend, int size)
{
    itemSet *temp;
    bool result = true;

    for(int i = 0; i < attend->size(); i++)
    {
        temp = (itemSet *)attend->clone();
        temp->remove(i);
        
        if(in->indexOf(temp) < 0)
        {
            delete temp;
            result = false;
            break;
        }
        
        delete temp;
    }
    
    return(result);
}



itemSet * CApriori::join(itemSet *first, itemSet *attend, int size)
{
    itemSet *result = (itemSet *)NULL;
    int i;

    if( ! check(first, attend, size))
        return(result);
        
    result = (itemSet *)new itemSet();
    for(i = 0; i < size; i++)
        result->add(first->get(i));
    
    if(first->get(i) < attend->get(i))
    {
        result->add(first->get(i));
        result->add(attend->get(i));
    }
    else
    {
        result->add(attend->get(i));
        result->add(first->get(i));
    }
    
    return(result);		
}

bool CApriori::check(itemSet *first, itemSet *attend, int size)
{
    for(int i = 0; i < size; i++)
        if(first->get(i) != attend->get(i))
            return(false);
            
    return(true);
}



⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -