📄 apriori.cpp
字号:
//---------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "apriori.h"
#include "hash.h"
#include "util.h"
extern int pagenum;
//---------------------------------------------------------------------------
// Class Apriori
// Method that finds all large itemsets for the given set of instances.
void Apriori::FindLargeItemSets(ItemSets *instances)
{
ItemSets *kMinusOneSets, *KSets;
ItemSet *current;
int i = 0, j, k;
HashTree *tree;
m_samples = instances;
m_sampleNum = (long)m_samples->count;
necSupport = (long)(m_minSupport*(double)m_sampleNum + 0.5);
KSets = singletons();
if (KSets->count == 0)
return;
printf("\n 1-Large ItemSet has %d \n", KSets->count);
printf("\n\n %d samples remained \n\n", m_samples->count);
// Find k>=2 large itemsets
do
{
m_Ls->cat(KSets);
kMinusOneSets = KSets;
KSets = selfjoin(kMinusOneSets, i);
printf("\n %d : %d Large Item Sets before pruning\n", i+2, KSets->count);
tree = new HashTree();
for(j = 0; j < KSets->count; j++)
{
current = KSets->itemsetof(j);
tree->insert(&(tree->root), current, 0);
delete current;
}
delete KSets;
j= 0;
while(j < m_samples->count)
{
current = m_samples->itemsetof(j);
if(current->count < i+2)
{
m_samples->remove(j);
}
else
{
tree->subset(tree->root, current, 0);
j++;
}
delete current;
}
KSets = new ItemSets();
tree->scan(tree->root, KSets, necSupport);
printf(" %d Large Itemsets after pruning !\n", KSets->count);
printf("\n\n %d samples remained \n\n", m_samples->count);
delete tree;
i++;
} while (KSets->count > 0);
return;
}
// find 1 itemset
ItemSets * Apriori::singletons()
{
ItemSets *setOfItemSets = new ItemSets();
ItemSet *current, *train;
long necSupport;
int i, j;
long *svector;
int itemid;
svector = new long[pagenum];
for(i = 0; i < pagenum; i++)
svector[i] = 0;
for(i = 0; i < m_samples->count; i++)
{
train = m_samples->pointof(i);
for(j = 0; j < train->count; j++)
{
itemid = train->itemof(j);
svector[itemid] += 1;
}
}
necSupport = (long)(m_minSupport*(double)(m_samples->count) + 0.5);
for(i = 0; i < pagenum; i++)
{
if(svector[i] >= necSupport)
{
current = new ItemSet();
current->add(i);
current->support = svector[i];
setOfItemSets->add(current);
delete current;
}
}
/* delete all the items that are not in 1-lerge itemset */
i = 0;
while (i < m_samples->count)
{
train = m_samples->pointof(i);
j = 0;
while (j < train->count)
{
itemid = train->itemof(j);
if(svector[itemid] < necSupport)
train->remove(j);
else
j++;
}
if(train->count == 0)
m_samples->remove(i);
else
i++;
}
delete svector;
return setOfItemSets;
}
ItemSets * Apriori::selfjoin(ItemSets *in, int size)
{
ItemSets *newVector = new ItemSets();
ItemSet *first;
ItemSet *second;
ItemSet *result;
int i, j;
for (i = 0; i < in->count; i++)
{
first = in->itemsetof(i);
for (j = i+1; j < in->count; j++)
{
second = in->itemsetof(j);
if((result = join(first, second, size)) != NULL)
{
if(prune(in, result, size))
{
result->support = 0;
newVector->add(result);
}
delete result;
}
delete second;
}
delete first;
}
return(newVector);
}
bool Apriori::prune(ItemSets *in, ItemSet *attend, int size)
{
ItemSet *temp;
bool result = true;
for(int i = 0; i < attend->count; i++)
{
temp = attend->clone();
temp->remove(i);
if(in->indexof(temp) < 0)
{
delete temp;
result = true;
break;
}
delete temp;
}
return(result);
}
ItemSet * Apriori::join(ItemSet *first, ItemSet *attend, int size)
{
ItemSet *result = (ItemSet *)NULL;
int i;
if( ! check(first, attend, size))
return(result);
result = (ItemSet *)new ItemSet();
for(i = 0; i < size; i++)
result->add(first->itemof(i));
if(first->itemof(i) < attend->itemof(i))
{
result->add(first->itemof(i));
result->add(attend->itemof(i));
}
else
{
result->add(attend->itemof(i));
result->add(first->itemof(i));
}
return(result);
}
bool Apriori::check(ItemSet *first, ItemSet *attend, int size)
{
for(int i = 0; i < size; i++)
if(first->itemof(i) != attend->itemof(i))
return(false);
return(true);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -