📄 apriori.cpp
字号:
// Apriori.cpp: implementation of the CApriori class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "Apriori.h"
#include "HashTree.h"
#include "itemSet.h"
#include "List.h"
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
//---------------------------------------------------------------------------
// Class Apriori
// Method that finds all large itemsets for the given set of instances.
void CApriori::FindLargeItemSets(List *instances)
{
List *kMinusOneSets, *KSets;
itemSet *current;
int i = 0, j;
CHashTree *tree;
m_samples = instances;
m_sampleNum = (long)m_samples->size();//数据集中项集个数
necSupport = (long)(m_minSupport*(double)m_sampleNum + 0.5);
KSets = singletons();
if (KSets->size() == 0)
return;
if(m_samples->size() <= 0)
return;
// Find k>=2 large itemsets
do
{
m_Ls->add(KSets);
kMinusOneSets = KSets;
KSets = selfjoin(kMinusOneSets, i);
delete kMinusOneSets;
tree = (CHashTree *)new CHashTree();
for(j = 0; j < KSets->size(); j++)
{
current = (itemSet *)KSets->get(j);
tree->insert(&(tree->root), (itemSet *)current->clone(), 0);
// delete current;
}
delete KSets;
j= 0;
while(j < m_samples->size())
{
current = (itemSet *)m_samples->get(j);
if(current->size() < i+2)
{
m_samples->remove(j);
}
else
{
tree->subset(tree->root, current, 0);
j++;
}
// delete current;
}
KSets = (List *)new List();
tree->scan(tree->root, KSets, necSupport);
delete tree;
i++;
} while (KSets->size() > 0);
return;
}
// find 1 itemset
List * CApriori::singletons()
{
List *setOfItemSets = (List *)new List();
itemSet *current, *train;
long necSupport;
int i, j;
int *svector;
int itemid;
svector = (int *)new int[pagenum];
for(i = 0; i < pagenum; i++)
svector[i] = 0;
for(i = 0; i < m_samples->size(); i++)
{
train = (itemSet *)m_samples->get(i);
for(j = 0; j < train->size(); j++)
{
itemid = train->get(j);
svector[itemid] += 1;
}
}
necSupport = (long)(m_minSupport*(double)(m_samples->size())+0.5);
for(i = 0; i < pagenum; i++)
{
if((double)svector[i]/(double)(m_samples->size()) >= m_minSupport)
{
current = new itemSet();
current->add(i);
current->support(svector[i]);
setOfItemSets->add(current);
// delete current;
}
}
/* delete all the items that are not in 1-large itemset */
i = 0;
while (i < m_samples->size())
{
train = (itemSet *)m_samples->get(i);
j = 0;
while (j < train->size())
{
itemid = train->get(j);
if(svector[itemid] < necSupport)
train->remove(j);
else
j++;
}
if(train->size() == 0)
m_samples->remove(i);
else
i++;
}
delete svector;
return setOfItemSets;
}
List * CApriori::selfjoin(List *in, int size)
{
List *newVector = new List();
itemSet *first;
itemSet *second;
itemSet *result;
int i, j;
for (i = 0; i < in->size(); i++)
{
first = (itemSet *)in->get(i);
for (j = i+1; j < in->size(); j++)
{
second = (itemSet *)in->get(j);
if((result = join(first, second, size)) != NULL)
{
if(prune(in, result, size))
{
result->support(0);
newVector->add(result);
}
else
delete result;
}
// delete second;
}
// delete first;
}
return(newVector);
}
bool CApriori::prune(List *in, itemSet *attend, int size)
{
itemSet *temp;
bool result = true;
for(int i = 0; i < attend->size(); i++)
{
temp = (itemSet *)attend->clone();
temp->remove(i);
if(in->indexOf(temp) < 0)
{
delete temp;
result = false;
break;
}
delete temp;
}
return(result);
}
itemSet * CApriori::join(itemSet *first, itemSet *attend, int size)
{
itemSet *result = (itemSet *)NULL;
int i;
if( ! check(first, attend, size))
return(result);
result = (itemSet *)new itemSet();
for(i = 0; i < size; i++)
result->add(first->get(i));
if(first->get(i) < attend->get(i))
{
result->add(first->get(i));
result->add(attend->get(i));
}
else
{
result->add(attend->get(i));
result->add(first->get(i));
}
return(result);
}
bool CApriori::check(itemSet *first, itemSet *attend, int size)
{
for(int i = 0; i < size; i++)
if(first->get(i) != attend->get(i))
return(false);
return(true);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -