// File: eclat_bak.cpp
// (Code-viewer "font size" control; web page residue, not part of the source.)
// << " " << DCB->FreqIdx[DCB->FreqMap[i]] << endl;
}
//cout << "ORDER :";
for (i=0; i < DCB->NumF1; ++i){
DCB->ParentClass[i]->val = i;
//cout << " " << DCB->FreqIdx[i];
}
//cout << endl;
for (i=0; i < DCB->NumF1; ++i) {
delete [] itcnt2[i];
}
delete [] itcnt2;
delete [] DCB->class_sz;
delete [] DCB->F2sum;
//cout << "F2 - " << F2cnt << " " << DCB->NumF1 * DCB->NumF1 << endl;
te = tt.Stop();
stats.add(DCB->NumF1 * DCB->NumF1, F2cnt, te);
return F2list;
}
// Intersects two id lists: every id found in both *l1 and *l2 is appended to
// *join, and idsum is set to the sum of those ids.
//
// The lists are walked in lock-step, always stepping past the smaller id, so
// the result is only meaningful when both lists are in ascending order
// (NOTE(review): assumed by the algorithm, not checked here).
//
// minsup bounds the work: each list may step past at most (size - minsup) ids
// that have no partner. As soon as either list exceeds that, the walk stops
// and *join holds only the matches found so far.
//
// Returns how the lists relate, judged by whether each side had any unmatched
// (or unvisited) id:
//   equals   - neither side had one
//   subset   - only l2 had one
//   superset - only l1 had one
//   notequal - both had one
subset_vals get_intersect(idlist *l1, idlist *l2, idlist *join, int &idsum,
                          int minsup=0)
{
    idlist &lhs = *l1;
    idlist &rhs = *l2;

    // Largest number of unmatched ids each side is allowed to skip.
    const int budget1 = lhs.size() - minsup;
    const int budget2 = rhs.size() - minsup;

    int missed1 = 0;
    int missed2 = 0;
    unsigned int pos1 = 0;
    unsigned int pos2 = 0;
    idsum = 0;

    for (;;) {
        // Stop when either list is exhausted ...
        if (pos1 >= lhs.size() || pos2 >= rhs.size()) break;
        // ... or when either side has skipped more than its budget.
        if (missed1 > budget1 || missed2 > budget2) break;

        const int id1 = lhs[pos1];
        const int id2 = rhs[pos2];

        if (id1 == id2) {
            // Common id: it belongs to the intersection.
            join->push_back(id1);
            idsum += id1;
            ++pos1;
            ++pos2;
        }
        else if (id1 < id2) {
            ++pos1;
            ++missed1;
        }
        else {
            ++pos2;
            ++missed2;
        }
    }

    // Anything left unvisited on a side counts as a mismatch for that side.
    if (pos1 < lhs.size()) ++missed1;
    if (pos2 < rhs.size()) ++missed2;

    if (missed1 == 0) return (missed2 == 0) ? equals : subset;
    return (missed2 == 0) ? superset : notequal;
}
// Computes l1 - l2: every id of *l1 that has no equal id in *l2 is appended
// to *join, and idsum is set to the sum of the appended ids.
//
// Both lists are walked in lock-step, always stepping past the smaller id, so
// the result is only meaningful when both are in ascending order
// (NOTE(review): assumed by the algorithm, not checked here).
//
// diffmax bounds the filtered walk: once more than diffmax ids have been
// appended the comparison loop stops. The rest of *l1 is then still appended
// WITHOUT being checked against *l2, so in that case *join is larger than the
// true difference.
//
// Returns a relation derived from whether l1 contributed any id and whether
// l2 had any unmatched (or unvisited) id. The subset/superset labels are the
// reverse of the ones get_intersect uses for the same situation:
//   equals   - neither
//   superset - only l2 had unmatched ids
//   subset   - only l1 contributed ids
//   notequal - both
subset_vals get_diff (idlist *l1, idlist *l2, idlist *join,
                      int &idsum, int diffmax=INT_MAX)
{
    idlist &from = *l1;
    idlist &minus = *l2;

    int kept = 0;       // ids of l1 written to join
    int unmatched = 0;  // ids of l2 stepped over without a partner in l1
    unsigned int fi = 0;
    unsigned int mi = 0;
    idsum = 0;

    while (fi < from.size() && mi < minus.size() && kept <= diffmax) {
        const int a = from[fi];
        const int b = minus[mi];

        if (a == b) {
            // Present in both lists: not part of the difference.
            ++fi;
            ++mi;
            continue;
        }

        if (a < b) {
            // a cannot occur later in l2, so it survives the subtraction.
            join->push_back(a);
            ++kept;
            idsum += a;
            ++fi;
        }
        else {
            ++mi;
            ++unmatched;
        }
    }

    // Whatever is left of l1 goes into join as-is.
    for (; fi < from.size(); ++fi) {
        join->push_back(from[fi]);
        idsum += from[fi];
        ++kept;
    }

    // Unvisited ids of l2 count as unmatched.
    if (mi < minus.size()) ++unmatched;

    if (kept == 0) return (unmatched == 0) ? equals : superset;
    return (unmatched == 0) ? subset : notequal;
}
// Fills in join (id list, support and hash value) from l1 and l2 and returns
// the subset relation reported by the underlying list operation.
//
// Depending on the global diff_type and on iter, join->tidset is built either
// as an intersection of the two tidset members or as a difference of them:
//   nodiff : always intersect
//   diffin : always l2 - l1
//   diff2  : l1 - l2 when iter == 2, otherwise l2 - l1
//   diff   : intersect when iter == 2, l1 - l2 when iter == 3,
//            otherwise l2 - l1
// For an intersection, support is the size of the result and the hash value
// is the id sum. For a difference, both are obtained by subtracting the
// result's size / id sum from l1's values, and the subset/superset answer of
// get_diff is swapped before it is returned.
//
// Stats::numjoin is incremented on every call.
subset_vals get_join(Eqnode *l1, Eqnode *l2, Eqnode *join, int iter)
{
    // A difference holding more than this many ids puts join below MINSUPPORT.
    const int diffmax = l1->support() - MINSUPPORT;
    int idsum;
    subset_vals sval = notequal;

    // Step 1: decide which list operation this call needs.
    enum { op_none, op_intersect, op_diff_l1_l2, op_diff_l2_l1 } op = op_none;
    switch (diff_type) {
    case nodiff:
        op = op_intersect;
        break;
    case diffin:
        op = op_diff_l2_l1;
        break;
    case diff2:
        op = (iter == 2) ? op_diff_l1_l2 : op_diff_l2_l1;
        break;
    case diff:
        if (iter == 2) op = op_intersect;
        else op = (iter == 3) ? op_diff_l1_l2 : op_diff_l2_l1;
        break;
    }

    // Step 2: run it and derive support / hash value for join.
    if (op == op_intersect) {
        sval = get_intersect(&l1->tidset, &l2->tidset,
                             &join->tidset, idsum, MINSUPPORT);
        join->support() = join->tidset.size();
        join->hashval() = idsum;
    }
    else if (op != op_none) {
        if (op == op_diff_l1_l2)
            sval = get_diff(&l1->tidset, &l2->tidset, &join->tidset,
                            idsum, diffmax);
        else
            sval = get_diff(&l2->tidset, &l1->tidset, &join->tidset,
                            idsum, diffmax);

        // get_diff labels subset/superset the other way round; flip them.
        if (sval == subset) sval = superset;
        else if (sval == superset) sval = subset;

        join->support() = l1->support() - join->tidset.size();
        join->hashval() = l1->hashval() - idsum;
    }

    ++Stats::numjoin;
    return sval;
}
// Builds join->maxset and join->maxsupport() from the maxset members of l1
// and l2, i.e. the local maximal context of the join (which maximal sets
// contain join as a subset), according to the global max_diff_type:
//   nodiff : maxset is the intersection; maxsupport is its size
//   diff2  : maxset is l1 - l2 when iter == 2, otherwise l2 - l1
//   diffin : maxset is l2 - l1
//   diff   : not supported; prints a message and terminates the program
// For the two difference modes maxsupport is l1's maxsupport minus the size
// of the resulting maxset.
void get_max_join(Eqnode *l1, Eqnode *l2, Eqnode *join, int iter)
{
    int idsum;  // demanded by the list helpers; its value is not used here

    if (max_diff_type == diff) {
        cout << "diff NOT HANDLED\n";
        exit(-1);
    }

    if (max_diff_type == nodiff) {
        get_intersect(&l1->maxset, &l2->maxset, &join->maxset, idsum);
        join->maxsupport() = join->maxset.size();
        return;
    }

    if (max_diff_type == diff2 || max_diff_type == diffin) {
        // Only diff2 at iteration 2 subtracts l2 from l1; every other
        // difference case subtracts l1 from l2.
        const bool l1_minus_l2 = (max_diff_type == diff2 && iter == 2);
        if (l1_minus_l2)
            get_diff(&l1->maxset, &l2->maxset, &join->maxset, idsum);
        else
            get_diff(&l2->maxset, &l1->maxset, &join->maxset, idsum);
        join->maxsupport() = l1->maxsupport() - join->maxset.size();
    }
}
// Consumes F2list front to back. For each equivalence class it builds the
// class's lists and runs the enumeration routine selected by the global
// alg_type, starting at iteration 3. Each class is deleted and removed from
// the list once processed, so F2list is empty on return.
void get_Fk(list<Eqclass *> &F2list){
    const int base_iter = 2;  // the classes handed in are at iteration 2
    idlist maxbuf;            // scratch list passed to the closed/max variants

    for (; !F2list.empty(); F2list.pop_front()) {
        Eqclass *cls = F2list.front();

        switch (alg_type) {
        case eclat:
            form_f2_lists(cls);
            enumerate_freq(cls, base_iter + 1);
            break;

        case charm:
            form_closed_f2_lists(cls);
            maxbuf.clear();
            enumerate_closed_freq(cls, base_iter + 1, maxbuf);
            break;

        case basicmax:
            form_f2_lists(cls);
            maxbuf.clear();
            enumerate_max_freq(cls, base_iter + 1, maxbuf);
            break;

        case maxcharm:
            form_closed_f2_lists(cls);
            maxbuf.clear();
            enumerate_max_closed_freq(cls, base_iter + 1, maxbuf);
            break;
        }

        // The class is released before the loop step pops its list entry.
        delete cls;
    }
}
int main(int argc, char **argv)
{
TimeTracker tt;
tt.Start();
parse_args(argc, argv);
ofstream summary("summary.out", ios::app);
switch(alg_type){
case basicmax: summary << "MAX "; break;
case charm:
summary << "CHARM ";
switch(closed_type){
case cnone: break;
case chash: summary << "CHASH "; break;
case cmax: summary << "CMAX "; break;
}
break;
case maxcharm: summary << "MAXCHARM "; break;
case eclat: summary << "ECLAT "; break;
}
switch(diff_type){
case nodiff: summary << "NODIFF "; break;
case diff: summary << "DIFF "; break;
case diff2: summary << "DIFF2 "; break;
case diffin: summary << "DIFFIN "; break;
}
switch(max_diff_type){
case nodiff: summary << "MNODIFF "; break;
case diff: summary << "MDIFF "; break;
case diff2: summary << "MDIFF2 "; break;
case diffin: summary << "MDIFFIN "; break;
}
switch(sort_type){
case nosort: summary << "NOSORT "; break;
case incr: summary << "INCR "; break;
case incr_noclass: summary << "INCRNOC "; break;
case decr: summary << "DECR "; break;
}
switch(prune_type){
case prune: summary << "PRUNE "; break;
case noprune: break;
}
DCB = new Dbase_Ctrl_Blk(infile);
get_F1();
list<Eqclass *> *F2list = get_F2();
//DCB->print_vertical();
get_Fk(*F2list);
for (unsigned int i=0; i < stats.size(); ++i){
//cout << "F" << i+1 << " - ";
//cout << stats[i].numlarge << " " << stats[i].numcand
// << " " << stats[i].nummax << endl;
}
double tottime = tt.Stop();
stats.tottime = tottime;
summary << infile << " " << MINSUP_PER << " "
<< DBASE_NUM_TRANS << " " << MINSUPPORT << " ";
summary << stats << endl;
summary.close();
//cout << "TIME = " << tottime << endl;
//cout << "NUMMAX = " << stats.summax << endl;
if (closed_type == chash) hashtest.print_hashstats();
exit(0);
}
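// Code-viewer keyboard shortcuts (web page residue, not part of the program):
//   Copy code:        Ctrl + C
//   Search code:      Ctrl + F
//   Full-screen mode: F11
//   Switch theme:     Ctrl + Shift + D
//   Show shortcuts:   ?
//   Increase font:    Ctrl + =
//   Decrease font:    Ctrl + -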