📄 eclat_bak.cpp
字号:
#ifdef __GNUC__
#include <unistd.h>
#endif
#include<iostream>
#include <stdio.h>
#include <list>
//headers
#include "eclat.h"
#include "timetrack.h"
#include "calcdb.h"
#include "eqclass.h"
#include "stats.h"
#include "chashtable.h"
//global vars
char infile[300];
Dbase_Ctrl_Blk *DCB;
Stats stats;
double MINSUP_PER;
int MINSUPPORT=-1;
int DBASE_MAXITEM;
int DBASE_NUM_TRANS;
//default flags
bool output = false; //don't print freq itemsets
bool output_idlist = false; //don't print idlist
diff_vals diff_type = diff; //default is diffset mode
diff_vals max_diff_type = nodiff; //deafult is to use tidsets for maxtest
sort_vals sort_type = incr; //default is to sort in increasing order
alg_vals alg_type = eclat; //default is to find all freq patterns
closed_vals closed_type = cnone; //default is not to eliminate non-closed
prune_vals prune_type = noprune; //default no pruning
//extern functions
extern void enumerate_max_freq(Eqclass *eq, int iter, idlist &newmax);
extern void enumerate_max_closed_freq(Eqclass *eq, int iter, idlist &newmax);
extern void enumerate_closed_freq(Eqclass *eq, int iter, idlist &newmax);
extern void enumerate_freq(Eqclass *eq, int iter);
extern void form_closed_f2_lists(Eqclass *eq);
extern void form_f2_lists(Eqclass *eq);
extern cHashTable hashtest; //for closed (with chash) testing
void parse_args(int argc, char **argv)
{
extern char * optarg;
int c;
if (argc < 2){
cout << "usage: eclat\n";
cout << "\t-i<infile>\n";
cout << "\t-s<support>\n";
cout << "\t-S<absolute support> (specify -s or -S)\n";
cout << "\t-a<algtype> (0=eclat, 1=charm, 2=basicmax, 3=maxcharm)\n";
cout << "\t-c<closedtype> (0=cnone, 1=chash, 2=cmax)\n";
cout << "\t-d<difftype> (0=nodiff, 1=diff, 2=diff2, 3=diffin)\n";
cout << "\t-m<maxdifftype> (0=nodiff, 1=diff, 2=diff2, 3=diffin)\n";
cout << "\t-p<prunetype> (0=noprune, 1=prune)\n";
cout << "\t-z<sorttype> (0=nosort, 1=incr, 2=decr, 3=incr_noclass)\n";
cout << "\t-o (print patterns?)\n";
cout << "\t-l (output + output_tidlist?)\n";
cout << "\t-b (binary input file?) (default=false)\n";
exit(-1);
}
else{
while ((c=getopt(argc,argv,"a:bc:d:i:lm:op:s:S:z:"))!=-1){
switch(c){
case 'a':
alg_type = (alg_vals) atoi(optarg);
break;
case 'b':
Dbase_Ctrl_Blk::binary_input = true;
break;
case 'c':
closed_type = (closed_vals) atoi(optarg);
break;
case 'd':
diff_type = (diff_vals) atoi(optarg);
break;
case 'i': //input files
sprintf(infile,"%s",optarg);
break;
case 'l': //print idlists along with freq subtrees
output=true;
output_idlist = true;
break;
case 'm':
max_diff_type = (diff_vals) atoi(optarg);
break;
case 'o': //print freq subtrees
output = true;
break;
case 'p':
prune_type = (prune_vals) atoi(optarg);
break;
case 's': //support value for L2
MINSUP_PER = atof(optarg);
break;
case 'S': //absolute support
MINSUPPORT = atoi(optarg);
break;
case 'z':
sort_type = (sort_vals) atoi(optarg);
break;
}
}
}
//cout << "ENUM VALS " << alg_type << " " << diff_type << " "
// << max_diff_type << " " << closed_type << " " << prune_type << endl;
}
ostream & operator<<(ostream& fout, idlist &vec){
for (unsigned int i=0; i < vec.size(); ++i)
fout << vec[i] << " ";
return fout;
}
void get_F1()
{
TimeTracker tt;
double te;
int i, j, it;
const int arysz = 10;
vector<int> itcnt(arysz,0); //count item frequency
tt.Start();
DBASE_MAXITEM=0;
DBASE_NUM_TRANS = 0;
DCB->Cidsum = 0;
while(DCB->get_next_trans())
{
for (i=0; i < DCB->TransSz; ++i){
it = DCB->TransAry[i];
if (it >= DBASE_MAXITEM){
itcnt.resize(it+1,0);
DBASE_MAXITEM = it+1;
//cout << "IT " << DBASE_MAXITEM << endl;
}
++itcnt[it];
}
if (DCB->MaxTransSz < DCB->TransSz) DCB->MaxTransSz = DCB->TransSz;
++DBASE_NUM_TRANS;
DCB->Cidsum += DCB->Cid; //used to initialize hashval for closed set mining
}
//set the value of MINSUPPORT
if (MINSUPPORT == -1)
MINSUPPORT = (int) (MINSUP_PER*DBASE_NUM_TRANS+0.5);
if (MINSUPPORT<1) MINSUPPORT=1;
//cout<<"DBASE_NUM_TRANS : "<< DBASE_NUM_TRANS << endl;
//cout<<"DBASE_MAXITEM : "<< DBASE_MAXITEM << endl;
//cout<<"MINSUPPORT : "<< MINSUPPORT << " (" << MINSUP_PER << ")" << endl;
//count number of frequent items
DCB->NumF1 = 0;
for (i=0; i < DBASE_MAXITEM; ++i)
if (itcnt[i] >= MINSUPPORT)
++DCB->NumF1;
//construct forward and reverse mapping from items to freq items
DCB->FreqIdx = new int [DCB->NumF1];
DCB->FreqMap = new int [DBASE_MAXITEM];
for (i=0,j=0; i < DBASE_MAXITEM; ++i) {
if (itcnt[i] >= MINSUPPORT) {
if (output && alg_type == eclat)
// cout << i << " - " << itcnt[i] << endl;
DCB->FreqIdx[j] = i;
DCB->FreqMap[i] = j;
++j;
}
else DCB->FreqMap[i] = -1;
}
//cout<< "F1 - " << DCB->NumF1 << " " << DBASE_MAXITEM << endl;
DCB->alloc_ParentClass(itcnt);
te = tt.Stop();
stats.add(DBASE_MAXITEM, DCB->NumF1, te);
}
list<Eqclass *> * get_F2()
{
int i,j;
int it1, it2;
TimeTracker tt;
double te;
tt.Start();
list<Eqclass *> *F2list = new list<Eqclass *>;
//itcnt2 is a matrix of pairs p, p.first is count, p.second is flag
int **itcnt2 = new int*[DCB->NumF1];
//unsigned int **itcnt2 = new unsigned int *[DCB->NumF1];
for (i=0; i < DCB->NumF1; ++i){
itcnt2[i] = new int [DCB->NumF1];
//cout << "alloc " << i << " " << itcnt2[i] << endl;
for (j=0; j < DCB->NumF1; ++j){
itcnt2[i][j] = 0;
}
}
while(DCB->get_next_trans())
{
DCB->get_valid_trans();
DCB->make_vertical();
//DCB->print_trans();
//count a pair only once per cid
for (i=0; i < DCB->TransSz; ++i){
it1 = DCB->TransAry[i];
for (j=i+1; j < DCB->TransSz; ++j){
it2 = DCB->TransAry[j];
++itcnt2[it1][it2];
}
}
}
//compute class size
DCB->class_sz = new int[DCB->NumF1];
DCB->F2sum = new int[DCB->NumF1];
for (i=0; i < DCB->NumF1; ++i){
DCB->class_sz[i] = 0;
DCB->F2sum[i] = 0;
}
for (i=0; i < DCB->NumF1; ++i){
for (j=i+1; j < DCB->NumF1; ++j){
if (itcnt2[i][j] >= MINSUPPORT){
// cout << "TEST " << DCB->FreqIdx[i] << " " << DCB->FreqIdx[j]
// << " - " << itcnt2[i][j] << endl;
++DCB->class_sz[i];
++DCB->class_sz[j];
DCB->F2sum[i] += itcnt2[i][j];
DCB->F2sum[j] += itcnt2[i][j];
}
}
}
DCB->sort_ParentClass();
//cout << "sorted order:";
//for (i=0; i < DCB->NumF1; ++i){
// cout << " " << DCB->FreqIdx[DCB->ParentClass[i]->val];
//}
//cout << endl;
// for (i=0; i < DCB->NumF1; ++i)
// cout << " " << DCB->class_sz[DCB->ParentClass[i]->val];
// cout << endl;
// for (i=0; i < DCB->NumF1; ++i)
// cout << " " << DCB->F2sum[DCB->ParentClass[i]->val];
// cout << endl;
// cout << "CLASS SORT " << i << " "
// << DCB->ParentClass[i]->val << " "
// << DCB->FreqIdx[DCB->ParentClass[i]->val] << " "
// << DCB->ParentClass[i]->sup << endl;
int F2cnt=0;
// count frequent patterns and generate eqclass
Eqclass *eq;
int sup;
for (i=0; i < DCB->NumF1; ++i) {
eq = new Eqclass;
eq->prefix().push_back(i);
//if (alg_type == charm)
// eq->closedsupport() = DCB->ParentClass[i]->support();
it1 = DCB->ParentClass[i]->val;
for (j=i+1; j < DCB->NumF1; ++j) {
//cout << "access " << i << " " << j << endl;
it2 = DCB->ParentClass[j]->val;
if (it1 < it2) sup = itcnt2[it1][it2];
else sup = itcnt2[it2][it1];
if (sup >= MINSUPPORT){
++F2cnt;
eq->add_node(j);
if (output && alg_type == eclat)
// cout << DCB->FreqIdx[it1] << " " << DCB->FreqIdx[it2]
// << " - " << sup << endl;
}
}
F2list->push_back(eq);
}
//remap FreqIdx, FreqMap and ParentClass vals in sorted order
//cout << "FREQMAP :\n";
for (i=0; i < DCB->NumF1; ++i){
DCB->FreqMap[DCB->FreqIdx[DCB->ParentClass[i]->val]] = i;
///cout << i << " " << DCB->ParentClass[i]->val << " "
// << DCB->FreqIdx[DCB->ParentClass[i]->val]
// << " " << DCB->FreqMap[DCB->FreqIdx[DCB->ParentClass[i]->val]]
// << endl;
}
for (i=0; i < DBASE_MAXITEM; ++i)
if (DCB->FreqMap[i] != -1){
DCB->FreqIdx[DCB->FreqMap[i]] = i;
//cout << i << " " << DCB->FreqMap[i]
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -