⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 eclat_bak.cpp

📁 clique code with sample data set. clique is a data clustering algorithm which follows hierarchical c
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#ifdef __GNUC__
#include <unistd.h>
#endif

#include<iostream>
#include <stdio.h>
#include <list>

//headers
#include "eclat.h"
#include "timetrack.h"
#include "calcdb.h"
#include "eqclass.h"
#include "stats.h"
#include "chashtable.h"

//global vars
char infile[300];
Dbase_Ctrl_Blk *DCB;
Stats stats;

double MINSUP_PER;
int MINSUPPORT=-1;
int DBASE_MAXITEM;
int DBASE_NUM_TRANS;

//default flags
bool output = false; //don't print freq itemsets
bool output_idlist = false; //don't print idlist

diff_vals diff_type = diff; //default is diffset mode
diff_vals max_diff_type = nodiff; //deafult is to use tidsets for maxtest
sort_vals sort_type = incr; //default is to sort in increasing order
alg_vals alg_type = eclat; //default is to find all freq patterns
closed_vals closed_type = cnone; //default is not to eliminate non-closed
prune_vals prune_type = noprune; //default no pruning

//extern functions
extern void enumerate_max_freq(Eqclass *eq, int iter, idlist &newmax);
extern void enumerate_max_closed_freq(Eqclass *eq, int iter, idlist &newmax);
extern void enumerate_closed_freq(Eqclass *eq, int iter, idlist &newmax);
extern void enumerate_freq(Eqclass *eq, int iter);
extern void form_closed_f2_lists(Eqclass *eq);
extern void form_f2_lists(Eqclass *eq);
extern cHashTable hashtest; //for closed (with chash) testing

void parse_args(int argc, char **argv)
{
   extern char * optarg;
   int c;

   if (argc < 2){
      cout << "usage: eclat\n";
      cout << "\t-i<infile>\n";
      cout << "\t-s<support>\n";
      cout << "\t-S<absolute support> (specify -s or -S)\n";
      cout << "\t-a<algtype> (0=eclat, 1=charm, 2=basicmax, 3=maxcharm)\n";
      cout << "\t-c<closedtype> (0=cnone, 1=chash, 2=cmax)\n";
      cout << "\t-d<difftype> (0=nodiff, 1=diff, 2=diff2, 3=diffin)\n";
      cout << "\t-m<maxdifftype> (0=nodiff, 1=diff, 2=diff2, 3=diffin)\n";
      cout << "\t-p<prunetype> (0=noprune, 1=prune)\n";
      cout << "\t-z<sorttype> (0=nosort, 1=incr, 2=decr, 3=incr_noclass)\n";
      cout << "\t-o (print patterns?)\n";
      cout << "\t-l (output + output_tidlist?)\n";
      cout << "\t-b (binary input file?) (default=false)\n";
      exit(-1);
   }
   else{
     while ((c=getopt(argc,argv,"a:bc:d:i:lm:op:s:S:z:"))!=-1){
       switch(c){
       case 'a':
          alg_type = (alg_vals) atoi(optarg);
          break;
       case 'b':
          Dbase_Ctrl_Blk::binary_input = true;
          break;
       case 'c':
          closed_type = (closed_vals) atoi(optarg);
          break;
       case 'd':
          diff_type = (diff_vals) atoi(optarg);
          break;
       case 'i': //input files
	 sprintf(infile,"%s",optarg);
	 break;
       case 'l': //print idlists along with freq subtrees
          output=true;
          output_idlist = true;
          break;
       case 'm':
          max_diff_type = (diff_vals) atoi(optarg);
          break;
       case 'o': //print freq subtrees
	 output = true;
	 break;
       case 'p':
          prune_type = (prune_vals) atoi(optarg);
          break;
        case 's': //support value for L2
	 MINSUP_PER = atof(optarg);
	 break;
       case 'S': //absolute support
	 MINSUPPORT = atoi(optarg);
	 break;
       case 'z':
          sort_type = (sort_vals) atoi(optarg);
          break;
       }               
     }
   }
   //cout << "ENUM VALS " << alg_type << " " << diff_type << " " 
   //     << max_diff_type << " " << closed_type << " " << prune_type << endl;
}


ostream & operator<<(ostream& fout, idlist &vec){
  for (unsigned int i=0; i < vec.size(); ++i)
     fout << vec[i] << " ";
  return fout;
}


void get_F1()
{
  TimeTracker tt;
  double te;

  int i, j, it;
  const int arysz = 10;
  
  vector<int> itcnt(arysz,0); //count item frequency

  tt.Start();

  DBASE_MAXITEM=0;
  DBASE_NUM_TRANS = 0;
  
  DCB->Cidsum = 0;
  
   while(DCB->get_next_trans())
   {
      for (i=0; i < DCB->TransSz; ++i){
         it = DCB->TransAry[i];

         if (it >= DBASE_MAXITEM){
            itcnt.resize(it+1,0);
            DBASE_MAXITEM = it+1;
            //cout << "IT " << DBASE_MAXITEM << endl;
         }
         ++itcnt[it];
      }
      
      if (DCB->MaxTransSz < DCB->TransSz) DCB->MaxTransSz = DCB->TransSz;     
      ++DBASE_NUM_TRANS;
      DCB->Cidsum += DCB->Cid; //used to initialize hashval for closed set mining
   }
   
   //set the value of MINSUPPORT
   if (MINSUPPORT == -1)
     MINSUPPORT = (int) (MINSUP_PER*DBASE_NUM_TRANS+0.5);
   
   if (MINSUPPORT<1) MINSUPPORT=1;
   //cout<<"DBASE_NUM_TRANS : "<< DBASE_NUM_TRANS << endl;
   //cout<<"DBASE_MAXITEM : "<< DBASE_MAXITEM << endl;
   //cout<<"MINSUPPORT : "<< MINSUPPORT << " (" << MINSUP_PER << ")" << endl;

   //count number of frequent items
   DCB->NumF1 = 0;
   for (i=0; i < DBASE_MAXITEM; ++i)
     if (itcnt[i] >= MINSUPPORT)
       ++DCB->NumF1;

   //construct forward and reverse mapping from items to freq items
   DCB->FreqIdx = new int [DCB->NumF1];
   DCB->FreqMap = new int [DBASE_MAXITEM];
   for (i=0,j=0; i < DBASE_MAXITEM; ++i) {
      if (itcnt[i] >= MINSUPPORT) {
         if (output && alg_type == eclat) 
	   // cout << i << " - " << itcnt[i] << endl;
         DCB->FreqIdx[j] = i;
         DCB->FreqMap[i] = j;
         ++j;
      }
      else DCB->FreqMap[i] = -1;
   }
   
   //cout<< "F1 - " << DCB->NumF1 << " " << DBASE_MAXITEM << endl;  

   DCB->alloc_ParentClass(itcnt);
   
   te = tt.Stop();
   stats.add(DBASE_MAXITEM, DCB->NumF1, te);

}

list<Eqclass *> * get_F2()
{
  int i,j;
  int it1, it2;
  
  TimeTracker tt;
  double te;

  tt.Start();

  list<Eqclass *> *F2list = new list<Eqclass *>;

  //itcnt2 is a matrix of pairs p, p.first is count, p.second is flag
  int **itcnt2 = new int*[DCB->NumF1];

  //unsigned int **itcnt2 = new unsigned int *[DCB->NumF1];
  for (i=0; i < DCB->NumF1; ++i){
    itcnt2[i] = new int [DCB->NumF1];
    //cout << "alloc " << i << " " << itcnt2[i] << endl;
    for (j=0; j < DCB->NumF1; ++j){
      itcnt2[i][j] = 0;
    }
  }
    
   while(DCB->get_next_trans())
   {
      DCB->get_valid_trans();
      DCB->make_vertical();
      //DCB->print_trans();
      //count a pair only once per cid
      for (i=0; i < DCB->TransSz; ++i){
         it1 = DCB->TransAry[i];
         for (j=i+1; j < DCB->TransSz; ++j){
            it2 = DCB->TransAry[j];
            ++itcnt2[it1][it2];
         }
      }
   }                           

   //compute class size
   DCB->class_sz = new int[DCB->NumF1];
   DCB->F2sum = new int[DCB->NumF1];
   for (i=0; i < DCB->NumF1; ++i){
      DCB->class_sz[i] = 0;
      DCB->F2sum[i] = 0;
   }
   
   for (i=0; i < DCB->NumF1; ++i){
      for (j=i+1; j < DCB->NumF1; ++j){
         if (itcnt2[i][j] >= MINSUPPORT){
//              cout << "TEST " << DCB->FreqIdx[i] << " " << DCB->FreqIdx[j] 
//                   << " - " << itcnt2[i][j] << endl;
            ++DCB->class_sz[i];
            ++DCB->class_sz[j];
            DCB->F2sum[i] += itcnt2[i][j];
            DCB->F2sum[j] += itcnt2[i][j];
         }
      }
   }
   
   DCB->sort_ParentClass();
   
   //cout << "sorted order:";
   //for (i=0; i < DCB->NumF1; ++i){
   //   cout << " " << DCB->FreqIdx[DCB->ParentClass[i]->val];
   //}
   //cout << endl;
   //     for (i=0; i < DCB->NumF1; ++i)
   //        cout << " " << DCB->class_sz[DCB->ParentClass[i]->val];
   //     cout << endl;
   //     for (i=0; i < DCB->NumF1; ++i)
   //        cout << " " << DCB->F2sum[DCB->ParentClass[i]->val];
   //     cout << endl;
   
   //        cout << "CLASS SORT " << i << " " 
   //             << DCB->ParentClass[i]->val << " " 
   //             << DCB->FreqIdx[DCB->ParentClass[i]->val] << " "
   //             << DCB->ParentClass[i]->sup << endl;
   
   int F2cnt=0;

   // count frequent patterns and generate eqclass
   Eqclass *eq;
   int sup;
   for (i=0; i < DCB->NumF1; ++i) {
      eq = new Eqclass;
      eq->prefix().push_back(i);
      //if (alg_type == charm) 
      //   eq->closedsupport() = DCB->ParentClass[i]->support();
      it1 = DCB->ParentClass[i]->val;
      for (j=i+1; j < DCB->NumF1; ++j) {
	//cout << "access " << i << " " << j << endl;
         it2 = DCB->ParentClass[j]->val;
         if (it1 < it2) sup = itcnt2[it1][it2];
         else sup = itcnt2[it2][it1];
         if (sup >= MINSUPPORT){
            ++F2cnt;
	    eq->add_node(j);
	    if (output && alg_type == eclat) 
	      // cout << DCB->FreqIdx[it1] << " " << DCB->FreqIdx[it2] 
              //      << " - " << sup << endl;
         }
      }   
      F2list->push_back(eq);
   }
   
   //remap FreqIdx, FreqMap and ParentClass vals in sorted order
   //cout << "FREQMAP :\n";
   for (i=0; i < DCB->NumF1; ++i){
      DCB->FreqMap[DCB->FreqIdx[DCB->ParentClass[i]->val]] = i;
      ///cout << i << " " << DCB->ParentClass[i]->val << " " 
      //     << DCB->FreqIdx[DCB->ParentClass[i]->val] 
      //     << " " << DCB->FreqMap[DCB->FreqIdx[DCB->ParentClass[i]->val]] 
      //     << endl;
   }

   for (i=0; i < DBASE_MAXITEM; ++i)
      if (DCB->FreqMap[i] != -1){
         DCB->FreqIdx[DCB->FreqMap[i]] = i;
         //cout << i << " " << DCB->FreqMap[i] 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -