⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dci.cc

📁 this will produce the dci program in the src/ directory, or in /path/to/your/installation/bin if y
💻 CC
📖 第 1 页 / 共 3 页
字号:
  if (!DCI_dataset.VD_is_allocated()) {    // trunc and set global pruning mask only if DCP continues    oo.trunc();    counters.set_num_of_trans(oo.get_num_of_trans());    counters.init_global_pruning();  }   T1 count;  int  num_freq=0, k=0;  set_of_frequents<T,T1> *set_freq;  set_freq = new set_of_frequents<T,T1>(2);  T t_mapped[2];  if (write_output) {     // dump to file frequent 2-itemsets     FSout o(OUTF, 2); // Iter=2    if(!o.isOpen()) {      cerr << OUTF << " could not be opened for writing!" << endl;      exit(1);    }    static const int SZ_NUM = 128;    char cand_unmapped[SZ_NUM * (2+1)]; // +1 for storing count    int sz1, sz2, num_written;    // loop over the first item    for (int i=0; i < m1-1; i++) {      t_mapped[0] = i;      memcpy(&cand_unmapped[0], counters.unmap_ascii[i], 	     counters.unmap_ascii_len[i]);      sz1 = counters.unmap_ascii_len[i];    // loop over the second item      for (int j=i+1; j < m1; j++) {	t_mapped[1] = j;      	memcpy(&cand_unmapped[sz1], counters.unmap_ascii[j], 	       counters.unmap_ascii_len[j]);	sz2 = sz1 + counters.unmap_ascii_len[j];	count = c.get_cand_count(k++);	if (count >= (T1) counters.min_count){ 	  // yes, itemset (i,j) is frequent	  num_freq++;	  if (!DCI_dataset.VD_is_allocated()) { 	    // prune only if DCP continues	    counters.incr_global_pruning((unsigned int) i);		    counters.incr_global_pruning((unsigned int) j);	  }	  set_freq->add_itemset(t_mapped, count);		  num_written = sprintf(&cand_unmapped[sz2], "(%d)\n", count);	  o.printSet(cand_unmapped, sz2+num_written);	}      }    }  }  else {     // DON't dump to file frequent 2-itemsets     // loop over the first item    for (int i=0; i < m1-1; i++) {      t_mapped[0] = i;      // loop over the second item      for (int j=i+1; j < m1; j++) {	t_mapped[1] = j;	count = c.get_cand_count(k++);	if (count >= (T1) counters.min_count){ 	  // yes, itemset (i,j) is frequent	  num_freq++;	  if (!DCI_dataset.VD_is_allocated()) { 	    // prune only if DCP continues	    counters.incr_global_pruning((unsigned int) i);		    counters.incr_global_pruning((unsigned int) j);	  }	  set_freq->add_itemset(t_mapped, count);		}      }    }  }    if (!DCI_dataset.VD_is_allocated())   // prune only if DCP continues    counters.end_global_pruning(2);     // parameter: K=2#ifdef VERBOSE  print_statistics("DCP", 2, m1*(m1-1)/2, num_freq, time.ReadChronos());#else  printf("%d\n",num_freq);#endif    return set_freq;}// performs the current iteration with DCP. // If possible it builds VD on the flytemplate <class T, class T1>void DCP_iter(int iter, 	      int max_trans_len, dci_items& counters, 	      DCP_candidates<T,T1>& c, set_of_frequents<T,T1>& next_freq,	      DCI_vertical_dataset<T>& DCI_dataset){  Chronos time;  time.StartChronos();  next_freq.reset(iter);  binFSin<T> ii(TEMPORARY_DATASET);  if(!ii.isOpen()) {    cerr << TEMPORARY_DATASET << " could not be opened!" << endl;    exit(1);  }    bool create=false;  binFSout<T> oo(TEMPORARY_DATASET, create);  if(!oo.isOpen()) {    cerr << TEMPORARY_DATASET << " could not be opened!" << endl;    exit(1);  }  int m1 = counters.get_m1();  dci_transaction<T> t(max_trans_len, iter, m1);  bool is_remapped=false;  if (DCI_dataset.VD_is_allocated()) {    is_remapped = counters.update_map();   // new mapping of items!!!  }  unsigned int n_tr=0;    while(ii.getNextTransaction(t)) {    t.prune_global(counters); // t.t_len = 0 or at least iter    if (t.t_len > 0) {      t.init_prune_local();      c.subset_and_count_and_prune_local(t);      t.prune_local(); // t.t_len = 0 or at least iter+1      if (DCI_dataset.VD_is_allocated()) { // write the trans in VD on the fly	if (is_remapped) {	  for (unsigned int h=0; h<t.t_len; h++)	    t.elements[h] = counters.map[t.elements[h]];	}	DCI_dataset.write_t_in_VD(n_tr, t);	n_tr++;      }      else	oo.writeTransaction(t); // write pruned trans    }  }    if (!DCI_dataset.VD_is_allocated()) {// trunc and prune only if DCP continues    oo.trunc();    counters.set_num_of_trans(oo.get_num_of_trans());    counters.init_global_pruning();  }   // Dump frequents  T1 count;  int  num_freq=0;  T *v, *v_remapped;  v = new T[iter];  v_remapped = new T[iter];  int ind;  T1 tmp_c;  c.init_read_itemsets();  while ((ind=c.read_next_itemset(v, tmp_c)) != -1) {    if ((count = c.get_count(ind)) >= (T1) counters.min_count) {      num_freq++;      if (!DCI_dataset.VD_is_allocated()) { // prune only if DCP continues	for (int i=0; i<iter; i++) {	  counters.incr_global_pruning((unsigned int) v[i]);	}	next_freq.add_itemset(v, count);	      }      else {	if (is_remapped) {	  for (int i=0; i<iter; i++) 	    v_remapped[i] = (T) counters.map[v[i]]; // re-map items	  next_freq.add_itemset(v_remapped, count);	}	else	  next_freq.add_itemset(v, count);      }    }  }    delete [] v;  delete [] v_remapped;  if (DCI_dataset.VD_is_allocated()  && is_remapped)    counters.update_unmap(write_output);   // new mapping of items!!!  if (write_output) { // dump to file frequent itemsets     FSout o(OUTF, iter);    if(!o.isOpen()) {      cerr << OUTF << " could not be opened for writing!" << endl;      exit(1);    }    next_freq.dump_itemsets(counters, o);  }  if (!DCI_dataset.VD_is_allocated()) // prune only if DCP continues    counters.end_global_pruning(iter); #ifdef VERBOSE  print_statistics("DCP", iter, c.get_num_candidates(), 		   num_freq, time.ReadChronos());#else  printf("%d\n",num_freq);#endif}// performs the current iteration with DCI // by using the optimizations for sparse datasetstemplate <class T, class T1>void DCI_iter_diffuse(int iter,dci_items& counters, 		     set_of_frequents<T,T1>& previous_freq, 		     set_of_frequents<T,T1>& next_freq, 		     DCI_vertical_dataset<T>& DCI_dataset){  Chronos time;  time.StartChronos();  static bool pruning_flag = true;  pruning_flag = ! pruning_flag;  next_freq.reset(iter);  if (!previous_freq.init_gen_cand())    return;  static bool first_order=false;  T *cand;  T *CACHE_items;  cand = new T[iter];  CACHE_items = new T[iter];  CACHE_items[0] = counters.get_m1() - 1; // init CACHE - surely different !!!  int num_freq = 0;  int num_cand = 0;  int cand_type;  int count;  previous_freq.get_prefix(cand);  previous_freq.get_suffix(&cand[iter - 2]);  num_cand++;  cand_type = NEW_PREFIX;    DCI_statistics stats;  stats.reset_stats();  DCI_dataset.reset_prune_mask();  DCI_dataset.init_cache(iter);   counters.init_flag_item();  counters.init_first_item_counts();  while (1) {    int start;    if (cand_type == NEW_PREFIX)      start = 0;    else      start = iter - 2;    int prefix_len;    for (prefix_len = start; prefix_len < iter-1; prefix_len++) {      if (cand[prefix_len] != CACHE_items[prefix_len])	break;    }        for (int i = prefix_len; i < iter; i++) { // copy to cache      CACHE_items[i] = cand[i];    }    if (DCI_dataset.candidate_is_frequent_diffuse(cand, prefix_len, iter, 					  counters.min_count, count, 					  stats, pruning_flag)) {      num_freq++;      next_freq.add_itemset(cand, (T1) count);      if (pruning_flag) {	for (int i = 0; i < iter; i++) 	  counters.flag_item[cand[i]] = true;	counters.first_item_counts[cand[0]]++;      }          }     cand_type = previous_freq.next_cand();    if (cand_type == END_GEN_CAND)       break;    else if (cand_type == NEW_SUFFIX)       previous_freq.get_suffix(&cand[iter-2]);    else {      previous_freq.get_prefix(cand);      previous_freq.get_suffix(&cand[iter-2]);    }    num_cand++;     }  int COMPLEXITY_INTERSECTION, COMPLEXITY_PRUNING;  int new_tid_list_size;  if (pruning_flag) {    int sz_list_proj;  // aggiunto    // aggiunto parametro    stats.get_stats(counters.get_active_items(), num_cand, iter,  		    DCI_dataset.get_tid_list_size(),  sz_list_proj,		    COMPLEXITY_INTERSECTION,		    COMPLEXITY_PRUNING);    new_tid_list_size = DCI_dataset.check_pruning();        if ((sz_list_proj > PRUNE_FACTOR_PROJ*DCI_dataset.get_tid_list_size()) && 	(new_tid_list_size<DCI_dataset.get_tid_list_size()*PRUNE_FACTOR_NEWLIST) &&	(COMPLEXITY_PRUNING < COMPLEXITY_INTERSECTION/PRUNE_FACTOR_COMPLEXITY)) {      DCI_dataset.prune_VD (new_tid_list_size, counters);      //      cout << "ORDERING\n";      DCI_dataset.order_bits_diffuse(counters);      first_order = true;    }            //      stats.get_stats(counters.get_active_items(), num_cand, iter,     //  		    DCI_dataset.get_tid_list_size(),     //  		    COMPLEXITY_INTERSECTION,     //  		    COMPLEXITY_PRUNING);        //      new_tid_list_size = DCI_dataset.check_pruning();        //      if ((new_tid_list_size < DCI_dataset.get_tid_list_size() * 0.8) &&     //  	(COMPLEXITY_PRUNING < COMPLEXITY_INTERSECTION/2)) {        //        DCI_dataset.prune_VD (new_tid_list_size, counters);    //        DCI_dataset.order_bits_sparse(counters);    //        first_order = true;    //      }  }    if (first_order == false) {    DCI_dataset.order_bits_diffuse(counters);    first_order = true;  }    delete [] cand;  delete [] CACHE_items;  if (write_output) { // dump to file frequent itemsets     FSout o(OUTF, iter);    if(!o.isOpen()) {      cerr << OUTF << " could not be opened for writing!" << endl;      exit(1);    }    next_freq.dump_itemsets(counters, o);  }#ifdef VERBOSE  print_statistics("DCIs", iter, num_cand, num_freq, time.ReadChronos());#else  printf("%d\n",num_freq);#endif  return;}// performs the current iteration with DCI by using the optimizations for sparse datasets and key patternstemplate <class T, class T1>void  DCI_iter_diffuse_key(int iter,dci_items& counters, 			   set_of_frequents<T,T1>& previous_freq, 			   set_of_frequents<T,T1>& next_freq, 			   DCI_vertical_dataset<T>& DCI_dataset){      Chronos time;  time.StartChronos();  next_freq.reset(iter);  if (!previous_freq.init_gen_cand())    return;  static bool first_order=false;  T *cand, *cand_subset;  T *CACHE_items;  T key_pair[2];  T1 count_pair[2];  cand = new T[iter];  cand_subset = new T[iter-1];  CACHE_items = new T[iter];  CACHE_items[0] = counters.get_m1() - 1; // init CACHE - surely different !!!  int num_freq = 0;  int num_cand = 0;  int cand_type;  int count;    previous_freq.get_prefix(cand);  previous_freq.get_suffix(&cand[iter - 2], key_pair, count_pair);  num_cand++;  cand_type = NEW_PREFIX;    DCI_statistics stats;  stats.reset_stats();  DCI_dataset.reset_prune_mask();  DCI_dataset.init_cache(iter);   counters.init_flag_item();  counters.init_first_item_counts();  T key = 0;  T1 min_count;  T min_key;  int one_search=0;  while (1) {    if ((key_pair[0] != (T) - 1)    ||    (key_pair[1] != (T) - 1)) {// cand is surely not a key pattern       one_search++;            if (cand[iter-2] == key_pair[0]) { // the key pattern is the first generator

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -