
dci.cc

this will produce the dci program in the src/ directory, or in /path/to/your/installation/bin if y
Page 1 of 3
        num_freq++;
        next_freq.add_itemset(cand, (T1) count_pair[1], cand[iter-2]);
        if (!first_order) {
          for (int i = 0; i < iter; i++)
            counters.flag_item[cand[i]] = true;
          counters.first_item_counts[cand[0]]++;
        }
      }
      else if (cand[iter-1] == key_pair[1]) { // the key pattern is the second generator
        num_freq++;
        next_freq.add_itemset(cand, (T1) count_pair[0], cand[iter-1]);
        if (!first_order) {
          for (int i = 0; i < iter; i++)
            counters.flag_item[cand[i]] = true;
          counters.first_item_counts[cand[0]]++;
        }
      }
      else { // the key pattern is another subset: we must find it
        if (key_pair[0] != (T) -1)
          key = key_pair[0];
        else
          key = key_pair[1];

        int j = 0;
        for (int i = 0; i < iter; i++)
          if (cand[i] != key)
            cand_subset[j++] = cand[i];

        T tmp_key;
        if (previous_freq.find_one_subset(cand_subset, tmp_key, count)) {
          num_freq++;
          next_freq.add_itemset(cand, (T1) count, key);
          if (!first_order) {
            for (int i = 0; i < iter; i++)
              counters.flag_item[cand[i]] = true;
            counters.first_item_counts[cand[0]]++;
          }
        }
      }
    } else {
      if (count_pair[0] < count_pair[1]) { // remember min_count and corresponding key between generators
        min_count = count_pair[0];
        min_key = cand[iter - 1];
      }
      else {
        min_count = count_pair[1];
        min_key = cand[iter - 2];
      }

      T other_key;
      bool is_key_pattern = true;
      bool pruned = false;

      for (int del = iter-3; del >= 0; del--) { // look for the subset with minimum count
        int j = 0;
        for (int i = 0; i < iter; i++)
          if (i != del)
            cand_subset[j++] = cand[i];
        if (previous_freq.find_one_subset(cand_subset, other_key, count) == 0) {
          pruned = true;
          break; // a subset is infrequent, prune the cand and take the next one
        }

        if (other_key == (T) -1) { // remember min_count and corresponding key
          if (min_count >= (T1) count) {
            min_count = count;
            min_key = cand[del];
          }
        } else { // cand is not a key pattern
          is_key_pattern = false;
          int j1 = 0;
          for (int i = 0; i < iter; i++)
            if (cand[i] != other_key)
              cand_subset[j1++] = cand[i];

          T tmp_key; // check if the associated subset is frequent
          if (previous_freq.find_one_subset(cand_subset, tmp_key, count)) {
            num_freq++;
            next_freq.add_itemset(cand, (T1) count, other_key);
            if (!first_order) {
              for (int i = 0; i < iter; i++)
                counters.flag_item[cand[i]] = true;
              counters.first_item_counts[cand[0]]++;
            }
            break;
          }
        }
      }

      if (!pruned && is_key_pattern) { // we must count candidate support!
        int prefix_len;
        for (prefix_len = 0; prefix_len < iter-1; prefix_len++) {
          if (cand[prefix_len] != CACHE_items[prefix_len])
            break;
        }
        for (int i = prefix_len; i < iter; i++) { // copy to cache
          CACHE_items[i] = cand[i];
        }
        if (DCI_dataset.candidate_is_frequent_diffuse(cand, prefix_len, iter,
                                                      counters.min_count, count,
                                                      stats, !first_order)) {
          if (count != (int) min_count)
            next_freq.add_itemset(cand, (T1) count, (T) -1);
          else
            next_freq.add_itemset(cand, (T1) count, min_key);
          num_freq++;
          if (!first_order) {
            for (int i = 0; i < iter; i++)
              counters.flag_item[cand[i]] = true;
            counters.first_item_counts[cand[0]]++;
          }
        }
      }
    }

    // generate next candidate
    cand_type = previous_freq.next_cand();
    if (cand_type == END_GEN_CAND)
      break;
    else if (cand_type == NEW_PREFIX)
      previous_freq.get_prefix(cand);
    previous_freq.get_suffix(&cand[iter-2], key_pair, count_pair);
    num_cand++;
  }

  if (first_order == false) {
    DCI_dataset.order_bits_diffuse(counters);
    first_order = true;
  }

  delete [] cand;
  delete [] cand_subset;
  delete [] CACHE_items;

  if (write_output) { // dump to file frequent itemsets
    FSout o(OUTF, iter);
    if (!o.isOpen()) {
      cerr << OUTF << " could not be opened for writing!" << endl;
      exit(1);
    }
    next_freq.dump_itemsets(counters, o);
  }

#ifdef VERBOSE
  print_statistics("DCIsk", iter, num_cand, num_freq, time.ReadChronos());
  //  cout << "one search : " << one_search << " (" << ((float) one_search)/num_cand*100 << ")" << endl;
#else
  printf("%d\n", num_freq);
#endif
  return;
}

// performs the current iteration with DCI
// by using the optimizations for dense datasets
template <class T, class T1>
void DCI_iter_compact_key(int iter, dci_items& counters,
                          set_of_frequents<T,T1>& previous_freq,
                          set_of_frequents<T,T1>& next_freq,
                          DCI_vertical_dataset<T>& DCI_dataset)
{
  Chronos time;
  time.StartChronos();

  next_freq.reset(iter);

  if (!previous_freq.init_gen_cand())
    return;

  T *cand, *cand_subset;
  T *CACHE_items;
  T key_pair[2];
  T1 count_pair[2];

  cand = new T[iter];
  cand_subset = new T[iter-1];
  CACHE_items = new T[iter];
  CACHE_items[0] = counters.get_m1() - 1; // init CACHE - surely different !!!

  int num_freq = 0;
  int num_cand = 0;
  int cand_type;
  int count;

  previous_freq.get_prefix(cand);
  previous_freq.get_suffix(&cand[iter - 2], key_pair, count_pair);
  num_cand++;
  cand_type = NEW_PREFIX;

  DCI_statistics stats;
  stats.reset_stats();

  DCI_dataset.init_cache(iter);

  T key = 0;
  T1 min_count;
  T min_key;
  int one_search = 0;

  while (1) {
    if ((key_pair[0] != (T) -1) || (key_pair[1] != (T) -1)) {
      // cand is surely not a key pattern
      one_search++;

      if (cand[iter-2] == key_pair[0]) {
        // the key pattern is the first generator
        num_freq++;
        next_freq.add_itemset(cand, (T1) count_pair[1], cand[iter-2]);
      }
      else if (cand[iter-1] == key_pair[1]) {
        // the key pattern is the second generator
        num_freq++;
        next_freq.add_itemset(cand, (T1) count_pair[0], cand[iter-1]);
      }
      else {
        // the key pattern is another subset: we must find it
        if (key_pair[0] != (T) -1)
          key = key_pair[0];
        else
          key = key_pair[1];

        int j = 0;
        for (int i = 0; i < iter; i++)
          if (cand[i] != key)
            cand_subset[j++] = cand[i];

        T tmp_key;
        if (previous_freq.find_one_subset(cand_subset, tmp_key, count)) {
          num_freq++;
          next_freq.add_itemset(cand, (T1) count, key);
        }
      }
    } else {
      if (count_pair[0] < count_pair[1]) {
        // remember min_count and corresponding key between generators
        min_count = count_pair[0];
        min_key = cand[iter - 1];
      }
      else {
        min_count = count_pair[1];
        min_key = cand[iter - 2];
      }

      T other_key;
      bool is_key_pattern = true;
      bool pruned = false;

      for (int del = iter-3; del >= 0; del--) {
        // look for the subset with minimum count
        int j = 0;
        for (int i = 0; i < iter; i++)
          if (i != del)
            cand_subset[j++] = cand[i];
        if (previous_freq.find_one_subset(cand_subset, other_key, count) == 0) {
          pruned = true;
          break; // a subset is infrequent, prune the cand and take the next one
        }

        if (other_key == (T) -1) {
          // remember min_count and corresponding key
          if (min_count >= (T1) count) {
            min_count = count;
            min_key = cand[del];
          }
        } else { // cand is not a key pattern
          is_key_pattern = false;
          int j1 = 0;
          for (int i = 0; i < iter; i++)
            if (cand[i] != other_key)
              cand_subset[j1++] = cand[i];

          T tmp_key; // check if the associated subset is frequent
          if (previous_freq.find_one_subset(cand_subset, tmp_key, count)) {
            num_freq++;
            next_freq.add_itemset(cand, (T1) count, other_key);
            break;
          }
          // else
          //   cout << "ERROR\n";
        }
      }

      if (!pruned && is_key_pattern) { // we must count candidate support!
        int prefix_len;
        for (prefix_len = 0; prefix_len < iter-1; prefix_len++) {
          if (cand[prefix_len] != CACHE_items[prefix_len])
            break;
        }
        for (int i = prefix_len; i < iter; i++) { // copy to cache
          CACHE_items[i] = cand[i];
        }

        if (DCI_dataset.candidate_is_frequent_compact(cand,
                                                      prefix_len, iter,
                                                      (int) counters.min_count,
                                                      count,
                                                      stats)) {
          num_freq++;
          if (count != (int) min_count)
            next_freq.add_itemset(cand, (T1) count, (T) -1);
          else
            next_freq.add_itemset(cand, (T1) count, min_key);
        }
      }
    }

    // generate next candidate
    cand_type = previous_freq.next_cand();
    if (cand_type == END_GEN_CAND)
      break;
    else if (cand_type == NEW_PREFIX)
      previous_freq.get_prefix(cand);
    previous_freq.get_suffix(&cand[iter-2], key_pair, count_pair);
    num_cand++;
  }

  delete [] cand;
  delete [] cand_subset;
  delete [] CACHE_items;

  if (write_output) { // dump to file frequent itemsets
    FSout o(OUTF, iter);
    if (!o.isOpen()) {
      cerr << OUTF << " could not be opened for writing!" << endl;
      exit(1);
    }
    next_freq.dump_itemsets(counters, o);
  }

#ifdef VERBOSE
  print_statistics("DCIdk", iter, num_cand, num_freq, time.ReadChronos());
  //  cout << "one search : " << one_search << " (" << ((float) one_search)/num_cand*100 << ")" << endl;
#else
  printf("%d\n", num_freq);
#endif
  return;
}

// performs the current iteration with DCI by using the optimizations for dense datasets
template <class T, class T1>
void DCI_iter_compact(int iter, dci_items& counters,
                      set_of_frequents<T,T1>& previous_freq,
                      set_of_frequents<T,T1>& next_freq,
                      DCI_vertical_dataset<T>& DCI_dataset)
{
  Chronos time;
  time.StartChronos();

  next_freq.reset(iter);

  if (!previous_freq.init_gen_cand())
    return;

  T *cand;
  T *CACHE_items;

  cand = new T[iter];
  CACHE_items = new T[iter];
  CACHE_items[0] = counters.get_m1() - 1; // init CACHE - surely different !!!

  int num_freq = 0;
  int num_cand = 0;
  int cand_type;
  int count;

  previous_freq.get_prefix(cand);
  previous_freq.get_suffix(&cand[iter - 2]);
  num_cand++;
  cand_type = NEW_PREFIX;

  DCI_statistics stats;
  stats.reset_stats();

  DCI_dataset.init_cache(iter);

  while (1) {
    int start;
    if (cand_type == NEW_PREFIX)
      start = 0;
    else
      start = iter - 2;

    int prefix_len;
    for (prefix_len = start; prefix_len < iter-1; prefix_len++) {
      if (cand[prefix_len] != CACHE_items[prefix_len])
        break;
    }

    for (int i = prefix_len; i < iter; i++) { // copy to cache
      CACHE_items[i] = cand[i];
    }

    // DCI_dataset.set_is_included_flags(cand, prefix_len, iter);

    if (DCI_dataset.candidate_is_frequent_compact(cand,
                                                  prefix_len, iter,
                                                  (int) counters.min_count,
                                                  count,
                                                  stats)) {
      num_freq++;
      next_freq.add_itemset(cand, (T1) count);
    }

    cand_type = previous_freq.next_cand();
    if (cand_type == END_GEN_CAND)
      break;
    else if (cand_type == NEW_SUFFIX)
      previous_freq.get_suffix(&cand[iter-2]);
    else {
      previous_freq.get_prefix(cand);
      previous_freq.get_suffix(&cand[iter-2]);
    }
    num_cand++;
  }

  delete [] cand;
  delete [] CACHE_items;

  if (write_output) { // dump to file frequent itemsets
    FSout o(OUTF, iter);
    if (!o.isOpen()) {
      cerr << OUTF << " could not be opened for writing!" << endl;
      exit(1);
    }
    next_freq.dump_itemsets(counters, o);
  }

#ifdef VERBOSE
  print_statistics("DCId", iter, num_cand, num_freq, time.ReadChronos());
#else
  printf("%d\n", num_freq);
#endif
  return;
}
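All three iteration routines above compare the new candidate against CACHE_items before calling candidate_is_frequent_diffuse() / candidate_is_frequent_compact(), presumably so that the intersection work already done for the shared prefix of the previous candidate can be reused rather than recomputed. A minimal standalone sketch of that prefix comparison (a hypothetical helper, not part of dci.cc) looks like this:

// Hypothetical sketch, not in dci.cc: how many leading items the new candidate
// shares with the cached one. Items in [0, p) can reuse cached per-prefix results;
// items in [p, iter) must be recomputed and copied back into the cache.
template <class T>
int shared_prefix_len(const T* cand, const T* cached, int iter) {
  int p = 0;
  while (p < iter - 1 && cand[p] == cached[p])
    ++p;
  return p;
}

This mirrors the prefix_len loops in each routine; note that CACHE_items[0] is initialized to counters.get_m1() - 1 ("surely different") precisely so the very first candidate never matches the cache and is computed in full.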
