⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 vertical.cc

📁 DCI 算法 源代码 DCI: A hybrid Algorithm for Frequent Set Counting
💻 CC
📖 第 1 页 / 共 2 页
字号:
        // we know which is the percentage range of superposition    // among tidlists. if there are other items to consider     // we go on, otherwise we break the loop    if (!found)      break;      }    start = start_OK;  perc = perc_OK;  int limit_num_freq_eq;  if (perc > 80)    limit_num_freq_eq = n_frequent_items/7;  else if (perc > 75)    limit_num_freq_eq = n_frequent_items/6;  else if (perc > 70)    limit_num_freq_eq = n_frequent_items/5;  else if (perc > 65)    limit_num_freq_eq = n_frequent_items/4;   else if (perc > 60)    limit_num_freq_eq = n_frequent_items/3;   else     limit_num_freq_eq = n_frequent_items/2;   if (perc == 0 || (end-start) < limit_num_freq_eq) {    delete [] mask;    delete [] result;    dataset_kind = DIFFUSE_DATASET;    return dataset_kind;  }    start_sect_eq=start;  end_sect_eq=end;  sz_sect_eq = cc / (sizeof(int)*8);  reorder_bits_compact(n_frequent_items, mask);    delete [] mask;  delete [] result;  dataset_kind = COMPACT_DATASET;  return dataset_kind;}template <class T>int DCI_vertical_dataset<T>::check_pruning () {    int num_bit_set;    num_bit_set = count_1_bits(prune_mask, tid_list_size);  int acc_bit = tid_list_size * sizeof(unsigned int) * 8 - num_bit_set;  int new_sz_bit = (sizeof(unsigned int) * 8 * tid_list_size) - acc_bit;  int new_tid_list_size = new_sz_bit / (sizeof(unsigned int) * 8);  if ((new_sz_bit % (sizeof(unsigned int) * 8)) != 0)    new_tid_list_size++;  return new_tid_list_size;}template <class T>void DCI_vertical_dataset<T>::prune_VD (int new_tid_list_size, dci_items& c) {  unsigned char *m = (unsigned char *) prune_mask;  for (unsigned int i = 0; i < VD_rows; i++) {    // c.flag_item[] indicates the rows corresponding to active items    if (c.flag_item[i] == 0)      continue;    unsigned char *o = (unsigned char *) &VD[i * tid_list_size];    unsigned char *n = (unsigned char *) &VD[i * new_tid_list_size];    unsigned int i_b = 0;    unsigned int i_B = 0;    for (unsigned int j = 0; j < tid_list_size * sizeof(unsigned int); j++)      for (int k = 0; k < 8; k++) {        if ((m[j] & mask_byte(k)) != 0) {          if ((o[j] & mask_byte(k)) != 0)            n[i_B] = n[i_B] | mask_byte(i_b);          else            n[i_B] = n[i_B] & (~mask_byte(i_b));	            i_b = (i_b + 1) % 8;          if (i_b == 0)            i_B++;        }      }    if (i_b != 0) {      while (i_b < 8) {        n[i_B] = n[i_B] & (~mask_byte(i_b));        i_b++;      }      i_B++;    }    while (i_B < new_tid_list_size * sizeof(unsigned int))      n[i_B++] = 0;  }  tid_list_size = new_tid_list_size;}template <class T>void DCI_vertical_dataset<T>::order_bits_diffuse(dci_items& c){  // cout << "ORDERING ...\n";  int *index_tid = new int[tid_list_size * 8 * sizeof(int)];  unsigned int *new_list = new unsigned int[tid_list_size];  T *ordered_freq_items = new T[VD_rows];  int num_ordered_freq_items=0;  unsigned int *and_firsts = new unsigned int[tid_list_size];  bzero(and_firsts, tid_list_size*sizeof(unsigned int));  for (unsigned int i = 0; i < tid_list_size * sizeof(int) * 8; i++)    index_tid[i] = -1;  while (1) {    int max=0;    for (unsigned int i = 0; i < VD_rows; i++) {      if (c.first_item_counts[i] > c.first_item_counts[max])	max=i;    }        if (c.first_item_counts[max] != 0) {       ordered_freq_items[num_ordered_freq_items++] = max;       c.first_item_counts[max] = 0;       and_tid_list(and_firsts, and_firsts, (&VD[max * tid_list_size]), tid_list_size);    }        else        break;  }     unsigned int ind=0;    //printf("num_ordered_freq_items=%d\n", num_ordered_freq_items);  unsigned char *o = (unsigned char *) and_firsts;  for (unsigned int j = 0; j < tid_list_size * sizeof(int); j++)    for (int k = 0; k < 8; k++) {      int tid = j * 8 + k;      if ((o[j] & mask_byte(k)) != 0)        if (index_tid[tid] == -1) {          //if (ind >= tid_list_size * sizeof(unsigned int) * 8)          //  cout << "error order_bits\n";          index_tid[tid] = ind++;        }    }  for (int ii = 0; ii < num_ordered_freq_items; ii++) {    int i = ordered_freq_items[ii];    unsigned char *o = (unsigned char *) &VD[i * tid_list_size];    for (unsigned int j = 0; j < tid_list_size * sizeof(unsigned int); j++)      for (int k = 0; k < 8; k++) {        int tid = j * 8 + k;        if ((o[j] & mask_byte(k)) != 0) {          if (index_tid[tid] == -1) {            //if (ind >= tid_list_size * sizeof(int) * 8)            //  cout << "error2 order_bits\n";	    index_tid[tid] = ind++;          }	}      }  }  while (ind < tid_list_size * sizeof(unsigned int) * 8) {    index_tid[ind] = ind;    ind++;  }  for (unsigned i = 0; i < VD_rows; i++) {    if (c.flag_item[i] == 0) continue;    unsigned char *o = (unsigned char *) &VD[i * tid_list_size];    unsigned char *n = (unsigned char *) new_list;    bzero(n, tid_list_size * sizeof(unsigned int));    for (unsigned int j = 0; j < tid_list_size * sizeof(int); j++)      for (int k = 0; k < 8; k++) {        int tid = j * 8 + k;        if ((o[j] & mask_byte(k)) != 0) {          /*           * set bit index_tid[tid]            */	  int i_b_glob;	  int i_b;	  int i_B;          i_b_glob = index_tid[tid];          i_B = i_b_glob / 8;          i_b = i_b_glob % 8;          n[i_B] = (n[i_B] | mask_byte(i_b));        }      }    memcpy(o, n, tid_list_size * sizeof(unsigned int));  }  delete [] index_tid;  delete [] new_list;  delete [] ordered_freq_items;  delete [] and_firsts;}template <class T>void DCI_vertical_dataset<T>::reorder_bits_compact(int n_frequent_items, 						 unsigned int *equal)	 {      unsigned int    *new_list;  new_list = new  unsigned int[tid_list_size];  unsigned char   *eq;  eq = (unsigned char *) equal;    for (int i = 0; i < n_frequent_items; i++) {    unsigned char   *o;    unsigned char   *n;    o = (unsigned char *) &VD[i * tid_list_size];    n = (unsigned char *) new_list;    bzero(n, tid_list_size * sizeof(int));    int tid=0;    for (unsigned int j = 0; j < tid_list_size * sizeof(int); j++)      for (unsigned int k = 0; k < 8; k++) {	if ((eq[j] & mask_byte(k)) != 0) {	  if ((o[j] & mask_byte(k)) != 0) {	    int i_b, i_B;	    	    i_B = tid / 8;	    i_b = tid % 8;	    n[i_B] = (n[i_B] | (0x1 << i_b));	  }	  tid++;	}      }    for (unsigned int j = 0; j < tid_list_size * sizeof(int); j++)      for (unsigned int k = 0; k < 8; k++) {	if ((eq[j] & mask_byte(k)) == 0) {	  if ((o[j] & mask_byte(k)) != 0) {	    int i_b, i_B;	    i_B = tid / 8;	    i_b = tid % 8;	    n[i_B] = (n[i_B] | (0x1 << i_b));	  }	  tid++;	}      }    memcpy(o, n, tid_list_size * sizeof(int));  }  unsigned int *pp = VD + start_sect_eq * tid_list_size;  n_bit_set_eq = count_1_bits(pp, sz_sect_eq);  delete [] new_list;}// checks if the vertical dataset fits into main memory and// DCI can start at the following itertemplate <class T>  bool DCI_vertical_dataset<T>::VD_can_be_allocated (unsigned int m, 						   unsigned int n_trans) {  tid_list_size = n_trans / (sizeof(int) * 8);    if ((n_trans % (sizeof(int) * 8)) != 0)    tid_list_size++;  VD_rows = m;  VD_size = VD_rows * tid_list_size * sizeof(int);    // maximum amount of memory available for vertical dataset  // one third of the memory present in the system... too much?  unsigned int MAX_SIZE_OF_VD = mem_total() / 3;  if (VD_size < MAX_SIZE_OF_VD) {     return true;        // VD can be stored in core  }  else return false;}template <class T>  inline void DCI_vertical_dataset<T>::write_t_in_VD (unsigned int n_tr, 						    dci_transaction<T>& t) {  unsigned int byte_sel;  unsigned int displ_sel;  unsigned char *p;  byte_sel = n_tr / 8;  displ_sel = n_tr % 8;  for (int i=0; i<(int) t.t_len; i++) {    p = (unsigned char *) &VD[tid_list_size * t.elements[i]];    p[byte_sel] = (p[byte_sel] | (mask_byte(displ_sel)));  }}  template <class T>  inline void DCI_vertical_dataset<T>::write_t_in_VD (unsigned int n_tr, 						    Transaction<T>& t) {  unsigned int byte_sel;  unsigned int displ_sel;  unsigned char *p;  byte_sel = n_tr / 8;  displ_sel = n_tr % 8;  for (int i=0; i<(int) t.length; i++) {    p = (unsigned char *) &VD[tid_list_size * t.t[i]];    p[byte_sel] = (p[byte_sel] | mask_byte(displ_sel));  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -