📄 vertical.cc
字号:
// we know which is the percentage range of superposition // among tidlists. if there are other items to consider // we go on, otherwise we break the loop if (!found) break; } start = start_OK; perc = perc_OK; int limit_num_freq_eq; if (perc > 80) limit_num_freq_eq = n_frequent_items/7; else if (perc > 75) limit_num_freq_eq = n_frequent_items/6; else if (perc > 70) limit_num_freq_eq = n_frequent_items/5; else if (perc > 65) limit_num_freq_eq = n_frequent_items/4; else if (perc > 60) limit_num_freq_eq = n_frequent_items/3; else limit_num_freq_eq = n_frequent_items/2; if (perc == 0 || (end-start) < limit_num_freq_eq) { delete [] mask; delete [] result; dataset_kind = DIFFUSE_DATASET; return dataset_kind; } start_sect_eq=start; end_sect_eq=end; sz_sect_eq = cc / (sizeof(int)*8); reorder_bits_compact(n_frequent_items, mask); delete [] mask; delete [] result; dataset_kind = COMPACT_DATASET; return dataset_kind;}template <class T>int DCI_vertical_dataset<T>::check_pruning () { int num_bit_set; num_bit_set = count_1_bits(prune_mask, tid_list_size); int acc_bit = tid_list_size * sizeof(unsigned int) * 8 - num_bit_set; int new_sz_bit = (sizeof(unsigned int) * 8 * tid_list_size) - acc_bit; int new_tid_list_size = new_sz_bit / (sizeof(unsigned int) * 8); if ((new_sz_bit % (sizeof(unsigned int) * 8)) != 0) new_tid_list_size++; return new_tid_list_size;}template <class T>void DCI_vertical_dataset<T>::prune_VD (int new_tid_list_size, dci_items& c) { unsigned char *m = (unsigned char *) prune_mask; for (unsigned int i = 0; i < VD_rows; i++) { // c.flag_item[] indicates the rows corresponding to active items if (c.flag_item[i] == 0) continue; unsigned char *o = (unsigned char *) &VD[i * tid_list_size]; unsigned char *n = (unsigned char *) &VD[i * new_tid_list_size]; unsigned int i_b = 0; unsigned int i_B = 0; for (unsigned int j = 0; j < tid_list_size * sizeof(unsigned int); j++) for (int k = 0; k < 8; k++) { if ((m[j] & mask_byte(k)) != 0) { if ((o[j] & mask_byte(k)) != 0) n[i_B] = n[i_B] | mask_byte(i_b); else n[i_B] = n[i_B] & (~mask_byte(i_b)); i_b = (i_b + 1) % 8; if (i_b == 0) i_B++; } } if (i_b != 0) { while (i_b < 8) { n[i_B] = n[i_B] & (~mask_byte(i_b)); i_b++; } i_B++; } while (i_B < new_tid_list_size * sizeof(unsigned int)) n[i_B++] = 0; } tid_list_size = new_tid_list_size;}template <class T>void DCI_vertical_dataset<T>::order_bits_diffuse(dci_items& c){ // cout << "ORDERING ...\n"; int *index_tid = new int[tid_list_size * 8 * sizeof(int)]; unsigned int *new_list = new unsigned int[tid_list_size]; T *ordered_freq_items = new T[VD_rows]; int num_ordered_freq_items=0; unsigned int *and_firsts = new unsigned int[tid_list_size]; bzero(and_firsts, tid_list_size*sizeof(unsigned int)); for (unsigned int i = 0; i < tid_list_size * sizeof(int) * 8; i++) index_tid[i] = -1; while (1) { int max=0; for (unsigned int i = 0; i < VD_rows; i++) { if (c.first_item_counts[i] > c.first_item_counts[max]) max=i; } if (c.first_item_counts[max] != 0) { ordered_freq_items[num_ordered_freq_items++] = max; c.first_item_counts[max] = 0; and_tid_list(and_firsts, and_firsts, (&VD[max * tid_list_size]), tid_list_size); } else break; } unsigned int ind=0; //printf("num_ordered_freq_items=%d\n", num_ordered_freq_items); unsigned char *o = (unsigned char *) and_firsts; for (unsigned int j = 0; j < tid_list_size * sizeof(int); j++) for (int k = 0; k < 8; k++) { int tid = j * 8 + k; if ((o[j] & mask_byte(k)) != 0) if (index_tid[tid] == -1) { //if (ind >= tid_list_size * sizeof(unsigned int) * 8) // cout << "error order_bits\n"; index_tid[tid] = ind++; } } for (int ii = 0; ii < num_ordered_freq_items; ii++) { int i = ordered_freq_items[ii]; unsigned char *o = (unsigned char *) &VD[i * tid_list_size]; for (unsigned int j = 0; j < tid_list_size * sizeof(unsigned int); j++) for (int k = 0; k < 8; k++) { int tid = j * 8 + k; if ((o[j] & mask_byte(k)) != 0) { if (index_tid[tid] == -1) { //if (ind >= tid_list_size * sizeof(int) * 8) // cout << "error2 order_bits\n"; index_tid[tid] = ind++; } } } } while (ind < tid_list_size * sizeof(unsigned int) * 8) { index_tid[ind] = ind; ind++; } for (unsigned i = 0; i < VD_rows; i++) { if (c.flag_item[i] == 0) continue; unsigned char *o = (unsigned char *) &VD[i * tid_list_size]; unsigned char *n = (unsigned char *) new_list; bzero(n, tid_list_size * sizeof(unsigned int)); for (unsigned int j = 0; j < tid_list_size * sizeof(int); j++) for (int k = 0; k < 8; k++) { int tid = j * 8 + k; if ((o[j] & mask_byte(k)) != 0) { /* * set bit index_tid[tid] */ int i_b_glob; int i_b; int i_B; i_b_glob = index_tid[tid]; i_B = i_b_glob / 8; i_b = i_b_glob % 8; n[i_B] = (n[i_B] | mask_byte(i_b)); } } memcpy(o, n, tid_list_size * sizeof(unsigned int)); } delete [] index_tid; delete [] new_list; delete [] ordered_freq_items; delete [] and_firsts;}template <class T>void DCI_vertical_dataset<T>::reorder_bits_compact(int n_frequent_items, unsigned int *equal) { unsigned int *new_list; new_list = new unsigned int[tid_list_size]; unsigned char *eq; eq = (unsigned char *) equal; for (int i = 0; i < n_frequent_items; i++) { unsigned char *o; unsigned char *n; o = (unsigned char *) &VD[i * tid_list_size]; n = (unsigned char *) new_list; bzero(n, tid_list_size * sizeof(int)); int tid=0; for (unsigned int j = 0; j < tid_list_size * sizeof(int); j++) for (unsigned int k = 0; k < 8; k++) { if ((eq[j] & mask_byte(k)) != 0) { if ((o[j] & mask_byte(k)) != 0) { int i_b, i_B; i_B = tid / 8; i_b = tid % 8; n[i_B] = (n[i_B] | (0x1 << i_b)); } tid++; } } for (unsigned int j = 0; j < tid_list_size * sizeof(int); j++) for (unsigned int k = 0; k < 8; k++) { if ((eq[j] & mask_byte(k)) == 0) { if ((o[j] & mask_byte(k)) != 0) { int i_b, i_B; i_B = tid / 8; i_b = tid % 8; n[i_B] = (n[i_B] | (0x1 << i_b)); } tid++; } } memcpy(o, n, tid_list_size * sizeof(int)); } unsigned int *pp = VD + start_sect_eq * tid_list_size; n_bit_set_eq = count_1_bits(pp, sz_sect_eq); delete [] new_list;}// checks if the vertical dataset fits into main memory and// DCI can start at the following itertemplate <class T> bool DCI_vertical_dataset<T>::VD_can_be_allocated (unsigned int m, unsigned int n_trans) { tid_list_size = n_trans / (sizeof(int) * 8); if ((n_trans % (sizeof(int) * 8)) != 0) tid_list_size++; VD_rows = m; VD_size = VD_rows * tid_list_size * sizeof(int); // maximum amount of memory available for vertical dataset // one third of the memory present in the system... too much? unsigned int MAX_SIZE_OF_VD = mem_total() / 3; if (VD_size < MAX_SIZE_OF_VD) { return true; // VD can be stored in core } else return false;}template <class T> inline void DCI_vertical_dataset<T>::write_t_in_VD (unsigned int n_tr, dci_transaction<T>& t) { unsigned int byte_sel; unsigned int displ_sel; unsigned char *p; byte_sel = n_tr / 8; displ_sel = n_tr % 8; for (int i=0; i<(int) t.t_len; i++) { p = (unsigned char *) &VD[tid_list_size * t.elements[i]]; p[byte_sel] = (p[byte_sel] | (mask_byte(displ_sel))); }} template <class T> inline void DCI_vertical_dataset<T>::write_t_in_VD (unsigned int n_tr, Transaction<T>& t) { unsigned int byte_sel; unsigned int displ_sel; unsigned char *p; byte_sel = n_tr / 8; displ_sel = n_tr % 8; for (int i=0; i<(int) t.length; i++) { p = (unsigned char *) &VD[tid_list_size * t.t[i]]; p[byte_sel] = (p[byte_sel] | mask_byte(displ_sel)); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -