⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 items.cc

📁 this will produce the dci program in the src/ directory, or in /path/to/your/installation/bin if y
💻 CC
字号:
// Copyright (C) 2003 salvatore orlando <salvatore.orlando@unive.it>//  // This program is free software; you can redistribute it and/or modify// it under the terms of the GNU General Public License as published by// the Free Software Foundation; either version 2 of the License, or// (at your option) any later version.// // This program is distributed in the hope that it will be useful,// but WITHOUT ANY WARRANTY; without even the implied warranty of// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the// GNU General Public License for more details.// // You should have received a copy of the GNU General Public License// along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.#include <iostream>#include <algorithm>#include "items.hh"using namespace std;template class set_of_itemsets<unsigned int, unsigned int>;template class set_of_itemsets<unsigned int, unsigned short int>;template class set_of_itemsets<unsigned int, unsigned char>;template class set_of_itemsets<unsigned short int, unsigned int>;template class set_of_itemsets<unsigned short int, unsigned short int>;template class set_of_itemsets<unsigned short int, unsigned char>;template class set_of_itemsets<unsigned char, unsigned int>;template class set_of_itemsets<unsigned char, unsigned short int>;template class set_of_itemsets<unsigned char, unsigned char>;void dci_items::init_global_pruning(){  if (prune_global_mask == NULL)    prune_global_mask = new unsigned int[m1];  bzero(prune_global_mask, m1 * sizeof(unsigned int));}void dci_items::end_global_pruning(unsigned int k) {  mk=0;  for(unsigned int i=0; i<m1; i++)    if (prune_global_mask[i] < k)      unmap[i] = -1;    else      mk++;}bool dci_items::update_map() {  // cout << "--m1=" << m1 << endl;  // cout << "--mk=" << mk << endl;  if (m1 == mk)    return false;  unsigned int j=0;  for (unsigned int i=0; i<m1; i++)    if (unmap[i] != -1)      map[i] = j++;  if (j != mk)    cout << "ERRORE mk\n";   return true;}bool dci_items::update_unmap(bool write_output) {  //  cout << "==m1=" << m1 << endl;  // cout << "==mk=" << mk << endl;  if (m1 == mk)    return false;//   for (unsigned int i=0; i<m1; i++) {//     printf("unmap[%d]->%s++\n", i, unmap_ascii[i]);//     if (strlen(unmap_ascii[i]) != unmap_ascii_len[i])//       printf("  diff len %d-%d\n", strlen(unmap_ascii[i]), unmap_ascii_len[i]);//   }  unsigned int j=0;  for (unsigned int i=0; i<m1; i++)    if (unmap[i] != -1) {      if (i != j) {	unmap[j] = unmap[i];	if (write_output) {	  int len = unmap_ascii_len[i];	  memcpy(unmap_ascii[j], unmap_ascii[i], len);	  unmap_ascii[j][len] = 0;	  unmap_ascii_len[j] =  unmap_ascii_len[i];	}      }      j++;    }  //  if (j != mk)  //  cout << "ERRORE mk\n";   m1=mk;//   for (unsigned int i=0; i<mk; i++)//     printf("unmap[%d]->%s++\n", i, unmap_ascii[i]);  return true;}int dci_items::remap_items(bool write_output)  // sort items according to their support  // and maps their id to contiguous and increasing integers.  // in this function we build a map from original_id to new_id  // and a reverse "unmap" from new_id to original.   // also, we build a map from the new_id to the ascii representation   // of the item, so we have it at hand when (if ever) we will have to write  // frequent itemsets to disk.{  m = (*acc).size();	  // add the item_id into the pairs; update m1 and avg_supp_perc  m1=0;  // counts how many items have support higher than a give threshold  // and which is their average support. this info is used later to   // determine if it is worth using the key-pattern optimization  avg_supp_at_threshold = 0.;  int n=0;  unsigned int count_at_threshold =  (unsigned int) (nr_t*SUPP_THRESHOLD/100.);    for (unsigned int i=0; i < (*acc).size(); i++) {    (*acc)[i].id = i;    if ((*acc)[i].count >= min_count) {      m1++;      if ((*acc)[i].count > max_supp)	max_supp = (*acc)[i].count;      // ----------------------------------------------------      // average support of items that pass a given threshold      if ((*acc)[i].count > count_at_threshold) {	avg_supp_at_threshold += (*acc)[i].count;	n++;      }      // ----------------------------------------------------    }  }  // ----------------------------------------------------  // average support of items that pass a given threshold  if (n > 0)    avg_supp_at_threshold = 100.*avg_supp_at_threshold/n/nr_t;  else     avg_supp_at_threshold = 0.;  // ----------------------------------------------------  AscendingItemsSort compare_item;  sort((*acc).begin(), (*acc).end(), compare_item);  map   = new int[m];  unmap = new int[m1];  for (unsigned int i=0; i < m; i++)     map[i] = -1;    int start = m - m1;  if (write_output) {    unmap_ascii = new char[m1][8];    unmap_ascii_len = new unsigned char[m1];  }  for (unsigned int i=start; i < m; i++) {    unmap[i-start] = (*acc)[i].id;    if (write_output)      unmap_ascii_len[i-start] = sprintf(&unmap_ascii[i-start][0], 					 "%d ", (*acc)[i].id);         map[(*acc)[i].id] = i-start;  }#ifdef DEBUG  for (unsigned int i=0; i < m1; i++)     cout << "unmap[" << i << "]=" << unmap[i] << endl;#endif	  mk = m1;  return start;}template <class T, class T1>inline void set_of_itemsets<T, T1>::add_itemset(T *v, T1 c){     if (num_itemsets == 0) {    num_itemsets++;    prefixes.resize(k - 1);    for (int i=0; i < k-1; i++)      prefixes[i] = v[i];    ind_section.push_back(0);    suffixes.push_back(v[k-1]);    counters.push_back(c);  }  else {    // check equality between prefixes    bool equal=true;    int ind_last_prefix = prefixes.size()-(k-1);        for (int i=0; i<k-1; i++)      if (prefixes[ind_last_prefix+i] != v[i]) {	equal=false;	break;      }          if (equal)  {      // cout << "+++same prefix " << v[0] << "\n";      num_itemsets++;      int last_ind = ind_section.size() - 1;      ind_section[last_ind]++;      suffixes.push_back(v[k-1]);      counters.push_back(c);    }    else {      // cout << "+++add section"  << v[0] << "\n";            num_itemsets++;      int old_sz = prefixes.size();      prefixes.resize(old_sz + k - 1);      for (int i=0; i<k-1; i++)	prefixes[old_sz + i] = v[i];            ind_section.push_back(suffixes.size());      suffixes.push_back(v[k-1]);      counters.push_back(c);    }  }}template <class T, class T1>inline void set_of_itemsets<T, T1>::add_itemset(T *v, T1 c, T key){     add_itemset(v, c);  keys.push_back(key);}template <class T, class T1>  void set_of_itemsets<T, T1>::init_read_itemsets() {  curr_itemset=-1;   curr_prefix=0;  init_sect = 0;  end_sect = ind_section[curr_prefix];}template <class T, class T1>  inline int set_of_itemsets<T, T1>::read_next_itemset(T* v, T1& c) {  curr_itemset++;  if (curr_itemset > end_sect) {    curr_prefix++;    if (curr_prefix == ind_section.size())       return(-1);    init_sect = end_sect + 1;    end_sect = ind_section[curr_prefix];  }   if (curr_itemset == init_sect) {    int ind_prefix = curr_prefix * (k-1);    int j=0;    for (int i=ind_prefix; i<ind_prefix+(k-1); i++)      v[j++] = prefixes[i];  }   v[k-1] = suffixes[curr_itemset];  c = counters[curr_itemset];    return curr_itemset;}template <class T, class T1>void set_of_itemsets<T, T1>::printf_itemsets() {  if (num_itemsets == 0) {    cerr << "no itemsets!!\n";    return;  }  curr_prefix = 0;  init_sect = 0;  end_sect = ind_section[curr_prefix];  cout << "k=" << k  << endl;  while (1) {    cout << "Prefix:" << curr_prefix << "- Section len:" 	 <<  end_sect-init_sect+1 << "\n   prefix: ";    int ind_prefix = curr_prefix * (k-1);    for (int i=ind_prefix; i<ind_prefix+(k-1); i++)      cout << (int) prefixes[i] << " ";        cout << "\n   (suffixes,counters): ";    for (unsigned int i=init_sect; i<=end_sect; i++)      cout << "(" << (int) suffixes[i] 	   << "," << (int) counters[i] << ") ";    cout << "\n\n";        curr_prefix++;    if (curr_prefix == ind_section.size())       break;    init_sect = end_sect + 1;    end_sect = ind_section[curr_prefix];  }}template <class T, class T1>void set_of_itemsets<T, T1>::flag_included_items(dci_items& c){  c.init_flag_item();  if (num_itemsets == 0)    return;  unsigned int curr_prefix = 0;  unsigned int init_sect = 0;  unsigned int end_sect = ind_section[curr_prefix];  while (1) {    int ind_prefix = curr_prefix * (k-1);    for (int i=ind_prefix; i<ind_prefix+(k-1); i++)      c.flag_item[prefixes[i]] = true;        for (unsigned int i=init_sect; i<=end_sect; i++)      c.flag_item[suffixes[i]] = true;        curr_prefix++;    if (curr_prefix == ind_section.size())       break;    init_sect = end_sect + 1;    end_sect = ind_section[curr_prefix];  }  }template <class T, class T1>void set_of_itemsets<T, T1>::set_occurrences_1prefix(dci_items& c){  c.init_first_item_counts();  if (num_itemsets == 0)    return;  unsigned int curr_prefix = 0;  unsigned int init_sect = 0;  unsigned int end_sect = ind_section[curr_prefix];  while (1) {    int ind_first = curr_prefix * (k-1);    c.first_item_counts[prefixes[ind_first]] += end_sect - init_sect + 1;        curr_prefix++;    if (curr_prefix == ind_section.size())       break;    init_sect = end_sect + 1;    end_sect = ind_section[curr_prefix];  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -