msapriori_trie.cpp

来自「Aprior的C++实现算法」· C++ 代码 · 共 293 行

CPP
293
字号
/***************************************************************************                          MSApriori_Trie.cpp  -  description                             -------------------    begin                : cs dec 26 2002    copyright            : (C) 2002 by Ferenc Bodon    email                : bodon@mit.bme.hu ***************************************************************************/#include "MSApriori_Trie.hpp"#include <cstdlib>#include <algorithm>#include <iostream>/**  \param counter_of_emptyset The support of the empty set, i.e. the number of transactions.  \param mis_abs The mis values.*/MSApriori_Trie::MSApriori_Trie(const unsigned long counter_of_emptyset, const vector<double>& mis_abs ):   main_trie( NULL, counter_of_emptyset),mis_abs(mis_abs){}/**  \param counters It stores the support of the frequent items.                   counters[i] stores                  the suport of item i.*/void MSApriori_Trie::insert_frequent_items( const set< pair<itemtype, unsigned long> >& counters ){   for(set< pair<itemtype, unsigned long> >::iterator it = counters.begin(); it != counters.end(); it++)      main_trie.add_empty_state( (*it).first, (*it).second );   if( !main_trie.edgevector.empty() ) main_trie.maxpath = 1;}/**  \param frequent_size Size of the frequent itemsets that generate the candidates.*/void MSApriori_Trie::candidate_generation( const itemtype& frequent_size ){   if( frequent_size == 1 ) candidate_generation_two();   else if( main_trie.maxpath == frequent_size )   {      set<itemtype> maybe_candidate;      candidate_generation_assist( &main_trie, frequent_size-1, maybe_candidate );   }}/**  \param basket The basket that hs to be analyzed.  \param candidate_size the size of the candidates.  \param counter_incr The number of time the basket occured.                      The counters of candidates that occure in the basket has to be incremented by counter_incr.*/void MSApriori_Trie::find_candidate( const vector<itemtype>& basket, const itemtype candidate_size, 				     const unsigned long counter_incr){   if( candidate_size != 2 )       main_trie.find_candidate( basket, candidate_size, basket.begin(), counter_incr );   else find_candidate_two( basket, counter_incr );    }/**  \param candidate_size The size of the candidate itemset.*/void MSApriori_Trie::delete_infrequent( const itemtype candidate_size ){   if( candidate_size != 2 )   {      for( vector<Edge>::iterator itEdge =  main_trie.edgevector.begin(); 	   itEdge !=  main_trie.edgevector.end(); itEdge++ )	 if( (*itEdge).subtrie->maxpath == candidate_size - 1 )              (*itEdge).subtrie->delete_infrequent( mis_abs[(*itEdge).label], candidate_size - 2 );   }   else delete_infrequent_two( );}/**  \param min_conf Confidence threshold.  \param input_output_manager This will write out itemsets.*/void MSApriori_Trie::association( const double min_conf, Input_Output_Manager& input_output_manager ) const{   input_output_manager << "\nAssociation rules:\ncondition ==> consequence (confidence, occurrence)\n";   set<itemtype> consequence_part;   assoc_rule_assist( min_conf, &main_trie, consequence_part, input_output_manager );}itemtype MSApriori_Trie::longest_path() const{   return main_trie.maxpath;}void MSApriori_Trie::write_content_to_file( Input_Output_Manager& input_output_manager ) const{   input_output_manager<< "Frequent 0-itemsets:\nitemset (occurrence)\n";   input_output_manager<< "{} ("<< main_trie.counter << ')' << endl;   for( itemtype item_size = 1; item_size < main_trie.maxpath+1; item_size++ )   {      input_output_manager<< "Frequent " << item_size << "-itemsets:\nitemset (occurrence)\n";      set<itemtype> frequent_itemset;      write_content_to_file_assist( input_output_manager, &main_trie, item_size, frequent_itemset );   }}void MSApriori_Trie::show_content_preorder( ) const{   main_trie.show_content_preorder( );}MSApriori_Trie::~MSApriori_Trie(){}/**  \param maybe_candidate The itemset that has to be checked. */bool MSApriori_Trie::is_all_subset_frequent( const set<itemtype>& maybe_candidate ) const{   if( maybe_candidate.size() < 3) return true;   // because of the candidate generation method!   else   {      set<itemtype>                 temp_itemset(maybe_candidate);      set<itemtype>::const_iterator item_it = --(--maybe_candidate.end());      set<itemtype>::const_iterator it_lower_bound = ++maybe_candidate.begin();      while ( item_it != it_lower_bound )      {         item_it--;         temp_itemset.erase( *item_it );         if( !main_trie.is_included( temp_itemset, temp_itemset.begin() ) ) return false;         temp_itemset.insert( *item_it );      }      if( mis_abs[*it_lower_bound] ==  mis_abs[*maybe_candidate.begin()] )      {	 temp_itemset.erase( *maybe_candidate.begin() );	 if( main_trie.is_included( temp_itemset, temp_itemset.begin() ) ) return true;	 else return false;      }      else return true;   }}/**     \param basket the given basket     \param counter The number the processed basket occures in the transactional database   */void MSApriori_Trie::find_candidate_two( const vector<itemtype>& basket, const unsigned long counter ){   if(basket.size() > 1)   {      vector<itemtype>::const_iterator it1_basket,                                       it2_basket;      for( it1_basket = basket.begin(); it1_basket != basket.end()-1; it1_basket++)         for( it2_basket = it1_basket+1; it2_basket != basket.end(); it2_basket++)            temp_counter_array[*it1_basket][*it2_basket-*it1_basket-1] += counter;   }}void MSApriori_Trie::candidate_generation_two( ){   if( !mis_abs.empty() )   {      main_trie.maxpath = 2;      temp_counter_array.reserve( mis_abs.size() - 1 );      temp_counter_array.resize( mis_abs.size() - 1 );      for( vector<Edge>::size_type stateIndex = 0; stateIndex <  mis_abs.size() - 1; stateIndex++ )      {         temp_counter_array[stateIndex].reserve( mis_abs.size() - 1 - stateIndex );         temp_counter_array[stateIndex].resize( mis_abs.size() - 1 - stateIndex, 0);      }   }}void MSApriori_Trie::candidate_generation_assist( Trie* trie, const itemtype distance_from_generator,						  set<itemtype>& maybe_candidate){   vector<Edge>::iterator itEdge = trie->edgevector.begin();   if( distance_from_generator )   {      for( ; itEdge != trie->edgevector.end(); itEdge++ )      if( (*itEdge).subtrie->maxpath + 1 >= distance_from_generator )      {         maybe_candidate.insert((*itEdge).label);         candidate_generation_assist((*itEdge).subtrie, distance_from_generator - 1, maybe_candidate );         maybe_candidate.erase((*itEdge).label);      }   }   else   {      vector<Edge>::iterator itEdge2;      Trie* toExtend;      for( ; itEdge != trie->edgevector.end(); itEdge++ )      {         maybe_candidate.insert((*itEdge).label);         toExtend = (*itEdge).subtrie;         for( itEdge2 = itEdge + 1; itEdge2 != trie->edgevector.end(); itEdge2++ )         {            maybe_candidate.insert( (*itEdge2).label );            if( is_all_subset_frequent( maybe_candidate) )               toExtend->add_empty_state( (*itEdge2).label );            maybe_candidate.erase( (*itEdge2).label );         }         if( !toExtend->edgevector.empty()) toExtend->maxpath = 1;         (vector<Edge>(toExtend->edgevector)).swap(toExtend->edgevector);  // we know that state toExtend will not have any more children!          maybe_candidate.erase((*itEdge).label);      }      trie->max_path_set();   }}void MSApriori_Trie::delete_infrequent_two(  ){   vector<Edge>::size_type stateIndex_1,                            stateIndex_2;   vector<Edge>::iterator it;   for( stateIndex_1 = 0; stateIndex_1 < mis_abs.size()-1; stateIndex_1++ )   {      it = lower_bound(main_trie.edgevector.begin(),                        main_trie.edgevector.end(), stateIndex_1, Edge_point_less);      for( stateIndex_2 = 0; stateIndex_2 < mis_abs.size() - 1 - stateIndex_1; stateIndex_2++ )        if( temp_counter_array[stateIndex_1][stateIndex_2] > mis_abs[stateIndex_1] )	   (*it).subtrie->add_empty_state( stateIndex_1 + stateIndex_2 + 1,					    temp_counter_array[stateIndex_1][stateIndex_2] );      temp_counter_array[stateIndex_1].clear();      vector<unsigned long>().swap(temp_counter_array[stateIndex_1]);   /// temp_counter_array[stateIndex_1] will never be used again!      if( !(*it).subtrie->edgevector.empty() ) (*it).subtrie->maxpath = 1;         }   temp_counter_array.clear();   vector< vector<unsigned long> >().swap(temp_counter_array);            /// temp_counter_array will never be used again!   main_trie.max_path_set();}void MSApriori_Trie::assoc_rule_find( const double min_conf, set<itemtype>& condition_part, set<itemtype>& consequence_part,                                     const unsigned long union_support, Input_Output_Manager& input_output_manager ) const{   itemtype                      item;   for( set<itemtype>::const_iterator item_it = consequence_part.begin(); item_it != consequence_part.end(); item_it++)   if( condition_part.empty() || *(--condition_part.end()) < *item_it)   {      item = *item_it;      consequence_part.erase( item );      condition_part.insert( item );      if( union_support > main_trie.is_included(condition_part, condition_part.begin() )->counter * min_conf)      {         input_output_manager<< endl;         input_output_manager.write_out_basket(condition_part);         input_output_manager<< "==> ";         input_output_manager.write_out_basket(consequence_part);         input_output_manager<< "("<<((double) union_support) / main_trie.is_included(condition_part, condition_part.begin())->counter << ", " << union_support << ')';      }      else if( consequence_part.size() > 1 ) assoc_rule_find( min_conf, condition_part, consequence_part, union_support, input_output_manager );      item_it = (consequence_part.insert( item )).first;      condition_part.erase( item );   }}void MSApriori_Trie::assoc_rule_assist( const double min_conf, const Trie* trie, set<itemtype>& consequence_part, Input_Output_Manager& input_output_manager) const{   if( consequence_part.size() > 1 )   {      set<itemtype> condition_part;      assoc_rule_find( min_conf, condition_part, consequence_part, trie->counter, input_output_manager );   }   for( vector<Edge>::const_iterator it_item = trie->edgevector.begin(); it_item != trie->edgevector.end(); it_item++)   {      consequence_part.insert( (*it_item).label );      assoc_rule_assist( min_conf, (*it_item).subtrie, consequence_part, input_output_manager);      consequence_part.erase( (*it_item).label );   }}void MSApriori_Trie::write_content_to_file_assist( Input_Output_Manager& input_output_manager, const Trie* trie, const itemtype distance_from_frequent, set<itemtype>& frequent_itemset ) const{   if( distance_from_frequent )   {      for( vector<Edge>::const_iterator it = trie->edgevector.begin(); it != trie->edgevector.end(); it++ )      if( (*it).subtrie->maxpath + 1 >= distance_from_frequent )      {         frequent_itemset.insert( (*it).label );         write_content_to_file_assist( input_output_manager, (*it).subtrie, distance_from_frequent -1, frequent_itemset );         frequent_itemset.erase( (*it).label );      }   }   else input_output_manager.write_out_basket_and_counter( frequent_itemset, trie->counter );}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?