msapriori_trie.cpp
来自「Aprior的C++实现算法」· C++ 代码 · 共 293 行
CPP
293 行
/*************************************************************************** MSApriori_Trie.cpp - description ------------------- begin : cs dec 26 2002 copyright : (C) 2002 by Ferenc Bodon email : bodon@mit.bme.hu ***************************************************************************/#include "MSApriori_Trie.hpp"#include <cstdlib>#include <algorithm>#include <iostream>/** \param counter_of_emptyset The support of the empty set, i.e. the number of transactions. \param mis_abs The mis values.*/MSApriori_Trie::MSApriori_Trie(const unsigned long counter_of_emptyset, const vector<double>& mis_abs ): main_trie( NULL, counter_of_emptyset),mis_abs(mis_abs){}/** \param counters It stores the support of the frequent items. counters[i] stores the suport of item i.*/void MSApriori_Trie::insert_frequent_items( const set< pair<itemtype, unsigned long> >& counters ){ for(set< pair<itemtype, unsigned long> >::iterator it = counters.begin(); it != counters.end(); it++) main_trie.add_empty_state( (*it).first, (*it).second ); if( !main_trie.edgevector.empty() ) main_trie.maxpath = 1;}/** \param frequent_size Size of the frequent itemsets that generate the candidates.*/void MSApriori_Trie::candidate_generation( const itemtype& frequent_size ){ if( frequent_size == 1 ) candidate_generation_two(); else if( main_trie.maxpath == frequent_size ) { set<itemtype> maybe_candidate; candidate_generation_assist( &main_trie, frequent_size-1, maybe_candidate ); }}/** \param basket The basket that hs to be analyzed. \param candidate_size the size of the candidates. \param counter_incr The number of time the basket occured. The counters of candidates that occure in the basket has to be incremented by counter_incr.*/void MSApriori_Trie::find_candidate( const vector<itemtype>& basket, const itemtype candidate_size, const unsigned long counter_incr){ if( candidate_size != 2 ) main_trie.find_candidate( basket, candidate_size, basket.begin(), counter_incr ); else find_candidate_two( basket, counter_incr ); }/** \param candidate_size The size of the candidate itemset.*/void MSApriori_Trie::delete_infrequent( const itemtype candidate_size ){ if( candidate_size != 2 ) { for( vector<Edge>::iterator itEdge = main_trie.edgevector.begin(); itEdge != main_trie.edgevector.end(); itEdge++ ) if( (*itEdge).subtrie->maxpath == candidate_size - 1 ) (*itEdge).subtrie->delete_infrequent( mis_abs[(*itEdge).label], candidate_size - 2 ); } else delete_infrequent_two( );}/** \param min_conf Confidence threshold. \param input_output_manager This will write out itemsets.*/void MSApriori_Trie::association( const double min_conf, Input_Output_Manager& input_output_manager ) const{ input_output_manager << "\nAssociation rules:\ncondition ==> consequence (confidence, occurrence)\n"; set<itemtype> consequence_part; assoc_rule_assist( min_conf, &main_trie, consequence_part, input_output_manager );}itemtype MSApriori_Trie::longest_path() const{ return main_trie.maxpath;}void MSApriori_Trie::write_content_to_file( Input_Output_Manager& input_output_manager ) const{ input_output_manager<< "Frequent 0-itemsets:\nitemset (occurrence)\n"; input_output_manager<< "{} ("<< main_trie.counter << ')' << endl; for( itemtype item_size = 1; item_size < main_trie.maxpath+1; item_size++ ) { input_output_manager<< "Frequent " << item_size << "-itemsets:\nitemset (occurrence)\n"; set<itemtype> frequent_itemset; write_content_to_file_assist( input_output_manager, &main_trie, item_size, frequent_itemset ); }}void MSApriori_Trie::show_content_preorder( ) const{ main_trie.show_content_preorder( );}MSApriori_Trie::~MSApriori_Trie(){}/** \param maybe_candidate The itemset that has to be checked. */bool MSApriori_Trie::is_all_subset_frequent( const set<itemtype>& maybe_candidate ) const{ if( maybe_candidate.size() < 3) return true; // because of the candidate generation method! else { set<itemtype> temp_itemset(maybe_candidate); set<itemtype>::const_iterator item_it = --(--maybe_candidate.end()); set<itemtype>::const_iterator it_lower_bound = ++maybe_candidate.begin(); while ( item_it != it_lower_bound ) { item_it--; temp_itemset.erase( *item_it ); if( !main_trie.is_included( temp_itemset, temp_itemset.begin() ) ) return false; temp_itemset.insert( *item_it ); } if( mis_abs[*it_lower_bound] == mis_abs[*maybe_candidate.begin()] ) { temp_itemset.erase( *maybe_candidate.begin() ); if( main_trie.is_included( temp_itemset, temp_itemset.begin() ) ) return true; else return false; } else return true; }}/** \param basket the given basket \param counter The number the processed basket occures in the transactional database */void MSApriori_Trie::find_candidate_two( const vector<itemtype>& basket, const unsigned long counter ){ if(basket.size() > 1) { vector<itemtype>::const_iterator it1_basket, it2_basket; for( it1_basket = basket.begin(); it1_basket != basket.end()-1; it1_basket++) for( it2_basket = it1_basket+1; it2_basket != basket.end(); it2_basket++) temp_counter_array[*it1_basket][*it2_basket-*it1_basket-1] += counter; }}void MSApriori_Trie::candidate_generation_two( ){ if( !mis_abs.empty() ) { main_trie.maxpath = 2; temp_counter_array.reserve( mis_abs.size() - 1 ); temp_counter_array.resize( mis_abs.size() - 1 ); for( vector<Edge>::size_type stateIndex = 0; stateIndex < mis_abs.size() - 1; stateIndex++ ) { temp_counter_array[stateIndex].reserve( mis_abs.size() - 1 - stateIndex ); temp_counter_array[stateIndex].resize( mis_abs.size() - 1 - stateIndex, 0); } }}void MSApriori_Trie::candidate_generation_assist( Trie* trie, const itemtype distance_from_generator, set<itemtype>& maybe_candidate){ vector<Edge>::iterator itEdge = trie->edgevector.begin(); if( distance_from_generator ) { for( ; itEdge != trie->edgevector.end(); itEdge++ ) if( (*itEdge).subtrie->maxpath + 1 >= distance_from_generator ) { maybe_candidate.insert((*itEdge).label); candidate_generation_assist((*itEdge).subtrie, distance_from_generator - 1, maybe_candidate ); maybe_candidate.erase((*itEdge).label); } } else { vector<Edge>::iterator itEdge2; Trie* toExtend; for( ; itEdge != trie->edgevector.end(); itEdge++ ) { maybe_candidate.insert((*itEdge).label); toExtend = (*itEdge).subtrie; for( itEdge2 = itEdge + 1; itEdge2 != trie->edgevector.end(); itEdge2++ ) { maybe_candidate.insert( (*itEdge2).label ); if( is_all_subset_frequent( maybe_candidate) ) toExtend->add_empty_state( (*itEdge2).label ); maybe_candidate.erase( (*itEdge2).label ); } if( !toExtend->edgevector.empty()) toExtend->maxpath = 1; (vector<Edge>(toExtend->edgevector)).swap(toExtend->edgevector); // we know that state toExtend will not have any more children! maybe_candidate.erase((*itEdge).label); } trie->max_path_set(); }}void MSApriori_Trie::delete_infrequent_two( ){ vector<Edge>::size_type stateIndex_1, stateIndex_2; vector<Edge>::iterator it; for( stateIndex_1 = 0; stateIndex_1 < mis_abs.size()-1; stateIndex_1++ ) { it = lower_bound(main_trie.edgevector.begin(), main_trie.edgevector.end(), stateIndex_1, Edge_point_less); for( stateIndex_2 = 0; stateIndex_2 < mis_abs.size() - 1 - stateIndex_1; stateIndex_2++ ) if( temp_counter_array[stateIndex_1][stateIndex_2] > mis_abs[stateIndex_1] ) (*it).subtrie->add_empty_state( stateIndex_1 + stateIndex_2 + 1, temp_counter_array[stateIndex_1][stateIndex_2] ); temp_counter_array[stateIndex_1].clear(); vector<unsigned long>().swap(temp_counter_array[stateIndex_1]); /// temp_counter_array[stateIndex_1] will never be used again! if( !(*it).subtrie->edgevector.empty() ) (*it).subtrie->maxpath = 1; } temp_counter_array.clear(); vector< vector<unsigned long> >().swap(temp_counter_array); /// temp_counter_array will never be used again! main_trie.max_path_set();}void MSApriori_Trie::assoc_rule_find( const double min_conf, set<itemtype>& condition_part, set<itemtype>& consequence_part, const unsigned long union_support, Input_Output_Manager& input_output_manager ) const{ itemtype item; for( set<itemtype>::const_iterator item_it = consequence_part.begin(); item_it != consequence_part.end(); item_it++) if( condition_part.empty() || *(--condition_part.end()) < *item_it) { item = *item_it; consequence_part.erase( item ); condition_part.insert( item ); if( union_support > main_trie.is_included(condition_part, condition_part.begin() )->counter * min_conf) { input_output_manager<< endl; input_output_manager.write_out_basket(condition_part); input_output_manager<< "==> "; input_output_manager.write_out_basket(consequence_part); input_output_manager<< "("<<((double) union_support) / main_trie.is_included(condition_part, condition_part.begin())->counter << ", " << union_support << ')'; } else if( consequence_part.size() > 1 ) assoc_rule_find( min_conf, condition_part, consequence_part, union_support, input_output_manager ); item_it = (consequence_part.insert( item )).first; condition_part.erase( item ); }}void MSApriori_Trie::assoc_rule_assist( const double min_conf, const Trie* trie, set<itemtype>& consequence_part, Input_Output_Manager& input_output_manager) const{ if( consequence_part.size() > 1 ) { set<itemtype> condition_part; assoc_rule_find( min_conf, condition_part, consequence_part, trie->counter, input_output_manager ); } for( vector<Edge>::const_iterator it_item = trie->edgevector.begin(); it_item != trie->edgevector.end(); it_item++) { consequence_part.insert( (*it_item).label ); assoc_rule_assist( min_conf, (*it_item).subtrie, consequence_part, input_output_manager); consequence_part.erase( (*it_item).label ); }}void MSApriori_Trie::write_content_to_file_assist( Input_Output_Manager& input_output_manager, const Trie* trie, const itemtype distance_from_frequent, set<itemtype>& frequent_itemset ) const{ if( distance_from_frequent ) { for( vector<Edge>::const_iterator it = trie->edgevector.begin(); it != trie->edgevector.end(); it++ ) if( (*it).subtrie->maxpath + 1 >= distance_from_frequent ) { frequent_itemset.insert( (*it).label ); write_content_to_file_assist( input_output_manager, (*it).subtrie, distance_from_frequent -1, frequent_itemset ); frequent_itemset.erase( (*it).label ); } } else input_output_manager.write_out_basket_and_counter( frequent_itemset, trie->counter );}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?