msapriori_trie.hpp

来自「Aprior的C++实现算法」· HPP 代码 · 共 117 行

HPP
117
字号
/***************************************************************************                          MSApriori_Trie.hpp  -  description                             -------------------    begin                : cs dec 26 2002    copyright            : (C) 2002 by Ferenc Bodon    email                : bodon@mit.bme.hu ***************************************************************************/#ifndef MSApriori_Trie_HPP#define MSApriori_Trie_HPP/**  *@author Ferenc Bodon  */#include "Trie.hpp"#include "Input_Output_Manager.hpp"#include <fstream>#include <set>#include <vector>#include <cstdio>using namespace std;/** Apriori_Trie (or prefix-tree) is a tree-based datastructure.   Apriori_Trie is a special tree designed to provide efficient methods for the apriori algorithm.   It mostly uses a regular trie except when there exist faster solution. For example for storing one and two itemset candidate where a simple vector and array gives better performance.   Apriori_Trie extends the functions provided by the regular trie with a candidate generation process.*/class MSApriori_Trie{public:   MSApriori_Trie( const unsigned long counter_of_emptyset, const vector<double>& mis_abs );      /// Insert the frequent items and their counters into the trie;   void insert_frequent_items( const set< pair<itemtype, unsigned long> >& counters );      /// Generates candidates.   void candidate_generation( const itemtype& frequent_size );   /// Increases the counter of those candidates that are contained by the given basket.   void find_candidate( const vector<itemtype>& basket, const itemtype candidate_size, const unsigned long counter=1 );   /// Deletes unfrequent itemsets.   void delete_infrequent( const itemtype candidate_size );   /// Generates association rules   void association( const double min_conf, Input_Output_Manager& input_output_manager ) const;   /// Returns the length of the longest path in the Apriori_Trie   itemtype longest_path() const;   /// Writes the content (frequent itemsets) to the file   void write_content_to_file( Input_Output_Manager& input_output_manager ) const;   /// This procedure shows the Apriori_Trie in a preorde   void show_content_preorder( ) const;   ~MSApriori_Trie();protected:   /// Decides if all subset of an itemset is contained in the Apriori_Trie   bool is_all_subset_frequent( const set<itemtype>& maybe_candidate ) const;   /// Generates candidate of size two   void candidate_generation_two( );   /// Generates candidate of size more than two   void candidate_generation_assist( Trie* Trie, const itemtype distance_from_generator,                                             set<itemtype>& maybe_candidate );   /// Increases the counter for those itempairs that are in the given basket.   void find_candidate_two( const vector<itemtype>& basket, const unsigned long counter=1 );   /// Deletes the Tries that represent infrequent itemsets of size 2.   void delete_infrequent_two( );   void assoc_rule_find( const double min_conf, set<itemtype>& condition_part, set<itemtype>& consequence_part, const unsigned long union_support, Input_Output_Manager& input_output_manager ) const;   void assoc_rule_assist( const double min_conf, const Trie* Trie, set<itemtype>& consequence_part, Input_Output_Manager& input_output_manager ) const;   //! Writes out the content of the Apriori_Trie (frequent itemset and counters).   void write_content_to_file_assist( Input_Output_Manager& input_output_manager, const Trie* actual_state, const itemtype distance_from_frequent, set<itemtype>& frequent_itemset ) const;private:   // No private methodspublic:   // No public membersprotected:/// Trie to store the candidates and the frequent itemsets      Trie main_trie;/**  temp_counter_array stores the occurences of the itempairs  *  * We can use a simple array to determine the support of itemset of size two. This requires  * less memory than the trie-based supportcount.  * temp_counter_array[i][j-i] stores the occurence of the itempair (i,j).  */      vector< vector<unsigned long> > temp_counter_array;/** The mis values. mis_abs[i] strores the mis value of item i. * * mis_abs vector is sorted, because eacs item gets a new code so that the item withe the smallest  * mis value has the smallest code. This is for the sake of efficiency, and candidate generation simplicity. */      vector<double> mis_abs;};#endif

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?