⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 apriori_trie.cpp

📁 数据挖掘的aprior算法
💻 CPP
字号:
 /***************************************************************************
                           Apriori_Trie.cpp  -  description
                              -------------------
     begin                : cs dec 26 2002
     copyright            : (C) 2002 by Ferenc Bodon
     email                : bodon@cs.bme.hu
  ***************************************************************************/
 
 
 #include "Apriori_Trie.hpp"
 #include <cstdlib>
 #include <algorithm>
 #include <iostream>
 
 
 void Apriori_Trie::insert_frequent_items(
    const vector<countertype>& counters )
 {
    for(vector<countertype>::size_type item_index = 0; 
        item_index < counters.size(); item_index++)
       main_trie.add_empty_state( item_index, counters[item_index] );
 }
 
 void Apriori_Trie::candidate_generation( 
    const itemtype& frequent_size,
    Input_Output_Manager& input_output_manager )
 {
    if( frequent_size == 1 ) candidate_generation_two();
    else
    {
       set<itemtype> maybe_candidate;
       candidate_generation_assist( &main_trie,
                                    maybe_candidate, 
                                    input_output_manager );
    }
 }
 
 void Apriori_Trie::find_candidate( const vector<itemtype>& basket, 
                                    const itemtype candidate_size, 
                                    const countertype counter_incr)
 {
    if( candidate_size != 2 ) 
       if ( candidate_size<basket.size()+1 )
          main_trie.find_candidate( basket.end()-candidate_size+1, 
                                    basket.begin(), counter_incr );
       else;
    else find_candidate_two( basket, counter_incr );    
 }
 
 void Apriori_Trie::delete_infrequent( const double min_occurrence, 
                                       const itemtype candidate_size )
 {
    if( candidate_size != 2 ) 
       main_trie.delete_infrequent( min_occurrence );
    else delete_infrequent_two( min_occurrence );
 }
 
 
 bool Apriori_Trie::is_all_subset_frequent( 
    const set<itemtype>& maybe_candidate ) const
 {
    if( maybe_candidate.size() < 3) return true; // because of the 
                                                 // candidate generation method!
    else
    {
       set<itemtype>                 temp_itemset(maybe_candidate);
       set<itemtype>::const_iterator item_it = --(--maybe_candidate.end());
       do
       {
          item_it--;
          temp_itemset.erase( *item_it );
          if( !main_trie.is_included( temp_itemset, temp_itemset.begin() ) ) 
             return false;
          temp_itemset.insert( *item_it );
       }
       while ( item_it != maybe_candidate.begin() );
       return true;
    }
 }
 
 void Apriori_Trie::candidate_generation_two()
 {
    if( !main_trie.edgevector.empty() )
    {
       temp_counter_array.reserve(main_trie.edgevector.size()-1);
       temp_counter_array.resize(main_trie.edgevector.size()-1);
       for( vector<Edge>::size_type stateIndex = 0; 
            stateIndex < main_trie.edgevector.size()-1; stateIndex++ )
       {
          temp_counter_array[stateIndex].reserve(
             main_trie.edgevector.size()-1-stateIndex );
          temp_counter_array[stateIndex].resize(
             main_trie.edgevector.size()-1-stateIndex, 0);
       }
    }
 }
 
 void Apriori_Trie::candidate_generation_assist( 
    Trie* trie, 
    set<itemtype>& maybe_candidate, 
    Input_Output_Manager& input_output_manager)
 {
    vector<Edge>::iterator itEdge = trie->edgevector.begin();
    if( (*itEdge).subtrie->edgevector.empty() )
    {
          vector<Edge>::iterator itEdge2;
          Trie* toExtend;
          while( itEdge != trie->edgevector.end() )
          {
             maybe_candidate.insert((*itEdge).label);
             toExtend = (*itEdge).subtrie;
             input_output_manager.write_out_basket_and_counter( 
                maybe_candidate, toExtend->counter );
             for( itEdge2 = itEdge + 1; itEdge2 != trie->edgevector.end(); 
                  itEdge2++ )
             {
                maybe_candidate.insert( (*itEdge2).label );
                if( is_all_subset_frequent( maybe_candidate) )
                   toExtend->add_empty_state( (*itEdge2).label );
                maybe_candidate.erase( (*itEdge2).label );
             }
                 // we know that state toExtend 
                 // will not have any more children!
             (vector<Edge>(toExtend->edgevector)).swap(toExtend->edgevector);  
             
             maybe_candidate.erase((*itEdge).label);
             if( toExtend->edgevector.empty() ) 
             {
                delete (*itEdge).subtrie;
                itEdge = trie->edgevector.erase(itEdge);
             }
             else itEdge++;
          }
    }
    else
    {
       while( itEdge != trie->edgevector.end() )
       {
 
          maybe_candidate.insert((*itEdge).label);
          candidate_generation_assist((*itEdge).subtrie, 
                                      maybe_candidate, input_output_manager );
          maybe_candidate.erase((*itEdge).label);
          if((*itEdge).subtrie->edgevector.empty())
          {
             delete (*itEdge).subtrie;
             itEdge = trie->edgevector.erase(itEdge);
          }
          else itEdge++;
       }
    }
 }
 
 void Apriori_Trie::find_candidate_two( const vector<itemtype>& basket, 
                                        const countertype counter )
 {
    if( basket.size() > 1)
    {
       vector<itemtype>::const_iterator it1_basket,
                                        it2_basket;
 
       for( it1_basket = basket.begin(); it1_basket != basket.end()-1; 
            it1_basket++)
          for( it2_basket = it1_basket+1; it2_basket != basket.end(); 
               it2_basket++)
             temp_counter_array[*it1_basket][*it2_basket-*it1_basket-1] 
                += counter;
    }
 }
 
 void Apriori_Trie::delete_infrequent_two( const double min_occurrence )
 {
    vector<Edge>::size_type stateIndex_1,
                             stateIndex_2;
    for( stateIndex_1 = 0; stateIndex_1 < main_trie.edgevector.size()-1; 
         stateIndex_1++ )
    {
       for( stateIndex_2 = 0; 
            stateIndex_2 < main_trie.edgevector.size() - 1 - stateIndex_1; 
            stateIndex_2++ )
       {
         if( temp_counter_array[stateIndex_1][stateIndex_2] > min_occurrence )
            main_trie.edgevector[stateIndex_1].subtrie->add_empty_state( 
               stateIndex_1 + stateIndex_2 + 1, 
               temp_counter_array[stateIndex_1][stateIndex_2] );
       }
       temp_counter_array[stateIndex_1].clear();
       vector<countertype>().swap(temp_counter_array[stateIndex_1]);   
    }
    temp_counter_array.clear();
 
    vector< vector<countertype> >().swap(temp_counter_array);   
    vector<Edge>::iterator it= main_trie.edgevector.begin();
    while( it!=main_trie.edgevector.end() )
       if((*it).subtrie->edgevector.empty())
       {
          delete (*it).subtrie;
          it = main_trie.edgevector.erase(it);
       }
       else it++;
 }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -