⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 apriori.cpp

📁 Apriori算法的实现
💻 CPP
字号:
// Apriori.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"

#include <fstream>
#include <iostream>
#include <string>
#include <sstream>
#include <vector>
#include <iterator>
#include <stdlib.h>
#include <cassert>
using namespace std;


class ListNode  
{	
public: 
	string itemset;
	int count;
	ListNode ( string iset, int c = 0 )
	{
	    itemset = iset;
		count = c;
	}

};

class List
{
public:
	vector<ListNode> candidate_set;
    vector<ListNode>::iterator iter;
}; 

int IsSubset(const string & str1 , const string & str2 );// judge wether str1 is the substring of str2 

int IsJoinable(const string & str1, const string &str2,  string & join);
// same length and first k-1 elements

int stringtovector(string & str, vector<int>& vint  );

class Apr
{
public:
	List* set_list ;
	int current_itemnum;
	vector< vector<int> > temporary_array;
	Apr(int n = 3){
		set_list = new List [ n + 1];
	}
    ~Apr(){
      delete[] set_list;
	}
	void setcurrent (int a)
	{
		current_itemnum = a;
	}
	void candidate_generate();
	void prune();
	int  isfrequent( const ListNode & );
	void prepare_array();
	void clear_array();
};

void Apr ::candidate_generate()
{
	assert(current_itemnum > 1 );
	int k =current_itemnum - 1;
    

	vector<ListNode>::iterator iter1,iter2;
    
	for(iter1 = set_list[k].candidate_set.begin(); iter1 != set_list[k].candidate_set.end(); iter1++ )
	{
		iter2 = iter1 + 1;
		while ( iter2 != set_list[k].candidate_set.end() )
		{
		  string afterjoin;
          if (IsJoinable( (*iter1).itemset, (*iter2).itemset, afterjoin) == 1)
		  {
            set_list[current_itemnum].candidate_set.push_back( ListNode(afterjoin) );  // push back a new item;

		  }
		   iter2 ++; 
		}
	}
}

void Apr::prune()
{
   // for each k-itemset (k > 2), find if it contains a k-1 itemset that is not frequent;

	// prepare the temporary  vector
	if(current_itemnum == 2) return;
    prepare_array();
    
    vector<ListNode>::iterator iter ;

	for ( iter = (set_list[current_itemnum].candidate_set).begin(); iter != set_list[current_itemnum].candidate_set.end();)
	{
		if ( isfrequent( (*iter)) == 0 )
		{
             set_list[current_itemnum].candidate_set.erase(iter);
		}
		else
		{
			iter ++;
		}
	}
	clear_array();
	
     //clear the temporary vector
}

void Apr::prepare_array()
{
	int k = current_itemnum - 1;
	
	int array_size = set_list[k].candidate_set.size();
    temporary_array.reserve(array_size);
	vector<ListNode>::iterator iter;
    
	for( iter = set_list[k].candidate_set.begin(); iter != set_list[k].candidate_set.end(); iter++ )
	{
		vector<int> temp;
        if( stringtovector( (*iter).itemset, temp) == 0)
		{
			cout << "Error when string to vector ";
		}
		temporary_array.push_back(temp);
	}

}

void Apr::clear_array()
{
	int array_size = temporary_array.size();
	int i;

	for( i = 0 ; i < array_size; i++)
	{
		temporary_array[i].clear();
	}
	temporary_array.clear();
}


int Apr::isfrequent(const ListNode & testcurrent )
{
	string str = testcurrent.itemset;
    vector<int> originalstr;
//	vector<int>::iterator iter;
 //   vector<ListNode>::iterator iter ;
  //  int k = current_itemnum - 1;
   

    assert( stringtovector ( str,originalstr ) == 1);
///	vector<int> temp0;
//	vector<int> temp;

	int l ,i;
	int array_size = temporary_array.size();
	int flag = 0;

	for( l = 0; l < current_itemnum - 1; l++ )
	{
		vector<int> temp0;
		
		temp0 = originalstr;
        vector<int> ::iterator iter = temp0.begin();

		temp0.erase( iter + l );
		// for each subset ,if there is not any match in the frequent k-1 itemset, then return false
	    flag = 0;
		for (i = 0; i < array_size; i ++ )
		{
			if (temporary_array[i] == temp0 )
			{
				flag = 1;
				break;
			}
		}
		if ( flag == 0 ) return 0;
	}
	return 1;
	
    
}
const int itemnum = 1000;
const int n = 4;

int main(int argc, char* argv[])
{
    ifstream in ;
    unsigned int frequency[itemnum + 1];
	unsigned int i;
	unsigned int item_id;
    Apr apr(n) ;

	string transaction ;
	// read the dataset of transaction
	in.open("dataset.txt",ios::in);
    
	for ( i = 1; i <= itemnum; i++)
	{
		frequency[i] = 0;
	}
	
    string str,strtemp;

	// get the frequency of each item

	while (  getline (in,str) )
	{   
		istringstream  sstr ;
		sstr.str(str);
	    while ( sstr >> item_id ) 
		{  
			cout << item_id << " ";
		    frequency[item_id] ++;
		}
		cout << endl;
	}
//	in.close();
//  find the set of 1-item frequent ;
	int support[n + 1] ;  // support number for each itemset number 

//	string temp;
// ****************************************************************
//***************** assign the value for support array*************
    for (i = 0 ; i <= n ; i++ )
	{
		support[i] = 2;
	}
    
	support[1] = 2; 
//**********************************************************************
	for(i = 1; i <= itemnum; i++)
	{
		if (frequency[i] >= support[1] )
		{
			string temp ;
			char c2[2] ;
			c2[0] = '0' + i;
			c2[1] = '\0';

            temp = c2;
		//	temp << i
		
			apr.set_list[1].candidate_set.push_back(ListNode(temp,frequency[i]));
		}
	}

	vector<ListNode> :: const_iterator citer ;
	for(citer = apr.set_list[1].candidate_set.begin(); citer != apr.set_list[1].candidate_set.end();citer++ )
    {
		cout << (*citer).itemset << ":" << (*citer).count << endl;
    }
	
 /*   string s1 = "1";
    string s2 = "4 5 ";
    string r;

	if(IsJoinable(s1,s2,r) == 0)
	{
		cout << " cannot be joined" << endl;
	}
	else
	{
		cout << endl << r.c_str();
	}
    */

	//***************************************************
    // set current item number
//	
//*******************************************
  // generate frequent itemset in loop
//********************************************
	int current_num = 2;

    while (current_num <= n){
	
	
	apr.setcurrent(current_num);
	// generate candidate set
	apr.candidate_generate();
    cout << "after generate" << current_num <<"-itemset"<< endl;
	for(citer = apr.set_list[current_num].candidate_set.begin(); citer != apr.set_list[current_num].candidate_set.end();citer++ )
    {
		cout << (*citer).itemset << ":" << (*citer).count << endl;
    } 
    
	
/*	for(citer = apr.set_list[2].candidate_set.begin(); citer != apr.set_list[2].candidate_set.end();citer++ )
    {
		cout << (*citer).itemset << ":" << (*citer).count << endl;
    } */
    cout << endl << endl ;
	// prune and generate the final candidate set
	apr.prune();
    
	cout << "after prune" << current_num <<"-itemset"<< endl;
	for(citer = apr.set_list[current_num].candidate_set.begin(); citer != apr.set_list[current_num].candidate_set.end();citer++ )
    {
		cout << (*citer).itemset << ":" << (*citer).count << endl;
    } 
	
  	//scan the database and delete the candidate that is not frequent in CANDIDATE SET WHICH IS THE DATA MEMBER IN THE APR
 
  //  in. open("dataset.txt",ios::in);
	in.clear();
	
    in.seekg(0,ios::beg);
	
	while ( getline(in,str) )
	{
		vector<ListNode>::iterator iter;
		for ( iter= apr.set_list[apr.current_itemnum].candidate_set.begin();
		     iter != apr.set_list[apr.current_itemnum].candidate_set.end(); iter ++)
		{
             if ( IsSubset((*iter).itemset ,str) )
			 {
				 (*iter).count ++;
			 }
 		}
		
	}
	cout << endl << "after scan the database:" << endl;
	for(citer = apr.set_list[current_num].candidate_set.begin(); citer != apr.set_list[current_num].candidate_set.end();citer++ )
    {
		cout << (*citer).itemset << ":" << (*citer).count << endl;
    } 
	cout << endl;
	vector<ListNode>::iterator iter ;
	// delete the elements with the support number 
	for(iter = apr.set_list[apr.current_itemnum].candidate_set.begin(); 
	   iter != apr.set_list[apr.current_itemnum].candidate_set.end();)
    {
		if((*iter).count < support[apr.current_itemnum])
		{
		   ////////////////////////////////////////
			//cout<< (*iter).itemset.c_str();
			apr.set_list[apr.current_itemnum].candidate_set.erase(iter);
		}
		else
		{
			iter++;
		}

	//	if(iter == apr.set_list[apr.current_itemnum].candidate_set.end())
	//		break;
    } 
    cout << "the final frequent " << current_num <<" itemset" << endl;
    for(citer = apr.set_list[current_num].candidate_set.begin(); citer != apr.set_list[current_num].candidate_set.end();citer++ )
    {
		cout << (*citer).itemset << ":" << (*citer).count << endl;
    } 
	
	current_num ++;
	}
	//*****************************************
	//*********the end of the loop************
	//*****************************************
	in.close();
	return 0;
} // end of main function





// see if str1 is the substring of string str2
// and presume that all the elements in the string is in increasing order
int IsSubset( const string & str1,const string & str2) 
{
	static const int npos = -1;

	if (str1.length() == 0)
	{
		return 1; // as the empty set is the subset of any set 
	}
	if(str2. length() == 0 && str1.length() != 0 )
	{
		return 0;
	}

	vector<int> v1, v2;
	int a;
// push the string into the vector v1 one integer by another	
	int start  = str1.find_first_not_of(' ');
	if (start == npos)
	{
		return 0;
	}

	int end  = str1. find_first_of(' ', start);
    

    while ( end != npos && start != npos )
	{
		a = atoi( ( str1.substr(start, end - start) ).c_str() );
		v1.push_back(a);
		start  = str1.find_first_not_of(' ', end);
		end = str1. find_first_of(' ', start);
	}
    if (end == npos && start != npos )  // insert the last item that has been detected
	{
        a = atoi( ( str1.substr(start, str1.length() - start) ).c_str() );
	    v1.push_back(a);
	}
	
// push the string into the vector v2 one integer by another
	
	 start  = str2.find_first_not_of(' ');
	if (start == npos)
	{
		return 0;
	}

	end  = str2. find_first_of(' ', start);
    
    while ( end != npos && start != npos )
	{
		a = atoi( ( str2.substr(start, end - start) ).c_str() );
		v2.push_back(a);
		start  = str2.find_first_not_of(' ', end);
		end = str2. find_first_of(' ', start);
	}
    if (end == npos && start != npos )
	{
        a = atoi( ( str2.substr(start, str1.length() - start) ).c_str() );
		v2.push_back(a);
	}


	int k1 = v1.size();
	int k2 = v2.size();

	int i1 = 0;
	int i2 = 0;

	while (i1 < k1 && i2 < k2)
	{
		if (v1[i1] == v2[i2])
		{
			i1++;
		}

		i2++;
	}

	if (i1 < k1)
	{
		return 0;
	}
	else
	{
		return 1;
	}

	
/*	int start1  = str1.find_first_not_of(' ');
	int end1 = str1. find_first_of(' ', start1);
	
    int start2  = str2.find_first_not_of(' ');
	int end2 = str2. find_first_of(' ', start2);
    
	int a ,b;

	a = atoi( ( str1.substr(start1, end1 - start1) ).c_str() );
	b = atoi( ( str2.substr(start2, end2 - start2) ).c_str() );
	
	while(start1 != npos  && start2 != npos)
	{
		
		while ( a != b) 
		{
			    int start2  = str2.find_first_not_of(' ', end2);
	            int end2 = str2. find_first_of(' ', start2);
				b = atoi( ( str2.substr(start2, end2 - start2) ).c_str() );
		}

		int start1  = str2.find_first_not_of(' ', end1);
	    int end1 = str2. find_first_of(' ', start1);
	    a = atoi( ( str2.substr(start1, end1 - start1) ).c_str() );
     
		int start2  = str2.find_first_not_of(' ', end2);
	    int end2 = str2. find_first_of(' ', start2);
	    b = atoi( ( str2.substr(start2, end2 - start2) ).c_str() );

	}

	if ( start1 != npos && start2 == npos )
		return 0;
	else return 1; */


}

int IsJoinable(const string & str1, const string &str2,  string & join)
// same length and first k-1 elements
{ 
	// change the two string into two int vector;

	static const int npos = -1;

	vector<int> v1, v2;
	int a;
// push the string into the vector v1 one integer by another	
	int start  = str1.find_first_not_of(' ');
	if (start == npos)
	{
		return 0;
	}

	int end  = str1. find_first_of(' ', start);
    

    while ( end != npos && start != npos )
	{
		a = atoi( ( str1.substr(start, end - start) ).c_str() );
		v1.push_back(a);
		start  = str1.find_first_not_of(' ', end);
		end = str1. find_first_of(' ', start);
	}
    if (end == npos && start != npos )  // insert the last item that has been detected
	{
        a = atoi( ( str1.substr(start, str1.length() - start) ).c_str() );
	    v1.push_back(a);
	}
	
// push the string into the vector v2 one integer by another
	
	 start  = str2.find_first_not_of(' ');
	if (start == npos)
	{
		return 0;
	}

	end  = str2. find_first_of(' ', start);
    
    while ( end != npos && start != npos )
	{
		a = atoi( ( str2.substr(start, end - start) ).c_str() );
		v2.push_back(a);
		start  = str2.find_first_not_of(' ', end);
		end = str2. find_first_of(' ', start);
	}
    if (end == npos && start != npos )
	{
        a = atoi( ( str2.substr(start, str1.length() - start) ).c_str() );
		v2.push_back(a);
	}
	

// judge if the two vector can be joined
	int k = v1.size();
    int i = 0;
	
	if (v1.size() != v2.size() )
	{
		return 0;
	}
//	vector<int> iter1,iter2;
    
	if ( k != 1 )
	{
		while ( i < k-1 )
		{
            if (v1[i] != v2[i])
			{
				return 0;
			}
			i++;
		} 
	}

    if(v1[i] < v2[i] )
	{
		v1.push_back(v2[i]);
	}
	else 
	{
		v1.push_back(v1[i]);
		v1[i] = v2[i]; 
	}
     
	for( i = 0; i < v1.size(); i++)
	{
		char ptr[80];
        
		_itoa(v1[i],ptr,10);	
		join += ptr;
		join += " ";
	}
   
    join = join.erase(join.size() -1 ,1);

	return 1;
}

int stringtovector (string & str, vector<int> & vint )
{
	static const int npos = -1;

	int a;
// push the string into the vector v1 one integer by another	
	int start  = str.find_first_not_of(' ');
	if (start == npos)
	{
		return 0;
	}

	int end  = str. find_first_of(' ', start);
    

    while ( end != npos && start != npos )
	{
		a = atoi( ( str.substr(start, end - start) ).c_str() );
		vint.push_back(a);
		start  = str.find_first_not_of(' ', end);
		end = str. find_first_of(' ', start);
	}
    if (end == npos && start != npos )  // insert the last item that has been detected
	{
        a = atoi( ( str.substr(start, str.length() - start) ).c_str() );
	    vint.push_back(a);
	}
	return 1;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -