⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 apriori.cpp

📁 经典的算法实例
💻 CPP
字号:
// Apriori.cpp : 定义控制台应用程序的入口点。
//

#include "stdafx.h"
#include "Apriori.h"
#include "conio.h"
#ifdef _DEBUG
#define new DEBUG_NEW
#endif


// 唯一的应用程序对象

CWinApp theApp;

using namespace std;

int _tmain(int argc, TCHAR* argv[], TCHAR* envp[])
{
	int nRetCode = 0;

	// 初始化 MFC 并在失败时显示错误
	if (!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0))
	{
		// TODO: 更改错误代码以符合您的需要
		_tprintf(_T("致命错误: MFC 初始化失败\n"));
		nRetCode = 1;
	}
	else
	{
		// TODO: 在此处为应用程序的行为编写代码。
		DBLink *D;
		ItemSet *IS,*preIS;
		SearchTree *STk;
		char filename[30];
		double min_sup = 0.02;
		int Record_Count = 0,min_count;
		DWORD nStartTime,nEndTime,SumTime = 0;
		int k = 1;
		cout<<"输入数据文件名:";
		cin>>filename;
		cout<<"输入最小支持阈值min_sup:";
		cin>>min_sup;
		CString exstr1;
		exstr1 = "数据来源文件:";
		exstr1 += filename;
		exstr1 += "\r\n最小支持阈值:";
		exstr1.AppendFormat("%f\r\n\r\n",min_sup);
		nStartTime = GetTickCount();
		D = ReadDataFromText(filename,&Record_Count);
		min_count = int(Record_Count*min_sup);
		STk = find_frequent_1_itemset(D,min_count);
		nEndTime = GetTickCount();
		SumTime += nEndTime - nStartTime;
		IS = GetDataLink(STk);
		//要删除STk
		STk = DeleteTree(STk);
		PrintResult(IS,k,exstr1);
		while(IS != NULL)
		{
			k++;
			nStartTime = GetTickCount();
			preIS = apriori_gen(IS,k);
			if(preIS == NULL) break;
			IS = DeleteLink(IS);
			IS = find_frequent_k_itemset(D,preIS,k,min_count);    //只是把preIS中的count字段作了更新并剪枝
			nEndTime = GetTickCount();
			SumTime += nEndTime - nStartTime;
			PrintResult(IS,k,exstr1);
		}
		IS = DeleteLink(IS);
		cout<<endl<<"Apriori算法运行完毕,为您找到最高"<<k-1<<"-项集。"<<endl;
		cout<<"算法总耗时:"<<SumTime<<"ms。"<<"按任意键结束!!!"<<endl;
		getch();
	}

	return nRetCode;
}


SearchTree* find_frequent_1_itemset(DBLink* D,int min_count)
{
	SearchTree* ST_1 = NULL;
	DBLink* DBL = D;
	while(DBL != NULL)     //遍历源数据表,给每个关键字计数
	{
		int nStart = 0,nEnd;
		CString str;
		do{
			nEnd = DBL->datastr.Find(';',nStart);
			if(nEnd == -1)
				str = DBL->datastr.Mid(nStart);
			else{
				str = DBL->datastr.Mid(nStart,nEnd-nStart);
				nStart = nEnd+1;
			}
			ST_1 = SearchAndAddInTree(ST_1,str,1);
		}while(nEnd != -1);
		DBL = DBL->next;
	}
	SearchAndDelInTree(ST_1,min_count);     //剪枝
	return ST_1;
}

ItemSet* find_frequent_k_itemset(DBLink* D,ItemSet* IS,int k,int min_count)
{
	DBLink* DBL = D;
	int step = 0;
	CString tmp;
	while(DBL != NULL)     //遍历源数据表,搜索k-项集,增加计数
	{
		step++;
		IS = SearchAndAddIn_kLink(IS,DBL->datastr);
		DBL = DBL->next;
		if((step % 10) == 0){
			tmp.Format("%d\r\n",step);
			TRACE(tmp.GetBuffer());
		}
	}
	return Del_k_ItemSet(IS,min_count);     //剪枝
}

ItemSet* apriori_gen(ItemSetLink* IS,int k)
{
	ItemSetLink *preIS1 = IS,*preIS2;
	ItemSetLink *ISk,*ISkhead = NULL;
	while(preIS1)
	{
		preIS2 = preIS1->next;
		while(preIS2)
		{
			CString tmp;    //临时存放候选节点
			tmp.Empty();
			if(k == 2)
				tmp = preIS1->datastr+ ';' + preIS2->datastr;
			else{    //注:各关键字集都是已经按字母序排好序的
				int index1,index2;
				CString tmp1,tmp2;
				index1 = preIS1->datastr.ReverseFind(';');
				index2 = preIS2->datastr.ReverseFind(';');
				tmp1 = preIS1->datastr.Left(index1);
				tmp2 = preIS2->datastr.Left(index2);
				if(tmp1 == tmp2)
					tmp = tmp1 + preIS1->datastr.Mid(index1) + preIS2->datastr.Mid(index2);   //preIS1的子串排序在preIS2的子串前
			}
			if(!tmp.IsEmpty()){
				if(!has_infrequent_subset(tmp,IS))    //无非频繁子集   tmp作为一个新结点的值插入候选链表中
					if(ISkhead == NULL){
						ISkhead = CreatePreItemSet(ISkhead,tmp,k);
						ISk = ISkhead;
					}
					else{
						ISk->next = CreatePreItemSet(ISk,tmp,k);    //直接从上一个节点开始插入新节点,减少无谓的链表遍历
						ISk = ISk->next;
					}
			}
			preIS2 = preIS2->next;
		}
		preIS1 = preIS1->next;
	}

	return ISkhead;
}

ItemSetNode* GetDataLink(SearchTree* ST)
{
	if(ST == NULL) return NULL;
	ItemSetNode* N;
	N = GetDataLink(ST->left);
	if(N == NULL){
		N = new ItemSetNode;
		N->datastr = ST->datastr;
		N->count = ST->count;
		N->next = GetDataLink(ST->right);
	}
	else{
		ItemSetNode* tmpN = N;
		while(tmpN->next != NULL)    tmpN = tmpN->next;
		tmpN->next = new ItemSetNode;
		tmpN = tmpN->next;
		tmpN->datastr = ST->datastr;
		tmpN->count = ST->count;
		tmpN->next = GetDataLink(ST->right);
	}
	return N;    //返回的是链表头结点指针
}

BOOL has_infrequent_subset(CString set,ItemSet* IS)    //在IS中搜索验证set是否包含非频繁子集
{
	int nStart = 0,nEnd;
	ItemSet* IShead = IS;
	CString subset;
	do{
		//把截出来的单词去掉,得到k-1子集
		nEnd = set.Find(';',nStart);
		if(nStart == 0 && nEnd != -1)
			subset = set.Mid(nEnd+1);
		else if(nStart == 0 && nEnd == -1)
			subset = set;
		else if(nStart != 0 && nEnd == -1)
			subset = set.Left(nStart-1);
		else
			subset = set.Left(nStart)+set.Mid(nEnd+1);
		nStart = nEnd+1;
		//搜索该子集是否在k-1频繁集中
		IS = IShead;
		while(IS)
		{
			if(subset.CompareNoCase(IS->datastr) == 0)
				break;
			IS = IS->next;
		}
		if(IS == NULL)
			return TRUE;    //有非频繁子集
	}while(nEnd != -1);
	return FALSE;    //无非频繁子集
}

BOOL FindInTree(SearchTree* ST,CString str)
{
	if(ST == NULL) return FALSE;
	if(str.CompareNoCase(ST->datastr.GetBuffer()) < 0)
		return FindInTree(ST->left,str);
	else if(str.CompareNoCase(ST->datastr.GetBuffer()) > 0)
		return FindInTree(ST->right,str);
	else   //找到了
		return TRUE;
}

SearchTree* SearchAndAddInTree(SearchTree* ST,CString str,int k)
{
	if(ST == NULL){
		ST = new SearchTree;
		if(ST == NULL)
			cout<<"Out of space!!!"<<endl;
		else{
			ST->datastr = str;
			if(k == 1)      //1-项集二叉树边构建边计数
				ST->count = 1;
			else //k-项集二叉树先构建后计数
				ST->count = 0;
			ST->left = ST->right = NULL;
		}
	}
	else if(str.CompareNoCase(ST->datastr.GetBuffer()) < 0)
		ST->left = SearchAndAddInTree(ST->left,str,k);
	else if(str.CompareNoCase(ST->datastr.GetBuffer()) > 0)
		ST->right = SearchAndAddInTree(ST->right,str,k);
	else    //找到
		ST->count++;

	return ST;
}

ItemSetLink* SearchAndAddIn_kLink(ItemSetLink* IS,CString str)  //更新count字段  
{
	CString sw;
	ItemSetLink* IShead = IS;
	int nStart = 0,nEnd;
	BOOL bFound;
	CStringList SL;
	CString key;
	do{
		nEnd = str.Find(';',nStart);
		if(nEnd == -1)
			key = str.Mid(nStart);
		else
			key = str.Mid(nStart,nEnd - nStart);
		SL.AddTail(key);
		nStart = nEnd + 1;
	}while(nEnd != -1);

	while(IS)
	{
		bFound = TRUE;
		nStart = 0;
		do{
			nEnd = IS->datastr.Find(';',nStart);
			if(nEnd == -1)
				sw = IS->datastr.Mid(nStart);
			else
				sw = IS->datastr.Mid(nStart,nEnd - nStart);
			int index = 0;
			while(index < SL.GetCount())
			{
				CString tmp = SL.GetAt(SL.FindIndex(index));
				if(tmp.CompareNoCase(sw) == 0)
					break;
				index++;
			}
			if(index >= SL.GetCount()){
				bFound = FALSE;
				break;
			}
			nStart = nEnd + 1;
		}while(nEnd != -1);
		if(bFound == TRUE)
			IS->count++;
		IS = IS->next;
	}
	SL.RemoveAll();
	return IShead;
}

//BOOL CompareKeyInStr(CString strDB,CString strN)  //在strDB的关键字组中寻找strN的关键字是否都出现
//{
//	int nStart = 0,nEnd;
//	CString key;
//	do{
//		nEnd = strDB.Find(';',nStart);
//		if(nEnd == -1)
//			key = strDB.Mid(nStart);
//		else
//			key = strDB.Mid(nStart,nEnd - nStart);
//		if(key.CompareNoCase(strN) == 0)    //找到了
//			return TRUE;
//		nStart = nEnd + 1;
//	}while(nEnd != -1);
//	return FALSE;
	//int nStart_DB = 0,nStart_N = 0;
	//int nEnd_DB,nEnd_N;
	//CString key_db,key_n;
	////都读入第一个key
 //   nEnd_DB = strDB.Find(';',nStart_DB);
	//nEnd_N = strN.Find(';',nStart_N);
	//if(nEnd_DB == -1)
	//	key_db = strDB.Mid(nStart_DB);
	//else
	//	key_db = strDB.Mid(nStart_DB,nEnd_DB - nStart_DB);
	//if(nEnd_N == -1)
	//	key_n = strN.Mid(nStart_N);
	//else
	//	key_n = strN.Mid(nStart_N,nEnd_N - nStart_N);
	//nStart_DB = nEnd_DB + 1;
	//nStart_N = nEnd_N + 1;

	//do{
	//	if(key_db.CompareNoCase(key_n) == 0)    //找到一个关键字,则一起往后移
	//	{
	//		if(nEnd_DB != -1 && nEnd_N != -1)
	//		{
	//			nEnd_DB = strDB.Find(';',nStart_DB);
	//			nEnd_N = strN.Find(';',nStart_N);
	//			if(nEnd_DB == -1)
	//				key_db = strDB.Mid(nStart_DB);
	//			else
	//				key_db = strDB.Mid(nStart_DB,nEnd_DB - nStart_DB);
	//			if(nEnd_N == -1)
	//				key_n = strN.Mid(nStart_N);
	//			else
	//				key_n = strN.Mid(nStart_N,nEnd_N - nStart_N);
	//		}
	//		else 
	//			break;
	//	}
	//	else if(key_db.CompareNoCase(key_n) > 0)    //在strDB中不可能再找到一个关键字与当前的key_n匹配
	//		return FALSE;
	//	else    //StrDB往后移
	//	{
	//		if(nEnd_DB != -1){
	//			nEnd_DB = strDB.Find(';',nStart_DB);
	//			if(nEnd_DB == -1)
	//				key_db = strDB.Mid(nStart_DB);
	//			else
	//				key_db = strDB.Mid(nStart_DB,nEnd_DB - nStart_DB);
	//		}
	//		else
	//			break;
	//	}

	//	nStart_DB = nEnd_DB + 1;
	//	nStart_N = nEnd_N + 1;
	//}while(1);    //有一个串已到尾部,则跳出循环

	//if(nEnd_N == -1 && nEnd_DB == -1)
	//	if(key_db.CompareNoCase(key_n) == 0)
	//		return TRUE;
	//	else
	//		return FALSE;
	//else if(nEnd_N == -1 && nEnd_DB != -1)    //strN中的关键字已经全部找到
	//	return TRUE;
	//else    //strN中的关键字未找完,strDB已搜索完
	//	return FALSE;
//}

SearchTree* SearchAndDelInTree(SearchTree* ST,int min_count)
{
	if(ST == NULL)    return ST;
	ST->left = SearchAndDelInTree(ST->left,min_count);
	ST->right = SearchAndDelInTree(ST->right,min_count);
	//删除非频繁节点
	if(ST->count < min_count)
	{
		SearchTree* TmpNode;
		if(ST->left && ST->right)   //两个子树
		{
			TmpNode = FindMin(ST->right);
			ST->datastr = TmpNode->datastr;
			ST->count = TmpNode->count;
			ST->right = DeleteNode(ST->right,ST->datastr);
		}
		else    //一棵或没有子树
		{
			TmpNode = ST;
			if(ST->left == NULL)
				ST = ST->right;
			else if(ST->right == NULL)
				ST = ST->left;
			delete TmpNode;
		}
	}
	return ST;
}

SearchTree* FindMin(SearchTree* ST)
{
	if(ST != NULL)
		while(ST->left != NULL)
			ST = ST->left;
	return ST;
}

SearchTree* DeleteNode(SearchTree* ST,CString str)
{
	SearchTree* TmpNode;
	if(ST == NULL)
		cout<<"Element not found!!!"<<endl;
	else if(str.CompareNoCase(ST->datastr.GetBuffer()) < 0)
		ST->left = DeleteNode(ST->left,str);
	else if(str.CompareNoCase(ST->datastr.GetBuffer()) > 0)
		ST->right = DeleteNode(ST->right,str);
	//找到了节点
	else if(ST->left && ST->right)  //两个子树
	{
		TmpNode = FindMin(ST->right);
		ST->datastr = TmpNode->datastr;
		ST->count = TmpNode->count;
		ST->right = DeleteNode(ST->right,ST->datastr);
	}
	else   //一棵或没有子树
	{
		TmpNode = ST;
		if(ST->left == NULL)
			ST = ST->right;
		else if(ST->right == NULL)
			ST = ST->left;
		delete TmpNode;
	}

	return ST;
}

CString GetLine(CFile* const f)    //获得文件中的一行字符
{
	UINT n=0,row=0;
	CString linestr;
	char buf;
	n=f->Read(&buf,1);
	if(n==0) return "Wen_End";   //已到文件尾
	do{
		if(buf=='\r'){
			n = f->Read(&buf,1);  //把'\n'读出
			break;
		}
		linestr+=buf;
		n = f->Read(&buf,1);
	}while(n!=0);
	return linestr;
}

DBLink* ReadDataFromText(CString filename,int *RC)
{
	CFile ftxt;
	DBLink *D,*DBL;
	D = NULL;    DBL = NULL;
	if(!ftxt.Open(filename,CFile::modeRead)){
		cout<<"无法打开数据文件";
		return NULL;
	}
	CString linestr;
	while((linestr = GetLine(&ftxt)) != "Wen_End")
	{
		if(!linestr.IsEmpty())
		{
			if(D == NULL){
				D = new DBLink;
				DBL = D;
			}
			else{
				DBL->next = new DBLink;
				DBL = DBL->next;
			}

			DBL->datastr = linestr;
			DBL->next = NULL;
		}
		(*RC)++;
	}
	ftxt.Close();
	return D;
}

void PrintResult(ItemSet* IS,int k,CString exstr1)
{
	if(IS == NULL)  return;
	CString exstr;
	CFile fout;
	if(k == 1){  
		fout.Open("apriori_output.txt",CFile::modeCreate|CFile::modeWrite);
		fout.Write(exstr1.GetBuffer(),exstr1.GetLength());
	}
	else{
		fout.Open("apriori_output.txt",CFile::modeWrite);
		fout.SeekToEnd();
	}
	cout<<"频繁"<<k<<"-项集:"<<endl;
	exstr.Format("频繁%d-项集:\r\n",k);
	fout.Write(exstr,exstr.GetLength());
	while(IS != NULL){
		cout<<IS->datastr<<"  "<<IS->count<<endl;
		exstr.Format("%s  %d\r\n",IS->datastr.GetBuffer(),IS->count);
		fout.Write(exstr,exstr.GetLength());
		IS = IS->next;
	}
	cout<<endl;
	fout.Write("\r\n",2);
	fout.Close();
}

ItemSetLink* CreatePreItemSet(ItemSetLink* preIS,CString str,int k)
{
	if(preIS == NULL){
		preIS = new ItemSetLink;
		preIS->datastr = str;
		preIS->count = 0;
		preIS->next = NULL;
		return preIS;
	}
//preIS->next为NULL
	preIS->next = new ItemSetLink;
	preIS = preIS->next;
	preIS->datastr = str;
	preIS->count = 0;
	preIS->next = NULL;
	return preIS;
}

ItemSetLink* DeleteLink(ItemSetLink* IS)
{
	ItemSetLink* dIS;
	while(IS)
	{
		dIS = IS;
		IS = IS->next;
		delete dIS;
	}
	return NULL;
}

ItemSet* Del_k_ItemSet(ItemSet* IS,int min_count)
{
	ItemSet *tmpNode,*preNode,*IShead = IS;
	while(IS)
	{
		tmpNode = NULL;
		if(IS->count < min_count){    //删除当前节点
			if(IS == IShead){    //是头节点
				tmpNode = IShead;
				IShead = IS->next;
				preNode = IShead;
			}
			else{
				tmpNode = IS;
				preNode->next = IS->next;
			}
		}
		else    //当前节点保留
			preNode = IS;
		IS = IS->next;
		if(tmpNode)
			delete tmpNode;
	}
	return IShead;
}

SearchTree* DeleteTree(SearchTree* ST)
{
	if(ST == NULL)
		return NULL;
	ST->left = DeleteTree(ST->left);
	ST->right = DeleteTree(ST->right);
	delete ST;
	return NULL;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -