⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 myapriori.cpp

📁 apriori经典算法及一个改进算法的程序,里面有具体的解析与操作
💻 CPP
字号:
// MyApriori.cpp : 定义控制台应用程序的入口点。
//

#include "stdafx.h"
#include "MyApriori.h"
#include "conio.h"
#include "stdlib.h"
#ifdef _DEBUG
#define new DEBUG_NEW
#endif


// 唯一的应用程序对象

CWinApp theApp;

using namespace std;

int _tmain(int argc, TCHAR* argv[], TCHAR* envp[])
{
	int nRetCode = 0;

	// 初始化 MFC 并在失败时显示错误
	if (!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0))
	{
		// TODO: 更改错误代码以符合您的需要
		_tprintf(_T("致命错误: MFC 初始化失败\n"));
		nRetCode = 1;
	}
	else
	{
		// TODO: 在此处为应用程序的行为编写代码。
		KeyTree *KT;
		ItemSet *IS,*preIS;
		char filename[30];
		double min_sup = 0.02;
		int Record_Count = 0,min_count;
		int k = 1;
		DWORD nStartTime,nEndTime,SumTime = 0;
		cout<<"输入数据文件名:";
		cin>>filename;
		cout<<"输入最小支持阈值min_sup:";
		cin>>min_sup;
		CString exstr1;
		exstr1 = "数据来源文件:";
		exstr1 += filename;
		exstr1 += "\r\n最小支持阈值:";
		exstr1.AppendFormat("%f\r\n\r\n",min_sup);
		nStartTime = GetTickCount();
		KT = ReadDataFromText(filename,&Record_Count);
		min_count = int(Record_Count*min_sup);
		IS = find_frequent_1_itemset(KT,min_count);   //实际是剪枝后生成链表
		nEndTime = GetTickCount();
		SumTime += nEndTime - nStartTime;
		PrintResult(IS,k,exstr1);
		while(IS != NULL)
		{
			k++;
			nStartTime = GetTickCount();
			preIS = apriori_gen(IS,k);
			if(preIS == NULL) break;
			IS = DeleteLink(IS);
			IS = find_frequent_k_itemset(KT,preIS,k,min_count);    //只是把preIS中的count字段作了更新并剪枝
			nEndTime = GetTickCount();
			SumTime += nEndTime - nStartTime;
			PrintResult(IS,k,exstr1);
		}
		IS = DeleteLink(IS);
		cout<<endl<<"My-Apriori算法运行完毕,为您找到最高"<<k-1<<"-项集。"<<endl;
		cout<<"算法总耗时:"<<SumTime<<"ms。"<<"按任意键结束!!!"<<endl;
		getch();
	}

	return nRetCode;
}

ItemSet* find_frequent_1_itemset(KeyTree* KT,int min_count)
{
	KT = SearchAndDelInTree(KT,min_count);//剪枝
	return GetDataLink(KT);    //生成链表
}

ItemSetLink* GetDataLink(KeyTree* KT)
{
	if(KT == NULL) return NULL;
	ItemSetLink* N;
	N = GetDataLink(KT->left);
	if(N == NULL){
		N = new ItemSetLink;
		N->datastr = KT->Key;
		N->count = KT->count;
		N->next = GetDataLink(KT->right);
	}
	else{
		ItemSetLink* tmpN = N;
		while(tmpN->next != NULL)    tmpN = tmpN->next;
		tmpN->next = new ItemSetLink;
		tmpN = tmpN->next;
		tmpN->datastr = KT->Key;
		tmpN->count = KT->count;
		tmpN->next = GetDataLink(KT->right);
	}
	return N;    //返回的是链表头结点指针
}

KeyTree* SearchAndDelInTree(KeyTree* KT,int min_count)
{
	if(KT == NULL)    return KT;
	KT->left = SearchAndDelInTree(KT->left,min_count);
	KT->right = SearchAndDelInTree(KT->right,min_count);
	//删除非频繁节点
	if(KT->count < min_count)
	{
		KeyTree* TmpNode;
		if(KT->left && KT->right)   //两个子树
		{
			TmpNode = FindMin(KT->right);
			KT->Key = TmpNode->Key;
			KT->count = TmpNode->count;
			KT->IndexSet = TmpNode->IndexSet;
			KT->right = DeleteNode(KT->right,KT->Key);
		}
		else    //一棵或没有子树
		{
			TmpNode = KT;
			if(KT->left == NULL)
				KT = KT->right;
			else if(KT->right == NULL)
				KT = KT->left;
			delete TmpNode;
		}
	}
	return KT;
}

KeyTree* FindMin(KeyTree* KT)
{
	if(KT != NULL)
		while(KT->left != NULL)
			KT = KT->left;
	return KT;
}

KeyTree* DeleteNode(KeyTree* KT,CString str)
{
	KeyTree* TmpNode;
	if(KT == NULL)
		cout<<"Element not found!!!"<<endl;
	else if(str.CompareNoCase(KT->Key.GetBuffer()) < 0)
		KT->left = DeleteNode(KT->left,str);
	else if(str.CompareNoCase(KT->Key.GetBuffer()) > 0)
		KT->right = DeleteNode(KT->right,str);
	//找到了节点
	else if(KT->left && KT->right)  //两个子树
	{
		TmpNode = FindMin(KT->right);
		KT->Key = TmpNode->Key;
		KT->count = TmpNode->count;
		KT->IndexSet = TmpNode->IndexSet;
		KT->right = DeleteNode(KT->right,KT->Key);
	}
	else   //一棵或没有子树
	{
		TmpNode = KT;
		if(KT->left == NULL)
			KT = KT->right;
		else if(KT->right == NULL)
			KT = KT->left;
		delete TmpNode;
	}

	return KT;
}

KeyTree* ReadDataFromText(CString filename,int *count)
{
	CFile ftxt;
	KeyTree* KT = NULL;
	if(!ftxt.Open(filename,CFile::modeRead)){
		cout<<"无法打开数据文件";
		return NULL;
	}
	CString linestr;
	while((linestr = GetLine(&ftxt)) != "Wen_End")
	{
		(*count)++;
		if(!linestr.IsEmpty()){
			CString sw;
			int nStart = 0,nEnd;
			do{
				nEnd = linestr.Find(';',nStart);
				if(nEnd == -1)
					sw = linestr.Mid(nStart);
				else
					sw = linestr.Mid(nStart,nEnd - nStart);
				nStart = nEnd + 1;
				KT =UpdateKeyTree(KT,sw,*count);
			}while(nEnd != -1);
		}
	}
	ftxt.Close();
	return KT;
}

KeyTree* UpdateKeyTree(KeyTree* KT,CString sw,int count)
{
	if(KT == NULL){
		KT = new KeyTree;
		KT->Key = sw;
		KT->IndexSet.Format("%d",count);
		KT->count = 1;
		KT->left = KT->right = NULL;
	}
	else if(sw.CompareNoCase(KT->Key) < 0)
		KT->left = UpdateKeyTree(KT->left,sw,count);
	else if(sw.CompareNoCase(KT->Key) > 0)
		KT->right = UpdateKeyTree(KT->right,sw,count);
	else    //树中已存在该关键词
	{
		KT->IndexSet.AppendFormat(";%d",count);
		KT->count++;
	}
	return KT;
}

CString GetLine(CFile* const f)    //获得文件中的一行字符
{
	UINT n=0,row=0;
	CString linestr;
	char buf;
	n=f->Read(&buf,1);
	if(n==0) return "Wen_End";   //已到文件尾
	do{
		if(buf=='\r'){
			n = f->Read(&buf,1);  //把'\n'读出
			break;
		}
		linestr+=buf;
		n = f->Read(&buf,1);
	}while(n!=0);
	return linestr;
}

void PrintResult(ItemSet* IS,int k,CString exstr1)
{
	if(IS == NULL)  return;
	CString exstr;
	CFile fout;
	if(k == 1){  
		fout.Open("myapriori_output.txt",CFile::modeCreate|CFile::modeWrite);
		fout.Write(exstr1.GetBuffer(),exstr1.GetLength());
	}
	else{
		fout.Open("myapriori_output.txt",CFile::modeWrite);
		fout.SeekToEnd();
	}
	cout<<"频繁"<<k<<"-项集:"<<endl;
	exstr.Format("频繁%d-项集:\r\n",k);
	fout.Write(exstr,exstr.GetLength());
	while(IS != NULL){
		cout<<IS->datastr<<"  "<<IS->count<<endl;
		exstr.Format("%s  %d\r\n",IS->datastr.GetBuffer(),IS->count);
		fout.Write(exstr,exstr.GetLength());
		IS = IS->next;
	}
	cout<<endl;
	fout.Write("\r\n",2);
	fout.Close();
}

ItemSetLink* apriori_gen(ItemSet* IS,int k)
{
	ItemSetLink *preIS1 = IS,*preIS2;
	ItemSetLink *ISk,*ISkhead = NULL;
	while(preIS1)
	{
		preIS2 = preIS1->next;
		while(preIS2)
		{
			CString tmp;    //临时存放候选节点
			tmp.Empty();
			if(k == 2)
				tmp = preIS1->datastr+ ';' + preIS2->datastr;
			else{    //注:各关键字集都是已经按字母序排好序的
				int index1,index2;
				CString tmp1,tmp2;
				index1 = preIS1->datastr.ReverseFind(';');
				index2 = preIS2->datastr.ReverseFind(';');
				tmp1 = preIS1->datastr.Left(index1);
				tmp2 = preIS2->datastr.Left(index2);
				if(tmp1 == tmp2)
					tmp = tmp1 + preIS1->datastr.Mid(index1) + preIS2->datastr.Mid(index2);   //preIS1的子串排序在preIS2的子串前
			}
			if(!tmp.IsEmpty()){
				if(!has_infrequent_subset(tmp,IS))    //无非频繁子集   tmp作为一个新结点的值插入候选链表中
					if(ISkhead == NULL){
						ISkhead = CreatePreItemSet(ISkhead,tmp,k);
						ISk = ISkhead;
					}
					else{
						ISk->next = CreatePreItemSet(ISk,tmp,k);    //直接从上一个节点开始插入新节点,减少无谓的链表遍历
						ISk = ISk->next;
					}
			}
			preIS2 = preIS2->next;
		}
		preIS1 = preIS1->next;
	}

	return ISkhead;
}

BOOL has_infrequent_subset(CString set,ItemSet* IS)
{
	int nStart = 0,nEnd;
	ItemSet* IShead = IS;
	CString subset;
	do{
		//把截出来的单词去掉,得到k-1子集
		nEnd = set.Find(';',nStart);
		if(nStart == 0 && nEnd != -1)
			subset = set.Mid(nEnd+1);
		else if(nStart == 0 && nEnd == -1)
			subset = set;
		else if(nStart != 0 && nEnd == -1)
			subset = set.Left(nStart-1);
		else
			subset = set.Left(nStart)+set.Mid(nEnd+1);
		nStart = nEnd+1;
		//搜索该子集是否在k-1频繁集中
		IS = IShead;
		while(IS)
		{
			if(subset.CompareNoCase(IS->datastr) == 0)
				break;
			IS = IS->next;
		}
		if(IS == NULL)
			return TRUE;    //有非频繁子集
	}while(nEnd != -1);
	return FALSE;    //无非频繁子集
}


ItemSetLink* CreatePreItemSet(ItemSetLink* preIS,CString str,int k)
{
	if(preIS == NULL){
		preIS = new ItemSetLink;
		preIS->datastr = str;
		preIS->count = 0;
		preIS->next = NULL;
		return preIS;
	}
//preIS->next为NULL
	preIS->next = new ItemSetLink;
	preIS = preIS->next;
	preIS->datastr = str;
	preIS->count = 0;
	preIS->next = NULL;
	return preIS;
}

ItemSet* find_frequent_k_itemset(KeyTree* KT,ItemSetLink* preIS,int k,int min_count)
{
	CString sw;
	ItemSet* IShead = preIS;
	CStringArray SA;
	SA.SetSize(k);
	while(preIS)    //对候选频繁链表中的每个节点均进行计数
	{
		int nStart = 0,nEnd;
		int n = 0;
		for(int i = 0; i < k; i++)
			SA[i].Empty();
		do{
			nEnd = preIS->datastr.Find(';',nStart);
			if(nEnd == -1)
				sw = preIS->datastr.Mid(nStart);
			else
				sw = preIS->datastr.Mid(nStart,nEnd - nStart);
			if(!sw.IsEmpty())
				SA.SetAt(n,FindInKeyTree(KT,sw));  //把找到的“关键字出现的记录表”添加到链表中
			n++;
			nStart = nEnd + 1;
		}while(nEnd != -1);
		preIS->count = CountForKey(&SA,k);
		preIS = preIS->next;
	}
	IShead = Del_k_ItemSet(IShead,min_count);
	return IShead;
}

CString FindInKeyTree(KeyTree* KT,CString str)
{
	if(KT == NULL)
		return "";
	else if(str.CompareNoCase(KT->Key.GetBuffer()) < 0)
		return FindInKeyTree(KT->left,str);
	else if(str.CompareNoCase(KT->Key.GetBuffer()) > 0)
		return FindInKeyTree(KT->right,str);
	else    //找到了
		return KT->IndexSet;
}


int CountForKey(CStringArray* SA,int k)
{
	int count = 0;
	int *nValue = new int[k];    //记录号
	int *nStart = new int[k];    //搜索起始下标
	int *nEnd = new int[k];    //搜索到的下标
	CString tmp;
	for(int i = 0; i < k; i++)
		nStart[i] = 0;  
	GetRecordNum(SA,nValue,nStart,nEnd,k);     //得到第一组记录号

	while(1)
	{
		int mindex;
		mindex = MinnIndex(nValue,k);
		if(mindex == -1){    //各记录号相等
			count++;
			if(SomeIsEnd(nStart,k))    //有记录表已到达末尾
				break;
			//都读下一个记录号
			GetRecordNum(SA,nValue,nStart,nEnd,k);
		}
		else{
			//值最小者读下一个记录号
			if(nStart[mindex] == 0)    //值最小的串已经到达末端
				break;
			tmp = SA->GetAt(mindex);
			nEnd[mindex] = tmp.Find(';',nStart[mindex]);
			if(nEnd[mindex] == -1)
				nValue[mindex] = atoi(tmp.Mid(nStart[mindex]).GetBuffer());
			else
				nValue[mindex] = atoi(tmp.Mid(nStart[mindex],nEnd[mindex] - nStart[mindex]).GetBuffer());
			nStart[mindex] = nEnd[mindex] + 1;
		}
	}
	delete nValue;
	delete nStart;
	delete nEnd;

	return count;
}

int MinnIndex(int nIndexArray[],int k)
{
	int mV = nIndexArray[0],mI = 0;
	BOOL equal = TRUE;
	for(int i = 1; i< k; i++){
		if(mV != nIndexArray[i]){
			equal = FALSE;
			mI = mV <= nIndexArray[i]? mI:i;
			mV = nIndexArray[mI];
		}
	}
	if(equal == TRUE)     //如果所有值相等,则返回-1
		mI = -1;
	return mI;
}

BOOL SomeIsEnd(int nStart[],int k)
{
	for(int i = 0; i < k; i++)
		if(nStart[i] == 0)
			return TRUE;
	return FALSE;
}

void GetRecordNum(CStringArray* SA,int nValue[],int nStart[],int nEnd[],int k)
{
	CString tmp;
	for(int i = 0;i < k;i++){    //得到SA各节点的下一个记录号
		tmp = SA->GetAt(i);
		nEnd[i] = tmp.Find(';',nStart[i]);
		if(nEnd[i] == -1)
			nValue[i] = atoi(tmp.Mid(nStart[i]).GetBuffer());
		else
			nValue[i] = atoi(tmp.Mid(nStart[i],nEnd[i] - nStart[i]).GetBuffer());
		nStart[i] = nEnd[i] + 1;
	}
}

ItemSet* Del_k_ItemSet(ItemSet* IS,int min_count)
{
	ItemSet *tmpNode,*preNode,*IShead = IS;
	while(IS)
	{
		tmpNode = NULL;
		if(IS->count < min_count){    //删除当前节点
			if(IS == IShead){    //是头节点
				tmpNode = IShead;
				IShead = IS->next;
				preNode = IShead;
			}
			else{
				tmpNode = IS;
				preNode->next = IS->next;
			}
		}
		else    //当前节点保留
			preNode = IS;
		IS = IS->next;
		if(tmpNode)
			delete tmpNode;
	}
	return IShead;
}

ItemSetLink* DeleteLink(ItemSetLink* IS)
{
	ItemSetLink* dIS;
	while(IS)
	{
		dIS = IS;
		IS = IS->next;
		delete dIS;
	}
	return NULL;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -