📄 appriori.c
字号:
#include <stdio.h>
#include <conio.h>
#include <math.h>
#define Maxnum 20
#define item_num 10
/*variate and structure define*/
int itemset_num,minsup_count;
typedef struct itemset{
char item[item_num];
int support_count;
int MaxFI;
struct itemset *next;
}itemset,*linkset;
linkset h[Maxnum];
int Max;
/*function*/
int getint(float n)
/*由于判断是否为频繁项集时需比较项频度(int类型)与最小频度(项数与最小支持度(float类型)之积)的大小,又不能强制转换,因此声明此函数*/
{
int i=(int)n;
if(n>(float)i) return i+1;
else return i;
}
int indexofstr(char s[],char c)
/*项集是用字符数组表示,在计算单项频度及保持连接生成项集时顺序中都要用到某单项在项集中的位置(下标),因此声明此函数*/
{
int i=0;
while(i<strlen(s))
{
if(c==s[i]) return i;
i++;
}
return -1;
}
char*join_itemset(char s[],char t[],int n)
/*项集之间的连接,n表示当前项集的长度*/
{
int i=0;
while(i<n-1)
{
if(s[i]==t[i])
i++;
else
{
s[i+1]='\0';
while(i+2<item_num)
{
s[i+2]='\0';
i++;
}
return s;
}
}
if(s[i]<t[i]) s[i+1]=t[i];
else
{
s[i+1]=s[i];
s[i]=t[i];
}
while(i+2<item_num)
{
s[i+2]='\0';
i++;
}
return s;
}
int is_contained(char s[],char t[])
/*判断字符数组s是否包含于t*/
{
int i=0,j=0,flag=0;
while(i<strlen(s)&&flag!=1)
{
while(j<strlen(t)&&flag!=1)
{
if(s[i]==t[j])
{
i++;
j++;
}
else j++;
if(i==strlen(s)) flag=1;
}
if(i<strlen(s)&&j==strlen(t)) flag=1;
}
if(i==strlen(s)&&s[i-1]==t[j-1]) return 1;
else return 0;
}
main()
{
int i,j,k,m,n,tem,str[item_num];
int counter[item_num]={0,0,0,0,0,0,0,0,0,0};
float minsupport,minconfidence,confidence;
char a,charset[item_num]={"abcdefghij"},temp[item_num],adhead[item_num],sequent[item_num];
linkset c,p,q,r;
FILE*fp;
/*get minsupport,minconfidence*/
if((fp=fopen("input.txt","r"))==NULL)
{
printf("Can't open cource file\n");
exit(0);
}
fscanf(fp,"%f%f",&minsupport,&minconfidence);
fclose(fp);
/*saveing the source data as Linklist;*/
if((fp=fopen("source.txt","r"))==NULL)
{
printf("Can't open cource file\n");
exit(0);
}
p=(linkset)malloc(sizeof(itemset));
p->next=NULL;
p->support_count=0;
p->MaxFI=0;
h[0]=p;
itemset_num=0;
while(!feof(fp))
{
q=(linkset)malloc(sizeof(itemset));
fscanf(fp,"%s",q->item);
q->support_count=0;
q->MaxFI=0;
q->next=NULL;
p->next=q;
p=q;
itemset_num++;
}
fclose(fp);
printf("minsupport=[%f],minconfidence=[%f]\n",minsupport,minconfidence);
printf("transactions [%d] in db is:\n",itemset_num);
minsup_count=getint(minsupport*itemset_num);
/*count item*/
p=h[0]->next;
while(p!=NULL)
{
printf("%s ",p->item);
for(i=0;i<strlen(p->item);i++)
{
if((j=indexofstr(charset,p->item[i]))!=-1)
counter[j]++;
}
p=p->next;
}
printf("\n");
/*1-itemset*/
p=(linkset)malloc(sizeof(itemset));
p->next=NULL;
p->support_count=0;
p->MaxFI=0;
h[1]=p;
for(i=0;i<item_num;i++) /* Thank xx-zjm's correction*/
{
if(counter[i]>=minsup_count)
{
q=(linkset)malloc(sizeof(itemset));
q->support_count=counter[i];
q->item[0]=charset[i];
for(j=1;j<item_num;j++)
q->item[j]='\0';
q->next=NULL;
p->next=q;
p=q;
}
}
/*k-itemset*/
i=1;
while(h[i]->next!=NULL)
{
c=(linkset)malloc(sizeof(itemset));
c->support_count=0;
c->MaxFI=1;
c->next=NULL;
h[i+1]=c;
/*generate C(k)_itemset from L(k-1)_itemset*/
p=h[i]->next;
while(p!=NULL)
{
q=p->next;
while(q!=NULL)
{
j=0;
while(j<strlen(temp))
{
temp[j]='\0';
j++;
}
strcpy(temp,p->item);
join_itemset(temp,q->item,i);
if(strlen(temp)>i)
{
r=(linkset)malloc(sizeof(itemset));
r->support_count=0;
strcpy(r->item,temp);
r->MaxFI=1;
r->next=NULL;
c->next=r;
c=r;
}
q=q->next;
}
p=p->next;
}
/*choose L(k)_itemset from C(k)_itemset*/
p=h[0]->next;
while(p!=NULL)
{
if(strlen(p->item)<i+1)
{
p=p->next;
continue;
}
q=h[i+1]->next;
while(q!=NULL)
{
if(is_contained(q->item,p->item))
q->support_count++;
q=q->next;
}
p=p->next;
}
c=h[i+1];
while(c->next!=NULL)
{
p=c->next;
if(p->support_count<minsup_count)
{
c->next=p->next;
free(p);
}
else
c=c->next;
}
i++;
}
Max=i;
/* output all frequent itemset*/
for(i=2;i<=Max;i++)
{
p=h[i]->next;
while(p!=NULL)
{
printf("%s %d\n",p->item,p->support_count);
p=p->next;
}
}
/* choose non-Maximal frequent itemset and label with 0*/
i=Max;
p=h[i]->next;
while(p==NULL)
{
i--;
p=h[i]->next;
}
for(j=i;j>2;j--)
{
p=h[j]->next;
while(p!=NULL)
{
q=h[j-1]->next;
while(q!=NULL)
{
if(is_contained(q->item,p->item))
q->MaxFI=0;
q=q->next;
}
p=p->next;
}
}
if((fp=fopen("result.txt","wt"))==NULL)
{
printf("Can't open cource file\n");
exit(0);
}
/*generate rules from Maximal frequent itemset*/
for(i=2;i<=Max;i++)
{
p=h[i]->next;
while(p!=NULL)
{
if(p->MaxFI==1)
{
fprintf(fp,"Rules from Maximal frequent itemset[%s|%d] are:\n",p->item,p->support_count);
strcpy(temp,p->item);
for(j=pow(2,i)-2;j>0;j--)
{
tem=j;
for(k=i-1;k>=0;k--)
{
adhead[k]='\0';
sequent[k]='\0';
str[k]=tem%2;
tem=tem/2;
}
m=0;
n=0;
for(k=0;k<i;k++)
{
if(str[k]==0) adhead[m++]=temp[k];
else sequent[n++]=temp[k];
}
k=strlen(adhead);
q=h[k]->next;
while(q!=NULL)
{
if(strcmp(q->item,adhead)==0)
{
confidence=(float)p->support_count/q->support_count;
if(confidence>=minconfidence)
fprintf(fp,"%s->%s|%.4f\n",adhead,sequent,confidence);
}
q=q->next;
}
}
}
p=p->next;
}
}
getchar();
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -