📄 dic.cpp
字号:
# include <iostream.h>
# include <stdio.h>
# include <string.h>
# define M 560//将数据分成4*M分
# define sup 0.5//support threshold
FILE *stream;
char suf[100][10];
char kitem[100][10]; //suf is used for storing k-itemset,while kitem is k+1-itemset
int suf_n;
int kitem_n;//分别表示上面数组有多少个元素
struct itemset{ //data structure
char item[20];
int count;//used for counting
int k;//k-itemset
int N;//used for recording where databse procedure scan
int flag;//used for store the state of itemset
struct itemset *next;
};
/* void creat_1itemset_chain(itemset* head);
void insert_kitemset_to_chain(itemset* head,char *s,int k);
int compare_two_item(char *a,int a1,char *b,int b1);//0 means a<b,a1 is the length of a,while b1 is b
int judge_one_dem(char s[],char a);
int judge_more_dem(char s[],char a[]);
void counting_itemset(itemset* head,char s[]);
void procedure_apriori_gen();
int procedure_has_infrequent_subset(char candidate[]);
int judge_if_generate_kitemset_or_not();
void sort_inner_item(char s[]);
void modify_state_flag(itemset* head,float sup);
void find_frequent_itemset(itemset* head,int k);//k is k-itemset's k
void find_frequent_itemset(itemset* head);//output itemset whoes flag is 4
*/
itemset *head;
itemset *creat_1itemset_chain()
{
itemset* p;
cout<<"Input 1-itemset(don't over 20 item): "<<endl;
head=NULL;
p=new itemset;
p->count=0;
p->k=1;
p->flag=0;
p->N=0;
while ((p->item[0]=getc(stdin))!='\n') //对输入的1-itemset创建链表
{
p->item[1]='\0';
p->next=head;
head=p;
p=new itemset;
p->count=0;
p->k=1;
p->flag=0;
p->N=0;
}
delete p; //释放最后一次申请的结构空间
itemset *p1,*p2;
p2=p1=head;
char temp;
for(p1=head;p1!=NULL;p1=p1->next)//对输入的1-itemset创建的链表进行排序
for(p2=p1;p2!=NULL;p2=p2->next)
if((p1->item[0])>(p2->item[0]))
{
temp = p1->item[0] ;
p1->item[0] = p2->item[0];
p2->item[0] = temp;
}
return(head);
}
int compare_two_item(char *a,int a1,char *b,int b1)//0 means a<b,a1 is the length of a,while b1 is b
{
int i,t=0,flag=0;//0 means a<b
if(a1>b1)
{
for(i=0;i<b1;i++)
{
if(*a<*b)
{
t=1;
break;
}
else if(*a>*b)
{
t=1;
flag=1;
break;
}
a++;
b++;
}
if(t==0)
return (0);
else if(t!=0)
return (flag);
}
else if(a1<=b1)
{
for(i=0;i<a1;i++)
{
if(*a<*b)
{
t=1;
break;
}
else if(*a>*b)
{
t=1;
flag=1;
break;
}
a++;
b++;
}
if(t==0)
return (0);
else if(t!=0)
return (flag);
}
}
void insert_kitemset_to_chain(itemset* head,char *s,int k)
{
itemset *p,*p1,*p2;
int i,t;
p2=p1=head;
p=new itemset;
for(i=0;i<k;i++)
{
p->item[i]=*s;
s++;
}
p->item[k]='\0';
p->k=k;
p->count=0;
p->flag=0;
p->N=0;
for(;p1!=NULL;p1=p1->next)
{
t=compare_two_item(p1->item,p1->k,p->item,p->k);
if(t)//有序插入某个节点
{
p1=p2;
p2=p2->next;
p1->next=p;
p->next=p2;
break;
}
else
p2=p1;
}
}
void sort_inner_item(char s[])
{
int n = strlen(s);
int i, j;
char temp;
for(i = 0; i < n - 1; i ++)
for(j = i + 1; j < n; j ++)
if(s[i] > s[j])
{
temp = s[i];
s[i] = s[j];
s[j] = temp;
}
}
int judge_one_dem(char s[],char a) //判断s[]中是否存在字符a,存在返回1,否则为0。即此函数为单个字符判断
{
int n = strlen(s);
int t=0;
for(int i=0;i<n;i++)
if(s[i]==a)
{
t=1;
break;
}
return (t);
}
int judge_more_dem(char s[],char a[])//判断s[]中是否存在字符a[](不考虑顺序),存在返回1,否则为0。即此函数为字符串判断
{
int an=strlen(a);
int t=1;//flag is used for judge a is belong to s or not
for(int i=0;i<an;i++)
if(judge_one_dem(s,a[i])==0)
{
t=0;
break;
}
return (t);
}
void counting_itemset(itemset* head,char s[])
{
itemset *p;
p=head;
for(;p!=NULL;p=p->next)
if(judge_more_dem(s,p->item)==1&&(p->N!=4))//只有当item包含在s中,并且未完成一遍扫描时才count++
{
p->count=p->count+1;
}
}
void modify_state_flag(itemset *head,float d)//修改标志位,所有的N++。
{ //如果N=4,即完成一遍DB scan,if flag是1则改为3;if flag 是2则改为4
itemset *p;
p=head;
for(;p!=NULL;p=p->next)//如果N=4,即完成一遍DB scan,if flag是1则改为3;if flag 是2则改为4
{
if(p->N!=4)
{
p->N=(p->N+1);
if(p->count<((p->N)*M*d))
p->flag=1;//when flag is 1 ,which means itemset is below support threshold
else
p->flag=2;//when flag is 2 ,which means itemset is over support threshold
}
if(p->N==4)
{
if(p->flag==1)
p->flag=3;
else if(p->flag==2)
p->flag=4;
}
}
}
void find_frequent_itemset(itemset* head,int k)//find frequent k-itemset
{
itemset *p;
p=head;
suf_n=0;
for(;p!=NULL;p=p->next)
if(p->k==k&&p->flag==2)
{
strcpy(suf[suf_n],p->item);
suf_n++;
}
}
int procedure_has_infrequent_subset(char candidate[])
{
int k=strlen(candidate);
char suf1[20][4];
int i,j,t=1,tj,m=1;
for(i=0;i<k-1;i++)
{
suf1[0][i]=candidate[i+1];
}
suf1[0][i]='\0';
for(i=1;i<k;i++)
{
for(j=0;j<m;j++)
suf1[i][j]=candidate[j];
for(j=m;j<k-1;j++)
suf1[i][j]=candidate[j+1];
suf1[i][j]='\0';
m++;
}
for(i=0;i<k;i++)
{
tj=0;
for(j=0;j<suf_n;j++)
{
if(judge_more_dem(suf[j],suf1[i]))
{
// cout<<"通过"<<endl;
tj=1;
break;
}
}
if(tj==0)
{
t=0;
break;
}
}
return (t);
}
void procedure_apriori_gen()
{
int i,j,t,m,flag,i1;
int n=strlen(suf[0]);
char candidate[10],candidate1[10];
for(i=0;i<suf_n;i++)
{
// strcpy(candidate1,suf[i]);
for(j=i+1;j<suf_n;j++)
{
strcpy(candidate1,suf[i]);
strcat(candidate1,suf[j]);
sort_inner_item(candidate1);
m=0;
candidate[0]=candidate1[0];
for(t=1;t<(n+n);t++)
{
if(candidate[m]!=candidate1[t])
{
m++;
candidate[m]=candidate1[t];
}
else
continue;
}
candidate[m+1]='\0';
int l=strlen(candidate);
if(l!=(n+1))
continue;
else if(procedure_has_infrequent_subset(candidate))
{
flag=0;
if(kitem_n!=0)
{
for(i1=0;i1<kitem_n;i1++)
if(judge_more_dem(kitem[i1],candidate))
{
flag=1;
break;
}
}
if(flag==0)
{
strcpy(kitem[kitem_n],candidate);
kitem_n++;
}
}
}
}
}
int judge_if_generate_kitemset_or_not()
{
if(suf_n<2)
return (0);
else
return (1);
}
void find_frequent_itemset(itemset* head)
{
itemset *p;
p=head;
for(;p!=NULL;p=p->next)
if(p->flag==4)
{
cout<<p->item<<"="<<p->count<<endl;
}
}
void output(itemset *head)
{
itemset *p;
p=head;
for(;p!=NULL;p=p->next)
cout<<p->item<<" "<<"count"<<"="<<p->count<<" "<<"k,N,falg分别为:"<<p->k<<" "<<p->N<<" "<<p->flag<<endl;
}
void frequent_item_sort_output(itemset *head)
{
itemset *p,*p1,*p2;
p=head;
p2=new itemset;
for(;p!=NULL;p=p->next)
for(p1=p;p1!=NULL;p1=p1->next)
{
if(p->count<p1->count)
{
strcpy(p2->item,p->item);
p2->count=p->count;
p2->flag=p->flag;
p2->k=p->k;
p2->N=p->N;
strcpy(p->item,p1->item);
p->count=p1->count;
p->flag=p1->flag;
p->k=p1->k;
p->N=p1->N;
strcpy(p1->item,p2->item);
p1->count=p2->count;
p1->flag=p2->flag;
p1->k=p2->k;
p1->N=p2->N;
}
}
delete p2;
}
void main( void )
{
head=creat_1itemset_chain();//creat 1-itemset
itemset *p;
p=head;
char s[20];
int i=0,j,k=1,t,m=1,n=0; //k record k-itemset
stream = fopen( "f.txt", "r" );
if( stream == NULL )
cout<<"The file fscanf.txt was not opened"<<endl;
else
{
fseek( stream, 0L, SEEK_SET );/* Set pointer to beginning of file: */
for(;m==1;)
{ n++;
if(head->flag==4&&n%4==0)
{
fseek( stream, 0L, SEEK_SET );
cout<<"指针已经设置到头节点处"<<endl;
}
for(j=0;j<M;j++)
{
fscanf( stream, "%s", s );/* Read data to s from file: */
sort_inner_item(s);//sort s like ABCD...
// cout<<s<<endl;
counting_itemset(head,s);//counting itemset which is in s
}
cout<<"第"<<i++<<"次循环:"<<endl;
modify_state_flag(head,sup);//修改标志位,所有的N++。如果N=4,即完成一遍DB scan,if flag是1则改为3;if flag 是2则改为4
output(head);
find_frequent_itemset(head,k);//找出k-项集的频繁集,修改标志位,并将频繁集存储于suf内
k++;
if(judge_if_generate_kitemset_or_not)
{
procedure_apriori_gen();//产生K+1-项集,并将此项集存储于Ck中
// dic.procedure_has_infrequent_subset();//找出并删除非真正的k+1候选集
for(t=0;t<kitem_n;t++)
insert_kitemset_to_chain(head,kitem[t],k);//将真正的k+1候选集加入链表内,并设置相应的标志位和其它值
suf_n=kitem_n=0;
}
p=head;
for(;p!=NULL;p=p->next)
if(p->flag<=2)
{
m=1;
break;
}
else
m=0;
}
fclose( stream );//close read txt
}
// output(head);
delete p;
cout<<"输出结果为:"<<endl;
find_frequent_itemset(head);//output all frequent itemset
frequent_item_sort_output(head);
cout<<"按照频繁大小输出为:"<<endl;
find_frequent_itemset(head);//output all frequent itemset
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -