/* File: art.cpp */
/* ("Font size:" -- leftover text from the web code viewer, not part of the program.) */
/* Program Description:
This program carries out unsupervised
learning and sorts samples into clusters
using the Euclidean distance metric.
At most one cluster per input pattern can
be created, so the number of clusters never
exceeds the number of input patterns.
Input patterns are read from a file named by
the user; the learned weights are written to
the file weights.dat.
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define NMXPATTERN 100 /* max number of input samples */
#define NMXATTR 100 /* max number of input attributes */
int ninput; /* number of input patterns */
int ninattr; /* number of input attributes */
float threshold; /* largest distance at which a pattern still joins a cluster */
int testing; /* 0=training, 1=testing */
float pattern[NMXPATTERN][NMXATTR]; /* pattern[p][a] = attribute a of pattern p */
float b[NMXPATTERN][NMXATTR]; /* bottom_up weights: b[c][a] = attribute a of cluster centre c */
/*
 * Cluster membership table.
 *   cluster_tbl[c][0]        = number of members in cluster c
 *   cluster_tbl[c][1..count] = pattern numbers of those members
 * A single cluster can hold every input pattern, so each row needs one
 * count slot plus NMXPATTERN member slots. (Sizing the row by NMXATTR
 * overflowed by one slot when all NMXPATTERN patterns fell into one cluster.)
 */
int cluster_tbl[NMXPATTERN][NMXPATTERN+1];
float x[NMXATTR]; /* input pattern currently being classified */
float ed[NMXPATTERN]; /* squared Euclidean distances from x to each cluster centre */
int active_nodes; /* number of current clusters */
FILE *in,*out; /* input pattern file, output weights file */
int debug_flag; /* 1 = print distances for every pattern */
int total_time; /* number of iterations */
/*
 * Get the run parameters from the user on standard input:
 *   - debug_flag : 1 to print distances for each pattern, 0 otherwise
 *   - threshold  : distance limit used when assigning patterns to clusters
 *   - input file : opened for reading and left open in the global `in`
 *   - ninput     : number of training patterns
 *   - ninattr    : number of attributes per pattern
 * Also resets `testing` and `total_time` to 0.
 *
 * The program is terminated with a failure status if any answer cannot be
 * parsed, if the file cannot be opened, or if a count is not positive.
 * Upper limits on the counts are not checked in this function.
 */
void user_session()
{
    char file_name[20];

    printf("\nDisplay Euclidean distances for each iteration?");
    printf("\n Yes = 1, No = 0:");
    if(scanf("%d",&debug_flag)!=1)
    {
        printf("\nInvalid answer\n");
        exit(EXIT_FAILURE);
    }

    printf("\n\nPlease enter threshold:");
    if(scanf("%f",&threshold)!=1)
    {
        printf("\nInvalid threshold\n");
        exit(EXIT_FAILURE);
    }

    printf("\nEnter the name of the input data file:");
    /* field width 19 leaves room for the terminating NUL in file_name[20] */
    if(scanf("%19s",file_name)!=1)
    {
        printf("\nInvalid file name\n");
        exit(EXIT_FAILURE);
    }
    if((in=fopen(file_name,"r"))==NULL)
    {
        printf("\nCannot open input data file\n");
        exit(EXIT_FAILURE);
    }

    printf("\nHow many input training patterns?:");
    if(scanf("%d",&ninput)!=1 || ninput<1)
    {
        printf("\nInvalid number of input patterns\n");
        fclose(in);
        exit(EXIT_FAILURE);
    }

    printf("\nHow many input attributes?:");
    if(scanf("%d",&ninattr)!=1 || ninattr<1)
    {
        printf("\nInvalid number of input attributes\n");
        fclose(in);
        exit(EXIT_FAILURE);
    }

    testing=0;
    total_time=0;
}
/*
 * Read ninput patterns of ninattr attributes each from the already-open
 * stream `in` into pattern[][] (one row per pattern), then close the stream.
 *
 * The program is terminated with a failure status if the requested
 * dimensions do not fit into pattern[][] or if a value cannot be read
 * (file too short or not a number).
 */
void read_all_patterns()
{
    /* capacities are taken from the array itself so they cannot drift */
    const int max_patterns=(int)(sizeof pattern/sizeof pattern[0]);
    const int max_attrs=(int)(sizeof pattern[0]/sizeof pattern[0][0]);
    int i,j;

    if(ninput<1 || ninput>max_patterns || ninattr<1 || ninattr>max_attrs)
    {
        printf("\nNumber of patterns must be 1..%d and attributes 1..%d\n",
               max_patterns,max_attrs);
        fclose(in);
        exit(EXIT_FAILURE);
    }

    for(i=0;i<ninput;i++)
    {
        for(j=0;j<ninattr;j++)
        {
            if(fscanf(in,"%e",&pattern[i][j])!=1)
            {
                printf("\nCannot read pattern %d, attribute %d from input data file\n",i,j);
                fclose(in);
                exit(EXIT_FAILURE);
            }
        }
    }
    fclose(in);
}
/*
 * Create the first cluster (cluster 0) from the first pattern (pattern 0):
 * the cluster's bottom_up weights become a copy of pattern 0, the member
 * count is set to 1 and pattern 0 is recorded as that single member.
 */
void first_node()
{
    int i;
    /* bottom_up weights = first pattern */
    for(i=0;i<ninattr;i++)
        b[0][i]=pattern[0][i];
    active_nodes=1; /* number of clusters */
    cluster_tbl[0][0]=1; /* number of members in cluster 0 */
    /* record the member explicitly, as form_new_node does, instead of
       relying on the table having been zero-initialised */
    cluster_tbl[0][1]=0;
}
/* create a new cluster node */
void form_new_node(int input_no)
{
int i;
for(i=0;i<ninattr;i++)
b[active_nodes][i]=pattern[input_no][i];
cluster_tbl[active_nodes][0]=1; /* number of member=1 */
cluster_tbl[active_nodes][1]=input_no;
active_nodes++; /* increment number of clusters */
}
/*
 * Compute the distance from the current input pattern x[] to every active
 * cluster centre b[j][].
 *
 * ed[j] receives the SQUARED Euclidean distance (sum of squared attribute
 * differences); no square root is taken here. When debug_flag is 1 the
 * values are also printed on one line, separated by spaces.
 */
void compute_euc_dist()
{
    int i,j;
    float diff;

    if(debug_flag == 1)
        printf("\nEuclidean distances to cluster centers\n");
    for(j=0;j<active_nodes;j++)
    {
        ed[j]=0.0;
        for(i=0;i<ninattr;i++)
        {
            diff=b[j][i]-x[i];
            ed[j]=ed[j]+(diff*diff);
        }
        /* trailing space keeps consecutive values from running together */
        if(debug_flag == 1)
            printf("%.6f ",ed[j]);
    }
    if(debug_flag == 1)
        printf("\n");
}
/*
 * Choose the cluster nearest to the current pattern.
 *
 * ed[] must already hold the squared distances to the active clusters.
 * Returns the index of the nearest cluster if its distance (square root of
 * the stored value) is within `threshold`; otherwise returns -99. Ties are
 * resolved in favour of the lowest cluster index. Also returns -99 when
 * there are no active clusters.
 *
 * input_no is only used for the debug print.
 */
int compare_min_ed(int input_no)
{
    int i,cluster_no;
    float min;
    double dist;

    if(active_nodes<1)
        return(-99);

    /* seed the search with cluster 0 rather than a magic upper bound, so
       cluster_no is always valid however large the distances are */
    cluster_no=0;
    min=ed[0];
    for(i=1;i<active_nodes;i++)
    {
        if(ed[i]<min)
        {
            min=ed[i];
            cluster_no=i;
        }
    }

    dist=sqrt(min);
    if(debug_flag == 1)
        printf("Ed = %.3f Node = %d Pat = %d\n",dist,cluster_no,input_no);

    if(dist<=threshold)
        return(cluster_no);
    return(-99);
}
/* include a new member in the cluster by updating weights */
void update_wts(int cluster_no,int input_no)
{
int i,no_member;
float n,m;
n=cluster_tbl[cluster_no][0];
m=n+1;
if(testing ==0)
{
for(i=0;i<ninattr;i++)
b[cluster_no][i]=((n/m)*b[cluster_no][i])+((1/m)*x[i]);
}
no_member = ++(cluster_tbl[cluster_no][0]);
cluster_tbl[cluster_no][no_member]=input_no;
}
/*
 * Print the clustering result as a table on standard output: one entry per
 * cluster with its index, its member count and the pattern numbers of its
 * members. Clusters are listed from index 0 until the first one with a
 * member count of 0 (or until ninput rows have been examined).
 *
 * The "Pattern numbers" column is 21 characters wide, i.e. seven 3-character
 * entries; longer member lists wrap onto continuation lines.
 */
void report()
{
    int i,j,k,count;
    const int nbcol=6; /* index of the last entry on a line (7 entries per line) */

    printf("\n\n");
    printf("+----+-----+---------------------+\n");
    printf("|Node|Count|   Pattern numbers   |\n");
    printf("+----+-----+---------------------+\n");
    for(i=0;i<ninput;i++)
    {
        count=cluster_tbl[i][0];
        if(count==0)
        {
            break;
        }
        printf("|%-4d|%-5d|",i,count);
        k=0; /* entries printed so far on the current line */
        /* members occupy slots 1..count; walking exactly that range avoids
           reading past the row and also prints pattern number 0 correctly */
        for(j=1;j<=count;j++)
        {
            if(k>nbcol)
            {
                /* line is full: close it and open a continuation line */
                printf("|\n|    |     |");
                k=0;
            }
            printf("%-3d",cluster_tbl[i][j]);
            k++;
        }
        /* pad the unused entries so the closing bar lines up */
        for(j=k;j<=nbcol;j++)
            printf("   ");
        printf("|\n");
        printf("+----+-----+---------------------+\n");
    }
}
/*
 * Print the bottom_up weights of every active cluster on standard output and
 * write the same values to the file "weights.dat" (one line per cluster,
 * values formatted with three decimals).
 *
 * The program is terminated with a failure status if the file cannot be
 * created or if closing it reports a write error.
 */
void print_bot_up_wts()
{
    int i,j;
    const char *file_name = "weights.dat";

    if((out = fopen(file_name,"w"))==NULL)
    {
        printf("\n Cannot open output weights file\n");
        exit(EXIT_FAILURE);
    }
    printf("\n\nBottom_up weights");
    for(i=0;i<active_nodes;i++)
    {
        printf("\n Cluster %d\n ",i);
        for(j=0;j<ninattr;j++)
        {
            printf("%.3f ",b[i][j]);
            fprintf(out,"%.3f ",b[i][j]);
        }
        printf("\n");
        fprintf(out,"\n");
    }
    /* fclose flushes buffered data, so a failure here means lost output */
    if(fclose(out)!=0)
    {
        printf("\n Error while writing output weights file\n");
        exit(EXIT_FAILURE);
    }
}
/*
 * Program entry point.
 * Collects the run parameters, loads the patterns, seeds cluster 0 with
 * pattern 0, then presents each remaining pattern in turn: it joins the
 * nearest cluster when compare_min_ed accepts it, otherwise it starts a new
 * cluster. Finally the cluster table and the learned weights are reported.
 * Returns 0 on normal completion.
 */
int main(void)
{
    int cluster_no,q;
    int i;

    user_session();
    read_all_patterns();
    first_node();
    for(q=1;q<ninput;q++)
    {
        /* copy pattern q into the working input vector */
        for(i=0;i<ninattr;i++)
            x[i]=pattern[q][i];
        compute_euc_dist();
        /* a negative result (-99) means no cluster is within the threshold */
        if((cluster_no=compare_min_ed(q))>=0)
            update_wts(cluster_no,q);
        else
            form_new_node(q);
    }
    report(); /* report clustering result */
    print_bot_up_wts();
    return 0;
}
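/*
 * Leftover text from the web code viewer (not part of the program):
 * Keyboard shortcuts
 *   Copy code:            Ctrl + C
 *   Search code:          Ctrl + F
 *   Full-screen mode:     F11
 *   Toggle theme:         Ctrl + Shift + D
 *   Show shortcuts:       ?
 *   Increase font size:   Ctrl + =
 *   Decrease font size:   Ctrl + -
 */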