⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cluster.c

📁 聚类算法的源码
💻 C
字号:
//////////////////////////////////////////////////////////////////////////////////////////////
//  聚类处理函数
//////////////////////////////////////////////////////////////////////////////////////////////
#include <io.h>#include <ctype.h>#include <errno.h>#include <math.h>#include <stdarg.h>#include <stdio.h>#include <stdlib.h>#include <string.h>#define BUFSIZE 2048char  buffer [BUFSIZE];
char  *no_mem_buffer;
#define MIN(A, B) ((A) < (B) ? (A) : (B))#define MAX(A, B) ((A) > (B) ? (A) : (B))typedef struct{    int   used;    int   n_items;    int   cl1;    int   cl2;    int   cluster;    float f;} CLUSTER;CLUSTER *cl;char  **labels;
int   *LDNum;
float **diff;
int   size;int   input_line = 0;int   sorted = 1;char  *programname;char  Out_of_memory [] = "Out of memory";

/*********************************************************************************
 *   其它功能函数
 *********************************************************************************/
void errit (char const *format, ...)
{
    va_list list;

    fprintf (stderr, "\nError %s: ", programname);

    va_start (list, format);
    vfprintf (stderr, format, list);

    fprintf (stderr, "\n\n");

    exit (1);
}

char const *SortCluster (int i)
{
    int  n;
    char const *s1, *s2;

    if (cl [i].cl1 < size)   s1 = labels [cl [i].cl1];
    else                     s1 = SortCluster (cl [i].cl1);
    
	if (cl [i].cl2 < size)   s2 = labels [cl [i].cl2];
    else                     s2 = SortCluster (cl [i].cl2);
    
	if (strcmp (s1, s2) > 0) {
        n = cl [i].cl1;
        cl [i].cl1 = cl [i].cl2;
        cl [i].cl2 = n;
        return s2;
    } 
	else  return s1;
}

void *s_malloc (size_t size)
{
    void *p;

    p = malloc (size);
    if (! p) {
	    free (no_mem_buffer);
	    errit (Out_of_memory);
    }
    return p;
}

char *s_strdup (const char *s)
{
    char *p = strdup (s);
    
	if (! p) {
	    free (no_mem_buffer);
	    errit (Out_of_memory);
    }
    return p;
}

int getline (FILE *fp, int required, char const *filename)
{
    int	i;

    for (;;) {
	    /* 从文件中读一字符串 */
		if (fgets (buffer, BUFSIZE, fp) == NULL) {
	        if (required)	errit ("Unexpected end of file in \"%s\"", filename);
	        else       		return 0;
		}
	    input_line++;

		i = strlen (buffer);
	    while (i && isspace (buffer [i - 1]))   buffer [--i] = '\0';
	
		i = 0;
	    while (buffer [i] && isspace (buffer [i]))  i++;
	
		if (buffer [i] == '#')  continue;
	    if (buffer [i]) {
	        memmove (buffer, (buffer + i), strlen (buffer + i) + 1);
	        return 1;
		}
    }
}

char  a_s [] = "Single";
char  a_c [] = "Complete";
char  a_g [] = "Group";
char  a_a [] = "Average";

void syntax ()
{
    fprintf (stderr, "\nSyntax: %s -s|-c|-g|-a [-u] [difference table file]\n\n"
                     "Clustering algorithm:\n"
                     "  -s : %s\n"
                     "  -c : %s\n"
                     "  -g : %s\n"
                     "  -a : %s\n"
                     "  -u  : unsorted\n\n",
	                 programname,
                     a_s,
                     a_c,
                     a_g,
                     a_a);

    exit (0);
}


/*********************************************************************************
 *   Cluster 距离的计算方法
 *********************************************************************************/
// 1: Single Link (Nearest Neighbor) 最小距离
void update_s (int i)
{
    int j, cl1, cl2;

    cl1 = cl [i].cl1;
    cl2 = cl [i].cl2;

	/* 重新计算此合并类与其它类的距离值 ---- 取其中的最小值 */
    diff [i][i] = 0.0;
	for (j = 0; j < i; j++) {
        // 选择两个合并聚类中最小的距离
        diff [i][j] = diff [j][i] = MIN (diff [cl1][j], diff [cl2][j]);
	}

	/* 把合并的两个子类的类下标赋值为合并后的新类编码 */
	for (j = 0; j < i; j++) {
        if (cl [j].cluster == cl [i].cl1 || cl [j].cluster == cl [i].cl2 ) cl [j].cluster = i;
	}
}

// 2: Complete Link 最大距离
void update_c (int i)
{
    int j, cl1, cl2;

    cl1 = cl [i].cl1;
    cl2 = cl [i].cl2;

    diff [i][i] = 0.0;
	for (j = 0; j < i; j++) {
        // 选择两个合并聚类中最大的距离
        diff [i][j] = diff [j][i] = MAX (diff [cl1][j], diff [cl2][j]);
	}

	for (j = 0; j < i; j++) {
        if (cl [j].cluster == cl [i].cl1 || cl [j].cluster == cl [i].cl2 ) cl [j].cluster = i;
	}
}

// 3: Group Average 平均距离
void update_g (int i)
{
    int j, cl1, cl2;

    cl1 = cl [i].cl1;
    cl2 = cl [i].cl2;

    diff [i][i] = 0.0;
    for (j = 0; j < i; j++) {
        diff[j][i]=diff[i][j]=( ((float) cl [cl1].n_items) * diff [j][cl1] + ((float) cl [cl2].n_items) * diff [j][cl2] )
                              / ((float) (cl [cl1].n_items + cl [cl2].n_items));
	}

	for (j = 0; j < i; j++) {
        if (cl [j].cluster == cl [i].cl1 || cl [j].cluster == cl [i].cl2 ) cl [j].cluster = i;
	}

}

// 4: Equal Weighted Average 平均值距离
void update_a (int i)
{
    int j, cl1, cl2;

    cl1 = cl [i].cl1;
    cl2 = cl [i].cl2;
    diff [i][i] = 0.0;

    for (j = 0; j < i; j++) {
        diff [j][i] = diff [i][j] = (float)( (diff [j][cl1] + diff [j][cl2]) * 0.5 );
	}

	for (j = 0; j < i; j++) {
        if (cl [j].cluster == cl [i].cl1 || cl [j].cluster == cl [i].cl2 ) cl [j].cluster = i;
	}

}


//////////////////////////////////////////////////////////////////////////////////////////////////////
//  主函数
//////////////////////////////////////////////////////////////////////////////////////////////////////
int main (int argc, char *argv []){    FILE   *fp;
    char   *infile, *name_update;

    int    i,j,k;
	int    cl1,cl2;    float  d;

	int    TotalNum;
	void   (*update_function)(int);     no_mem_buffer = (char *) malloc (1024);    programname = argv[0];    // 判定距离计算函数 ////////////////////////////////////////////////
	update_function = NULL;    while (argc > 1 && argv [1][0] == '-') {        if ( (!strcmp (argv [1], "-s")) ) {                update_function = update_s;                name_update =  a_s;        } else if (! strcmp (argv [1], "-c")) {                update_function = update_c;                name_update =  a_c;        } else if (! strcmp (argv [1], "-g")) {                update_function = update_g;                name_update =  a_g;        } else if (! strcmp (argv [1], "-a")) {                update_function = update_a;                name_update =  a_a;        } else if (! strcmp (argv [1], "-u")) {                sorted = 0;		} else  
			    syntax ();        
		/* 读下一个命令行参数 */
		argc--;   argv++;    }    
	// 如果距离计算函数为空,显示命令行格式出错 /////////////////////////////////
	if (! update_function)   syntax ();
	// 打开数据文件 ////////////////////////////////////////////////////////////    switch (argc) {	    case 1:	         if (isatty (fileno (stdin))) syntax ();	         fp = stdin;	         infile = "<stdin>";	         break;	    case 2:	         infile = argv [1];	         fp = fopen (infile, "r");			 if (! fp) {
				 errit ("Opening file \"%s\": %s", infile, strerror (errno));
			 }
	         break;	    default:	         syntax ();    }

    //  读取数据条总数量 //////////////////////////////////////////////////////
	getline (fp, 1, infile);
    sscanf (buffer, "%d", &TotalNum);

    //  读取数据条数量 ////////////////////////////////////////////////////////
	getline (fp, 1, infile);	if (sscanf (buffer, "%d", &size) != 1) {	   errit ("file \"%s\", line %i\nTable size expected", infile, input_line);
	}
	//printf(" # Labels_Num = %d \n",size);
	// 读取 Label 名称 && 每个  Label 含有的数据实例 //////////////////////////
	labels = (char **) s_malloc (size * sizeof (char *));
	LDNum = (int *) calloc((size+1),sizeof(int));    for (i = 0; i < size; i++) {        getline (fp, 1, infile);
        labels [i] = s_strdup (buffer);
		//printf("# labels[%d] = %s \n",i,labels [i]);

        getline (fp, 1, infile);
		LDNum[i] = (int)(atoi(&buffer[0]));
		// 读取每个正常度的数据实例数目
    }
    // 读取Label之间的距离值 /////////////////////////////////////////////////    // 分配空间
	diff = (float **) s_malloc ((2 * size - 1) * sizeof (float *));    for (i = 0; i < 2 * size - 1; i++) {        diff [i] = (float *) s_malloc ((2 * size - 1) * sizeof (float));
	}
    // 距离取值矩阵    
	for (i = 0; i < size; i++) {        diff [i][i] = 0.0;        for (j = 0; j < i; j++) {            getline (fp, 1, infile);            if (sscanf (buffer, "%f", &d) != 1) {                errit ("file \"%s\", line %i\nValue expected", infile, input_line);
			}    
			diff [j][i] = diff [i][j] = d;
            //printf("# diff[%d][%d] = %f \n",i,j,diff [i][j]);
		}    }
	// 关闭数据文件 //////////////////////////////////////////////////////////    if (fp != stdin)  fclose (fp);    cl = (CLUSTER *) s_malloc ((2 * size - 1) * sizeof (CLUSTER));    // 每个数据实例一个聚类,记录下各个聚类的情况
	for (i = 0; i < size; i++) {        cl [i].used = 0;        cl [i].n_items = 1;   // ???        cl [i].cl1 = -1;
		cl [i].cl2 = -1;        cl [i].f = 0.0;        cl [i].cluster = i;  /* Label 的下标 */    }
    // 进行聚类分析处理 ///////////////////////////////////////////    for (i = size; i < (2 * size - 1) ; i++) {        cl [i].used = 0;        d = 1000000.00;         
        // 找出所有 used =0 的类别之间的最小距离 diff [j][k] /////
		for (j = 0; j < i; j++) {            if (! cl [j].used) {				for (k = 0; k < j; k++) {	                if ((! cl [k].used) && (diff [j][k] < d)) {                        cl1 = j;                        cl2 = k;                        d = diff [j][k];					}
				}
			}
		}

		cl [i].n_items = cl [cl1].n_items + cl [cl2].n_items;        cl [cl1].used = cl [cl2].used = 1;        cl [i].cl1 = cl1;     /* Cluster 1 的下标值 */        cl [i].cl2 = cl2;     /* Cluster 2 的下标值 */        cl [i].f = d;         /* 距离值 */        cl [i].cluster = i;        update_function (i);  /* i 是当前最后处理聚类的下标 */    }	if (sorted)  SortCluster (2 * size - 2);

	// 数据输出 ///////////////////////////////////////////////////////////////
	printf ("%d\n",TotalNum);

	for (i = size; i < 2 * size - 1; i++) {        printf ("%i %f\n", i, cl [i].f);        /* < size 是具体的Label */
		if (cl [i].cl1 < size) {
			printf ("L %s\n", labels [cl [i].cl1]);
			printf ("%d\n",LDNum[cl[i].cl1]);       // 输出 label 中含有的数据实例数目
		}        /* > size 是一个类 */
		else {
			printf ("C %i\n", cl [i].cl1);
		}        
		if (cl [i].cl2 < size) {
			printf ("L %s\n", labels[cl [i].cl2]);
			printf ("%d\n",LDNum[cl[i].cl2]);       // 输出 label 中含有的数据实例数目
		}        else {
			printf ("C %i\n", cl [i].cl2);
		}    }
    return 0;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -