⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kmean.c

📁 k-means 算法的工作过程说明如下：首先从 n 个数据对象中任意选择 k 个对象作为初始聚类中心；而对于所剩下的其它对象……
💻 C
字号:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Compile-time problem sizes; every global array in this file is sized from them. */
/* K: number of clusters (rows of init_centers / centers, capacity of all_labels). */
#define K 2
/* D: number of numeric values per record. */
#define D 12
/* N: number of records read from the input file. */
#define N 113
/* LABEL_SIZE: bytes reserved for one label string, including the terminator. */
#define LABEL_SIZE 30

/* records[i][j]: j-th numeric value of record i, filled by input_data(). */
double records[N][D];
/* labels[i]: label string read for record i, filled by input_data(). */
char labels[N][LABEL_SIZE];

/*
 * Starting cluster centers: one row of D coordinates per cluster.
 * clustering() copies these into centers[] before iterating.
 */
const double init_centers[K][D] = {
	{ 10.4923636	,
53.4163636	,
20.5090909,
87.7545455	,
306.8181818	,
1.7636364	,
240.4727273	,
215.6181818	,
47.1672727	,
0.2545455	,
0.2727273	,
0.1454545	} ,
 {8.847931	,
53.0568966	,
11.3206897	,
75.8189655	,
200.3448276	,
1.9310345	,
144.8103448	,
133.0689655	,
39.3586207	,
0.3103448	,
0.3793103	,
0.137931}	
}; 
/* centers[c]: current center of cluster c; seeded by clustering(), updated by member_to_center(). */
double centers[K][D];
/* member[i]: index of the cluster record i is currently assigned to (set by center_to_member()). */
int member[N];

/* all_labels: the distinct label strings seen by input_data(), in order of first appearance. */
char all_labels[K][LABEL_SIZE];

void input_data()
{
	int i, j, found;
	FILE * file;
	char ch;
	int label_len = 0;

	file = fopen("iris.txt", "r");
    for (i = 0; i < N; ++i) {
		for (j = 0; j < D; ++j)
			fscanf(file, "%lf,", &(records[i][j]));
		fscanf(file, "%s", labels[i]);
//	printf("%s\n",labels[i]);
		found = 0;
		for (j = 0; j < label_len; ++j)
			if (strcmp(labels[i], all_labels[j]) == 0)
				found = 1;
		if (!found) {
			strcpy(all_labels[label_len], labels[i]);
//	printf("%s\n",all_labels[label_len]);
			++label_len;
		}
	}
	close(file);
}

/*
 * Squared Euclidean distance between two D-dimensional points.
 *
 * r1, r2: arrays of D coordinates.
 * Returns the sum over all D coordinates of the squared difference
 * (no square root is taken).
 */
double distance(double r1[D], double r2[D])
{
	double sum_sq = 0;
	int dim = 0;

	while (dim < D) {
		sum_sq += (r1[dim] - r2[dim]) * (r1[dim] - r2[dim]);
		++dim;
	}
	return sum_sq;
}

void center_to_member()
{
	int i, j;
	for (i = 0; i < N; ++i) 
       {
		double min_dist = 1e50;
		int min_j = -1;
		for (j = 0; j < K; ++j)
                {
			double curr_dist = distance(records[i], init_centers[j]);
			if (curr_dist < min_dist) 
            {
				min_dist = curr_dist;
				min_j = j;
			}
		}
		member[i] = min_j;
	}
}

/*
 * Update step: move each cluster center to the mean of the records that
 * are currently assigned to it.
 *
 * Reads records[] and member[]; writes centers[]. A cluster with no
 * members keeps its previous center, because averaging zero records would
 * divide by zero and fill the center with NaN.
 */
void member_to_center()
{
	int i, j, p, num;
	for (i = 0; i < K; ++i)
	{
		/* Count the members once per cluster instead of once per dimension. */
		num = 0;
		for (p = 0; p < N; ++p)
			if (member[p] == i)
				++num;

		if (num == 0)
			continue;

		for (j = 0; j < D; ++j)
		{
			double sum = 0;
			for (p = 0; p < N; ++p)
				if (member[p] == i)
					sum += records[p][j];
			centers[i][j] = sum / num;
		}
	}
}

/*
 * Total of distance() between every record and the center of the cluster
 * it is assigned to in member[].
 */
double distance_sum()
{
	double total = 0;
	int rec = 0;

	while (rec < N) {
		total += distance(records[rec], centers[member[rec]]);
		++rec;
	}
	return total;
}

/*
 * Run the clustering iteration.
 *
 * Seeds centers[] from init_centers[], then alternates the assignment step
 * (center_to_member) and the update step (member_to_center), printing the
 * distance sum after each round. Iteration stops as soon as a round fails
 * to make the distance sum strictly smaller than the previous one.
 */
void clustering()
{
	int c, d;
	double prev_sum;
	double sum = 1e50;	/* larger than any real sum, so round one always counts as progress */

	for (c = 0; c < K; ++c) {
		for (d = 0; d < D; ++d) {
			centers[c][d] = init_centers[c][d];
		}
	}

	for (;;) {
		prev_sum = sum;

		center_to_member();
		member_to_center();

		sum = distance_sum();
		printf("curr dist sum = %lf\n", sum);

		/* Written as a negated '<' so a NaN sum also ends the loop. */
		if (!(sum < prev_sum))
			break;
	}
}

/*
 * Write the clustering result to "hw4-2-output.txt".
 *
 * For each cluster the report contains the number of member records, how
 * many of those members carry each label in all_labels[], and the cluster
 * center. After the per-cluster sections, one line per record lists its
 * label and assigned cluster index.
 *
 * If the output file cannot be created, an error is printed to stderr and
 * nothing is written.
 */
void print_result()
{
	int i, j, num, p;
	int label_count[K];
	FILE * file = fopen("hw4-2-output.txt", "w");

	/* Without this check every fprintf below would be handed a NULL stream. */
	if (file == NULL) {
		perror("hw4-2-output.txt");
		return;
	}

	for (i = 0; i < K; ++i)
	{
		for (j = 0; j < K; ++j)
			label_count[j] = 0;

		/* number of points, and per-label tallies, for cluster i */
		num = 0;
		for (j = 0; j < N; ++j)
			if (member[j] == i)
			{
				++num;
				for (p = 0; p < K; ++p)
					if (strcmp(labels[j], all_labels[p]) == 0)
						++label_count[p];
			}
		fprintf(file, "There are %d points in cluster %d\n", num, i);

		/* counts of labels */
		for (j = 0; j < K; ++j)
		{
			fprintf(file, "%d [%s]  ", label_count[j], all_labels[j]);
		}
		fprintf(file, "\n");

		/* cluster center */
		fprintf(file, "Cluster center =");
		for (j = 0; j < D; ++j)
			fprintf(file, " %lf", centers[i][j]);
		fprintf(file, "\n\n");
	}

	/* per-record assignment */
	for (i = 0; i < N; ++i)
	{
		fprintf(file, "%s  %d\n", labels[i], member[i]);
	}

	/* fclose flushes buffered output; report a failure instead of losing it silently. */
	if (fclose(file) != 0)
		perror("hw4-2-output.txt");
}

/*
 * Program entry point: read the records, cluster them, write the report.
 * Always returns 0.
 */
int main()
{
	input_data();		/* fill records[], labels[], all_labels[] */
	clustering();		/* compute centers[] and member[] */
	print_result();		/* write the report file */

	return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -