// k_means.cpp
// [code-viewer page residue: font-size control]
#include <ctime>
#include <vector>
#include "head.h"
#include <math.h>
#include <iostream>
using namespace std;
typedef vector<double> instance;
// Euclidean (L2) distance between two instances.
//
// Walks every component of `a` and reads the component of `b` at the same
// position, so `b` must hold at least as many components as `a`.
// Returns the square root of the summed squared differences; the result is
// 0 when `a` has no components.
double distance( instance& a, instance & b) {
    double sum_of_squares = 0;
    for (size_t dim = 0; dim < a.size(); ++dim) {
        const double diff = a[dim] - b[dim];
        sum_of_squares += diff * diff;
    }
    return sqrt(sum_of_squares);
}
// Computes the centroid (component-wise arithmetic mean) of one cluster.
//
// Parameters:
//   index      - id of the cluster whose mean is wanted.
//   v_list     - all instances; the dimensionality is taken from v_list[0].
//   membership - membership[i] is the cluster id of v_list[i]; it must hold
//                at least v_list.size() entries.
//
// Returns a vector with one mean per dimension. Two degenerate cases are
// handled explicitly instead of reading v_list[0] out of bounds or dividing
// zero by zero (which yields NaN):
//   * v_list is empty                 -> an empty instance is returned;
//   * no instance belongs to `index`  -> an all-zero instance is returned.
instance calculate_mean(int index, //cluster id
                        vector <instance> & v_list,
                        vector<int> & membership) {
    if (v_list.empty())
        return instance();  // no data, hence no dimensionality to report

    instance mean(v_list[0].size(), 0.0);
    size_t count = 0;
    for (size_t i = 0; i < v_list.size(); ++i) {
        if (membership[i] != index)
            continue;
        ++count;
        for (size_t j = 0; j < mean.size(); ++j)
            mean[j] += v_list[i][j];
    }

    if (count == 0)
        return mean;  // empty cluster: keep the zeros rather than produce NaN

    for (size_t j = 0; j < mean.size(); ++j)
        mean[j] /= count;
    return mean;
}
void k_means(int num_k,
vector<instance> & v_list, vector<cluster> & k_cluster)
{
vector <instance> k_means;
vector <int> membership(v_list.size(), 0); //initiated to 0;
int i, j, k;
//*************************************
/* cout<<"test \n";
for(i=0; i<v_list.size(); i++) {
for(j=0; j<v_list[0].size(); j++) {
cout<<v_list[i][j]<<" ";
}
cout<<endl;
}
*/
//*************************************
instance tmp;
//random choose k different object; //store in k_means;
srand((unsigned)time(NULL));
k_means.push_back(v_list[ rand() % v_list.size() ] );
bool flag;
for (i=1; i<num_k; i++ ) {
flag=true;
while (flag) {
flag=false;
tmp= v_list[ rand() % v_list.size() ];
for (j=0; j<k_means.size(); j++ ) {
if (tmp == k_means[j] ){
flag=true;
break;
}
}
}
k_means.push_back(tmp);
}
bool change_member= true;
int index,n_iteration=1;
double min;
double t_distance;
while (change_member) {
change_member=false;
//int n_iteration=1;
cout<<"iteration: "<<n_iteration++<<endl;
for( i=0; i< v_list.size(); i++) { //clustering
min=distance(v_list[i],k_means[0]);
index=0;
for (k=1; k< num_k; k++ ) {
if ((t_distance=distance(v_list[i], k_means[k])) < min) {
min=t_distance;
index=k;
}
}
if (index!=membership[i]) {
change_member=true;
membership[i]=index;
}
}//for
for(i=0;i<num_k; i++) {
k_means[i]= calculate_mean(i, v_list, membership);
}
}//while
//cluster tmp_cluster;
k_cluster.assign(num_k);
for(i=0; i<num_k; i++) { //initiate k_cluster
k_cluster[i].n=0;
k_cluster[i].f1.assign(v_list[0].size(),0);
k_cluster[i].f2.assign(v_list[0].size(),0);
}
int cluster_id;
for(i=0; i< v_list.size(); i++) {
cluster_id=membership[i];
for(j=0; j<v_list[0].size(); j++) {
(k_cluster[cluster_id].f2) [j] += v_list[i][j] * v_list[i][j];
(k_cluster[cluster_id].f1) [j] += v_list[i][j];
//cout<<(k_cluster[cluster_id].f2) [j]<<endl;
}
k_cluster[cluster_id].n++;
//cout<<k_cluster[cluster_id].n<<endl;
}
//**************************************
cout<<"output the size of k clusters\n";
for(i=0; i<k_cluster.size(); i++) {
cout<<k_cluster[i].n<<endl;
}
//***********************************
}
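// [code-viewer page residue: keyboard shortcut help, not part of the program]
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full-screen mode:   F11
//   Toggle theme:       Ctrl + Shift + D
//   Show shortcuts:     ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -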