📄 clique.cc
字号:
// This is my clique program// Author: Weinan Wang#include <iostream.h> // for cin, cout#include <fstream.h> // for ifstream, ofstream#include <iomanip.h> // for setw#include <stdlib.h>#include <math.h> /* for ceil *//************************************************* * Definition * *************************************************//* maxmum length of the file name */#define MAX_fileName 14/* the threshold to determine which cluster is outlier */#define noiseThreshold 6/************************************************* * Define data structures * *************************************************//* each Point represent a spatial point */struct Point { double x; double y; int clusterNO; int cellx; int celly;};/* end Point *//* each of a Cell corresponing to a cell */struct Cell { int qualified; // whether the numebr of points in this cell is more than tau int numberPoints; // number of points contained in this cell. int checked; // See if this Cell's neighbours have been checked. int clusterNO; // This cell's cluster number.}; /* end Cell *//************************************************** * Declare static functions * **************************************************/static int parsingInput(int argc, char *argv[]);static void finish();static int initialize();static int readData();static int cluster();static int retrieve(int numberClusters, int i, int j);static int printResult();/************************************************* * Declare global variables * *************************************************//* fileName is the data file's name */char fileName[MAX_fileName + 1];/* numberOfPoints tells the number of data points */int numberOfPoints;/* resolution is used in the first step */float resolution;/* these four floats indicate the range of input data */float xmin, xmax, ymin, ymax;/* This is the threshold for clustering */float tau;int thresholdPoints; // tau% * numberOfPoints/* This is the array of data points */Point * data;/* This two dimentional array is the matrix of cells */Cell ** ptrCells;/* This is the number of rows and columns for the cell matrix */int cellRow, cellColumn;/* This variable count the number of clusters */int numberClusters; /********************************************************************************* * Name : main * * Function: main function * * Input : argc, argv * * Output : int * * The usage of the clique program should * * be of the form: * * clique fileName numberOfPoints resolution xmin xmax ymin ymax tau * * * * Each parameter's meanning is : * * clique -- program's name * * string fileName -- indicate the data file * * name which * * contains all the data vectors. * * int numberOfPoints -- the number of data points * * float resolution -- resolution of clustering * * float xmin -- the min value of x * * float xmax -- the max value of x * * float ymin -- the min value of y * * float ymax -- the max value of y * * float tau -- the threshold for clustering * *********************************************************************************/int main( int argc, char *argv[] ){ /* parse the command line */ if (parsingInput(argc, argv)<0) { cout << "parsing error!" << endl; return -1; } /* parse the command line */ if (initialize()<0) { cout << "initialize error!" << endl; finish(); return -1; } /* read data and assign into cells */ if (readData()<0) { cout << "readData error!" << endl; finish(); return -1; } if (cluster()<0) { cout << "cluster error!" << endl; finish(); return -1; } if (printResult()<0) { cout << "printResult error!" << endl; finish(); return -1; } finish(); return 0;}/* end main *//**************************************************************** * Name : finish * * Function: operations before finish * * Input : void * * Output : void * ****************************************************************/static void finish(){ int i; free(data); data = 0; // C++ prefer 0 instead of NULL for(i=0; i<cellRow; i++){ free(ptrCells[i]); ptrCells[i]=0; }/* end for i */ free(ptrCells); ptrCells=0; return;}/* end finish *//**************************************************************** * Name : printResult * * Function: print clustering result * * Input : void * * Output : int * ****************************************************************/static int printResult(){ int i, j; int * clusterCounter; clusterCounter = (int *)calloc(numberClusters+1, sizeof(int)); for(i=0; i<=numberClusters; i++) clusterCounter[i]=0; // This is the output file stream ofstream clusterFile("clusterResult.m", ios::out); if(!clusterFile){ cerr << "cannot open clusterResult.m" << endl; return -1; } for(i=0; i<numberOfPoints; i++){ data[i].clusterNO = ptrCells[data[i].celly][data[i].cellx].clusterNO; clusterCounter[data[i].clusterNO]+=1; }/* end for */ for(i=0; i<=numberClusters; i++){ if(clusterCounter[i]>0){ clusterFile << "c" << i << "=[" << endl; for(j=0; j<numberOfPoints; j++){ if(data[j].clusterNO == i){ clusterFile << data[j].x << " " << data[j].y << endl; }/* end if */ }/* end for j */ clusterFile << "];" << endl; // If this cluster is outlier, then merge it with noise cluster c0. if(clusterCounter[i]<noiseThreshold){ clusterFile << "c0=[ c" << i << " \n c0];" << endl; clusterFile << "clear c" << i <<";" << endl; } }/* end if */ }/* end for i */ free(clusterCounter); return 0;}/* end printResult *//**************************************************************** * Name : retrieve * * Function: retrieve neighbor of neighbor * * Input : void * * Output : int * * note : this is a recursive function. * ****************************************************************/static int retrieve(int numberClusters, int i, int j){ int l, m; if( ptrCells[i][j].checked != 1 && ptrCells[i][j].clusterNO == 0 ){ /* if this point has not been checked */ if( ptrCells[i][j].qualified > 0 ){ ptrCells[i][j].clusterNO = numberClusters; ptrCells[i][j].checked = 1; // check all the neighbors of the LL[i][j] if(i != 0){ l = i-1; retrieve(numberClusters, l, j); } if( i != (cellRow-1) ){ l = i+1; retrieve(numberClusters, l, j); } if(j != 0){ m = j-1; retrieve(numberClusters, i, m); } if( j != (cellColumn-1) ){ m = j+1; retrieve(numberClusters, i, m); } } else{ // else it is an outlier, so no need to check its neighbours ptrCells[i][j].clusterNO = 0; //means that it is a outlier ptrCells[i][j].checked = 1; // means this cell has been checked }/* end if...else... */ }/* end if */ return 0;}/* end retrieve *//**************************************************************** * Name : cluster * * Function: cluster based on ptrCells * * Input : void * * Output : int * ****************************************************************/static int cluster(){ int i, j; for(i=1; i<cellRow; i++){ for(j=1; j<cellColumn; j++){ if(ptrCells[i][j].checked != 1 && ptrCells[i][j].clusterNO == 0){ if(ptrCells[i][j].qualified > 0){ numberClusters++; if(retrieve(numberClusters, i, j)<0) { cout << "retrieve error! \n"; return -1; } } else{ ptrCells[i][j].clusterNO = 0; //means that it is a outlier ptrCells[i][j].checked = 1; // means this cell has been checked }/* end if...else... */ }/* end if */ }/* end for j */ }/* end for i */ #ifdef Debug_cluster cout << "ptrCells is : \n"; for(i=1; i<cellRow; i++){ for(j=1; j<cellColumn; j++){ cout << ptrCells[i][j].clusterNO << " "; }/* end for j */ cout << endl; }/* end for i */#endif cout << "total number of effective clusters is " << numberClusters << endl; return 0;}/* end cluster *//**************************************************************** * Name : readData * * Function: read in data and assign into cells. * * Input : void * * Output : int * ****************************************************************/static int readData(){ int i, j; ifstream inFile(fileName, ios::in); if(!inFile){ cerr << "cannot open data file!" << endl; return -1; }/* end if */ // read in data point one by one and arrange each data point into // corresponding cell. for (i=0; i<numberOfPoints; i++){ inFile >> data[i].x >> data[i].y; // cout << setw(15) << data[i].x << setw(15) << data[i].y << endl; data[i].cellx = (int)((data[i].x - xmin)/resolution); data[i].celly = (int)((data[i].y - ymin)/resolution); // cout << "cellx=" << data[i].cellx << "celly=" << data[i].celly << endl; (ptrCells[data[i].celly][data[i].cellx].numberPoints)++; }/* end for */ for(i=0; i<cellRow; i++){ for(j=0; j<cellColumn; j++){ if(ptrCells[i][j].numberPoints >= thresholdPoints) ptrCells[i][j].qualified = 1; }/* end for j */ }/* end for i */#ifdef Debug_readData for(i=0; i<cellRow; i++){ for(j=0; j<cellColumn; j++){ cout << ptrCells[i][j].numberPoints << " "; }/* end for j */ cout << "\n"; }/* end for i */#endif return 0;}/* end readData *//**************************************************************** * Name : initialize * * Function: initialize data * * Input : void * * Output : int * ****************************************************************/static int initialize() { int i, j; numberClusters = 0; thresholdPoints = (int)(numberOfPoints * tau/100.0); cout << "thresholdPoints = " << thresholdPoints << endl; // The use of ceil() is necessary cellColumn = (int)ceil((xmax-xmin)/resolution); cellRow = (int)ceil((ymax-ymin)/resolution); cout << "cellRow is " << cellRow << " cellColumn is " << cellColumn << endl; ptrCells = (Cell **)calloc(cellRow, sizeof(Cell *)); if(ptrCells == NULL) { cout << "cannot allocate memory for cells"; return -1; } for(i=0; i<cellRow; i++){ ptrCells[i] = (Cell *)calloc(cellColumn, sizeof(Cell)); if(ptrCells[i] == NULL) { cout << "cannot allocate memory for cells[" << i << "]" << endl; return -1; }/* end if */ }/* end for */ for(i=0; i<cellRow; i++){ for(j=0; j<cellColumn; j++){ ptrCells[i][j].qualified = 0; ptrCells[i][j].checked = 0; ptrCells[i][j].numberPoints=0; ptrCells[i][j].clusterNO = 0; }/* end for j */ }/* end for i */ data = (Point *)calloc(numberOfPoints, sizeof(Point)); if(data == NULL) { printf("cannot allocate memory for data! \n"); return -1; } return 0;}/* end initialize *//**************************************************************** * Name : parsingInput * * Function: parsing input command line * * Input : argc, argv * * Output : void * ****************************************************************/static int parsingInput(int argc, char *argv[]) { if(argc == 9) { if ((int)strlen(argv[1]) >= MAX_fileName) { printf("the name of the file is too long. " "It should be within %d characters \n", MAX_fileName); return -1; } else { sscanf(argv[1], "%s", fileName); } sscanf(argv[2], "%d", &numberOfPoints); sscanf(argv[3], "%f", &resolution); sscanf(argv[4], "%f", &xmin); sscanf(argv[5], "%f", &xmax); sscanf(argv[6], "%f", &ymin); sscanf(argv[7], "%f", &ymax); sscanf(argv[8], "%f", &tau); } /* end if */ else { cout << "argument error \n"; cout << "Usage of the program should be of the form: \n"; cout << "clique fileName numberOfPoints resolution xmin xmax ymin ymax tau \n" << endl; return -1; }/* end else */#ifdef Debug_parsingInput cout << "Input data is: \n"; cout << "fileName = " << fileName << "\n"; cout << "numberOfPoints = " << numberOfPoints << "\n"; cout << "resolution = " << resolution << endl; cout << " xmin = " << xmin; cout << "\n xmax = " << xmax; cout << "\n ymin = " << ymin; cout << "\n ymax = " << ymax << endl; cout << " tau = " << tau << endl;#endif return 0;}/* end parsingInput */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -