⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 clique.cc

📁 经典的基于网格和密度的聚类算法。适合处理大规模数据
💻 CC
字号:
// This is my clique program// Author: Weinan Wang#include <iostream.h> // for cin, cout#include <fstream.h>  // for ifstream, ofstream#include <iomanip.h> // for setw#include <stdlib.h>#include <math.h> /* for ceil *//************************************************* * Definition                                    * *************************************************//* maxmum length of the file name */#define MAX_fileName 14/* the threshold to determine which cluster is outlier */#define noiseThreshold 6/************************************************* * Define data structures                        * *************************************************//* each Point represent a spatial point */struct Point {  double x;  double y;  int clusterNO;  int cellx;  int celly;};/* end Point *//* each of a Cell corresponing to a cell */struct Cell {  int qualified; // whether the numebr of points in this cell is more than tau  int numberPoints; // number of points contained in this cell.  int checked; // See if this Cell's neighbours have been checked.   int clusterNO;   // This cell's cluster number.}; /* end Cell *//************************************************** * Declare static functions                       * **************************************************/static int parsingInput(int argc, char *argv[]);static void finish();static int initialize();static int readData();static int cluster();static int retrieve(int numberClusters, int i, int j);static int printResult();/************************************************* * Declare global variables                      * *************************************************//* fileName is the data file's name */char fileName[MAX_fileName + 1];/* numberOfPoints tells the number of data points */int numberOfPoints;/* resolution is used in the first step */float resolution;/* these four floats indicate the range of input data */float xmin, xmax, ymin, ymax;/* This is the threshold for clustering */float tau;int thresholdPoints; // tau% * numberOfPoints/* This is the array of data points */Point * data;/* This two dimentional array is the matrix of cells */Cell ** ptrCells;/* This is the number of rows and columns for the cell matrix */int cellRow, cellColumn;/* This variable count the number of clusters */int numberClusters; /********************************************************************************* * Name    : main                                                                * * Function: main function                                                       * * Input   : argc, argv                                                          * * Output  : int                                                                 * *           The usage of the clique program should                              * *           be of the form:                                                     * *           clique fileName numberOfPoints resolution xmin xmax ymin ymax tau   * *                                                                               * *           Each parameter's meanning is :                                      * *           clique -- program's name                                            * *           string fileName -- indicate the data file                           * *                              name which                                       * *                              contains all the data vectors.                   * *           int numberOfPoints -- the number of data points                     * *           float resolution -- resolution of clustering                        * *           float xmin -- the min value of x                                    * *           float xmax -- the max value of x                                    * *           float ymin -- the min value of y                                    * *           float ymax -- the max value of y                                    * *           float tau -- the threshold for clustering                           * *********************************************************************************/int main( int argc, char *argv[] ){   /* parse the command line */  if (parsingInput(argc, argv)<0)    {      cout << "parsing error!" << endl;      return -1;    }  /* parse the command line */  if (initialize()<0)    {      cout << "initialize error!" << endl;      finish();      return -1;    }  /* read data and assign into cells */  if (readData()<0)    {      cout << "readData error!" << endl;      finish();      return -1;    }  if (cluster()<0)    {      cout << "cluster error!" << endl;      finish();      return -1;    }  if (printResult()<0)    {      cout << "printResult error!" << endl;      finish();      return -1;    }  finish();  return 0;}/* end main *//**************************************************************** * Name    : finish                                             * * Function: operations before finish                           * * Input   :  void                                              * * Output  : void                                               * ****************************************************************/static void finish(){  int i;  free(data);  data = 0; // C++ prefer 0 instead of NULL  for(i=0; i<cellRow; i++){    free(ptrCells[i]);    ptrCells[i]=0;      }/* end for i */  free(ptrCells);  ptrCells=0;  return;}/* end finish *//**************************************************************** * Name    : printResult                                        * * Function: print clustering result                            * * Input   :  void                                              * * Output  : int                                                * ****************************************************************/static int printResult(){  int i, j;  int * clusterCounter;  clusterCounter = (int *)calloc(numberClusters+1, sizeof(int));  for(i=0; i<=numberClusters; i++)    clusterCounter[i]=0;  // This is the output file stream  ofstream clusterFile("clusterResult.m", ios::out);  if(!clusterFile){    cerr << "cannot open clusterResult.m" << endl;    return -1;  }  for(i=0; i<numberOfPoints; i++){    data[i].clusterNO = ptrCells[data[i].celly][data[i].cellx].clusterNO;    clusterCounter[data[i].clusterNO]+=1;  }/* end for */  for(i=0; i<=numberClusters; i++){    if(clusterCounter[i]>0){      clusterFile << "c" << i << "=[" << endl;      for(j=0; j<numberOfPoints; j++){	if(data[j].clusterNO == i){	  clusterFile << data[j].x << "  " << data[j].y << endl;	}/* end if */      }/* end for j */      clusterFile << "];" << endl;      // If this cluster is outlier, then merge it with noise cluster c0.      if(clusterCounter[i]<noiseThreshold){	clusterFile << "c0=[ c" << i << " \n c0];" << endl; 	clusterFile << "clear c" << i <<";" << endl;      }    }/* end if */  }/* end for i */  free(clusterCounter);  return 0;}/* end printResult *//**************************************************************** * Name    : retrieve                                           * * Function: retrieve neighbor of neighbor                      * * Input   :  void                                              * * Output  : int                                                * * note    : this is a recursive function.                      * ****************************************************************/static int retrieve(int numberClusters, int i, int j){  int l, m;  if( ptrCells[i][j].checked != 1 && ptrCells[i][j].clusterNO == 0 ){ /* if this point has not been checked */    if( ptrCells[i][j].qualified > 0 ){      ptrCells[i][j].clusterNO = numberClusters;      ptrCells[i][j].checked = 1;      // check all the neighbors of the LL[i][j]      if(i != 0){	l = i-1;	retrieve(numberClusters, l, j);      }          if( i != (cellRow-1) ){	l = i+1;	retrieve(numberClusters, l, j);      }          if(j != 0){	m = j-1;	retrieve(numberClusters, i, m);      }          if( j != (cellColumn-1) ){	m = j+1;	retrieve(numberClusters, i, m);      }    }    else{ // else it is an outlier, so no need to check its neighbours      ptrCells[i][j].clusterNO = 0; //means that it is a outlier      ptrCells[i][j].checked = 1; // means this cell has been checked    }/* end if...else... */  }/* end if */      return 0;}/* end retrieve *//**************************************************************** * Name    : cluster                                            * * Function: cluster based on ptrCells                          * * Input   :  void                                              * * Output  : int                                                * ****************************************************************/static int cluster(){  int i, j;  for(i=1; i<cellRow; i++){    for(j=1; j<cellColumn; j++){      if(ptrCells[i][j].checked != 1 && ptrCells[i][j].clusterNO == 0){	if(ptrCells[i][j].qualified > 0){	  numberClusters++;	  if(retrieve(numberClusters, i, j)<0)	    {	      cout << "retrieve error! \n";	      return -1;	    }	}	else{	  ptrCells[i][j].clusterNO = 0; //means that it is a outlier	  ptrCells[i][j].checked = 1; // means this cell has been checked	}/* end if...else... */      }/* end if */    }/* end for j */  }/* end for i */  #ifdef Debug_cluster  cout << "ptrCells is : \n";   for(i=1; i<cellRow; i++){    for(j=1; j<cellColumn; j++){      cout << ptrCells[i][j].clusterNO << " ";    }/* end for j */    cout << endl;  }/* end for i */#endif  cout << "total number of effective clusters is " << numberClusters << endl;  return 0;}/* end cluster *//**************************************************************** * Name    : readData                                           * * Function: read in data and assign into cells.                * * Input   : void                                               * * Output  : int                                                * ****************************************************************/static int readData(){  int i, j;  ifstream inFile(fileName, ios::in);  if(!inFile){    cerr << "cannot open data file!" << endl;    return -1;  }/* end if */  // read in data point one by one and arrange each data point into  // corresponding cell.  for (i=0; i<numberOfPoints; i++){    inFile >> data[i].x >> data[i].y;    //    cout << setw(15) << data[i].x << setw(15) << data[i].y << endl;    data[i].cellx = (int)((data[i].x - xmin)/resolution);    data[i].celly = (int)((data[i].y - ymin)/resolution);    //    cout << "cellx=" << data[i].cellx << "celly=" << data[i].celly << endl;    (ptrCells[data[i].celly][data[i].cellx].numberPoints)++;  }/* end for */  for(i=0; i<cellRow; i++){    for(j=0; j<cellColumn; j++){      if(ptrCells[i][j].numberPoints >= thresholdPoints)	ptrCells[i][j].qualified = 1;    }/* end for j */  }/* end for i */#ifdef Debug_readData  for(i=0; i<cellRow; i++){    for(j=0; j<cellColumn; j++){      cout << ptrCells[i][j].numberPoints << " ";    }/* end for j */    cout << "\n";  }/* end for i */#endif  return 0;}/* end readData *//**************************************************************** * Name    : initialize                                         * * Function: initialize data                                    * * Input   :  void                                              * * Output  : int                                                * ****************************************************************/static int initialize() {  int i, j;  numberClusters = 0;   thresholdPoints = (int)(numberOfPoints * tau/100.0);  cout << "thresholdPoints = " << thresholdPoints << endl;  // The use of ceil() is necessary  cellColumn = (int)ceil((xmax-xmin)/resolution);  cellRow = (int)ceil((ymax-ymin)/resolution);   cout << "cellRow is " << cellRow << " cellColumn is " << cellColumn << endl;  ptrCells = (Cell **)calloc(cellRow, sizeof(Cell *));  if(ptrCells == NULL)    {      cout << "cannot allocate memory for cells";      return -1;    }  for(i=0; i<cellRow; i++){    ptrCells[i] = (Cell *)calloc(cellColumn, sizeof(Cell));    if(ptrCells[i] == NULL)    {      cout << "cannot allocate memory for cells[" << i << "]" << endl;      return -1;    }/* end if */  }/* end for */  for(i=0; i<cellRow; i++){    for(j=0; j<cellColumn; j++){      ptrCells[i][j].qualified = 0;      ptrCells[i][j].checked = 0;      ptrCells[i][j].numberPoints=0;      ptrCells[i][j].clusterNO = 0;    }/* end for j */  }/* end for i */  data = (Point *)calloc(numberOfPoints, sizeof(Point));  if(data == NULL)    {      printf("cannot allocate memory for data! \n");      return -1;    }  return 0;}/* end initialize *//**************************************************************** * Name    : parsingInput                                       * * Function: parsing input command line                         * * Input   : argc, argv                                         * * Output  : void                                               * ****************************************************************/static int parsingInput(int argc, char *argv[]) {  if(argc == 9)    {      if ((int)strlen(argv[1]) >= MAX_fileName)	{	  printf("the name of the file is too long. "		 "It should be within %d characters \n", 		 MAX_fileName);	  return -1;	}      else	{	  sscanf(argv[1], "%s", fileName);	}      sscanf(argv[2], "%d", &numberOfPoints);      sscanf(argv[3], "%f", &resolution);      sscanf(argv[4], "%f", &xmin);      sscanf(argv[5], "%f", &xmax);      sscanf(argv[6], "%f", &ymin);      sscanf(argv[7], "%f", &ymax);      sscanf(argv[8], "%f", &tau);    } /* end if */  else    {      cout << "argument error \n";      cout << "Usage of the program should be of the form: \n";      cout << "clique fileName numberOfPoints resolution xmin xmax ymin ymax tau \n"  << endl;      return -1;    }/* end else */#ifdef Debug_parsingInput  cout << "Input data is: \n";  cout << "fileName = " << fileName << "\n";  cout << "numberOfPoints = " << numberOfPoints << "\n";  cout << "resolution = " << resolution << endl;  cout << " xmin = " << xmin;  cout << "\n xmax = " << xmax;  cout << "\n ymin = " << ymin;  cout << "\n ymax = " << ymax << endl;  cout <<  " tau = " << tau << endl;#endif  return 0;}/* end parsingInput */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -