⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 databin.h

📁 C++蚂蚁实现/C++蚂蚁实现 C++蚂蚁实现
💻 H
字号:
/*  Ant-based Clustering    Copyright (C) 2004 Julia Handl    Email: Julia.Handl@gmx.de    This program is free software; you can redistribute it and/or modify    it under the terms of the GNU General Public License as published by    the Free Software Foundation; either version 2 of the License, or    (at your option) any later version.    This program is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    GNU General Public License for more details.    You should have received a copy of the GNU General Public License    along with this program; if not, write to the Free Software    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*//***************************************************date: 7.4.2003author: Julia Handl (julia.Handl@gmx.de)description: - wrapper class for data- data input- data normalisation- precomputation of dissimilarity matrix***************************************************/#ifndef DATABIN_JH_2003#define DATABIN_JH_2003#include "conf.h"#include "tmatrix.h"#include "databin.h"#include <fstream>#include "testset.h"#include "math.h"#include "random.h"template <class BINTYPE> class databin;template <class DBINTYPE> class docbin;/***************************************************       class data***************************************************/#define NRANSI#define MAXBIT 30#define MAXDIM 6static int iminarg1,iminarg2;#define IMIN(a,b) (iminarg1=(a),iminarg2=(b),(iminarg1) < (iminarg2) ? (iminarg1) : (iminarg2))template <class DATATYPE>class data {     friend class databin<DATATYPE>; private:    DATATYPE * vector;  protected:    conf * par; public:    int color;    int cluster;    public:    ~data();    /* Constructor for a data item that is initialized to the vector <d> and assigned  cluster number <cluster> and color <color> */    data(conf * c, DATATYPE * d, int color, int cluster);    /* Default constructor */    data(conf * c);  /* Constructor for a data item that is initialized to the vector <d> */    data(conf * c, DATATYPE * d);    /* Lenght of the data vector */    const int length();    DATATYPE square(DATATYPE x);    /* Write-Read access to individual components */    DATATYPE &operator[](const int i);    /* Distance computation between two data items */    const DATATYPE distanceto(data<DATATYPE> & d);    /* Addition of two data vectors */    void add(data<DATATYPE> & d);    /* Division of a data vector by <i> */    void div(int i);    /* Data vector is set to <d> */    void set(data<DATATYPE> & d);  /* Data vector is set to <d> */    void set(DATATYPE * d);    friend ostream & operator<< <DATATYPE>(ostream &, data<DATATYPE> &);};/***************************************************       class databin***************************************************/template <class BINTYPE>class databin { protected:    /* pointer to current parameter settings */    conf * par;     /* array of pointer to data objects */     data<BINTYPE> ** bin;    public:    /* precomputed disimilarity matrix */    tmatrix<BINTYPE> * distancematrix;    /* array of maxvalue for each attribute */    BINTYPE * maxvalue;    BINTYPE * minvalue;    BINTYPE * mean;    BINTYPE * std;     public:  /* Constructor, hard-coded data collection as described by <name> */  databin(conf * c, char * name);  /* Destructor */  ~databin();  /* Distance computation between two data items in the collection */  const BINTYPE d(const int index1, const int index2);  /* Direct access to precomputed dissimilarity matrix */  const BINTYPE precomputed_d(const int index1, const int index2);  /* Write-Read access to an individual data item in the collection */  data<BINTYPE> & operator[](const int i);  /* Data output */  void dataoutput(char * filename);  void distanceoutput(char * name, double mu);  /* Check whether this class label already exists */  int find(char * templabel, char ** classlabel, int & labelctr);  /* Generate new samples from the Normal Distribution (for artificial data) */  void regenerate(char * name);  /* Permute order of data items */  void permutate();};/***************************************************       class data - function definitions***************************************************/// access to vector lengthtemplate <class DATATYPE>const int data <DATATYPE>::length() {    return par->bindim;} // default constructortemplate <class DATATYPE>data <DATATYPE>::data(conf * c)  {    par = c;    vector = new DATATYPE[par->bindim];    color = 0;    cluster = 0;        for (int i=0; i<par->bindim; i++) {	vector[i] = 0;    }} // constructor if data vector is providedtemplate <class DATATYPE>data <DATATYPE>:: data(conf * c, DATATYPE * dat) {    par = c;    vector = new DATATYPE[par->bindim];    color = 0;    cluster = 0;          for (int i=0; i<par->bindim; i++) {	vector[i] = dat[i];    }} // constructor if data vector is providedtemplate <class DATATYPE>data <DATATYPE>:: data(conf * c, DATATYPE * dat, int col, int cl ) {    par = c;    vector = new DATATYPE[par->bindim];    color = col;    cluster = cl;          for (int i=0; i<par->bindim; i++) {	vector[i] = dat[i];    }}// destructortemplate <class DATATYPE>data <DATATYPE>::~data() {    delete [] vector;}// square functiontemplate <class DATATYPE> DATATYPE data <DATATYPE>::square(DATATYPE x) {    return x*x;}// write-read access to coordinates of dat vector template <class DATATYPE> DATATYPE &data <DATATYPE>::operator[](const int i) {    return vector[i];}// distance function defined between data vectors   template <class DATATYPE>const DATATYPE data <DATATYPE>::distanceto(data<DATATYPE> & dd) {      data & d = (data &)dd;    DATATYPE result = 0.0;    for (int i=0; i<par->bindim; i++) {	result += square(d.vector[i] - vector[i]);    }        return sqrt(result);}// addition of data vectors   template <class DATATYPE>void data <DATATYPE>::add(data<DATATYPE> & d) {    for (int i=0; i<par->bindim; i++) {	vector[i] += d.vector[i];    } }template <class DATATYPE> void data<DATATYPE>::set(data<DATATYPE> & d) {      for (int i=0; i<par->bindim; i++) {	  vector[i] = d.vector[i];      }}template <class DATATYPE> void data<DATATYPE>::set(DATATYPE * d) {      for (int i=0; i<par->bindim; i++) {	  vector[i] = d[i];      }}// division of a data vector by an integer   template <class DATATYPE>void data <DATATYPE>::div(int divisor) {    for (int i=0; i<par->bindim; i++) {	vector[i] /= double(divisor);    }}/***************************************************       class databin - function definitions***************************************************/// destructortemplate <class BINTYPE>databin <BINTYPE>::~databin() {#ifndef RANDDATA    for (int i=0; i<par->binsize; i++) {	delete bin[i];    }    delete [] bin;    delete [] maxvalue;#endif        delete distancematrix; //    cout << "Databin Destructor" << endl;}template <class BINTYPE> const BINTYPE databin <BINTYPE>::precomputed_d(const int index1, const int index2) {    return (*distancematrix)(index1,index2);}// distance function between the bin's data itemstemplate <class BINTYPE>inline const BINTYPE databin <BINTYPE>::d(const int index1, const int index2) {    return bin[index1]->distanceto(*bin[index2]);}// write-read access to data itemstemplate <class BINTYPE>inline data<BINTYPE> & databin <BINTYPE>::operator[](const int i) {    return *bin[i];} template <class BINTYPE> int databin <BINTYPE>::find(char * templabel, char ** classlabel, int & labelctr) {    for (int i=0; i<labelctr; i++) {	if (strcmp(classlabel[i],templabel) == 0) {	    return i;	}    }    strcpy(classlabel[labelctr], templabel);    labelctr++;    cout << "New Label: " << templabel << endl;    return labelctr-1;}   // constructor for class databin if a hard-coded test set (identified by <name>) is usedtemplate <class BINTYPE>databin <BINTYPE>::databin(conf * c, char * name) {        par = c;    testset t(name, c);    t.generate();        par->bindim = 2;    par->imax = (int)sqrt(double(par->binsize * 10));    par->jmax = par->imax;    par->maxspeed = int(sqrt(2.0*0.5*par->imax*0.5*par->imax));    par->generations = max(25,int(double(par->binsize) /20));    par->kclusters = par->num_cluster;#ifdef CLUSTERING    par->imax_som = 1;    par->jmax_som = par->kclusters;#endif#ifdef TOPMAPPING    par->imax_som = (int)sqrt(par->binsize);    par->jmax_som = (int)sqrt(par->binsize);#endif    maxvalue = new USED_DATA_TYPE[par->bindim];    minvalue = new USED_DATA_TYPE[par->bindim];    mean = new USED_DATA_TYPE[par->bindim];    std = new USED_DATA_TYPE[par->bindim];        for (int j=0; j<par->bindim; j++) {	mean[j] = 0.0;	for (int i=0; i<par->binsize; i++) {	    mean[j] += t.point_coordinates[i][j];	}    }     for (int j=0; j<par->bindim; j++) {	mean[j] /= double(par->binsize);    }    // compute standard deviation    for (int j=0; j<par->bindim; j++) {	std[j] = 0.0;	for (int i=0; i<par->binsize; i++) {	    double diff = t.point_coordinates[i][j]-mean[j];	    std[j] += diff*diff;	}	std[j] /= par->binsize;	std[j] = sqrt(std[j]);    }    for (int j=0; j<par->bindim; j++) {	for (int i=0; i<par->binsize; i++) {	  	  t.point_coordinates[i][j] -= mean[j];	  	  t.point_coordinates[i][j] /= std[j];	}    }        bin = new data<USED_DATA_TYPE>*[par->binsize];    if (bin == NULL) {	cerr << "Databin: Memory allocation failed" << endl;	exit(0);    }    int ctr = 0;    int color = 0;    for (int k=0; k<par->bindim; k++) {	maxvalue[k] = -100000000.0;	minvalue[k] = 100000000.0;    }    for (int i=0; i<par->num_cluster; i++) {	for (int j=0; j<par->size_cluster[i]; j++) { 	    bin[ctr] = new data<USED_DATA_TYPE>(par, t.point_coordinates[ctr],color+1, color);	    if (bin[ctr] == NULL) {		cerr << "Databin: Memory allocation failed" << endl;		exit(0);	    }	    for (int k=0; k<par->bindim; k++) {		maxvalue[k] = max(maxvalue[k], (*(bin[ctr]))[k]);		minvalue[k] = min(minvalue[k], (*(bin[ctr]))[k]);			    }	   	    ctr++;	}	color++;    }        distancematrix = new tmatrix<BINTYPE>(par->binsize);    if (distancematrix == NULL) {	cerr << "Databin: Memory allocation failed" << endl;	exit(0);    }        // print the original document positions to a gnuplot file     dataoutput("initialdata.dat");           // compute distances and mean    par->mu = 0.0;    par->max = 0.0;    for (int i=0; i<par->binsize; i++) {	for (int j=0; j<i; j++) {	    (*distancematrix)(i,j) = bin[i]->distanceto(*(bin[j]));	    par->mu += (*distancematrix)(i,j);	    par->max = max(par->max, (*distancematrix)(i,j));	}    }    par->mu /= 0.5*(par->binsize-1)*par->binsize;    // normalize data    for (int i=0; i<par->binsize; i++) {      for (int j=0; j<i; j++) {	(*distancematrix)(i,j) = (*distancematrix)(i,j) / par->max;      }    }    par->max /= par->max;    par->mu /= par->max;  }template <class BINTYPE> void databin <BINTYPE>::permutate() {        BINTYPE tempval;    long idum = rand();    for (int i=0; i<par->binsize; i++) {	int j = int(ran0(&idum)*(par->binsize));	data<BINTYPE> * temp = bin[i];	bin[i] = bin[j];	bin[j] = temp;	for (int k=0; k<par->binsize; k++) {	    if ((k != i)  && (k != j)) {		tempval = (*distancematrix)(i,k);		(*distancematrix)(i,k) = (*distancematrix)(j,k);		(*distancematrix)(j,k) = tempval;	    }	  	}    }     }template <class BINTYPE> void databin <BINTYPE>::regenerate(char * name) {            testset t(name, par);    t.generate();            for (int j=0; j<par->bindim; j++) {	mean[j] = 0.0;	for (int i=0; i<par->binsize; i++) {	    mean[j] += t.point_coordinates[i][j];	}    }     for (int j=0; j<par->bindim; j++) {	mean[j] /= par->binsize;    }    // compute standard deviation    for (int j=0; j<par->bindim; j++) {	std[j] = 0.0;	for (int i=0; i<par->binsize; i++) {	    double diff = t.point_coordinates[i][j]-mean[j];	    std[j] += diff*diff;	}	std[j] /= par->binsize;	std[j] = sqrt(std[j]);    }	            int ctr = 0;    int color = 0;    for (int k=0; k<par->bindim; k++) {	maxvalue[k] = 0.0;    }    for (int i=0; i<par->num_cluster; i++) {	for (int j=0; j<par->binsize/par->num_cluster; j++) { 	    bin[ctr] = new data<USED_DATA_TYPE>(par, t.point_coordinates[ctr],color+1, color);	  	    for (int k=0; k<par->bindim; k++) {		maxvalue[k] = max(maxvalue[k], abs((*(bin[ctr]))[k]));	    }	    ctr++;	}	color++;    }               // compute distances and mean    par->mu = 0.0;    par->max = 0.0;    for (int i=0; i<par->binsize; i++) {	for (int j=0; j<i; j++) {	    (*distancematrix)(i,j) = bin[i]->distanceto(*(bin[j]));	    par->mu += (*distancematrix)(i,j);	    par->max = max(par->max, (*distancematrix)(i,j));	}    }    par->mu /= 0.5*(par->binsize-1)*par->binsize;    // normalize data    for (int i=0; i<par->binsize; i++) {	for (int j=0; j<i; j++) {	    (*distancematrix)(i,j) = (*distancematrix)(i,j) / par->max;	}    }    par->mu /= par->max;    }template <class BINTYPE> void databin <BINTYPE>::dataoutput(char * name) {    ofstream datastream(name);    for (int i=0; i<par->binsize; i++) {	for (int j=0; j<par->bindim; j++) {	    datastream << (*(bin[i]))[j] << " ";	}	datastream << (*(bin[i])).cluster << endl;    }}template <class BINTYPE> void databin <BINTYPE>::distanceoutput(char * name, double mu) {    int size = 100;    double step = par->max / mu / double(size);    int histo[size];    for (int i=0; i<size; i++) histo[i] = 0;    ofstream datastream(name);    for (int i=0; i<par->binsize; i++) {	for (int j=0; j<i; j++) {	    histo[int((*distancematrix)(i,j) / mu / step) ]++;	}    }    for (int i=0; i<size; i++) {	datastream << double(i)*step  << " " << histo[i] << endl;    }}    template <class DATATYPE>ostream & operator<<(ostream & s, data<DATATYPE> & d) {     for (int i=0; i<d.length(); i++) {	s.precision(3);	s << d.vector[i] << " ";    }    s << endl;    return s;}#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -