📄 syntheticdata.cpp
字号:
/* Scalable K-means clustering softwareCopyright (C) 2000 Fredrik Farnstrom and James LewisThis program is free software; you can redistribute it and/ormodify it under the terms of the GNU General Public Licenseas published by the Free Software Foundation; either version 2of the License, or (at your option) any later version.This program is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See theGNU General Public License for more details.You should have received a copy of the GNU General Public Licensealong with this program; if not, write to the Free SoftwareFoundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.See the file README.TXT for more information.*//* syntheticdata.cpp */#include <math.h>#include <stdio.h>#include <stdlib.h>#include <time.h>#include "singleton.h"#include "subcluster.h"#include "database.h"#include "syntheticdata.h"SyntheticData::SyntheticData(int models, int dim, int points) : models(models), dimensions(dim), numPoints(points){ data = new float[dim]; mean = new float[models * dim]; stdDev = new float[models * dim]; DataFp = NULL; logFile = fopen("synthdat.txt", "w");}SyntheticData::~SyntheticData(void){ delete []data; delete []mean; delete []stdDev; fclose(logFile);}// Initialize the data points.void SyntheticData::initData(void){ int i,j,k; int model; float x1, x2, z1, z2; if (DataFp != NULL) { fclose(DataFp); DataFp = 0; } srand48(time(0));// srand48(17);// srand48(23); float modelWeight[models]; float totalWeight = 0; float randModel; // Initialize mean and standard deviation for each model. for (i = 0; i < models; i++) { modelWeight[i] = (float) drand48(); totalWeight += modelWeight[i]; double temp; for (j = 0; j < dimensions; j++) { temp = (double) (maxMean - minMean); mean[i * dimensions + j] = (float) drand48() * (maxMean - minMean) + minMean; stdDev[i * dimensions + j] = (float) sqrt(drand48() * (maxVar - minVar) + minVar); } } // Normalize model weights. for (i = 0; i < models; i++) { modelWeight[i] /= totalWeight; fprintf(logFile, "%f ", modelWeight[i]); printf("Model weight %f\n", modelWeight[i]); } fprintf(logFile, "\n"); fflush(logFile); FILE *f; if(!(f = fopen("/net/jimi/disk6/ffarnstrom/kdd/sddata.dat", "w"))) { fprintf(stderr, "Error: Can't write to sddata.dat.\n"); return; } // Generate data points from models. for(i = 0; i < numPoints; i++) { // Select the model to use. randModel = (float) drand48(); model = 0; while (randModel > modelWeight[model]) { randModel -= modelWeight[model]; model++; } // Generate a data point for the current model. for (j = 0; j < dimensions; j += 2) { x1 = (float) drand48(); x2 = (float) drand48(); z1 = (float) (sqrt(-2.0 * log (x1)) * cos(2.0 * M_PI * x2)); z2 = (float) (sqrt(-2.0 * log (x1)) * sin(2.0 * M_PI * x2)); z1 = z1 * stdDev[model * dimensions + j] + mean[model * dimensions + j]; z2 = z2 * stdDev[model * dimensions + j + 1] + mean[model * dimensions + j + 1]; data[j] = z1; if (j < dimensions - 1) { data[j + 1] = z2; } } // Write data point to file./* for (j = 0; j < dimensions; j++) { //fprintf(f, "%f ", data[j]); fwrite(data[j], sizeof(float),dimen } fprintf(f, "\n");*/ fwrite(data, sizeof(float), dimensions, f); } fclose(f); f = fopen("sdmsd.dat", "w"); //fprintf(f, "cm=[");/* for(i = 0; i < models; i++) { for(j = 0; j < dimensions; j++) fprintf(f, "%f ", mean[i*dimensions+j]); fprintf(f, ";\n"); } fprintf(f, "];\n");*/ fwrite(mean, sizeof(float), models*dimensions, f); fwrite(stdDev, sizeof(float), models*dimensions, f);/* fprintf(f, "cd=["); for(i = 0; i < models; i++) { for(j = 0; j < dimensions; j++) fprintf(f, "%f ", stdDev[i*dimensions+j]); fprintf(f, ";\n"); } fprintf(f, "];\n"); fprintf(f, "for i=1:size(cm,1),\n"); fprintf(f, "rectangle('Curvature',[1 1],'Position',[cm(i,1)-cd(i,1) cm(i,2)-cd(i,2)"\ " 2*cd(i,1) 2*cd(i,2)])\n"); fprintf(f, "end\n");*/ fclose(f);}// Return the cluster means for the synthetic data points.float *SyntheticData::getMeans(int model){ return &mean[model * dimensions];}// Reset counter, i.e return first point next time getPoint is called.void SyntheticData::reset(void){ if (DataFp != NULL) { fclose(DataFp); DataFp = 0; } if ((DataFp = fopen("sdmsd.dat", "r")) == NULL) { perror("Unable to open data file."); exit(1); }/* for (int i = 0; i < models; i++) { for (int j = 0; j < dimensions; j++) { if ((fscanf(DataFp, "%f", &mean[i*dimensions+j])) != 1) { perror("Error reading stat file."); exit(0); } } }*/ if (fread(mean, sizeof(float), models*dimensions, DataFp) == 0) { perror("Error reading stat file."); exit(0); } if (fread(stdDev, sizeof(float), models*dimensions, DataFp) == 0) { perror("Error reading stat file."); exit(0); }/* for (int i = 0; i < models; i++) { for (int j = 0; j < dimensions; j++) { if ((fscanf(DataFp, "%f", &stdDev[i*dimensions+j])) != 1) { perror("Error reading stat file."); exit(0); } } }*/ if (DataFp != NULL) { fclose(DataFp); DataFp = 0; } if ((DataFp = fopen("/net/jimi/disk6/ffarnstrom/kdd/sddata.dat", "r")) == NULL) { perror("Unable to open data file."); exit(1); }// printf("reset()\n");// numPoints = 0;}// Return a pointer to an array of floats containing the current point.// The allocated memory may be read by the user but belongs to the// Database class.float *SyntheticData::getPoint(void){/* for (int i = 0; i < dimensions; i++) { if ((fscanf(DataFp, "%f", &data[i]) != 1)) return 0; }*/ if (fread(data, sizeof(float), dimensions, DataFp) == 0) {//fprintf(stderr, "getPoint() %d %d\n", DataFp, numPoints); return 0; }//fprintf(stderr, "getPoint()\n"); return data;}/* End of file syntheticdata.cpp */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -