⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kmltest.cpp

📁 高效的k-means算法实现
💻 CPP
📖 第 1 页 / 共 4 页
字号:
//				Maximum total stages given as paramters//				(a,...,d).//				Default: 0,0,0,0.//// Options affecting algorithm execution:// --------------------------------------////   Options used in Lloyd's Algorithm and Hybrid Algorithms://   --------------------------------------------------------//	damp_factor <float>	A dampening factor in the interval//				(0,1].  The value 1 is the standard//				Lloyd's algorithm.  Othewise, each point//				is only moved by this fraction of the//				way from its current location to the//				centroid.  Default: 1//	min_accum_rdl <float>	This is used in Lloyd's algorithm//				algorithm which perform multiple swaps.//				When performing p swaps, we actually may//				perform fewer than p.  We stop//				performing swaps, whenever the total//				distortion (from the start of the run)//				has decreased by at least this amount.//				Default: 0.10//	max_run_stage <int>	This is used in Lloyd's algorithm.  A//				run terminates after this many stages.//				Default: 100////   Options specific to the swap algorithm://   ---------------------------------------//	max_swaps <int>		Maximum swaps at any given stage.//				Default: 1////   Options specific to the hybrid algorithms://   ------------------------------------------//	min_consec_rdl <float>	This is used in the hybrid algorithms.//				If the RDL of two consecutive runs is//				less than this value Lloyd's algorithm//				is deemed to have converged, and the run//				ends.//   Options specific to the (complex) hybrid algorithm://   ---------------------------------------------------//	init_prob_accept <float> The initial probability of accepting a//				solution that does not improve the//				distortion.//	temp_run_length <int>	The number of stages before chaning the//				temperature.//	temp_reduc_factor <float> The factor by which temperature is//				reduced at the end of a temperature run.////  Example://  --------//    title Experiment_1A		# experiment title//    stats summary			# print summary information//    dim 2				# dimension 2////    data_size 5000			# 5000 data points//    colors 30				# ...broken into 30 clusters//    std_dev 0.025			# ...each with std deviation 0.025//    distribution clus_gauss		# clustered gaussian distribution//    seed 1				# random number seed//  gen_data_pts			# generate the data points//  //    max_tot_stage 20 0 0 0		# terminate Lloyd's after 20 stages//  print Running-lloyd's//  run_kmeans lloyd			# run using Lloyd's algorithm//  //    max_swaps 3			# at most 3 swaps//    max_tot_stage 0 3 0 2		# at most 3*k^2 = 1200 stages//    max_run_stage 50			# at most 50 stages per run//    min_accum_rdl 0.02		# stop run if distortion drops 2%//  print Running-swap//  run_kmeans swap			# run using swap heuristic//------------------------------------------------------------------------//------------------------------------------------------------------------//  Statistics output levels (see KMeans.h for corresponding enumeration)//------------------------------------------------------------------------static const string stat_table[N_STAT_LEVELS] = {	"silent",			// SILENT	"exec_time",			// EXEC_TIME	"summary",			// SUMMARY	"phase",			// PHASE	"run",				// RUN	"stage",			// STAGE	"step",				// STEP	"centers",			// CENTERS	"tree"};			// TREE//------------------------------------------------------------------------//  k-means algorithms//	(See KMeans.h for coresponding enumeration)//------------------------------------------------------------------------static const string kmAlgTable[N_KM_ALGS] = {	"lloyd",			// LLOYD only	"swap",				// SWAP only	"hybrid",			// HYBRID alternation	"EZ-hybrid",			// EZ_HYBRID alternation	"--illegal--"};			// RANDOM (not allowed)//------------------------------------------------------------------------//  Distributions//------------------------------------------------------------------------typedef enum {			// distributions	UNIFORM,			// uniform over cube [-1,1]^d.	GAUSS,				// Gaussian with mean 0	LAPLACE,			// Laplacian, mean 0 and var 1	CO_GAUSS,			// correlated Gaussian	CO_LAPLACE,			// correlated Laplacian	CLUS_GAUSS,			// clustered Gaussian	CLUS_ORTH_FLATS,		// clustered on orthog flats	CLUS_ELLIPSOIDS,		// clustered on ellipsoids	MULTI_CLUS,			// multi-sized clusters	N_DISTRIBS}	Distrib;static const string distr_table[N_DISTRIBS] = {	"uniform",			// UNIFORM	"gauss",			// GAUSS	"laplace",			// LAPLACE	"co_gauss",			// CO_GAUSS	"co_laplace",			// CO_LAPLACE	"clus_gauss",			// CLUS_GAUSS	"clus_orth_flats",		// CLUS_ORTH_FLATS	"clus_ellipsoids",		// CLUS_ELLIPSOIS	"multi_clus"};			// MULTI_CLUS//----------------------------------------------------------------------//  Short utility functions//	lookUp - look up a name in table and return index//----------------------------------------------------------------------static int lookUp(			// look up name in table    const string &arg,			// name to look up    const string *table,		// name table    int		size)			// table size{    int i;    for (i = 0; i < size; i++) {	if (arg == table[i]) return i;    }    return i;}//----------------------------------------------------------------------// elapsedTime// Utility for computing elapsed time.//----------------------------------------------------------------------#ifndef CLOCKS_PER_SEC			// define clocks-per-second if needed#define CLOCKS_PER_SEC          1000000	// (should be in time.h)#endifinline double elapsedTime(clock_t start) {    return double(clock() - start)/double(CLOCKS_PER_SEC);}//------------------------------------------------------------------------// Function declarations//------------------------------------------------------------------------static void runKmeans(			// run k-means algorithm    KMalg		alg,		// which algorithm    KMdataPtr		dataPts,	// data points    KMterm		&term);		// termination conditionstatic void genDataPts(			// generate data points    KMdataPtr		&dataPts,	// point array (returned)    bool		new_clust);	// new cluster centers desired?static void readDataPts(		// read data/query points from file    KMdataPtr		&dataPts,	// point array (returned)    int			n,		// desired number of points    const string	&file_nm);	// file namestatic void buildKcTree(		// build kc-tree for points    KMdataPtr		dataPts);	// point array//------------------------------------------------------------------------//  Default execution parameters//------------------------------------------------------------------------const int	DEF_dim		= 2;		// dimensionconst int	DEF_data_size	= 100;		// data sizeconst int	DEF_kcenters	= 5;		// number of centersconst int	DEF_max_swaps	= 1;		// max number of swapsconst double	DEF_damp_factor	= 1;		// Lloyd's dampening factorconst int	DEF_n_color	= 5;		// number of colorsconst bool	DEF_new_clust	= false;	// new clusters flagconst int	DEF_max_dim	= 1;		// max flat dimensionconst Distrib	DEF_distr	= UNIFORM;	// distributionconst double	DEF_std_dev	= 1.00;		// standard deviationconst double	DEF_corr_coef	= 0.05;		// correlation coefconst double	DEF_clus_sep	= 0.0;		// cluster separationconst int	DEF_max_visit	= 0;		// number of points visitedconst int	DEF_seed	= 0;		// seed for random numbers						// termination parametersconst KMterm	DEF_term(0, 0, 10, 1,		// max total stages (10*n)			 0.10,			// min consec RDL			 0.10,			// min accum RDL			 100,			// max run stages			 0.50,			// init. prob. of acceptance			 20,			// temp. run length			 0.75);			// temp. reduction factorconst StatLev	DEF_stats	= SUMMARY;	// statistics output levelconst bool	DEF_print_points= false;	// print points?const bool	DEF_show_assign	= false;	// show point assignments?const bool	DEF_validate	= false;	// validate point assignments?ostream* const	DEF_out		= &cout;	// standard output streamostream* const	DEF_err		= &cerr;	// error output streamistream* const	DEF_in		= &cin;		// standard input stream//------------------------------------------------------------------------//  kmltest global variables - Execution options//------------------------------------------------------------------------					// basic size quantitiesint		dim; 			// dimensionint		data_size; 		// data sizeint		kcenters; 		// number of centersint		max_swaps;		// max number of swaps per rundouble		damp_factor;		// Lloyd's dampening factor					// distribution parametersint		n_color;		// number of colors (clusters)bool		new_clust;		// generate new clusters?int		max_dim;		// maximum flat dimensionDistrib		distr; 			// distributiondouble		corr_coef; 		// correlation coefdouble		std_dev; 		// standard deviationdouble		std_dev_lo; 		// low standard deviationdouble		std_dev_hi; 		// high standard deviationdouble		clus_sep;		// cluster separation					// other parametersKMterm		term;			// termination parametersbool		print_points;		// print points after generating?bool		show_assign;		// show point assignments?bool		validate;		// validate point assignments?double		kc_build_time = 0.0;	// time to build the kc-treedouble		exec_time = 0.0;	// execution timeifstream	inStream;		// input streamofstream	outStream;		// output stream//------------------------------------------------------------------------//  Initialize global parameters//------------------------------------------------------------------------static void initGlobals(){    dim			= DEF_dim;		// init execution parameters    data_size		= DEF_data_size;    kcenters		= DEF_kcenters;    max_swaps		= DEF_max_swaps;    damp_factor		= DEF_damp_factor;    n_color		= DEF_n_color;		// distribution parameters    new_clust		= DEF_new_clust;    max_dim		= DEF_max_dim;    distr		= DEF_distr;    corr_coef		= DEF_corr_coef;    std_dev		= DEF_std_dev;    std_dev_lo		= DEF_std_dev;    std_dev_hi		= DEF_std_dev;    clus_sep		= DEF_clus_sep;						// other parameters    kmIdum		= -DEF_seed;		// init. global seed for ran0()    term		= DEF_term;    kmStatLev		= DEF_stats;		// level of statistics detail    print_points	= DEF_print_points;	// print points?    show_assign		= DEF_show_assign;	// show point assignments?    validate		= DEF_validate;		// validate point assignments?    kmOut		= DEF_out;    kmErr		= DEF_err;    kmIn		= DEF_in;    kc_build_time	= 0.0;			// execution times    exec_time		= 0.0;}//------------------------------------------------------------------------// getCmdArgs - get and process command line arguments////	Syntax://	kmltest [-i infile] [-o outfile]//	//	where://	    infile		directive input file//	    outfile		output file////	If file is not specified, then the standard input and standard//	output are the defaults.//------------------------------------------------------------------------void getCmdArgs(int argc, char *argv[]){    int i = 1;    while (i < argc) {				// read arguments	if (!strcmp(argv[i], "-i")) {		// -i option	    inStream.open(argv[++i], ios::in);	// open input file	    if (!inStream) {		kmError("Cannot open input file", KMabort);	    }	    kmIn = &inStream;			// make this input stream	}	else if (!strcmp(argv[i], "-o")) {	// -o option	    outStream.open(argv[++i], ios::out);// open output file	    if (!outStream) {		kmError("Cannot open output file", KMabort);	    }	    kmOut = &outStream;			// make this output stream	}	else {					// illegal syntax	    *kmErr << "Syntax:\n"		   << "kmltest [-i infile] [-o outfile]\n"		   << "    where:\n"		   << "    infile         directive input file\n"		   << "    outfile        output file\n";	    exit(1);				// exit	}	i++;    }}//------------------------------------------------------------------------// getDirective - skip comments and read next directive//	Returns true if directive read, and false if eof seen.//------------------------------------------------------------------------static bool skipComment(			// skip any comments    istream		&in)			// input stream{    char ch = 0;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -