📄 kmltest.cpp
字号:
// Maximum total stages given as paramters// (a,...,d).// Default: 0,0,0,0.//// Options affecting algorithm execution:// --------------------------------------//// Options used in Lloyd's Algorithm and Hybrid Algorithms:// --------------------------------------------------------// damp_factor <float> A dampening factor in the interval// (0,1]. The value 1 is the standard// Lloyd's algorithm. Othewise, each point// is only moved by this fraction of the// way from its current location to the// centroid. Default: 1// min_accum_rdl <float> This is used in Lloyd's algorithm// algorithm which perform multiple swaps.// When performing p swaps, we actually may// perform fewer than p. We stop// performing swaps, whenever the total// distortion (from the start of the run)// has decreased by at least this amount.// Default: 0.10// max_run_stage <int> This is used in Lloyd's algorithm. A// run terminates after this many stages.// Default: 100//// Options specific to the swap algorithm:// ---------------------------------------// max_swaps <int> Maximum swaps at any given stage.// Default: 1//// Options specific to the hybrid algorithms:// ------------------------------------------// min_consec_rdl <float> This is used in the hybrid algorithms.// If the RDL of two consecutive runs is// less than this value Lloyd's algorithm// is deemed to have converged, and the run// ends.// Options specific to the (complex) hybrid algorithm:// ---------------------------------------------------// init_prob_accept <float> The initial probability of accepting a// solution that does not improve the// distortion.// temp_run_length <int> The number of stages before chaning the// temperature.// temp_reduc_factor <float> The factor by which temperature is// reduced at the end of a temperature run.//// Example:// --------// title Experiment_1A # experiment title// stats summary # print summary information// dim 2 # dimension 2//// data_size 5000 # 5000 data points// colors 30 # ...broken into 30 clusters// std_dev 0.025 # ...each with std deviation 0.025// distribution clus_gauss # clustered gaussian distribution// seed 1 # random number seed// gen_data_pts # generate the data points// // max_tot_stage 20 0 0 0 # terminate Lloyd's after 20 stages// print Running-lloyd's// run_kmeans lloyd # run using Lloyd's algorithm// // max_swaps 3 # at most 3 swaps// max_tot_stage 0 3 0 2 # at most 3*k^2 = 1200 stages// max_run_stage 50 # at most 50 stages per run// min_accum_rdl 0.02 # stop run if distortion drops 2%// print Running-swap// run_kmeans swap # run using swap heuristic//------------------------------------------------------------------------//------------------------------------------------------------------------// Statistics output levels (see KMeans.h for corresponding enumeration)//------------------------------------------------------------------------static const string stat_table[N_STAT_LEVELS] = { "silent", // SILENT "exec_time", // EXEC_TIME "summary", // SUMMARY "phase", // PHASE "run", // RUN "stage", // STAGE "step", // STEP "centers", // CENTERS "tree"}; // TREE//------------------------------------------------------------------------// k-means algorithms// (See KMeans.h for coresponding enumeration)//------------------------------------------------------------------------static const string kmAlgTable[N_KM_ALGS] = { "lloyd", // LLOYD only "swap", // SWAP only "hybrid", // HYBRID alternation "EZ-hybrid", // EZ_HYBRID alternation "--illegal--"}; // RANDOM (not allowed)//------------------------------------------------------------------------// Distributions//------------------------------------------------------------------------typedef enum { // distributions UNIFORM, // uniform over cube [-1,1]^d. GAUSS, // Gaussian with mean 0 LAPLACE, // Laplacian, mean 0 and var 1 CO_GAUSS, // correlated Gaussian CO_LAPLACE, // correlated Laplacian CLUS_GAUSS, // clustered Gaussian CLUS_ORTH_FLATS, // clustered on orthog flats CLUS_ELLIPSOIDS, // clustered on ellipsoids MULTI_CLUS, // multi-sized clusters N_DISTRIBS} Distrib;static const string distr_table[N_DISTRIBS] = { "uniform", // UNIFORM "gauss", // GAUSS "laplace", // LAPLACE "co_gauss", // CO_GAUSS "co_laplace", // CO_LAPLACE "clus_gauss", // CLUS_GAUSS "clus_orth_flats", // CLUS_ORTH_FLATS "clus_ellipsoids", // CLUS_ELLIPSOIS "multi_clus"}; // MULTI_CLUS//----------------------------------------------------------------------// Short utility functions// lookUp - look up a name in table and return index//----------------------------------------------------------------------static int lookUp( // look up name in table const string &arg, // name to look up const string *table, // name table int size) // table size{ int i; for (i = 0; i < size; i++) { if (arg == table[i]) return i; } return i;}//----------------------------------------------------------------------// elapsedTime// Utility for computing elapsed time.//----------------------------------------------------------------------#ifndef CLOCKS_PER_SEC // define clocks-per-second if needed#define CLOCKS_PER_SEC 1000000 // (should be in time.h)#endifinline double elapsedTime(clock_t start) { return double(clock() - start)/double(CLOCKS_PER_SEC);}//------------------------------------------------------------------------// Function declarations//------------------------------------------------------------------------static void runKmeans( // run k-means algorithm KMalg alg, // which algorithm KMdataPtr dataPts, // data points KMterm &term); // termination conditionstatic void genDataPts( // generate data points KMdataPtr &dataPts, // point array (returned) bool new_clust); // new cluster centers desired?static void readDataPts( // read data/query points from file KMdataPtr &dataPts, // point array (returned) int n, // desired number of points const string &file_nm); // file namestatic void buildKcTree( // build kc-tree for points KMdataPtr dataPts); // point array//------------------------------------------------------------------------// Default execution parameters//------------------------------------------------------------------------const int DEF_dim = 2; // dimensionconst int DEF_data_size = 100; // data sizeconst int DEF_kcenters = 5; // number of centersconst int DEF_max_swaps = 1; // max number of swapsconst double DEF_damp_factor = 1; // Lloyd's dampening factorconst int DEF_n_color = 5; // number of colorsconst bool DEF_new_clust = false; // new clusters flagconst int DEF_max_dim = 1; // max flat dimensionconst Distrib DEF_distr = UNIFORM; // distributionconst double DEF_std_dev = 1.00; // standard deviationconst double DEF_corr_coef = 0.05; // correlation coefconst double DEF_clus_sep = 0.0; // cluster separationconst int DEF_max_visit = 0; // number of points visitedconst int DEF_seed = 0; // seed for random numbers // termination parametersconst KMterm DEF_term(0, 0, 10, 1, // max total stages (10*n) 0.10, // min consec RDL 0.10, // min accum RDL 100, // max run stages 0.50, // init. prob. of acceptance 20, // temp. run length 0.75); // temp. reduction factorconst StatLev DEF_stats = SUMMARY; // statistics output levelconst bool DEF_print_points= false; // print points?const bool DEF_show_assign = false; // show point assignments?const bool DEF_validate = false; // validate point assignments?ostream* const DEF_out = &cout; // standard output streamostream* const DEF_err = &cerr; // error output streamistream* const DEF_in = &cin; // standard input stream//------------------------------------------------------------------------// kmltest global variables - Execution options//------------------------------------------------------------------------ // basic size quantitiesint dim; // dimensionint data_size; // data sizeint kcenters; // number of centersint max_swaps; // max number of swaps per rundouble damp_factor; // Lloyd's dampening factor // distribution parametersint n_color; // number of colors (clusters)bool new_clust; // generate new clusters?int max_dim; // maximum flat dimensionDistrib distr; // distributiondouble corr_coef; // correlation coefdouble std_dev; // standard deviationdouble std_dev_lo; // low standard deviationdouble std_dev_hi; // high standard deviationdouble clus_sep; // cluster separation // other parametersKMterm term; // termination parametersbool print_points; // print points after generating?bool show_assign; // show point assignments?bool validate; // validate point assignments?double kc_build_time = 0.0; // time to build the kc-treedouble exec_time = 0.0; // execution timeifstream inStream; // input streamofstream outStream; // output stream//------------------------------------------------------------------------// Initialize global parameters//------------------------------------------------------------------------static void initGlobals(){ dim = DEF_dim; // init execution parameters data_size = DEF_data_size; kcenters = DEF_kcenters; max_swaps = DEF_max_swaps; damp_factor = DEF_damp_factor; n_color = DEF_n_color; // distribution parameters new_clust = DEF_new_clust; max_dim = DEF_max_dim; distr = DEF_distr; corr_coef = DEF_corr_coef; std_dev = DEF_std_dev; std_dev_lo = DEF_std_dev; std_dev_hi = DEF_std_dev; clus_sep = DEF_clus_sep; // other parameters kmIdum = -DEF_seed; // init. global seed for ran0() term = DEF_term; kmStatLev = DEF_stats; // level of statistics detail print_points = DEF_print_points; // print points? show_assign = DEF_show_assign; // show point assignments? validate = DEF_validate; // validate point assignments? kmOut = DEF_out; kmErr = DEF_err; kmIn = DEF_in; kc_build_time = 0.0; // execution times exec_time = 0.0;}//------------------------------------------------------------------------// getCmdArgs - get and process command line arguments//// Syntax:// kmltest [-i infile] [-o outfile]// // where:// infile directive input file// outfile output file//// If file is not specified, then the standard input and standard// output are the defaults.//------------------------------------------------------------------------void getCmdArgs(int argc, char *argv[]){ int i = 1; while (i < argc) { // read arguments if (!strcmp(argv[i], "-i")) { // -i option inStream.open(argv[++i], ios::in); // open input file if (!inStream) { kmError("Cannot open input file", KMabort); } kmIn = &inStream; // make this input stream } else if (!strcmp(argv[i], "-o")) { // -o option outStream.open(argv[++i], ios::out);// open output file if (!outStream) { kmError("Cannot open output file", KMabort); } kmOut = &outStream; // make this output stream } else { // illegal syntax *kmErr << "Syntax:\n" << "kmltest [-i infile] [-o outfile]\n" << " where:\n" << " infile directive input file\n" << " outfile output file\n"; exit(1); // exit } i++; }}//------------------------------------------------------------------------// getDirective - skip comments and read next directive// Returns true if directive read, and false if eof seen.//------------------------------------------------------------------------static bool skipComment( // skip any comments istream &in) // input stream{ char ch = 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -