📄 kmlocal.h
字号:
//----------------------------------------------------------------------// File: KMlocal.h// Programmer: David Mount// Last modified: 05/14/04// Description: Include file for generic local search kmeans.//----------------------------------------------------------------------// Copyright (C) 2004-2005 David M. Mount and University of Maryland// All Rights Reserved.// // This program is free software; you can redistribute it and/or modify// it under the terms of the GNU General Public License as published by// the Free Software Foundation; either version 2 of the License, or (at// your option) any later version. See the file Copyright.txt in the// main directory.// // The University of Maryland and the authors make no representations// about the suitability or fitness of this software for any purpose.// It is provided "as is" without express or implied warranty.//----------------------------------------------------------------------#ifndef KM_LOCAL_H#define KM_LOCAL_H//----------------------------------------------------------------------// basic includes//----------------------------------------------------------------------#include <cmath> // math includes (exp, log)#include "KMeans.h" // kmeans includes#include "KMdata.h" // data points#include "KMfilterCenters.h" // centers#include "KMterm.h" // termination conditions#include "KMrand.h" // random number generationclass KMlocal;typedef KMlocal* KMlocalPtr; // generic algorithm pointer//------------------------------------------------------------------------// Generic Local Search Overview// KMlocal provides a generic algorithm for k-means clustering by// local search. This generic algorithm is controlled by a class,// KMlocal, whose methods provide the particular elements of each// algorithm.//// Overview:// ---------// The generic algorithm begins by generating an initial solution// "curr" and saving it in "best". These objects are local to the// KMlocal structure. The value of "curr" reflects the current// solution and "best" reflects the best solution seen so far.//// The algorithm consists of some number of basic iterations,// called "stages". Each stage involves the execution of one step// of either the swap heuristic or Lloyd's algorithm.//// Stages are grouped into "runs". Intuitively, a run involves a// (small) number of stages in search of a better solution. A run// might end, say, because a better solution was found or a fixed// number of stages have been performed without any improvement. //// After a run is finished, we check to see whether we want to// "accept" the solution. Presumably this happens if the cost is// lower, but it may happen even if the cost is inferior in other// circumstances (e.g., as part of a simulated annealing approach).// Accepting a solution means copying the current solution to the// saved solution. In some cases, the acceptance may involve// reseting the current solution to a random solution.//// Generic Algorithm:// ------------------// The generic local search algorithm is shown below. The various// functions are overridden by the specific classes which provide// the concrete details.// // reset() // resets curr and best// while ( !isDone() ) { // while not done// beginRun() // begin a new run// do { // do while run is not done// beginStage() // end of stage processing// method = selectMethod() // select a method// switch( method ) { // apply the method// LLOYD: curr.Lloyd(); break// SWAP: curr.Swap(); break// RANDOM: curr.Random(); break// }// endStage() // end of stage processing// } while ( !isRunDone() ) // while run is not done// tryAcceptance() // accept if appropriate// endRun() // end of run processing// }// return best // return best solution//// Termination Parameters// ----------------------// Each algorithm relies on various parameter settings to determine// termination conditions for runs and phases. These settings are// stored in a class KMterm (see KMterm.h).//// Parameters (in term)// --------------------// maxTotStage// Maximum number of stages total.//------------------------------------------------------------------------class KMlocal { // generic local searchprotected: // fixed quantities int nPts; // number of points int kCtrs; // number of centers int dim; // dimension KMterm term; // termination conditions int maxTotStage; // max total stages (from term) // varying quantities int stageNo; // current stage number int runInitStage; // stage at which run started KMfilterCenters curr; // current solution KMfilterCenters best; // saved solutionprotected: // utility functions virtual void printStageStats() { // print stage information if (kmStatLev >= STAGE) { *kmOut << "\t<stage: " << stageNo << " curr: " << curr.getAvgDist() << " best: " << best.getAvgDist() << " >" << endl; } }public: // constructor KMlocal(const KMfilterCenters &sol, const KMterm &t) : term(t), curr(sol), best(sol) { nPts = sol.getNPts(); kCtrs = sol.getK(); dim = sol.getDim(); stageNo = 0; maxTotStage = term.getMaxTotStage(kCtrs, nPts); } virtual ~KMlocal() { } // virtual destructor virtual KMfilterCenters execute(); // execute the algorithm int getTotalStages() const { // return total no. of stages return stageNo; }protected: // overridden by subclasses virtual void reset() { // reset everything stageNo = 0; runInitStage = 0; curr.genRandom(); // generate random centers curr.getDist(); // compute initial distortion best = curr; } virtual bool isDone() const { // are we done? return stageNo >= maxTotStage; } virtual void beginRun() { // begin of run processing runInitStage = stageNo; } virtual void beginStage() { } // start of stage processing virtual KMalg selectMethod() = 0; // method: LLOYD or SWAP virtual void endStage() { // end of stage processing stageNo++; } virtual bool isRunDone() { // is run done? return isDone(); } virtual void endRun() { } // end of run processing virtual void tryAcceptance() { // test acceptance if (curr.getDist() < best.getDist()) { // is current distortion lower? best = curr; // then best the current } }};//------------------------------------------------------------------------// Concrete Implementations// The generic KMlocal class is an abstract class. Each derived// class provides concrete implementation of the various member// functions, such as selectMethod() and isRunDone().////// Relative Distortion Loss (RDL):// -------------------------------// There are some concepts that are important to run/phase// transitions. One is the maximum number of stages. Most// algorithms provide some sort of parameter that limits the number// of stages that the algorithm can spend in a particular run// (before giving up). The second is the relative distortion loss,// or RDL. (See also KMterm.h.) The RDL is defined://// initDistortion - currDistortion// RDL = -------------------------------// initDistortion//// Note that a positive value indicates that the distortion has// decreased. The definition of "initDistortion" depends on the// algorithm. It may be the distortion of the previous stage// (RDL = consecutive RDL), or it may be the distortion at the// beginning of the run (RDL = accumulated RDL).//------------------------------------------------------------------------//------------------------------------------------------------------------// KMlocalLloyds - Lloyd's algorithm with random restarts// The algorithm is broken into phases, and each phase is broken// into runs. Each phase starts by sampling center points at// random. Each run is provided two parameters, a maximum number// of runs per stage (maxRunStage) and a minimum accumulated// relative distortion loss (minAccumRDL). If the accumulated RDL// for the run exceeds this value, then the run ends in success.// If the number of stages is exceeded before this happens, the run// ends in failure. The phase ends with the first failed run.//// This multi-level algorithm is implemented as follows. When a// run is finished, we test whether the accumlated RDL exceeds the// minAccumRDL. If not, then the run is unsuccessful and so we// need to end the phase.//// Parameters (in term)// --------------------// maxRunStage// Maximum number of stages per run.// minAccumRDL// Minimum accumlated RDL for stage to be considered// successful.//// State variables// ---------------// initRunDist// The distortion at the start of the run.// isNewPhase// True if this is the first stage at the start of a new// phase. If true, then random centers are generated.// Note that since random centers have been generated as// part of reset(), the initial value is false.//// Overridden methods// ------------------// reset()// Do base class resetting. Initialize isNewPhase to false// and save the initial run distortion.// selectMethod()// If new phase starting the select centers at random,// otherwise do Lloyds algorithm.// endStage()// Do base class endStage processing. If there has been// an improvement, save the current solution. Output stage// information.// isRunDone()// If the number of stages exceeds the maximum runs per// stages, then we are done. If this is the first stage of// the phase, then we are not done, and we do beginning of// phase processing (by resetting the isNewPhase flag and// save the current distortion). Otherwise, if the// relative distortion loss (RDL) is greater than// minAccumRDL then we are done (success).// endRun()// If accumulated RDL < minAccumRDL then the run ended// unsuccessfully (too many stages) and we request the// start of a new phase. Otherwise the run has ended// successfully, and we start a new run by saving the// current run distortion.//------------------------------------------------------------------------class KMlocalLloyds : public KMlocal {private: double initRunDist; // initial run distortion bool isNewPhase; // starting new phaseprotected: double accumRDL() // relative RDL for run { return (initRunDist - curr.getDist()) / initRunDist; } virtual void printStageStats() { // print end of stage info if (kmStatLev >= STAGE) { *kmOut << "\t<stage: " << stageNo << " curr: " << curr.getAvgDist() << " best: " << best.getAvgDist() << " accumRDL: " << accumRDL()*100 << "%" << " >" << endl; } } virtual void printRunStats() { // print end of run info if (kmStatLev >= STAGE) { *kmOut << " <Generating new random centers>" << endl; } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -