📄 kmlocal.h

📁 高效的k-means算法实现
💻 H
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
//----------------------------------------------------------------------//	File:           KMlocal.h//	Programmer:     David Mount//	Last modified:  05/14/04//	Description:    Include file for generic local search kmeans.//----------------------------------------------------------------------// Copyright (C) 2004-2005 David M. Mount and University of Maryland// All Rights Reserved.// // This program is free software; you can redistribute it and/or modify// it under the terms of the GNU General Public License as published by// the Free Software Foundation; either version 2 of the License, or (at// your option) any later version.  See the file Copyright.txt in the// main directory.// // The University of Maryland and the authors make no representations// about the suitability or fitness of this software for any purpose.// It is provided "as is" without express or implied warranty.//----------------------------------------------------------------------#ifndef KM_LOCAL_H#define KM_LOCAL_H//----------------------------------------------------------------------//  basic includes//----------------------------------------------------------------------#include <cmath>				// math includes (exp, log)#include "KMeans.h"				// kmeans includes#include "KMdata.h"				// data points#include "KMfilterCenters.h"			// centers#include "KMterm.h"				// termination conditions#include "KMrand.h"				// random number generationclass KMlocal;typedef KMlocal* KMlocalPtr;			// generic algorithm pointer//------------------------------------------------------------------------//  Generic Local Search Overview//	KMlocal provides a generic algorithm for k-means clustering by//	local search.  This generic algorithm is controlled by a class,//	KMlocal, whose methods provide the particular elements of each//	algorithm.////	Overview://	---------//	The generic algorithm begins by generating an initial solution//	"curr" and saving it in "best".  These objects are local to the//	KMlocal structure.   The value of "curr" reflects the current//	solution and "best" reflects the best solution seen so far.////	The algorithm consists of some number of basic iterations,//	called "stages".  Each stage involves the execution of one step//	of either the swap heuristic or Lloyd's algorithm.////	Stages are grouped into "runs".  Intuitively, a run involves a//	(small) number of stages in search of a better solution.  A run//	might end, say, because a better solution was found or a fixed//	number of stages have been performed without any improvement. ////	After a run is finished, we check to see whether we want to//	"accept" the solution.  Presumably this happens if the cost is//	lower, but it may happen even if the cost is inferior in other//	circumstances (e.g., as part of a simulated annealing approach).//	Accepting a solution means copying the current solution to the//	saved solution.  In some cases, the acceptance may involve//	reseting the current solution to a random solution.////	Generic Algorithm://	------------------//	The generic local search algorithm is shown below.  The various//	functions are overridden by the specific classes which provide//	the concrete details.//	//	reset()					// resets curr and best//	while ( !isDone() ) {			// while not done//	  beginRun()				// begin a new run//	  do {					// do while run is not done//	    beginStage()			// end of stage processing//	    method = selectMethod()		// select a method//	    switch( method ) {			// apply the method//	      LLOYD:  curr.Lloyd(); break//	      SWAP:   curr.Swap();  break//	      RANDOM: curr.Random();  break//	    }//	    endStage()				// end of stage processing//	  } while ( !isRunDone() )		// while run is not done//	  tryAcceptance()			// accept if appropriate//	  endRun()				// end of run processing//	}//	return best				// return best solution////	Termination Parameters//	----------------------//	Each algorithm relies on various parameter settings to determine//	termination conditions for runs and phases.  These settings are//	stored in a class KMterm (see KMterm.h).////	Parameters (in term)//	--------------------//	maxTotStage//		Maximum number of stages total.//------------------------------------------------------------------------class KMlocal {				// generic local searchprotected:    					// fixed quantities    int			nPts;			// number of points    int			kCtrs;			// number of centers    int			dim;			// dimension    KMterm		term;			// termination conditions    int			maxTotStage;		// max total stages (from term)					// varying quantities    int			stageNo;		// current stage number    int			runInitStage;		// stage at which run started    KMfilterCenters	curr;			// current solution    KMfilterCenters	best;			// saved solutionprotected:					// utility functions    virtual void printStageStats() {		// print stage information	if (kmStatLev >= STAGE) {            *kmOut << "\t<stage: "	<< stageNo                 << " curr: "		<< curr.getAvgDist()                 << " best: "		<< best.getAvgDist()		 << " >" << endl;	}    }public:    						// constructor    KMlocal(const KMfilterCenters &sol, const KMterm &t)	: term(t), curr(sol), best(sol) {	nPts    = sol.getNPts();	kCtrs   = sol.getK();	dim     = sol.getDim();	stageNo = 0;	maxTotStage = term.getMaxTotStage(kCtrs, nPts);    }    virtual ~KMlocal() { }			// virtual destructor    virtual KMfilterCenters execute();		// execute the algorithm    int getTotalStages() const {		// return total no. of stages      return stageNo;    }protected:					// overridden by subclasses    virtual void reset() {			// reset everything	stageNo = 0;	runInitStage = 0;	curr.genRandom();			// generate random centers	curr.getDist();				// compute initial distortion	best = curr;    }    virtual bool isDone() const {		// are we done?	return stageNo >= maxTotStage;    }    virtual void beginRun() {			// begin of run processing	runInitStage = stageNo;    }    virtual void beginStage() { }		// start of stage processing    virtual KMalg selectMethod() = 0;		// method: LLOYD or SWAP    virtual void endStage() {			// end of stage processing	stageNo++;    }    virtual bool isRunDone() {			// is run done?	return isDone();    }    virtual void endRun() { } 			// end of run processing    virtual void tryAcceptance() { 		// test acceptance	if (curr.getDist() < best.getDist()) {	// is current distortion lower?	    best = curr;			// then best the current	}    }};//------------------------------------------------------------------------// Concrete Implementations//	The generic KMlocal class is an abstract class.  Each derived//	class provides concrete implementation of the various member//	functions, such as selectMethod() and isRunDone().//////	Relative Distortion Loss (RDL)://	-------------------------------//	There are some concepts that are important to run/phase//	transitions.  One is the maximum number of stages.  Most//	algorithms provide some sort of parameter that limits the number//	of stages that the algorithm can spend in a particular run//	(before giving up).  The second is the relative distortion loss,//	or RDL.  (See also KMterm.h.)  The RDL is defined:////		initDistortion - currDistortion//       RDL =  -------------------------------//                    initDistortion////	Note that a positive value indicates that the distortion has//	decreased.  The definition of "initDistortion" depends on the//	algorithm.  It may be the distortion of the previous stage//	(RDL = consecutive RDL), or it may be the distortion at the//	beginning of the run (RDL = accumulated RDL).//------------------------------------------------------------------------//------------------------------------------------------------------------// KMlocalLloyds - Lloyd's algorithm with random restarts//	The algorithm is broken into phases, and each phase is broken//	into runs.  Each phase starts by sampling center points at//	random.  Each run is provided two parameters, a maximum number//	of runs per stage (maxRunStage) and a minimum accumulated//	relative distortion loss (minAccumRDL).  If the accumulated RDL//	for the run exceeds this value, then the run ends in success.//	If the number of stages is exceeded before this happens, the run//	ends in failure.  The phase ends with the first failed run.////	This multi-level algorithm is implemented as follows.  When a//	run is finished, we test whether the accumlated RDL exceeds the//	minAccumRDL.  If not, then the run is unsuccessful and so we//	need to end the phase.////	Parameters (in term)//	--------------------//	maxRunStage//		Maximum number of stages per run.//	minAccumRDL//		Minimum accumlated RDL for stage to be considered//		successful.////	State variables//	---------------//	initRunDist//		The distortion at the start of the run.//	isNewPhase//		True if this is the first stage at the start of a new//		phase.  If true, then random centers are generated.//		Note that since random centers have been generated as//		part of reset(), the initial value is false.////	Overridden methods//	------------------//	reset()//		Do base class resetting.  Initialize isNewPhase to false//		and save the initial run distortion.//	selectMethod()//		If new phase starting the select centers at random,//		otherwise do Lloyds algorithm.//	endStage()//		Do base class endStage processing.  If there has been//		an improvement, save the current solution.  Output stage//		information.//	isRunDone()//		If the number of stages exceeds the maximum runs per//		stages, then we are done.  If this is the first stage of//		the phase, then we are not done, and we do beginning of//		phase processing (by resetting the isNewPhase flag and//		save the current distortion).  Otherwise, if the//		relative distortion loss (RDL) is greater than//		minAccumRDL then we are done (success).//	endRun()//		If accumulated RDL < minAccumRDL then the run ended//		unsuccessfully (too many stages) and we request the//		start of a new phase. Otherwise the run has ended//		successfully, and we start a new run by saving the//		current run distortion.//------------------------------------------------------------------------class KMlocalLloyds : public KMlocal {private:    double	initRunDist;			// initial run distortion    bool	isNewPhase;			// starting new phaseprotected:    double accumRDL()				// relative RDL for run      { return (initRunDist - curr.getDist()) / initRunDist; }    virtual void printStageStats() {		// print end of stage info	if (kmStatLev >= STAGE) {	    *kmOut << "\t<stage: "	<< stageNo         	 << " curr: "		<< curr.getAvgDist()         	 << " best: "		<< best.getAvgDist()	    	 << " accumRDL: "	<< accumRDL()*100 << "%"		 << " >" << endl;	}    }    virtual void printRunStats() {		// print end of run info	if (kmStatLev >= STAGE) {	    *kmOut << "    <Generating new random centers>" << endl;	}    }
12 3 下一页
💿 文件大小 886 K
👤 上传用户 5201314
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#k-means #算法
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -