📄 kmlocal.h

📁 高效的k-means算法实现
💻 H
📖 第 1 页 / 共 3 页
字号:
public:    						// constructor    KMlocalLloyds(const KMfilterCenters &sol, const KMterm &t)	  : KMlocal(sol, t) { }protected:					// overridden methods    virtual void reset() {    	KMlocal::reset();			// reset base class	isNewPhase = false;			// first phase was started	initRunDist = curr.getDist();		// initialize run dist        printStageStats();    }    virtual KMalg selectMethod() {		// method = Lloyd's    	return (isNewPhase ? RANDOM : LLOYD);	// ...unless start of phase    }    virtual void endStage() {			// end of stage processing	KMlocal::endStage();			// base class processing						// get distortions	if (curr.getAvgDist() < best.getAvgDist())	    best = curr;			// update if better	printStageStats();    }    virtual bool isRunDone() {			// is run done	if (KMlocal::isRunDone() ||		// check base conditions	    stageNo - runInitStage >= term.getMaxRunStage()) {	    return true;			// too many stages	}	else if (isNewPhase) {			// start of new phase?	    isNewPhase = false;			// remove flag	    initRunDist = curr.getDist();	// initialize run distortion	    return false;			// run is just starting	}	else {					// continue if improvement	  return accumRDL() >= term.getMinAccumRDL();	}    }    virtual void endRun() { 			// end of run processing	if (accumRDL() < term.getMinAccumRDL()){// unsuccessful run?	    isNewPhase = true;			// start a new phase	}	else {	    initRunDist = curr.getDist();	// next run distortion	}	printRunStats();    }};//------------------------------------------------------------------------// KMlocalSwap - an implementation of the p-Swap heuristic//	Each run consists of p executions of the swap heuristic.  Each//	sequence of swaps is treated as a single stage.////	Parameters (in term)//	--------------------//	(none)////	State variables//	---------------//	maxSwaps//		Maximum number of swaps per run.//	swapNo	Number of swaps so far this run.////	Overridden methods//	------------------//	beginRun()//		Reset count of swaps.//	selectMethod()//		Swap//	endStage()//		(Does nothing, since stage doesn't end until run ends).//	isRunDone()//		If the number of swaps (incremented) exceeds maxSwaps.//	endRun()//		Gets new distortion and increments stage counter.  This//		is the real end of a stage.//	tryAcceptance()//		If distortion decreased copy current to best, else//		restore best centers.//------------------------------------------------------------------------class KMlocalSwap : public KMlocal {private:    const int	maxSwaps;			// maximum swaps per run    int		swapNo;				// no. of swaps this runpublic:    						// constructor    KMlocalSwap(const KMfilterCenters &sol, const KMterm &t, int p = 1)	: KMlocal(sol, t), maxSwaps(p) {    }protected:					// overridden methods    virtual void reset() {    	KMlocal::reset();			// reset base class        printStageStats();    }    virtual void beginRun() {			// start of run processing	KMlocal::beginRun();			// base class processing	swapNo = 0;				// init number of swaps    }    virtual KMalg selectMethod() {		// method = Swap	return SWAP;    }    virtual void endStage() { }			// do nothing    virtual bool isRunDone() { 			// run is done	return  KMlocal::isRunDone() ||		// base class say's done		++swapNo >= maxSwaps;		// or enough swaps done    }    virtual void endRun() { 			// end of run processing	curr.getDist();    	stageNo++;	printStageStats();			// print stage info    }    virtual void tryAcceptance() { 		// test acceptance	if (curr.getDist() < best.getDist()) {	// current distortion lower?	  best = curr;				// then save the current	}	else {					// current distortion worse	  curr = best;				// restore old centers	}    }};//------------------------------------------------------------------------// KMlocalHybrid - a hybrid version combining Lloyd's and swaps.////	Overview//	--------//	This implements a hybrid algorithm, which combines both of the//	previous methods along with something like simulated annealing.//	Because this uses a fast annealing schedule (T = T*timeFact) it//	should probably be called simulated quenching.////	The algorithm's execution is broken into the following different//	groups.////	Stage:	One invocation of the Swap or Lloyd's algorithm.//	Run:	A run consists of a consecutive sequence of swaps//		followed by a consecutive sequence of Lloyd's steps.  A//		graphical representation of one run is presented below.////	      +--+            +---+             +--------> save -----> //	      |  |            |   |            Y|//	      V  |            V   |             |     N//	----> Swap ------>   Lloyd's   ----> Accept? ----> restore -->//	                                                     old////	Decisions//	---------//	There are three decisions which this algorithm needs to make.////	Continue swapping or move on to Lloyd's?//	    This is based on the simulated annealing decision choice,//	    based on the consecutive RDL.////	Make another pass through Lloyd's or go to acceptance?//	    This is based on whether the relative improvement since the//	    last stage (consecutive relative distortion loss) is above a//	    certain fixed threshold (minConsecRDL).////	Accept the current solution://	    If the current solution is better than the saved solution,//	    then yes.  Otherwise, use the simulated annealing decision//	    choice, based on the accumulated RDL.////	Simulated Annealing Choice//	--------------------------//	Following the principal of simulated annealing, we somtimes//	chose to accept a solution, even if it is not an improvement.//	This choice occurs with probability:////		exp(RDL/T),////	where RDL is the relative distortion loss (relative to the//	saved solution), and T is the current temperature.  Note that//	if RDL > 0 (improvement) then this quantity is > 1, and so we//	always accept.  (Note that this is not formally correct, since//	in simulated annealing the probability should be based on the//	absolute distortion change not the relative distortion change.)////	Annealing Schedule//	------------------//	The temperature value T is a decreasing function of the number//	of the number of stages.  It starts at some initial value T0 and//	decreases slowly over time.  Rather than using the standard (slow)//	Boltzman annealing schedule, we use the following fast annealing//	schedule, every L stages we set////		T = TF * T,////	where://		L (= tempRunLength) is an integer parameter set by the//		    user.  (Presumably it depends on the number of//		    centers and the dimension of the space.)//		TF (= tempReducFactor) is a real number of the form 1-x,//		    for some small x.////	These parameters are provided by the user and are stored in the//	termination condition class.////	Initial Temperature//	-------------------//	The initial temperature T0 is a tricky thing to set.  The user//	supplies a parameter p0 = initProbAccept, the initial//	probability of accepting a random swap.  However, the//	probability of acceting a swap depends on the given RDL value.//	To estimate this, for the first L runs we use p0 as the//	probability.  Over these runs we compute the average rdl value.//	Once the first L runs have completed, we set T0 so that:////		exp(-averageRDL/T0) = initProbAccept,////	or equivalently////		T0 = -averageRDL/(ln initProbAccept).////	Parameters (in term)//	--------------------//	initProbAccept//		Initial probability of accepting an solution that does//		not alter the distortion.//	tempRunLength//		The number of stages before chaning the temperature.//	tempReducFactor//		The factor by which temperature is reduced at the end of//		a temperature run.//	minConsecRDL//		Minimum consecutive RDL needed to keep Lloyd's algorithm//		going.////	State variables//	---------------//	temperature//		Temperature parameter for simulated annealing.//	initTempRunStage//		The stage number at the start of a temperature run.//	areSwapping//		True if we are still in swapping portion of a run, and//		false if we are in the Lloyd's portion of the run.//	prevDist//		In order to compute the consecutive RDL we need to save//		the distortion at the start of each trip through Lloyd's//		algorithm.//	save	Simulated annealing may move to a worse solution at//		times.  The "best" solution stores the global best.  The//		"save" solution is the solution that simulated annealing//		falls back to if a solution is not accepted.//	sumTrials//		Sum of rdl values during the initial trials, where we//		are trying to estimate the mean RDL value.//	trialCt//		A counter which starts at the number of initial trials.//		When it reaches 0, we compute the average RDL value and//		set the initial temperature value.////	Utilities particular to Simulated Annealing//	-------------------------------------------//	simAnnealAccept()//		Random acceptance test for simulated annealing.//	initTempRun()//		Initialize a temperature run process by computing the//		initial temperature value and saving the current//		stage number in initTempRunStage.//	isTempRunDone()//		Check whether the current temperature run is done.//	endTempRun()//		End a temperature run by updating the temperature value//		and saving the current stage number in initTempRunStage.////	Overridden methods//	------------------//	reset()//		Do base class resetting.  Initialize areSwapping to//		true.  Save the current solution in save.  Initialize//		temperature runs.//	beginStage()//		Save current distortion in prevDist (for computing//		consecutive RDL).//	selectMethod()//		If we are swapping, use Swap, and otherwise use Lloyd's.//	endStage()//		Increment the stage number, get the distortion, and print//		the end-of-stage information.//	isRunDone()//		If we are swapping, then compute the simulated annealing
💿 文件大小 886 K
👤 上传用户 5201314
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#k-means #算法
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -