gmanifold.h

来自「一个由Mike Gashler完成的机器学习方面的includes neural」· C头文件 代码 · 共 265 行

H
265
字号
/*	Copyright (C) 2006, Mike Gashler	This library is free software; you can redistribute it and/or	modify it under the terms of the GNU Lesser General Public	License as published by the Free Software Foundation; either	version 2.1 of the License, or (at your option) any later version.	see http://www.gnu.org/copyleft/lesser.html*/#ifndef __GMANIFOLD_H__#define __GMANIFOLD_H__#include "GLearner.h"#include "GMacros.h"class GIntQueue;class GMatrix;class GVector;class GKNN;struct GManifoldSculptingNeighbor;// Manifold Sculptingclass GManifoldSculpting{public:	enum PreProcessType	{		PP_NONE, // Just use ManifoldSculpting to learn the whole manifold		PP_PCA, // Pre-process with PCA, then do local refinement with ManifoldSculpting		PP_LLE, // Pre-process with LLE, then clean up the mess with ManifoldSculpting	};protected:	int m_nDataPoints;	int m_nDimensions;	int m_nNeighbors;	int m_nDataIndex;	int m_nValueIndex;	int m_nRecordSize;	int m_nCurrentDimension;	int m_nTargetDimensions;	int m_nPass;	int m_nSmoothingAdvantage;	unsigned char* m_pData;	double m_dAveNeighborDist;	double m_dSquishingRate;	double m_dLearningRate;	GIntQueue* m_pQ;public:	GManifoldSculpting(int nDataPoints, int nDimensions, int nNeighbors);	~GManifoldSculpting();	// This is an all-in-one function for using ManifoldSculpting. It	// constructs a "ManifoldSculpting" object for the number of data	// points in "pData". Then it calls "SetData" to set all the	// data points in the collection and "SquishBegin" to	// initialize the manifold learner. Next it performs the	// pre-processing step with the specified dimensionality reduction	// algorithm and re-sets the data. Now it's ready to begin the	// squishing iterations. It calls "SquishPass"	// "nPreludeIterations" times, then it continues calling "SquishPass"	// until the error hasn't improved for "nIterationsSinceBest"	// iterations. It will return a new data collection in which all	// of the input data will be found in the first "nTargetDimensions"	// input dimensions. The remaining input dimensions will contain	// all zeros. Any output data is just copied straight over.	static GArffData* DoManifoldSculpting(PreProcessType ePreProcess, GArffRelation* pRelation, GArffData* pData, int nTargetDimensions, int nNeighbors, int nPreludeIterations, int nIterationsSinceBest);	// This method initializes the squisher in preparation for iterative squishing	void SquishBegin(int nTargetDimensions);	// Perform one iteration of squishing	double SquishPass(int nSeedDataPoint);	// Sets the data points from a collection	void SetData(GArffRelation* pRelation, GArffData* pData);	// Set a single data point. For unsupervised manifold learning, bAdjustable	// should always be true. For semi-supervised manifold learning, bAdjustable	// should only be false if this is one of the supervised (fixed) points.	void SetVector(int n, double* pValues, bool bAdjustable);	// Get a single (multi-dimensional) data point	inline double* GetVector(int n)	{		return (double*)((unsigned char*)GetRecord(n) + m_nValueIndex);	}	// Returns the number of data points	int GetDataPointCount() { return m_nDataPoints; }	// Set the rate of squishing. (.99 is a good value)	void SetSquishingRate(double d) { m_dSquishingRate = d; }	// Points that have already been adjusted in this pass will typically have	// more weight on the error heuristic than points that have not yet been	// adjusted in this pass. (This causes much faster convergence.) This method	// sets the weight ratio. For example, a value of 10 means points that have	// already been adjusted this pass will have 10 times the weight in the error	// heuristic.	void SetSmoothingAdvantage(int n) { m_nSmoothingAdvantage = n; }	// for internal use only	int DataPointSortCompare(struct GManifoldSculptingNeighbor* pA, struct GManifoldSculptingNeighbor* pB);	// Returns the current learning rate	double GetLearningRate() { return m_dLearningRate; }	// Returns the average distance between neighbors	double GetAveNeighborDist() { return m_dAveNeighborDist; }	// Counts the number of times that a point has a neighbor with an	// index that is >= nThreshold away from this points index. (If	// the manifold is sampled in order such that points are expected	// to find neighbors with indexes close to their own, this can serve	// to identify when parts of the manifold are too close to other	// parts for so many neighbors to be used.)	int CountShortcuts(int nThreshold);protected:	inline struct GManifoldSculptingMetaData* GetMetaData(int n)	{		GAssert(n >= 0 && n < m_nDataPoints, "out of range");		return (struct GManifoldSculptingMetaData*)&m_pData[n * m_nRecordSize + m_nDataIndex];	}	inline struct GManifoldSculptingMetaData* GetMetaData(struct GManifoldSculptingNeighbor* pNeighbors)	{		return (struct GManifoldSculptingMetaData*)(((unsigned char*)pNeighbors) + m_nDataIndex);	}	inline struct GManifoldSculptingNeighbor* GetRecord(int n)	{		GAssert(n >= 0 && n < m_nDataPoints, "out of range");		return (struct GManifoldSculptingNeighbor*)&m_pData[n * m_nRecordSize];	}	void CalculateMetadata(int nTargetDimensions);	int FindMostDistantNeighbor(struct GManifoldSculptingNeighbor* pNeighbors);	double CalculateDistance(int nPoint1, int nPoint2);	double CalculateVectorCorrelation(int a, int vertex, int b);	double CalculateDataPointError(int nPoint);	int AdjustDataPoint(int nPoint, int nTargetDimensions, double* pError);};// Principle Component Analysisclass GPCA{protected:	GArffRelation* m_pRelation;	GArffData* m_pInputData;	GArffData* m_pOutputData;	GPCA(GArffRelation* pRelation, GArffData* pData);public:	~GPCA();	// Performs principle component analysis on the input dimensions of	// the data. It doesn't drop any dimensions, it just squishes the	// data into the first input dimensions. If you want to drop dimensions,	// just ignore the last several input dimensions. You	// are repsonsible to delete the data set this returns. Output dimensions	// are just copied straight across.	static GArffData* DoPCA(GArffRelation* pRelation, GArffData* pData);#ifndef NO_TEST_CODE	static void Test();#endif // !NO_TEST_CODEprotected:	void DoPCA();	GArffData* DropOutputData();};// Local Linear Embeddingclass GLLE{protected:	GArffRelation* m_pRelation;	GArffData* m_pInputData;	GArffData* m_pOutputData;	int m_nNeighbors;	int* m_pNeighbors;	GMatrix* m_pWeights;	GLLE(GArffRelation* pRelation, GArffData* pData, int nNeighbors);public:	~GLLE();	// Performs LLE analysis on the input dimensions of the data. It	// doesn't drop any dimensions, it just pushes most of the data into	// the first several input dimensions. You are repsonsible to delete	// the data set this returns. Output dimensions are copied straight	// across.	static GArffData* DoLLE(GArffRelation* pRelation, GArffData* pData, int nNeighbors);protected:	void FindNeighbors();	void FindNeighborsTheSlowWay();	void ComputeWeights();	void ComputeEmbedding();	GArffData* DropOutputData();};class GManifoldPumper : public GSupervisedLearner{protected:	GKNN* m_pManifoldMap;	GArffRelation* m_pInputRelation;	int m_nNewDimensions;	int m_nManifoldSculptingNeighbors;	GArffRelation* m_pMapRelation;	GSupervisedLearner* m_pLearner;	bool m_bOwnLearner;	double* m_pVector;public:	GManifoldPumper(GArffRelation* pInputRelation, int nNewDimensions, int nManifoldMapNeighbors, int nManifoldSculptingNeighbors);	~GManifoldPumper();	// Set the learner to pump. This learner must have been constructed with the relation	// that is obtained by calling GetRelation on this GManifoldPumper object. You must	// call SetLearner before you can call Train. If bOwn is true then it will delete	// pLearner when this object is destructed.	void SetLearner(GSupervisedLearner* pLearner, bool bOwn);	// Pump the data and then train the learner with it. You must call SetLearner before	// you call this method.	virtual void Train(GArffData* pData);	// pOut should be an array of doubles of size GetRelation()->GetAttributeCount()	// pIn shoudl be an array of doubles of size m_pInputRelation->GetAttributeCount()	void PumpVector(double* pOut, const double* pIn);	// Makes a data set with additional dimensions	GArffData* PumpData(GArffData* pData);	// Evaluates the input values in the provided row and	// deduce the output values. pVector should not be pumped.	virtual void Eval(double* pVector);	// Evaluates the input values in the provided row and	// deduce the output values. pVector should be a pumped vector.	void EvalPumpedVector(double* pVector);protected:	GArffRelation* MakePumpedRelation(GArffRelation* pInputRelation, int nNewDimensions);	// Discard any training (but not any settings) so it can be trained again	virtual void Reset();};#endif // __GMANIFOLD_H__

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?