// File: gmanifold.h
// (code-viewer control: font size)
#ifndef __GMANIFOLD_H__#define __GMANIFOLD_H__#include "GLearner.h"class GPointerQueue;class GMatrix;class GVector;class GKNN;// Manifold Sculptingclass GManifoldSculpting{public: enum PreProcessType { PP_NONE, // Just use ManifoldSculpting to learn the whole manifold PP_PCA, // Pre-process with PCA, then do local refinement with ManifoldSculpting PP_LLE, // Pre-process with LLE, then clean up the mess with ManifoldSculpting };protected: int m_nDataPoints; int m_nDimensions; int m_nNeighbors; int m_nDataIndex; int m_nValueIndex; int m_nRecordSize; int m_nCurrentDimension; int m_nTargetDimensions; int m_nPass; int m_nSmoothingAdvantage; unsigned char* m_pData; double m_dAveNeighborDist; double m_dSquishingRate; double m_dLearningRate; GPointerQueue* m_pQ;public: GManifoldSculpting(int nDataPoints, int nDimensions, int nNeighbors); ~GManifoldSculpting(); // This is an all-in-one function for using ManifoldSculpting. It // constructs a "ManifoldSculpting" object for the number of data // points in "pData". Then it calls "SetData" to set all the // data points in the collection and "SquishBegin" to // initialize the manifold learner. Next it performs the // pre-processing step with the specified dimensionality reduction // algorithm and re-sets the data. Now it's ready to begin the // squishing iterations. It calls "SquishPass" // "nPreludeIterations" times, then it continues calling "SquishPass" // until the error hasn't improved for "nIterationsSinceBest" // iterations. It will return a new data collection in which all // of the input data will be found in the first "nTargetDimensions" // input dimensions. The remaining input dimensions will contain // all zeros. Any output data is just copied straight over. 
static GArffData* DoManifoldSculpting(PreProcessType ePreProcess, GArffRelation* pRelation, GArffData* pData, int nTargetDimensions, int nNeighbors, int nPreludeIterations, int nIterationsSinceBest); // This method initializes the squisher in preparation for iterative squishing void SquishBegin(int nTargetDimensions); // Perform one iteration of squishing double SquishPass(int nSeedDataPoint); // Sets the data points from a collection void SetData(GArffRelation* pRelation, GArffData* pData); // Set a single data point. For unsupervised manifold learning, bAdjustable // should always be true. For partially-supervised manifold learning, bAdjustable // should only be false if this is one of the supervised (fixed) points. void SetDataPoint(int n, double* pValues, bool bAdjustable); // Get a single (multi-dimensional) data point double* GetDataPoint(int n); // Returns the number of data points int GetDataPointCount() { return m_nDataPoints; } // Set the rate of squishing. (.99 is a good value) void SetSquishingRate(double d) { m_dSquishingRate = d; } // todo: figure out how to explain what this is void SetSmoothingAdvantage(int n) { m_nSmoothingAdvantage = n; } // for internal use only int DataPointSortCompare(unsigned char* pA, unsigned char* pB);protected: inline struct GManifoldSculptingMetaData* GetMetaData(int n) { return (struct GManifoldSculptingMetaData*)&m_pData[n * m_nRecordSize + m_nDataIndex]; } inline struct GManifoldSculptingMetaData* GetMetaData(struct GManifoldSculptingNeighbor* pNeighbors) { return (struct GManifoldSculptingMetaData*)(((unsigned char*)pNeighbors) + m_nDataIndex); } void CalculateMetadata(int nTargetDimensions); int FindMostDistantNeighbor(struct GManifoldSculptingNeighbor* pNeighbors); double CalculateDistance(unsigned char* pA, unsigned char* pB); double CalculateVectorCorrelation(unsigned char* pA, unsigned char* pVertex, unsigned char* pB); double CalculateDataPointError(unsigned char* pDataPoint); int AjustDataPoint(unsigned char* 
pDataPoint, int nTargetDimensions, double* pError);};// Principle Component Analysisclass GPCA{protected: GArffRelation* m_pRelation; GArffData* m_pInputData; GArffData* m_pOutputData; GPCA(GArffRelation* pRelation, GArffData* pData);public: ~GPCA(); // Performs principle component analysis on the input dimensions of // the data. It doesn't drop any dimensions, it just squishes the // data into the first input dimensions. If you want to drop dimensions, // just ignore the last several input dimensions. The eigenvalues are // returned as well to help you decide how many dimensions to drop. You // are repsonsible to delete the data set this returns. Output dimensions // are just copied straight across. static GArffData* DoPCA(GArffRelation* pRelation, GArffData* pData, GVector* pOutEigenValues);protected: void DoPCA(GVector* pOutEigenValues); GArffData* DropOutputData();};// Local Linear Embeddingclass GLLE{protected: GArffRelation* m_pRelation; GArffData* m_pInputData; GArffData* m_pOutputData; int m_nNeighbors; int* m_pNeighbors; GMatrix* m_pWeights; GLLE(GArffRelation* pRelation, GArffData* pData, int nNeighbors);public: ~GLLE(); // Performs LLE analysis on the input dimensions of the data. It // doesn't drop any dimensions, it just pushes most of the data into // the first several input dimensions. You are repsonsible to delete // the data set this returns. Output dimensions are copied straight // across. 
static GArffData* DoLLE(GArffRelation* pRelation, GArffData* pData, int nNeighbors);protected: void FindNeighbors(); void ComputeWeights(); void ComputeEmbedding(); GArffData* DropOutputData();};class GManifoldPumper : public GSupervisedLearner{protected: GKNN* m_pManifoldMap; GArffRelation* m_pInputRelation; int m_nNewDimensions; int m_nManifoldSculptingNeighbors; GArffRelation* m_pMapRelation; GSupervisedLearner* m_pLearner; bool m_bOwnLearner; double* m_pVector;public: GManifoldPumper(GArffRelation* pInputRelation, int nNewDimensions, int nManifoldMapNeighbors, int nManifoldSculptingNeighbors); ~GManifoldPumper(); // Set the learner to pump. This learner must have been constructed with the relation // that is obtained by calling GetRelation on this GManifoldPumper object. You must // call SetLearner before you can call Train. If bOwn is true then it will delete // pLearner when this object is destructed. void SetLearner(GSupervisedLearner* pLearner, bool bOwn); // Pump the data and then train the learner with it. You must call SetLearner before // you call this method. virtual void Train(GArffData* pData); // pOut should be an array of doubles of size GetRelation()->GetAttributeCount() // pIn shoudl be an array of doubles of size m_pInputRelation->GetAttributeCount() void PumpVector(double* pOut, const double* pIn); // Makes a data set with additional dimensions GArffData* PumpData(GArffData* pData); // Evaluates the input values in the provided row and // deduce the output values. pVector should not be pumped. virtual void Eval(double* pVector); // Evaluates the input values in the provided row and // deduce the output values. pVector should be a pumped vector. void EvalPumpedVector(double* pVector);protected: GArffRelation* MakePumpedRelation(GArffRelation* pInputRelation, int nNewDimensions);};#endif // __GMANIFOLD_H__
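// (Code-viewer page residue, not part of the header.)
// Keyboard shortcuts:
//   Copy code:            Ctrl + C
//   Search code:          Ctrl + F
//   Fullscreen mode:      F11
//   Toggle theme:         Ctrl + Shift + D
//   Show shortcuts:       ?
//   Increase font size:   Ctrl + =
//   Decrease font size:   Ctrl + -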