gmanifold.h
来自「一个由Mike Gashler完成的机器学习方面的includes neural」· C头文件 代码 · 共 265 行
H
265 行
/* Copyright (C) 2006, Mike Gashler This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. see http://www.gnu.org/copyleft/lesser.html*/#ifndef __GMANIFOLD_H__#define __GMANIFOLD_H__#include "GLearner.h"#include "GMacros.h"class GIntQueue;class GMatrix;class GVector;class GKNN;struct GManifoldSculptingNeighbor;// Manifold Sculptingclass GManifoldSculpting{public: enum PreProcessType { PP_NONE, // Just use ManifoldSculpting to learn the whole manifold PP_PCA, // Pre-process with PCA, then do local refinement with ManifoldSculpting PP_LLE, // Pre-process with LLE, then clean up the mess with ManifoldSculpting };protected: int m_nDataPoints; int m_nDimensions; int m_nNeighbors; int m_nDataIndex; int m_nValueIndex; int m_nRecordSize; int m_nCurrentDimension; int m_nTargetDimensions; int m_nPass; int m_nSmoothingAdvantage; unsigned char* m_pData; double m_dAveNeighborDist; double m_dSquishingRate; double m_dLearningRate; GIntQueue* m_pQ;public: GManifoldSculpting(int nDataPoints, int nDimensions, int nNeighbors); ~GManifoldSculpting(); // This is an all-in-one function for using ManifoldSculpting. It // constructs a "ManifoldSculpting" object for the number of data // points in "pData". Then it calls "SetData" to set all the // data points in the collection and "SquishBegin" to // initialize the manifold learner. Next it performs the // pre-processing step with the specified dimensionality reduction // algorithm and re-sets the data. Now it's ready to begin the // squishing iterations. It calls "SquishPass" // "nPreludeIterations" times, then it continues calling "SquishPass" // until the error hasn't improved for "nIterationsSinceBest" // iterations. It will return a new data collection in which all // of the input data will be found in the first "nTargetDimensions" // input dimensions. The remaining input dimensions will contain // all zeros. Any output data is just copied straight over. static GArffData* DoManifoldSculpting(PreProcessType ePreProcess, GArffRelation* pRelation, GArffData* pData, int nTargetDimensions, int nNeighbors, int nPreludeIterations, int nIterationsSinceBest); // This method initializes the squisher in preparation for iterative squishing void SquishBegin(int nTargetDimensions); // Perform one iteration of squishing double SquishPass(int nSeedDataPoint); // Sets the data points from a collection void SetData(GArffRelation* pRelation, GArffData* pData); // Set a single data point. For unsupervised manifold learning, bAdjustable // should always be true. For semi-supervised manifold learning, bAdjustable // should only be false if this is one of the supervised (fixed) points. void SetVector(int n, double* pValues, bool bAdjustable); // Get a single (multi-dimensional) data point inline double* GetVector(int n) { return (double*)((unsigned char*)GetRecord(n) + m_nValueIndex); } // Returns the number of data points int GetDataPointCount() { return m_nDataPoints; } // Set the rate of squishing. (.99 is a good value) void SetSquishingRate(double d) { m_dSquishingRate = d; } // Points that have already been adjusted in this pass will typically have // more weight on the error heuristic than points that have not yet been // adjusted in this pass. (This causes much faster convergence.) This method // sets the weight ratio. For example, a value of 10 means points that have // already been adjusted this pass will have 10 times the weight in the error // heuristic. void SetSmoothingAdvantage(int n) { m_nSmoothingAdvantage = n; } // for internal use only int DataPointSortCompare(struct GManifoldSculptingNeighbor* pA, struct GManifoldSculptingNeighbor* pB); // Returns the current learning rate double GetLearningRate() { return m_dLearningRate; } // Returns the average distance between neighbors double GetAveNeighborDist() { return m_dAveNeighborDist; } // Counts the number of times that a point has a neighbor with an // index that is >= nThreshold away from this points index. (If // the manifold is sampled in order such that points are expected // to find neighbors with indexes close to their own, this can serve // to identify when parts of the manifold are too close to other // parts for so many neighbors to be used.) int CountShortcuts(int nThreshold);protected: inline struct GManifoldSculptingMetaData* GetMetaData(int n) { GAssert(n >= 0 && n < m_nDataPoints, "out of range"); return (struct GManifoldSculptingMetaData*)&m_pData[n * m_nRecordSize + m_nDataIndex]; } inline struct GManifoldSculptingMetaData* GetMetaData(struct GManifoldSculptingNeighbor* pNeighbors) { return (struct GManifoldSculptingMetaData*)(((unsigned char*)pNeighbors) + m_nDataIndex); } inline struct GManifoldSculptingNeighbor* GetRecord(int n) { GAssert(n >= 0 && n < m_nDataPoints, "out of range"); return (struct GManifoldSculptingNeighbor*)&m_pData[n * m_nRecordSize]; } void CalculateMetadata(int nTargetDimensions); int FindMostDistantNeighbor(struct GManifoldSculptingNeighbor* pNeighbors); double CalculateDistance(int nPoint1, int nPoint2); double CalculateVectorCorrelation(int a, int vertex, int b); double CalculateDataPointError(int nPoint); int AdjustDataPoint(int nPoint, int nTargetDimensions, double* pError);};// Principle Component Analysisclass GPCA{protected: GArffRelation* m_pRelation; GArffData* m_pInputData; GArffData* m_pOutputData; GPCA(GArffRelation* pRelation, GArffData* pData);public: ~GPCA(); // Performs principle component analysis on the input dimensions of // the data. It doesn't drop any dimensions, it just squishes the // data into the first input dimensions. If you want to drop dimensions, // just ignore the last several input dimensions. You // are repsonsible to delete the data set this returns. Output dimensions // are just copied straight across. static GArffData* DoPCA(GArffRelation* pRelation, GArffData* pData);#ifndef NO_TEST_CODE static void Test();#endif // !NO_TEST_CODEprotected: void DoPCA(); GArffData* DropOutputData();};// Local Linear Embeddingclass GLLE{protected: GArffRelation* m_pRelation; GArffData* m_pInputData; GArffData* m_pOutputData; int m_nNeighbors; int* m_pNeighbors; GMatrix* m_pWeights; GLLE(GArffRelation* pRelation, GArffData* pData, int nNeighbors);public: ~GLLE(); // Performs LLE analysis on the input dimensions of the data. It // doesn't drop any dimensions, it just pushes most of the data into // the first several input dimensions. You are repsonsible to delete // the data set this returns. Output dimensions are copied straight // across. static GArffData* DoLLE(GArffRelation* pRelation, GArffData* pData, int nNeighbors);protected: void FindNeighbors(); void FindNeighborsTheSlowWay(); void ComputeWeights(); void ComputeEmbedding(); GArffData* DropOutputData();};class GManifoldPumper : public GSupervisedLearner{protected: GKNN* m_pManifoldMap; GArffRelation* m_pInputRelation; int m_nNewDimensions; int m_nManifoldSculptingNeighbors; GArffRelation* m_pMapRelation; GSupervisedLearner* m_pLearner; bool m_bOwnLearner; double* m_pVector;public: GManifoldPumper(GArffRelation* pInputRelation, int nNewDimensions, int nManifoldMapNeighbors, int nManifoldSculptingNeighbors); ~GManifoldPumper(); // Set the learner to pump. This learner must have been constructed with the relation // that is obtained by calling GetRelation on this GManifoldPumper object. You must // call SetLearner before you can call Train. If bOwn is true then it will delete // pLearner when this object is destructed. void SetLearner(GSupervisedLearner* pLearner, bool bOwn); // Pump the data and then train the learner with it. You must call SetLearner before // you call this method. virtual void Train(GArffData* pData); // pOut should be an array of doubles of size GetRelation()->GetAttributeCount() // pIn shoudl be an array of doubles of size m_pInputRelation->GetAttributeCount() void PumpVector(double* pOut, const double* pIn); // Makes a data set with additional dimensions GArffData* PumpData(GArffData* pData); // Evaluates the input values in the provided row and // deduce the output values. pVector should not be pumped. virtual void Eval(double* pVector); // Evaluates the input values in the provided row and // deduce the output values. pVector should be a pumped vector. void EvalPumpedVector(double* pVector);protected: GArffRelation* MakePumpedRelation(GArffRelation* pInputRelation, int nNewDimensions); // Discard any training (but not any settings) so it can be trained again virtual void Reset();};#endif // __GMANIFOLD_H__
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?