📄 gknn.h
字号:
/* Copyright (C) 2006, Mike Gashler This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. see http://www.gnu.org/copyleft/lesser.html*/#ifndef __GKNN_H__#define __GKNN_H__#include "GLearner.h"class GArffData;class GNeighborFinderNode;class GNeighborFinderLeaf;// This finds the k-nearest neighbors of a point. It divides the data// into halves until every hyper-rectangle contains at most a maximum// number of points. It uses these hyper-rectangles to do efficient// neighbor searches.class GNeighborFinder{protected: GNeighborFinderNode* m_pRoot; GArffRelation* m_pRelation; GArffData* m_pData; int m_nMaxPointsPerLeaf; GNeighborFinderLeaf** m_ppIterators; double* m_pMaxs;public: // nMaxPointsPerLeaf tells how many vectors are contained by each hyper-rectangle // before it is divided. Experimental results are still needed to determine how // to select this value. If it's too small, the algorithm will waste time with // empty hyper-rectangles. If it's too big, the algorithm will waste time // examining unnecessary data points. Perhaps the number of neighbors might be // a good value. GNeighborFinder(GArffRelation* pRelation, GArffData* pData, int nMaxPointsPerLeaf = 12); ~GNeighborFinder();#ifndef NO_TEST_CODE static void Test();#endif // !NO_TEST_CODE // pOutNeighbors and pOutSquaredDistances should both be arrays of size nNeighbors. // When it returns, these arrays will hold the indexes into the data set of the // nearest neighbors, and the squared distances respectively. nExclude is an index // that you don't want to get in the results. For example, if you are passing in // a vector from the data set, you may wish to exclude its index because you // already know it's close to itself. If you don't wish to exclude any indexes, just // set nExclude to -1. If there are not enough points in the data set to fill the // neighbor array, the empty ones will be set to -1. void FindNeighbors(int* pOutNeighbors, double* pOutSquaredDistances, int nNeighbors, double* pVector, int nExclude);protected: GNeighborFinderLeaf* FindCell(double* pVector); void Split(int nDimension, double dPivot); void GetNeighborsFromCell(GNeighborFinderLeaf* pCell, double* pVector, int* pOutNeighbors, double* pOutSquaredDistances, int nNeighbors, int* pnWorstNeighbor, int nExclude);};// Implements the K-Nearest Neighbor learning algorithmclass GKNN : public GSupervisedLearner{protected: int m_nNeighbors; GArffData* m_pRows; double* m_pScaleFactors; double* m_pEvalVector; int* m_pEvalNeighbors; double* m_pEvalDistances; GNeighborFinder* m_pNeighborFinder;public: GKNN(GArffRelation* pRelation, int nNeighbors); virtual ~GKNN(); // Makes a copy of the vector and adds it to the internal set void AddVector(double* pVector); // Compute the amount to scale each dimension so that all dimensions // have equal weight void ComputeScaleFactors(GArffData* pData); // Train with all the points in pData virtual void Train(GArffData* pData); // Evaluate with each neighbor having equal vote void EvalEqualWeight(double* pRow); // Evaluate with each neighbor having a linear vote void EvalLinearWeight(double* pRow); // Evaluates the input values in the provided row and // deduce the output values virtual void Eval(double* pRow) { EvalLinearWeight(pRow); } // Find the row that helps the lest with predictive accuracy (very expensive) int FindLeastHelpfulRow(GArffData* pTestSet); // Drops a point from the collection double* DropRow(int nRow);protected: void FindNeighbors(double* pRow);};#endif // __GKNN_H__
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -