garff.h

来自「一个由Mike Gashler完成的机器学习方面的includes neural」· C头文件代码 · 共 357 行
357 行
/*	Copyright (C) 2006, Mike Gashler	This library is free software; you can redistribute it and/or	modify it under the terms of the GNU Lesser General Public	License as published by the Free Software Foundation; either	version 2.1 of the License, or (at your option) any later version.	see http://www.gnu.org/copyleft/lesser.html*/#ifndef __GARFF_H__#define __GARFF_H__#include "GArray.h"#include "GSearch.h"// ARFF = Attribute-Relation File Formatclass GArffAttribute;class GArffData;class GPointerArray;class GMatrix;class GArffRelation{protected:	char* m_szName;	GPointerArray* m_pAttributes;	int m_nInputCount;	int* m_pInputIndexes;	int m_nOutputCount;	int* m_pOutputIndexes;public:	GArffRelation();	~GArffRelation();	// Parses an ARFF file and returns a GArffRelation and a GArffData.  You must delete them both.	// This will throw an exception if there's an error. (You should catch const char*)	static void ParseArffFile(GArffRelation** ppOutRelation, GArffData** ppOutData, const char* szFile, int nLen);	// Loads an ARFF file and returns a GArffRelation and a GArffData.  You must delete them both.	// This will throw an exception if there's an error. (You should catch const char*)	static void LoadArffFile(GArffRelation** ppOutRelation, GArffData** ppOutData, const char* szFilename);	// Writes out an ARFF file	void SaveArffFile(GArffData* pData, const char* szFilename);	// Returns the total number of attributes (both input and output) in this relation	int GetAttributeCount();	// Add an attribute to the relation	void AddAttribute(GArffAttribute* pAttr);	// Returns the number of input attributes in this relation	int GetInputCount();	// Returns the number of output attributes in this relation	int GetOutputCount();	// Returns the attribute index of the n'th input attribute	int GetInputIndex(int n);	// Returns the attribute index of the n'th output attribute	int GetOutputIndex(int n);	// Returns the attribute at the specified attribute index	GArffAttribute* GetAttribute(int nAttribute);	// Returns the sum of entropy (for discreet attributes) and variance (for continuous	// attributes) for all output values in the data set	double MeasureTotalOutputInfo(GArffData* pData);	// Returns the name of the relation	const char* GetName() { return m_szName; }	// Compute the square of the distance between the two points (using input values only)	double ComputeInputDistanceSquared(double* pRow1, double* pRow2);	// Compute the square of the distance between the two points (using output values only)	double ComputeOutputDistanceSquared(double* pRow1, double* pRow2);		// Computes the squared distance between input points after scaling by the value in	// the array pInputScales.  (pScales should be an array with size equal to	// the number of attributes in the relation, even though only the values corresponding	// to input attributes are actually used.)	double ComputeScaledInputDistanceSquared(double* pRow1, double* pRow2, double* pScales);	// Returns the number of continuous attributes in the relation	int CountContinuousAttributes();	// Counts the size of the corresponding vector-mode input vector	// Enumerations with nCap or more values will be treated as a single continuous value	int CountVectorModeInputs(int nCap = 25);	// Counts the size of the corresponding vector-mode output vector	// Enumerations with nCap or more values will be treated as a single continuous value	int CountVectorModeOutputs(int nCap = 25);	// Converts a full normal-mode vector (pIn) to a vector-mode	// input-only vector (pOut)	// Enumerations with nCap or more values will be treated as a single continuous value	void InputsToVectorMode(double* pIn, double* pOut, int nCap = 25);		// Converts a full normal-mode vector (pIn) to a vector-mode	// output-only vector (pOut)	// Enumerations with nCap or more values will be treated as a single continuous value	void OutputsToVectorMode(double* pIn, double* pOut, int nCap = 25);	// Converts a vector-mode output-only vector (pIn) to a normal-mode	// full vector (pOut). (The inputs of pOut are untouched).	// Enumerations with nCap or more values will be treated as a single continuous value	void VectorModeToOutputs(double* pIn, double* pOut, int nCap = 25);protected:	double* ParseDataRow(const char* szFile, int nLen, int nLine, int nCommentAttributes);	void CountInputs();};class GArffAttribute{protected:	char* m_szName;	int m_nValues;	char** m_szValues;	bool m_bIsInput;	GArffAttribute();public:	// If nValues is 0, then this is a continuous attribute.	// szValues can be NULL if the values aren't named.	GArffAttribute(bool bIsInput, int nValues, const char** szValues);	~GArffAttribute();	// Makes a deep copy of this object	GArffAttribute* NewCopy();	// Parse the attribute section of a ".arff" file	static GArffAttribute* Parse(const char* szFile, int nLen, int nLine);	// Returns true if this is a continuous (as opposed to discreet) attribute	bool IsContinuous() { return m_nValues == 0; }	// Makes the attribute continuous	void SetContinuous();	// Returns the index of the specified value	int FindEnumeratedValue(const char* szValue);	// Returns the number of discreet values in this attribute	int GetValueCount();	// Returns the name of this attribute	const char* GetName() { return m_szName; }	// Returns the n'th discreet value that this attribute can have	const char* GetValue(int n);	// Returns true if this is an input attribute	bool IsInput() { return m_bIsInput; }	// Sets whether this is an input or output attribute.	void SetIsInput(bool b) { m_bIsInput = b; }};class GArffData : public GPointerArray{public:	// nGrowSize specifies the amount of space (number of vectors) to initially	// allocate for data. It will dynamically resize as necessary.	GArffData(int nGrowSize);	~GArffData();	// Takes ownership of pVector	inline void AddVector(double* pVector) { AddPointer(pVector); }	// Returns a pointer to the vector	inline double* GetVector(int nIndex) { return ((double*)GetPointer(nIndex)); }	// Adds a copy of the vector to the data set	void CopyVector(double* pVector, int nAttributeCount);	// Swaps pVector with the vector at nIndex. You're responsible to delete the	// vector this returns	double* SwapVector(int nIndex, double* pVector);	// you must delete the vector this returns	double* DropVector(int nIndex);	// deletes the vector with the specified index	void DeleteVector(int nIndex);	// Abandons (leaks) all the vectors of data	void DropAllVectors();	// Randomizes the order	void Shuffle();	// Sorts the data from smallest to largest in the specified dimension	void Sort(int nDimension);	// Splits this set of data into two sets such that this set	// contains all vectors where the value in element "nColumn" is	// greater than dPivot and the set returned contains those	// less-than-or-equal-to dPivot.	GArffData* SplitByPivot(int nColumn, double dPivot);	// Splits this set of data into a unique set for each	// possible enumeration value of the attribute.  You are	// responsible to delete each set of data as well as the	// array of pointers that this returns	GArffData** SplitByAttribute(GArffRelation* pRelation, int nAttribute);	// Splits this set of data into two sets such that this set	// contains "nRows" vectors and the returned set contains the rest	GArffData* SplitBySize(int nRows);	// Steals all the vectors from pData and adds them to this set.	// (You still have to delete pData)	void Merge(GArffData* pData);	// Measures the entropy of this set relative to the specified attribute	double MeasureEntropy(GArffRelation* pRelation, int nColumn);	// Snaps all non-continuous output values to the nearest discreet value	void DiscretizeNonContinuousOutputs(GArffRelation* pRelation);	// Finds the min and the range of the values of the specified attribute	void GetMinAndRange(int nAttribute, double* pMin, double* pRange);	// Computes the arithmetic mean of a single attribute	double ComputeMean(int nAttribute);	// Finds the arithmetic means of all attributes	void GetMeans(double* pOutMeans, int nAttributes);	// Computes the average variance of a single attribute	double ComputeVariance(double dMean, int nAttribute);	// Finds the average variance of all the attributes	void GetVariance(double* pOutVariance, double* pMeans, int nAttributes);	// Throws out all of the vectors in which any of the first "nAttributes"	// attributes has a value that is more than "dStandardDeviations"	// deviations away from the mean of that attribute. Note that a better	// technique would be to compute Euclidian distance using all the	// attributes together, but I was feeling too lazy when I wrote this.	int RemoveOutlyers(double dStandardDeviations, int nAttributes);	// Normalizes the specified attribute values	void Normalize(int nAttribute, double dInputMin, double dInputRange, double dOutputMin, double dOutputRange);	// Normalize a value from the input min and range to the output min and range	static double Normalize(double dVal, double dInputMin, double dInputRange, double dOutputMin, double dOutputRange);	// Produce a vector in which each attribute holds the most common value for that attribute	double* MakeSetOfMostCommonOutputs(GArffRelation* pRelation);	// Returns true if all output values in the data set are the same	bool IsOutputHomogenous(GArffRelation* pRelation);	// Replaces missing data with random values	void RandomlyReplaceMissingData(GArffRelation* pRelation);	// Replaces all missing data with the most common value for the attribute	void ReplaceMissingAttributeWithMostCommonValue(GArffRelation* pRelation, int nAttribute);	// This is an efficient algorithm for iteratively computing the principle component	// vector of the data. See "EM Algorithms for PCA and SPCA" by Sam Roweis, 1998 NIPS.	// if bExtract is true, it will remove the component from the data (so you can call it	// again to get the second principle component, etc).	void ComputePrincipleComponent(int nDims, double* pOutVector, int nIterations, bool bExtract);	// Computes the covariance between two attributes	double ComputeCovariance(int nAttr1, double dMean1, int nAttr2, double dMean2);	// Computes the covariance matrix of the data	void ComputeCovarianceMatrix(GMatrix* pOutMatrix, GArffRelation* pRelation);	// Computes the probability of each possible value for one attribute given knowledge of	// a specific value for another of the attributes	void ComputeCoprobabilityMatrix(GMatrix* pOutMatrix, GArffRelation* pRelation, int nAttr, double noDataValue);	// Dump a representation of the data to stdout	void Print(int nAttributes);	// Computes the best pivot for minimizing the sum of the variance of each half	double ComputeMinimumVariancePivot(int nAttr);	// Computes the best pivot for minimizing the sum output info	bool PickPivotToReduceInfo(double* pOutPivot, double* pOutputInfo, GArffRelation* pRelation, int nAttr);/*	// This assumes that the relation has an even number of outputs. Even outputs (0, 2, 4, ...) represent	// the real component of a complex output value and odd outputs (1, 3, 5, ...) represent the imaginary	// component.	GArffData* SlowFourierTransform(GArffRelation* pRel, bool bForward);*/	// Adds nNoiseDims dimensions of random gaussian dimensions to the data. (Also adds corresponding	// attributes to pRelation).	void AddGaussianNoiseDimensions(GArffRelation* pRelation, int nNoiseDims);#ifndef NO_TEST_CODE	static void Test();#endif // !NO_TEST_CODE};class GArffDataRegressCritic : public GRealVectorCritic{protected:	GArffData* m_pData;	int m_nVariables;	int m_nAttrX;	int m_nAttrY;public:	GArffDataRegressCritic(GArffData* pData, int nVariables, int nAttrX, int nAttrY)		: GRealVectorCritic(nVariables)	{		m_pData = pData;		m_nAttrX = nAttrX;		m_nAttrY = nAttrY;	}	virtual ~GArffDataRegressCritic() {}protected:	virtual double ApplyVariables(double dX, double* pVariables) = 0;	virtual double ComputeError(double* pVector)	{		int nCount = m_pData->GetSize();		int i;		double* pVec;		double y;		double dError = 0;		for(i = 0; i < nCount; i++)		{			pVec = m_pData->GetVector(i);			y = ApplyVariables(pVec[m_nAttrX], pVector);			y -= pVec[m_nAttrY];			dError += (y * y);		}		return dError;	}};#endif // __GARFF_H__
garff.h - 源码说明

本页面展示了「一个由Mike Gashler完成的机器学习方面的includes neural net, naive bayesian classifier, decision tree, KNN, a genet」中的 garff.h 源码文件，采用 C头文件编程语言编写，共 357 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与classifier相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?