⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 garff.h

📁 一个非常有用的开源代码
💻 H
字号:
/*	Copyright (C) 2006, Mike Gashler	This library is free software; you can redistribute it and/or	modify it under the terms of the GNU Lesser General Public	License as published by the Free Software Foundation; either	version 2.1 of the License, or (at your option) any later version.	see http://www.gnu.org/copyleft/lesser.html*/#ifndef __GARFF_H__#define __GARFF_H__#include "GArray.h"#include "GSearch.h"// ARFF = Attribute-Relation File Formatclass GArffAttribute;class GArffData;class GPointerArray;class GMatrix;class GArffRelation{protected:	char* m_szName;	GPointerArray* m_pAttributes;	int m_nInputCount;	int* m_pInputIndexes;	int m_nOutputCount;	int* m_pOutputIndexes;public:	GArffRelation();	~GArffRelation();	// Parses an ARFF file and returns a GArffRelation and a GArffData.  You must delete them both	static GArffRelation* ParseFile(GArffData** ppOutData, const char* szFile, int nLen);	// Writes out an ARFF file	void SaveArffFile(GArffData* pData, const char* szFilename);	// Returns the total number of attributes (both input and output) in this relation	int GetAttributeCount();	// Add an attribute to the relation	void AddAttribute(GArffAttribute* pAttr);	// Returns the number of input attributes in this relation	int GetInputCount();	// Returns the number of output attributes in this relation	int GetOutputCount();	// Returns the attribute index of the n'th input attribute	int GetInputIndex(int n);	// Returns the attribute index of the n'th output attribute	int GetOutputIndex(int n);	// Returns the attribute at the specified attribute index	GArffAttribute* GetAttribute(int nAttribute);	// Returns the sum of entropy (for discreet attributes) and variance (for continuous	// attributes) for all output values in the data set	double MeasureTotalOutputInfo(GArffData* pData);	// Returns the name of the relation	const char* GetName() { return m_szName; }	// Compute the square of the distance between the two points (using input values only)	double ComputeInputDistanceSquared(double* pRow1, double* pRow2);	// Computes the squared distance between input points after scaling by the value in	// the array pInputScales.  (pScales should be an array with size equal to	// the number of attributes in the relation, even though only the values corresponding	// to input attributes are actually used.)	double ComputeScaledInputDistanceSquared(double* pRow1, double* pRow2, double* pScales);	// Returns the number of continuous attributes in the relation	int CountContinuousAttributes();protected:	double* ParseDataRow(const char* szFile, int nLen);	void CountInputs();};class GArffAttribute{protected:	char* m_szName;	int m_nValues;	char** m_szValues;	bool m_bIsInput;	GArffAttribute();public:	// If nValues is 0, then this is a continuous attribute.	// szValues can be NULL if the values aren't named.	GArffAttribute(bool bIsInput, int nValues, const char** szValues);	~GArffAttribute();	// Makes a deep copy of this object	GArffAttribute* NewCopy();	// Parse the attribute section of a ".arff" file	static GArffAttribute* Parse(const char* szFile, int nLen);	// Returns true if this is a continuous (as opposed to discreet) attribute	bool IsContinuous() { return m_nValues == 0; }	// Makes the attribute continuous	void SetContinuous();	// Returns the index of the specified value	int FindEnumeratedValue(const char* szValue);	// Returns the number of discreet values in this attribute	int GetValueCount();	// Returns the name of this attribute	const char* GetName() { return m_szName; }	// Returns the n'th discreet value that this attribute can have	const char* GetValue(int n);	// Returns true if this is an input attribute	bool IsInput() { return m_bIsInput; }	// Sets whether this is an input or output attribute.	void SetIsInput(bool b) { m_bIsInput = b; }};class GArffData : public GPointerArray{public:	// nGrowSize specifies the amount of space (number of vectors) to initially	// allocate for data. It will dynamically resize as necessary.	GArffData(int nGrowSize);	~GArffData();	// Takes ownership of pVector	inline void AddVector(double* pVector) { AddPointer(pVector); }	// Returns a pointer to the vector	inline double* GetVector(int nIndex) { return ((double*)GetPointer(nIndex)); }	// Adds a copy of the vector to the data set	void CopyVector(double* pVector, int nAttributeCount);	// you must delete the vector this returns	double* DropVector(int nIndex);	// Abandons (leaks) all the vectors of data	void DropAllVectors();	// Randomizes the order	void Shuffle();	// Sorts the data from smallest to largest in the specified dimension	void Sort(int nDimension);	// Splits this set of data into two sets such that this set	// contains all vectors where the value in element "nColumn" is	// greater than dPivot and the set returned contains those	// less-than-or-equal-to dPivot.	GArffData* SplitByPivot(int nColumn, double dPivot);	// Splits this set of data into a unique set for each	// possible enumeration value of the attribute.  You are	// responsible to delete each set of data as well as the	// array of pointers that this returns	GArffData** SplitByAttribute(GArffRelation* pRelation, int nAttribute);	// Splits this set of data into two sets such that this set	// contains "nRows" vectors and the returned set contains the rest	GArffData* SplitBySize(int nRows);	// Steals all the vectors from pData and adds them to this set.	// (You still have to delete pData)	void Merge(GArffData* pData);	// Measures the entropy of this set relative to the specified attribute	double MeasureEntropy(GArffRelation* pRelation, int nColumn);	// Snaps all non-continuous output values to the nearest discreet value	void DiscretizeNonContinuousOutputs(GArffRelation* pRelation);	// Finds the min and the range of the values of the specified attribute	void GetMinAndRange(int nAttribute, double* pMin, double* pRange);	// Computes the arithmetic mean of a single attribute	double ComputeMean(int nAttribute);	// Finds the arithmetic means of all attributes	void GetMeans(double* pOutMeans, int nAttributes);	// Computes the average variance of a single attribute	double ComputeVariance(double dMean, int nAttribute);	// Finds the average variance of all the attributes	void GetVariance(double* pOutVariance, double* pMeans, int nAttributes);	// Throws out all of the vectors in which any of the first "nAttributes"	// attributes has a value that is more than "dStandardDeviations"	// deviations away from the mean of that attribute. Note that a better	// technique would be to compute Euclidian distance using all the	// attributes together, but I was feeling too lazy when I wrote this.	int RemoveOutlyers(double dStandardDeviations, int nAttributes);	// Normalizes the specified attribute values	void Normalize(int nAttribute, double dInputMin, double dInputRange, double dOutputMin, double dOutputRange);	// Normalize a value from the input min and range to the output min and range	static double Normalize(double dVal, double dInputMin, double dInputRange, double dOutputMin, double dOutputRange);	// Produce a vector in which each attribute holds the most common value for that attribute	double* MakeSetOfMostCommonOutputs(GArffRelation* pRelation);	// Returns true if all output values in the data set are the same	bool IsOutputHomogenous(GArffRelation* pRelation);	// Replaces missing data with random values	void RandomlyReplaceMissingData(GArffRelation* pRelation);	// Replaces all missing data with the most common value for the attribute	void ReplaceMissingAttributeWithMostCommonValue(GArffRelation* pRelation, int nAttribute);	// Computes the covariance matrix of the data	void ComputeCovarianceMatrix(GMatrix* pOutMatrix, GArffRelation* pRelation);	// Computes the probability of each possible value for one attribute given knowledge of	// a specific value for another of the attributes	void ComputeCoprobabilityMatrix(GMatrix* pOutMatrix, GArffRelation* pRelation, int nAttr, double noDataValue);	// Dump a representation of the data to stdout	void Print(int nAttributes);	// Computes the best pivot for minimizing the sum of the variance of each half	double ComputeMinimumVariancePivot(int nAttr);	// Computes the best pivot for minimizing the sum output info	double ComputeMinimumInfoPivot(GArffRelation* pRelation, int nAttr, double* pOutputInfo);};class GArffDataRegressCritic : public GRealVectorCritic{protected:	GArffData* m_pData;	int m_nVariables;	int m_nAttrX;	int m_nAttrY;public:	GArffDataRegressCritic(GArffData* pData, int nVariables, int nAttrX, int nAttrY)		: GRealVectorCritic(nVariables)	{		m_pData = pData;		m_nAttrX = nAttrX;		m_nAttrY = nAttrY;	}	virtual ~GArffDataRegressCritic() {}protected:	virtual double ApplyVariables(double dX, double* pVariables) = 0;	virtual double ComputeError(double* pVector)	{		int nCount = m_pData->GetSize();		int i;		double* pVec;		double y;		double dError = 0;		for(i = 0; i < nCount; i++)		{			pVec = m_pData->GetVector(i);			y = ApplyVariables(pVec[m_nAttrX], pVector);			y -= pVec[m_nAttrY];			dError += (y * y);		}		return dError;	}};#endif // __GARFF_H__

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -