gpctree.cpp

来自「一个由Mike Gashler完成的机器学习方面的includes neural」· C++ 代码 · 共 441 行

CPP
441
字号
/*	Copyright (C) 2006, Mike Gashler	This library is free software; you can redistribute it and/or	modify it under the terms of the GNU Lesser General Public	License as published by the Free Software Foundation; either	version 2.1 of the License, or (at your option) any later version.	see http://www.gnu.org/copyleft/lesser.html*/#include "GPCTree.h"#include "GArff.h"#include "GVector.h"#include "GBits.h"#include <stdlib.h>class GPCTreeNode{public:	GPCTreeNode()	{	}	virtual ~GPCTreeNode()	{	}	virtual bool IsLeaf() = 0;};class GPCTreeInteriorNode : public GPCTreeNode{protected:	double* m_pCenter;	double* m_pNormal;	GPCTreeNode* m_pLeft;	GPCTreeNode* m_pRight;public:	GPCTreeInteriorNode(double* pCenter, double* pNormal)		: GPCTreeNode()	{		m_pCenter = pCenter;		m_pNormal = pNormal;		m_pLeft = NULL;		m_pRight = NULL;	}	virtual ~GPCTreeInteriorNode()	{		delete[] m_pCenter;		delete[] m_pNormal;		delete(m_pLeft);		delete(m_pRight);	}	virtual bool IsLeaf()	{		return false;	}	bool Test(double* pInputVector, int nInputs)	{		return GVector::TestPivot(m_pCenter, m_pNormal, pInputVector, nInputs);	}	void SetLeft(GPCTreeNode* pNode)	{		m_pLeft = pNode;	}	void SetRight(GPCTreeNode* pNode)	{		m_pRight = pNode;	}	GPCTreeNode* GetRight()	{		return m_pRight;	}	GPCTreeNode* GetLeft()	{		return m_pLeft;	}};class GPCTreeLeafNode : public GPCTreeNode{protected:	double* m_pOutputs;public:	GPCTreeLeafNode(GArffRelation* pRelation, double* pOutputs)		: GPCTreeNode()	{		int nPos = 0;		int nCount = pRelation->GetOutputCount();		m_pOutputs = new double[nCount];		int i, index, values;		for(i = 0; i < nCount; i++)		{			index = pRelation->GetOutputIndex(i);			GArffAttribute* pAttr = pRelation->GetAttribute(index);			if(pAttr->IsContinuous())				m_pOutputs[i] = pOutputs[nPos++];			else			{				values = pAttr->GetValueCount();				m_pOutputs[i] = (double)GVector::FindMax(&pOutputs[nPos], values);				nPos += values;			}		}	}	virtual ~GPCTreeLeafNode()	{		delete[] m_pOutputs;	}	virtual bool IsLeaf()	{		return true;	}	double* GetOutputs()	{		return m_pOutputs;	}};// ---------------------------------------------------------------GPCTree::GPCTree(GArffRelation* pRelation): GSupervisedLearner(pRelation){	m_pRoot = NULL;	m_pEvalVector = NULL;	Reset();}GPCTree::~GPCTree(){	delete(m_pRoot);	delete[] m_pEvalVector;}// virtualvoid GPCTree::Train(GArffData* pData){	delete(m_pRoot);	int nCount = pData->GetSize();	GArffData internalData(nCount);	int i;	double* pVectorIn;	double* pVectorOut;	for(i = 0; i < nCount; i++)	{		pVectorIn = pData->GetVector(i);		pVectorOut = new double[m_nInputVectorSize + m_nOutputVectorSize];		m_pRelation->InputsToVectorMode(pVectorIn, pVectorOut);		m_pRelation->OutputsToVectorMode(pVectorIn, pVectorOut + m_nInputVectorSize);		internalData.AddVector(pVectorOut);	}	double* pBuf = new double[2 * m_nOutputVectorSize + 2 * m_nInputVectorSize];	ArrayHolder<double*> hBuf(pBuf);	m_pRoot = BuildNode(&internalData, pBuf);}// virtualvoid GPCTree::Eval(double* pVector){	m_pRelation->InputsToVectorMode(pVector, m_pEvalVector);	GPCTreeNode* pNode = m_pRoot;	while(!pNode->IsLeaf())	{		if(((GPCTreeInteriorNode*)pNode)->Test(m_pEvalVector, m_nInputVectorSize))			pNode = ((GPCTreeInteriorNode*)pNode)->GetRight();		else			pNode = ((GPCTreeInteriorNode*)pNode)->GetLeft();	}	double* pOutputs = ((GPCTreeLeafNode*)pNode)->GetOutputs();	int nOutputs = m_pRelation->GetOutputCount();	int i, index;	for(i = 0; i < nOutputs; i++)	{		index = m_pRelation->GetOutputIndex(i);		pVector[index] = pOutputs[i];	}}GPCTreeNode* GPCTree::BuildNode(GArffData* pData, double* pBuf){	// Check for a leaf node	int nCount = pData->GetSize();	if(nCount < 2)	{		GAssert(nCount > 0, "no data left");		return new GPCTreeLeafNode(m_pRelation, pData->GetVector(0) + m_nInputVectorSize);	}	// Shift out the input data	int i;	for(i = 0; i < nCount; i++)		pData->SwapVector(i, pData->GetVector(i) + m_nInputVectorSize);	// Compute the output mean and principle component	double* pOutputMean = pBuf;	for(i = 0; i < m_nOutputVectorSize; i++)		pOutputMean[i] = pData->ComputeMean(i);	double* pPrincipleComponent = pBuf + m_nOutputVectorSize;	pData->ComputePrincipleComponent(m_nOutputVectorSize, pPrincipleComponent, 10, false);	// Shift the input data back in	for(i = 0; i < nCount; i++)		pData->SwapVector(i, pData->GetVector(i) - m_nInputVectorSize);	// Find the input mean of each cluster	double* pInputMean1 = pPrincipleComponent + m_nOutputVectorSize;	double* pInputMean2 = pInputMean1 + m_nInputVectorSize;	GVector::SetToZero(pInputMean1, m_nInputVectorSize);	GVector::SetToZero(pInputMean2, m_nInputVectorSize);	int nClusterSize1 = 0;	int nClusterSize2 = 0;	double* pVector;	for(i = 0; i < nCount; i++)	{		pVector = pData->GetVector(i);		if(GVector::TestPivot(pOutputMean, pPrincipleComponent, pVector + m_nInputVectorSize, m_nOutputVectorSize))		{			GVector::Add(pInputMean2, pVector, m_nInputVectorSize);			nClusterSize2++;		}		else		{			GVector::Add(pInputMean1, pVector, m_nInputVectorSize);			nClusterSize1++;		}	}	GVector::Multiply(pInputMean1, 1.0 / nClusterSize1, m_nInputVectorSize);	GVector::Multiply(pInputMean2, 1.0 / nClusterSize2, m_nInputVectorSize);	// Compute the input center	double* pCenter = new double[m_nInputVectorSize];	memcpy(pCenter, pInputMean1, sizeof(double) * m_nInputVectorSize);	GVector::Add(pCenter, pInputMean2, m_nInputVectorSize);	GVector::Multiply(pCenter, .5, m_nInputVectorSize);	// Compute the input normal	double* pNormal = new double[m_nInputVectorSize];	memcpy(pNormal, pInputMean1, sizeof(double) * m_nInputVectorSize);	GVector::Subtract(pNormal, pInputMean2, m_nInputVectorSize);	if(GVector::ComputeSquaredMagnitude(pNormal, m_nInputVectorSize) == 0)		GVector::GetRandomVector(pNormal, m_nInputVectorSize);	else		GVector::Normalize(pNormal, m_nInputVectorSize);	// Make the interior node	GPCTreeInteriorNode* pNode = new GPCTreeInteriorNode(pCenter, pNormal);	// Divide the data	GArffData other(pData->GetSize());	for(i = 0; i < pData->GetSize(); i++)	{		pVector = pData->GetVector(i);		if(pNode->Test(pVector, m_nInputVectorSize))		{			other.AddVector(pData->DropVector(i));			i--;		}	}	// If we couldn't separate anything, just return a leaf node	if(other.GetSize() == 0 || pData->GetSize() == 0)	{		delete(pNode);		return new GPCTreeLeafNode(m_pRelation, pOutputMean);	}	// Build the child nodes	pNode->SetLeft(BuildNode(pData, pBuf));	pNode->SetRight(BuildNode(&other, pBuf));	return pNode;}// virtualvoid GPCTree::Reset(){	delete(m_pRoot);	m_pRoot = NULL;	m_nInputVectorSize = m_pRelation->CountVectorModeInputs();	m_nOutputVectorSize = m_pRelation->CountVectorModeOutputs();	delete[] m_pEvalVector;	m_pEvalVector = new double[m_nInputVectorSize];}// ---------------------------------------------------------------GArbitraryTree::GArbitraryTree(GArffRelation* pRelation, bool bAxisAligned) : GSupervisedLearner(pRelation){	m_pRoot = NULL;	m_pEvalVector = NULL;	m_bAxisAligned = bAxisAligned;	Reset();}GArbitraryTree::~GArbitraryTree(){	delete(m_pRoot);	delete[] m_pEvalVector;}// virtualvoid GArbitraryTree::Train(GArffData* pData){	delete(m_pRoot);	int nCount = pData->GetSize();	GArffData internalData(nCount);	int i;	double* pVectorIn;	double* pVectorOut;	for(i = 0; i < nCount; i++)	{		pVectorIn = pData->GetVector(i);		pVectorOut = new double[m_nInputVectorSize + m_nOutputVectorSize];		m_pRelation->InputsToVectorMode(pVectorIn, pVectorOut);		m_pRelation->OutputsToVectorMode(pVectorIn, pVectorOut + m_nInputVectorSize);		internalData.AddVector(pVectorOut);	}	m_pRoot = BuildNode(&internalData);}// virtualvoid GArbitraryTree::Eval(double* pVector){	m_pRelation->InputsToVectorMode(pVector, m_pEvalVector);	GPCTreeNode* pNode = m_pRoot;	while(!pNode->IsLeaf())	{		if(((GPCTreeInteriorNode*)pNode)->Test(m_pEvalVector, m_nInputVectorSize))			pNode = ((GPCTreeInteriorNode*)pNode)->GetRight();		else			pNode = ((GPCTreeInteriorNode*)pNode)->GetLeft();	}	double* pOutputs = ((GPCTreeLeafNode*)pNode)->GetOutputs();	int nOutputs = m_pRelation->GetOutputCount();	int i, index;	for(i = 0; i < nOutputs; i++)	{		index = m_pRelation->GetOutputIndex(i);		pVector[index] = pOutputs[i];	}}GPCTreeNode* GArbitraryTree::BuildNode(GArffData* pData){	// Check for a leaf node	int nCount = pData->GetSize();	if(nCount < 2)	{		GAssert(nCount > 0, "no data left");		return new GPCTreeLeafNode(m_pRelation, pData->GetVector(0) + m_nInputVectorSize);	}	// Make the interior node	double* pCenter = new double[m_nInputVectorSize];	double* pNormal = new double[m_nInputVectorSize];	GPCTreeInteriorNode* pNode = new GPCTreeInteriorNode(pCenter, pNormal);	GArffData other(pData->GetSize());	// Loop until we find an acceptable division	double* pVector;	int i, nPatience;	for(nPatience = 10; nPatience > 0; nPatience--)	{		// Pick a random division		double* pVec1 = pData->GetVector(rand() % nCount);		double* pVec2 = pData->GetVector(rand() % nCount);		memcpy(pCenter, pVec1, sizeof(double) * m_nInputVectorSize);		GVector::Add(pCenter, pVec2, m_nInputVectorSize);		GVector::Multiply(pCenter, .5, m_nInputVectorSize);		if(m_bAxisAligned)		{			for(i = 0; i < m_nInputVectorSize; i++)				pNormal[i] = 0;			pNormal[rand() % m_nInputVectorSize] = 1;		}		else			GVector::GetRandomVector(pNormal, m_nInputVectorSize);		// Divide the data		for(i = 0; i < pData->GetSize(); i++)		{			pVector = pData->GetVector(i);			if(pNode->Test(pVector, m_nInputVectorSize))			{				other.AddVector(pData->DropVector(i));				i--;			}		}		// See if this division is acceptable		if(other.GetSize() > 0 && pData->GetSize() > 0)			break;		// Merge the data back together		pData->Merge(&other);		GAssert(pData->GetSize() == nCount, "something got lost");	}	// Create the node	if(nPatience > 0)	{		// Build the child nodes		pNode->SetLeft(BuildNode(pData));		pNode->SetRight(BuildNode(&other));		return pNode;	}	else	{		// Pick a random sample for the output		delete(pNode);		double* pVec = pData->GetVector(rand() % nCount);		return new GPCTreeLeafNode(m_pRelation, pVec + m_nInputVectorSize);	}}// virtualvoid GArbitraryTree::Reset(){	delete(m_pRoot);	m_pRoot = NULL;	m_nInputVectorSize = m_pRelation->CountVectorModeInputs();	m_nOutputVectorSize = m_pRelation->CountVectorModeOutputs();	delete[] m_pEvalVector;	m_pEvalVector = new double[m_nInputVectorSize];}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?