gpctree.cpp
来自「一个由Mike Gashler完成的机器学习方面的includes neural」· C++ 代码 · 共 441 行
CPP
441 行
/* Copyright (C) 2006, Mike Gashler This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. see http://www.gnu.org/copyleft/lesser.html*/#include "GPCTree.h"#include "GArff.h"#include "GVector.h"#include "GBits.h"#include <stdlib.h>class GPCTreeNode{public: GPCTreeNode() { } virtual ~GPCTreeNode() { } virtual bool IsLeaf() = 0;};class GPCTreeInteriorNode : public GPCTreeNode{protected: double* m_pCenter; double* m_pNormal; GPCTreeNode* m_pLeft; GPCTreeNode* m_pRight;public: GPCTreeInteriorNode(double* pCenter, double* pNormal) : GPCTreeNode() { m_pCenter = pCenter; m_pNormal = pNormal; m_pLeft = NULL; m_pRight = NULL; } virtual ~GPCTreeInteriorNode() { delete[] m_pCenter; delete[] m_pNormal; delete(m_pLeft); delete(m_pRight); } virtual bool IsLeaf() { return false; } bool Test(double* pInputVector, int nInputs) { return GVector::TestPivot(m_pCenter, m_pNormal, pInputVector, nInputs); } void SetLeft(GPCTreeNode* pNode) { m_pLeft = pNode; } void SetRight(GPCTreeNode* pNode) { m_pRight = pNode; } GPCTreeNode* GetRight() { return m_pRight; } GPCTreeNode* GetLeft() { return m_pLeft; }};class GPCTreeLeafNode : public GPCTreeNode{protected: double* m_pOutputs;public: GPCTreeLeafNode(GArffRelation* pRelation, double* pOutputs) : GPCTreeNode() { int nPos = 0; int nCount = pRelation->GetOutputCount(); m_pOutputs = new double[nCount]; int i, index, values; for(i = 0; i < nCount; i++) { index = pRelation->GetOutputIndex(i); GArffAttribute* pAttr = pRelation->GetAttribute(index); if(pAttr->IsContinuous()) m_pOutputs[i] = pOutputs[nPos++]; else { values = pAttr->GetValueCount(); m_pOutputs[i] = (double)GVector::FindMax(&pOutputs[nPos], values); nPos += values; } } } virtual ~GPCTreeLeafNode() { delete[] m_pOutputs; } virtual bool IsLeaf() { return true; } double* GetOutputs() { return m_pOutputs; }};// ---------------------------------------------------------------GPCTree::GPCTree(GArffRelation* pRelation): GSupervisedLearner(pRelation){ m_pRoot = NULL; m_pEvalVector = NULL; Reset();}GPCTree::~GPCTree(){ delete(m_pRoot); delete[] m_pEvalVector;}// virtualvoid GPCTree::Train(GArffData* pData){ delete(m_pRoot); int nCount = pData->GetSize(); GArffData internalData(nCount); int i; double* pVectorIn; double* pVectorOut; for(i = 0; i < nCount; i++) { pVectorIn = pData->GetVector(i); pVectorOut = new double[m_nInputVectorSize + m_nOutputVectorSize]; m_pRelation->InputsToVectorMode(pVectorIn, pVectorOut); m_pRelation->OutputsToVectorMode(pVectorIn, pVectorOut + m_nInputVectorSize); internalData.AddVector(pVectorOut); } double* pBuf = new double[2 * m_nOutputVectorSize + 2 * m_nInputVectorSize]; ArrayHolder<double*> hBuf(pBuf); m_pRoot = BuildNode(&internalData, pBuf);}// virtualvoid GPCTree::Eval(double* pVector){ m_pRelation->InputsToVectorMode(pVector, m_pEvalVector); GPCTreeNode* pNode = m_pRoot; while(!pNode->IsLeaf()) { if(((GPCTreeInteriorNode*)pNode)->Test(m_pEvalVector, m_nInputVectorSize)) pNode = ((GPCTreeInteriorNode*)pNode)->GetRight(); else pNode = ((GPCTreeInteriorNode*)pNode)->GetLeft(); } double* pOutputs = ((GPCTreeLeafNode*)pNode)->GetOutputs(); int nOutputs = m_pRelation->GetOutputCount(); int i, index; for(i = 0; i < nOutputs; i++) { index = m_pRelation->GetOutputIndex(i); pVector[index] = pOutputs[i]; }}GPCTreeNode* GPCTree::BuildNode(GArffData* pData, double* pBuf){ // Check for a leaf node int nCount = pData->GetSize(); if(nCount < 2) { GAssert(nCount > 0, "no data left"); return new GPCTreeLeafNode(m_pRelation, pData->GetVector(0) + m_nInputVectorSize); } // Shift out the input data int i; for(i = 0; i < nCount; i++) pData->SwapVector(i, pData->GetVector(i) + m_nInputVectorSize); // Compute the output mean and principle component double* pOutputMean = pBuf; for(i = 0; i < m_nOutputVectorSize; i++) pOutputMean[i] = pData->ComputeMean(i); double* pPrincipleComponent = pBuf + m_nOutputVectorSize; pData->ComputePrincipleComponent(m_nOutputVectorSize, pPrincipleComponent, 10, false); // Shift the input data back in for(i = 0; i < nCount; i++) pData->SwapVector(i, pData->GetVector(i) - m_nInputVectorSize); // Find the input mean of each cluster double* pInputMean1 = pPrincipleComponent + m_nOutputVectorSize; double* pInputMean2 = pInputMean1 + m_nInputVectorSize; GVector::SetToZero(pInputMean1, m_nInputVectorSize); GVector::SetToZero(pInputMean2, m_nInputVectorSize); int nClusterSize1 = 0; int nClusterSize2 = 0; double* pVector; for(i = 0; i < nCount; i++) { pVector = pData->GetVector(i); if(GVector::TestPivot(pOutputMean, pPrincipleComponent, pVector + m_nInputVectorSize, m_nOutputVectorSize)) { GVector::Add(pInputMean2, pVector, m_nInputVectorSize); nClusterSize2++; } else { GVector::Add(pInputMean1, pVector, m_nInputVectorSize); nClusterSize1++; } } GVector::Multiply(pInputMean1, 1.0 / nClusterSize1, m_nInputVectorSize); GVector::Multiply(pInputMean2, 1.0 / nClusterSize2, m_nInputVectorSize); // Compute the input center double* pCenter = new double[m_nInputVectorSize]; memcpy(pCenter, pInputMean1, sizeof(double) * m_nInputVectorSize); GVector::Add(pCenter, pInputMean2, m_nInputVectorSize); GVector::Multiply(pCenter, .5, m_nInputVectorSize); // Compute the input normal double* pNormal = new double[m_nInputVectorSize]; memcpy(pNormal, pInputMean1, sizeof(double) * m_nInputVectorSize); GVector::Subtract(pNormal, pInputMean2, m_nInputVectorSize); if(GVector::ComputeSquaredMagnitude(pNormal, m_nInputVectorSize) == 0) GVector::GetRandomVector(pNormal, m_nInputVectorSize); else GVector::Normalize(pNormal, m_nInputVectorSize); // Make the interior node GPCTreeInteriorNode* pNode = new GPCTreeInteriorNode(pCenter, pNormal); // Divide the data GArffData other(pData->GetSize()); for(i = 0; i < pData->GetSize(); i++) { pVector = pData->GetVector(i); if(pNode->Test(pVector, m_nInputVectorSize)) { other.AddVector(pData->DropVector(i)); i--; } } // If we couldn't separate anything, just return a leaf node if(other.GetSize() == 0 || pData->GetSize() == 0) { delete(pNode); return new GPCTreeLeafNode(m_pRelation, pOutputMean); } // Build the child nodes pNode->SetLeft(BuildNode(pData, pBuf)); pNode->SetRight(BuildNode(&other, pBuf)); return pNode;}// virtualvoid GPCTree::Reset(){ delete(m_pRoot); m_pRoot = NULL; m_nInputVectorSize = m_pRelation->CountVectorModeInputs(); m_nOutputVectorSize = m_pRelation->CountVectorModeOutputs(); delete[] m_pEvalVector; m_pEvalVector = new double[m_nInputVectorSize];}// ---------------------------------------------------------------GArbitraryTree::GArbitraryTree(GArffRelation* pRelation, bool bAxisAligned) : GSupervisedLearner(pRelation){ m_pRoot = NULL; m_pEvalVector = NULL; m_bAxisAligned = bAxisAligned; Reset();}GArbitraryTree::~GArbitraryTree(){ delete(m_pRoot); delete[] m_pEvalVector;}// virtualvoid GArbitraryTree::Train(GArffData* pData){ delete(m_pRoot); int nCount = pData->GetSize(); GArffData internalData(nCount); int i; double* pVectorIn; double* pVectorOut; for(i = 0; i < nCount; i++) { pVectorIn = pData->GetVector(i); pVectorOut = new double[m_nInputVectorSize + m_nOutputVectorSize]; m_pRelation->InputsToVectorMode(pVectorIn, pVectorOut); m_pRelation->OutputsToVectorMode(pVectorIn, pVectorOut + m_nInputVectorSize); internalData.AddVector(pVectorOut); } m_pRoot = BuildNode(&internalData);}// virtualvoid GArbitraryTree::Eval(double* pVector){ m_pRelation->InputsToVectorMode(pVector, m_pEvalVector); GPCTreeNode* pNode = m_pRoot; while(!pNode->IsLeaf()) { if(((GPCTreeInteriorNode*)pNode)->Test(m_pEvalVector, m_nInputVectorSize)) pNode = ((GPCTreeInteriorNode*)pNode)->GetRight(); else pNode = ((GPCTreeInteriorNode*)pNode)->GetLeft(); } double* pOutputs = ((GPCTreeLeafNode*)pNode)->GetOutputs(); int nOutputs = m_pRelation->GetOutputCount(); int i, index; for(i = 0; i < nOutputs; i++) { index = m_pRelation->GetOutputIndex(i); pVector[index] = pOutputs[i]; }}GPCTreeNode* GArbitraryTree::BuildNode(GArffData* pData){ // Check for a leaf node int nCount = pData->GetSize(); if(nCount < 2) { GAssert(nCount > 0, "no data left"); return new GPCTreeLeafNode(m_pRelation, pData->GetVector(0) + m_nInputVectorSize); } // Make the interior node double* pCenter = new double[m_nInputVectorSize]; double* pNormal = new double[m_nInputVectorSize]; GPCTreeInteriorNode* pNode = new GPCTreeInteriorNode(pCenter, pNormal); GArffData other(pData->GetSize()); // Loop until we find an acceptable division double* pVector; int i, nPatience; for(nPatience = 10; nPatience > 0; nPatience--) { // Pick a random division double* pVec1 = pData->GetVector(rand() % nCount); double* pVec2 = pData->GetVector(rand() % nCount); memcpy(pCenter, pVec1, sizeof(double) * m_nInputVectorSize); GVector::Add(pCenter, pVec2, m_nInputVectorSize); GVector::Multiply(pCenter, .5, m_nInputVectorSize); if(m_bAxisAligned) { for(i = 0; i < m_nInputVectorSize; i++) pNormal[i] = 0; pNormal[rand() % m_nInputVectorSize] = 1; } else GVector::GetRandomVector(pNormal, m_nInputVectorSize); // Divide the data for(i = 0; i < pData->GetSize(); i++) { pVector = pData->GetVector(i); if(pNode->Test(pVector, m_nInputVectorSize)) { other.AddVector(pData->DropVector(i)); i--; } } // See if this division is acceptable if(other.GetSize() > 0 && pData->GetSize() > 0) break; // Merge the data back together pData->Merge(&other); GAssert(pData->GetSize() == nCount, "something got lost"); } // Create the node if(nPatience > 0) { // Build the child nodes pNode->SetLeft(BuildNode(pData)); pNode->SetRight(BuildNode(&other)); return pNode; } else { // Pick a random sample for the output delete(pNode); double* pVec = pData->GetVector(rand() % nCount); return new GPCTreeLeafNode(m_pRelation, pVec + m_nInputVectorSize); }}// virtualvoid GArbitraryTree::Reset(){ delete(m_pRoot); m_pRoot = NULL; m_nInputVectorSize = m_pRelation->CountVectorModeInputs(); m_nOutputVectorSize = m_pRelation->CountVectorModeOutputs(); delete[] m_pEvalVector; m_pEvalVector = new double[m_nInputVectorSize];}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?