glearner.cpp
From "a machine-learning library by Mike Gashler (includes neural-network code)" · C++ source · 276 lines
CPP
276 行
/* Copyright (C) 2006, Mike Gashler

	This library is free software; you can redistribute it and/or
	modify it under the terms of the GNU Lesser General Public
	License as published by the Free Software Foundation; either
	version 2.1 of the License, or (at your option) any later version.
	see http://www.gnu.org/copyleft/lesser.html
*/

#include "GLearner.h"
#include "GArff.h"
#include <stdlib.h>
#include <string.h>
#include "GMacros.h"

GSupervisedLearner::GSupervisedLearner(GArffRelation* pRelation)
{
	// Non-owning pointer; the relation must outlive this learner.
	m_pRelation = pRelation;
}

GSupervisedLearner::~GSupervisedLearner()
{
}

// Returns the fraction of output values this learner predicts correctly
// over pData (higher is better, in [0, 1]). A discrete output contributes 1
// when the predicted class matches and 0 otherwise. A continuous output
// contributes a squashed closeness score 1/(1+d^2), which is 1 for a perfect
// prediction and tends to 0 as the error d grows, since exact equality is
// meaningless for real values. Returns 0 if pData is empty.
double GSupervisedLearner::MeasurePredictiveAccuracy(GArffData* pData)
{
	int nInputCount = m_pRelation->GetInputCount();
	int nOutputCount = m_pRelation->GetOutputCount();
	int nAttributes = m_pRelation->GetAttributeCount();
	int nIndex;
	GTEMPBUF(double, pVector, nAttributes);
	double* pRow;
	int nRowCount = pData->GetSize();
	double dCorrectCount = 0;
	double d;
	int nTotalCount = 0;
	int n, i;
	for(n = 0; n < nRowCount; n++)
	{
		pRow = pData->GetVector(n);

		// Copy the input values into the vector
		for(i = 0; i < nInputCount; i++)
		{
			nIndex = m_pRelation->GetInputIndex(i);
			pVector[nIndex] = pRow[nIndex];
		}

		// Mess up the output values just to be safe, so a learner that
		// merely echoes its input can't score well by accident
		for(i = 0; i < nOutputCount; i++)
		{
			nIndex = m_pRelation->GetOutputIndex(i);
			pVector[nIndex] = 1e100;
		}

		// Evaluate
		Eval(pVector);

		// Check the answer
		for(i = 0; i < nOutputCount; i++)
		{
			nIndex = m_pRelation->GetOutputIndex(i);
			if(m_pRelation->GetAttribute(nIndex)->IsContinuous())
			{
				// Predictive accuracy doesn't really make sense for real values,
				// so we use a squashed closeness estimate instead.
				// (Fixed: the old code added 1-1/(1+d*d), which is an *error*
				// term — 0 for a perfect prediction — inverting the metric.)
				d = pRow[nIndex] - pVector[nIndex];
				dCorrectCount += (1.0 / (1.0 + (d * d)));
			}
			else
			{
				if((int)pVector[nIndex] == (int)pRow[nIndex])
					dCorrectCount++;
			}
			nTotalCount++;
		}
	}
	if(nTotalCount == 0)
		return 0; // empty data or no outputs; avoid dividing by zero
	return dCorrectCount / nTotalCount;
}

// Returns the mean squared error of this learner's predictions over pData
// (lower is better). A continuous output contributes its squared difference.
// Squared error doesn't really make sense for discrete values, so an
// incorrect classification contributes an error of 1 and a correct one 0.
// Returns 0 if pData is empty or there are no outputs.
double GSupervisedLearner::MeasureMeanSquaredError(GArffData* pData)
{
	int nInputCount = m_pRelation->GetInputCount();
	int nOutputCount = m_pRelation->GetOutputCount();
	int nAttributes = m_pRelation->GetAttributeCount();
	int nIndex;
	GTEMPBUF(double, pVector, nAttributes);
	double* pRow;
	int nRowCount = pData->GetSize();
	double dError = 0;
	double d;
	int n, i;
	for(n = 0; n < nRowCount; n++)
	{
		pRow = pData->GetVector(n);

		// Copy the input values into the sample
		for(i = 0; i < nInputCount; i++)
		{
			nIndex = m_pRelation->GetInputIndex(i);
			pVector[nIndex] = pRow[nIndex];
		}

		// Mess up the output values just to be safe
		for(i = 0; i < nOutputCount; i++)
		{
			nIndex = m_pRelation->GetOutputIndex(i);
			pVector[nIndex] = 1e100;
		}

		// Evaluate
		Eval(pVector);

		// Check the answer
		for(i = 0; i < nOutputCount; i++)
		{
			nIndex = m_pRelation->GetOutputIndex(i);
			if(m_pRelation->GetAttribute(nIndex)->IsContinuous())
			{
				d = pRow[nIndex] - pVector[nIndex];
				dError += (d * d);
			}
			else
			{
				// Squared error doesn't really make sense for discrete
				// values, so we'll just say an incorrect classification
				// corresponds to an error of 1.
				if((int)pVector[nIndex] != (int)pRow[nIndex])
					dError += 1;
			}
		}
	}
	if(nRowCount == 0 || nOutputCount == 0)
		return 0; // avoid dividing by zero on empty data
	return dError / (nRowCount * nOutputCount);
}

// Performs nFolds-fold cross-validation. The rows of pData are dealt
// round-robin into nFolds partitions; each partition in turn is held out
// for testing while the learner is reset and trained on the other folds.
// If bRegression is true the returned score is the average mean squared
// error (lower is better); otherwise it is the average predictive accuracy
// (higher is better). The temporary sets only borrow row pointers from
// pData, so they are dropped (not deleted) before returning.
double GSupervisedLearner::CrossValidate(GArffData* pData, int nFolds, bool bRegression)
{
	// Split the data into parts
	GArffData** pSets = (GArffData**)alloca(sizeof(GArffData*) * nFolds);
	int nSize = pData->GetSize() / nFolds + nFolds; // generous per-fold capacity
	int n, i, j;
	for(n = 0; n < nFolds; n++)
		pSets[n] = new GArffData(nSize);
	int nRowCount = pData->GetSize();
	double* pRow;
	for(n = 0; n < nRowCount; n++)
	{
		pRow = pData->GetVector(n);
		pSets[n % nFolds]->AddVector(pRow);
	}

	// Do the training and testing
	double d;
	double dScore = 0;
	for(n = 0; n < nFolds; n++)
	{
		// Merge all sets but one
		GArffData* pTrainer = new GArffData(pData->GetSize());
		for(i = 0; i < nFolds; i++)
		{
			if(i == n)
				continue;
			int nCount = pSets[i]->GetSize();
			for(j = 0; j < nCount; j++)
			{
				pRow = pSets[i]->GetVector(j);
				pTrainer->AddVector(pRow);
			}
		}

		// Train the learner
		Reset();
		Train(pTrainer);

		// Test it on the held-out fold
		if(bRegression)
			d = MeasureMeanSquaredError(pSets[n]);
		else
			d = MeasurePredictiveAccuracy(pSets[n]);
		dScore += d;

		// Clean up (the rows belong to pData, so just drop the pointers)
		pTrainer->DropAllVectors();
		delete(pTrainer);
	}
	dScore /= nFolds;

	// Clean up
	for(n = 0; n < nFolds; n++)
	{
		pSets[n]->DropAllVectors();
		delete(pSets[n]);
	}
	return dScore;
}

// ---------------------------------------------------------------

GBaselineLearner::GBaselineLearner(GArffRelation* pRelation)
	: GSupervisedLearner(pRelation)
{
	// One memorized prediction per output attribute
	int nOutputs = pRelation->GetOutputCount();
	m_pOutputs = new double[nOutputs];
}

// virtual
GBaselineLearner::~GBaselineLearner()
{
	delete[] m_pOutputs;
}

// virtual
void GBaselineLearner::Reset()
{
	// Nothing to reset; Train overwrites all state
}

// virtual
// "Trains" the baseline model: for each continuous output it memorizes the
// mean value over pData, and for each discrete output the most common class.
void GBaselineLearner::Train(GArffData* pData)
{
	int nOutputs = m_pRelation->GetOutputCount();
	int i, j, index, val;
	int nCount = pData->GetSize();

	// Size a scratch histogram big enough for the widest discrete output
	int nMaxValues = 0;
	for(i = 0; i < nOutputs; i++)
		nMaxValues = MAX(nMaxValues, m_pRelation->GetAttribute(m_pRelation->GetOutputIndex(i))->GetValueCount());
	GTEMPBUF(int, counts, nMaxValues);

	double dSum;
	GArffAttribute* pAttr;
	double* pVector;
	for(i = 0; i < nOutputs; i++)
	{
		index = m_pRelation->GetOutputIndex(i);
		pAttr = m_pRelation->GetAttribute(index);
		if(pAttr->IsContinuous())
		{
			// Predict the mean output value (0 if there is no training data,
			// which avoids a 0/0 NaN)
			dSum = 0;
			for(j = 0; j < nCount; j++)
			{
				pVector = pData->GetVector(j);
				dSum += pVector[index];
			}
			m_pOutputs[i] = (nCount > 0 ? dSum / nCount : 0);
		}
		else
		{
			// Predict the most common class label
			memset(counts, '\0', sizeof(int) * pAttr->GetValueCount());
			for(j = 0; j < nCount; j++)
			{
				pVector = pData->GetVector(j);
				val = (int)pVector[index];
				// Ignore unknown (negative) and out-of-range values so we
				// never index past the end of the histogram
				if(val >= 0 && val < pAttr->GetValueCount())
					counts[val]++;
			}
			val = 0;
			for(j = 1; j < pAttr->GetValueCount(); j++)
			{
				if(counts[j] > counts[val])
					val = j;
			}
			m_pOutputs[i] = (double)val;
		}
	}
}

// virtual
// Predicts by writing the memorized baseline value for every output
// attribute into pVector. The input values in pVector are ignored.
void GBaselineLearner::Eval(double* pVector)
{
	int nOutputs = m_pRelation->GetOutputCount();
	int i;
	for(i = 0; i < nOutputs; i++)
		pVector[m_pRelation->GetOutputIndex(i)] = m_pOutputs[i];
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?