// predacc.cpp
// From a machine-learning library (neural nets and friends) by Mike Gashler.
// Full file is 752 lines; this chunk is page 1 of 2.
}
// Forwards the training set (and its relation) to the owned dialog so the
// UI can display it. Does not take ownership of either pointer.
void PredAccView::SetTrainingSet(GArffRelation* pRelation, GArffData* pTrainingSet)
{
m_pDialog->SetTrainingSet(pRelation, pTrainingSet);
}
// Forwards the test set (and its relation) to the owned dialog so the
// UI can display it. Does not take ownership of either pointer.
void PredAccView::SetTestSet(GArffRelation* pRelation, GArffData* pTestSet)
{
m_pDialog->SetTestSet(pRelation, pTestSet);
}
// -------------------------------------------------------------------------------
// -------------------------------------------------------------------------------
// Constructs the controller: creates and adopts the view, and starts with
// no relation, no data sets, and no trained learner.
PredAccController::PredAccController()
: ControllerBase()
{
	m_pPredAccView = new PredAccView(this);
	m_pView = m_pPredAccView; // base-class view pointer aliases the concrete view
	m_pLearner = NULL;
	m_pTestSet = NULL;
	m_pTrainingSet = NULL;
	m_pRelation = NULL;
}
// Releases everything this controller owns. delete on a null pointer is a
// no-op, so members that were never loaded are safe here. Note that m_pView
// is only an alias for m_pPredAccView (see the constructor), so it must not
// be deleted separately.
PredAccController::~PredAccController()
{
delete(m_pLearner);
delete(m_pRelation);
delete(m_pTrainingSet);
delete(m_pTestSet);
delete(m_pPredAccView);
}
// Runs the modal event loop until m_bKeepRunning is cleared. The view is
// drawn once up front; after that it is redrawn only when HandleEvents
// reports a change, and idle iterations sleep briefly to avoid spinning.
void PredAccController::RunModal()
{
	double prevTime = GTime::GetTime();
	m_pView->Update();
	while(m_bKeepRunning)
	{
		double curTime = GTime::GetTime();
		// HandleEvents returns true if it thinks the view needs to be updated
		if(HandleEvents(curTime - prevTime))
		{
			m_pView->Update();
		}
		else
		{
			GThread::sleep(10);
		}
		prevTime = curTime;
	}
}
// Loads a training set from an ARFF file, replacing any previous training
// set. If a relation was already established by an earlier load, the new
// file must have the same attribute count; otherwise "mismatch relations"
// is thrown. On first load, the file's relation becomes the controller's.
// Fix: the original threw before deleting the freshly loaded pRelation,
// leaking it on a mismatch; the delete now happens before the throw.
void PredAccController::LoadTrainingSet(const char* szFilename)
{
	// Discard any previously loaded training set
	delete(m_pTrainingSet);
	m_pTrainingSet = NULL;
	GArffRelation* pRelation;
	GArffRelation::LoadArffFile(&pRelation, &m_pTrainingSet, szFilename);
	// Make the last attribute an output attribute
	pRelation->GetAttribute(pRelation->GetAttributeCount() - 1)->SetIsInput(false);
	if(m_pRelation)
	{
		// A relation already exists; keep it, but verify the new file is
		// structurally compatible before discarding the freshly loaded one.
		bool bMismatch = (pRelation->GetAttributeCount() != m_pRelation->GetAttributeCount());
		delete(pRelation); // delete before a possible throw so it can't leak
		if(bMismatch)
			throw "mismatch relations";
	}
	else
		m_pRelation = pRelation; // adopt the relation from the first load
	m_pPredAccView->SetTrainingSet(m_pRelation, m_pTrainingSet);
}
// Loads a test set from an ARFF file, replacing any previous test set.
// Mirrors LoadTrainingSet: an already-established relation is kept, and the
// new file must match its attribute count or "mismatch relations" is thrown.
// Fix: the original threw before deleting the freshly loaded pRelation,
// leaking it on a mismatch; the delete now happens before the throw.
void PredAccController::LoadTestSet(const char* szFilename)
{
	// Discard any previously loaded test set
	delete(m_pTestSet);
	m_pTestSet = NULL;
	GArffRelation* pRelation;
	GArffRelation::LoadArffFile(&pRelation, &m_pTestSet, szFilename);
	// Make the last attribute an output attribute
	pRelation->GetAttribute(pRelation->GetAttributeCount() - 1)->SetIsInput(false);
	if(m_pRelation)
	{
		// A relation already exists; keep it, but verify the new file is
		// structurally compatible before discarding the freshly loaded one.
		bool bMismatch = (pRelation->GetAttributeCount() != m_pRelation->GetAttributeCount());
		delete(pRelation); // delete before a possible throw so it can't leak
		if(bMismatch)
			throw "mismatch relations";
	}
	else
		m_pRelation = pRelation; // adopt the relation from the first load
	m_pPredAccView->SetTestSet(m_pRelation, m_pTestSet);
}
// Loads an ARFF file and randomly splits it into a training set and a test
// set, where dTestPercent (0..100) of the rows go to the test set. Both the
// relation and the data replace whatever was previously loaded.
// Fix: the original passed &m_pRelation straight to LoadArffFile without
// freeing an existing relation, leaking it on a second load; it is now
// deleted first.
void PredAccController::LoadAndSplitTrainingSet(const char* szFilename, double dTestPercent)
{
	// Discard all previously loaded state
	delete(m_pTrainingSet);
	m_pTrainingSet = NULL;
	delete(m_pTestSet);
	m_pTestSet = NULL;
	delete(m_pRelation);
	m_pRelation = NULL;
	GArffRelation::LoadArffFile(&m_pRelation, &m_pTrainingSet, szFilename);
	// Make the last attribute an output attribute
	m_pRelation->GetAttribute(m_pRelation->GetAttributeCount() - 1)->SetIsInput(false);
	// Shuffle before splitting so the split is random, not positional
	m_pTrainingSet->Shuffle();
	m_pTestSet = m_pTrainingSet->SplitBySize((int)(dTestPercent * m_pTrainingSet->GetSize() / 100));
	m_pPredAccView->SetTrainingSet(m_pRelation, m_pTrainingSet);
	m_pPredAccView->SetTestSet(m_pRelation, m_pTestSet);
}
void PredAccController::ShuffleTrainingSet()
{
if(m_pTrainingSet)
m_pTrainingSet->Shuffle();
m_pPredAccView->SetTrainingSet(m_pRelation, m_pTrainingSet);
}
void PredAccController::ShuffleTestSet()
{
if(m_pTestSet)
m_pTestSet->Shuffle();
m_pPredAccView->SetTestSet(m_pRelation, m_pTestSet);
}
// Trains the chosen algorithm on the training set, then tests on that SAME
// set (resubstitution accuracy — measures fit, not generalization).
void PredAccController::TrainAndTestSingleSet(int nAlgorithm)
{
GAssert(m_pTrainingSet, "no training set loaded");
Train(nAlgorithm, m_pRelation, m_pTrainingSet);
Test(nAlgorithm, m_pTrainingSet);
}
// Trains the chosen algorithm on the training set and evaluates it on the
// separately loaded test set. Both sets must already be loaded.
void PredAccController::TrainAndTest(int nAlgorithm)
{
GAssert(m_pTrainingSet, "no training set loaded");
GAssert(m_pTestSet, "no test set loaded");
Train(nAlgorithm, m_pRelation, m_pTrainingSet);
Test(nAlgorithm, m_pTestSet);
}
// Performs n-fold cross validation of the chosen algorithm over the loaded
// training set: the rows are dealt round-robin into nParts folds, and for
// each fold a fresh learner is trained on the other nParts-1 folds and
// measured on the held-out one. Prints the per-fold score and the average.
// Uses mean squared error if ALL outputs are continuous, otherwise
// predictive accuracy.
void PredAccController::DoNFoldCrossValidation(int nAlgorithm, int nParts)
{
// Make the last attribute an output attribute--todo: design a better way
m_pRelation->GetAttribute(m_pRelation->GetAttributeCount() - 1)->SetIsInput(false);
// m_pRelation->GetAttribute(0)->SetIsInput(false);
// Determine if it's a regression or classification problem
bool bRegression = true;
int i;
for(i = 0; i < m_pRelation->GetOutputCount(); i++)
{
if(!m_pRelation->GetAttribute(m_pRelation->GetOutputIndex(i))->IsContinuous())
{
bRegression = false;
break;
}
}
// Split the data into parts. The fold objects only borrow row pointers from
// m_pTrainingSet (see DropAllVectors at the end), so the master set keeps
// ownership of the rows throughout.
GArffData** pSets = (GArffData**)alloca(sizeof(GArffData*) * nParts);
// nSize over-estimates rows-per-fold; presumably a capacity hint for the
// GArffData constructor — TODO confirm the ctor's argument semantics
int nSize = m_pTrainingSet->GetSize() / nParts + nParts;
int n, j;
for(n = 0; n < nParts; n++)
pSets[n] = new GArffData(nSize);
int nRowCount = m_pTrainingSet->GetSize();
double* pRow;
// Deal the rows round-robin into the folds
for(n = 0; n < nRowCount; n++)
{
pRow = m_pTrainingSet->GetVector(n);
pSets[n % nParts]->AddVector(pRow);
}
// Do the training and testing
double d;
double dScore = 0;
int nCorrect = 0;
for(n = 0; n < nParts; n++)
{
// Merge all folds except fold n into a temporary training set (again
// borrowing the row pointers, not copying the rows)
GArffData* pTrainer = new GArffData(m_pTrainingSet->GetSize());
for(i = 0; i < nParts; i++)
{
if(i == n)
continue;
int nCount = pSets[i]->GetSize();
for(j = 0; j < nCount; j++)
{
pRow = pSets[i]->GetVector(j);
pTrainer->AddVector(pRow);
}
}
// Make the learner and train it
GSupervisedLearner* pLearner = MakeLearner(nAlgorithm, m_pRelation);
pLearner->Train(pTrainer);
// Test it on the held-out fold
if(bRegression)
d = pLearner->MeasureMeanSquaredError(pSets[n]);
else
d = pLearner->MeasurePredictiveAccuracy(pSets[n]);
printf("Cross Validation Set %d/%d = %f\n", n, nParts, d);
dScore += d;
// Clean up. DropAllVectors releases the borrowed row pointers so deleting
// pTrainer doesn't free rows still owned by m_pTrainingSet.
delete(pLearner);
pTrainer->DropAllVectors();
delete(pTrainer);
}
dScore /= nParts;
// Show results
printf("\n\nFinal Cross Validation Results...\n");
if(bRegression)
printf("Average Mean Squared Error: %f\n", dScore);
else
printf("Average Predictive Accuracy: %f\n", dScore);
// Clean up the folds the same way: drop borrowed rows, then delete
for(n = 0; n < nParts; n++)
{
pSets[n]->DropAllVectors();
delete(pSets[n]);
}
}
// Factory for the supported learning algorithms. Allocates and configures
// the learner selected by nAlgorithm (0..12), announcing the choice on
// stdout. The caller takes ownership of the returned learner. Returns NULL
// (after asserting) for an unknown algorithm number.
GSupervisedLearner* PredAccController::MakeLearner(int nAlgorithm, GArffRelation* pRelation)
{
	switch(nAlgorithm)
	{
		case 0:
		{
			printf("Decision Tree...\n");
			return new GDecisionTree(pRelation, GDecisionTree::MINIMIZE_ENTROPY);
		}
		case 1:
		{
			// Single hidden layer of 8 units
			printf("Neural Net (o-8-i)...\n");
			GNeuralNet* pNet = new GNeuralNet(pRelation);
			pNet->AddLayer(8);
			pNet->SetRunEpochs(400);
			pNet->SetMaximumEpochs(2000);
			return pNet;
		}
		case 2:
		{
			// Two hidden layers of 4 units each
			printf("Neural Net (o-4-4-i)...\n");
			GNeuralNet* pNet = new GNeuralNet(pRelation);
			pNet->AddLayer(4);
			pNet->AddLayer(4);
			pNet->SetRunEpochs(1000);
			pNet->SetMaximumEpochs(5000);
			return pNet;
		}
		case 3:
		{
			// Two hidden layers of 10 units each
			printf("Neural Net (o-10-10-i)...\n");
			GNeuralNet* pNet = new GNeuralNet(pRelation);
			pNet->AddLayer(10);
			pNet->AddLayer(10);
			pNet->SetRunEpochs(3000);
			pNet->SetMaximumEpochs(15000);
			return pNet;
		}
		case 4:
		{
			printf("Naive Bayes...\n");
			return new GNaiveBayes(pRelation);
		}
		case 5:
		{
			// k = 2
			printf("K-Nearest Neighbor...\n");
			return new GKNN(pRelation, 2, true);
		}
		case 6:
		{
			// k = 5
			printf("K-Nearest Neighbor...\n");
			return new GKNN(pRelation, 5, true);
		}
		case 7:
		{
			// k = 13
			printf("K-Nearest Neighbor...\n");
			return new GKNN(pRelation, 13, true);
		}
		case 8:
		{
			// Bag of 100 axis-aligned arbitrary trees
			printf("Axis Aligned Forest (100 trees)...\n");
			GBag* pBag = new GBag(pRelation, 100);
			for(int i = 0; i < 100; i++)
				pBag->AddLearner(new GArbitraryTree(pRelation, true));
			return pBag;
		}
		case 9:
		{
			// Bag of 100 non-axis-aligned arbitrary trees
			printf("Arbitrary Arboretum (100 trees)...\n");
			GBag* pBag = new GBag(pRelation, 100);
			for(int i = 0; i < 100; i++)
				pBag->AddLearner(new GArbitraryTree(pRelation, false));
			return pBag;
		}
		case 10:
		{
			// Bag of 100 principal-component trees
			printf("PC Forest (100 trees)...\n");
			GBag* pBag = new GBag(pRelation, 100);
			for(int i = 0; i < 100; i++)
				pBag->AddLearner(new GPCTree(pRelation));
			return pBag;
		}
		case 11:
		{
			// Neural net wrapped in a manifold pumper (pumper owns the net)
			printf("Pumped Neural Net...\n");
			GManifoldPumper* pPumper = new GManifoldPumper(pRelation, 1, 6, 18);
			GNeuralNet* pNet = new GNeuralNet(pPumper->GetRelation());
			pNet->AddLayer(8);
			pNet->SetRunEpochs(1000);
			pNet->SetMaximumEpochs(5000);
			pPumper->SetLearner(pNet, true);
			return pPumper;
		}
		case 12:
		{
			// KNN wrapped in a manifold pumper (pumper owns the KNN)
			printf("Pumped KNN...\n");
			GManifoldPumper* pPumper = new GManifoldPumper(pRelation, 1, 6, 18);
			pPumper->SetLearner(new GKNN(pPumper->GetRelation(), 5, true), true);
			return pPumper;
		}
		default:
		{
			GAssert(false, "unexpected algorithm");
			return NULL;
		}
	}
}
// Creates the learner selected by nAlgorithm, stores it in m_pLearner, and
// trains it on pTrainingSet, printing the dataset name, set size, and the
// elapsed training time.
// Fix: the original overwrote m_pLearner without freeing the learner from a
// previous run, leaking it each time Train was called again.
void PredAccController::Train(int nAlgorithm, GArffRelation* pRelation, GArffData* pTrainingSet)
{
	// Make the last attribute an output attribute--todo: design a better way
	pRelation->GetAttribute(pRelation->GetAttributeCount() - 1)->SetIsInput(false);
	// pRelation->GetAttribute(0)->SetIsInput(false);
	// Release any learner left over from a previous run before replacing it
	delete(m_pLearner);
	m_pLearner = NULL;
	// Make the learner
	m_pLearner = MakeLearner(nAlgorithm, pRelation);
	// Train it
	printf("Dataset name: %s\n", pRelation->GetName());
	printf("Training set size: %d\n", pTrainingSet->GetSize());
	printf("Training...\n");
	double dTimeStart = GTime::GetTime();
	m_pLearner->Train(pTrainingSet);
	printf("training time=%lf seconds\n", GTime::GetTime() - dTimeStart);
}
// Evaluates the already-trained m_pLearner on pTestSet. Prints predictive
// accuracy when any output attribute is discrete, and mean squared error
// when any output attribute is continuous (both, for mixed outputs).
void PredAccController::Test(int nAlgorithm, GArffData* pTestSet)
{
	printf("\nTesting...\n");
	// Scan the output attributes to see which metric(s) apply
	bool bAnyDiscrete = false;
	bool bAnyContinuous = false;
	int nOutputs = m_pRelation->GetOutputCount();
	for(int i = 0; i < nOutputs; i++)
	{
		if(m_pRelation->GetAttribute(m_pRelation->GetOutputIndex(i))->IsContinuous())
			bAnyContinuous = true;
		else
			bAnyDiscrete = true;
	}
	if(bAnyDiscrete)
		printf("Predictive Accuracy = %f\n", m_pLearner->MeasurePredictiveAccuracy(pTestSet));
	if(bAnyContinuous)
		printf("Mean Squared Error = %f\n", m_pLearner->MeasureMeanSquaredError(pTestSet));
}
// (End of page 1 of 2. Code-viewer UI chrome — a keyboard-shortcut help
// panel scraped from the hosting page — removed; it was not part of the
// original source file.)