// predacc.cpp
// From a machine-learning code collection by Mike Gashler (includes neural-net code).
// (Extracted from a web code viewer: C++, 752 lines total, page 1 of 2.)
}

// Forwards the training data to the dialog so it can display/use it.
// Ownership of pRelation/pTrainingSet is not taken here.
void PredAccView::SetTrainingSet(GArffRelation* pRelation, GArffData* pTrainingSet)
{
	// Pure delegation--the dialog holds the actual widgets.
	m_pDialog->SetTrainingSet(pRelation, pTrainingSet);
}

// Forwards the test data to the dialog so it can display/use it.
// Ownership of pRelation/pTestSet is not taken here.
void PredAccView::SetTestSet(GArffRelation* pRelation, GArffData* pTestSet)
{
	// Pure delegation--the dialog holds the actual widgets.
	m_pDialog->SetTestSet(pRelation, pTestSet);
}





// -------------------------------------------------------------------------------



// Constructs the controller: no data or learner is loaded yet--those are
// created lazily by the Load*/Train methods.
PredAccController::PredAccController()
: ControllerBase()
{
	// Start with all owned resources empty.
	m_pRelation = NULL;
	m_pTrainingSet = NULL;
	m_pTestSet = NULL;
	m_pLearner = NULL;

	// Create the view and also expose it through the base-class pointer.
	m_pPredAccView = new PredAccView(this);
	m_pView = m_pPredAccView;
}

// Releases everything this controller owns: the learner, the relation,
// both data sets, and the view. delete(NULL) is a no-op, so members that
// were never loaded are safe.
// NOTE(review): m_pView aliases m_pPredAccView (set in the ctor); this
// assumes ControllerBase does NOT also delete m_pView -- confirm, else
// this is a double delete.
PredAccController::~PredAccController()
{
	delete(m_pLearner);
	delete(m_pRelation);
	delete(m_pTrainingSet);
	delete(m_pTestSet);
	delete(m_pPredAccView);
}

// Runs the modal event loop until m_bKeepRunning goes false.
// Repaints the view whenever HandleEvents reports a change; otherwise
// sleeps briefly so the loop doesn't spin the CPU.
void PredAccController::RunModal()
{
	double prevTime = GTime::GetTime();
	m_pView->Update(); // initial paint
	while(m_bKeepRunning)
	{
		double curTime = GTime::GetTime();
		// HandleEvents returns true if it thinks the view needs to be updated
		if(HandleEvents(curTime - prevTime))
			m_pView->Update();
		else
			GThread::sleep(10); // idle--yield for 10ms
		prevTime = curTime;
	}
}

// Loads a training set from an ARFF file, replacing any previous one.
// The last attribute is forced to be an output (todo inherited from the
// original: design a better way). If a relation is already loaded (e.g.
// from a previously loaded test set), the new file must have the same
// attribute count; otherwise the new relation is adopted.
// Throws a const char* message on a relation mismatch.
void PredAccController::LoadTrainingSet(const char* szFilename)
{
	delete(m_pTrainingSet);
	m_pTrainingSet = NULL;
	GArffRelation* pRelation;
	GArffRelation::LoadArffFile(&pRelation, &m_pTrainingSet, szFilename);
	// Make the last attribute an output attribute
	pRelation->GetAttribute(pRelation->GetAttributeCount() - 1)->SetIsInput(false);
	if(m_pRelation)
	{
		if(pRelation->GetAttributeCount() != m_pRelation->GetAttributeCount())
		{
			// Bug fix: free the freshly loaded relation before throwing,
			// otherwise it leaked.
			delete(pRelation);
			throw "mismatch relations";
		}
		delete(pRelation); // keep the existing relation
	}
	else
		m_pRelation = pRelation; // adopt the new relation
	m_pPredAccView->SetTrainingSet(m_pRelation, m_pTrainingSet);
}

// Loads a test set from an ARFF file, replacing any previous one.
// The last attribute is forced to be an output (todo inherited from the
// original: design a better way). If a relation is already loaded (e.g.
// from a previously loaded training set), the new file must have the same
// attribute count; otherwise the new relation is adopted.
// Throws a const char* message on a relation mismatch.
void PredAccController::LoadTestSet(const char* szFilename)
{
	delete(m_pTestSet);
	m_pTestSet = NULL;
	GArffRelation* pRelation;
	GArffRelation::LoadArffFile(&pRelation, &m_pTestSet, szFilename);
	// Make the last attribute an output attribute
	pRelation->GetAttribute(pRelation->GetAttributeCount() - 1)->SetIsInput(false);
	if(m_pRelation)
	{
		if(pRelation->GetAttributeCount() != m_pRelation->GetAttributeCount())
		{
			// Bug fix: free the freshly loaded relation before throwing,
			// otherwise it leaked.
			delete(pRelation);
			throw "mismatch relations";
		}
		delete(pRelation); // keep the existing relation
	}
	else
		m_pRelation = pRelation; // adopt the new relation
	m_pPredAccView->SetTestSet(m_pRelation, m_pTestSet);
}

// Loads an ARFF file and randomly splits it into a training set and a
// test set; dTestPercent (0..100) of the rows go to the test set.
// Replaces any previously loaded relation and data sets.
void PredAccController::LoadAndSplitTrainingSet(const char* szFilename, double dTestPercent)
{
	delete(m_pTrainingSet);
	m_pTrainingSet = NULL;
	delete(m_pTestSet);
	m_pTestSet = NULL;
	// Bug fix: LoadArffFile overwrites m_pRelation, so free any old
	// relation first (it leaked before).
	delete(m_pRelation);
	m_pRelation = NULL;
	GArffRelation::LoadArffFile(&m_pRelation, &m_pTrainingSet, szFilename);
	// Make the last attribute an output attribute--todo: design a better way
	m_pRelation->GetAttribute(m_pRelation->GetAttributeCount() - 1)->SetIsInput(false);
	// Shuffle before splitting so the split is random
	m_pTrainingSet->Shuffle();
	m_pTestSet = m_pTrainingSet->SplitBySize((int)(dTestPercent * m_pTrainingSet->GetSize() / 100));
	m_pPredAccView->SetTrainingSet(m_pRelation, m_pTrainingSet);
	m_pPredAccView->SetTestSet(m_pRelation, m_pTestSet);
}

void PredAccController::ShuffleTrainingSet()
{
	if(m_pTrainingSet)
		m_pTrainingSet->Shuffle();
	m_pPredAccView->SetTrainingSet(m_pRelation, m_pTrainingSet);
}

void PredAccController::ShuffleTestSet()
{
	if(m_pTestSet)
		m_pTestSet->Shuffle();
	m_pPredAccView->SetTestSet(m_pRelation, m_pTestSet);
}

// Trains the selected algorithm on the training set, then measures it on
// that SAME set (an optimistic "training accuracy" check, not a fair test).
void PredAccController::TrainAndTestSingleSet(int nAlgorithm)
{
	GAssert(m_pTrainingSet, "no training set loaded");
	Train(nAlgorithm, m_pRelation, m_pTrainingSet);
	Test(nAlgorithm, m_pTrainingSet); // note: tests on the training data
}

// Trains the selected algorithm on the training set, then measures its
// accuracy/error on the held-out test set. Both sets must be loaded.
void PredAccController::TrainAndTest(int nAlgorithm)
{
	GAssert(m_pTrainingSet, "no training set loaded");
	GAssert(m_pTestSet, "no test set loaded");
	Train(nAlgorithm, m_pRelation, m_pTrainingSet);
	Test(nAlgorithm, m_pTestSet);
}

void PredAccController::DoNFoldCrossValidation(int nAlgorithm, int nParts)
{
	// Make the last attribute an output attribute--todo: design a better way
	m_pRelation->GetAttribute(m_pRelation->GetAttributeCount() - 1)->SetIsInput(false);
//	m_pRelation->GetAttribute(0)->SetIsInput(false);

	// Determine if it's a regression or classification problem
	bool bRegression = true;
	int i;
	for(i = 0; i < m_pRelation->GetOutputCount(); i++)
	{
		if(!m_pRelation->GetAttribute(m_pRelation->GetOutputIndex(i))->IsContinuous())
		{
			bRegression = false;
			break;
		}
	}

	// Split the data into parts
	GArffData** pSets = (GArffData**)alloca(sizeof(GArffData*) * nParts);
	int nSize = m_pTrainingSet->GetSize() / nParts + nParts;
	int n, j;
	for(n = 0; n < nParts; n++)
		pSets[n] = new GArffData(nSize);
	int nRowCount = m_pTrainingSet->GetSize();
	double* pRow;
	for(n = 0; n < nRowCount; n++)
	{
		pRow = m_pTrainingSet->GetVector(n);
		pSets[n % nParts]->AddVector(pRow);
	}

	// Do the training and testing
	double d;
	double dScore = 0;
	int nCorrect = 0;
	for(n = 0; n < nParts; n++)
	{
		// Merge all sets but one
		GArffData* pTrainer = new GArffData(m_pTrainingSet->GetSize());
		for(i = 0; i < nParts; i++)
		{
			if(i == n)
				continue;
			int nCount = pSets[i]->GetSize();
			for(j = 0; j < nCount; j++)
			{
				pRow = pSets[i]->GetVector(j);
				pTrainer->AddVector(pRow);
			}
		}

		// Make the learner ant train it
		GSupervisedLearner* pLearner = MakeLearner(nAlgorithm, m_pRelation);
		pLearner->Train(pTrainer);

		// Test it
		if(bRegression)
			d = pLearner->MeasureMeanSquaredError(pSets[n]);
		else
			d = pLearner->MeasurePredictiveAccuracy(pSets[n]);
		printf("Cross Validation Set %d/%d = %f\n", n, nParts, d);
		dScore += d;

		// Clean up
		delete(pLearner);
		pTrainer->DropAllVectors();
		delete(pTrainer);
	}
	dScore /= nParts;

	// Show results
	printf("\n\nFinal Cross Validation Results...\n");
	if(bRegression)
		printf("Average Mean Squared Error: %f\n", dScore);
	else
		printf("Average Predictive Accuracy: %f\n", dScore);

	// Clean up
	for(n = 0; n < nParts; n++)
	{
		pSets[n]->DropAllVectors();
		delete(pSets[n]);
	}
}

// Factory: builds a freshly configured learner for the given algorithm id
// (0..12) over pRelation. The caller takes ownership of the returned
// learner. Returns NULL (after asserting) for an unknown id.
GSupervisedLearner* PredAccController::MakeLearner(int nAlgorithm, GArffRelation* pRelation)
{
	switch(nAlgorithm)
	{
	case 0: // single decision tree
		{
			printf("Decision Tree...\n");
			return new GDecisionTree(pRelation, GDecisionTree::MINIMIZE_ENTROPY);
		}
	case 1: // small single-hidden-layer net
		{
			printf("Neural Net (o-8-i)...\n");
			GNeuralNet* pNet = new GNeuralNet(pRelation);
			pNet->AddLayer(8);
			pNet->SetRunEpochs(400);
			pNet->SetMaximumEpochs(2000);
			return pNet;
		}
	case 2: // two small hidden layers
		{
			printf("Neural Net (o-4-4-i)...\n");
			GNeuralNet* pNet = new GNeuralNet(pRelation);
			pNet->AddLayer(4);
			pNet->AddLayer(4);
			pNet->SetRunEpochs(1000);
			pNet->SetMaximumEpochs(5000);
			return pNet;
		}
	case 3: // two larger hidden layers, longer training
		{
			printf("Neural Net (o-10-10-i)...\n");
			GNeuralNet* pNet = new GNeuralNet(pRelation);
			pNet->AddLayer(10);
			pNet->AddLayer(10);
			pNet->SetRunEpochs(3000);
			pNet->SetMaximumEpochs(15000);
			return pNet;
		}
	case 4:
		{
			printf("Naive Bayes...\n");
			return new GNaiveBayes(pRelation);
		}
	case 5: // KNN, k=2
		{
			printf("K-Nearest Neighbor...\n");
			return new GKNN(pRelation, 2, true);
		}
	case 6: // KNN, k=5
		{
			printf("K-Nearest Neighbor...\n");
			return new GKNN(pRelation, 5, true);
		}
	case 7: // KNN, k=13
		{
			printf("K-Nearest Neighbor...\n");
			return new GKNN(pRelation, 13, true);
		}
	case 8: // bagged ensemble of axis-aligned random trees
		{
			printf("Axis Aligned Forest (100 trees)...\n");
			GBag* pEnsemble = new GBag(pRelation, 100);
			for(int nTree = 0; nTree < 100; nTree++)
				pEnsemble->AddLearner(new GArbitraryTree(pRelation, true));
			return pEnsemble;
		}
	case 9: // bagged ensemble of non-axis-aligned random trees
		{
			printf("Arbitrary Arboretum (100 trees)...\n");
			GBag* pEnsemble = new GBag(pRelation, 100);
			for(int nTree = 0; nTree < 100; nTree++)
				pEnsemble->AddLearner(new GArbitraryTree(pRelation, false));
			return pEnsemble;
		}
	case 10: // bagged ensemble of PC trees
		{
			printf("PC Forest (100 trees)...\n");
			GBag* pEnsemble = new GBag(pRelation, 100);
			for(int nTree = 0; nTree < 100; nTree++)
				pEnsemble->AddLearner(new GPCTree(pRelation));
			return pEnsemble;
		}
	case 11: // manifold-pumped neural net (pumper owns the inner learner)
		{
			printf("Pumped Neural Net...\n");
			GManifoldPumper* pPumper = new GManifoldPumper(pRelation, 1, 6, 18);
			GNeuralNet* pNet = new GNeuralNet(pPumper->GetRelation());
			pNet->AddLayer(8);
			pNet->SetRunEpochs(1000);
			pNet->SetMaximumEpochs(5000);
			pPumper->SetLearner(pNet, true);
			return pPumper;
		}
	case 12: // manifold-pumped KNN (pumper owns the inner learner)
		{
			printf("Pumped KNN...\n");
			GManifoldPumper* pPumper = new GManifoldPumper(pRelation, 1, 6, 18);
			GKNN* pInner = new GKNN(pPumper->GetRelation(), 5, true);
			pPumper->SetLearner(pInner, true);
			return pPumper;
		}
	default:
		{
			GAssert(false, "unexpected algorithm");
			return NULL;
		}
	}
}

// Builds the learner for the selected algorithm and trains it on
// pTrainingSet, reporting the wall-clock training time. The trained
// learner is stored in m_pLearner (owned; freed in the destructor).
void PredAccController::Train(int nAlgorithm, GArffRelation* pRelation, GArffData* pTrainingSet)
{
	// Make the last attribute an output attribute--todo: design a better way
	pRelation->GetAttribute(pRelation->GetAttributeCount() - 1)->SetIsInput(false);
//	pRelation->GetAttribute(0)->SetIsInput(false);

	// Make the learner. Bug fix: free any learner from a previous run
	// first--it leaked before (delete(NULL) is a no-op on the first call).
	delete(m_pLearner);
	m_pLearner = NULL;
	m_pLearner = MakeLearner(nAlgorithm, pRelation);

	// Train it
	printf("Dataset name: %s\n", pRelation->GetName());
	printf("Training set size: %d\n", pTrainingSet->GetSize());
	printf("Training...\n");
	double dTimeStart = GTime::GetTime();
	m_pLearner->Train(pTrainingSet);
	printf("training time=%lf seconds\n", GTime::GetTime() - dTimeStart);
}

// Measures the trained learner (m_pLearner) on pTestSet and prints the
// result: predictive accuracy if any output is discrete, mean squared
// error if any output is continuous (both are printed for mixed outputs).
// nAlgorithm is unused here; kept for signature symmetry with Train.
void PredAccController::Test(int nAlgorithm, GArffData* pTestSet)
{
	printf("\nTesting...\n");

	// Scan the output attributes to see which metrics apply
	bool bAnyDiscrete = false;
	bool bAnyContinuous = false;
	int nOut;
	for(nOut = 0; nOut < m_pRelation->GetOutputCount(); nOut++)
	{
		if(m_pRelation->GetAttribute(m_pRelation->GetOutputIndex(nOut))->IsContinuous())
			bAnyContinuous = true;
		else
			bAnyDiscrete = true;
	}

	if(bAnyDiscrete)
		printf("Predictive Accuracy = %f\n", m_pLearner->MeasurePredictiveAccuracy(pTestSet));
	if(bAnyContinuous)
		printf("Mean Squared Error = %f\n", m_pLearner->MeasureMeanSquaredError(pTestSet));
}

// (End of extracted page 1; code-viewer UI text removed.)