📄 gmanifold.cpp

📁 一个非常有用的开源代码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
上一页 12
		}	}	return pDataOut;}// --------------------------------------------------------------------------GPCA::GPCA(GArffRelation* pRelation, GArffData* pData){	m_pRelation = pRelation;	m_pInputData = pData;	m_pOutputData = NULL;}GPCA::~GPCA(){	delete(m_pOutputData);}/*static*/ GArffData* GPCA::DoPCA(GArffRelation* pRelation, GArffData* pData, GVector* pOutEigenValues){	GPCA pca(pRelation, pData);	pca.DoPCA(pOutEigenValues);	return pca.DropOutputData();}void GPCA::DoPCA(GVector* pOutEigenValues){	// Compute the eigenvectors	GMatrix m;	m_pInputData->ComputeCovarianceMatrix(&m, m_pRelation);	GMatrix eigenVectors;	m.ComputeEigenVectors(pOutEigenValues, &eigenVectors);	m_pOutputData = new GArffData(m_pInputData->GetSize());	int nRowCount = m_pInputData->GetSize();	int nInputCount = m_pRelation->GetInputCount();	int nOutputCount = m_pRelation->GetOutputCount();	int nAttributeCount = m_pRelation->GetAttributeCount();	double* pInputRow;	double* pOutputRow;	int n, i, j, nIndex;	// Allocate space for the output	for(n = 0; n < nRowCount; n++)	{		pOutputRow = new double[nAttributeCount];		m_pOutputData->AddVector(pOutputRow);	}	// Compute the output	Holder<double*> hEigenVector(new double[nInputCount]);	double* pEigenVector = hEigenVector.Get();	Holder<double*> hInputVector(new double[nInputCount]);	double* pInputVector = hInputVector.Get();	for(i = 0; i < nInputCount; i++)	{		nIndex = m_pRelation->GetInputIndex(i);		for(n = 0; n < nInputCount; n++)			pEigenVector[n] = eigenVectors.Get(i, n);		for(n = 0; n < nRowCount; n++)		{			pInputRow = m_pInputData->GetVector(n);			for(j = 0; j < nInputCount; j++)				pInputVector[j] = pInputRow[m_pRelation->GetInputIndex(j)];			pOutputRow = m_pOutputData->GetVector(n);			pOutputRow[nIndex] = GVector::ComputeDotProduct(pInputVector, pEigenVector, nInputCount);		}	}	for(i = 0; i < nOutputCount; i++)	{		for(n = 0; n < nRowCount; n++)		{			nIndex = m_pRelation->GetOutputIndex(i);			pInputRow = m_pInputData->GetVector(n);			pOutputRow = m_pOutputData->GetVector(n);			pOutputRow[nIndex] = pInputRow[nIndex];		}		}}GArffData* GPCA::DropOutputData(){	GArffData* pData = m_pOutputData;	m_pOutputData = NULL;	return pData;}// --------------------------------------------------------------------------GLLE::GLLE(GArffRelation* pRelation, GArffData* pData, int nNeighbors){	m_pRelation = pRelation;	m_pInputData = pData;	m_nNeighbors = nNeighbors;	m_pNeighbors = NULL;	m_pWeights = NULL;	m_pOutputData = NULL;}GLLE::~GLLE(){	delete(m_pNeighbors);	delete(m_pWeights);	delete(m_pOutputData);}GArffData* GLLE::DropOutputData(){	GArffData* pData = m_pOutputData;	m_pOutputData = NULL;	return pData;}/*static*/ GArffData* GLLE::DoLLE(GArffRelation* pRelation, GArffData* pData, int nNeighbors){	GLLE lle(pRelation, pData, nNeighbors);	lle.FindNeighbors();	lle.ComputeWeights();	lle.ComputeEmbedding();	return lle.DropOutputData();}void GLLE::FindNeighbors(){	// Allocate space for the neighbors	delete(m_pNeighbors);	m_pNeighbors = new int[m_nNeighbors * m_pInputData->GetSize()];	// For now we'll just brute force it--todo: use a better neighbor finding technique	GTEMPBUF(double, pBestDistances, m_nNeighbors);	int nRowCount = m_pInputData->GetSize();	double* pRow1;	double* pRow2;	double d;	int i, j, k, nWorstBestDistance;	for(i = 0; i < nRowCount; i++)	{		// Reset the best distances		for(j = 0; j < m_nNeighbors; j++)			pBestDistances[j] = 1e100;		nWorstBestDistance = 0;		// Check every other point		pRow1 = m_pInputData->GetVector(i);		for(j = 0; j < nRowCount; j++)		{			if(j == i)				continue;			pRow2 = m_pInputData->GetVector(j);			d = m_pRelation->ComputeInputDistanceSquared(pRow1, pRow2);			if(d < pBestDistances[nWorstBestDistance])			{				pBestDistances[nWorstBestDistance] = d;				m_pNeighbors[i * m_nNeighbors + nWorstBestDistance] = j;				// Find new worst of the best distances				nWorstBestDistance = 0;				for(k = 1; k < m_nNeighbors; k++)				{					if(pBestDistances[k] > pBestDistances[nWorstBestDistance])						nWorstBestDistance = k;				}			}		}		GAssert(pBestDistances[nWorstBestDistance] < 1e99, "Failed to find enough neighbors");	}//for(i = 0; i < m_nNeighbors * m_pInputData->GetSize(); i++)//printf("%d\n", m_pNeighbors[i]);}void GLLE::ComputeWeights(){	int nDimensions = m_pRelation->GetInputCount();	int nRowCount = m_pInputData->GetSize();	GTEMPBUF(double, pVec, m_nNeighbors);	m_pWeights = new GMatrix(nRowCount, nRowCount); // todo: this should be a sparse matrix	double* pRowNeighbor;	double* pRow;	double dSum;	int n, i, j, nIndex;	for(n = 0; n < nRowCount; n++)	{		// Create a matrix of all the neighbors normalized around the origin		GMatrix z(m_nNeighbors, nDimensions);		pRow = m_pInputData->GetVector(n);		for(i = 0; i < m_nNeighbors; i++)		{			pRowNeighbor = m_pInputData->GetVector(m_pNeighbors[n * m_nNeighbors + i]);			for(j = 0; j < nDimensions; j++)			{				nIndex = m_pRelation->GetInputIndex(j);				z.Set(i, j, pRowNeighbor[nIndex] - pRow[nIndex]);			}		}		// Compute local covariance (sort of, it's actually relative to the original point, not the mean, so it's not really true covariance)		GMatrix transpose;		transpose.Copy(&z);		transpose.Transpose();		GMatrix covariance;		covariance.Multiply(&z, &transpose);		GAssert(covariance.GetRowCount() == covariance.GetColumnCount(), "expected a square matrix");		GAssert(covariance.GetRowCount() == m_nNeighbors, "unexpected size");		// if the number of neighbors is more than the number of dimensions then the covariance will not be full rank so we need to regularize it		if(m_nNeighbors > nDimensions)		{			double dReg = covariance.ComputeTrace() * .001;			int nRows = covariance.GetRowCount();			for(i = 0; i < nRows; i++)				covariance.Set(i, i, covariance.Get(i, i) + dReg);		}		// Compute the weights		for(i = 0; i < m_nNeighbors; i++)			pVec[i] = 1;		covariance.Solve(pVec);		// Normalize the weights (so they sum to one)		dSum = 0;		for(i = 0; i < m_nNeighbors; i++)			dSum += pVec[i];		for(i = 0; i < nRowCount; i++)			m_pWeights->Set(n, i, 0);		for(i = 0; i < m_nNeighbors; i++)			m_pWeights->Set(n, m_pNeighbors[n * m_nNeighbors + i], pVec[i] / dSum);	}/*	printf("Weights:\n");	m_pWeights->Print();	printf("\n");*/}void GLLE::ComputeEmbedding(){	//  Subtract the weights from the identity matrix	int row, col;	int nRowCount = m_pInputData->GetSize();	for(row = 0; row < nRowCount; row++)	{		for(col = 0; col < nRowCount; col++)			m_pWeights->Set(row, col, -m_pWeights->Get(row, col));	}	for(row = 0; row < nRowCount; row++)		m_pWeights->Set(row, row, m_pWeights->Get(row, row) + 1);	// Compute the cost matrix	GMatrix transposed;	transposed.Copy(m_pWeights);	transposed.Transpose();	GMatrix m;	m.Multiply(&transposed, m_pWeights);//	printf("Cost Matrix:\n");//	m.Print();//	printf("%f\t%f\n%f\t%f\n", m.Get(0, 0), m.Get(0, m.GetColumnCount() - 1), m.Get(m.GetSize() - 1, 0), m.Get(m.GetSize() - 1, m.GetColumnCount()));//	printf("\n");	// Compute eigen vectors	GMatrix mEigenVectors;	GVector vEigenValues;/*printf("Cost Matrix\n");m.PrintCorners(2);*/	m.ComputeEigenVectors(&vEigenValues, &mEigenVectors);//printf("Eigen Vectors\n");//mEigenVectors.PrintCorners(2);//mEigenVectors.Print();/*	// Print the eigen values and corresponding vectors	printf("Eigen values and vectors\n");	for(row = 0; row < nRowCount; row++)	{		printf("%f\n", vEigenValues.Get(row));		//for(col = 0; col < nRowCount; col++)		//	printf("\t%f\n", mEigenVectors.Get(row, col));	}*/	// Allocate space for the output data	m_pOutputData = new GArffData(nRowCount);	int nAttributes = m_pRelation->GetAttributeCount();	int nInputs = m_pRelation->GetInputCount();	for(row = 0; row < nRowCount; row++)		m_pOutputData->AddVector(new double[nAttributes]);	// Compute the transformed data by dividing the eigen vectors by the square root of the eigen values	double d;	int nColumns = nInputs;	//GAssert(nColumns < nRowCount, "Not enough data to compute values");	int nEigen = 1;//nRowCount - 2;	double* pInputRow;	double* pOutputRow;	int nIndex;	d = sqrt((double)nRowCount);	for(col = 0; col < nColumns; col++)	{/*		if(vEigenValues.Get(nEigen) >= 0)			d = 1 / sqrt(vEigenValues.Get(nEigen));		else			d = 1 / (-sqrt(-vEigenValues.Get(nEigen)));*/		nIndex = m_pRelation->GetInputIndex(col);		for(row = 0; row < nRowCount; row++)		{			pOutputRow = m_pOutputData->GetVector(row);			pOutputRow[nIndex] = mEigenVectors.Get(nEigen, row) * d;		}		nEigen++;//--;	}	int nOutputs = m_pRelation->GetOutputCount();	for(col = 0; col < nOutputs; col++)	{		nIndex = m_pRelation->GetOutputIndex(col);		for(row = 0; row < nRowCount; row++)		{			pInputRow = m_pInputData->GetVector(row);			pOutputRow = m_pOutputData->GetVector(row);			pOutputRow[nIndex] = pInputRow[nIndex];		}	}/*	// Print the final data	printf("Final data:\n");	for(row = 0; row < nRowCount; row++)	{		pOutputRow = m_pOutputData->GetVector(row);		printf("%f", pOutputRow[0]);		for(col = 1; col < nInputDimensions; col++)			printf("\t%f", pOutputRow[col]);		printf("\n");	}*/}// --------------------------------------------------------------------------GManifoldPumper::GManifoldPumper(GArffRelation* pInputRelation, int nNewDimensions, int nManifoldMapNeighbors, int nManifoldSculptingNeighbors) : GSupervisedLearner(MakePumpedRelation(pInputRelation, nNewDimensions)){	m_pLearner = NULL;	m_nManifoldSculptingNeighbors = nManifoldSculptingNeighbors;	m_pInputRelation = pInputRelation;	m_nNewDimensions = nNewDimensions;	m_pVector = new double[m_pRelation->GetAttributeCount()];	m_bOwnLearner = false;	// Make the manifold map	m_pMapRelation = new GArffRelation();	int i;	for(i = 0; i < pInputRelation->GetInputCount(); i++)	{		//GAssert(pInputRelation->GetAttribute(pInputRelation->GetInputIndex(i))->IsContinuous() == true, "GManifoldPumper only supports continuous input values");		m_pMapRelation->AddAttribute(new GArffAttribute(true, 0, NULL));	}	for(i = 0; i < nNewDimensions; i++)		m_pMapRelation->AddAttribute(new GArffAttribute(false, 0, NULL));	m_pManifoldMap = new GKNN(m_pMapRelation, nManifoldMapNeighbors);}GManifoldPumper::~GManifoldPumper(){	if(m_bOwnLearner)		delete(m_pLearner);	delete(m_pManifoldMap);	delete(m_pMapRelation);	delete[] m_pVector;}GArffRelation* GManifoldPumper::MakePumpedRelation(GArffRelation* pInputRelation, int nNewDimensions){	int i;	GArffRelation* pPumpedRelation = new GArffRelation();	for(i = 0; i < pInputRelation->GetInputCount(); i++)		pPumpedRelation->AddAttribute(new GArffAttribute(true, 0, NULL));	for(i = 0; i < nNewDimensions; i++)		pPumpedRelation->AddAttribute(new GArffAttribute(true, 0, NULL));	for(i = 0; i < pInputRelation->GetOutputCount(); i++)		pPumpedRelation->AddAttribute(new GArffAttribute(false, 0, NULL));	return pPumpedRelation;}void GManifoldPumper::SetLearner(GSupervisedLearner* pLearner, bool bOwn){	GAssert(pLearner->GetRelation() == GetRelation(), "The learner must be constructed with the relation obtained by calling GetRelation on this GManifoldPumper object");	m_pLearner = pLearner;	m_bOwnLearner = bOwn;}// virtualvoid GManifoldPumper::Train(GArffData* pData){	GAssert(m_pLearner, "SetLearner must be called before Train is called");	// Use Manifold Sculpting to reduce the dimensionality of the data	GArffData* pTransformedData = GManifoldSculpting::DoManifoldSculpting(GManifoldSculpting::PP_NONE /*PP_LLE*/, m_pInputRelation, pData, m_nNewDimensions, m_nManifoldSculptingNeighbors, 10, 100);	GAssert(pTransformedData->GetSize() == pData->GetSize(), "transformed data isn't consistent");	// Train the manifold map	int i, j;	double* pInputVector;	double* pTransformedVector;	GTEMPBUF(double, pVector, m_pMapRelation->GetAttributeCount());	for(i = 0; i < pData->GetSize(); i++)	{		pInputVector = pData->GetVector(i);		pTransformedVector = pTransformedData->GetVector(i);		for(j = 0; j < m_pMapRelation->GetInputCount(); j++)			pVector[j] = pInputVector[m_pInputRelation->GetInputIndex(j)];		for(j = 0; j < m_pMapRelation->GetOutputCount(); j++)			pVector[m_pMapRelation->GetInputCount() + j] = pTransformedVector[m_pInputRelation->GetInputIndex(j)];		m_pManifoldMap->AddVector(pVector);	}	// Train the learner	GArffData* pPumpedData = PumpData(pData);	Holder<GArffData*> hPumpedData(pPumpedData);	m_pLearner->Train(pPumpedData);}GArffData* GManifoldPumper::PumpData(GArffData* pData){	int i;	GArffData* pPumpedData = new GArffData(pData->GetSize());	double* pVector;	for(i = 0; i < pData->GetSize(); i++)	{		pVector = pData->GetVector(i);		double* pPumpedVector = new double[m_pRelation->GetAttributeCount()];		PumpVector(pPumpedVector, pVector);		pPumpedData->AddVector(pPumpedVector);	}	return pPumpedData;}void GManifoldPumper::PumpVector(double* pOut, const double* pIn){	int nInputRelationInputs = m_pInputRelation->GetInputCount();	int nInputRelationOutputs = m_pInputRelation->GetOutputCount();	int i;	// Copy the original input values	for(i = 0; i < nInputRelationInputs; i++)		pOut[i] = pIn[m_pInputRelation->GetInputIndex(i)];	// Compute the additional inputs	m_pManifoldMap->EvalLinearWeight(pOut);	// Copy the output values	for(i = 0; i < nInputRelationOutputs; i++)		pOut[m_pRelation->GetOutputIndex(i)] = pIn[m_pInputRelation->GetOutputIndex(i)];}// virtualvoid GManifoldPumper::Eval(double* pVector){	PumpVector(m_pVector, pVector);	m_pLearner->Eval(pVector);}void GManifoldPumper::EvalPumpedVector(double* pVector){	m_pLearner->Eval(pVector);}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -