⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gmanifold.cpp

📁 一个非常有用的开源代码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include "GManifold.h"#include <stdio.h>#include "GArray.h"#include <math.h>#include "GPointerQueue.h"#include "GArff.h"#include "GMatrix.h"#include "GVector.h"#include "GBits.h"#include "GKNN.h"struct GManifoldSculptingNeighbor{	unsigned char* m_pNeighbor;	unsigned char* m_pNeighborsNeighbor;	double m_dCosTheta;	double m_dDistance;};struct GManifoldSculptingMetaData{	int m_nCycle;	bool m_bAdjustable;};GManifoldSculpting::GManifoldSculpting(int nDataPoints, int nDimensions, int nNeighbors){	m_nDataPoints = nDataPoints;	m_nDimensions = nDimensions;	m_nNeighbors = nNeighbors;	m_nDataIndex = sizeof(struct GManifoldSculptingNeighbor) * m_nNeighbors;	m_nValueIndex = m_nDataIndex + sizeof(struct GManifoldSculptingMetaData);	m_nRecordSize = m_nValueIndex + sizeof(double) * m_nDimensions;	m_pData = new unsigned char[m_nRecordSize * nDataPoints];	m_dSquishingRate = .99;	m_nTargetDimensions = 0;	m_nPass = 0;	m_dAveNeighborDist = 0;	m_pQ = NULL;	m_nSmoothingAdvantage = 10;}GManifoldSculpting::~GManifoldSculpting(){	delete(m_pData);	delete(m_pQ);}void GManifoldSculpting::SetDataPoint(int n, double* pValues, bool bAdjustable){	memcpy(&m_pData[n * m_nRecordSize + m_nValueIndex], pValues, sizeof(double) * m_nDimensions);	struct GManifoldSculptingMetaData* pData = GetMetaData(n);	pData->m_bAdjustable = bAdjustable;}void GManifoldSculpting::SetData(GArffRelation* pRelation, GArffData* pData){	GAssert(pData->GetSize() == m_nDataPoints, "wrong number of data points");	int nInputs = pRelation->GetInputCount();	GTEMPBUF(double, pInputs, nInputs);	double* pRow;	int n, i;	for(n = 0; n < m_nDataPoints; n++)	{		pRow = pData->GetVector(n);		for(i = 0; i < nInputs; i++)			pInputs[i] = pRow[pRelation->GetInputIndex(i)];		SetDataPoint(n, pInputs, true);	}}double* GManifoldSculpting::GetDataPoint(int n){	return (double*)&m_pData[n * m_nRecordSize + m_nValueIndex];}int GManifoldSculpting::DataPointSortCompare(unsigned char* pA, unsigned char* pB){	pA += m_nValueIndex;	pB += m_nValueIndex;	double dA = ((double*)pA)[m_nCurrentDimension];	double dB = ((double*)pB)[m_nCurrentDimension];	if(dA > dB)		return 1;	else if(dA < dB)		return -1;	return 0;}int GManifoldSculpting_DataPointSortCompare(void* pThis, void* pA, void* pB){	return ((GManifoldSculpting*)pThis)->DataPointSortCompare((unsigned char*)pA, (unsigned char*)pB);}int GManifoldSculpting::FindMostDistantNeighbor(struct GManifoldSculptingNeighbor* pNeighbors){	int nMostDistant = 0;	int n;	for(n = 1; n < m_nNeighbors; n++)	{		if(pNeighbors[n].m_dDistance > pNeighbors[nMostDistant].m_dDistance)			nMostDistant = n;	}	return nMostDistant;}double GManifoldSculpting::CalculateDistance(unsigned char* pA, unsigned char* pB){	pA += m_nValueIndex;	pB += m_nValueIndex;	double* pdA = (double*)pA;	double* pdB = (double*)pB;	double dTmp;	double dSum = 0;	int n;	for(n = 0; n < m_nDimensions; n++)	{		dTmp = pdB[n] - pdA[n];		dSum += (dTmp * dTmp);	}	return sqrt(dSum);}double GManifoldSculpting::CalculateVectorCorrelation(unsigned char* pA, unsigned char* pVertex, unsigned char* pB){	pA += m_nValueIndex;	pVertex += m_nValueIndex;	pB += m_nValueIndex;	double* pdA = (double*)pA;	double* pdV = (double*)pVertex;	double* pdB = (double*)pB;	double dDotProd = 0;	double dMagA = 0;	double dMagB = 0;	double dA, dB;	int n;	for(n = 0; n < m_nDimensions; n++)	{		dA = pdA[n] - pdV[n];		dB = pdB[n] - pdV[n];		dDotProd += (dA * dB);		dMagA += (dA * dA);		dMagB += (dB * dB);	}	if(dDotProd == 0)		return 0;	double dCorrelation = dDotProd / (sqrt(dMagA) * sqrt(dMagB));	GAssert(dCorrelation > -2 && dCorrelation < 2, "out of range");	return dCorrelation;}void GManifoldSculpting::CalculateMetadata(int nTargetDimensions){	// Calculate how far we need to look to have a good chance of finding the	// nearest neighbors	int nDimSpan = (int)(pow((double)m_nDataPoints, (double)1 / nTargetDimensions) * 2);	if(nDimSpan < m_nNeighbors * 2)		nDimSpan = m_nNeighbors * 2;	// Find the nearest neighbors	GPointerArray arr(m_nDataPoints);	int n, i, j;	for(n = 0; n < m_nDataPoints; n++)		arr.AddPointer(&m_pData[n * m_nRecordSize]);	// Initialize everybody's neighbors	struct GManifoldSculptingNeighbor* pNeighbors;	for(n = 0; n < m_nDataPoints; n++)	{		pNeighbors = (struct GManifoldSculptingNeighbor*)arr.GetPointer(n);		for(i = 0; i < m_nNeighbors; i++)		{			pNeighbors[i].m_pNeighbor = NULL;			pNeighbors[i].m_pNeighborsNeighbor = NULL;			pNeighbors[i].m_dDistance = 1e100;		}	}	// Find the nearest neighbors	int nMostDistantNeighbor;	int nStart, nEnd;	double dDistance;	unsigned char* pCandidate;	for(m_nCurrentDimension = 0; m_nCurrentDimension < m_nDimensions; m_nCurrentDimension++)	{		// Sort on the current dimension		arr.Sort(GManifoldSculpting_DataPointSortCompare, this);		// Do a pass in this dimension for every data point to search for nearest neighbors		for(n = 0; n < m_nDataPoints; n++)		{			// Check all the data points that are close in this dimension			pNeighbors = (struct GManifoldSculptingNeighbor*)arr.GetPointer(n);			nMostDistantNeighbor = FindMostDistantNeighbor(pNeighbors);			nStart = MAX(0, n - nDimSpan);			nEnd = MIN(m_nDataPoints - 1, n + nDimSpan);			for(i = nStart; i <= nEnd; i++)			{				if(i == n)					continue;				pCandidate = (unsigned char*)arr.GetPointer(i);				dDistance = CalculateDistance((unsigned char*)pNeighbors, pCandidate);				if(dDistance < pNeighbors[nMostDistantNeighbor].m_dDistance)				{					// Check to see if this is already a neighbor					for(j = 0; j < m_nNeighbors; j++)					{						if(pNeighbors[j].m_pNeighbor == pCandidate)							break;					}					if(j == m_nNeighbors)					{						// Make this a neighbor						pNeighbors[nMostDistantNeighbor].m_pNeighbor = pCandidate;						pNeighbors[nMostDistantNeighbor].m_dDistance = dDistance;						nMostDistantNeighbor = FindMostDistantNeighbor(pNeighbors);					}				}			}		}	}	// For each data point, find the most co-linear of each neighbor's neighbors	m_dAveNeighborDist = 0;	struct GManifoldSculptingMetaData* pData;	struct GManifoldSculptingNeighbor* pNeighborsNeighbors;	double dCosTheta;	for(n = 0; n < m_nDataPoints; n++)	{		pNeighbors = (struct GManifoldSculptingNeighbor*)arr.GetPointer(n);		pData = GetMetaData(pNeighbors);		pData->m_nCycle = -1;		for(i = 0; i < m_nNeighbors; i++)		{			m_dAveNeighborDist += pNeighbors[i].m_dDistance;			pNeighborsNeighbors = (struct GManifoldSculptingNeighbor*)pNeighbors[i].m_pNeighbor;			pCandidate = pNeighborsNeighbors[0].m_pNeighbor;			dCosTheta = CalculateVectorCorrelation((unsigned char*)pNeighbors, (unsigned char*)pNeighborsNeighbors, pCandidate);			pNeighbors[i].m_dCosTheta = dCosTheta;			pNeighbors[i].m_pNeighborsNeighbor = pCandidate;			for(j = 1; j < m_nNeighbors; j++)			{				pCandidate = pNeighborsNeighbors[j].m_pNeighbor;				dCosTheta = CalculateVectorCorrelation((unsigned char*)pNeighbors, (unsigned char*)pNeighborsNeighbors, pCandidate);				if(dCosTheta < pNeighbors[i].m_dCosTheta)				{					pNeighbors[i].m_dCosTheta = dCosTheta;					pNeighbors[i].m_pNeighborsNeighbor = pCandidate;				}			}		}	}	m_dAveNeighborDist /= (m_nDataPoints * m_nNeighbors);	m_dLearningRate = m_dAveNeighborDist * 10;}double GManifoldSculpting::CalculateDataPointError(unsigned char* pDataPoint){	double dError = 0;	double dDist;	double dTheta;	struct GManifoldSculptingNeighbor* pNeighbors = (struct GManifoldSculptingNeighbor*)pDataPoint;	struct GManifoldSculptingMetaData* pDataPointData = (struct GManifoldSculptingMetaData*)(pDataPoint + m_nDataIndex);	struct GManifoldSculptingMetaData* pNeighborData;	int n;	for(n = 0; n < m_nNeighbors; n++)	{		dDist = CalculateDistance(pDataPoint, pNeighbors[n].m_pNeighbor);		dDist -= pNeighbors[n].m_dDistance;		dDist /= MAX(m_dAveNeighborDist, 1e-10);		dDist *= dDist;		dTheta = CalculateVectorCorrelation(pDataPoint, pNeighbors[n].m_pNeighbor, pNeighbors[n].m_pNeighborsNeighbor);		dTheta -= pNeighbors[n].m_dCosTheta;		dTheta *= dTheta;		dTheta = MAX((double)0, dTheta - .01);		pNeighborData = (struct GManifoldSculptingMetaData*)(pNeighbors[n].m_pNeighbor + m_nDataIndex);		if(pNeighborData->m_nCycle != pDataPointData->m_nCycle && pNeighborData->m_bAdjustable)		{			dDist /= m_nSmoothingAdvantage;			dTheta /= m_nSmoothingAdvantage;		}		dError += (2 * dDist + dTheta);	}	return dError * dError;}int GManifoldSculpting::AjustDataPoint(unsigned char* pDataPoint, int nTargetDimensions, double* pError){	bool bMadeProgress = true;	double* pValues = (double*)(pDataPoint + m_nValueIndex);	double dErrorBase = CalculateDataPointError(pDataPoint);	double dError = 0;	double dStepSize = m_dLearningRate * (GBits::GetRandomDouble() * .1 + .9); // We multiply the learning rate by a random value so that the points can get away from each other	int n, nSteps;	for(nSteps = 0; bMadeProgress; nSteps++)	{		bMadeProgress = false;		for(n = 0; n < nTargetDimensions; n++)		{			pValues[n] += dStepSize;			dError = CalculateDataPointError(pDataPoint);			if(dError >= dErrorBase)			{				pValues[n] -= (dStepSize + dStepSize);				dError = CalculateDataPointError(pDataPoint);			}			if(dError >= dErrorBase)				pValues[n] += dStepSize;			else			{				dErrorBase = dError;				bMadeProgress = true;			}		}	}	*pError = dError;	return nSteps - 1; // the -1 is to undo the last incrementor}void GManifoldSculpting::SquishBegin(int nTargetDimensions){	GAssert(nTargetDimensions > 0 && nTargetDimensions < m_nDimensions, "out of range");	m_nTargetDimensions = nTargetDimensions;	m_nPass = 0;	// Calculate metadata	CalculateMetadata(nTargetDimensions);	// Make the queue	m_pQ = new GPointerQueue();}double GManifoldSculpting::SquishPass(int nSeedDataPoint){	double* pValues;	struct GManifoldSculptingMetaData* pData;	unsigned char* pDataPoint;	struct GManifoldSculptingNeighbor* pNeighbors;	int n, i;	// Squish the extra dimensions	for(n = 0; n < m_nDataPoints; n++)	{		pValues = (double*)(&m_pData[n * m_nRecordSize] + m_nValueIndex);		for(i = m_nTargetDimensions; i < m_nDimensions; i++)			pValues[i] *= m_dSquishingRate;	}	// Start at the seed point and correct outward in a breadth-first mannner	m_pQ->Push(&m_pData[nSeedDataPoint * m_nRecordSize]);	int nVisitedNodes = 0;	int nSteps = 0;	double dError = 0;	double dTotalError = 0;	while(m_pQ->GetSize() > 0)	{		// Check if this one has already been done		pDataPoint = (unsigned char*)m_pQ->Pop();		pData = (struct GManifoldSculptingMetaData*)(pDataPoint + m_nDataIndex);		if(pData->m_nCycle == m_nPass)			continue;		pData->m_nCycle = m_nPass;		nVisitedNodes++;		// Push all neighbors into the queue		pNeighbors = (struct GManifoldSculptingNeighbor*)pDataPoint;		for(n = 0; n < m_nNeighbors; n++)			m_pQ->Push(pNeighbors[n].m_pNeighbor);		// Ajust this data point		if(pData->m_bAdjustable)		{			nSteps += AjustDataPoint(pDataPoint, m_nTargetDimensions, &dError);			dTotalError += dError;		}	}	//GAssert(nVisitedNodes * 1.25 > m_nDataPoints, "manifold appears poorly sampled");	if(nSteps * 3 < m_nDataPoints)		m_dLearningRate *= .9;	else if(nSteps > m_nDataPoints * 9)		m_dLearningRate /= .9;//	printf("[Learning Rate: %f]", m_dLearningRate);	m_nPass++;	return dTotalError;}/*static*/ GArffData* GManifoldSculpting::DoManifoldSculpting(PreProcessType ePreProcess, GArffRelation* pRelation, GArffData* pData, int nTargetDimensions, int nNeighbors, int nPreludeIterations, int nIterationsSinceBest){	// Make the squisher	int nDimensions = pRelation->GetInputCount();	int nDataPoints = pData->GetSize();	GManifoldSculpting squisher(nDataPoints, nDimensions, nNeighbors);	squisher.SetData(pRelation, pData);	// Initialize and pre-process	squisher.SquishBegin(nTargetDimensions);	if(ePreProcess == PP_NONE)	{	}	else if(ePreProcess == PP_PCA)	{		GVector eigenValues;		GArffData* pPreprocessedData = GPCA::DoPCA(pRelation, pData, &eigenValues);		Holder<GArffData*> hPreprocessedData(pPreprocessedData);		squisher.SetData(pRelation, pPreprocessedData);	}	else if(ePreProcess == PP_LLE)	{		// LLE seems to require fewer neighbors than ManifoldSculpting. I'm		// not sure what the best ratio is. It should probably be another input		// parameter, but for now I'll just hack it.		int nLLENeighbors = MAX(8, nNeighbors / 2);		GArffData* pPreprocessedData = GLLE::DoLLE(pRelation, pData, nLLENeighbors);		Holder<GArffData*> hPreprocessedData(pPreprocessedData);		squisher.SetData(pRelation, pPreprocessedData);	}	else	{		GAssert(false, "Unrecognized pre-processing algorithm");	}	// Do the squishing passes	double d;	double dBestError = 0;	int n;	for(n = 0; n < nPreludeIterations; n++)		dBestError = squisher.SquishPass(rand() % nDataPoints);	for(n = 0; n < nIterationsSinceBest; n++)	{		d = squisher.SquishPass(rand() % nDataPoints);		if(d < dBestError)		{			dBestError = d;			n = 0;		}	}	// Convert data to an ARFF file	int nAttributeCount = pRelation->GetAttributeCount();	int nOutputs = pRelation->GetOutputCount();	GArffData* pDataOut = new GArffData(nDataPoints);	double* pRowIn;	double* pRowOut;	int i, nIndex;	for(n = 0; n < nDataPoints; n++)	{		pRowIn = pData->GetVector(n);		pRowOut = new double[nAttributeCount];		pDataOut->AddVector(pRowOut);		// Copy the output values straight over		for(i = 0; i < nOutputs; i++)		{			nIndex = pRelation->GetOutputIndex(i);			pRowOut[nIndex] = pRowIn[nIndex];		}		// Copy the squished input values		pRowIn = squisher.GetDataPoint(n);		for(i = 0; i < nDimensions; i++)		{			nIndex = pRelation->GetInputIndex(i);			pRowOut[nIndex] = pRowIn[i];

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -