⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gknn.cpp

📁 一个非常有用的开源代码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
			d = rel.ComputeInputDistanceSquared(pVector, data.GetVector(pNeighbors[j]));			if(d > dWorstDistance)			{				dWorstDistance = d;				nWorstNeighbor = j;			}		}		for(j = 0; j < nPoints; j++)		{			pVector2 = data.GetVector(j);			d = rel.ComputeInputDistanceSquared(pVector, pVector2);			if(d == 0)			{			}			else if(d < dWorstDistance)			{				for(k = 0; k < nNeighbors; k++)				{					if(k > 0)					{						// This isn't a complete check, but it should be good enough						if(pNeighbors[k] == pNeighbors[k - 1])							throw "same neighbor twice";						if(pNeighbors[k] == pNeighbors[0])							throw "same neighbor twice";					}					if(pNeighbors[k] == i)						throw "failed to exclude itself";					if(data.GetVector(pNeighbors[k]) == pVector2)						break;				}				if(k >= nNeighbors)					throw "missed a neighbor";			}		}	}}#endif // !NO_TEST_CODEGNeighborFinderLeaf* GNeighborFinder::FindCell(double* pVector){	// Find the left node to hold the vector	GNeighborFinderNode* pNode = m_pRoot;	while(true)	{		if(pNode->IsLeaf())			return (GNeighborFinderLeaf*)pNode;		else		{			GNeighborFinderInterior* pInterior = (GNeighborFinderInterior*)pNode;			if(pVector[m_pRelation->GetInputIndex(pInterior->GetInput())] >= pInterior->GetPivot())				pNode = pInterior->GetGreater();			else				pNode = pInterior->GetLesser();		}	}}void GNeighborFinder::GetNeighborsFromCell(GNeighborFinderLeaf* pCell, double* pVector, int* pOutNeighbors, double* pOutSquaredDistances, int nNeighbors, int* pnWorstNeighbor, int nExclude){	GIntArray* pCellData = pCell->GetData();	int nCount = pCellData->GetSize();	int i, j, index;	double d;	for(i = 0; i < nCount; i++)	{		index = pCellData->GetInt(i);		if(index == nExclude)			continue;		double* pCandidate = m_pData->GetVector(index);		d = m_pRelation->ComputeInputDistanceSquared(pCandidate, pVector);		if(d < pOutSquaredDistances[*pnWorstNeighbor])		{			pOutNeighbors[*pnWorstNeighbor] = index;			pOutSquaredDistances[*pnWorstNeighbor] = d;			*pnWorstNeighbor = 0;			for(j = 1; j < nNeighbors; j++) // todo: a priority queue could make this faster when the number of neighbors is large			{				if(pOutSquaredDistances[j] > pOutSquaredDistances[*pnWorstNeighbor])					*pnWorstNeighbor = j;			}		}	}}void GNeighborFinder::FindNeighbors(int* pOutNeighbors, double* pOutSquaredDistances, int nNeighbors, double* pVector, int nExclude){	// Start with no neighbors	int i;	for(i = 0; i < nNeighbors; i++)	{		pOutNeighbors[i] = -1;		pOutSquaredDistances[i] = 1e200;	}	int nWorstNeighbor = 0;	// Start with the current cell	int nInputs = m_pRelation->GetInputCount();	GNeighborFinderLeaf* pMinCell = FindCell(pVector);	GNeighborFinderLeaf* pMaxCell = pMinCell;	GetNeighborsFromCell(pMinCell, pVector, pOutNeighbors, pOutSquaredDistances, nNeighbors, &nWorstNeighbor, nExclude);	// Search outward until we are guaranteed to have found the best neighbors	double d;	while(true)	{		// Find the nearest moveable boundary		int index;		int nClosest = -1;		bool bGreater = false;		double dClosest = 1e200;		for(i = 0; i < nInputs; i++)		{			index = m_pRelation->GetInputIndex(i);			if(pMinCell->GetLesser(i))			{				d = pMinCell->GetMin(i) - pVector[index];				d *= d;				if(d < dClosest)				{					nClosest = i;					bGreater = false;					dClosest = d;				}			}			if(pMaxCell->GetGreater(i))			{				d = pMaxCell->GetMax(i) - pVector[index];				d *= d;				if(d < dClosest)				{					nClosest = i;					bGreater = true;					dClosest = d;				}			}		}		// See if we're guaranteed to have the best neighbors yet		if(nClosest < 0 || dClosest >= pOutSquaredDistances[nWorstNeighbor])			break;		// Advance the boundary		if(bGreater)		{			for(i = 0; i < nInputs; i++)			{				m_ppIterators[i] = pMaxCell->GetGreater(nClosest);				m_pMaxs[i] = pMinCell->GetMin(i);			}			m_pMaxs[nClosest] = m_ppIterators[0]->GetMin(nClosest);			while(true)			{				GetNeighborsFromCell(m_ppIterators[0], pVector, pOutNeighbors, pOutSquaredDistances, nNeighbors, &nWorstNeighbor, nExclude);				for(i = 0; i < nInputs; i++)				{					if(m_ppIterators[i]->GetMin(i) > m_pMaxs[i])					{						m_ppIterators[i] = m_ppIterators[i]->GetLesser(i);						break;					}				}				if(i >= nInputs)					break;				for(i--; i >= 0; i--)					m_ppIterators[i] = m_ppIterators[i + 1];			}			pMaxCell = pMaxCell->GetGreater(nClosest);		}		else		{			for(i = 0; i < nInputs; i++)			{				m_ppIterators[i] = pMinCell->GetLesser(nClosest);				m_pMaxs[i] = pMaxCell->GetMax(i);			}			m_pMaxs[nClosest] = m_ppIterators[0]->GetMax(nClosest);			while(true)			{				GetNeighborsFromCell(m_ppIterators[0], pVector, pOutNeighbors, pOutSquaredDistances, nNeighbors, &nWorstNeighbor, nExclude);				for(i = 0; i < nInputs; i++)				{					if(m_ppIterators[i]->GetMax(i) < m_pMaxs[i])					{						m_ppIterators[i] = m_ppIterators[i]->GetGreater(i);						break;					}				}				if(i >= nInputs)					break;				for(i--; i >= 0; i--)					m_ppIterators[i] = m_ppIterators[i + 1];			}			pMinCell = pMinCell->GetLesser(nClosest);		}	}}// -------------------------------------------------------------------------------GKNN::GKNN(GArffRelation* pRelation, int nNeighbors) : GSupervisedLearner(pRelation){	m_nNeighbors = nNeighbors;	m_pRows = new GArffData(1024);	int nInputs = m_pRelation->GetInputCount();	m_pScaleFactors = new double[nInputs];	m_pEvalVector = new double[m_pRelation->GetAttributeCount()];	int n;	for(n = 0; n < nInputs; n++)		m_pScaleFactors[n] = 1;	m_pNeighborFinder = NULL;	m_pEvalNeighbors = new int[nNeighbors];	m_pEvalDistances = new double[nNeighbors];}GKNN::~GKNN(){	delete(m_pNeighborFinder);	delete(m_pRows);	delete[] m_pScaleFactors;	delete[] m_pEvalVector;	delete[] m_pEvalNeighbors;	delete[] m_pEvalDistances;}void GKNN::ComputeScaleFactors(GArffData* pData){	GAssert(m_pRows->GetSize() <= 0, "You should compute scale factors before you add any data");	int nInputs = m_pRelation->GetInputCount();	double dMean, dVariance;	int i, index;	for(i = 0; i < nInputs; i++)	{		index = m_pRelation->GetInputIndex(i);		if(m_pRelation->GetAttribute(index)->IsContinuous())		{			dMean = pData->ComputeMean(index);			dVariance = pData->ComputeVariance(dMean, index);			m_pScaleFactors[i] = sqrt(dVariance);		}		else			m_pScaleFactors[i] = 1;	}}void GKNN::AddVector(double* pRow){	delete(m_pNeighborFinder);	m_pNeighborFinder = NULL;	int nAttributes = m_pRelation->GetAttributeCount();	int nInputs = m_pRelation->GetInputCount();	double* pData = new double[nAttributes];	m_pRows->AddPointer(pData);	memcpy(pData, pRow, sizeof(double) * nAttributes);	int i;	for(i = 0; i < nInputs; i++)		pData[m_pRelation->GetInputIndex(i)] *= m_pScaleFactors[i];}void GKNN::Train(GArffData* pData){	ComputeScaleFactors(pData);	int nRows = pData->GetSize();	double* pRow;	int n;	for(n = 0; n < nRows; n++)	{		pRow = pData->GetVector(n);		AddVector(pRow);	}}void GKNN::FindNeighbors(double* pRow){	int nInputs = m_pRelation->GetInputCount();	if(!m_pNeighborFinder)		m_pNeighborFinder = new GNeighborFinder(m_pRelation, m_pRows, nInputs);	memcpy(m_pEvalVector, pRow, sizeof(double) * m_pRelation->GetAttributeCount());	int i;	for(i = 0; i < nInputs; i++)		m_pEvalVector[m_pRelation->GetInputIndex(i)] *= m_pScaleFactors[i];	m_pNeighborFinder->FindNeighbors(m_pEvalNeighbors, m_pEvalDistances, m_nNeighbors, m_pEvalVector, -1);}void GKNN::EvalEqualWeight(double* pRow){	FindNeighbors(pRow);	int nOutputs = m_pRelation->GetOutputCount();	GArffAttribute* pAttr;	int i, j, index;	double* pRowNeighbor;	for(i = 0; i < nOutputs; i++)	{		index = m_pRelation->GetOutputIndex(i);		pAttr = m_pRelation->GetAttribute(index);		if(pAttr->IsContinuous())		{			double dSum = 0;			for(j = 0; j < m_nNeighbors; j++)			{				pRowNeighbor = (double*)m_pRows->GetPointer(m_pEvalNeighbors[j]);				dSum += pRowNeighbor[index];			}			pRow[index] = dSum / m_nNeighbors;		}		else		{			GVector v(pAttr->GetValueCount());			for(j = 0; j < m_nNeighbors; j++)			{				pRowNeighbor = (double*)m_pRows->GetPointer(m_pEvalNeighbors[j]);				v.Add((int)pRowNeighbor[index], 1);			}			pRow[index] = (double)v.GetIndexOfBiggestValue();		}	}}void GKNN::EvalLinearWeight(double* pRow){	FindNeighbors(pRow);	int nOutputs = m_pRelation->GetOutputCount();	GArffAttribute* pAttr;	int i, j, index;	double d;	double* pRowNeighbor;	for(i = 0; i < nOutputs; i++)	{		index = m_pRelation->GetOutputIndex(i);		pAttr = m_pRelation->GetAttribute(index);		if(pAttr->IsContinuous())		{			double dSum = 0;			double dTot = 0;			for(j = 0; j < m_nNeighbors; j++)			{				pRowNeighbor = (double*)m_pRows->GetPointer(m_pEvalNeighbors[j]);				d = m_pRelation->ComputeInputDistanceSquared(pRow, pRowNeighbor);				d = 1 / (d + .0001);				dSum += d * pRowNeighbor[index];				dTot += d;			}			pRow[index] = dSum / dTot;		}		else		{			GVector v(pAttr->GetValueCount());			for(j = 0; j < m_nNeighbors; j++)			{				pRowNeighbor = (double*)m_pRows->GetPointer(m_pEvalNeighbors[j]);				d = m_pRelation->ComputeInputDistanceSquared(pRow, pRowNeighbor);				d = 1 / (d + .001);				v.Add((int)pRowNeighbor[index], d);			}			pRow[index] = (double)v.GetIndexOfBiggestValue();		}	}}double* GKNN::DropRow(int nRow){	int nCount = m_pRows->GetSize();	double* pRow = (double*)m_pRows->GetPointer(nRow);	m_pRows->SetPointer(nRow, m_pRows->GetPointer(nCount - 1));	m_pRows->DeleteCell(nCount - 1);	return pRow;}int GKNN::FindLeastHelpfulRow(GArffData* pTestSet){	double dBestError = 1e100;	int nBestIndex = 0;	int n;	double d;	for(n = m_pRows->GetSize() - 1; n >= 0; n--)	{		double* pTemp = DropRow(n);		d = MeasureMeanSquaredError(pTestSet);		m_pRows->AddPointer(pTemp);		if(d < dBestError)		{			dBestError = d;			nBestIndex = n;		}	}	return nBestIndex;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -