gknn.cpp
来自「一个由Mike Gashler完成的机器学习方面的includes neural」· C++ 代码 · 共 1,007 行 · 第 1/2 页
CPP
1,007 行
// Drop the vector pVector = m_pData->GetVector(nIndex); m_pData->SetPointer(nIndex, m_pData->GetVector(nOldIndex)); m_pData->DeleteCell(nOldIndex); return pVector;}GNeighborFinderLeaf* GNeighborFinder::FindCell(double* pVector){ // Find the left node to hold the vector GNeighborFinderNode* pNode = m_pRoot; while(true) { if(pNode->IsLeaf()) return (GNeighborFinderLeaf*)pNode; else { GNeighborFinderInterior* pInterior = (GNeighborFinderInterior*)pNode; if(pVector[m_pRelation->GetInputIndex(pInterior->GetInput())] >= pInterior->GetPivot()) pNode = pInterior->GetGreater(); else pNode = pInterior->GetLesser(); } }}void GNeighborFinder::GetNeighborsFromCell(GNeighborFinderLeaf* pCell, double* pVector, int* pOutNeighbors, double* pOutSquaredDistances, int nNeighbors, int* pnWorstNeighbor, int nExclude){ GIntArray* pCellData = pCell->GetData(); int nCount = pCellData->GetSize(); int i, j, index; double d; for(i = 0; i < nCount; i++) { index = pCellData->GetInt(i); if(index == nExclude) continue; double* pCandidate = m_pData->GetVector(index); d = m_pRelation->ComputeInputDistanceSquared(pCandidate, pVector); if(d < pOutSquaredDistances[*pnWorstNeighbor]) { pOutNeighbors[*pnWorstNeighbor] = index; pOutSquaredDistances[*pnWorstNeighbor] = d; *pnWorstNeighbor = 0; for(j = 1; j < nNeighbors; j++) // todo: a priority queue could make this faster when the number of neighbors is large { if(pOutSquaredDistances[j] > pOutSquaredDistances[*pnWorstNeighbor]) *pnWorstNeighbor = j; } } }}void GNeighborFinder::FindNeighbors(int* pOutNeighbors, double* pOutSquaredDistances, int nNeighbors, double* pVector, int nExclude){ // Start with no neighbors int i; for(i = 0; i < nNeighbors; i++) { pOutNeighbors[i] = -1; pOutSquaredDistances[i] = 1e200; } int nWorstNeighbor = 0; // Start with the current cell int nInputs = m_pRelation->GetInputCount(); GNeighborFinderLeaf* pMinCell = FindCell(pVector); GNeighborFinderLeaf* pMaxCell = pMinCell; GetNeighborsFromCell(pMinCell, pVector, pOutNeighbors, pOutSquaredDistances, nNeighbors, &nWorstNeighbor, nExclude); // Search outward until we are guaranteed to have found the best neighbors double d; while(true) { // Find the nearest moveable boundary int index; int nClosest = -1; bool bGreater = false; double dClosest = 1e200; for(i = 0; i < nInputs; i++) { index = m_pRelation->GetInputIndex(i); if(pMinCell->GetLesser(i)) { d = pMinCell->GetMin(i) - pVector[index]; d *= d; if(d < dClosest) { nClosest = i; bGreater = false; dClosest = d; } } if(pMaxCell->GetGreater(i)) { d = pMaxCell->GetMax(i) - pVector[index]; d *= d; if(d < dClosest) { nClosest = i; bGreater = true; dClosest = d; } } } // See if we're guaranteed to have the best neighbors yet if(nClosest < 0 || dClosest >= pOutSquaredDistances[nWorstNeighbor]) break; // Advance the boundary if(bGreater) { for(i = 0; i < nInputs; i++) { m_ppIterators[i] = pMaxCell->GetGreater(nClosest); m_pMaxs[i] = pMinCell->GetMin(i); } m_pMaxs[nClosest] = m_ppIterators[0]->GetMin(nClosest); while(true) { GetNeighborsFromCell(m_ppIterators[0], pVector, pOutNeighbors, pOutSquaredDistances, nNeighbors, &nWorstNeighbor, nExclude); for(i = 0; i < nInputs; i++) { if(m_ppIterators[i]->GetMin(i) > m_pMaxs[i]) { m_ppIterators[i] = m_ppIterators[i]->GetLesser(i); break; } } if(i >= nInputs) break; for(i--; i >= 0; i--) m_ppIterators[i] = m_ppIterators[i + 1]; } pMaxCell = pMaxCell->GetGreater(nClosest); } else { for(i = 0; i < nInputs; i++) { m_ppIterators[i] = pMinCell->GetLesser(nClosest); m_pMaxs[i] = pMaxCell->GetMax(i); } m_pMaxs[nClosest] = m_ppIterators[0]->GetMax(nClosest); while(true) { GetNeighborsFromCell(m_ppIterators[0], pVector, pOutNeighbors, pOutSquaredDistances, nNeighbors, &nWorstNeighbor, nExclude); for(i = 0; i < nInputs; i++) { if(m_ppIterators[i]->GetMax(i) < m_pMaxs[i]) { m_ppIterators[i] = m_ppIterators[i]->GetGreater(i); break; } } if(i >= nInputs) break; for(i--; i >= 0; i--) m_ppIterators[i] = m_ppIterators[i + 1]; } pMinCell = pMinCell->GetLesser(nClosest); } }}// -------------------------------------------------------------------------------GKNN::GKNN(GArffRelation* pRelation, int nNeighbors, bool bCopyInstances) : GSupervisedLearner(pRelation){ m_eInterpolationMethod = Linear; m_pLearner = NULL; m_bOwnLearner = false; m_bCopyInstances = bCopyInstances; m_nNeighbors = nNeighbors; m_pInstances = NULL; m_pScaleFactors = NULL; m_pEvalVector = NULL; m_pNeighborFinder = NULL; m_pEvalNeighbors = NULL; m_pEvalDistances = NULL; Reset();}GKNN::~GKNN(){ delete(m_pNeighborFinder); if(!m_bCopyInstances) m_pInstances->DropAllVectors(); delete(m_pInstances); delete[] m_pScaleFactors; delete[] m_pEvalVector; delete[] m_pEvalNeighbors; delete[] m_pEvalDistances;}void GKNN::SetInterpolationMethod(InterpolationMethod eMethod){ GAssert(eMethod != Learner, "Call SetInterpolationLearner instead"); m_eInterpolationMethod = eMethod;}void GKNN::SetInterpolationLearner(GSupervisedLearner* pLearner, bool bOwnLearner){ if(m_bOwnLearner) delete(m_pLearner); GAssert(pLearner->GetRelation() == GetRelation(), "Expected the learner to be constructed with the same relation as this object"); m_pLearner = pLearner; m_eInterpolationMethod = Learner; m_bOwnLearner = bOwnLearner;}void GKNN::ComputeScaleFactors(GArffData* pData){ GAssert(m_pInstances->GetSize() <= 0, "You should only compute scale factors before you add any data"); if(!m_bCopyInstances) return; // Can't scale vectors we don't own/* int nInputs = m_pRelation->GetInputCount(); int nOutputs = m_pRelation->GetOutputCount(); int i, j, index; GTEMPBUF(double, pOutputMeans, nOutputs); for(j = 0; j < nOutputs; j++) pOutputMeans[j] = pData->ComputeMean(m_pRelation->GetOutputIndex(j)); for(i = 0; i < nInputs; i++) { index = m_pRelation->GetInputIndex(i); if(m_pRelation->GetAttribute(index)->IsContinuous()) { double dInputMean = pData->ComputeMean(m_pRelation->GetInputIndex(i)); double d = 0; for(j = 0; j < nOutputs; j++) d = MAX(d, pData->ComputeCovariance(m_pRelation->GetInputIndex(i), dInputMean, m_pRelation->GetOutputIndex(j), pOutputMeans[j])); m_pScaleFactors[i] = d; } else m_pScaleFactors[i] = 1; }*/}void GKNN::AddVector(double* pVector){ double* pVec; if(m_bCopyInstances) { // Make a copy of the vector int nAttributes = m_pRelation->GetAttributeCount(); pVec = new double[nAttributes]; memcpy(pVec, pVector, sizeof(double) * nAttributes); // Scale the copy int i; int nInputs = m_pRelation->GetInputCount(); for(i = 0; i < nInputs; i++) pVec[m_pRelation->GetInputIndex(i)] *= m_pScaleFactors[i]; } else pVec = pVector; // Add it to the known instances if(m_pNeighborFinder) m_pNeighborFinder->AddVector(pVec); // GNeighborFinder has a pointer to m_pInstances, and it will add pVec to m_pInstances when we make this call else m_pInstances->AddPointer(pVec);}void GKNN::AddVectorAndDeleteNeighborIfClose(double* pVector, double dCloseDistance){ double* pVec; if(m_bCopyInstances) { // Make a copy of the vector int nAttributes = m_pRelation->GetAttributeCount(); pVec = new double[nAttributes]; memcpy(pVec, pVector, sizeof(double) * nAttributes); // Scale the copy int i; int nInputs = m_pRelation->GetInputCount(); for(i = 0; i < nInputs; i++) pVec[m_pRelation->GetInputIndex(i)] *= m_pScaleFactors[i]; } else pVec = pVector; // Delete the closest neighbor if it's closer than the specified threshold if(!m_pNeighborFinder) m_pNeighborFinder = new GNeighborFinder(m_pRelation, m_pInstances, m_pRelation->GetInputCount()); m_pNeighborFinder->FindNeighbors(m_pEvalNeighbors, m_pEvalDistances, 1, pVec, -1); if(m_pEvalDistances[0] < dCloseDistance * dCloseDistance) { double* pClosest = m_pNeighborFinder->DropVector(m_pEvalNeighbors[0]); if(m_bCopyInstances) delete[] pClosest; } // Add the vector m_pNeighborFinder->AddVector(pVec);}void GKNN::Train(GArffData* pData){ ComputeScaleFactors(pData); int nVectors = pData->GetSize(); double* pVector; int n; for(n = 0; n < nVectors; n++) { pVector = pData->GetVector(n); AddVector(pVector); }}void GKNN::FindNeighbors(double* pVector){ int nInputs = m_pRelation->GetInputCount(); if(!m_pNeighborFinder) m_pNeighborFinder = new GNeighborFinder(m_pRelation, m_pInstances, nInputs); if(m_bCopyInstances) { memcpy(m_pEvalVector, pVector, sizeof(double) * m_pRelation->GetAttributeCount()); int i; for(i = 0; i < nInputs; i++) m_pEvalVector[m_pRelation->GetInputIndex(i)] *= m_pScaleFactors[i]; m_pNeighborFinder->FindNeighbors(m_pEvalNeighbors, m_pEvalDistances, m_nNeighbors, m_pEvalVector, -1); } else m_pNeighborFinder->FindNeighbors(m_pEvalNeighbors, m_pEvalDistances, m_nNeighbors, pVector, -1);}void GKNN::InterpolateMean(double* pVector){ int nOutputs = m_pRelation->GetOutputCount(); GArffAttribute* pAttr; int i, j, k, index; double* pVectorNeighbor; for(i = 0; i < nOutputs; i++) { index = m_pRelation->GetOutputIndex(i); pAttr = m_pRelation->GetAttribute(index); if(pAttr->IsContinuous()) { double dSum = 0; for(j = 0; j < m_nNeighbors; j++) { k = m_pEvalNeighbors[j]; if(k >= 0) { pVectorNeighbor = m_pInstances->GetVector(k); dSum += pVectorNeighbor[index]; } } pVector[index] = dSum / m_nNeighbors; } else { int nValueCount = pAttr->GetValueCount(); int* pCounts = new int[nValueCount]; Holder<int*> hCounts(pCounts); memset(pCounts, '\0', sizeof(int) * nValueCount); for(j = 0; j < m_nNeighbors; j++) { k = m_pEvalNeighbors[j]; if(k >= 0) { pVectorNeighbor = m_pInstances->GetVector(k); pCounts[(int)pVectorNeighbor[index]]++; } } k = 0; for(j = 1; j < nValueCount; j++) { if(pCounts[j] > pCounts[k]) k = j; } pVector[index] = (double)k; } }}void GKNN::InterpolateLinear(double* pVector){ int nOutputs = m_pRelation->GetOutputCount(); GArffAttribute* pAttr; int i, j, k, index; double d; double* pVectorNeighbor; for(i = 0; i < nOutputs; i++) { index = m_pRelation->GetOutputIndex(i); pAttr = m_pRelation->GetAttribute(index); if(pAttr->IsContinuous()) { double dSum = 0; double dTot = 0; for(j = 0; j < m_nNeighbors; j++) { k = m_pEvalNeighbors[j]; if(k >= 0) { pVectorNeighbor = m_pInstances->GetVector(k); d = m_pEvalDistances[j]; d = 1.0 / MAX(d, 1e-9); // to be truly "linear", we should use sqrt(d) instead of d, but this is faster to compute dSum += d * pVectorNeighbor[index]; dTot += d; } } if(dTot > 0) pVector[index] = dSum / dTot; else pVector[index] = dSum; } else { int nValueCount = pAttr->GetValueCount(); double* pScores = new double[nValueCount]; ArrayHolder<double*> hScores(pScores); for(j = 0; j < nValueCount; j++) pScores[j] = 0; for(j = 0; j < m_nNeighbors; j++) { k = m_pEvalNeighbors[j]; if(k >= 0) { pVectorNeighbor = m_pInstances->GetVector(k); d = m_pEvalDistances[j]; d = 1 / (d + .00001); pScores[(int)pVectorNeighbor[index]] += d; } } pVector[index] = (double)GVector::FindMax(pScores, nValueCount); } }}void GKNN::InterpolateLearner(double* pVector){ GAssert(m_pLearner, "no learner is set"); m_pLearner->Reset(); int i, nNeighbor; GArffData data(m_nNeighbors); for(i = 0; i < m_nNeighbors; i++) { nNeighbor = m_pEvalNeighbors[i]; if(nNeighbor >= 0) data.AddVector(m_pInstances->GetVector(nNeighbor)); } m_pLearner->Train(&data); data.DropAllVectors(); m_pLearner->Eval(pVector);}// virtualvoid GKNN::Eval(double* pVector){ FindNeighbors(pVector); switch(m_eInterpolationMethod) { case Linear: InterpolateLinear(pVector); break; case Mean: InterpolateMean(pVector); break; case Learner: InterpolateLearner(pVector); break; default: GAssert(false, "unexpected enumeration"); break; }}// virtualvoid GKNN::Reset(){ if(m_pInstances) { if(!m_bCopyInstances) m_pInstances->DropAllVectors(); delete(m_pInstances); } m_pInstances = new GArffData(1024); delete[] m_pScaleFactors; delete[] m_pEvalVector; int nInputs = m_pRelation->GetInputCount(); m_pScaleFactors = new double[nInputs]; m_pEvalVector = new double[m_pRelation->GetAttributeCount()]; int n; for(n = 0; n < nInputs; n++) m_pScaleFactors[n] = 1; delete(m_pNeighborFinder); m_pNeighborFinder = NULL; delete[] m_pEvalNeighbors; delete[] m_pEvalDistances; m_pEvalNeighbors = new int[m_nNeighbors]; m_pEvalDistances = new double[m_nNeighbors];}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?