📄 gmanifold.cpp
字号:
} } return pDataOut;}// --------------------------------------------------------------------------GPCA::GPCA(GArffRelation* pRelation, GArffData* pData){ m_pRelation = pRelation; m_pInputData = pData; m_pOutputData = NULL;}GPCA::~GPCA(){ delete(m_pOutputData);}/*static*/ GArffData* GPCA::DoPCA(GArffRelation* pRelation, GArffData* pData, GVector* pOutEigenValues){ GPCA pca(pRelation, pData); pca.DoPCA(pOutEigenValues); return pca.DropOutputData();}void GPCA::DoPCA(GVector* pOutEigenValues){ // Compute the eigenvectors GMatrix m; m_pInputData->ComputeCovarianceMatrix(&m, m_pRelation); GMatrix eigenVectors; m.ComputeEigenVectors(pOutEigenValues, &eigenVectors); m_pOutputData = new GArffData(m_pInputData->GetSize()); int nRowCount = m_pInputData->GetSize(); int nInputCount = m_pRelation->GetInputCount(); int nOutputCount = m_pRelation->GetOutputCount(); int nAttributeCount = m_pRelation->GetAttributeCount(); double* pInputRow; double* pOutputRow; int n, i, j, nIndex; // Allocate space for the output for(n = 0; n < nRowCount; n++) { pOutputRow = new double[nAttributeCount]; m_pOutputData->AddVector(pOutputRow); } // Compute the output Holder<double*> hEigenVector(new double[nInputCount]); double* pEigenVector = hEigenVector.Get(); Holder<double*> hInputVector(new double[nInputCount]); double* pInputVector = hInputVector.Get(); for(i = 0; i < nInputCount; i++) { nIndex = m_pRelation->GetInputIndex(i); for(n = 0; n < nInputCount; n++) pEigenVector[n] = eigenVectors.Get(i, n); for(n = 0; n < nRowCount; n++) { pInputRow = m_pInputData->GetVector(n); for(j = 0; j < nInputCount; j++) pInputVector[j] = pInputRow[m_pRelation->GetInputIndex(j)]; pOutputRow = m_pOutputData->GetVector(n); pOutputRow[nIndex] = GVector::ComputeDotProduct(pInputVector, pEigenVector, nInputCount); } } for(i = 0; i < nOutputCount; i++) { for(n = 0; n < nRowCount; n++) { nIndex = m_pRelation->GetOutputIndex(i); pInputRow = m_pInputData->GetVector(n); pOutputRow = m_pOutputData->GetVector(n); pOutputRow[nIndex] = pInputRow[nIndex]; } }}GArffData* GPCA::DropOutputData(){ GArffData* pData = m_pOutputData; m_pOutputData = NULL; return pData;}// --------------------------------------------------------------------------GLLE::GLLE(GArffRelation* pRelation, GArffData* pData, int nNeighbors){ m_pRelation = pRelation; m_pInputData = pData; m_nNeighbors = nNeighbors; m_pNeighbors = NULL; m_pWeights = NULL; m_pOutputData = NULL;}GLLE::~GLLE(){ delete(m_pNeighbors); delete(m_pWeights); delete(m_pOutputData);}GArffData* GLLE::DropOutputData(){ GArffData* pData = m_pOutputData; m_pOutputData = NULL; return pData;}/*static*/ GArffData* GLLE::DoLLE(GArffRelation* pRelation, GArffData* pData, int nNeighbors){ GLLE lle(pRelation, pData, nNeighbors); lle.FindNeighbors(); lle.ComputeWeights(); lle.ComputeEmbedding(); return lle.DropOutputData();}void GLLE::FindNeighbors(){ // Allocate space for the neighbors delete(m_pNeighbors); m_pNeighbors = new int[m_nNeighbors * m_pInputData->GetSize()]; // For now we'll just brute force it--todo: use a better neighbor finding technique GTEMPBUF(double, pBestDistances, m_nNeighbors); int nRowCount = m_pInputData->GetSize(); double* pRow1; double* pRow2; double d; int i, j, k, nWorstBestDistance; for(i = 0; i < nRowCount; i++) { // Reset the best distances for(j = 0; j < m_nNeighbors; j++) pBestDistances[j] = 1e100; nWorstBestDistance = 0; // Check every other point pRow1 = m_pInputData->GetVector(i); for(j = 0; j < nRowCount; j++) { if(j == i) continue; pRow2 = m_pInputData->GetVector(j); d = m_pRelation->ComputeInputDistanceSquared(pRow1, pRow2); if(d < pBestDistances[nWorstBestDistance]) { pBestDistances[nWorstBestDistance] = d; m_pNeighbors[i * m_nNeighbors + nWorstBestDistance] = j; // Find new worst of the best distances nWorstBestDistance = 0; for(k = 1; k < m_nNeighbors; k++) { if(pBestDistances[k] > pBestDistances[nWorstBestDistance]) nWorstBestDistance = k; } } } GAssert(pBestDistances[nWorstBestDistance] < 1e99, "Failed to find enough neighbors"); }//for(i = 0; i < m_nNeighbors * m_pInputData->GetSize(); i++)//printf("%d\n", m_pNeighbors[i]);}void GLLE::ComputeWeights(){ int nDimensions = m_pRelation->GetInputCount(); int nRowCount = m_pInputData->GetSize(); GTEMPBUF(double, pVec, m_nNeighbors); m_pWeights = new GMatrix(nRowCount, nRowCount); // todo: this should be a sparse matrix double* pRowNeighbor; double* pRow; double dSum; int n, i, j, nIndex; for(n = 0; n < nRowCount; n++) { // Create a matrix of all the neighbors normalized around the origin GMatrix z(m_nNeighbors, nDimensions); pRow = m_pInputData->GetVector(n); for(i = 0; i < m_nNeighbors; i++) { pRowNeighbor = m_pInputData->GetVector(m_pNeighbors[n * m_nNeighbors + i]); for(j = 0; j < nDimensions; j++) { nIndex = m_pRelation->GetInputIndex(j); z.Set(i, j, pRowNeighbor[nIndex] - pRow[nIndex]); } } // Compute local covariance (sort of, it's actually relative to the original point, not the mean, so it's not really true covariance) GMatrix transpose; transpose.Copy(&z); transpose.Transpose(); GMatrix covariance; covariance.Multiply(&z, &transpose); GAssert(covariance.GetRowCount() == covariance.GetColumnCount(), "expected a square matrix"); GAssert(covariance.GetRowCount() == m_nNeighbors, "unexpected size"); // if the number of neighbors is more than the number of dimensions then the covariance will not be full rank so we need to regularize it if(m_nNeighbors > nDimensions) { double dReg = covariance.ComputeTrace() * .001; int nRows = covariance.GetRowCount(); for(i = 0; i < nRows; i++) covariance.Set(i, i, covariance.Get(i, i) + dReg); } // Compute the weights for(i = 0; i < m_nNeighbors; i++) pVec[i] = 1; covariance.Solve(pVec); // Normalize the weights (so they sum to one) dSum = 0; for(i = 0; i < m_nNeighbors; i++) dSum += pVec[i]; for(i = 0; i < nRowCount; i++) m_pWeights->Set(n, i, 0); for(i = 0; i < m_nNeighbors; i++) m_pWeights->Set(n, m_pNeighbors[n * m_nNeighbors + i], pVec[i] / dSum); }/* printf("Weights:\n"); m_pWeights->Print(); printf("\n");*/}void GLLE::ComputeEmbedding(){ // Subtract the weights from the identity matrix int row, col; int nRowCount = m_pInputData->GetSize(); for(row = 0; row < nRowCount; row++) { for(col = 0; col < nRowCount; col++) m_pWeights->Set(row, col, -m_pWeights->Get(row, col)); } for(row = 0; row < nRowCount; row++) m_pWeights->Set(row, row, m_pWeights->Get(row, row) + 1); // Compute the cost matrix GMatrix transposed; transposed.Copy(m_pWeights); transposed.Transpose(); GMatrix m; m.Multiply(&transposed, m_pWeights);// printf("Cost Matrix:\n");// m.Print();// printf("%f\t%f\n%f\t%f\n", m.Get(0, 0), m.Get(0, m.GetColumnCount() - 1), m.Get(m.GetSize() - 1, 0), m.Get(m.GetSize() - 1, m.GetColumnCount()));// printf("\n"); // Compute eigen vectors GMatrix mEigenVectors; GVector vEigenValues;/*printf("Cost Matrix\n");m.PrintCorners(2);*/ m.ComputeEigenVectors(&vEigenValues, &mEigenVectors);//printf("Eigen Vectors\n");//mEigenVectors.PrintCorners(2);//mEigenVectors.Print();/* // Print the eigen values and corresponding vectors printf("Eigen values and vectors\n"); for(row = 0; row < nRowCount; row++) { printf("%f\n", vEigenValues.Get(row)); //for(col = 0; col < nRowCount; col++) // printf("\t%f\n", mEigenVectors.Get(row, col)); }*/ // Allocate space for the output data m_pOutputData = new GArffData(nRowCount); int nAttributes = m_pRelation->GetAttributeCount(); int nInputs = m_pRelation->GetInputCount(); for(row = 0; row < nRowCount; row++) m_pOutputData->AddVector(new double[nAttributes]); // Compute the transformed data by dividing the eigen vectors by the square root of the eigen values double d; int nColumns = nInputs; //GAssert(nColumns < nRowCount, "Not enough data to compute values"); int nEigen = 1;//nRowCount - 2; double* pInputRow; double* pOutputRow; int nIndex; d = sqrt((double)nRowCount); for(col = 0; col < nColumns; col++) {/* if(vEigenValues.Get(nEigen) >= 0) d = 1 / sqrt(vEigenValues.Get(nEigen)); else d = 1 / (-sqrt(-vEigenValues.Get(nEigen)));*/ nIndex = m_pRelation->GetInputIndex(col); for(row = 0; row < nRowCount; row++) { pOutputRow = m_pOutputData->GetVector(row); pOutputRow[nIndex] = mEigenVectors.Get(nEigen, row) * d; } nEigen++;//--; } int nOutputs = m_pRelation->GetOutputCount(); for(col = 0; col < nOutputs; col++) { nIndex = m_pRelation->GetOutputIndex(col); for(row = 0; row < nRowCount; row++) { pInputRow = m_pInputData->GetVector(row); pOutputRow = m_pOutputData->GetVector(row); pOutputRow[nIndex] = pInputRow[nIndex]; } }/* // Print the final data printf("Final data:\n"); for(row = 0; row < nRowCount; row++) { pOutputRow = m_pOutputData->GetVector(row); printf("%f", pOutputRow[0]); for(col = 1; col < nInputDimensions; col++) printf("\t%f", pOutputRow[col]); printf("\n"); }*/}// --------------------------------------------------------------------------GManifoldPumper::GManifoldPumper(GArffRelation* pInputRelation, int nNewDimensions, int nManifoldMapNeighbors, int nManifoldSculptingNeighbors) : GSupervisedLearner(MakePumpedRelation(pInputRelation, nNewDimensions)){ m_pLearner = NULL; m_nManifoldSculptingNeighbors = nManifoldSculptingNeighbors; m_pInputRelation = pInputRelation; m_nNewDimensions = nNewDimensions; m_pVector = new double[m_pRelation->GetAttributeCount()]; m_bOwnLearner = false; // Make the manifold map m_pMapRelation = new GArffRelation(); int i; for(i = 0; i < pInputRelation->GetInputCount(); i++) { //GAssert(pInputRelation->GetAttribute(pInputRelation->GetInputIndex(i))->IsContinuous() == true, "GManifoldPumper only supports continuous input values"); m_pMapRelation->AddAttribute(new GArffAttribute(true, 0, NULL)); } for(i = 0; i < nNewDimensions; i++) m_pMapRelation->AddAttribute(new GArffAttribute(false, 0, NULL)); m_pManifoldMap = new GKNN(m_pMapRelation, nManifoldMapNeighbors);}GManifoldPumper::~GManifoldPumper(){ if(m_bOwnLearner) delete(m_pLearner); delete(m_pManifoldMap); delete(m_pMapRelation); delete[] m_pVector;}GArffRelation* GManifoldPumper::MakePumpedRelation(GArffRelation* pInputRelation, int nNewDimensions){ int i; GArffRelation* pPumpedRelation = new GArffRelation(); for(i = 0; i < pInputRelation->GetInputCount(); i++) pPumpedRelation->AddAttribute(new GArffAttribute(true, 0, NULL)); for(i = 0; i < nNewDimensions; i++) pPumpedRelation->AddAttribute(new GArffAttribute(true, 0, NULL)); for(i = 0; i < pInputRelation->GetOutputCount(); i++) pPumpedRelation->AddAttribute(new GArffAttribute(false, 0, NULL)); return pPumpedRelation;}void GManifoldPumper::SetLearner(GSupervisedLearner* pLearner, bool bOwn){ GAssert(pLearner->GetRelation() == GetRelation(), "The learner must be constructed with the relation obtained by calling GetRelation on this GManifoldPumper object"); m_pLearner = pLearner; m_bOwnLearner = bOwn;}// virtualvoid GManifoldPumper::Train(GArffData* pData){ GAssert(m_pLearner, "SetLearner must be called before Train is called"); // Use Manifold Sculpting to reduce the dimensionality of the data GArffData* pTransformedData = GManifoldSculpting::DoManifoldSculpting(GManifoldSculpting::PP_NONE /*PP_LLE*/, m_pInputRelation, pData, m_nNewDimensions, m_nManifoldSculptingNeighbors, 10, 100); GAssert(pTransformedData->GetSize() == pData->GetSize(), "transformed data isn't consistent"); // Train the manifold map int i, j; double* pInputVector; double* pTransformedVector; GTEMPBUF(double, pVector, m_pMapRelation->GetAttributeCount()); for(i = 0; i < pData->GetSize(); i++) { pInputVector = pData->GetVector(i); pTransformedVector = pTransformedData->GetVector(i); for(j = 0; j < m_pMapRelation->GetInputCount(); j++) pVector[j] = pInputVector[m_pInputRelation->GetInputIndex(j)]; for(j = 0; j < m_pMapRelation->GetOutputCount(); j++) pVector[m_pMapRelation->GetInputCount() + j] = pTransformedVector[m_pInputRelation->GetInputIndex(j)]; m_pManifoldMap->AddVector(pVector); } // Train the learner GArffData* pPumpedData = PumpData(pData); Holder<GArffData*> hPumpedData(pPumpedData); m_pLearner->Train(pPumpedData);}GArffData* GManifoldPumper::PumpData(GArffData* pData){ int i; GArffData* pPumpedData = new GArffData(pData->GetSize()); double* pVector; for(i = 0; i < pData->GetSize(); i++) { pVector = pData->GetVector(i); double* pPumpedVector = new double[m_pRelation->GetAttributeCount()]; PumpVector(pPumpedVector, pVector); pPumpedData->AddVector(pPumpedVector); } return pPumpedData;}void GManifoldPumper::PumpVector(double* pOut, const double* pIn){ int nInputRelationInputs = m_pInputRelation->GetInputCount(); int nInputRelationOutputs = m_pInputRelation->GetOutputCount(); int i; // Copy the original input values for(i = 0; i < nInputRelationInputs; i++) pOut[i] = pIn[m_pInputRelation->GetInputIndex(i)]; // Compute the additional inputs m_pManifoldMap->EvalLinearWeight(pOut); // Copy the output values for(i = 0; i < nInputRelationOutputs; i++) pOut[m_pRelation->GetOutputIndex(i)] = pIn[m_pInputRelation->GetOutputIndex(i)];}// virtualvoid GManifoldPumper::Eval(double* pVector){ PumpVector(m_pVector, pVector); m_pLearner->Eval(pVector);}void GManifoldPumper::EvalPumpedVector(double* pVector){ m_pLearner->Eval(pVector);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -