% dualgreedykplstest2.m
%A script to test greedy KPLS matrix approximation
%Loads a dataset, trains a dual greedy KPLS subspace with double deflation
%for as many iterations as the rank of X, then (below) compares several
%equivalent kernel matrix approximations against each other.
clear all;
tol = 10^-5; %tolerance used for every approximation comparison below
%[X, y, numExamples, numFeatures] = readCsvData('ionosphere.data');
[X, y, numExamples, numFeatures] = readCsvData('sonar-all.data');
X = normalise(X);
%Wrap the examples in the project's data container
%NOTE(review): data/addDataField are project helpers - presumably 'data'
%constructs an empty container; confirm against their definitions
d = data;
d = addDataField(d, 'X', X, 'examples');
T = rank(X); %run the greedy method up to the full rank of the data matrix
gKPLSParams.iterations = T;
gKPLSParams.doubleDeflation = 1;
gKPLSParams.dualFeatureDirection = 'dualMaxSparseKernelApprox';
gKPLSParams.X.kernel = getDefaultLinearKernel;
gKPLSParams.Y.name = ''; %Ignore Y
gKPLSParams.normalise = 0;
[subspaceInfo, trainInfo] = dualGeneralFeaturesTrain(d, gKPLSParams);
%Compare three equivalent reconstructions of the training kernel matrix
%after each greedy iteration; all three residual traces must agree to
%within tol.
trainX = getDataFieldValue(d, 'X');
trainK = trainX*trainX'; %linear kernel on the training examples
tau = getDataFieldValue(trainInfo.data, 'X'); %dual projections T
s = subspaceInfo.X.s;
r = subspaceInfo.X.r;
residuals = zeros(T, 1);
residuals2 = zeros(T, 1);
residuals3 = zeros(T, 1);
%There are two ways to approximate K (give the same result) - one is K = TQ'+ QT' - TRT'
%and the other is K = T(T'T)^-1T'K + KT(T'T)^-1T - T(T'T)^-1T'KT(T'T)^-1T'
for i=1:T
    Ti = tau(:, 1:i); %projections from the first i iterations
    %Projection onto the column space of Ti. (Ti'*Ti)\Ti' replaces the
    %original repeated inv(Ti'*Ti)*Ti' - backslash is faster and
    %numerically more stable, and hoisting it removes six copies of the
    %same sub-expression from the formula below.
    P = Ti*((Ti'*Ti)\Ti');
    newTrainK = Ti*s(:, 1:i)' + s(:, 1:i)*Ti' - Ti*diag(r(1:i))*Ti';
    newTrainK2 = P*trainK + trainK*P - P*trainK*P;
    gKPLSParams.iterations = i;
    %Third reconstruction: the one produced by the library itself
    [testInfo, projectionInfo] = dualGeneralFeaturesApprox(d, d, subspaceInfo, gKPLSParams);
    residuals(i) = trace(trainK - newTrainK);
    residuals2(i) = trace(trainK - newTrainK2);
    residuals3(i) = trace(trainK - getDataFieldValue(testInfo.data, 'K'));
end
if norm(residuals - residuals2) > tol
    error('Matrix approximation of training set is wrong');
end
if norm(residuals - residuals3) > tol
    error('Matrix approximation of training set using dualGeneralFeaturesApprox is wrong');
end
%plot(1:T, residuals, 'k');
%Now get an approximation on a test set and compare against computing it
%manually
[trainData, testData] = splitData2(d, 2/3);
numTrainExamples = getNumDataExamples(trainData);
numTestExamples = getNumDataExamples(testData);
T = 5; %only a few iterations are needed for the manual comparison
gKPLSParams.iterations = T;
trainResiduals = zeros(T, 1);
trainResiduals2 = zeros(T, 1);
[subspaceInfo, trainInfo] = dualGeneralFeaturesTrain(trainData, gKPLSParams);
%NOTE(review): these two Approx results are overwritten inside the loops
%below before being read - the calls appear to be redundant
[testInfo, projectionInfo] = dualGeneralFeaturesApprox(trainData, trainData, subspaceInfo, gKPLSParams);
[testInfo2, projectionInfo] = dualGeneralFeaturesApprox(trainData, testData, subspaceInfo, gKPLSParams);
trainK = getDataFieldValue(trainData, 'X')*getDataFieldValue(trainData, 'X')';
trainKj = trainK; %deflated kernel, updated each iteration
%Buffers for the manually-computed greedy KPLS quantities
b = zeros(numTrainExamples, T);
r = zeros(T, 1);
s = zeros(numTrainExamples, T);
tau = zeros(numTrainExamples, T);
I = eye(numTrainExamples);
%Manually rebuild the greedy KPLS double deflation on the training kernel
%and check its residual trace against the library's approximation
for i=1:T
    b(:, i) = dualMaxSparseKernelApprox(trainK, trainKj, 0, 0);
    t = trainKj*b(:, i);
    tau(:, i) = t;
    tNorm2 = t'*t;
    s(:, i) = trainKj*t/tNorm2;
    r(i) = s(:, i)'*t/tNorm2;
    %Deflate symmetrically so the chosen direction is removed from both
    %sides of the kernel matrix
    deflator = I - t*t'/tNorm2;
    trainKj = deflator * trainKj * deflator;
    gKPLSParams.iterations = i;
    [testInfo, projectionInfo] = dualGeneralFeaturesApprox(trainData, trainData, subspaceInfo, gKPLSParams);
    trainResiduals(i) = trace(trainKj);
    trainResiduals2(i) = trace(trainK - getDataFieldValue(testInfo.data, 'K'));
end
if norm(trainResiduals - trainResiduals2) > tol
    error('Approximation on training set is wrong');
end
%Repeat the recursion on the test-train kernel, reusing the directions
%b, tau, s, r computed on the training set, and compare with the library
testTrainK = getDataFieldValue(testData, 'X')*getDataFieldValue(trainData, 'X')';
testTrainKj = testTrainK;
sHat = zeros(numTestExamples, T);
tauHat = zeros(numTestExamples, T);
testResiduals = zeros(T, 1);
testResiduals2 = zeros(T, 1);
for i=1:T
    t = tau(:, i);
    tNorm2 = t'*t;
    tauHat(:, i) = testTrainKj*b(:, i);
    sHat(:, i) = testTrainKj*t/tNorm2;
    %Deflate the rectangular test-train kernel with the training directions
    testTrainKj = testTrainKj - tauHat(:, i)*s(:, i)' - sHat(:, i)*t' + tauHat(:, i)*r(i)*t';
    gKPLSParams.iterations = i;
    [testInfo2, projectionInfo] = dualGeneralFeaturesApprox(trainData, testData, subspaceInfo, gKPLSParams);
    %NOTE(review): testTrainKj is rectangular; recent MATLAB requires a
    %square matrix for trace - confirm the intended MATLAB version
    testResiduals(i) = trace(testTrainKj);
    testResiduals2(i) = trace(testTrainK - getDataFieldValue(testInfo2.data, 'K'));
end
if norm(testResiduals - testResiduals2) > tol
    error('Approximation on test set is wrong');
end
%Now, test the approximations in the single deflation case
T = 5;
gKPLSParams.iterations = T;
gKPLSParams.doubleDeflation = 0;
gKPLSParams.dualFeatureDirection = 'dualMaxSparseKernelApprox';
gKPLSParams.X.kernel = getDefaultLinearKernel;
gKPLSParams.Y.name = ''; %Ignore Y
gKPLSParams.normalise = 0;
[subspaceInfo, trainInfo] = dualGeneralFeaturesTrain(trainData, gKPLSParams);
[testInfo, projectionInfo] = dualGeneralFeaturesApprox(trainData, trainData, subspaceInfo, gKPLSParams);
[testInfo2, projectionInfo2] = dualGeneralFeaturesApprox(trainData, testData, subspaceInfo, gKPLSParams);
tau = subspaceInfo.X.tau;
trainK = getDataFieldValue(trainData, 'X')*getDataFieldValue(trainData, 'X')';
%Projection onto the column space of tau, computed once. (tau'*tau)\tau'
%replaces the seven copies of inv(tau'*tau) in the original formulas -
%backslash is faster and numerically more stable than explicit inv.
P = tau*((tau'*tau)\tau');
%Single deflation approximates K by projecting it onto the tau subspace
%from both sides
newTrainK = P*trainK*P;
if norm(projectionInfo.X.tauHat - subspaceInfo.X.tau) > tol
    error('Tau computed for training set is wrong');
end
if norm(getDataFieldValue(testInfo.data, 'K') - newTrainK) > tol
    error('Training projections are wrong for single deflation kernel approximation');
end
%Check kernel is still symmetric
if norm(newTrainK' - newTrainK) > tol
    error('Training kernel approximation is not symmetric');
end
%Check that the kernel approximation only exists in the subspace defined by
%tau - the two sides now raise distinct messages so a failure identifies
%which projection broke (the original used the same message for both)
if norm(newTrainK - newTrainK*P) > tol
    error('Kernel matrix approximation on training set is not in tau subspace (right)');
end
if norm(newTrainK - P*newTrainK) > tol
    error('Kernel matrix approximation on training set is not in tau subspace (left)');
end
%Check approximation on test set
[testInfo3, projectionInfo3] = dualGeneralFeaturesProject(trainData, testData, subspaceInfo, gKPLSParams);
tauHat = getDataFieldValue(testInfo3.data, 'X');
newTestTrainK = tauHat*((tau'*tau)\tau')*trainK*P;
if norm(projectionInfo2.X.tauHat - tauHat) > tol
    error('Tau computed for test set is wrong');
end
if norm(getDataFieldValue(testInfo2.data, 'K') - newTestTrainK) > tol
    error('Test projections are wrong for single deflation kernel approximation');
end
if norm(newTestTrainK - newTestTrainK*P) > tol
    error('Kernel matrix approximation on test set is not in tau subspace');
end