gmcv.m
function ll = gmcv(data, ks, nfold, nrep, sig)
% ll = gmcv(data, ks, nfold, nrep, [sig])
%
% Performs n-fold cross-validation on data using Gaussian mixture
% models of the complexities (numbers of components) given in ks.
%
% INPUT:
%
% data(n,d) : data matrix (one data point per ROW)
% ks(nk)    : vector of model complexities to evaluate (e.g. ks = 1:10)
% nfold     : specifies how to split the data. If it is a single
%             number, it gives the number of folds (e.g. 10-fold CV).
%             If it is a vector of length 2:
%             nfold(1) : number of random train/test splits (different
%                        from nrep, which controls EM restarts)
%             nfold(2) : fraction of the data used for *training*
%                        (e.g. 0.7)
% nrep      : number of random restarts per model per fold/split
% sig       : optional 0/1 matrix indicating which elements to include
%             in the covariance (sigma) matrices; defaults to all ones
%             (full covariances)
%
% OUTPUT:
%
% ll(nr,nk) : matrix of out-of-sample log-likelihoods, where nr is the
%             number of folds/splits; each column is one model in ks.
%
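% EXAMPLE (a sketch, not from the original file; it assumes gm.m and
% gmscore.m from this toolkit are on the path and uses synthetic
% two-cluster data):
%
%   data = [randn(100,2); randn(100,2) + 3];
%   ll  = gmcv(data, 1:4, 10, 5);        % 10-fold CV, 5 EM restarts
%   ll2 = gmcv(data, 1:4, [20 0.7], 5);  % 20 random 70/30 splits
%   [ignore, best] = max(mean(ll, 1));   % pick k with best mean out-of-sample ll
%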
% by Igor Cadez, 10/27/98, revised 01/18/99.
% initialize variables
[n, d] = size(data);
nk = length(ks);
if nargin < 5
    sig = ones(d);   % default: include every element (full covariances)
end
% figure out whether to do disjoint folds or random draws
nr = nfold(1);
if length(nfold) > 1
    foldflag = 0;
    omega = nfold(2);          % fraction of data used for training
    spl = round(n*omega);      % number of training points per split
else
    foldflag = 1;
    par = round((0:nr)*n/nr);  % fold boundaries: fold j is ind(par(j)+1:par(j+1))
    ind = randperm(n);         % one fixed permutation shared by all folds
end
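% (worked example of the boundary scheme: n = 10, nr = 5 gives
%  par = [0 2 4 6 8 10], so fold j = 2 tests on ind(3:4) and trains
%  on the remaining eight rows)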
ll = zeros(nr, nk);
% the main loop
for j = 1:nr
    if ~foldflag
        % draw a fresh split for this repetition; it is shared by all
        % models in ks so that they are compared on the same data
        ind = randperm(n);
    end
    ik = 1;
    for i = ks
        if foldflag
            % fold j is the test set, everything else is training
            xtest  = data(ind(par(j)+1:par(j+1)), :);
            xtrain = [data(ind(1:par(j)), :); data(ind(par(j+1)+1:end), :)];
        else
            xtrain = data(ind(1:spl), :);
            xtest  = data(ind(spl+1:end), :);
        end
        % do some random restarts to get the best in-sample fit
        [alphabest, mubest, sigmabest, lkhbest] = gm(xtrain, i, nrep, sig);
        % out-of-sample log-likelihood of the best fit
        ll(j, ik) = gmscore(xtest, alphabest, mubest, sigmabest);
        ik = ik + 1;   % ks may skip complexities, so ik indexes columns of ll
    end
end
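% For reference, a minimal sketch of what the scoring step computes.
% This is NOT the toolkit's gmscore.m; the parameter layout assumed
% below (mu(k,:) as row means, sigma(:,:,k) as covariance slices) is a
% guess, so treat it as an illustration of the math only:
%
%   function ll = gmscore_sketch(x, alpha, mu, sigma)
%   % total log-likelihood of the rows of x under a Gaussian mixture
%   [m, d] = size(x);
%   lik = zeros(m, 1);
%   for k = 1:length(alpha)
%       dx  = x - repmat(mu(k,:), m, 1);   % center on component k
%       S   = sigma(:,:,k);
%       q   = sum((dx / S) .* dx, 2);      % squared Mahalanobis distances
%       lik = lik + alpha(k) * exp(-q/2) / sqrt((2*pi)^d * det(S));
%   end
%   ll = sum(log(lik));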