📄 pca_full.m

📁 Matlab package for PCA for datasets with missing values
💻 M
📖 第 1 页 / 共 2 页
字号:
12 下一页
%  PCA_FULL - PCA with unrestricted Gaussian posterior%%  PCA with unrestricted Gaussian pdfs (full covariance matrices) for%  approximating the posterior distributions of A(i,:) and S(:,j) in%  the model X(:,j) = Mu + A*S(:,j) + Noise. The noise is isotropic%  and Gaussian with the variance V.%%  [ A, S, Mu, V, CV, HP, LC ] = PCA_FULL( X, N ) identifies the model%  for the given data matrix X and number of principal components N.%  Matrix X can be either sparse with only observed values included%  (note that observed zeros should be replaced with eps) or a full%  matrix with missing values replaced by NaNs.%%  The function returns the mean values of the model parameters in A,%  S, Mu, and V. The posterior variances are stored in CV such that%  CV.A{j} is the posterior covariance matrix for A(i,:) and%  CV.S{CV.Isv(j)} (or CV.S{j} if CV.Isv is empty) is the same for%  S(:,j). HP contains point estimates of the hyperparameters: HP.Va%  is the prior variance for A(:,k) and HP.Vmu is the same for Mu.%  %  LC is a structure with learning curves (rms training and probing%  errors, cost function values, time).%%  PCA_FULL( X, N, 'algorithm', name ) specifies the algorithm:%   'ppca': Probabilistic PCA (no prior and point estimates for A, Mu)%   'map':  Prior on A(:,k) and Mu, point estimates for A(i,:) and Mu%   'vb':   Variational Bayesian PCA (prior on A and Mu, Gaussian%           posterior approximation for A(i,:) and Mu) {default}%  %  Other optional parameter/value pairs with {default values}:%   init       - Initialization type ({'random'} - random,%                filename: load from file, structure: from given data)%   maxiters   - Maximum number of iterations {1000}%   rotate2pca - Whether to perform rotation of A and S to speed-up%                convergence {1}%   uniquesv   - Whether to compute only unique covariance matrices of%                S(:,j) {1}%   minangle   - Termination by minimum angle between subspaces%                defined by A {1e-8}%   rmsstop    - Termination by rms training error ([] - no rms stop)%                {[ 100 1e-4 1e-3 ]}: 100 iterations, 1e-4 absolute%                tolerance, 1e-3 relative tolerance,%   cfstop     - Termination by cost function value {[]} (similarly to%                rmsstop). The cost function is computed only if this%                option is nonempty%   xprobe     - Validation data set (of the same dimensions as X)%   earlystop  - Whether to use early stopping based on probing error%   verbose    - Progress information display level (0,{1},2)%   display    - Plot progress {0}%   autosave   - Auto-save after each {600} seconds%   filename   - Name of the file for auto-save {'pca_f_autosave'}%%  OUT = PCA_FULL( X, N ) returns all the outputs in a single%  structure variable OUT. Learning can be continued as follows:%    out = pca_full( X, N );%    [ A, S, Mu, V, CV, HP, LC ] = pca_full( X, N, 'init', out );%  This software is provided "as is", without warranty of any kind.%  Alexander Ilin, Tapani Raikofunction [ A, S, Mu, V, cv, hp, lc ] = pca_full( X, ncomp, varargin )opts = struct( ...    'init',          'random',...    'maxiters',      1000,...    'bias',          1,...    'uniquesv',      1,...    'autosave',      600,...    'filename',      'pca_f_autosave',...    'minangle',      1e-8,...    'algorithm',     'vb',...    'earlystop',     0,...    'rmsstop',       [ 100 1e-4 1e-3 ],... % [] means no rms stop criteria    'cfstop',        [],... % [] means no cost stop criteria    'verbose',       1,...    'xprobe',        [],...    'rotate2pca',    1,...    'display',       0 );[ opts, errmsg, wrnmsg ] = argschk( opts, varargin{:} );if ~isempty(errmsg), error( errmsg ), endif ~isempty(wrnmsg), warning( wrnmsg ), endXprobe = opts.xprobe;switch opts.algorithmcase 'ppca'    use_prior = 0;    use_postervar = 0;case 'map'    use_prior = 1;    use_postervar = 0;case 'vb'    use_prior = 1;    use_postervar = 1;otherwise    error( 'Wrong value of the argument ''algorithm''' )end[n1x,n2x] = size(X);[ X, Xprobe, Ir, Ic, opts.init ] = rmempty( X, Xprobe,...                                            opts.init, opts.verbose );[n1,n2] = size(X);if issparse(X)    % X is a sparse matrix with only observed values    M = spones(X);    Mprobe = spones(Xprobe);else    % Missing values are marked as NaNs    M = ~isnan(X);    Mprobe = ~isnan(Xprobe);    X(X==0) = eps;    Xprobe(Xprobe==0) = eps;    X(isnan(X)) = 0;  Xprobe(isnan(Xprobe)) = 0;    endNobs_i = full(sum(M,2));ndata = sum(Nobs_i);nprobe = nnz(Mprobe);if nprobe == 0    Xprobe = [];    opts.earlystop = 0;end    [IX,JX,data] = find(X);clear data% Compute indices Isv: Sv{Isv(j)} gives Sv for j, j=1...n2if opts.uniquesv    [ nobscomb, obscombj, Isv ] = miscomb(M,opts.verbose);else    nobscomb = n2; Isv = []; obscombj = {};end[ A, S, Mu, V, Av, Sv, Muv ] = ...    InitParms( opts.init, n1, n2, ncomp, nobscomb, Isv );if ~use_postervar    % MAP estimation for Mu and A    Muv = [];    Av = {};endif isempty(Mu)    if opts.bias        Mu = sum(X,2) ./ Nobs_i;    else        Mu = zeros(n1,1);    endend[X,Xprobe] = SubtractMu( Mu, X, M, Xprobe, Mprobe, opts.bias );[rms,errMx] = compute_rms( X, A, S, M, ndata );prms = compute_rms( Xprobe, A, S, Mprobe, nprobe );lc.rms = rms; lc.prms = prms; lc.time = 0; lc.cost = NaN;dsph = DisplayInit( opts.display, lc );PrintFirstStep( opts.verbose, rms, prms );Aold = A;if use_prior    Va = ones(1,ncomp); Vmu = 1;else    Va = repmat(inf,1,ncomp); Vmu = inf;end% Parameters of the prior for variance parametershpVa = 0.001; hpVb = 0.001; hpV = 0.001;time_start = clock;time_autosave = time_start;ticfor iter = 1:opts.maxiters    % The prior is not updated at the begining of learning    % to avoid killing sources     if use_prior && iter > 10        % Update Va, Vmu        Va = sum( A.^2, 1 );        if ~use_postervar            Vmu = sum( Mu.^2 );        else            for i = 1:n1                Va = Va + diag(Av{i})';            end            Vmu = sum( Mu.^2 + Muv );        end        Va = (Va + 2*hpVa) / (n1 + 2*hpVb);        Vmu = (Vmu + 2*hpVa) / (n1 + 2*hpVb);    end        if opts.bias        dMu = full( sum(errMx,2) ./ Nobs_i );        if use_postervar            Muv = V ./ ( Nobs_i + V/Vmu );        end        th = 1 ./ ( 1 + V./Nobs_i/Vmu );          Mu_old = Mu;        Mu = th.*( Mu + dMu );        dMu = Mu - Mu_old;        [X,Xprobe] = SubtractMu( dMu, X, M, Xprobe, Mprobe, 1 );    end        % Update S    if isempty(Isv)        for j = 1:n2            %A_j = repmat(full(M(:,j)),1,ncomp) .* A;            A_j = repmat(M(:,j),1,ncomp) .* A;            Psi = A_j' * A_j + diag( repmat(V,1,ncomp) );            if use_postervar                for i = find(M(:,j))'                    Psi = Psi + Av{i};                end            end            invPsi = inv(Psi);            S(:,j) = invPsi * A_j' * X(:,j);            Sv{j} = V * invPsi;            PrintProgress( opts.verbose, j, n2, 'Updating S:' )        end    else        for k = 1:nobscomb            j = obscombj{k}(1);            %A_j = repmat(full(M(:,j)),1,ncomp) .* A;            A_j = repmat(M(:,j),1,ncomp) .* A;            Psi = A_j' * A_j + diag( repmat(V,1,ncomp) );            if use_postervar                for i = find(M(:,j))'                    Psi = Psi + Av{i};                end            end            invPsi = inv(Psi);            Sv{k} = V * invPsi;            tmp = invPsi * A_j';            for j = obscombj{k}                S(:,j) = tmp * X(:,j);            end            %S(:,obscombj{k}) = tmp * X(:,obscombj{k});            PrintProgress( opts.verbose, k, nobscomb, 'Updating S:' )        end    end    if opts.verbose == 2, fprintf('\r'), end        if opts.rotate2pca        [ dMu, A, Av, S, Sv ] = RotateToPCA( ...            A, Av, S, Sv, Isv, obscombj, opts.bias );        if opts.bias,             [X,Xprobe] = SubtractMu( dMu, X, M, Xprobe, Mprobe, opts.bias );            Mu = Mu + dMu;        end    end        % Update A    if opts.verbose == 2        fprintf('                                              \r')    end    for i = 1:n1        %S_i = repmat(full(M(i,:)),ncomp,1) .* S;        S_i = repmat(M(i,:),ncomp,1) .* S;        Phi = S_i * S_i' + diag(V./Va);        for j = find(M(i,:))            if isempty(Isv)                Phi = Phi + Sv{j};            else                 Phi = Phi + Sv{Isv(j)};            end        end        invPhi = inv(Phi);        A(i,:) = X(i,:) * S_i' * invPhi;        if use_postervar            Av{i} = V * invPhi;        end                PrintProgress( opts.verbose, i, n1, 'Updating A:' )    end    if opts.verbose == 2, fprintf('\r'), end        [rms,errMx] = compute_rms( X, A, S, M, ndata );    prms = compute_rms( Xprobe, A, S, Mprobe, nprobe );        % Update V    sXv = 0;    if isempty(Isv)        for r = 1:ndata            sXv = sXv + A(IX(r),:) * Sv{JX(r)} * A(IX(r),:)';            if use_postervar                sXv = sXv + S(:,JX(r))' * Av{IX(r)} * S(:,JX(r)) + ...                    sum( sum( Sv{JX(r)} .* Av{IX(r)} ) ) + ...                    Muv(IX(r));            end        end    else        for r = 1:ndata            sXv = sXv + A(IX(r),:) * Sv{Isv(JX(r))} * A(IX(r),:)';            if use_postervar                sXv = sXv + ...                    S(:,JX(r))' * Av{IX(r)} * S(:,JX(r)) + ...                    sum( sum( Sv{Isv(JX(r))} .* Av{IX(r)} ) ) + ...                    Muv(IX(r));            end        end    end    sXv = sXv + (rms^2)*ndata;        %V = rms^2 + V/ndata;    V = ( sXv + 2*hpV ) / (ndata + 2*hpV);        t = toc;    lc.rms = [ lc.rms rms ]; lc.prms = [ lc.prms prms ];    lc.time = [ lc.time t ];        if ~isempty(opts.cfstop)        cost = cf_full( X, A, S, Mu, V, Av, Sv, Isv, Muv, Va, Vmu,...
12 下一页
💿 文件大小 79 K
👤 上传用户 quanliao77
📂 所属分类 matlab例程
🏷️ 相关标签

#for #datasets #package #missing
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -