📄 regem.m
字号:
end end if strmatch('disp', fopts); dispon = options.disp; else dispon = 1; end if strmatch('Xmis0', fopts); Xmis0_given= 1; Xmis0 = options.Xmis0; if any(size(Xmis0) ~= [n,p]) error('OPTIONS.Xmis0 must have the same size as X.') end else Xmis0_given= 0; end if strmatch('C0', fopts); C0_given = 1; C0 = options.C0; if any(size(C0) ~= [p, p]) error('OPTIONS.C0 has size incompatible with X.') end else C0_given = 0; end if strmatch('Xcmp', fopts); Xcmp_given = 1; Xcmp = options.Xcmp; if any(size(Xcmp) ~= [n,p]) error('OPTIONS.Xcmp must have the same size as X.') end sXcmp = std(Xcmp); else Xcmp_given = 0; end % ================= end options ========================= % get indices of missing values and initialize matrix of imputed values indmis = find(isnan(X)); nmis = length(indmis); if nmis == 0 warning('No missing value flags found.') return % no missing values end [jmis,kmis] = ind2sub([n, p], indmis); Xmis = sparse(jmis, kmis, NaN, n, p); % matrix of imputed values Xerr = sparse(jmis, kmis, Inf, n, p); % standard error imputed vals. % for each row of X, assemble the column indices of the available % values and of the missing values kavlr = cell(n,1); kmisr = cell(n,1); for j=1:n kavlr{j} = find(~isnan(X(j,:))); kmisr{j} = find(isnan(X(j,:))); end if dispon disp(sprintf('\nREGEM:')) disp(sprintf('\tPercentage of values missing: %5.2f', nmis/(n*p)*100)) disp(sprintf('\tStagnation tolerance: %9.2e', stagtol)) disp(sprintf('\tMaximum number of iterations: %3i', maxit)) if (inflation ~= 1) disp(sprintf('\tResidual (co-)variance inflation: %6.3f ', inflation)) end if Xmis0_given & C0_given disp(sprintf(['\tInitialization with given imputed values and' ... ' covariance matrix.'])) elseif C0_given disp(sprintf(['\tInitialization with given covariance' ... ' matrix.'])) elseif Xmis0_given disp(sprintf(['\tInitialization with given imputed values.'])) else disp(sprintf('\tInitialization of missing values by mean substitution.')) end switch regress case 'mridge' disp(sprintf('\tOne multiple ridge regression per record:')) disp(sprintf('\t==> one regularization parameter per record.')) case 'iridge' disp(sprintf('\tOne individual ridge regression per missing value:')) disp(sprintf('\t==> one regularization parameter per missing value.')) case 'ttls' disp(sprintf('\tOne total least squares regression per record.')) disp(sprintf('\tFixed truncation parameter: %4i', trunc)) end if h_given disp(sprintf('\tFixed regularization parameter: %9.2e', optreg.regpar)) end if Xcmp_given disp(sprintf(['\n\tIter \tmean(peff) \t|X-Xcmp|/std(Xcmp) ' ... '\t|D(Xmis)|/|Xmis|'])) else disp(sprintf(['\n\tIter \tmean(peff) \t|D(Xmis)| ' ... '\t|D(Xmis)|/|Xmis|'])) end end % initial estimates of missing values if Xmis0_given % substitute given guesses for missing values X(indmis) = Xmis0(indmis); [X, M] = center(X); % center data to mean zero else [X, M] = center(X); % center data to mean zero X(indmis) = zeros(nmis, 1); % fill missing entries with zeros end if C0_given C = C0; else C = X'*X / dofC; % initial estimate of covariance matrix end it = 0; rdXmis = Inf; while (it < maxit & rdXmis > stagtol) it = it + 1; % initialize for this iteration ... CovRes = zeros(p,p); % ... residual covariance matrix peff_ave = 0; % ... average effective number of variables % scale variables to unit variance D = sqrt(diag(C)); const = (abs(D) < eps); % test for constant variables nconst = ~const; if sum(const) ~= 0 % do not scale constant variables D = D .* nconst + 1*const; end X = X ./ repmat(D', n, 1); % correlation matrix C = C ./ repmat(D', p, 1) ./ repmat(D, 1, p); if strmatch(regress, 'ttls') % compute eigendecomposition of correlation matrix [V, d] = peigs(C, neigs); peff_ave = trunc; end for j=1:n % cycle over records pm = length(kmisr{j}); % number of missing values in this record if pm > 0 pa = p - pm; % number of available values in this record % regression of missing variables on available variables switch regress case 'mridge' % one multiple ridge regression per record [B, S, h, peff] = mridge(C(kavlr{j},kavlr{j}), ... C(kmisr{j},kmisr{j}), ... C(kavlr{j},kmisr{j}), n-1, optreg); peff_ave = peff_ave + peff*pm/nmis; % add up eff. number of variables dofS = dofC - peff; % residual degrees of freedom % inflation of residual covariance matrix S = inflation * S; % bias-corrected estimate of standard error in imputed values Xerr(j, kmisr{j}) = dofC/dofS * sqrt(diag(S))'; case 'iridge' % one individual ridge regression per missing value in this record [B, S, h, peff] = iridge(C(kavlr{j},kavlr{j}), ... C(kmisr{j},kmisr{j}), ... C(kavlr{j},kmisr{j}), n-1, optreg); peff_ave = peff_ave + sum(peff)/nmis; % add up eff. number of variables dofS = dofC - peff; % residual degrees of freedom % inflation of residual covariance matrix S = inflation * S; % bias-corrected estimate of standard error in imputed values Xerr(j, kmisr{j}) = ( dofC * sqrt(diag(S)) ./ dofS)'; case 'ttls' % truncated total least squares with fixed truncation parameter [B, S] = pttls(V, d, kavlr{j}, kmisr{j}, trunc); dofS = dofC - trunc; % residual degrees of freedom % inflation of residual covariance matrix S = inflation * S; % bias-corrected estimate of standard error in imputed values Xerr(j, kmisr{j}) = dofC/dofS * sqrt(diag(S))'; end % missing value estimates Xmis(j, kmisr{j}) = X(j, kavlr{j}) * B; % add up contribution from residual covariance matrices CovRes(kmisr{j}, kmisr{j}) = CovRes(kmisr{j}, kmisr{j}) + S; end end % loop over records % rescale variables to original scaling X = X .* repmat(D', n, 1); Xerr = Xerr .* repmat(D', n, 1); Xmis = Xmis .* repmat(D', n, 1); C = C .* repmat(D', p, 1) .* repmat(D, 1, p); CovRes = CovRes .* repmat(D', p, 1) .* repmat(D, 1, p); % rms change of missing values dXmis = norm(Xmis(indmis) - X(indmis)) / sqrt(nmis); % relative change of missing values nXmis_pre = norm(X(indmis) + M(kmis)') / sqrt(nmis); if nXmis_pre < eps rdXmis = Inf; else rdXmis = dXmis / nXmis_pre; end % update data matrix X X(indmis) = Xmis(indmis); % re-center data and update mean [X, Mup] = center(X); % re-center data M = M + Mup; % updated mean vector % update covariance matrix estimate C = (X'*X + CovRes)/dofC; if dispon if Xcmp_given % imputed values in original scaling Xmis(indmis) = X(indmis) + M(kmis)'; % relative error of imputed values (relative to values in Xcmp) dXmis = norm( (Xmis(indmis)-Xcmp(indmis))./sXcmp(kmis)' ) ... / sqrt(nmis); disp(sprintf(' \t%3i \t %8.2e \t %10.3e \t %10.3e', ... it, peff_ave, dXmis, rdXmis)) else disp(sprintf(' \t%3i \t %8.2e \t%9.3e \t %10.3e', ... it, peff_ave, dXmis, rdXmis)) end end % display of diagnostics end % EM iteration % add mean to centered data matrix X = X + repmat(M, n, 1);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -