📄 ecmnmle.m
字号:
function [Mean, Covar] = ecmnmle(Data, InitMethod, MaxIter, Tolerance, Mean0, Covar0)
%ECMNMLE Estimate mean and covariance of incomplete multivariate normal data.
% Use the expectation conditional maximization (ECM) algorithm to estimate
% NUMSERIES x 1 mean column vector Mean and NUMSERIES x NUMSERIES covariance
% matrix Covar from Data, where Data has NUMSAMPLES independent
% identically-distributed samples of NUMSERIES random variables assumed to be
% multivariate normal with missing data indicated with NaNs. If no output
% arguments, display a plot of convergence (or non-convergence) of algorithm
% to a maximum likelihood estimate.
%
% [Mean, Covar] = ecmnmle(Data);
% [Mean, Covar] = ecmnmle(Data, InitMethod);
% [Mean, Covar] = ecmnmle(Data, InitMethod, MaxIter);
% [Mean, Covar] = ecmnmle(Data, InitMethod, MaxIter);
% [Mean, Covar] = ecmnmle(Data, InitMethod, MaxIter, Tolerance);
% [Mean, Covar] = ecmnmle(Data, InitMethod, MaxIter, Tolerance, Mean0, Covar0);
%
% ecmnmle(Data);
% ecmnmle(Data, InitMethod);
% ecmnmle(Data, InitMethod, MaxIter);
% ecmnmle(Data, InitMethod, MaxIter, Tolerance);
% ecmnmle(Data, InitMethod, MaxIter, Tolerance, Mean0, Covar0);
%
% Inputs:
% Data - NUMSAMPLES x NUMSERIES matrix with NUMSAMPLES samples of a
% NUMSERIES-dimensional random vector. Missing values are indicated
% by NaNs and are assumed to be "missing-at-random." Data is the only
% required argument.
%
% Optional Inputs:
% InitMethod - String to identify one of three initialization methods to
% obtain initial estimates for the mean and covariance of the data. The
% default method is 'nanskip'. If Mean0 and Covar0 have been supplied, then
% the initialization method, even if explicitly specified, is not executed.
% The initialization methods are:
% 'nanskip' - (default) Skip all records with NaNs.
% 'twostage' - Estimate mean, fill NaNs with mean, then estimate covar.
% 'diagonal' - Form a diagonal covar.
% MaxIter - Maximum number of iterations for ECM algorithm (default value is 50).
% Tolerance - Convergence tolerance for ECM algorithm (default value is
% sqrt(eps) which is about 1.0e-8 for double precision). If Tolerance <= 0,
% then do maximum number of iterations specified by MaxIter and do not
% evaluate the objective function at each step (unless no output
% arguments, which triggers plot of objective function).
% Mean0 - Initial user-supplied NUMSERIES x 1 column vector estimate for
% mean (overrides InitMethod). If Mean0 is specified, then Covar0 must also
% be specified.
% Covar0 - Initial user-supplied NUMSERIES x NUMSERIES matrix estimate for
% covariance (overrides InitMethod), where the input matrix must be
% positive-definite. If Covar0 is specified, then Mean0 must also be
% specified.
%
% Outputs:
% Mean - NUMSERIES x 1 column vector of maximum likelihood parameter estimates
% for the mean of the data using the ECM algorithm.
% Covar - NUMSERIES x NUMSERIES matrix of maximum likelihood covariance
% estimates for the covariance of the data using the ECM algorithm.
%
% Note: The ECM algorithm does not work for every possible pattern of missing
% values. The ECM algorithm will work in most cases but can fail to converge
% if the covariance becomes singular. If this occurs, plots of the
% log-likelihood function tend to have a constant upward slope over many
% iterations as the log of the negative determinant of the covariance goes to
% zero. In some cases, the objective will actually wander around due to machine
% precision errors. No general theory of missingness patterns exists to
% determine these cases (an example of a known failure mode occurs if two time
% series are proportional).
%
% References:
% [1] Roderick J. A. Little and Donald B. Rubin, Statistical Analysis with
% Missing Data, 2nd ed., John Wiley & Sons, Inc., 2002.
% [2] Xiao-Li Meng and Donald B. Rubin, "Maximum Likelihood Estimation via
% the ECM Algorithm," Biometrika, Vol. 80, No. 2, 1993, pp. 267-278.
% [3] Joe Sexton and Anders Rygh Swensen, "ECM Algorithms that Converge at
% the Rate of EM," Biometrika, Vol. 87, No. 3, 2000, pp. 651-662.
% [4] A. P. Dempster, N.M. Laird, and D. B. Rubin, "Maximum Likelihood from
% Incomplete Data via the EM Algorithm," Journal of the Royal Statistical
% Society, Series B, Vol. 39, No. 1, 1977, pp. 1-37.
%
% See also: ecmninit, ecmnobj, ecmnstd, ecmnfish, ecmnhess
% Author(s): R.Taylor, 4-18-2005
% Copyright 2005 The MathWorks, Inc.
% $Revision: $ $Date: $
% Step 1 - check arguments
if nargin < 6
Covar0 = [];
end
if nargin < 5
Mean0 = [];
end
if nargin < 4 || isempty(Tolerance)
Tolerance = sqrt(eps);
end
if nargin < 3 || isempty(MaxIter)
MaxIter = 50;
elseif MaxIter < 1
MaxIter = 1;
end
if nargin < 2 || isempty(InitMethod)
InitMethod = 'NANSKIP';
else
InitMethod = upper(InitMethod);
if ~any(strcmp(InitMethod,{'NANSKIP','TWOSTAGE','DIAGONAL'}))
warning('Finance:ecmnmle:UnknownInitMethodString', ...
'Unknown InitMethod string. Will use default NANSKIP.');
InitMethod = 'NANSKIP';
end
end
if nargin < 1 || isempty(Data)
error('Finance:ecmnmle:MissingInputArg', ...
'The required input argument Data is missing or empty.');
end
% Step 2 - initialization
[NumSamples, NumSeries] = size(Data);
DOF = NumSamples - NumSeries - sum(all(isnan(Data),2));
if DOF <= 0
error('Finance:ecmnmle:InsufficientData', ...
'Insufficient number of samples to estimate parameters.');
end
NaNCols = sum(isnan(Data),1);
if sum(NaNCols) == 0 % no NaNs in data so just do usual estimation
Mean = mean(Data)';
Covar = cov(Data,1);
return
end
if any(NaNCols > (NumSamples - 2))
error('Finance:ecmnmle:TooManyNaNs', ...
'One or more data series has either all NaNs or insufficient non-NaN values.');
end
if sum(sum(isinf(Data)))
error('Finance:ecmnmle:InfiniteValue', ...
'One or more infinite values found in data.');
end
if isempty(Mean0) || isempty(Covar0)
[Mean0, Covar0] = ecmninit(Data,InitMethod);
else
Mean0 = Mean0(:);
if ~all(size(Mean0) == [NumSeries, 1])
error('Finance:ecmnmle:IncompatibleInitMean', ...
'The initial mean vector Mean0 has wrong dimensions.');
end
if ~all(size(Covar0) == [NumSeries, NumSeries])
error('Finance:ecmnmle:IncompatibleInitCovar', ...
'The initial covariance matrix Covar0 has wrong dimensions.');
end
end
Mean = Mean0;
Covar = Covar0;
if (Tolerance > 0) || ((Tolerance <= 0) && (nargout < 1))
Objective = ecmnobj(Data,Mean,Covar);
end
Obj = [];
% Step 3 - main loop
for Iteration = 1:MaxIter
Mean0 = Mean;
Covar0 = Covar;
% Step 4 - mean expectation and conditional maximization
Z = zeros(NumSeries,1);
Mean = zeros(NumSeries,1);
WarnState = warning('off','MATLAB:nearlySingularMatrix');
Count = 0; % counts non-empty records
for i = 1:NumSamples
% expectation step
[mX, mY, CXX, CXY, CYY] = ecmnpart(Data(i,:),Mean0,Covar0);
if ~isempty(mY)
if isempty(mX)
Z = Data(i,:)';
else
P = isnan(Data(i,:));
Q = ~P;
Y = Data(i,Q)';
Z(P) = mX + CXY * (CYY \ (Y - mY));
Z(Q) = Y;
end
% conditional maximization step
Count = Count + 1;
Mean = Mean + Z;
end
end
warning(WarnState);
Mean = (1.0/Count) .* Mean;
% Step 5 - covariance expectation and conditional maximization
Z = zeros(NumSeries,1);
Covar = zeros(NumSeries,NumSeries);
WarnState = warning('off','MATLAB:nearlySingularMatrix');
Count = 0;
for i = 1:NumSamples
CovAdj = zeros(NumSeries,NumSeries);
% expectation step
[mX, mY, CXX, CXY, CYY] = ecmnpart(Data(i,:),Mean,Covar0);
if ~isempty(mY)
if isempty(mX)
Z = Data(i,:)';
else
P = isnan(Data(i,:));
Q = ~P;
Y = Data(i,Q)';
Z(P) = mX + CXY * (CYY \ (Y - mY));
Z(Q) = Y;
CovAdj(P,P) = CXX - CXY * inv(CYY) * CXY';
end
% conditional maximization step
Count = Count + 1;
Covar = Covar + (Z - Mean) * (Z - Mean)' + CovAdj;
end
end
warning(WarnState);
Covar = (0.5/Count) .* (Covar + Covar');
% Step 6 - evaluate objective and test for convergence
if (Tolerance > 0) || ((Tolerance <= 0) && (nargout < 1))
[CholCovar, CholState] = chol(Covar);
if CholState > 0
error('Finance:ecmnmle:NonPosDefCovar', ...
'Full covariance not positive-definite in iteration %d.',Iteration);
end
Objective = ecmnobj(Data,Mean,Covar,CholCovar);
Obj = [Obj; Objective];
if Iteration > 1
TestObj = Objective - Objective0;
TestMean = norm(Mean - Mean0)/sqrt(NumSeries);
if ((TestObj >= 0.0) && (TestObj < Tolerance)) || (TestMean < Tolerance)
break
end
end
Objective0 = Objective;
Mean0 = Mean;
if Iteration == MaxIter
warning('Finance:ecmnmle:EarlyConvergence', ...
'Maximum iterations completed, convergence criterion not satisfied ...');
end
end
end
% Step 7 - plot objectives if in display mode (no output args)
if ~nargout
figure(gcf);
plot(Obj,'-bo','MarkerFaceColor','b','MarkerSize',3);
title('\bfProgress of ECM Algorithm in ecmnmle');
xlabel('\bfIteration');
ylabel('\bfLog-Likelihood');
end
function [mX, mY, CXX, CXY, CYY] = ecmnpart(z, m, C)
%ECMNPART Partition moments according to missingness pattern
%
% Inputs:
% z - observation with possible NaN values in it
% m - current mean estimate of data
% C - current covariance estimate of data
%
% Outputs:
% mX - components of m associated with missing values in z
% mY - components of m associated with observed values in z
% CXX - components of C associated with paired missing values in z
% CXY - components of C associated with missing and observed values in z
% CYY - components of C associated with paired observed values in z
P = isnan(z);
Q = ~P;
mX = m(P);
mY = m(Q);
CXX = C(P,P);
CXY = C(P,Q);
CYY = C(Q,Q);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -