📄 lmtsregor.m
字号:
function [LMSout, blms, Rsq, error1]=LMTSregor(y, X, max_fits, max_points)
% % LMTSregor: Estimates the best fit line through the origin using random trimming and least median squares
% %
% % Syntax:
% %
% % [LMSout,blms,Rsq,error1]=LMTSregor(y, X, max_fits, max_points);
% %
% % **********************************************************************
% %
% % Description
% %
% % Least Median Trimmed Squares Through the Origin
% %
% % This program is a modification of LMSregor.  It has been modified to
% % trim the input data sets and trim the number of combinations of
% % line fits that are processed.  The trimming allows the program to
% % accommodate large data sets.
% %
% % This program performs the Least Median Trimmed Squares Robust
% % Regression through the origin for simple or multiple columns of
% % data and outputs the regression parameters.
% %
% % Breakdown has been observed to occur at 50%; however, the breakdown
% % point is not known for all problems.
% %
% % **********************************************************************
% %
% % Input Variable Description
% %
% % y is the column vector of the dependent variable.
% %
% % X is the matrix of the independent variable.  If it is one dimensional,
% %      then it should be a column vector.  If X has more columns than
% %      rows it is transposed.  If X is omitted, empty, or not numeric,
% %      then X is set to the column of integers 1:n, where n is the
% %      length of y.
% %
% % max_fits is the number of best fit pairs of data.
% %      The maximum value is 10000.
% %      The default value is 10000 or the number of possible
% %      combinations nchoosek(n, number of columns of X),
% %      whichever is smaller.
% %
% % max_points is the number of data points for curve fitting.
% %      The maximum value is 100000.
% %      The default value is min(n, 100000), raised if necessary to
% %      max_fits times the number of columns of X.  max_points is
% %      never allowed to be smaller than max_fits.
% %
% % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% %
% % Output Variable Description
% %
% % LMSout is the LMTS estimated values vector.
% %
% % blms is the LMTS slopes vector (the fit passes through the origin,
% %      so there is no intercept term).
% %
% % Rsq is the R-squared regression coefficient error estimate of the fit.
% %
% % error1 is 1 if there is an error otherwise it is 0.
% %      An error is reported for invalid input and when no
% %      full-rank subset of data points produced a fit.
% %
% % **********************************************************************
% %
% % This program is originally the work of
% %
% % Alexandros Leontitsis
% % Institute of Mathematics and Statistics
% % University of Kent at Canterbury
% % Canterbury
% % Kent, CT2 7NF
% % U.K.
% %
% % University e-mail: al10@ukc.ac.uk (until December 2002)
% % Lifetime e-mail: leoaleq@yahoo.com
% % Homepage: http://www.geocities.com/CapeCanaveral/Lab/1421
% %
% % Sep 3, 2001.
% %
% % **********************************************************************
% %
% % Reference:
% % Rousseeuw PJ, Leroy AM (1987):
% % Robust regression and outlier detection. Wiley.
% %
% % **********************************************************************
%
% Example='1';
% % Establish an exact solution (xe, ye)
%
% xe=1/100*(1:10000)';
% ye=10*xe;
%
% % Create a noisy data set with an outlier (X, y)
%
% X=1/100*randn(size(xe))+xe;
% y=(randn(size(xe)))+10.*(X+randn(size(xe)));
%
% % Perform the robust median trimmed squares linear regression
% max_fits=1000;
% max_points=5000;
%
% % Outlier data points form a line with opposite slope
% % randomly select pcnt of the data points to be outliers
% pcnt=45;
% [ndraw]=rand_int(1, length(xe), pcnt/100*length(X), 1, 1);
% X(ndraw)=1/100*randn(size(ndraw))+1/100*ndraw;
% y(ndraw)=-(randn(size(ndraw)))-10.*(X(ndraw)+randn(size(ndraw)));
%
% [LMSout,blms,Rsq]=LMTSregor(y, X, max_fits, max_points);
% % plot the robust solution
% xr=xe;
% yr=polyval([blms(1) 0], xr);
%
% % Perform the typical regression solution
% xp=xr;
% p=polyfit(X, y, 1);
% yp=polyval(p, xp);
%
% figure(1); plot(X, y, 'linestyle', 'none', 'marker', '.', 'markersize', 3, 'markeredgecolor', 'k');
% hold on; plot(xe, ye, 'g', 'linewidth', 1);
% plot(xr, yr, 'r', 'linewidth', 1);
% plot(xp, yp, 'b', 'linewidth', 1);
% legend({'Scattered Data', 'Exact Solution', 'Robust Solution', 'Regular Regression'});
% xlim([1 100]);
% title({[num2str(100-pcnt), '% of the data are good'], [num2str(pcnt), '% of the data are outliers']}, 'fontsize', 20);
% xlabel('x-axis', 'fontsize', 18);
% ylabel('y-axis', 'fontsize', 18);
% set(gca, 'fontsize', 14);
%
% % **********************************************************************
% %
% % This program was modified by Edward L. Zechmann
% %
% %     date  1 February    2008    Updated comments
% %                                 added rand_int code to randomly select
% %                                 data points
% %
% % modified 11 February    2008    Trimmed the input data arrays
% %                                 updated comments
% %
% % modified 14 February    2008    Trimmed the input data arrays
% %                                 updated comments.
% %                                 Improved the error handling and default
% %                                 values.
% %
% % modified  2 December    2008    Updated Comments.
% %                                 Simplified code.
% %
% %
% %
% % **********************************************************************
% %
% % Feel free to modify this code.
% %
% % See also: LMTSreg, LMSreg, LMTSregor, LMSregor
% %

% flag is 1 when the inputs are invalid and no fit should be attempted.
% error1 is the error indicator returned to the caller.
flag=0;
error1=0;

if (nargin < 1 || isempty(y)) || ~isnumeric(y)
    warning('Not enough input arguments is empty or not numeric.  Return empty array.');
    flag=1;
    error1=1;
    % Placeholder values so the X handling below still runs
    n=1;
    y=1;
else
    % y must be a column vector
    y=y(:);
    % n is the length of the data set
    n=length(y);
end

if nargin < 2 || isempty(X) || ~isnumeric(X)
    % if X is omitted give it the values 1:n
    X=(1:n)';
elseif ndims(X) > 2
    % Reject N-D arrays before attempting the transpose below,
    % because the transpose is not defined for N-D arrays.
    warning('Invalid data set X.  Return empty array.');
    flag=1;
    error1=1;
else
    % X must be a 2-dimensional matrix
    % With the data along the columns.
    [mx, nx]=size(X);
    if nx > mx
        X=X';
    end

    if n~=size(X,1)
        warning('The rows of X and y must have the same length');
        flag=1;
        error1=1;
    end
end

% Calculate the output
if isequal(flag, 1)
    LMSout=[];
    blms=[];
    Rsq=[];
else
    % Placeholder outputs; error1 stays 1 unless a fit is actually found.
    LMSout=1;
    blms=1;
    Rsq=1;
    error1=1;

    % Number of independent variables (columns of X)
    pp=size(X,2);

    % Number of distinct subsets of pp data points
    num_comb=nchoosek(n, pp);

    % If not input, set the maximum number of fits
    if nargin < 3 || isempty(max_fits) || ~isnumeric(max_fits)
        % default value of max_fits is 10000 or the number of
        % possible combinations, whichever is smaller
        max_fits=min([10000, num_comb]);
    end

    % make sure that max_fits does not exceed 10000
    % or the number of possible combinations
    max_fits=min([max_fits, num_comb, 10000]);

    % If max_points is not an input, set the maximum number of points
    % for the input arrays X and y to a reasonable value.
    if nargin < 4 || isempty(max_points) || ~isnumeric(max_points)
        max_points=max([min([n, 100000]), max_fits*pp]);
    end

    if max_points < max_fits
        max_points=max_fits;
    end

    % Program Modified Here
    % input data is trimmed
    % best fit combinations are trimmed
    % Each row of C is used below as a set of p row indices into the
    % trimmed X and y.
    [C, y, X, n, p]=LMS_trim(y, X, max_fits, max_points, 2);

    % The "half" of the data points
    h=floor(n/2)+floor((p+1)/2);

    % Initialize the rmin parameter
    rmin=Inf;

    for i=1:size(C,1)

        % Gather the p data points of this candidate subset
        idx=C(i,1:p);
        A=X(idx,:);
        b=y(idx);
        b=b(:);

        % Only subsets that determine the slopes uniquely are used
        if rank(A)==p

            % Calculate the slopes.
            % The backslash solve replaces inv(A'*A)*A'*b, which is
            % slower and less accurate.
            c=A\b;

            % There is no intercept, so the estimation is straightforward
            est=X*c;
            r=y-est;

            % The h-th smallest squared residual is the criterion
            r2=sort(r.^2);
            rlms=r2(h);

            if rlms<rmin
                rmin=rlms;
                blms=c;
                LMSout=est;
                % Chapter 2, eq. 3.12
                Rsq=1-(median(abs(r))/median(abs(y)))^2;
                % A valid fit was found, so clear the error indicator
                error1=0;
            end
        end
    end
end
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -