function [C, y, X, n, p, error1]=LMS_trim(y, X, max_fits, max_points, flag2)
% % LMS_trim: Randomly trims the input data arrays and the combinations of data points.
% %
% % Syntax:
% %
% % [C, y, X, n, p, error1]=LMS_trim(y, X, max_fits, max_points, flag2);
% %
% % **********************************************************************
% %
% % Description
% %
% % This program trims the input data sets and trims the combinations of
% % best fit data pairs. The two trimming operations are performed
% % using a quick random uniform distribution to make all possible
% % combinations more equally probable. The output of this program
% % is used to perform the Least Median Trimmed Squares Robust Regression.
% %
% % **********************************************************************
% %
% % Input Variable Description
% %
% % y is the column vector of the dependent variable.
% %
% % X is the matrix of the independent variable. If it is one dimensional,
% % then it should be a column vector. If X is an empty matrix, then
% % X is set to a column of integers from 1 to length(y).
% %
% % max_fits is the maximum number of candidate combinations of data
% % points (trial fits).
% % The maximum value is 10000.
% % The default value is 1000 or the largest value the data allows.
% %
% % max_points is the maximum number of data points retained for curve
% % fitting.
% % The maximum value is 100000.
% % The default value is 100000 or the largest value the data allows.
% %
% % flag2 = 1 regular linear regression (unconstrained)
% % flag2 = 0 linear regression through the origin
% %
% % **********************************************************************
% %
% % Output Variable Description
% %
% % C is the matrix of candidate index combinations for the robust
% % regression. Each row holds p indices into the trimmed y and X.
% %
% % y is the trimmed column vector of the dependent variable.
% %
% % X is the trimmed matrix of the independent variable. If it is one
% % dimensional, then it is returned as a column vector.
% %
% % n is the number of rows of the trimmed data array.
% %
% % p depends on whether the data is fit through the origin or not.
% % If flag2 is 1 then the data is unconstrained and p is the number
% % of columns of X + 1.
% %
% % If flag2~=1, then p is the number of columns of X.
% %
% % error1 is 1 if there is an error; otherwise it is 0.
% %
% % **********************************************************************
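% %
% % Minimal direct call (a sketch; the output names C2, y2, X2, n2, and p2
% % are illustrative, and the helper rand_int.m must be on the path):
% %
% y=randn(200, 1);
% X=randn(200, 1);
% [C2, y2, X2, n2, p2, error1]=LMS_trim(y, X, 500, 100, 1);
% %
% % Each row of C2 holds p2 indices into the trimmed arrays y2 and X2,
% % selecting one candidate subset of points for a trial fit.
% %
% % **********************************************************************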
%
% Example='1';
% % Establish an exact solution (xe, ye)
% % LMS_trim is used inside LMTSreg
%
% xe=1/100*(1:10000)';
% ye=10+10*xe;
%
% % Create a noisy data set with an outlier (X, y)
%
% X=1/100*randn(size(xe))+xe;
% y=(10+randn(size(xe)))+10.*(X+randn(size(xe)));
%
% % Perform the robust median trimmed squares linear regression
% max_fits=1000;
% max_points=5000;
%
% % Outlier data points form a line with opposite slope
% % randomly select pcnt of the data points to be outliers
% pcnt=45;
% [ndraw]=rand_int(1, length(xe), pcnt/100*length(X), 1, 1);
% X(ndraw)=1/100*randn(size(ndraw))+1/100*ndraw;
% y(ndraw)=-(10+randn(size(ndraw)))-10.*(X(ndraw)+randn(size(ndraw)));
%
% [LMSout,blms,Rsq]=LMTSreg(y, X, max_fits, max_points);
% % plot the robust solution
% xr=xe;
% yr=polyval([blms(2) blms(1)], xr);
%
% % Perform the typical regression solution
% xp=xr;
% p=polyfit(X, y, 1);
% yp=polyval(p, xp);
%
% figure(1); plot(X, y, 'linestyle', 'none', 'marker', '.', 'markersize', 3, 'markeredgecolor', 'k');
% hold on; plot(xe, ye, 'g', 'linewidth', 1);
% plot(xr, yr, 'r', 'linewidth', 1);
% plot(xp, yp, 'b', 'linewidth', 1);
% legend({'Scattered Data', 'Exact Solution', 'Robust Solution', 'Regular Regression'});
% xlim([1 100]);
% title({[num2str(100-pcnt), '% of the data are good'], [num2str(pcnt), '% of the data are outliers']}, 'fontsize', 20);
% xlabel('x-axis', 'fontsize', 18);
% ylabel('y-axis', 'fontsize', 18);
% set(gca, 'fontsize', 14);
%
% % **********************************************************************
% %
% % This program was written by Edward L. Zechmann
% %
% % date 1 February 2008 Updated comments
% %
% % modified 13 February 2008 Updated comments
% % added flag2 to cater to regression
% % through the origin and unconstrained
% % regression
% %
% % modified 14 February 2008 Updated comments
% % Improved the error handling and default
% % values.
% %
% % modified 2 December 2008 Fixed a bug in trimming the input data
% % arrays for the unconstrained case.
% % This fix improves accuracy for data
% % sets with less than 1000 points.
% %
% %
% %
% % **********************************************************************
% %
% % Feel free to modify this code.
% %
% % See also: LMTSreg, LMSreg, LMTSregor, LMSregor
% %
% set the internal error flag to 0 (no error detected yet)
% set the error output to 0 (no error)
flag=0;
error1=0;
if (nargin < 1 || isempty(y)) || ~isnumeric(y)
    warning('The input y is missing, empty, or not numeric. Returning an empty array.');
    flag=1;
    error1=1;
    n=1;
    y=1;
else
    % y must be a column vector
    y=y(:);
    % n is the length of the data set
    n=length(y);
end
if nargin < 2 || isempty(X) || ~isnumeric(X)
    % if X is omitted, give it the values 1:n
    X=(1:n)';
else
    % X must be a 2-dimensional matrix
    % with the data along the columns.
    if ndims(X) > 2
        warning('Invalid data set X. Returning an empty array.');
        flag=1;
        error1=1;
    else
        [mx, nx]=size(X);
        if nx > mx
            X=X';
        end
    end
    if n~=size(X,1)
        warning('The rows of X and y must have the same length.');
        flag=1;
        error1=1;
    end
end
% check the values for flag2
if nargin < 5 || isempty(flag2) || ~isnumeric(flag2)
    flag2=1;
end
if ~isequal(flag2, 1)
    flag2=2;
end
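% Any value of flag2 other than 1 is treated as regression through the
% origin, so no intercept term is counted below.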
% pp is the number of parameters to be estimated from the untrimmed input
% data sets
if isequal(flag2, 1)
    pp=size(X,2)+1;
else
    pp=size(X,2);
end
% If not input, set the maximum number of fits
if nargin < 3 || isempty(max_fits) || ~isnumeric(max_fits)
    % default value of max_fits is 1000
    max_fits=min([1000, nchoosek(n, pp)]);
end
% make sure that max_fits does not exceed 10000
max_fits=min( [max_fits, nchoosek(n, pp), 10000]);
% If max_points is not an input, set the maximum number of points
% for the input arrays X and y to a reasonable value.
if nargin < 4 || isempty(max_points) || ~isnumeric(max_points)
    max_points=max([min([n, 100000]), max_fits*pp]);
end
if max_points < max_fits
    max_points=max_fits;
end
if max_points > n || max_points > max_fits*pp
    max_points=min([n, max_fits*pp]);
end
% For trimming, change the value of n to the maximum number of data points.
n=max_points;
% Trim the arrays of the input data to a total number of data points equal
% to max_points.
[pts]=rand_int(1, length(y), [max_points 1], 0, 1);
% The X and y inputs are trimmed
X=X(pts, :);
y=y(pts, :);
% p is the number of parameters to be estimated for the trimmed data sets
if isequal(flag2, 1)
    p=size(X,2)+1;
else
    p=size(X,2);
end
% Generate the C matrix of combinations.
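% The candidate combinations are built in one of three regimes, depending
% on how many subsets of p points exist among the n trimmed points:
%   Regime 1: nchoosek(n, p) <= max_fits, so every combination is listed.
%   Regime 2: floor(n/p) < max_fits, so floor(n/p) random combinations
%             are drawn.
%   Regime 3: otherwise, min(n, max_fits) random combinations are drawn.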
if isequal(flag, 1)
    C=[];
else
    if nchoosek(n, p) <= max_fits
        % Regime 1
        % all possible combinations of p of the n trimmed data points
        C=nchoosek(1:n, p);
    elseif floor(n/p) < max_fits
        % Regime 2
        % floor(n/p) random combinations
        [C]=rand_int(1, n, [floor(n/p) p], 0, 1);
    else
        % Regime 3
        % sparse random combinations: up to max_fits of them
        if n < max_fits
            np=n;
        else
            np=max_fits;
        end
        [C]=rand_int(1, n, [np p], 0, 1);
    end
end