📄 gtm_rspg1.m
字号:
% gtm_rspg1 - Modified version of gtm_rspg1 from the GTM toolbox
function llh = gtm_rspg1(beta, D, mode, orient)
%
% Log-likelihood and component responsibilities over a Gaussian mixture
%
% Modified version of gtm_rspg1 from the GTM toolbox
% --------------------------------------------------
% The responsibilities are returned via the global variable
% matrix gtmGlobalR. The responsibility gtmGlobalR(k,n) is
% the probability of a particular component in the Gaussian
% mixture, k, having generated a particular data point, n.
% It is calculated from the distances between the data point
% n and the centres of the mixture components, 1..K, and the
% inverse variance, beta, common to all components.
%
% Synopsis: llh = gtm_rspg(beta, D, mode, orient, gradv)
% llh = gtm_rspg(beta, D)
%
% Arguments: beta - a scalar value of the inverse variance common
% to all components of the mixture.
%
% D - dimensionality of space where the data and
% the Gaussian mixture lives; necessary to
% calculate the correct log-likelihood.
%
% mode - optional argument used to control the mode
% 0 def of calculation; it can be set to 0, 1 or 2
% corresponding to increasingly elaborate
% measure taken to reduce the amount of
% numerical errors; mode = 0 will be fast but
% less accurate, mode = 2 will be slow but
% more accurate; the default mode is 0
% In this modified version, only MODE 0 is guaranteed to
% be free of errors. Other modes have not been modified
%
%
% orient - 0 No orientation
% 1 oriented
%
% Return: llh - the log-likelihood of data under a the Gaussian
% mixture.
%
% Global variables: gtmGlobalR - an K-by-N responsibility matrix;
% gtmGlobalR(k,n) is the responsa-
% bility takened by mixture component
% k for data point n.
%
% gtmGlobalDIST - an K-by-N matrix in which element
% (k,n) is the Euclidean distance
% between the centre of component m
% and the data point n.
%
% gtmGlobalMinDist, gtmGlobalMaxDist -
% vectors containing the minimum and
% maximum of each column in DIST,
% respectively; 1-by-N; required
% iff m > 0.
%
% See also: gtm_resp, gtm_dstg, gtm_dist
%
% Version: The GTM Toolbox v1.0 beta
%
% Copyright: The GTM Toolbox is distributed under the GNU General Public
% Licence (version 2 or later); please refer to the file
% licence.txt, included with the GTM Toolbox, for details.
%
% (C) Copyright Markus Svensen, 1996
%
% - likelihood value INCORRECT with modified beta (now a vector)
% - likelihood INCORRECT with orientation
% (C) 2000.06.28 Kui-yu Chang
% http://lans.ece.utexas.edu/~kuiyu
% This program is free software; you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation; either version 2 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
% or check
% http://www.gnu.org/
if (nargin<4)
orient = 0;
if (nargin == 3)
if (mode < 0 | mode > 2 | mode~=fix(mode))
error(['Unknown mode of calculation: ', num2str(mode)]);
end
elseif (nargin == 2)
mode = 0;
end
end
if (D ~= fix(D))
error(['Invalid value for D: ', num2str(D)]);
end
global gtmGlobalDIST;
if (isempty(gtmGlobalDIST))
fprintf('gtm_rspg -- Warning: gtmGlobalDIST empty.\n');
end
[K, N] = size(gtmGlobalDIST);
if (mode > 0)
global gtmGlobalMinDist;
global gtmGlobalMaxDist;
if ([1,N] ~= size(gtmGlobalMaxDist) | [1,N] ~= size(gtmGlobalMinDist))
error(['gtm_rspg -- Warning: Mismatch in size between ', ...
'gtmGlobalDIST and gtmGlobalMinDist/gtmGlobalMaxDist.']);
end
end
global gtmGlobalR
if (isempty(gtmGlobalR))
fprintf('gtm_rspg -- Warning: gtmGlobalR empty.\n');
end
if (size(gtmGlobalDIST) ~= size(gtmGlobalR))
fprintf(['gtm_rspg -- Warning: Mismatch in size between ', ...
'gtmGlobalR and gtmGlobalDIST.\n']);
end
if (mode > 0)
% In calculation mode > 0, the distances between Gaussian centres
% and data points are shifted towards being centred around zero,
% w.r.t. the extreme (min- and max-) values.
% Since the difference between any two distances is still the same,
% the responsabilities will be the same. The advantage is that
% the risk of responsabilities dropping below zero (in finite precision)
% in the exponentiation below, due to large distances, is decreased.
% However, while we CAN calculate reliably with zero (0), we CAN'T
% calculate reliably with infinity (Inf). Hence, the shifting of distances
% must not be so large that the exponentiation yields infinity as result.
distCorr = (gtmGlobalMaxDist + gtmGlobalMinDist) ./ 2;
distCorr = min(distCorr,(gtmGlobalMinDist+700*(2/beta)));
% exp(709) < realmax < exp(710), plus a few digits margin to avoid
% overflow when calculating rSum below.
% Here a loop is preferred to array-operation involving a large
% temporary matrix, in order to limit memory usage.
for n = 1:N
gtmGlobalDIST(:,n) = gtmGlobalDIST(:,n) - distCorr(n);
end
end
% Since the normalisation factor of the Gaussians is cancelled out
% when normalising the responsabilities below (R = R*diag(1 ./ rSum))
% it is omitted here. This, however, is corrected for when calculating
% the log-likelihood further below.
%----------------------------------------------------
global gtmGlobalRfactor;
%size(gtmGlobalDIST) is M x N
if orient==0
gtmGlobalR = exp((-beta(1)/2)*gtmGlobalDIST);
% gtmGlobalR = exp((-beta'*ones(1,N)/2).*gtmGlobalDIST);
else
gtmGlobalR = gtmGlobalRfactor.*(exp(-gtmGlobalDIST/2));
end
%gtmGlobalRfactor(:,68:71) %no problem
%gtmGlobalDIST(:,68:71)
% shit = exp(-gtmGlobalDIST/2);
%shit(:,68:71)
%gtmGlobalR(:,68:71)
if (mode < 2)
rSum = sum(gtmGlobalR);
else
% In calculation mode >= 2, the columns of R are first sorted,
% so that the summation over columns is done in increasing order,
% which minimizes the amount of round-off error in the summation.
rSum = sum(gtm_sort(gtmGlobalR));
end
% replace small values of rSum with 1
% alternatively, lans_killnan can be used after 0/0, but less elegant
rSum = lans_replace(eps,1,rSum);
% Here a loop is preferred to an array-operation involving a large
% temporary matrix, in order to limit memory usage.
for n = 1:N
gtmGlobalR(:,n) = gtmGlobalR(:,n)./rSum(n);
end
%gtmGlobalR = lans_killnan(gtmGlobalR);
% In the calculation of the log-likelihood, constants so far omitted in the
% calculations are taken into account
if (mode < 1)
if orient==0
llh = sum(log(rSum)) + N*((D/2)*log(beta(1)/(2*pi)) - log(K));
else
llh = sum(log(rSum)) + N*(-(D/2)*log(2*pi)-log(K));
end
else
% NOT GUARANTEED TO WORK.
% If the distances were adjusted above, to improve numerical accuracy,
% this must be corrected for when calculating the log-likelihood.
llh = sum(log(rSum) + distCorr*(-beta(1)/2)) ...
+ N*((D/2)*log(beta/(2*pi)) - log(K));
end
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -