create.m

来自「KPCA是一种非线性的盲源分离方法」· M 代码 · 共 386 行

M
386
字号
function data = create(data)
%CREATE creates test data for running nldr algorithms.
%
% inputs:
%    data      a struct describing the test data
%              .dataset the number of the example, see code for more infos
%              .n       the number of data points (default=400)
%              .state   the initial state for the random numbers (default=0)
%              .noise   the variance of Gaussian noise to add (default=0)
%              other options for some of the data sets (see code)
%              alternatively, data = 1 chooses the dataset directly,
%              the number of points defaults to 1000
%
% outputs:
%    data      a struct containing .x the generated data, each column is
%              a data point, and other stuff:
%              .z     the "correct" embedding
%              .e     some random noise of same dimensionality
%              .x_noisefree  the noisefree version of .x, i.e.
%                     .x = .xnoise_free + sqrt(.noise) * .e
%
% (c) Stefan Harmeling, 2006
% using the examples of the original LLE and ISOMAP code.

if ~isfield(data, 'dataset'), 
  number = data;
  clear data
  data.dataset = number;
end
if ~isfield(data, 'n'), data.n = 400; end
if ~isfield(data, 'noise'), data.noise = 0.0; end
if ~isfield(data, 'state'), data.state = 0; end

% set the randomness
rand('state', data.state);
randn('state', data.state);

data.typ = 'data';
switch data.dataset
 case 0 % "swiss roll with hole" 
  data.name = 'swiss roll with hole';
  n = data.n;
  a = 1;   % swiss roll goes from a*pi to b*pi
  b = 4;   
  y = rand(2,n);
  % punch a rectangular hole at the center
  l1 = 0.05; l2 = 0.15;
  y = y - 0.5;
  ok = find((abs(y(1,:))>l1) | (abs(y(2,:))>l2));
  i = length(ok);
  y(:, 1:i) = y(:, ok);
  while (i<n)
    p = rand(2,1) - 0.5;
    if (abs(p(1))>l1) | (abs(p(2))>l2)
      i = i + 1;
      y(:,i) = p;
    end
  end
  y = y + 0.5;
  tt = (b-a)*y(1,:) + a;
  tt = pi*tt;
  height = 21*y(2,:);
  data.col = tt;
  data.x = [tt.*cos(tt); height; tt.*sin(tt)];
  data.z = [tt; height]; % the ground truth
  data.az = -4;
  data.el = 13;
  
 case -1 % "swiss roll" dataset extracted from LLE's swissroll.m
  data.name = 'uniform swiss roll';
  n = data.n;
  a = 1;   % swiss roll goes from a*pi to b*pi
  b = 4;   
  y = rand(2,n);
  data.z = y;  % the ground truth
  switch 1
   case 1
    % uniform distribution along the manifold (in data space)
    tt = sqrt((b*b-a*a)*y(1,:)+a*a);
   case 2
%    error('do not use this case')
    % nonuniform distribution along the manifold (in data space)
    tt = (b-a)*y(1,:) + a;  
  end
  tt = pi*tt;
  % now tt should go from a*pi to b*pi
  height = 21*y(2,:);
  data.col = tt;
  data.x = [tt.*cos(tt); height; tt.*sin(tt)];
  data.az = -4;
  data.el = 13;
  
 case 1 % "swiss roll (uniform in embedding space)" 
  % dataset extracted from LLE's swissroll.m
  data.name = 'classic swiss roll';
  n = data.n;
  a = 1;   % swiss roll goes from a*pi to b*pi
  b = 4;   
  y = rand(2,n);
  tt = (b-a)*y(1,:) + a;
  tt = pi*tt;
  height = 21*y(2,:);
  data.col = tt;
  data.x = [tt.*cos(tt); height; tt.*sin(tt)];
  data.z = [tt; height]; % the ground truth
  data.az = -4;
  data.el = 13;
  
 case 11 % "undersampled swiss roll"
  % dataset extracted from LLE's swissroll.m
  data.name = 'undersampled swiss roll';
  data.n = 100;
  n = data.n;
  a = 1;   % swiss roll goes from a*pi to b*pi
  b = 4;   
  y = rand(2,n);
  tt = (b-a)*y(1,:) + a;
  tt = pi*tt;
  height = 21*y(2,:);
  data.col = tt;
  data.x = [tt.*cos(tt); height; tt.*sin(tt)];
  data.z = [tt; height]; % the ground truth
  data.az = -4;
  data.el = 13;
  
 case 12 % "swiss roll"
  % dataset extracted from LLE's swissroll.m
  data.name = 'classic swiss roll';
  data.n = 400;
  n = data.n;
  a = 1;   % swiss roll goes from a*pi to b*pi
  b = 4;   
  y = rand(2,n);
  tt = (b-a)*y(1,:) + a;
  tt = pi*tt;
  height = 21*y(2,:);
  data.col = tt;
  data.x = [tt.*cos(tt); height; tt.*sin(tt)];
  data.z = [tt; height]; % the ground truth
  data.az = -4;
  data.el = 13;
  
 case 2 % "scurve" dataset extracted from LLE's scurve.m
  data.name = 'scurve';
  n = data.n;
  % I added 'ceil' and 'floor' to account for the case that n is odd
  angle = pi*(1.5*rand(1,ceil(n/2))-1); height = 5*rand(1,n);
  data.x = [[cos(angle), -cos(angle(1:floor(n/2)))]; height;[ sin(angle), 2-sin(angle)]];
  data.col = [angle, 1.5*pi + angle];
  data.z = [angle, 1.5*pi+angle; height]; % the ground truth
 
 case 3 % "square" dataset, a uniformly sampled 2D square randomly
         % rotated into higher dimensions
  data.name = 'square';
  n = data.n;
  d = 2;     % intrinsic dimension
  % optional parameter for dataset==3
  % data.D      dimension of the data
  if ~isfield(data, 'D'), data.D = 3; end
  % generate random rotation matrix
  D = data.D;
  A = randn(D, D);
  options.disp = 0;
  [R, dummy] = eigs(A*A', d, 'LM', options);
  tt = rand(d, n);
  data.col = tt(1,:);
  data.x = R*tt;
  data.z = tt;   % the ground truth
  data.az = 7;
  data.el = 40;
  
 case 4 % spiral: two dimensional "swiss roll"
  data.name = 'spiral';
  n = data.n;
  tt = (3*pi/2)*(1+2*rand(1, n));
  data.col = tt;
  data.x = [tt.*cos(tt); tt.*sin(tt)];
  data.z = tt; % the ground truth
  
 case -4 % spiral: two dimensional "swiss roll"
  data.name = 'noisy spiral';
  n = data.n;
  tt = (3*pi/2)*(1+2*rand(1, n));
  data.col = tt;
  data.x = [tt.*cos(tt); tt.*sin(tt)];
  data.x = data.x + randn(size(data.x));
  data.z = tt; % the ground truth
  
 case 5 % hole: a dataset with a hole
  data.name = 'hole';
  n = data.n;
  data.x = rand(2,n) - 0.5;
  % punch a rectangular hole at the center
  l1 = 0.2; l2 = 0.2;
  ok = find((abs(data.x(1,:))>l1) | (abs(data.x(2,:))>l2));
  i = length(ok);
  data.x(:, 1:i) = data.x(:, ok);
  while (i<n)
    p = rand(2,1) - 0.5;
    if (abs(p(1))>l1) | (abs(p(2))>l2)
      i = i + 1;
      data.x(:,i) = p;
    end
  end
  data.col = data.x(2,:);
  data.z = data.x;
  
 case 6 % P : taken from Saul's slides
  % note that for k=20, isomap and lle work fine which is very different
  % from the plots that Saul showed in his slides.
  data.name = 'P';
  load x
  x(2,:) = 500-x(2,:);
  data.x = x;
  data.z = x;
  data.col = data.z(2,:);
  data.n = size(x, 2);
  
 case 7 % fishbowl: uniform in data space
  gamma = 0.8;
  data.name = 'fishbowl (uniform in data space)';
  n = data.n;
  data.x = rand(3,n)-0.5;
  %project all data onto the surface of the unit sphere
  data.x = data.x ./ repmat(sqrt(sum(data.x.*data.x, 1)), [3 1]);
  ok = find(data.x(3,:) < gamma);
  i = length(ok);
  data.x(:, 1:i) = data.x(:, ok);
  while (i < n)
    p = rand(3,1)-0.5;
    p = p / sqrt(p'*p);
    if (p(3) < gamma)
      i = i+1;
      data.x(:, i) = p;
    end
  end
  % the projection on the plane works as follows:
  % start a beam from (0,0,1) through each surface point on the sphere
  % and look where it hits the xy plane.
  data.z = data.x(1:2,:) ./ repmat(1-data.x(3,:), [2 1]);
  data.col = data.x(3,:);
  data.az = -18;
  data.el = 16;
 case 8 % fishbowl: uniform in embedding space
  data.name = 'fishbowl (uniform in embedding space)';
  n = data.n;
  data.z = rand(2, n) - 0.5;
  % keep the disc
  ok = find(sum(data.z .* data.z) <= 0.25);
  i = length(ok);
  data.z(:, 1:i) = data.z(:, ok);
  while (i < n)
    p = rand(2,1) - 0.5;
    if (p'*p <= 0.25)
      i = i + 1;
      data.z(:, i) = p;
    end
  end
  gamma = 0.8;  % same role/parameter as in case 7
  data.z = 2*sqrt((1+gamma)/(1-gamma))*data.z;
  % project the disc onto the sphere
  alpha = 2 ./ (1 + sum(data.z .* data.z, 1));
  data.x = [repmat(alpha, [2 1]).*data.z; zeros(1, n)];
  data.x(3,:) = 1-alpha;
  data.col = data.x(3,:);
  data.az = -18;
  data.el = 16;
  
 case 9  % a gaussian blob
  data.name = 'gaussian blob';
  n = data.n;
  data.x = randn(3,n);
  data.z = data.x(2:3,:);
  data.col = data.x(3,:);
  
end


data.D = size(data.x, 1);  % dimensionality of the data
% finally generate noise
data.e = randn(size(data.x));
data.x_noisefree = data.x;  % the noise free data
data.x = data.x_noisefree + sqrt(data.noise)*data.e;

% precalculate the distanzmatrix
data.distances = distanz(data.x);


function d = distanz(x,y,type)
%DISTANZ calcs the distances between all vectors in x and y.
%
% usage:
%    d = distanz(x,y);
%
% inputs:
%    x      matrix with col vectors
%    y      matrix with col vectors (default == x)
%    type   the type of algorithm that is used (default==3)
%
% outputs:
%    d      distance matrix, not squared
%
% note:
%    part of the code is inspired by dist.m of the nntoolbox, other
%    part adapted from Francis Bach who took it from Roland
%    Bunschoten.
%
% sth * 19apr2002

if exist('type')~=1|isempty(type), type = 3; end
switch type
 case 1  % inspired by dist.m
  if exist('y')~=1|isempty(y)
    % here comes code just for x
    [rx,cx] = size(x);
    d = zeros(cx,cx);
    nuller = zeros(cx,1);
    for c = 1:cx
      d(c,:) = sum((x-x(:,c+nuller)).^2,1);
    end
  else
    % here comes code for x and y
    [rx,cx] = size(x);
    [ry,cy] = size(y);
    if rx~=ry, error('x and y do not fit'), end
    d = zeros(cx,cy);
    if cx>cy
      nuller = zeros(cx,1);
      for c = 1:cy
	d(:,c) = sum((x-y(:,c+nuller)).^2,1)';
      end
    else
      nuller = zeros(cy,1);
      for c = 1:cx
	d(c,:) = sum((x(:,c+nuller)-y).^2,1);
      end
    end
  end
 
 case 2  % same as case 1, but with repmat instead of nuller
  if exist('y')~=1|isempty(y)
    % here comes code just for x
    [rx,cx] = size(x);
    d = zeros(cx,cx);
    nuller = zeros(cx,1);
    for c = 1:cx
      d(c,:) = sum((x-repmat(x(:,c),[1 cx])).^2,1);
    end
  else
    % here comes code for x and y
    [rx,cx] = size(x);
    [ry,cy] = size(y);
    if rx~=ry, error('x and y do not fit'), end
    d = zeros(cx,cy);
    if cx>cy
      nuller = zeros(cx,1);
      for c = 1:cy
	d(:,c) = sum((x-repmat(y(:,c),[1 cx])).^2,1)';
      end
    else
      nuller = zeros(cy,1);
      for c = 1:cx
	d(c,:) = sum((repmat(x(:,c),[1 cy])-y).^2,1);
      end
    end
  end
  
 case 3  % inspired by Roland Bunschoten
  if exist('y')~=1|isempty(y)
    % here comes code just for x
    cx = size(x,2);
    xx = sum(x.*x,1); xz = x'*x;
    d = abs(repmat(xx',[1 cx]) - 2*xz + repmat(xx,[cx 1]));
  else
    % here comes code for x and y
    [rx,cx] = size(x);
    [ry,cy] = size(y);
    if rx~=ry, error('x and y do not fit'), end
    xx = sum(x.*x,1); yy = sum(y.*y,1);  xy = x'*y;  
    d = abs(repmat(xx',[1 cy]) + repmat(yy,[cx 1]) - 2*xy);
  end
end

d = sqrt(d);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?