📄 generate_data.m
字号:
function [X, labels, t] = generate_data(dataname, n, noise)
%GENERATE_DATA Generates an artificial dataset
%
%   [X, labels, t] = generate_data(dataname, n, noise)
%
% Generates an artificial dataset. Possible datasets are:
%
%   'swiss'           Swiss roll
%   'brokenswiss'     Swiss roll whose angular range has a gap in the middle
%   'changing_swiss'  Swiss roll whose sampling density decreases along the roll
%   'helix'           helix
%   'twinpeaks'       twin peaks
%   '3d_clusters'     five uniformly filled clusters in 3D
%   'intersect'       intersecting dataset
%   'difficult'       10-D embedding of a regular grid on a 5-D manifold
%
% The variable n indicates the number of datapoints to generate
% (default = 1000). The variable noise indicates the amount of noise that
% is added to the data (default = 0.05). Passing [] for n or noise selects
% the default. The function returns the high-dimensional dataset in X, and
% corresponding labels in labels. In addition, the function returns the
% coordinates of the datapoints on the underlying manifold in t.
%
% Notes on individual datasets:
%   * 'difficult' generates round(n ^ (1 / 5)) ^ 5 points (a full grid),
%     which in general differs from n.
%   * '3d_clusters' has no manifold coordinates; t is returned empty.
%   * 'changing_swiss' returns t as a 1-by-n row vector of roll angles.
%
% An error is raised when dataname is not one of the names listed above.
%
%
% This file is part of the Matlab Toolbox for Dimensionality Reduction v0.4b.
% The toolbox can be obtained from http://www.cs.unimaas.nl/l.vandermaaten
% You are free to use, change, or redistribute this code in any way you
% want for non-commercial purposes. However, it is appreciated if you
% maintain the name of the original author.
%
% (C) Laurens van der Maaten
% Maastricht University, 2007

    % Toolbox helper defined outside this file (presumably prints a banner).
    welcome;

    % Fall back to the defaults when an argument is omitted or passed as [].
    if ~exist('n', 'var') || isempty(n)
        n = 1000;
    end
    if ~exist('noise', 'var') || isempty(noise)
        noise = 0.05;
    end

    switch dataname

        case 'swiss'
            % Roll angle in [1.5*pi, 4.5*pi] and height in [0, 30].
            t = (3 * pi / 2) * (1 + 2 * rand(n, 1));
            height = 30 * rand(n, 1);
            X = [t .* cos(t), height, t .* sin(t)] + noise * randn(n, 3);
            %labels = uint8(t);
            % Checkerboard labelling over (angle, height) cells.
            labels = rem(sum([round(t / 2), round(height / 12)], 2), 2);
            t = [t, height];

        case 'brokenswiss'
            % First ceil(n/2) angles use the lower 40% of the unit interval,
            % the remaining floor(n/2) the upper 40%; the middle is skipped.
            t = [(3 * pi / 2) * (1 + 2 * rand(ceil(n / 2), 1) * .4); ...
                 (3 * pi / 2) * (1 + 2 * (rand(floor(n / 2), 1) * .4 + .6))];
            height = 30 * rand(n, 1);
            X = [t .* cos(t), height, t .* sin(t)] + noise * randn(n, 3);
            labels = uint8(t);
            %labels = rem(sum([round(t / 2) round(height / 12)], 2), 2);
            t = [t, height];

        case 'changing_swiss'
            % Rejection sampling: a candidate rr is kept only when a second
            % uniform draw exceeds it, i.e. with probability 1 - rr, so small
            % values are sampled more often than large ones.
            r = zeros(1, n);
            for i = 1:n
                pass = 0;
                while ~pass
                    rr = rand(1);
                    if rand(1) > rr
                        r(i) = rr;
                        pass = 1;
                    end
                end
            end
            t = (3 * pi / 2) * (1 + 2 * r);
            height = 21 * rand(1, n);
            X = [t .* cos(t); height; t .* sin(t)]' + noise * randn(n, 3);
            %labels = uint8(t)';
            labels = rem(sum([round(t / 2); round(height / 10)], 1), 2)';
            % NOTE(review): unlike 'swiss', t stays a 1-by-n row vector of
            % angles (height is not appended). Kept as-is for backward
            % compatibility -- confirm whether [t; height]' was intended.

        case 'helix'
            % n equally spaced parameter values in (0, 2*pi].
            t = (1:n)' / n;
            t = t * 2 * pi;
            radius = 2 + cos(8 * t);
            X = [radius .* cos(t), radius .* sin(t), sin(8 * t)] + noise * randn(n, 3);
            %labels = uint8(t);
            labels = rem(round(t * 1.5), 2);

        case 'twinpeaks'
            % Uniform samples on [-1, 1]^2, lifted by sin(pi*x) * tanh(3*y).
            xy = 1 - 2 * rand(2, n);
            X = [xy; sin(pi * xy(1,:)) .* tanh(3 * xy(2,:))]' + noise * randn(n, 3);
            X(:,3) = X(:,3) * 10;
            t = xy';
            %labels = uint8(X(:,3));
            % NOTE(review): the column minima are added (not subtracted)
            % before binning. Kept unchanged so that labels match earlier
            % results -- confirm the intended offset.
            labels = rem(sum(round((X + repmat(min(X, [], 1), [size(X, 1) 1])) ./ 10), 2), 2);

        case '3d_clusters'
            numClusters = 5;
            centers = 10 * rand(numClusters, 3);
            % L2_distance is a toolbox helper defined outside this file;
            % presumably it returns the pairwise distances between centers.
            D = L2_distance(centers', centers', 1);
            minDistance = min(D(D > 0));
            % Assign every one of the n rows to a cluster, spreading the rows
            % as evenly as possible. (Previously only
            % numClusters * ceil((n - 36) / numClusters) rows were filled and
            % the rest stayed at the origin with label 0.)
            labels = 1 + floor((0:(n - 1))' * numClusters / n);
            % Uniform cube around each center, scaled by the smallest
            % center-to-center distance.
            X = centers(labels, :) + (rand(n, 3) - 0.5) * minDistance / sqrt(12);
            X = X + noise * randn(n, 3);
            t = [];

        case 'intersect'
            t = (1:n)' ./ n .* (2 * pi);
            x = cos(t);
            y = sin(t);
            height = rand(length(x), 1) * 5;
            X = [x, x .* y, height] + noise * randn(n, 3);
            %labels = uint8(5 * t);
            labels = rem(sum([round(t / 2), round(height / 2)], 2), 2);

        case 'difficult'
            % Generate underlying manifold: a regular grid on [0, 1]^5.
            % combn is defined outside this file; presumably it enumerates
            % all no_dims-tuples over the values in l -- TODO confirm.
            no_dims = 5;
            no_points_per_dim = round(n ^ (1 / no_dims));
            l = linspace(0, 1, no_points_per_dim);
            t = combn(l, no_dims);

            % Generate high-dimensional dataset (10 columns)
            X = [cos(t(:,1)), ...
                 tanh(3 * t(:,2)), ...
                 t(:,1) + t(:,3), ...
                 t(:,4) .* sin(t(:,2)), ...
                 sin(t(:,1) + t(:,5)), ...
                 t(:,5) .* cos(t(:,2)), ...
                 t(:,5) + t(:,4), ...
                 t(:,2), ...
                 t(:,3) .* t(:,4), ...
                 t(:,1)];
            X = X + noise * randn(size(X));

            % Generate labels for dataset (2x2x2x2x2 checkerboard pattern)
            tt = 1 + round(t);
            labels = rem(sum(tt, 2), 2);

        otherwise
            error('Unknown dataset name.');
    end
end
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -