
📄 softmax.m

📁 Signal recognition program (foreign-authored)
💻 MATLAB (M)
📖 Page 1 of 2
  if ~isempty(mask)
    if ~isa(mask, 'double') | ~isreal(mask) | ndims(mask) ~= 2 | ...
          ~any(any(mask)) | ~all(nonzeros(mask) == 1)
      error('Mask must be a 2-d array of 0s and 1s.')
    end

    if isempty(weights)
      weights = mask;
      weights(find(mask)) = 1.4 * rand(nnz(mask), 1) - .7;
      normw = 0;
    elseif ndims(weights) ~= 2 | ~all(size(mask) == size(weights))
      error('MASK and WEIGHTS must be same size.')
    end
  end

  if normw
    if ~isa(weights, 'double') | ~isreal(weights) | ...
          ndims(weights) ~= 2 | any(any(isnan(weights)))
      error('Weights must be a 2-d real array.')
    end

    if isempty(mask)
      mask = weights;
      mask(find(weights)) = 1;
    end

    % rescale weights because of input normalisation
    weights(1, p+2:end) = weights(1, p+2:end) + ...
        range(1,:) * weights(2:p+1, p+2:end);

    weights(2:p+1, p+2:end) = diag(diff(range)) ...
        * weights(2:p+1, p+2:end);
  end

  if decay
    % insert redundant output unit to balance weights for the decay
    % parameter
    m = sum(weights(:, end-g+2:end), 2)/g;
    weights = [weights(:, 1:end-g+1), -m, ...
               [weights(:, end-g+2:end) - repmat(m, 1, g-1)]];

    mask = [mask(:, 1:end-g+1), ...
            sparse(any(mask(:, end-g+2:end), 2)), ...
            mask(:, end-g+2:end)];
    % the weights from each unit to all outputs now sum to 0
  end

  weights(max(find(any(weights | mask, 2))) + 1:end, :) = [];
  mask(size(weights, 1)+1:end, :) = [];
else
  if ~isempty(nhid)
    if ~isa(nhid, 'double') | length(nhid) ~= prod(size(nhid)) ...
          | ~isreal(nhid) | any(round(nhid) ~= nhid | ...
                                isinf(nhid) | nhid < 0)
      error('NUNITS must be a vector of positive, finite integers.')
    elseif length(nhid) > 1
      if any(nhid <= 0)
        error(['Cannot specify layers with no units with more than one' ...
               ' hidden layer.'])
      end
    elseif nhid == 0
      nhid = [];
    end
  end

  if isempty(skip)
    skip = 0;
  elseif skip
    if length(nhid) > 1
      error(['May not specify skip weights with more than one hidden' ...
             ' layer.'])
    elseif nhid == 0
      error('May not specify skip weights with no hidden layer.')
    end
    skip = 1;
  else
    skip = 0;
  end

  noutput = g - (decay == 0);
  nunits = [p nhid noutput];
  nweights = sum((nunits(1:end-1) + 1) .* nunits(2:end)) + ...
      skip*p*noutput;
  idx = cumsum([2 nunits]);
  nlayer = length(nunits);
  nunits = sum(nunits)+1;
  weights = sparse(nunits - noutput, nunits, nweights);
  mask = weights;

  % connect bias to all hidden and output units.
  mask(1, p+2:end) = 1;
  if skip
    % connect input units to all output units.
    mask(2:p+1, end-noutput+1:end) = 1;
  end
  for i = 1:nlayer-1
    % connect adjacent layers
    mask(idx(i):idx(i+1)-1, idx(i+1):idx(i+2)-1) = 1;
  end
  weights(find(mask)) = 1.4 * rand(nweights, 1) - .7;
end

ninput = max(find(~any(mask | weights)));
if any(~any(mask(:,p+2:end) | weights(:,p+2:end)))
  error(sprintf(['Unit %d has no input:\nInput units must have lowest' ...
                 ' indices.'], ...
                min(find(~any(mask(:,p+2:end) | ...
                              weights(:,p+2:end))))+p+1))
elseif ninput < p+1
  error('Not enough input units.')
elseif ninput > p+1
  error('Too many input units.')
end

noutput = diff(size(mask));
if any(~any(mask | weights, 2))
  error(sprintf(['Unit %d has no output:\nOutput units must have' ...
                 ' highest indices.'], ...
                min(find(~any(mask | weights, 2)))))
elseif noutput < g-(decay == 0)
  error('Not enough output units.')
elseif noutput > g-(decay == 0)
  error('Too many output units.')
end

if any(any(tril(weights | mask)))
  error('All weights must join a lower to a higher indexed unit.')
end

if isempty(maxit)
  maxit = 200;
elseif ~isa(maxit, 'double') | ~isreal(maxit) | length(maxit) ~= 1 ...
      | round(maxit) ~= maxit | maxit < 0
  error('MAXITER must be a positive integer.')
end

% initial states
[E post grad] = feedprop(X, G, w, weights, mask, decay);
H = eye(nnz(mask)); % inverse of Hessian
oldweights = weights;
oldE = E;

for iter = 1:maxit
  dir = -H*grad; % direction vector
  Ep = grad'*dir; % slope along dir from the vector of partial derivatives (grad)
  lambda = [1 0]'; % step length and previous step length
  lambdamin = 2*eps*max(abs(dir)./max(abs(oldweights(find(mask))), 1));

  % backtracking line search along dir
  while 1
    if lambda(1) < lambdamin
      weights = oldweights;
      break
    end
    % try point along dir
    weights(find(mask)) = oldweights(find(mask)) + lambda(1)*dir;
    E = feedprop(X, G, w, weights, mask, decay);
    if E <= oldE + 1e-4*Ep*lambda(1)
      break % good enough
    elseif lambda(1) == 1
      lambda = [-Ep/(2*(E - oldE - Ep)); 1];
    else
      ab = [1 -1; -lambda(2) lambda(1)] * diag(1./lambda.^2) * ...
           ([E; E2] - Ep*lambda - oldE) / diff(lambda);
      lambda(2) = lambda(1);
      if ab(1) == 0
        if ab(2) == 0
          break
        end
        lambda(1) = -Ep/(2*ab(2));
      else
        lambda(1) = (-ab(2) + sqrt(ab(2)^2 - 3*ab(1)*Ep)) / ...
            (3*ab(1));
      end
    end

    if ~isreal(lambda)
      lambda(1) = .1*lambda(2);
    else
      lambda(1) = max(min(lambda(1), .5*lambda(2)), .1*lambda(2));
    end
    E2 = E;
  end

  if trace & ~rem(iter, 10)
    disp(sprintf('Iter: %d; Err: %g', iter, E))
  end

  if oldE - E < 0 % indicates divergence (this appears to be normal
                  % for large lambda)
    warning('Error diverged.')
    weights = oldweights;
    E = oldE;
    break
  elseif oldE - E < E*n*eps % indicates convergence
    if trace
      disp('Error converged.')
    end
    break
  end

  grad1 = grad;
  [oldE post grad] = feedprop(X, G, w, weights, mask, decay);
  dir = weights(find(mask)) - oldweights(find(mask));
  if max(dir./max(weights(find(mask)), 1)) < 4*eps
    if trace
      % convergence in weights but not error (probably too strict)
      disp('Gradient converged.')
    end
    break
  end
  oldweights = weights;

  % BFGS update of the inverse-Hessian approximation H
  dg = grad - grad1;
  pdg = dir'*dg;
  Hdg = H*dg;
  gHg = dg'*Hdg;
  u = dir/pdg - Hdg/gHg;
  H = H + dir*dir'/pdg - Hdg*Hdg'/gHg + gHg*u*u';
end

if decay
  % get rid of the redundant output by subtracting its weights from the
  % other output weights
  weights = [weights(:, 1:end-g), [weights(:, end-g+2:end) - ...
                    repmat(weights(:, end-g+1), 1, g-1)]];
end

% rescale to actual non-normalised inputs so no one gets confused.
weights(2:p+1, :) = spdiags(1./diff(range)', 0, p, p) ...
    * weights(2:p+1, :);
weights(1, p+2:end) = weights(1, p+2:end) - ...
    range(1,:) * weights(2:p+1, p+2:end);

f = class(struct('weights', weights), 'softmax', h);

if nargout > 2
  dev = 2*(E - decay*sum(weights(find(mask))));
  if nargout > 3
    hess = inv(H);
  end
end

function [E, post, grad] = feedprop(X, G, w, weights, mask, decay)
%FEEDPROP Feed-forward and back-propagate.
%   [E, POST, GRAD] = FEEDPROP(X, G, W, WEIGHTS, MASK, DECAY)
%   returns the error E, the posterior probabilities in the n by g
%   matrix POST and the gradient vector of partial derivatives in
%   the NNZ(MASK) length vector GRAD.

[n p] = size(X);
g = size(G, 2);
logG = G; % so we don't run into NaNs
logG(find(G)) = log(G(find(G)));
ninput = p + 1;
noutput = g - (decay == 0);
nunits = size(weights, 2);
nhidden = nunits - noutput - ninput;

% outputs from individual units including network inputs
y = [ones(n, 1), X, zeros(n, nhidden + noutput)];
for i = ninput+1:nunits % hidden and output units
  [idx, j, wgt] = find(weights(:, i));
  nwgt = length(idx);
  if nwgt % it has happened!
    y(:, i) = sum(y(:, idx) * diag(wgt), 2);
    if i <= nunits - noutput % hidden units are logistic
      out = exp(y(:,i));
      y(:, i) = out./(1+out);
      y(isnan(y(:,i)), i) = 1; % if out is very large
    end
  end
end

% calculate normalised posterior probabilities using the SOFTMAX criterion
% if no decay, the first class is always zero
post = [zeros(n, decay == 0), y(:, end-noutput+1:end)];
post = exp(post - repmat(max(post(:, 2:end), [], 2), 1, g));
post = post ./ repmat(sum(post, 2), 1, g);

if any(any(~post & G))
  E = inf;
else
  logpost = G; % zeros in G are not a problem; zeros in post mean
               % that weights are going off into infinity.
  logpost(find(G)) = log(post(find(G)));
  E = sum(w' * (G .* (logG - logpost) - G + post)) + ...
      decay*sum(weights(find(mask)).^2);
end

if nargout > 1
  delta = [zeros(n, nhidden), ...
           post(:, 1 + (decay == 0):end) - G(:, 1 + (decay == 0):end)];
  for i = nhidden:-1:1
    [j idx wgt] = find(weights(i+ninput, :));
    nwgt = length(idx);
    delta(:, i) = y(:, i+ninput) .* (1 - y(:, i+ninput)) .* ...
        sum(delta(:, idx-ninput) * spdiags(wgt', 0, nwgt, nwgt), 2);
  end

  [i j] = find(mask);
  grad = (y(:, i) .* delta(:, j-ninput))' * w ...
         - 2*decay*weights(find(mask));
end
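
As a companion to the SOFTMAX criterion used in feedprop above, the following is a minimal, self-contained sketch, not part of softmax.m, of how the stabilised posteriors and the weighted error can be computed on their own. The variable names act, targets and wts are illustrative only and do not appear in the original code.

% Minimal sketch of the stabilised softmax posterior and weighted error,
% mirroring what feedprop does. act, targets and wts are made-up names.
n = 5; g = 3;
act     = randn(n, g);                                    % stand-in for the output-unit activations
targets = full(sparse(1:n, ceil(g*rand(1, n)), 1, n, g)); % 0/1 class indicator matrix (like G)
wts     = ones(n, 1);                                     % per-observation weights (like w)

shifted = act - repmat(max(act, [], 2), 1, g);  % subtract the row-wise max so exp() cannot overflow
post    = exp(shifted);
post    = post ./ repmat(sum(post, 2), 1, g);   % each row now sums to 1

logpost = zeros(n, g);
logpost(targets ~= 0) = log(post(targets ~= 0)); % never take the log of a zero entry
% For 0/1 targets, and ignoring the decay penalty, feedprop's error
%   sum(w' * (G .* (logG - logpost) - G + post))
% reduces to the weighted cross-entropy below, because logG is all zeros
% and the rows of both G and post sum to 1.
E = -sum(wts' * (targets .* logpost));

Subtracting the row-wise maximum before exponentiation leaves the posteriors unchanged, since the shift cancels in the normalisation, but prevents overflow for large activations; this is the same stabilisation that feedprop applies before normalising its output units.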
