📄 naivebc.m
%NAIVEBC Naive Bayes classifier
%
%   W = NAIVEBC(A,N)
%   W = A*NAIVEBC([],N)
%
%   W = NAIVEBC(A,DENSMAP)
%   W = A*NAIVEBC([],DENSMAP)
%
% INPUT
%   A        Training dataset
%   N        Scalar number of bins (default: 10)
%   DENSMAP  Untrained mapping for density estimation
%
% OUTPUT
%   W        Naive Bayes classifier mapping
%
% DESCRIPTION
% The Naive Bayes classifier estimates a density for every class and every
% feature separately. Total class densities are constructed by assuming
% independence and consequently multiplying the separate feature densities.
%
% The default version divides each axis into N bins, counts the number of
% training examples for each of the classes in each of the bins, and
% classifies the object to the class that gives the maximum posterior
% probability. Missing values are put into a separate bin.
%
% If N is NaN, it is optimised by REGOPTC.
%
% Alternatively, an untrained mapping DENSMAP may be supplied that will be
% used to estimate the densities per class and per feature separately.
% Examples are PARZENM and GAUSSM.
%
% SEE ALSO
% DATASETS, MAPPINGS, PARZENM, GAUSSM, UDC, QDC, PARZENC, PARZENDC, REGOPTC

% Copyright: R.P.W. Duin, r.p.w.duin@prtools.org
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands

% $Id: naivebc.m,v 1.5 2007/06/15 09:58:30 duin Exp $

function w = naivebc(a,arg2)

	if (nargin < 2)
		prwarning(2,'number of bins not specified, assuming 10.');
		arg2 = 10;
	end

	% No arguments given: return an untrained mapping.
	if (nargin < 1) | (isempty(a))
		w = mapping(mfilename,{arg2});
		w = setname(w,'Naive Bayes');
		return
	end

	if (~ismapping(arg2))             % Second argument is not a mapping: training.

		N = arg2;
		if isnan(N)                     % Optimise the number of bins by REGOPTC.
			defs = {10};
			parmin_max = [2,50];
			w = regoptc(a,mfilename,{N},defs,[1],parmin_max,testc([],'soft'),0);
		else
			islabtype(a,'crisp');
			isvaldfile(a,1,2);            % At least 1 object per class, 2 classes.
			a = testdatasize(a);
			[m,k,c] = getsize(a);
			M = classsizes(a);

			% Train the mapping. First, find the scale and offset of the data
			% and normalise (this is very non-robust, but ok...).
			offset_a = min(a);
			maxa = max(a);
			scale_a = maxa - offset_a;
			if (any(scale_a==0))
				prwarning(2,'one of the features has the same value for all data; scale changed to realmin');
				scale_a(scale_a==0) = realmin;
			end
			a = a - repmat(offset_a,m,1);
			a = a ./ repmat(scale_a,m,1);

			% P will contain the probability per bin per class, P0 the
			% probability per class. The extra (N+1)-th bin stores the
			% missing values.
			p = zeros(N+1,k,c);

			% Count the number of objects for each of the classes.
			for i = 1:c
				Ic = findnlab(a,i);                  % Extract one class.
				Ia = ceil(N*+(a(Ic,:)));             % Find out in which bin it falls.
				Ia(Ia<1) = 1; Ia(Ia>N) = N;          % Sanity check.
				for j = 1:N
					p(j,:,i) = sum(Ia==j);             % Count for all bins.
				end
				p(N+1,:,i) = sum(isnan(+a(Ic,:)));   % Count the missing values.

				% Bayes estimators are used, as elsewhere in PRTools.
				p(:,:,i) = (p(:,:,i)+1) / (M(i)+N);                % Probabilities.
				p(:,:,i) = p(:,:,i) ./ repmat(scale_a/N,N+1,1);    % Densities.
			end

			% Save all useful data.
			pars.p0 = getprior(a);
			pars.p = p;
			pars.N = N;
			pars.offset_a = offset_a;
			pars.scale_a = scale_a;

			w = mapping(mfilename,'trained',pars,getlablist(a),k,c);
			w = setname(w,'Naive Bayes');
			w = setcost(w,a);
		end

	elseif isuntrained(arg2)          % Train the given untrained mapping, e.g. PARZENM.
		[m,k,c] = getsize(a);
		v = cell(c,k);
		for i = 1:c
			b = seldat(a,i);              % Extract one class.
			for j = 1:k
				v{i,j} = b(:,j)*arg2;       % One density estimate per class, per feature.
			end
		end

		pars.dens = v;
		pars.p0 = getprior(a);

		w = mapping(mfilename,'trained',pars,getlablist(a),k,c);
		w = setname(w,'Naive Bayes');
		w = setcost(w,a);

	else                              % Second argument is a mapping: testing.

		w = arg2;
		pars = getdata(w);              % Unpack the mapping.
		[m,k] = getsize(a);

		if isfield(pars,'dens')         % Densities were estimated by a supplied mapping.
			v = pars.dens;
			[c,k] = size(v);
			out = zeros(size(a,1),c);
			for i = 1:c
				for j = 1:k
					% Sum per-feature log-densities instead of multiplying
					% densities, to avoid numerical underflow.
					out(:,i) = out(:,i) + log(+(a(:,j)*v{i,j}));
				end
			end
			out = exp(out);
		else                            % Densities were estimated by binning.
			c = length(pars.p0);          % Could also use size(w.labels,1)...

			% Shift and scale the test set.
			a = a - repmat(pars.offset_a,m,1);
			a = a ./ repmat(pars.scale_a,m,1);

			% Classify the test set. First find in which bins the objects fall.
			Ia = ceil(pars.N*+(a));
			Ia(Ia<1) = 1; Ia(Ia>pars.N) = pars.N;   % Sanity check.
			Ia(isnan(Ia)) = pars.N+1;               % Missing values use the extra bin.

			% Find the class probability for each object for each feature.
			out = zeros(m,k,c);
			for i = 1:k
				out(:,i,:) = pars.p(Ia(:,i),i,:);
			end

			% Multiply the per-feature probabilities.
			out = squeeze(prod(out,2));
			if (m == 1), out = out'; end
		end

		% Weight with the class priors.
		out = out .* repmat(pars.p0,m,1);

		% Done!
		w = setdat(a,out,w);

	end

return
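For intuition, the training branch above turns raw bin counts into smoothed densities with a Bayes (add-one) estimator. A minimal standalone sketch with made-up numbers (one feature, one class, N = 5 bins; not part of the toolbox):

counts = [3 0 7 5 5];        % hypothetical per-bin counts for one class
N = numel(counts);           % number of bins (5)
M = sum(counts);             % class size (20)
p = (counts+1) / (M+N);      % Bayes estimator: add-one smoothing, as in naivebc
binwidth = 1/N;              % bin width after the feature is scaled to [0,1]
d = p / binwidth;            % probabilities -> densities, cf. scale_a/N in the code

The smoothing guarantees that an empty bin still gets a small non-zero density, so a single unseen feature value cannot zero out the whole class product.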
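A minimal usage sketch, assuming a standard PRTools 4 installation; GENDATB, GAUSSM and TESTC are stock toolbox functions, and the generated data are purely illustrative:

a = gendatb([50 50]);        % synthetic two-class banana training set
t = gendatb([100 100]);      % independent test set

w1 = a*naivebc([],10);       % histogram version, 10 bins per feature
w2 = a*naivebc([],gaussm);   % per-feature Gaussian density estimates

e1 = t*w1*testc;             % test error of the binned classifier
e2 = t*w2*testc;             % test error of the Gaussian variant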