⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 treec.m

📁 The pattern recognition matlab toolbox
💻 M
📖 第 1 页 / 共 2 页
字号:
	if isempty(t)		[f,j,t] = infcrit(a,nlab);		prwarning(3,'Maxcrit not feasible for decision tree, infcrit is used')	end	return%INFCRIT The information gain and its the best feature split.% % 	[f,j,t] = infcrit(A,nlabels)% % Computes over all features the information gain f for its best % threshold from the dataset A and its numeric labels. For f=1: % perfect discrimination, f=0: complete mixture. j is the optimum % feature, t its threshold. This is a lowlevel routine called for % constructing decision trees.% Copyright: R.P.W. Duin, duin@ph.tn.tudelft.nl% Faculty of Applied Physics, Delft University of Technology% P.O. Box 5046, 2600 GA Delft, The Netherlandsfunction [g,j,t] = infcrit(a,nlab)	prtrace(mfilename);	[m,k] = size(a);	c = max(nlab);	mininfo = ones(k,2);	% determine feature domains of interest	[sn,ln] = min(a,[],1); 	[sx,lx] = max(a,[],1);	JN = (nlab(:,ones(1,k)) == ones(m,1)*nlab(ln)') * realmax;	JX = -(nlab(:,ones(1,k)) == ones(m,1)*nlab(lx)') * realmax;	S = sort([sn; min(a+JN,[],1); max(a+JX,[],1); sx]);	% S(2,:) to S(3,:) are interesting feature domains	P = sort(a);	Q = (P >= ones(m,1)*S(2,:)) & (P <= ones(m,1)*S(3,:));	% these are the feature values in those domains	for f=1:k,		% repeat for all features		af = a(:,f);		JQ = find(Q(:,f));		SET = P(JQ,f)';		if JQ(1) ~= 1			SET = [P(JQ(1)-1,f), SET];		end		n = length(JQ);		if JQ(n) ~= m			SET = [SET, P(JQ(n)+1,f)];		end		n = length(SET) -1;		T = (SET(1:n) + SET(2:n+1))/2; % all possible thresholds		L = zeros(c,n); R = L;     % left and right node object counts per class		for j = 1:c			J = find(nlab==j); mj = length(J);			if mj == 0				L(j,:) = realmin*ones(1,n); R(j,:) = L(j,:);			else				L(j,:) = sum(repmat(af(J),1,n) <= repmat(T,mj,1)) + realmin;				R(j,:) = sum(repmat(af(J),1,n) > repmat(T,mj,1)) + realmin;			end		end		infomeas =  - (sum(L .* log10(L./(ones(c,1)*sum(L)))) ...			       + sum(R .* log10(R./(ones(c,1)*sum(R))))) ...		    ./ (log10(2)*(sum(L)+sum(R))); % criterion value for all thresholds		[mininfo(f,1),j] = min(infomeas);     % finds the best		mininfo(f,2) = T(j);     % and its threshold	end   	g = 1-mininfo(:,1)';	[finfo,j] = min(mininfo(:,1));		% best over all features	t = mininfo(j,2);			% and its threshold	return%FISHCRIT Fisher's Criterion and its best feature split % % 	[f,j,t] = fishcrit(A,nlabels)% % Computes the value of the Fisher's criterion f for all features % over the dataset A with given numeric labels. Two classes only. j % is the optimum feature, t its threshold. This is a lowlevel % routine called for constructing decision trees.% Copyright R.P.W. Duin, duin@ph.tn.tudelft.nl% Faculty of Applied Physics, Delft University of Technology% P.O. Box 5046, 2600 GA Delft, The Netherlandsfunction [f,j,t] = fishcrit(a,nlab)	prtrace(mfilename);	[m,k] = size(a);	c = max(nlab);	if c > 2		error('Not more than 2 classes allowed for Fisher Criterion')	end	% Get the mean and variances of both the classes:	J1 = find(nlab==1);	J2 = find(nlab==2);	u = (mean(a(J1,:),1) - mean(a(J2,:),1)).^2;	s = std(a(J1,:),0,1).^2 + std(a(J2,:),0,1).^2 + realmin;	% The Fisher ratio becomes:	f = u ./ s;	% Find then the best feature:	[ff,j] = max(f);	% Given the feature, compute the threshold:	m1 = mean(a(J1,j),1);	m2 = mean(a(J2,j),1);	w1 = m1 - m2; w2 = (m1*m1-m2*m2)/2;	if abs(w1) < eps % the means are equal, so the Fisher			 % criterion (should) become 0. Let us set the thresold			 % halfway the domain			 t = (max(a(J1,j),[],1) + min(a(J2,j),[],1)) / 2;	else		t = w2/w1;	end	return%INFSTOP Quinlan's Chi-square test for early stopping% % 	crt = infstop(A,nlabels,j,t)% % Computes the Chi-square test described by Quinlan [1] to be used % in maketree for forward pruning (early stopping) using dataset A % and its numeric labels. j is the feature used for splitting and t % the threshold. %% [1] J.R. Quinlan, Simplifying Decision Trees, % Int. J. Man - Machine Studies, vol. 27, 1987, pp. 221-234.% % See maketree, treec, classt, prune % Guido te Brake, TWI/SSOR, TU Delft.% Copyright: R.P.W. Duin, duin@ph.tn.tudelft.nl% Faculty of Applied Physics, Delft University of Technology% P.O. Box 5046, 2600 GA Delft, The Netherlandsfunction crt = infstop(a,nlab,j,t)	prtrace(mfilename);	[m,k] = size(a);	c = max(nlab);	aj = a(:,j);	ELAB = expandd(nlab); 	L = sum(ELAB(aj <= t,:),1) + 0.001;	R = sum(ELAB(aj > t,:),1) + 0.001;	LL = (L+R) * sum(L) / m;	RR = (L+R) * sum(R) / m;	crt = sum(((L-LL).^2)./LL + ((R-RR).^2)./RR);	return%PRUNEP Pessimistic pruning of a decision tree% % 	tree = prunep(tree,a,nlab,num)% % Must be called by giving a tree and the training set a. num is the % starting node, if omitted pruning starts at the root. Pessimistic % pruning is defined by Quinlan.% % See also maketree, treec, mapt % Guido te Brake, TWI/SSOR, TU Delft.% Copyright: R.P.W. Duin, duin@ph.tn.tudelft.nl% Faculty of Applied Physics, Delft University of Technology% P.O. Box 5046, 2600 GA Delft, The Netherlandsfunction tree = prunep(tree,a,nlab,num)	prtrace(mfilename);	if nargin < 4, num = 1; end;	[N,k] = size(a);	c = size(tree,2)-4;	if tree(num,3) == 0, return, end;	w = mapping('treec','trained',{tree,num},[1:c]',k,c);	ttt=tree_map(dataset(a,nlab),w);	J = testc(ttt)*N;	EA = J + nleaves(tree,num)./2;   % expected number of errors in tree	P = sum(expandd(nlab,c),1);     % distribution of classes					%disp([length(P) c])					[pm,cm] = max(P);     % most frequent class					E = N - pm;     % errors if substituted by leave					SD = sqrt((EA * (N - EA))/N);					if (E + 0.5) < (EA + SD)	     % clean tree while removing nodes						[mt,kt] = size(tree);						nodes = zeros(mt,1); nodes(num) = 1; n = 0;						while sum(nodes) > n;	     % find all nodes to be removed							n = sum(nodes);							J = find(tree(:,3)>0 & nodes==1);							nodes(tree(J,3)) = ones(length(J),1); 							nodes(tree(J,4)) = ones(length(J),1); 						end						tree(num,:) = [cm 0 0 0 P/N];						nodes(num) = 0; nc = cumsum(nodes);						J = find(tree(:,3)>0);% update internal references						tree(J,[3 4]) = tree(J,[3 4]) - reshape(nc(tree(J,[3 4])),length(J),2);						tree = tree(~nodes,:);% remove obsolete nodes					else 						K1 = find(a(:,tree(num,1)) <= tree(num,2));						K2 = find(a(:,tree(num,1)) >  tree(num,2));						tree = prunep(tree,a(K1,:),nlab(K1),tree(num,3));						tree = prunep(tree,a(K2,:),nlab(K2),tree(num,4));					end					return%PRUNET Prune tree by testset% % 	tree = prunet(tree,a)% % The test set a is used to prune a decision tree. % Copyright: R.P.W. Duin, duin@ph.tn.tudelft.nl% Faculty of Applied Physics, Delft University of Technology% P.O. Box 5046, 2600 GA Delft, The Netherlandsfunction tree = prunet(tree,a)	prtrace(mfilename);	[m,k] = size(a);	[n,s] = size(tree);	c = s-4;	erre = zeros(1,n);	deln = zeros(1,n);	w = mapping('treec','trained',{tree,1},[1:c]',k,c);	[f,lab,nn] = tree_map(a,w);  % bug, this works only if a is dataset, labels ???	[fmax,cmax] = max(tree(:,[5:4+c]),[],2);	nngood = nn([1:n]'+(cmax-1)*n);	errn = sum(nn,2) - nngood;% errors in each node	sd = 1;	while sd > 0		erre = zeros(n,1);		deln = zeros(1,n);		endn = find(tree(:,3) == 0)';	% endnodes		pendl = max(tree(:,3*ones(1,length(endn)))' == endn(ones(n,1),:)');		pendr = max(tree(:,4*ones(1,length(endn)))' == endn(ones(n,1),:)');		pend = find(pendl & pendr);		% parents of two endnodes		erre(pend) = errn(tree(pend,3)) + errn(tree(pend,4));		deln = pend(find(erre(pend) >= errn(pend))); % nodes to be leaved		sd = length(deln);		if sd > 0			tree(tree(deln,3),:) = -1*ones(sd,s);			tree(tree(deln,4),:) = -1*ones(sd,s);			tree(deln,[1,2,3,4]) = [cmax(deln),zeros(sd,3)];		end	end	return%NLEAVES Computes the number of leaves in a decision tree% % 	number = nleaves(tree,num)% % This procedure counts the number of leaves in a (sub)tree of the % tree by using num. If num is omitted, the root is taken (num = 1).% % This is a utility used by maketree. % Guido te Brake, TWI/SSOR, TU Delft% Copyright: R.P.W. Duin, duin@ph.tn.tudelft.nl% Faculty of Applied Physics, Delft University of Technology% P.O. Box 5046, 2600 GA Delft, The Netherlandsfunction number = nleaves(tree,num)	prtrace(mfilename);	if nargin < 2, num = 1; end	if tree(num,3) == 0		number = 1 ;	else		number = nleaves(tree,tree(num,3)) + nleaves(tree,tree(num,4));	end	return

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -