📄 naivebayes.m
字号:
function [predict, accuracy] = NaiveBayes(Train, Test)
%NAIVEBAYES Gaussian naive Bayes classifier.
%   [predict, accuracy] = NaiveBayes(Train, Test)
%
%   Input:
%     Train - training set, one instance per row; the last column is the
%             class label, an integer starting from zero.
%     Test  - testing set with the same layout as Train.
%   Output:
%     predict  - column vector of predicted labels (zero-based), one per
%                row of Test.
%     accuracy - fraction of rows of Test whose predicted label equals the
%                label stored in the last column of Test.
%
%   Each feature is modelled per class by a normal distribution and the
%   features are assumed to be mutually independent. Scores are computed in
%   the log domain; the constant term -0.5*log(2*pi) is omitted because it
%   is identical for every class and does not affect the arg-max.

Train_sample = Train(:,1:end-1);
Train_label = Train(:,end);
Test_sample = Test(:,1:end-1);
Test_label = Test(:,end);

if isempty(Train_label)
    error('NaiveBayes:emptyTrain', 'Training set must contain at least one instance.');
end
if any(Train_label < 0) || any(Train_label ~= floor(Train_label))
    error('NaiveBayes:badLabel', 'Class labels must be non-negative integers starting from zero.');
end

% Use explicit row counts: length() returns the largest dimension, which is
% the feature count whenever there are more features than instances.
Train_num = size(Train_sample,1);
Test_num = size(Test_sample,1);
Feature_num = size(Train_sample,2);

% Labels are used as (label+1) indices, so size the model by the largest
% label; this equals the number of distinct labels when labels are 0..C-1.
Class_num = max(Train_label) + 1;

% Replacement standard deviation for features that are constant within a
% class (value kept from the original implementation).
Zero_dev_fallback = 0.1667;

Para_mean = zeros(Class_num, Feature_num); % Mean for each class (row) and feature (column)
Para_dev = ones(Class_num, Feature_num);   % Standard deviation for each class and feature
Prior_prob = zeros(1, Class_num);          % Prior probability of each class

for c = 1:Class_num % model parameters
    Class_rows = Train_sample(Train_label == c-1, :);
    Prior_prob(1,c) = size(Class_rows,1) / Train_num;
    if isempty(Class_rows)
        % Label value absent from the training set: its prior is 0, so its
        % log score is -Inf and it can never be selected.
        continue;
    end
    % Operate along dimension 1 explicitly so that a class with a single
    % training row still yields one value per feature.
    Para_mean(c,:) = mean(Class_rows, 1);
    Para_dev(c,:) = std(Class_rows, 0, 1);
end

% If the deviation is zero, adjust it to avoid division by zero and log(0).
Para_dev(Para_dev == 0) = Zero_dev_fallback;

Log_prior = log(Prior_prob);
predict = zeros(Test_num, 1);
for i = 1:Test_num
    % Log of the normal density for every class and feature, summed over
    % features (independence assumption). The sum is computed separately
    % for each class, so no likelihood leaks from one class into the next.
    Deviation = bsxfun(@minus, Test_sample(i,:), Para_mean);
    Log_likelihood = -sum(Deviation.^2 ./ (2*Para_dev.^2) + log(Para_dev), 2);
    prob = Log_prior + Log_likelihood';
    [~, index] = max(prob);
    predict(i,1) = index - 1; % back to zero-based label
end

accuracy = sum(predict == Test_label) / numel(Test_label);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -