lda_classify.m
来自「一款数据挖掘的软件」· M 代码 · 共 81 行
M
81 行
% LDA_classify: implementation for linear discriminant analysis
%
% Parameters:
% para: parameters
% 1. RegFactor: regularization factor, default: 0.1
% 2. QDA: use quadratic discriminant analysis or not, default: 0
% X_train: training examples
% Y_train: training labels
% X_test: testing examples
% Y_test: testing labels
% num_class: number of classes
% class_set: set of class labels such as [1,-1], the first one is the
% positive label
%
% Required functions:
% ParseParameter, GetModelFilename
function [Y_compute, Y_prob] = LDA_classify(para, X_train, Y_train, X_test, Y_test, num_class, class_set)
% LDA_classify  Gaussian discriminant classifier (LDA, optionally QDA).
%
% When X_train is non-empty the model is estimated from it (and saved to
% <GetModelFilename>.mat when the global preprocess.TrainOnly equals 1);
% otherwise a previously saved model is loaded from that file.
%
% Inputs:
%   para      - parameter string parsed by ParseParameter for the options
%               '-RegFactor' (default '0.1') and '-QDA' (default '0')
%   X_train   - training examples, one row per example (may be empty)
%   Y_train   - training labels, compared against class_set entries
%   X_test    - testing examples, one row per example
%   Y_test    - testing labels (not used for prediction)
%   num_class - number of classes
%   class_set - class labels; class_set(1) is treated as the positive label
%
% Outputs:
%   Y_compute - class_set entry of the class with the smallest Mahalanobis
%               distance for each test row
%   Y_prob    - for two classes: normalized weight of class_set(1);
%               otherwise: the largest normalized class weight per row

global preprocess;
p = str2num(char(ParseParameter(para, {'-RegFactor'; '-QDA'}, {'0.1'; '0'})));

% Parameter estimation (or model loading when no training data is given)
if ~isempty(X_train)
    [data_mean, inv_sigma, num_data_class] = ParaEst(p, X_train, Y_train, num_class, class_set);
    if (preprocess.TrainOnly == 1)
        save(strcat(GetModelFilename, '.mat'), 'data_mean', 'inv_sigma', 'num_data_class');
    end
else
    model = load(strcat(GetModelFilename, '.mat'));
    data_mean = model.data_mean;
    inv_sigma = model.inv_sigma;
    num_data_class = model.num_data_class;
    clear model;
end

% Prediction
% Count test rows from X_test so prediction does not depend on Y_test
% being supplied with a matching number of rows.
num_test = size(X_test, 1);

% inv_sigma is either one shared matrix (LDA, or a model saved in the older
% 2-D format) or one page per class (QDA). Clamping the page index lets both
% layouts go through the same loop.
num_sigma = size(inv_sigma, 3);

Y_distance_matrix = zeros(num_test, num_class);
for i = 1:num_class
    % Squared Mahalanobis distance of every test row to the mean of class i
    data_distance = X_test - repmat(data_mean(i, :), num_test, 1);
    class_inv_sigma = inv_sigma(:, :, min(i, num_sigma));
    Y_distance_matrix(:, i) = sum((data_distance * class_inv_sigma) .* data_distance, 2);
end

% Generate the labels and probabilities
[Y_distance, Index] = min(Y_distance_matrix, [], 2);
Y_compute = class_set(Index);

% Subtract the row-wise minimum before exponentiating so that at least one
% entry per row equals exp(0) = 1; the shift cancels in the normalization.
Y_prob_matrix = exp(-0.5 * (Y_distance_matrix - repmat(Y_distance, 1, num_class)));
% Weight each class by num_data_class (class count, divided by sqrt(det(sigma))
% in QDA mode).
Y_prob_matrix = repmat(num_data_class, num_test, 1) .* Y_prob_matrix;
sumYprob = sum(Y_prob_matrix, 2);
% (sumYprob == 0) adds 1 to zero denominators to avoid division by zero.
safe_sum = (sumYprob == 0) + sumYprob;
if (num_class == 2)
    Y_prob = Y_prob_matrix(:, 1) ./ safe_sum;
else
    Y_prob = max(Y_prob_matrix, [], 2) ./ safe_sum;
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [data_mean, inv_sigma, num_data_class] = ParaEst(p, X_train, Y_train, num_class, class_set)
% ParaEst  Estimate class means, inverse covariance(s) and class weights.
%
% Inputs:
%   p         - [RegFactor; QDA] numeric options
%   X_train   - training examples, one row per example
%   Y_train   - training labels
%   num_class - number of classes
%   class_set - class labels; row i of the outputs corresponds to class_set(i)
%
% Outputs:
%   data_mean      - num_class x num_feature matrix of per-class means
%   inv_sigma      - LDA (QDA <= 0): num_feature x num_feature inverse of the
%                    regularized covariance of all training rows.
%                    QDA (QDA > 0): num_feature x num_feature x num_class
%                    array, page i being the inverse of the regularized
%                    covariance of class i.
%   num_data_class - 1 x num_class class weights: the class counts, divided
%                    by sqrt(det(sigma_i)) in QDA mode

RegFactor = p(1);
QDA = p(2);
num_feature = size(X_train, 2);

data_mean = zeros(num_class, num_feature);
num_data_class = zeros(1, num_class);

if (QDA > 0)
    % Each class needs its own inverse covariance in QDA mode; a single
    % shared matrix would apply the last class's covariance to every class.
    inv_sigma = zeros(num_feature, num_feature, num_class);
else
    % Shared covariance estimated from all training rows, shrunk towards
    % the identity by RegFactor.
    sigma = (1 - RegFactor) * cov(X_train) + RegFactor * eye(num_feature);
    inv_sigma = inv(sigma);
end

for i = 1:num_class
    % Rows of X_train whose label equals the i-th class label
    data = X_train(Y_train == class_set(i), :);
    % Average over rows explicitly: with a single-row class, mean(data)
    % would average across the features and return a scalar instead.
    data_mean(i, :) = mean(data, 1);
    num_data_class(i) = size(data, 1);
    if (QDA > 0)
        sigma = (1 - RegFactor) * cov(data) + RegFactor * eye(num_feature);
        inv_sigma(:, :, i) = inv(sigma);
        num_data_class(i) = num_data_class(i) / sqrt(det(sigma));
    end
end
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?