% File: iterdiscrim_apr.m
% (source-viewer header residue: font-size control)
%count, for each candidate feature, the negative instances it discriminates against
for k = 1 : num_feature
%skip selected features and removed features
if feature_range(k) == 0 || feature_selected(k) == 1, continue; end;
idx = 0;
for i = 1 : num_train_bag
num_inst = size(train_bags(i).instance, 1);
for j = 1 : num_inst
if neg_excluded(idx + j) == 1, continue; end; %either positive instance or excluded negative instance
if dist_APR(idx + j, k) > margin || (dist_APR(idx+j, k) > 0 && dist_APR(idx+j, k) == max(dist_APR(idx+j,:)))
num_neg_against(k) = num_neg_against(k) + 1;
end
end
idx = idx + num_inst;
end
end
%find the most discriminative feature
[sort_ret, sort_idx] = sort(num_neg_against);
choice = sort_idx(length(sort_ret));
feature_selected(choice) = 1;
%update the list of remaining neg instances
idx = 0;
for i = 1 : num_train_bag
num_inst = size(train_bags(i).instance, 1);
for j = 1 : num_inst
if neg_excluded(idx + j) == 1, continue; end; %either positive instance or excluded negative instance
if dist_APR(idx + j, choice) > margin || (dist_APR(idx+j, choice) > 0 && dist_APR(idx+j, choice) == max(dist_APR(idx+j,:)))
neg_excluded(idx+j) = 1;
end
end
idx = idx + num_inst;
end
end
%if all features are selected, the algorithm has converged and returns
if all(feature_selected), break; end;
%update feature_range according to feature_selected
for k = 1 : num_feature
if feature_range(k) == 1 && feature_selected(k) == 0
feature_range(k) = 0;
end
end
end
fprintf('%d expands to ',APR_size(lb, ub, feature_range));
%expanding the APR using kernel density estimation
overall_lb = zeros(1, num_feature);
overall_ub = zeros(1, num_feature);
for k = 1 : num_feature;
for i = 1 : num_train_bag
overall_lb(k) = min(min(train_bags(i).instance(:,k)), overall_lb(k));
overall_ub(k) = max(max(train_bags(i).instance(:,k)), overall_ub(k));
end
end
overall_lb = overall_lb - 50;
overall_ub = overall_ub + 50;
grid_size = (overall_ub - overall_lb) ./ num_grid;
for k = 1 : num_feature;
idx = 1;
if feature_range(k) == 0, continue; end;
%choose the instances falling into the current APR for kernel estimation
for i = 1 : num_train_bag
if train_bags(i).label == 0, continue; end;
for j = 1 : size(train_bags(i).instance, 1)
if train_bags(i).instance(j,k) >= lb(k) && train_bags(i).instance(j,k) <= ub(k)
value(idx) = train_bags(i).instance(j, k);
idx = idx + 1;
end
end
end
grid = overall_lb(k) : grid_size(k) : overall_ub(k);
%calculate kernel width, which makes the inside probability with
%the current APR equal to the given inside probability
if inside_prob == 0.999
kernel_width = (ub(k) - lb(k)) / (2 * 3.291);
elseif inside_prob == 0.995
kernel_width = (ub(k) - lb(k)) / (2 * 2.807);
elseif inside_prob == 0.99
kernel_width = (ub(k) - lb(k)) / (2 * 2.576);
elseif inside_prob == 0.95
kernel_width = (ub(k) - lb(k)) / (2 * 1.960);
else
wrong = 1;
end
ds = ksdensity(value, grid, 'width', kernel_width);
%expand the APR according to the kernel estimation
accum_prob = 0;
lb_set = 0;
for i = 1 : length(ds) - 1
accum_prob = accum_prob + grid_size(k) * ((ds(i) + ds(i+1))/2);
if accum_prob > (outside_prob/2) && lb_set == 0
if lb(k) > grid(i), lb(k) = grid(i); end;
lb_set = 1;
end;
if accum_prob > (1 - outside_prob/2)
if ub(k) < grid(i), ub(k) = grid(i); end;
break;
end;
end
end
fprintf('%d.\n',APR_size(lb, ub, feature_range));
if (isfield(preprocess,'model_file') && ~isempty(preprocess.model_file))
%save the APR data if model_filename is provided
fid = fopen(preprocess.model_file, 'w');
if fid == -1, error('model file cannot be opened for writing!'); end;
fclose(fid);
save(preprocess.model_file, 'feature_range', 'lb', 'ub');
end;
end;
%prediction
idx = 0;
test_bag_label = zeros(num_test_bag, 1);
test_inst_label = zeros(num_test_inst, 1);
test_inst_prob = [];
for i = 1 : num_test_bag
num_inst = size(test_bags(i).instance, 1);
for j = 1 : num_inst
test_inst_label(idx + j) = (dist2APR(lb, ub, test_bags(i).instance(j, :), feature_range) == 0);
end
test_bag_prob(i) = sum(test_inst_label(idx+1 : idx+num_inst)) / num_inst;
test_bag_label(i) = any(test_inst_label(idx+1 : idx+num_inst));
idx = idx + num_inst;
end
function inst_choice = adjust_APR(bags, pos_bag_choice, pos_bag_repinst, feature_range, revisit_step)
%ADJUST_APR Re-pick the representative instance of one revisited step.
%   inst_choice = adjust_APR(bags, pos_bag_choice, pos_bag_repinst,
%   feature_range, revisit_step) builds the bounding APR of the
%   representative instances of every step except revisit_step, then tries
%   each instance of the revisited bag in turn and returns the row index
%   (within that bag) of the instance whose addition enlarges the APR the
%   least. Ties are resolved in favour of the lowest row index.
%
%   bags            - struct array; bags(b).instance holds one instance per row
%   pos_bag_choice  - pos_bag_choice(s) is the index into bags used at step s
%   pos_bag_repinst - pos_bag_repinst(s) is the row of bags(pos_bag_choice(s)).instance
%                     that represents step s
%   feature_range   - per-feature vector forwarded to find_bounding_APR and APR_size
%   revisit_step    - step whose representative instance is reconsidered
%
%   Raises adjust_APR:emptyBag when the revisited bag has no instances.

num_step = length(pos_bag_choice);

% The revisited bag never changes inside the candidate loop, so fetch it once.
revisit_bag_inst = bags(pos_bag_choice(revisit_step)).instance;
num_inst_revisit = size(revisit_bag_inst, 1);
if num_inst_revisit == 0
    error('adjust_APR:emptyBag', 'the revisited bag contains no instances.');
end

other_steps = find((1 : num_step) ~= revisit_step);
num_other = numel(other_steps);
if num_other == 0
    % No other step anchors the APR. The bounding box of a single point has
    % zero extent whichever candidate is used, so every candidate ties and
    % the first one wins (same tie-break as the general case below).
    inst_choice = 1;
    return;
end

% Collect the representative instances of all other steps. The matrix is
% allocated up front from the first row (repmat keeps that row's class).
first_step = other_steps(1);
first_row = bags(pos_bag_choice(first_step)).instance(pos_bag_repinst(first_step), :);
select_inst = repmat(first_row, num_other, 1);
for t = 2 : num_other
    s = other_steps(t);
    select_inst(t, :) = bags(pos_bag_choice(s)).instance(pos_bag_repinst(s), :);
end

[lb, ub] = find_bounding_APR(select_inst, feature_range);
base_size = APR_size(lb, ub, feature_range);

% Size growth caused by adding each candidate instance of the revisited bag.
size_increase = zeros(1, num_inst_revisit);
for i = 1 : num_inst_revisit
    [lb, ub] = find_bounding_APR([select_inst; revisit_bag_inst(i, :)], feature_range);
    size_increase(i) = APR_size(lb, ub, feature_range) - base_size;
end

% min returns the index of the first smallest element, which is what
% sort_idx(1) of a (stable) ascending sort gave, without sorting everything.
[smallest_increase, inst_choice] = min(size_increase); %#ok<ASGLU>
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [lb, ub] = find_bounding_APR(inst, feature_range)
%FIND_BOUNDING_APR Per-feature lower and upper bounds of a set of instances.
%   [lb, ub] = find_bounding_APR(inst, feature_range) treats each row of
%   inst as one instance and returns two 1-by-num_feature row vectors: lb
%   holds the column-wise minimum and ub the column-wise maximum. Columns
%   whose feature_range entry is zero are not examined and keep
%   lb = ub = 0. When feature_range is omitted, every column is used.

num_feature = size(inst, 2);
if nargin < 2
    feature_range = ones(1, num_feature);
end

lb = zeros(1, num_feature);
ub = zeros(1, num_feature);

% Visit only the columns flagged as active, in ascending order.
active = find(feature_range(1 : num_feature) ~= 0);
for c = active(:).'
    column = inst(:, c);
    lb(c) = min(column);
    ub(c) = max(column);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function size = APR_size(lb, ub, feature_range)
%APR_SIZE Weighted sum of the per-feature widths of an APR.
%   size = APR_size(lb, ub, feature_range) returns
%   sum((ub - lb) .* feature_range), so a feature whose feature_range entry
%   is zero contributes nothing. When feature_range is omitted, every
%   feature is given weight one.
%
%   Note: the output variable is named "size" and therefore hides the
%   built-in size function inside this body; nothing here calls it.

if nargin < 3
    num_feature = length(ub);
    feature_range = ones(1, num_feature);
end

widths = ub - lb;
size = sum(widths .* feature_range);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function dist = dist2APR(lb, ub, inst, feature_range)
%DIST2APR Total amount by which an instance lies outside an APR.
%   dist = dist2APR(lb, ub, inst, feature_range) walks over the features
%   whose feature_range entry is non-zero and adds up, per feature, how far
%   inst falls below lb or above ub. A feature inside [lb, ub] adds
%   nothing, so dist is 0 when the instance is inside on every active
%   feature. When feature_range is omitted, all features are active.

num_feature = length(ub);
if nargin < 4
    feature_range = ones(1, num_feature);
end

dist = 0;
active = find(feature_range(1 : num_feature) ~= 0);
for f = active(:).'
    x = inst(f);
    if x < lb(f)
        % below the lower bound: add the shortfall
        dist = dist + (lb(f) - x);
    elseif x > ub(f)
        % above the upper bound: add the overshoot
        dist = dist + (x - ub(f));
    end
end
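% --- Source-viewer residue (not part of the program): keyboard shortcuts ---
% Copy code:          Ctrl + C
% Search code:        Ctrl + F
% Full-screen mode:   F11
% Toggle theme:       Ctrl + Shift + D
% Show shortcuts:     ?
% Increase font size: Ctrl + =
% Decrease font size: Ctrl + -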