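% [web code-viewer residue, not part of the script] file: j
% [web code-viewer residue, not part of the script] font size: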
% Gradient check for W1.
% Compares the analytic gradient dE_dW1 with a forward-difference estimate:
% each entry of W1 is perturbed by delta in turn, the forward pass
% (tanh layer with W1, then tanh layer with W2) is re-run, and the change
% in the summed squared error is divided by delta.
% Reads (defined outside this section): X0, one, W1, W2, T, E, dE_dW1, i.
% Writes: delta, num_dE_dW1, diff_W1(i), error_leng, grad_leng.
% NOTE(review): E is assumed to be sum(sum((T - X2).^2)) at the unperturbed
% weights, i.e. the same quantity as new_E below -- confirm where E is set.
% NOTE(review): `one` is presumably a column of ones acting as the bias
% input -- confirm where it is defined.
delta = 1e-8;                        % finite-difference step (also used by later sections)
num_dE_dW1 = zeros(size(W1));
X0_aug = [X0 one];                   % loop-invariant, so built once instead of per iteration
for j = 1:numel(W1)
    new_W1 = W1;
    new_W1(j) = new_W1(j) + delta;   % perturb one weight (linear index j)
    new_X1 = tanh(X0_aug*new_W1);
    new_X2 = tanh([new_X1 one]*W2);
    new_diff = T - new_X2;           % error
    new_E = sum(sum(new_diff.^2));
    num_dE_dW1(j) = (new_E - E)/delta;
end
% tmp = max(max(abs(dE_dW1-num_dE_dW1)));
error_leng = sqrt(sum((dE_dW1(:) - num_dE_dW1(:)).^2));  % Euclidean length of the mismatch
grad_leng = sqrt(sum((dE_dW1(:)).^2));                   % Euclidean length of the analytic gradient
diff_W1(i) = error_leng/grad_leng;                       % relative mismatch, stored at index i
fprintf('===> Length of W1 grad error = %g (%.2g%%)\n', ...
    error_leng, error_leng/grad_leng*100);
% Gradient check for W2.
% Compares the analytic gradient dE_dW2 with a forward-difference estimate:
% each entry of W2 is perturbed by delta in turn, the output layer is
% re-evaluated, and the change in the summed squared error is divided by
% delta.
% Reads (defined outside this section): X0, one, W1, W2, T, E, dE_dW2, i,
% and delta (the finite-difference step, set before this section).
% Writes: num_dE_dW2, diff_W2(i), error_leng, grad_leng.
% NOTE(review): E is assumed to be sum(sum((T - X2).^2)) at the unperturbed
% weights, i.e. the same quantity as new_E below -- confirm where E is set.
num_dE_dW2 = zeros(size(W2));
% W1 is not perturbed in this section, so the first-layer activations are
% the same for every j; compute them once instead of inside the loop.
new_X1 = tanh([X0 one]*W1);
for j = 1:numel(W2)
    new_W2 = W2;
    new_W2(j) = new_W2(j) + delta;   % perturb one weight (linear index j)
    new_X2 = tanh([new_X1 one]*new_W2);
    new_diff = T - new_X2;           % error
    new_E = sum(sum(new_diff.^2));
    num_dE_dW2(j) = (new_E - E)/delta;
end
% tmp = max(max(abs(dE_dW2-num_dE_dW2)));
error_leng = sqrt(sum((dE_dW2(:) - num_dE_dW2(:)).^2));  % Euclidean length of the mismatch
grad_leng = sqrt(sum((dE_dW2(:)).^2));                   % Euclidean length of the analytic gradient
diff_W2(i) = error_leng/grad_leng;                       % relative mismatch, stored at index i
fprintf('===> Length of W2 grad error = %g (%.2g%%)\n', ...
    error_leng, error_leng/grad_leng*100);
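% [web code-viewer residue, not part of the script] Keyboard shortcuts:
%   Copy code            Ctrl + C
%   Search code          Ctrl + F
%   Fullscreen mode      F11
%   Toggle theme         Ctrl + Shift + D
%   Show shortcuts       ?
%   Increase font size   Ctrl + =
%   Decrease font size   Ctrl + -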