# ex4-1 : neural networks

## 1. feedforward and cost function (without regularization)

nnCostFunction.m


function [J grad] = nnCostFunction(nn_params, ...
                                   input_layer_size, ...
                                   hidden_layer_size, ...
                                   num_labels, ...
                                   X, y, lambda)

Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ...
                 hidden_layer_size, (input_layer_size + 1));

Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ...
                 num_labels, (hidden_layer_size + 1));
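
The two reshape calls undo the unrolling that ex4.m performs before calling this function. A minimal sketch of that convention (column-major unrolling of both weight matrices into a single vector):

% How ex4.m packs the weights into nn_params, which the reshapes above undo:
nn_params = [Theta1(:); Theta2(:)];   % column-major unroll, Theta1 first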

% ====================== YOUR CODE HERE ======================
% Instructions: You should complete the code by working through the
%               following parts.
%
% Part 1: Feedforward the neural network and return the cost in the
%         variable J. After implementing Part 1, you can verify that your
%         cost function computation is correct by verifying the cost
%         computed in ex4.m
%

% Calculate h(x) through forward propagation
m = size(X, 1);               % number of training examples (5000)
X = [ones(m, 1) X];           % X: 5000x(400+1), add bias column
a2 = sigmoid(X * Theta1');    % Theta1: 25x401

a2 = [ones(m, 1) a2];         % a2: 5000x(25+1), add bias column
a3 = sigmoid(a2 * Theta2');   % Theta2: 10x26
% a3: 5000x10
% h(x) == a3
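% sigmoid here is the helper provided with ex4 (sigmoid.m),
% g(z) = 1 ./ (1 + exp(-z)), applied element-wise.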
% In each row of a3, columns 1 through K (= 10) each correspond to one
% class, from 1 to 10.
% So I first assumed each column of Y (5000x10) should be filled with the
% values 1 through 10 -> that is wrong.
% Generate Y: incorrect attempt
%Y = zeros(m, num_labels);
%for i=1:m
%    for j=1:num_labels
%        Y(:,j) = j;
%    end
%end

% Generate Y: correct
% Each row must be a single one-hot y vector: the label vector y (10x1)
% has exactly one 1 and zeros elsewhere.
% Y stacks the transposed y vectors, one per row, for all m examples.
I = eye(num_labels);
Y = zeros(m, num_labels);
for i=1:m
    Y(i, :) = I(y(i), :);
end
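% Equivalent vectorized alternative (a sketch, assuming y holds integer
% labels in 1..num_labels): selecting rows of the identity matrix by label
% builds the same one-hot matrix without the loop.
%Y = I(y, :);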

% Cost function J (unregularized)

pos = -Y .* log(a3);             % Y: 5000x10, a3: 5000x10
neg = (1 - Y) .* log(1 - a3);    % pos, neg: 5000x10

J = (1/m) * sum(sum(pos - neg, 2), 1);
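
For reference, this line implements the unregularized cost from the exercise, summed over all m examples and K classes:

$$ J(\Theta) = \frac{1}{m} \sum_{i=1}^{m} \sum_{k=1}^{K} \left[ -y_k^{(i)} \log\left( (h_\Theta(x^{(i)}))_k \right) - \left(1 - y_k^{(i)}\right) \log\left( 1 - (h_\Theta(x^{(i)}))_k \right) \right] $$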


## 2. regularized cost function


% Cost function J + regularization
% Theta1: 25x401
% Theta2: 10x26

% Exclude the bias column (first column) of each weight matrix from
% regularization.
Theta1Reg = Theta1(:, 2:end);
Theta2Reg = Theta2(:, 2:end);

Reg = sum(sum(Theta1Reg.^2)) + sum(sum(Theta2Reg.^2));
J = J + (lambda/(2*m)) * Reg;
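
A quick way to exercise both parts (a usage sketch; ex4.m itself loads X, y and the pre-trained Theta1, Theta2 from ex4data1.mat and ex4weights.mat before making this call):

% Usage sketch with the layer sizes ex4 uses:
input_layer_size  = 400;   % 20x20 input images
hidden_layer_size = 25;
num_labels        = 10;
nn_params = [Theta1(:); Theta2(:)];   % unroll the weights
lambda = 1;
J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ...
                   num_labels, X, y, lambda);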