Programming Exercise 2
Note: this is a simplified code example. If you are taking this class, don't copy and submit it, since it won't even work as-is…
Plot function
```matlab
function plotData(X, y)
  figure; hold on;
  % Indices of positive (y == 1) and negative (y == 0) examples
  pos = find(y == 1); neg = find(y == 0);
  % Positives as black crosses, negatives as yellow-filled circles
  plot(X(pos, 1), X(pos, 2), 'k+', 'LineWidth', 2, 'MarkerSize', 7);
  plot(X(neg, 1), X(neg, 2), 'ko', 'MarkerFaceColor', 'y', 'MarkerSize', 7);
  hold off;
end
```
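A minimal sketch of how this helper might be driven, assuming the exercise's two-feature data file `ex2data1.txt`; the file name, axis labels, and legend text are assumptions, not part of the function above:

```matlab
% Sketch: load two features plus a label column, then plot them
data = load('ex2data1.txt');     % assumed layout: [x1, x2, y]
X = data(:, 1:2);
y = data(:, 3);
plotData(X, y);
xlabel('Exam 1 score');          % assumed axis labels
ylabel('Exam 2 score');
legend('Admitted', 'Not admitted');
```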
Sigmoid function
```matlab
function g = sigmoid(z)
  % Compute the sigmoid of each value of z (z can be a matrix,
  % vector or scalar).
  g = zeros(size(z));
  for row_iter = 1:size(z, 1)
    for col_iter = 1:size(z, 2)
      g(row_iter, col_iter) = 1 / (1 + exp(-z(row_iter, col_iter)));
    end
  end
end
```
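Since `exp` already operates element-wise, the double loop can be replaced by a one-line vectorized version. This is a sketch of that alternative, not part of the original solution above:

```matlab
function g = sigmoid(z)
  % Vectorized: exp() works element-wise on scalars, vectors and matrices
  g = 1 ./ (1 + exp(-z));
end
```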
Cost function & gradient
```matlab
function [J, grad] = costFunction(theta, X, y)
  m = length(y);
  h = sigmoid(X*theta);
  % Cross-entropy cost averaged over the m examples
  J = (1/m) * sum(-y .* log(h) - (1 - y) .* log(1 - h));
  % Vectorized gradient: (1/m) * X' * (h - y)
  grad = (1/m) * (X') * (h - y);
end
```
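Returning `grad` alongside `J` lets an off-the-shelf optimizer use the analytic gradient. Below is a minimal sketch of such a call with `fminunc`, assuming `X` already contains the intercept column of ones and `initial_theta` is a zero vector of matching length; this is not the exercise's exact driver script:

```matlab
% Sketch: minimize costFunction with fminunc, supplying our gradient
options = optimset('GradObj', 'on', 'MaxIter', 400);
[theta, cost] = fminunc(@(t) costFunction(t, X, y), initial_theta, options);
```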
How the gradient works: the expression `(X')*(sigmoid(X*theta) - y)` evaluates every partial sum of the gradient in a single matrix product:
$$ \begin{aligned} \mathbf{X}^T \left( \mathrm{sigmoid}(\mathbf{X}\boldsymbol{\theta}) - \mathbf{y} \right) & = \begin{bmatrix}\mathbf{x}_1 \\ \mathbf{x}_2 \\ \vdots \\ \mathbf{x}_n \end{bmatrix} \begin{bmatrix}h_\theta(\mathbf{x}^{(1)}) - y^{(1)} \\ h_\theta(\mathbf{x}^{(2)}) - y^{(2)} \\ \vdots \\ h_\theta(\mathbf{x}^{(m)}) - y^{(m)} \end{bmatrix} \\ & = \begin{bmatrix} \sum\limits_{i=1}^{m}(h_\theta(\mathbf{x}^{(i)}) - y^{(i)})\, x_1^{(i)} \\ \sum\limits_{i=1}^{m}(h_\theta(\mathbf{x}^{(i)}) - y^{(i)})\, x_2^{(i)} \\ \vdots \\ \sum\limits_{i=1}^{m}(h_\theta(\mathbf{x}^{(i)}) - y^{(i)})\, x_n^{(i)} \end{bmatrix} \end{aligned} $$
Here $\mathbf{x}_j$ denotes the $j$-th row of $\mathbf{X}^T$ (feature $j$ across all $m$ examples), so the $j$-th entry of the product is $\sum_{i=1}^{m}(h_\theta(\mathbf{x}^{(i)}) - y^{(i)})\,x_j^{(i)}$, which is $m$ times the partial derivative $\partial J/\partial\theta_j$.
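As a sanity check, the vectorized product can be compared against the explicit per-parameter sums from the equation above. This is an illustrative sketch on made-up data (it assumes the sigmoid function defined earlier is on the path), not part of the submission:

```matlab
% Sketch: verify that X' * (h - y) matches the element-wise sums above
X = [1 2 3; 1 4 5; 1 6 7];          % made-up design matrix (m = 3, n = 3)
y = [1; 0; 1];
theta = [0.1; -0.2; 0.3];
h = sigmoid(X*theta);
vectorized = (1/3) * (X') * (h - y);
looped = zeros(3, 1);
for j = 1:3
  for i = 1:3
    looped(j) = looped(j) + (h(i) - y(i)) * X(i, j);
  end
end
looped = looped / 3;
% The two results agree up to floating-point error
disp(max(abs(vectorized - looped)));
```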
Cost function with regularization
```matlab
function [J, grad] = costFunctionReg(theta, X, y, lambda)
  m = length(y);
  n = size(theta, 1);
  h = sigmoid(X*theta);
  % Unregularized cost plus the penalty term (theta(1) is not penalized)
  J = (1/m) * sum(-y .* log(h) - (1 - y) .* log(1 - h)) ...
      + (lambda/(2*m)) * (theta(2:n)' * theta(2:n));
  % Unregularized gradient, then add the penalty for j >= 2 only
  grad = (1/m) * (X') * (h - y);
  grad(2:n) = grad(2:n) + (lambda/m) * theta(2:n);
end
```
- Notice that the only difference from the previous section is the penalty term.
- Notice that $\theta_0$ (the first entry of `theta`) must be excluded from the regularization in both J and grad; an alternative way to do that is sketched after this list.
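A common alternative to slicing with `(2:n)` is to build a copy of `theta` whose first entry is zeroed, so the intercept never contributes to the penalty. This is a sketch of that variant (my own rewrite, not the solution above):

```matlab
function [J, grad] = costFunctionReg(theta, X, y, lambda)
  m = length(y);
  h = sigmoid(X*theta);
  % Zero out theta(1) in a copy so the intercept is never penalized
  theta_reg = theta;
  theta_reg(1) = 0;
  J = (1/m) * sum(-y .* log(h) - (1 - y) .* log(1 - h)) ...
      + (lambda/(2*m)) * (theta_reg' * theta_reg);
  grad = (1/m) * (X') * (h - y) + (lambda/m) * theta_reg;
end
```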
Prediction using the results
```matlab
function p = predict(theta, X)
  m = size(X, 1);                  % number of training examples
  p = sigmoid(X*theta);
  % Threshold the predicted probabilities at 0.5
  for iter = 1:m
    if (p(iter) >= 0.5)
      p(iter) = 1;
    else
      p(iter) = 0;
    end
  end
end
```
- Recall from the previous section what `X*theta` is: a column vector whose $i$-th entry is $\boldsymbol{\theta}^T\mathbf{x}^{(i)}$. That is why the thresholding can also be done directly on `X*theta`, as sketched below.
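Since $\mathrm{sigmoid}(z) \ge 0.5$ exactly when $z \ge 0$, the loop in `predict` can be collapsed into a single comparison on `X*theta`. A sketch of that equivalent shortcut:

```matlab
function p = predict(theta, X)
  % sigmoid(X*theta) >= 0.5  is equivalent to  X*theta >= 0,
  % so the sigmoid call can be skipped entirely
  p = double(X*theta >= 0);
end
```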