Week 1
Basics
$Warm\ Up\ Exercise$
— $warmUpExercise.m$
function A = warmUpExercise()
% ============= YOUR CODE HERE ==============
A = eye(5);   % return the 5x5 identity matrix
% ===========================================
end
$Plot\ Data$
— $plotData.m$
function plotData(x, y)
% ====================== YOUR CODE HERE ======================
figure(1);
plot(x, y, 'rx');   % plot the training data as red crosses (the argument is lowercase x)
xlabel('x');
ylabel('y');
% ===========================================================
end
$Compute\ Cost$
The cost function is a direct application of the formula:
$J(\theta)=\frac{1}{2m}\sum\limits_{i=1}^m(h_{\theta}(x^{(i)})-y^{(i)})^2$
— $computeCost.m$
function J = computeCost(X, y, theta)
m = length(y); % number of training examples
% You need to return the following variables correctly
J = 0;
% ====================== YOUR CODE HERE ======================
err = X * theta - y;               % h_theta(x) - y for every training example
J = 1/(2*m) * sum(err .^ 2);
% =========================================================================
end
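A quick sanity check with made-up data (a minimal sketch, not part of the exercise): with three examples whose errors are $-1,-2,-3$, the cost should come out to $14/6\approx 2.33$.
% hypothetical toy data; the first column of X is the intercept term
X = [1 1; 1 2; 1 3];
y = [1; 2; 3];
J = computeCost(X, y, [0; 0]);   % sum of squared errors is 14, so J = 14/(2*3) ≈ 2.3333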
$Gradient\ Descent$
Just apply the update rule directly:
$Repeat \{ \\ \theta_j:=\theta_j-\alpha\frac{1}{m}\sum\limits_{i=1}^m(h_{\theta}(x^{(i)})-y^{(i)})x^{(i)}_j \\ \}$
— $gradientDescent.m$
function [theta, J_history] = gradientDescent(X, y, theta, alpha, num_iters)
m = length(y); % number of training examples
J_history = zeros(num_iters, 1);
for iter = 1:num_iters
% ====================== YOUR CODE HERE ======================
theta = theta - alpha/m * X' * (X*theta - y);   % vectorized simultaneous update of all theta_j
% ============================================================
% Save the cost J in every iteration
J_history(iter) = computeCost(X, y, theta);
end
end
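A minimal usage sketch on the same made-up data (the learning rate and iteration count are arbitrary choices): since the toy targets satisfy $y=x$ exactly, $\theta$ should end up close to $[0;1]$ and J_history should decrease.
X = [1 1; 1 2; 1 3];    % hypothetical toy data, intercept column included
y = [1; 2; 3];
[theta, J_history] = gradientDescent(X, y, zeros(2, 1), 0.1, 1500);
% theta is expected to be approximately [0; 1]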
Optional
$Feature\ Normalize$
Feature scaling: subtract each feature's mean and divide by its standard deviation.
— $featureNormalize.m$
function [X_norm, mu, sigma] = featureNormalize(X)
X_norm = X;
mu = zeros(1, size(X, 2));
sigma = zeros(1, size(X, 2));
% ====================== YOUR CODE HERE ======================
mu = mean(X);
sigma = std(X);
% center each feature and scale it by its standard deviation
for i = 1:size(X, 1)
for j = 1:size(mu, 2)
X_norm(i,j) = (X_norm(i,j) - mu(1,j)) / sigma(1, j);
end
end
% ============================================================
end
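The double loop can also be replaced with one vectorized expression; a sketch (implicit broadcasting needs Octave or MATLAB R2016b+, otherwise use bsxfun). Note that when predicting on new examples later, the same mu and sigma must be reused rather than recomputed.
% vectorized alternative with the same result as the loops above
mu = mean(X);
sigma = std(X);
X_norm = (X - mu) ./ sigma;   % broadcasting subtracts and divides column-wise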
$Compute\ Cost\ Multi$
Apply the formula:
— $computeCostMulti.m$
function J = computeCostMulti(X, y, theta)
m = length(y); % number of training examples
J = 0;
% ====================== YOUR CODE HERE ======================
% You should set J to the cost.
J = 1 / (2 * m) * sum(((X * theta) - y) .^ 2);
% =========================================================================
end
$Gradient\ Descent\ Multi$
Similar to the single-feature version; just apply the update rule directly.
— $gradientDescentMulti.m$
function [theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters)
m = length(y); % number of training examples
J_history = zeros(num_iters, 1);
for iter = 1:num_iters
% ====================== YOUR CODE HERE ======================
theta = theta - alpha / m * (X' * (X * theta - y));
% ============================================================
% Save the cost J in every iteration
J_history(iter) = computeCostMulti(X, y, theta);
end
end
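The exercise also suggests plotting J_history to check convergence and to compare learning rates; a rough sketch (alpha and the iteration count here are just example values):
% assumes X (with the intercept column) and y are already loaded, as in the exercise script
[theta, J_history] = gradientDescentMulti(X, y, zeros(size(X, 2), 1), 0.01, 400);
figure;
plot(1:numel(J_history), J_history, '-b', 'LineWidth', 2);   % cost should decrease monotonically
xlabel('Number of iterations');
ylabel('Cost J');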
$Normal\ Equations$
Apply the closed-form formula directly:
$\theta=(X^TX)^{-1}X^Ty$
— $normalEqn.m$
function [theta] = normalEqn(X, y)
theta = zeros(size(X, 2), 1);
% ====================== YOUR CODE HERE ======================
theta = pinv(X'*X) * X' * y;   % pinv also handles the case where X'X is singular
% ============================================================
end
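A quick check on the same made-up data (a sketch, not part of the exercise): since $y=x$ exactly, the closed-form solution recovers $\theta=[0;1]$, matching what gradient descent converges to.
X = [1 1; 1 2; 1 3];
y = [1; 2; 3];
theta = normalEqn(X, y);   % returns [0; 1] exactly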
Week 2
Basics
$Sigmoid\ function$
The $sigmoid$ function is $g(z)=\frac{1}{1+e^{-z}}$.
Apply it element-wise to whatever is passed in (scalar, vector, or matrix).
— $sigmoid.m$
function g = sigmoid(z)
%SIGMOID Compute sigmoid function
g = zeros(size(z));
% ====================== YOUR CODE HERE ======================
[m, n] = size(z);
for i = 1:m
for j = 1:n
g(i,j) = 1 / (1 + exp(-z(i, j)));   % exp is available in both Octave and MATLAB
end
end
% =============================================================
end
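The element-wise loop works, but since exp already operates element-wise on matrices, the whole body can be written as one vectorized line; a sketch:
% vectorized alternative
g = 1 ./ (1 + exp(-z));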
$Cost\ function\ and\ gradient$
Use the logistic regression cost function
$J(\theta)=-\frac{1}{m}\sum\limits_{i=1}^m[y^{(i)}\log(h_{\theta}(x^{(i)}))+(1-y^{(i)})\log(1-h_{\theta}(x^{(i)}))]$ and compute the cost directly.
The derivation of the gradient is deferred for now; the result is:
$\frac{\partial J(\theta)}{\partial \theta_j}=\frac{1}{m}\sum\limits_{i=1}^m(h_{\theta}(x^{(i)})-y^{(i)})x^{(i)}_j$
— $costFunction.m$
function [J, grad] = costFunction(theta, X, y)
%COSTFUNCTION Compute cost and gradient for logistic regression
m = length(y);
J = 0;
grad = zeros(size(theta));
% ====================== YOUR CODE HERE ======================
h = sigmoid(X * theta);
J = -1 / m * (y' * log(h) + (1-y') * log(1-h));
grad = 1 / m * (X' * (h - y));
% =============================================================
end
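The exercise script hands this cost function to fminunc for the actual minimization; a sketch of that call (MaxIter 400 follows the handout, and X and y are assumed to be already loaded):
% sketch: minimize the cost with fminunc, supplying the gradient
options = optimset('GradObj', 'on', 'MaxIter', 400);
initial_theta = zeros(size(X, 2), 1);
[theta, cost] = fminunc(@(t)(costFunction(t, X, y)), initial_theta, options);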
$predict$
— $predict.m$
function p = predict(theta, X)
%PREDICT Predict whether the label is 0 or 1 using learned logistic
m = size(X, 1); % Number of training examples
p = zeros(m, 1);
% ====================== YOUR CODE HERE ======================
p = sigmoid(X * theta);
for i = 1:m
if p(i) >= 0.5
p(i) = 1;
else
p(i) = 0;
end
end
% =========================================================================
end
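The loop can be collapsed into a single comparison, since a logical result is already 0/1; a sketch:
% vectorized alternative
p = sigmoid(X * theta) >= 0.5;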
Optional
$Cost\ function\ and\ gradient$
Cost function: $J(\theta)=-\frac{1}{m}\sum\limits_{i=1}^m[y^{(i)}\log(h_{\theta}(x^{(i)}))+(1-y^{(i)})\log(1-h_{\theta}(x^{(i)}))]+\frac{\lambda}{2m}\sum\limits_{j=1}^n\theta_j^2$
Gradient:
$\frac{\partial J(\theta)}{\partial \theta_j}=\frac{1}{m}\sum\limits_{i=1}^m(h_{\theta}(x^{(i)})-y^{(i)})x^{(i)}_j \ for\ j = 0$
$\frac{\partial J(\theta)}{\partial \theta_j}=(\frac{1}{m}\sum\limits_{i=1}^m(h_{\theta}(x^{(i)})-y^{(i)})x^{(i)}_j)+\frac{\lambda}{m}\theta_j \ for\ j \ge 1$
— $costFunctionReg.m$
function [J, grad] = costFunctionReg(theta, X, y, lambda)
%COSTFUNCTIONREG Compute cost and gradient for logistic regression with regularization
m = length(y); % number of training examples
J = 0;
grad = zeros(size(theta));
% ====================== YOUR CODE HERE ======================
h = sigmoid(X * theta);
% reuse the unregularized cost, then add the penalty; theta(1) is excluded from it
J = costFunction(theta, X, y) + lambda / (2 * m) * (sum(theta .^ 2) - theta(1) .^ 2);
grad = 1 / m * X' * (h - y) + lambda / m * theta;
grad(1) = grad(1) - lambda / m * theta(1);   % the bias term is not regularized
% =============================================================
end
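An equivalent way to keep the bias term out of the penalty (a sketch of an alternative, not the graded solution) is to zero out theta(1) once and then use that copy everywhere:
% sketch: theta_reg equals theta except that the bias entry is zero
h = sigmoid(X * theta);
theta_reg = [0; theta(2:end)];
J = -1 / m * (y' * log(h) + (1 - y') * log(1 - h)) + lambda / (2 * m) * (theta_reg' * theta_reg);
grad = 1 / m * X' * (h - y) + lambda / m * theta_reg;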
Week 3
$Vectorize\ Logistic\ Regression$
Similar to the regularized cost function from before, so the same code applies directly.
— $lrCostFunction.m$
function [J, grad] = lrCostFunction(theta, X, y, lambda)
%LRCOSTFUNCTION Compute cost and gradient for logistic
m = length(y);
J = 0;
grad = zeros(size(theta));
% ====================== YOUR CODE HERE ======================
h = sigmoid(X * theta);
J = -1 / m * (y' * log(h) + (1-y')*(log(1-h))) + lambda / (2 * m) * (sum(theta .^ 2) - theta(1) .^ 2);
grad = 1 / m * X' * (h - y) + lambda / m * theta;
grad(1) = grad(1) - lambda / m * theta(1);   % do not regularize the bias term
% =============================================================
grad = grad(:);
end
$One-vs-All\ Training$
Use $fmincg$ to compute $all\_theta$: for each digit, $fmincg$ trains a classifier on the training set, and the resulting parameter vector is stored as a row of $all\_theta$.
— $oneVsAll.m$
function [all_theta] = oneVsAll(X, y, num_labels, lambda)
m = size(X, 1);
n = size(X, 2);
all_theta = zeros(num_labels, n + 1);
X = [ones(m, 1) X];
% ====================== YOUR CODE HERE ======================
initial_theta = zeros(n + 1, 1);
options = optimset('GradObj', 'on', 'MaxIter', 50);
for i = 1:num_labels
% train one binary classifier per digit: (y == i) builds the 0/1 label vector for class i
all_theta(i,:) = fmincg(@(t)(lrCostFunction(t, X, (y==i), lambda)), initial_theta, options);
end
% ========================================================================
end
$Predict\ for\ One-Vs-All$
Use $X\times \theta^T$ to compute, for every example, the probability of each digit:
$[m\times(n+1)]\times[10\times(n+1)]^T$
then use max to get the index of the column with the highest probability.
— $predictOneVsAll.m$
function p = predictOneVsAll(all_theta, X)
m = size(X, 1);
num_labels = size(all_theta, 1);
p = zeros(size(X, 1), 1);
X = [ones(m, 1) X];
% ====================== YOUR CODE HERE ======================
all_p = sigmoid(X * all_theta');
[A, p] = max(all_p, [], 2);
% =========================================================================
end
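To see how well the classifier does, the exercise script compares the predictions against the labels; a sketch (all_theta, X, and y are assumed to be the trained parameters, training matrix, and label vector):
% sketch: training-set accuracy as a percentage
pred = predictOneVsAll(all_theta, X);
fprintf('Training Set Accuracy: %f\n', mean(double(pred == y)) * 100);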
$Implement\ Predict$
The exercise provides a $400\to 25\to 10$ neural network: 400 input units, 25 hidden units, and 10 output units, each output representing the probability that the example is a particular digit.
The parameters $\Theta_1$ and $\Theta_2$ are given, so we only need to run the forward pass with them.
— $predict.m$
function p = predict(Theta1, Theta2, X)
m = size(X, 1);
num_labels = size(Theta2, 1);
p = zeros(size(X, 1), 1);
% ====================== YOUR CODE HERE ======================
X = [ones(m, 1) X];
hide_layer = sigmoid(X * Theta1');
hide_layer = [ones(m, 1) hide_layer];
out_layer = sigmoid(hide_layer * Theta2');
[A, p] = max(out_layer, [], 2);
% =========================================================================
end