function [theta, J_history] = gradientDescent(X, y, theta, alpha, num_iters)
%GRADIENTDESCENT Performs gradient descent to learn theta
%   [theta, J_history] = GRADIENTDESCENT(X, y, theta, alpha, num_iters)
%   updates theta by taking num_iters gradient steps with learning rate
%   alpha.
%
%   Inputs:
%     X         - design matrix, one training example per row
%     y         - vector of target values, one per row of X
%     theta     - initial parameter column vector (one entry per column of X)
%     alpha     - learning rate (step size)
%     num_iters - number of gradient steps to take
%
%   Outputs:
%     theta     - parameter vector after num_iters updates
%     J_history - num_iters x 1 vector; J_history(k) is the value returned
%                 by computeCost(X, y, theta) after the k-th update

% Initialize some useful values
m = length(y); % number of training examples
J_history = zeros(num_iters, 1);

% Force the targets into a column so that X * theta - targets is an
% element-wise residual even when the caller passes y as a row vector.
targets = y(:);

for iter = 1:num_iters

    % ====================== YOUR CODE HERE ======================
    % Instructions: Perform a single gradient step on the parameter vector
    %               theta.
    %
    % Hint: While debugging, it can be useful to print out the values
    %       of the cost function (computeCost) and gradient here.
    %

    % Residual of the current hypothesis on every training example.
    residual = X * theta - targets;

    % Gradient of the squared-error cost with respect to each parameter.
    % Computing the whole vector at once guarantees every component is
    % derived from the same (old) theta and that no partial sum from one
    % parameter leaks into the next.
    gradient = (X' * residual) / m;

    % Simultaneous update of all parameters.
    theta = theta - alpha * gradient;

    % ============================================================

    % Save the cost J in every iteration
    J_history(iter) = computeCost(X, y, theta);

end