I wrote an implementation of multivariate linear regression in Python, for the data in this link: http://www-bcf.usc.edu/~gareth/ISL/Advertising.csv
My main focus was to avoid loops as much as possible by using vectorization and letting numpy's functions do all the work.
How can I improve my code for performance and enhanced vectorization?
This is my code:
import numpy as np
import matplotlib.pyplot as plt
# read data matrix (skip the header row and the row-index column of the CSV)
data = np.genfromtxt('Advertising.csv', delimiter=',', dtype=float, skip_header=1, usecols=(1, 2, 3, 4))
m, n = data.shape
X = data[:, 0:n-1]
Y = data[:, n-1].reshape((m, 1))
# add a column of ones of length m to the X matrix, to account for theta_0
ones = np.ones((m, 1))
X = np.hstack((ones, X))
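# (sketch) the intercept column could also be added in a single step with np.c_:
# X = np.c_[np.ones(m), data[:, 0:n-1]]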
iterations = 500 # gradient descent iterations count
alpha = 0.01 # learning rate
theta = np.random.rand(n, 1)
def plotData():
    # get rid of the X_0 constants column
    features = X[:, 1:n]
    plt.figure(1)
    plt.subplot(131)
    plt.plot(features[:, 0], Y, 'bo')
    plt.title('Ad dollars spent on TV')
    plt.ylabel('Sales')
    plt.subplot(132)
    plt.plot(features[:, 1], Y, 'ro')
    plt.title('Ad dollars spent on Radio')
    plt.ylabel('Sales')
    plt.subplot(133)
    plt.plot(features[:, 2], Y, 'yo')
    plt.title('Ad dollars spent on Newspaper')
    plt.ylabel('Sales')
    plt.show()
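# (sketch, not called anywhere) the three nearly identical subplot blocks could be
# collapsed into a loop, assuming the feature columns are TV, Radio and Newspaper in that order:
def plotDataCompact():
    features = X[:, 1:n]
    titles = ['Ad dollars spent on TV', 'Ad dollars spent on Radio', 'Ad dollars spent on Newspaper']
    markers = ['bo', 'ro', 'yo']
    plt.figure(1)
    for i, (title, marker) in enumerate(zip(titles, markers)):
        plt.subplot(1, 3, i + 1)
        plt.plot(features[:, i], Y, marker)
        plt.title(title)
        plt.ylabel('Sales')
    plt.show()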
def computeCost():
    hypothesis = np.dot(X, theta)
    delta = np.dot((hypothesis - Y).transpose(), (hypothesis - Y))
    return (1 / m) * delta
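# (sketch, not called anywhere) computeCost() above returns a 1x1 array; an equivalent
# version could return the same value as a plain scalar:
def computeCostScalar():
    residuals = np.dot(X, theta) - Y
    return np.sum(residuals ** 2) / m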
# normalizeFeatures: scale-normalize all features to speed up gradient descent convergence
def normalizeFeatures():
    # 1) generate average vector mu, containing the average of each feature in the X matrix
    # 2) generate std. deviation vector sigma, containing the std. dev. of each feature in the X matrix
    # 3) subtract the average and divide by the standard deviation, for each feature column
    mu = np.ones((1, n))
    sigma = np.ones((1, n))
    # the loop starts from 1, not 0, to skip the first all-ones constants column in the features matrix
    for i in range(1, n):
        mu[0][i] = np.mean(X[:, i])
        sigma[0][i] = np.std(X[:, i])
        X[:, i] = (X[:, i] - mu[0][i]) / sigma[0][i]
    return mu, sigma
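# (sketch, not called anywhere) the per-column loop can be vectorized by operating on all
# feature columns at once; like the original, this would modify X in place if called:
def normalizeFeaturesVectorized():
    mu = np.ones((1, n))
    sigma = np.ones((1, n))
    mu[0, 1:] = np.mean(X[:, 1:], axis=0)
    sigma[0, 1:] = np.std(X[:, 1:], axis=0)
    X[:, 1:] = (X[:, 1:] - mu[0, 1:]) / sigma[0, 1:]
    return mu, sigma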
# gradientDescent() calculates the hypothesis coefficients using the gradient descent algorithm
def gradientDescent(theta):
    # vector to keep track of the progression of the cost function with each iteration
    J_history = np.ones((iterations, 1))
    for i in range(iterations):
        delta = np.dot((np.dot(X, theta) - Y).transpose(), X).transpose()
        theta -= (alpha / m) * delta
        J_history[i, 0] = computeCost()
    plt.plot(np.linspace(0, iterations, iterations), J_history)
    plt.title('Cost function against number of iterations')
    plt.xlabel('Number of iterations')
    plt.ylabel('Cost function J(theta)')
    plt.show()
    return
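# (sketch, not called anywhere) each iteration above forms np.dot(X, theta) twice, once
# for the gradient and once inside computeCost(); reusing the residual avoids the second
# product, and the history is returned instead of plotted:
def gradientDescentReuse(theta):
    J_history = np.zeros((iterations, 1))
    for i in range(iterations):
        residual = np.dot(X, theta) - Y
        theta -= (alpha / m) * np.dot(X.transpose(), residual)
        # cost at the start of this iteration
        J_history[i, 0] = np.sum(residual ** 2) / m
    return theta, J_history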
# normalEquation() calculates hypothesis equation coefficients analytically
def normalEquation():
    A = np.linalg.pinv(np.dot(X.transpose(), X))
    B = np.dot(X.transpose(), Y)
    theta = np.dot(A, B)
    return theta
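# (sketch, not called anywhere) np.linalg.lstsq solves the same least-squares problem and
# avoids explicitly forming pinv(X^T X), which is generally more numerically stable:
def normalEquationLstsq():
    theta_ls, _, _, _ = np.linalg.lstsq(X, Y, rcond=None)
    return theta_ls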
def predict(x_vector, mu, sigma):
    # scale the feature vector (skip the constant term at index 0)
    for i in range(1, n):
        x_vector[0, i] = (x_vector[0, i] - mu[0][i]) / sigma[0][i]
    return np.dot(x_vector, theta)
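# (sketch, not called anywhere) the scaling loop can be vectorized over the feature columns;
# working on a float copy also avoids mutating (and truncating) the caller's array:
def predictVectorized(x_vector, mu, sigma):
    x_scaled = x_vector.astype(float)
    x_scaled[:, 1:] = (x_scaled[:, 1:] - mu[:, 1:]) / sigma[:, 1:]
    return np.dot(x_scaled, theta)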
if __name__ == '__main__':
    plotData()
    mu, sigma = normalizeFeatures()
    gradientDescent(theta)
    print('Hypothesis coefficients from gradient descent:\n {}'.format(theta))
    print('Hypothesis coefficients from normal equation:\n {}'.format(normalEquation()))
    # use a float array so that in-place feature scaling inside predict() does not truncate values
    prediction_vector = np.array([1.0, 40.0, 40.0, 48.0]).reshape(1, 4)
    print('Prediction for values [1, 40, 40, 48] is {}'.format(predict(prediction_vector, mu, sigma)))