2 views

I am trying to implement this algorithm to find the intercept and slope for a single variable:

ALGORITHM OF THE LINEAR REGRESSION

Here is my Python code to update the intercept and slope, but it is not converging: the RSS increases with each iteration instead of decreasing, and after some iterations it becomes infinite. I cannot find any error in my implementation of the algorithm. How can I solve this problem? I have attached the CSV file too. Here is the code.

import pandas as pd

import numpy as np

#This Function takes X value, Y value and vector of w0(intercept),w1(slope)

#INPUT FEATURES=X(sq.feet of house size)

#TARGET VALUE=Y (Price of House)

#W=np.array([w0,w1]).reshape(2,1)

#W=[w0,

#    w1]

intercept=W[0][0]

slope=W[1][0]

#Here i will get a list

#list is like this

#gd=[sum(y-(intercept+slope*x)),

#     sum((y-(intercept+slope*x))*x)]

gd=[sum(y-(intercept+slope*x) for x,y in zip(X,Y)),

sum(((y-(intercept+slope*x))*x) for x,y in zip(X,Y))]

return np.array(gd).reshape(2,1)

#Defining Residual sum of squares

return sum((y-(W[0][0]+W[1][0]*x))**2 for x,y in zip(X,Y))

# Fixed hyper-parameters for the gradient-descent run.
n = 0.0001        # learning rate
iteration = 1500  # number of passes made by the update loop

# Starting values for the intercept (w0) and the slope (w1).
w0 = 0
w1 = 0

# Parameter vector W = [[w0], [w1]], i.e. a (2, 1) column.
W = np.array([[w0], [w1]])

for i in range(iteration):

Here is the CSV file.

by (33.1k points)

Simple Linear Regression in Python:

Code:

import numpy as np

import pandas as pd

import math

from sys import stdout

def get_numpy_data(data, features, output):
    """Build the feature matrix and target array for linear regression.

    A column named 'constant' holding the value 1 is added to ``data``
    in place and is placed first in the returned matrix, so the intercept
    can be learned like any other weight.

    Args:
        data: pandas DataFrame containing the feature and target columns.
        features: sequence of feature column names; it is not modified.
        output: name of the target column.

    Returns:
        Tuple ``(features_matrix, output_array)``: the selected columns
        (constant first) as a NumPy array, and the target column as a
        NumPy array.
    """
    # Adding a constant column with value 1 in the dataframe.
    data['constant'] = 1

    # Build a new list so the caller's ``features`` is left untouched.
    columns = ['constant'] + list(features)

    # DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is its
    # replacement.
    features_matrix = data[columns].to_numpy()

    # Target column is converted to a numpy array.
    output_array = np.array(data[output])

    return (features_matrix, output_array)

def predict_outcome(feature_matrix, weights):
    """Return the dot product of ``feature_matrix`` with ``weights``.

    ``weights`` is first converted to a NumPy array, so a plain list of
    coefficients is accepted.
    """
    return np.dot(feature_matrix, np.array(weights))

def errors(output, predictions):
    """Return the residuals, computed as ``predictions - output``."""
    return predictions - output

def feature_derivative(errors, feature):
    """Return twice the dot product of ``feature`` and ``errors``."""
    feature_error_product = np.dot(feature, errors)
    return 2 * feature_error_product

def regression_gradient_descent(feature_matrix, output, initial_weights, step_size, tolerance):
    """Fit regression weights by gradient descent.

    Each pass computes the predictions and errors once, then moves every
    weight against its own derivative. Passes repeat until the Euclidean
    norm of the derivatives used in a pass falls below ``tolerance``.

    Args:
        feature_matrix: 2-D NumPy array; column ``i`` is the feature that
            belongs to ``weights[i]``.
        output: array of target values.
        initial_weights: starting weights, one per column.
        step_size: multiplier applied to each derivative (learning rate).
        tolerance: gradient-magnitude threshold below which the descent
            is considered converged.

    Returns:
        NumPy float array holding the fitted weights.

    Raises:
        ValueError: if the gradient magnitude becomes infinite or NaN,
            which means the descent diverged (step_size too large for the
            scale of the features).
    """
    # Force a float array: integer initial weights such as [0, 0] would
    # otherwise make every in-place update below truncate to an integer.
    weights = np.array(initial_weights, dtype=float)

    converged = False
    while not converged:
        # compute the predictions based on feature_matrix and weights:
        predictions = predict_outcome(feature_matrix, weights)

        # compute the errors as predictions - output:
        error = errors(output, predictions)

        # All derivatives of this pass use the same ``error`` vector, so
        # updating the weights one at a time is still a single batch step.
        gradient_sum_squares = 0.0
        for i in range(len(weights)):
            # feature_matrix[:, i] is the feature column associated with weights[i]
            deriv = feature_derivative(error, feature_matrix[:, i])
            gradient_sum_squares += deriv ** 2

            # update the weight based on step size and derivative:
            weights[i] = weights[i] - step_size * deriv

        gradient_magnitude = np.sqrt(gradient_sum_squares)

        # A NaN magnitude never compares below ``tolerance``; fail loudly
        # instead of looping forever.
        if not np.isfinite(gradient_magnitude):
            raise ValueError(
                "gradient descent diverged (non-finite gradient); "
                "reduce step_size or rescale the features"
            )

        converged = gradient_magnitude < tolerance

    return weights