import csv
import os
import numpy as np
from sklearn.linear_model import LinearRegression


DATA_FILE = 'weights.csv'
PATH = os.path.dirname(os.path.abspath(__file__))

# Read the data file, which is looked up in the same directory as this script.
# newline='' is what the csv module asks for, so that the reader itself
# handles line endings (including newlines embedded in quoted fields).
with open(os.path.join(PATH, DATA_FILE), 'r', newline='') as f:
    reader = csv.reader(f)
    # Remove the header; the default keeps an empty file from raising.
    next(reader, None)
    # csv.reader yields [] for blank lines (e.g. a trailing empty line);
    # drop them so the later (height, weight) unpacking only sees real rows.
    data = [row for row in reader if row]

# Build the feature rows: one single-element list per sample, holding the
# height (first column of each row) converted to float.
inputs = [[float(h)] for h, _ in data]

# Build the targets: the weight (second column of each row) converted to float.
targets = [float(w) for _, w in data]

# Create the linear model. Note that fit() is never called in this script:
# the parameters are assigned by hand just below.
model = LinearRegression()

# Hand-picked parameters: a single coefficient a = 1.0 (for the one height
# feature) and intercept b = 0.0.
model.intercept_ = 0.0
model.coef_ = np.array([1.0])

# Report the parameters in use, one per line
print(
    'Model parameters:',
    f' a: {model.coef_[0]}',
    f' b: {model.intercept_}',
    sep='\n',
)

# Evaluate the model: the errors are prediction minus target, summarised as
# their mean square and their standard deviation.
predictions = model.predict(inputs)
errors = np.subtract(predictions, targets)
print('Mean squared error: %f' % np.square(errors).mean())
print('Standard deviation: %f' % errors.std())

# Plot the data: the samples as a scatter plot, with the model's predictions
# drawn over them as a red line.
import matplotlib.pyplot as plt
ax = plt.gca()  # current axes; pyplot creates the figure/axes if none exist
ax.scatter(inputs, targets, s=10)
ax.set_xlabel('Height')
ax.set_ylabel('Weight')

ax.plot(inputs, predictions, color='r')

plt.show()