
# linReg.py
# Least Squares Fit


import math
import matplotlib.pyplot as plt


def estimateCoefs(xs, ys):
  """Estimate the coefficients of the least-squares line y = b + m*x.

  Uses the closed-form normal-equation solution:
      m = (n*Sxy - Sx*Sy) / (n*Sxx - Sx**2)
      b = (Sy - m*Sx) / n

  Args:
    xs: sequence of x values.
    ys: sequence of y values, same length as xs.

  Returns:
    Tuple (b, m): intercept and slope of the fitted line.

  Raises:
    ValueError: if xs and ys have different lengths.
    ZeroDivisionError: if xs is empty or all x values are identical,
      in which case the slope is undefined.
  """
  n = len(xs)
  # zip() would silently drop the unmatched tail while n still counted
  # every x, producing a wrong fit; reject mismatched input instead.
  if n != len(ys):
    raise ValueError(
        f"xs and ys must have the same length (got {n} and {len(ys)})")

  sumX = sum(xs)
  sumY = sum(ys)
  sumXY = sum(x*y for (x, y) in zip(xs, ys))
  sumX2 = sum(x**2 for x in xs)

  # n * (sum of squared deviations of x); zero means no spread in x
  denom = n*sumX2 - sumX**2
  if denom == 0:
    raise ZeroDivisionError(
        "cannot fit a line: xs is empty or all x values are identical")

  m = (n*sumXY - sumX*sumY)/denom
  b = (sumY - m*sumX)/n
  return b, m


def calcCorrCoef(ys, yPreds):
  """Return the correlation coefficient r of a fit.

  r is computed as sqrt((St - Sr) / St), where St is the total sum of
  squares of ys about their mean and Sr is the residual sum of squares
  between ys and yPreds.  The result is therefore never negative.

  Args:
    ys: sequence of observed y values.
    yPreds: sequence of predicted y values, same length as ys.

  Returns:
    The correlation coefficient as a float in [0, 1].

  Raises:
    ValueError: if the lengths differ, or if the predictions fit the data
      worse than the mean of ys does (Sr > St beyond rounding error).
    ZeroDivisionError: if ys is empty or all y values are identical.
  """
  n = len(ys)
  if n != len(yPreds):
    raise ValueError(
        f"ys and yPreds must have the same length (got {n} and {len(yPreds)})")

  my = sum(ys)/n
  # residual sum of squares
  rss = sum((y - yP)**2 for (y, yP) in zip(ys, yPreds))
  # spread of the data around its mean (total sum of squares)
  mspread = sum((y - my)**2 for y in ys)
  if mspread == 0:
    raise ZeroDivisionError(
        "correlation coefficient is undefined: all y values are identical")

  explained = mspread - rss
  if explained < 0:
    # A fit that explains almost nothing can land a hair below zero through
    # floating-point round-off; treat that as zero rather than letting
    # math.sqrt fail.  A clearly negative value means bad predictions.
    if -explained > 1e-9*mspread:
      raise ValueError(
          "residual sum of squares exceeds total sum of squares; "
          "yPreds fit worse than the mean of ys")
    explained = 0.0
  return math.sqrt(explained/mspread)


def plotReg(xs, ys, yPreds, equStr):
  """Show the data points and the fitted line in a matplotlib window.

  xs, ys   -- the observed data, drawn as magenta circles labelled "data"
  yPreds   -- predicted y for each x, drawn as a green line
  equStr   -- legend label for the fitted line
  """
  # axis titles
  plt.xlabel('x')
  plt.ylabel('y')

  # observations, then the regression line on top
  plt.scatter(xs, ys, s=30, marker="o", color="m", label="data")
  plt.plot(xs, yPreds, label=equStr, color="g")

  plt.legend()
  plt.show()


# ---- sample observations ----
xs = list(range(10))
ys = [1, 3, 2, 5, 7, 8, 8, 9, 10, 12]

# Alternative data sets (swap in for the pair above to try them):
#   xs = [0,   1,   2,   2.5, 3]
#   ys = [2.9, 3.7, 4.1, 4.4, 5]
#
#   xs = [1, 2, 3, 4, 5, 6, 7]
#   ys = [0.5, 2.5, 2.0, 4.0, 3.5, 6.0, 5.5]

# ---- fit the line y = b + m*x ----
b, m = estimateCoefs(xs, ys)
print(f"Est. line coefs: b = {b:.3f}, m = {m:.3f}")

# ---- evaluate the fitted line at every x and score the fit ----
yPreds = [m*x + b for x in xs]
r = calcCorrCoef(ys, yPreds)
print(f"Corr. coef:  {r:.3f}")

# ---- plot, with the fitted equation as the legend label ----
equStr = f"y={b:.3f} + {m:.3f}x  (r={r:.3f})"
plotReg(xs, ys, yPreds, equStr)