
# quadReg.py

import math, random
import matplotlib.pylab as plt
from gaussian import *


def quadFn(a, b, c, x):
  """Evaluate the quadratic a*x^2 + b*x + c at x."""
  squareTerm = a * x**2
  linearTerm = b * x
  return squareTerm + linearTerm + c


def estimateCoefs(xs, ys):
  """Estimate a, b, c of the least-squares quadratic y = ax^2 + bx + c.

  Builds the three normal equations of the fit as an augmented matrix
  and hands it to gaussian() to solve.

  Args:
    xs: x values of the observations.
    ys: y values of the observations, one per x value.

  Returns:
    The tuple (a, b, c).

  Raises:
    ValueError: if xs and ys differ in length, or if fewer than three
      observations are given (the normal equations are then singular).
  """
  # Materialize the inputs: they are traversed several times below, which
  # would silently exhaust a one-shot iterable.
  xVals = list(xs)
  yVals = list(ys)
  n = len(yVals)
  if len(xVals) != n:
    # zip() would otherwise drop the surplus values while n still counted
    # every y, producing inconsistent equations.
    raise ValueError(
      f"xs and ys must have the same length (got {len(xVals)} and {n})")
  if n < 3:
    raise ValueError("at least 3 observations are needed to fit a quadratic")

  sumX = sum(xVals)
  sumY = sum(yVals)
  sumXY = sum(x*y for (x, y) in zip(xVals, yVals))
  sumX2 = sum(x**2 for x in xVals)
  sumX2Y = sum(x**2*y for (x, y) in zip(xVals, yVals))
  sumX3 = sum(x**3 for x in xVals)
  sumX4 = sum(x**4 for x in xVals)

  # Augmented rows; the unknown columns are ordered c, b, a.
  row1 = [n,     sumX,  sumX2, sumY]
  row2 = [sumX,  sumX2, sumX3, sumXY]
  row3 = [sumX2, sumX3, sumX4, sumX2Y]
  # NOTE(review): gaussian() comes from the gaussian module, which is not
  # visible here; it is assumed to return the solution in column order
  # [c, b, a] -- confirm against that module.
  solution = gaussian([row1, row2, row3])
  return solution[2], solution[1], solution[0]   # A, B, C


def calcCorrCoef(ys, yPreds):
  """Calculate the correlation coefficient between data and predictions.

  Computes sqrt((spread - rss) / spread), where rss is the residual sum
  of squares and spread is the sum of squared deviations of ys from
  their mean.

  Args:
    ys: observed values.
    yPreds: predicted values, one per observation.

  Returns:
    The correlation coefficient. It is 0.0 when the predictions fit no
    better than the mean of ys.

  Raises:
    ValueError: if ys and yPreds differ in length.
    ZeroDivisionError: if ys is empty or all ys are equal (zero spread).
  """
  preds = list(yPreds)
  n = len(ys)
  if len(preds) != n:
    # zip() would otherwise silently ignore the surplus values.
    raise ValueError(
      f"ys and yPreds must have the same length (got {n} and {len(preds)})")
  meanY = sum(ys)/n
  # residual sum of squares
  rss = sum((y - yP)**2 for (y, yP) in zip(ys, preds))
  # spread of the data around its mean
  spread = sum((y - meanY)**2 for y in ys)
  # Rounding (or a poor model) can push rss above spread; clamp at zero
  # so math.sqrt() does not fail with a domain error.
  explained = max(0.0, (spread - rss)/spread)
  return math.sqrt(explained)


def plotReg(xs, ys, yPreds, equStr):
  """Show a scatter plot of the data together with the fitted line.

  Args:
    xs: x values of the observations.
    ys: observed y values, drawn as magenta dots labelled "data".
    yPreds: predicted y values, one per x, drawn as a green line.
    equStr: legend label for the fitted line.

  Raises:
    ValueError: if xs and yPreds differ in length.
  """
  if len(xs) != len(yPreds):
    raise ValueError("xs and yPreds must have the same length")

  plt.scatter(xs, ys, color = "m",
               marker = "o", s = 30, label="data")
  # plt.plot() joins points in the order given, so draw the line in
  # ascending-x order; otherwise unsorted data yields a zig-zag.
  linePts = sorted(zip(xs, yPreds), key=lambda pt: pt[0])
  lineXs = [pt[0] for pt in linePts]
  lineYs = [pt[1] for pt in linePts]
  plt.plot(lineXs, lineYs, color = "g", label=equStr)
  plt.xlabel('x')
  plt.ylabel('y')
  plt.legend()
  plt.show()


# Sample observations used for the demo fit.
# Alternative data set:
# xs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
# ys = [1, 3, 2, 5, 7, 8, 8, 9, 10, 12]

xs = [1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5]
ys = [0, 5, 8, 11, 14, 15, 16, 17, 17]

# Fit the quadratic and report its coefficients.
A, B, C = estimateCoefs(xs, ys)
print(f"Est. quad coefs: {A:.3f} {B:.3f} {C:.3f}")

# Evaluate the fitted quadratic at each observed x.
yPreds = list(map(lambda x: quadFn(A, B, C, x), xs))

# Measure the quality of the fit, then plot it with the equation as label.
r = calcCorrCoef(ys, yPreds)
print(f"Corr. coef:  {r:.3f}")
equStr = f"y={A:.3f}x^2 + {B:.3f}x + {C:.3f} (r={r:.3f})"
plotReg(xs, ys, yPreds, equStr)