
# heights.py

'''
https://matplotlib.org/stable/plot_types/stats/hist_plot.html#sphx-glr-plot-types-stats-hist-plot-py

https://www.kaggle.com/code/maksymkonevych/notebook31ff22e942/input?select=hs_heights_pair.csv

  Data on the heights of high school boys and girls is loaded from a local
  CSV file, and used to create two overlaid histograms. In addition, the
  means and stdevs of the two groups are calculated and reported on the
  plot as vertical lines -- a solid line for the mean, and two dashed lines
  for mean +/- stdev.
'''

import matplotlib.pyplot as plt 
from collections import defaultdict
import csv, math, statistics
from frange import *


def buildDist(fnm):
  '''
  Load the numeric columns of a CSV file.

  The first row of the file supplies the column labels. Every non-blank
  cell below it is converted to a float and appended to the list for its
  column. Blank cells are skipped, so the columns may end up with
  different lengths.

  Parameters:
    fnm -- path of the CSV file to read

  Returns:
    a defaultdict(list) mapping each column label to its list of floats

  Raises:
    OSError if the file cannot be opened;
    ValueError if a non-blank cell cannot be converted to a float
  '''
  columns = defaultdict(list)
  # newline='' leaves line-ending handling to the csv module, as the
  # csv documentation recommends
  with open(fnm, newline='') as f:
    for row in csv.DictReader(f):
      # each row arrives as {column1: value1, column2: value2,...}
      for (k, v) in row.items():
        if k is None or v is None:
          # ragged row: DictReader collects surplus cells under the key
          # None, and fills in missing trailing cells with None
          continue
        if v.strip():   # ignore blank cells rather than crash in float()
          columns[k].append(float(v))
  return columns

def stats(label, heights, col):
  '''
  Print the mean and sample standard deviation of a group of heights,
  and mark them on the current plot as vertical lines.

  Parameters:
    label   -- name of the group, used only in the printed report
    heights -- the height values for the group
    col     -- matplotlib color used for all three vertical lines

  Raises:
    statistics.StatisticsError if heights holds fewer than two values
  '''
  avg = statistics.mean(heights)
  spread = statistics.stdev(heights)
  print(f"Statistics ({label!s})")
  print(f"Mean: {avg:.2f} ; stdev: {spread:.2f}")

  # only the upper y-axis limit is needed, to position the text label
  _, top = plt.ylim()

  # solid line at the mean, with its value written next to it
  plt.axvline(avg, color=col)
  plt.text(avg, top*0.9, f" {avg:.2f}")

  # thin dashed lines one stdev above and below the mean
  for edge in (avg + spread, avg - spread):
    plt.axvline(edge, color=col, ls='dashed', lw=1)

# load data
columns = buildDist('hs_heights_pair.csv')
boys = columns['boys']  # use CSV data column labels
girls = columns['girls']

# calculate a shared range for the histogram bins, and
# split it into bins of width 0.5
minHeight = min(min(boys), min(girls))
maxHeight = max(max(boys), max(girls))
# NOTE(review): the extra 0.5 on the upper limit presumably makes the last
# bin edge reach maxHeight, assuming frange() excludes its end value -- confirm
bins = list(frange(minHeight, maxHeight+0.5, 0.5))

# one color per group, shared by its histogram and its mean/stdev lines;
# "C0" and "C1" are the first two colors of matplotlib's property cycle,
# which are the colors the two histograms receive by default
boysCol = "C0"
girlsCol = "C1"

# overlay the two histograms; alpha < 1 keeps both visible where they overlap
plt.hist(boys, bins, label="Boys", alpha=0.7, color=boysCol)
plt.hist(girls, bins, label="Girls", alpha=0.7, color=girlsCol)

# report the means and stdevs for the two groups
stats("Boys", boys, boysCol)
stats("Girls", girls, girlsCol)

plt.xlabel('Heights (cm)')
plt.ylabel('No.')
plt.title('High School Height')
plt.legend()
plt.show()
