import numpy as np
from scipy.stats import norm, kstest, ks_2samp
from ss_setPlotParams import setPlotParams

def hw4q1_getdata(inflate_errors = 1.0):
    #The output uncertainties are inflated by a factor inflate_errors
    #read data
    data = np.loadtxt('Boyeretal2011_Cstars_KbandLF.csv', comments = '#', \
                      dtype = {'names' : ('k', 'dk'), 'formats': ('f4', 'f4')}, delimiter = ',')
    k = data['k']
    dk = data['dk']*inflate_errors
    data = 0.
    return k, dk

def hw4q1_getCB(k, dk, Niter, alpha):
    Ndata = len(k)
    #initialise the histogram bin edges
    h, binedges = np.histogram(k, bins = 40)
    Nbins = len(h)
    h = np.zeros([Niter, Nbins])
    ksamp = norm.rvs(loc = k, scale = dk, size = [Niter, Ndata]) #Realisations
    locmax = np.zeros(Niter) #location of the maximum of the KLF
    for i in range(Niter):
        htemp, binedgestemp = np.histogram(ksamp[i, :], bins = binedges)
        h[i, :] = htemp
        locmax[i] = binedges[np.argmax(htemp)]

    #Compute CI for the KLF maximu, and the CIs for each bin
    #   by retaining the central 100*(1-alpha)% of the points.
    frac = int(np.floor(alpha*Niter/2)) #fraction of points to be excluded on either side.
    #Compute the CI for the KLF maximum
    locmaxsorted = np.sort(locmax.copy())
    maxCI = locmaxsorted[frac:-frac]
    #Compute the CIs by retaining the central 95% of the points for each bin
    hsorted = np.sort(h.copy(), axis = 0)
    hCI = hsorted[frac:-frac, :] #This automatically removes frac points from the tail on either side.
    bincenters = (binedges[1:] + binedges[:-1])/2 #central locations of bins
    hCentral = hCI[int(np.floor(Niter/2)), :] #central values of samples for each bin
    eh = np.array([hCentral - hCI[0,:], hCI[-1,:] - hCentral]) #array with asymmetric error bars

    return maxCI, binedges, bincenters, hCentral, eh

def hw4q1_plot(maxCI, binedges, bincenters, hCentral, eh, outfile = 'KLF_original'):
    plt = setPlotParams()
    plt.figure(figsize = (8, 8))
    plt.plot(binedges[:-1], hCentral, label = 'Median', drawstyle = 'steps')
    binwidth = binedges[1] - binedges[0]
    plt.errorbar(bincenters - binwidth, hCentral, marker = 'o', linestyle = 'None', yerr = eh)
    maxmedian = np.median(maxCI)
    plt.plot([maxCI[0], maxCI[-1]], [np.max(hCentral), np.max(hCentral)], label = 'CI for maximum location')
    plt.plot(maxmedian, np.max(hCentral), 'ro', label = 'median of maximum location')
    plt.legend(loc = 'best')
    plt.savefig(outfile + '.png', bbox_inches = 'tight')

def hw4q1(Niter = 1000, alpha = 0.05):
    #first, generate CBs with original uncertainties from file.
    k, dk = hw4q1_getdata()
    maxCI, binedges, bincenters, hCentral, eh = hw4q1_getCB(k, dk, Niter, alpha)
    hw4q1_plot(maxCI, binedges, bincenters, hCentral, eh)
    #now, generate CBs with uncertainties inflated by 20x.
    k, dk = hw4q1_getdata(inflate_errors = 20)
    maxCI, binedges, bincenters, hCentral, eh = hw4q1_getCB(k, dk, Niter, alpha)
    hw4q1_plot(maxCI, binedges, bincenters, hCentral, eh, outfile = 'KLF_inflated_20x')
    #now, generate CBs with uncertainties inflated by 20x.
    k, dk = hw4q1_getdata(inflate_errors = 100)
    maxCI, binedges, bincenters, hCentral, eh = hw4q1_getCB(k, dk, Niter, alpha)
    hw4q1_plot(maxCI, binedges, bincenters, hCentral, eh, outfile = 'KLF_inflated_100x')

def hw4q2():
    #read data
    M31distmod = 24.9 #Distance modulus to M31 (mag)
    k1 = np.loadtxt('../Homeworks/MW_GCs.txt', comments = '#')
    k2 = np.loadtxt('../Homeworks/M31_GCs.txt', comments = '#')
    k2 = k2 - M31distmod

    ##The general version of the callable can be in the form of a lambda:
    ##  kstest(k1, lambda x: norm.cdf(x, loc = mu, scale = sigma))
    ##NOTE THAT PARAMETERS ESTIMATED FROM THE DATA MUST NOT BE USED TO GENERATE THE MODEL -- THE KS TEST IS INVALID IN THIS CASE.

    #Find D
    D, p = ks_2samp(k1, k2) #Should this be ECDF2?
    print("2-sample KS statistic for MW and M31 data: {}, with probability: {}."\
          .format(np.round(D, decimals = 3), np.round(p, decimals = 3)))
    if p <= 0.05:
        print("Null hypothesis rejected at 95% significance for MW and M31 samples.")
    else:
        print("Null hypothesis accepted at 95% significance for MW and M31 samples.")

    #Do t1 and t2 come from a standard normal?
    k1mean = k1.mean()
    k1std = k1.std()
    k2mean = k2.mean()
    k2std = k2.std()
    print("The mean and std for the MW sample are {} and {}.".format(np.round(k1mean, decimals = 3), np.round(k1std, decimals = 3)))
    print("The mean and std for the M31 sample are {} and {}.".format(np.round(k2mean, decimals = 3), np.round(k2std, decimals = 3)))
    t1 = (k1 - k1.mean())/k1.std(ddof = 1)
    t2 = (k2 - k2.mean())/k2.std(ddof = 1)
    D1, p1 = kstest(t1, 'norm')
    print("KS statistic for studentised version of MW magnitudes: {}, with probability: {}"\
          .format(np.round(D1, decimals = 3), np.round(p1, decimals = 3)))
    if p1 <= 0.05:
        print("Null hypothesis rejected at 95% significance for MW sample.")
    else:
        print("Null hypothesis accepted at 95% significance for MW sample.")
    D2, p2 = kstest(t2, 'norm')
    print("KS statistic for studentised version of M31 magnitudes: {}, with probability: {}"\
          .format(np.round(D2, decimals = 3), np.round(p2, decimals = 3)))
    if p2 <= 0.05:
        print("Null hypothesis rejected at 95% significance for M31 sample.")
    else:
        print("Null hypothesis accepted at 95% significance for M31 sample.")

    x = np.linspace(np.min(t1)*2, np.max(t1)*2, 1000) #grid to generate normal distribution
    fx = norm.pdf(x)
    #Plot the histograms for each set of t values 
    plt = setPlotParams()
    plt.figure(figsize = (8, 8))
    n, bins, patches = plt.hist(t1, label = 'MW', histtype = 'step')
    plt.plot(x, fx*np.max(n)/np.max(fx), label = 'Standard normal (scaled)')
    plt.legend(loc = 'best')
    plt.savefig('MW_GCs.png')
    plt.close()
    plt = setPlotParams()
    plt.figure(figsize = (8, 8))
    n, bins, patches = plt.hist(t2, label = 'M31', histtype = 'step')
    plt.plot(x, fx*np.max(n)/np.max(fx), label = 'Standard normal (scaled)')
    plt.legend(loc = 'best')
    plt.savefig('M31_GCs.png')