import numpy as np
from ss_setPlotParams import setPlotParams
from scipy.stats import norm

def scientific_tex(x, precision = 2):
    #Converts a float number into scientific notation in TeX format
    #   using np.format_float_scientific
    if len(x) == 1:
        #No uncertainty provided
        r = np.format_float_scientific(x, precision = precision)
        ra = r[:r.index('e')]; rb = int(r[r.index('e') + 1:]) #number and exponent
        if rb == 0:
            return ra
        else:
            return ra + r"\times 10^{" + "{}".format(rb) + r"}"
    else:
        #Uncertainty provided
        r = np.format_float_scientific(x[0], precision = precision)
        ra = r[:r.index('e')]; rb = int(r[r.index('e') + 1:]) #number and exponent
        rx1 = np.round(x[1]/10**rb, decimals = precision)
        return r"(" + ra + r"\pm" + "{}".format(rx1) + r")\times 10^{" + "{}".format(rb) + r"}"

def getdata(censored = True):
    if censored:
        data = np.loadtxt('Hoggetal2010_Table1.dat', comments = '#', skiprows = 5, \
                          dtype = {'names': ('id', 'x', 'y', 'sy', 'sx', 'rhoxy'), \
                                   'formats': ('i4', 'f4', 'f4', 'f4', 'f4', 'f4')})
    else:
        data = np.loadtxt('Hoggetal2010_Table1.dat', comments = '#', \
                          dtype = {'names': ('id', 'x', 'y', 'sy', 'sx', 'rhoxy'), \
                                   'formats': ('i4', 'f4', 'f4', 'f4', 'f4', 'f4')})
    return data

def fit(data, quadratic = False):
    #Invert the data matrices to get the best-fit parameter values and uncertainties
    ### Define A and Y matrices and the inverse of the covariance matrix
    if quadratic:
        A = np.array([np.ones(len(data)), data['x'], data['x']**2]).transpose()
    else:
        A = np.array([np.ones(len(data)), data['x']]).transpose()
    Y = np.array(data['y'])[:, np.newaxis]
    Sigma_1 = np.linalg.inv(np.diag(data['sy']**2))
    ### Compute best-fit parameters and parameter covariance matrix
    Temp = A.transpose().dot(Sigma_1)
    Sigma_Pars = np.linalg.inv(Temp.dot(A))
    X = Sigma_Pars.dot(Temp.dot(Y))
    return A, Y, X, Sigma_Pars

def hw8q1():
    data = getdata(censored = True)
    A, Y, X, Sigma_Pars = fit(data)
    b = X[0][0]
    m = X[1][0]
    sigma_b = np.sqrt(Sigma_Pars[0][0])
    sigma_m = np.sqrt(Sigma_Pars[1][1])
    rho_bm = Sigma_Pars[0][1]/(sigma_b*sigma_m)
    print("-----------------")
    print("Best-fit intercept: {} ± {}".\
          format(np.round(b, decimals = 2), np.round(sigma_b, decimals = 2)))
    print("Best-fit slope: {} ± {}".\
          format(np.round(m, decimals = 2), np.round(sigma_m, decimals = 2)))
    print("Correlation coefficient rho_bm = {}".\
          format(np.round(rho_bm, decimals = 2)))
    print("-----------------")
    #plot
    plt = setPlotParams()
    plt.figure(figsize = (8, 8))
    plt.xlabel(r'$x$')
    plt.ylabel(r'$y$')
    plt.xlim(0, 300)
    plt.ylim(0, 700)
    plt.errorbar(data['x'], data['y'], yerr = data['sy'], marker = "o", \
                 markerfacecolor = "black", markeredgecolor = "black", markersize = 8, \
                 linestyle = "", ecolor = "black", \
                 label = "Points 5--20 from Hogg et al. 2010 Table 1")
    ## overlay model
    plt.plot(data['x'], A.dot(X), label = "Best-fit linear model")
    plt.legend(loc = 'best')
    string = r"$y = (" + "{}".format(np.round(m, decimals = 2)) + r" \pm " + \
        "{}".format(np.round(sigma_m, decimals = 2)) + r")x + (" + \
        "{}".format(np.round(b, decimals = 2)) + r" \pm " + \
        "{}".format(np.round(sigma_b, decimals = 2)) + r")$"
    plt.text(150, 100, string, fontsize = 15, \
             bbox = dict(ec = 'k', fc = 'w', alpha = 0.9), \
             ha = 'center', va = 'center')
    plt.savefig('hw8q1.png', bbox_inches = 'tight')

def hw8q2():
    data = getdata(censored = False) #This gets all the data rows
    A, Y, X, Sigma_Pars = fit(data)
    b = X[0][0]
    m = X[1][0]
    sigma_b = np.sqrt(Sigma_Pars[0][0])
    sigma_m = np.sqrt(Sigma_Pars[1][1])
    rho_bm = Sigma_Pars[0][1]/(sigma_b*sigma_m)
    print("-----------------")
    print("Best-fit intercept: {} ± {}".\
          format(np.round(b, decimals = 2), np.round(sigma_b, decimals = 2)))
    print("Best-fit slope: {} ± {}".\
          format(np.round(m, decimals = 2), np.round(sigma_m, decimals = 2)))
    print("Correlation coefficient rho_bm = {}".\
          format(np.round(rho_bm, decimals = 2)))
    print("-----------------")
    #plot
    plt = setPlotParams()
    plt.figure(figsize = (8, 8))
    plt.xlabel(r'$x$')
    plt.ylabel(r'$y$')
    plt.xlim(0, 300)
    plt.ylim(0, 700)
    plt.errorbar(data['x'], data['y'], yerr = data['sy'], marker = "o", \
                 markerfacecolor = "black", markeredgecolor = "black", markersize = 8, \
                 linestyle = "", ecolor = "black", \
                 label = "Points 1--20 from Hogg et al. 2010 Table 1")
    ## overlay model
    plt.plot(data['x'], A.dot(X), label = "Best-fit linear model")
    plt.legend(loc = 'best')
    string = r"$y = (" + "{}".format(np.round(m, decimals = 2)) + r" \pm " + \
        "{}".format(np.round(sigma_m, decimals = 2)) + r")x + (" + \
        "{}".format(np.round(b, decimals = 2)) + r" \pm " + \
        "{}".format(np.round(sigma_b, decimals = 2)) + r")$"
    plt.text(150, 100, string, fontsize = 15, \
             bbox = dict(ec = 'k', fc = 'w', alpha = 0.9), \
             ha = 'center', va = 'center')
    plt.savefig('hw8q2.png', bbox_inches = 'tight')

def hw8q3():
    data = getdata(censored = True)
    A, Y, X, Sigma_Pars = fit(data, quadratic = True)
    b = X[0][0]
    m = X[1][0]
    q = X[2][0]
    sigma_b = np.sqrt(Sigma_Pars[0][0])
    sigma_m = np.sqrt(Sigma_Pars[1][1])
    sigma_q = np.sqrt(Sigma_Pars[2][2])
    rho_bm = Sigma_Pars[0][1]/(sigma_b*sigma_m)
    rho_bq = Sigma_Pars[0][2]/(sigma_b*sigma_q)
    rho_mq = Sigma_Pars[1][2]/(sigma_m*sigma_q)
    print("-----------------")
    print("Best-fit constant term b: {} ± {}".\
          format(np.round(b, decimals = 2), np.round(sigma_b, decimals = 2)))
    print("Best-fit linear term m: {} ± {}".\
          format(np.round(m, decimals = 2), np.round(sigma_m, decimals = 2)))
    print("Best-fit quadratic term q: " + scientific_tex(np.array([q, sigma_q]), precision = 3))
    print("Correlation coefficient rho_bm = {}".\
          format(np.round(rho_bm, decimals = 2)))
    print("Correlation coefficient rho_bq = {}".\
          format(np.round(rho_bq, decimals = 2)))
    print("Correlation coefficient rho_mq = {}".\
          format(np.round(rho_mq, decimals = 2)))
    print("-----------------")
    #plot
    plt = setPlotParams()
    plt.figure(figsize = (8, 8))
    plt.xlabel(r'$x$')
    plt.ylabel(r'$y$')
    plt.xlim(0, 300)
    plt.ylim(0, 700)
    plt.errorbar(data['x'], data['y'], yerr = data['sy'], marker = "o", \
                 markerfacecolor = "black", markeredgecolor = "black", markersize = 8, \
                 linestyle = "", ecolor = "black", \
                 label = "Points 5--20 from Hogg et al. 2010 Table 1")
    ## overlay model
    AC = A.copy()
    AC.sort(axis = 0)
    xc = data['x'].copy()
    xc.sort()
    plt.plot(xc, AC.dot(X), label = "Best-fit quadratic model")
    plt.legend(loc = 'best')
    string = r"$y = " + scientific_tex(np.array([q, sigma_q]), precision = 1) + r"x^2 + (" + \
        "{}".format(np.round(m, decimals = 2)) + r" \pm " + \
        "{}".format(np.round(sigma_m, decimals = 2)) + r")x + (" + \
        "{}".format(np.round(b, decimals = 2)) + r" \pm " + \
        "{}".format(np.round(sigma_b, decimals = 2)) + r")$"
    plt.text(150, 100, string, fontsize = 15, \
             bbox = dict(ec = 'k', fc = 'w', alpha = 0.9), \
             ha = 'center', va = 'center')
    plt.savefig('hw8q3.png', bbox_inches = 'tight')

def hw8q4():
    data_orig = getdata(censored = False) #Get all the points
    Ndata = len(data_orig)
    #First, fit full dataset and get best-fit parameter values.
    A, Y, X, Sigma_Pars = fit(data_orig)
    b = X[0][0]
    m = X[1][0]
    #Now, bootstrap.
    B = 1000 #Number of bootstrap resamples
    bb = np.zeros(B)
    mm = np.zeros(B)
    index = np.arange(Ndata)
    for i in range(B):
        j = np.random.choice(index, Ndata, replace = True)
        data_boot = data_orig[j]
        A, Y, X, Sigma_Pars = fit(data_boot)
        bb[i] = X[0][0]
        mm[i] = X[1][0]
    sigma_b = ((bb - b)**2).mean()
    sigma_m = ((mm - m)**2).mean()
    print("-----------------")
    print("Best-fit intercept: {}".format(np.round(b, decimals = 3)))
    print("Variance in intercept: {}".format(np.round(sigma_b**2, decimals = 3)))
    print("Best-fit slope: {}".format(np.round(m, decimals = 3)))
    print("Variance in slope: {}".format(np.round(sigma_m**2, decimals = 3)))
    print("-----------------")