import numpy as np from ss_setPlotParams import setPlotParams from scipy.stats import norm def scientific_tex(x, precision = 2): #Converts a float number into scientific notation in TeX format # using np.format_float_scientific if len(x) == 1: #No uncertainty provided r = np.format_float_scientific(x, precision = precision) ra = r[:r.index('e')]; rb = int(r[r.index('e') + 1:]) #number and exponent if rb == 0: return ra else: return ra + r"\times 10^{" + "{}".format(rb) + r"}" else: #Uncertainty provided r = np.format_float_scientific(x[0], precision = precision) ra = r[:r.index('e')]; rb = int(r[r.index('e') + 1:]) #number and exponent rx1 = np.round(x[1]/10**rb, decimals = precision) return r"(" + ra + r"\pm" + "{}".format(rx1) + r")\times 10^{" + "{}".format(rb) + r"}" def getdata(censored = True): if censored: data = np.loadtxt('Hoggetal2010_Table1.dat', comments = '#', skiprows = 5, \ dtype = {'names': ('id', 'x', 'y', 'sy', 'sx', 'rhoxy'), \ 'formats': ('i4', 'f4', 'f4', 'f4', 'f4', 'f4')}) else: data = np.loadtxt('Hoggetal2010_Table1.dat', comments = '#', \ dtype = {'names': ('id', 'x', 'y', 'sy', 'sx', 'rhoxy'), \ 'formats': ('i4', 'f4', 'f4', 'f4', 'f4', 'f4')}) return data def fit(data, quadratic = False): #Invert the data matrices to get the best-fit parameter values and uncertainties ### Define A and Y matrices and the inverse of the covariance matrix if quadratic: A = np.array([np.ones(len(data)), data['x'], data['x']**2]).transpose() else: A = np.array([np.ones(len(data)), data['x']]).transpose() Y = np.array(data['y'])[:, np.newaxis] Sigma_1 = np.linalg.inv(np.diag(data['sy']**2)) ### Compute best-fit parameters and parameter covariance matrix Temp = A.transpose().dot(Sigma_1) Sigma_Pars = np.linalg.inv(Temp.dot(A)) X = Sigma_Pars.dot(Temp.dot(Y)) return A, Y, X, Sigma_Pars def hw8q1(): data = getdata(censored = True) A, Y, X, Sigma_Pars = fit(data) b = X[0][0] m = X[1][0] sigma_b = np.sqrt(Sigma_Pars[0][0]) sigma_m = np.sqrt(Sigma_Pars[1][1]) rho_bm = Sigma_Pars[0][1]/(sigma_b*sigma_m) print("-----------------") print("Best-fit intercept: {} ± {}".\ format(np.round(b, decimals = 2), np.round(sigma_b, decimals = 2))) print("Best-fit slope: {} ± {}".\ format(np.round(m, decimals = 2), np.round(sigma_m, decimals = 2))) print("Correlation coefficient rho_bm = {}".\ format(np.round(rho_bm, decimals = 2))) print("-----------------") #plot plt = setPlotParams() plt.figure(figsize = (8, 8)) plt.xlabel(r'$x$') plt.ylabel(r'$y$') plt.xlim(0, 300) plt.ylim(0, 700) plt.errorbar(data['x'], data['y'], yerr = data['sy'], marker = "o", \ markerfacecolor = "black", markeredgecolor = "black", markersize = 8, \ linestyle = "", ecolor = "black", \ label = "Points 5--20 from Hogg et al. 2010 Table 1") ## overlay model plt.plot(data['x'], A.dot(X), label = "Best-fit linear model") plt.legend(loc = 'best') string = r"$y = (" + "{}".format(np.round(m, decimals = 2)) + r" \pm " + \ "{}".format(np.round(sigma_m, decimals = 2)) + r")x + (" + \ "{}".format(np.round(b, decimals = 2)) + r" \pm " + \ "{}".format(np.round(sigma_b, decimals = 2)) + r")$" plt.text(150, 100, string, fontsize = 15, \ bbox = dict(ec = 'k', fc = 'w', alpha = 0.9), \ ha = 'center', va = 'center') plt.savefig('hw8q1.png', bbox_inches = 'tight') def hw8q2(): data = getdata(censored = False) #This gets all the data rows A, Y, X, Sigma_Pars = fit(data) b = X[0][0] m = X[1][0] sigma_b = np.sqrt(Sigma_Pars[0][0]) sigma_m = np.sqrt(Sigma_Pars[1][1]) rho_bm = Sigma_Pars[0][1]/(sigma_b*sigma_m) print("-----------------") print("Best-fit intercept: {} ± {}".\ format(np.round(b, decimals = 2), np.round(sigma_b, decimals = 2))) print("Best-fit slope: {} ± {}".\ format(np.round(m, decimals = 2), np.round(sigma_m, decimals = 2))) print("Correlation coefficient rho_bm = {}".\ format(np.round(rho_bm, decimals = 2))) print("-----------------") #plot plt = setPlotParams() plt.figure(figsize = (8, 8)) plt.xlabel(r'$x$') plt.ylabel(r'$y$') plt.xlim(0, 300) plt.ylim(0, 700) plt.errorbar(data['x'], data['y'], yerr = data['sy'], marker = "o", \ markerfacecolor = "black", markeredgecolor = "black", markersize = 8, \ linestyle = "", ecolor = "black", \ label = "Points 1--20 from Hogg et al. 2010 Table 1") ## overlay model plt.plot(data['x'], A.dot(X), label = "Best-fit linear model") plt.legend(loc = 'best') string = r"$y = (" + "{}".format(np.round(m, decimals = 2)) + r" \pm " + \ "{}".format(np.round(sigma_m, decimals = 2)) + r")x + (" + \ "{}".format(np.round(b, decimals = 2)) + r" \pm " + \ "{}".format(np.round(sigma_b, decimals = 2)) + r")$" plt.text(150, 100, string, fontsize = 15, \ bbox = dict(ec = 'k', fc = 'w', alpha = 0.9), \ ha = 'center', va = 'center') plt.savefig('hw8q2.png', bbox_inches = 'tight') def hw8q3(): data = getdata(censored = True) A, Y, X, Sigma_Pars = fit(data, quadratic = True) b = X[0][0] m = X[1][0] q = X[2][0] sigma_b = np.sqrt(Sigma_Pars[0][0]) sigma_m = np.sqrt(Sigma_Pars[1][1]) sigma_q = np.sqrt(Sigma_Pars[2][2]) rho_bm = Sigma_Pars[0][1]/(sigma_b*sigma_m) rho_bq = Sigma_Pars[0][2]/(sigma_b*sigma_q) rho_mq = Sigma_Pars[1][2]/(sigma_m*sigma_q) print("-----------------") print("Best-fit constant term b: {} ± {}".\ format(np.round(b, decimals = 2), np.round(sigma_b, decimals = 2))) print("Best-fit linear term m: {} ± {}".\ format(np.round(m, decimals = 2), np.round(sigma_m, decimals = 2))) print("Best-fit quadratic term q: " + scientific_tex(np.array([q, sigma_q]), precision = 3)) print("Correlation coefficient rho_bm = {}".\ format(np.round(rho_bm, decimals = 2))) print("Correlation coefficient rho_bq = {}".\ format(np.round(rho_bq, decimals = 2))) print("Correlation coefficient rho_mq = {}".\ format(np.round(rho_mq, decimals = 2))) print("-----------------") #plot plt = setPlotParams() plt.figure(figsize = (8, 8)) plt.xlabel(r'$x$') plt.ylabel(r'$y$') plt.xlim(0, 300) plt.ylim(0, 700) plt.errorbar(data['x'], data['y'], yerr = data['sy'], marker = "o", \ markerfacecolor = "black", markeredgecolor = "black", markersize = 8, \ linestyle = "", ecolor = "black", \ label = "Points 5--20 from Hogg et al. 2010 Table 1") ## overlay model AC = A.copy() AC.sort(axis = 0) xc = data['x'].copy() xc.sort() plt.plot(xc, AC.dot(X), label = "Best-fit quadratic model") plt.legend(loc = 'best') string = r"$y = " + scientific_tex(np.array([q, sigma_q]), precision = 1) + r"x^2 + (" + \ "{}".format(np.round(m, decimals = 2)) + r" \pm " + \ "{}".format(np.round(sigma_m, decimals = 2)) + r")x + (" + \ "{}".format(np.round(b, decimals = 2)) + r" \pm " + \ "{}".format(np.round(sigma_b, decimals = 2)) + r")$" plt.text(150, 100, string, fontsize = 15, \ bbox = dict(ec = 'k', fc = 'w', alpha = 0.9), \ ha = 'center', va = 'center') plt.savefig('hw8q3.png', bbox_inches = 'tight') def hw8q4(): data_orig = getdata(censored = False) #Get all the points Ndata = len(data_orig) #First, fit full dataset and get best-fit parameter values. A, Y, X, Sigma_Pars = fit(data_orig) b = X[0][0] m = X[1][0] #Now, bootstrap. B = 1000 #Number of bootstrap resamples bb = np.zeros(B) mm = np.zeros(B) index = np.arange(Ndata) for i in range(B): j = np.random.choice(index, Ndata, replace = True) data_boot = data_orig[j] A, Y, X, Sigma_Pars = fit(data_boot) bb[i] = X[0][0] mm[i] = X[1][0] sigma_b = ((bb - b)**2).mean() sigma_m = ((mm - m)**2).mean() print("-----------------") print("Best-fit intercept: {}".format(np.round(b, decimals = 3))) print("Variance in intercept: {}".format(np.round(sigma_b**2, decimals = 3))) print("Best-fit slope: {}".format(np.round(m, decimals = 3))) print("Variance in slope: {}".format(np.round(sigma_m**2, decimals = 3))) print("-----------------")