import numpy as np from scipy.stats import norm, kstest, ks_2samp from ss_setPlotParams import setPlotParams def hw4q1_getdata(inflate_errors = 1.0): #The output uncertainties are inflated by a factor inflate_errors #read data data = np.loadtxt('Boyeretal2011_Cstars_KbandLF.csv', comments = '#', \ dtype = {'names' : ('k', 'dk'), 'formats': ('f4', 'f4')}, delimiter = ',') k = data['k'] dk = data['dk']*inflate_errors data = 0. return k, dk def hw4q1_getCB(k, dk, Niter, alpha): Ndata = len(k) #initialise the histogram bin edges h, binedges = np.histogram(k, bins = 40) Nbins = len(h) h = np.zeros([Niter, Nbins]) ksamp = norm.rvs(loc = k, scale = dk, size = [Niter, Ndata]) #Realisations locmax = np.zeros(Niter) #location of the maximum of the KLF for i in range(Niter): htemp, binedgestemp = np.histogram(ksamp[i, :], bins = binedges) h[i, :] = htemp locmax[i] = binedges[np.argmax(htemp)] #Compute CI for the KLF maximu, and the CIs for each bin # by retaining the central 100*(1-alpha)% of the points. frac = int(np.floor(alpha*Niter/2)) #fraction of points to be excluded on either side. #Compute the CI for the KLF maximum locmaxsorted = np.sort(locmax.copy()) maxCI = locmaxsorted[frac:-frac] #Compute the CIs by retaining the central 95% of the points for each bin hsorted = np.sort(h.copy(), axis = 0) hCI = hsorted[frac:-frac, :] #This automatically removes frac points from the tail on either side. bincenters = (binedges[1:] + binedges[:-1])/2 #central locations of bins hCentral = hCI[int(np.floor(Niter/2)), :] #central values of samples for each bin eh = np.array([hCentral - hCI[0,:], hCI[-1,:] - hCentral]) #array with asymmetric error bars return maxCI, binedges, bincenters, hCentral, eh def hw4q1_plot(maxCI, binedges, bincenters, hCentral, eh, outfile = 'KLF_original'): plt = setPlotParams() plt.figure(figsize = (8, 8)) plt.plot(binedges[:-1], hCentral, label = 'Median', drawstyle = 'steps') binwidth = binedges[1] - binedges[0] plt.errorbar(bincenters - binwidth, hCentral, marker = 'o', linestyle = 'None', yerr = eh) maxmedian = np.median(maxCI) plt.plot([maxCI[0], maxCI[-1]], [np.max(hCentral), np.max(hCentral)], label = 'CI for maximum location') plt.plot(maxmedian, np.max(hCentral), 'ro', label = 'median of maximum location') plt.legend(loc = 'best') plt.savefig(outfile + '.png', bbox_inches = 'tight') def hw4q1(Niter = 1000, alpha = 0.05): #first, generate CBs with original uncertainties from file. k, dk = hw4q1_getdata() maxCI, binedges, bincenters, hCentral, eh = hw4q1_getCB(k, dk, Niter, alpha) hw4q1_plot(maxCI, binedges, bincenters, hCentral, eh) #now, generate CBs with uncertainties inflated by 20x. k, dk = hw4q1_getdata(inflate_errors = 20) maxCI, binedges, bincenters, hCentral, eh = hw4q1_getCB(k, dk, Niter, alpha) hw4q1_plot(maxCI, binedges, bincenters, hCentral, eh, outfile = 'KLF_inflated_20x') #now, generate CBs with uncertainties inflated by 20x. k, dk = hw4q1_getdata(inflate_errors = 100) maxCI, binedges, bincenters, hCentral, eh = hw4q1_getCB(k, dk, Niter, alpha) hw4q1_plot(maxCI, binedges, bincenters, hCentral, eh, outfile = 'KLF_inflated_100x') def hw4q2(): #read data M31distmod = 24.9 #Distance modulus to M31 (mag) k1 = np.loadtxt('../Homeworks/MW_GCs.txt', comments = '#') k2 = np.loadtxt('../Homeworks/M31_GCs.txt', comments = '#') k2 = k2 - M31distmod ##The general version of the callable can be in the form of a lambda: ## kstest(k1, lambda x: norm.cdf(x, loc = mu, scale = sigma)) ##NOTE THAT PARAMETERS ESTIMATED FROM THE DATA MUST NOT BE USED TO GENERATE THE MODEL -- THE KS TEST IS INVALID IN THIS CASE. #Find D D, p = ks_2samp(k1, k2) #Should this be ECDF2? print("2-sample KS statistic for MW and M31 data: {}, with probability: {}."\ .format(np.round(D, decimals = 3), np.round(p, decimals = 3))) if p <= 0.05: print("Null hypothesis rejected at 95% significance for MW and M31 samples.") else: print("Null hypothesis accepted at 95% significance for MW and M31 samples.") #Do t1 and t2 come from a standard normal? k1mean = k1.mean() k1std = k1.std() k2mean = k2.mean() k2std = k2.std() print("The mean and std for the MW sample are {} and {}.".format(np.round(k1mean, decimals = 3), np.round(k1std, decimals = 3))) print("The mean and std for the M31 sample are {} and {}.".format(np.round(k2mean, decimals = 3), np.round(k2std, decimals = 3))) t1 = (k1 - k1.mean())/k1.std(ddof = 1) t2 = (k2 - k2.mean())/k2.std(ddof = 1) D1, p1 = kstest(t1, 'norm') print("KS statistic for studentised version of MW magnitudes: {}, with probability: {}"\ .format(np.round(D1, decimals = 3), np.round(p1, decimals = 3))) if p1 <= 0.05: print("Null hypothesis rejected at 95% significance for MW sample.") else: print("Null hypothesis accepted at 95% significance for MW sample.") D2, p2 = kstest(t2, 'norm') print("KS statistic for studentised version of M31 magnitudes: {}, with probability: {}"\ .format(np.round(D2, decimals = 3), np.round(p2, decimals = 3))) if p2 <= 0.05: print("Null hypothesis rejected at 95% significance for M31 sample.") else: print("Null hypothesis accepted at 95% significance for M31 sample.") x = np.linspace(np.min(t1)*2, np.max(t1)*2, 1000) #grid to generate normal distribution fx = norm.pdf(x) #Plot the histograms for each set of t values plt = setPlotParams() plt.figure(figsize = (8, 8)) n, bins, patches = plt.hist(t1, label = 'MW', histtype = 'step') plt.plot(x, fx*np.max(n)/np.max(fx), label = 'Standard normal (scaled)') plt.legend(loc = 'best') plt.savefig('MW_GCs.png') plt.close() plt = setPlotParams() plt.figure(figsize = (8, 8)) n, bins, patches = plt.hist(t2, label = 'M31', histtype = 'step') plt.plot(x, fx*np.max(n)/np.max(fx), label = 'Standard normal (scaled)') plt.legend(loc = 'best') plt.savefig('M31_GCs.png')