# pyhton2 - python3 compatibility from __future__ import division from __future__ import absolute_import from __future__ import print_function from __future__ import unicode_literals # script bootstrap.py ''' Illustrate bootstrap method calculate confidence intervall .. author:: Guenter Quast fuer den Kurs Rechnernutzung in der Physik ''' # ------------------------------------------------------------- import numpy as np import matplotlib.pyplot as plt import scipy.stats as stat import matplotlib.pyplot as plt def getData(n=100): # provide a data sample from uniform distribution #return 2*np.random.rand(n)-1. return np.random.randn(n) # Gauss distribution def fGauss(x, mu=0., sigma=1.): return (np.exp(-(x-mu)**2/2./sigma**2)/np.sqrt(2.*np.pi)/sigma) # ------------------------------------------------------------- ##### ---- main Program starts here ----- if __name__ == "__main__": # n=30 d0 = getData(n) # caculate classical mean and error on mean mean=d0.mean() std=np.sqrt( d0.var(ddof=1)/n ) # # generate multiple samples using Bootstrap method, # i.e. by resampling form data itself, niter=10000 # draw samples m = [] for i in range(niter): mi=np.random.choice(d0, size=n).mean() m.append(mi) m = np.array(m) # print result print('*==* Test Bootstrap Method:') print( ' mean value -/+ std %.3f : %.3f'%(mean-std, mean+std) ) print() print( '*==* Confidence Interval from bootstrap method:') print( ' 68%% CI %.3f : %.3f'\ %(np.percentile(m, 16 ),np.percentile(m, 84 )) ) # plot data and sample distribution fig0 = plt.figure('Sample from Gaussian Distribution', figsize=(7.5, 5.)) ax0 = fig0.add_subplot(1,1,1) x = np.linspace(-3., 3., 200) ax0.plot(x, fGauss(x), 'r-', label='Normalverteilung') ax0.plot(d0, np.ones(len(d0))*0.005, 'b|', markersize=20, label='Stichprobenwerte') ax0.set_xlabel('x') ax0.set_ylabel('Gauss(x)') plt.legend(loc='best') # plot resampled distribution of mean fig = plt.figure('Bootstrap example', figsize=(7.5, 5.)) ax = fig.add_subplot(1,1,1) bc, be, _ = ax.hist(m, 100, alpha=0.7) x = np.linspace(-3./np.sqrt(n), 3./np.sqrt(n), 200) ax.plot(x, (be[1]-be[0])*niter*fGauss(x, mu=mean, sigma=1./np.sqrt(n)), 'r--') ax.set_xlabel('distribution of means') ax.set_ylabel('frequency') ax.axvline(np.percentile(m,16), color='r', linestyle='--') ax.axvline(np.percentile(m,84), color='r', linestyle='--') ax.text(0.45, 0.95, '68% CL', color='darkred', transform=ax.transAxes) plt.show()