import numpy as np
from scipy import optimize
from scipy import stats
import urllib # for importing data from URL
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
# Following is an Ipython magic command that puts figures in notebook.
%matplotlib notebook
# M.L. modification of matplotlib defaults
# Changes can also be put in matplotlibrc file,
# or effected using mpl.rcParams[]
mpl.style.use('classic')
plt.rc('figure', figsize = (6, 4.5)) # Reduces overall size of figures
plt.rc('axes', labelsize=16, titlesize=14)
plt.rc('figure', autolayout = True) # Adjusts supblot params for new size
def f(x,m,b):
'''Simple linear function with slope m and intercept b'''
return x*m + b
def chi2(x, y, u, m, b):
'''Chisquare as a function of data (x, y, and yerr=u), and model
parameters slope and intercept (m and b)'''
return np.sum((y - f(x, m, b))**2/u**2)
# To load from a file, use
# g = urllib.request.urlopen('https://www.eg.bucknell.edu/~phys310/skills/data_analysis/data3.dat')
# data = np.loadtxt(g)
# x, y, u = data.T
#
# Or: data = np.loadtxt("file.dat")
# Format: [[x1,y1,u1], [x2,y2,u2], ... ] where u1 is uncertainty in y1
data = np.array([[1, 2.947032612427293, 0.5],
[2, 6.168779380682309, 0.5],
[3, 7.1618838821688, 0.5],
[4, 9.590549514954866, 0.5],
[5, 11.20657, 0.5],
[6, 12.3123, 0.5],
[7, 15.0221,0.5],
[8, 16.9281, 0.5],
[9, 18.8221, 0.5],
[10., 21.7221, 0.5]])
x, y, u = data.T
xc = np.linspace(0,6,201) # quasi-continuous set of x's for function plot
plt.figure()
plt.title("data",fontsize=14)
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.axhline(0,color='magenta')
plt.xlim(0,10)
plt.errorbar(x,y,yerr=u,fmt='o');
popt, pcov = optimize.curve_fit(f, x, y, sigma=u, absolute_sigma=True)
slope = popt[0]
alpha_m = np.sqrt(pcov[0,0]) # Std error in slope
intercept = popt[1]
alpha_b = np.sqrt(pcov[1,1]) # Std error in intercept
print("slope =", slope,"+/-", alpha_m,"\n")
print("intercept =", intercept,"+/-", alpha_b,"\n")
print("covariance matrix =","\n",pcov,"\n")
pcov_data = pcov
print("chi2 =", chi2(x, y, u,*popt))
# print("reduced chi2 = chi2/(len(x)-len(popt)) =", chi2(x, y, u, *popt)/(len(x)-len(popt)))
a = chi2(x,y,u,*popt)
slope = 1.9623049264728238 +/- 0.055048188152374236 intercept = 1.3954744434352022 +/- 0.34156501921217586 covariance matrix = [[ 0.0030303 -0.01666667] [-0.01666667 0.11666666]] chi2 = 9.387629609657065
xc = np.linspace(0,10,201) # quasi-continuous set of x's function plot
plt.figure()
plt.title("data with best fit line",fontsize=14)
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.axhline(0, color='magenta')
plt.xlim(0,10) # Pad x-range on plot
plt.errorbar(x, y, yerr=u, fmt='o');
plt.plot(xc ,f(xc, slope, intercept));
plt.figure()
plt.axhline(0,color='magenta')
plt.title('normalized residuals')
plt.xlabel('$x$')
plt.ylabel('normalized residuals')
plt.grid(True)
plt.errorbar(x,(f(x,slope,intercept)-y)/u,1,fmt='o')
plt.xlim(0,10);
If normalized residuals look ok:
n_sim = 10000 # Number of data sets to simulate
count = 0
for i in range(n_sim):
mSim = np.array([]) # Array for simulated slopes
bSim = np.array([]) # Array for simulated intercepts
chi_sim = np.array([]) # Array for chisq from simulated sets
for i in range(n_sim):
ySim = stats.norm.rvs(slope*x+intercept,u) # Generate simulated data set
popt, pcov = optimize.curve_fit(f, x, ySim, sigma=u, absolute_sigma=True) # Fit simulated data
bSim = np.append(bSim, popt[1]) # Record intercept
mSim = np.append(mSim, popt[0]) # Record slope
aa = chi2(x,ySim,u,*popt)
chi_sim = np.append(chi_sim, aa)
if aa > a:
count += 1
print(count/n_sim)
0.3085
1 - stats.chi2.cdf(3.700621, df=3)
0.2956591105179691
m = np.linspace(1.5, 2.5, 201) # NOTE: You need to change the range of m to encompass your best fit value for slope (see above)
b = np.linspace(0.5, 2.5, 201) # NOTE: You need to change the range of b to encompass your best fit value for the intercept
M, B = np.meshgrid(m, b, indexing='ij')
Z = np.zeros((len(m),len(b)))
for i in range(len(m)):
for j in range(len(b)):
Z[i,j] = chi2(x, y, u, m[i],b[j]) - chi2(x, y, u, *popt)
plt.figure()
CS = plt.contour(M, B, Z, levels=[1,2.7,4,9]) # contour plots of chi2 = 1, 2.7, 4, 9 above chi2_min
#
#SC = plt.scatter(mSim, bSim, s=0.01) # overlay scatterplot of simulted data
#
plt.xlabel('$m$')
plt.ylabel('$b$')
plt.grid()
plt.axhline(intercept)
#plt.axhline(intercept + alpha_b, linestyle='--')
#plt.axhline(intercept - alpha_b, linestyle='--')
plt.axvline(slope)
#plt.axvline(slope + alpha_m,linestyle='--')
#plt.axvline(slope - alpha_m,linestyle='--')
plt.clabel(CS, inline=1, fontsize=10);
Note that the contours are tilted and squashed; this indicates that there is correlation between the two fit parameters, mainly caused because the y-intercept is not within the data range. These elongated contours are telling you that a small change in $m$ can, to some extent, be compensated for by a corresponding change in $b$.
Fits with correlated data aren't great, because they tend to lead to an overestimation of the uncertainties in the fit parameters. In this case, note tha thte uncertainty in $b$ is $\sim 0.5$ due to this correlation effect.
We can do better if we perform our linear fit on a dataset whose x-values are centered on $x=0$.
x_mean = np.mean(x) # determine the center of the distribution of x values
x_shift = x - x_mean # shift the array so that the x data are centered on x = 0
plt.figure()
plt.title("shifted data",fontsize=14)
plt.xlabel('$x_{shift}$')
plt.ylabel('$y$')
plt.axhline(0,color='magenta')
plt.xlim(0-x_mean,10-x_mean)
plt.errorbar(x_shift,y,yerr=u,fmt='o');
popt, pcov = optimize.curve_fit(f, x_shift, y, sigma=u, absolute_sigma=True)
slope = popt[0]
alpha_m = np.sqrt(pcov[0,0]) # Std error in slope
intercept = popt[1]
alpha_b = np.sqrt(pcov[1,1]) # Std error in intercept
print("slope =", slope,"+/-", alpha_m,"\n")
print("intercept =", intercept,"+/-", alpha_b,"\n")
print("covariance matrix =","\n",pcov,"\n")
pcov_data = pcov
print("chi2 =", chi2(x_shift, y, u,*popt))
# print("reduced chi2 = chi2/(len(x)-len(popt)) =", chi2(x, y, u, *popt)/(len(x)-len(popt)))
a = chi2(x_shift,y,u,*popt)
slope = 1.9623049264728234 +/- 0.05504818876854284 intercept = 12.188151539047729 +/- 0.15811388255358758 covariance matrix = [[3.03030309e-03 3.58792665e-11] [3.58792665e-11 2.49999999e-02]] chi2 = 9.387629609657035
m = np.linspace(1.7, 2.3, 201)
b = np.linspace(11.7, 12.7, 201)
M, B = np.meshgrid(m, b, indexing='ij')
Z = np.zeros((len(m),len(b)))
for i in range(len(m)):
for j in range(len(b)):
Z[i,j] = chi2(x_shift, y, u, m[i],b[j]) - chi2(x_shift, y, u, *popt)
plt.figure()
CS = plt.contour(M, B, Z, levels=[1,2.7,4,9]) # contour plots of chi2 = 1, 2.7, 4, 9 above chi2_min
#
#SC = plt.scatter(mSim, bSim, s=0.01) # overlay scatterplot of simulted data
#
plt.xlabel('$m$')
plt.ylabel('$b$')
plt.grid()
plt.axhline(intercept)
#plt.axhline(intercept + alpha_b, linestyle='--')
#plt.axhline(intercept - alpha_b, linestyle='--')
plt.axvline(slope)
#plt.axvline(slope + alpha_m,linestyle='--')
#plt.axvline(slope - alpha_m,linestyle='--')
plt.clabel(CS, inline=1, fontsize=10);
%version_information is an IPython magic extension for showing version information for dependency modules in a notebook;
See https://github.com/jrjohansson/version_information
%version_information
is available on Bucknell computers on the linux network. You can easily install it on
any computer.
%load_ext version_information
%version_information numpy, scipy, matplotlib