Projecting Atmospheric Carbon Dioxide concentrations Physics 281-01 Fall 2018 Homework Assignment 2 Sasha Bakker 12 Oct. 2018

Part 1: Loading and Plotting the Data

In [2]:
# Part A
# Import packages
import numpy as np
import matplotlib.pyplot as plt

# Import text file
allData = np.loadtxt('co2_data.txt')

# Initialize variables
decimalTime = allData[:,2]
concentration = allData[:,4]

# Part B 
# Plot the Data
plt.plot(decimalTime[0:], concentration[0:], color='orange')
plt.xlabel('Decimal Time (years)', fontsize=14, color='red')
plt.ylabel('Mole Fraction CO2 (ppm)', fontsize=14, color='red')
plt.title('CO2 Concentration vs. Time', fontsize=20, color='purple')
plt.show()

# Part C
# Initialize variables
allMaximums = list()
maximum = 0.0
allMinimums = list()
minimum = 500.0
i = 0

# This while loop creates the array of relative min's and max's
while i < len(concentration):
    
    # initialize needed values
    currentNum = concentration[i]
    if i == 0:
        prevNum = 0
    else:
        prevNum = concentration[i-1]
    
    
    # while the slope is positive
    while (currentNum - prevNum) >= 0:
        
        i += 1
        if i >= len(concentration): break
        
        # initialize needed values
        currentNum = concentration[i]
        prevNum = concentration[i-1]
        maximum = prevNum
        
        # if the slope changes to negative
        # the previous data point is a maximum
        if (currentNum - prevNum) <= 0:
            allMaximums.append(maximum)
            maximum = 0
            i +=1
            break
            
    # while the slope is negative    
    while (currentNum - prevNum) <= 0:
    
        i += 1
        if i >= len(concentration): break
        
        # initialize needed values
        currentNum = concentration [i]
        prevNum = concentration[i-1]
        minimum = prevNum
        
        # if the slope changes to positive
        # the previous data point is a minimum
        if(currentNum - prevNum) >=0:
            allMinimums.append(minimum)
            minimum = 500
            i +=1
            break
        
        
# convert the lists to arrays       
allMaximums = np.array(allMaximums)
allMinimums = np.array(allMinimums)

# sum all of the differences between min's and max's
sum = 0
for j in range(0,len(allMinimums)):
    sum += np.absolute(allMaximums[j]-allMinimums[j])

# calculate the average amplitude
avgDifference = sum / len(allMinimums)
avgAmplitude = avgDifference/2

# print the average amplitude
print("The average amplitude of oscillation is %.3f ppm"%(avgAmplitude))
The average amplitude of oscillation is 2.884 ppm

Part 2: Linear and Quadratic fits: Part A:

In [5]:
# Import packages
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import scipy.optimize as spo
import scipy.stats as sts

# Import text file
allData = np.loadtxt('co2_data.txt')

# Initialize variables
decimalTime = allData[:,2]
decimalTime -= 1958.0
concentration = allData[:,4]
x = decimalTime
y = concentration


# Plot the Data
plt.plot(decimalTime[0:], concentration[0:], color='orange')
plt.xlabel('Decimal Time (years)', fontsize=14, color='red')
plt.ylabel('Mole Fraction CO2 (ppm)', fontsize=14, color='red')
plt.title('CO2 Concentration vs. Time', fontsize=20, color='purple')

#Part A
# Define linear function
def line(x, yInt, slope):
    return slope*x+yInt
    
# Perform linear fit
# z is an array for m,b in y = mx + b
# cc is the covariance of these values
z,cc = np.polyfit(x,y,1,cov=True)
(up0,up1) = np.sqrt(np.diag(cc)) #1-sigma uncertanties of parameters
slope = z[0]
intercept = z[1]
xmod = np.linspace(x[0],x[-1],len(x))
ymod = line(xmod,intercept,slope) 
plt.plot(xmod,ymod)

# Show the plot
plt.show()

# Create new figure
plt.figure()

# Find residuals
residuals = y - ymod

# Plot residuals
plt.scatter(x, residuals, marker='.')
plt.xlabel('Decimal Time (years)', fontsize=14, color='red')
plt.ylabel('Mole Fraction Residuals (ppm)', fontsize=14, color='red')
plt.title('Linear Fit Residuals vs. Time', fontsize=20, color='purple')
plt.show()

# Calculate chi-squared
yfit = line(x,intercept,slope)
yys = (yfit-y)**2
chisqr = sum(yys) / (len(y)-2)


# Print values
print("linear fit: y = mx + b")
print("reduced chi-squared =",(chisqr),"\nparameters and 1-sigma uncertanties:")
print("m = %f +/- %f\nb = %f +/- %f"%(slope,up0,intercept,up1))
linear fit: y = mx + b
reduced chi-squared = 16.964396838548144 
parameters and 1-sigma uncertanties:
m = 1.549587 +/- 0.008765
b = 306.435268 +/- 0.307414

Part 2: Linear and Quadratic fits: Part B:

I do not think the linear fit is sufficient. This is for 3 reasons:

1.) The chi-squared value is absurdly large. To have a good fit, chi-square should be as small as possible.

2.) The residual plot shows a pattern. To have a good fit, the residual plot should be random.

3.) Sets of data points are either above or below the linear fit, the fit does not travel through the osciallating data points.

Due to this, the linear fit is not appropriate for the provided data.

Part 2: Linear and Quadratic fits: Part C:

In [4]:
# Import packages
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import scipy.optimize as spo
import scipy.stats as sts

# Import text file
allData = np.loadtxt('co2_data.txt')

# Initialize variables
decimalTime = allData[:,2]
decimalTime -= 1958.0
concentration = allData[:,4]
x = decimalTime
y = concentration


# Plot the Data
plt.plot(decimalTime[0:], concentration[0:], color='orange')
plt.xlabel('Decimal Time (years)', fontsize=14, color='red')
plt.ylabel('Mole Fraction CO2 (ppm)', fontsize=14, color='red')
plt.title('CO2 Concentration vs. Time', fontsize=20, color='purple')


#Part C

# Define quadratic function
def quad(x,p0,p1,p2):
    return p0 + p1*x + p2*x**2
    

# perform quadratic fit
guesses = (1,1,1)
(p0,p1,p2),cc = spo.curve_fit(quad,x,y,p0=guesses,sigma=None)
(up0,up1,up2) = np.sqrt(np.diag(cc)) #1-sigma uncertanties of parameters
xmod = np.linspace(x[0],x[-1],len(y))
ymod = quad(xmod,p0,p1,p2) 
plt.plot(xmod,ymod)

# Calculate reduced chi-squared
yfit = quad(x,p0,p1,p2)
yys = (yfit-y)**2
chisqr = sum(yys)/(len(x) - 3)


# Show the plot
plt.show()

# Create new figure
plt.figure()

# Find residuals  
residuals = y - ymod

# Plot residuals
plt.scatter(x, residuals, marker='.')
plt.xlabel('Decimal Time (years)', fontsize=14, color='red')
plt.ylabel('Mole Fraction Residuals (ppm)', fontsize=14, color='red')
plt.title('Quadratic Fit Residuals vs. Time', fontsize=20, color='purple')
plt.show()

# Print values
print("quadratic fit: y = p0 + p1*x + p2*x**2")
print("reduced chi-squared =",(chisqr),"\nparameters and 1-sigma uncertanties:")
print("p0 = %f +/- %f\np1 = %f +/- %f\np2 = %f +/- %f"%(p0,up0,p1,up1,p2,up2))
quadratic fit: y = p0 + p1*x + p2*x**2
reduced chi-squared = 4.9068790196101535 
parameters and 1-sigma uncertanties:
p0 = 314.319847 +/- 0.249365
p1 = 0.776157 +/- 0.018926
p2 = 0.012714 +/- 0.000301

I think the quadratic fit is sufficient. This is for 3 reasons:

1.) The chi-squared value is mimimized as compared to the linear fit. Although it is not 1, this is because only such a good fit polynomial fit can be made with the oscillating data points.

2.) The residual plot does not show pattern. The fit is good because the residuals appear randomly scattered.

3.) The fit travels through the oscillating data points.

Due to this, the quadratic fit is appropriate for the given data.

Part 3: Projecting future CO2 levels:

In [13]:
# First, generate a plot with the data and your two best fits with the time axis 
#extended to 100 years
# Find the value of the CO2 concentration at t=100 years in your linear fit and
# your quadratic fit. Print these valyes with units.

# Import packages
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import scipy.optimize as spo
import scipy.stats as sts

# Import text file
allData = np.loadtxt('co2_data.txt')

# Initialize variables
decimalTime = allData[:,2]
decimalTime -= 1958.0
concentration = allData[:,4]
x = decimalTime
y = concentration


# Plot the Data
plt.plot(decimalTime[0:], concentration[0:], color='orange')
plt.xlabel('Decimal Time (years)', fontsize=14, color='red')
plt.ylabel('Mole Fraction CO2 (ppm)', fontsize=14, color='red')
plt.title('CO2 Concentration vs. Time', fontsize=20, color='purple')

#Part A
# Define linear function
def line(x, yInt, slope):
    return slope*x+yInt
    
# Perform linear fit
# z is an array for m,b in y = mx + b
# cc is the covariance of these values
z,cc = np.polyfit(x,y,1,cov=True)
(up0,up1) = np.sqrt(np.diag(cc)) #1-sigma uncertanties of parameters
slope = z[0]
intercept = z[1]
xmod = np.linspace(x[0],100,len(x))
ymod = line(xmod,intercept,slope) 
plt.plot(xmod,ymod, color='purple')

# Save value at 100 years
lineHundred = ymod[-1]

# Define quadratic function
def quad(x,p0,p1,p2):
    return p0 + p1*x + p2*x**2
    

# perform quadratic fit
guesses = (1,1,1)
(p0,p1,p2),cc = spo.curve_fit(quad,x,y,p0=guesses,sigma=None)
(up0,up1,up2) = np.sqrt(np.diag(cc)) #1-sigma uncertanties of parameters
xmod = np.linspace(x[0],100,len(y))
ymod = quad(xmod,p0,p1,p2) 
plt.plot(xmod,ymod)

# Save value at 100 years
quadHundred = ymod[-1]

# Show plot
plt.show()

# Print values
print("linear fit at t = 100: concentration =",lineHundred,"ppm")
print("quadratic fit at t = 100: concentration =",quadHundred,"ppm")
linear fit at t = 100: concentration = 461.39397049804336 ppm
quadratic fit at t = 100: concentration = 519.0747331744284 ppm