### Diurnal CO2

Feb. 13, 2022

CO2 is a well-known gas, but confirming its daily trend requires fine-resolution measurements, which is far from easy. In this post, we will see a clear daily trend of CO2 concentration driven by photosynthesis/respiration.

CO2 concentration increased during the night, peaking near sunrise. The opposite trend occurred during the daytime.

# Library

In [1]:
import warnings
warnings.filterwarnings('ignore') # make the output cleaner

In [2]:
import re
import datetime
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

In [3]:
# Start from matplotlib's default style and use 8x6 figures throughout.
plt.style.use('default')
plt.rcParams.update({"figure.figsize": (8, 6)})


# Prepare data

• this post is the second part of analyzing hourly CO2 data. The data should be downloaded into the hourly folder
• if you prefer to start from this one, please repeat part 2.1 to get the proper setup.

# Diurnal trend

• draw a daytime shade given the sunrise and sunset times and a dataframe of CO2 concentration over time
• you can see the most recent data for WKT directly on the NOAA website
In [4]:
# convert to datetime object
def make_dt(row):
    """Build a ``datetime.datetime`` from the date/time fields of one row.

    Args:
        row: Mapping-like record (for example a pandas Series) providing
            ``year``, ``month``, ``day``, ``hour``, ``minute`` and
            ``second``; each value must be convertible with ``int()``.

    Returns:
        The assembled ``datetime.datetime``, or ``None`` when a field is
        missing, cannot be converted to an integer, or does not form a
        valid date/time. In that case the error and the offending row are
        printed so the bad record can be inspected.
    """
    fields = ('year', 'month', 'day', 'hour', 'minute', 'second')
    try:
        return datetime.datetime(*(int(row[name]) for name in fields))
    except (KeyError, TypeError, ValueError, OverflowError) as e:
        # Best effort: report the bad row and let the caller keep going.
        print(f'Exception converting datetime: {e}')
        print(row)
        return None

In [5]:
def clean_co2_text(fpath):
    """Parse a whitespace-delimited CO2 text file into a time-indexed frame.

    The file must contain a column-name line starting with ``site_code``;
    every non-blank line after it is treated as a data row. The ``year``,
    ``month``, ``day``, ``hour``, ``minute`` and ``second`` columns are
    combined into a ``time`` index through ``make_dt``.

    Args:
        fpath: Path of the text file to read.

    Returns:
        A DataFrame indexed by ``time`` with numeric ``value`` and
        ``intake_height`` columns, or ``None`` (after printing a
        diagnostic) when the header line is missing, no data follows it,
        or the first data row does not have one field per column.
    """
    with open(fpath) as f:
        lines = f.readlines()

    header_idx = next(
        (i for i, line in enumerate(lines) if line.startswith('site_code')),
        None,
    )
    if header_idx is None:
        print(f'no line starting with "site_code" found in {fpath}')
        return None
    print(header_idx)  # position of the header line, shown as a progress cue

    # str.split() with no argument splits on any run of whitespace.
    cols = lines[header_idx].split()
    rows = [line.split() for line in lines[header_idx + 1:]]
    rows = [fields for fields in rows if fields]  # ignore blank lines
    if not rows:
        print(f'no data rows after the header in {fpath}')
        return None
    if len(rows[0]) != len(cols):
        print(lines[header_idx + 1])
        print(rows[0])
        return None

    df = pd.DataFrame(data=rows, columns=cols)
    df.dropna(inplace=True)  # drops rows that are shorter than the header
    df['time'] = df.apply(make_dt, axis=1)
    df['value'] = pd.to_numeric(df['value'])
    df['intake_height'] = pd.to_numeric(df['intake_height'])
    return df[['time', 'value', 'intake_height']].set_index('time')

In [6]:
# Parse the WKT hourly text file into a clean, time-indexed dataframe.
df = clean_co2_text(
    './hourly/co2_tower-insitu_1_ccgg_ASCIItext/'
    'co2_wkt_tower-insitu_1_ccgg_HourlyData.txt'
)

148

In [7]:
df.head(n=5)

Out[7]:
value intake_height
time
2001-02-14 00:00:00 379.1 30.0
2001-02-14 00:00:00 377.0 61.0
2001-02-14 00:00:00 374.8 122.0
2001-02-14 00:00:00 372.4 457.0
2001-02-14 01:00:00 382.7 9.0
In [8]:
# Cleaning the raw file is slow, so keep an untouched copy to restart from
# instead of parsing the file again.
dfo = df.copy()

In [9]:
# Reshape to wide form: the existing time index stays as rows and each
# intake height becomes its own column of CO2 values.
df = df.pivot(values='value', columns='intake_height')

In [10]:
# The full record is noisy and slow to draw, so dft keeps only a short
# window: the first four days of August 2002.
dft = df.loc[
    (df.index.year == 2002)
    & (df.index.month == 8)
    & (df.index.day <= 4)
]
dft.plot()

Out[10]:
• there is a clear trend of CO2 each day. The concentration went up and down at the different heights at the same time. This has to do with photosynthesis or the accumulation of CO2 production, assuming photosynthesis/respiration are the main drivers.
• the effect on CO2 is strongest at the lowest height. This supports the idea that the drivers are at ground level, such as trees
• it is not easy to see the pattern yet since the graph does not yet clearly display the hours and the daytime
In [11]:
# this function draws a shade for the day to easy see the pattern

def day_shade(df, ax, srise=(6, 45), sset=(20, 26)):
    """Draw an orange shade over the daytime of every date found in ``df``.

    One ``fill_between`` band is added per calendar date of the index. Each
    band spans from the smallest to the largest value in ``df`` and covers
    the index entries between that date's sunrise and sunset (inclusive).

    Args:
        df: DataFrame indexed by datetimes; its values set the vertical
            extent of the shade.
        ax: Axes-like object providing ``fill_between``.
        srise: ``(hour, minute)`` of sunrise.
        sset: ``(hour, minute)`` of sunset.

    Returns:
        None.
    """
    sunrise = datetime.time(srise[0], srise[1])
    sunset = datetime.time(sset[0], sset[1])

    # Vertical extent: global min/max over all columns.
    co2_min = df.min().min()
    co2_max = df.max().max()

    for date in sorted(set(df.index.date)):
        day_start = datetime.datetime.combine(date, sunrise)
        day_end = datetime.datetime.combine(date, sunset)
        ax.fill_between(x=df.index,
                        y1=co2_min,
                        y2=co2_max,
                        where=(df.index >= day_start) & (df.index <= day_end),
                        alpha=0.1, fc='orange')
    return None

In [12]:
# Re-select the first four days of August 2002.
dft = df.loc[
    (df.index.year == 2002)
    & (df.index.month == 8)
    & (df.index.day <= 4)
]

In [13]:
# Convert the timestamps from GMT to local standard time (GMT-6).
# Work on an explicit copy and replace the index directly rather than
# assigning a column onto a filtered slice of df.
dft = dft.copy()
dft.index = (dft.index + datetime.timedelta(hours=-6)).rename('lst')
# Preview the shifted frame (the recorded output of this cell is a table).
dft.head()

Out[13]:
intake_height 9.0 30.0 61.0 122.0 244.0 457.0
lst
2002-07-31 18:00:00 368.1 367.3 367.2 367.1 367.1 367.1
2002-07-31 19:00:00 369.2 368.1 367.4 366.9 366.1 366.4
2002-07-31 20:00:00 372.0 370.3 369.2 367.6 365.1 365.8
2002-07-31 21:00:00 374.4 372.8 371.2 368.8 365.6 365.8
2002-07-31 22:00:00 377.1 375.4 374.3 371.8 368.6 366.3
In [14]:
# First four days of August 2002, on an explicit copy so the index can be
# replaced without touching a slice of df.
dft = df[(df.index.year == 2002) & (df.index.month == 8) & (df.index.day <= 4)].copy()
# GMT -> local standard time (GMT-6)
dft.index = (dft.index + datetime.timedelta(hours=-6)).rename('lst')
fig, ax = plt.subplots()
ax.plot(dft, lw=.5)
# Shade the daytime so the day/night pattern discussed below is visible.
day_shade(dft, ax)

• now we are more confident that during the daytime, CO2 decreases starting from sunrise. The opposite trend occurs during the nighttime, with the concentration peaking close to sunrise
In [15]:
def plot_with_slider(df, start_date=None, end_date=None):
    """Plot hourly CO2 for a range of days in August 2002.

    Rows of ``df`` dated August 2002 with a day of month between
    ``start_date`` and ``end_date`` (inclusive) are selected, their index is
    shifted by -6 hours, and one line per column is drawn.

    Args:
        df: DataFrame indexed by datetimes, one column per intake height.
        start_date: First day of the month to plot (int, at least 1).
        end_date: Last day of the month to plot (greater than
            ``start_date``, at most 31).

    Raises:
        TypeError: If ``start_date`` is not an int.
        ValueError: If the day range is empty or out of bounds.
    """
    # Explicit raises instead of assert, which is stripped under ``python -O``.
    if not isinstance(start_date, int):
        raise TypeError('check start_date type')
    if not start_date < end_date:
        raise ValueError('start date before end date')
    if start_date <= 0:
        raise ValueError('start date is not proper')
    if end_date > 31:
        raise ValueError('end date is out of range')

    selected = ((df.index.year == 2002) & (df.index.month == 8) &
                (df.index.day >= start_date) & (df.index.day <= end_date))
    # Copy before changing the index so df itself is never modified.
    dft = df[selected].copy()
    dft.index = (dft.index + datetime.timedelta(hours=-6)).rename('lst')
    labels = list(dft.columns)

    fig, ax = plt.subplots()

    ax.plot(dft, lw=.8)
    ax.legend(labels)
    ax.set_title('Hourly CO2 concentration at different intake height, Moody, TX')
    ax.set_ylabel('Concentration, ppm')

    fig.autofmt_xdate()
    fig.show()

In [16]:
# Year and month are fixed inside the function; change them there, or add
# more arguments to the function.
plot_with_slider(df, start_date=1, end_date=10)

In [17]:
def plot_with_slider(df, year=None, month=None, start_date=None, end_date=None):
    """Plot hourly CO2 for a range of days within one month.

    Rows of ``df`` in ``year``/``month`` with a day of month between
    ``start_date`` and ``end_date`` (inclusive) are selected, their index is
    shifted by -6 hours, and one line per column is drawn.

    Args:
        df: DataFrame indexed by datetimes, one column per intake height.
        year: Year to plot; must occur in ``df.index``.
        month: Month to plot; must occur in ``df.index``.
        start_date: First day of the month to plot (int, at least 1).
        end_date: Last day of the month to plot (greater than
            ``start_date``, at most 31).

    Raises:
        TypeError: If ``start_date`` is not an int.
        ValueError: If the day range is empty or out of bounds, or if
            ``year`` or ``month`` does not occur in the index.
    """
    # Explicit raises instead of assert, which is stripped under ``python -O``.
    if not isinstance(start_date, int):
        raise TypeError('check start_date type')
    if not start_date < end_date:
        raise ValueError('start date before end date')
    if start_date <= 0:
        raise ValueError('start date is not proper')
    if end_date > 31:
        raise ValueError('end date is out of range')
    if year not in df.index.year:
        raise ValueError('year is out of range')
    if month not in df.index.month:
        raise ValueError('month is out of range')

    selected = ((df.index.year == year) & (df.index.month == month) &
                (df.index.day >= start_date) & (df.index.day <= end_date))
    # Copy before changing the index so df itself is never modified.
    dft = df[selected].copy()
    dft.index = (dft.index + datetime.timedelta(hours=-6)).rename('lst')
    labels = list(dft.columns)

    fig, ax = plt.subplots()

    ax.plot(dft, lw=.8)
    ax.legend(labels)
    ax.set_title('Hourly CO2 concentration at different intake height, Moody, TX')
    ax.set_ylabel('Concentration, ppm')

    fig.autofmt_xdate()
    fig.show()

In [18]:
plot_with_slider(df, year=2003, month=8, start_date=1, end_date=10)

In [19]:
# The same call with the arguments collected in a dictionary.
kws = dict(year=2003, month=8, start_date=10, end_date=15)
plot_with_slider(df, **kws)

In [20]:
# format the x ticks
import matplotlib.dates as mdates

In [21]:
from matplotlib.dates import DayLocator, HourLocator, DateFormatter, drange

In [22]:
def plot_with_slider(df, year=None, month=None, start_date=None, end_date=None):
    """Plot and save hourly CO2 for a range of days within one month.

    Rows of ``df`` in ``year``/``month`` with a day of month between
    ``start_date`` and ``end_date`` (inclusive) are selected, their index is
    shifted by -6 hours, and one line per column is drawn with day-based
    major ticks and 6-hourly minor ticks. The figure is written to
    ``co2_diurnal.png`` in the working directory and then shown.

    Args:
        df: DataFrame indexed by datetimes, one column per intake height
            (column labels must be convertible with ``int()``).
        year: Year to plot; must occur in ``df.index``.
        month: Month to plot; must occur in ``df.index``.
        start_date: First day of the month to plot (int, at least 1).
        end_date: Last day of the month to plot (greater than
            ``start_date``, at most 31).

    Raises:
        TypeError: If ``start_date`` is not an int.
        ValueError: If the day range is empty or out of bounds, if ``year``
            or ``month`` does not occur in the index, or if no rows match
            the selection.
    """
    # Explicit raises instead of assert, which is stripped under ``python -O``.
    if not isinstance(start_date, int):
        raise TypeError('check start_date type')
    if not start_date < end_date:
        raise ValueError('start date before end date')
    if start_date <= 0:
        raise ValueError('start date is not proper')
    if end_date > 31:
        raise ValueError('end date is out of range')
    if year not in df.index.year:
        raise ValueError('year is out of range')
    if month not in df.index.month:
        raise ValueError('month is out of range')

    selected = ((df.index.year == year) & (df.index.month == month) &
                (df.index.day >= start_date) & (df.index.day <= end_date))
    # Copy before changing the index so df itself is never modified.
    dft = df[selected].copy()
    if dft.empty:
        raise ValueError('no data in the selected date range')

    # The subtitle reports the selected range before the time shift below.
    from_ = dft.index.min().strftime('%Y-%B-%d')
    to_ = dft.index.max().strftime('%Y-%B-%d')

    dft.index = (dft.index + datetime.timedelta(hours=-6)).rename('lst')
    labels = [int(x) for x in dft.columns]

    fig, ax = plt.subplots(figsize=(10, 6))

    ax.plot(dft, lw=.8)
    ax.legend(labels, ncol=len(labels)+1, loc='upper center')
    fig.suptitle('Hourly ${CO}_2$ concentration at different intake height, Moody, TX')
    ax.set_title(f'From: {from_} to {to_}')
    ax.set_ylabel('Concentration, ppm')
    ax.xaxis.set_major_locator(DayLocator())
    # Minor ticks every six hours; valid hours are 0-23, so stop before 24.
    ax.xaxis.set_minor_locator(HourLocator(range(0, 24, 6)))
    ax.xaxis.set_major_formatter(DateFormatter('%d'))
    fig.savefig('co2_diurnal.png', dpi=200)
    fig.show()


In [23]:
# Plot the first ten days of August 2002.
kws = dict(year=2002, month=8, start_date=1, end_date=10)
plot_with_slider(df, **kws)

In [ ]:


get Jupyter Notebook: