Diurnal CO2

Feb. 13, 2022

CO2 is a well-known gas, but obtaining measurements with a time resolution fine enough to confirm a daily trend is far from easy. In this post, we will see a clear daily trend in CO2 concentration driven by photosynthesis and respiration.

The CO2 concentration increased during the night and peaked near sunrise. The opposite trend occurred during the daytime.


Library

In [1]:
import warnings
warnings.filterwarnings('ignore') # make the output cleaner
In [2]:
import re
import datetime
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
In [3]:
# start from matplotlib's default style and make 8x6 the default figure size
plt.style.use('default')
plt.rcParams["figure.figsize"] = (8,6)

Prepare data

  • this post is the second part of analyzing hourly CO2 data. The data should be downloaded into the hourly folder
  • if you prefer to start from this post, please repeat part 2.1 to get the proper setup.

TOP

Diurnal trend

  • let's return to the question: can we see any trend?
  • draw a shade over the daytime, given the sunrise and sunset times and a dataframe of CO2 concentration over time
  • you can see the most recent data for WKT directly on the NOAA website
In [4]:
# convert to datetime object
def make_dt(row):
    """Build a ``datetime.datetime`` from the date/time fields of ``row``.

    ``row`` is indexed with the keys 'year', 'month', 'day', 'hour',
    'minute' and 'second'; every value is converted with ``int``.

    Returns the datetime, or ``None`` when the lookup or the conversion
    fails (the exception and the offending row are printed).
    """
    fields = ('year', 'month', 'day', 'hour', 'minute', 'second')
    try:
        # read every field first, then convert
        raw_parts = [row[name] for name in fields]
        return datetime.datetime(*[int(part) for part in raw_parts])
    except Exception as e:
        print(f'Exception converting datetime: {e}')
        print(row)
        return None
In [5]:
def clean_co2_text(fpath):
    """Load an hourly CO2 text file into a time-indexed dataframe.

    The file is scanned for the first line that starts with ``site_code``.
    That line supplies the whitespace-separated column names, and every
    non-blank line after it is treated as a data row.

    Parameters
    ----------
    fpath : str or path-like
        Path of the text file to read.

    Returns
    -------
    pandas.DataFrame or None
        Frame indexed by ``time`` with numeric ``value`` and
        ``intake_height`` columns.  ``None`` is returned when the first data
        row does not have as many fields as the header; the row and its
        fields are printed for inspection.

    Raises
    ------
    ValueError
        If there is no ``site_code`` header line or no data row after it.
    """
    with open(fpath) as f:
        lines = f.readlines()

    header_idx = next(
        (i for i, line in enumerate(lines) if line.startswith('site_code')),
        None,
    )
    if header_idx is None:
        raise ValueError(f"no 'site_code' header line found in {fpath}")
    print(header_idx)

    # str.split() with no argument splits on any run of whitespace, so the
    # header parses the same way as the data rows (spaces or tabs alike)
    cols = lines[header_idx].split()

    data_lines = [line for line in lines[header_idx + 1:] if line.strip()]
    if not data_lines:
        raise ValueError(f'no data rows after the header in {fpath}')
    rows = [line.split() for line in data_lines]

    if len(rows[0]) != len(cols):
        print(data_lines[0])
        print(rows[0])
        return None

    df = pd.DataFrame(data=rows, columns=cols)
    # rows shorter than the header are padded with None; discard them
    df.dropna(inplace=True)

    # assemble all timestamps in one vectorised call instead of row by row;
    # rows whose fields do not form a valid date become NaT
    time_cols = ['year', 'month', 'day', 'hour', 'minute', 'second']
    df['time'] = pd.to_datetime(df[time_cols], errors='coerce')
    n_bad = int(df['time'].isna().sum())
    if n_bad:
        print(f'{n_bad} row(s) with an invalid timestamp')

    df['value'] = pd.to_numeric(df['value'])
    df['intake_height'] = pd.to_numeric(df['intake_height'])
    return df[['time', 'value', 'intake_height']].set_index('time')
In [6]:
# load the file into a clean dataframe indexed by time
# (the number printed below is the index of the header line found in the file)
df = clean_co2_text('./hourly/co2_tower-insitu_1_ccgg_ASCIItext/co2_wkt_tower-insitu_1_ccgg_HourlyData.txt')
148
In [7]:
df.head()
Out[7]:
value intake_height
time
2001-02-14 00:00:00 379.1 30.0
2001-02-14 00:00:00 377.0 61.0
2001-02-14 00:00:00 374.8 122.0
2001-02-14 00:00:00 372.4 457.0
2001-02-14 01:00:00 382.7 9.0
In [8]:
# cleaning the data is slow; keep a deep copy of the cleaned frame so the
# cleaning step does not have to be repeated if df is modified later
dfo = df.copy(deep=True)
In [9]:
# pivot table: keep the time index and turn each intake height into its own
# column holding the CO2 value
df = df.pivot(columns='intake_height', values='value')
In [10]:
# plotting the whole range of data is noisy and slow,
# so dft selects a shorter time span: the first four days of August 2002
dft = df[(df.index.year == 2002) & (df.index.month==8) & (df.index.day <=4)]
dft.plot()
Out[10]:
  • there is a clear daily trend of CO2. The concentration rises and falls at the same time at every height, but by a different amount at each height. This has to do with photosynthesis or the accumulation of produced CO2, assuming photosynthesis and respiration are the main drivers.
  • the effect on CO2 is strongest at the lowest height. This supports the idea that the drivers are at ground level, such as trees
  • the pattern is not easy to see yet, since the graph does not clearly display the hours and the daytime
In [11]:
# this function shades the daytime to make the pattern easier to see

def day_shade(df, ax, srise=(6, 45), sset=(20, 26)):
    '''Draw an orange shade over the daytime of every date found in df.

    Parameters
    ----------
    df : dataframe with a datetime index; its overall minimum and maximum
        values give the bottom and top of the shaded band
    ax : axes object to draw on (its fill_between method is used)
    srise : (hour, minute) of the sunrise
    sset : (hour, minute) of the sunset

    Returns None; the shade is added to ax as a side effect.
    '''
    sunrise = datetime.time(srise[0], srise[1])
    sunset = datetime.time(sset[0], sset[1])
    co2_max = df.max().max()
    co2_min = df.min().min()

    # dict.fromkeys drops duplicate dates but keeps their first-seen order,
    # so the bands are always drawn in the same order
    for date in dict.fromkeys(df.index.date):
        day_start = datetime.datetime.combine(date, sunrise)
        day_end = datetime.datetime.combine(date, sunset)
        ax.fill_between(x=df.index,
                        y1=co2_min,
                        y2=co2_max,
                        where=(df.index >= day_start) & (df.index <= day_end),
                        alpha=0.1, fc='orange')
In [12]:
# select the first four days of August 2002
dft = df[(df.index.year == 2002) & (df.index.month==8) & (df.index.day <=4)]
In [13]:
# convert the timestamps from GMT to local time by shifting them back 6 hours,
# then use the shifted times ('lst') as the new index
dft['lst'] = dft.index + datetime.timedelta(hours=-6)
dft.set_index('lst', inplace=True)
dft.head()
Out[13]:
intake_height 9.0 30.0 61.0 122.0 244.0 457.0
lst
2002-07-31 18:00:00 368.1 367.3 367.2 367.1 367.1 367.1
2002-07-31 19:00:00 369.2 368.1 367.4 366.9 366.1 366.4
2002-07-31 20:00:00 372.0 370.3 369.2 367.6 365.1 365.8
2002-07-31 21:00:00 374.4 372.8 371.2 368.8 365.6 365.8
2002-07-31 22:00:00 377.1 375.4 374.3 371.8 368.6 366.3
In [14]:
# first four days of August 2002, re-indexed to local time (GMT - 6 hours),
# plotted on top of the daytime shade
dft = df[(df.index.year == 2002) & (df.index.month==8) & (df.index.day <=4)]
dft['lst'] = dft.index + datetime.timedelta(hours=-6)
dft.set_index('lst', inplace=True)
fig, ax = plt.subplots()
day_shade(dft, ax)
ax.plot(dft, lw=.5);
  • now we are more confident that during the daytime, the CO2 concentration decreases starting from sunrise. The opposite trend occurs during the nighttime, with the concentration peaking close to sunrise
In [15]:
def plot_with_slider(df, start_date=None, end_date=None):
    """Plot hourly CO2 for a range of days in August 2002 with the daytime shaded.

    Parameters
    ----------
    df : dataframe with a datetime index; every column is drawn as one line.
        The index is shifted back 6 hours (GMT to local time) before plotting.
    start_date : int
        First day of the month to include (1 or later).
    end_date : int
        Last day of the month to include; must be after start_date and at
        most 31.

    Raises
    ------
    TypeError
        If start_date is not an int.
    ValueError
        If the day range is not valid.
    """
    # explicit raises instead of assert: asserts are stripped under python -O
    if not isinstance(start_date, int):
        raise TypeError('check start_date type')
    if not start_date < end_date:
        raise ValueError('start date before end date')
    if start_date <= 0:
        raise ValueError('start date is not proper')
    if end_date > 31:  # day 31 itself is a valid last day
        raise ValueError('end date is out of range')

    selected = ((df.index.year == 2002) & (df.index.month == 8) &
                (df.index.day >= start_date) & (df.index.day <= end_date))
    dft = df[selected]
    # re-index to local time directly, without adding a temporary column
    dft.index = (dft.index + datetime.timedelta(hours=-6)).rename('lst')
    labels = list(dft.columns)

    fig, ax = plt.subplots()

    ax.plot(dft, lw=.8)
    ax.legend(labels)
    day_shade(dft, ax)
    ax.set_title('Hourly CO2 concentration at different intake height, Moody, TX')
    ax.set_ylabel('Concentration, ppm')

    fig.autofmt_xdate()
    fig.show()
In [16]:
# this version fixes the month and year (August 2002); they can be changed
# inside the function above, or exposed by adding more arguments to it
plot_with_slider(df, 1, 10)
In [17]:
def plot_with_slider(df, year=None, month=None, start_date=None, end_date=None):
    """Plot hourly CO2 for a range of days of one month with the daytime shaded.

    Parameters
    ----------
    df : dataframe with a datetime index; every column is drawn as one line.
        The index is shifted back 6 hours (GMT to local time) before plotting.
    year, month : int
        Year and month to plot; each must occur somewhere in df.index.
    start_date : int
        First day of the month to include (1 or later).
    end_date : int
        Last day of the month to include; must be after start_date and at
        most 31.

    Raises
    ------
    TypeError
        If start_date is not an int.
    ValueError
        If the day range is not valid, or year/month do not occur in df.index.
    """
    # explicit raises instead of assert: asserts are stripped under python -O
    if not isinstance(start_date, int):
        raise TypeError('check start_date type')
    if not start_date < end_date:
        raise ValueError('start date before end date')
    if start_date <= 0:
        raise ValueError('start date is not proper')
    if end_date > 31:  # day 31 itself is a valid last day
        raise ValueError('end date is out of range')
    if year not in df.index.year:
        raise ValueError('year is out of range')
    if month not in df.index.month:
        raise ValueError('month is out of range')

    selected = ((df.index.year == year) & (df.index.month == month) &
                (df.index.day >= start_date) & (df.index.day <= end_date))
    dft = df[selected]
    # re-index to local time directly, without adding a temporary column
    dft.index = (dft.index + datetime.timedelta(hours=-6)).rename('lst')
    labels = list(dft.columns)

    fig, ax = plt.subplots()

    ax.plot(dft, lw=.8)
    ax.legend(labels)
    day_shade(dft, ax)
    ax.set_title('Hourly CO2 concentration at different intake height, Moody, TX')
    ax.set_ylabel('Concentration, ppm')

    fig.autofmt_xdate()
    fig.show()
In [18]:
# days 1 to 10 of August 2003
plot_with_slider(df, 2003, 8, 1, 10)
In [19]:
# or define the arguments in a dictionary and unpack it into keyword arguments
kws  = {'year': 2003,
       'month': 8,
       'start_date': 10,
       'end_date': 15}
plot_with_slider(df, **kws)
In [20]:
# format the x ticks 
import matplotlib.dates as mdates
In [21]:
from matplotlib.dates import DayLocator, HourLocator, DateFormatter, drange
In [22]:
def plot_with_slider(df, year=None, month=None, start_date=None, end_date=None):
    """Plot hourly CO2 for a range of days of one month and save the figure.

    The daytime is shaded, the x axis gets one major tick per day and minor
    ticks every 6 hours, and the figure is written to 'co2_diurnal.png'.

    Parameters
    ----------
    df : dataframe with a datetime index; every column is drawn as one line
        and the column labels (converted to int) are used in the legend.
        The index is shifted back 6 hours (GMT to local time) before plotting.
    year, month : int
        Year and month to plot; each must occur somewhere in df.index.
    start_date : int
        First day of the month to include (1 or later).
    end_date : int
        Last day of the month to include; must be after start_date and at
        most 31.

    Raises
    ------
    TypeError
        If start_date is not an int.
    ValueError
        If the day range is not valid, year/month do not occur in df.index,
        or the selected period contains no data.
    """
    # explicit raises instead of assert: asserts are stripped under python -O
    if not isinstance(start_date, int):
        raise TypeError('check start_date type')
    if not start_date < end_date:
        raise ValueError('start date before end date')
    if start_date <= 0:
        raise ValueError('start date is not proper')
    if end_date > 31:  # day 31 itself is a valid last day
        raise ValueError('end date is out of range')
    if year not in df.index.year:
        raise ValueError('year is out of range')
    if month not in df.index.month:
        raise ValueError('month is out of range')

    selected = ((df.index.year == year) & (df.index.month == month) &
                (df.index.day >= start_date) & (df.index.day <= end_date))
    dft = df[selected]
    if dft.empty:
        # min()/max() of an empty index cannot be formatted for the title
        raise ValueError('no data in the selected period')

    # the dates in the title are taken before the shift to local time
    from_ = dft.index.min().strftime('%Y-%B-%d')
    to_ = dft.index.max().strftime('%Y-%B-%d')

    # re-index to local time directly, without adding a temporary column
    dft.index = (dft.index + datetime.timedelta(hours=-6)).rename('lst')
    labels = [int(x) for x in dft.columns]

    fig, ax = plt.subplots(figsize=(10,6))

    ax.plot(dft, lw=.8)
    ax.legend(labels, ncol=len(labels)+1, loc='upper center')
    day_shade(dft, ax)
    fig.suptitle('Hourly ${CO}_2$ concentration at different intake height, Moody, TX')
    ax.set_title(f'From: {from_} to {to_}')
    ax.set_ylabel('Concentration, ppm')
    ax.xaxis.set_major_locator(DayLocator())
    # hours run from 0 to 23, so the 6-hour ticks are 0, 6, 12 and 18
    ax.xaxis.set_minor_locator(HourLocator(range(0, 24, 6)))
    ax.xaxis.set_major_formatter(DateFormatter('%d'))
    fig.savefig('co2_diurnal.png', dpi=200)
    fig.show()
    
In [23]:
# days 1 to 10 of August 2002, now with formatted x ticks
kws  = {'year': 2002,
       'month': 8,
       'start_date': 1,
       'end_date': 10}
plot_with_slider(df, **kws)
In [ ]:
 
get Jupyter Notebook: