Generate tidal geomedian filmstrips

What does this notebook do?

This notebook demonstrates how to tidally tag remotely sensed imagery using the OSU Tidal Prediction Software (OTPS) model, and create geomedian composites for a given set of epochs and tidal range. These geomedian composites are exported as GeoTIFFs, and plotted as ‘filmstrip’ plots that compare change over time.


You need to run the following commands from the command line prior to launching jupyter notebooks from the same terminal so that the required libraries and paths are set:

module use /g/data/v10/public/modules/modulefiles

module load dea/20180515 (currently using an older version of ``dea`` due to a bug in ``xr.concat``; will be reverted to ``module load dea`` in future)

module load otps

If you find an error or bug in this notebook, please either create an ‘Issue’ in the Github repository, or fix it yourself and create a ‘Pull’ request to contribute the updated notebook back into the repository (See the repository README for instructions on creating a Pull request).

Date: August 2018

Author: Robbi Bishop-Taylor, Claire Phillips

Tags: tidal_model, OTPS, tidal_tagging, predict_tide, composites, dask, write_geotiff, filmstrip_plot, geopandas, point_in_polygon

Import modules

import itertools
import os
import sys
import warnings
from collections import defaultdict
from datetime import datetime, timedelta

import datacube
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
from datacube.helpers import ga_pq_fuser
from datacube.helpers import write_geotiff
from datacube.storage import masking  # NOTE(review): original line was truncated to 'from import masking'; DEA-era datacube exposes masking here — confirm
from datacube.utils import geometry
from datacube.utils.geometry import CRS
from datacube_stats.statistics import GeoMedian
from dateutil.relativedelta import relativedelta
from matplotlib.ticker import FormatStrFormatter
from otps import TimePoint
from otps import predict_tide
from shapely.geometry import Point

# For nicer notebook plotting, hide warnings (comment out for real analysis)
# NOTE(review): the filterwarnings call below was lost in extraction; it is the
# obvious reconstruction implied by the comment above — confirm
warnings.filterwarnings('ignore')

# Create the datacube connection used for every data load in this notebook
dc = datacube.Datacube(app='Tidal geomedian filmstrips')

def date_range(start_date, end_date, increment, period):

    """Generate a list of dates separated by a given time increment/period.

    Starts at `start_date` and steps forward by `increment` units of `period`
    (a `relativedelta` keyword such as 'years', 'months' or 'hours') until
    `end_date` is passed. `end_date` itself is included when it falls exactly
    on a step, which is how the epoch list below gets its closing bound.

    Bug fix: the original loop never appended to `result`, so it always
    returned an empty list.
    """

    result = []
    nxt = start_date
    delta = relativedelta(**{period: increment})
    while nxt <= end_date:
        result.append(nxt)
        nxt += delta
    return result

def ds_to_rgb(ds, bands=['red', 'green', 'blue'], reflect_stand=4000):

    """Converts an xarray dataset to a three band array"""

    rawimg = np.transpose(ds[bands].to_array().data, [1, 2, 0])
    img_toshow = (rawimg / reflect_stand).clip(0, 1)
    return img_toshow

Failed to resolve driver datacube.plugins.index::s3aio_index

Geomedian filmstrip parameters

Set the area, time period and sensors of interest, and tide limits and epoch length used to produce each geomedian composite. This is the only cell that needs to be edited to run the notebook.

# Set up centre of study area and buffer size in metres for data extraction.
# This is the only cell that needs editing to run the notebook elsewhere.
study_area = 'gulfcarpentaria'  # name used as prefix for output files
lat, lon = -17.58253, 139.82613  # centre of study area (WGS84 degrees)
buffer = 3500  # metre units to extend region of interest on each side of centre point

# Set up compositing parameters
time_period = ('2000-01-01', '2018-01-01')  # Total date range to import data from
sensors = ['ls5', 'ls7', 'ls8']  # Landsat sensors from which to import data
ls7_slc_off = False  # Whether to include striped Landsat 7 SLC-off data (acquired after May 31 2003)
lower_tideheight = 0.4  # Minimum proportion of the observed tidal range to include
upper_tideheight = 0.6  # Maximum proportion of the observed tidal range to include
epoch_years = 6  # Length of each epoch used to compute geomedians (e.g. 6 years = 2000 to 2006)

Return observations for each sensor for the entire time series

Use dask to lazily load Landsat data and PQ for each sensor for the entire time series. No data is actually loaded here: this is saved until after the layers have been filtered by tidal stage and date to save processing time.

# If the output data and figure directories don't exist, create them.
# NOTE(review): the directory-creation statements were lost in extraction;
# os.makedirs is the obvious reconstruction implied by the isdir guards
if not os.path.isdir('output_data/{}/'.format(study_area)):
    os.makedirs('output_data/{}/'.format(study_area))

if not os.path.isdir('figures/{}/'.format(study_area)):
    os.makedirs('figures/{}/'.format(study_area))

# Create list of epochs between start and end of time_period in datetime format
start = datetime.strptime(time_period[0], '%Y-%m-%d')
end = datetime.strptime(time_period[1], '%Y-%m-%d')
epochs = date_range(start, end, epoch_years, 'years')

# Print list of epochs (the final date only bounds the last epoch, so drop it)
epochs_strings = [epoch.strftime('%Y-%m-%d') for epoch in epochs][:-1]
print('Processing {} epochs: {}'.format(len(epochs_strings), ', '.join(epochs_strings)))

# Identify the tidal modelling point ('tidepost') by selecting the tidal model
# polygon that contains the study area centre; each polygon carries the
# lon/lat of its representative tidepost as attributes
polygons_gpd = gpd.read_file('/g/data/r78/intertidal/GA_native_tidal_model.shp')
polygon_gpd = polygons_gpd[polygons_gpd.intersects(Point(lon, lat))]
tidepost_lon, tidepost_lat = polygon_gpd[['lon', 'lat']].values[0]
print('Using tidepost coordinates: {} E, {} S\n'.format(tidepost_lon, tidepost_lat))

# Set up the spatial query by reprojecting the centre point into Australian
# Albers (EPSG:3577) and buffering it on each side
x, y = geometry.point(lon, lat, CRS('WGS84')).to_crs(CRS('EPSG:3577')).points[0]
query = {'x': (x - buffer, x + buffer),
         'y': (y - buffer, y + buffer),
         'time': time_period,
         'crs': 'EPSG:3577'}

# Output dicts to hold entire time-series for each sensor
sensor_dict = {}
pq_dict = {}

# For each sensor, dask-load data and compute tide heights for each observation
for sensor in sensors:

    # The following code will fail if there is no data for the sensor in time_period;
    # this try statement catches this and skips the affected sensor.
    # NOTE(review): the `try:` line and the closing `**query)` arguments of
    # both dc.load calls were lost in extraction and are reconstructed here;
    # fuse_func=ga_pq_fuser is inferred from the otherwise-unused import — confirm
    try:

        # Return observations matching query without actually loading them using dask
        sensor_all = dc.load(product='{}_nbart_albers'.format(sensor),
                             group_by='solar_day',
                             dask_chunks={'time': 1},
                             **query)

        # Load PQ data matching query without actually loading them using dask
        pq_all = dc.load(product='{}_pq_albers'.format(sensor),
                         group_by='solar_day',
                         dask_chunks={'time': 1},
                         fuse_func=ga_pq_fuser,
                         **query)

        # Remove Landsat 7 SLC-off from the dataset if ls7_slc_off = False
        if not ls7_slc_off and sensor == 'ls7':
            sensor_all = sensor_all.where(sensor_all.time < np.datetime64('2003-05-30'), drop=True)

        # Use the tidal model to compute tide heights for each observation:
        # convert acquisition timestamps to Python datetimes, wrap them in
        # OTPS TimePoints at the tidepost, and predict the tide for each
        print('Computing tidal heights for {} {} observations'.format(len(sensor_all.time), sensor))
        obs_datetimes = sensor_all.time.data.astype('M8[s]').astype('O').tolist()
        obs_timepoints = [TimePoint(tidepost_lon, tidepost_lat, dt) for dt in obs_datetimes]
        obs_predictedtides = predict_tide(obs_timepoints)
        obs_tideheights = [predictedtide.tide_m for predictedtide in obs_predictedtides]

        # Assign tide heights to the dataset as a new variable
        sensor_all['tide_heights'] = xr.DataArray(obs_tideheights, [('time', sensor_all.time)])

        # Append results for each sensor to a dictionary with sensor name as the key
        sensor_dict[sensor] = sensor_all
        pq_dict[sensor] = pq_all

    except AttributeError:

        # dc.load returns an empty object with no .time for products with no
        # matching data, which raises AttributeError above
        print('{} selection failed due to likely lack of data in time_period; skipping'.format(sensor))
Processing 3 epochs: 2000-01-01, 2006-01-01, 2012-01-01
Using tidepost coordinates: 140.26 E, -17.52 S

Computing tidal heights for 154 ls5 observations
Computing tidal heights for 69 ls7 observations
Computing tidal heights for 107 ls8 observations

Subset data by tidal height and epoch time range

Use image time-stamps to compute tidal heights at the time of each observation, and take a subset of the entire Landsat and PQ time series based on tide height and epoch date range for each sensor.

# If any sensor has 0 observations, remove it from the dictionary before proceeding
sensor_dict = {key: value for key, value in sensor_dict.items() if len(value.time) > 0}

# Calculate max and min tide heights for the entire time series and all sensors
obs_min = np.min([sensor_ds.tide_heights.min() for sensor_ds in sensor_dict.values()])
obs_max = np.max([sensor_ds.tide_heights.max() for sensor_ds in sensor_dict.values()])
obs_range = obs_max - obs_min

# Convert the proportional tide limits into absolute heights for selection
sel_min = obs_min + (obs_range * lower_tideheight)
sel_max = obs_min + (obs_range * upper_tideheight)
print('Analysing tidal heights of {0:.2f} m to {1:.2f} m out of an observed local tidal '
      'range of {2:.2f} m to {3:.2f} m'.format(sel_min, sel_max, obs_min, obs_max))

# Create dictionaries to hold filtered sensor data for each epoch
sensor_epoch_dict = defaultdict(list)
pq_epoch_dict = defaultdict(list)

# Filter every (epoch, sensor) combination by tide height and date range
for epoch, sensor in itertools.product(epochs[:-1], sensor_dict.keys()):

    # Select dataset
    sensor_all = sensor_dict[sensor]
    pq_all = pq_dict[sensor]

    # Filter to keep only observations that have matching PQ data
    # (subtracting the time coordinates aligns the datasets on common timesteps)
    time = (sensor_all.time - pq_all.time).time
    sensor_subset = sensor_all.sel(time=time)
    pq_subset = pq_all.sel(time=time)

    # Filter by tidal stage
    sensor_subset = sensor_subset.where((sensor_subset.tide_heights >= sel_min) &
                                        (sensor_subset.tide_heights <= sel_max), drop=True)

    # Filter pq to same timesteps (this avoids conversion to float by `.where`)
    pq_subset = pq_subset.sel(time=sensor_subset.time)

    # Identify from and to date strings
    from_date = epoch.strftime('%Y-%m-%d')
    to_date = (epoch + relativedelta(years=epoch_years)).strftime('%Y-%m-%d')

    # Subset sensor to from and to date for epoch
    print('    Filtering from {} to {} for {}'.format(from_date, to_date, sensor))
    sensor_subset = sensor_subset.sel(time=slice(from_date, to_date))
    pq_subset = pq_subset.sel(time=slice(from_date, to_date))

    # Add subsetted data to dicts (one key matching a list of sensor data for each epoch).
    # NOTE(review): the append statements were lost in extraction; reconstructed
    # from the defaultdict(list) declarations above and the later per-epoch
    # list iteration — confirm against the original notebook
    sensor_epoch_dict[from_date].append(sensor_subset)
    pq_epoch_dict[from_date].append(pq_subset)
Analysing tidal heights of -0.14 m to 0.42 m out of an observed local tidal range of -1.24 m to 1.52 m
    Filtering from 2000-01-01 to 2006-01-01 for ls5
    Filtering from 2000-01-01 to 2006-01-01 for ls7
    Filtering from 2000-01-01 to 2006-01-01 for ls8
    Filtering from 2006-01-01 to 2012-01-01 for ls5
    Filtering from 2006-01-01 to 2012-01-01 for ls7
    Filtering from 2006-01-01 to 2012-01-01 for ls8
    Filtering from 2012-01-01 to 2018-01-01 for ls5
    Filtering from 2012-01-01 to 2018-01-01 for ls7
    Filtering from 2012-01-01 to 2018-01-01 for ls8

Combine multiple sensors, load data and generate geomedians

For each epoch, combine all sensors into one dataset, load the data for the first time using dask’s .compute(), then composite all timesteps into a single array using a geometric median computation.

# Dicts to hold output geomedian composites (and tide heights, if needed later)
geomedian_dict = {}
tideheights_dict = {}

for from_date in sensor_epoch_dict.keys():

    # For the first time, actually load data for all sensors and then combine into one dataset
    print('Loading and combining {} data for {}'.format(", ".join(sensor_dict.keys()), from_date))
    sensor_combined = xr.concat([i.compute() for i in sensor_epoch_dict[from_date]], dim='time')
    pq_combined = xr.concat([i.compute() for i in pq_epoch_dict[from_date]], dim='time')
    print('    {} observations combined'.format(len(sensor_combined.time)))

    # Manually add flag definition back in (it is lost during the concatenation).
    # NOTE(review): `pq_all` is whatever PQ dataset the earlier sensor loop left
    # behind; this only works because flags_definition is the same for every
    # Landsat PQ product — confirm
    pq_combined.pixelquality.attrs['flags_definition'] = pq_all.pixelquality.flags_definition

    # Sort output datasets by date
    sensor_combined = sensor_combined.sortby('time')
    pq_combined = pq_combined.sortby('time')

    # Identify pixels with no clouds/shadows in either ACCA or Fmask.
    # NOTE(review): the make_mask keyword arguments were lost in extraction;
    # reconstructed from the comment above (ACCA + Fmask cloud and shadow
    # flags plus band contiguity) — confirm against the original notebook
    good_quality = masking.make_mask(pq_combined.pixelquality,
                                     cloud_acca='no_cloud',
                                     cloud_fmask='no_cloud',
                                     cloud_shadow_acca='no_cloud_shadow',
                                     cloud_shadow_fmask='no_cloud_shadow',
                                     contiguous=True)

    # Apply mask to set all PQ-affected pixels to NaN and set nodata to NaN
    print('    Applying PQ mask and setting nodata to NaN')
    sensor_combined = sensor_combined.where(good_quality)
    sensor_combined = masking.mask_invalid_data(sensor_combined)

    # Compute geomedian composite using all timesteps
    print('    Computing geometric median')
    geomedian_ds = GeoMedian().compute(sensor_combined)

    # Export to file
    filename = 'output_data/{0}/{0}_{1}.tif'.format(study_area, from_date)
    print('    Exporting to {}'.format(filename))
    write_geotiff(filename=filename, dataset=geomedian_ds)

    # Assign to dict
    geomedian_dict[from_date] = geomedian_ds

Loading and combining ls5, ls7, ls8 data for 2000-01-01
    28 observations combined
    Applying PQ mask and setting nodata to NaN
    Computing geometric median
    Exporting to output_data/gulfcarpentaria/gulfcarpentaria_2000-01-01.tif
Loading and combining ls5, ls7, ls8 data for 2006-01-01
    32 observations combined
    Applying PQ mask and setting nodata to NaN
    Computing geometric median
    Exporting to output_data/gulfcarpentaria/gulfcarpentaria_2006-01-01.tif
Loading and combining ls5, ls7, ls8 data for 2012-01-01
    27 observations combined
    Applying PQ mask and setting nodata to NaN
    Computing geometric median
    Exporting to output_data/gulfcarpentaria/gulfcarpentaria_2012-01-01.tif

Plot geomedians as filmstrips

Plots geomedians for each epoch as single true and false-colour filmstrip plots:

# Set up dict of band combinations to iterate through to produce filmstrip plots
band_dict = {'falsecolour': ['swir1', 'nir', 'green'],
             'truecolour': ['red', 'green', 'blue']}

# Load ITEM confidence layer that matches dataset and export to file.
# NOTE(review): the export call was truncated in extraction; reconstructed
# around the surviving profile_override argument — confirm output filename
# and whether the full dataset or a single timestep was written
item_data = dc.load(product='item_v2_conf', **query)
write_geotiff(filename='output_data/{0}/{0}_item_conf.tif'.format(study_area),
              dataset=item_data.isel(time=0),
              profile_override={'nodata': -6666})

# Create a filmstrip plot for each band combination
for name, bands in band_dict.items():

    # Compute number of epochs for figure construction
    epochs_n = len(geomedian_dict.keys())
    print('Generating {}-panel {} filmstrip plot'.format(1 + epochs_n, name))

    # Set up plot with multiple columns (ITEM confidence + n epochs)
    fig, axes = plt.subplots(ncols=1 + epochs_n, nrows=1,
                             figsize=(15, 15), sharex=True, sharey=True)

    # Remove space between subplots
    fig.subplots_adjust(wspace=0, hspace=0)

    # Plot ITEM confidence in first column
    axes[0].imshow(item_data.isel(time=0).stddev.values, cmap='RdYlBu_r',
                   vmin=0.05, vmax=0.35, interpolation='bilinear')
    axes[0].set_title('ITEM confidence', fontsize=6)

    # Remove axes ticks and space on left and bottom of plot
    # NOTE(review): these two lines were lost in extraction; reconstructed to
    # mirror the identical treatment of the epoch panels below — confirm
    axes[0].get_xaxis().set_visible(False)
    axes[0].get_yaxis().set_visible(False)

    # Progressively add epoch geomedians to remaining subplot columns
    for i, from_date in enumerate(sensor_epoch_dict.keys()):

        # Compute upper date bound for plot title
        # NOTE(review): expression truncated in extraction; reconstructed to
        # match the epoch arithmetic used earlier in the notebook — confirm
        to_date = (datetime.strptime(from_date, '%Y-%m-%d') +
                   relativedelta(years=epoch_years)).strftime('%Y-%m-%d')

        # Geomedian for given epoch
        epoch_ds = geomedian_dict[from_date]

        # Convert ds to three band array, using a different reflect_stand
        # value for True and False colour to produce bright/vibrant plots
        epoch_array = ds_to_rgb(epoch_ds, bands=bands,
                                reflect_stand={'falsecolour': 4000,
                                               'truecolour': 2500}[name])

        # Plot data into column
        axes[i + 1].imshow(epoch_array, interpolation='bilinear')
        axes[i + 1].set_title('{} to {}'.format(from_date, to_date), fontsize=6)

        # Remove axes ticks and space on left and bottom of plot
        axes[i + 1].get_xaxis().set_visible(False)
        axes[i + 1].get_yaxis().set_visible(False)

    # Export combined figure to file
    filename = 'figures/{0}/{0}_{1}.png'.format(study_area, name)
    print('    Exporting to {}'.format(filename))
    fig.savefig(filename, dpi=300, bbox_inches='tight', pad_inches=0)

Generating 4-panel falsecolour filmstrip plot
    Exporting to figures/gulfcarpentaria/gulfcarpentaria_falsecolour.png
Generating 4-panel truecolour filmstrip plot
    Exporting to figures/gulfcarpentaria/gulfcarpentaria_truecolour.png

Create tidal modelled vs observed plot

Create a plot comparing selected Landsat observations to all Landsat observations and the entire tidal history of the study area:

# Predict the tide height for every hour across the full time series
hourly_times = date_range(start, end, 1, 'hours')
hourly_points = [TimePoint(tidepost_lon, tidepost_lat, dt) for dt in hourly_times]
hourly_heights = [pred.tide_m for pred in predict_tide(hourly_points)]

# Convert the modelled tidal record into a time-indexed dataframe
model_df = pd.DataFrame({'tide_heights': hourly_heights}, index=pd.DatetimeIndex(hourly_times))

# Combine the previously-computed per-sensor observed tide heights into one dataframe
obs_df = xr.concat([ds.tide_heights for ds in sensor_dict.values()], dim='time').to_dataframe()

# Set up the plot canvas
fig = plt.figure(figsize=(10, 4))
plt.ylabel('Tide height (m)')

# Title annotation identifying the tidepost and study area
plt.annotate('Tidepost: {} E, {} S ({})'.format(tidepost_lon, tidepost_lat, study_area),
             xy=(0, 1.2), xycoords='axes fraction',
             xytext=(5, -3), textcoords='offset points',
             fontsize=10, verticalalignment='top', fontweight='bold')

# Full modelled tidal record as a grey background trace
plt.plot(model_df.index, model_df.tide_heights,
         color='gainsboro', linewidth=0.6, zorder=1, label='OTPS model')

# Every satellite observation as small grey points
plt.scatter(obs_df.index, obs_df.tide_heights,
            s=4, color='darkgrey', marker='o', zorder=2, label='All observations')

# Observations inside the selected tidal window as larger black points; the
# small tolerance keeps boundary points from being dropped by float error
in_window = obs_df.tide_heights.between(sel_min - 0.001, sel_max + 0.001)
window_df = obs_df[in_window]
plt.scatter(window_df.index, window_df.tide_heights,
            s=10, color='black', marker='o', zorder=2, label='Selected observations')

# Horizontal lines marking the borders of the selected tidal range
for bound in (sel_min, sel_max):
    plt.axhline(y=bound, color='red', alpha=0.2)

# Vertical lines and annotations marking the start of each epoch
for epoch in epochs[:-1]:
    epoch_start = epoch.strftime('%Y-%m-%d')
    epoch_end = (epoch + relativedelta(years=epoch_years)).strftime('%Y-%m-%d')
    plt.axvline(x=epoch, color='red', alpha=0.2)
    plt.annotate('{} to {}'.format(epoch_start, epoch_end),
                 xy=(epoch, model_df.tide_heights.max()),
                 xytext=(5, 10), textcoords='offset points', fontsize=7)

# Add legend above the axes
plt.legend(bbox_to_anchor=(0.975, 1.2), loc=1, borderaxespad=0, ncol=3,
           handletextpad=0.1, frameon=False, columnspacing=0.3, fontsize=7)

# Export plot to file
filename = 'figures/{0}/{0}_tideobs.png'.format(study_area)
print('    Exporting to {}'.format(filename))
fig.savefig(filename, dpi=300, bbox_inches='tight', pad_inches=0)
    Exporting to figures/gulfcarpentaria/gulfcarpentaria_tideobs.png
[ ]: