Basic Workflow Example
This example demonstrates how to set up and run a basic MarrmotFlow workflow with a single catchment and simple forcing data.
Overview
In this example, we will:
Load catchment boundary data
Prepare meteorological forcing data
Create a MarrmotFlow workflow
Configure model parameters
Analyze results
Prerequisites
Make sure you have the following data files:
Catchment boundary shapefile (
basin.shp)Climate data in NetCDF format (
climate_data.nc)
# Example data structure
data/
├── basin.shp
├── basin.shx
├── basin.dbf
├── basin.prj
└── climate_data.nc
Step 1: Import Libraries
import geopandas as gpd
import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt
from marrmotflow import MARRMOTWorkflow
Step 2: Load and Inspect Data
Load Catchment Data
# Load catchment boundary
catchment = gpd.read_file("data/basin.shp")
# Inspect the data
print("Catchment Information:")
print(f"Number of catchments: {len(catchment)}")
print(f"CRS: {catchment.crs}")
print(f"Columns: {list(catchment.columns)}")
print(f"Bounds: {catchment.total_bounds}")
# Plot catchment
fig, ax = plt.subplots(figsize=(8, 6))
catchment.plot(ax=ax, facecolor='lightblue', edgecolor='black')
ax.set_title('Study Catchment')
plt.show()
Load Climate Data
# Load climate data
climate = xr.open_dataset("data/climate_data.nc")
# Inspect the data
print("Climate Data Information:")
print(climate)
print(f"Time range: {climate.time.min().item()} to {climate.time.max().item()}")
print(f"Variables: {list(climate.data_vars)}")
# Check if data covers catchment
lon_min, lat_min, lon_max, lat_max = catchment.total_bounds
print(f"Catchment bounds: [{lon_min:.2f}, {lat_min:.2f}] to [{lon_max:.2f}, {lat_max:.2f}]")
print(f"Climate lon range: [{climate.lon.min().item():.2f}, {climate.lon.max().item():.2f}]")
print(f"Climate lat range: [{climate.lat.min().item():.2f}, {climate.lat.max().item():.2f}]")
Visualize Climate Data
# Plot sample climate data
fig, axes = plt.subplots(2, 1, figsize=(12, 8))
# Plot precipitation time series for a sample point
sample_precip = climate.precipitation.isel(lon=0, lat=0)
sample_precip.plot(ax=axes[0])
axes[0].set_title('Sample Precipitation Time Series')
axes[0].set_ylabel('Precipitation (mm/day)')
# Plot temperature time series
sample_temp = climate.temperature.isel(lon=0, lat=0)
sample_temp.plot(ax=axes[1])
axes[1].set_title('Sample Temperature Time Series')
axes[1].set_ylabel('Temperature (°C)')
plt.tight_layout()
plt.show()
Step 3: Create MarrmotFlow Workflow
Basic Configuration
# Define forcing variable mapping
forcing_vars = {
"precip": "precipitation", # Map to variable name in NetCDF
"temp": "temperature"
}
# Define units (if different from defaults)
forcing_units = {
"precip": "mm/day",
"temp": "celsius"
}
# Create workflow
workflow = MARRMOTWorkflow(
name="BasicExample",
cat=catchment,
forcing_files="data/climate_data.nc",
forcing_vars=forcing_vars,
forcing_units=forcing_units,
pet_method="penman_monteith",
model_number=7, # HBV-96 model
forcing_time_zone="UTC",
model_time_zone="America/Vancouver"
)
print("Workflow created successfully!")
print(f"Workflow name: {workflow.name}")
Step 4: Run Workflow (Conceptual)
Note
The actual execution methods depend on the current implementation of MarrmotFlow. This shows the conceptual approach.
# Execute workflow (implementation dependent)
# results = workflow.run()
# For this example, we'll demonstrate expected workflow
print("Workflow configuration:")
print(f"- Model: HBV-96 (model {workflow.model_number})")
print(f"- PET method: {workflow.pet_method}")
print(f"- Catchments: {len(workflow.cat)}")
print(f"- Forcing variables: {workflow.forcing_vars}")
Step 5: Analyze Configuration
Model Parameters
# Access default model configuration
from marrmotflow._default_dicts import default_model_dict
hbv_config = default_model_dict[7]
print(f"Model name: {hbv_config['name']}")
print(f"Description: {hbv_config['description']}")
print("\\nModel parameters:")
for param_name, param_info in hbv_config["parameters"].items():
print(f" {param_name}: {param_info['default']} "
f"(range: {param_info['range']}) - {param_info['description']}")
Data Summary
# Analyze forcing data
precip_stats = climate.precipitation.mean(dim=['lon', 'lat'])
temp_stats = climate.temperature.mean(dim=['lon', 'lat'])
print("Climate Data Summary:")
print(f"Mean annual precipitation: {precip_stats.sum().item():.1f} mm")
print(f"Mean temperature: {temp_stats.mean().item():.1f} °C")
print(f"Min temperature: {temp_stats.min().item():.1f} °C")
print(f"Max temperature: {temp_stats.max().item():.1f} °C")
Step 6: Visualization and Validation
Plot Forcing Data
# Create comprehensive plots
fig, axes = plt.subplots(3, 2, figsize=(15, 12))
# Precipitation time series
precip_ts = climate.precipitation.mean(dim=['lon', 'lat'])
precip_ts.plot(ax=axes[0, 0])
axes[0, 0].set_title('Area-averaged Precipitation')
axes[0, 0].set_ylabel('Precipitation (mm/day)')
# Temperature time series
temp_ts = climate.temperature.mean(dim=['lon', 'lat'])
temp_ts.plot(ax=axes[0, 1])
axes[0, 1].set_title('Area-averaged Temperature')
axes[0, 1].set_ylabel('Temperature (°C)')
# Monthly precipitation climatology
monthly_precip = precip_ts.groupby('time.month').mean()
monthly_precip.plot(ax=axes[1, 0], marker='o')
axes[1, 0].set_title('Monthly Precipitation Climatology')
axes[1, 0].set_xlabel('Month')
axes[1, 0].set_ylabel('Precipitation (mm/day)')
# Monthly temperature climatology
monthly_temp = temp_ts.groupby('time.month').mean()
monthly_temp.plot(ax=axes[1, 1], marker='o', color='red')
axes[1, 1].set_title('Monthly Temperature Climatology')
axes[1, 1].set_xlabel('Month')
axes[1, 1].set_ylabel('Temperature (°C)')
# Precipitation spatial pattern (annual mean)
annual_precip = climate.precipitation.mean(dim='time')
im1 = annual_precip.plot(ax=axes[2, 0], cmap='Blues')
axes[2, 0].set_title('Annual Mean Precipitation')
# Temperature spatial pattern (annual mean)
annual_temp = climate.temperature.mean(dim='time')
im2 = annual_temp.plot(ax=axes[2, 1], cmap='RdYlBu_r')
axes[2, 1].set_title('Annual Mean Temperature')
plt.tight_layout()
plt.show()
Data Quality Checks
# Check for missing data
precip_missing = climate.precipitation.isnull().sum()
temp_missing = climate.temperature.isnull().sum()
print("Data Quality Assessment:")
print(f"Missing precipitation values: {precip_missing.item()}")
print(f"Missing temperature values: {temp_missing.item()}")
# Check for unrealistic values
negative_precip = (climate.precipitation < 0).sum()
extreme_temp = ((climate.temperature < -50) | (climate.temperature > 50)).sum()
print(f"Negative precipitation values: {negative_precip.item()}")
print(f"Extreme temperature values: {extreme_temp.item()}")
# Data coverage check
time_coverage = len(climate.time)
expected_days = (climate.time[-1] - climate.time[0]).dt.days.item() + 1
coverage_percent = (time_coverage / expected_days) * 100
print(f"Temporal coverage: {coverage_percent:.1f}% ({time_coverage}/{expected_days} days)")
Step 7: Expected Results Structure
# Example of expected results structure after workflow execution
expected_results = {
'model_7': {
'discharge': 'Daily discharge time series (mm/day)',
'states': {
'snow_storage': 'Snow water equivalent (mm)',
'soil_moisture': 'Soil moisture storage (mm)',
'groundwater': 'Groundwater storage (mm)'
},
'fluxes': {
'evapotranspiration': 'Actual ET (mm/day)',
'snowmelt': 'Snow melt rate (mm/day)',
'recharge': 'Groundwater recharge (mm/day)'
},
'parameters': {
'TT': 'Temperature threshold (°C)',
'C0': 'Degree-day factor (mm/°C/day)',
# ... other parameters
},
'performance': {
'water_balance_error': 'Daily water balance closure (%)',
'annual_totals': {
'precipitation': 'mm/year',
'evapotranspiration': 'mm/year',
'discharge': 'mm/year'
}
}
}
}
print("Expected results structure:")
for key, value in expected_results['model_7'].items():
if isinstance(value, dict):
print(f"{key}:")
for subkey, subvalue in value.items():
print(f" {subkey}: {subvalue}")
else:
print(f"{key}: {value}")
Complete Example Script
#!/usr/bin/env python3
"""
Complete basic MarrmotFlow workflow example
"""
import geopandas as gpd
import xarray as xr
import matplotlib.pyplot as plt
from marrmotflow import MARRMOTWorkflow
def main():
# Load data
print("Loading data...")
catchment = gpd.read_file("data/basin.shp")
climate = xr.open_dataset("data/climate_data.nc")
# Validate data
print("Validating data...")
assert len(catchment) > 0, "No catchments found"
assert 'precipitation' in climate.data_vars, "Precipitation not found"
assert 'temperature' in climate.data_vars, "Temperature not found"
# Create workflow
print("Creating workflow...")
workflow = MARRMOTWorkflow(
name="BasicExample",
cat=catchment,
forcing_files="data/climate_data.nc",
forcing_vars={
"precip": "precipitation",
"temp": "temperature"
},
forcing_units={
"precip": "mm/day",
"temp": "celsius"
},
pet_method="penman_monteith",
model_number=7
)
print(f"Workflow '{workflow.name}' created successfully!")
print(f"Ready to run HBV-96 model for {len(catchment)} catchment(s)")
# Additional analysis could be added here
if __name__ == "__main__":
main()
Next Steps
After completing this basic example, you can:
Explore multi-model comparisons: See Multi-Model Comparison
Add observed data: Compare model results with observations
Calibrate parameters: Optimize model parameters for your catchment
Extend spatial analysis: Work with multiple catchments
Climate change studies: Use future climate projections
Troubleshooting
Common issues and solutions:
File not found errors:
import os
print(f"Current directory: {os.getcwd()}")
print(f"Files in data/: {os.listdir('data') if os.path.exists('data') else 'data/ not found'}")
CRS mismatch:
# Ensure consistent coordinate systems
if catchment.crs != 'EPSG:4326':
catchment = catchment.to_crs('EPSG:4326')
Data type issues:
# Ensure time is properly formatted
climate['time'] = pd.to_datetime(climate.time)
Memory issues with large datasets:
# Use dask for large datasets
climate = xr.open_dataset("data/climate_data.nc", chunks={'time': 365})