"""Copyright 2025 DTCG Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=====
Calibrate OGGM models.
"""
from datetime import datetime
import numpy as np
import pandas as pd
import xarray as xr
from dateutil.tz import UTC
from oggm import GlacierDirectory, cfg, utils
from oggm.core import massbalance
from tqdm import tqdm
[docs]
class Calibrator:
"""Bindings for calibrating OGGM models.
Attributes
----------
"""
[docs]
def __init__(self, model_matrix: dict = None):
super().__init__()
if not model_matrix:
self.model_matrix = {}
else:
self.model_matrix = model_matrix
[docs]
def set_model_matrix(self, name: str, model, geo_period: str, **kwargs):
"""Set model parameters for calibration.
Parameters
----------
name : str
Name of configuration parameters. Ideally this should match
the settings filesuffix.
model : oggm.core.massbalance.MassBalanceModel
OGGM mass balance model class.
geo_period : str
Reference mass balance period in the format
YYYY-MM-DD_YYYY-MM-DD.
kwargs
Additional arguments passed to the mass balance model
class.
"""
matrix = {
name: {
"model": model,
"geo_period": geo_period,
}
}
if kwargs:
matrix[name].update(kwargs)
self.model_matrix.update(matrix)
def get_nearest(self, items, pivot):
return min(items, key=lambda x: abs(x - pivot))
[docs]
def get_calibrated_models(
self,
gdir: GlacierDirectory,
model_class,
ref_mb: pd.DataFrame,
geodetic_period: str = "",
years: list = None,
model_calib: dict = None,
model_flowlines: dict = None,
smb: dict = None,
daily: bool = False,
calibration_filesuffix: str = "",
calibration_parameters: dict = None,
**kwargs,
) -> tuple:
"""Get calibrated models.
Note this uses all three calibration parameters, with ``prcp_fac``
as the first parameter.
Parameters
----------
gdir : GlacierDirectory
Glacier directory.
model_class : oggm.MassBalanceModel
Any mass balance model that subclasses MonthlyTIModel.
ref_mb : pd.DataFrame
Reference mass balance.
geodetic_period : str, default empty string
The reference calibration period in the format: "Y-M-D_Y-M-D"
years : list, default None
Years for which to calculate the specific mass balance. Ensure
these are float years when using ``MonthlyTI``.
model_calib : dict
Store calibrated models derived from ``mb_model_class``
model_flowlines : dict
Store calibrated ``MultipleFlowlineMassBalanceModel``.
smb : dict
Store specific mass balance.
daily : bool, default False
Process daily specific mass balance.
calibration_filesuffix : str, default empty string
Calibration filesuffix.
calibration_parameters : dict, default None
Extra arguments passed to ``mb_calibration_from_scalar_mb``.
kwargs
Extra arguments passed to the mass balance model used for
calibration.
Returns
-------
tuple
Calibrated model instances for each calibration parameter,
calibrated flowline models for each parameter,
and specific mass balance for each calibrated flowline model.
"""
model_name = model_class.__name__
if not geodetic_period:
geodetic_period = cfg.PARAMS["geodetic_mb_period"]
if model_calib is None:
model_calib = {}
if model_flowlines is None:
model_flowlines = {}
if smb is None:
smb = {}
if not calibration_parameters:
calibration_parameters = {
"calibrate_param1": "melt_f",
"calibrate_param2": "prcp_fac",
"calibrate_param3": "temp_bias",
}
if not calibration_filesuffix:
calibration_filesuffix = f"{model_name}_{geodetic_period}"
model_key = calibration_filesuffix.removesuffix("Model")
# This follows mb_calibration_from_geodetic_mb
prcp_fac = massbalance.decide_winter_precip_factor(gdir)
mi, ma = cfg.PARAMS["prcp_fac_min"], cfg.PARAMS["prcp_fac_max"]
prcp_fac_min = utils.clip_scalar(prcp_fac * 0.8, mi, ma)
prcp_fac_max = utils.clip_scalar(prcp_fac * 1.2, mi, ma)
if "SfcType_Cryosat_2015" in calibration_filesuffix:
calibration_parameters.update(
{
"calibrate_param1": "prcp_fac",
"calibrate_param2": "temp_bias",
"calibrate_param3": "melt_f",
"melt_f": 4.728225163624522,
}
)
massbalance.mb_calibration_from_scalar_mb(
gdir,
ref_mb=ref_mb,
ref_mb_period=geodetic_period,
**calibration_parameters,
prcp_fac=prcp_fac,
prcp_fac_min=prcp_fac_min,
prcp_fac_max=prcp_fac_max,
mb_model_class=model_class,
overwrite_gdir=True,
use_2d_mb=False,
settings_filesuffix=calibration_filesuffix,
observations_filesuffix=calibration_filesuffix,
**kwargs,
)
if not kwargs:
model_calib[model_key] = model_class(
gdir,
settings_filesuffix=calibration_filesuffix,
)
model_flowlines[model_key] = massbalance.MultipleFlowlineMassBalance(
gdir,
mb_model_class=model_class,
use_inversion_flowlines=True,
settings_filesuffix=calibration_filesuffix,
)
else:
model_calib[model_key] = model_class(
gdir,
settings_filesuffix=calibration_filesuffix,
**kwargs,
)
model_flowlines[model_key] = massbalance.MultipleFlowlineMassBalance(
gdir,
mb_model_class=model_class,
use_inversion_flowlines=True,
settings_filesuffix=calibration_filesuffix,
**kwargs,
)
fls = gdir.read_pickle("inversion_flowlines")
if not years:
years = utils.float_years_timeseries(
y0=2000, y1=2019, include_last_year=True, monthly=False
)
if not daily:
smb[model_key] = model_flowlines[model_key].get_specific_mb(
fls=fls, year=years
)
else:
smb[model_key] = model_flowlines[model_key].get_specific_mb(
fls=fls, year=years, time_resolution="daily"
)
return model_calib, model_flowlines, smb
[docs]
def get_geodetic_mb(self, gdir: GlacierDirectory) -> pd.DataFrame:
"""Get geodetic mass balances for a glacier.
Returns
-------
pd.DataFrame
Geodetic mass balances for a specific glacier for different
reference periods.
"""
pd_geodetic = utils.get_geodetic_mb_dataframe()
return pd_geodetic.loc[gdir.rgi_id]
[docs]
def get_calibration_mb(self, ref_mb: pd.DataFrame, geo_period: str) -> float:
"""Get calibration mass balance for a specific reference period.
Parameters
----------
ref_mb : pd.DataFrame
Reference mass balances for a glacier.
geo_period : str
Reference calibration period. This should be a value in
ref_mb["period"].
Returns
-------
float
Mass balance used for calibration.
"""
geodetic_mb = ref_mb.loc[ref_mb.period == geo_period].dmdtda * 1000
return geodetic_mb
[docs]
def calibrate(
self, gdir: GlacierDirectory, model_matrix: dict, ref_mb: float
) -> tuple:
"""Calibrate an OGGM glacier model.
Parameters
----------
gdir : GlacierDirectory
Glacier directory.
model_matrix : dict
Model parameters for different run configurations.
ref_mb : float
Reference mass balance.
Returns
-------
tuple
Calibrated mass balance model instances, flowlines, and
specific mass balances for each run configuration in the
model matrix.
"""
# Store results
mb_model_calib = {}
mb_model_flowlines = {}
smb = {}
for matrix_name, model_params in tqdm(model_matrix.items()):
mb_model_class = model_params["model"]
geo_period = model_params["geo_period"]
calibration_filesuffix = f"{matrix_name}_{geo_period}"
cfg.PARAMS["geodetic_mb_period"] = geo_period
mb_geodetic = self.get_calibration_mb(ref_mb=ref_mb, geo_period=geo_period)
mb_model_calib, mb_model_flowlines, smb = self.get_calibrated_models(
gdir=gdir,
model_class=mb_model_class,
ref_mb=mb_geodetic,
geodetic_period=geo_period,
model_calib=mb_model_calib,
model_flowlines=mb_model_flowlines,
smb=smb,
daily=model_params.get("daily", False),
calibration_filesuffix=calibration_filesuffix,
)
return mb_model_calib, mb_model_flowlines, smb
[docs]
class CalibratorCryotempo(Calibrator):
[docs]
def __init__(self, model_matrix: dict = None):
super().__init__(model_matrix=model_matrix)
[docs]
def set_model_matrix(
self,
name: str = "SfcType_Cryosat",
model=massbalance.SfcTypeTIModel,
geo_period: str = "2011-01-01_2020-01-01",
daily: bool = True,
source: str = "CryoTEMPO-EOLIS",
**kwargs,
):
"""Set model parameters for calibration.
Parameters
----------
name : str
Name of configuration parameters. Ideally this should match
the settings filesuffix.
model : oggm.core.massbalance.MassBalanceModel
OGGM mass balance model class.
geo_period : str
Reference mass balance period in the format
YYYY-MM-DD_YYYY-MM-DD.
kwargs
Additional arguments passed to the mass balance model
class.
"""
return super().set_model_matrix(
name,
model,
geo_period,
daily=daily,
source=source,
**kwargs,
)
[docs]
def get_eolis_dates(self, ds: xr.Dataset) -> np.ndarray:
"""Get time index from CryoTEMPO-EOLIS dataset.
Parameters
----------
ds : xr.Dataset
CryoTEMPO-EOLIS dataset.
Returns
-------
np.ndarray
Time index adjusted to UTC.
"""
return np.array([datetime.fromtimestamp(t, tz=UTC) for t in ds.t.values])
[docs]
def get_eolis_mean_dh(self, ds: xr.Dataset) -> np.ndarray:
"""Get
Parameters
----------
ds : xr.Dataset
CryoTEMPO-EOLIS dataset.
Returns
-------
np.ndarray
Time series of spatial mean of elevation change.
"""
mean_time_series = [
np.nanmean(elevation_change_map.where(ds.glacier_mask == 1))
for elevation_change_map in ds.eolis_gridded_elevation_change
]
return np.array(mean_time_series)
[docs]
def get_temporal_bounds(self, dates: list, year_start: int, year_end: int) -> tuple:
"""Get start and end dates of geodetic and observational periods.
This matches the nearest available dates for irregular time
indices.
Parameters
----------
dates : list
Time index. This may be irregularly spaced.
year_start : int
Start year of a desired period.
year_end : int
End year of a desired period.
Returns
-------
tuple[datetime]
Start and end dates of geodetic reference period, and the
nearest available start and end dates for observations.
"""
year_start = datetime(year_start, 1, 1, tzinfo=UTC)
year_end = datetime(year_end, 1, 1, tzinfo=UTC)
# EOLIS data is in 30-day periods, so get closest available date
data_start = self.get_nearest(dates, year_start)
data_end = self.get_nearest(dates, year_end)
return year_start, year_end, data_start, data_end
[docs]
def get_dmdtda(
self, dataset, dates: list, year_start: datetime, year_end: datetime
) -> float:
"""
Get dmdtdA from a CryoTEMPO-EOLIS dataset.
Parameters
----------
dataset : xr.DataArray
CryoTEMPO-EOLIS data.
dates : list
Datetimes for each timestep in the data period.
year_start : datetime
Start of reference period.
year_end : datetime
End of reference period.
Returns
-------
float
dmdtdA for a glacier.
"""
calib_frame = pd.DataFrame(
{
"dh": dataset.eolis_elevation_change_timeseries,
"dh_sigma": dataset.eolis_elevation_change_sigma_timeseries,
},
index=dates,
)
dt = (year_end - year_start).total_seconds() / cfg.SEC_IN_YEAR
# dmdtda in kg m-2 yr-1, area not needed as we already have a mean dh
# (dh = dV / A)
bulk_density = 850 # not cfg.PARAMS["ice_density"]?
dh = calib_frame["dh"].loc[year_end] - calib_frame["dh"].loc[year_start]
# Convert to meters water-equivalent per year to have the same unit
# as Hugonnet
dmdtda = (dh * bulk_density / dt) / 1000
return dmdtda
[docs]
def set_geodetic_mb_from_dataset(
self,
gdir: GlacierDirectory,
dataset: xr.Dataset,
year_start: int = 2011,
year_end: int = 2020,
) -> pd.DataFrame:
"""Set the geodetic mass balance from enhanced gridded data.
Parameters
----------
gdir : GlacierDirectory
Glacier directory.
dataset : xr.Dataset
CryoTEMPO-EOLIS dataset.
year_start : int, default 2011
Start of reference period.
year_end : int, default 2020
End of reference period.
Returns
-------
pd.DataFrame
Geodetic mass balances for various glaciers.
"""
dates = self.get_eolis_dates(dataset)
year_start, year_end, data_start, data_end = self.get_temporal_bounds(
dates=dates, year_start=year_start, year_end=year_end
)
dmdtda = self.get_dmdtda(
dataset=dataset, dates=dates, year_start=data_start, year_end=data_end
)
geodetic_mb_period = (
f"{year_start.strftime('%Y-%m-%d')}_{year_end.strftime('%Y-%m-%d')}"
)
observations_period = (
f"{data_start.strftime('%Y-%m-%d')}_{data_end.strftime('%Y-%m-%d')}"
)
geodetic_mb = {
"rgiid": [gdir.rgi_id],
"period": geodetic_mb_period,
"observations_period": observations_period,
"area": gdir.rgi_area_m2,
"dmdtda": dmdtda,
"source": "CryoTEMPO-EOLIS",
"err_dmdtda": 0.0,
"reg": 6,
"is_cor": False,
}
return pd.DataFrame.from_records(geodetic_mb, index="rgiid")
[docs]
def get_geodetic_mb(
self, gdir: GlacierDirectory, dataset: xr.Dataset = None
) -> pd.DataFrame:
"""Get geodetic mass balances for a glacier.
Parameters
----------
gdir : GlacierDirectory
Glacier directory.
dataset : xr.Dataset
CryoTEMPO-EOLIS data.
Returns
-------
pd.DataFrame
Geodetic mass balances for a specific glacier RGI ID.
"""
pd_geodetic = utils.get_geodetic_mb_dataframe()
pd_geodetic["source"] = "Hugonnet"
if dataset:
period = [(2011, 2020), (2015, 2016)]
for years in period:
geodetic_mb = self.set_geodetic_mb_from_dataset(
gdir=gdir, dataset=dataset, year_start=years[0], year_end=years[1]
)
pd_geodetic = pd.concat([pd_geodetic, geodetic_mb])
return pd_geodetic.loc[gdir.rgi_id]
[docs]
def get_calibration_mb(
self, ref_mb: pd.DataFrame, geo_period: str, source: str
) -> float:
"""
Parameters
----------
ref_mb : pd.DataFrame
Reference geodetic mass balances.
geo_period : str
Geodetic reference period in the format <YYYY-MM-DD_YYYY-MM-DD>.
source : str
Reference mass balance source, e.g. "Hugonnet".
Returns
-------
float
Geodetic mass balance for calibration period.
"""
geodetic_mb = (
ref_mb.loc[
np.logical_and(ref_mb["source"] == source, ref_mb.period == geo_period)
].dmdtda.values[0]
* 1000
)
return geodetic_mb
[docs]
def calibrate(
self, gdir: GlacierDirectory, model_matrix: dict, ref_mb: pd.DataFrame, **kwargs
) -> tuple:
"""Calibrate and run OGGM models using a model matrix.
Parameters
----------
gdir : GlacierDirectory
Glacier directory.
model_matrix : dict
Model parameters for various calibration runs.
ref_mb : Geodetic mass balances.
Returns
-------
tuple
Calibrated mass balance models, flowlines, and specific
mass balances for each set of configuration parameters in
``model_matrix``.
"""
# Store results
mb_model_calib = {}
mb_model_flowlines = {}
smb = {}
for matrix_name, model_params in tqdm(model_matrix.items()):
daily = model_params["daily"]
mb_model_class = model_params["model"]
geo_period = model_params["geo_period"]
source = model_params["source"]
calibration_filesuffix = f"{matrix_name}_{geo_period}"
mb_geodetic = self.get_calibration_mb(
ref_mb=ref_mb, geo_period=geo_period, source=source
)
if issubclass(mb_model_class, massbalance.SfcTypeTIModel) and daily:
sfc_kwargs = {"climate_resolution": "daily"}
else:
sfc_kwargs = {}
mb_model_calib, mb_model_flowlines, smb = self.get_calibrated_models(
gdir=gdir,
model_class=mb_model_class,
ref_mb=mb_geodetic,
geodetic_period=geo_period,
model_calib=mb_model_calib,
model_flowlines=mb_model_flowlines,
smb=smb,
daily=daily,
calibration_filesuffix=calibration_filesuffix,
**kwargs,
**sfc_kwargs,
)
return mb_model_calib, mb_model_flowlines, smb
[docs]
def run_calibration(
self, gdir: GlacierDirectory, datacube, model=massbalance.SfcTypeTIModel
):
"""Run calibration.
Parameters
----------
gdir : GlacierDirectory
Glacier directory.
datacube : GeoZarrHandler
L1 or L2 datacube.
model : massbalance.MassBalanceModel, default SfcTypeTIModel.
OGGM mass balance model class, ideally DailyTIModel or similar.
Returns
-------
tuple[dict, dict, dict]
Calibrated mass balance models, flowlines, and specific
mass balances for each set of configuration parameters in
``model_matrix``.
"""
if not datacube:
ref_mb = self.get_geodetic_mb(gdir=gdir)
else:
ref_mb = self.get_geodetic_mb(gdir=gdir, dataset=datacube)
if isinstance(model, str):
try:
model = getattr(massbalance, model)
except:
model = massbalance.DailyTIModel
if not datacube:
source = "Hugonnet"
geo_period = cfg.PARAMS["geodetic_mb_period"]
sfc_model_kwargs = {}
else:
source = "Cryosat"
geo_period = ("2011-01-01_2020-01-01",)
sfc_model_kwargs = {
"resolution": "day",
"gradient_scheme": "annual",
"check_data_exists": False,
}
self.set_model_matrix(
name=f"{model.__name__}_{source}",
model=model,
geo_period=geo_period,
daily=True,
source=source,
**sfc_model_kwargs,
)
mb_model_calib, mb_model_flowlines, smb = self.calibrate(
model_matrix=self.model_matrix, gdir=gdir, ref_mb=ref_mb
)
return mb_model_calib, mb_model_flowlines, smb