Source code for meshflow.utility.forcing_prep

"""
Containing important functions for preparing "forcing" database for MESH
models
"""

# import third-party libraries
import cdo  # binary required >2.2.1
import xarray as xr  # requires xarray >2023.7.0
import pint_xarray  # requires typing_extensions >4.7.1
import pint  # pint >0.22

# import built-in libraries
from typing import (
    Sequence,
    Dict,
    Optional,
)

from zoneinfo import ZoneInfo
from datetime import datetime



[docs]
def prepare_mesh_forcing(
    path: str,
    variables: Sequence[str],
    units: Dict[str, str],
    hru_dim: Optional[str] = None,
    unit_registry: pint.UnitRegistry = None,
    to_units: Optional[Dict[str, str]] = None,
    aggregate: bool = False,
    local_attrs: Optional[Dict[str, str]] = None,
    global_attrs: Optional[Dict[str, str]] = None,
) -> xr.Dataset:
    """
    Prepare a MESH forcing file by merging, converting, and annotating data.

    Parameters
    ----------
    path : str
        Path to input forcing files.
    variables : Sequence[str]
        Sequence of variable names to include in the output file.
    units : dict of str
        Dictionary mapping variable names to their units.
    hru_dim : str, optional
        Name of the HRU dimension to use in the output dataset.
    unit_registry : pint.UnitRegistry, optional
        Pint unit registry for unit conversion. Default is None.
    to_units : dict of str, optional
        Dictionary mapping variable names to target units for conversion.
    aggregate : bool, default False
        If True, merge multiple input files into one using CDO. If False,
        assumes input files are already in the correct format and reads them
        directly.
        This is useful for MESH, which only reads one file.
        If `aggregate` is False, the input files are assumed to be already
        in the correct chunk format and are read directly.
        If `aggregate` is True, CDO is used to merge the files.
        Note that CDO is a binary dependency and must be installed separately.
    local_attrs : dict of dict, optional
        Dictionary of local attributes for each forcing variable.
        The keys are variable names and the values are dictionaries
        of attributes to assign to each variable.
        
        Example:
            {'air_temperature': {'long_name': 'Air Temperature',
                                 'units': 'K'}}
    global_attrs : dict of str, optional
        Dictionary of global attributes for the output dataset. The keys are
        attribute names and the values are their descriptions.

    Returns
    -------
    xarray.Dataset
        Merged and converted dataset containing forcing variables.

    Raises
    ------
    TypeError
        If `variables` is not a sequence of string values.
    ValueError
        If units for variables are not provided, or if any variable in
        `units` cannot be found in the dataset.

    Notes
    -----
    - Merges all input forcing files into a single NetCDF file, as MESH only
      reads one file. CDO is used for merging, but the function returns an
      xarray.Dataset.
    - The `variables` sequence should include:
      'air_pressure', 'specific_humidity', 'air_temperature', 'wind_speed',
      'precipitation', 'shortwave_radiation', 'longwave_radiation'.
    - The `units` and `to_units` dictionaries must contain the same keys as
      `variables`.
    - The merge functionality may be expanded in future versions.
    """
    # if `units` is not provided, throw an exception
    if not units:
        raise ValueError("`units` associated with `variables` elements must"
                         " be provided")

    # merge all the input forcing files as MESH only reads on file
    # CDO is used due to its stability, however, an xarray.DataSet
    # is returned

    if aggregate:
        cdo_obj = cdo.Cdo()  # CDO object
        ds = cdo_obj.mergetime(input=path, returnXArray=variables)  # Mergeing
    else:
        # if `aggregate` is False, we assume that the input files are already
        # in proper chunk format and we just read the file
        ds = xr.open_dataset(path)

    # check to see if all the keys included in the `units` dictionary are
    # found inside the `variables` sequence
    for k in units:
        if k not in ds:
            raise ValueError(f"item {k} defined in `units` cannot be found"
                             " in `variables`")

    # if all elements of `variables` not found in `units`,
    # assign them to None
    for v in variables:
        if v not in units:
            units[v] = None

    # now assign the units
    ds = ds.pint.quantify(units=units, unit_registry=unit_registry)

    # if `to_units` is defined
    if to_units:
        ds = ds.pint.to(units=to_units)

    # print the netCDF file
    ds = ds.pint.dequantify()

    # rename remapped output name
    var = [i for i in ds.dims.keys() if i != 'time']
    ds = ds.transpose().rename({var[0]: hru_dim})

    # convert calendar to 'standard' based on MESH's input standard
    ds = ds.convert_calendar(calendar='standard')

    # assigning local attributes for each variable
    if local_attrs:
        for var, val in local_attrs.items():
            for attr, desc in val.items():
                ds[var].attrs[attr] = desc

    # assigning global attributes for `ddb`
    if global_attrs:
        # empty global attribute dictionary first
        ds.attrs = {}
        # assign new global attributes
        for attr, desc in global_attrs.items():
            ds.attrs[attr] = desc

    # assure the dtype of `subbasin` is integer
    try: 
        ds['subbasin'] = ds['subbasin'].astype(int)
    except:
        raise TypeError("`subbasin` values should be able to be downcasted to integer.")

    return ds




[docs]
def freq_long_name(
    freq_alias: str
) -> str:
    """
    Return the full name of a time offset alias based on pandas conventions.

    Parameters
    ----------
    freq_alias : str
        Time offset alias representing time interval frequency, such as
        'H' for 'hours', 'T' for 'minutes', etc.

    Returns
    -------
    str
        Full name of the time offset (e.g., 'hours', 'minutes').

    Raises
    ------
    TypeError
        If `freq_alias` is not a string.
    ValueError
        If `freq_alias` is not a recognized offset alias.

    Examples
    --------
    >>> freq_long_name('H')
    'hours'
    >>> freq_long_name('T')
    'minutes'
    """

    if not isinstance(freq_alias, str):
        raise TypeError(
            f"frequency value of '{freq_alias}' is not acceptable"
        )

    if freq_alias in ('H', 'h'):
        return 'hours'
    elif freq_alias in ('T', 'min'):
        return 'minutes'
    elif freq_alias in ('S',):
        return 'seconds'
    elif freq_alias in ('L', 'ms'):
        return 'milliseconds'
    else:
        raise ValueError(
            f"frequency value '{freq_alias}' is not acceptable"
        )




[docs]
def calculate_time_difference(
    initial_time_zone: str,
    target_time_zone: str
) -> int | float:
    """
    Calculate the time difference in hours between two IANA time zones.

    Parameters
    ----------
    initial_time_zone : str
        IANA time zone name (e.g., 'America/Toronto').
    target_time_zone : str
        IANA time zone name (e.g., 'UTC', 'Europe/London').

    Returns
    -------
    float or int
        Time difference in hours between the two time zones. If the time
        zones are the same, returns 0.

    Raises
    ------
    AssertionError
        If either time zone argument is not a string.
    ValueError
        If the time zone format is incorrect or not valid.

    Notes
    -----
    Time zone naming follows IANA conventions:
    https://data.iana.org/time-zones/releases/tzdb-2025b.tar.lz

    Examples
    --------
    >>> calculate_time_difference('UTC', 'America/Toronto')
    -4.0
    >>> calculate_time_difference('America/Edmonton', 'America/Toronto')
    2.0
    """
    # Routine error checks
    assert isinstance(initial_time_zone, str), (
        "forcing time zone needs to be of dtype `str`."
    )
    assert isinstance(target_time_zone, str), (
        "target time zone needs to be of dtype `str`."
    )

    # Define the datetime you want to compare (current time in UTC)
    dt = datetime.now(tz=ZoneInfo("UTC"))

    # Specify the IANA time zones
    tz1 = ZoneInfo(initial_time_zone)
    tz2 = ZoneInfo(target_time_zone)

    # Convert to target time zones
    dt_tz1 = dt.astimezone(tz1)
    dt_tz2 = dt.astimezone(tz2)

    # Get the UTC offsets
    offset1 = dt_tz1.utcoffset()
    offset2 = dt_tz2.utcoffset()

    # Calculate the time difference
    diff = offset2 - offset1

    return diff.total_seconds() / 3600  # Convert seconds to hours