Source code for pydoit_nb.serialization

"""
Serialization tools

This module includes a default yaml converter. This may be exactly what you
need for your use case. If it isn't, the code will help you see how to set up
and customise a converter. This example, along with the cattrs docs, should
help you get going. We mainly use :func:`cattrs.register_structure_hook_func`
and :func:`cattrs.register_unstructure_hook_func` (see also
`here <https://catt.rs/en/stable/cattrs.html#cattrs.BaseConverter.register_structure_hook_func>`_
), which aren't as heavily documented, but are what we want. You may find
the other examples in
`the docs on customisation <https://catt.rs/en/stable/customizing.html#customizing-class-un-structuring>`_
more helpful.

One example of something we don't support well is preservation of data types
when structuring and unstructuring pint types. This is just a complicated
problem to do with type hinting. For most applications, we have found that the
default numpy data type is fine (everything becomes float64, which is generally
fine).

One other clear example of something we don't support here is structuring and
unstructuring pandas objects as yaml. The reason is that we can't see an easy
way to get the round-tripping to work in edge cases related to index naming and
data type preservation. There are ways to solve this problem, but they require
more care and time than we can put in right now. As two suggestions for
possible solutions: 1) use a back-end that isn't yaml 2) store information
about index types etc. in addition to the data frame (pandas itself doesn't do
this though, which makes us a bit nervous about how hard it could be to do in
the general case, although specific use cases should be far more tractable and
easy to test). If you'd like to discuss this more, please raise an issue.
"""

from __future__ import annotations

import warnings
from collections.abc import Sequence
from pathlib import Path
from typing import Any, TypeVar, Union, cast

import cattrs.preconf.pyyaml
import numpy as np
import numpy.typing as nptype

from .typing import ConfigBundleLike, Converter

try:
    from typing_extensions import TypeAlias
except ImportError:  # >= python 3.11
    # remove type ignore when mypy applied with python 3.11
    from typing import TypeAlias  # type: ignore

try:
    import pint

    HAS_PINT = True
except ImportError:  # pragma: no cover
    HAS_PINT = False

# Generic type of the object being loaded or structured
T = TypeVar("T")
# Type parameter used with ``ConfigBundleLike`` in this module
U = TypeVar("U")
# Any numpy array whose elements are floating point or integer values
N = TypeVar("N", bound=nptype.NDArray[Union[np.floating[Any], np.integer[Any]]])


def write_config_in_config_bundle_to_disk(
    config_bundle: ConfigBundleLike[U],
    converter: Converter,
) -> Path:
    """
    Write the configuration in a configuration bundle to disk

    The configuration is written in the path specified by
    ``config_bundle.config_hydrated_path``. The configuration is serialized
    before the file is opened, so if ``converter.dumps`` raises, any file
    that already exists at that path is left untouched.

    Parameters
    ----------
    config_bundle
        Configuration bundle to write to disk

    converter
        Object that can serialize the configuration bundle's hydrated config

    Returns
    -------
        Path in which ``config_bundle.config_hydrated`` was written
    """
    write_path = config_bundle.config_hydrated_path

    # Serialize first: opening with "w" truncates the file immediately, so
    # doing this inside the ``with`` block would wipe an existing file
    # whenever serialization fails.
    serialized = converter.dumps(config_bundle.config_hydrated)

    with open(write_path, "w") as fh:
        fh.write(serialized)

    return write_path
def load_config_from_file(config_file: Path, target: type[T], converter: Converter) -> T:
    """
    Load configuration from file

    The file's full text is read and handed to ``converter.loads`` together
    with ``target``.

    Parameters
    ----------
    config_file
        File from which to load configuration

    target
        Class to load

    converter
        Converter to use to convert from ``config_file``'s contents to an
        instance of ``target``

    Returns
    -------
        Loaded instance of ``target``
    """
    with open(config_file) as handle:
        raw_text = handle.read()

    return converter.loads(raw_text, target)
# Default YAML converter, created from cattrs' pre-configured PyYAML converter.
# Structure/unstructure hooks are registered on this object elsewhere in
# this module.
converter_yaml = cattrs.preconf.pyyaml.make_converter()

# Unstructured form of an array: a sequence of ints/floats, or a sequence of
# such sequences (recursively) for multi-dimensional data.
UnstructuredArray: TypeAlias = Union[Sequence[Union[int, float]], Sequence["UnstructuredArray"]]
def unstructure_np_array(arr: N) -> UnstructuredArray:
    """
    Unstructure a numpy array

    The array is converted with its ``tolist`` method, which is probably
    not very fast. This is only an example though, so it could easily be
    optimised for production use if needed.

    Parameters
    ----------
    arr
        Array to unstructure

    Returns
    -------
        Unstructured array
    """
    as_list = arr.tolist()

    # ``cast`` is a no-op at runtime; it only informs the type checker
    return cast("UnstructuredArray", as_list)
def structure_np_array(inp: UnstructuredArray, target_type: type[N]) -> N:
    """
    Structure a numpy array

    The inverse of :func:`unstructure_np_array`

    The element type is read from ``target_type``'s type arguments and passed
    to :func:`numpy.array` as ``dtype``, so the requested data type is also
    applied to empty and scalar inputs. If ``target_type`` carries no type
    arguments (e.g. it is plain ``np.ndarray``), numpy infers the data type
    from ``inp``.

    Parameters
    ----------
    inp
        Data to structure

    target_type
        Type the data should be returned as

    Returns
    -------
        Structured array
    """
    # Can't get mypy to behave, hence type ignore comments throughout here
    # TODO: push docs PR up into cattrs
    # See https://github.com/python-attrs/cattrs/issues/194#issuecomment-987341893
    try:
        # e.g. np.ndarray[Any, np.dtype[np.float64]] -> np.float64
        target_dtype = target_type.__args__[1].__args__[0]  # type: ignore
    except (AttributeError, IndexError):
        # Un-parameterised target: no data type requested, let numpy infer it
        target_dtype = None

    return np.array(inp, dtype=target_dtype)  # type: ignore
def _is_np_array(inp: Any) -> bool:
    """
    Check whether a type is a numpy array type

    True for ``np.ndarray`` itself and for anything whose ``__origin__``
    attribute is ``np.ndarray`` (i.e. parameterised array types).
    """
    if inp is np.ndarray:
        return True

    origin = getattr(inp, "__origin__", None)
    return origin is np.ndarray


# Route every numpy array type through the array hooks
converter_yaml.register_unstructure_hook_func(_is_np_array, unstructure_np_array)
converter_yaml.register_structure_hook_func(_is_np_array, structure_np_array)
def unstructure_np_scalar(number: np.number[Any]) -> float | int:
    """
    Unstructure :obj:`np.number`

    This simply converts to a primitive type: floating point numpy values
    become :class:`float`, everything else is passed to :class:`int`.

    Parameters
    ----------
    number
        Number to unstructure

    Returns
    -------
        Unstructured number
    """
    to_primitive = float if isinstance(number, np.floating) else int

    return to_primitive(number)
def structure_np_scalar(inp: float | int, target_type: type[T]) -> T:
    """
    Structure :obj:`np.number`

    The inverse of :func:`unstructure_np_scalar`: ``target_type`` is called
    with ``inp`` and the result is returned.

    Parameters
    ----------
    inp
        Data to structure

    target_type
        Type the data should be returned as

    Returns
    -------
        Structured number
    """
    # Can't get mypy to behave here either
    structured = target_type(inp)  # type: ignore

    return structured
def _is_np_scalar(inp: Any) -> bool:
    """
    Check whether a type is a numpy scalar number type

    Parameters
    ----------
    inp
        Type (or type-like object) to check

    Returns
    -------
        ``True`` if ``inp`` is a subclass of :obj:`np.number`, ``False``
        otherwise, including when ``inp`` is not a class at all
    """
    try:
        return issubclass(inp, np.number)
    except TypeError:
        # ``issubclass`` raises for anything that is not a class (e.g. unions
        # or parameterised generics), which by definition is not a numpy
        # scalar type.
        return False


converter_yaml.register_unstructure_hook_func(_is_np_scalar, unstructure_np_scalar)
converter_yaml.register_structure_hook_func(_is_np_scalar, structure_np_scalar)


if HAS_PINT:
    # Unstructured form of a pint quantity: (magnitude, units-as-string)
    UnstructuredPint: TypeAlias = Union[tuple[Union[int, float], str], tuple[UnstructuredArray, str]]

    def unstructure_pint(inp: pint.UnitRegistry.Quantity) -> UnstructuredPint:
        """
        Unstructure a :mod:`pint` quantity.

        Parameters
        ----------
        inp
            :obj:`pint.UnitRegistry.Quantity` to unstructure

        Returns
        -------
            Unstructured :obj:`pint.UnitRegistry.Quantity`, i.e. a tuple of
            the unstructured magnitude and the units as a string
        """
        # numpy scalar magnitude -> plain float/int
        if _is_np_scalar(type(inp.magnitude)):
            return (unstructure_np_scalar(inp.magnitude), str(inp.units))

        # already a plain Python number, nothing to convert
        if isinstance(inp.magnitude, (float, int)):
            return (inp.magnitude, str(inp.units))

        # anything else is treated as an array
        return (unstructure_np_array(inp.magnitude), str(inp.units))

    def structure_pint(
        inp: UnstructuredPint, target_type: type[pint.UnitRegistry.Quantity]
    ) -> pint.UnitRegistry.Quantity:
        """
        Structure :obj:`pint.UnitRegistry.Quantity`

        Parameters
        ----------
        inp
            Unstructured data. If the magnitude is a string containing a
            slash, we try and convert it to a fraction but this isn't super
            safe so we also raise a warning.

        target_type
            Type to create

        Returns
        -------
            Structured :obj:`pint.UnitRegistry.Quantity`, created with pint's
            application registry
        """
        # pint not playing nice with mypy
        ur = pint.get_application_registry()  # type: ignore

        if isinstance(inp[0], str) and "/" in inp[0]:
            msg = (
                f"Received {inp[0]=}. "
                "We are assuming that this is meant to be interpreted as a float64. "
                "It would be safer to put a decimal value into your config, "
                "or make a merge request to pydoit-nb to make this handling safer."
            )
            warnings.warn(msg)

            # Interpret "a/b" as numerator / denominator
            toks = inp[0].split("/")
            mag = np.float64(toks[0]) / float(toks[1])

            return ur.Quantity(mag, inp[1])  # type: ignore

        # Can't do dtype control until pint allows it again with e.g.
        # pint.Quantity[np.array[np.float64]]
        return ur.Quantity(np.array(inp[0]), inp[1])  # type: ignore

    def _is_pint(inp: Any) -> bool:
        """
        Check whether something looks like a pint quantity (type)
        """
        # I don't love this way of checking, but I couldn't work out how else
        # to make it work.
        # Short-circuiting ``and`` (rather than bitwise ``&``) means
        # ``str(inp)`` is only evaluated when both attributes are present.
        return hasattr(inp, "units") and hasattr(inp, "magnitude") and ("pint" in str(inp))

    converter_yaml.register_unstructure_hook_func(_is_pint, unstructure_pint)
    converter_yaml.register_structure_hook_func(_is_pint, structure_pint)

else:  # pragma: no cover
    # TODO: decide whether lack of pint should raise a warning or not
    pass