Source code for pydoit_nb.notebook_step
"""
Notebook-based step
A notebook-based step is the combination of a notebook and the configuration
to run it.
"""
from __future__ import annotations
import copy
from collections.abc import Iterable
from pathlib import Path
from typing import TYPE_CHECKING, Any, Generic, Protocol, TypeVar
from attrs import frozen
from pydoit_nb.config_handling import get_step_config_ids
from .typing import ConfigBundleLike, Converter, DoitTaskSpec
if TYPE_CHECKING:
from .notebook import ConfiguredNotebook, UnconfiguredNotebook
C = TypeVar("C")
CB_contra = TypeVar("CB_contra", contravariant=True, bound=ConfigBundleLike[Any])
[docs]class ConfigureNotebooksCallable(Protocol[CB_contra]):
"""Callable that can be used for configuring notebooks"""
def __call__( # noqa: D102
self,
unconfigured_notebooks: Iterable[UnconfiguredNotebook],
config_bundle: CB_contra,
step_name: str,
step_config_id: str,
) -> list[ConfiguredNotebook]:
... # pragma: no cover
[docs]@frozen
class UnconfiguredNotebookBasedStep(Generic[C, CB_contra]):
"""
An unconfigured notebook-based step
A step is a step in the overall workflow. A notebook-based step can be made
up of one or more notebooks. These are then configured at run-time with the
run-time information so they can then be turned into doit task(s).
"""
step_name: str
"""Name of the step"""
unconfigured_notebooks: list[UnconfiguredNotebook]
"""Unconfigured notebooks that make up this step"""
configure_notebooks: ConfigureNotebooksCallable[CB_contra]
"""Function which can configure the notebooks based on run-time information"""
[docs] def gen_notebook_tasks(
self,
config_bundle: CB_contra,
root_dir_raw_notebooks: Path,
converter: Converter | None = None,
clean: bool = True,
) -> Iterable[DoitTaskSpec]:
"""
Generate notebook tasks for this step
Parameters
----------
config_bundle
Configuration bundle to use when generating the tasks
root_dir_raw_notebooks
Root directory in which the raw notebooks live
converter
Instance that can serialise the configuration used by each notebook
clean
If we run `doit clean`, should the targets of each task be
removed?
Yields
------
Task specifications for use with :mod:`doit`
"""
unconfigured_notebooks = self.unconfigured_notebooks
unconfigured_notebooks_base_tasks = {}
for nb in unconfigured_notebooks:
base_task = {
"basename": f"({nb.notebook_path}) {nb.summary}",
"name": None,
"doc": nb.doc,
}
# yield copy of base task to avoid being mangled by doit
yield copy.deepcopy(base_task)
unconfigured_notebooks_base_tasks[nb.notebook_path] = base_task
step_config_ids = get_step_config_ids(getattr(config_bundle.config_hydrated, self.step_name))
notebook_output_dir_step = config_bundle.root_dir_output_run / "notebooks-executed" / self.step_name
for step_config_id in step_config_ids:
configured_notebooks = self.configure_notebooks(
unconfigured_notebooks,
config_bundle=config_bundle,
step_name=self.step_name,
step_config_id=step_config_id,
)
if len(unconfigured_notebooks) != len(configured_notebooks):
msg = (
"The number of unconfigured and configured notebooks is not the same. "
"We haven't yet thought through this use case. "
"Please raise an issue at https://github.com/climate-resource/pydoit-nb to discuss."
)
raise NotImplementedError(msg)
notebook_output_dir_step_id = notebook_output_dir_step / step_config_id
for nb_configured in configured_notebooks:
notebook_task = nb_configured.to_doit_task(
root_dir_raw_notebooks=root_dir_raw_notebooks,
notebook_output_dir=notebook_output_dir_step_id,
base_task=unconfigured_notebooks_base_tasks[
nb_configured.unconfigured_notebook.notebook_path
],
converter=converter,
clean=clean,
)
yield notebook_task