Source code for nbprint.config.core.config

from ast import literal_eval
from pathlib import Path
from pprint import pprint
from sys import version_info
from typing import Type, Union

from ccflow import CallableModel, ContextType, Flow, ResultType
from hydra import compose, initialize_config_dir
from hydra.utils import instantiate
from nbformat import NotebookNode, read as nb_read
from nbformat.v4 import new_notebook
from pkn import getSimpleLogger
from pydantic import Field, PrivateAttr, field_validator, model_validator
from typing_extensions import Self

from nbprint import __version__
from nbprint.config.base import BaseModel, Role, _append_or_extend
from nbprint.config.content import Content, ContentCode, ContentMarkdown
from nbprint.config.exceptions import NBPrintPathIsYamlError, NBPrintPathOrModelMalformedError
from nbprint.config.page import Page

from .content import ContentMarshall
from .context import Context
from .outputs import Outputs, OutputsProcessing
from .parameters import PapermillParameters, Parameters

__all__ = (
    "Configuration",
    "load",
)

_log = getSimpleLogger("nbprint.config.core.config")


[docs] class Configuration(CallableModel, BaseModel): name: str resources: dict[str, BaseModel] = Field(default_factory=dict) outputs: Outputs parameters: Parameters = Field(default_factory=Parameters) page: Page = Field(default_factory=Page) context: Context = Field(default_factory=Context) content: ContentMarshall = Field(default_factory=ContentMarshall) # basic metadata tags: list[str] = Field(default_factory=list) role: Role = Role.CONFIGURATION ignore: bool = True pagedjs: bool = True debug: bool = True # internals _multi: bool = PrivateAttr(default=False) _nb_var_name: str = PrivateAttr(default="nbprint_config") _nb_vars: set = PrivateAttr(default_factory=set) @field_validator("tags", mode="after") @classmethod def _ensure_tags(cls, v: list[str]) -> list[str]: if "nbprint:config" not in v: v.append("nbprint:config") return v @field_validator("resources", mode="before") @classmethod def _convert_resources_from_obj(cls, value) -> dict[str, BaseModel]: if value is None: value = {} if isinstance(value, dict): for k, v in value.items(): value[k] = BaseModel._to_type(v) return value @field_validator("outputs", mode="before") @classmethod def _convert_outputs_from_obj(cls, v) -> Outputs: return BaseModel._to_type(v, Outputs) @field_validator("parameters", mode="before") @classmethod def _convert_parameters_from_obj(cls, v) -> Parameters: return BaseModel._to_type(v, Parameters) @field_validator("page", mode="before") @classmethod def _convert_page_from_obj(cls, v) -> Page: return BaseModel._to_type(v, Page) @field_validator("context", mode="before") @classmethod def _convert_context_from_obj(cls, v) -> Context: return BaseModel._to_type(v, Context) @staticmethod def _convert_content_from_list(v) -> ContentMarshall: for i, element in enumerate(v): if isinstance(element, str): v[i] = Content(type_=element) elif isinstance(element, dict): v[i] = BaseModel._to_type(element) return ContentMarshall(middlematter=v) @staticmethod def _convert_content_from_dict(v) -> ContentMarshall: for key in ContentMarshall.model_fields: if key in v and isinstance(v[key], list): v[key] = Configuration._convert_content_from_list(v[key]).all return ContentMarshall(**v)
[docs] @field_validator("content", mode="before") @classmethod def convert_content_from_obj(cls, v) -> ContentMarshall: if v is None: return ContentMarshall() if isinstance(v, list): return cls._convert_content_from_list(v) if isinstance(v, dict): return cls._convert_content_from_dict(v) return v
@model_validator(mode="after") def _validate(self) -> Self: self.context.parameters = self.parameters self.outputs._compute_outputs(config=self) return self # NOTE: this shouldve been possible via a wrap validator, # but alas i could not get it to work def __setattr__(self, name: str, value) -> None: if name == "parameters" and self.parameters: value = BaseModel._to_type(value, Parameters) if isinstance(value, dict) and "type_" in value else PapermillParameters.model_validate(value) if type(value) is not type(self.parameters): # replace wholesale super().__setattr__(name, value) else: # Union new parameters with existing parameters for k, v in value.model_dump(mode="json", exclude_unset=True, exclude={"type_"}).items() if isinstance(value, Parameters) else value.items(): setattr(self.parameters, k, v) self._validate() return None return super().__setattr__(name, value) @staticmethod def _init_content(values) -> ContentMarshall: if "content" not in values: values["content"] = ContentMarshall() elif isinstance(values["content"], list): values["content"] = ContentMarshall(middlematter=values["content"]) elif isinstance(values["content"], dict): values["content"] = ContentMarshall(**values["content"]) else: e = f"Unexpected content format when loading from notebook: {type(values['content'])}" raise RuntimeError(e) @staticmethod def _cell_to_content(cell) -> Content: source = cell.source.strip() if not source: # skip empty cells return None # Cells may have nbprint metadata from the UI extension # "nbprint": { # "attrs": "", # "class": "", # "class_selector": "", # "css": "", # "data": "{}", # "element_selector": "", # "esm": "", # "id": "", # "ignore": true, # "role": "parameters", # "type_": "nbprint.config.core.parameters.Parameters" # }, if "metadata" in cell and "nbprint" in cell["metadata"]: nbprint_cell_meta = cell["metadata"]["nbprint"] # TODO: not all fields are serdes symmetric nbprint_cell_meta.pop("attrs", None) nbprint_cell_meta.pop("class", None) nbprint_cell_meta.pop("class_selector", None) nbprint_cell_meta.pop("element_selector", None) else: nbprint_cell_meta = {} # Attach cell tags in to content if "tags" in cell["metadata"]: nbprint_cell_meta["tags"] = cell["metadata"]["tags"] # If this is an nbprint defined type, use that if "type_" in nbprint_cell_meta: content_type = nbprint_cell_meta["type_"] content_model = BaseModel._to_type({"type_": content_type}, Content) return content_model.model_validate(nbprint_cell_meta) # Set source nbprint_cell_meta["content"] = cell.source # Default handling: treat as code or markdown content if cell.cell_type in {"code"}: content = ContentCode.model_validate(nbprint_cell_meta) elif cell.cell_type in {"markdown"}: content = ContentMarkdown.model_validate(nbprint_cell_meta) else: # Skip, log warning _log.warning(f"Unsupported cell type when loading from notebook: {cell.cell_type}") return content @staticmethod def _parse_parameters_cell(cell) -> dict: new_parameters = {} param_lines = cell.source.splitlines() for line in param_lines: if "=" in line: key, value = line.split("=", 1) key = key.strip() value = value.strip() # Attempt to eval the value to get correct type try: evaluated_value = literal_eval(value) except SyntaxError: evaluated_value = value new_parameters[key] = evaluated_value return new_parameters @staticmethod def _process_cells(values, nb_content: NotebookNode) -> None: new_parameters = {} cells_to_process = nb_content.cells # TODO: if first cell has tags, insert at front instead of appending if cells_to_process and "metadata" in cells_to_process[0] and "parameters" in cells_to_process[0]["metadata"].get("tags", []): # Parse first cell for parameters first_cell = cells_to_process[0] # skip first cell cells_to_process = cells_to_process[1:] # Pull out the parameters object and ensure everything is present if "parameters" not in values: values["parameters"] = PapermillParameters() # Pull out the parameters object and ensure everything is present new_parameters = Configuration._parse_parameters_cell(first_cell) else: cells_to_process = nb_content.cells for cell in cells_to_process: cell_instance = Configuration._cell_to_content(cell) if cell_instance is not None: values["content"].middlematter.append(cell_instance) for k, v in new_parameters.items(): if k in values["parameters"].model_fields and getattr(values["parameters"], k) is None: setattr(values["parameters"], k, v) elif isinstance(values["parameters"], PapermillParameters) and (k not in values["parameters"].vars or values["parameters"].vars[k] is None): values["parameters"].vars[k] = v @model_validator(mode="before") @classmethod def _append_notebook_content(cls, values) -> None: if values.get("notebook") is None: return values cls._init_content(values) file = Path(values.pop("notebook")) with file.open("r", encoding="utf-8") as path_file: nb_content = nb_read(path_file, as_version=4) cls._process_cells(values, nb_content) return values
[docs] def generate(self, **_) -> list[NotebookNode]: nb = new_notebook() nb.metadata.nbprint = {} nb.metadata.nbprint.version = __version__ nb.metadata.nbprint.tags = [] nb.metadata.nbprint.nbprint = {} nb.metadata.nbprint.language = f"python{version_info.major}.{version_info.minor}" base_meta = { "tags": [], "nbprint": {}, } nb.cells = [] # start with parameters for papermill compat # use `parent=None` because we parameters is first cell, and we wont instantiate the config # until the next cell _append_or_extend(nb.cells, self.parameters.generate(metadata=base_meta.copy(), config=self, parent=None)) # now do the configuration itself _append_or_extend(nb.cells, self._generate_self(metadata=base_meta.copy())) # now do the context object # pass in parent=self, attr=context so we do config.context _append_or_extend(nb.cells, self.context.generate(metadata=base_meta.copy(), config=self, parent=self, attr="context")) # resources: dict[str, SerializeAsAny[BaseModel]] = Field(default_factory=dict) # TODO: omitting resources, referenced directly in yaml # cell.metadata.nbprint.resources = {k: v.model_dump_json(by_alias=True) for k, v in self.resources.items()} # outputs: SerializeAsAny[Outputs] # TODO: skipping, consumed internally # now setup the page layout # pass in parent=self, attr=page so we do config.page _append_or_extend(nb.cells, self.page.generate(metadata=base_meta.copy(), config=self, parent=self, attr="page")) # now iterate through the content, recursively generating for i, content in enumerate(self.content.all): _append_or_extend( nb.cells, content.generate(metadata=base_meta.copy(), config=self, parent=self, attr="content", counter=i), ) # Finally, run the outputs cell # NOTE: outputs cell doesnt usually actually do anything, unless # it is set to run in-context, in which case it will only # execute inside the notebook and note outside _append_or_extend(nb.cells, self.outputs.generate(metadata=base_meta.copy(), config=self, parent=self, attr="outputs")) return nb
def _generate_self(self, metadata: dict) -> NotebookNode: cell = super()._base_generate(metadata=metadata, config=self) # omit the data cell.metadata.nbprint.data = "" # add extras cell.metadata.nbprint.debug = self.debug cell.metadata.nbprint.pagedjs = self.pagedjs # add resources # TODO: do this or no? # cell.metadata.nbprint.resources = {k: v.model_dump_json(by_alias=True) for k, v in self.resources.items()} cell.metadata.nbprint.outputs = self.outputs.model_dump_json(by_alias=True) return cell def _generate_resources_cells(self, metadata: dict | None = None) -> NotebookNode: cell = super()._base_generate(metadata=metadata, config=None) # omit the data cell.metadata.nbprint.data = "" # add resources # mod = ast.Module(body=[], type_ignores=[]) # for k, v in self.resources.items(): # ... return cell
[docs] @staticmethod def load(path_or_model: Union[str, Path, dict, "Configuration"], name: str) -> "Configuration": if isinstance(path_or_model, Configuration): return path_or_model if isinstance(path_or_model, str) and path_or_model.endswith(".yml"): raise NBPrintPathIsYamlError(path_or_model) if isinstance(path_or_model, str) and path_or_model.endswith(".yaml"): path_or_model = Path(path_or_model).resolve() if isinstance(path_or_model, Path): path_or_model = path_or_model.resolve() folder = str(path_or_model.parent) file = str(path_or_model.name) with initialize_config_dir(version_base=None, config_dir=folder, job_name=name): cfg = compose(config_name=file, overrides=[f"+name={name}"]) config = instantiate(cfg, _convert_="all") if not isinstance(config, Configuration): config = Configuration.model_validate(config) return config raise NBPrintPathOrModelMalformedError(path_or_model)
[docs] def run(self, dry_run: bool = False, *, _multi: bool = False) -> Path | None: gen = self.generate() ret = None if self.debug: pprint(gen) if not dry_run: ret = self.outputs.run(self, gen) if ret in (None, OutputsProcessing.STOP): # Either a handled problem or user requested stop # Return None to indicate a problem # TODO: revisit return None if not dry_run and not self._multi and self.outputs.postprocess: # Run postprocessing self.outputs.postprocess.object([self]) # NOTE: as of this point, we're "done" # reset in case we want to run again self._reset() return ret
def _reset(self) -> None: # reset ourselves in case we need to rerun self._nb_vars = set() self.context._context_generated = False # ccflow integration @property def context_type(self) -> Type[ContextType]: return self.parameters.__class__ @property def result_type(self) -> Type[ResultType]: return self.outputs.__class__ @Flow.call def __call__(self, context): # noqa: ANN204 # NOTE: make a copy to avoid mutation during flow runs interfering with caching # update parameters if changed if context != self.parameters: self.parameters = context self.run() return self.outputs
load = Configuration.load