Source code for nbprint.config.outputs.nbconvert

import os
from pathlib import Path
from typing import Literal

from ccflow import PyObjectPath
from nbformat import NotebookNode
from pydantic import Field, PrivateAttr, field_validator

from nbprint.config import Configuration, Outputs, OutputsProcessing

# Public names exported by this module.
__all__ = (
    "HTMLOutputs",
    "NBConvertOutputs",
    "NBConvertShortCircuitOutputs",
    "NotebookOutputs",
    "PDFOutputs",
    "WebHTMLOutputs",
    "short_circuit_hook",
)


class NBConvertOutputs(Outputs):
    """Outputs implementation that executes and converts the generated notebook with nbconvert.

    ``run`` lets the parent class create the notebook, optionally executes it
    into a sibling ``<stem>.executed.ipynb`` file, collects the outputs of
    tagged cells, and then converts the (executed) notebook to ``target``.
    """

    target: Literal["ipynb", "notebook", "html", "webhtml", "pdf", "webpdf"] | None = "html"  # TODO: nbconvert types
    execute: bool | None = True
    timeout: int | None = 600
    template: str | None = "nbprint"

    # TODO: maybe allow collecting by index
    # collect_cells: list[int] = Field(default=[], description="List of cell indices to collect outputs from.")
    collect_outputs: bool = Field(
        default=False,
        description=("Whether to collect cell outputs into the context. Cells with tag `nbprint:output:<key>` will be collected under `<key>`."),
    )

    # NOTE(review): as implemented in `run`, a hook result of `None` or
    # `OutputsProcessing.STOP` aborts the run; any other result lets it continue.
    execute_hook: PyObjectPath | None = Field(
        default=None,
        description=(
            "A callable hook that is called after nbconvert execution of the notebook. "
            "It is passed the config instance. "
            "If it returns something non-None, that value is returned by `run` instead of the output path. "
            "NOTE: Parent/child class hooks may also be called."
        ),
    )
    nbconvert_hook: PyObjectPath | None = Field(
        default=None,
        description=(
            "A callable hook that is called after nbconvert of the previously executed notebook. "
            "It is passed the config instance. "
            "If it returns something non-None, that value is returned by `run` instead of the output path. "
            "NOTE: Parent/child class hooks may also be called."
        ),
    )

    # Outputs gathered by `_extract_cell_outputs`, keyed by output key; each
    # entry is a list of mimetype -> content mappings (one per cell output).
    _collected_cells: dict[int | str, list[dict[str, str]]] = PrivateAttr(default_factory=dict)

    @property
    def outputs(self) -> dict[int | str, list[dict[str, str]]]:
        """Return the cell outputs collected so far, keyed by output key."""
        # NOTE: parent class has `output`
        return self._collected_cells

    @field_validator("target", mode="before")
    @classmethod
    def validate_target(cls, v) -> str:
        """Normalize target aliases: None -> "html", "pdf" -> "webpdf", "notebook" -> "ipynb"."""
        if v is None:
            return "html"
        if v == "pdf":
            return "webpdf"
        if v == "notebook":
            return "ipynb"
        return v

    def _compute_outputs(self, config: "Configuration") -> None:
        """Derive the executed-notebook path and the final output path.

        Only a trailing ``.ipynb`` suffix of the parent-computed output is
        rewritten; other occurrences of ``.ipynb`` in the path (for example a
        ``.ipynb_checkpoints`` directory) are left untouched.
        """
        super()._compute_outputs(config=config)

        # Update executed path if needed
        if self.execute:
            self._nb_executed_path = self.notebook.parent / f"{self.notebook.stem}.executed.ipynb"

        # Map the "web*" target names onto the file extension used for the output path
        extension = {"webpdf": "pdf", "webhtml": "html"}.get(self.target, self.target)

        # Update output path
        base = Path(str(self.output))
        if base.suffix != ".ipynb":
            # Nothing to rewrite; keep the path as computed by the parent
            self._output_path = base
        elif self.target == "ipynb" and self.execute:
            self._output_path = base.with_name(f"{base.stem}.executed.ipynb")
        else:
            self._output_path = base.with_suffix(f".{extension}")

    @staticmethod
    def _get_output_key(cell: NotebookNode) -> str | None:
        """Get the output key from cell metadata or tags.

        ``cell.metadata.nbprint.output`` takes precedence; otherwise the first
        tag starting with ``nbprint:output:`` supplies the key (everything
        after that prefix). Returns ``None`` when neither is present.
        """
        if "nbprint" in cell.metadata and "output" in cell.metadata.nbprint:
            return cell.metadata.nbprint.output

        prefix = "nbprint:output:"
        for tag in cell.metadata.get("tags", []):
            if tag.startswith(prefix):
                # removeprefix keeps the full remainder even if it contains the prefix again
                return tag.removeprefix(prefix)
        return None

    def _extract_cell_outputs(self) -> None:
        """Extract outputs from selected cells of the executed notebook into ``_collected_cells``.

        A cell is selected when ``_get_output_key`` yields a key for it. For
        each of its outputs, the ``data`` mapping (mimetype -> content) is
        stored as-is; an output with only ``text`` is stored under
        ``text/plain``; anything else contributes an empty mapping. Results
        are appended to whatever was already collected under the same key.
        """
        from nbformat import reads

        # Read explicitly as UTF-8 rather than relying on the platform default encoding
        notebook_content = self.executed_notebook.read_text(encoding="utf-8")
        nb = reads(notebook_content, as_version=4)

        for cell in nb.cells:
            output_key = self._get_output_key(cell)
            if output_key is None:
                continue

            collected = []
            for output in cell.get("outputs", []):
                if "data" in output:
                    collected.append(dict(output["data"]))
                elif "text" in output:
                    collected.append({"text/plain": output["text"]})
                else:
                    collected.append({})

            self._collected_cells.setdefault(output_key, []).extend(collected)

    def run(self, config: "Configuration", gen: NotebookNode) -> Path:
        """Create, optionally execute, and convert the notebook.

        Args:
            config: The configuration instance; also passed to the hooks.
            gen: The generated notebook, forwarded to the parent ``run``.

        Returns:
            ``self.output`` on success, or ``OutputsProcessing.STOP`` when the
            parent ``run`` yields ``None``/``STOP`` or when a configured hook
            returns ``None`` or ``OutputsProcessing.STOP``.

        Side effects:
            Sets the ``_NBPRINT_IN_NBCONVERT`` and ``PSP_JUPYTER_HTML_EXPORT``
            environment variables to ``"1"``; they are not reset afterwards.
        """
        # NOTE(review): statement nesting below was reconstructed from a
        # whitespace-collapsed listing (execute_hook inside the `execute`
        # branch, nbconvert_hook at method level) - confirm against upstream.
        from nbconvert.nbconvertapp import main as execute_nbconvert

        # Run parent to create notebook
        notebook = super().run(config=config, gen=gen)

        # If notebook is None, we stop
        if notebook in (None, OutputsProcessing.STOP):
            return OutputsProcessing.STOP

        # TODO: fix in nbconvert
        output = str(self.output).replace(".webpdf", ".pdf").replace(".pdf", "") if self.target == "webpdf" else str(self.output)

        cmd = [
            str(notebook),
            f"--to={self.target}",
            f"--output={output}",
            f"--template={self.template}",
        ]

        # We have some cheats here because we have to
        os.environ["_NBPRINT_IN_NBCONVERT"] = "1"
        os.environ["PSP_JUPYTER_HTML_EXPORT"] = "1"

        if self.execute:
            nbex_cmd = [
                str(notebook),
                "--to=notebook",
                f"--output={self.executed_notebook!s}",
                "--execute",
                f"--ExecutePreprocessor.timeout={self.timeout}",
            ]

            # Update cmd to use executed notebook
            cmd[0] = str(self.executed_notebook)

            # Execute nbconvert
            execute_nbconvert(nbex_cmd)

            # Extract cells by tags
            self._extract_cell_outputs()

            if self.execute_hook and self.execute_hook.object(config) in (OutputsProcessing.STOP, None):
                return OutputsProcessing.STOP

        if not (self.execute and self.target == "ipynb"):
            # If target is notebook, we already did it above
            execute_nbconvert(cmd)

        if self.nbconvert_hook and self.nbconvert_hook.object(config) in (OutputsProcessing.STOP, None):
            return OutputsProcessing.STOP

        return self.output
# NBConvertOutputs preset whose target is pinned to "ipynb".
class NotebookOutputs(NBConvertOutputs):
    target: Literal["ipynb"] = "ipynb"
# NBConvertOutputs preset whose target is pinned to "html".
class HTMLOutputs(NBConvertOutputs):
    target: Literal["html"] = "html"
# NBConvertOutputs preset whose target is pinned to "webhtml".
class WebHTMLOutputs(NBConvertOutputs):
    target: Literal["webhtml"] = "webhtml"
# NBConvertOutputs preset whose target is pinned to "webpdf".
class PDFOutputs(NBConvertOutputs):
    target: Literal["webpdf"] = "webpdf"
def short_circuit_hook(config: "Configuration") -> OutputsProcessing | bool:
    """Stop processing when an output collected under the key ``stop`` reads "true".

    Looks at ``config.outputs.outputs["stop"]`` and returns
    ``OutputsProcessing.STOP`` if any entry's ``text/plain`` value equals
    ``"true"`` after stripping whitespace and lower-casing. Returns ``True``
    otherwise, including when nothing was collected or there is no ``stop`` key.
    """
    collected = config.outputs.outputs
    if not collected or "stop" not in collected:
        return True

    for outcome in collected["stop"]:
        if outcome.get("text/plain", "").strip().lower() == "true":
            return OutputsProcessing.STOP
    return True
class NBConvertShortCircuitOutputs(NBConvertOutputs):
    """NBConvertOutputs variant that installs ``short_circuit_hook`` as its default execute hook.

    With that hook, processing stops if a cell tagged ``nbprint:output:stop``
    returns True.
    """

    execute_hook: PyObjectPath = Field(
        description="A hook that short-circuits processing if a certain cell with tag nbprint:output:stop returns True.",
        default=PyObjectPath("nbprint.config.outputs.nbconvert.short_circuit_hook"),
    )