Source code for nbprint.config.outputs.nbconvert

import os
from pathlib import Path
from typing import Literal

from ccflow import PyObjectPath
from nbformat import NotebookNode
from pydantic import Field, PrivateAttr, field_validator

from nbprint.config import Configuration, Outputs, OutputsProcessing

# Names exported by `from nbprint.config.outputs.nbconvert import *`.
__all__ = ("HTMLOutputs", "NBConvertOutputs", "NBConvertShortCircuitOutputs", "NotebookOutputs", "PDFOutputs", "WebHTMLOutputs", "short_circuit_hook")



[docs]
class NBConvertOutputs(Outputs):
    """Outputs configuration that runs the generated notebook through nbconvert.

    The fields below select the nbconvert target, whether the notebook is
    executed first, the execution timeout, the template, and optional hooks
    that are invoked after execution and after conversion.
    """

    # Target passed to nbconvert via `--to`; "pdf"/"notebook"/None are normalised by `validate_target`.
    target: Literal["ipynb", "notebook", "html", "webhtml", "pdf", "webpdf"] | None = "html"  # TODO: nbconvert types
    # Whether to execute the notebook (into `<stem>.executed.ipynb`) before converting it.
    execute: bool | None = True
    # Forwarded to nbconvert as `--ExecutePreprocessor.timeout`.
    timeout: int | None = 600
    # Forwarded to nbconvert as `--template`.
    template: str | None = "nbprint"

    # TODO: maybe allow collecting by index
    # collect_cells: list[int] = Field(default=[], description="List of cell indices to collect outputs from.")
    # NOTE(review): `run` in this class extracts cell outputs whenever `execute` is true
    # and does not consult this flag — confirm whether it is read elsewhere.
    collect_outputs: bool = Field(
        default=False, description=("Whether to collect cell outputs into the context. Cells with tag `nbprint:output:<key>` will be collected under `<key>`.")
    )
    # NOTE(review): `run` stops (returns `OutputsProcessing.STOP`) when a hook returns
    # `OutputsProcessing.STOP` or None and otherwise continues; the "non-None value is
    # returned by `run`" wording in the descriptions below does not match — confirm the
    # intended contract before relying on either.
    execute_hook: PyObjectPath | None = Field(
        default=None,
        description=(
            "A callable hook that is called after nbconvert execution of the notebook. "
            "It is passed the config instance. "
            "If it returns something non-None, that value is returned by `run` instead of the output path. "
            "NOTE: Parent/child class hooks may also be called."
        ),
    )
    nbconvert_hook: PyObjectPath | None = Field(
        default=None,
        description=(
            "A callable hook that is called after nbconvert of the previously executed notebook. "
            "It is passed the config instance. "
            "If it returns something non-None, that value is returned by `run` instead of the output path. "
            "NOTE: Parent/child class hooks may also be called."
        ),
    )

    # Outputs gathered from tagged cells, keyed by output key; each entry is a list of
    # per-output mappings (mimetype -> content).
    _collected_cells: dict[int | str, list[dict[str, str]]] = PrivateAttr(default_factory=dict)

    @property
    def outputs(self) -> dict[int | str, list[dict[str, str]]]:
        """Return the cell outputs collected so far, keyed by output key."""
        # NOTE: parent class has `output`
        return self._collected_cells

[docs]
    @field_validator("target", mode="before")
    @classmethod
    def validate_target(cls, v) -> str:
        if v is None:
            return "html"
        if v == "pdf":
            return "webpdf"
        if v == "notebook":
            return "ipynb"
        return v


    def _compute_outputs(self, config: "Configuration") -> None:
        """Derive the executed-notebook path and the final output path.

        Runs the parent computation first, then:

        - when ``execute`` is set, points ``_nb_executed_path`` at
          ``<stem>.executed.ipynb`` next to the generated notebook;
        - rewrites ``.ipynb`` in the output path to the extension matching the
          target (``webpdf`` -> ``pdf``, ``webhtml`` -> ``html``), or to
          ``.executed.ipynb`` when an executed notebook is itself the target.
        """
        super()._compute_outputs(config=config)

        if self.execute:
            source_nb = self.notebook
            self._nb_executed_path = source_nb.parent / f"{source_nb.stem}.executed.ipynb"

        if self.target == "ipynb" and self.execute:
            replacement = ".executed.ipynb"
        else:
            # The "web*" exporters write plain pdf/html files.
            extension = {"webpdf": "pdf", "webhtml": "html"}.get(self.target, self.target)
            replacement = f".{extension}"

        self._output_path = Path(str(self.output).replace(".ipynb", replacement))

    @staticmethod
    def _get_output_key(cell: NotebookNode) -> str | None:
        """Return the key a cell's outputs should be collected under, if any.

        ``cell.metadata.nbprint.output`` takes precedence; otherwise the first
        tag starting with ``nbprint:output:`` supplies the key. Returns ``None``
        when the cell carries neither.
        """
        meta = cell.metadata
        if "nbprint" in meta and "output" in meta.nbprint:
            return meta.nbprint.output

        marker = "nbprint:output:"
        tagged_keys = (tag.split(marker)[1] for tag in meta.get("tags", []) if tag.startswith(marker))
        return next(tagged_keys, None)

    def _extract_cell_outputs(self) -> None:
        """Collect outputs of marked cells from the executed notebook.

        Reads ``self.executed_notebook``, and for every cell for which
        ``_get_output_key`` yields a key, appends one mapping per cell output
        to ``self._collected_cells[key]``:

        - outputs with a ``data`` entry are stored as a copy of that
          mimetype -> content mapping;
        - outputs with only a ``text`` entry are stored under ``text/plain``;
        - any other output is stored as an empty mapping.
        """
        from nbformat import reads

        # Decode explicitly as UTF-8 rather than with the locale's default
        # encoding, which is not UTF-8 on every platform.
        notebook_content = self.executed_notebook.read_text(encoding="utf-8")
        nb = reads(notebook_content, as_version=4)

        for cell in nb.cells:
            # `_get_output_key` returns None for cells without nbprint output
            # metadata or an `nbprint:output:<key>` tag, so no pre-filter is needed.
            output_key = self._get_output_key(cell)
            if output_key is None:
                continue

            outputs = []
            for output in cell.get("outputs", []):
                if "data" in output:
                    output_data = dict(output["data"].items())
                elif "text" in output:
                    output_data = {"text/plain": output["text"]}
                else:
                    output_data = {}
                outputs.append(output_data)

            # Several cells may share a key; their outputs accumulate in order.
            self._collected_cells.setdefault(output_key, []).extend(outputs)


[docs]
    def run(self, config: "Configuration", gen: NotebookNode) -> Path | OutputsProcessing:
        """Generate the notebook, optionally execute it, and convert it with nbconvert.

        Args:
            config: The configuration being run; forwarded to the parent ``run``
                and passed to ``execute_hook`` / ``nbconvert_hook``.
            gen: The generated notebook node; forwarded to the parent ``run``.

        Returns:
            ``self.output`` on success, or ``OutputsProcessing.STOP`` when the
            parent ``run`` returns ``None``/``STOP`` or when a configured hook
            returns ``None`` or ``OutputsProcessing.STOP``.
        """
        from nbconvert.nbconvertapp import main as execute_nbconvert

        # Run parent to create notebook
        notebook = super().run(config=config, gen=gen)

        # If notebook is None, we stop
        if notebook in (None, OutputsProcessing.STOP):
            return OutputsProcessing.STOP

        # TODO: fix in nbconvert
        # For webpdf the `.webpdf`/`.pdf` extension is stripped from the name handed to nbconvert.
        output = str(self.output).replace(".webpdf", ".pdf").replace(".pdf", "") if self.target == "webpdf" else str(self.output)

        cmd = [
            str(notebook),
            f"--to={self.target}",
            f"--output={output}",
        ]
        # `template` may be None; omit the flag instead of passing the literal string "None".
        if self.template is not None:
            cmd.append(f"--template={self.template}")

        # We have some cheats here because we have to
        # NOTE: these are set on the current process environment and are not reset afterwards.
        os.environ["_NBPRINT_IN_NBCONVERT"] = "1"
        os.environ["PSP_JUPYTER_HTML_EXPORT"] = "1"

        if self.execute:
            nbex_cmd = [
                str(notebook),
                "--to=notebook",
                f"--output={self.executed_notebook!s}",
                "--execute",
                f"--ExecutePreprocessor.timeout={self.timeout}",
            ]

            # Update cmd to use executed notebook
            cmd[0] = str(self.executed_notebook)

            # Execute nbconvert
            execute_nbconvert(nbex_cmd)

            # Extract cells by tags
            self._extract_cell_outputs()

            # A hook result of STOP or None halts processing before conversion.
            if self.execute_hook and self.execute_hook.object(config) in (OutputsProcessing.STOP, None):
                return OutputsProcessing.STOP

        if not (self.execute and self.target == "ipynb"):
            # If target is notebook, we already did it above
            execute_nbconvert(cmd)

        # A hook result of STOP or None is reported as STOP instead of the output path.
        if self.nbconvert_hook and self.nbconvert_hook.object(config) in (OutputsProcessing.STOP, None):
            return OutputsProcessing.STOP
        return self.output





[docs]
class NotebookOutputs(NBConvertOutputs):
    """NBConvertOutputs restricted to the ``ipynb`` (notebook) target."""

    target: Literal["ipynb"] = "ipynb"




[docs]
class HTMLOutputs(NBConvertOutputs):
    """NBConvertOutputs restricted to the ``html`` target."""

    target: Literal["html"] = "html"




[docs]
class WebHTMLOutputs(NBConvertOutputs):
    """NBConvertOutputs restricted to the ``webhtml`` target."""

    target: Literal["webhtml"] = "webhtml"




[docs]
class PDFOutputs(NBConvertOutputs):
    """NBConvertOutputs restricted to the ``webpdf`` target."""

    target: Literal["webpdf"] = "webpdf"



def short_circuit_hook(config: "Configuration") -> OutputsProcessing | bool:
    """Stop processing when a cell collected under the ``stop`` key printed/returned true.

    Looks at ``config.outputs.outputs["stop"]`` and returns
    ``OutputsProcessing.STOP`` if any collected output has a ``text/plain``
    value that equals ``"true"`` after stripping whitespace and lowercasing.
    Returns ``True`` in every other case, including when nothing was collected.
    """
    collected = config.outputs.outputs
    if not collected or "stop" not in collected:
        return True

    for outcome in collected["stop"]:
        plain_text = outcome.get("text/plain", "")
        if plain_text.strip().lower() == "true":
            return OutputsProcessing.STOP
    return True



[docs]
class NBConvertShortCircuitOutputs(NBConvertOutputs):
    """NBConvertOutputs with a default hook that can short-circuit processing.

    The default ``execute_hook`` points at ``short_circuit_hook`` in this
    module, which stops processing if a cell with tag ``nbprint:output:stop``
    returns True.
    """

    # Overrides the parent's optional hook with a non-optional default.
    execute_hook: PyObjectPath = Field(
        default=PyObjectPath("nbprint.config.outputs.nbconvert.short_circuit_hook"),
        description="A hook that short-circuits processing if a certain cell with tag nbprint:output:stop returns True.",
    )