Source code for pimlico.cli.jupyter

# This file is part of Pimlico
# Copyright (C) 2020 Mark Granroth-Wilding
# Licensed under the GNU LGPL v3.0 - https://www.gnu.org/licenses/lgpl-3.0.en.html

"""
A command to start a Jupyter notebook for a given pipeline, providing access
to its modules and their outputs.

"""
import json
import os
import sys

from pimlico.core.dependencies.licenses import BSD

from pimlico import PIMLICO_ROOT
from pimlico.cli.subcommands import PimlicoCLISubcommand
from pimlico.core.dependencies.python import PythonPackageOnPip


class JupyterCmd(PimlicoCLISubcommand):
    """
    Creates and runs a Jupyter notebook for the loaded pipeline. The pipeline is
    made easily available within the notebook, providing a way to load the
    modules and get their outputs.

    This is a useful way to explore the data or analyses coming out of your
    modules. Once a module has been run, you can load it from a notebook and
    manipulate, explore, visualize, etc. the results.

    A new directory is automatically created in your project root to contain
    the pipeline's notebooks. (You can override the location of this using
    ``--notebook-dir``). An example notebook is created there, to show you how
    to load the pipeline.

    From within a notebook, load a pipeline like so:

    .. code-block:: py

       from pimlico import get_jupyter_pipeline
       pipeline = get_jupyter_pipeline()

    Now you can access the modules of the pipeline through this pipeline object:

    .. code-block:: py

       mod = pipeline["my_module"]

    And get data from its outputs (provided the module's been run):

    .. code-block:: py

       print(mod.status)
       output = mod.get_output("output_name")

    """
    command_name = "jupyter"
    command_help = "Create and start a new Jupyter notebook for the pipeline"

    def add_arguments(self, parser):
        """
        Register the command's only option, ``--notebook-dir``, on ``parser``.

        """
        parser.add_argument("--notebook-dir", action="store",
                            help="Use a custom directory as the notebook directory. By default, a directory will be "
                                 "created according to: <pimlico_root>/../notebooks/<pipeline_name>/")

    def run_command(self, pipeline, opts):
        """
        Prepare the notebook directory for ``pipeline`` and hand control over
        to the Jupyter notebook server.

        Installs Jupyter first if it is not available, writes an
        ``example.ipynb`` into the notebook directory and exports the pipeline
        config path in the ``JUPYTER_PIPELINE`` environment variable. This
        method does not return: it finishes by calling ``sys.exit()`` with the
        result of Jupyter's ``main()``.

        :param pipeline: the loaded pipeline to expose inside notebooks
        :param opts: parsed command-line options; ``opts.notebook_dir`` is used
            as the notebook directory if it is not ``None``
        """
        if not jupyter_dependency.available(pipeline.local_config):
            print("Jupyter not currently installed in local environment: installing")
            jupyter_dependency.install(pipeline.local_config)

        # Now Jupyter should be installed, so we can import the main function for running notebooks.
        # The import is deliberately deferred until after the installation step above
        from notebook.notebookapp import main
        print("Jupyter installed and successfully imported")

        # Set up a directory that will be used as the notebook directory for this pipeline
        if opts.notebook_dir is not None:
            notebook_dir = opts.notebook_dir
            print("Using custom notebook directory: {}".format(notebook_dir))
        else:
            notebook_dir = os.path.abspath(os.path.join(PIMLICO_ROOT, "..", "notebooks", pipeline.name))
            print("Using notebook directory for pipeline {}: {}".format(pipeline.name, notebook_dir))

        # Create the directory tree if necessary
        if not os.path.exists(notebook_dir):
            print("Creating notebook dir {}".format(notebook_dir))
            os.makedirs(notebook_dir)

        # Create an example source file that loads the pipeline.
        # Start from placeholder names and replace them with real ones where the pipeline provides them
        example_module_name = "module_name"
        example_output = "output_name"
        if len(pipeline.modules) > 0:
            # Use the last module of the pipeline as the example
            example_module_name = pipeline.modules[-1]
            example_mod = pipeline[example_module_name]
            if len(example_mod.available_outputs) > 0:
                example_output = example_mod.available_outputs[0]
            # Otherwise the module has no outputs, so the placeholder output name stays

        example_code = EXAMPLE_CODE.format(
            example_module_name=example_module_name, example_output_name=example_output
        )
        print("Adding example notebook")
        # Opened in "w" mode, so the example notebook is rewritten on every run
        with open(os.path.join(notebook_dir, "example.ipynb"), "w") as f:
            f.write(make_notebook(example_code))

        # Make the currently loaded pipeline available from within Jupyter notebooks via an environment var
        os.environ["JUPYTER_PIPELINE"] = os.path.abspath(pipeline.filename)

        print("Running Jupyter...")
        print("------------------")
        # The placeholder must be filled in explicitly, otherwise a literal '{}' is shown to the user
        print("From within a notebook, you can access the loaded '{}' pipeline by:".format(pipeline.name))
        print(" from pimlico import get_jupyter_pipeline")
        print(" pipeline = get_jupyter_pipeline()")
        print()

        # Jupyter's main() reads its options from sys.argv, so replace our own arguments with the ones it needs
        sys.argv = [sys.argv[0], "--notebook-dir", notebook_dir]
        sys.exit(main())
# Pip-installable Jupyter package: JupyterCmd.run_command() checks that it is
# available, and installs it if not, before importing the notebook server
jupyter_dependency = PythonPackageOnPip("jupyter", homepage_url="https://jupyter.org/", license=BSD)

# Template for the code of the example notebook. The two placeholders are
# filled in with str.format() and the result is passed to make_notebook(),
# which starts a new cell at every blank line: the blank lines below are
# therefore significant
EXAMPLE_CODE = """\
# This is an example of how to load your pipeline from a notebook
from pimlico import get_jupyter_pipeline
pipeline = get_jupyter_pipeline()

# Now you can access the modules of the pipeline through this pipeline object
mod = pipeline["{example_module_name}"]

# And get data from its outputs (provided the module's been run)
print(mod.status)
output = mod.get_output("{example_output_name}")
"""
def make_notebook(code_text):
    """
    Render a block of code as the JSON text of a Jupyter notebook.

    ``code_text`` is cut at every blank line (two consecutive newline
    characters) and each resulting chunk becomes one code cell with no
    outputs and no execution count.

    :param code_text: source code to put in the notebook
    :return: the notebook as a JSON string, indented by 4 spaces
    """
    def _code_cell(source):
        # A single, not yet executed code cell holding the given chunk as its only source entry
        return {
            "cell_type": "code",
            "execution_count": None,
            "metadata": {},
            "outputs": [],
            "source": [source],
        }

    notebook = {
        "cells": [_code_cell(chunk) for chunk in code_text.split("\n\n")],
        "metadata": {},
        "nbformat": 4,
        "nbformat_minor": 2,
    }
    return json.dumps(notebook, indent=4)