# Source code for pimlico.utils.docs.examplegen

# This file is part of Pimlico
# Copyright (C) 2020 Mark Granroth-Wilding
# Licensed under the GNU LGPL v3.0 - https://www.gnu.org/licenses/lgpl-3.0.en.html

"""
Tool to generate Pimlico docs for example config files.

Each example config file is (for now) just shown in full in the docs.
"""
from __future__ import print_function

import argparse
import os
import warnings
from itertools import takewhile, dropwhile

from pimlico import install_core_dependencies, EXAMPLES_DIR, REPO_SOURCE_HTML_ROOT, \
    PIMLICO_ROOT
from pimlico.core.config import PipelineConfig
from pimlico.utils.docs.modulegen import indent
from .rest import format_heading

# RST skeleton for the page of a single example pipeline. The placeholders
# (``ref_name``, ``title``, ``code_intro``, ``config_text``, ``module_list``)
# are filled in by ``build_example_config_doc``. ``config_text`` is passed in
# already indented, so that it ends up inside the ``code-block`` directive.
DOC_TEMPLATE = """\
.. _{ref_name}:

{title}

This is an example Pimlico pipeline.

{code_intro}

Pipeline config
===============

.. code-block:: ini
   
{config_text}

{module_list}

"""

# RST skeleton for the index page listing all generated example pipeline
# pages. ``{generated}`` is replaced by ``build_index`` with the toctree
# entries; it sits at the three-space indent of the ``toctree`` directive
# body, so every entry substituted in has to be indented to match.
# NOTE(review): the "Running" section names the script ``run_example.sh`` but
# the shell example calls ``./example_pipeline.sh`` - one of the two is
# presumably out of date; confirm against the examples directory.
INDEX_TEMPLATE = """\
.. _example-pipelines:

Example pipelines
~~~~~~~~~~~~~~~~~

Pimlico comes with a number of example pipelines to demonstrate how to 
use it. 

A more extensive set of examples is also provided in the form of 
:ref:`test pipelines <test-pipelines>`, which give a small example of the 
usage of individual core modules and are used as unit tests for the modules.

Available pipelines
===================

.. toctree::
   :maxdepth: 2
   :titlesonly:

   {generated}


Running
=======

To run example pipelines, you can use the script ``run_example.sh`` in Pimlico's 
``example`` directory, e.g.:

.. code-block:: sh
   
   ./example_pipeline.sh simple/tokenize.conf status

This will load a single example pipeline from the given config file and show the 
execution status of the modules.

"""


def _strip_comment_marker(line):
    """Remove the leading ``#`` and at most one following whitespace character."""
    text = line[1:]
    return text[1:] if text[:1].isspace() else text


def build_example_config_doc(base_path, rel_path):
    """
    Build the RST documentation page for a single example pipeline config.

    The config file is read as text and also loaded as a pipeline, using the
    examples' local config file. Comment lines at the very top of the file
    are lifted out of the config listing and used as introductory text on
    the page.

    :param base_path: directory containing the example config files
    :param rel_path: path of the config file, relative to ``base_path``
    :return: ``None`` if the example is skipped (the file starts with
        ``# TODO``, or the pipeline could not be loaded). Otherwise a tuple
        ``(doc_text, ref_name, module_types)``: the RST text of the page, the
        RST reference label for it and the list of ``pimlico.modules``
        package names used by the pipeline's modules.
    """
    conf_file = os.path.join(base_path, rel_path)
    root_rel = os.path.relpath(conf_file, PIMLICO_ROOT)

    # Load the raw data
    with open(conf_file, "r") as f:
        conf_data = f.read()

    if conf_data.startswith("# TODO"):
        # This example pipeline is not ready to be included: skip it
        warnings.warn("Skipping pipeline {}, since it starts with a TODO".format(conf_file))
        return

    # Use the special local config file for examples
    examples_lc_path = os.path.join(EXAMPLES_DIR, "examples_local_config")
    # Try loading the example pipeline. Any failure is reported as a warning
    # and the page is skipped, so one broken example does not stop the build
    try:
        pipeline = PipelineConfig.load(conf_file, local_config=examples_lc_path)
    except Exception as e:
        warnings.warn("Could not load example pipeline {}: {}. Not building doc".format(conf_file, e))
        return

    # Look for initial comments and extract them to the page text.
    # Counting with takewhile also covers a file made up only of comments
    lines = conf_data.splitlines()
    comment_lines = list(takewhile(lambda line: line.startswith("#"), lines))
    # Only strip the marker itself (and one separating space), so a comment
    # written without a space after the "#" does not lose its first letter
    initial_comments = "\n".join(_strip_comment_marker(line) for line in comment_lines)
    # Drop any blank lines from the start of what remains
    conf_data = "\n".join(dropwhile(lambda line: not line.strip(), lines[len(comment_lines):]))

    # Generate a link to the source code: URLs always use forward slashes
    source_url = "{}{}".format(REPO_SOURCE_HTML_ROOT, "/".join(root_rel.split(os.path.sep)))
    code_intro = (
        "The complete config file for this example pipeline is below. "
        "`Source file <{}>`_\n\n{}"
    ).format(source_url, initial_comments)

    # Check what module types are used
    module_types = [m.module_package_name() for m in pipeline]
    # Some modules will have no package name identifying them, as they're dataset loaders
    module_types = [m for m in module_types if m and m.startswith("pimlico.modules")]

    if module_types:
        module_list = "{}\n\nThe following Pimlico module types are used in this pipeline:\n\n{}\n".format(
            format_heading(1, "Modules"),
            "\n".join(" * :mod:`{}`".format(mod) for mod in module_types)
        )
    else:
        module_list = ""

    # Use the pipeline name as the reference
    ref_name = "example-pipeline-{}".format(pipeline.name.replace("_", "-"))

    doc_text = DOC_TEMPLATE.format(
        title=format_heading(0, pipeline.name),
        ref_name=ref_name,
        config_text=indent(3, conf_data),
        code_intro=code_intro,
        module_list=module_list
    )
    return doc_text, ref_name, module_types
def build_index(generated, output_dir):
    """
    Write ``index.rst``, the index page linking to all generated example docs.

    :param generated: list of the generated doc filenames to put in the toctree
    :param output_dir: directory in which ``index.rst`` is written
    """
    index_path = os.path.join(output_dir, "index.rst")
    # The first entry takes its indent from the template itself. Every later
    # entry needs the same three-space indent, or it falls outside the
    # toctree directive's body
    toctree_entries = "\n   ".join(generated)
    with open(index_path, "w") as f:
        f.write(INDEX_TEMPLATE.format(generated=toctree_entries))
    print("Wrote index to {}".format(index_path))
def build_example_config_docs(example_config_dir, output_dir):
    """
    Build a doc page for every ``.conf`` file found under a directory.

    Walks ``example_config_dir`` recursively, writes one ``.rst`` file per
    example pipeline that could be documented, then writes the index page and
    a ``module_list.tsv`` file recording which modules each example uses.

    :param example_config_dir: root directory to search for ``.conf`` files
    :param output_dir: existing directory in which all output is written
    """
    generated = []
    module_refs = []

    for base_dir, dirs, filenames in os.walk(example_config_dir):
        # Get the path relative to the examples base
        rel_dir = os.path.relpath(base_dir, example_config_dir)
        if rel_dir == ".":
            rel_dir = ""

        for filename in filenames:
            if not filename.endswith(".conf"):
                continue
            ex_rel_path = os.path.join(rel_dir, filename)
            print("Building {}".format(ex_rel_path))

            # Build the doc's text: None means this example was skipped
            doc = build_example_config_doc(example_config_dir, ex_rel_path)
            if doc is None:
                continue
            doc_text, ref_name, modules = doc

            # Work out what to call the file. Only the extension is swapped,
            # so a ".conf" elsewhere in the path is left alone, and
            # subdirectories are flattened into a dotted name
            out_filename = (os.path.splitext(ex_rel_path)[0] + ".rst").replace(os.path.sep, ".")
            with open(os.path.join(output_dir, out_filename), "w") as f:
                f.write(doc_text)
            print(" Written to {}".format(out_filename))

            generated.append(out_filename)
            module_refs.append((ref_name, modules))

    if len(set(generated)) < len(generated):
        warnings.warn("Multiple example config files were found with the same name")

    build_index(generated, output_dir)

    # Output a list of what modules are used by what examples
    # This will be read in by the module doc builder to include a list in each module's docs
    module_list_fn = os.path.join(output_dir, "module_list.tsv")
    with open(module_list_fn, "w") as f:
        f.write("\n".join(
            "{}\t{}".format(ref_name, ", ".join(modules)) for (ref_name, modules) in module_refs
        ))
    print("Module reference list output to {}".format(module_list_fn))
if __name__ == "__main__":
    # Command-line entry point: a single positional argument gives the
    # directory that the generated .rst files are written to
    arg_parser = argparse.ArgumentParser(
        description="Generate documentation RST files from Pimlico example config files"
    )
    arg_parser.add_argument("output_dir", help="Where to put the .rst files")
    cli_args = arg_parser.parse_args()
    output_dir = os.path.abspath(cli_args.output_dir)

    # Install basic Pimlico requirements
    install_core_dependencies()

    print("Pimlico example config doc generator")
    if os.path.exists(EXAMPLES_DIR):
        print("Building example config docs from pipelines found in {}".format(EXAMPLES_DIR))
        # Create the output directory on first use
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        build_example_config_docs(EXAMPLES_DIR, output_dir)
    else:
        print("Example config dir could not be found: {}".format(EXAMPLES_DIR))