# Source code for pimlico.cli.data_editor.run

import sys
import argparse
from code import interact

import os

from pimlico.datatypes import load_datatype

from pimlico.core.config import PipelineConfig


def run_editor(dataset_root, datatype_name):
    """
    Open an interactive Python shell for creating or inspecting a dataset.

    An empty pipeline config is created and the datatype is loaded by name.
    If ``dataset_root`` does not exist on disk, a writer for the datatype is
    created for that path (write mode). Otherwise a reader setup is built
    from the path and, if it reports the data as ready to read, a reader is
    created from it (read mode). Whatever objects were created are exposed
    as variables in the interactive shell, which is entered at the end.

    :param dataset_root: path to the dataset root; whether it exists selects
        between write mode (missing) and read mode (present)
    :param datatype_name: name of the datatype, passed to ``load_datatype()``
    """
    # Create an empty pipeline
    pipeline = PipelineConfig.empty()
    # Namespace handed to the interactive shell; short aliases are added
    # alongside each full name
    local = {"pipeline": pipeline, "p": pipeline}
    print("Empty PipelineConfig object is available as variable 'pipeline' (or 'p')")
    # Try loading the datatype
    datatype = load_datatype(datatype_name)
    print("Loaded datatype: {}".format(datatype))

    if not os.path.exists(dataset_root):
        # *** Write mode ***
        print("Creating a new dataset at {}".format(dataset_root))
        # Create a writer
        # TODO We should allow different args/kwargs to be passed in here
        writer = datatype.get_writer(dataset_root, pipeline)
        # The path has already been reported above, so nothing to format here
        print("Created dataset writer")
        print("Writer is available as variable 'writer' (or 'w')")
        local["writer"] = local["w"] = writer
    else:
        # *** Read mode ***
        print("Attempting to read a dataset of type {} from {}".format(datatype, dataset_root))
        # Create a reader, via a reader setup
        setup = datatype([dataset_root])
        if not setup.ready_to_read():
            # No reader can be made: expose the setup itself so that the
            # user can investigate from within the shell
            print("Data not ready to read from {}".format(dataset_root))
            print("Reader setup available as variable 'setup'")
            local["setup"] = setup
        else:
            reader = setup(pipeline)
            print("Dataset loaded. Reader available as variable 'reader' (or 'r')")
            local["reader"] = local["r"] = reader

    # Enter the interpreter
    interact(local=local)


if __name__ == "__main__":
    # Command-line entry point: read the dataset root and datatype name from
    # the command line and hand over to the interactive editor.
    #
    # The description is laid out in paragraphs, so the raw description
    # formatter is used: argparse's default formatter would re-wrap the
    # whole text into a single paragraph. Argument help is still wrapped
    # as usual.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""\
Either edits an existing dataset (not yet implemented) or creates a new one.

If the given path exists, we attempt to load a dataset of the given
datatype from there and an appropriate reader is created.

Otherwise, an appropriate writer will be created, writing data to the
given dataset root.

You are then taken to a Python shell, where you can access the reader or
writer to manipulate the dataset.
""",
    )
    parser.add_argument("dataset_root", help="Root directory for the dataset")
    parser.add_argument("datatype", help="PimlicoDatatype to use to create a new dataset or read the existing one. "
                                         "Given as a fully qualified path, or a shortcut. "
                                         "See pimlico.datatypes.load_datatype() for more details")
    opts = parser.parse_args()

    run_editor(opts.dataset_root, opts.datatype)