Source code for pimlico.core.modules.inputs
# This file is part of Pimlico
# Copyright (C) 2016 Mark Granroth-Wilding
# Licensed under the GNU GPL v3.0 - http://www.gnu.org/licenses/gpl-3.0.en.html
"""
Base classes and utilities for input modules in a pipeline.
"""
from .base import BaseModuleInfo
from pimlico.core.modules.base import BaseModuleExecutor
class InputModuleInfo(BaseModuleInfo):
    """
    Module info base class shared by input modules.

    Input modules are not, in general, executed: their job is simply to provide
    a way to iterate over input data.

    Subclassing this directly is rarely what you want. The simplest route is
    usually to define a datatype that reads the input data and to give that
    datatype's class as the module's type. A subclass of this module info is
    then created dynamically to read the data.

    ``module_executable`` is set to ``False`` here, which is the typical value.
    Some input modules nevertheless have to be executed before their input is
    usable, for example to collect stats about the input data.

    """
    module_executable = False
    module_type_name = "input"

    def instantiate_output_datatype(self, output_name, output_datatype):
        """
        Placeholder that always fails: this base class has no datatype
        instantiator of its own.

        :param output_name: not used by this base implementation
        :param output_datatype: not used by this base implementation
        :raises NotImplementedError: always; the message includes this
            module's ``module_type_name``
        """
        message = "input module type (%s) must implement its own datatype instantiator" % \
            self.module_type_name
        raise NotImplementedError(message)
def input_module_factory(datatype):
    """
    Build and return a module info class for an input module that loads the
    given datatype.

    The returned class is a subclass of :class:`InputModuleInfo` with a single
    output, ``"data"``, of the given datatype. Its type name and readable name
    are derived from ``datatype.datatype_name`` and its options are
    ``datatype.input_module_options``.

    If ``datatype.requires_data_preparation`` is true, the class is
    additionally marked as executable and given an executor that calls
    ``prepare_data()`` on the module's ``"data"`` output.

    :param datatype: datatype to load; must provide ``datatype_name``,
        ``input_module_options`` and ``requires_data_preparation``
    :return: the newly created module info class
    """
    datatype_name = datatype.datatype_name

    class DatatypeInputModuleInfo(InputModuleInfo):
        module_type_name = "%s_input" % datatype_name
        module_readable_name = "%s datatype input" % datatype_name
        module_outputs = [("data", datatype)]
        module_options = datatype.input_module_options

        def instantiate_output_datatype(self, output_name, output_datatype):
            # Build the datatype from this module's output dir, pipeline and options
            build = output_datatype.create_from_options
            return build(self.get_output_dir(output_name), self.pipeline, self.options)

    if not datatype.requires_data_preparation:
        # No preparation step: the module info class is returned as defined above
        return DatatypeInputModuleInfo

    # The datatype asks for data preparation, so this module needs to be executed
    class DataPreparationExecutor(BaseModuleExecutor):
        def execute(self):
            # Fetch the datatype instance for the "data" output and run its
            # special data preparation method on the absolute output dir
            prepare = self.info.get_output("data").prepare_data
            prepare(self.info.get_absolute_output_dir("data"), self.log)

    DatatypeInputModuleInfo.module_executable = True
    DatatypeInputModuleInfo.module_executor_override = DataPreparationExecutor
    return DatatypeInputModuleInfo