Source code for pimlico.core.dependencies.python

# This file is part of Pimlico
# Copyright (C) 2016 Mark Granroth-Wilding
# Licensed under the GNU GPL v3.0 - http://www.gnu.org/licenses/gpl-3.0.en.html

"""
Tools for Python library dependencies.

Provides superclasses for Python library dependencies and a selection of commonly used dependency instances.

"""
import sys
from pkgutil import find_loader

from pimlico.core.dependencies.base import SoftwareDependency


[docs]class PythonPackageDependency(SoftwareDependency): """ Base class for Python dependencies. Provides import checks, but no installation routines. Subclasses should either provide install() or installation_instructions(). The import checks do not (as of 0.6rc) actually import the package, as this may have side-effects that are difficult to account for, causing odd things to happen when you check multiple times, or try to import later. Instead, it just checks whether the package finder is about to locate the package. This doesn't guarantee that the import will succeed. """ def __init__(self, package, name, **kwargs): super(PythonPackageDependency, self).__init__(name, **kwargs) self.package = package
[docs] def problems(self, local_config): probs = super(PythonPackageDependency, self).problems(local_config) # To avoid having any impact on the system state during this check, we don't try actually importing the package pkg_loader = find_loader(self.package) if pkg_loader is None: probs.append("package importer could not locate %s" % self.package) return probs
[docs] def import_package(self): """ Try importing package_name. By default, just uses `__import__`. Allows subclasses to allow for special import behaviour. Should raise an `ImportError` if import fails. """ return __import__(self.package)
[docs] def get_installed_version(self, local_config): """ Tries to import a __version__ variable from the package, which is a standard way to define the package version. """ # Import the package # We're allowed to assume that available() returns True, so this import should work pck = self.import_package() # Try a load of different names that would denote the version string possible_names = ["__version__", "__VERSION__", "__release__"] for var_name in possible_names: if hasattr(pck, var_name): return str(getattr(pck, var_name)) # None of these worked: fall back to default behaviour return super(PythonPackageDependency, self).get_installed_version(local_config)
def __eq__(self, other): return isinstance(other, PythonPackageDependency) and self.package == other.package
[docs]class PythonPackageSystemwideInstall(PythonPackageDependency): """ Dependency on a Python package that needs to be installed system-wide. """ def __init__(self, package_name, name, pip_package=None, apt_package=None, yum_package=None, **kwargs): super(PythonPackageSystemwideInstall, self).__init__(package_name, name, **kwargs) self.pip_package = pip_package self.apt_package = apt_package self.yum_package = yum_package
[docs] def installable(self): return False
[docs] def installation_instructions(self): if self.pip_package is not None: pip_message = "\n\nInstall with Pip using:\n pip install '%s'" % self.pip_package else: pip_message = "" if self.apt_package is not None: apt_message = "\n\nOn Ubuntu/Debian systems, install using:\n sudo apt-get install %s" % self.apt_package else: apt_message = "" if self.yum_package is not None: yum_message = "\n\nOn Red Hat/Fedora systems, install using:\n sudo yum install %s" % self.yum_package else: yum_message = "" return "This Python library must be installed system-wide (which requires superuser privileges)%s%s%s" % \ (pip_message, apt_message, yum_message)
[docs]class PythonPackageOnPip(PythonPackageDependency): """ Python package that can be installed via pip. Will be installed in the virtualenv if not available. """ def __init__(self, package, name=None, pip_package=None, **kwargs): # Package names tend to be identical to the software name, so there's no need to specify both if name is None: name = package # If pip_package is given, use that as pip install target instead of package name # For cases where Python package name doesn't coincide with install target self.pip_package = pip_package or package super(PythonPackageOnPip, self).__init__(package, name, **kwargs)
[docs] def installable(self): return True
[docs] def install(self, local_config, trust_downloaded_archives=False): import subprocess # Use subprocess to call Pip: the recommended way to use it programmatically subprocess.check_call([sys.executable, '-m', 'pip', 'install', self.pip_package]) # Refresh sys.path so we can import the installed package import site reload(site)
def _old_install(self, local_config, trust_downloaded_archives=False): """ This is an old approach to installing programmatically using Pip. Technically, this way of using Pip is unsupported and, sure enough, you end up running into horrible errors with differing versions of Pip. An alternative, more supported approach is now implemented, but this is left here in case we need to incorporate anything from it. """ try: from pip import __version__ except ImportError: # Very very old versions don't define this import pkg_resources __version__ = pkg_resources.get_distribution('pip').version if int(__version__.split(".")[0]) >= 7: # Later version of pip, need to do this differently from pip.index import PackageFinder from pip.req import InstallRequirement, RequirementSet from pip.locations import src_prefix from pip.compat import logging_dictConfig from pip.utils.logging import IndentingFormatter from pip.download import PipSession import logging from tempfile import mkdtemp import shutil # Configure logging so we get verbose output logging_dictConfig({ "version": 1, "disable_existing_loggers": False, "filters": { "exclude_warnings": { "()": "pip.utils.logging.MaxLevelFilter", "level": logging.WARNING, }, }, "formatters": { "indent": { "()": IndentingFormatter, "format": "%(message)s", }, }, "handlers": { "console": { "level": "DEBUG", "class": "pip.utils.logging.ColorizedStreamHandler", "stream": "ext://sys.stdout", "filters": ["exclude_warnings"], "formatter": "indent", }, "console_errors": { "level": "WARNING", "class": "pip.utils.logging.ColorizedStreamHandler", "stream": "ext://sys.stderr", "formatter": "indent", }, }, # Previously, got super-verbose debugging by configuring root logger as follows # However, this had a horrible effect on later logging # Could possibly be solved by removing these handlers after installation. Simplest to stop doing this #"root": { # "level": "DEBUG", # "handlers": list(filter(None, [ # "console", # "console_errors", # None, # ])), #}, # Disable any logging besides WARNING unless we have DEBUG level # logging enabled. These use both pip._vendor and the bare names # for the case where someone unbundles our libraries. "loggers": dict( (name, {"level": "DEBUG", "handlers": ["console", "console_errors"]}) for name in ["pip._vendor", "distlib", "requests", "urllib3"] ), }) session = PipSession() # Create a temporary build dir build_dir = mkdtemp(suffix="pip_build") try: requirement_set = RequirementSet(build_dir, src_prefix, None, session=session) requirement_set.add_requirement(InstallRequirement.from_line(self.pip_package)) finally: shutil.rmtree(build_dir) install_options = [] global_options = [] finder = PackageFinder(find_links=[], index_urls=["https://pypi.python.org/simple/"], session=session) requirement_set.prepare_files(finder) # Run installation requirement_set.install(install_options, global_options) else: from pip.index import PackageFinder from pip.req import InstallRequirement, RequirementSet from pip.locations import build_prefix, src_prefix # Enable verbose output # NB: This only works on old versions of Pip try: from pip.log import logger logger.add_consumers((logger.INFO, sys.stdout)) except: pass # Build a requirement set containing just the package we need requirement_set = RequirementSet(build_dir=build_prefix, src_dir=src_prefix, download_dir=None) requirement_set.add_requirement(InstallRequirement.from_line(self.pip_package)) install_options = [] global_options = [] finder = PackageFinder(find_links=[], index_urls=["http://pypi.python.org/simple/"]) requirement_set.prepare_files(finder, force_root_egg_info=False, bundle=False) # Run installation requirement_set.install(install_options, global_options) # Refresh sys.path so we can import the installed package import site reload(site) def __repr__(self): return "PythonPackageOnPip<%s%s>" % (self.name, (" (%s)" % self.package) if self.package != self.name else "")
[docs] def get_installed_version(self, local_config): from pip.commands.show import search_packages_info # Use Pip to get the version number of the installed version installed_packages = list(search_packages_info(self.pip_package)) if len(installed_packages): # Found the Pip package info: this contains the version return installed_packages[0]["version"] else: # Pip package not found # This can happen because the package wasn't installed with Pip, but is available because it's importable return super(PythonPackageOnPip, self).get_installed_version(local_config)
################################### # Some commonly used dependencies # ################################### numpy_dependency = PythonPackageSystemwideInstall("numpy", "Numpy", pip_package="numpy", yum_package="numpy", apt_package="python-numpy", url="http://www.numpy.org/") scipy_dependency = PythonPackageSystemwideInstall("scipy", "Scipy", pip_package="scipy", yum_package="scipy", apt_package="python-scipy", url="https://www.scipy.org/scipylib/") theano_dependency = PythonPackageOnPip("theano", pip_package="Theano") tensorflow_dependency = PythonPackageOnPip("tensorflow") # We usually need h5py for reading/storing models h5py_dependency = PythonPackageOnPip("h5py", pip_package="h5py") # This version of the Keras dependency assumes we're using the theano backend keras_theano_dependency = PythonPackageOnPip("keras", dependencies=[theano_dependency, h5py_dependency]) keras_tensorflow_dependency = PythonPackageOnPip("keras", dependencies=[tensorflow_dependency, h5py_dependency]) # This version does not depend on any of the backend packages # This allows you to be ambivalent about which one is used, but means the package is not checked keras_dependency = PythonPackageOnPip("keras", dependencies=[h5py_dependency]) sklearn_dependency = PythonPackageOnPip( "sklearn", "Scikit-learn", pip_package="scikit-learn", dependencies=[numpy_dependency, scipy_dependency] ) gensim_dependency = PythonPackageOnPip("gensim", "Gensim", dependencies=[numpy_dependency, scipy_dependency]) ### Special behaviour for bs4
[docs]def safe_import_bs4(): """ BS can go very slowly if it tries to use chardet to detect input encoding Remove chardet and cchardet from the Python modules, so that import fails and it doesn't try to use them This prevents it getting stuck on reading long input files """ import sys sys.modules["cchardet"] = None sys.modules["chardet"] = None # Now we can import BS import bs4 return bs4
[docs]class BeautifulSoupDependency(PythonPackageOnPip): """ Test import with special BS import behaviour. """ def __init__(self): super(BeautifulSoupDependency, self).__init__("bs4", pip_package="beautifulsoup4", name="Beautiful Soup")
[docs] def import_package(self): return safe_import_bs4()
beautiful_soup_dependency = BeautifulSoupDependency()