# This file is part of Pimlico
# Copyright (C) 2020 Mark Granroth-Wilding
# Licensed under the GNU LGPL v3.0 - https://www.gnu.org/licenses/lgpl-3.0.en.html
"""
Tools for Python library dependencies.
Provides superclasses for Python library dependencies and a selection of commonly used dependency instances.
"""
from builtins import str
import sys
from past.builtins import reload
from pkgutil import find_loader
from pimlico.core.dependencies.licenses import GNU_LGPL_V2, BSD, APACHE_V2, MIT
import pkg_resources
from pkg_resources import parse_version, parse_requirements
from pimlico.core.dependencies.base import SoftwareDependency
class PythonPackageDependency(SoftwareDependency):
    """
    Base class for Python dependencies. Provides import checks, but no installation routines. Subclasses should
    either provide install() or installation_instructions().

    The import checks do not (as of 0.6rc) actually import the package, as this may have side-effects that are
    difficult to account for, causing odd things to happen when you check multiple times, or try to import later.
    Instead, it just checks whether the package finder is able to locate the package. This doesn't guarantee that
    the import will succeed.

    :param package: name of the package as it is imported in Python (may be a dotted name)
    :param name: readable name of the software, passed on to the superclass
    :param kwargs: any further keyword arguments are passed on to the superclass

    """
    def __init__(self, package, name, **kwargs):
        super(PythonPackageDependency, self).__init__(name, **kwargs)
        self.package = package

    def problems(self, local_config):
        """
        Extend the superclass' list of problems with a check that the package can be located.

        :param local_config: local configuration, passed through to the superclass
        :return: list of problem descriptions (strings); no entry is added here if the package was located
        """
        probs = super(PythonPackageDependency, self).problems(local_config)
        # To avoid having any impact on the system state during this check, we don't try actually importing the package
        try:
            pkg_loader = find_loader(self.package)
        except ImportError:
            probs.append("could not find loader to try locating %s" % self.package)
        else:
            if pkg_loader is None:
                probs.append("package importer could not locate %s" % self.package)
        return probs

    def import_package(self):
        """
        Import the package named by ``self.package`` and return the resulting module.

        Uses ``importlib.import_module`` rather than a bare ``__import__``, because ``__import__("a.b")``
        returns the top-level package ``a`` instead of the requested ``a.b``.
        Subclasses may override this to provide special import behaviour.

        Should raise an `ImportError` if import fails.
        """
        # Local import so that this method does not rely on anything beyond the module's existing imports
        from importlib import import_module
        return import_module(self.package)

    def get_installed_version(self, local_config):
        """
        Tries to read a ``__version__`` variable (or one of a few alternative names) from the package,
        which is a standard way to define the package version.

        For a dotted package name, the named module is checked first and then its top-level package,
        since the version is often only defined at the top level.

        :param local_config: local configuration, passed through to the superclass for the fallback
        :return: the version as a string, or whatever the superclass returns if no version variable is found
        """
        # Import the package
        # We're allowed to assume that available() returns True, so this import should work
        pck = self.import_package()
        candidates = [pck]
        # After a successful import of a dotted name, the top-level package is in sys.modules
        top_level = sys.modules.get(self.package.split(".")[0])
        if top_level is not None and top_level is not pck:
            candidates.append(top_level)

        # Try a load of different names that would denote the version string
        possible_names = ["__version__", "__VERSION__", "__release__"]
        for module in candidates:
            for var_name in possible_names:
                if hasattr(module, var_name):
                    return str(getattr(module, var_name))
        # None of these worked: fall back to default behaviour
        return super(PythonPackageDependency, self).get_installed_version(local_config)

    def __eq__(self, other):
        # Two Python package dependencies are the same if they refer to the same importable package
        return isinstance(other, PythonPackageDependency) and self.package == other.package

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly to keep the two consistent
        return not self.__eq__(other)

    def __hash__(self):
        # Consistent with __eq__: depends only on the package name
        return hash(self.package)
class PythonPackageSystemwideInstall(PythonPackageDependency):
    """
    Python package dependency that is not installed automatically: it has to be installed
    system-wide by the user. Instead of an install routine, instructions are provided for
    whichever of pip, apt and yum have a package name specified.
    """
    def __init__(self, package_name, name, pip_package=None, apt_package=None, yum_package=None, **kwargs):
        super(PythonPackageSystemwideInstall, self).__init__(package_name, name, **kwargs)
        # Names of the corresponding packages in each package manager (None where not available)
        self.pip_package, self.apt_package, self.yum_package = pip_package, apt_package, yum_package

    def installable(self):
        """Always False: this dependency reports that it cannot be installed automatically."""
        return False

    def installation_instructions(self):
        """
        Build the instructions shown to the user: a general notice, followed by one
        paragraph for each package manager that has a package name set.
        """
        # (message template, install target) for each supported package manager, in display order
        hints = (
            ("\n\nInstall with Pip using:\n pip install '%s'", self.pip_package),
            ("\n\nOn Ubuntu/Debian systems, install using:\n sudo apt-get install %s", self.apt_package),
            ("\n\nOn Red Hat/Fedora systems, install using:\n sudo yum install %s", self.yum_package),
        )
        details = "".join(template % target for template, target in hints if target is not None)
        return "This Python library must be installed system-wide (which requires superuser privileges)" + details
class PythonPackageOnPip(PythonPackageDependency):
    """
    Python package that can be installed via pip. Will be installed in the virtualenv if not available.

    Allows specification of a minimum version. If an earlier version is installed,
    it will be upgraded.

    Name is the readable software name. Package is the package that is imported in Python.

    """
    def __init__(self, package, name=None, pip_package=None, upgrade_only_if_needed=False, min_version=None, editable=False, **kwargs):
        """
        :param package: name of the package as imported in Python
        :param name: readable software name; defaults to the package name
        :param pip_package: install target given to pip; defaults to the package name
        :param upgrade_only_if_needed: pass ``--upgrade-strategy only-if-needed`` to pip when installing
        :param min_version: minimum acceptable version (string), or None for no version requirement
        :type editable: boolean
        :param editable: Pass the --editable option to pip when installing. Use with e.g. Git urls as packages.
        :param kwargs: passed on to the superclass
        """
        self.editable = editable
        self.min_version = min_version
        self.upgrade_only_if_needed = upgrade_only_if_needed
        # Package names tend to be identical to the software name, so there's no need to specify both
        if name is None:
            name = package
        # If pip_package is given, use that as pip install target instead of package name
        # For cases where Python package name doesn't coincide with install target
        self.pip_package = pip_package or package
        super(PythonPackageOnPip, self).__init__(package, name, **kwargs)

    def installable(self):
        """Always True: the package can be installed using pip."""
        return True

    def install(self, local_config, trust_downloaded_archives=False):
        """
        Install the package by running pip (as ``python -m pip``) in a subprocess, using the
        current Python interpreter.

        :param local_config: local configuration (not used here)
        :param trust_downloaded_archives: not used here
        :raises subprocess.CalledProcessError: if pip exits with a non-zero status
        """
        import subprocess

        options = []
        if self.upgrade_only_if_needed:
            options.extend(["--upgrade-strategy", "only-if-needed"])
        elif self.min_version is not None:
            options.append("--upgrade")
        if self.editable:
            # Must be the last option, as it is directly followed by the install target
            options.append("--editable")

        if self.min_version is not None:
            package = "{}>={}".format(self.pip_package, self.min_version)
        else:
            package = self.pip_package
        # Use subprocess to call Pip: the recommended way to use it programmatically
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--no-warn-script-location'] + options + [package])
        # Refresh sys.path so we can import the installed package
        import site
        reload(site)

    def problems(self, local_config):
        """
        As the superclass' check, but if the package can be located and a minimum version
        is required, additionally check that the installed version is sufficient.

        :param local_config: local configuration, passed through to the version lookup
        :return: list of problem descriptions (strings)
        """
        problems = super(PythonPackageOnPip, self).problems(local_config)
        if not problems and self.min_version is not None:
            # Also check that it's a sufficient version
            inst_version = self.get_installed_version(local_config)
            if parse_version(self.min_version) > parse_version(inst_version):
                problems.append("{} is installed, but only version {}: {} required".format(
                    self.name, inst_version, self.min_version
                ))
        return problems

    def __repr__(self):
        return "PythonPackageOnPip<%s%s>" % (self.name, (" (%s)" % self.package) if self.package != self.name else "")

    def get_installed_version(self, local_config):
        """
        Look up the installed version of the pip distribution in the pkg_resources working set.

        The lookup uses the pip install target (``pip_package``), not the import name, since the two
        may differ (e.g. ``scikit-learn`` is imported as ``sklearn``). If the distribution is not found,
        or the install target is not a plain requirement string, fall back to the superclass' method,
        which imports the package to look for a version.

        :param local_config: local configuration, passed through to the superclass for the fallback
        :return: version string
        :raises ValueError: if the install target parses to anything other than exactly one requirement
        """
        try:
            reqs = list(parse_requirements(self.pip_package))
        except ValueError:
            # pkg_resources signals an unparsable requirement with a ValueError subclass.
            # This is expected for install targets like Git URLs (see the editable option):
            # there is no distribution name to look up, so use the import-based check instead
            return super(PythonPackageOnPip, self).get_installed_version(local_config)
        if len(reqs) != 1:
            raise ValueError("pip_package='{}', which could not be parsed as a requirement".format(self.pip_package))
        # Reload the working set in case something's been installed since loaded
        reload(pkg_resources)
        # Look up the package
        dist = pkg_resources.working_set.find(reqs[0])
        if dist is None:
            # Pip package not found
            # This can happen because the package wasn't installed with Pip, but is available because it's importable
            return super(PythonPackageOnPip, self).get_installed_version(local_config)
        else:
            # Found the Pip package info: this contains the version
            return dist.version
###################################
# Some commonly used dependencies #
###################################
# Each of these is a ready-made dependency instance. Note that some are used in the
# dependency lists of others, so they need to be defined before them.
urwid_dependency = PythonPackageOnPip(
    "urwid",
    homepage_url="http://urwid.org/",
    license=GNU_LGPL_V2,
)
numpy_dependency = PythonPackageOnPip(
    "numpy", name="Numpy",
    homepage_url="https://numpy.org/",
    license=BSD,
)
scipy_dependency = PythonPackageOnPip(
    "scipy", name="Scipy",
    homepage_url="https://www.scipy.org/",
    license=BSD,
)
theano_dependency = PythonPackageOnPip("theano", pip_package="Theano")
tensorflow_dependency = PythonPackageOnPip(
    "tensorflow",
    homepage_url="https://www.tensorflow.org/",
    license=APACHE_V2,
)
# h5py is usually needed for reading/storing models
h5py_dependency = PythonPackageOnPip(
    "h5py", pip_package="h5py",
    homepage_url="https://www.h5py.org/",
    license=BSD,
)

# Keras, on the assumption that the theano backend is used
keras_theano_dependency = PythonPackageOnPip(
    "keras",
    dependencies=[theano_dependency, h5py_dependency],
    homepage_url="https://keras.io/",
    license=MIT,
)
# Keras, with tensorflow as a dependency instead of theano
keras_tensorflow_dependency = PythonPackageOnPip(
    "keras",
    dependencies=[tensorflow_dependency, h5py_dependency],
    homepage_url="https://keras.io/",
    license=MIT,
)
# Keras without a dependency on any of the backend packages:
# you don't have to commit to a backend, but the backend package is then not checked
keras_dependency = PythonPackageOnPip(
    "keras",
    dependencies=[h5py_dependency],
    homepage_url="https://keras.io/",
    license=MIT,
)

pytorch_dependency = PythonPackageOnPip(
    "torch", name="PyTorch",
    homepage_url="https://pytorch.org/",
)
pyro_dependency = PythonPackageOnPip(
    "pyro", name="Pyro",
    pip_package="pyro-ppl",
    dependencies=[pytorch_dependency],
    homepage_url="http://pyro.ai/",
    license=APACHE_V2,
)
sklearn_dependency = PythonPackageOnPip(
    "sklearn", name="Scikit-learn",
    pip_package="scikit-learn",
    dependencies=[numpy_dependency, scipy_dependency],
    homepage_url="https://scikit-learn.org/stable/",
    license=BSD,
)
pandas_dependency = PythonPackageOnPip(
    "pandas",
    homepage_url="https://pandas.pydata.org/",
    license=BSD,
)

# Requests is needed by Gensim and itself needs urllib3>=1.23 to work,
# but that requirement isn't enforced in the dependencies
requests_dependency = PythonPackageOnPip("requests", min_version="2.20")
gensim_dependency = PythonPackageOnPip(
    "gensim", name="Gensim",
    dependencies=[numpy_dependency, scipy_dependency, requests_dependency],
    upgrade_only_if_needed=True,
    # Embedding storage was changed in 3.3.0, so we need to make sure
    # we're on the right side of that release
    min_version="3.3.0",
    homepage_url="https://radimrehurek.com/gensim/",
    license=GNU_LGPL_V2,
)

spacy_dependency = PythonPackageOnPip(
    "spacy",
    homepage_url="https://spacy.io/",
    license=MIT,
)
fasttext_dependency = PythonPackageOnPip(
    "fasttext",
    homepage_url="https://fasttext.cc/",
    license=MIT,
)
huggingface_datasets_dependency = PythonPackageOnPip(
    "datasets",
    homepage_url="https://github.com/huggingface/datasets",
    license=APACHE_V2,
)
### Special behaviour for bs4
def safe_import_bs4():
    """
    Import Beautiful Soup in such a way that it cannot use chardet or cchardet.

    BS can be very slow if it uses one of these to detect the input encoding and can get
    stuck reading long input files. To prevent that, both modules' entries in
    ``sys.modules`` are set to None before BS is imported, so that importing them fails.

    :return: the imported ``bs4`` module
    """
    import sys

    # Block the encoding detectors first, then it's safe to import BS
    for detector in ("cchardet", "chardet"):
        sys.modules[detector] = None

    import bs4
    return bs4
class BeautifulSoupDependency(PythonPackageOnPip):
    """
    Beautiful Soup dependency (imported as ``bs4``, installed from pip as ``beautifulsoup4``).
    Whenever the package is imported through this dependency, the special BS import
    routine is used in place of a plain import.
    """
    def __init__(self):
        init_kwargs = dict(
            name="Beautiful Soup",
            pip_package="beautifulsoup4",
            homepage_url="https://www.crummy.com/software/BeautifulSoup/bs4/doc/",
            license=MIT,
        )
        super(BeautifulSoupDependency, self).__init__("bs4", **init_kwargs)

    def import_package(self):
        """Import bs4 using safe_import_bs4() and return the module."""
        bs4_module = safe_import_bs4()
        return bs4_module
# Ready-made dependency instances for Beautiful Soup and NLTK
beautiful_soup_dependency = BeautifulSoupDependency()
nltk_dependency = PythonPackageOnPip(
    "nltk", name="NLTK",
    homepage_url="https://www.nltk.org/",
    license=APACHE_V2,
)
class NLTKResource(SoftwareDependency):
    """
    Check for and install NLTK resources, using NLTK's own downloader.

    The dependency's name is used as the identifier of the NLTK resource.

    """
    def problems(self, local_config):
        """
        Extend the superclass' list of problems with a check that the resource is installed.

        Failures while loading NLTK or querying its downloader are reported as problems
        rather than raised.

        :param local_config: local configuration, passed through to the superclass
        :return: list of problem descriptions (strings)
        """
        problems = super(NLTKResource, self).problems(local_config)
        # Check whether the resource is available
        try:
            from nltk.downloader import _downloader
        except ImportError:
            # If NLTK isn't even installed, we can't check whether the resource is there
            problems.append("NLTK not installed: cannot check for resource '{}'".format(self.name))
        except Exception as e:
            # NLTK is there, but loading its downloader failed for some other reason
            # Exception (not a bare except) so that KeyboardInterrupt and SystemExit are not swallowed
            problems.append("Error loading NLTK downloader: cannot check for resource '{}': {}".format(self.name, e))
        else:
            try:
                resource_installed = _downloader.is_installed(self.name)
            except Exception as e:
                problems.append("Error checking NLTK resource status for {}: {}".format(self.name, e))
            else:
                if not resource_installed:
                    problems.append("NLTK resource '{}' not installed".format(self.name))
        return problems

    def installable(self):
        """Always True: the resource can be fetched using NLTK's downloader."""
        return True

    def install(self, local_config, trust_downloaded_archives=False):
        """
        Download the resource using ``nltk.download``.

        :param local_config: local configuration (not used here)
        :param trust_downloaded_archives: not used here
        """
        from nltk import download
        download(self.name)

    def dependencies(self):
        """The superclass' dependencies, plus NLTK itself, which is needed for checking and installing."""
        return super(NLTKResource, self).dependencies() + [nltk_dependency]