#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Utilities for calculating package dependencies and drawing graphs"""
import glob
import os
import re
import tarfile
import tempfile
from io import BytesIO
from .bootstrap import set_environment
from .build import get_output_path
from .build import get_parsed_recipe
from .build import get_rendered_metadata
from .build import next_build_number
from .log import echo_info
from .log import get_logger
logger = get_logger(__name__)
def compute_adjencence_matrix(
    gl,
    package,
    conda_config,
    main_channel,
    recurse_regexp=r"^(bob|beat|batl|gridtk)(\.)?(?!-).*$",
    current=None,
    ref="master",
    deptypes=None,
):
    """
    Given a target package, returns an adjacency matrix with its dependencies
    returned via the conda-build API

    The repository archive of ``package`` at ``ref`` is downloaded from
    gitlab, its conda recipe is rendered, and every dependence whose
    requirement string matches ``recurse_regexp`` is resolved recursively.


    Parameters
    ----------

    gl : object
        Pre-instantiated instance of the gitlab server API to use, of type
        :py:class:`gitlab.Gitlab`.

    package : str
        Name of the package, including its group in the format
        ``group/package``

    conda_config : dict
        Dictionary of conda configuration options loaded from command-line and
        read from defaults available.

    main_channel : str
        Main channel to consider when looking for the next build number of
        the target package

    recurse_regexp : str
        Regular expression to use, for determining where to recurse for
        resolving dependencies.  Typically, this should be set to a list of
        packages which exists in gitlab.  If it includes more than that, then
        we may not be able to reach the package repository and an error will be
        raised.  The default expression avoids recursing over bob/beat-devel
        packages.

    current : dict
        Packages already inspected - used by recursive calls to this function
        to avoid resolving the same package twice and to break cyclic
        dependencies.  Corresponds to the current return value of this
        function.  If not given (``None``), a fresh dictionary is created for
        this call.  If given, it is updated in place and returned.

    ref : str
        Name of the git reference (branch, tag or commit hash) to use

    deptypes : list
        A list of dependence types to preserve when building the graph.  If
        empty or ``None``, then preserve all.  You may set values "build",
        "host", "run" and "test", in any combination


    Returns
    -------

    adjacence_matrix : dict
        A dictionary that contains the dependencies of all packages considered
        in the recursion.  The keys are the names of the packages (as passed
        in ``package``), the values are dictionaries with the keys ``host``,
        ``build``, ``run`` and ``test`` (lists of requirement strings) plus
        ``version``, ``name`` and ``build_string``.


    Raises
    ------

    RuntimeError
        If the downloaded archive does not contain a ``*/conda`` recipe
        directory.

    """

    # A shared mutable default would leak results between unrelated top-level
    # calls, so the accumulator is created per call and explicitly threaded
    # through the recursion instead.
    if current is None:
        current = {}
    deptypes = deptypes if deptypes else ["host", "build", "run", "test"]

    use_package = gl.projects.get(package)
    full_name = use_package.attributes["path_with_namespace"]

    if full_name in current:
        return current

    echo_info("Resolving graph for %s@%s" % (full_name, ref))

    with tempfile.TemporaryDirectory() as tmpdir:

        logger.debug("Downloading archive for %s...", ref)
        archive = use_package.repository_archive(ref=ref)  # in memory
        logger.debug("Archive has %d bytes", len(archive))

        # NOTE(review): the archive comes from the configured gitlab server;
        # if it cannot be fully trusted, consider tarfile extraction filters.
        with tarfile.open(fileobj=BytesIO(archive), mode="r:gz") as f:
            f.extractall(path=tmpdir)

        # use conda-build API to figure out all dependencies
        recipe_pattern = os.path.join(tmpdir, "*", "conda")
        recipe_candidates = glob.glob(recipe_pattern)
        if not recipe_candidates:
            # glob() returns an empty list when nothing matches; report that
            # explicitly instead of failing with an IndexError
            raise RuntimeError(
                "The conda recipe directory %s does not exist" % recipe_pattern
            )
        recipe_dir = recipe_candidates[0]
        logger.debug("Resolving conda recipe for package at %s...", recipe_dir)

        version_candidate = os.path.join(recipe_dir, "..", "version.txt")
        if os.path.exists(version_candidate):
            with open(version_candidate) as version_file:
                version = version_file.read().rstrip()
            set_environment("BOB_PACKAGE_VERSION", version)

        # pre-renders the recipe - figures out the destination
        metadata = get_rendered_metadata(recipe_dir, conda_config)
        rendered_recipe = get_parsed_recipe(metadata)
        path = get_output_path(metadata, conda_config)[0]

        # gets the next build number
        # NOTE(review): the returned build number is not used below; the call
        # is kept so any error it raises still surfaces - confirm if needed.
        next_build_number(main_channel, os.path.basename(path))

        # at this point, all elements are parsed, I know the package version
        # and all dependencies - exclude stuff we are not interested in

        # host and build should have precise numbers to be used for building
        # this package, run dependencies are more vague
        requirements = rendered_recipe["requirements"]
        host = requirements.get("host", []) if "host" in deptypes else []
        build = requirements.get("build", []) if "build" in deptypes else []
        run = requirements.get("run", []) if "run" in deptypes else []

        # test dependencies even more vague
        if "test" in deptypes:
            test = rendered_recipe.get("test", {}).get("requires", [])
        else:
            test = []

        entry = dict(
            host=host,
            build=build,
            run=run,
            test=test,
            version=rendered_recipe["package"]["version"],
            name=rendered_recipe["package"]["name"],
            # file name of the output path: last dash-separated field, up to
            # the first dot
            build_string=os.path.basename(path).split("-")[-1].split(".")[0],
        )

    # for each of the above sections, recurse in figuring out dependencies,
    # if dependencies match the recursion expression
    recurse_compiled = re.compile(recurse_regexp)
    matching_names = {
        spec.split()[0]
        for section in (host, build, run, test)
        for spec in section
        if recurse_compiled.match(spec)
    }

    # complete the package group, which is not provided by conda-build
    def _add_default_group(p):
        if p.startswith(("bob", "gridtk")):
            return "/".join(("bob", p))
        if p.startswith("beat"):
            return "/".join(("beat", p))
        if p.startswith("batl"):
            return "/".join(("batl", p))
        logger.warning(
            "Do not know how to recurse to package %s "
            "(to which group does it belong?) - skipping...",
            p,
        )
        return None

    all_recurse = {_add_default_group(k) for k in matching_names}
    all_recurse.discard(None)

    # do not recurse for packages we already know
    all_recurse -= set(current)
    all_recurse.discard(package)

    # sorted, so that logs and recursion order are reproducible
    to_visit = sorted(all_recurse)
    logger.info(
        "Recursing over the following packages: %s", ", ".join(to_visit)
    )

    # Register this package *before* descending, so that a dependence cycle
    # leading back here stops instead of recursing without bound.
    current[package] = entry

    for dep in to_visit:
        compute_adjencence_matrix(
            gl,
            dep,
            conda_config,
            main_channel,
            recurse_regexp=recurse_regexp,
            current=current,
            ref=ref,
            deptypes=deptypes,
        )

    # Move this package's entry after its dependencies, so the resulting
    # insertion order stays "dependencies first, then the package".
    current[package] = current.pop(package)

    return current
def generate_graph(adjacence_matrix, deptypes, whitelist):
    """
    Computes a graphviz/dot representation of the build graph

    Packages listed in ``adjacence_matrix`` are drawn as blue boxes labelled
    with name, version and build string.  Their dependencies are drawn as
    plain nodes labelled with whatever version/build information the
    requirement string carries.  An edge is added from each package to each
    of its (whitelisted) dependencies.


    Parameters
    ----------

    adjacence_matrix : dict
        A dictionary containing the adjacency matrix, that states the
        dependencies for each package in the build, to other packages.  Each
        value must provide the keys ``name``, ``version``, ``build_string``
        and one list of requirement strings per entry in ``deptypes``.

    deptypes : list
        A list of dependence types to preserve when building the graph.  If
        empty, then preserve all.  You may set values "build", "host",
        "run" and "test", in any combination

    whitelist : str
        Regular expression for matching strings to preserve while building
        the graph


    Returns
    -------

    graph : graphviz.Digraph
        The generated graph

    """

    from graphviz import Digraph

    whitelist_compiled = re.compile(whitelist)
    deptypes = deptypes if deptypes else ["host", "build", "run", "test"]

    graph = Digraph()
    known_nodes = set()  # names of the nodes explicitly created so far

    # generate nodes for all packages we want to track explicitly
    for values in adjacence_matrix.values():
        pkg_name = values["name"]
        if not whitelist_compiled.match(pkg_name):
            logger.debug(
                "Skipping main package %s (did not match whitelist)",
                pkg_name,
            )
            continue
        label = "\n".join((pkg_name, values["version"], values["build_string"]))
        graph.node(pkg_name, label, shape="box", color="blue")
        known_nodes.add(pkg_name)

    # generates nodes for all dependencies
    for values in adjacence_matrix.values():

        to_consider = set()
        for k in deptypes:
            to_consider.update(values[k])

        # ensures we only have the most complete dependence in our list: for
        # each name, keep the requirement with the most fields (version,
        # build).  Iterating in sorted order makes ties - and therefore the
        # generated graph - reproducible from run to run.
        deps = {}
        for dep in sorted(to_consider):
            fields = dep.split()
            if not fields:
                continue  # blank requirement string, nothing to draw
            dep_name, spec = fields[0], fields[1:]
            if dep_name not in deps or len(spec) > len(deps[dep_name]):
                deps[dep_name] = spec

        for ref, parts in deps.items():
            if not whitelist_compiled.match(ref):
                logger.debug(
                    "Skipping dependence %s (did not match whitelist)", ref
                )
                continue

            if ref not in known_nodes:
                # we do not have a node for that dependence, create it,
                # labelled with name, then version and build when available
                graph.node(ref, "\n".join([ref] + parts[:2]))
                known_nodes.add(ref)

            # connects package -> dep
            # NOTE(review): the edge is added even when the package itself
            # was skipped by the whitelist above - confirm this is intended.
            graph.edge(values["name"], ref)

    return graph