#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
"""Tools for interacting with the running computer or GPU"""
import os
import subprocess
import shutil
import psutil
import logging
logger = logging.getLogger(__name__)
_nvidia_smi = shutil.which("nvidia-smi")
"""Location of the nvidia-smi program, if one exists"""
GB = float(2 ** 30)
"""The number of bytes in a gigabyte"""


def run_nvidia_smi(query, rename=None):
    """Returns GPU information from query.

    For a comprehensive list of options and help, execute ``nvidia-smi
    --help-query-gpu`` on a host with a GPU.

    Parameters
    ----------

    query : list
        A list of query strings as defined by ``nvidia-smi --help-query-gpu``

    rename : :py:class:`list`, optional
        A list of keys to yield in the return value for each entry above.  It
        gives you the opportunity to rewrite some key names for convenience.
        This list, if provided, must be of the same length as ``query``.


    Returns
    -------

    data : :py:class:`tuple`, None
        A tuple of ``(name, value)`` pairs with the queried parameters, keyed
        by the ``rename`` entries (or by ``query``, if ``rename`` is not
        provided).  If ``nvidia-smi`` is not available, returns ``None``.
        Percentages are converted to floats, memory amounts are converted to
        gigabytes (floats), and everything else is kept as a string.

    """

    if _nvidia_smi is None:
        return None

    if rename is None:
        rename = query
    else:
        assert len(rename) == len(query)

    values = subprocess.getoutput(
        "%s --query-gpu=%s --format=csv,noheader"
        % (_nvidia_smi, ",".join(query))
    )
    values = [k.strip() for k in values.split(",")]
    t_values = []
    for k in values:
        if k.endswith("%"):  # percentage: drop the suffix, keep the number
            t_values.append(float(k[:-1].strip()))
        elif k.endswith("MiB"):  # memory: convert from MiB to gigabytes
            t_values.append(float(k[:-3].strip()) / 1024)
        else:  # anything else is kept unchanged (as a string)
            t_values.append(k)
    return tuple(zip(rename, t_values))
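

# A minimal usage sketch (illustrative only; concrete values depend on the
# host, and the call yields ``None`` on machines without ``nvidia-smi``):
#
#   run_nvidia_smi(("gpu_name", "memory.total"))
#   # -> (("gpu_name", "<model name>"), ("memory.total", <total, in GB>))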


def gpu_constants():
    """Returns GPU (static) information using nvidia-smi.

    See :py:func:`run_nvidia_smi` for operational details.

    Returns
    -------

    data : :py:class:`tuple`, None
        If ``nvidia-smi`` is not available, returns ``None``; otherwise,
        returns a tuple of ``(name, value)`` pairs containing the following
        ``nvidia-smi`` query information:

        * ``gpu_name``, as ``gpu_name`` (:py:class:`str`)
        * ``driver_version``, as ``gpu_driver_version`` (:py:class:`str`)
        * ``memory.total``, as ``gpu_memory_total`` (transformed to gigabytes,
          :py:class:`float`)

    """

    return run_nvidia_smi(
        ("gpu_name", "driver_version", "memory.total"),
        ("gpu_name", "gpu_driver_version", "gpu_memory_total"),
    )
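

# Since ``gpu_constants()`` may return ``None``, callers should test the
# return value before converting it, e.g.:
#
#   constants = gpu_constants()
#   if constants is not None:
#       logger.info("GPU model: %s", dict(constants)["gpu_name"])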


def gpu_log():
    """Returns GPU information about current non-static status using nvidia-smi.

    See :py:func:`run_nvidia_smi` for operational details.

    Returns
    -------

    data : :py:class:`tuple`, None
        If ``nvidia-smi`` is not available, returns ``None``; otherwise,
        returns a tuple of ``(name, value)`` pairs containing the following
        ``nvidia-smi`` query information:

        * ``memory.used``, as ``gpu_memory_used`` (transformed to gigabytes,
          :py:class:`float`)
        * ``memory.free``, as ``gpu_memory_free`` (transformed to gigabytes,
          :py:class:`float`)
        * ``utilization.memory``, as ``gpu_memory_percent``
          (:py:class:`float`, in percent)
        * ``utilization.gpu``, as ``gpu_percent``
          (:py:class:`float`, in percent)

    """

    return run_nvidia_smi(
        ("memory.used", "memory.free", "utilization.memory", "utilization.gpu"),
        (
            "gpu_memory_used",
            "gpu_memory_free",
            "gpu_memory_percent",
            "gpu_percent",
        ),
    )
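

# ``gpu_log()`` is meant to be called repeatedly (e.g. once per training
# epoch or iteration) to sample utilization; each call re-runs nvidia-smi:
#
#   sample = gpu_log()
#   if sample is not None:
#       logger.info("GPU memory used: %.2f GB", dict(sample)["gpu_memory_used"])

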
_CLUSTER = []
"""List of processes currently being monitored"""


def cpu_constants():
    """Returns static CPU information about the current system.

    Returns
    -------

    data : tuple
        A tuple of ``(name, value)`` pairs containing these entries:

        0. ``cpu_memory_total`` (:py:class:`float`): total memory available,
           in gigabytes
        1. ``cpu_count`` (:py:class:`int`): number of logical CPUs available

    """

    return (
        ("cpu_memory_total", psutil.virtual_memory().total / GB),
        ("cpu_count", psutil.cpu_count(logical=True)),
    )
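

# The pair layout makes it cheap to build a dictionary when key access is
# more convenient than iteration, e.g.:
#
#   totals = dict(cpu_constants())
#   # totals["cpu_memory_total"] -> total RAM, in gigabytes (float)
#   # totals["cpu_count"] -> number of logical CPUs (int)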


def cpu_log():
    """Returns process (+children) information using ``psutil``.

    This call examines the current process plus any spawned children and
    returns the combined resource-usage summary for the process group.

    Returns
    -------

    data : tuple
        A tuple of ``(name, value)`` pairs containing these entries:

        0. ``cpu_memory_used`` (:py:class:`float`): total memory used on the
           system, in gigabytes
        1. ``cpu_rss`` (:py:class:`float`): RAM currently used by the process
           and its children, in gigabytes
        2. ``cpu_vms`` (:py:class:`float`): total memory (RAM + swap)
           currently used by the process and its children, in gigabytes
        3. ``cpu_percent`` (:py:class:`float`): percentage of the total CPU
           used by this process and its children (recursively) since the last
           call (the first value returned should be ignored).  This number
           depends on the number of CPUs in the system and can be greater
           than 100%
        4. ``cpu_processes`` (:py:class:`int`): total number of processes,
           including self and children (recursively)
        5. ``cpu_open_files`` (:py:class:`int`): total number of open files
           by self and children

    """

    global _CLUSTER

    if (not _CLUSTER) or (_CLUSTER[0] != psutil.Process()):  # initialization
        this = psutil.Process()
        _CLUSTER = [this] + this.children(recursive=True)
        # touch cpu_percent() at least once for every process, so the next
        # call returns a meaningful value
        for k in _CLUSTER:
            k.cpu_percent(interval=None)
    else:
        # check all cluster components and update the process list, keeping
        # the cpu_percent() initialization for children that are still alive
        stored_children = set(_CLUSTER[1:])
        current_children = set(_CLUSTER[0].children(recursive=True))
        keep_children = stored_children & current_children
        new_children = current_children - stored_children
        for k in new_children:
            k.cpu_percent(interval=None)
        _CLUSTER = _CLUSTER[:1] + list(keep_children) + list(new_children)

    memory_info = [k.memory_info() for k in _CLUSTER]

    return (
        ("cpu_memory_used", psutil.virtual_memory().used / GB),
        ("cpu_rss", sum(k.rss for k in memory_info) / GB),
        ("cpu_vms", sum(k.vms for k in memory_info) / GB),
        ("cpu_percent", sum(k.cpu_percent(interval=None) for k in _CLUSTER)),
        ("cpu_processes", len(_CLUSTER)),
        ("cpu_open_files", sum(len(k.open_files()) for k in _CLUSTER)),
    )
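

if __name__ == "__main__":
    # A small self-contained demonstration (a sketch; runs only when this
    # module is executed directly): print one snapshot of every metric the
    # module can collect.  The first cpu_percent() reading is meaningless
    # (see the cpu_log() documentation), so we sample twice, with a pause.
    import pprint
    import time

    logging.basicConfig(level=logging.INFO)
    pprint.pprint(dict(cpu_constants()))
    cpu_log()  # first call just initializes the cpu_percent() counters
    time.sleep(1)
    pprint.pprint(dict(cpu_log()))
    pprint.pprint(dict(gpu_constants() or ()))  # empty dict without a GPU
    pprint.pprint(dict(gpu_log() or ()))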