Coverage for src/bob/pipelines/config/distributed/slurm_cpu_default.py: 0%
17 statements
« prev ^ index » next coverage.py v7.6.0, created at 2024-07-12 21:32 +0200
« prev ^ index » next coverage.py v7.6.0, created at 2024-07-12 21:32 +0200
"""Dask Client configuration whose workers run on a Slurm grid.

A Dask ``SLURMCluster`` is created here: its scheduler is spun up locally, and
it will submit Dask Workers to be run on the Slurm grid.

The Client can then send work to the Scheduler, which will dispatch it to the
workers and scale the number of workers accordingly.

The slurm account name must be stored in ``~/.config/bobrc.toml``
(``slurm.account`` entry). Set it with:
```
bob config set slurm.account your-project-name
```

You can specify your conda **base** path with the ``conda.base_path`` entry in
``~/.config/bobrc.toml``; otherwise, it defaults to ``~/miniconda3``.

You can specify the conda environment to use in the Dask Workers with the
``conda.slurm_prefix`` entry in ``~/.config/bobrc.toml``; otherwise, it will try
to activate the currently activated **local** environment (or do nothing if no
conda environment is active).
"""

import os

from pathlib import Path

from clapper.rc import UserDefaults
from dask.distributed import Client
from dask_jobqueue import SLURMCluster

rc = UserDefaults(path="bobrc.toml")

# Environment selection for the workers, by decreasing priority:
#   1. the conda env given by ``conda.slurm_prefix`` in bobrc.toml;
#   2. the conda env this script is currently running in (``CONDA_PREFIX``);
#   3. no conda env at all (the prologue stays empty).
conda_base_path = Path(rc.get("conda.base_path", default="~/miniconda3"))
conda_setup_script = conda_base_path.joinpath("etc", "profile.d", "conda.sh")
conda_current_prefix = rc.get(
    "conda.slurm_prefix",
    default=os.environ.get("CONDA_PREFIX", ""),
)

# Shell lines handed to the cluster as the job script prologue; they are only
# emitted when there is an environment to activate.
job_script_prologue = (
    [
        f"source {conda_setup_script}",
        f"conda activate {conda_current_prefix}",
    ]
    if conda_current_prefix != ""
    else []
)

# The account is mandatory: fail early with instructions instead of
# submitting jobs without a billing project.
if "slurm.account" not in rc:
    raise RuntimeError(
        f"Could not retrieve slurm.account from config ({rc.path}). "
        "Please set the account / project name with: "
        "bob config set slurm.account your-project-name"
    )

cluster = SLURMCluster(
    # Slurm submission settings.
    queue="cpu",  # Slurm's partition
    account=rc.get("slurm.account"),  # Billing project
    walltime="00:30:00",
    job_script_prologue=job_script_prologue,
    # Resources requested per job.
    cores=1,
    memory="8 GB",
    n_workers=1,
    # Working and log locations.
    local_directory="/tmp/dask",  # Fast but ephemeral NVMe storage
    log_directory="./logs",
    # Networking between client, scheduler and workers.
    protocol="tcp://",
    scheduler_options=dict(
        protocol="tcp://",
        port=8786,  # Workers will connect to the scheduler on that port
    ),
    worker_extra_args=[
        "--worker-port",
        "60001:63000",  # Workers will be reachable by the Client on those ports
    ],
)

# Let the cluster grow and shrink between 1 and 128 workers.
cluster.adapt(
    minimum=1,
    maximum=128,
    interval=10,
    wait_count=5,
    target_duration="10s",
)

# Client bound to the adaptive Slurm cluster defined above.
dask_client = Client(cluster)