Coverage for src/bob/pipelines/config/distributed/slurm_cpu_default.py: 0%

17 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2024-07-12 21:32 +0200

1"""This config creates a Dask Client configured to use Slurm workers. 

2 

3A Dask SLURMCluster (with its Scheduler) is spun up locally, and will submit Dask Workers to be run 

4on the Slurm grid. 

5 

6The Client can then send work to the Scheduler, which will dispatch it to workers 

7and scale the number of workers accordingly. 

8 

9The slurm account name must be stored in ``~/.config/bobrc.toml`` 

10(``slurm.account`` entry). Set it with: 

11``` 

12bob config set slurm.account your-project-name 

13``` 

14 

15You can specify your conda **base** path with the ``conda.base_path`` entry in 

16``~/.config/bobrc.toml``; otherwise, it defaults to ``~/miniconda3``. 

17 

18You can specify the conda environment to use in the Dask Workers with the 

19``conda.slurm_prefix`` entry in ``~/.config/bobrc.toml``; otherwise, it will try 

20to activate the currently activated **local** environment (or do nothing if no 

21conda environment is active). 

22""" 

23 

24import os 

25 

26from pathlib import Path 

27 

28from clapper.rc import UserDefaults 

29from dask.distributed import Client 

30from dask_jobqueue import SLURMCluster 

31 

# User-level defaults; this file holds the ``slurm.*`` and ``conda.*`` entries
# read below.
rc = UserDefaults(path="bobrc.toml")

# Environment used by the worker jobs, by decreasing priority:
#   - the ``conda.slurm_prefix`` entry of bobrc.toml;
#   - the ``CONDA_PREFIX`` of the process loading this config;
#   - none (the job script prologue stays empty).
conda_base_path = Path(rc.get("conda.base_path", default="~/miniconda3"))
conda_setup_script = conda_base_path.joinpath("etc", "profile.d", "conda.sh")
conda_current_prefix = rc.get(
    "conda.slurm_prefix",
    default=os.environ.get("CONDA_PREFIX", ""),
)

# Shell lines handed to SLURMCluster as ``job_script_prologue``.
job_script_prologue = (
    []
    if conda_current_prefix == ""
    else [
        f"source {conda_setup_script}",
        f"conda activate {conda_current_prefix}",
    ]
)

# Refuse to go further when no account / project name is configured.
if "slurm.account" not in rc:
    raise RuntimeError(
        f"Could not retrieve slurm.account from config ({rc.path}). "
        "Please set the account / project name with: "
        "bob config set slurm.account your-project-name"
    )

cluster = SLURMCluster(
    n_workers=1,
    queue="cpu",  # Name of the Slurm partition
    account=rc.get("slurm.account"),  # Project billed for the jobs
    cores=1,  # For each job
    memory="8 GB",  # For each job
    walltime="00:30:00",
    local_directory="/tmp/dask",  # Ephemeral but fast NVMe storage
    log_directory="./logs",
    job_script_prologue=job_script_prologue,
    protocol="tcp://",
    scheduler_options={
        "protocol": "tcp://",
        "port": 8786,  # Port on which workers reach the scheduler
    },
    worker_extra_args=[
        "--worker-port",
        "60001:63000",  # Port range on which the Client reaches the workers
    ],
)

# Adaptive scaling between 1 and 128 workers.
cluster.adapt(
    minimum=1,
    maximum=128,
    wait_count=5,
    interval=10,
    target_duration="10s",
)

# Client attached to the Slurm-backed cluster defined above.
dask_client = Client(cluster)

88 

89dask_client = Client(cluster)