1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
|
import json
import sys
from typing import List
# Initially empty list of setup entries for the pilot environment.
# ResourceConfig.pilot_env_setup uses this very list object (not a copy) as
# its class-level default, so in-place changes to it are visible everywhere
# that default is read.
_setup_paths: List[str] = []
try:
import radical.pilot as rp
import radical.utils as ru
except ImportError:
pass
# Substring searched for (case-insensitively) in ResourceConfig.worker_type
# to decide whether "mpi4py" must be part of the pilot environment setup.
MPI = "mpi"
# Default value of ResourceConfig.pilot_env_name.
RP_ENV = "rp"
# Default value of ResourceConfig.pilot_env_mode.
CLIENT = "client"
# Environment name that get_config() switches to when pilot_env_mode is
# anything other than CLIENT.
RPEX_ENV = "ve_rpex"
# Worker class name used as "raptor_class" when worker_type is exactly
# "mpi" (compared case-insensitively).
MPI_WORKER = "MPIWorker"
# Default value of ResourceConfig.worker_type.
DEFAULT_WORKER = "DefaultWorker"
class ResourceConfig:
    """
    This ResourceConfig class is an abstraction of the resource
    configuration of the RAPTOR layer in the RADICAL-Pilot runtime system.

    This class sets up the default configuration values for the executor and
    allows the user to specify different resource requirements flexibly.

    For more information:
    https://radicalpilot.readthedocs.io/en/stable/tutorials/raptor.html

    Parameters
    ----------
    masters : int
        The number of masters to be deployed by RAPTOR.
        Default is 1.

    workers : int
        The number of workers to be deployed by RAPTOR.
        Default is 1.

    worker_gpus_per_node : int
        The number of GPUs a worker will operate on per node.
        Default is 0.

    worker_cores_per_node : int
        The number of CPU cores a worker will operate on per node.
        Default is 4.

    cores_per_master : int
        The number of cores a master will operate on per node.
        Default is 1.

    nodes_per_worker : int
        The number of nodes to be occupied by every worker.
        Default is 1.

    pilot_env_mode : str
        The pilot environment mode. Any value other than "client" makes
        ``get_config`` switch ``pilot_env_name`` to "ve_rpex".
        Default is "client".

    pilot_env_path : str
        The path to an existing pilot environment.
        Default is an empty string (RADICAL-Pilot will create one).

    pilot_env_name : str
        The name of the pilot environment.
        Default is "rp"; replaced by "ve_rpex" in ``get_config`` when
        ``pilot_env_mode`` is not "client".

    pilot_env_pre_exec : list
        List of commands to be executed before starting the pilot environment.
        Default is an empty list.

    pilot_env_type : str
        The type of the pilot environment (e.g., 'venv', 'conda').
        Default is "venv".

    pilot_env_setup : list
        List of setup commands/packages for the pilot environment.
        Default is an empty list.

    python_v : str
        The Python version to be used in the pilot environment.
        Default is determined by the system's Python version.

    worker_type : str
        The type of worker(s) to be deployed by RAPTOR on the compute
        resources.
        Default is "DefaultWorker".
    """

    masters: int = 1
    workers: int = 1

    worker_gpus_per_node: int = 0
    worker_cores_per_node: int = 4

    cores_per_master: int = 1
    nodes_per_worker: int = 1

    pilot_env_mode: str = CLIENT
    pilot_env_path: str = ""
    pilot_env_type: str = "venv"
    pilot_env_name: str = RP_ENV
    # NOTE: the two list defaults below are class-level objects shared by
    # every instance that does not assign its own list; get_config() only
    # reads them and never modifies them in place.
    pilot_env_pre_exec: List[str] = []
    pilot_env_setup: List[str] = _setup_paths

    python_v: str = f'{sys.version_info[0]}.{sys.version_info[1]}'
    worker_type: str = DEFAULT_WORKER

    def get_config(cls, path=None):
        """
        Build the RAPTOR configuration from the current attribute values.

        The method carries no ``@classmethod`` decorator, so when it is
        called on an instance, ``cls`` is bound to that instance and
        per-instance attribute overrides are honoured.

        Parameters
        ----------
        path : str, optional
            Directory in which to write the configuration as the JSON file
            ``rpex.cfg``. When omitted (or falsy), nothing is written and a
            config object is returned instead.

        Returns
        -------
        radical.utils.Config or None
            ``ru.Config(from_dict=cfg)`` when ``path`` is not given;
            ``None`` after the file has been written when ``path`` is given.

        Notes
        -----
        Side effect: when ``pilot_env_mode`` is not "client",
        ``pilot_env_name`` is set to "ve_rpex" on the object this method
        is called on.

        The method uses the module-level names ``rp`` and ``ru``; if the
        guarded import of the ``radical`` packages did not succeed, using
        them here raises ``NameError``.
        """
        # Default ENV mode for RP is to reuse
        # the client side. If this is not the case,
        # then RP will create a new env named ve_rpex
        # The user need to make sure that under:
        # $HOME/.radical/pilot/configs/*_resource.json
        # that virtenv_mode = local
        if cls.pilot_env_mode != CLIENT:
            cls.pilot_env_name = RPEX_ENV

        worker_type = cls.worker_type

        # Work on copies: pilot_env_setup / pilot_env_pre_exec default to
        # lists shared at class (and module) level, so appending in place
        # would leak "mpi4py" into every other configuration.
        env_setup = list(cls.pilot_env_setup)
        env_pre_exec = list(cls.pilot_env_pre_exec)
        if MPI in worker_type.lower() and "mpi4py" not in env_setup:
            env_setup.append("mpi4py")

        # A worker type of exactly "mpi" (any case) is shorthand for the
        # MPI worker class; any other value is used verbatim.
        if worker_type.lower() != MPI:
            raptor_class = worker_type
        else:
            raptor_class = MPI_WORKER

        cfg = {
            'n_masters': cls.masters,
            'n_workers': cls.workers,
            'worker_type': worker_type,
            'gpus_per_node': cls.worker_gpus_per_node,
            'cores_per_node': cls.worker_cores_per_node,
            'cores_per_master': cls.cores_per_master,
            'nodes_per_worker': cls.nodes_per_worker,

            'pilot_env': {
                "version": cls.python_v,
                "name": cls.pilot_env_name,
                "path": cls.pilot_env_path,
                "type": cls.pilot_env_type,
                "setup": env_setup,
                "pre_exec": env_pre_exec
            },

            'pilot_env_mode': cls.pilot_env_mode,

            # The master description is fixed at one rank with one core;
            # cores_per_master is exported only as the top-level key above.
            'master_descr': {
                "ranks": 1,
                "cores_per_rank": 1,
                "mode": rp.RAPTOR_MASTER,
                "named_env": cls.pilot_env_name,
            },

            'worker_descr': {
                "mode": rp.RAPTOR_WORKER,
                "named_env": cls.pilot_env_name,
                "raptor_file": "./rpex_worker.py",
                "raptor_class": raptor_class,
                "ranks": cls.nodes_per_worker * cls.worker_cores_per_node,
                "gpus_per_rank": cls.nodes_per_worker * cls.worker_gpus_per_node,
            }}

        # Convert the class instance to a Json file or a Config dict.
        if path:
            config_path = f"{path}/rpex.cfg"
            with open(config_path, 'w') as f:
                json.dump(cfg, f, indent=4)
            # Nothing to hand back in file mode; be explicit about it
            # instead of falling through to an unbound local.
            return None

        return ru.Config(from_dict=cfg)
|