"""
this example launch several sleep command to slurm and check them
warning: in order to run properly the working directory must be know by the node as well.
usually using /tmp_14_days/{XXX} is the simplest
"""
import os
from sluurp.executor import submit
from sluurp.job import SBatchScriptJob, cancel_slurm_job, get_job_status
slurm_config = {
    "memory": 1,  # RAM in GB
    "partition": "nice-long",  # slurm partition
    # "n_gpus": 1,
    # "job_name": "test sluurp",
    # "python_venv": "/scisoft/tomotools/activate dev",  # optional python environment to source before the script command
    # "modules": ("tomotools", ),  # optional modules to load before the script command
    # "sbatch_extra_params": {"export": "NONE", "gpu_card": "a40"},  # extra parameters passed to sbatch, such as 'export'
}
# step 1: launch the jobs
os.makedirs("slurm_scripts", exist_ok=True)
jobs_future = {}
for i in range(5):
    job = SBatchScriptJob(
        slurm_config=slurm_config,
        script=("sleep 5", f"echo job {i} done"),
        script_path=f"slurm_scripts/sleep_script_{i}.sh",
        clean_script=False,
    )
    future = submit(job, timeout=200)
    jobs_future[job] = future
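# submit() returns a future-like object: calling .result() on it (done at the end
# of this example) blocks until the corresponding SLURM job has finished.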
# cancel one job
cancel_slurm_job(list(jobs_future.keys())[0].job_id)
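# The same call works for any submitted job; a sketch for cancelling every
# remaining job (e.g. on user interruption) would simply loop over them:
#     for pending_job in jobs_future:
#         cancel_slurm_job(pending_job.job_id)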
# print status of the jobs
for i, job in enumerate(jobs_future.keys()):
    print(f"job {i} status is {get_job_status(job.job_id)}")
# wait until the jobs are finished and collect their results
print("get job result")
for job_future in jobs_future.values():
    print(job_future.result())