File: test34.2

package info (click to toggle)
slurm-wlm 22.05.8-4%2Bdeb12u3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 48,492 kB
  • sloc: ansic: 475,246; exp: 69,020; sh: 8,862; javascript: 6,528; python: 6,444; makefile: 4,185; perl: 4,069; pascal: 131
file content (265 lines) | stat: -rwxr-xr-x 6,819 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Validate that preemption by qos is enforced
############################################################################
# Copyright (C) 2011-2014 SchedMD LLC
# Written by Nathan Yee <nyee32@schedmd.com>
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################
source ./globals
source ./globals_accounting

# Name of the user running the test; assigned later, once the test starts.
set user        {}

# Accounts and QOS created for this test, all prefixed with the test name.
set acct_1      ${test_name}_acct1
set acct_2      ${test_name}_acct2
set qos_1       ${test_name}_qos1
set qos_2       ${test_name}_qos2

# Batch script submitted by every job in this test.
set file_in     $test_dir/job_script

# Number of nodes reported by get_nodes_by_state; jobs request all of them.
set nodes       [llength [get_nodes_by_state]]

# Job ids; 0 means "no job submitted yet".
set job_id      0
set qos_1_id    0
set qos_2_id    0

# Job state names used when waiting on / checking jobs.
set running_state   RUNNING
set pending_state   PENDING
set preempted_state PREEMPTED
set done_state      DONE

# scontrol update directive that makes a job eligible to run immediately.
set eligible_now_job_directive EligibleTime=now

# Prerequisites: skip the test unless the cluster is configured the way
# this test needs it.
if {[get_config_param "PreemptType"] ne "preempt/qos"} {
	skip "This test requires that PreemptType=preempt/qos"
}
if {[get_config_param "AccountingStorageType"] ne "accounting_storage/slurmdbd"} {
	skip "This test requires use of Slurmdbd"
}
# Every job below requests all $nodes nodes, and the heterogeneous variant
# splits them into 1 + ($nodes - 1), so at least two nodes are needed.
if {$nodes < 2} {
	skip "Not enough available nodes ($nodes < 2)"
}

# Extract the numeric part of MinJobAge. The regexp result must be checked:
# if the configured value does not match, min_job_age would never be set and
# the comparison below would abort with an opaque Tcl "no such variable" error.
set min_job_age_cfg [get_config_param "MinJobAge"]
if {![regexp "($number)" $min_job_age_cfg {} min_job_age]} {
	fail "Unable to determine MinJobAge from configuration ($min_job_age_cfg)"
}
# NOTE(review): presumably the job record must outlive the job long enough
# for the state checks below to see its final state -- confirm.
if {$min_job_age < 10} {
	skip "MinJobAge configured too low for this test ($min_job_age < 10)"
}

# Create a QOS, an account that uses that QOS, and an association of the
# current test user with that account.
#
# Arguments:
#   acct_name - name of the account to create
#   qos_name  - name of the QOS to create and attach to the account
#   pre_qos   - value passed as the QOS "preempt=" option (may be empty)
#   pre_mode  - value passed as the QOS "preemptmode=" option
#
# The QOS is created with maxnodes set to the global $nodes. Each of the
# three sacctmgr commands must print its confirmation text at least once;
# the test fails if sacctmgr times out or the total confirmation count is
# not exactly three.
proc acct_setup { acct_name qos_name pre_qos pre_mode } {
	global user nodes sacctmgr

	# Each step is a sacctmgr command line paired with the output text
	# that confirms it took effect. Steps run in the order listed.
	set steps [list \
		[list $sacctmgr -i create qos $qos_name preempt=$pre_qos preemptmode=$pre_mode maxnodes=$nodes] \
		"Adding QOS" \
		[list $sacctmgr -i create account $acct_name qos=$qos_name] \
		"Adding Account" \
		[list $sacctmgr -i add user $user account=$acct_name] \
		"Associations" \
	]

	set confirmed 0
	foreach {cmd_line confirmation} $steps {
		# Prepend "spawn" to the already-quoted argument list so each
		# element reaches spawn as exactly one argument.
		eval [linsert $cmd_line 0 spawn]
		expect {
			-re $confirmation {
				incr confirmed 1
				exp_continue
			}
			timeout {
				fail "sacctmgr is not responding"
			}
			eof {
				wait
			}
		}
	}

	if {$confirmed != 3} {
		fail "Account was not created properly"
	}
}

# Submit the test batch script under the given account, requesting every
# available node exclusively, and return the resulting job id.
#
# Arguments:
#   acct1   - account to charge the job to (passed to sbatch -A)
#   het_job - when true, submit a two-component heterogeneous job
#             (1 node + the remaining nodes) instead of a single job
#             spanning all nodes. Defaults to false.
#
# Returns the job id reported by sbatch. The test fails if sbatch does not
# respond or does not report a submitted job.
proc sub_job { acct1 {het_job false} } {
	global nodes file_in sbatch number

	set job_id 0
	if { $het_job } {
		# Braced so the expression is substituted once and byte-compiled.
		set nodesless1 [expr {$nodes - 1}]
		# The ":" separates the two components of the heterogeneous job.
		spawn $sbatch -o/dev/null --exclusive -N1 -A$acct1 : -N$nodesless1 -A$acct1 $file_in
	} else {
		spawn $sbatch -o/dev/null --exclusive -N$nodes -A$acct1 $file_in
	}
	expect {
		-re "Submitted batch job ($number)" {
			set job_id $expect_out(1,string)
			exp_continue
		}
		timeout {
			fail "sbatch is not responding"
		}
		eof {
			wait
		}
	}

	# job_id is still 0 when sbatch never printed a submission line.
	if { $job_id == 0 } {
		fail "sbatch did not submit job"
	} else {
		return $job_id
	}
}

# Remove everything this test creates in the accounting database: the
# user's associations with both test accounts, the accounts themselves,
# and both test QOS.
#
# Waits for the jobs of both accounts to finish first (via
# wait_for_account_done). Each account/QOS deletion counts as confirmed
# when sacctmgr reports either a deletion or "Nothing deleted"; a warning
# is logged unless exactly four confirmations are seen. The test fails if
# sacctmgr stops responding.
proc cleanup {} {
	global user acct_1 acct_2 qos_1 qos_2 qos_1_id qos_2_id sacctmgr

	wait_for_account_done $acct_1,$acct_2

	set confirmed 0

	# Walk the accounts and their QOS in lock-step: (acct_1, qos_1) first,
	# then (acct_2, qos_2).
	foreach acct [list $acct_1 $acct_2] qos [list $qos_1 $qos_2] {
		run_command "$sacctmgr -i remove user $user where account=$acct"

		spawn $sacctmgr delete -i account $acct
		expect {
			-re "(Deleting accounts|Nothing deleted)"  {
				incr confirmed 1
				exp_continue
			}
			timeout {
				fail "sacctmgr is not responding"
			}
			eof {
				wait
			}
		}

		spawn $sacctmgr delete -i qos $qos
		expect {
			-re "(Deleting QOS|Nothing deleted)" {
				incr confirmed 1
				exp_continue
			}
			timeout {
				fail "sacctmgr is not responding"
			}
			eof {
				wait
			}
		}
	}

	# Two accounts plus two QOS: four confirmations expected in total.
	if { $confirmed != 4 } {
		log_warn "Unable to clean up accounts and qos"
	}
}

# Resolve the user name first: cleanup interpolates $user into its
# "remove user" command, so it must not run while $user is still empty.
set user [get_my_user_name]

# Announce the cleanup before performing it so the log reads in order.
log_info "**** Cleanup from previous run ****"
cleanup

make_bash_script $file_in "sleep 30"

# Preempt modes to test
set preempt_mode_cancel  "cancel"
set preempt_mode_requeue "requeue"
set preempt_mode_list "$preempt_mode_cancel $preempt_mode_requeue"

# Job types to test for each mode
set job_type_batch "batch"
set job_type_het   "het"
set job_type_list "$job_type_batch $job_type_het"

# qos_1 preempts nothing and starts in cancel mode; the test loop changes
# its preempt mode per iteration. qos_2 is allowed to preempt qos_1.
acct_setup $acct_1 $qos_1 "" $preempt_mode_cancel
acct_setup $acct_2 $qos_2 $qos_1 cluster

# Run every combination of preempt mode (cancel, requeue) and job type
# (regular batch job, heterogeneous job). In each round a job is started
# under the preemptable QOS, then a job under the preempting QOS is
# submitted, and the resulting state of the first job is verified.
foreach preempt_mode $preempt_mode_list {
	foreach job_type $job_type_list {
		set mode_type_string [string toupper "$preempt_mode $job_type"]
		log_info "*** TEST PREEMPT $mode_type_string JOB ***"

		# Switch the preemptable QOS to the mode under test
		set mod_qos_vals(preemptmode) $preempt_mode
		mod_qos $qos_1 [array get mod_qos_vals]

		# These are string values: compare with eq rather than numeric ==
		if { $job_type eq $job_type_batch } {
			set have_het_job 0
		} else {
			set have_het_job 1
		}

		# Start the job that will be preempted
		set qos_1_id [sub_job $acct_1 $have_het_job]
		wait_for_job -fail $qos_1_id $running_state $have_het_job

		# Submit the preempting job; it can only run by displacing the
		# first one, since both request every node exclusively
		set qos_2_id [sub_job $acct_2]
		wait_for_job -fail $qos_2_id $running_state

		if { $preempt_mode eq $preempt_mode_cancel } {
			# Cancel mode: the first job must end up preempted
			set jobs_ok false
			wait_for {$jobs_ok} {
				set jobs_ok [check_job_state $qos_1_id $preempted_state $have_het_job]
			}
			subtest {$jobs_ok} "Job ($qos_1_id) should be fully in the $preempted_state state"
		} else {
			# Requeue mode: the first job must go back to pending
			set jobs_ok false
			wait_for {$jobs_ok} {
				set jobs_ok [check_job_state $qos_1_id $pending_state $have_het_job]
			}
			subtest {$jobs_ok} "Job ($qos_1_id) should be fully in the $pending_state state"

			# Make the preempting job eligible to run now to avoid delay,
			# then let it run to completion
			run_command "$scontrol update job $qos_2_id $eligible_now_job_directive"
			wait_for_job -fail $qos_2_id $done_state

			# Wait for requeued job to restart.
			# Make job eligible to run now to avoid delay
			run_command "$scontrol update job $qos_1_id $eligible_now_job_directive"
			wait_for_job -fail $qos_1_id $running_state $have_het_job

			set jobs_ok false
			wait_for {$jobs_ok} {
				set jobs_ok [check_job_state $qos_1_id $running_state $have_het_job]
			}
			subtest {$jobs_ok} "Job ($qos_1_id) should be fully in the $running_state state"
		}

		# Let all jobs of both accounts finish before the next round
		wait_for_account_done $acct_1,$acct_2
	}
}