File: inc3.11.7

package info (click to toggle)
slurm-wlm 22.05.8-4+deb12u3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 48,492 kB
  • sloc: ansic: 475,246; exp: 69,020; sh: 8,862; javascript: 6,528; python: 6,444; makefile: 4,185; perl: 4,069; pascal: 131
file content (297 lines) | stat: -rw-r--r-- 7,964 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          to be called from test3.11
#	   Several cases for core based reservations
#          Plugin select/cons_res needed
#
############################################################################
# Copyright (C) 2009 Lawrence Livermore National Security
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Dave Bremer <dbremer@llnl.gov>
# CODE-OCEC-09-009. All rights reserved.
#
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################

# inc3_11_7 --
#   Sub-test 7 of test3.11: exercises core-based (CoreCnt) reservations.
#   Creates a reservation covering half the cores of one node, then runs
#   five scenarios:
#     1. 1-core job inside the reservation            -> must RUN
#     2. job using all reserved cores                 -> must RUN
#     3. job using the node's non-reserved cores      -> must RUN
#     4. job asking for reserved cores + 1            -> must stay PENDING
#     5. job pinned to the node, cores + 1, no resv   -> must stay PENDING,
#        then RUN once the reservation is deleted
#   Calls fail (aborting the test) on any unexpected state; cleans up the
#   reservation and job before each fail where possible.
proc inc3_11_7 {} {
	# Globals supplied by the test3.11 driver / test-suite framework:
	# paths to Slurm commands, partition/node topology, and helpers.
	global re_word_str bin_sleep cluster_cpus cores_per_node
	global def_partition file_in number part_cores part_node
	global part_node_cnt part_node_cores
	global sbatch scancel scontrol user_name wait_for_job

	set res_name "resv3.11.7"

	log_info "+++++ STARTING TEST 7 +++++"

	# Make a reservation, just to get node size information
	set ret_code [create_res $res_name "StartTime=now Duration=1 Nodes=$part_node User=$user_name"]
	if {$ret_code != 0} {
		fail "Unable to create a valid reservation"
	}
	# Delete the reservation
	set ret_code [delete_res $res_name]
	if {$ret_code != 0} {
		fail "Unable to delete reservation ($res_name)"
	}

	# Now make a reservation using half the cores on that node
	# There is no way to specify the Nodes in a reservation with CoreCnt,
	# so hope that we get a node with the same size
	set corecnt [ expr ($part_node_cores / 2) ]
	set ret_code [create_res $res_name "StartTime=now Duration=60 Nodes=$part_node CoreCnt=$corecnt User=$user_name"]
	if {$ret_code != 0} {
		fail "Unable to create a valid reservation"
	}

	# Look up the reservation to learn which node it actually landed on.
	set res_info [get_reservations $res_name]
	if { ![dict exists $res_info $res_name] } {
		delete_res $res_name
		fail "Unable to get info about reservation ($res_name)"
	}
	# Convert reserved cores to threads (tasks): sbatch -n counts threads,
	# so multiply by the node's threads-per-core.
	lassign [get_node_cpus [dict get $res_info $res_name "Nodes"]] cputot threadcnt
	set thread_res_num [ expr $corecnt * $threadcnt ]

	# Make the job script
	make_bash_script $file_in "$bin_sleep 100"

	# Sleep short time in case clocks are not synchronized
	sleep 5

	# (First test) Submit the batch job: a simple job using just 1 core inside the reservation
	set job_id 0
	spawn $sbatch -n1 --reservation=$res_name --output=/dev/null $file_in
	expect {
		-re "Submitted batch job ($number)" {
			set job_id $expect_out(1,string)
			exp_continue
		}
		timeout {
			# Clean up before aborting (job_id may still be 0 here;
			# cancel_job is presumably a no-op for 0 -- TODO confirm).
			delete_res $res_name
			cancel_job $job_id
			fail "sbatch not responding"
		}
		eof {
			wait
		}
	}
	if {$job_id == 0} {
		delete_res $res_name
		fail "Batch submit failure"
	}

	subtest {[wait_for_job $job_id RUNNING] == 0} "A simple job using just 1 core should run inside the reservation"

	cancel_job $job_id
	sleep 1

	# (Second test) Submit the batch job: a job using all cores allocated by the reservation
	spawn $sbatch -n$thread_res_num --reservation=$res_name --output=/dev/null $file_in
	expect {
		-re "Submitted batch job ($number)" {
			set job_id $expect_out(1,string)
			exp_continue
		}
		timeout {
			delete_res $res_name
			cancel_job $job_id
			fail "sbatch not responding"
		}
		eof {
			wait
		}
	}
	if {$job_id == 0} {
		delete_res $res_name
		fail "Batch submit failure"
	}

	# Show the job, make sure reservation tag is right
	subtest {[wait_for_job $job_id RUNNING] == 0} "A job using all cores allocated by the reservation should run"

	cancel_job $job_id
	sleep 1

	# (Third test) Submit the batch job: a job using all cores not allocated by the reservation
	# No --reservation here: the reservation holds half the node's cores,
	# so the same thread count fits exactly in the unreserved half.
	spawn $sbatch -n$thread_res_num --nodelist=[dict get $res_info $res_name "Nodes"] --output=/dev/null $file_in
	expect {
		-re "Submitted batch job ($number)" {
			set job_id $expect_out(1,string)
			exp_continue
		}
		timeout {
			delete_res $res_name
			cancel_job $job_id
			fail "sbatch not responding"
		}
		eof {
			wait
		}
	}
	if {$job_id == 0} {
		delete_res $res_name
		fail "Batch submit failure"
	}

	# Show the job, make sure reservation tag is right
	subtest {[wait_for_job $job_id RUNNING] == 0} "A job using all cores not allocated by the reservation should run"

	cancel_job $job_id

	# (Fourth test) Submit a batch job: a job using more cores than allocated by the reservation
	# Note: thread_res_num is permanently bumped here; the fifth test
	# reuses this incremented value on purpose.
	set thread_res_num [ expr ($thread_res_num + 1) ]
	spawn $sbatch -n$thread_res_num --reservation=$res_name --output=/dev/null $file_in
	expect {
		-re "Submitted batch job ($number)" {
			set job_id $expect_out(1,string)
			exp_continue
		}
		timeout {
			delete_res $res_name
			cancel_job $job_id
			fail "sbatch not responding"
		}
		eof {
			wait
		}
	}
	if {$job_id == 0} {
		delete_res $res_name
		fail "Batch submit failure"
	}

	# Give the scheduler time to (wrongly) start the job if it would.
	sleep 10

	# Show the job, make sure reservation tag is right
	# Expected state: PENDING (request exceeds the reserved core count).
	spawn $scontrol show job $job_id
	expect {
		-re "Invalid job id specified" {
			delete_res $res_name
			cancel_job $job_id
			fail "Job ($job_id) not found"
		}
		-re "JobState=PENDING" {
			log_info "Job $job_id is PENDING as expected"
			exp_continue
		}
		-re "JobState=RUNNING" {
			delete_res $res_name
			cancel_job $job_id
			fail "Job ($job_id) is RUNNING but it should not be"
		}
		timeout {
			delete_res $res_name
			cancel_job $job_id
			fail "scontrol not responding"
		}
		eof {
			wait
		}
	}

	cancel_job $job_id

	# (Fifth test) Submit a batch job: a job specifying node in reservation and
	#              using more cores than allocated by the reservation
	# No --reservation: the job is pinned to the reserved node but needs one
	# more thread than the unreserved half provides, so it must pend while
	# the reservation exists.
	spawn $sbatch -n$thread_res_num --nodelist=[dict get $res_info $res_name "Nodes"] --nodes=1 --output=/dev/null $file_in
	expect {
		-re "Submitted batch job ($number)" {
			set job_id $expect_out(1,string)
			exp_continue
		}
		timeout {
			delete_res $res_name
			cancel_job $job_id
			fail "sbatch not responding"
		}
		eof {
			wait
		}
	}
	if {$job_id == 0} {
		delete_res $res_name
		fail "Batch submit failure"
	}

	sleep 10

	# Show the job, make sure reservation tag is right
	spawn $scontrol show job $job_id
	expect {
		-re "Invalid job id specified" {
			delete_res $res_name
			cancel_job $job_id
			fail "Job ($job_id) not found"
		}
		-re "JobState=PENDING" {
			log_info "Job $job_id is PENDING as expected"
			exp_continue
		}
		-re "JobState=RUNNING" {
			delete_res $res_name
			cancel_job $job_id
			fail "Job ($job_id) is RUNNING but it should not be"
		}
		timeout {
			delete_res $res_name
			cancel_job $job_id
			fail "scontrol not responding"
		}
		eof {
			wait
		}
	}

	# The pending job is left in the queue; delete the reservation and
	# verify the job then starts (the freed cores make it schedulable).
	#
	# Delete the reservation
	set ret_code [delete_res $res_name]
	if {$ret_code != 0} {
		cancel_job $job_id
		fail "Unable to delete reservation ($res_name)"
	}

	# Give the scheduler time to start the now-eligible job.
	sleep 10

	# Show the job
	# Expected state: RUNNING now that the reservation is gone.
	spawn $scontrol show job $job_id
	expect {
		-re "Invalid job id specified" {
			fail "Job ($job_id) not found"
		}
		-re "JobState=PENDING" {
			cancel_job $job_id
			fail "Job ($job_id) is PENDING but it should not be"
		}
		-re "JobState=RUNNING" {
			log_info "Job $job_id is RUNNING as expected"
			exp_continue
		}
		timeout {
			cancel_job $job_id
			fail "scontrol not responding"
		}
		eof {
			wait
		}
	}

	# Final cleanup: remove the last submitted job.
	cancel_job $job_id
}