File: ch-fromhost

package info (click to toggle)
charliecloud 0.43-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,116 kB
  • sloc: python: 6,021; sh: 4,284; ansic: 3,863; makefile: 598
file content (535 lines) | stat: -rwxr-xr-x 18,337 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
#!/bin/sh

# The basic algorithm here is that we build up a list of file
# source:destination pairs separated by newlines, then walk through them and
# copy them into the image.
#
# The colon separator is to avoid the difficulty of iterating through a
# sequence of pairs with no arrays or structures in POSIX sh. We could avoid
# it by taking action immediately upon encountering each file in the argument
# list, but that would (a) yield a half-injected image for basic errors like
# misspellings on the command line and (b) would require the image to be first
# on the command line, which seems awkward.
#
# The newline separator is for the same reason and also because it's
# convenient for input from --cmd and --file.
#
# Note on looping through the newlines in a variable: The approach in this
# script is to set IFS to newline, loop, then restore. This is awkward but
# seemed the least bad. Alternatives include:
#
#   1. Piping echo into "while read -r": This executes the while in a
#      subshell, so variables don't stick.
#
#   2. Here document used as input, e.g.:
#
#        while IFS= read -r FILE; do
#          ...
#        done <<EOF
#        $FILES
#        EOF
#
#      This works but seems more awkward.
#
#   3. Here string, e.g. 'while IFS= read -r FILE; do ... done <<< "$FILES"'.
#      This is a bashism.

# Resolve the library directory relative to the directory containing this
# script, then source the shared helper file from it.
#
# NOTE(review): FATAL, VERBOSE, DEBUG, INFO, usage, parse_basic_arg, ch_bin,
# and log_level are used below but not defined in this file; presumably they
# all come from base.sh -- confirm.
ch_lib=$(cd "$(dirname "$0")" && pwd)/../lib
. "${ch_lib}/base.sh"

# From here on, exit on the first unhandled command failure.
set -e

# Help text for this command, stored in $usage.
#
# The here-document delimiter is unquoted, so "$" expansions are live inside
# it. Double quotes, however, are ordinary characters in a here-document and
# must NOT be backslash-escaped: a backslash before '"' would be printed
# literally.
usage=$(cat <<EOF
Inject files from the host into an image directory, with various magic.

Usage:

  $ ch-fromhost [OPTION ...] [FILE_OPTION ...] IMGDIR

Which files to inject (one or more required; can be repeated):

  -c, --cmd     CMD   listed in the stdout of CMD
  -f, --file    FILE  listed in file FILE
  -p, --path    PATH  inject the file at PATH

  --cray-cxi      inject cray’s libfabric and cxi libraries from cray host
  --cray-gni      inject gemini/aries libfabric for MPI on xc40 machines
  --nvidia        recommended by nVidia (via "nvidia-container-cli list")

Destination within image:

  -d, --dest DST   place following files in IMGDIR/DST, overriding inference

Options:

  --print-cray-fi  print inferred destination for libfabric replacement
  --print-fi       print inferred destination for libfabric provider(s)
  --print-lib      print inferred destination for shared libraries
  --no-ldconfig    don’t run ldconfig even if we injected shared libraries
  -h, --help       print this help and exit
  -q, --quiet      make the program more quiet, can be repeated
  -v, --verbose    list the injected files
  --version        print version and exit

NOTE: This command is experimental. Features may be incomplete and/or buggy.
EOF
)

# Global state shared by the functions and the main body below. Every
# variable is explicitly reset here so that a same-named variable in the
# caller's environment cannot leak into the run.
dest=              # current --dest value; empty means infer the destination
image=             # image directory given on the command line
newline='
'
inject_files=      # source:destination files to inject
inject_mkdirs=     # directories to create in image (image-rooted)
inject_unlinks=    # files to rm -f (not rmdir or rm -Rf) (image-rooted)
ld_conf=           # directories to write into the image's ch-ofi.conf
host_libfabric=    # host libfabric.so that links against libcxi, if seen
cray_fi_dest=
cray_fi_found=
fi_prov_dest=
fi_prov_found=
lib_dest=
lib_found=
print_cray_fi_dest=
print_fi_dest=
print_lib_dest=
no_ldconfig=

# Abort via FATAL unless the value in $2 is non-empty.
#
#   $1  name to show in the error message (e.g. an option name)
#   $2  value to check
#
# The "--" is passed to FATAL ahead of the message, which itself may begin
# with dashes.
ensure_nonempty () {
    if [ -z "$2" ]; then
        FATAL -- "$1 must not be empty"
    fi
}

# Succeed if path $1 contains "/bin" or "/sbin" anywhere (i.e. it looks like
# an executable's location); fail otherwise.
is_bin () {
    case "$1" in
        */bin* | */sbin*)
            ;;
        *)
            return 1
            ;;
    esac
    return 0
}

# Succeed if path $1 looks like a shared library: it either contains "/lib"
# anywhere or ends in ".so". Fail otherwise.
is_so () {
    case "$1" in
        */lib* | *.so)
            return 0
            ;;
    esac
    return 1
}

# Append host files to the injection queue and classify them.
#
# Arguments:
#   $1  newline-separated list of host paths
#   $2  destination directory within the image; used only when no --dest is
#       in effect ($dest empty). May be empty, in which case the destination
#       is inferred later.
#
# Effects (all via global variables):
#   inject_files    gets one "source:destination" entry per path
#   lib_found       set to "yes" for ordinary shared libraries and for a
#                   libfabric.so that does not link against libcxi
#   cray_fi_found,  set when a libfabric.so links against libcxi
#   host_libfabric
#   fi_prov_found   set for "*-fi.so" provider libraries
#   ld_conf         gains the directories of each provider's dependencies
#
# Calls FATAL if a path contains a colon, because colon is the separator
# within inject_files entries.
enqueue_file () {
    old_ifs="$IFS"
    IFS="$newline"
    d="${dest:-$2}"
    VERBOSE "enqueue file(s)"
    for f in $1; do
        case $f in
            *:*)
                FATAL "paths can't contain colon: ${f}"
                ;;
        esac
        if is_so "$f"; then
            case $f in
            *libfabric.so)
                if ldd "$f" | grep libcxi > /dev/null 2>&1; then
                    DEBUG "cray libfabric: ${f}"
                    cray_fi_found=yes
                    host_libfabric=$f
                else
                    DEBUG "libfabric: ${f}"
                    lib_found=yes
                fi
                ;;
            *-fi.so)
                DEBUG "libfabric shared provider: ${f}"
                fi_prov_found=yes
                # Providers, like Cray's libgnix-fi.so, link against paths that
                # need to be bind-mounted at run-time. Some of these paths need
                # to be added to ldconf; thus, add the linked paths to a ldconf
                # list to be added into the image.
                #
                # The scratch variables are prefixed so they cannot clobber
                # same-named globals of a caller that is itself looping over
                # ldd output.
                ef_deps=$(ldd "$f" 2>&1 | grep lib | awk '{print $3}' | sort -u)
                for ef_dep in $ef_deps; do
                    ef_dir=$(dirname "$(readlink -f "$ef_dep")")
                    case $ef_dir in
                        *libfabric.so*)
                            # Avoid host libfabric.so.
                            ;;
                        *)
                            # Avoid duplicates. Compare whole lines literally
                            # (-F -x): a substring or regex match would wrongly
                            # treat e.g. /opt as already present when only
                            # /opt/foo/lib is queued.
                            if ! printf '%s\n' "$ld_conf" \
                                 | grep -Fxq -- "$ef_dir"; then
                                enqueue_ldconf "$ef_dir"
                            fi
                            ;;
                    esac
                done
                ;;
            *)
                DEBUG "shared library: ${f}"
                lib_found=yes
                ;;
            esac
        fi
        # This adds a delimiter only for the second and subsequent files.
        # https://chris-lamb.co.uk/posts/joining-strings-in-posix-shell
        #
        # If destination empty, we'll infer it later.
        inject_files="${inject_files:+$inject_files$newline}$f:$d"
    done
    IFS="$old_ifs"
}

# Append directory $1 to the newline-separated list in $ld_conf.
#
# The bare test on the first line acts as an assertion: while "set -e" is in
# effect, an empty argument terminates the script.
enqueue_ldconf () {
    test -n "$1"
    if [ -n "$ld_conf" ]; then
        ld_conf="${ld_conf}${newline}${1}"
    else
        ld_conf=$1
    fi
}

# Append image-rooted directory $1 to the newline-separated list in
# $inject_mkdirs.
#
# The bare test on the first line acts as an assertion: while "set -e" is in
# effect, an empty argument terminates the script.
queue_mkdir () {
    test -n "$1"
    if [ -n "$inject_mkdirs" ]; then
        inject_mkdirs="${inject_mkdirs}${newline}${1}"
    else
        inject_mkdirs=$1
    fi
}

# Append image-rooted file $1 to the newline-separated list in
# $inject_unlinks.
#
# The bare test on the first line acts as an assertion: while "set -e" is in
# effect, an empty argument terminates the script.
queue_unlink () {
    test -n "$1"
    if [ -n "$inject_unlinks" ]; then
        inject_unlinks="${inject_unlinks}${newline}${1}"
    else
        inject_unlinks=$1
    fi
}

# Write message $1 to standard error as a single line with a warning prefix.
# The message is passed as data, never as a printf format.
warn () {
    printf '%s %s\n' '< warn >!' "$1" >&2
}

# Warn about libfabric-related variables inherited from the environment:
# one warning if FI_PROVIDER is non-empty, another if FI_PROVIDER_PATH is.
warn_fi_var () {
    [ -z "$FI_PROVIDER" ] \
        || warn "FI_PROVIDER=$FI_PROVIDER set; this will be preferred provider at runtime."
    [ -z "$FI_PROVIDER_PATH" ] \
        || warn "FI_PROVIDER_PATH=$FI_PROVIDER_PATH set; --dest required"
}

# With no arguments at all there is nothing to do; call usage with an
# argument of 1. (usage is not defined in this file.)
if [ "$#" -eq 0 ]; then
        usage 1
fi

# Process the command line left to right. Order matters: file options are
# enqueued as they are encountered, and enqueue_file uses whatever --dest
# value is in effect at that moment, so --dest only affects the file options
# that follow it. The single non-option argument is the image directory.
while [ $# -gt 0 ]; do
    opt=$1; shift
    # parse_basic_arg is not defined in this file; when it returns success
    # for an option, that option gets no further handling here.
    if ! parse_basic_arg "$opt"; then
        case $opt in
            -c|--cmd)
                ensure_nonempty --cmd "$1"
                # $1 is expanded unquoted, so the CMD string is word-split
                # into a command name and its arguments before being run.
                out=$($1) || FATAL "command failed: $1"
                enqueue_file "$out"
                shift
                ;;
            --cray-cxi)
                # The file list comes from the environment, not the command
                # line.
                warn_fi_var
                if [ -z "$CH_FROMHOST_OFI_CXI" ]; then
                    FATAL "CH_FROMHOST_OFI_CXI is not set"
                fi
                enqueue_file "$CH_FROMHOST_OFI_CXI"
                ;;
            --cray-gni)
                # The file list comes from the environment, not the command
                # line.
                warn_fi_var
                if [ -z "$CH_FROMHOST_OFI_GNI" ]; then
                    FATAL "CH_FROMHOST_OFI_GNI is not set"
                fi
                enqueue_file "$CH_FROMHOST_OFI_GNI"
                ;;
            -d|--dest)
                ensure_nonempty --dest "$1"
                dest=$1
                shift
                ;;
            -f|--file)
                ensure_nonempty --file "$1"
                # The file's contents are the newline-separated path list.
                out=$(cat "$1") || FATAL "cannot read file: ${1}"
                enqueue_file "$out"
                shift
                ;;
            # Note: Specifying any of the --print-* options along with one of
            # the file specification options will result in all the file
            # gathering and checking work being discarded.
            #
            # Each --print-* option also sets the matching *_found flag so
            # that the corresponding destination gets inferred even though
            # no file of that kind was enqueued.
            --print-cray-fi)
                cray_fi_found=yes
                print_cray_fi_dest=yes
                ;;
            --print-fi)
                fi_prov_found=yes
                print_fi_dest=yes
                ;;
            --print-lib)
                lib_found=yes
                print_lib_dest=yes
                ;;
            --no-ldconfig)
                no_ldconfig=yes
                ;;
            --nvidia)
                    out=$(nvidia-container-cli list --binaries --libraries) \
                || FATAL "nvidia-container-cli failed; does this host have GPUs?"
                enqueue_file "$out"
                ;;
            -p|--path)
                ensure_nonempty --path "$1"
                enqueue_file "$1"
                shift
                ;;
            -*)
                # Any other dash-prefixed word is an unknown option.
                INFO "invalid option: ${opt}"
                usage
                ;;
            *)
                # Anything else is the image directory; only one is allowed
                # and it must already exist.
                ensure_nonempty "image path" "${opt}"
                [ -z "$image" ] || FATAL "duplicate image: ${opt}"
                [ -d "$opt" ] || FATAL "image not a directory: ${opt}"
                image="$opt"
                ;;
        esac
    fi
done

# A libfabric provider is involved and FI_PROVIDER_PATH is set in the
# environment, but no --dest is in effect: refuse to guess the destination.
if [ -z "$dest" ] && [ -n "$fi_prov_found" ] && [ -n "$FI_PROVIDER_PATH" ]; then
    FATAL "FI_PROVIDER_PATH set; missing --dest"
fi

# An image directory is mandatory.
if [ -z "$image" ]; then
    FATAL "no image specified"
fi

# Cray libfabric handling: find where the image keeps its libfabric.so (that
# directory becomes $cray_fi_dest), then enqueue every library the host
# libfabric links against that the image does not already contain, and
# record those libraries' directories in $ld_conf.
if [ -n "$cray_fi_found" ]; then
    # There is no Slingshot provider CXI; to leverage slingshot we need to
    # replace the image libfabric.so with Cray's.
    VERBOSE "searching image for inferred libfabric destination"
    img_libfabric=$(find "$image" -name  "libfabric.so")
    [ -n "$img_libfabric" ] || FATAL "libfabric.so not found in $image"
    DEBUG "found $img_libfabric"
    if [ "$(echo "$img_libfabric" | wc -l)" -ne 1 ]; then
        warn 'found more than one libfabric.so'
    fi
    # Strip the image prefix to get an image-rooted path.
    img_libfabric_path=$(echo "$img_libfabric" | sed "s@$image@@")
    cray_fi_dest=$(dirname "/$img_libfabric_path")

    # Since cray's libfabric isn't a standard provider, to use slingshot we
    # must also add any missing linked libraries from the host.
    VERBOSE "adding cray libfabric libraries"
    ldds=$(ldd "$host_libfabric" 2>&1 | grep lib | awk '{print $3}' | sort -u)
    for l in $ldds; do
        # Do not replace any libraries found in the image, experimentation has
        # shown this to be problematic. Perhaps revisit in the future. For now,
        # both MPICH and OpenMPI examples work with this conservative approach.
        file_found=$(find "${image}" -name "$(basename "$l")")
        if [ -n "$file_found" ]; then
            DEBUG "skipping $l"
            continue
        fi
        # Capture the directory before calling enqueue_file: that function
        # works on this script's global variables, so $l should not be
        # relied upon afterwards.
        file_dir=$(dirname "$l")
        enqueue_file "$l"
        # Avoid duplicates. Compare whole lines literally (-F -x); a
        # substring or regex match would wrongly treat e.g. /opt as already
        # present when only /opt/foo/lib is listed.
        if ! printf '%s\n' "$ld_conf" | grep -Fxq -- "$file_dir"; then
            enqueue_ldconf "$file_dir"
        fi
    done
fi

# Infer $lib_dest, the image-rooted directory for injected shared libraries,
# by running the image's own ldconfig (through ch-run) and taking the first
# directory it reports.
if [ -n "$lib_found" ]; then
    # We want to put the libraries in the first directory that ldconfig
    # searches, so that we can override (or overwrite) any of the same library
    # that may already be in the image.
    VERBOSE "asking ldconfig for inferred shared library destination"
    # "ldconfig -Nv" gives pointless warnings on stderr even if successful; we
    # don't want to show those to users (unless -vv or higher). However, we
    # don't want to simply pipe stderr to /dev/null because this hides real
    # errors. Thus, use the following abomination to pipe stdout and stderr to
    # *separate grep commands*. See: https://stackoverflow.com/a/31151808
    #
    # stderr_filter is an extended regex of stderr lines to DROP: empty lines
    # and lines ending in one of the listed phrases. At higher log levels it
    # is set to a string that should match nothing, so every line is shown.
    # (log_level is not set in this file.)
    if [ "$log_level" -lt 2 ]; then  # VERBOSE or lower
        stderr_filter='(^|dynamic linker, ignoring|given more than once|No such file or directory)$'
    else                             # DEBUG or higher
        stderr_filter=weird_al_yankovic_will_not_appear_in_ldconfig_output
    fi
    # How the redirections below fit together:
    #   - Inside the braces, ldconfig's stderr is sent into the pipe feeding
    #     "grep -Ev" (2>&1), while its stdout is moved to fd 3 (1>&3).
    #   - On the brace group, fd 3 is connected to the outer pipe (3>&1), so
    #     ldconfig's stdout reaches "grep -E '^/'"; the group's own stdout,
    #     i.e. the filtered stderr text, goes to this script's stderr (1>&2).
    #   - The outer pipeline keeps lines starting with "/", cuts each at the
    #     first colon, and keeps only the first result.
    lib_dest=$( { "${ch_bin}/ch-run" "$image" -- /sbin/ldconfig -Nv \
                  2>&1 1>&3 3>&- | grep -Ev "$stderr_filter" ; } \
                3>&1 1>&2 | grep -E '^/' | cut -d: -f1 | head -1 )
    # Sanity checks: the result must be non-empty and start with "/".
    [ -n "$lib_dest" ] || FATAL 'empty path from ldconfig'
    [ -z "${lib_dest%%/*}" ] || FATAL "bad path from ldconfig: ${lib_dest}"
    VERBOSE "inferred shared library destination: ${image}/${lib_dest}"
fi

# Infer $fi_prov_dest, the image-rooted directory for injected libfabric
# shared providers, and queue its creation.
if [ -n "$fi_prov_found" ]; then
    # The libfabric provider can be specified with FI_PROVIDER. The path
    # searched for shared providers can be specified with FI_PROVIDER_PATH
    # (undocumented). This complicates the inferred destination because these
    # variables can be inherited from the host or explicitly set in the
    # image's /ch/environment file.
    #
    # For simplicity, the inferred injection destination is always the
    # 'libfabric' directory at the path where libfabric.so is found. If it
    # does not exist, create it. Warn if FI_PROVIDER_PATH is found in the
    # image's /ch/environment file.
    VERBOSE "searching ${image} for libfabric shared provider destination"
    ch_env_p=$(grep -E '^FI_PROVIDER_PATH=' "${image}/ch/environment") \
             || true # avoid -e exit
    # Keep only the text after the last "=".
    ch_env_p=${ch_env_p##*=}
    if  [ -n "$ch_env_p" ]; then
       warn "FI_PROVIDER_PATH in ${image}/ch/environment; consider --dest"
    fi
    # NOTE(review): unlike the Cray libfabric search, an empty result here is
    # not checked; with no libfabric.so in the image the destination below
    # appears to become "//libfabric" -- confirm whether that is intended.
    img_libfabric=$(find "$image" -name 'libfabric.so')
    # Strip the image prefix to get an image-rooted path.
    img_libfabric_path=$(echo "$img_libfabric" | sed "s@$image@@")
    DEBUG "found: ${image}${img_libfabric_path}"
    fi_prov_dest=$(dirname "/${img_libfabric_path}")
    fi_prov_dest="${fi_prov_dest}/libfabric"
    queue_mkdir "$fi_prov_dest"
    VERBOSE "inferred provider destination: $fi_prov_dest"
fi

# Handle the --print-* options: print the requested inferred destination on
# stdout and exit successfully without injecting anything. Any files that
# were also specified on the command line are deliberately discarded. If
# several --print-* options were given, only the first in the order below
# (lib, fi, cray-fi) is honored.
if [ -n "$print_lib_dest" ]; then
    echo "$lib_dest"
    exit 0
fi

if [ -n "$print_fi_dest" ]; then
    echo "$fi_prov_dest"
    exit 0
fi

if [ -n "$print_cray_fi_dest" ]; then
    echo "$cray_fi_dest"
    exit 0
fi

# On hosts that have a Cray release file, queue the directories that must
# exist inside the image so that ch-run has mount points for host paths.
cle_release=/etc/opt/cray/release/cle-release
if [ -f "$cle_release" ]; then
    # Cray needs a pile of hugetlbfs filesystems mounted at
    # /var/lib/hugetlbfs/global. Create mount point for ch-run.
    queue_mkdir /var/lib/hugetlbfs

    # UGNI: only when the release file is a regular file, not a symlink.
    if [ ! -L "$cle_release" ]; then
        # In order:
        #   1. ALPS libraries require the contents of the spool directory
        #      to be present at the same path as the host; ch-run
        #      bind-mounts it later.
        #   2. The cray-ugni provider links against Cray's libwlm_detect.
        #   3. libwlm_detect.so requires file(s) to be present at the same
        #      path as on the host.
        #   4-7. The OFI uGNI provider, libgnix-fi.so, links against the
        #      Cray host's libxpmem, libudreg, libalpsutil, libalpslli, and
        #      libugni.
        for cray_dir in \
            /var/opt/cray/alps/spool \
            /opt/cray/wlm_detect \
            /etc/opt/cray/wlm_detect \
            /opt/cray/udreg \
            /opt/cray/xpmem \
            /opt/cray/ugni \
            /opt/cray/alps
        do
            queue_mkdir "$cray_dir"
        done
    fi

    # CXI (slingshot): newer Cray Shasta environments require the contents
    # of this directory to be present at the same path as the host.
    if [ -f /opt/cray/etc/release/cos ]; then
        queue_mkdir /var/spool/slurmd
    fi
fi

# Perform the injection: process the queued unlinks and mkdirs, write the
# ld.so configuration for libfabric-related libraries, then copy every
# queued file into its (given or inferred) destination inside the image.
[ "$inject_files" ] || FATAL "empty file list"

VERBOSE "injecting into image: ${image}"

# The queues are newline-separated, so split on newline only for the
# duration of this section.
old_ifs="$IFS"
IFS="$newline"

# Process unlink list.
for u in $inject_unlinks; do
    DEBUG "deleting: ${image}${u}"
    rm -f "${image}${u}"
done

# Process bind-mount destination targets.
for d in $inject_mkdirs; do
    DEBUG "mkdir: ${image}${d}"
    mkdir -p "${image}${d}"
done

# Process ldconfig targets.
if [ "$fi_prov_found" ] || [ "$cray_fi_found" ]; then
    # ch-ofi.conf lives in ld.so.conf.d; an image that lacks ld.so.conf may
    # well lack that directory too, so make sure it exists before writing.
    if [ ! -d "${image}/etc/ld.so.conf.d" ]; then
        DEBUG "creating ld.so.conf.d"
        mkdir -p "${image}/etc/ld.so.conf.d"
    fi
    if [ ! -f "${image}/etc/ld.so.conf" ]; then
        DEBUG "creating empty ld.so.conf"
        touch "${image}/etc/ld.so.conf"
    fi
    if ! grep -F 'include ld.so.conf.d/*.conf' "${image}/etc/ld.so.conf" \
         > /dev/null 2>&1; then
        DEBUG "ld.so.conf: adding 'include ld.so.conf.d/*.conf'"
        echo 'include ld.so.conf.d/*.conf' >> "${image}/etc/ld.so.conf"
    fi
    # Prepare image ch-ofi.conf (truncate or create it).
    printf '' > "${image}/etc/ld.so.conf.d/ch-ofi.conf"
    # add ofi dso provider ld library dirs.
    for c in $ld_conf; do
        DEBUG "ld.so.conf: adding ${c}"
        echo "$c" >> "${image}/etc/ld.so.conf.d/ch-ofi.conf"
    done
fi

for file in $inject_files; do
    # Each entry is "source:destination"; sources never contain a colon.
    f="${file%%:*}"
    d="${file#*:}"
    infer=
    # An empty destination means it has to be inferred from the kind of file.
    if is_bin "$f" && [ -z "$d" ]; then
        d=/usr/bin
        infer=" (inferred)"
    elif is_so "$f" && [ -z "$d" ]; then
        case "$f" in
        *libfabric.so)
            d=$lib_dest
            # A libfabric that links against libcxi replaces the image's own.
            if ldd "$f" | grep libcxi > /dev/null 2>&1; then
                d=$cray_fi_dest
            fi
            ;;
        *-fi.so)
            d=$fi_prov_dest
            ;;
        *)
            d=$lib_dest
            ;;
        esac
        infer=" (inferred)"
    fi
    VERBOSE "${f} -> ${d}${infer}"
    [ "$d" ] || FATAL "no destination for: ${f}"
    [ -z "${d%%/*}" ] || FATAL "not an absolute path: ${d}"
    [ -d "${image}${d}" ] || FATAL "not a directory: ${image}${d}"
    if [ ! -w "${image}${d}" ]; then
        # Some images unpack with unwriteable directories; fix. This seems
        # like a bit of a kludge to me, so I'd like to remove this special
        # case in the future if possible. (#323)
        INFO "${image}${d} not writeable; fixing"
        chmod u+w "${image}${d}" || FATAL "can't chmod u+w: ${image}${d}"
    fi
    cp --dereference --preserve=all "$f" "${image}${d}" \
        || FATAL "cannot inject: ${f}"
done
IFS="$old_ifs"

# Refresh the image's linker cache unless --no-ldconfig was given or nothing
# library-like was injected; for libfabric-related injections, additionally
# check the injected files with ldd inside the image.
if       [ -z "$no_ldconfig" ] \
    && {    [ "$lib_found" ] \
         || [ "$fi_prov_found" ] \
         || [ "$cray_fi_found" ] ;} then
    VERBOSE "running ldconfig"
    # -w: ldconfig has to write its cache inside the image.
    "${ch_bin}/ch-run" -w "$image" -- /sbin/ldconfig 2> /dev/null \
        || FATAL 'ldconfig error'
    if [ -n "$fi_prov_found" ] || [ -n "$cray_fi_found" ]; then
        VERBOSE "validating ldconfig cache"
        # NOTE(review): $inject_files is split with whatever IFS is current
        # at this point, not necessarily newline only -- confirm that paths
        # containing spaces are handled as intended.
        for file in $inject_files; do
            # Take the source's basename, then look that name up inside the
            # image (skipping the pruned directories) to get its in-image
            # path(s).
            f="$(basename "${file%%:*}")"
            f=$(   "${ch_bin}/ch-run" "$image" \
                -- find / \
                        -not \( -path /proc -prune \) \
                        -not \( -path /dev -prune \) \
                        -not \( -path /tmp -prune \) \
                        -not \( -path /sys -prune \) \
                        -not \( -path /var/opt/cray -prune \) \
                        -not \( -path /etc/opt/cray -prune \) \
                        -name  "$f")
            # Fail if ldd, run inside the image, reports anything matching
            # 'not found '.
            # NOTE(review): the pattern ends with a space; ldd lines for a
            # missing library typically end right after "not found", so this
            # may only match messages that continue past it -- confirm.
            if [ "$("${ch_bin}/ch-run" "$image" -- ldd "$f" | grep -c 'not found ')" -ne 0 ]; then
                FATAL "ldconfig: '${ch_bin}/ch-run $image -- ldd $f' failed"
            fi
        done
    fi
else
    VERBOSE "not running ldconfig"
fi
echo 'done'