1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805
|
#!/bin/bash
# Copyright (c) 2020, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are those
# of the authors and should not be interpreted as representing official policies,
# either expressed or implied, of the FreeBSD Project.
# Script-wide settings shared by the functions below.

# Debug mode: set DEBUG=1 in the environment to enable echo_dbg output.
DEBUG=${DEBUG:-0}
# bf-reg helper located next to this script. Both expansions are quoted so
# that a script path containing whitespace is not split into several words.
BF_REG="$(dirname "$0")/bf-reg"
RSHIM_PIPE="/tmp/rshim_pipe"
LOG_FILE="/tmp/bfb-install.log"
# These flags are set to 1 later, once the prerequisite checks for the
# corresponding run_cmd mode have passed.
run_cmd_local_ready=0 # whether run_cmd* local functions are ready to use
run_cmd_remote_ready=0 # whether run_cmd* remote functions are ready to use
# Print the command synopsis and the supported options to stdout.
# Callers that want the text on stderr redirect the call themselves.
usage ()
{
    # One argument per output line; '%s\n' is re-applied to every argument.
    printf '%s\n' \
        "Usage: $0 [options]" \
        "Options:" \
        " -b, --bfb <bfb_file> BFB image file to use." \
        " -c, --config <config_file> Optional configuration file." \
        " -f, --rootfs <rootfs_file> Optional rootfs file." \
        " -h, --help Show help message." \
        " -k, --keep-log Do not clear the log after reading during bfb install." \
        " -m, --remote-mode <mode> Remote mode to use (scp, nc, ncpipe)." \
        " -r, --rshim <device> Rshim device, format [<ip>:<port>:]rshim<N>." \
        " -R, --reverse-nc Reverse netcat mode." \
        " -u, --runtime Runtime upgrade (BF3 + local rshim only)." \
        " -v, --verbose Enable verbose output."
}
# Print the arguments only when the global $verbose equals 1.
# Always returns 0 when nothing is printed.
#
# Example:
# echo_v "Your message"
echo_v() {
    # A failed (or malformed) comparison means "not verbose": stay silent.
    [ "$verbose" -eq 1 ] || return 0
    echo "$@"
}
# Print the arguments only when the global $DEBUG is non-empty and equals 1.
# Always returns 0 when nothing is printed.
#
# Example:
# echo_dbg "Your message"
echo_dbg() {
    # '! [ ... -eq 1 ]' also covers a non-numeric DEBUG, where the test errors.
    if [ -z "$DEBUG" ] || ! [ "$DEBUG" -eq 1 ]; then
        return 0
    fi
    echo "$@"
}
# Execute a shell command string either on this host or on the remote host.
#
# $1: where to run it, "local" or "remote"
# $2: the command line, passed as a single string
#
# Global variables used:
# $ip, $sudo_prefix, $run_cmd_local_ready, $run_cmd_remote_ready
#
# Returns the status of the executed command. Exits the script when fewer
# than two arguments are given or when the requested mode has not been marked
# ready; returns 1 for an unknown mode.
#
# Example:
# run_cmd local "ls -l"
# run_cmd remote "ls -l"
run_cmd()
{
    if [ $# -lt 2 ]; then
        echo "Error: run_cmd() needs at least 2 arguments."
        exit 1
    fi
    local where=$1
    local cmdline=$2
    echo_dbg "Running command in $where mode: $cmdline"
    case "$where" in
    local)
        if [ $run_cmd_local_ready -eq 0 ]; then
            echo "Error: 'run_cmd local' are not ready to use"
            exit 1
        fi
        # $sudo_prefix is intentionally unquoted: when empty it must vanish.
        $sudo_prefix sh -c "$cmdline"
        ;;
    remote)
        if [ $run_cmd_remote_ready -eq 0 ]; then
            echo "Error: 'run_cmd remote' are not ready to use"
            exit 1
        fi
        # Run the command on the remote host over SSH as root.
        ssh root@$ip "$cmdline"
        ;;
    *)
        echo "Error: invalid mode: $where"
        return 1
        ;;
    esac
}
# Same as run_cmd, but terminate the script when the command fails.
#
# $1: where to run it, "local" or "remote"
# $2: the command line, passed as a single string
# $3: optional text used in place of "Error: Command failed"
#
# On failure the message "<text>: [<command>]" is printed and the script exits
# with the failing command's status. Note that when this function is called
# inside a command substitution, only that subshell exits.
run_cmd_exit()
{
    if [ $# -lt 2 ]; then
        echo "Error: run_cmd_exit() requires at least 2 arguments."
        exit 1
    fi
    local where=$1
    local cmdline=$2
    local failure_text="${3:-Error: Command failed}: [$cmdline]"
    # On failure the AND-list leaves run_cmd's status in $? for the next line.
    run_cmd "$where" "$cmdline" && return 0
    local status=$?
    echo "$failure_text"
    exit $status
}
# Print the local source IP address that would be used to reach $ip.
#
# Global variables used: $ip
#
# Output: the address on stdout. Diagnostics go to stderr so that callers
# capturing the result with $(get_local_ip) never mistake an error message
# for an address.
# Returns: 0 on success, 1 when $ip is unset, routing fails, or no "src"
# field is present in the routing answer.
get_local_ip()
{
    local route_info src_ip
    if [ -z "$ip" ]; then
        echo "The global variable \$ip is not defined." >&2
        return 1
    fi
    # Capture both stdout and stderr of the "ip route get" command.
    route_info=$(ip route get "$ip" 2>&1)
    # Check for known error messages in the command output.
    if printf '%s\n' "$route_info" | grep -qi "error"; then
        echo "Error: Invalid IP address or routing error." >&2
        return 1
    fi
    # The local address is the field that follows the "src" keyword.
    src_ip=$(printf '%s\n' "$route_info" | \
        awk '/src/ {for(i=1;i<=NF;i++) if($i=="src") {print $(i+1); exit}}')
    if [ -z "$src_ip" ]; then
        echo "Failed to determine the local IP address." >&2
        return 1
    fi
    echo "$src_ip"
}
# Drain the boot FIFO of the rshim device through the bf-reg helper.
#
# Global variables used: $BF_REG, $rshim_node
#
# Repeatedly reads the count register (0x13001000, BOOT_FIFO_COUNT) and, while
# it is non-zero, reads the data register (0x13002000, BOOT_FIFO_DATA) and
# discards the value.
clear_boot_fifo()
{
    local pending
    while :; do
        # The third field of the bf-reg output carries the register value.
        pending=$(${BF_REG} $(basename ${rshim_node}) 0x13001000.64 | awk '{print $3}')
        # Arithmetic expansion normalises the value (an empty string becomes 0).
        pending=$((pending))
        if [ $pending -eq 0 ]; then
            break
        fi
        ${BF_REG} $(basename ${rshim_node}) 0x13002000.64 >/dev/null
    done
}
# Write the boot stream (bfb, then optional cfg and rootfs) to the boot node
# of a locally attached rshim device.
#
# Global variables used:
# $bfb, $cfg, $rootfs, $pv, $rshim_node, $runtime, $sudo_prefix, $BF_REG
#
# Exits the script with status 1 when the write fails.
push_boot_stream_via_local_rshim()
{
    local dev_name push_cmd
    echo "Pushing bfb${cfg:+ + cfg}${rootfs:+ + rootfs}"
    if [ $runtime -eq 1 ]; then
        dev_name=$(basename ${rshim_node})
        # Skip reset when pushing bfb
        echo "BOOT_RESET_SKIP 1" > ${rshim_node}/misc
        # Clear the current boot fifo
        clear_boot_fifo
        # Set SP2.BIT2=1
        ${BF_REG} $dev_name 0x13000c48.64 0x4 >/dev/null
        # Set SWINT2.BIT2
        ${BF_REG} $dev_name 0x13000318.64 0x4 >/dev/null
    fi
    # When pv is available the stream is routed through it; the trailing
    # "cat -" then performs the final write to the boot node.
    push_cmd="cat ${bfb} ${cfg:+$cfg} ${rootfs:+${rootfs}} ${pv:+| ${pv} | cat -} > ${rshim_node}/boot"
    $sudo_prefix sh -c "$push_cmd" || {
        echo "Error: Failed to push boot stream via local rshim"
        exit 1
    }
    if [ $runtime -eq 1 ]; then
        echo "BOOT_RESET_SKIP 0" > ${rshim_node}/misc
    fi
}
# Stream the boot images to the boot node of a remote rshim device through an
# SSH connection (the "scp" remote mode).
#
# Global variables used:
# $bfb, $cfg, $rootfs, $pv, $ip, $rshim_node
#
# Exits the script with status 1 when the transfer pipeline fails.
push_boot_stream_via_remote_rshim_scp()
{
    local stream_cmd
    echo "Pushing bfb${cfg:+ + cfg}${rootfs:+ + rootfs} to ${ip} via scp"
    # Concatenate the images locally, optionally through pv, and let a remote
    # "cat" write the bytes into the boot node.
    stream_cmd="cat ${bfb} ${cfg:+$cfg} ${rootfs:+${rootfs}} ${pv:+| ${pv} | cat -} | ssh root@$ip \"cat > ${rshim_node}/boot\""
    if sh -c "$stream_cmd"; then
        return 0
    fi
    echo "Error: Failed to push boot stream via remote rshim with scp"
    exit 1
}
# Push the boot stream to a remote rshim device with netcat.
#
# Global variables used:
# $bfb, $cfg, $rootfs, $pv, $ip, $port, $rshim_node, $reverse_nc
#
# With $reverse_nc = 1 the remote host listens and this host connects to it;
# otherwise this host listens and the remote host connects back, which needs
# the local address as seen from the remote side.
#
# Exits the script when a step fails, including when the local address cannot
# be determined.
push_boot_stream_via_remote_rshim_nc()
{
    local timeout=20 # in seconds
    local data="${bfb} ${cfg:+$cfg} ${rootfs:+${rootfs}}"
    local start_nc_server_cmd nc_client_cmd local_ip

    echo "Pushing $data to ${ip} via netcat $( [ "$reverse_nc" -eq 0 ] || echo '(in reverse mode)' )"
    if [ "$reverse_nc" -eq 1 ]; then
        # Remote as the netcat server and local as the client
        echo "Starting a netcat server on the remote host..."
        # We use nohup to keep the server running after the SSH session is
        # closed. We use dd instead of writing directly to the file to avoid
        # the error of "write: Interrupted system call"; the bs size doesn't
        # matter.
        start_nc_server_cmd="nohup nc -l -p $port | dd bs=1M of=${rshim_node}/boot 2>/dev/null &"
        run_cmd_exit remote "$start_nc_server_cmd"
        sleep 3 # delay to make sure the server is ready
        wait_for_remote_process nc $timeout \
            "Error: Failed to start the remote netcat server"
        echo "Sending bitstream to a remote host with RSHIM..."
        nc_client_cmd="cat $data ${pv:+| ${pv} | cat -} | nc $ip $port"
        run_cmd_exit local "$nc_client_cmd"
    else
        # Local as the netcat server and remote as the client.
        # Resolve the address the remote side must connect to before starting
        # anything, and stop if it is unknown: an empty or bogus address would
        # otherwise be pasted into the remote nc command line.
        if ! local_ip=$(get_local_ip) || [ -z "$local_ip" ]; then
            echo "Error: Failed to determine the local IP address used to reach ${ip}" >&2
            exit 1
        fi
        echo "Starting a netcat server on the local host..."
        start_nc_server_cmd="cat $data ${pv:+| ${pv} | cat -} | nc --send-only -l -p $port &"
        run_cmd_exit local "$start_nc_server_cmd"
        sleep 3 # delay to make sure the server is ready
        echo "Sending bitstream from remote to local host with RSHIM..."
        # must put remote nc in the background to avoid blocking the script
        nc_client_cmd="nohup nc $local_ip $port 2>/dev/null | dd bs=1M of=${rshim_node}/boot &>/dev/null"
        run_cmd_exit remote "$nc_client_cmd" &
        wait_for_remote_process nc $timeout \
            "Error: Failed to start the remote netcat client"
    fi
}
# Run a script remotely via SSH to forward data from a network receiver (netcat
# server or client) to the rshim device node. This separate script is needed
# for improved performance
#
# Global variables used: $ip, $RSHIM_PIPE, $rshim_node
#
# The remote script removes anything already present at the pipe path, creates
# a FIFO there, and then runs dd from the FIFO into the rshim boot node. The
# ssh command stays in the foreground until the remote script ends, so the
# caller in this file starts this function in the background.
run_pipe_to_rshim_script()
{
# Execute script remotely via SSH
# The heredoc delimiter is unquoted: ${RSHIM_PIPE...} and ${rshim_node...} are
# expanded on this host before the text is sent, while the references escaped
# as \$ are left for the remote shell to expand.
# shellcheck disable=SC2087
ssh root@$ip 'sh -s' << EOF
RSHIM_PIPE=${RSHIM_PIPE:-"/tmp/rshim_pipe"}
RSHIM_BOOT_NODE=${rshim_node:-"/dev/rshim0"}/boot
BLOCK_SIZE=2048000 # smaller block size performs worse
if [ -e "\$RSHIM_PIPE" ]; then
rm \$RSHIM_PIPE
fi
mkfifo \$RSHIM_PIPE
if ! dd if=\$RSHIM_PIPE of=\$RSHIM_BOOT_NODE bs=\$BLOCK_SIZE; then
echo "Error occurred in dd command"
exit 1
fi
EOF
}
# Block until a process with the given exact name shows up on the remote
# host, polling once per second over SSH.
#
# $1: process name (matched with "pgrep -x")
# $2: timeout in seconds (optional, defaults to $default_timeout)
# $3: custom error message (optional)
#
# The whole polling loop runs remotely under "timeout"; if it does not finish
# in time, run_cmd_exit reports the error message and exits the script.
wait_for_remote_process()
{
    default_timeout=10
    local proc_name=$1
    local wait_secs=${2:-$default_timeout}
    local failure_msg="${3:-Error: Time out waiting for launching process $proc_name}"
    local poll_loop="while true; do pgrep -x $proc_name >/dev/null && break; sleep 1; done"
    run_cmd_exit remote "timeout $wait_secs sh -c \"$poll_loop\"" "$failure_msg"
}
# Push the boot stream to a remote rshim device with netcat and a persistent
# named pipe on the remote host.
#
# Global variables used:
# $bfb, $cfg, $rootfs, $pv, $ip, $port, $rshim_node, $reverse_nc, $RSHIM_PIPE
#
# A remote helper (run_pipe_to_rshim_script) copies whatever arrives in
# $RSHIM_PIPE into the rshim boot node; netcat only has to fill the pipe.
# With $reverse_nc = 1 the remote host listens and this host connects to it;
# otherwise this host listens and the remote host connects back, which needs
# the local address as seen from the remote side.
#
# Exits the script when a step fails, including when the local address cannot
# be determined.
push_boot_stream_via_remote_rshim_ncpipe()
{
    local timeout=20 # in seconds
    local data="${bfb} ${cfg:+$cfg} ${rootfs:+${rootfs}}"
    local nc_server_cmd nc_client_cmd local_ip

    if [ "$reverse_nc" -ne 1 ]; then
        # Resolve the address the remote side must connect to before starting
        # any process, and stop if it is unknown: an empty or bogus address
        # would otherwise be pasted into the remote nc command line.
        if ! local_ip=$(get_local_ip) || [ -z "$local_ip" ]; then
            echo "Error: Failed to determine the local IP address used to reach ${ip}" >&2
            exit 1
        fi
    fi

    echo "Starting the remote pipe-to-rshim process..."
    run_pipe_to_rshim_script &
    wait_for_remote_process dd $timeout \
        "Error: Failed to start the remote pipe-to-rshim process"
    if [ "$reverse_nc" -eq 1 ]; then
        # Reverse mode: the remote host is the netcat server and the local
        # host is the client.
        echo "Starting the remote netcat server"
        nc_server_cmd="nohup nc -l -p $port | dd bs=1M of=$RSHIM_PIPE 2>/dev/null &"
        run_cmd_exit remote "$nc_server_cmd"
        # It could be very slow to start the netcat server on the remote.
        wait_for_remote_process nc $timeout \
            "Error: Failed to start the remote netcat server"
        # Push the boot stream to remote rshim via netcat + persistent pipe
        echo "Pushing $data with nc + pipe to Remote"
        nc_client_cmd="cat $data ${pv:+| ${pv} | cat -} | nc $ip $port"
        run_cmd_exit local "$nc_client_cmd"
    else
        # Default mode: the local host is the netcat server and the remote
        # host is the client.
        echo "Starting the local netcat server"
        nc_server_cmd="cat $data ${pv:+| ${pv} | cat -} | nc --send-only -l -p $port &"
        echo_dbg "Running command in local mode: $nc_server_cmd"
        run_cmd_exit local "$nc_server_cmd"
        echo "Starting remote netcat client to get data"
        nc_client_cmd="nohup nc $local_ip $port > $RSHIM_PIPE 2>/dev/null"
        # The remote client blocks for the whole transfer, so run it in the
        # background and only wait until it has started.
        run_cmd_exit remote "$nc_client_cmd" &
        wait_for_remote_process nc $timeout \
            "Error: Failed to start the remote netcat client"
    fi
}
# Dispatch to the transfer routine that matches the selected mode.
#
# Global variables used:
# $mode, $remote_mode
#
# Any $mode other than "local" is treated as remote; an unrecognised
# $remote_mode results in no transfer being started.
push_boot_stream()
{
    case "$mode" in
    local)
        push_boot_stream_via_local_rshim
        ;;
    *)
        case "$remote_mode" in
        scp) push_boot_stream_via_remote_rshim_scp ;;
        nc) push_boot_stream_via_remote_rshim_nc ;;
        ncpipe) push_boot_stream_via_remote_rshim_ncpipe ;;
        esac
        ;;
    esac
}
# Detect whether a locally attached BlueField-3 device is in NIC mode.
#
# Global variables used: $mode, $rshim_node
# Global variables set: $pcie_bdf, $is_nic_mode
#
# Does nothing for remote mode or when the misc node does not report a
# BlueField-3 device. Otherwise derives the PCIe address from the DEV_NAME
# entry and queries INTERNAL_CPU_OFFLOAD_ENGINE with mlxconfig; the value
# "DISABLED(1)" sets is_nic_mode=1, anything else sets it to 0.
check_nic_mode()
{
    local dev_info dev_name offload_state
    if [ "$mode" != "local" ]; then
        return
    fi
    # Only needs to check for BF3.
    dev_info=$(cat ${rshim_node}/misc | grep DEV_INFO | grep BlueField-3)
    if [ -z "$dev_info" ]; then
        return
    fi
    # Get PCIE BDF: drop the first five characters of the device name and
    # replace everything from the first "." onwards with ".0".
    dev_name=$(cat ${rshim_node}/misc | grep DEV_NAME | awk '{print $2}')
    dev_name=${dev_name:5}
    dev_name=${dev_name/.*/.0}
    pcie_bdf="$dev_name"
    # Check NIC mode: the second-to-last field of the matching line is used.
    offload_state=$(mlxconfig -d $pcie_bdf -e q INTERNAL_CPU_OFFLOAD_ENGINE 2>/dev/null | grep INTERNAL_CPU_OFFLOAD_ENGINE | awk '{print $(NF-1)}')
    case ."$offload_state" in
    ."DISABLED(1)")
        is_nic_mode=1
        ;;
    *)
        is_nic_mode=0
        ;;
    esac
}
# Wait for RSHIM to finish updating by monitoring keywords in the RSHIM log.
#
# Global variables used:
# $mode, $rshim_node, $runtime, $is_nic_mode, $clear_on_read, $LOG_FILE
#
# The rshim misc node is polled once per second. New log text is echoed to
# stdout and appended to $LOG_FILE until one of the completion keywords is
# seen. Exits the script if the log cannot be read.
wait_for_update_to_finish()
{
    # 'filter0' indicates bfb installation completion, thus CLEAR_ON_READ
    # can be disabled for next boot. 'filter' is related to specific mode
    # which might include extra configuration or booting, and is used as
    # the exit condition of the bfb-install script.
    local filter0 filter
    local last="" cur sub_cur last_len cur_len finished=0 cmd_get_log read_status

    filter0="Rebooting\.\.\.|finished|DPU is ready|Linux up|CRITICAL ERROR"
    if [ $runtime -eq 0 ]; then
        filter0="$filter0|In Enhanced NIC mode"
    else
        filter0="Runtime upgrade finished"
    fi
    if [ $is_nic_mode -eq 1 ]; then
        filter="In Enhanced NIC mode"
    else
        filter="$filter0"
    fi
    echo "Collecting BlueField booting status. Press Ctrl+C to stop…"
    # Enable CLEAR_ON_READ, so rshim log will be cleared after read.
    if [ $clear_on_read -eq 1 ]; then
        run_cmd_exit "$mode" "echo 'CLEAR_ON_READ 1' > ${rshim_node}/misc"
    fi
    # Set display level to 2 to show more information
    run_cmd_exit "$mode" "echo 'DISPLAY_LEVEL 2' > ${rshim_node}/misc"
    # Create log file with date
    echo "# $(date)" > "${LOG_FILE}"

    # Loop-invariant: print the misc node from the first " INFO" line onwards.
    cmd_get_log="cat ${rshim_node}/misc | sed -n '/^ INFO/,\$p'"
    while [ $finished -eq 0 ]; do
        last_len=${#last}
        # run_cmd_exit runs inside a command substitution here, so its "exit"
        # only ends that subshell. Check the status explicitly; otherwise a
        # failed read would loop forever with the error text treated as log
        # data.
        cur=$(run_cmd_exit "$mode" "$cmd_get_log")
        read_status=$?
        if [ $read_status -ne 0 ]; then
            [ -n "$cur" ] && echo "$cur" >&2
            echo "Error: Failed to read the rshim log from ${rshim_node}/misc" >&2
            exit $read_status
        fi
        cur_len=${#cur}
        sleep 1
        # Quote the log text so it is neither word-split nor glob-expanded.
        if echo "${cur}" | grep -Ei "$filter" >/dev/null; then
            finished=1
        fi
        if echo "${cur}" | grep -Ei "$filter0" >/dev/null; then
            # Disable CLEAR_ON_READ.
            run_cmd_exit "$mode" "echo 'CLEAR_ON_READ 0' > ${rshim_node}/misc"
        fi
        # Overwrite if current length smaller than previous length.
        if [ "${last_len}" -eq 0 ] || [ "${last_len}" -gt "${cur_len}" ]; then
            echo "${cur}" | sed '/^[[:space:]]*$/d' | tee -a "${LOG_FILE}"
            last="${cur}"
            continue
        fi
        # Overwrite if first portion does not match. Substring expansion
        # counts characters, consistent with the ${#...} lengths above.
        sub_cur=${cur:0:last_len}
        if [ "${sub_cur}" != "${last}" ]; then
            echo "${cur}" | sed '/^[[:space:]]*$/d' | tee -a "${LOG_FILE}"
            last="${cur}"
            continue
        fi
        # Nothing if no update.
        if [ "${last_len}" -eq "${cur_len}" ]; then
            [ $finished -eq 0 ] && continue
        fi
        # Print the diff.
        echo "${cur:last_len}" | sed '/^[[:space:]]*$/d' | tee -a "${LOG_FILE}"
        last="${cur}"
    done
    # Disable CLEAR_ON_READ.
    run_cmd_exit "$mode" "echo 'CLEAR_ON_READ 0' > ${rshim_node}/misc"
}
# Clean up function whenever the script exits.
#
# Global variables used:
# $cleanup_started, $run_cmd_local_ready, $run_cmd_remote_ready, $remote_mode,
# $runtime, $rshim_node, $mode, $is_nic_mode, $pcie_bdf, $RSHIM_PIPE, $BF_REG
#
# Prints "BlueField Update Failed" when the status on entry is non-zero,
# kills leftover netcat processes on both ends, restores the runtime-upgrade
# and CLEAR_ON_READ settings, and re-binds the mlx5_core driver in NIC mode.
# shellcheck disable=SC2317
cleanup() {
    # Must be the very first statement: every later command overwrites $?,
    # and the failure banner below depends on the status the script had when
    # this handler was entered.
    local exit_status=$?
    local sp2

    # prevent cleanup from being called multiple times
    if [ "$cleanup_started" -eq 1 ]; then
        exit 1
    fi
    cleanup_started=1
    if [ "$exit_status" -ne 0 ]; then
        echo "BlueField Update Failed"
    fi
    # Kill all netcat related processes on both ends
    if [ "$run_cmd_local_ready" -eq 1 ]; then
        run_cmd local "pgrep -x nc >/dev/null && pgrep -x nc | xargs kill -9"
    fi
    if [ "$run_cmd_remote_ready" -eq 1 ]; then
        run_cmd remote "pgrep -x nc >/dev/null && pgrep -x nc | xargs kill -9"
        if [ "$remote_mode" == "nc" ] || [ "$remote_mode" == "ncpipe" ]; then
            run_cmd remote \
                "pgrep pipe_to_rshim >/dev/null && pgrep pipe_to_rshim | xargs kill -9"
            run_cmd remote "rm -f $RSHIM_PIPE"
        fi
    fi
    # Only touch the device when its misc node exists; after an early
    # argument error $rshim_node may still be empty or not yet normalised.
    if [ "$runtime" -eq 1 ] && [ -e "${rshim_node}/misc" ]; then
        # Reset to default state.
        echo "BOOT_RESET_SKIP 0" > ${rshim_node}/misc
        # Cleanup SP2.BIT2 and trigger SWINT2.
        sp2=$(${BF_REG} $(basename ${rshim_node}) 0x13000c48.64 | awk '{print $3}')
        sp2=$((sp2 & ~4))
        ${BF_REG} $(basename ${rshim_node}) 0x13000c48.64 $sp2 >/dev/null
        ${BF_REG} $(basename ${rshim_node}) 0x13000318.64 0x4 >/dev/null
    fi
    # Disable CLEAR_ON_READ.
    if [ "$mode" == "local" ] && [ "$run_cmd_local_ready" -eq 1 ]; then
        run_cmd_exit local "echo 'CLEAR_ON_READ 0' > ${rshim_node}/misc"
    fi
    if [ "$mode" == "remote" ] && [ "$run_cmd_remote_ready" -eq 1 ]; then
        run_cmd_exit remote "echo 'CLEAR_ON_READ 0' > ${rshim_node}/misc"
    fi
    # Bind driver back for NIC mode.
    if [ "$is_nic_mode" -eq 1 ] && [ -n "$pcie_bdf" ]; then
        echo "$pcie_bdf" > /sys/bus/pci/drivers/mlx5_core/bind
    fi
}
# Main
# Defaults for everything the option parser and the helpers rely on. All of
# them are defined before the traps are installed so that cleanup never sees
# an unset variable.
default_remote_mode=scp
default_nc_port=9527 # default nc server port for nc* methods
bfb=
cfg=
rootfs=
mode=local # Values can be local or remote
remote_mode= # Values can be scp, nc, or ncpipe
rshim= # rshim device string, format [<ip>:<port>:]rshim<N>
runtime=0 # Values can be 0 or 1.
verbose=0 # Values can be 0 or 1.
reverse_nc=0 # Values can be 0 or 1.
clear_on_read=1 # Values can be 0 or 1.
rshim_node= # rshim device identifier, e.g. rshim0
ip= # IP address for remote host
port=
cleanup_started=0
is_nic_mode=0 # Flag to indicate whether DPU in NIC mode or not
pcie_bdf="" # PCIE BDF
# cleanup runs exactly once, from the EXIT trap. INT and TERM are turned into
# an exit with the conventional 128+signal status: a signal handler that just
# returns would let the script resume after Ctrl+C.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
# Parse the command line with getopt(1) (long options require the enhanced
# util-linux implementation) and store the results in the globals above.
if ! options=$(getopt -n bfb-install -o b:c:f:hkm:r:Ruv \
    -l bfb:,config:,rootfs:,help,keep-log,remote-mode:,reverse-nc,rshim:,runtime,verbose \
    -- "$@"); then
    echo "Command line error" >&2
    exit 1
fi
# getopt emits shell-quoted words; $options must be quoted here so that the
# eval sees that quoting intact. Unquoted, arguments containing runs of
# whitespace or glob characters would be altered before being re-parsed.
eval set -- "$options"
# getopt always terminates the option list with "--", which ends the loop.
while [ "$1" != -- ]; do
    case $1 in
    --bfb|-b) shift; bfb=$1 ;;
    --config|-c) shift; cfg=$1 ;;
    --rootfs|-f) shift; rootfs=$1 ;;
    --help|-h) usage; exit 0 ;;
    --keep-log|-k) clear_on_read=0 ;;
    --remote-mode|-m) shift; remote_mode=$1 ;;
    --rshim|-r) shift; rshim=$1 ;;
    --reverse-nc|-R) reverse_nc=1 ;;
    --runtime|-u) runtime=1 ;;
    --verbose|-v) verbose=1 ;;
    *) echo "Error: Invalid argument: $1" >&2; usage >&2; exit 1 ;;
    esac
    shift
done
# Parameter checks
# The BFB image and the rshim device are both mandatory.
if [ -z "${bfb}" ] || [ -z "${rshim}" ]; then
    echo "Error: Need to provide both bfb file and rshim device name."
    usage >&2
    exit 1
fi
# Split the rshim argument, format [<ip>:<port>:]rshim<N>. A colon anywhere
# in it selects remote mode.
case "$rshim" in
*:*)
    mode=remote
    remote_mode=${remote_mode:-$default_remote_mode}
    ip=$(echo "$rshim" | cut -d':' -f1 | tr -d '\n')
    # Second field: either the port or, in the two-field form, the device.
    potential_port=$(echo "$rshim" | cut -s -d':' -f2)
    # Third field: the device, when present.
    potential_rshim_node=$(echo "$rshim" | cut -s -d':' -f3)
    if [ -n "$potential_rshim_node" ]; then
        # Three fields: <ip>:<port>:<device>.
        port=$potential_port
        rshim_node=$potential_rshim_node
    elif echo "$potential_port" | grep -qE '^[[:digit:]]+$'; then
        # Two fields with a numeric second field: that is a port, so the
        # device identifier is missing and the argument is malformed.
        port=$potential_port
        echo "Error: Missing rshim device identifier." >&2
        usage >&2
        exit 1
    else
        # Two fields with a non-numeric second field: <ip>:<device>.
        rshim_node=$potential_port
    fi
    ;;
*)
    # Local mode: the argument is the device itself.
    rshim_node=$rshim
    # Use a separate log file per rshim device.
    LOG_FILE=${LOG_FILE%.*}-$(basename $rshim_node).log
    ;;
esac
# A remote transfer mode makes no sense for a local rshim device.
if [ "$mode" == "local" ] && [ -n "$remote_mode" ]; then
    echo "Error: Remote mode is not supported for local rshim."
    exit 1
fi
# Remote mode only: resolve the host and validate the transfer mode and port.
if [ "$mode" == "remote" ]; then
    # convert potential host name to IP address
    resolved_ip=$(getent ahosts "$ip" | awk '{print $1}' | head -n 1)
    # Stop on a failed lookup instead of continuing with an empty address,
    # which would break every later test and command that uses $ip.
    if [ -z "$resolved_ip" ]; then
        echo "Error: Unable to resolve remote host: $ip" >&2
        exit 1
    fi
    ip=$resolved_ip
    # We don't allow localhost for remote modes. The whole 127/8 block and
    # the IPv6 loopback are covered, since a lookup of "localhost" may return
    # "::1" first.
    case "$ip" in
    127.*|::1)
        echo "Error: localhost is not supported for remote mode."
        exit 1
        ;;
    esac
    # Check allowed remote modes
    if [ "$remote_mode" == "scp" ]; then
        # We don't support port selection for scp mode
        if [ -n "$port" ]; then
            echo "Error: Port selection is not supported for scp mode."
            usage >&2
            exit 1
        fi
    elif [ "$remote_mode" == "nc" ] || [ "$remote_mode" == "ncpipe" ]; then
        port=${port:-$default_nc_port}
        if ! echo "$port" | grep -qE '^[0-9]+$'; then
            echo "Error: Invalid port number: $port" >&2
            usage >&2
            exit 1
        fi
    else
        echo "Error: Invalid remote mode: $remote_mode"
        usage >&2
        exit 1
    fi
fi
# Turn a bare device name into a path under /dev; a value that already
# begins with "/" is kept as given.
case "${rshim_node}" in
/*) ;;
*) rshim_node="/dev/${rshim_node}" ;;
esac
# In verbose mode, print a summary of the effective settings before starting.
if [ $verbose -eq 1 ]; then
    echo "Updating BlueField with $mode RSHIM"
    echo " BFB file: $bfb"
    if [ -n "$cfg" ]; then
        echo " Config File:: $cfg"
    fi
    if [ -n "$rootfs" ]; then
        echo " Rootfs File: $rootfs"
    fi
    case "$mode" in
    remote)
        echo " Remote Update Mode: $remote_mode"
        echo " Remote Host IP: $ip"
        case "$remote_mode" in
        nc|ncpipe)
            # The port and direction only apply to the netcat based modes.
            if [ -n "$port" ]; then
                echo " Remote port: $port"
            fi
            if [ "$reverse_nc" -eq 1 ]; then
                echo " Using reverse netcat mode"
            fi
            ;;
        esac
        ;;
    esac
    echo " RSHIM Device Node: $rshim_node"
fi
# Setup checks
# The BFB image is mandatory and must exist.
[ -e "${bfb}" ] || {
    echo "Error: ${bfb} not found."
    exit 1
}
# rootfs and cfg are optional; each is checked only when given, rootfs first.
for optional_file in "${rootfs}" "${cfg}"; do
    if [ -n "${optional_file}" ] && [ ! -e "${optional_file}" ]; then
        echo "Error: ${optional_file} not found."
        exit 1
    fi
done
# Decide whether local commands need sudo, then verify local prerequisites.
# The check is kept as a string because it is also executed through
# run_cmd_exit below, where it runs under $sudo_prefix.
check_root_cmd="[ \$(id -u) -eq 0 ]"
# Start from a known value so that a sudo_prefix inherited from the
# environment can never leak into the commands built by run_cmd.
sudo_prefix=""
echo "Checking if local host has root access..."
if [ "$(id -u)" -ne 0 ]; then
    echo " Warning: No host root access. Trying sudo"
    sudo_prefix="sudo"
fi
run_cmd_local_ready=1
rshim_check_cmd="[ -e ${rshim_node}/boot ]"
if [ "$mode" == "local" ]; then
    # With sudo in effect this confirms that sudo really yields uid 0.
    run_cmd_exit local "$check_root_cmd" \
        "Error: current login does not have sudo"
    echo "Checking if rshim driver is running locally..."
    run_cmd_exit local "$rshim_check_cmd" \
        "Error: rshim driver not found at $rshim"
fi
# Remote mode only: verify reachability, SSH access, the remote rshim driver
# and, for the netcat based modes, the netcat prerequisites.
if [ "$mode" == "remote" ]; then
    echo "Checking if remote host is reachable..."
    ping_cmd="ping -c 1 $ip >/dev/null 2>&1"
    run_cmd_exit local "$ping_cmd"
    echo "Checking if Remote has SSH server running..."
    # The port probe needs netcat locally. In scp mode netcat is not
    # otherwise required, so skip the probe when it is absent rather than
    # reporting a missing SSH server; the login check below still catches an
    # unreachable SSH service.
    if run_cmd local "command -v nc >/dev/null 2>&1"; then
        run_cmd_exit local "nc -z $ip 22" \
            "Error: Remote does not have SSH server running"
    fi
    echo "Checking if Remote has password-less root SSH access..."
    if ! ssh -o BatchMode=yes -o ConnectTimeout=5 root@$ip "exit"; then
        echo "Error: Remote does not have password-less (public key authentication) root SSH access"
        exit 1
    fi
    run_cmd_remote_ready=1
    echo "Checking if rshim driver is running remotely..."
    run_cmd_exit remote "$rshim_check_cmd" \
        "Error: remote rshim driver not found"
    echo "Lowering the priority of the remote rshim process..."
    run_cmd_exit remote "renice -n 19 -p \$(pgrep rshim)" \
        "Error: Failed to lower the priority of the remote rshim process"
    if [ "$remote_mode" == "nc" ] || [ "$remote_mode" == "ncpipe" ]; then
        echo "Checking if local netcat is installed..."
        run_cmd_exit local "command -v nc > /dev/null" \
            "Error in $mode mode: Netcat is not installed locally"
        echo "Checking if remote netcat is installed..."
        run_cmd_exit remote "command -v nc > /dev/null" \
            "Error in $mode mode: Netcat is not installed remotely"
        # Try to test-connect the netcat port to see if it's available: a
        # successful connection means something is already listening on it.
        if [ "$reverse_nc" -eq 1 ]; then
            echo "Checking if remote netcat port $port is available..."
            run_cmd_exit remote "! nc -z $ip $port" \
                "Error: remote netcat port $port is not available"
        else
            echo "Checking if local netcat port $port is available..."
            run_cmd_exit local "! nc -z localhost $port" \
                "Error: local netcat port $port is not available"
        fi
    fi
fi
# Locate the optional pv tool; when $pv is empty the transfer commands leave
# the progress stage out of their pipelines.
pv=$(which pv 2>/dev/null)
[ -n "${pv}" ] || echo "Warn: 'pv' command not found. Continue without showing BFB progress."
# Final sequence: detect NIC mode, release the mlx5_core driver from the
# device when in NIC mode, send the boot stream, then follow the rshim log
# until the update completes.
check_nic_mode
if [ $is_nic_mode -eq 1 ] && [ -n "$pcie_bdf" ]; then
    echo "$pcie_bdf" > /sys/bus/pci/drivers/mlx5_core/unbind 2>/dev/null
fi
push_boot_stream
wait_for_update_to_finish
|