#!/bin/bash
# Repair NVMe-backed Ceph OSDs: power-cycle the disk's PCI slot, rebuild the
# LVM + dm-crypt stack, and restart the affected OSD services.  See usage().
export PATH="$PATH:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
declare -a SLOTS          # SLOTS[nvme_idx]        -> physical PCI slot number
declare -a OSDS_BY_NVME   # OSDS_BY_NVME[nvme_idx] -> space-separated OSD ids
declare -a IDS            # IDS[osd_id]            -> OSD fsid (from lockbox keyring)
declare -a LVS            # LVS[osd_id]            -> LV device path
declare -a LUKS           # LUKS[osd_id]           -> dm-crypt mapping name (LV uuid)
declare -A OSDS_UUID      # OSDS_UUID[fsid]        -> OSD id
force=0
randomize=0
archive=0   # fix: was left unset unless -a given, breaking "[ $archive -eq 1 ]" later
# Print command-line help to stdout.
# fix: `$basename $0` was missing its command substitution, and `"""` has no
# special meaning in bash (it is just empty strings concatenated around the
# text); a here-doc is the idiomatic form.  Also adds the missing [-r] to the
# synopsis and fixes the "Usefull"/"reseting" typos.
usage() {
cat <<EOF
Usage: $(basename "$0") [-h] [-n </dev/nvmeXnY>] [-f] [-a] [-r]
Argument:
-n: nvme disk (nvmeXn2). If not set, operates on all nvmeXn2 disks
-f: force disk reset. Useful if the number of max attempts has been reached
-a: After resetting the nvme disk, archive ceph OSD crashes.
-r: Randomize start between 1-90 seconds.
EOF
}
# Options
#
# Parse command-line options (see usage() for their meaning).
while getopts "afhn:r" OPTION; do
  case "$OPTION" in
    h)
      usage
      exit
      ;;
    a)
      archive=1
      ;;
    f)
      force=1
      ;;
    n)
      NVME=$OPTARG
      ;;
    r)
      randomize=1
      ;;
    \?)
      # fix: unknown options were silently ignored; fail fast instead.
      usage
      exit 2
      ;;
  esac
done
# Archive every new ceph crash report belonging to one OSD.
#   $1 - OSD id (e.g. "3"); matched word-wise against `ceph crash ls-new`
# Outputs: one "crash <id> archived" line per archived crash.
archive_osd_crash() {
  local OSD=$1
  local i
  # First column of each matching ls-new line is the crash id.
  ceph crash ls-new | grep -w -- "$OSD" | awk '{print $1}' | while read -r i; do
    ceph crash archive "$i"
    echo "crash $i archived"
  done
}
# Count repair attempts recorded for a disk serial during the last 30 days.
#   $1 - disk serial number (3rd field of each log line)
#   $2 - optional log file (default: /var/log/ct-repair-nvme-osd.log)
# Log line format: "<epoch> <device> <serial> [...]".
# Outputs: the number of matching log lines on stdout.
check_attempts() {
  local SN=$1
  local logfile=${2:-/var/log/ct-repair-nvme-osd.log}
  local current_timestamp window hours_ago count
  # Get the current timestamp
  current_timestamp=$(date +"%s")
  # 30 days expressed in seconds
  window=$((30 * 24 * 60 * 60))
  # Cutoff: entries older than this are ignored.
  hours_ago=$((current_timestamp - window))
  # Print entries newer than the cutoff whose serial matches, then count them.
  count=$(awk -v start_time="$hours_ago" -v sn="$SN" '
    $1 >= start_time && $3 == sn
  ' "$logfile" | wc -l)
  echo "$count"
}
# Optional random start delay (0-89s) to avoid a thundering herd across hosts.
[ "$randomize" -eq 1 ] && sleep $((RANDOM % 90))
echo "Listing disks to repair."
# Operate either on the single disk given with -n, or on every nvmeXn2 device.
# fix: `lsblk | awk '{print $1}' | grep n2` also matched the NAME header row,
# tree-glyph-prefixed children and partitions; -ndo NAME lists bare top-level
# device names only, and the anchored pattern keeps just nvmeXn2.
if [ -z "$NVME" ]; then
  DEVS=$(lsblk -ndo NAME 2>/dev/null | grep 'n2$')
else
  DEVS=$NVME
fi
[ -z "$DEVS" ] && echo "No disk to repair" && exit 0
echo $DEVS
# Discovery pass: for each candidate disk, record the topology needed by the
# repair pass below (PCI slot, OSD ids, LV paths, dm-crypt names).
for DEV in $DEVS
do
# Disk serial number: 2nd column of `nvme list` for this device.
SN=$(nvme list | grep $DEV | tail -n 1 | awk '{print $2}')
log_count=$(check_attempts $SN)
echo "Number of auto repairs for $DEV $SN : $log_count"
# Skip disks already auto-repaired in the last 30 days unless -f was given.
# (Original comment claimed "count exceeds 3 and exit" — it is >= 1 and continue.)
if [ $log_count -ge 1 ] && [ $force -ne 1 ]; then
echo "Already ran ct-repair-nvme-osd $log_count times for $DEV $SN. Not retrying."
continue
fi
# Serial-number prefixes of a known-bad hardware batch: log and skip these
# unless -f forces the repair anyway.
bad_series=("000060232912" "000060232913" "000060232937" "000060233094" "000060233095" "000060233128")
valid=true
for prefix in "${bad_series[@]}"; do
if [[ $SN == $prefix* ]]; then
valid=false
break
fi
done
if ! $valid && [ $force -ne 1 ]; then
echo $(date +"%s") $DEV $SN - Part of bad series >> /var/log/ct-repair-nvme-osd.log
continue
fi
# Record this attempt so check_attempts() counts it next run.
echo $(date +"%s") $DEV $SN >> /var/log/ct-repair-nvme-osd.log
# Numeric controller index X of nvmeXn2.
IDX=$(echo ${DEV} | grep -o '[[:digit:]]*' |head -1)
# OSD data lives on namespace 1 of the same controller (nvmeXn1) — presumably;
# confirm against the deployment layout.
SEARCH=" nvme${IDX}n1 "
# Pull every "osd.N" id listed for that device by `ceph device ls-by-host`.
OSDS=$(ceph device ls-by-host $(hostname) | grep "${SEARCH}" | awk -F ${SEARCH} '{print $2}'| awk -F' osd.' '{for(i=2;i<=NF;i++) print $i};'|xargs)
OSDS_BY_NVME[$IDX]=$OSDS
# Map each OSD's fsid (3rd dotted field of the lockbox client entity, closing
# bracket stripped) back to its OSD id.
for OSD in $OSDS; do
ID=$(cat /var/lib/ceph/osd/ceph-${OSD}/lockbox.keyring | grep client | awk -F'.' '{print $3}'| tr -d "]")
OSDS_UUID[$ID]=${OSD}
done
# NOTE(review): IDX is recomputed here with the identical command — redundant.
IDX=$(echo ${DEV} | grep -o '[[:digit:]]*' |head -1)
# Physical nvme slot (PCI address -> lspci "Physical Slot" field).
SLOTS[$IDX]=$(cat /sys/block/${DEV}/device/address | xargs -i{} lspci -v -s {} | grep "Physical Slot"| awk -F':' '{print $2}'|xargs)
# VG backing this disk (empty if the disk is not a PV).
VG=$(pvs --noheadings -o vg_name /dev/${DEV} 2>/dev/null|xargs)
# LV path and LUKS crypt UUID: iterate lvs output line-wise
# (fields: lv_path lv_uuid lv_name).
OLDIFS=$IFS
IFS=$'\n'
for line in $(lvs --noheadings -o lv_path,lv_uuid,lv_name ${VG}); do
# lv_name is assumed to look like "osd-block-<fsid>"; strip up to "block-".
NAME=$(echo $line| awk '{print $3}'|awk -F 'block-' '{print $2}')
OSD=${OSDS_UUID[$NAME]}
IDS[$OSD]=$NAME
# The dm-crypt mapping is named after the LV uuid (field 2).
LUKS[$OSD]=$(echo $line| awk '{print $2}')
LVS[$OSD]=$(echo $line| awk '{print $1}')
done
IFS=$OLDIFS
done
# Summarize the discovered topology for each nvme index before acting on it.
echo
for idx in "${!SLOTS[@]}"; do
  printf 'Nvme%s:\n' "$idx"
  printf '\tSLOT: %s\n' "${SLOTS[$idx]}"
  printf '\tOSDS:%s\n' "${OSDS_BY_NVME[$idx]}"
  for osd_id in ${OSDS_BY_NVME[$idx]}; do
    printf 'osd.%s\n' "$osd_id"
    printf '\tIDS: %s\n' "${IDS[$osd_id]}"
    printf '\tLUKS: %s\n' "${LUKS[$osd_id]}"
    printf '\tLVS: %s\n' "${LVS[$osd_id]}"
  done
done
# NOTE(review): this unconditional exit makes everything below dead code — the
# script currently only reports and never performs the actual repair.
# Presumably a dry-run guard left in deliberately; confirm before removing.
exit
echo
# Repair pass (currently unreachable — see the `exit` above): power-cycle each
# disk's PCI slot, rebuild the dm stack, and restart its OSD services.
for nvme in ${!SLOTS[@]}; do
  # Power off the disk through its PCI hotplug slot.
  echo Powering off nvme${nvme}
  (echo 0 > /sys/bus/pci/slots/${SLOTS[$nvme]}/power)
  sleep 5
  # Remove the stale device-mapper entries (crypt mapping, then LV).
  for OSD in ${OSDS_BY_NVME[$nvme]}; do
    echo Cleaning ${LUKS[$OSD]}
    dmsetup remove ${LUKS[$OSD]}
    echo Cleaning ${LVS[$OSD]}
    dmsetup remove ${LVS[$OSD]}
  done
  sleep 1
  # Power the disk back on.
  echo Powering on nvme${nvme}
  (echo 1 > /sys/bus/pci/slots/${SLOTS[$nvme]}/power)
  sleep 5
  for OSD in ${OSDS_BY_NVME[$nvme]}; do
    echo Opening LUKS ${LUKS[$OSD]}
    # The LUKS key is fetched from the ceph config-key store and fed on stdin.
    (ceph config-key get dm-crypt/osd/${IDS[$OSD]}/luks | /usr/sbin/cryptsetup --key-file - --allow-discards luksOpen ${LVS[$OSD]} ${LUKS[$OSD]})
    echo Enabling osd.${OSD}
    chown -R ceph:ceph /var/lib/ceph/osd/ceph-${OSD}
    ceph-bluestore-tool --cluster=ceph prime-osd-dir --dev /dev/mapper/${LUKS[$OSD]} --path /var/lib/ceph/osd/ceph-${OSD} --no-mon-config
    # Point the OSD's block symlink at the reopened crypt device.
    rm -f /var/lib/ceph/osd/ceph-${OSD}/block
    ln -snf /dev/mapper/${LUKS[$OSD]} /var/lib/ceph/osd/ceph-${OSD}/block
    # Resolve the underlying dm-N node so its ownership can be fixed too.
    DM=$(ls -l /dev/mapper/${LUKS[$OSD]} | awk -F'->' '{print $2}'| awk -F'/' '{print $2}')
    chown -R ceph:ceph /dev/${DM}
    chown -R ceph:ceph /var/lib/ceph/osd/ceph-${OSD}
    systemctl enable ceph-volume@lvm-${OSD}-${IDS[$OSD]}
    systemctl enable --runtime ceph-osd@${OSD}
    echo Restarting OSD service : ${OSD}
    systemctl start ceph-osd@${OSD}
    sleep 10
  done
  # NOTE(review): `&>/dev/null` redirects lsblk's stdout away from the pipe,
  # so grep never sees any input and this branch can never be taken.  The
  # intended polarity is also unclear (after a successful luksOpen a crypt
  # device SHOULD exist), so the check is left as-is — confirm the intended
  # condition before "fixing" it to 2>/dev/null.
  if (lsblk /dev/nvme${nvme}n1 &>/dev/null| grep crypt); then
    OUTPUT="${OUTPUT}\nOSD Repair Failed for NVME ${nvme}"
    OUTPUT="${OUTPUT}\nYou can retry this script !"
  else
    OUTPUT="${OUTPUT}\nOSD Repair for NVME ${nvme} OK :"
    # fix: dropped the stray trailing apostrophe that leaked into the report.
    OUTPUT="${OUTPUT}\n $(lsblk /dev/nvme${nvme}n1 --noheading)"
    # Look for OSD crashes for 60 seconds (6 x 10s) and archive them.
    # fix: ${archive:-0} — a bare $archive raised "unary operator expected"
    # whenever -a was not supplied.
    if [ "${archive:-0}" -eq 1 ]
    then
      for i in {1..6}
      do
        for OSD in ${OSDS_BY_NVME[$nvme]}
        do
          archive_osd_crash ${OSD}
        done
        sleep 10
      done
    fi
  fi
done
# fix: %b expands the \n escapes accumulated in OUTPUT without using OUTPUT
# itself as the printf format string (a literal % in lsblk output would have
# corrupted or aborted the old `printf "$OUTPUT\n"`).
printf '%b\n' "${OUTPUT}"