File: ct-repair-nvme-osd

package info (click to toggle)
ceph-tools 0.0.40
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 112 kB
  • sloc: python: 703; sh: 626; makefile: 15
file content (216 lines) | stat: -rwxr-xr-x 6,537 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
#!/bin/bash
# ct-repair-nvme-osd: power-cycle faulty nvme disks on this host and bring
# their ceph OSDs back online (see usage() for options).
export PATH="$PATH:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
# Per-device state gathered during discovery:
declare -a SLOTS          # SLOTS[nvme_idx]        -> physical PCI slot number
declare -a OSDS_BY_NVME   # OSDS_BY_NVME[nvme_idx] -> space-separated OSD ids
declare -a IDS            # IDS[osd_id]            -> OSD fsid (LV name suffix)
declare -a LVS            # LVS[osd_id]            -> LV device path
declare -a LUKS           # LUKS[osd_id]           -> LUKS mapper name (lv_uuid)
declare -A OSDS_UUID      # OSDS_UUID[fsid]        -> osd id
# Option defaults (overridden by getopts below).
force=0
randomize=0
archive=0   # bug fix: was unset, so "[ $archive -eq 1 ]" errored when -a was not given
# Print usage/help text to stdout.
# Globals:   none
# Arguments: none
# Outputs:   help message on stdout
usage() {
    # $(basename "$0"): fixes the original "$basename $0", which expanded an
    # unset variable and printed the full script path instead of its name.
    local msg="
    Usage: $(basename "$0") [-h] [-n </dev/nvmeXnY>] [-f] [-a] [-r]

    Argument:
        -n: nvme disk (nvmeXn2). If not set, operates on all nvmeXn2 disks
        -f: force disk reset. Useful if the number of max attempts has been reached
        -a: After resetting the nvme disk, archive ceph OSD crashes.
        -r: Randomize start between 1-90 seconds.
"
    # %s keeps printf from interpreting the message as a format string.
    printf '%s\n' "$msg"
}

# Command-line option parsing.
#
while getopts "afhn:r" opt; do
  case "$opt" in
    h)
      usage
      exit
      ;;
    a) archive=1 ;;
    f) force=1 ;;
    n) NVME=$OPTARG ;;
    r) randomize=1 ;;
  esac
done

# Archive every new ceph crash report belonging to one OSD.
# Arguments: $1 - OSD identifier as it appears in "ceph crash ls-new" output
# Outputs:   one "crash <id> archived" line per archived crash
archive_osd_crash() {
	local osd=$1
	# -w matches the OSD as a whole word so e.g. "3" does not match "13";
	# quoting fixes word-splitting of the original unquoted $OSD.
	ceph crash ls-new | grep -w -- "$osd" | awk '{print $1}' | while read -r crash_id
	do
		ceph crash archive "$crash_id"
		echo "crash $crash_id archived"
	done
}

# Count repair attempts logged for one disk serial number in the last 30 days.
# Log lines are expected to be "<epoch-seconds> <dev> <serial> ...".
# Globals:   CT_REPAIR_LOG - optional override of the log file path
#            (defaults to the historical /var/log location; makes the
#            function testable without touching /var/log)
# Arguments: $1 - disk serial number
# Outputs:   attempt count on stdout (0 when the log file does not exist)
check_attempts() {
	local sn=$1
	local logfile=${CT_REPAIR_LOG:-/var/log/ct-repair-nvme-osd.log}

	# No log file means no recorded attempts (the original let awk fail
	# noisily on stderr and relied on wc printing 0).
	if [ ! -f "$logfile" ]; then
		echo 0
		return
	fi

	local current_timestamp hours hours_ago
	# Current time in epoch seconds.
	current_timestamp=$(date +"%s")

	# 30 * 24 hours in seconds.
	hours=$((30 * 24 * 60 * 60))

	# Cutoff timestamp: anything older is ignored.
	hours_ago=$((current_timestamp - hours))

	# Count matching lines directly in awk instead of "print | wc -l".
	awk -v start_time="$hours_ago" -v sn="$sn" '
            $1 >= start_time && $3 == sn { count++ }
            END { print count + 0 }
        ' "$logfile"
}
# Optional jitter so many hosts don't reset disks simultaneously.
# NOTE(review): usage says 1-90 seconds but RANDOM % 90 sleeps 0-89.
[ $randomize -eq 1 ] && sleep $((RANDOM % 90))

echo Listing disks to repair.
# Device selection: the single device from -n, or every lsblk name containing
# "n2" (per the usage text, the nvmeXn2 namespaces).
# NOTE(review): a bare "grep n2" can also match partitions/other names
# containing "n2" -- confirm on target hosts.
[ -z "$NVME" ] && DEVS=$(lsblk | awk '{print $1}' | grep n2)
[ ! -z "$NVME" ] && DEVS=${NVME}
[ -z "$DEVS" ] && echo "No disk to repair" && exit 0
echo $DEVS
# Retrieve OSDs For this Host
for DEV in $DEVS
do
    # Serial number: 2nd column of the matching "nvme list" row.
    SN=$(nvme list | grep $DEV | tail -n 1 | awk '{print $2}')

    log_count=$(check_attempts $SN)
    echo "Number of auto repairs for $DEV $SN : $log_count"
    # Rate limit: skip any disk already auto-repaired in the last 30 days,
    # unless -f (force) was given.
    if [ $log_count -ge 1 ] && [ $force -ne 1 ]; then
	echo "Already ran ct-repair-nvme-osd $log_count times for $DEV $SN. Not retrying."
        continue
    fi
    # Serial-number prefixes of known-bad hardware batches: these disks are
    # only logged, never reset (unless -f).
    bad_series=("000060232912" "000060232913" "000060232937" "000060233094" "000060233095" "000060233128")
    valid=true
    for prefix in "${bad_series[@]}"; do
      if [[ $SN == $prefix* ]]; then
        valid=false
        break
      fi
    done

    if ! $valid  && [ $force -ne 1 ]; then
        echo $(date +"%s") $DEV $SN - Part of bad series >> /var/log/ct-repair-nvme-osd.log
        continue
    fi

    # Record this attempt so check_attempts() can rate-limit future runs.
    echo $(date +"%s") $DEV $SN >> /var/log/ct-repair-nvme-osd.log
    # Controller index X of nvmeXnY: first digit group in the device name.
    IDX=$(echo ${DEV} | grep -o '[[:digit:]]*' |head -1)
    SEARCH=" nvme${IDX}n1 "
    # OSD ids served by namespace 1 of this controller, extracted from the
    # "ceph device ls-by-host" row that mentions " nvmeXn1 ".
    OSDS=$(ceph device ls-by-host $(hostname) | grep "${SEARCH}" | awk -F ${SEARCH} '{print $2}'| awk -F' osd.' '{for(i=2;i<=NF;i++) print $i};'|xargs)
    OSDS_BY_NVME[$IDX]=$OSDS
    # Map each OSD's fsid (3rd dot-separated field of the lockbox keyring
    # client name, trailing "]" stripped) back to its OSD id.
    for OSD in $OSDS; do
        ID=$(cat /var/lib/ceph/osd/ceph-${OSD}/lockbox.keyring | grep client | awk -F'.' '{print $3}'| tr -d "]")
        OSDS_UUID[$ID]=${OSD}
    done

  # NOTE(review): IDX is recomputed here with the exact same pipeline as above.
  IDX=$(echo ${DEV} | grep -o '[[:digit:]]*' |head -1)
  # Physical nvme slot
  SLOTS[$IDX]=$(cat /sys/block/${DEV}/device/address | xargs -i{} lspci -v -s {} | grep "Physical Slot"| awk -F':' '{print $2}'|xargs)
  # VG
  VG=$(pvs --noheadings -o vg_name /dev/${DEV} 2>/dev/null|xargs)
  # LV path and LUKS crypt UUID
  # IFS is switched to newline so each lvs row is one loop item.
  OLDIFS=$IFS
  IFS=$'\n'
  # Per LV of this VG: the lv_name's suffix after "block-" is taken as the
  # OSD fsid and resolved to an OSD id via OSDS_UUID; lv_uuid becomes the
  # LUKS mapper name and lv_path the device path.
  for line in $(lvs --noheadings -o lv_path,lv_uuid,lv_name ${VG}); do
    NAME=$(echo $line| awk '{print $3}'|awk -F 'block-' '{print $2}')
    OSD=${OSDS_UUID[$NAME]}
    IDS[$OSD]=$NAME
    LUKS[$OSD]=$(echo $line| awk '{print $2}')
    LVS[$OSD]=$(echo $line| awk '{print $1}')
  done
  IFS=$OLDIFS
done

echo
# Discovery summary: per-nvme PCI slot plus the OSD/fsid/LUKS/LV mapping
# collected above.
for nvme in ${!SLOTS[@]}; do
    echo Nvme${nvme}:
    echo -e "\tSLOT: ${SLOTS[$nvme]}"
    echo -e "\tOSDS:${OSDS_BY_NVME[$nvme]}"
    for OSD in ${OSDS_BY_NVME[$nvme]}; do
        echo osd.${OSD}
        echo -e "\tIDS: ${IDS[$OSD]}"
        echo -e "\tLUKS: ${LUKS[$OSD]}"
        echo -e "\tLVS: ${LVS[$OSD]}"
    done
done
# NOTE(review): this unconditional exit stops the script here, making the
# entire repair loop below unreachable dead code. It looks like a leftover
# debug / dry-run stop -- confirm whether it should be removed.
exit

echo
# Iterating on each disk: power-cycle its PCI slot, clear stale device-mapper
# entries, re-open LUKS, re-prime the OSD directory and restart the OSD
# services. (Currently unreachable -- see the exit above.)
for nvme in ${!SLOTS[@]}; do
    # Powering off the disk via PCI hotplug slot power control
    echo Powering off nvme${nvme}
    (echo 0 > /sys/bus/pci/slots/${SLOTS[$nvme]}/power)
    sleep 5
    # Cleaning device mapper table: drop the crypt and LV mappings that still
    # reference the now-absent device
    for OSD in ${OSDS_BY_NVME[$nvme]}; do
        echo Cleaning ${LUKS[$OSD]}
        dmsetup remove ${LUKS[$OSD]}
        echo Cleaning ${LVS[$OSD]}
        dmsetup remove ${LVS[$OSD]}
    done
    sleep 1

    # Powering on the disk
    echo Powering on nvme${nvme}
    (echo 1 > /sys/bus/pci/slots/${SLOTS[$nvme]}/power)
    sleep 5

    for OSD in ${OSDS_BY_NVME[$nvme]}; do
        # Re-open the dm-crypt volume; the key is piped from the ceph
        # config-key store into cryptsetup via --key-file -.
        echo Opening LUKS ${LUKS[$OSD]}
        (ceph config-key get dm-crypt/osd/${IDS[$OSD]}/luks | /usr/sbin/cryptsetup --key-file - --allow-discards luksOpen ${LVS[$OSD]} ${LUKS[$OSD]})

        # Rebuild the OSD dir contents and the block symlink, then restore
        # ceph:ceph ownership on both the dir and the backing dm node.
        echo Enabling osd.${OSD}
        chown -R ceph:ceph /var/lib/ceph/osd/ceph-${OSD}
        ceph-bluestore-tool --cluster=ceph prime-osd-dir --dev /dev/mapper/${LUKS[$OSD]} --path /var/lib/ceph/osd/ceph-${OSD} --no-mon-config
        rm -f /var/lib/ceph/osd/ceph-${OSD}/block
        ln -snf /dev/mapper/${LUKS[$OSD]} /var/lib/ceph/osd/ceph-${OSD}/block
        # Resolve the dm-N node behind the mapper symlink so it can be chowned.
        DM=$(ls -l /dev/mapper/${LUKS[$OSD]} | awk -F'->' '{print $2}'| awk -F'/' '{print $2}')
        chown -R ceph:ceph /dev/${DM}
        chown -R ceph:ceph /var/lib/ceph/osd/ceph-${OSD}
        systemctl enable ceph-volume@lvm-${OSD}-${IDS[$OSD]}
        systemctl enable --runtime ceph-osd@${OSD}

        echo Restarting OSD service : ${OSD}
        systemctl start ceph-osd@${OSD}
	sleep 10
    done
    # Success check: a leftover "crypt" line in lsblk output is treated as a
    # failed repair.
    # NOTE(review): "&>/dev/null" discards lsblk's stdout BEFORE the pipe, so
    # grep never receives input and this condition is always false; the
    # failure branch can never trigger. Confirm and move/drop the redirection.
    if (lsblk /dev/nvme${nvme}n1 &>/dev/null| grep crypt); then
        OUTPUT="${OUTPUT}\nOSD Repair Failed for NVME ${nvme}"
        OUTPUT="${OUTPUT}\nYou can retry this script !"
     else
        OUTPUT="${OUTPUT}\nOSD Repair for NVME ${nvme} OK :"
        OUTPUT="${OUTPUT}\n $(lsblk /dev/nvme${nvme}n1 --noheading)'"
	# Look for OSD crashes for 60 seconds (6 x 10s) and archive them.
	if [ $archive -eq 1 ] 
	then
	  for i in {1..6}
	  do
	    for OSD in ${OSDS_BY_NVME[$nvme]}
	    do
  	      archive_osd_crash ${OSD}
            done
	    sleep 10
          done
	fi
    fi
done

# Final report; OUTPUT embeds literal \n sequences that printf expands.
printf "$OUTPUT\n"