File: mdcheck

package info (click to toggle)
mdadm 4.5-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,948 kB
  • sloc: ansic: 46,817; sh: 2,481; makefile: 281; python: 44
file content (251 lines) | stat: -rw-r--r-- 6,119 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
#!/bin/bash

# Copyright (C) 2014-2017 Neil Brown <neilb@suse.de>
#
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    Author: Neil Brown
#    Email: <neilb@suse.com>

# This script should be run periodically to automatically
# perform a 'check' on any md arrays.
#
# It supports a 'time budget' such that any incomplete 'check'
# will be checkpointed when that time has expired.
# A subsequent invocation will allow the 'check' to continue.
#
# Arrays are identified by UUID and the 'sync_completed' value is stored
# in /var/lib/mdcheck/MD_UUID_$UUID. When the script has finished checking
# an array, it creates a file /var/lib/mdcheck/Checked_$UUID.
#
# Modes are:
#   --continue      Don't start new checks, only continue previously started
#                   ones for which MD_UUID_$UUID already exists.
#   --start         Like --continue, but also start new checks for arrays
#                   for which the file Checked_$UUID does not exist.
#   --restart       Enable restarting checks that were previously finished
#                   (remove all Checked_* files) and exit immediately.
#                   This mode doesn't start any array checks.
#
# With none of these options given, a new check from 0 will be started on
# all arrays, even those that hadn't finished before.
#
# Options are:
#   --duration    This is passed to "date --date=$duration" to find out
#		  when to finish


# Emit one log message built from all arguments.
# When INVOCATION_ID is non-empty (the script was started by systemd) the
# message is echoed to stderr and so reaches the journal; otherwise it is
# passed to logger(1) at priority daemon.info with an "mdcheck: " prefix.
log() {
    if [[ -z "$INVOCATION_ID" ]]; then
        logger -p daemon.info "mdcheck: $*"
        return
    fi
    echo "$@" >&2
}

# Map a sysfs device directory to its device node.
#   $1  sysfs directory that should contain a "uevent" file
# On success prints "/dev/<DEVNAME>" (without a trailing newline) and
# returns 0.  If the uevent file is missing, yields no DEVNAME, or the
# resulting path is not a block device, a message is logged and 1 is
# returned.
devname() {
    local sysdir=$1 node=

    if [[ -f "$sysdir/uevent" ]]; then
        # The file is evaluated as shell code inside a subshell, so nothing
        # but the DEVNAME value comes back to this shell.
        node=$(eval "$(cat "$sysdir/uevent")"; echo -n "$DEVNAME")
    fi

    if [[ -z "$node" || ! -b "/dev/$node" ]]; then
        log "failed to read DEVNAME from $sysdir"
        return 1
    fi

    echo -n "/dev/$node"
}

# Print the command-line synopsis (modes and options) on stderr.
usage() {
    local text='Usage: mdcheck [mode] [options]

Mode:
	--help		print this help
	--continue	only continue previously started checks
	--start		continue, and start check on arrays that have not been checked
	--restart	re-enable checking previously finished arrays
	<EMPTY>		start check from position 0 on all arrays
Options:
	--duration <time-offset>
			set the amount of time to run the checks for
			(<time-offset> must be understood by "date --date")'

    printf '%s\n' "$text" >&2
}

# Store the requested mode in the global MODE.
#   $1  the mode option as given on the command line
# If MODE is already non-empty, a second mode was requested: an error
# message and the usage text are written to stderr and the script exits
# with status 1.
set_mode() {
    if [[ -n "$MODE" ]]; then
        printf >&2 '%s\n\n' 'ERROR: only one of --continue, --start, or --restart may be set'
        usage
        exit 1
    fi
    MODE=$1
}

# Parse the command line into the globals MODE and endtime.
#   MODE     "", --continue, --start or --restart (set through set_mode)
#   endtime  epoch seconds at which checking must stop, or "" when no
#            --duration was given
# Exits through usage() with a non-zero status on an unknown option, on a
# --duration value that date(1) cannot interpret, or when any positional
# argument is left over.  --help prints the usage text and exits 0.
parse_args() {
    local args rv dur

    args=$(getopt -o "" -l help,continue,start,restart,duration: -n mdcheck -- "$@")
    rv=$?
    if [ "$rv" -ne 0 ]; then
        usage
        exit "$rv"
    fi

    # getopt prints a shell-quoted word list.  It must be quoted here so
    # that eval alone splits it; left unquoted, the shell would word-split
    # and glob-expand the list first.
    eval set -- "$args"

    MODE=
    endtime=
    while [ " $1" != " --" ]
    do
        case $1 in
        --help )
            usage
            exit 0
            ;;
        --continue|--start|--restart)
            set_mode "$1"
            ;;
        --duration )
            shift; dur=$1
            # Without this check a bad value would leave endtime empty and
            # the run would behave as if --duration had not been given.
            endtime=$(date --date "$dur" "+%s") || {
                echo >&2 "ERROR: invalid --duration value: $dur"
                usage
                exit 1
            }
            ;;
        esac
        shift
    done
    shift

    [[ $# -eq 0 ]] || {
        usage
        exit 1
    }
}

parse_args "$@"

# Preparation that depends on the selected mode, done before any array is
# looked at:
#   --restart  remove every Checked_* marker, then exit immediately
#   (no mode)  remove every Checked_* marker and every saved MD_UUID_*
#              position
# Any other mode needs no preparation here.
if [[ "$MODE" == --restart ]]; then
    log 'Re-enabling array checks for previously finished arrays'
    rm -f /var/lib/mdcheck/Checked_*
    exit 0
elif [[ -z "$MODE" ]]; then
    log 'Starting new check from 0 on all MD RAID arrays'
    rm -f /var/lib/mdcheck/Checked_* /var/lib/mdcheck/MD_UUID_*
fi

# Number of arrays on which this run has started a check.
cnt=0
# Per-process scratch file inside the state directory.
tmp="/var/lib/mdcheck/.md-check-$$"

# Pause every check started by this run that is still in progress.
# Globals read: cnt, tmp and MD_<n>_fl / MD_<n>_sys / MD_<n>_dev for
# n = 1..cnt.  For each tracked array whose MD_<n>_fl is non-empty:
#   - if sync_action no longer reads "check", MD_<n>_fl is cleared and
#     the state file is deleted;
#   - otherwise "idle" is written to sync_action, the content of sync_min
#     is copied into the state file, and the position is logged.
# The scratch file $tmp is removed at the end.
cleanup() {
    local i ref fl sys dev

    # We've waited, and there are still checks running.
    # Time to stop them.
    # A counting loop is used instead of {1..$cnt}: with cnt=0 the brace
    # expansion yields "1 0" and would visit slots that were never set up.
    for (( i = 1; i <= cnt; i++ ))
    do
        ref="MD_${i}_fl";  fl=${!ref}
        ref="MD_${i}_sys"; sys=${!ref}
        ref="MD_${i}_dev"; dev=${!ref}

        if [ -z "$fl" ]; then continue; fi

        if [ "$(cat "$sys/md/sync_action")" != 'check' ]
        then
            printf -v "MD_${i}_fl" '%s' ''
            rm -f "$fl"
            continue
        fi
        echo idle > "$sys/md/sync_action"
        # NOTE(review): presumably the kernel moves sync_min up to the
        # position reached when a check is interrupted - confirm.
        cat "$sys/md/sync_min" > "$fl"
        log pause checking "$dev" at "$(cat "$fl")"
    done
    rm -f "$tmp"
}

# SIGINT, SIGQUIT and SIGTERM are turned into an exit with status 129;
# cleanup is registered as the handler that runs when the shell exits.
trap 'exit 129' INT QUIT TERM
trap 'cleanup' EXIT

# Make sure the state directory exists, then delete saved-position files
# (MD_UUID*) whose modification time is more than 180 days in the past.
mkdir -p /var/lib/mdcheck
find /var/lib/mdcheck -type f -name 'MD_UUID*' -mtime +180 -exec rm {} ';'

# Now look at each md device.
# For every array whose sync_action reads "idle":
#   - resolve its device node and UUID (via mdadm --detail --export);
#   - if a saved position file exists, resume from the position in it;
#   - else, unless the mode is --continue or a Checked_ marker exists,
#     start from 0;
#   - otherwise skip the array.
# A started array is recorded in MD_<cnt>_fl / _sys / _dev, the start
# position is written to the state file and to sync_min, and "check" is
# written to sync_action.
# NOTE(review): BINDIR looks like a placeholder replaced at install time -
# confirm against the build system.
for sync_act in /sys/block/*/md/sync_action
do
    [ -e "$sync_act" ] || continue
    if [ "$(cat "$sync_act")" != 'idle' ]
    then # This array is busy
        continue
    fi

    sys=${sync_act%/md/*}
    dev=$(devname "$sys") || continue
    BINDIR/mdadm --detail --export "$dev" | grep '^MD_UUID=' > "$tmp" || continue
    source "$tmp"
    [[ "$MD_UUID" ]] || continue

    fl="/var/lib/mdcheck/MD_UUID_$MD_UUID"
    checked="${fl/MD_UUID_/Checked_}"
    if [[ -f "$fl" ]]; then
        [[ ! -f "$checked" ]] || {
            log "WARNING: $checked exists, continuing anyway"
        }
        start=$(cat "$fl")
        # The saved position must be a plain sector number.  An empty or
        # non-numeric value would be refused by sync_min and then be
        # written straight back to the state file, so start over from 0.
        if [[ ! "$start" =~ ^[0-9]+$ ]]; then
            log "WARNING: ignoring invalid position '$start' in $fl, checking $dev from 0"
            start=0
        fi
    elif [[ ! -f "$checked" && "$MODE" != --continue ]]; then
        start=0
    else # nothing to do
        continue
    fi

    : "$((cnt+=1))"
    printf -v "MD_${cnt}_fl" '%s' "$fl"
    printf -v "MD_${cnt}_sys" '%s' "$sys"
    printf -v "MD_${cnt}_dev" '%s' "$dev"
    echo "$start" > "$fl"
    echo "$start" > "$sys/md/sync_min"
    echo check > "$sys/md/sync_action"
    log checking "$dev" from "$start"
done

# Without a time budget there is nothing to wait for.
if [ -z "$endtime" ]
then
    exit 0
fi

# Until the budget expires, poll every tracked array:
#   - an array whose sync_action no longer reads "check" is treated as
#     finished: it is logged, its state file is removed and a Checked_
#     marker is created;
#   - for an array still being checked, the first field of sync_completed
#     is saved to the state file.
# When no array is left, mdcheck_continue.timer is stopped and the script
# exits 0.
while [ "$(date +%s)" -lt "$endtime" ]
do
    any=
    # A counting loop is used instead of {1..$cnt}: with cnt=0 the brace
    # expansion yields "1 0".
    for (( i = 1; i <= cnt; i++ ))
    do
        ref="MD_${i}_fl";  fl=${!ref}
        ref="MD_${i}_sys"; sys=${!ref}
        ref="MD_${i}_dev"; dev=${!ref}

        if [ -z "$fl" ]; then continue; fi

        if [ "$(cat "$sys/md/sync_action")" != 'check' ]
        then
            log finished checking "$dev"
            printf -v "MD_${i}_fl" '%s' ''
            rm -f "$fl"
            touch "${fl/MD_UUID_/Checked_}"
            continue
        fi
        read -r a rest < "$sys/md/sync_completed"
        # Keep the previous checkpoint unless a sector number was read;
        # an empty or non-numeric word is useless as a resume position.
        if [[ "$a" =~ ^[0-9]+$ ]]; then
            echo "$a" > "$fl"
        fi
        any=yes
    done
    # mdcheck_continue.timer is started by mdcheck_start.timer.
    # When the check action can be finished in mdcheck_start.service,
    # it doesn't need mdcheck_continue anymore.
    if [ -z "$any" ]; then
        systemctl stop mdcheck_continue.timer
        exit 0
    fi
    # Sleep in the background and wait on it so that a trapped signal is
    # handled promptly instead of after the sleep ends.
    sleep 220 &
    wait $!
done