1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249
|
#!/bin/sh

# The CTDB tool commands used below need access to the CTDB socket, so
# this script has to run as root.  When started by any other user,
# replace this process with the same command line run under sudo.
if ! [ "$(id -u)" -eq 0 ] ; then
    exec sudo "$0" "$@"
fi
# statd must be configured to use this script as its high availability call-out.
#
# In most Linux versions this can be done using something like the following...
#
# /etc/sysconfig/nfs (Red Hat) or /etc/default/nfs-common (Debian):
# NFS_HOSTNAME=myhostname
# STATD_HOSTNAME="${NFS_HOSTNAME} -H /etc/ctdb/statd-callout"
#
# Newer Red Hat Linux variants instead use /etc/nfs.conf:
# [statd]
# name = myhostname
# ha-callout = /etc/ctdb/statd-callout
# Honour a CTDB_BASE supplied by the caller.  Otherwise use the physical
# (symlink-free) path of the directory that contains this script.
if [ -z "$CTDB_BASE" ] ; then
    CTDB_BASE=$(_here=$(dirname "$0") ; cd -P "$_here" ; echo "$PWD")
fi

# Pull in the shared helper functions used throughout this script.
. "${CTDB_BASE}/functions"
# die MESSAGE...
#
# Local override of die() so that fatal errors get logged: hand all
# arguments to script_log under the "statd-callout" tag and then
# terminate the script with exit status 1.
die () { script_log "statd-callout" "$@" ; exit 1 ; }
# NFS_HOSTNAME is normally provided by the system NFS configuration;
# try the "nfs" and "nfs-common" config names.
load_system_config "nfs" "nfs-common"

# Still unset?  On systems that ship nfsconf, ask it for the "name"
# setting of the [statd] section (i.e. the value from /etc/nfs.conf).
if [ -z "$NFS_HOSTNAME" ] ; then
    if type nfsconf >/dev/null 2>&1 ; then
        NFS_HOSTNAME=$(nfsconf --get statd name)
    fi
fi

# Nothing below can work without a hostname, so give up here.
if [ -z "$NFS_HOSTNAME" ] ; then
    die "NFS_HOSTNAME is not configured. statd-callout failed"
fi
############################################################
ctdb_setup_state_dir "service" "nfs"

# All per-client touch files live in a dedicated sub-directory of the
# service state directory, and the rest of the script works relative
# to it.
#
# script_state_dir set by ctdb_setup_state_dir()
# shellcheck disable=SC2154
d="${script_state_dir}/statd-callout"

if ! mkdir -p "$d" ; then
    die "Failed to create directory \"${d}\""
fi

if ! cd "$d" ; then
    die "Failed to change directory to \"${d}\""
fi

# This node's PNN, used below to pick out the public IPs hosted here.
pnn=$(ctdb_get_pnn)
############################################################
# send_notifies
#
# Send statd notifications for every "server-IP client-IP" pair read
# from stdin (one pair per line).
#
# Globals read: CTDB_HELPER_BINDIR (location of the smnotify helper),
#               NFS_HOSTNAME (cluster-wide NFS host name).
#
# For each pair, smnotify is run four times: a "shutdown" notification
# followed by a "startup" notification, each sent once with the server
# IP as the server name and once with NFS_HOSTNAME as the server name.
#
# The return status is that of the last smnotify run (0 if stdin was
# empty).
send_notifies ()
{
    _smnotify="${CTDB_HELPER_BINDIR}/smnotify"

    # State must monotonically increase, across the entire
    # cluster.  Use seconds since epoch and hope the time is in
    # sync across nodes.  Even numbers mean service is shut down,
    # odd numbers mean service is started.

    # Intentionally round to an even number
    # shellcheck disable=SC2017
    _state_even=$(( $(date '+%s') / 2 * 2))
    _state_odd=$((_state_even + 1))

    # -r: take the addresses verbatim, no backslash processing
    while read -r _sip _cip ; do
        # NOTE: Consider optimising smnotify to read all the
        # data from stdin and then run it in the background.

        # Every pair starts again from the even value so that the
        # even/odd meaning above holds for each client: first the
        # shutdown (even) notifications, then the startup (odd) ones.
        for _stateval in "$_state_even" "$_state_odd" ; do
            # Send each notification under both server names
            for _server in "$_sip" "$NFS_HOSTNAME" ; do
                "$_smnotify" --client="$_cip" --ip="$_sip" \
                    --server="$_server" --stateval="$_stateval"
            done
        done
    done
}
# delete_records
#
# Read "server-IP client-IP" pairs from stdin (one pair per line) and,
# for each, emit a line of the form
#
#   "statd-state@<server-IP>@<client-IP>" ""
#
# All lines are fed to a single `$CTDB ptrans "ctdb.tdb"` invocation.
# The value written for every key is the empty string; going by this
# function's name that is how a record is deleted (confirm against the
# ctdb ptrans documentation).
#
# Globals read: CTDB (the ctdb command line; deliberately unquoted so
#               that it may carry options).
# Return status: that of the ptrans command.
delete_records ()
{
    # -r and printf '%s' keep the addresses byte-for-byte intact; echo
    # may interpret backslashes when /bin/sh is not bash.
    while read -r _sip _cip ; do
        printf '"statd-state@%s@%s" ""\n' "$_sip" "$_cip"
    done | $CTDB ptrans "ctdb.tdb"
}
############################################################
# Dispatch on the requested operation in $1:
#
#   add-client <client>  record that <client> is being monitored
#   del-client <client>  record that <client> is no longer monitored
#   update               push the recorded changes to ctdb.tdb
#   notify               restart the lock manager and notify clients
#
# Any other value of $1 matches no arm and the script does nothing.
case "$1" in
    # Keep a single file to keep track of the last "add-client" or
    # "del-client".  These get pushed to ctdb.tdb during "update",
    # which will generally be run once each "monitor" cycle.  In this
    # way we avoid scalability problems with flood of persistent
    # transactions after a "notify" when all the clients re-take their
    # locks.

    add-client)
        # statd does not tell us to which IP the client connected so
        # we must add it to all the IPs that we serve
        cip="$2"
        date=$(date '+%s')
        # Each touch file is named after its key and holds one ptrans
        # input line: the quoted key followed by the quoted timestamp.
        #
        # x is intentionally ignored
        # shellcheck disable=SC2034
        $CTDB ip -X |
            tail -n +2 |
            while IFS="|" read -r x sip node x ; do
                [ "$node" = "$pnn" ] || continue # not us
                key="statd-state@${sip}@${cip}"
                echo "\"${key}\" \"${date}\"" >"$key"
            done
        ;;

    del-client)
        # statd does not tell us from which IP the client disconnected
        # so we must record the removal against all the IPs that we
        # serve
        cip="$2"
        # Same touch file as for add-client, but with an empty value
        # in place of the timestamp.
        #
        # x is intentionally ignored
        # shellcheck disable=SC2034
        $CTDB ip -X |
            tail -n +2 |
            while IFS="|" read -r x sip node x ; do
                [ "$node" = "$pnn" ] || continue # not us
                key="statd-state@${sip}@${cip}"
                echo "\"${key}\" \"\"" >"$key"
            done
        ;;

    update)
        # An unmatched glob is left as the literal pattern, which is
        # how "no touch files" is detected here.
        files=$(echo statd-state@*)
        if [ "$files" = "statd-state@*" ] ; then
            # No files!
            exit 0
        fi

        # Filter out lines for any IP addresses that are not currently
        # hosted public IP addresses.  The awk program emits one sed
        # "print" command per IP hosted on this node, with the dots
        # escaped so they match literally.
        ctdb_ips=$($CTDB ip | tail -n +2)
        sed_expr=$(echo "$ctdb_ips" |
            awk -v pnn="$pnn" 'pnn == $2 {
                ip = $1; gsub(/\./, "\\.", ip);
                printf "/statd-state@%s@/p\n", ip }')

        # Intentional multi-word expansion for multiple files
        # shellcheck disable=SC2086
        items=$(sed -n "$sed_expr" $files)
        if [ -n "$items" ] ; then
            # Only discard the touch files once the transaction has
            # succeeded, so a failed push is retried on a later update.
            if echo "$items" | $CTDB ptrans "ctdb.tdb" ; then
                # shellcheck disable=SC2086
                rm $files
            fi
        fi
        ;;

    notify)
        # we must restart the lockmanager (on all nodes) so that we get
        # a clusterwide grace period (so other clients don't take out
        # conflicting locks through other nodes before all locks have been
        # reclaimed)

        # we need these settings to make sure that no tcp connections survive
        # across a very fast failover/failback
        #echo 10 > /proc/sys/net/ipv4/tcp_fin_timeout
        #echo 0 > /proc/sys/net/ipv4/tcp_max_tw_buckets
        #echo 0 > /proc/sys/net/ipv4/tcp_max_orphans

        # Delete the notification list for statd, we don't want it to
        # ping any clients
        rm -f /var/lib/nfs/statd/sm/*
        rm -f /var/lib/nfs/statd/sm.bak/*

        # We must also let some time pass between stopping and
        # restarting the lock manager.  Otherwise there is a window
        # where the lock manager will respond "strangely" immediately
        # after restarting it, which causes clients to fail to reclaim
        # their locks.
        nfs_callout_init
        "$CTDB_NFS_CALLOUT" "stop" "nlockmgr" >/dev/null 2>&1
        sleep 2
        "$CTDB_NFS_CALLOUT" "start" "nlockmgr" >/dev/null 2>&1

        # we now need to send out additional statd notifications to ensure
        # that clients understand that the lockmanager has restarted.
        # we have three cases:
        # 1, clients that ignore the ip address the stat notification came from
        #    and ONLY care about the 'name' in the notify packet.
        #    these clients ONLY work with lock failover IFF that name
        #    can be resolved into an ipaddress that matches the one used
        #    to mount the share.  (==linux clients)
        #    This is handled when starting lockmanager above, but those
        #    packets are sent from the "wrong" ip address, something linux
        #    clients are ok with, but other clients will barf at.
        # 2, Some clients only accept statd packets IFF they come from the
        #    'correct' ip address.
        # 2a,Send out the notification using the 'correct' ip address and also
        #    specify the 'correct' hostname in the statd packet.
        #    Some clients require both the correct source address and also the
        #    correct name. (these clients also ONLY work if the ip addresses
        #    used to map the share can be resolved into the name returned in
        #    the notify packet.)
        # 2b,Other clients require that the source ip address of the notify
        #    packet matches the ip address used to take out the lock.
        #    I.e. that the correct source address is used.
        #    These clients also require that the statd notify packet contains
        #    the name as the ip address used when the lock was taken out.
        #
        # Both 2a and 2b are commonly used in lockmanagers since they maximize
        # probability that the client will accept the statd notify packet and
        # not just ignore it.

        # For all IPs we serve, collect info and push to the config database

        # Construct a sed expression to take catdb output and produce pairs of:
        #   server-IP client-IP
        # but only for the server-IPs that are hosted on this node.
        ctdb_all_ips=$($CTDB ip all | tail -n +2)
        sed_expr=$(echo "$ctdb_all_ips" |
            awk -v pnn="$pnn" 'pnn == $2 {
                ip = $1; gsub(/\./, "\\.", ip);
                printf "s/^key.*=.*statd-state@\\(%s\\)@\\([^\"]*\\).*/\\1 \\2/p\n", ip }')

        # Sorted, so that all pairs for one server IP are adjacent
        statd_state=$($CTDB catdb ctdb.tdb | sed -n "$sed_expr" | sort)
        [ -n "$statd_state" ] || exit 0

        echo "$statd_state" | send_notifies
        echo "$statd_state" | delete_records

        # Remove any stale touch files (i.e. for IPs not currently
        # hosted on this node and created since the last "update").
        # There's nothing else we can do with them at this stage.
        echo "$ctdb_all_ips" |
            awk -v pnn="$pnn" 'pnn != $2 { print $1 }' |
            while read -r sip ; do
                rm -f "statd-state@${sip}@"*
            done
        ;;
esac
|