#!/usr/bin/env python3
#
# DISCLAIMER: THIS SCRIPT COMES WITH NO WARRANTY OR GUARANTEE
# OF ANY KIND.
#
# DISCLAIMER 2: THIS TOOL USES A CEPH FEATURE MARKED "(developers only)"
# YOU SHOULD NOT RUN THIS UNLESS YOU KNOW EXACTLY HOW THOSE
# FUNCTIONALITIES WORK.
#
# upmap-remapped.py
#
# Usage (print only): ./upmap-remapped.py
# Usage (production): ./upmap-remapped.py | sh
#
# Optionally ignore PGs that are actively backfilling (not backfill_wait):
# Usage: ./upmap-remapped.py --ignore-backfilling
#
# This tool will use ceph's pg-upmap-items functionality to
# quickly modify all PGs which are currently remapped to become
# active+clean. I use it in combination with the ceph-mgr upmap
# balancer and the norebalance state for these use-cases:
#
# - Change crush rules or tunables.
# - Adding capacity (add new host, rack, ...).
#
# In general, the correct procedure for using this script is:
#
# 1. Backup your osdmaps, crush maps, ...
# 2. Set the norebalance flag.
# 3. Make your change (tunables, add osds, etc...)
# 4. Run this script a few times. (Remember to | sh)
# 5. Cluster should now be 100% active+clean.
# 6. Unset the norebalance flag.
# 7. The ceph-mgr balancer in upmap mode should now gradually
# remove the upmap-items entries which were created by this
# tool.
#
# Hacked by: Dan van der Ster <daniel.vanderster@cern.ch>
import json, subprocess, sys
def eprint(*messages, **print_kwargs):
    """Print *messages* to stderr; accepts the same keywords as print()."""
    print(*messages, file=sys.stderr, **print_kwargs)
try:
    # OSD ids currently defined in the cluster; used later to sanity-check
    # candidate upmap source OSDs.
    OSDS = json.loads(subprocess.getoutput('ceph osd ls -f json'))
    # Per-OSD weight records: the "nodes" array of `ceph osd df`.  Extract
    # it in Python instead of shelling out through `jq`, which removes the
    # external dependency on jq being installed.
    DF = json.loads(subprocess.getoutput('ceph osd df -f json'))['nodes']
except (ValueError, KeyError):
    # ValueError covers unparseable output (e.g. ceph unreachable);
    # KeyError covers a df dump without a 'nodes' array.
    eprint('Error loading OSD IDs')
    sys.exit(1)
# Recognize the single optional flag; anything else on the command line
# is silently ignored, matching the script's historic behavior.
ignore_backfilling = False
for cli_arg in sys.argv[1:]:
    if cli_arg != "--ignore-backfilling":
        continue
    eprint("All actively backfilling PGs will be ignored.")
    ignore_backfilling = True
def crush_weight(osd_id):
    """Return the effective weight (crush_weight * reweight) of an OSD.

    Looks the OSD up in DF (the cached `ceph osd df` nodes).  Returns 0
    when the OSD is unknown, so it is never chosen as an upmap target.
    Parameter renamed from `id` to avoid shadowing the builtin.
    """
    for node in DF:
        if node['id'] == osd_id:
            return node['crush_weight'] * node['reweight']
    return 0
def gen_upmap(up, acting, replicated=False):
    """Build the (from, to) OSD pairs that map `up` back onto `acting`.

    Only keeps pairs where the OSDs actually differ, the source OSD still
    exists, and the destination carries a positive crush weight.  For
    replicated pools, indirect chains (a->b, b->c) are collapsed so each
    OSD appears on at most one side of the mapping.
    """
    assert len(up) == len(acting)
    pairs = [(src, dst) for src, dst in zip(up, acting)
             if src != dst and src in OSDS and crush_weight(dst) > 0]
    # if replicated, remove indirect mappings
    # e.g. ceph osd pg-upmap-items 4.5fd 603 383 499 804 804 530 &
    if replicated:
        sources = {src for src, _ in pairs}
        dests = {dst for _, dst in pairs}
        pairs = list(zip(sources - dests, dests - sources))
    return pairs
def upmap_pg_items(pgid, mapping):
    """Emit a backgrounded `ceph osd pg-upmap-items` command for one PG.

    `mapping` is a list of (from_osd, to_osd) pairs; nothing is printed
    when it is empty.
    """
    if not mapping:
        return
    tokens = ['ceph', 'osd', 'pg-upmap-items', str(pgid)]
    for src, dst in mapping:
        tokens.append(str(src))
        tokens.append(str(dst))
    print(' '.join(tokens) + ' &')
def rm_upmap_pg_items(pgid):
    """Emit a backgrounded command removing the PG's pg-upmap-items entry."""
    print(f'ceph osd rm-pg-upmap-items {pgid} &')
# start here
# discover remapped pgs
try:
    # `ceph pg ls remapped` lists every PG whose up set differs from its
    # acting set; any unparseable output (e.g. cluster unreachable) aborts.
    remapped_json = subprocess.getoutput('ceph pg ls remapped -f json')
    remapped = json.loads(remapped_json)
except ValueError:
    eprint('Error loading remapped pgs')
    sys.exit(1)
# nautilus compat
try:
    # Nautilus and later wrap the PG list in a 'pg_stats' key; a missing
    # key is treated as "no remapped PGs", which is a clean exit.
    _remapped = remapped['pg_stats']
    remapped = _remapped
except KeyError:
    eprint("There are no remapped PGs")
    sys.exit(0)
# discover existing upmaps
osd_dump_json = subprocess.getoutput('ceph osd dump -f json')
osd_dump = json.loads(osd_dump_json)
# Existing pg-upmap-items entries in the osdmap; each entry carries at
# least a 'pgid' field (see the has_upmap pass below).
upmaps = osd_dump['pg_upmap_items']
# discover pools replicated or erasure
# Each matching line looks like "pool <id> '<name>' <type> ...", so map
# the numeric pool id (kept as a string) to its type string
# ('replicated' or 'erasure').
pool_type = {}
try:
    for line in subprocess.getoutput('ceph osd pool ls detail').split('\n'):
        if 'pool' in line:
            x = line.split(' ')
            pool_type[x[1]] = x[3]
except Exception:
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
    # are no longer swallowed; any parsing failure still aborts cleanly.
    eprint('Error parsing pool types')
    sys.exit(1)
# discover if each pg is already upmapped
# Map pgid -> True for every PG that already has a pg-upmap-items entry.
has_upmap = {str(entry['pgid']): True for entry in upmaps}
# handle each remapped pg
# The grep pattern uses BRE alternation (backslash-pipe).  Raw strings
# emit the identical text while avoiding the invalid '\|' escape, which
# is a SyntaxWarning on Python 3.12+.
print(r'while ceph status | grep -q "peering\|activating"; do sleep 2; done')
num = 0  # commands emitted since the last wait/settle barrier
for pg in remapped:
    # Every 50 commands, wait for the backgrounded ceph calls to finish
    # and let peering/activation settle before continuing.
    if num == 50:
        print(r'wait; sleep 4; while ceph status | grep -q "peering\|activating"; do sleep 2; done')
        num = 0
    if ignore_backfilling:
        if "backfilling" in pg['state']:
            continue
    pgid = pg['pgid']
    # A PG that already has an upmap-items entry gets that entry removed;
    # dropping the exception is the quickest route back to active+clean.
    try:
        if has_upmap[pgid]:
            rm_upmap_pg_items(pgid)
            num += 1
            continue
    except KeyError:
        pass
    up = pg['up']
    acting = pg['acting']
    pool = pgid.split('.')[0]  # pgid is "<pool>.<seq>"
    if pool_type[pool] == 'replicated':
        try:
            pairs = gen_upmap(up, acting, replicated=True)
        except Exception:
            # Narrowed from bare `except:`; a failed mapping (e.g. the
            # up/acting length assert) just skips this PG.
            continue
    elif pool_type[pool] == 'erasure':
        try:
            pairs = gen_upmap(up, acting)
        except Exception:
            continue
    else:
        eprint('Unknown pool type for %s' % pool)
        sys.exit(1)
    upmap_pg_items(pgid, pairs)
    num += 1
print(r'wait; sleep 4; while ceph status | grep -q "peering\|activating"; do sleep 2; done')