File: ct-upmap-remapped

package info (click to toggle)
ceph-tools 0.0.40
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 112 kB
  • sloc: python: 703; sh: 626; makefile: 15
file content (173 lines) | stat: -rw-r--r-- 4,613 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#!/usr/bin/env python3
#
# DISCLAIMER: THIS SCRIPT COMES WITH NO WARRANTY OR GUARANTEE
# OF ANY KIND.
#
# DISCLAIMER 2: THIS TOOL USES A CEPH FEATURE MARKED "(developers only)"
# YOU SHOULD NOT RUN THIS UNLESS YOU KNOW EXACTLY HOW THOSE
# FUNCTIONALITIES WORK.
#
# upmap-remapped.py
#
# Usage (print only): ./upmap-remapped.py
# Usage (production): ./upmap-remapped.py | sh
#
# Optional to ignore PGs that are backfilling and not backfill+wait:
# Usage: ./upmap-remapped.py --ignore-backfilling
#
# This tool will use ceph's pg-upmap-items functionality to
# quickly modify all PGs which are currently remapped to become
# active+clean. I use it in combination with the ceph-mgr upmap
# balancer and the norebalance state for these use-cases:
#
# - Change crush rules or tunables.
# - Adding capacity (add new host, rack, ...).
#
# In general, the correct procedure for using this script is:
#
# 1. Backup your osdmaps, crush maps, ...
# 2. Set the norebalance flag.
# 3. Make your change (tunables, add osds, etc...)
# 4. Run this script a few times. (Remember to | sh)
# 5. Cluster should now be 100% active+clean.
# 6. Unset the norebalance flag.
# 7. The ceph-mgr balancer in upmap mode should now gradually
#    remove the upmap-items entries which were created by this
#    tool.
#
# Hacked by: Dan van der Ster <daniel.vanderster@cern.ch>


import json, subprocess, sys

def eprint(*args, **kwargs):
  """Print a diagnostic message on stderr, keeping stdout clean for the
  generated shell commands. Mirrors print()'s signature."""
  print(*args, **kwargs, file=sys.stderr)

# Load the cluster-wide OSD id list and per-OSD utilisation data once at
# startup.  subprocess.getoutput() never raises on command failure; a failed
# command just yields non-JSON text, which json.loads() reports as ValueError.
# NOTE(review): the 'ceph osd df' pipeline shells out to jq, so this script
# requires jq on PATH — confirm it is installed before running in production.
try:
  OSDS = json.loads(subprocess.getoutput('ceph osd ls -f json'))
  DF = json.loads(subprocess.getoutput('ceph osd df -f json | jq .nodes'))
except ValueError:
  # This message covers both loads above, even when the 'osd df' data failed.
  eprint('Error loading OSD IDs')
  sys.exit(1)

# Minimal hand-rolled flag parsing: only --ignore-backfilling is recognised;
# any other argument is silently ignored.
ignore_backfilling = False
for arg in sys.argv[1:]:
  if arg == "--ignore-backfilling":
    eprint ("All actively backfilling PGs will be ignored.")
    ignore_backfilling = True

def crush_weight(id, df=None):
  """Return the effective weight (crush_weight * reweight) of an OSD.

  id -- numeric OSD id to look up.
  df -- optional list of 'ceph osd df' node dicts; defaults to the
        module-level DF loaded at startup (kept as a default so existing
        callers are unchanged and tests can inject their own data).

  Returns 0 when the OSD is not present, so callers treat unknown or
  removed OSDs as unusable mapping targets.
  """
  nodes = DF if df is None else df
  for node in nodes:
    if node['id'] == id:
      return node['crush_weight'] * node['reweight']
  # Unknown OSD: report zero weight rather than raising.
  return 0

def gen_upmap(up, acting, replicated=False, osds=None, weight=None):
  """Build pg-upmap-items pairs that map a PG's 'up' set back onto 'acting'.

  up         -- the PG's up set (target OSDs after rebalancing).
  acting     -- the PG's acting set (where the data currently lives).
  replicated -- for replicated pools, collapse indirect mappings (see below).
  osds       -- container of existing OSD ids; defaults to module-level OSDS.
  weight     -- callable returning an OSD's effective weight; defaults to
                crush_weight(). Pairs whose acting OSD has weight <= 0 are
                skipped: such OSDs cannot keep the data.

  Raises ValueError when up and acting differ in length.  (Was a bare
  assert, which -O strips; callers already catch this and skip the PG.)
  """
  if osds is None:
    osds = OSDS
  if weight is None:
    weight = crush_weight
  if len(up) != len(acting):
    raise ValueError('up and acting sets differ in length')

  pairs = [(u, a) for u, a in zip(up, acting)
           if u != a and u in osds and weight(a) > 0]

  # For replicated pools the position within up/acting is irrelevant, so
  # drop OSDs that appear on both sides and re-pair the remainder; this
  # removes indirect chains such as:
  #   ceph osd pg-upmap-items 4.5fd 603 383 499 804 804 530 &
  if replicated:
    ups = set(u for u, _ in pairs)
    actings = set(a for _, a in pairs)
    pairs = list(zip(ups - actings, actings - ups))
  return pairs

def upmap_pg_items(pgid, mapping):
  """Emit a backgrounded 'ceph osd pg-upmap-items' shell command for pgid.

  mapping is a list of (from_osd, to_osd) pairs; nothing is printed when
  the mapping is empty.
  """
  if not mapping:
    return
  parts = ['ceph osd pg-upmap-items %s' % pgid]
  parts.extend('%s %s' % pair for pair in mapping)
  print(' '.join(parts) + ' &')

def rm_upmap_pg_items(pgid):
  """Emit a backgrounded shell command clearing any upmap entries for pgid."""
  print('ceph osd rm-pg-upmap-items {} &'.format(pgid))


# start here

# discover remapped pgs
try:
  remapped_json = subprocess.getoutput('ceph pg ls remapped -f json')
  remapped = json.loads(remapped_json)
except ValueError:
  # Non-JSON output (e.g. the ceph CLI failed or the cluster is unreachable).
  eprint('Error loading remapped pgs')
  sys.exit(1)

# nautilus compat
# Newer 'ceph pg ls' wraps the PG list under a 'pg_stats' key; when nothing
# is remapped the key is absent, which is treated as "nothing to do".
# NOTE(review): if an older Ceph returns a bare list here, the subscript
# below raises TypeError (not KeyError) and the script crashes — confirm
# the supported Ceph versions before relying on this exit path.
try:
  _remapped = remapped['pg_stats']
  remapped = _remapped
except KeyError:
  eprint("There are no remapped PGs")
  sys.exit(0)

# discover existing upmaps
# pg_upmap_items lists every PG that already has explicit upmap entries in
# the osdmap; the main loop removes (rather than stacks) those entries.
osd_dump_json = subprocess.getoutput('ceph osd dump -f json')
osd_dump = json.loads(osd_dump_json)
upmaps = osd_dump['pg_upmap_items']

# discover pools replicated or erasure
# Map pool id (as a string) -> pool type ('replicated' or 'erasure'),
# parsed from the human-readable 'ceph osd pool ls detail' output, where
# each pool line looks like:  pool <id> '<name>' <type> size ...
pool_type = {}
try:
  for line in subprocess.getoutput('ceph osd pool ls detail').split('\n'):
    # Match only real pool lines; the substring 'pool' can also appear in
    # other detail lines, so anchor the match at the start of the line.
    if line.startswith('pool '):
      x = line.split(' ')
      pool_type[x[1]] = x[3]
except Exception:
  # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit are
  # not swallowed; any parse failure still exits with a diagnostic.
  eprint('Error parsing pool types')
  sys.exit(1)

# discover if each pg is already upmapped
# Index the existing upmap entries by PG id so the main loop can test
# membership cheaply.
has_upmap = {str(entry['pgid']): True for entry in upmaps}

# handle each remapped pg
#
# Everything below writes shell to stdout: backgrounded ceph commands with a
# 'wait' barrier every 50 commands so only a bounded batch is in flight, plus
# a poll loop that holds until no PG is peering/activating.
# Raw strings keep the grep alternation '\|' literal: in a normal string
# Python >= 3.12 emits a SyntaxWarning for the invalid '\|' escape.
print(r'while ceph status | grep -q "peering\|activating"; do sleep 2; done')
num = 0
for pg in remapped:
  # Batch barrier: let the cluster settle before emitting more changes.
  if num == 50:
    print(r'wait; sleep 4; while ceph status | grep -q "peering\|activating"; do sleep 2; done')
    num = 0

  # Optionally skip PGs that are actively backfilling (not backfill_wait).
  if ignore_backfilling and "backfilling" in pg['state']:
    continue

  pgid = pg['pgid']

  # A PG that already has upmap entries gets them removed instead of
  # stacked; a later run can then upmap it cleanly if it is still remapped.
  # (dict.get replaces the previous try/except KeyError probe.)
  if has_upmap.get(pgid):
    rm_upmap_pg_items(pgid)
    num += 1
    continue

  up = pg['up']
  acting = pg['acting']
  pool = pgid.split('.')[0]  # pgid is '<pool>.<seq>'
  if pool_type[pool] == 'replicated':
    try:
      pairs = gen_upmap(up, acting, replicated=True)
    except Exception:
      continue  # best effort: skip PGs whose mapping cannot be computed
  elif pool_type[pool] == 'erasure':
    try:
      pairs = gen_upmap(up, acting)
    except Exception:
      continue  # best effort: skip PGs whose mapping cannot be computed
  else:
    eprint('Unknown pool type for %s' % pool)
    sys.exit(1)
  upmap_pg_items(pgid, pairs)
  num += 1

print(r'wait; sleep 4; while ceph status | grep -q "peering\|activating"; do sleep 2; done')