1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382
|
from typing import Optional, TYPE_CHECKING
import unittest
import time
import logging
from io import StringIO
from teuthology.exceptions import CommandFailedError
if TYPE_CHECKING:
from tasks.mgr.mgr_test_case import MgrCluster
# Module-level logger used by the helpers in this file.
log = logging.getLogger(__name__)
class TestTimeoutError(RuntimeError):
    """Raised by the wait_until_* helpers when a wait exceeds its timeout."""
class RunCephCmd:
    """
    Mixin providing helpers to run Ceph commands through ``self.mon_manager``.

    The class using this mixin must set a ``mon_manager`` attribute that
    offers a ``run_cluster_cmd(**kwargs)`` method.
    """

    @staticmethod
    def _merge_cmd_args(args, kwargs):
        """
        Fold positional command arguments into ``kwargs['args']``.

        Positional arguments are used only when ``kwargs`` carries no
        ``args`` entry (or carries ``None``). A single positional argument is
        passed through unwrapped, so both ``f('osd', 'ls')`` and
        ``f(['osd', 'ls'])`` are accepted by the callers of this helper.

        :param args: tuple of positional arguments received by the caller
        :param kwargs: keyword arguments received by the caller; updated
                       in place
        :return: the same ``kwargs`` dict
        """
        if kwargs.get('args') is None and args:
            kwargs['args'] = args[0] if len(args) == 1 else args
        return kwargs

    def run_ceph_cmd(self, *args, **kwargs):
        """
        Run a Ceph command and return whatever
        ``self.mon_manager.run_cluster_cmd()`` returns.

        *args and **kwargs must contain arguments that are accepted by
        vstart_runner.LocalRemote._do_run() or teuthology.orchestra.run.run()
        methods.
        """
        kwargs = self._merge_cmd_args(args, kwargs)
        return self.mon_manager.run_cluster_cmd(**kwargs)

    def get_ceph_cmd_result(self, *args, **kwargs):
        """
        Run a Ceph command and return the ``exitstatus`` of the process.

        *args and **kwargs must contain arguments that are accepted by
        vstart_runner.LocalRemote._do_run() or teuthology.orchestra.run.run()
        methods.
        """
        kwargs = self._merge_cmd_args(args, kwargs)
        return self.run_ceph_cmd(**kwargs).exitstatus

    def get_ceph_cmd_stdout(self, *args, **kwargs):
        """
        Run a Ceph command and return its captured stdout as a string.

        If the caller does not pass ``stdout``, a fresh StringIO is used to
        capture the output.

        *args and **kwargs must contain arguments that are accepted by
        vstart_runner.LocalRemote._do_run() or teuthology.orchestra.run.run()
        methods.
        """
        kwargs = self._merge_cmd_args(args, kwargs)
        kwargs['stdout'] = kwargs.pop('stdout', StringIO())
        return self.run_ceph_cmd(**kwargs).stdout.getvalue()

    def assert_retval(self, proc_retval, exp_retval):
        """
        Raise AssertionError unless ``proc_retval`` equals ``exp_retval``.
        """
        # Raised explicitly instead of using the assert statement, so that
        # the check is not removed when Python runs with -O.
        if not proc_retval == exp_retval:
            raise AssertionError(f'expected return value: {exp_retval}\n'
                                 f'received return value: {proc_retval}\n')

    def _verify(self, proc, exp_retval=None, exp_errmsgs=None):
        """
        Check a finished process against the expected failure details.

        :param proc: process object; ``proc.returncode`` is compared with
                     ``exp_retval`` and ``proc.stderr.getvalue()`` is
                     searched for the expected messages
        :param exp_retval: expected return value, or None to skip the check
        :param exp_errmsgs: a string or an iterable of strings; at least one
                            must occur (case-insensitively) in stderr. None
                            skips the check.
        :raises RuntimeError: if both ``exp_retval`` and ``exp_errmsgs`` are
                              None
        :raises AssertionError: if any requested check fails
        """
        if exp_retval is None and exp_errmsgs is None:
            raise RuntimeError('Method didn\'t get enough parameters. Pass '
                               'return value or error message expected from '
                               'the command/process.')

        if exp_retval is not None:
            self.assert_retval(proc.returncode, exp_retval)
        if exp_errmsgs is None:
            return

        if isinstance(exp_errmsgs, str):
            exp_errmsgs = (exp_errmsgs, )
        exp_errmsgs = tuple(e.lower() for e in exp_errmsgs)

        proc_stderr = proc.stderr.getvalue().lower()
        if any(e in proc_stderr for e in exp_errmsgs):
            return

        # Explicit raise (not "assert False") so the check survives -O.
        raise AssertionError(
            'didn\'t find any of the expected string in stderr.\n'
            f'expected string -\n{exp_errmsgs}\n'
            f'received error message -\n{proc_stderr}\n'
            'note: received error message is converted to lowercase')

    def negtest_ceph_cmd(self, args, retval=None, errmsgs=None, **kwargs):
        """
        Conduct a negative test for the given Ceph command.

        retval and errmsgs are parameters to confirm the cause of command
        failure.

        args and **kwargs must contain arguments that are accepted by
        vstart_runner.LocalRemote._do_run() or teuthology.orchestra.run.run()
        methods. Any 'check_status', 'stdout' or 'stderr' passed in kwargs
        is overridden by this method.

        NOTE: errmsgs is expected to be a tuple, but in case there's only one
        error message, it can also be a string. This method will add the string
        to a tuple internally.

        :return: the process object returned by run_ceph_cmd()
        """
        kwargs['args'] = args
        # execution is needed to not halt on command failure because we are
        # conducting negative testing
        kwargs['check_status'] = False
        # log stdout since it may contain something useful when command fails
        kwargs['stdout'] = StringIO()
        # stderr is needed to check for expected error messages.
        kwargs['stderr'] = StringIO()

        proc = self.run_ceph_cmd(**kwargs)
        self._verify(proc, retval, errmsgs)
        return proc
class CephTestCase(unittest.TestCase, RunCephCmd):
    """
    For test tasks that want to define a structured set of
    tests implemented in python.  Subclass this with appropriate
    helpers for the subsystem you're testing.
    """

    # Environment references
    mounts = None
    fs = None
    recovery_fs = None
    backup_fs = None
    ceph_cluster = None
    mds_cluster = None
    mgr_cluster: Optional['MgrCluster'] = None
    ctx = None

    mon_manager = None

    # Declarative test requirements: subclasses should override these to indicate
    # their special needs.  If not met, tests will be skipped.
    REQUIRE_MEMSTORE = False

    def _init_mon_manager(self):
        """
        Set ``self.mon_manager`` to the manager class matching the runner
        that invoked the test.
        """
        # if vstart_runner.py has invoked this code
        if 'Local' in str(type(self.ceph_cluster)):
            from tasks.vstart_runner import LocalCephManager
            self.mon_manager = LocalCephManager(ctx=self.ctx)
        # else teuthology has invoked this code
        else:
            from tasks.ceph_manager import CephManager
            self.mon_manager = CephManager(self.ceph_cluster.admin_remote,
                ctx=self.ctx, logger=log.getChild('ceph_manager'))

    def setUp(self):
        """
        Prepare per-test state, write a "Starting test" entry to the cluster
        log and skip the test if REQUIRE_MEMSTORE is set but the OSDs are
        not configured with the memstore backend.
        """
        # (section, key) pairs set through config_set(); removed in tearDown
        self._mon_configs_set = set()

        self._init_mon_manager()
        self.admin_remote = self.ceph_cluster.admin_remote

        self.ceph_cluster.mon_manager.raw_cluster_cmd("log",
            "Starting test {0}".format(self.id()))

        if self.REQUIRE_MEMSTORE:
            objectstore = self.ceph_cluster.get_config("osd_objectstore", "osd")
            if objectstore != "memstore":
                # You certainly *could* run this on a real OSD, but you don't want to sit
                # here for hours waiting for the test to fill up a 1TB drive!
                raise self.skipTest("Require `memstore` OSD backend (test " \
                        "would take too long on full sized OSDs")

    def tearDown(self):
        """
        Remove every config option set via config_set() and write an
        "Ended test" entry to the cluster log.
        """
        self.config_clear()

        self.ceph_cluster.mon_manager.raw_cluster_cmd("log",
            "Ended test {0}".format(self.id()))

    def config_clear(self):
        """Remove all config options recorded by config_set()."""
        for section, key in self._mon_configs_set:
            self.config_rm(section, key)
        self._mon_configs_set.clear()

    def _fix_key(self, key):
        """Return ``key`` as a string with spaces replaced by underscores."""
        return str(key).replace(' ', '_')

    def config_get(self, section, key):
        """Return the stripped output of ``ceph config get <section> <key>``."""
        key = self._fix_key(key)
        return self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "get", section, key).strip()

    def config_show(self, entity, key):
        """Return the stripped output of ``ceph config show <entity> <key>``."""
        key = self._fix_key(key)
        return self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "show", entity, key).strip()

    def config_minimal(self):
        """Return the stripped output of ``ceph config generate-minimal-conf``."""
        return self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "generate-minimal-conf").strip()

    def config_rm(self, section, key):
        """Run ``ceph config rm <section> <key>``."""
        key = self._fix_key(key)
        self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "rm", section, key)
        # simplification: skip removing from _mon_configs_set;
        # let tearDown clear everything again

    def config_set(self, section, key, value):
        """
        Run ``ceph config set <section> <key> <value>`` and record the
        (section, key) pair so that config_clear() removes it later.
        """
        key = self._fix_key(key)
        self._mon_configs_set.add((section, key))
        self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "set", section, key, str(value))

    def cluster_cmd(self, command: str):
        """
        Run a cluster command given as one string.

        The string is split on single space characters; each piece becomes
        one argument of raw_cluster_cmd().
        """
        assert self.ceph_cluster is not None
        return self.ceph_cluster.mon_manager.raw_cluster_cmd(*(command.split(" ")))

    def assert_cluster_log(self, expected_pattern, invert_match=False,
                           timeout=10, watch_channel=None, present=True):
        """
        Context manager.  Assert that during execution, or up to
        ``5 + timeout`` seconds later, the Ceph cluster log emits a message
        matching the expected pattern.

        :param expected_pattern: A string that you expect to see in the log output
        :type expected_pattern: str
        :param invert_match: If True, a "match" means the pattern is NOT in
                             the watcher output.
        :type invert_match: bool
        :param timeout: Extra seconds (on top of a fixed 5s) to wait on exit
                        when no match was seen yet.
        :type timeout: int
        :param watch_channel: Specifies the channel to be watched. This can be
                              'cluster', 'audit', ...
        :type watch_channel: str
        :param present: Assert the log entry is present (default: True) or not (False).
        :type present: bool
        """

        ceph_manager = self.ceph_cluster.mon_manager

        class ContextManager(object):
            def match(self):
                found = expected_pattern in self.watcher_process.stdout.getvalue()
                if invert_match:
                    return not found

                return found

            def _stop_watcher(self):
                # Closing stdin ends the watcher; a CommandFailedError from
                # wait() is deliberately ignored.
                self.watcher_process.stdin.close()
                try:
                    self.watcher_process.wait()
                except CommandFailedError:
                    pass

            def __enter__(self):
                self.watcher_process = ceph_manager.run_ceph_w(watch_channel)
                return self

            def __exit__(self, exc_type, exc_val, exc_tb):
                fail = False
                if not self.watcher_process.finished:
                    # Check if we got an early match, wait a bit if we didn't
                    if present and self.match():
                        # Stop the watcher on this path too, so the process
                        # is not left running after an early success.
                        self._stop_watcher()
                        return
                    elif not present and self.match():
                        fail = True
                    else:
                        log.debug("No log hits yet, waiting...")
                        # Default monc tick interval is 10s, so wait that long and
                        # then some grace
                        time.sleep(5 + timeout)

                self._stop_watcher()

                if present and not self.match():
                    log.error(f"Log output: \n{self.watcher_process.stdout.getvalue()}\n")
                    raise AssertionError(f"Expected log message not found: '{expected_pattern}'")
                elif fail or (not present and self.match()):
                    log.error(f"Log output: \n{self.watcher_process.stdout.getvalue()}\n")
                    raise AssertionError(f"Unexpected log message found: '{expected_pattern}'")

        return ContextManager()

    def wait_for_health(self, pattern, timeout, check_in_detail=None):
        """
        Wait until 'ceph health' contains messages matching the pattern
        Also check if @check_in_detail matches detailed health messages
        only when @pattern is a code string.

        :raises TestTimeoutError: if no match is seen within ``timeout``
        """
        def seen_health_warning():
            health = self.ceph_cluster.mon_manager.get_mon_health(debug=False, detail=bool(check_in_detail))
            checks = health['checks']
            summary_strings = [check['summary']['message'] for check in checks.values()]
            if not summary_strings:
                log.debug("Not expected number of summary strings ({0})".format(summary_strings))
                return False

            # the pattern may be part of a summary message ...
            if any(pattern in ss for ss in summary_strings):
                return True

            # ... or it may be a health check code
            if pattern in checks:
                if not check_in_detail:
                    return True
                # check if the string is in detail list if asked
                detail_strings = [d['message'] for d in checks[pattern]['detail']]
                log.debug(f'detail_strings: {detail_strings}')
                if any(check_in_detail in ds for ds in detail_strings):
                    return True
                log.debug(f'detail string "{check_in_detail}" not found')

            log.debug("Not found expected summary strings yet ({0})".format(summary_strings))
            return False

        log.info(f"waiting {timeout}s for health warning matching {pattern}")
        self.wait_until_true(seen_health_warning, timeout)

    def wait_for_health_clear(self, timeout):
        """
        Wait until `ceph health` returns no messages

        :raises TestTimeoutError: if checks remain after ``timeout``
        """
        def is_clear():
            health = self.ceph_cluster.mon_manager.get_mon_health()
            return len(health['checks']) == 0

        self.wait_until_true(is_clear, timeout)

    def wait_until_equal(self, get_fn, expect_val, timeout, reject_fn=None, period=5):
        """
        Poll ``get_fn()`` every ``period`` seconds until it returns
        ``expect_val``.

        :param get_fn: callable returning the current value
        :param expect_val: value to wait for
        :param timeout: seconds of accumulated sleeping after which to give up
        :param reject_fn: optional callable; if it returns true for a seen
                          value, the wait is aborted
        :param period: seconds to sleep between polls
        :raises RuntimeError: if ``reject_fn`` rejects a value
        :raises TestTimeoutError: if ``expect_val`` is not seen in time
        """
        elapsed = 0
        while True:
            val = get_fn()
            if val == expect_val:
                # logged here: the loop has no other exit than return/raise
                log.debug("wait_until_equal: success")
                return
            elif reject_fn and reject_fn(val):
                raise RuntimeError("wait_until_equal: forbidden value {0} seen".format(val))
            else:
                if elapsed >= timeout:
                    raise TestTimeoutError("Timed out after {0} seconds waiting for {1} (currently {2})".format(
                        elapsed, expect_val, val
                    ))
                else:
                    log.debug("wait_until_equal: {0} != {1}, waiting (timeout={2})...".format(val, expect_val, timeout))
                time.sleep(period)
                elapsed += period

    @classmethod
    def wait_until_true(cls, condition, timeout, check_fn=None, period=5):
        """
        Poll ``condition()`` every ``period`` seconds until it returns true.

        :param condition: callable evaluated on every poll
        :param timeout: seconds of accumulated sleeping after which to give up
        :param check_fn: optional callable consulted on timeout; while it
                         returns true the timer is reset, at most 5 times
        :param period: seconds to sleep between polls
        :raises TestTimeoutError: if the condition is not met in time
        """
        elapsed = 0
        retry_count = 0
        while True:
            if condition():
                log.debug("wait_until_true: success in {0}s and {1} retries".format(elapsed, retry_count))
                return

            if elapsed >= timeout:
                if check_fn and check_fn() and retry_count < 5:
                    elapsed = 0
                    retry_count += 1
                    log.debug("wait_until_true: making progress, waiting (timeout={0} retry_count={1})...".format(timeout, retry_count))
                else:
                    raise TestTimeoutError("Timed out after {0}s and {1} retries".format(elapsed, retry_count))
            else:
                log.debug("wait_until_true: waiting (timeout={0} retry_count={1})...".format(timeout, retry_count))
            time.sleep(period)
            elapsed += period

    @classmethod
    def wait_until_true_and_hold(cls, condition, timeout, success_hold_time, check_fn=None, period=5):
        """
        Wait until the condition is met and check if the condition holds for
        ``success_hold_time`` seconds (re-checked once per second).

        :param condition: callable evaluated on every poll
        :param timeout: seconds after which to give up; must be greater than
                        ``success_hold_time``
        :param success_hold_time: seconds the condition has to stay true
        :param check_fn: optional callable consulted on timeout; while it
                         returns true the timer is reset, at most 5 times
        :param period: seconds to sleep between polls while the condition is
                       not met
        :raises AssertionError: if ``success_hold_time`` is not less than
                                ``timeout``
        :raises TestTimeoutError: if the condition is not met in time
        """
        elapsed = 0
        retry_count = 0
        if not success_hold_time < timeout:
            raise AssertionError("success_hold_time should be less than timeout")
        while True:
            if condition():
                success_time_elapsed = 0
                while success_time_elapsed < success_hold_time and condition():
                    success_time_elapsed += 1
                    time.sleep(1)
                    elapsed += 1
                # ">=" rather than "==": the counter advances in whole
                # seconds and would step over a fractional hold time.
                if success_time_elapsed >= success_hold_time:
                    log.debug("wait_until_true_and_hold: success for {0}s".format(success_hold_time))
                    return
            else:
                if elapsed >= timeout:
                    if check_fn and check_fn() and retry_count < 5:
                        elapsed = 0
                        retry_count += 1
                        log.debug("wait_until_true_and_hold: making progress, waiting (timeout={0} retry_count={1})...".format(timeout, retry_count))
                    else:
                        raise TestTimeoutError("Timed out after {0}s and {1} retries".format(elapsed, retry_count))
                else:
                    log.debug("wait_until_true_and_hold waiting (timeout={0} retry_count={1})...".format(timeout, retry_count))
                time.sleep(period)
                elapsed += period
|