import random
import numpy as np
import pytest
import torch
from common_utils import (
    CUDA_NOT_AVAILABLE_MSG,
    IN_FBCODE,
    IN_OSS_CI,
    IN_RE_WORKER,
    MPS_NOT_AVAILABLE_MSG,
    OSS_CI_GPU_NO_CUDA_MSG,
)


def pytest_configure(config):
    # Register additional markers (see pytest_collection_modifyitems below).
    config.addinivalue_line("markers", "needs_cuda: mark for tests that rely on a CUDA device")
    config.addinivalue_line("markers", "needs_mps: mark for tests that rely on an MPS device")
    config.addinivalue_line("markers", "dont_collect: mark for tests that should not be collected")
    config.addinivalue_line("markers", "opcheck_only_one: only opcheck one parametrization")
def pytest_collection_modifyitems(items):
    # This hook is called by pytest after it has collected the tests (google its name to check out its doc!)
    # We can ignore some tests as we see fit here, or add marks, such as a skip mark.
    #
    # Typically, here, we try to optimize CI time. In particular, the GPU CI instances don't need to run the
    # tests that don't need CUDA, because those tests are already run extensively on the CPU CI instances.
    # This is true for both the OSS CI and the fbcode internal CI.
    # In the fbcode CI, we have an additional constraint: we try to avoid skipping tests. So instead of relying on
    # pytest.mark.skip, in fbcode we literally just remove those tests from the `items` list, and it's as if
    # these tests never existed.
    out_items = []
    for item in items:
        # The needs_cuda mark will exist if the test was explicitly decorated with
        # the @needs_cuda decorator. It will also exist if it was parametrized with a
        # parameter that has the mark: for example if a test is parametrized with
        # @pytest.mark.parametrize('device', cpu_and_cuda())
        # the "instances" of the test where device == 'cuda' will have the 'needs_cuda' mark,
        # and the ones with device == 'cpu' won't have the mark (see the sketch after this
        # hook for roughly how that works).
        needs_cuda = item.get_closest_marker("needs_cuda") is not None
        needs_mps = item.get_closest_marker("needs_mps") is not None

        if needs_cuda and not torch.cuda.is_available():
            # In general, we skip CUDA tests on machines without a GPU.
            # There are special cases though, see below.
            item.add_marker(pytest.mark.skip(reason=CUDA_NOT_AVAILABLE_MSG))

        if needs_mps and not torch.backends.mps.is_available():
            item.add_marker(pytest.mark.skip(reason=MPS_NOT_AVAILABLE_MSG))

        if IN_FBCODE:
            # fbcode doesn't like skipping tests, so instead we just don't collect the test,
            # so that it doesn't even "exist", hence the continue statements.
            if not needs_cuda and IN_RE_WORKER:
                # The RE workers are the machines with GPUs; we don't want them to run the CPU-only tests.
                continue
            if needs_cuda and not torch.cuda.is_available():
                # On the test machines without a GPU, we want to ignore the tests that need CUDA.
                # TODO: something more robust would be to do that only in a sandcastle instance,
                # so that we can still see the test being skipped when testing locally from a devvm.
                continue
            if needs_mps and not torch.backends.mps.is_available():
                # Same as above, but for MPS.
                continue
        elif IN_OSS_CI:
            # Here we're not in fbcode, so we can safely collect and skip tests.
            if not needs_cuda and torch.cuda.is_available():
                # Similar to what happens in the RE workers: we don't need the OSS CI GPU machines
                # to run the CPU-only tests.
                item.add_marker(pytest.mark.skip(reason=OSS_CI_GPU_NO_CUDA_MSG))

        if item.get_closest_marker("dont_collect") is not None:
            # Currently, this is only used for some tests we're sure we don't want to run on fbcode.
            continue

        out_items.append(item)

    items[:] = out_items
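

# A minimal sketch of how a parametrization helper such as cpu_and_cuda() (imported by the
# test files from common_utils, not defined in this conftest) can attach the needs_cuda mark
# to the CUDA instances only. The actual implementation lives in common_utils and may differ;
# this is an assumption for illustration:
#
#     def cpu_and_cuda():
#         return ("cpu", pytest.param("cuda", marks=pytest.mark.needs_cuda))
#
# With @pytest.mark.parametrize("device", cpu_and_cuda()), only the device == 'cuda'
# instances carry the mark, which is what get_closest_marker("needs_cuda") above detects.

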
def pytest_sessionfinish(session, exitstatus):
    # This hook is called after all tests have run, and just before returning an exit status.
    # We here change exit code 5 into 0.
    #
    # 5 is issued when no tests were actually run, e.g. if you use `pytest -k some_regex_that_is_never_matched`.
    #
    # Having no test being run for a given test rule is a common scenario in fbcode, and typically happens on
    # the GPU test machines which don't run the CPU-only tests (see pytest_collection_modifyitems above). For
    # example `test_transforms.py` doesn't contain any CUDA test at the time of
    # writing, so on a GPU test machine, testpilot would invoke pytest on this file and no test would be run.
    # This would result in pytest returning 5, causing testpilot to raise an error.
    # To avoid this, we transform this 5 into a 0 to make testpilot happy.
    if exitstatus == 5:
        session.exitstatus = 0
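

# Note (a sketch, assuming pytest >= 5.0): exit code 5 is pytest.ExitCode.NO_TESTS_COLLECTED,
# so an equivalent, more self-documenting spelling of the check above would be:
#
#     if exitstatus == pytest.ExitCode.NO_TESTS_COLLECTED:
#         session.exitstatus = pytest.ExitCode.OK
#
# The hook above keeps the literal 5, which works regardless of whether that enum is available.

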
@pytest.fixture(autouse=True)
def prevent_leaking_rng():
    # Prevent each test from leaking the rng to all other tests when they call
    # torch.manual_seed() or random.seed() or np.random.seed().
    # Note: the numpy rngs should never leak anyway, as we never use
    # np.random.seed() and instead rely on np.random.RandomState instances (see
    # issue #4247). We still do it as an extra precaution.
    torch_rng_state = torch.get_rng_state()
    builtin_rng_state = random.getstate()
    numpy_rng_state = np.random.get_state()
    if torch.cuda.is_available():
        cuda_rng_state = torch.cuda.get_rng_state()

    yield

    torch.set_rng_state(torch_rng_state)
    random.setstate(builtin_rng_state)
    np.random.set_state(numpy_rng_state)
    if torch.cuda.is_available():
        torch.cuda.set_rng_state(cuda_rng_state)
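

# Illustration of the leak the fixture above prevents (hypothetical tests, for exposition
# only, not part of this conftest):
#
#     def test_a():
#         torch.manual_seed(0)  # without the fixture, this would also re-seed...
#
#     def test_b():
#         x = torch.rand(3)     # ...the generator used here, so test_b's "random" data
#                               # would depend on whether test_a ran before it.
#
# Saving the torch / builtin random / numpy (and CUDA, when available) generator states
# before each test and restoring them afterwards keeps every test independent of the others.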