1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
|
#!/usr/bin/env python3
# Owner(s): ["oncall: r2p"]
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import argparse
import os
from pathlib import Path
def parse_args():
    """Build the command-line parser for this test script and parse ``sys.argv``.

    Options:
        --fail: boolean flag; ``False`` unless given on the command line.
        --touch-file-dir / --touch_file_dir: string stored on the namespace
            as ``touch_file_dir`` (``None`` when the option is omitted).

    Returns:
        The ``argparse.Namespace`` produced by ``ArgumentParser.parse_args()``.
    """
    cli = argparse.ArgumentParser(description="test script")

    # ``store_true`` already defaults to False, so no explicit default is given.
    cli.add_argument(
        "--fail",
        action="store_true",
        help="forces the script to throw a RuntimeError",
    )

    # The touched file is used for assertions. Both spellings are accepted;
    # argparse derives the destination ``touch_file_dir`` from the first one.
    touch_dir_flags = ("--touch-file-dir", "--touch_file_dir")
    cli.add_argument(
        *touch_dir_flags,
        type=str,
        help="dir to touch a file with global rank as the filename",
    )

    return cli.parse_args()
def main():
    """Print the expected environment variables, then fail or touch a file.

    Steps, in order:
      1. Parse the command line via ``parse_args()``.
      2. Print the value of every variable listed in ``env_vars``.
      3. If ``--fail`` was given, raise ``RuntimeError``.
      4. Otherwise create (touch) a file named after ``$RANK`` inside the
         directory given by ``--touch-file-dir`` and print its path.

    Raises:
        KeyError: if any variable in ``env_vars`` is missing from the
            environment.
        RuntimeError: if ``--fail`` was passed.
        ValueError: if ``--fail`` was not passed and ``--touch-file-dir`` was
            omitted, so there is nowhere to create the file.
    """
    args = parse_args()
    env_vars = [
        "LOCAL_RANK",
        "RANK",
        "GROUP_RANK",
        "ROLE_RANK",
        "ROLE_NAME",
        "LOCAL_WORLD_SIZE",
        "WORLD_SIZE",
        "ROLE_WORLD_SIZE",
        "MASTER_ADDR",
        "MASTER_PORT",
        "TORCHELASTIC_RESTART_COUNT",
        "TORCHELASTIC_MAX_RESTARTS",
        "TORCHELASTIC_RUN_ID",
        "OMP_NUM_THREADS",
        "TEST_SENTINEL_PARENT",
        "TORCHELASTIC_ERROR_FILE",
    ]

    print("Distributed env vars set by agent:")
    for env_var in env_vars:
        # Plain indexing is kept so that a missing variable raises KeyError
        # instead of being printed as a placeholder.
        value = os.environ[env_var]
        print(f"{env_var} = {value}")

    if args.fail:
        raise RuntimeError("raising exception since --fail flag was set")

    # --touch-file-dir is optional on the parser; without this check the
    # join below would fail with an opaque TypeError on None.
    if args.touch_file_dir is None:
        raise ValueError("--touch-file-dir is required when --fail is not set")

    touched = os.path.join(args.touch_file_dir, os.environ["RANK"])
    Path(touched).touch()
    print(f"Success, created {touched}")
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|