File: utils.py

package info (click to toggle)
python-parsl 2025.12.01%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 12,112 kB
  • sloc: python: 24,369; makefile: 352; sh: 252; ansic: 45
file content (505 lines) | stat: -rw-r--r-- 14,867 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
import inspect
import logging
import os
import re
import shlex
import subprocess
import threading
import time
from contextlib import contextmanager
from types import TracebackType
from typing import (
    IO,
    Any,
    Callable,
    Dict,
    Generator,
    List,
    Optional,
    Sequence,
    Tuple,
    Union,
)

import typeguard
from typing_extensions import Type

import parsl
from parsl.app.errors import BadStdStreamFile
from parsl.version import VERSION

# setproctitle is an optional dependency: remember whether it imported so that
# the setproctitle() helper in this module can fall back to logging a warning.
try:
    import setproctitle as setproctitle_module
except ImportError:
    _setproctitle_enabled = False
else:
    _setproctitle_enabled = True


logger = logging.getLogger(__name__)


@typeguard.typechecked
def get_version() -> str:
    """Return the parsl version, annotated with git state when available.

    If the directory above this package contains a ``.git`` entry, git is
    queried and the result is ``<VERSION>-<short HEAD hash>-<clean|dirty>``,
    where ``dirty`` means ``git diff HEAD`` produced output.

    Returns:
       - the annotated version string, or ``parsl.__version__`` unchanged when
         there is no ``.git`` entry or any git invocation fails
    """
    version = parsl.__version__  # type: str
    work_tree = os.path.dirname(os.path.dirname(__file__))
    git_dir = os.path.join(work_tree, '.git')
    if not os.path.exists(git_dir):
        return version

    git_env = {'GIT_WORK_TREE': work_tree, 'GIT_DIR': git_dir}
    try:
        rev_parse_cmd = shlex.split('git rev-parse --short HEAD')
        head = subprocess.check_output(rev_parse_cmd, env=git_env).strip().decode('utf-8')
        diff = subprocess.check_output(shlex.split('git diff HEAD'), env=git_env)
        status = 'dirty' if diff else 'clean'
        return f'{VERSION}-{head}-{status}'
    except Exception:
        # Best effort only: any failure falls back to the packaged version.
        return version


@typeguard.typechecked
def get_all_checkpoints(rundir: str = "runinfo") -> Sequence[str]:
    """Collect the checkpoint directories of every run found in the rundir.

    Each entry of rundir is treated as a run id; a run contributes its
    ``<rundir>/<runid>/checkpoint`` path when that path is a directory.

    Kwargs:
       - rundir(str) : Path to the runinfo directory

    Returns:
       - a list of absolute paths, ordered by sorted run id, suitable for the
         checkpoint_files parameter of `Config`; empty if rundir is not a
         directory

    """
    if not os.path.isdir(rundir):
        return []

    candidates = (
        os.path.abspath(f'{rundir}/{runid}/checkpoint')
        for runid in sorted(os.listdir(rundir))
    )
    return [candidate for candidate in candidates if os.path.isdir(candidate)]


@typeguard.typechecked
def get_last_checkpoint(rundir: str = "runinfo") -> Sequence[str]:
    """Locate the checkpoint directory of the most recent run, if there is one.

    The most recent run is the entry of rundir that sorts last. Checkpoints
    are incremental, so checkpoints from earlier runs are not included; use
    `get_all_checkpoints` when those are wanted as well.

    Kwargs:
       - rundir(str) : Path to the runinfo directory

    Returns:
     - a list suitable for the checkpoint_files parameter of `Config`,
       holding the absolute checkpoint path of the last run, or empty when
       rundir is missing, has no entries, or the last run has no checkpoint
       directory

    """
    if not os.path.isdir(rundir):
        return []

    run_ids = sorted(os.listdir(rundir))
    if not run_ids:
        return []

    newest = os.path.abspath(f'{rundir}/{run_ids[-1]}/checkpoint')
    return [newest] if os.path.isdir(newest) else []


@typeguard.typechecked
def get_std_fname_mode(
    fdname: str,
    stdfspec: Union[os.PathLike, str, Tuple[str, str], Tuple[os.PathLike, str]]
) -> Tuple[str, str]:
    """Split a standard-stream specification into a filename and an open mode.

    Args:
        - fdname (str) : name of the stream being described; only used in the
          error message for a malformed tuple
        - stdfspec : either a bare path (str or os.PathLike), for which the
          mode is 'a+', or a (path, mode) tuple

    Returns:
        - (filename, mode), with the filename converted to str via os.fspath
          (bytes results are decoded)

    Raises:
        - BadStdStreamFile : if a tuple does not have exactly two elements, or
          os.fspath returns something that is neither str nor bytes
    """
    import parsl.app.errors as pe
    if isinstance(stdfspec, (str, os.PathLike)):
        fname = stdfspec
        mode = 'a+'
    elif isinstance(stdfspec, tuple):
        if len(stdfspec) != 2:
            # this is annotated as unreachable because the type annotation says
            # it cannot be reached. Earlier versions of typeguard did not enforce
            # that type annotation at runtime, though, and the parameters to this
            # function come from the user.
            # When typeguard lower bound is raised to around version 4, this
            # unreachable can be removed.
            msg = (f"std descriptor {fdname} has incorrect tuple length "  # type: ignore[unreachable]
                   f"{len(stdfspec)}")
            raise pe.BadStdStreamFile(msg)
        fname, mode = stdfspec

    # os.fspath may hand back str or bytes depending on the PathLike object
    path = os.fspath(fname)

    if isinstance(path, str):
        return path, mode
    elif isinstance(path, bytes):
        return path.decode(), mode
    else:
        raise BadStdStreamFile(f"fname has invalid type {type(path)}")


@contextmanager
def wait_for_file(path: str, seconds: int = 10) -> Generator[None, None, None]:
    """Context manager that waits, on entry, for a file to appear.

    The path is polled every ``seconds / 100`` seconds until it exists or
    until ``seconds`` seconds have elapsed, whichever comes first. The body of
    the ``with`` statement then runs regardless of whether the file appeared;
    callers that need the file must check for it themselves.

    Args:
        - path (str) : path to wait for
        - seconds (int) : maximum total time to wait, in seconds. Values of 0
          or less do not wait at all.
    """
    # A monotonic deadline bounds the total wait at `seconds`. Counting
    # seconds * 100 sleeps of seconds / 100 each would wait seconds ** 2.
    deadline = time.monotonic() + seconds
    poll_interval = seconds / 100.

    # Check before sleeping so that an already-present file costs no delay.
    while not os.path.exists(path) and time.monotonic() < deadline:
        time.sleep(poll_interval)
    yield


@contextmanager
def time_limited_open(path: str, mode: str, seconds: int = 1) -> Generator[IO, None, None]:
    """Open a file after first giving it a limited time to appear.

    Waits via `wait_for_file`, then opens the path and yields the file object.
    The file is closed when the ``with`` block ends, including when the block
    exits by raising an exception.

    Args:
        - path (str) : path of the file to open
        - mode (str) : mode string passed to open()
        - seconds (int) : time limit handed to `wait_for_file`

    Raises:
        - whatever open() raises, for example when the file still does not
          exist after the wait and the mode requires it to
    """
    with wait_for_file(path, seconds):
        logger.debug("wait_for_file yielded")
    # The with-statement guarantees the close that a bare open/yield/close
    # sequence would skip if the caller's block raised.
    with open(path, mode) as f:
        yield f


def wtime_to_minutes(time_string: str) -> int:
    ''' wtime_to_minutes

    Convert a wallclock time string to a whole number of minutes.

    Only the hours and minutes fields contribute to the result; the seconds
    field must be present but is not parsed. A warning is logged when the
    result is below one minute.

    Args:
        - time_string (str) : time in HH:MM:SS format

    Returns:
        (int) minutes

    Raises:
        ValueError if the string does not have exactly three ':' separated
        fields, or the hours or minutes field is not an integer

    '''
    hh, mm, _ss = time_string.split(':')
    total_mins = int(hh) * 60 + int(mm)
    if total_mins < 1:
        logger.warning(f"Time string '{time_string}' parsed to {total_mins} minutes, less than 1")
    return total_mins


class RepresentationMixin:
    """A mixin class for adding a __repr__ method.

    The __repr__ method returns a string resembling the code used to
    instantiate the child class: positional constructor arguments first, then
    every argument that has a default and every keyword-only argument, written
    as ``name=value`` and ordered by name. Values are read from attributes of
    the same name as the constructor parameter.

    The __max_width__ class variable controls the maximum width of the
    representation string. If this width is exceeded, the representation is
    split up, with one argument or keyword argument per line.

    A constructor parameter with no matching attribute is shown as
    ``'<unrecorded>'``, unless the _validate_repr class variable is true, in
    which case an AttributeError is raised instead.

    Examples
    --------
    >>> from parsl.utils import RepresentationMixin
    >>> class Foo(RepresentationMixin):
    ...     def __init__(self, first, second, third='three', fourth='fourth'):
    ...         self.first = first
    ...         self.second = second
    ...         self.third = third
    ...         self.fourth = fourth
    >>> bar = Foo(1, 'two', fourth='baz')
    >>> bar
    Foo(1, 'two', fourth='baz', third='three')
    """
    __max_width__ = 80

    _validate_repr = False

    def __repr__(self) -> str:
        init = self.__init__  # type: ignore[misc]

        # See through one layer of functools.update_wrapper style wrapping
        # (as applied by many decorators) so that the arguments of the
        # underlying __init__ are inspected. Multiple layers of wrapping, and
        # decorators that do not set __wrapped__, are not handled.
        init = getattr(init, '__wrapped__', init)

        spec = inspect.getfullargspec(init)
        arg_names = spec.args
        if len(arg_names) > 1 and spec.defaults is not None:
            # Defaults belong to the trailing arguments, so pair from the end.
            defaults = dict(zip(reversed(arg_names), reversed(spec.defaults)))
        else:
            defaults = {}

        if self._validate_repr:
            for name in arg_names[1:]:
                if hasattr(self, name):
                    continue
                raise AttributeError(f'class {self.__class__.__name__} uses {name} in the '
                                     f'constructor, but does not define it as an attribute')

        placeholder = "<unrecorded>"

        # Skip 'self', and leave out the trailing arguments that have defaults:
        # those are rendered as keyword arguments instead.
        n_defaults = len(defaults)
        positional_names = arg_names[1:-n_defaults] if n_defaults else arg_names[1:]
        args = [getattr(self, name, placeholder) for name in positional_names]

        kwargs = {name: getattr(self, name, placeholder) for name in defaults}
        kwargs.update({name: getattr(self, name, placeholder) for name in spec.kwonlyargs})
        ordered_kwargs = sorted(kwargs.items())

        cls_name = self.__class__.__name__

        # Preferred form: everything on a single line.
        pieces = [repr(value) for value in args]
        pieces.extend(f'{key}={repr(value)}' for key, value in ordered_kwargs)
        one_line = f"{cls_name}({', '.join(pieces)})"
        if len(one_line) <= self.__class__.__max_width__:
            return one_line

        # Too wide: one argument per line, indented by four spaces.
        def indent(text: str) -> str:
            lines = text.splitlines()
            if len(lines) > 1:
                return "\n".join("    " + line for line in lines).strip()
            return text

        arg_lines = "".join(f"\n    {indent(repr(value))}," for value in args)
        kwarg_lines = ", ".join(f"\n    {key}={indent(repr(value))}"
                                for key, value in ordered_kwargs)
        return f"{cls_name}({arg_lines}{kwarg_lines}\n)"


class AtomicIDCounter:
    """Allocates consecutive integer IDs, starting at 0, in a thread-safe way.

    Allocation is serialised by the ``lock`` attribute; ``count`` holds the
    next ID that will be handed out.
    """

    def __init__(self) -> None:
        self.lock = threading.Lock()
        self.count = 0

    def get_id(self) -> int:
        """Return the next unused ID and advance the counter by one."""
        with self.lock:
            allocated, self.count = self.count, self.count + 1
        return allocated


def setproctitle(title: str) -> None:
    """Set the title of the current process, when that is supported.

    If the optional ``setproctitle`` package was importable, the call is
    forwarded to it. Otherwise a warning naming the requested title is logged
    and the process title is left unchanged.

    Args:
        - title (str) : the process title to set
    """
    if _setproctitle_enabled:
        setproctitle_module.setproctitle(title)
    else:
        # Logger.warn is a deprecated alias; use warning() like the rest of
        # this module.
        logger.warning(f"setproctitle not enabled for process {title}")


class Timer:
    """Makes a callback periodically on a background daemon thread, with a
    period specified by the interval parameter.

    The thread is started by the constructor and follows this logic :

    .. code-block:: none


        BEGIN (INTERVAL, callback) :
            callback()

            while (close() has not been called) :
                 wait up to INTERVAL seconds for close()
                 if close() was not called during the wait :
                     callback()

    An exception raised by the callback is logged and does not stop the timer.
    """

    def __init__(self, callback: Callable, *args: Any, interval: Union[float, int] = 5, name: Optional[str] = None) -> None:
        """Initialize the Timer object.
        The timer thread is started here, so the first callback may happen
        before the constructor returns.

        Args:
             - callback (callable) : invoked as callback(*args) on each event
             - args : positional arguments handed to the callback

        KWargs:
             - interval (int or float) : number of seconds between callback events;
               negative values are treated as 0
             - name (str) : a base name to use when naming the started thread
        """

        self.callback = callback
        self.cb_args = args
        self.interval = max(0, interval)

        self._kill_event = threading.Event()
        default_name = f"Timer-Thread-{id(self)}"
        thread_name = f"{name}-{default_name}" if name else default_name
        self._thread = threading.Thread(
            target=self._wake_up_timer, name=thread_name, daemon=True
        )
        self._thread.start()

    def _wake_up_timer(self) -> None:
        """Thread body: call back, then sleep on the kill event, until closed."""
        while True:
            self.make_callback()
            # wait() returns True only once close() has set the event
            if self._kill_event.wait(self.interval):
                return

    def make_callback(self) -> None:
        """Invokes the callback with the stored arguments, logging (rather than
        propagating) any exception it raises.
        """
        try:
            self.callback(*self.cb_args)
        except Exception:
            logger.error("Callback threw an exception - logging and proceeding anyway", exc_info=True)

    def close(self, timeout: Optional[float] = None) -> None:
        """Signal the timer thread to stop and join it.

        KWargs:
             - timeout (float) : maximum number of seconds to wait for the
               thread to finish; None waits without limit
        """
        self._kill_event.set()
        self._thread.join(timeout=timeout)


class AutoCancelTimer(threading.Timer):
    """
    A threading.Timer that can be used as a context manager.

    Entering the context starts the timer; leaving it cancels the timer.

    Example:

        with AutoCancelTimer(delay, your_callback):
            some_func()

    If `some_func()` returns (or raises) before the delay is up, the timer
    will be cancelled and `your_callback` will not be invoked.
    """

    def __enter__(self) -> "AutoCancelTimer":
        """Start the timer and return it."""
        self.start()
        return self

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType]
    ) -> None:
        """Cancel the timer. Returns None, so exceptions are not suppressed."""
        self.cancel()


def sanitize_dns_label_rfc1123(raw_string: str) -> str:
    """Convert input string to a valid RFC 1123 DNS label.

    The input is lowercased, every run of characters other than ``a-z`` and
    ``0-9`` becomes a single hyphen, the result is cut to 63 characters, and
    leading and trailing hyphens are removed.

    Parameters
    ----------
    raw_string : str
        String to sanitize.

    Returns
    -------
    str
        Sanitized string.

    Raises
    ------
    ValueError
        If the string is empty after sanitization.
    """
    # Lowercase, then squash each run of non-alphanumeric characters (hyphens
    # included) into one hyphen
    label = re.sub(r'[^a-z0-9]+', '-', raw_string.lower())

    # DNS label cannot exceed 63 characters; strip only after trimming so the
    # cut cannot leave a trailing hyphen behind
    label = label[:63].strip("-")

    if label:
        return label

    raise ValueError(f"Sanitized DNS label is empty for input '{raw_string}'")


def sanitize_dns_subdomain_rfc1123(raw_string: str) -> str:
    """Convert input string to a valid RFC 1123 DNS subdomain.

    The input is split on dots, empty segments are dropped, and each remaining
    segment is sanitized with `sanitize_dns_label_rfc1123`. The segments are
    rejoined with dots, cut to 253 characters, and stripped of leading and
    trailing dots and hyphens.

    Parameters
    ----------
    raw_string : str
        String to sanitize.

    Returns
    -------
    str
        Sanitized string.

    Raises
    ------
    ValueError
        If the string is empty after sanitization, or if a non-empty segment
        is empty after being sanitized as a label.
    """
    labels = [
        sanitize_dns_label_rfc1123(segment)
        for segment in raw_string.split('.')
        if segment
    ]

    # DNS subdomain cannot exceed 253 characters; strip only after trimming so
    # the cut cannot leave a trailing dot or hyphen behind
    subdomain = '.'.join(labels)[:253].strip(".-")

    if subdomain:
        return subdomain

    raise ValueError(f"Sanitized DNS subdomain is empty for input '{raw_string}'")


def execute_wait(cmd: str, walltime: Optional[int] = None) -> Tuple[int, str, str]:
    ''' Synchronously execute a commandline string on the shell.

    The command runs through the shell in its own process group, with stdout
    and stderr captured and decoded as UTF-8.

    Args:
        - cmd (string) : Commandline string to execute
        - walltime (int) : walltime in seconds; None waits without limit

    Returns:
        - retcode : Return code from the execution
        - stdout  : stdout string
        - stderr  : stderr string

    Raises:
        - subprocess.TimeoutExpired : if the command does not finish within
          walltime. The command's process group is killed and the child is
          reaped before the exception propagates.
        - any other exception raised while launching or communicating with
          the process; all failures are logged before being re-raised
    '''
    import signal

    try:
        logger.debug("Creating process with command '%s'", cmd)
        proc = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            shell=True,
            preexec_fn=os.setpgrp
        )
        logger.debug("Created process with pid %s. Performing communicate", proc.pid)
        try:
            (stdout, stderr) = proc.communicate(timeout=walltime)
        except subprocess.TimeoutExpired:
            # communicate() does not stop the child when it times out. The
            # child leads its own process group (os.setpgrp above), so kill
            # the whole group: killing only the shell would leave the commands
            # it spawned running.
            try:
                os.killpg(proc.pid, signal.SIGKILL)
            except ProcessLookupError:
                # every process in the group has already exited
                pass
            # Close our pipe ends rather than draining them, so a descendant
            # that escaped the group cannot block us, then reap the child.
            for stream in (proc.stdout, proc.stderr):
                if stream is not None:
                    stream.close()
            proc.wait()
            raise
        retcode = proc.returncode
        logger.debug("Process %s returned %s", proc.pid, proc.returncode)

    except Exception:
        logger.exception(f"Execution of command failed:\n{cmd}")
        raise
    else:
        logger.debug("Execution of command in process %s completed normally", proc.pid)

    return (retcode, stdout.decode("utf-8"), stderr.decode("utf-8"))