File: socket.py

package info (click to toggle)
cloud-init 25.1.4-1%2Bdeb13u1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 12,100 kB
  • sloc: python: 134,496; sh: 3,879; makefile: 128; javascript: 30; xml: 22
file content (171 lines) | stat: -rw-r--r-- 6,068 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# This file is part of cloud-init. See LICENSE file for license information.
"""A module for common socket helpers."""
import logging
import os
import socket
import sys
from contextlib import suppress

from cloudinit import performance
from cloudinit.settings import DEFAULT_RUN_DIR

LOG = logging.getLogger(__name__)


def sd_notify(message: str):
    """Send a sd_notify message.

    :param message: sd-notify message (must be valid ascii)
    """
    socket_path = os.environ.get("NOTIFY_SOCKET", "")

    if not socket_path:
        # not running under systemd, no-op
        return

    elif socket_path[0] == "@":
        # abstract
        socket_path.replace("@", "\0", 1)

    # unix domain
    elif socket_path[0] != "/":
        raise OSError("Unsupported socket type")

    with socket.socket(
        socket.AF_UNIX, socket.SOCK_DGRAM | socket.SOCK_CLOEXEC
    ) as sock:
        LOG.info("Sending sd_notify(%s)", str(message))
        sock.connect(socket_path)
        sock.sendall(message.encode("ascii"))


class SocketSync:
    """A two way synchronization protocol over Unix domain sockets."""

    def __init__(self, *names: str):
        """Initialize a synchronization context.

        1) Ensure that the socket directory exists.
        2) Bind a socket for each stage.

        Binding the sockets on initialization allows receipt of stage
        "start" notifications prior to the cloud-init stage being ready to
        start.

        :param names: stage names, used as a unique identifiers
        """
        self.stage = ""
        self.remote = ""
        self.first_exception = ""
        self.systemd_exit_code = 0
        self.experienced_any_error = False
        self.sockets = {
            name: socket.socket(
                socket.AF_UNIX, socket.SOCK_DGRAM | socket.SOCK_CLOEXEC
            )
            for name in names
        }
        # ensure the directory exists
        os.makedirs(f"{DEFAULT_RUN_DIR}/share", mode=0o700, exist_ok=True)
        # removing stale sockets and bind
        for name, sock in self.sockets.items():
            socket_path = f"{DEFAULT_RUN_DIR}/share/{name}.sock"
            with suppress(FileNotFoundError):
                os.remove(socket_path)
            sock.bind(socket_path)

    def __call__(self, stage: str):
        """Set the stage before entering context.

        This enables the context manager to be initialized separately from
        each stage synchronization.

        :param stage: the name of a stage to synchronize

        Example:
            sync = SocketSync("stage 1", "stage 2"):
            with sync("stage 1"):
                pass
            with sync("stage 2"):
                pass
        """
        if stage not in self.sockets:
            raise ValueError(f"Invalid stage name: {stage}")
        self.stage = stage
        return self

    def __enter__(self):
        """Wait until a message has been received on this stage's socket.

        Once the message has been received, enter the context.
        """
        if os.isatty(sys.stdin.fileno()):
            LOG.info(
                "Stdin is a tty, so skipping stage synchronization protocol"
            )
            return
        self.systemd_exit_code = 0
        sd_notify(
            "STATUS=Waiting on external services to "
            f"complete before starting the {self.stage} stage."
        )
        # block until init system sends us data
        # the first value returned contains a message from the init system
        #     (should be "start")
        # the second value contains the path to a unix socket on which to
        #     reply, which is expected to be /path/to/{self.stage}-return.sock
        sock = self.sockets[self.stage]
        with performance.Timed(f"Waiting to start stage {self.stage}"):
            chunk, self.remote = sock.recvfrom(5)

        if b"start" != chunk:
            # The protocol expects to receive a command "start"
            self.__exit__(None, None, None)
            raise ValueError(f"Received invalid message: [{str(chunk)}]")
        elif f"{DEFAULT_RUN_DIR}/share/{self.stage}-return.sock" != str(
            self.remote
        ):
            # assert that the return path is in a directory with appropriate
            # permissions
            self.__exit__(None, None, None)
            raise ValueError(f"Unexpected path to unix socket: {self.remote}")

        sd_notify(f"STATUS=Running ({self.stage} stage)")
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Notify the socket that this stage is complete."""
        message = f"Completed socket interaction for boot stage {self.stage}"
        if exc_type:
            # handle exception thrown in context
            self.systemd_exit_code = 1
            self.experienced_any_error = True
            status = f"{repr(exc_val)} in {exc_tb.tb_frame}"
            message = (
                'fatal error, run "systemctl status cloud-init-main.service" '
                'and "cloud-init status --long" for more details'
            )
            if not self.first_exception:
                self.first_exception = status
            LOG.fatal(status)
            sd_notify(f"STATUS={status}")

        self.experienced_any_error = self.experienced_any_error or bool(
            self.systemd_exit_code
        )
        sock = self.sockets[self.stage]
        sock.connect(self.remote)

        # the returned message will be executed in a subshell
        # hardcode this message rather than sending a more informative message
        # to avoid having to sanitize inputs (to prevent escaping the shell)
        sock.sendall(
            f"echo '{message}'; exit {self.systemd_exit_code};".encode()
        )
        sock.close()

        # suppress exception - the exception was logged and the init system
        # notified of stage completion (and the exception received as a status
        # message). Raising an exception would block the rest of boot, so carry
        # on in a degraded state.
        return True