File: criu-ns

package info (click to toggle)
criu 4.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 11,500 kB
  • sloc: ansic: 139,280; python: 7,484; sh: 3,824; java: 2,799; makefile: 2,659; asm: 1,137; perl: 206; xml: 117; exp: 45
file content (342 lines) | stat: -rwxr-xr-x 10,263 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
#!/usr/bin/env python3
import ctypes
import ctypes.util
import errno
import sys
import os
import fcntl
import termios
import time

# <sched.h> constants for unshare
# CLONE_NEWNS is passed to unshare() for a new mount namespace and
# CLONE_NEWPID for a new pid namespace (see wrap_restore()).
CLONE_NEWNS = 0x00020000
CLONE_NEWPID = 0x20000000

# <sys/mount.h> - constants for mount
# MS_SLAVE | MS_REC is used by _mount_new_proc(); MS_PRIVATE is defined
# here but is not referenced by the code in this file.
MS_REC = 16384
MS_PRIVATE = 1 << 18
MS_SLAVE = 1 << 19

# Load libc bindings
# use_errno=True makes the C errno of a failed call available through
# ctypes.get_errno(), which the wrappers below use to build OSError.
_libc = ctypes.CDLL(ctypes.util.find_library("c"), use_errno=True)

# Each binding below follows the same pattern: look the symbol up (ctypes
# raises AttributeError when libc does not export it), fail right away at
# import time with EINVAL if it is missing, otherwise declare the C
# signature so ctypes converts arguments and the return value correctly.

# int unshare(int flags)
try:
    _unshare = _libc.unshare
except AttributeError:
    raise OSError(errno.EINVAL, "unshare is not supported on this platform")
else:
    _unshare.argtypes = [ctypes.c_int]
    _unshare.restype = ctypes.c_int

# int setns(int fd, int nstype)
try:
    _setns = _libc.setns
except AttributeError:
    raise OSError(errno.EINVAL, "setns is not supported on this platform")
else:
    _setns.argtypes = [ctypes.c_int, ctypes.c_int]
    _setns.restype = ctypes.c_int

# int mount(const char *source, const char *target,
#           const char *filesystemtype, unsigned long mountflags,
#           const void *data)
try:
    _mount = _libc.mount
except AttributeError:
    raise OSError(errno.EINVAL, "mount is not supported on this platform")
else:
    _mount.argtypes = [
        ctypes.c_char_p,
        ctypes.c_char_p,
        ctypes.c_char_p,
        ctypes.c_ulong,
        ctypes.c_void_p
    ]
    _mount.restype = ctypes.c_int


def _mount_new_proc():
    """
    Mount a fresh procfs instance on /proc.

    Two mount(2) calls are issued in order: first "/" is switched,
    recursively, to slave propagation (MS_SLAVE | MS_REC), then a new
    "proc" filesystem is mounted on /proc.

    Raises:
        OSError: if either mount call fails; the errno reported by libc is
            used and the message is its symbolic name (e.g. "EPERM").
    """
    # (source, target, fstype, flags, data) for each mount(2) call, in order.
    mount_requests = (
        (None, b"/", None, MS_SLAVE | MS_REC, None),
        (b'proc', b'/proc', b'proc', 0, None),
    )
    for request in mount_requests:
        if _mount(*request):
            err = ctypes.get_errno()
            raise OSError(err, errno.errorcode[err])


def _wait_for_process_status(criu_pid):
    """
    Wait for CRIU to exit and report the status back.
    """
    while True:
        try:
            (pid, status) = os.wait()
            if pid == criu_pid:
                # The following code block is based on
                # os.waitstatus_to_exitcode() introduced in Python 3.9
                # and we implement this for comparability with older
                # versions of Python.
                if os.WIFSIGNALED(status):
                    return os.WTERMSIG(status)
                elif os.WIFEXITED(status):
                    return os.WEXITSTATUS(status)
                elif os.WIFSTOPPED(status):
                    return os.WSTOPSIG(status)
                else:
                    raise Exception("CRIU was terminated by an "
                                    "unidentified reason")
        except OSError:
            return -251


def run_criu(args):
    """
    Replace the current process with the CRIU binary.

    On success this function does not return: os.execv()/os.execvp()
    replace the running program. It only ever "returns" by raising.

    Args:
        args: list of CRIU command-line arguments (without argv[0]). The
            list is modified in place: an optional "--criu-binary PATH"
            pair is removed and "criu" is inserted as argv[0].

    Raises:
        OSError: with ENOENT and "--criu-binary missing argument" when
            "--criu-binary" is the last argument, or with the errno of the
            failed exec (and the binary path as filename) when the given
            binary cannot be executed. When no "--criu-binary" is given,
            whatever os.execvp() raises for "criu" is propagated.
    """
    print(sys.argv)

    if "--criu-binary" in args:
        opt_index = args.index("--criu-binary")
        # Only the lookup of the option value can mean "missing argument";
        # keep exec failures out of this handler so they are not mislabeled.
        try:
            path = args[opt_index + 1]
        except IndexError:
            raise OSError(errno.ENOENT, "--criu-binary missing argument")
        del args[opt_index:opt_index + 2]
        args.insert(0, "criu")
        try:
            os.execv(path, args)
        except OSError as err:
            # Re-raise with the path attached so the user can see which
            # binary could not be executed.
            raise OSError(err.errno, err.strerror, path)
    else:
        args.insert(0, "criu")
        os.execvp("criu", args)


# pidns_holder creates a process that is reparented to the init.
#
# The init process of a pid namespace can exit once it has no child
# processes left, and the pid namespace is destroyed when that happens.
# CRIU dump runs inside the target pid namespace and kills the dumped
# processes at the end, so a holder process is created to make sure the pid
# namespace is not destroyed before criu exits.
def pidns_holder():
    """
    Double-fork a holder process that blocks on a pipe.

    The grandchild closes its copy of the write end and blocks reading the
    pipe; the intermediate child exits immediately and is reaped here. The
    caller keeps the write end open, so the grandchild's read returns only
    after the kernel closes that descriptor when the caller exits.
    """
    read_end, write_end = os.pipe()

    first_child = os.fork()
    if first_child != 0:
        # Original process: reap the short-lived intermediate child.
        os.waitpid(first_child, 0)
        return

    # Intermediate child: fork the actual holder and exit right away.
    holder = os.fork()
    if holder == 0:
        os.close(write_end)
        # The write end is owned by the parent process and it is closed by
        # kernel when the parent process exits.
        os.read(read_end, 1)
    sys.exit(0)


def wrap_restore():
    """
    Run "criu restore" inside freshly created pid and mount namespaces.

    The arguments are taken from sys.argv[1:]. "-d"/"--restore-detached"
    and "--pidfile FILE" are consumed here and not forwarded to CRIU; a
    relative pidfile is resolved against the first of --work-dir, -W,
    --images-dir, -D that is present.

    Returns:
        0 when a detached restore was requested, otherwise the value of
        _wait_for_process_status() for the CRIU child.

    Raises:
        OSError: for "--restore-sibling" (EINVAL), for an option that lacks
            its value (ENOENT), when unshare fails, or (in the child) when
            --shell-job is requested and stdin is not a tty.
    """
    restore_args = sys.argv[1:]
    if '--restore-sibling' in restore_args:
        raise OSError(errno.EINVAL, "--restore-sibling is not supported")

    # Unshare pid namespace
    if _unshare(CLONE_NEWPID) != 0:
        err = ctypes.get_errno()
        raise OSError(err, errno.errorcode[err])

    # Detaching is handled by this wrapper, so strip both spellings.
    detached = False
    for flag in ('-d', '--restore-detached'):
        if flag in restore_args:
            detached = True
            restore_args.remove(flag)

    pidfile = None
    if '--pidfile' in restore_args:
        pos = restore_args.index('--pidfile')
        try:
            pidfile = restore_args[pos + 1]
        except IndexError:
            raise OSError(errno.ENOENT, "--pidfile missing argument")
        del restore_args[pos:pos + 2]

        if not pidfile.startswith('/'):
            for base_opt in ('--work-dir', '-W', '--images-dir', '-D'):
                if base_opt not in restore_args:
                    continue
                pos = restore_args.index(base_opt)
                try:
                    base_dir = restore_args[pos + 1]
                except IndexError:
                    raise OSError(errno.ENOENT, base_opt + " missing argument")
                pidfile = os.path.join(base_dir, pidfile)
                break

    criu_pid = os.fork()
    if criu_pid == 0:
        # Unshare mount namespace
        if _unshare(CLONE_NEWNS) != 0:
            err = ctypes.get_errno()
            raise OSError(err, errno.errorcode[err])

        os.setsid()
        # Set stdin tty to be a controlling tty of our new session, this is
        # required by --shell-job option, as for it CRIU would try to set a
        # process group of restored root task to be a foreground group on the
        # terminal.
        wants_shell_job = '--shell-job' in restore_args or '-j' in restore_args
        if wants_shell_job:
            stdin_fd = sys.stdin.fileno()
            if not os.isatty(stdin_fd):
                raise OSError(errno.EINVAL, 'The stdin is not a tty for a --shell-job')
            fcntl.ioctl(stdin_fd, termios.TIOCSCTTY, 1)

        _mount_new_proc()
        run_criu(restore_args)

    if pidfile:
        # The first child of the CRIU process is taken as the restored
        # task; poll for it a limited number of times.
        children_path = '/proc/%d/task/%d/children' % (criu_pid, criu_pid)
        restored_pid = None
        for _attempt in range(5):
            with open(children_path) as children:
                first_line = children.readline().strip()
            if first_line:
                restored_pid = first_line
                break
            time.sleep(1)

        if restored_pid:
            with open(pidfile, 'w+') as out:
                out.write(restored_pid)
        else:
            print("Warn: Search of restored pid for --pidfile option timeouted")

    if detached:
        return 0

    return _wait_for_process_status(criu_pid)


def get_varg(args):
    """
    Find the value following the first matching option in sys.argv.

    Args:
        args: collection of option spellings to look for, e.g.
            ('-t', '--tree'). sys.argv[0] is never considered.

    Returns:
        A tuple (value, index) where value is the sys.argv element right
        after the first matching option and index is its position in
        sys.argv. (None, None) if no option matches or the first match is
        the last element of sys.argv.
    """
    argv = sys.argv
    for position, token in enumerate(argv[1:], start=1):
        if token in args:
            value_position = position + 1
            if value_position < len(argv):
                return (argv[value_position], value_position)
            # The option is the last word on the command line.
            break

    return (None, None)


def _set_namespace(fd):
    """
    Join the namespace referred to by the open descriptor fd.

    setns() is called with nstype 0, i.e. without restricting the kind of
    namespace fd may refer to.

    Raises:
        OSError: if setns() fails; carries the libc errno and its symbolic
            name as the message.
    """
    if _setns(fd, 0) == 0:
        return
    err = ctypes.get_errno()
    raise OSError(err, errno.errorcode[err])


def is_my_namespace(fd, ns):
    """
    Tell whether fd refers to the calling process's own namespace.

    Args:
        fd: open descriptor of a /proc/<pid>/ns/<ns> file.
        ns: namespace name as it appears under /proc/self/ns
            (e.g. "pid", "mnt").

    Returns:
        True if the inode number behind fd equals the inode number of
        /proc/self/ns/<ns>, False otherwise.
    """
    own_inode = os.stat('/proc/self/ns/%s' % ns).st_ino
    other_inode = os.fstat(fd).st_ino
    return own_inode == other_inode


def set_pidns(tpid, pid_idx):
    """
    Join the pid namespace of the target task.

    If the target lives in a different pid namespace, its pid as seen from
    inside that namespace differs from the one given on the command line,
    so the value of the -t/--tree option in sys.argv is rewritten before
    the namespace is entered.

    Args:
        tpid: pid of the target task as a string, as given on the command
            line.
        pid_idx: index in sys.argv that holds this pid.

    Raises:
        OSError: ESRCH if the first NSpid entry of the target does not
            match tpid, ENOENT if /proc/<tpid>/status has no NSpid line,
            or whatever opening the proc files / setns() raises.
    """
    ns_fd = os.open('/proc/%s/ns/pid' % tpid, os.O_RDONLY)
    # try/finally guarantees the descriptor is closed on every exit path,
    # including unexpected errors while reading /proc or joining the ns.
    try:
        if is_my_namespace(ns_fd, "pid"):
            return

        with open('/proc/%s/status' % tpid) as status_file:
            for line in status_file:
                if not line.startswith('NSpid:'):
                    continue
                ls = line.split()
                if ls[1] != tpid:
                    raise OSError(errno.ESRCH, 'No such pid')

                # NSpid lists the pid in each nested pid namespace, outermost
                # first; the last entry is the pid inside the task's own pid
                # namespace, which is the one being joined here.
                inner_pid = ls[-1]
                print('Replace pid {} with {}'.format(tpid, inner_pid))
                sys.argv[pid_idx] = inner_pid
                break
            else:
                raise OSError(errno.ENOENT, 'Cannot find NSpid field in proc')

        _set_namespace(ns_fd)
    finally:
        os.close(ns_fd)


def set_mntns(tpid):
    """
    Join the mount namespace of the target task.

    Nothing is done if the target already shares the caller's mount
    namespace. Otherwise the namespace is entered, the working directory
    is re-entered by path, and "/" and "." are verified to be the very
    same filesystem objects (device and inode) as before the switch.

    Args:
        tpid: pid of the target task, used to build /proc/<tpid>/ns/mnt.

    Raises:
        OSError: EXDEV if "/" or "." differs after joining the namespace,
            or whatever stat/chdir/setns raises.
    """
    ns_fd = os.open('/proc/%s/ns/mnt' % tpid, os.O_RDONLY)
    # try/finally guarantees the descriptor is closed on every exit path,
    # e.g. also when chdir() fails because the path is missing in the
    # target namespace.
    try:
        if is_my_namespace(ns_fd, "mnt"):
            return

        # Remember what "/" and "." are before switching namespaces.
        root_st = os.stat('/')
        cwd_st = os.stat('.')
        cwd_path = os.path.realpath('.')

        _set_namespace(ns_fd)

        os.chdir(cwd_path)
        root_nst = os.stat('/')
        cwd_nst = os.stat('.')

        def steq(st, nst):
            """Return True if both stat results name the same object."""
            return (st.st_dev, st.st_ino) == (nst.st_dev, nst.st_ino)

        if not steq(root_st, root_nst):
            raise OSError(errno.EXDEV, 'Target ns / is not as current')
        if not steq(cwd_st, cwd_nst):
            raise OSError(errno.EXDEV, 'Target ns . is not as current')
    finally:
        os.close(ns_fd)


def wrap_dump():
    """
    Run "criu dump"/"criu pre-dump" from inside the target's namespaces.

    The target pid is taken from the -t/--tree option in sys.argv. The
    process joins the target's pid and mount namespaces, starts a pid
    namespace holder, then forks a child that executes CRIU with
    sys.argv[1:].

    Returns:
        The value of _wait_for_process_status() for the CRIU child.

    Raises:
        OSError: EINVAL if no -t/--tree value is present, plus anything
            raised while joining the namespaces.
    """
    target_pid, target_idx = get_varg(('-t', '--tree'))
    if target_pid is None:
        raise OSError(errno.EINVAL, 'No --tree option given')

    set_pidns(target_pid, target_idx)
    set_mntns(target_pid)

    pidns_holder()

    child = os.fork()
    if child == 0:
        # Child: becomes CRIU; run_criu() is not expected to return.
        run_criu(sys.argv[1:])
    return _wait_for_process_status(child)


def show_usage():
    """Print the command-line usage summary, using sys.argv[0] as the name."""
    prog = sys.argv[0]
    usage_lines = [
        "",
        "Usage:",
        "  {0} dump|pre-dump -t PID [<options>]".format(prog),
        "  {0} restore [<options>]".format(prog),
        "",
        "Commands:",
        "  dump           checkpoint a process/tree identified by pid",
        "  pre-dump       pre-dump task(s) minimizing their frozen time",
        "  restore        restore a process/tree",
        "  check          checks whether the kernel support is up-to-date",
        "",
    ]
    print("\n".join(usage_lines))


# Script entry point: dispatch on the first command-line word and use the
# wrapper's return value as the process exit status.
if __name__ == "__main__":
    if len(sys.argv) == 1:
        show_usage()
        # sys.exit() rather than the exit() builtin: the latter is injected
        # by the site module and is not available under "python -S".
        sys.exit(1)

    action = sys.argv[1]
    if action == 'restore':
        res = wrap_restore()
    elif action in ['dump', 'pre-dump']:
        res = wrap_dump()
    elif action == 'check':
        # No namespace handling is needed for "check". run_criu() either
        # replaces this process with CRIU or raises, so control does not
        # reach the sys.exit(res) below on this path.
        run_criu(sys.argv[1:])
    else:
        print('Unsupported action {} for criu-ns'.format(action))
        res = -1

    sys.exit(res)