File: force.py

package info (click to toggle)
charliecloud 0.43-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,084 kB
  • sloc: python: 6,021; sh: 4,284; ansic: 3,863; makefile: 598
file content (452 lines) | stat: -rw-r--r-- 18,131 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
import re

import charliecloud as ch
import filesystem as fs


## Globals ##

FAKEROOT_DEFAULT_CONFIGS = {

   # General notes:
   #
   # 1. Semantics of these configurations. (Character limits are to support
   #    tidy code and message formatting.)
   #
   #    a. This is a dictionary of configurations, which themselves are
   #       dictionaries.
   #
   #    b. Key is an arbitrary tag; user-visible. There’s no enforced
   #       character set but let’s stick with [a-z0-9_] for now and limit to
   #       at most 10 characters.
   #
   #    c. A configuration has the following keys.
   #
   #       name ... Human-readable name for the configuration. Max 46 chars.
   #
   #       match .. Tuple; first item is the name of a file and the second is
   #                a regular expression. If the regex matches any line in the
   #                file, that configuration is used for the image.
   #
   #       init ... List of tuples containing POSIX shell commands to perform
   #                fakeroot installation and any other initialization steps.
   #
   #                Item 1: Command to detect if the step is necessary. If the
   #                  command exits successfully, the step is already
   #                  complete; if unsuccessful, it is still needed. The sense
   #                  of the test is so something like "is command FOO
   #                  available?", which seems the most common command, does
   #                  not require negation.
   #
   #                  The test should be fairly permissive; e.g., if the image
   #                  already has a fakeroot implementation installed, but
   #                  it’s a different one than we would have chosen, the
   #                  command should succeed.
   #
   #                  IMPORTANT: This command must have no side effects,
   #                  because it is normally run in all matching images, even
   #                  if --force is not specified. Note that talking to the
   #                  internet is a side effect!
   #
   #                Item 2: Command to do the init step.
   #
   #                I.e., to perform each fakeroot initialization step,
   #                ch-image does roughly:
   #
   #                  if ( ! $CMD_1 ); then
   #                      $CMD_2
   #                  fi
   #
   #                For both commands, the output is visible to the user but
   #                is not analyzed.
   #
   #       cmds ... List of RUN command words that need fakeroot injection.
   #                Each item in the list is matched against each
   #                whitespace-separated word in the RUN instructions. For
   #                example, suppose that each is the list “dnf”, “rpm”, and
   #                “yum”; consider the following RUN instructions:
   #
   #                  RUN ['dnf', 'install', 'foo']
   #                  RUN dnf install foo
   #
   #                These are fairly standard forms. “dnf” matches both, the
   #                first on the first element in the list and the second
   #                after breaking the shell command on whitespace.
   #
   #                  RUN true&&dnf install foo
   #
   #                This third example does *not* match (false negative)
   #                because breaking on whitespace yields “true&&dnf”,
   #                “install”, and “foo”; none of these words are “dnf”.
   #
   #                  RUN echo dnf install foo
   #
   #                This final example *does* match (false positive) because
   #                the second word *is* “dnf”; the algorithm isn’t smart
   #                enough to realize that it’s an argument to “echo”.
   #
   #                The last two illustrate that the algorithm uses simple
   #                whitespace delimiters, not even a partial shell parser.
   #
   #       each ... List of words to prepend to RUN instructions that match
   #                cmd_each. For example, if each is ["fr", "-z"], then these
   #                instructions:
   #
   #                  RUN ['dnf', 'install', 'foo']
   #                  RUN dnf install foo
   #
   #                become:
   #
   #                   RUN ['fr', '-z', 'dnf', 'install', 'foo']
   #                   RUN ['fr', '-z', '/bin/sh', '-c', 'dnf install foo']
   #
   #                (Note that “/bin/sh -c” is how shell-form RUN instructions
   #                are executed regardless of --force.)
   #
   # 2. The first match wins. However, because dictionary ordering can’t be
   #    relied on yet, since it was introduced in Python 3.6 [1], matches
   #    should be disjoint.
   #
   #    [1]: https://docs.python.org/3/library/stdtypes.html#dict
   #
   # 3. A matching configuration is considered applicable if any of the
   #    fakeroot-able commands are present. We do nothing if the config isn’t
   #    applicable. We do not look for other matches.
   #
   # 4. There are three implementations of fakeroot that I could find:
   #    fakeroot, fakeroot-ng, and pseudo. As of 2020-09-02:
   #
   #    * fakeroot-ng and pseudo use a daemon process, while fakeroot does
   #      not. pseudo also uses a persistent database.
   #
   #    * fakeroot-ng does not support ARM; pseudo supports many architectures
   #      including ARM.
   #
   #    * “Old” fakeroot seems to have had version 1.24 on 2019-09-07 with
   #      the most recent commit 2020-08-12.
   #
   #    * fakeroot-ng is quite old: last upstream release was 0.18 in 2013,
   #      and its source code is on Sourceforge.
   #
   #    * pseudo is also a bit old: last upstream version was 1.9.0 on
   #      2018-01-20, and the last Git commit was 2019-08-02.
   #
   #    Generally, we select the first one that seems to work in the order
   #    fakeroot, pseudo, fakeroot-ng.
   #
   # 5. Why grep a specified file vs. simpler alternatives?
   #
   #    * Look at image name: Misses derived images, large number of names
   #      seems a maintenance headache, :latest changes.
   #
   #    * grep the same file for each distro: No standardized file for this.
   #      [FIXME: This may be wrong; see issue #1292.]
   #
   #    * Ask lsb_release(1): Not always installed, requires executing ch-run.

   # Fedora notes:
   #
   # 1. The minimum supported version was chosen somewhat arbitrarily based on
   #    versions available for testing, i.e., what was on Docker Hub.
   #
   # 2. The fakeroot package is in the base repository set so enabling EPEL is
   #    not required.
   #
   # 3. Must be before “rhel8” because that matches Fedora too.

   "fedora":
   { "name": "Fedora 24+",
     "match":  ("/etc/fedora-release", r"release (?!1?[0-9] |2[0-3] )"),
     "init": [ ("command -v fakeroot > /dev/null",
                "dnf install -y fakeroot") ],
     "cmds": ["dnf", "rpm", "yum"],
     "each": ["fakeroot"] },

   # RHEL (and rebuilds like CentOS, Alma, Springdale, Rocky) notes:
   #
   # 1. These seem to have only fakeroot, which is in EPEL, not the standard
   #    repos.
   #
   # 2. Unlike some derivatives, RHEL itself doesn’t have the epel-release rpm
   #    in the standard repos; install via rpm for both to be consistent.
   #
   # 3. Enabling EPEL can have undesirable side effects, e.g. different
   #    version of things in the base repo that breaks other things. Thus,
   #    when we are done with EPEL, we uninstall it. Existing EPEL
   #    installations are left alone. (Such breakage is an EPEL bug, but we do
   #    commonly encounter it.)
   #
   # 4. “yum repolist” has a lot of side effects, e.g. locking the RPM
   #    database and asking configured repos for something or other.

   "rhel7":
   { "name": "RHEL 7 and derivatives",
     "match": ("/etc/redhat-release", r"release 7\."),
     "init": [ ("command -v fakeroot > /dev/null",
                "set -e; "
                r"if ! grep -Eq '\[epel\]' /etc/yum.conf /etc/yum.repos.d/*; then "
                "yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm; "
                "yum install -y fakeroot; "
                "yum remove -y epel-release; "
                "else "
                "yum install -y fakeroot; "
                "fi; ") ],
     "cmds": ["dnf", "rpm", "yum"],
     "each": ["fakeroot"] },

   "rhel8":
   { "name": "RHEL 8+ and derivatives",
     "match":  ("/etc/redhat-release", r"release (?![0-7]\.)"),
     "init": [ ("command -v fakeroot > /dev/null",
                "set -e; "
                r"if ! grep -Eq '\[epel\]' /etc/yum.conf /etc/yum.repos.d/*; then "
                # Macro %rhel from *-release* RPM, e.g. redhat-release-server
                # or centos-linux-release; thus reliable.
                "dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-$(rpm -E %rhel).noarch.rpm; "
                "dnf install -y fakeroot; "
                "dnf remove -y epel-release; "
                "else "
                "dnf install -y fakeroot; "
                "fi; ") ],
     "cmds": ["dnf", "rpm", "yum"],
     "each": ["fakeroot"] },

   # Debian/Ubuntu notes:
   #
   # 1. In recent Debian-based distributions apt(8) runs as an unprivileged
   #    user by default. This makes *all* apt operations fail in an
   #    unprivileged container because it can’t drop privileges. There are
   #    multiple ways to turn the “sandbox” off. AFAICT, none are documented,
   #    but this one at least appears in Google searches a lot.
   #
   #    apt also doesn’t drop privileges if there is no user _apt; in my
   #    testing, sometimes this user is present and sometimes not, for reasons
   #    I don’t understand. If not present, you get this warning:
   #
   #      W: No sandbox user '_apt' on the system, can not drop privileges
   #
   #    Configuring apt not to use the sandbox seemed cleaner than deleting
   #    this user and eliminates the warning.
   #
   # 2. If we wanted to test if a fakeroot package was installed, we could say:
   #
   #      dpkg-query -Wf '${Package}\n' \
   #      | egrep '^(fakeroot|fakeroot-ng|pseudo)$'

   "debderiv":
   { "name": "Debian 9+, Ubuntu 14+, or other derivative",
     "match": ("/etc/os-release", r"Debian GNU/Linux (?![0-8] )|Ubuntu (?![0-9]\.|1[0-3]\.)|ID_LIKE=debian"),
     "init": [ ("apt-config dump | fgrep -q 'APT::Sandbox::User \"root\"'"
                " || ! fgrep -q _apt /etc/passwd",
                "echo 'APT::Sandbox::User \"root\";'"
                " > /etc/apt/apt.conf.d/no-sandbox"),
                ("command -v fakeroot > /dev/null",
                 # update b/c base image ships with no package indexes
                 "apt-get update && apt-get install -y fakeroot") ],
     "cmds": ["apt", "apt-get", "dpkg"],
     "each": ["fakeroot"] },

   "suse":
   { "name": "(Open)SUSE 42.2+",  # no fakeroot before this
     # I don’t know if there are OpenSUSE derivatives
     "match": ("/etc/os-release", r"ID_LIKE=.*suse"),
     "init": [ ("command -v fakeroot > /dev/null",
                # fakeroot seems to have a missing dependency, otherwise
                # failing with missing getopt in the fakeroot script.
                "zypper refresh; zypper install -y fakeroot /usr/bin/getopt") ],
     "cmds": ["zypper", "rpm"],
     "each": ["fakeroot"] },

   # pacman doesn’t seem to have proper dependencies like dpkg and rpm. It
   # happens that fakeroot can fail because the downloaded version is linked
   # against a newer glibc (at least) than is in the base image. We could also
   # update glibc (and its dependency util-linux), but that causes the tests
   # to fail with fchownat() errors. Another possibility is to add “-u” to
   # update all installed packages, but that may not be what a user wants.
   "arch":
   { "name": "Arch Linux",
   "match": ("/etc/os-release", r"ID=arch"),  # /etc/arch-release empty
   "init": [ ("command -v fakeroot > /dev/null",
              "pacman -Syq --noconfirm fakeroot") ],
   "cmds": ["pacman"],
   "each": ["fakeroot"] },

   "alpine":
   { "name": "Alpine, any version",
    "match": ("/etc/alpine-release", r"[0-9]\.[0-9]+\.[0-9]+"),
    "init": [ ("command -v fakeroot > /dev/null",
               "apk update; apk add fakeroot") ],
    "cmds": ["apk"],
    "each": ["fakeroot"] },
}

# Default value of --force-cmd.
#
# NOTE: apt(8) tells people not to use it in scripts, but they do it anyway.
FORCE_CMD_DEFAULT = { "apt":     ["-o", "APT::Sandbox::User=root"],
                      "apt-get": ["-o", "APT::Sandbox::User=root"] }


## Functions ###

def force_cmd_parse(text):
   # 1. Split on “,” preceded by even number of backslashes.
   #
   # FIXME: Said backslashes are removed in the split, so you can’t have a
   # component with trailing backslashes. That seems rare so I’m not fixing
   # for now.
   args = re.split(r"(?<!\\)(?:\\\\)*,", text)
   # 2. Reject list of length < 2.
   if (len(args) < 2):
      ch.FATAL("--force-cmd: need at least one ARG")
   # 3. Reject list with empty first item.
   if (args[0] == ""):
      ch.FATAL("--force-cmd: CMD can’t be empty")
   # 4. Replace “\x” for any char x ⇒ literal “x”.
   args = [re.sub(r"\\(.)", r"\1", a) for a in args]
   return (args[0], args[1:])

def new(image_path, force_mode, force_cmds):
   """Return a new forcer object appropriate for image at image_path in mode
      force_mode. If no such object can be found, exit with error."""
   if (force_mode == ch.Force_Mode.NONE):
      return Nope()
   elif (force_mode == ch.Force_Mode.FAKEROOT):
      return Fakeroot(image_path)
   elif (force_mode == ch.Force_Mode.SECCOMP):
      return Seccomp(force_cmds)
   else:
      assert False, "unreachable code reached"


## Classes ##

class Base:

   __slots__ = ("run_modified_ct",)  # number of RUN instructions modified

   def __init__(self):
      self.run_modified_ct = 0

   @property
   def ch_run_args(self):
      "Extra arguments for ch-run."
      return []

   def run_modified(self, args, env):
      """Modify the RUN arguments args as needed, and return the result, which
         is a new list even if unmodified. env is the environment for the RUN
         instruction. May have significant side effects, including running
         other commands in the container."""
      args_new = self.run_modified_(args, env)
      if (args_new != args):
         self.run_modified_ct += 1
         ch.INFO("--force: RUN: new command: %s" % args_new)
      return args_new

   def run_modified_(self, args, env):
      return args.copy()


class Fakeroot(Base):

   __slots__ = ("tag",
                "name",
                "init",
                "cmds",
                "each",
                "install_done",
                "image_path")

   def __init__(self, image_path):
      super().__init__()
      match = False
      for (tag, cfg) in FAKEROOT_DEFAULT_CONFIGS.items():
         ch.VERBOSE("workarounds: testing config: %s" % tag)
         file_path = fs.Path("%s/%s" % (image_path, cfg["match"][0]))
         if (file_path.is_file() and file_path.grep_p(cfg["match"][1])):
            match = True
            break
      if (not match):
         ch.FATAL("--force=fakeroot not available (no suitable config found)")
      self.image_path = image_path
      self.tag = tag
      for i in ("name", "init", "cmds", "each"):
         setattr(self, i, cfg[i])
      self.install_done = False
      ch.INFO("--force=fakeroot: will use: %s: %s" % (self.tag, self.name))

   def install(self, img_path, env):
      for (i, (test_cmd, init_cmd)) in enumerate(self.init, 1):
         ch.INFO("--force=fakeroot: init step %s: checking: $ %s"
                 % (i, test_cmd))
         args = ["/bin/sh", "-c", test_cmd]
         exit_code = ch.ch_run_modify(img_path, args, env, fail_ok=True)
         if (exit_code == 0):
            ch.INFO("--force=fakeroot: init step %d: exit %d, step not needed"
                    % (i, exit_code))
         else:
            ch.INFO("--force=fakeroot: init step %d: $ %s" % (i, init_cmd))
            args = ["/bin/sh", "-c", init_cmd]
            ch.ch_run_modify(img_path, args, env)

   def needs_inject(self, args):
      """Return True if the command in args seems to need fakeroot injection,
         False otherwise."""
      for word in self.cmds:
         for arg in args:
            if (word in arg.split()):  # arg words separate by whitespace
               return True
      return False

   def run_modified_(self, args, env):
      if (not self.needs_inject(args)):
         ch.VERBOSE("--force=fakeroot: RUN: doesn’t need injection")
         return args.copy()
      if (self.install_done):
         ch.VERBOSE("--force=fakeroot: already installed")
      else:
         self.install(self.image_path, env)
         self.install_done = True
      return self.each + args


class Nope(Base):
   pass


class Seccomp(Base):

   __slots__ = ("force_cmds",)

   def __init__(self, force_cmds):
      super().__init__()
      self.force_cmds = force_cmds

   @property
   def ch_run_args(self):
      return super().ch_run_args + ["--seccomp"]

   def run_modified_(self, args, env):
      args = args.copy()
      for (cmd, args_inject) in self.force_cmds.items():
         args_new = list()
         for word in args:
            if (word == cmd):
               # It’s a list-style RUN, e.g.:
               #
               #   RUN ["apt", "install", "-y", "foo"]
               args_new += [word] + args_inject
            else:
               # It’s a shell-style RUN, e.g.:
               #
               #   RUN apt install -y foo
               #   RUN echo foo&&apt install -y foo
               #   RUN ["/bin/sh", "-c", "apt install -y foo"]
               #
               # Note this is a no-op if the command doesn’t contain cmd.
               str_inject = ch.argv_to_string(args_inject)
               args_new.append(re.sub(r"\b(%s)(\s)" % cmd,
                                      r"\1 %s\2" % str_inject, word))
         args = args_new
      return args