1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273
|
/* Copyright © Triad National Security, LLC, and others. */
/* Function prefixes:
fuse_ libfuse; docs: https://libfuse.github.io/doxygen/globals.html
sqfs_ll_ SquashFUSE; no docs but: https://github.com/vasi/squashfuse
sq_ Charliecloud */
#define _GNU_SOURCE
// config.h and fuse.h are below for this file
#include <sys/prctl.h>
#include <sys/wait.h>
#include <unistd.h>
// SquashFUSE has a bug [1] where ll.h includes SquashFUSE's own config.h.
// This clashes with our own config.h, as well as the system headers because
// it defines _POSIX_C_SOURCE. By defining SQFS_CONFIG_H, SquashFUSE's
// config.h skips itself.
// [1]: https://github.com/vasi/squashfuse/issues/65
#define SQFS_CONFIG_H
// But then FUSE_USE_VERSION isn't defined, which makes other parts of ll.h
// puke. Looking at their code, it seems the only values used are 32 (for
// libfuse3) and 26 (for libfuse2), so we can just blindly define it.
#define FUSE_USE_VERSION 32
// SquashFUSE redefines __le16 unless HAVE_LINUX_TYPES_LE16 is defined. We are
// assuming it is defined in <linux/types.h> on your machine.
#define HAVE_LINUX_TYPES_LE16
// The forget operation in libfuse3 takes uint64_t as its third parameter,
// while SquashFUSE defaults to unsigned long, as used in libfuse2. This
// causes a mess on architectures where these two types differ in size, so
// explicitly switch to the libfuse3 variant.
#define HAVE_FUSE_LL_FORGET_OP_64T
// Now we can include ll.h.
#include <squashfuse/ll.h>
#include "config.h" // here to avoid potential clash with SquashFUSE config.h
#include "all.h"
/** Types **/
/* A SquashFUSE mount: the state needed to run the FUSE loop for one mounted
   SquashFS image and to unmount it afterwards. SquashFUSE allocates ll for
   us but not chan; use pointers for both for consistency. */
struct squash {
   // Path to mount point. Stored by sq_mount() from its mountpt argument and
   // used again by sq_loop() when unmounting.
   char *mountpt; // path to mount point
   // Allocated in sq_mount(). chan->session is the FUSE session that
   // sq_loop() runs and that sq_done_request() asks to exit.
   sqfs_ll_chan *chan; // FUSE channel associated with SquashFUSE mount
   // Returned by sqfs_ll_open() in sq_mount(); destroyed in sq_loop().
   sqfs_ll *ll; // SquashFUSE low-level data structure
};
/** Constants **/
/* This mapping tells libfuse what functions implement which FUSE operations.
It is passed to sqfs_ll_mount(). Why it is not internal to SquashFUSE I
have no idea. */
struct fuse_lowlevel_ops OPS = {
.getattr = &sqfs_ll_op_getattr,
.opendir = &sqfs_ll_op_opendir,
.releasedir = &sqfs_ll_op_releasedir,
.readdir = &sqfs_ll_op_readdir,
.lookup = &sqfs_ll_op_lookup,
.open = &sqfs_ll_op_open,
.create = &sqfs_ll_op_create,
.release = &sqfs_ll_op_release,
.read = &sqfs_ll_op_read,
.readlink = &sqfs_ll_op_readlink,
.listxattr = &sqfs_ll_op_listxattr,
.getxattr = &sqfs_ll_op_getxattr,
.forget = &sqfs_ll_op_forget,
.statfs = &stfs_ll_op_statfs
};
/** Global variables **/
/* SquashFUSE mount. Initialized in sq_mount() and then used in most of the
   other functions in this file: sq_done_request() reads sq.chan->session to
   stop the FUSE loop, and sq_loop() runs that session and later tears the
   mount down. It's a global because the signal handler needs access to
   it. */
struct squash sq;
/* Flags written by the signal handler sq_done_request(); sigchld_received is
   read back by sq_loop() once the FUSE loop has returned. Both start out
   false. */

/* True if any exit request signal has been received. */
volatile bool loop_terminating = false;

/* True if the exit request signal that was acted on was SIGCHLD. */
volatile bool sigchld_received = false;
/** Function prototypes (private) **/
// Signal handler: request that the FUSE loop end.
void sq_done_request(int signum);
// Run the FUSE loop to completion; returns the exit code to use. Declared
// with (void) so this is a real prototype matching the definition below; an
// empty parameter list would leave the arguments unchecked.
int sq_loop(void);
// Mount SquashFS image img_path at directory mountpt; exits on error.
void sq_mount(const char *img_path, char *mountpt);
/** Functions **/
/* Signal handler to end the FUSE loop. On the first signal delivered, record
   that termination is under way and whether that signal was SIGCHLD, then
   ask libfuse to exit the session, which causes fuse_session_loop() to
   return. Any later signal is a no-op.

   signum: number of the signal being handled. */
void sq_done_request(int signum)
{
   if (loop_terminating) {
      return;  // only act on first signal
   }
   loop_terminating = true;
   sigchld_received = (SIGCHLD == signum);
   fuse_session_exit(sq.chan->session);
}
/* Mount SquashFS archive c->img_ref on directory c->newroot. If the latter
   is NULL, build the default mount point path under /var/tmp, create it with
   mkdirs(), and assign the path to c->newroot.

   After mounting, fork. The child returns to the caller immediately; the
   parent never returns, but instead runs the FUSE loop via sq_loop() and
   then exits with whatever sq_loop() returned, i.e. the child's exit code
   unless something else went wrong. */
void sq_fork(struct container *c)
{
   struct stat mountpt_st;
   pid_t child;

   // No mount point given, so fall back to the per-user default.
   if (NULL == c->newroot) {
      char *rel_path = asprintf_ch("/%s.ch/mnt", username);
      c->newroot = cat("/var/tmp", rel_path);
      VERBOSE("using default mount point: %s", c->newroot);
      mkdirs("/var/tmp", rel_path, NULL, NULL);
   }

   // The mount point must exist and be a directory. (SquashFS file path
   // already checked in img_type_get().)
   Zfe (stat(c->newroot, &mountpt_st),
        "can't stat mount point: %s", c->newroot);
   Tf_ (S_ISDIR(mountpt_st.st_mode), "not a directory: %s", c->newroot);

   // Set PR_SET_NO_NEW_PRIVS before mounting to actively reject running
   // fusermount3(1) setuid, even if it's installed that way.
   Zfe (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), "can't set no_new_privs");
   sq_mount(c->img_ref, c->newroot);

   // The filesystem is mounted, so forking now is free of race conditions.
   // The child falls through and returns to the caller to run the user
   // command; when that exits, the parent gets SIGCHLD.
   child = fork_ch();
   if (child > 0) {
      exit(sq_loop());  // parent only
   }
}
/* Run the squash loop to completion and return the exit code of the user
   command.

   Steps: unblock and install handlers for the signals we care about, run
   the FUSE session loop until it is asked to stop, reap the child if the
   stop was caused by SIGCHLD, then destroy and unmount the SquashFS mount.

   Returns:
     - EXIT_ERR_SQUASH if the loop ended for any reason other than SIGCHLD;
     - the child's exit status if it exited normally;
     - 128 plus the signal number if the child was killed by a signal.

   Warning: This sets up but does not restore signal handlers. */
int sq_loop(void)
{
   sigset_t mask;
   struct sigaction fin, ign;
   int looped, exit_code, child_status;

   // Unblock signals we wish to handle because we can't predict or assume mask
   // contents inherited from SLURM [0]. Use pthread_sigmask(3) instead of
   // sigprocmask(2) because it handles both single and multi-thread calling
   // process signal masks.
   //
   // [0]: https://www.gnu.org/software/libc/manual/html_node/Process-Signal-Mask.html
   Z__ (sigemptyset(&mask));
   Z__ (sigaddset(&mask, SIGCHLD));  // user command exits
   Z__ (sigaddset(&mask, SIGHUP));   // terminal/session terminated
   Z__ (sigaddset(&mask, SIGINT));   // Control-C
   Z__ (sigaddset(&mask, SIGPIPE));  // broken pipe; we don't use pipes
   Z__ (sigaddset(&mask, SIGTERM));  // somebody asked us to exit
   Z__ (pthread_sigmask(SIG_UNBLOCK, &mask, NULL));

   // Set up signal handlers. Avoid fuse_set_signal_handlers() because we need
   // to catch a different set of signals, letting some be handled by the user
   // command [1]. Use sigaction(2) instead of signal(2) because the latter's
   // man page [2] says “avoid its use” and there are reports of bad
   // interactions with libfuse [3].
   //
   // Both structures are automatic variables, so every field handed to
   // sigaction(2) must be set explicitly; otherwise sa_mask and sa_flags
   // would hold indeterminate values.
   //
   // [1]: https://unix.stackexchange.com/questions/176235
   // [2]: https://man7.org/linux/man-pages/man2/signal.2.html
   // [3]: https://stackoverflow.com/a/8918597
   fin.sa_handler = sq_done_request;
   Z__ (sigemptyset(&fin.sa_mask));  // block no other signals during handling
   fin.sa_flags = SA_NOCLDSTOP;      // only SIGCHLD on child exit
   ign.sa_handler = SIG_IGN;
   Z__ (sigemptyset(&ign.sa_mask));  // nothing extra to block
   ign.sa_flags = 0;                 // no special behavior when ignoring
   Z__ (sigaction(SIGCHLD, &fin, NULL));
   Z__ (sigaction(SIGHUP,  &ign, NULL));
   Z__ (sigaction(SIGINT,  &ign, NULL));
   Z__ (sigaction(SIGPIPE, &ign, NULL));
   Z__ (sigaction(SIGTERM, &fin, NULL));

   // Run the FUSE loop, which services FUSE requests until sq_done_request()
   // is invoked by a signal and tells it to stop, or someone unmounts the
   // filesystem externally with e.g. fusermount(1). Because we don't use
   // fuse_set_signal_handlers(), the return value doesn't contain the signal
   // number that ended the loop, contrary to the documentation.
   //
   // FIXME: this is single-threaded; see issue #1157.
   looped = fuse_session_loop(sq.chan->session);
   if (looped < 0) {
      errno = -looped;  // restore encoded errno so our logging finds it
      Tfe (0, "FUSE session failed");
   }
   VERBOSE("FUSE loop terminated successfully");

   // Clean up zombie child if exit signal was SIGCHLD.
   if (!sigchld_received)
      exit_code = EXIT_ERR_SQUASH;
   else {
      Tfe (wait(&child_status) >= 0, "can't wait for child");
      if (WIFEXITED(child_status)) {
         exit_code = WEXITSTATUS(child_status);
         VERBOSE("child terminated normally with exit code %d", exit_code);
      } else {
         // We now know that the child did not exit normally; the two
         // remaining options are (a) killed by signal and (b) stopped [1].
         // Because we didn't call waitpid(2) with WUNTRACED, we don't get
         // notified if the child is stopped [2], so it must have been
         // signaled, and we need not call WIFSIGNALED().
         //
         // [1]: https://codereview.stackexchange.com/a/109349
         // [2]: https://man7.org/linux/man-pages/man2/wait.2.html
         exit_code = 128 + WTERMSIG(child_status);
         WARNING("child terminated by signal %d", WTERMSIG(child_status));
      }
   }

   // Clean up SquashFS mount. These functions have no error reporting.
   VERBOSE("unmounting: %s", sq.mountpt);
   sqfs_ll_destroy(sq.ll);
   sqfs_ll_unmount(sq.chan, sq.mountpt);

   VERBOSE("FUSE loop done");
   return exit_code;
}
/* Mount the SquashFS img_path at mountpt. Exit on any errors.

   Fills in the global sq: stores mountpt, allocates the channel, opens the
   image, and then mounts it, retrying a limited number of times.

   img_path: path of the SquashFS image to open.
   mountpt:  directory to mount on; the pointer itself is kept in
             sq.mountpt. */
void sq_mount(const char *img_path, char *mountpt)
{
   // SquashFUSE mount takes basically a command line rather than having a
   // standard library API. It's unclear to me where this command line is
   // documented, but the libfuse docs [1] suggest mount(8).
   // [1]: https://libfuse.github.io/doxygen/fuse-3_810_83_2include_2fuse_8h.html#ad866b0fd4d81bdbf3e737f7273ba4520
   char *mount_argv[] = {"WEIRDAL", "-d"};
   int mount_argc = 1;
   if (verbose > 3) {
      mount_argc = 2;  // include -d if high verbosity
   }
   struct fuse_args mount_args = FUSE_ARGS_INIT(mount_argc, mount_argv);
   int retries_left = 5;

   sq.mountpt = mountpt;
   sq.chan = malloc_ch(sizeof(sqfs_ll_chan), true);
   sq.ll = sqfs_ll_open(img_path, 0);
   Tf_ (sq.ll != NULL, "can't open SquashFS: %s; try ch-run -vv?", img_path);

   // sqfs_ll_mount() is squirrely for a couple reasons:
   //
   //   1. Error reporting. We get back only SQFS_OK or SQFS_ERR, with no
   //      further detail. Looking at the source code [1], the latter says
   //      either fuse_session_new() or fuse_session_mount() failed, but we
   //      can't tell which, or get any further information about what went
   //      wrong. Hopefully fusermount3 also printed an error message.
   //
   //   2. Race condition. We have been seeing intermittent errors in the test
   //      suite about permission denied accessing the mount point (issue
   //      #1364). I *think* this is because a previous mount on the same
   //      location is not yet cleaned up. For this reason, we have a short
   //      retry loop.
   //
   // [1]: https://github.com/vasi/squashfuse/blob/74f4fe8/ll.c#L399
   //
   // Keep trying until the mount succeeds. Each failure either gives up
   // (no retries left) or warns, waits a second, and uses up one retry.
   while (SQFS_OK != sqfs_ll_mount(sq.chan, sq.mountpt, &mount_args,
                                   &OPS, sizeof(OPS), sq.ll)) {
      if (retries_left <= 0) {
         FATAL(1, "too many FUSE errors; giving up");
      } else {
         WARNING("FUSE error mounting SquashFS; will retry");
         sleep(1);
      }
      retries_left--;
   }
}
|