File: shepherd.scm

package info (click to toggle)
shepherd 1.0.9-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,828 kB
  • sloc: lisp: 8,779; sh: 3,586; makefile: 289; ansic: 50
file content (775 lines) | stat: -rw-r--r-- 33,105 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
;; shepherd.scm -- The daemon shepherd.
;; Copyright (C) 2013-2014, 2016, 2018-2020, 2022-2025 Ludovic Courtès <ludo@gnu.org>
;; Copyright (C) 2002, 2003 Wolfgang Jährling <wolfgang@pro-linux.de>
;; Copyright (C) 2018 Carlo Zancanaro <carlo@zancanaro.id.au>
;; Copyright (C) 2018 Danny Milosavljevic <dannym@scratchpost.org>
;;
;; This file is part of the GNU Shepherd.
;;
;; The GNU Shepherd is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3 of the License, or (at
;; your option) any later version.
;;
;; The GNU Shepherd is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with the GNU Shepherd.  If not, see <https://www.gnu.org/licenses/>.

(define-module (shepherd)
  #:use-module ((fibers)
                #:hide (sleep))                   ;avoid Guile warning
  #:use-module (ice-9 match)
  #:use-module (ice-9 format)
  #:use-module (ice-9 rdelim)   ;; Line-based I/O.
  #:use-module ((ice-9 textual-ports) #:select (put-char))
  #:use-module ((ice-9 threads) #:select (all-threads))
  #:use-module (srfi srfi-1)    ;; List library.
  #:use-module (srfi srfi-26)
  #:use-module (srfi srfi-34)
  #:use-module (srfi srfi-35)
  #:use-module (shepherd config)
  #:use-module (shepherd support)
  #:use-module (shepherd service)
  #:use-module (shepherd system)
  #:use-module (shepherd args)
  #:use-module (shepherd comm)
  #:autoload   (shepherd logger) (spawn-service-file-logger
                                  spawn-service-system-logger)
  #:autoload   (ice-9 binary-ports) (put-u8 get-u8)
  #:autoload   (fibers operations) (choice-operation
                                    perform-operation
                                    wrap-operation)
  #:autoload   (fibers io-wakeup) (wait-until-port-readable-operation)
  #:autoload   (fibers timers) (sleep-operation)
  #:export (main))


(define sleep (@ (fibers) sleep))

(define (call-with-server-socket file-name proc)
  "Call PROC, passing it a listening socket at FILE-NAME and deleting the
socket file at FILE-NAME upon exit of PROC.  Return the values of PROC."
  (let ((sock (catch 'system-error
                (lambda ()
                  (or (open-server-socket file-name
                                          #:false-if-in-use?
                                          (not (= 1 (getpid))))
                      (begin
                        ;; Refuse to start when another shepherd is already
                        ;; listening on FILE-NAME.
                        (report-error (l10n "shepherd instance already \
listening on '~a'")
                                      file-name)
                        (exit 1))))
                (lambda args
                  (match args
                    ((key proc . _)
                     (report-error (l10n "while opening socket '~a': ~a: ~a")
                                   file-name proc
                                   (strerror (system-error-errno args)))
                     (exit 1)))))))
    (catch #t
      (lambda ()
        (proc sock))
      (lambda args
        (close sock)
        (apply throw args)))))

(define (maybe-signal-port signals)
  "Return a signal port for SIGNALS, using 'signalfd' on GNU/Linux, or #f if
that is not supported."
  ;; Honor this environment variable for testing purposes: it lets us emulate
  ;; the behavior on non-Linux systems.
  (match (getenv "SHEPHERD_DISABLE_SIGNALFD")
    ((or #f "0" "no")
     (catch 'system-error
       (lambda ()
         (let ((port (signalfd -1 signals (logior SFD_CLOEXEC SFD_NONBLOCK))))
           ;; As per the signalfd(2) man page, block SIGNALS.  The tricky bit is
           ;; that SIGNALS must be blocked for all the threads; new threads will
           ;; inherit the signal mask, but we must ensure that neither Guile's
           ;; signal delivery thread nor its finalization thread are already
           ;; running, because if they do, they are not blocking SIGNALS.  The
           ;; signal delivery thread is started on the first call to 'sigaction'
           ;; so we arrange to not call 'sigaction' beforehand; as for the
           ;; finalization thread, use 'without-automatic-finalization' to
           ;; temporarily stop it.
           (without-automatic-finalization
            (let ((count (length (all-threads))))
              (if (= 1 count)
                  (begin
                    (block-signals signals)
                    port)
                  (begin
                    (local-output (l10n "warning: \
already ~a threads running, disabling 'signalfd' support")
                                  count)
                    (close-port port)
                    #f))))))
       (lambda args
         (if (= ENOSYS (system-error-errno args))
             (begin
               (local-output
                (l10n "System lacks support for 'signalfd'; \
using fallback mechanism."))
               #f)
             (apply throw args)))))
    (_
     (local-output (l10n "Support for 'signalfd' is disabled."))
     #f)))

(define (handle-SIGINT)
  "Handle SIGINT by stopping the Shepherd, which means rebooting if we're PID 1."
  ;; Since 'stop' is synchronous and may block until SIGCHLD has been received
  ;; for the process it's waiting for, call it in a separate fiber so that
  ;; signals are still being processed in the meantime.
  (spawn-fiber
   (lambda ()
     (catch 'quit
       (lambda ()
         (stop-service root-service))
       quit-exception-handler))))

(define (signal-handler signal)
  "Return the signal handler for SIGNAL."
  (cond ((= signal SIGCHLD)
         (lambda _ (handle-SIGCHLD)))
        ((= signal SIGINT)
         (lambda _ (handle-SIGINT)))
        ((memv signal (list SIGTERM SIGHUP))
         (lambda _ (handle-SIGINT)))
        (else
         (const #f))))

(define (handle-signal-port port)
  "Read from PORT, a signalfd port, and handle the signal accordingly."
  (let ((signal (consume-signalfd-siginfo port)))
    ((signal-handler signal))))


(define (configuration-file-loader file)
  "Return a thunk that loads @var{file}, the user's configuration file."
  (define (failure)
    (report-error
     (l10n "~s: exception thrown while loading configuration file~%")
     file)
    #f)

  (define (handle-key-and-args-exception key args)
    (local-output
     (l10n "While loading configuration file '~a': ~s")
     file
     (string-trim-right
      (call-with-output-string
        (lambda (port)
          (print-exception port #f key args))))))

  (lambda ()
    (guard (c ((action-runtime-error? c)
               (local-output (l10n "action '~a' on service '~a' failed: ~s")
                             (action-runtime-error-action c)
                             (service-canonical-name
                              (action-runtime-error-service c))
                             (cons (action-runtime-error-key c)
                                   (action-runtime-error-arguments c)))
               (failure))
              ((exception-with-kind-and-args? c)
               (handle-key-and-args-exception
                (exception-kind c) (exception-args c))
               (failure))
              ((message-condition? c)
               (local-output
                (l10n "Error while loading configuration file '~a': ~a")
                file (condition-message c))
               (failure))
              (else
               (local-output (l10n "Uncaught exception while loading \
configuration file '~a': ~s")
                             file c)
               (failure)))
      (load-in-user-module file)
      (local-output (l10n "Configuration successfully loaded from '~a'.")
                    file))))

(define (get-u8/timeout port timeout)
  "Read a byte from @var{port}; return that byte, or the EOF object, or
@code{'timeout} if @var{timeout} has expired and nothing was read."
  (perform-operation
   (choice-operation (wrap-operation (wait-until-port-readable-operation port)
                                     (lambda ()
                                       (get-u8 port)))
                     (wrap-operation (sleep-operation timeout)
                                     (const 'timeout)))))

(define (socket-monitor socket-file on-deletion)
  "Return a thunk that monitors @var{socket-file} and calls @var{on-deletion},
a thunk, if and when it is deleted.  Exit if the @var{on-deletion} call throws
to 'quit."
  (lambda ()
    (catch 'quit
      (lambda ()
        (let loop ()
          (wait-for-file-deletion socket-file)
          (on-deletion)
          (loop)))
      quit-exception-handler)))

(define* (run-daemon #:key (config-file (default-config-file))
                     socket-file socket pid-file signal-port poll-services?)
  (define (signal-thunk signal-port)
    ;; Thunk that waits for signals (particularly SIGCHLD) and handles them.
    (if signal-port
        (lambda ()
          (let loop ()
            (handle-signal-port signal-port)
            (loop)))
        ;; When not using signalfd(2), register a signal handler.  The handler
        ;; cannot safely send a message to, say, the process monitor, because
        ;; handlers run as "asyncs", which may be called anytime, including
        ;; possibly when the message recipient's fiber was active, leading to
        ;; a deadlock.  To address that, the handler writes to a "self pipe";
        ;; a fiber reads from the pipe and invokes the actual handler from
        ;; there.
        (match (pipe (logior O_NONBLOCK O_CLOEXEC))
          ((input . output)
           (setvbuf input 'none)
           (setvbuf output 'none)
           (let ((handler (lambda (signal)
                            (put-u8 output signal))))
             (for-each (lambda (signal)
                         (sigaction signal handler SA_NOCLDSTOP))
                       %precious-signals)
             (lambda ()
               ;; There's a time window before blocking on 'epoll_wait' during
               ;; which a handler async can be queued but not executed.  Work
               ;; around it by exiting Fibers' 'epoll_wait' call periodically.
               ;;
               ;; TODO: Avoid this race condition by having Fibers use
               ;; 'epoll_waitp' or 'pselect' and run asyncs before blocking.
               (let loop ()
                 (match (get-u8/timeout input (if poll-services? 0.5 30))
                   ('timeout
                    (when poll-services?
                      (check-for-dead-services)))
                   (signal
                    ((signal-handler signal))))
                 (loop))))))))

  (define (serve socket)
    ;; Return a thunk that processes incoming connections on SOCKET.
    (let next-client ()
      (match (accept socket (logior SOCK_NONBLOCK SOCK_CLOEXEC))
        ((client . address)
         (setvbuf client 'block 1024)
         (set-port-encoding! client "UTF-8")
         (set-port-conversion-strategy! client 'error)
         (spawn-fiber
          (lambda ()
            (process-connection client)))
         (next-client))
        (_ #f))))

  (define on-socket-deletion
    ;; Thunk called upon deletion of SOCKET-FILE.
    (if (= 1 (getpid))
        (lambda ()
          ;; For PID 1, try hard to recreate the socket.
          (local-output (l10n "Socket '~a' deleted; spawning new server.")
                        socket-file)
          (close-port socket)
          (catch 'system-error
            (lambda ()
              (call-with-server-socket
               socket-file
               (lambda (socket)
                 (spawn-fiber (lambda ()
                                (serve socket))))))
            (lambda args
              (local-output (l10n "Failed to bind '~a': ~a.")
                            socket-file (strerror (system-error-errno args)))
              (stop-service root-service))))
        (lambda ()
          (local-output (l10n "Socket '~a' deleted; stopping.") socket-file)
          (close-port socket)
          (stop-service root-service))))

  ;; We might have file descriptors inherited from our parent, as well as file
  ;; descriptors wrongfully opened by Guile or Fibers (see
  ;; <https://bugs.gnu.org/57567> and
  ;; <https://codeberg.org/guile/fibers/commit/1f834cb81126dea2fd47d3d7ebb2d21f798a3c8b>);
  ;; mark them all as FD_CLOEXEC so child processes do not inherit them.
  (mark-as-close-on-exec)

  (when signal-port
    ;; When the 'daemonize' action is invoked, open a new signal port in the
    ;; child process and spawn a new fiber reading it.  This required because
    ;; after fork(2), epoll_wait(2), which is used by Fibers, does not signal
    ;; that the signal FD is ready for reading--see the signalfd(2) man page.
    (add-hook! %post-daemonize-hook
               (lambda ()
                 (local-output (l10n "Restarting signal handler."))
                 (close-port signal-port)
                 (spawn-fiber
                  (essential-task-thunk
                   'signal-handler
                   (signal-thunk (maybe-signal-port %precious-signals)))))))

  ;; Spawn a signal handling fiber.
  (spawn-fiber
   (essential-task-thunk 'signal-handler
                         (signal-thunk signal-port)))

  ;; Load CONFIG-FILE in another fiber.  If loading fails, report it but keep
  ;; going: the user can use 'herd load root' with a new config file if
  ;; needed.
  (spawn-fiber (configuration-file-loader config-file))

  ;; Ignore SIGPIPE so that we don't die if a client closes the connection
  ;; prematurely.
  (sigaction SIGPIPE SIG_IGN)

  ;; Possibly write out our PID, which means we're ready to accept
  ;; connections.
  (match pid-file
    ((? string? file)
     (with-atomic-file-output pid-file
       (cute display (getpid) <>)))
    (#t (display (getpid)))
    (_  #t))

  (check-argument-types (string? socket-file)
                        (port? socket))
  (spawn-fiber (socket-monitor socket-file on-socket-deletion))

  (serve socket))

;; In Guile 3.0.x, 'call-with-input-file' & co. do not open their files as
;; O_CLOEXEC.  The two procedures below address that.

(define* (call-with-input-file/close-on-exec file proc
                                             #:key
                                             guess-encoding
                                             encoding binary)
  "Like @code{call-with-input-file}, but always open files as close-on-exec."
  ;; Note: 'open-file' supports the "e" flag for O_CLOEXEC, but only since
  ;; 3.0.9, hence the use of 'open'.
  (call-with-port (open file (logior O_RDONLY O_CLOEXEC))
    (lambda (port)
      (cond (binary (set-port-encoding! port #f))
            (encoding (set-port-encoding! port encoding))
            (guess-encoding (set-port-encoding! port (file-encoding port))))
      (proc port))))

(define* (call-with-output-file/close-on-exec file proc
                                              #:key encoding binary)
  "Like @code{call-with-output-file}, but always open files as close-on-exec."
  (call-with-port (open file (logior O_WRONLY O_CREAT O_CLOEXEC))
    (lambda (port)
      (cond (binary (set-port-encoding! port #f))
            (encoding (set-port-encoding! port encoding)))
      (proc port))))

(define-syntax replace-core-bindings!
  (syntax-rules (<>)
    "Replace the given core bindings in the current process, restoring them upon
fork in the child process."
    ((_ () <> ((binding value) ...))
     (let ((real-primitive-fork primitive-fork))
       (set! primitive-fork
             (lambda* (#:key preserve-replacement-bindings?)
               ;; Like 'primitive-fork', but restore the original core Guile
               ;; bindings in the child process, unless
               ;; PRESERVE-REPLACEMENT-BINDINGS? is true.
               (let ((result (real-primitive-fork)))
                 (when (and (zero? result)
                            (not preserve-replacement-bindings?))
                   (set! binding value)
                   ...
                   (set! primitive-fork real-primitive-fork))
                 result)))))
    ((_ ((binding value) rest ...) <> (saved-bindings ...))
     (let ((real binding))
       (set! binding value)
       (replace-core-bindings! (rest ...) <>
                               ((binding real) saved-bindings ...))))
    ((_ (binding value) ...)
     (replace-core-bindings! ((binding value) ...) <> ()))))

(define (fibers-version)
  "Return the version of Fibers being used."
  ;; The '%fibers-version' variable was introduced in 1.4.0, hence this check.
  (let ((fibers (resolve-interface '(fibers))))
    (if (module-defined? fibers '%fibers-version)
        (module-ref fibers '%fibers-version)
        "< 1.4")))


;; Main program.
(define (main . args)
  (define poll-services?
    ;; Do we need polling to find out whether services died?
    (and (not (= 1 (getpid)))                     ;if we're pid 1, we don't
         (catch 'system-error
           (lambda ()
             ;; Register for orphaned processes to be reparented onto us when
             ;; their original parent dies. This lets us handle SIGCHLD from
             ;; daemon processes that would otherwise have been reparented
             ;; under pid 1. Obviously this is unnecessary when we are pid 1.
             (prctl PR_SET_CHILD_SUBREAPER 1)
             #f)                                  ;don't poll
           (lambda args
             ;; We fall back to polling for services on systems that don't
             ;; support prctl/PR_SET_CHILD_SUBREAPER.
             (let ((errno (system-error-errno args)))
               (or (= ENOSYS errno)        ;prctl unavailable
                   (= EINVAL errno)        ;PR_SET_CHILD_SUBREAPER unavailable
                   (apply throw args)))))))

  (define signal-port
    ;; Attempt to create a "signal port" via 'signalfd'.  This must be called
    ;; before the 'sigaction' procedure is called, because 'sigaction' spawns
    ;; the signal thread.
    (maybe-signal-port %precious-signals))

  (define log-input+output
    ;; Pipe used to send output to the logger of the 'root' service.
    (match (pipe (logior O_NONBLOCK O_CLOEXEC))
      ((input . output)
       (let ((init! (lambda (port)
                      (setvbuf port 'none)
                      (set-port-encoding! port "UTF-8")
                      (set-port-conversion-strategy! port 'substitute))))
         (init! input)
         (init! output)
         (cons input output)))))

  (initialize-cli)

  (let ((config-file #f)
        (syslog-requested? #f)
	(socket-file (current-socket-file))
        (pid-file    #f)
        (secure      #t)
        (silent?     #f)
        (logfile     #f))
    ;; Process command line arguments.
    (process-args (program-name) args
		  ""
		  (l10n "This is a service manager for Unix and GNU.")
		  not ;; Fail on unknown args.
		  (option
		    #:long-name "quiet"
		    #:takes-argument? #f
		    #:description (l10n "synonym for --silent")
		    #:action (lambda ()
                               (set! silent? #t)))
		  (option
		    #:long-name "silent" #:short-name #\S
		    #:takes-argument? #f
		    #:description (l10n "don't do output to stdout")
		    #:action (lambda ()
                               (set! silent? #t)))
		  (option
		    ;; It might actually be desirable to have an
		    ;; ``insecure'' setup in some circumstances, thus
		    ;; we provide it as an option.
		    #:long-name "insecure" #:short-name #\I
		    #:takes-argument? #f
		    #:description (l10n "don't ensure that the setup is secure")
		    #:action (lambda ()
                               (set! secure #f)))
		  (option
		    #:long-name "logfile" #:short-name #\l
		    #:takes-argument? #t #:argument-is-optional? #f
                    #:argument-name (l10n "FILE")
		    #:description (l10n  "log actions in FILE")
		    #:action (lambda (file)
			       (set! logfile file)))
		  (option
		    #:long-name "pid"
		    #:takes-argument? #t #:argument-is-optional? #t
                    #:argument-name (l10n "FILE")
		    #:description (l10n "when ready, write PID to FILE or stdout")
		    #:action (lambda (file)
			       (set! pid-file (or file #t))))
		  (option
		    #:long-name "config" #:short-name #\c
		    #:takes-argument? #t #:argument-is-optional? #f
                    #:argument-name (l10n "FILE")
		    #:description (l10n "read configuration from FILE")
		    #:action (lambda (file)
			       (set! config-file file)))
		  (option
		    #:long-name "socket" #:short-name #\s
		    #:takes-argument? #t #:argument-is-optional? #f
                    #:argument-name (l10n "FILE")
		    #:description
		    (l10n "get commands from socket FILE")
		    #:action (lambda (file)
			       (set! socket-file file)))
                  (option
                   #:long-name "syslog"
                   #:takes-argument? #t #:argument-is-optional? #t
                   #:argument-name (l10n "FILE")
                   #:description
                   (l10n "log to the system log (syslog) at FILE or /dev/log")
                   #:action (lambda (file)
                              (when file
                                (system-log-file file))
                              (set! syslog-requested? #t))))
    (assert (string? socket-file))

    ;; We do this early so that we can abort early if necessary.
    (catch 'system-error
      (lambda ()
        (verify-dir (dirname socket-file) #:secure? secure))
      (lambda args
        (report-error (l10n "cannot access directory of socket '~a': ~a")
                      socket-file (strerror (system-error-errno args)))
        (exit 1)))

    (define syslog?
      ;; Is shepherd logging to /dev/log?
      (or syslog-requested?
          (and (not logfile) (= 1 (getpid)))))

    ;; By default, enable detailed deprecation warnings.
    (unless (getenv "GUILE_WARN_DEPRECATED")
      (debug-enable 'warn-deprecated))

    (call-with-server-socket
     socket-file
     (lambda (socket)
       ;; Enable logging as first action.
       (parameterize ((log-output-port (cdr log-input+output))

                      ;; Send warnings such as deprecation warnings to the log.
                      (current-warning-port (cdr log-input+output))

                      (%current-logfile-date-format
                       (if syslog?
                           ""                     ;for the "built-in" logger
                           default-logfile-date-format))
                      (%current-service-output-port
                       ;; Send output to log and clients.
                       (make-shepherd-output-port
                        (if (or silent? syslog?)
                            ;; By default we'd write both to /dev/log and to
                            ;; stdout.  Redirect stdout to the bitbucket so we
                            ;; don't log twice.
                            (%make-void-port "w")
                            (current-output-port))))

                      ;; In Guile 3.0.10, calling 'environ' from the top-level
                      ;; triggers a warning so do it from here.
                      (default-environment-variables (environ)))

         (parameterize ((current-output-port (%current-service-output-port)))
           (set-port-encoding! (log-output-port) "UTF-8")
           (set-port-encoding! (%current-service-output-port) "UTF-8")

           ;; Log provenance info.
           (format #t "~a ~a (Guile ~a, Fibers ~a, ~a)~%"
                   package-name Version
                   (version) (fibers-version)
                   %host-type)

           (when (= 1 (getpid))
             ;; When running as PID 1, disable hard reboots upon ctrl-alt-del.
             ;; Instead, the kernel will send us SIGINT so that we can gracefully
             ;; shut down.  See ctrlaltdel(8) and kernel/reboot.c.
             (catch 'system-error
               (lambda ()
                 (disable-reboot-on-ctrl-alt-del))
               (lambda args
                 (let ((err (system-error-errno args)))
                   ;; When in a separate PID namespace, we get EINVAL (see
                   ;; 'reboot_pid_ns' in kernel/pid_namespace.c.)  We get EPERM in
                   ;; a user namespace that lacks CAP_SYS_BOOT.
                   ;; ENOSYS is returned in runc environments due to seccomp
                   ;; defaults: <https://github.com/opencontainers/runc/pull/2750>.
                   (unless (member err (list EINVAL EPERM ENOSYS))
                     (apply throw args)))))

             ;; Load the SIGSEGV/SIGABRT handler.  This is what allows PID 1 to
             ;; dump core on "/", should something go wrong.
             (false-if-exception
              (dynamic-link (string-append %pkglibdir "/crash-handler"))))

           ;; Run Fibers in such a way that it does not create any POSIX thread,
           ;; because POSIX threads and 'fork' cannot be used together.
           (run-fibers
            (lambda ()
              (with-process-monitor
                (with-service-registry

                  ;; Register and start the 'root' service.
                  (register-services (list root-service))
                  (start-service root-service)

                  (if syslog?
                      (spawn-service-system-logger (car log-input+output)
                                                   #:service root-service)
                      (spawn-service-file-logger (or logfile
                                                     (if (= 1 (getpid))
                                                         (system-default-log-file)
                                                         (user-default-log-file)))
                                                 (car log-input+output)
                                                 #:service root-service))

                  ;; Replace the default 'system*' binding with one that
                  ;; cooperates instead of blocking on 'waitpid'.  Replace
                  ;; 'primitive-load' (in C as of 3.0.9) with one that does
                  ;; not introduce a continuation barrier.  Replace 'sleep' to
                  ;; avoid blocking in user code such as 'start' methods.
                  (replace-core-bindings!
                   (newline (lambda* (#:optional (port (current-output-port)))
                              ;; As of Guile 3.0.10, 'newline' is written in C
                              ;; and thus a continuation barrier.  Replace it.
                              (put-char port #\newline)))
                   (system* (lambda command
                              (spawn-command command #:directory (getcwd))))
                   (system spawn-shell-command)
                   (primitive-load primitive-load*)
                   (call-with-input-file call-with-input-file/close-on-exec)
                   (call-with-output-file call-with-output-file/close-on-exec)
                   ((@ (guile) sleep) (@ (fibers) sleep)))

                  (run-daemon #:socket-file socket-file
                              #:socket socket
                              #:config-file (or config-file (default-config-file))
                              #:pid-file pid-file
                              #:signal-port signal-port
                              #:poll-services? poll-services?))))
            #:parallelism 1                       ;don't create POSIX threads
            #:hz 0)))))))       ;disable preemption, which would require POSIX threads

(define* (quit-exception-handler key #:optional value)
  "Handle the 'quit' exception, rebooting if we're running as root."
  ;; Note: The 'quit' exception does not necessarily have an associated value:
  ;; compare (exit 1) with (exit).

  ;; Most likely we're receiving 'quit' from the 'stop' method of
  ;; ROOT-SERVICE.  So, if we're running as 'root', just reboot.
  (if (and (zero? (getuid)) (= 1 (getpid)))
      (begin
        (local-output (l10n "Rebooting..."))
        (reboot))
      (begin
        (local-output (l10n "Exiting."))
        (primitive-exit 0))))              ;leave without going through Fibers

(define (call-with-command-message-port command proc)
  "Call @var{proc} passing it a procedure to retrieve the messages emitted
while evaluating @var{command}."
  (define message-port
    (with-fluids ((%default-port-encoding "UTF-8"))
      (open-output-string)))

  (define (get-messages)
    ;; Since the port returned by 'make-shepherd-output-port' is buffered,
    ;; flush before retrieve messages.
    (force-output)

    (let* ((str (get-output-string message-port))
           (lst (string-split str #\newline)))
      ;; 'string-tokenize' swallows empty lines, which is not great,
      ;; and 'string-split' doesn't distinguish between an empty line
      ;; and this empty string, which is not great either.  So we hack
      ;; our way the best we can.
      (cond ((string-null? str)
             '())
            ;; If STR ends in \n, drop the trailing empty string since
            ;; that would lead the client to print an extra newline.
            ((string-suffix? "\n" str)
             (drop-right lst 1))
            (else lst))))

  (parameterize ((%current-client-socket message-port))
    (proc get-messages)))

(define-syntax-rule (with-command-message-port command get-messages
                                               body ...)
  "Evaluate @var{command} and bind @var{get-messages} in the lexical extent of
@var{body} to a thunk to fetch messages emitted while evaluating
@var{command}."
  (call-with-command-message-port command
                                  (lambda (get-messages)
                                    body ...)))

(define (process-command command port)
  "Interpret COMMAND, a command sent by the user, represented as a
<shepherd-command> object.  Send the reply to PORT."
  (match command
    (($ <shepherd-command> version the-action service-symbol (args ...)
                           directory)             ;ignored

     ;; We have to catch `quit' so that we can send the terminator
     ;; line to herd before we actually quit.
     (catch 'quit
       (lambda ()
         (with-command-message-port command get-messages
           (guard (c ((service-error? c)
                      (write-reply (command-reply command #f
                                                  (condition->sexp c)
                                                  (get-messages))
                                   port)))

             (define service
               (or (lookup-service service-symbol)
                   (raise (condition
                           (&missing-service-error (name service-symbol))))))

             (define result
               (case the-action
                 ((start)
                  ;; Return #f or SERVICE: clients expect the service sexp.
                  (if (eq? 'running (service-status service))
                      (begin
                        (local-output (l10n "Service ~a is already running.")
		                      (service-canonical-name service))
                        service)
                      (and (apply start-service service args)
                           service)))
                 ((stop)
                  (if (service-stopped? service)
                      '()
                      (map service-canonical-name
                           (apply stop-service service args))))

                 ;; XXX: This used to return a list of action results, on the
                 ;; grounds that there could be several services called NAME.
                 ;; Clients like 'herd' expect a list so now we return a
                 ;; singleton.
                 (else (list (apply perform-service-action
                                    service the-action args)))))

             (write-reply (command-reply command result #f (get-messages))
                          port))))
       quit-exception-handler))
    (_
     (local-output (l10n "Invalid command.")))))

(define (process-connection sock)
  "Process client connection SOCK, reading and processing commands."
  (catch 'system-error
    (lambda ()
      (match (read-command sock)
        ((? shepherd-command? command)
         (process-command command sock))
        (#f                                    ;failed to read a valid command
         #f))

      ;; Currently we assume one command per connection.
      (false-if-exception (close sock)))
    (lambda args
      ;; Maybe we got EPIPE while writing to SOCK, or something like that.
      (false-if-exception (close sock)))))

;; Local Variables:
;; eval: (put 'with-command-message-port 'scheme-indent-function 2)
;; End: