summary refs log tree commit diff
path: root/gnu/build
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/build')
-rw-r--r--gnu/build/file-systems.scm118
-rw-r--r--gnu/build/image.scm31
-rw-r--r--gnu/build/linux-container.scm78
-rw-r--r--gnu/build/marionette.scm45
-rw-r--r--gnu/build/secret-service.scm90
-rw-r--r--gnu/build/shepherd.scm92
6 files changed, 322 insertions, 132 deletions
diff --git a/gnu/build/file-systems.scm b/gnu/build/file-systems.scm
index d95340df83..b06a4cc25c 100644
--- a/gnu/build/file-systems.scm
+++ b/gnu/build/file-systems.scm
@@ -1,5 +1,5 @@
 ;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2014-2018, 2020-2022 Ludovic Courtès <ludo@gnu.org>
 ;;; Copyright © 2016, 2017 David Craven <david@craven.ch>
 ;;; Copyright © 2017 Mathieu Othacehe <m.othacehe@gmail.com>
 ;;; Copyright © 2019 Guillaume Le Vaillant <glv@posteo.net>
@@ -54,6 +54,7 @@
 
             bind-mount
 
+            system*/tty
             mount-flags->bit-mask
             check-file-system
             mount-file-system
@@ -67,6 +68,33 @@
 ;;;
 ;;; Code:
 
+(define (system*/console program . args)
+  "Run PROGRAM with ARGS in a tty on top of /dev/console.  The return value is
+as for 'system*'."
+  (match (primitive-fork)
+    (0
+     (dynamic-wind
+       (const #t)
+       (lambda ()
+         (login-tty (open-fdes "/dev/console" O_RDWR))
+         (apply execlp program program args))
+       (lambda ()
+         (primitive-_exit 127))))
+    (pid
+     (cdr (waitpid pid)))))
+
+(define (system*/tty program . args)
+  "Run PROGRAM with ARGS, creating a tty if its standard input isn't one.
+The return value is as for 'system*'.
+
+This is necessary for commands such as 'cryptsetup open' or 'fsck' that may
+need to interact with the user but might be invoked from shepherd, where
+standard input is /dev/null."
+  (apply (if (isatty? (current-input-port))
+             system*
+             system*/console)
+         program args))
+
 (define (bind-mount source target)
   "Bind-mount SOURCE at TARGET."
   (mount source target "" MS_BIND))
@@ -180,13 +208,13 @@ true, check the file system even if it's marked as clean.  If REPAIR is false,
 do not write to the file system to fix errors.  If it's #t, fix all
 errors.  Otherwise, fix only those considered safe to repair automatically."
   (match (status:exit-val
-          (apply system* `("e2fsck" "-v" "-C" "0"
-                           ,@(if force? '("-f") '())
-                           ,@(match repair
-                               (#f '("-n"))
-                               (#t '("-y"))
-                               (_  '("-p")))
-                           ,device)))
+          (apply system*/tty "e2fsck" "-v" "-C" "0"
+                 `(,@(if force? '("-f") '())
+                   ,@(match repair
+                       (#f '("-n"))
+                       (#t '("-y"))
+                       (_  '("-p")))
+                   ,device)))
     (0 'pass)
     (1 'errors-corrected)
     (2 'reboot-required)
@@ -312,14 +340,14 @@ errors. Otherwise, fix only those considered safe to repair automatically."
         (status
          ;; A number, or #f on abnormal termination (e.g., assertion failure).
          (status:exit-val
-          (apply system* `("bcachefs" "fsck" "-v"
-                           ,@(if force? '("-f") '())
-                           ,@(match repair
-                               (#f '("-n"))
-                               (#t '("-y"))
-                               (_  '("-p")))
-                           ;; Make each multi-device member a separate argument.
-                           ,@(string-split device #\:))))))
+          (apply system*/tty "bcachefs" "fsck" "-v"
+                 `(,@(if force? '("-f") '())
+                   ,@(match repair
+                       (#f '("-n"))
+                       (#t '("-y"))
+                       (_  '("-p")))
+                   ;; Make each multi-device member a separate argument.
+                   ,@(string-split device #\:))))))
     (match (and=> status (cut logand <> (lognot ignored-bits)))
       (0 'pass)
       (1 'errors-corrected)
@@ -364,17 +392,17 @@ false, do not write to DEVICE.  If it's #t, fix any errors found.  Otherwise,
 fix only those considered safe to repair automatically."
   (if force?
       (match (status:exit-val
-              (apply system* `("btrfs" "check" "--progress"
-                               ;; Btrfs's ‘--force’ is not relevant to us here.
-                               ,@(match repair
-                                   ;; Upstream considers ALL repairs dangerous
-                                   ;; and will warn the user at run time.
-                                   (#t '("--repair"))
-                                   (_  '("--readonly" ; a no-op for clarity
-                                         ;; A 466G file system with 180G used is
-                                         ;; enough to kill btrfs with 6G of RAM.
-                                         "--mode" "lowmem")))
-                               ,device)))
+              (apply system*/tty "btrfs" "check" "--progress"
+                     ;; Btrfs's ‘--force’ is not relevant to us here.
+                     `(,@(match repair
+                           ;; Upstream considers ALL repairs dangerous
+                           ;; and will warn the user at run time.
+                           (#t '("--repair"))
+                           (_  '("--readonly"     ; a no-op for clarity
+                                 ;; A 466G file system with 180G used is
+                                 ;; enough to kill btrfs with 6G of RAM.
+                                 "--mode" "lowmem")))
+                       ,device)))
         (0 'pass)
         (_ 'fatal-error))
       'pass))
@@ -412,11 +440,11 @@ ignored: a full file system scan is always performed.  If REPAIR is false, do
 not write to the file system to fix errors. Otherwise, automatically fix them
 using the least destructive approach."
   (match (status:exit-val
-          (apply system* `("fsck.vfat" "-v"
-                           ,@(match repair
-                               (#f '("-n"))
-                               (_  '("-a"))) ; no 'safe/#t distinction
-                           ,device)))
+          (system*/tty "fsck.vfat" "-v"
+                       (match repair
+                         (#f "-n")
+                         (_  "-a"))               ;no 'safe/#t distinction
+                       device))
     (0 'pass)
     (1 'errors-corrected)
     (_ 'fatal-error)))
@@ -545,7 +573,7 @@ do not write to the file system to fix errors, and replay the transaction log
 only if FORCE?  is true. Otherwise, replay the transaction log before checking
 and automatically fix found errors."
   (match (status:exit-val
-          (apply system*
+          (apply system*/tty
                  `("jfs_fsck" "-v"
                    ;; The ‘LEVEL’ logic is convoluted.  To quote fsck/xchkdsk.c
                    ;; (‘-p’, ‘-a’, and ‘-r’ are aliases in every way):
@@ -621,10 +649,10 @@ REPAIR are true, automatically fix found errors."
             "warning: forced check of F2FS ~a implies repairing any errors~%"
             device))
   (match (status:exit-val
-          (apply system* `("fsck.f2fs"
-                           ,@(if force? '("-f") '())
-                           ,@(if repair '("-p") '("--dry-run"))
-                           ,device)))
+          (apply system*/tty "fsck.f2fs"
+                 `(,@(if force? '("-f") '())
+                   ,@(if repair '("-p") '("--dry-run"))
+                   ,device)))
     ;; 0 and -1 are the only two possibilities according to the man page.
     (0 'pass)
     (_ 'fatal-error)))
@@ -709,9 +737,9 @@ ignored: a full check is always performed.  Repair is not possible: if REPAIR is
 true and the volume has been repaired by an external tool, clear the volume
 dirty flag to indicate that it's now safe to mount."
   (match (status:exit-val
-          (apply system* `("ntfsfix"
-                           ,@(if repair '("--clear-dirty") '("--no-action"))
-                           ,device)))
+          (system*/tty "ntfsfix"
+                       (if repair "--clear-dirty" "--no-action")
+                       device))
     (0 'pass)
     (_ 'fatal-error)))
 
@@ -754,11 +782,11 @@ write to DEVICE.  If it's #t, replay the log, check, and fix any errors found.
 Otherwise, only replay the log, and check without attempting further repairs."
   (define (xfs_repair)
     (status:exit-val
-     (apply system* `("xfs_repair" "-Pv"
-                      ,@(match repair
-                          (#t '("-e"))
-                          (_  '("-n"))) ; will miss some errors
-                      ,device))))
+     (system*/tty "xfs_repair" "-Pv"
+                  (match repair
+                    (#t "-e")
+                    (_  "-n"))                    ;will miss some errors
+                  device)))
   (if force?
       ;; xfs_repair fails with exit status 2 if the log is dirty, which is
       ;; likely in situations where you're running xfs_repair.  Only the kernel
diff --git a/gnu/build/image.scm b/gnu/build/image.scm
index 81caa424f8..ddfd34c111 100644
--- a/gnu/build/image.scm
+++ b/gnu/build/image.scm
@@ -5,6 +5,7 @@
 ;;; Copyright © 2017 Marius Bakke <mbakke@fastmail.com>
 ;;; Copyright © 2020, 2022 Tobias Geerinckx-Rice <me@tobias.gr>
 ;;; Copyright © 2020 Mathieu Othacehe <m.othacehe@gmail.com>
+;;; Copyright © 2022 Pavel Shlyak <p.shlyak@pantherx.org>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -48,12 +49,13 @@
   "Take SEXP, a tuple as returned by 'partition->gexp', and turn it into a
 <partition> record."
   (match sexp
-    ((size file-system file-system-options label uuid)
+    ((size file-system file-system-options label uuid flags)
      (partition (size size)
                 (file-system file-system)
                 (file-system-options file-system-options)
                 (label label)
-                (uuid uuid)))))
+                (uuid uuid)
+                (flags flags)))))
 
 (define (size-in-kib size)
   "Convert SIZE expressed in bytes, to kilobytes and return it as a string."
@@ -78,6 +80,7 @@ turn doesn't take any constant overhead into account, force a 1-MiB minimum."
         (fs-options (partition-file-system-options partition))
         (label (partition-label partition))
         (uuid (partition-uuid partition))
+        (flags (partition-flags partition))
         (journal-options "lazy_itable_init=1,lazy_journal_init=1"))
     (apply invoke
            `("fakeroot" "mke2fs" "-t" ,fs "-d" ,root
@@ -92,16 +95,18 @@ turn doesn't take any constant overhead into account, force a 1-MiB minimum."
                            (estimate-partition-size root)
                            size)))))))
 
-(define* (make-vfat-image partition target root)
+(define* (make-vfat-image partition target root fs-bits)
   "Handle the creation of VFAT partition images.  See 'make-partition-image'."
   (let ((size (partition-size partition))
-        (label (partition-label partition)))
-    (invoke "fakeroot" "mkdosfs" "-n" label "-C" target
-            "-F" "16" "-S" "1024"
-            (size-in-kib
-             (if (eq? size 'guess)
-                 (estimate-partition-size root)
-                 size)))
+        (label (partition-label partition))
+        (flags (partition-flags partition)))
+    (apply invoke "fakeroot" "mkdosfs" "-n" label "-C" target
+                          "-F" (number->string fs-bits)
+                          (size-in-kib
+                           (if (eq? size 'guess)
+                               (estimate-partition-size root)
+                               size))
+                    (if (member 'esp flags) (list "-S" "1024") '()))
     (for-each (lambda (file)
                 (unless (member file '("." ".."))
                   (invoke "mcopy" "-bsp" "-i" target
@@ -117,8 +122,10 @@ ROOT directory to populate the image."
     (cond
      ((string-prefix? "ext" type)
       (make-ext-image partition target root))
-     ((string=? type "vfat")
-      (make-vfat-image partition target root))
+     ((or (string=? type "vfat") (string=? type "fat16"))
+      (make-vfat-image partition target root 16))
+     ((string=? type "fat32")
+      (make-vfat-image partition target root 32))
      (else
       (raise (condition
               (&message
diff --git a/gnu/build/linux-container.scm b/gnu/build/linux-container.scm
index bdeca2cdb9..a0c8174721 100644
--- a/gnu/build/linux-container.scm
+++ b/gnu/build/linux-container.scm
@@ -301,8 +301,28 @@ delete it when leaving the dynamic extent of this call."
       (lambda ()
         (false-if-exception (delete-file-recursively tmp-dir))))))
 
+(define (wait-child-process)
+  "Wait for one child process and return a pair, like 'waitpid', or return #f
+if there are no child processes left."
+  (catch 'system-error
+    (lambda ()
+      (waitpid WAIT_ANY))
+    (lambda args
+      (if (= ECHILD (system-error-errno args))
+          #f
+          (apply throw args)))))
+
+(define (status->exit-status status)
+  "Reify STATUS as an exit status."
+  (or (status:exit-val status)
+      ;; See <http://www.tldp.org/LDP/abs/html/exitcodes.html#EXITCODESREF>.
+      (+ 128 (or (status:term-sig status)
+                 (status:stop-sig status)))))
+
 (define* (call-with-container mounts thunk #:key (namespaces %namespaces)
                               (host-uids 1) (guest-uid 0) (guest-gid 0)
+                              (relayed-signals (list SIGINT SIGTERM))
+                              (child-is-pid1? #t)
                               (process-spawned-hook (const #t)))
   "Run THUNK in a new container process and return its exit status; call
 PROCESS-SPAWNED-HOOK with the PID of the new process that has been spawned.
@@ -320,20 +340,64 @@ can map more than a single uid/gid.
 GUEST-UID and GUEST-GID specify the first UID (respectively GID) that host
 UIDs (respectively GIDs) map to in the namespace.
 
+RELAYED-SIGNALS is the list of signals that are \"relayed\" to the container
+process when caught by its parent.
+
+When CHILD-IS-PID1? is true, and if NAMESPACES contains 'pid', then the child
+process runs directly as PID 1.  As such, it is responsible for (1) installing
+signal handlers and (2) reaping terminated processes by calling 'waitpid'.
+When CHILD-IS-PID1? is false, a new intermediate process is created instead
+that takes this responsibility.
+
 Note that if THUNK needs to load any additional Guile modules, the relevant
 module files must be present in one of the mappings in MOUNTS and the Guile
 load path must be adjusted as needed."
+  (define thunk*
+    (if (and (memq 'pid namespaces)
+             (not child-is-pid1?))
+        (lambda ()
+          ;; Behave like an init process: create a sub-process that calls
+          ;; THUNK, and wait for child processes.  Furthermore, forward
+          ;; RELAYED-SIGNALS to the child process.
+          (match (primitive-fork)
+            (0
+             (call-with-clean-exit thunk))
+            (pid
+             (install-signal-handlers pid)
+             (let loop ()
+               (match (wait-child-process)
+                 ((child . status)
+                  (if (= child pid)
+                      (primitive-exit (status->exit-status status))
+                      (loop)))
+                 (#f
+                  (primitive-exit 128)))))))      ;cannot happen
+        thunk))
+
+  (define (periodically-schedule-asyncs)
+    ;; XXX: In Guile there's a time window where a signal-handling async could
+    ;; be queued without being processed by the time we enter a blocking
+    ;; syscall like waitpid(2) (info "(guile) Signals").  This terrible hack
+    ;; ensures pending asyncs get a chance to run periodically.
+    (sigaction SIGALRM (lambda _ (alarm 1)))
+    (alarm 1))
+
+  (define (install-signal-handlers pid)
+    ;; Install handlers that forward signals to PID.
+    (define (relay-signal signal)
+      (false-if-exception (kill pid signal)))
+
+    (periodically-schedule-asyncs)
+    (for-each (lambda (signal)
+                (sigaction signal relay-signal))
+              relayed-signals))
+
   (call-with-temporary-directory
    (lambda (root)
-     (let ((pid (run-container root mounts namespaces host-uids thunk
+     (let ((pid (run-container root mounts namespaces host-uids thunk*
                                #:guest-uid guest-uid
                                #:guest-gid guest-gid)))
-       ;; Catch SIGINT and kill the container process.
-       (sigaction SIGINT
-         (lambda (signum)
-           (false-if-exception
-            (kill pid SIGKILL))))
-
+       (install-signal-handlers pid)
        (process-spawned-hook pid)
        (match (waitpid pid)
          ((_ . status) status))))))
diff --git a/gnu/build/marionette.scm b/gnu/build/marionette.scm
index b336024610..0d2af642c8 100644
--- a/gnu/build/marionette.scm
+++ b/gnu/build/marionette.scm
@@ -1,5 +1,5 @@
 ;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2016, 2017, 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2016-2022 Ludovic Courtès <ludo@gnu.org>
 ;;; Copyright © 2018 Chris Marusich <cmmarusich@gmail.com>
 ;;;
 ;;; This file is part of GNU Guix.
@@ -196,31 +196,38 @@ FILE has not shown up after TIMEOUT seconds, raise an error."
      (error "file didn't show up" file))))
 
 (define* (wait-for-tcp-port port marionette
-                            #:key (timeout 20))
+                            #:key
+                            (timeout 20)
+                            (address `(make-socket-address AF_INET
+                                                           INADDR_LOOPBACK
+                                                           ,port)))
   "Wait for up to TIMEOUT seconds for PORT to accept connections in
-MARIONETTE.  Raise an error on failure."
+MARIONETTE.  ADDRESS must be an expression that returns a socket address,
+typically a call to 'make-socket-address'.  Raise an error on failure."
   ;; Note: The 'connect' loop has to run within the guest because, when we
   ;; forward ports to the host, connecting to the host never raises
   ;; ECONNREFUSED.
   (match (marionette-eval
-          `(begin
-             (let ((sock (socket PF_INET SOCK_STREAM 0)))
-               (let loop ((i 0))
-                 (catch 'system-error
-                   (lambda ()
-                     (connect sock AF_INET INADDR_LOOPBACK ,port)
-                     (close-port sock)
-                     'success)
-                   (lambda args
-                     (if (< i ,timeout)
-                         (begin
-                           (sleep 1)
-                           (loop (+ 1 i)))
-                         'failure))))))
+          `(let* ((address ,address)
+                  (sock (socket (sockaddr:fam address) SOCK_STREAM 0)))
+             (let loop ((i 0))
+               (catch 'system-error
+                 (lambda ()
+                   (connect sock address)
+                   (close-port sock)
+                   'success)
+                 (lambda args
+                   (if (< i ,timeout)
+                       (begin
+                         (sleep 1)
+                         (loop (+ 1 i)))
+                       (list 'failure address))))))
           marionette)
     ('success #t)
-    ('failure
-     (error "nobody's listening on port" port))))
+    (('failure address)
+     (error "nobody's listening on port"
+            (list (inet-ntop (sockaddr:fam address) (sockaddr:addr address))
+                  (sockaddr:port address))))))
 
 (define* (wait-for-unix-socket file-name marionette
                                 #:key (timeout 20))
diff --git a/gnu/build/secret-service.scm b/gnu/build/secret-service.scm
index 4e183e11e8..1baa058635 100644
--- a/gnu/build/secret-service.scm
+++ b/gnu/build/secret-service.scm
@@ -1,5 +1,5 @@
 ;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2020, 2021 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2020-2022 Ludovic Courtès <ludo@gnu.org>
 ;;; Copyright © 2020 Jan (janneke) Nieuwenhuizen <janneke@gnu.org>
 ;;;
 ;;; This file is part of GNU Guix.
@@ -47,6 +47,52 @@
          ;; to syslog.
          #'(format (current-output-port) fmt args ...))))))
 
+(define-syntax with-modules
+  (syntax-rules ()
+    "Dynamically load the given MODULEs at run time, making the chosen
+bindings available within the lexical scope of BODY."
+    ((_ ((module #:select (bindings ...)) rest ...) body ...)
+     (let* ((iface (resolve-interface 'module))
+            (bindings (module-ref iface 'bindings))
+            ...)
+       (with-modules (rest ...) body ...)))
+    ((_ () body ...)
+     (begin body ...))))
+
+(define (wait-for-readable-fd port timeout)
+  "Wait until PORT has data available for reading or TIMEOUT has expired.
+Return #t in the former case and #f in the latter case."
+  (match (resolve-module '(fibers) #f)            ;using Fibers?
+    (#f
+     (log "blocking on socket...~%")
+     (match (select (list port) '() '() timeout)
+       (((_) () ()) #t)
+       ((() () ())  #f)))
+    (fibers
+     ;; We're running on the Shepherd 0.9+ with Fibers.  Arrange to make a
+     ;; non-blocking wait so that other fibers can be scheduled in while we
+     ;; wait for PORT.
+     (with-modules (((fibers) #:select (spawn-fiber sleep))
+                    ((fibers channels)
+                     #:select (make-channel put-message get-message)))
+       ;; Make PORT non-blocking.
+       (let ((flags (fcntl port F_GETFL)))
+         (fcntl port F_SETFL (logior O_NONBLOCK flags)))
+
+       (let ((channel (make-channel)))
+         (spawn-fiber
+          (lambda ()
+            (sleep timeout)                       ;suspends the fiber
+            (put-message channel 'timeout)))
+         (spawn-fiber
+          (lambda ()
+            (lookahead-u8 port)                   ;suspends the fiber
+            (put-message channel 'readable)))
+         (log "suspending fiber on socket...~%")
+         (match (get-message channel)
+           ('readable #t)
+           ('timeout  #f)))))))
+
 (define* (secret-service-send-secrets port secret-root
                                       #:key (retry 60)
                                       (handshake-timeout 120))
@@ -74,7 +120,10 @@ wait for at most HANDSHAKE-TIMEOUT seconds for handshake to complete.  Return
 
   (log "sending secrets to ~a~%" port)
   (let ((sock (socket AF_INET SOCK_STREAM 0))
-        (addr (make-socket-address AF_INET INADDR_LOOPBACK port)))
+        (addr (make-socket-address AF_INET INADDR_LOOPBACK port))
+        (sleep (if (resolve-module '(fibers) #f)
+                   (module-ref (resolve-interface '(fibers)) 'sleep)
+                   sleep)))
     ;; Connect to QEMU on the forwarded port.  The 'connect' call succeeds as
     ;; soon as QEMU is ready, even if there's no server listening on the
     ;; forward port inside the guest.
@@ -93,23 +142,22 @@ wait for at most HANDSHAKE-TIMEOUT seconds for handshake to complete.  Return
 
     ;; Wait for "hello" message from the server.  This is the only way to know
     ;; that we're really connected to the server inside the guest.
-    (match (select (list sock) '() '() handshake-timeout)
-      (((_) () ())
-       (match (read sock)
-         (('secret-service-server ('version version ...))
-          (log "sending files from ~s...~%" secret-root)
-          (send-files sock)
-          (log "done sending files to port ~a~%" port)
-          (close-port sock)
-          secret-root)
-         (x
-          (log "invalid handshake ~s~%" x)
-          (close-port sock)
-          #f)))
-      ((() () ())                                 ;timeout
-       (log "timeout while sending files to ~a~%" port)
-       (close-port sock)
-       #f))))
+    (if (wait-for-readable-fd sock handshake-timeout)
+        (match (read sock)
+          (('secret-service-server ('version version ...))
+           (log "sending files from ~s...~%" secret-root)
+           (send-files sock)
+           (log "done sending files to port ~a~%" port)
+           (close-port sock)
+           secret-root)
+          (x
+           (log "invalid handshake ~s~%" x)
+           (close-port sock)
+           #f))
+        (begin                                    ;timeout
+         (log "timeout while sending files to ~a~%" port)
+         (close-port sock)
+         #f))))
 
 (define (delete-file* file)
   "Ensure FILE does not exist."
@@ -202,4 +250,8 @@ and #f otherwise."
       (close-port port))
     result))
 
+;;; Local Variables:
+;;; eval: (put 'with-modules 'scheme-indent-function 1)
+;;; End:
+
 ;;; secret-service.scm ends here
diff --git a/gnu/build/shepherd.scm b/gnu/build/shepherd.scm
index 778e3fc627..f4caefce3c 100644
--- a/gnu/build/shepherd.scm
+++ b/gnu/build/shepherd.scm
@@ -1,6 +1,7 @@
 ;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2017, 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2017, 2018, 2019, 2020, 2022 Ludovic Courtès <ludo@gnu.org>
 ;;; Copyright © 2020 Mathieu Othacehe <othacehe@gnu.org>
+;;; Copyright © 2022 Leo Nikkilä <hello@lnikki.la>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -30,7 +31,8 @@
                                  exec-command
                                  %precious-signals)
   #:autoload (shepherd system) (unblock-signals)
-  #:export (make-forkexec-constructor/container
+  #:export (default-mounts
+            make-forkexec-constructor/container
             fork+exec-command/container))
 
 ;;; Commentary:
@@ -103,8 +105,13 @@
 separate mount and PID name space.  Return the \"outer\" PID. "
   (match (container-excursion* pid
            (lambda ()
-             (read-pid-file pid-file
-                            #:max-delay max-delay)))
+             ;; XXX: Trick for Shepherd 0.9: prevent 'read-pid-file' from
+             ;; using (@ (fibers) sleep), which would try to suspend the
+             ;; current task, which doesn't work in this extra process.
+             (with-continuation-barrier
+              (lambda ()
+                (read-pid-file pid-file
+                               #:max-delay max-delay)))))
     (#f
      ;; Send SIGTERM to the whole process group.
      (catch-system-error (kill (- pid) SIGTERM))
@@ -114,6 +121,28 @@ separate mount and PID name space.  Return the \"outer\" PID. "
      ;; PID is always 1, but that's not what Shepherd needs to know.
      pid)))
 
+(define* (exec-command* command #:key user group log-file pid-file
+                        (supplementary-groups '())
+                        (directory "/") (environment-variables (environ)))
+  "Like 'exec-command', but first restore signal handles modified by
+shepherd (PID 1)."
+  ;; First restore the default handlers.
+  (for-each (cut sigaction <> SIG_DFL) %precious-signals)
+
+  ;; Unblock any signals that have been blocked by the parent process.
+  (unblock-signals %precious-signals)
+
+  (mkdir-p "/var/run")
+  (clean-up pid-file)
+
+  (exec-command command
+                #:user user
+                #:group group
+                #:supplementary-groups supplementary-groups
+                #:log-file log-file
+                #:directory directory
+                #:environment-variables environment-variables))
+
 (define* (make-forkexec-constructor/container command
                                               #:key
                                               (namespaces
@@ -121,6 +150,7 @@ separate mount and PID name space.  Return the \"outer\" PID. "
                                               (mappings '())
                                               (user #f)
                                               (group #f)
+                                              (supplementary-groups '())
                                               (log-file #f)
                                               pid-file
                                               (pid-file-timeout 5)
@@ -164,24 +194,16 @@ namespace, in addition to essential bind-mounts such /proc."
     (let ((pid (run-container container-directory
                               mounts namespaces 1
                               (lambda ()
-                                ;; First restore the default handlers.
-                                (for-each (cut sigaction <> SIG_DFL)
-                                          %precious-signals)
-
-                                ;; Unblock any signals that have been blocked
-                                ;; by the parent process.
-                                (unblock-signals %precious-signals)
-
-                                (mkdir-p "/var/run")
-                                (clean-up pid-file)
-
-                                (exec-command command
-                                              #:user user
-                                              #:group group
-                                              #:log-file log-file
-                                              #:directory directory
-                                              #:environment-variables
-                                              environment-variables)))))
+                                (exec-command* command
+                                               #:user user
+                                               #:group group
+                                               #:supplementary-groups
+                                               supplementary-groups
+                                               #:pid-file pid-file
+                                               #:log-file log-file
+                                               #:directory directory
+                                               #:environment-variables
+                                               environment-variables)))))
       (if pid-file
           (if (or (memq 'mnt namespaces) (memq 'pid namespaces))
               (read-pid-file/container pid pid-file
@@ -209,14 +231,24 @@ on Hurd systems for instance, fallback to direct forking."
         ((head . rest)
          (loop rest (cons head result))))))
 
-  (let ((container-support?
-         (file-exists? "/proc/self/ns"))
-        (fork-proc (lambda ()
-                     (apply fork+exec-command command
-                            (strip-pid args)))))
-    (if container-support?
-        (container-excursion* pid fork-proc)
-        (fork-proc))))
+  (let ((container-support? (file-exists? "/proc/self/ns")))
+    (if (and container-support?
+             (not (and pid (= pid (getpid)))))
+        (container-excursion* pid
+          (lambda ()
+            ;; Note: In the Shepherd 0.9, 'fork+exec-command' expects to be
+            ;; called from the shepherd process (because it creates a pipe to
+            ;; capture stdout/stderr and spawns a logging fiber) so we cannot
+            ;; use it here.
+            (match (primitive-fork)
+              (0 (dynamic-wind
+                   (const #t)
+                   (lambda ()
+                     (apply exec-command* command (strip-pid args)))
+                   (lambda ()
+                     (primitive-_exit 127))))
+              (pid pid))))               ;XXX: assuming the same PID namespace
+        (apply fork+exec-command command (strip-pid args)))))
 
 ;; Local Variables:
 ;; eval: (put 'container-excursion* 'scheme-indent-function 1)