summary refs log tree commit diff
diff options
context:
space:
mode:
authorMathieu Othacehe <m.othacehe@gmail.com>2020-02-27 16:22:28 +0100
committerLudovic Courtès <ludo@gnu.org>2020-03-05 23:40:24 +0100
commit64704be417ab6f2788e8e3bc36fede1db35470e7 (patch)
tree41b8a40b82929f581e6d7fdc6810c70e73ab0cb4
parentccb1a8c437fa40760899ae95f9d3f10ed7c8b41b (diff)
downloadguix-64704be417ab6f2788e8e3bc36fede1db35470e7.tar.gz
installer: Fix cow-store umount.
This fixes <https://bugs.gnu.org/39712>.

The guix-daemon was preventing the cow-store umount, so restart it.  Some
udevd workers, using cow-store files might also still be around, so have some
umount retries.

* gnu/installer/final.scm (kill-cow-users): New procedure,
(umount-cow-store): restart guix-daemon and kill all processes started from
within the cow-store before trying to umount the store overlay. Also try 5
times to umount the overlay in case it is still busy.
-rw-r--r--gnu/installer/final.scm77
1 files changed, 71 insertions, 6 deletions
diff --git a/gnu/installer/final.scm b/gnu/installer/final.scm
index 869be8814b..3c170e5d0f 100644
--- a/gnu/installer/final.scm
+++ b/gnu/installer/final.scm
@@ -28,6 +28,12 @@
   #:use-module (gnu build accounts)
   #:use-module ((gnu system shadow) #:prefix sys:)
   #:use-module (rnrs io ports)
+  #:use-module (srfi srfi-1)
+  #:use-module (ice-9 ftw)
+  #:use-module (ice-9 popen)
+  #:use-module (ice-9 match)
+  #:use-module (ice-9 format)
+  #:use-module (ice-9 rdelim)
   #:export (install-system))
 
 (define %seed
@@ -97,14 +103,73 @@ USERS."
   (write-passwd password (string-append etc "/passwd"))
   (write-shadow shadow (string-append etc "/shadow")))
 
+(define* (kill-cow-users cow-path #:key (spare '("udevd")))
+  "Kill all processes that have references to the given COW-PATH in their
+'maps' file.  The process whose names are in SPARE list are spared."
+  (define %not-nul
+    (char-set-complement (char-set #\nul)))
+
+  (let ((pids
+         (filter-map (lambda (pid)
+                       (call-with-input-file
+                           (string-append "/proc/" pid "/maps")
+                         (lambda (port)
+                           (and (string-contains (get-string-all port)
+                                                 cow-path)
+                                (string->number pid)))))
+                     (scandir "/proc" string->number))))
+    (for-each (lambda (pid)
+                ;; cmdline does not always exist.
+                (false-if-exception
+                 (call-with-input-file
+                     (string-append "/proc/" (number->string pid) "/cmdline")
+                   (lambda (port)
+                     (match (string-tokenize (read-string port) %not-nul)
+                       ((argv0 _ ...)
+                        (unless (member (pk (basename argv0)) spare)
+                          (syslog "Killing process ~a~%" pid)
+                          (kill pid SIGKILL)))
+                       (_ #f))))))
+              pids)))
+
 (define (umount-cow-store)
   "Remove the store overlay and the bind-mount on /tmp created by the
-cow-store service."
-  (let ((tmp-dir "/remove"))
-    (mkdir-p tmp-dir)
-    (mount (%store-directory) tmp-dir "" MS_MOVE)
-    (umount tmp-dir)
-    (umount "/tmp")))
+cow-store service.  This procedure is very fragile and a better approach would
+be much appreciated."
+
+  ;; Remove when integrated in (gnu services herd).
+  (define (restart-service name)
+    (with-shepherd-action name ('restart) result
+      result))
+
+  (catch #t
+    (lambda ()
+      (let ((tmp-dir "/remove"))
+        (mkdir-p tmp-dir)
+        (mount (%store-directory) tmp-dir "" MS_MOVE)
+
+        ;; The guix-daemon has possibly opened files from the cow-store,
+        ;; restart it.
+        (restart-service 'guix-daemon)
+
+        ;; Kill all processes started while the cow-store was active (logins
+        ;; on other TTYs for instance).
+        (kill-cow-users tmp-dir)
+
+        ;; Try to umount the store overlay. Some process such as udevd
+        ;; workers might still be active, so do some retries.
+        (let loop ((try 5))
+          (sleep 1)
+          (let ((umounted? (false-if-exception (umount tmp-dir))))
+            (if (and (not umounted?) (> try 0))
+                (loop (- try 1))
+                (if umounted?
+                    (syslog "Umounted ~a successfully.~%" tmp-dir)
+                    (syslog "Failed to umount ~a.~%" tmp-dir)))))
+
+        (umount "/tmp")))
+    (lambda args
+      (syslog "~a~%" args))))
 
 (define* (install-system locale #:key (users '()))
   "Create /etc/shadow and /etc/passwd on the installation target for USERS.