summary refs log tree commit diff
diff options
context:
space:
mode:
authorLudovic Courtès <ludo@gnu.org>2018-12-24 15:40:04 +0100
committerLudovic Courtès <ludo@gnu.org>2018-12-24 16:06:32 +0100
commited7b44370f71126087eb953f36aad8dc4c44109f (patch)
treec9d9e952f956e58b5878ed1cee70d8423b58b531
parentaf15fe13b69d27f9902353540fd8ad0001ce8311 (diff)
downloadguix-ed7b44370f71126087eb953f36aad8dc4c44109f.tar.gz
offload: Use (guix inferior) instead of (ssh dist node).
Using inferiors and thus 'guix repl' simplifies setup on build
machines (no need to worry about GUILE_LOAD_PATH etc.)

Furthermore, the 'guix repl -t machine' protocol running in a remote
pipe addresses several issues with the current implementation of nodes
and RREPLs in Guile-SSH: fewer round trips, doesn't leave a 'guile
--listen' process behind it, stateless (since a new process is started
each time), more efficient (the SSH channel can be reused), more
reliable (no 'pgrep', 'pkill', and shellology; see
<https://github.com/artyom-poptsov/guile-ssh/issues/11> as an example.)

* guix/ssh.scm (inferior-remote-eval): New procedure.
(send-files): Use it instead of 'make-node' and 'node-eval'.
* guix/scripts/offload.scm (node-guile-version): New procedure.
(node-free-disk-space, transfer-and-offload, node-load)
(choose-build-machine, assert-node-has-guix): Use 'remote-inferior'
instead of 'make-node' and 'inferior-eval' instead of 'node-eval'.
(assert-node-can-import, assert-node-can-export): Likewise, and add
'session' parameter.
(check-machine-availability): Likewise, and add calls to
'close-inferior' and 'disconnect!'.
(check-machine-status): Likewise.
* doc/guix.texi (Daemon Offload Setup): Remove bit related to 'guile' in
$PATH and $GUILE_LOAD_PATH; mention 'guix' alone.
-rw-r--r--doc/guix.texi8
-rw-r--r--guix/scripts/offload.scm107
-rw-r--r--guix/ssh.scm34
3 files changed, 83 insertions, 66 deletions
diff --git a/doc/guix.texi b/doc/guix.texi
index f86a2885a7..c182995b2b 100644
--- a/doc/guix.texi
+++ b/doc/guix.texi
@@ -1051,13 +1051,11 @@ name, and they will be scheduled on matching build machines.
 @end table
 @end deftp
 
-The @code{guile} command must be in the search path on the build
-machines.  In addition, the Guix modules must be in
-@code{$GUILE_LOAD_PATH} on the build machine---you can check whether
-this is the case by running:
+The @command{guix} command must be in the search path on the build
+machines.  You can check whether this is the case by running:
 
 @example
-ssh build-machine guile -c "'(use-modules (guix config))'"
+ssh build-machine guix repl --version
 @end example
 
 There is one last thing to do once @file{machines.scm} is in place.  As
diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm
index bfdaa3c011..b472d202a9 100644
--- a/guix/scripts/offload.scm
+++ b/guix/scripts/offload.scm
@@ -23,13 +23,12 @@
   #:use-module (ssh session)
   #:use-module (ssh channel)
   #:use-module (ssh popen)
-  #:use-module (ssh dist)
-  #:use-module (ssh dist node)
   #:use-module (ssh version)
   #:use-module (guix config)
   #:use-module (guix records)
   #:use-module (guix ssh)
   #:use-module (guix store)
+  #:use-module (guix inferior)
   #:use-module (guix derivations)
   #:use-module ((guix serialization)
                 #:select (nar-error? nar-error-file))
@@ -321,12 +320,15 @@ hook."
     (set-port-revealed! port 1)
     port))
 
+(define (node-guile-version node)
+  (inferior-eval '(version) node))
+
 (define (node-free-disk-space node)
   "Return the free disk space, in bytes, in NODE's store."
-  (node-eval node
-             `(begin
-                (use-modules (guix build syscalls))
-                (free-disk-space ,(%store-prefix)))))
+  (inferior-eval `(begin
+                    (use-modules (guix build syscalls))
+                    (free-disk-space ,(%store-prefix)))
+                 node))
 
 (define* (transfer-and-offload drv machine
                                #:key
@@ -367,8 +369,12 @@ MACHINE."
                      (derivation-file-name drv)
                      (build-machine-name machine)
                      (nix-protocol-error-message c))
-             (let* ((space (false-if-exception
-                            (node-free-disk-space (make-node session)))))
+             (let* ((inferior (false-if-exception (remote-inferior session)))
+                    (space (false-if-exception
+                            (node-free-disk-space inferior))))
+
+               (when inferior
+                 (close-inferior inferior))
 
                ;; Use exit code 100 for a permanent build failure.  The daemon
                ;; interprets other non-zero codes as transient build failures.
@@ -417,11 +423,11 @@ of free disk space on '~a'~%")
 
 (define (node-load node)
   "Return the load on NODE.  Return +∞ if NODE is misbehaving."
-  (let ((line (node-eval node
-                         '(begin
-                            (use-modules (ice-9 rdelim))
-                            (call-with-input-file "/proc/loadavg"
-                              read-string)))))
+  (let ((line (inferior-eval '(begin
+                                (use-modules (ice-9 rdelim))
+                                (call-with-input-file "/proc/loadavg"
+                                  read-string))
+                             node)))
     (if (eof-object? line)
         +inf.0 ;MACHINE does not respond, so assume it is infinitely loaded
         (match (string-tokenize line)
@@ -508,9 +514,10 @@ slot (which must later be released with 'release-build-slot'), or #f and #f."
          ;; Note: We call 'node-load' only as a last resort because it is
          ;; too costly to call it once for every machine.
          (let* ((session (false-if-exception (open-ssh-session best)))
-                (node    (and session (make-node session)))
+                (node    (and session (remote-inferior session)))
                 (load    (and node (normalized-load best (node-load node))))
                 (space   (and node (node-free-disk-space node))))
+           (when node (close-inferior node))
            (when session (disconnect! session))
            (if (and node (< load 2.) (>= space %minimum-disk-space))
                (match others
@@ -613,18 +620,17 @@ If TIMEOUT is #f, simply evaluate EXP..."
     (#f
      (report-guile-error name))
     ((? string? version)
-     ;; Note: The version string already contains the word "Guile".
-     (info (G_ "'~a' is running ~a~%")
+     (info (G_ "'~a' is running GNU Guile ~a~%")
            name (node-guile-version node)))))
 
 (define (assert-node-has-guix node name)
   "Bail out if NODE lacks the (guix) module, or if its daemon is not running."
   (catch 'node-repl-error
     (lambda ()
-      (match (node-eval node
-                        '(begin
-                           (use-modules (guix))
-                           (and add-text-to-store 'alright)))
+      (match (inferior-eval '(begin
+                               (use-modules (guix))
+                               (and add-text-to-store 'alright))
+                            node)
         ('alright #t)
         (_ (report-module-error name))))
     (lambda (key . args)
@@ -632,12 +638,12 @@ If TIMEOUT is #f, simply evaluate EXP..."
 
   (catch 'node-repl-error
     (lambda ()
-      (match (node-eval node
-                        '(begin
-                           (use-modules (guix))
-                           (with-store store
-                             (add-text-to-store store "test"
-                                                "Hello, build machine!"))))
+      (match (inferior-eval '(begin
+                               (use-modules (guix))
+                               (with-store store
+                                 (add-text-to-store store "test"
+                                                    "Hello, build machine!")))
+                            node)
         ((? string? str)
          (info (G_ "Guix is usable on '~a' (test returned ~s)~%")
                name str))
@@ -656,25 +662,23 @@ If TIMEOUT is #f, simply evaluate EXP..."
   (string-append name "-"
                  (number->string (random 1000000 (force %random-state)))))
 
-(define (assert-node-can-import node name daemon-socket)
+(define (assert-node-can-import session node name daemon-socket)
   "Bail out if NODE refuses to import our archives."
-  (let ((session (node-session node)))
-    (with-store store
-      (let* ((item   (add-text-to-store store "export-test" (nonce)))
-             (remote (connect-to-remote-daemon session daemon-socket)))
-        (with-store local
-          (send-files local (list item) remote))
-
-        (if (valid-path? remote item)
-            (info (G_ "'~a' successfully imported '~a'~%")
-                  name item)
-            (leave (G_ "'~a' was not properly imported on '~a'~%")
-                   item name))))))
-
-(define (assert-node-can-export node name daemon-socket)
+  (with-store store
+    (let* ((item   (add-text-to-store store "export-test" (nonce)))
+           (remote (connect-to-remote-daemon session daemon-socket)))
+      (with-store local
+        (send-files local (list item) remote))
+
+      (if (valid-path? remote item)
+          (info (G_ "'~a' successfully imported '~a'~%")
+                name item)
+          (leave (G_ "'~a' was not properly imported on '~a'~%")
+                 item name)))))
+
+(define (assert-node-can-export session node name daemon-socket)
   "Bail out if we cannot import signed archives from NODE."
-  (let* ((session (node-session node))
-         (remote  (connect-to-remote-daemon session daemon-socket))
+  (let* ((remote  (connect-to-remote-daemon session daemon-socket))
          (item    (add-text-to-store remote "import-test" (nonce name))))
     (with-store store
       (if (and (retrieve-files store (list item) remote)
@@ -701,11 +705,13 @@ machine."
     (let* ((names    (map build-machine-name machines))
            (sockets  (map build-machine-daemon-socket machines))
            (sessions (map open-ssh-session machines))
-           (nodes    (map make-node sessions)))
+           (nodes    (map remote-inferior sessions)))
       (for-each assert-node-repl nodes names)
       (for-each assert-node-has-guix nodes names)
-      (for-each assert-node-can-import nodes names sockets)
-      (for-each assert-node-can-export nodes names sockets))))
+      (for-each assert-node-can-import sessions nodes names sockets)
+      (for-each assert-node-can-export sessions nodes names sockets)
+      (for-each close-inferior nodes)
+      (for-each disconnect! sessions))))
 
 (define (check-machine-status machine-file pred)
   "Print the load of each machine matching PRED in MACHINE-FILE."
@@ -722,10 +728,11 @@ machine."
           (length machines) machine-file)
     (for-each (lambda (machine)
                 (let* ((session (open-ssh-session machine))
-                       (node    (make-node session))
-                       (uts     (node-eval node '(uname)))
-                       (load    (node-load node))
-                       (free    (node-free-disk-space node)))
+                       (inferior (remote-inferior session))
+                       (uts     (inferior-eval '(uname) inferior))
+                       (load    (node-load inferior))
+                       (free    (node-free-disk-space inferior)))
+                  (close-inferior inferior)
                   (disconnect! session)
                   (format #t "~a~%  kernel: ~a ~a~%  architecture: ~a~%\
   host name: ~a~%  normalized load: ~a~%  free disk space: ~,2f MiB~%"
diff --git a/guix/ssh.scm b/guix/ssh.scm
index b8bea8028a..1ed8406633 100644
--- a/guix/ssh.scm
+++ b/guix/ssh.scm
@@ -27,8 +27,6 @@
   #:use-module (ssh channel)
   #:use-module (ssh popen)
   #:use-module (ssh session)
-  #:use-module (ssh dist)
-  #:use-module (ssh dist node)
   #:use-module (srfi srfi-1)
   #:use-module (srfi srfi-11)
   #:use-module (srfi srfi-26)
@@ -102,6 +100,20 @@ Throw an error on failure."
                                  "guix" "repl" "-t" "machine")))
     (port->inferior pipe)))
 
+(define (inferior-remote-eval exp session)
+  "Evaluate EXP in a new inferior running in SESSION, and close the inferior
+right away."
+  (let ((inferior (remote-inferior session)))
+    (dynamic-wind
+      (const #t)
+      (lambda ()
+        (inferior-eval exp inferior))
+      (lambda ()
+        ;; Close INFERIOR right away to prevent finalization from happening in
+        ;; another thread at the wrong time (see
+        ;; <https://bugs.gnu.org/26976>.)
+        (close-inferior inferior)))))
+
 (define* (remote-daemon-channel session
                                 #:optional
                                 (socket-name
@@ -277,15 +289,15 @@ Return the list of store items actually sent."
   ;; Compute the subset of FILES missing on SESSION and send them.
   (let* ((files   (if recursive? (requisites local files) files))
          (session (channel-get-session (nix-server-socket remote)))
-         (node    (make-node session))
-         (missing (node-eval node
-                             `(begin
-                                (use-modules (guix)
-                                             (srfi srfi-1) (srfi srfi-26))
-
-                                (with-store store
-                                  (remove (cut valid-path? store <>)
-                                          ',files)))))
+         (missing (inferior-remote-eval
+                   `(begin
+                      (use-modules (guix)
+                                   (srfi srfi-1) (srfi srfi-26))
+
+                      (with-store store
+                        (remove (cut valid-path? store <>)
+                                ',files)))
+                   session))
          (count   (length missing))
          (sizes   (map (lambda (item)
                          (path-info-nar-size (query-path-info local item)))