summary refs log tree commit diff
diff options
context:
space:
mode:
authorLudovic Courtès <ludo@gnu.org>2016-11-05 00:47:34 +0100
committerLudovic Courtès <ludo@gnu.org>2016-11-25 23:44:20 +0100
commitcf283dd92eb5ef2dee4b761bb23f6dca2525cd55 (patch)
treedf6d63632b09b9a1ec6bf0cd38a24f44e98e7213
parente8a5db80d5fe2e603d7b72c3b3cc5ba6ea6d99d9 (diff)
downloadguix-cf283dd92eb5ef2dee4b761bb23f6dca2525cd55.tar.gz
offload: Rewrite to make direct RPCs to the remote daemon.
* guix/scripts/offload.scm (<build-machine>)[daemon-socket]: New field.
(connect-to-remote-daemon): New procedure.
(%gc-root-file, register-gc-root, remove-gc-roots, offload): Remove.
(transfer-and-offload): Rewrite using 'connect-to-remote-daemon' and
RPCs over SSH.
(store-import-channel, store-export-channel): New procedures.
(send-files, retrieve-files): Rewrite using these.
-rw-r--r--doc/guix.texi4
-rw-r--r--guix/scripts/offload.scm371
2 files changed, 175 insertions, 200 deletions
diff --git a/doc/guix.texi b/doc/guix.texi
index f1cb007aa9..b8e37055e6 100644
--- a/doc/guix.texi
+++ b/doc/guix.texi
@@ -921,6 +921,10 @@ Port number of SSH server on the machine.
 The SSH private key file to use when connecting to the machine, in
 OpenSSH format.
 
+@item @code{daemon-socket} (default: @code{"/var/guix/daemon-socket/socket"})
+File name of the Unix-domain socket @command{guix-daemon} is listening
+to on that machine.
+
 @item @code{parallel-builds} (default: @code{1})
 The number of builds that may run in parallel on the machine.
 
diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm
index 35286ab9d5..1821bb5b7a 100644
--- a/guix/scripts/offload.scm
+++ b/guix/scripts/offload.scm
@@ -21,6 +21,9 @@
   #:use-module (ssh auth)
   #:use-module (ssh session)
   #:use-module (ssh channel)
+  #:use-module (ssh popen)
+  #:use-module (ssh dist)
+  #:use-module (ssh dist node)
   #:use-module (guix config)
   #:use-module (guix records)
   #:use-module (guix store)
@@ -71,6 +74,8 @@
   (private-key     build-machine-private-key      ; file name
                    (default (user-openssh-private-key)))
   (host-key        build-machine-host-key)        ; string
+  (daemon-socket   build-machine-daemon-socket    ; string
+                   (default "/var/guix/daemon-socket/socket"))
   (parallel-builds build-machine-parallel-builds  ; number
                    (default 1))
   (speed           build-machine-speed            ; inexact real
@@ -197,6 +202,53 @@ instead of '~a' of type '~a'~%")
 
     session))
 
+(define* (connect-to-remote-daemon session
+                                   #:optional
+                                   (socket-name "/var/guix/daemon-socket/socket"))
+  "Connect to the remote build daemon listening on SOCKET-NAME over SESSION,
+an SSH session.  Return a <nix-server> object."
+  (define redirect
+    ;; Code run in SESSION to redirect the remote process' stdin/stdout to the
+    ;; daemon's socket, à la socat.  The SSH protocol supports forwarding to
+    ;; Unix-domain sockets but libssh doesn't have an API for that, hence this
+    ;; hack.
+    `(begin
+       (use-modules (ice-9 match) (rnrs io ports))
+
+       (let ((sock   (socket AF_UNIX SOCK_STREAM 0))
+             (stdin  (current-input-port))
+             (stdout (current-output-port)))
+         (setvbuf stdin _IONBF)
+         (setvbuf stdout _IONBF)
+         (connect sock AF_UNIX ,socket-name)
+
+         (let loop ()
+           (match (select (list stdin sock) '() (list stdin stdout sock))
+             ((reads writes ())
+              (when (memq stdin reads)
+                (match (get-bytevector-some stdin)
+                  ((? eof-object?)
+                   (primitive-exit 0))
+                  (bv
+                   (put-bytevector sock bv))))
+              (when (memq sock reads)
+                (match (get-bytevector-some sock)
+                  ((? eof-object?)
+                   (primitive-exit 0))
+                  (bv
+                   (put-bytevector stdout bv))))
+              (loop))
+             (_
+              (primitive-exit 1)))))))
+
+  (let ((channel
+         (open-remote-pipe* session OPEN_BOTH
+                            ;; Sort-of shell-quote REDIRECT.
+                            "guile" "-c"
+                            (object->string
+                             (object->string redirect)))))
+    (open-connection #:port channel)))
+
 (define* (remote-pipe session command
                       #:key (quote? #t))
   "Run COMMAND (a list) on SESSION, and return an open input/output port,
@@ -306,116 +358,6 @@ hook."
     (set-port-revealed! port 1)
     port))
 
-(define %gc-root-file
-  ;; File name of the temporary GC root we install.
-  (format #f "offload-~a-~a" (gethostname) (getpid)))
-
-(define (register-gc-root file session)
-  "Mark FILE, a store item, as a garbage collector root in SESSION.  Return
-the exit status, zero on success."
-  (define script
-    `(begin
-       (use-modules (guix config))
-
-       ;; Note: we can't use 'add-indirect-root' because dangling links under
-       ;; gcroots/auto are automatically deleted by the GC.  This strategy
-       ;; doesn't have this problem, but it requires write access to that
-       ;; directory.
-       (let ((root-directory (string-append %state-directory
-                                            "/gcroots/tmp")))
-         (catch 'system-error
-           (lambda ()
-             (mkdir root-directory))
-           (lambda args
-             (unless (= EEXIST (system-error-errno args))
-               (error "failed to create remote GC root directory"
-                      root-directory (system-error-errno args)))))
-
-         (catch 'system-error
-           (lambda ()
-             (symlink ,file
-                      (string-append root-directory "/" ,%gc-root-file)))
-           (lambda args
-             ;; If FILE already exists, we can assume that either it's a stale
-             ;; reference (which is fine), or another process is already
-             ;; building the derivation represented by FILE (which is fine
-             ;; too.)  Thus, do nothing in that case.
-             (unless (= EEXIST (system-error-errno args))
-               (apply throw args)))))))
-
-  (let ((pipe (remote-pipe session
-                           `("guile" "-c" ,(object->string script)))))
-    (read-string pipe)
-    (let ((status (channel-get-exit-status pipe)))
-      (close-port pipe)
-      (unless (zero? status)
-        ;; Better be safe than sorry: if we ignore the error here, then FILE
-        ;; may be GC'd just before we start using it.
-        (leave (_ "failed to register GC root for '~a' on '~a' (status: ~a)~%")
-               file (session-get session 'host) status)))))
-
-(define (remove-gc-roots session)
-  "Remove in SESSION the GC roots previously installed with
-'register-gc-root'."
-  (define script
-    `(begin
-       (use-modules (guix config) (ice-9 ftw)
-                    (srfi srfi-1) (srfi srfi-26))
-
-       (let ((root-directory (string-append %state-directory
-                                            "/gcroots/tmp")))
-         (false-if-exception
-          (delete-file
-           (string-append root-directory "/" ,%gc-root-file)))
-
-         ;; These ones were created with 'guix build -r' (there can be more
-         ;; than one in case of multiple-output derivations.)
-         (let ((roots (filter (cut string-prefix? ,%gc-root-file <>)
-                              (scandir "."))))
-           (for-each (lambda (file)
-                       (false-if-exception (delete-file file)))
-                     roots)))))
-
-  (let ((pipe (remote-pipe session
-                           `("guile" "-c" ,(object->string script)))))
-    (read-string pipe)
-    (close-port pipe)))
-
-(define* (offload drv session
-                  #:key print-build-trace? (max-silent-time 3600)
-                  build-timeout (log-port (build-log-port)))
-  "Perform DRV in SESSION, assuming DRV and its prerequisites are available
-there, and write the build log to LOG-PORT.  Return the exit status."
-  ;; Normally DRV has already been protected from GC when it was transferred.
-  ;; The '-r' flag below prevents the build result from being GC'd.
-  (let ((pipe (remote-pipe session
-                           `("guix" "build"
-                             "-r" ,%gc-root-file
-                             ,(format #f "--max-silent-time=~a"
-                                      max-silent-time)
-                             ,@(if build-timeout
-                                   (list (format #f "--timeout=~a"
-                                                 build-timeout))
-                                   '())
-                             ,(derivation-file-name drv))
-
-                           ;; Since 'guix build' writes the build log to its
-                           ;; stderr, everything will go directly to LOG-PORT.
-                           ;; #:error-port log-port ;; FIXME
-                           )))
-    ;; Make standard error visible.
-    (channel-set-stream! pipe 'stderr)
-
-    (let loop ((line (read-line pipe)))
-      (unless (eof-object? line)
-        (display line log-port)
-        (newline log-port)
-        (loop (read-line pipe))))
-
-    (let loop ((status (channel-get-exit-status pipe)))
-      (close-port pipe)
-      status)))
-
 (define* (transfer-and-offload drv machine
                                #:key
                                (inputs '())
@@ -429,99 +371,128 @@ MACHINE."
   (define session
     (open-ssh-session machine))
 
-  (when (begin
-          (register-gc-root (derivation-file-name drv) session)
-          (send-files (cons (derivation-file-name drv) inputs)
-                      session))
-    (format (current-error-port) "offloading '~a' to '~a'...~%"
-            (derivation-file-name drv) (build-machine-name machine))
-    (format (current-error-port) "@ build-remote ~a ~a~%"
-            (derivation-file-name drv) (build-machine-name machine))
-
-    (let ((status (offload drv session
-                           #:print-build-trace? print-build-trace?
-                           #:max-silent-time max-silent-time
-                           #:build-timeout build-timeout)))
-      (if (zero? status)
-          (begin
-            (retrieve-files outputs session)
-            (remove-gc-roots session)
-            (format (current-error-port)
-                    "done with offloaded '~a'~%"
-                    (derivation-file-name drv)))
-          (begin
-            (remove-gc-roots session)
-            (format (current-error-port)
-                    "derivation '~a' offloaded to '~a' failed \
-with exit code ~a~%"
-                    (derivation-file-name drv)
-                    (build-machine-name machine)
-                    status)
-
-            ;; Use exit code 100 for a permanent build failure.  The daemon
-            ;; interprets other non-zero codes as transient build failures.
-            (primitive-exit 100))))))
-
-(define (send-files files session)
-  "Send the subset of FILES that's missing to SESSION's store.  Return #t on
-success, #f otherwise."
-  (define (missing-files files)
-    ;; Return the subset of FILES not already on SESSION.  Use 'head' as a
-    ;; hack to make sure the remote end stops reading when we're done.
-    (let* ((pipe (remote-pipe session
-                              `("guix" "archive" "--missing")
-                              #:quote? #f)))
-      (format pipe "~{~a~%~}" files)
-      (channel-send-eof pipe)
-      (string-tokenize (read-string pipe))))
+  (define store
+    (connect-to-remote-daemon session
+                              (build-machine-daemon-socket machine)))
+
+  (set-build-options store
+                     #:print-build-trace print-build-trace?
+                     #:max-silent-time max-silent-time
+                     #:timeout build-timeout)
+
+  ;; Protect DRV from garbage collection.
+  (add-temp-root store (derivation-file-name drv))
+
+  (send-files (cons (derivation-file-name drv) inputs)
+              store)
+  (format (current-error-port) "offloading '~a' to '~a'...~%"
+          (derivation-file-name drv) (build-machine-name machine))
+  (format (current-error-port) "@ build-remote ~a ~a~%"
+          (derivation-file-name drv) (build-machine-name machine))
+
+  (guard (c ((nix-protocol-error? c)
+             (format (current-error-port)
+                     (_ "derivation '~a' offloaded to '~a' failed: ~a~%")
+                     (derivation-file-name drv)
+                     (build-machine-name machine)
+                     (nix-protocol-error-message c))
+             ;; Use exit code 100 for a permanent build failure.  The daemon
+             ;; interprets other non-zero codes as transient build failures.
+             (primitive-exit 100)))
+    (build-derivations store (list drv)))
+
+  (retrieve-files outputs store)
+  (format (current-error-port) "done with offloaded '~a'~%"
+          (derivation-file-name drv)))
+
+(define (store-import-channel session)
+  "Return an output port to which archives to be exported to SESSION's store
+can be written."
+  ;; Using the 'import-paths' RPC on a remote store would be slow because it
+  ;; makes a round trip every time 32 KiB have been transferred.  This
+  ;; procedure instead opens a separate channel to use the remote
+  ;; 'import-paths' procedure, which consumes all the data in a single round
+  ;; trip.
+  (define import
+    `(begin
+       (use-modules (guix))
+
+       (with-store store
+         (setvbuf (current-input-port) _IONBF)
+         (import-paths store (current-input-port)))))
+
+  (open-remote-output-pipe session
+                           (string-join
+                            `("guile" "-c"
+                              ,(object->string
+                                (object->string import))))))
+
+(define (store-export-channel session files)
+  "Return an input port from which an export of FILES from SESSION's store can
+be read."
+  ;; Same as above: this is more efficient than calling 'export-paths' on a
+  ;; remote store.
+  (define export
+    `(begin
+       (use-modules (guix))
+
+       (with-store store
+         (setvbuf (current-output-port) _IONBF)
+         (export-paths store ',files (current-output-port)))))
+
+  (open-remote-input-pipe session
+                          (string-join
+                           `("guile" "-c"
+                             ,(object->string
+                               (object->string export))))))
 
+(define (send-files files remote)
+  "Send the subset of FILES that's missing to REMOTE, a remote store."
   (with-store store
-    (guard (c ((nix-protocol-error? c)
-               (warning (_ "failed to export files for '~a': ~s~%")
-                        (session-get session 'host) c)
-               #f))
-
-      ;; Compute the subset of FILES missing on SESSION, and send them in
-      ;; topologically sorted order so that they can actually be imported.
-      (let* ((files (missing-files (topologically-sorted store files)))
-             (pipe  (remote-pipe session
-                                 '("guix" "archive" "--import")
-                                 #:quote? #f)))
-        (format #t (_ "sending ~a store files to '~a'...~%")
-                (length files) (session-get session 'host))
-
-        (export-paths store files pipe)
-        (channel-send-eof pipe)
-
-        ;; Wait for the remote process to complete.
-        (let ((status (channel-get-exit-status pipe)))
-          (close pipe)
-          status)))))
-
-(define (retrieve-files files session)
+    ;; Compute the subset of FILES missing on SESSION, and send them in
+    ;; topologically sorted order so that they can actually be imported.
+    (let* ((sorted  (topologically-sorted store files))
+           (session (channel-get-session (nix-server-socket remote)))
+           (node    (make-node session))
+           (missing (node-eval node
+                               `(begin
+                                  (use-modules (guix)
+                                               (srfi srfi-1) (srfi srfi-26))
+
+                                  (with-store store
+                                    (remove (cut valid-path? store <>)
+                                            ',sorted)))))
+           (port    (store-import-channel session)))
+      (format #t (_ "sending ~a store files to '~a'...~%")
+              (length missing) (session-get session 'host))
+
+      (export-paths store missing port)
+
+      ;; Tell the remote process that we're done.  (In theory the
+      ;; end-of-archive mark of 'export-paths' would be enough, but in
+      ;; practice it's not.)
+      (channel-send-eof port)
+
+      ;; Wait for completion of the remote process.
+      (let ((result (zero? (channel-get-exit-status port))))
+        (close-port port)
+        result))))
+
+(define (retrieve-files files remote)
   "Retrieve FILES from SESSION's store, and import them."
-  (define host
-    (session-get session 'host))
-
-  (let ((pipe (remote-pipe session
-                           `("guix" "archive" "--export" ,@files)
-                           #:quote? #f)))
-    (and pipe
-         (with-store store
-           (guard (c ((nix-protocol-error? c)
-                      (warning (_ "failed to import files from '~a': ~s~%")
-                               host c)
-                      #f))
-             (format (current-error-port) "retrieving ~a files from '~a'...~%"
-                     (length files) host)
-
-             ;; We cannot use the 'import-paths' RPC here because we already
-             ;; hold the locks for FILES.
-             (restore-file-set pipe
-                               #:log-port (current-error-port)
-                               #:lock? #f)
-
-             (close-port pipe))))))
+  (let* ((session (channel-get-session (nix-server-socket remote)))
+         (host    (session-get session 'host))
+         (port    (store-export-channel session files)))
+    (format #t (_ "retrieving ~a files from '~a'...~%")
+            (length files) host)
+
+    ;; We cannot use the 'import-paths' RPC here because we already
+    ;; hold the locks for FILES.
+    (let ((result (restore-file-set port
+                                    #:log-port (current-error-port)
+                                    #:lock? #f)))
+      (close-port port)
+      result)))
 
 
 ;;;