summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--config-daemon.ac18
-rw-r--r--doc/guix.texi69
-rw-r--r--guix/scripts/offload.scm279
-rw-r--r--m4/guix.m418
4 files changed, 213 insertions, 171 deletions
diff --git a/config-daemon.ac b/config-daemon.ac
index 8a3e6d8b60..056c939e39 100644
--- a/config-daemon.ac
+++ b/config-daemon.ac
@@ -128,12 +128,20 @@ if test "x$guix_build_daemon" = "xyes"; then
   dnl 'restore-file-set', which requires unbuffered custom binary input
   dnl ports from Guile >= 2.0.10.)
   GUIX_CHECK_UNBUFFERED_CBIP
-  guix_build_daemon_offload="$ac_cv_guix_cbips_support_setvbuf"
 
-  if test "x$guix_build_daemon_offload" = "xyes"; then
-    AC_DEFINE([HAVE_DAEMON_OFFLOAD_HOOK], [1],
-      [Define if the daemon's 'offload' build hook is being built.])
-  fi
+  dnl Check for Guile-SSH, which is required by 'guix offload'.
+  GUIX_CHECK_GUILE_SSH
+
+  case "x$ac_cv_guix_cbips_support_setvbuf$guix_cv_have_recent_guile_ssh" in
+    xyesyes)
+      guix_build_daemon_offload="yes"
+      AC_DEFINE([HAVE_DAEMON_OFFLOAD_HOOK], [1],
+	[Define if the daemon's 'offload' build hook is being built (requires Guile-SSH).])
+      ;;
+    *)
+      guix_build_daemon_offload="no"
+      ;;
+  esac
 
   dnl Temporary directory used to store the daemon's data.
   GUIX_TEST_ROOT_DIRECTORY
diff --git a/doc/guix.texi b/doc/guix.texi
index ebb138e15d..f1cb007aa9 100644
--- a/doc/guix.texi
+++ b/doc/guix.texi
@@ -567,6 +567,12 @@ guix import}).  It is of
 interest primarily for developers and not for casual users.
 
 @item
+@c Note: We need at least 0.10.2 for 'channel-send-eof'.
+Support for build offloading (@pxref{Daemon Offload Setup}) depends on
+@uref{https://github.com/artyom-poptsov/guile-ssh, Guile-SSH},
+version 0.10.2 or later.
+
+@item
 When @url{http://zlib.net, zlib} is available, @command{guix publish}
 can compress build byproducts (@pxref{Invoking guix publish}).
 @end itemize
@@ -814,9 +820,11 @@ available on the system---making it much harder to view them as
 
 @cindex offloading
 @cindex build hook
-When desired, the build daemon can @dfn{offload}
-derivation builds to other machines
-running Guix, using the @code{offload} @dfn{build hook}.  When that
+When desired, the build daemon can @dfn{offload} derivation builds to
+other machines running Guix, using the @code{offload} @dfn{build
+hook}@footnote{This feature is available only when
+@uref{https://github.com/artyom-poptsov/guile-ssh, Guile-SSH} is
+present.}.  When that
 feature is enabled, a list of user-specified build machines is read from
 @file{/etc/guix/machines.scm}; every time a build is requested, for
 instance via @code{guix build}, the daemon attempts to offload it to one
@@ -832,16 +840,18 @@ The @file{/etc/guix/machines.scm} file typically looks like this:
 (list (build-machine
         (name "eightysix.example.org")
         (system "x86_64-linux")
+        (host-key "ssh-ed25519 AAAAC3Nza@dots{}")
         (user "bob")
-        (speed 2.))    ; incredibly fast!
+        (speed 2.))     ;incredibly fast!
 
       (build-machine
         (name "meeps.example.org")
         (system "mips64el-linux")
+        (host-key "ssh-rsa AAAAB3Nza@dots{}")
         (user "alice")
         (private-key
          (string-append (getenv "HOME")
-                        "/.lsh/identity-for-guix"))))
+                        "/.ssh/identity-for-guix"))))
 @end example
 
 @noindent
@@ -875,31 +885,50 @@ The user account to use when connecting to the remote machine over SSH.
 Note that the SSH key pair must @emph{not} be passphrase-protected, to
 allow non-interactive logins.
 
+@item host-key
+This must be the machine's SSH @dfn{public host key} in OpenSSH format.
+This is used to authenticate the machine when we connect to it.  It is a
+long string that looks like this:
+
+@example
+ssh-ed25519 AAAAC3NzaC@dots{}mde+UhL hint@@example.org
+@end example
+
+If the machine is running the OpenSSH daemon, @command{sshd}, the host
+key can be found in a file such as
+@file{/etc/ssh/ssh_host_ed25519_key.pub}.
+
+If the machine is running the SSH daemon of GNU@tie{}lsh,
+@command{lshd}, the host key is in @file{/etc/lsh/host-key.pub} or a
+similar file.  It can be converted to the OpenSSH format using
+@command{lsh-export-key} (@pxref{Converting keys,,, lsh, LSH Manual}):
+
+@example
+$ lsh-export-key --openssh < /etc/lsh/host-key.pub 
+ssh-rsa AAAAB3NzaC1yc2EAAAAEOp8FoQAAAQEAs1eB46LV@dots{}
+@end example
+
 @end table
 
 A number of optional fields may be specified:
 
-@table @code
-
-@item port
-Port number of SSH server on the machine (default: 22).
+@table @asis
 
-@item private-key
-The SSH private key file to use when connecting to the machine.
+@item @code{port} (default: @code{22})
+Port number of SSH server on the machine.
 
-Currently offloading uses GNU@tie{}lsh as its SSH client
-(@pxref{Invoking lsh,,, GNU lsh Manual}).  Thus, the key file here must
-be an lsh key file.  This may change in the future, though.
+@item @code{private-key} (default: @file{~/.ssh/id_rsa})
+The SSH private key file to use when connecting to the machine, in
+OpenSSH format.
 
-@item parallel-builds
-The number of builds that may run in parallel on the machine (1 by
-default.)
+@item @code{parallel-builds} (default: @code{1})
+The number of builds that may run in parallel on the machine.
 
-@item speed
+@item @code{speed} (default: @code{1.0})
 A ``relative speed factor''.  The offload scheduler will tend to prefer
 machines with a higher speed factor.
 
-@item features
+@item @code{features} (default: @code{'()})
 A list of strings denoting specific features supported by the machine.
 An example is @code{"kvm"} for machines that have the KVM Linux modules
 and corresponding hardware support.  Derivations can request features by
@@ -915,7 +944,7 @@ machines, since offloading works by invoking the @code{guix archive} and
 this is the case by running:
 
 @example
-lsh build-machine guile -c "'(use-modules (guix config))'"
+ssh build-machine guile -c "'(use-modules (guix config))'"
 @end example
 
 There is one last thing to do once @file{machines.scm} is in place.  As
diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm
index 33d141e7ef..327c99dfea 100644
--- a/guix/scripts/offload.scm
+++ b/guix/scripts/offload.scm
@@ -17,6 +17,10 @@
 ;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.
 
 (define-module (guix scripts offload)
+  #:use-module (ssh key)
+  #:use-module (ssh auth)
+  #:use-module (ssh session)
+  #:use-module (ssh channel)
   #:use-module (guix config)
   #:use-module (guix records)
   #:use-module (guix store)
@@ -65,14 +69,13 @@
   (system          build-machine-system)          ; string
   (user            build-machine-user)            ; string
   (private-key     build-machine-private-key      ; file name
-                   (default (user-lsh-private-key)))
+                   (default (user-openssh-private-key)))
+  (host-key        build-machine-host-key)        ; string
   (parallel-builds build-machine-parallel-builds  ; number
                    (default 1))
   (speed           build-machine-speed            ; inexact real
                    (default 1.0))
   (features        build-machine-features         ; list of strings
-                   (default '()))
-  (ssh-options     build-machine-ssh-options      ; list of strings
                    (default '())))
 
 (define-record-type* <build-requirements>
@@ -86,19 +89,11 @@
   ;; File that lists machines available as build slaves.
   (string-append %config-directory "/machines.scm"))
 
-(define %lsh-command
-  "lsh")
-
-(define %lshg-command
-  ;; FIXME: 'lshg' fails to pass large amounts of data, see
-  ;; <http://lists.lysator.liu.se/pipermail/lsh-bugs/2014q1/000639.html>.
-  "lsh")
-
-(define (user-lsh-private-key)
-  "Return the user's default lsh private key, or #f if it could not be
+(define (user-openssh-private-key)
+  "Return the user's default SSH private key, or #f if it could not be
 determined."
   (and=> (getenv "HOME")
-         (cut string-append <> "/.lsh/identity")))
+         (cut string-append <> "/.ssh/id_rsa")))
 
 (define %user-module
   ;; Module in which the machine description file is loaded.
@@ -134,60 +129,79 @@ determined."
          (leave (_ "failed to load machine file '~a': ~s~%")
                 file args))))))
 
-;;; FIXME: The idea was to open the connection to MACHINE once for all, but
-;;; lshg is currently non-functional.
-;; (define (open-ssh-gateway machine)
-;;   "Initiate an SSH connection gateway to MACHINE, and return the PID of the
-;; running lsh gateway upon success, or #f on failure."
-;;   (catch 'system-error
-;;     (lambda ()
-;;       (let* ((port   (open-pipe* OPEN_READ %lsh-command
-;;                                  "-l" (build-machine-user machine)
-;;                                  "-i" (build-machine-private-key machine)
-;;                                  ;; XXX: With lsh 2.1, passing '--write-pid'
-;;                                  ;; last causes the PID not to be printed.
-;;                                  "--write-pid" "--gateway" "--background"
-;;                                  (build-machine-name machine)))
-;;              (line   (read-line port))
-;;              (status (close-pipe port)))
-;;        (if (zero? status)
-;;            (let ((pid (string->number line)))
-;;              (if (integer? pid)
-;;                  pid
-;;                  (begin
-;;                    (warning (_ "'~a' did not write its PID on stdout: ~s~%")
-;;                             %lsh-command line)
-;;                    #f)))
-;;            (begin
-;;              (warning (_ "failed to initiate SSH connection to '~a':\
-;;  '~a' exited with ~a~%")
-;;                       (build-machine-name machine)
-;;                       %lsh-command
-;;                       (status:exit-val status))
-;;              #f))))
-;;     (lambda args
-;;       (leave (_ "failed to execute '~a': ~a~%")
-;;              %lsh-command (strerror (system-error-errno args))))))
-
-(define-syntax with-error-to-port
-  (syntax-rules ()
-    ((_ port exp0 exp ...)
-     (let ((new port)
-           (old (current-error-port)))
-       (dynamic-wind
-         (lambda ()
-           (set-current-error-port new))
-         (lambda ()
-           exp0 exp ...)
-         (lambda ()
-           (set-current-error-port old)))))))
-
-(define* (remote-pipe machine mode command
-                      #:key (error-port (current-error-port)) (quote? #t))
-  "Run COMMAND (a string list) on MACHINE, assuming an lsh gateway has been
-set up.  When QUOTE? is true, perform shell-quotation of all the elements of
-COMMAND.  Return either a pipe opened with MODE, or #f if the lsh client could
-not be started."
+(define (host-key->type+key host-key)
+  "Destructure HOST-KEY, an OpenSSH host key string, and return two values:
+its key type as a symbol, and the actual base64-encoded string."
+  (define (type->symbol type)
+    (and (string-prefix? "ssh-" type)
+         (string->symbol (string-drop type 4))))
+
+  (match (string-tokenize host-key)
+    ((type key _)
+     (values (type->symbol type) key))
+    ((type key)
+     (values (type->symbol type) key))))
+
+(define (private-key-from-file* file)
+  "Like 'private-key-from-file', but raise an error that 'with-error-handling'
+can interpret meaningfully."
+  (catch 'guile-ssh-error
+    (lambda ()
+      (private-key-from-file file))
+    (lambda (key proc str . rest)
+      (raise (condition
+              (&message (message (format #f (_ "failed to load SSH \
+private key from '~a': ~a")
+                                         file str))))))))
+
+(define (open-ssh-session machine)
+  "Open an SSH session for MACHINE and return it.  Throw an error on failure."
+  (let ((private (private-key-from-file* (build-machine-private-key machine)))
+        (public  (public-key-from-file
+                  (string-append (build-machine-private-key machine)
+                                 ".pub")))
+        (session (make-session #:user (build-machine-user machine)
+                               #:host (build-machine-name machine)
+                               #:port (build-machine-port machine)
+                               #:timeout 5        ;seconds
+                               ;; #:log-verbosity 'protocol
+                               #:identity (build-machine-private-key machine)
+
+                               ;; We need lightweight compression when
+                               ;; exchanging full archives.
+                               #:compression "zlib"
+                               #:compression-level 3)))
+    (connect! session)
+
+    ;; Authenticate the server.  XXX: Guile-SSH 0.10.1 doesn't know about
+    ;; ed25519 keys and 'get-key-type' returns #f in that case.
+    (let-values (((server)   (get-server-public-key session))
+                 ((type key) (host-key->type+key
+                              (build-machine-host-key machine))))
+      (unless (and (or (not (get-key-type server))
+                       (eq? (get-key-type server) type))
+                   (string=? (public-key->string server) key))
+        ;; Key mismatch: something's wrong.  XXX: It could be that the server
+        ;; provided its Ed25519 key when we where expecting its RSA key.
+        (leave (_ "server at '~a' returned host key '~a' of type '~a' \
+instead of '~a' of type '~a'~%")
+               (build-machine-name machine)
+               (public-key->string server) (get-key-type server)
+               key type)))
+
+    (let ((auth (userauth-public-key! session private)))
+      (unless (eq? 'success auth)
+        (disconnect! session)
+        (leave (_ "SSH public key authentication failed for '~a': ~a~%")
+               (build-machine-name machine) (get-error session))))
+
+    session))
+
+(define* (remote-pipe machine command
+                      #:key (quote? #t))
+  "Run COMMAND (a list) on MACHINE, and return an open input/output port,
+which is also an SSH channel.  When QUOTE? is true, perform shell-quotation of
+all the elements of COMMAND."
   (define (shell-quote str)
     ;; Sort-of shell-quote STR so it can be passed as an argument to the
     ;; shell.
@@ -195,20 +209,15 @@ not be started."
       (lambda ()
         (write str))))
 
-  ;; Let the child inherit ERROR-PORT.
-  (with-error-to-port error-port
-    (apply open-pipe* mode %lshg-command
-           "-l" (build-machine-user machine)
-           "-p" (number->string (build-machine-port machine))
-
-           ;; XXX: Remove '-i' when %LSHG-COMMAND really is lshg.
-           "-i" (build-machine-private-key machine)
-
-           (append (build-machine-ssh-options machine)
-                   (list (build-machine-name machine))
-                   (if quote?
-                       (map shell-quote command)
-                       command)))))
+  ;; TODO: Use (ssh popen) instead.
+  (let* ((session (open-ssh-session machine))
+         (channel (make-channel session)))
+    (channel-open-session channel)
+    (channel-request-exec channel
+                          (string-join (if quote?
+                                           (map shell-quote command)
+                                           command)))
+    channel))
 
 
 ;;;
@@ -335,10 +344,11 @@ hook."
              (unless (= EEXIST (system-error-errno args))
                (apply throw args)))))))
 
-  (let ((pipe (remote-pipe machine OPEN_READ
+  (let ((pipe (remote-pipe machine
                            `("guile" "-c" ,(object->string script)))))
     (read-string pipe)
-    (let ((status (close-pipe pipe)))
+    (let ((status (channel-get-exit-status pipe)))
+      (close-port pipe)
       (unless (zero? status)
         ;; Better be safe than sorry: if we ignore the error here, then FILE
         ;; may be GC'd just before we start using it.
@@ -367,10 +377,10 @@ hook."
                        (false-if-exception (delete-file file)))
                      roots)))))
 
-  (let ((pipe (remote-pipe machine OPEN_READ
+  (let ((pipe (remote-pipe machine
                            `("guile" "-c" ,(object->string script)))))
     (read-string pipe)
-    (close-pipe pipe)))
+    (close-port pipe)))
 
 (define* (offload drv machine
                   #:key print-build-trace? (max-silent-time 3600)
@@ -384,7 +394,7 @@ there, and write the build log to LOG-PORT.  Return the exit status."
 
   ;; Normally DRV has already been protected from GC when it was transferred.
   ;; The '-r' flag below prevents the build result from being GC'd.
-  (let ((pipe (remote-pipe machine OPEN_READ
+  (let ((pipe (remote-pipe machine
                            `("guix" "build"
                              "-r" ,%gc-root-file
                              ,(format #f "--max-silent-time=~a"
@@ -397,14 +407,20 @@ there, and write the build log to LOG-PORT.  Return the exit status."
 
                            ;; Since 'guix build' writes the build log to its
                            ;; stderr, everything will go directly to LOG-PORT.
-                           #:error-port log-port)))
+                           ;; #:error-port log-port ;; FIXME
+                           )))
+    ;; Make standard error visible.
+    (channel-set-stream! pipe 'stderr)
+
     (let loop ((line (read-line pipe)))
       (unless (eof-object? line)
         (display line log-port)
         (newline log-port)
         (loop (read-line pipe))))
 
-    (close-pipe pipe)))
+    (let loop ((status (channel-get-exit-status pipe)))
+      (close-port pipe)
+      status)))
 
 (define* (transfer-and-offload drv machine
                                #:key
@@ -438,7 +454,7 @@ MACHINE."
 with exit code ~a~%"
                     (derivation-file-name drv)
                     (build-machine-name machine)
-                    (status:exit-val status))
+                    status)
 
             ;; Use exit code 100 for a permanent build failure.  The daemon
             ;; interprets other non-zero codes as transient build failures.
@@ -448,24 +464,14 @@ with exit code ~a~%"
   "Send the subset of FILES that's missing to MACHINE's store.  Return #t on
 success, #f otherwise."
   (define (missing-files files)
-    ;; Return the subset of FILES not already on MACHINE.
-    (let*-values (((files)
-                   (format #f "~{~a~%~}" files))
-                  ((missing pids)
-                   (filtered-port
-                    (append (list (which %lshg-command)
-                                  "-l" (build-machine-user machine)
-                                  "-p" (number->string
-                                        (build-machine-port machine))
-                                  "-i" (build-machine-private-key machine))
-                            (build-machine-ssh-options machine)
-                            (cons (build-machine-name machine)
-                                  '("guix" "archive" "--missing")))
-                    (open-input-string files)))
-                  ((result)
-                   (read-string missing)))
-      (for-each waitpid pids)
-      (string-tokenize result)))
+    ;; Return the subset of FILES not already on MACHINE.  Use 'head' as a
+    ;; hack to make sure the remote end stops reading when we're done.
+    (let* ((pipe (remote-pipe machine
+                              `("guix" "archive" "--missing")
+                              #:quote? #f)))
+      (format pipe "~{~a~%~}" files)
+      (channel-send-eof pipe)
+      (string-tokenize (read-string pipe))))
 
   (with-store store
     (guard (c ((nix-protocol-error? c)
@@ -476,40 +482,28 @@ success, #f otherwise."
 
       ;; Compute the subset of FILES missing on MACHINE, and send them in
       ;; topologically sorted order so that they can actually be imported.
-      ;;
-      ;; To reduce load on the machine that's offloading (since it's typically
-      ;; already quite busy, see hydra.gnu.org), compress with gzip rather
-      ;; than xz: For a compression ratio 2 times larger, it is 20 times
-      ;; faster.
       (let* ((files (missing-files (topologically-sorted store files)))
-             (pipe  (remote-pipe machine OPEN_WRITE
-                                 '("gzip" "-dc" "|"
-                                   "guix" "archive" "--import")
+             (pipe  (remote-pipe machine
+                                 '("guix" "archive" "--import")
                                  #:quote? #f)))
         (format #t (_ "sending ~a store files to '~a'...~%")
                 (length files) (build-machine-name machine))
-        (call-with-compressed-output-port 'gzip pipe
-          (lambda (compressed)
-            (catch 'system-error
-              (lambda ()
-                (export-paths store files compressed))
-              (lambda args
-                (warning (_ "failed while exporting files to '~a': ~a~%")
-                         (build-machine-name machine)
-                         (strerror (system-error-errno args))))))
-          #:options '("--fast"))
-
-        ;; Wait for the 'lsh' process to complete.
-        (zero? (close-pipe pipe))))))
+
+        (export-paths store files pipe)
+        (channel-send-eof pipe)
+
+        ;; Wait for the remote process to complete.
+        (let ((status (channel-get-exit-status pipe)))
+          (close pipe)
+          status)))))
 
 (define (retrieve-files files machine)
   "Retrieve FILES from MACHINE's store, and import them."
   (define host
     (build-machine-name machine))
 
-  (let ((pipe (remote-pipe machine OPEN_READ
-                           `("guix" "archive" "--export" ,@files
-                             "|" "xz" "-c")
+  (let ((pipe (remote-pipe machine
+                           `("guix" "archive" "--export" ,@files)
                            #:quote? #f)))
     (and pipe
          (with-store store
@@ -522,14 +516,11 @@ success, #f otherwise."
 
              ;; We cannot use the 'import-paths' RPC here because we already
              ;; hold the locks for FILES.
-             (call-with-decompressed-port 'xz pipe
-               (lambda (decompressed)
-                 (restore-file-set decompressed
-                                   #:log-port (current-error-port)
-                                   #:lock? #f)))
+             (restore-file-set pipe
+                               #:log-port (current-error-port)
+                               #:lock? #f)
 
-             ;; Wait for the 'lsh' process to complete.
-             (zero? (close-pipe pipe)))))))
+             (close-port pipe))))))
 
 
 ;;;
@@ -547,13 +538,9 @@ success, #f otherwise."
 (define (machine-load machine)
   "Return the load of MACHINE, divided by the number of parallel builds
 allowed on MACHINE."
-  (let* ((pipe   (remote-pipe machine OPEN_READ `("cat" "/proc/loadavg")))
-         (line   (read-line pipe))
-         (status (close-pipe pipe)))
-    (unless (eqv? 0 (status:exit-val status))
-      (warning (_ "failed to obtain load of '~a': SSH client exited with ~a~%")
-               (build-machine-name machine)
-               (status:exit-val status)))
+  (let* ((pipe   (remote-pipe machine '("cat" "/proc/loadavg")))
+         (line   (read-line pipe)))
+    (close-port pipe)
 
     (if (eof-object? line)
         +inf.0    ;MACHINE does not respond, so assume it is infinitely loaded
diff --git a/m4/guix.m4 b/m4/guix.m4
index 6d8ec2e4e0..6630598416 100644
--- a/m4/guix.m4
+++ b/m4/guix.m4
@@ -171,6 +171,24 @@ AC_DEFUN([GUIX_CHECK_UNBUFFERED_CBIP], [
      fi])
 ])
 
+dnl GUIX_CHECK_GUILE_SSH
+dnl
+dnl Check whether a recent-enough Guile-SSH is available.
+AC_DEFUN([GUIX_CHECK_GUILE_SSH], [
+  dnl Check whether 'channel-send-eof' (introduced in 0.10.2) is present.
+  AC_CACHE_CHECK([whether Guile-SSH is available and recent enough],
+    [guix_cv_have_recent_guile_ssh],
+    [GUILE_CHECK([retval],
+      [(and (@ (ssh channel) channel-send-eof)
+            (@ (ssh popen) open-remote-pipe)
+	    (@ (ssh dist node) node-eval))])
+     if test "$retval" = 0; then
+       guix_cv_have_recent_guile_ssh="yes"
+     else
+       guix_cv_have_recent_guile_ssh="no"
+     fi])
+])
+
 dnl GUIX_TEST_ROOT_DIRECTORY
 AC_DEFUN([GUIX_TEST_ROOT_DIRECTORY], [
   AC_CACHE_CHECK([for unit test root directory],