summary refs log tree commit diff
diff options
context:
space:
mode:
authorLudovic Courtès <ludo@gnu.org>2019-10-15 12:24:09 +0200
committerLudovic Courtès <ludo@gnu.org>2019-10-15 13:56:41 +0200
commit00d732195812234f578a9513b32010fbe6469cd1 (patch)
tree762089240dd0cc144f07f05c27498edc4c631c48
parente464ac667297d2acf57e52438a39cadc87b95da2 (diff)
downloadguix-00d732195812234f578a9513b32010fbe6469cd1.tar.gz
offload: Set a longer SSH session timeout.
Fixes <https://bugs.gnu.org/37762>.

* guix/scripts/offload.scm (open-ssh-session): Add 'max-silent-time'
parameter.  Add call to 'session-set!' before returning SESSION.
(transfer-and-offload): Pass MAX-SILENT-TIME to 'open-ssh-session'.
(%short-timeout): New variable.
(choose-build-machine): Pass %SHORT-TIMEOUT to 'open-ssh-session'.
(check-machine-availability): Likewise.
(check-machine-status): Likewise.
-rw-r--r--guix/scripts/offload.scm22
1 files changed, 16 insertions, 6 deletions
diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm
index bb307cefd1..1384f6b41d 100644
--- a/guix/scripts/offload.scm
+++ b/guix/scripts/offload.scm
@@ -174,7 +174,7 @@ can interpret meaningfully."
 private key from '~a': ~a")
                                          file str))))))))
 
-(define (open-ssh-session machine)
+(define* (open-ssh-session machine #:optional (max-silent-time -1))
   "Open an SSH session for MACHINE and return it.  Throw an error on failure."
   (let ((private (private-key-from-file* (build-machine-private-key machine)))
         (public  (public-key-from-file
@@ -183,7 +183,7 @@ private key from '~a': ~a")
         (session (make-session #:user (build-machine-user machine)
                                #:host (build-machine-name machine)
                                #:port (build-machine-port machine)
-                               #:timeout 10       ;seconds
+                               #:timeout 10       ;initial timeout (seconds)
                                ;; #:log-verbosity 'protocol
                                #:identity (build-machine-private-key machine)
 
@@ -225,6 +225,10 @@ instead of '~a' of type '~a'~%")
            (leave (G_ "SSH public key authentication failed for '~a': ~a~%")
                   (build-machine-name machine) (get-error session))))
 
+       ;; From then on use MAX-SILENT-TIME as the absolute timeout when
+       ;; reading from or write to a channel for this session.
+       (session-set! session 'timeout max-silent-time)
+
        session)
       (x
        ;; Connection failed or timeout expired.
@@ -313,7 +317,7 @@ hook."
 INPUTS to MACHINE; if building DRV succeeds, retrieve all of OUTPUTS from
 MACHINE."
   (define session
-    (open-ssh-session machine))
+    (open-ssh-session machine max-silent-time))
 
   (define store
     (connect-to-remote-daemon session
@@ -472,7 +476,8 @@ slot (which must later be released with 'release-build-slot'), or #f and #f."
        ;; Return the best machine unless it's already overloaded.
        ;; Note: We call 'node-load' only as a last resort because it is
        ;; too costly to call it once for every machine.
-       (let* ((session (false-if-exception (open-ssh-session best)))
+       (let* ((session (false-if-exception (open-ssh-session best
+                                                             %short-timeout)))
               (node    (and session (remote-inferior session)))
               (load    (and node (normalized-load best (node-load node))))
               (space   (and node (node-free-disk-space node))))
@@ -573,6 +578,11 @@ If TIMEOUT is #f, simply evaluate EXP..."
 ;;; Installation tests.
 ;;;
 
+(define %short-timeout
+  ;; Timeout in seconds used on SSH connections where reads and writes
+  ;; shouldn't take long.
+  15)
+
 (define (assert-node-repl node name)
   "Bail out if NODE is not running Guile."
   (match (node-guile-version node)
@@ -658,7 +668,7 @@ machine."
           (length machines) machine-file)
     (let* ((names    (map build-machine-name machines))
            (sockets  (map build-machine-daemon-socket machines))
-           (sessions (map open-ssh-session machines))
+           (sessions (map (cut open-ssh-session <> %short-timeout) machines))
            (nodes    (map remote-inferior sessions)))
       (for-each assert-node-has-guix nodes names)
       (for-each assert-node-repl nodes names)
@@ -682,7 +692,7 @@ machine."
           (length machines) machine-file)
     (for-each (lambda (machine)
                 (define session
-                  (open-ssh-session machine))
+                  (open-ssh-session machine %short-timeout))
 
                 (match (remote-inferior session)
                   (#f