summary refs log tree commit diff
diff options
context:
space:
mode:
authorLudovic Courtès <ludo@gnu.org>2018-12-21 23:12:52 +0100
committerLudovic Courtès <ludo@gnu.org>2018-12-21 23:50:13 +0100
commit63b0c3eaccdf1816b419632cd7fe721934d2eb27 (patch)
treed4ee34fa8abf87572fa1fe4dfd4c85af80952efa
parentbbe66a530a014e8146d63002a5294941e935f863 (diff)
downloadguix-63b0c3eaccdf1816b419632cd7fe721934d2eb27.tar.gz
offload: Skip machines that are low on disk space.
Fixes <https://bugs.gnu.org/33378>.

* guix/scripts/offload.scm (node-free-disk-space): New procedure.
(%minimum-disk-space): New variable.
(choose-build-machine): Call 'node-free-disk-space' and take it into
account in addition to LOAD.
(check-machine-status): Display the free disk space.
-rw-r--r--guix/scripts/offload.scm34
1 files changed, 28 insertions, 6 deletions
diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm
index c345d438d1..0bedcb402f 100644
--- a/guix/scripts/offload.scm
+++ b/guix/scripts/offload.scm
@@ -321,6 +321,13 @@ hook."
     (set-port-revealed! port 1)
     port))
 
+(define (node-free-disk-space node)
+  "Return the free disk space, in bytes, in NODE's store."
+  (node-eval node
+             `(begin
+                (use-modules (guix build syscalls))
+                (free-disk-space ,(%store-prefix)))))
+
 (define* (transfer-and-offload drv machine
                                #:key
                                (inputs '())
@@ -392,6 +399,12 @@ MACHINE."
                (build-requirements-features requirements)
                (build-machine-features machine))))
 
+(define %minimum-disk-space
+  ;; Minimum disk space required on the build machine for a build to be
+  ;; offloaded.  This keeps us from offloading to machines that are bound to
+  ;; run out of disk space.
+  (* 100 (expt 2 20)))                            ;100 MiB
+
 (define (node-load node)
   "Return the load on NODE.  Return +∞ if NODE is misbehaving."
   (let ((line (node-eval node
@@ -486,9 +499,10 @@ slot (which must later be released with 'release-build-slot'), or #f and #f."
          ;; too costly to call it once for every machine.
          (let* ((session (false-if-exception (open-ssh-session best)))
                 (node    (and session (make-node session)))
-                (load    (and node (normalized-load best (node-load node)))))
+                (load    (and node (normalized-load best (node-load node))))
+                (space   (and node (node-free-disk-space node))))
            (when session (disconnect! session))
-           (if (and node (< load 2.))
+           (if (and node (< load 2.) (>= space %minimum-disk-space))
                (match others
                  (((machines slots) ...)
                   ;; Release slots from the uninteresting machines.
@@ -498,7 +512,13 @@ slot (which must later be released with 'release-build-slot'), or #f and #f."
                   ;; eventually release it.
                   (values best slot)))
                (begin
-                 ;; BEST is overloaded, so try the next one.
+                 ;; BEST is unsuitable, so try the next one.
+                 (when (and space (< space %minimum-disk-space))
+                   (format (current-error-port)
+                           "skipping machine '~a' because it is low \
+on disk space (~,2f MiB free)~%"
+                           (build-machine-name best)
+                           (/ space (expt 2 20) 1.)))
                  (release-build-slot slot)
                  (loop others)))))
         (()
@@ -694,15 +714,17 @@ machine."
                 (let* ((session (open-ssh-session machine))
                        (node    (make-node session))
                        (uts     (node-eval node '(uname)))
-                       (load    (node-load node)))
+                       (load    (node-load node))
+                       (free    (node-free-disk-space node)))
                   (disconnect! session)
                   (format #t "~a~%  kernel: ~a ~a~%  architecture: ~a~%\
-  host name: ~a~%  normalized load: ~a~%"
+  host name: ~a~%  normalized load: ~a~%  free disk space: ~,2f MiB~%"
                           (build-machine-name machine)
                           (utsname:sysname uts) (utsname:release uts)
                           (utsname:machine uts)
                           (utsname:nodename uts)
-                          load)))
+                          load
+                          (/ free (expt 2 20) 1.))))
               machines)))