summary refs log tree commit diff
diff options
context:
space:
mode:
authorLudovic Courtès <ludo@gnu.org>2018-12-26 17:30:56 +0100
committerLudovic Courtès <ludo@gnu.org>2018-12-26 18:40:49 +0100
commit7f4d102c2fff9ff60cd7bc69f5e7eb694274baae (patch)
tree9e7ccb75968298125143c8172764607876ada364
parentb12f8720f574c75e8b65b8a076e98caa61830b62 (diff)
downloadguix-7f4d102c2fff9ff60cd7bc69f5e7eb694274baae.tar.gz
offload: Remove the "machine choice" lock.
This lock was unnecessary and it led to a contention when many 'guix
offload' processes are polling for available machines.

* guix/scripts/offload.scm (machine-choice-lock-file): Remove.
(choose-build-machine): Remove surrounding 'with-file-lock (machine-lock-file)'.
-rw-r--r--guix/scripts/offload.scm119
1 files changed, 56 insertions, 63 deletions
diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm
index dcdccc80e0..f90f9e92fa 100644
--- a/guix/scripts/offload.scm
+++ b/guix/scripts/offload.scm
@@ -453,10 +453,6 @@ of free disk space on '~a'~%")
                  (build-machine-name machine)
                  "." (symbol->string hint) ".lock"))
 
-(define (machine-choice-lock-file)
-  "Return the name of the file used as a lock when choosing a build machine."
-  (string-append %state-directory "/offload/machine-choice.lock"))
-
 (define (random-seed)
   (logxor (getpid) (car (gettimeofday))))
 
@@ -479,67 +475,64 @@ of free disk space on '~a'~%")
 slot (which must later be released with 'release-build-slot'), or #f and #f."
 
   ;; Proceed like this:
-  ;;   1. Acquire the global machine-choice lock.
-  ;;   2. For all MACHINES, attempt to acquire a build slot, and filter out
+  ;;   1. For all MACHINES, attempt to acquire a build slot, and filter out
   ;;      those machines for which we failed.
-  ;;   3. Choose the best machine among those that are left.
-  ;;   4. Release the previously-acquired build slots of the other machines.
-  ;;   5. Release the global machine-choice lock.
-
-  (with-file-lock (machine-choice-lock-file)
-    (define machines+slots
-      (filter-map (lambda (machine)
-                    (let ((slot (acquire-build-slot machine)))
-                      (and slot (list machine slot))))
-                  (shuffle machines)))
-
-    (define (undecorate pred)
-      (lambda (a b)
-        (match a
-          ((machine1 slot1)
-           (match b
-             ((machine2 slot2)
-              (pred machine1 machine2)))))))
-
-    (define (machine-faster? m1 m2)
-      ;; Return #t if M1 is faster than M2.
-      (> (build-machine-speed m1)
-         (build-machine-speed m2)))
-
-    (let loop ((machines+slots
-                (sort machines+slots (undecorate machine-faster?))))
-      (match machines+slots
-        (((best slot) others ...)
-         ;; Return the best machine unless it's already overloaded.
-         ;; Note: We call 'node-load' only as a last resort because it is
-         ;; too costly to call it once for every machine.
-         (let* ((session (false-if-exception (open-ssh-session best)))
-                (node    (and session (remote-inferior session)))
-                (load    (and node (normalized-load best (node-load node))))
-                (space   (and node (node-free-disk-space node))))
-           (when node (close-inferior node))
-           (when session (disconnect! session))
-           (if (and node (< load 2.) (>= space %minimum-disk-space))
-               (match others
-                 (((machines slots) ...)
-                  ;; Release slots from the uninteresting machines.
-                  (for-each release-build-slot slots)
-
-                  ;; The caller must keep SLOT to protect it from GC and to
-                  ;; eventually release it.
-                  (values best slot)))
-               (begin
-                 ;; BEST is unsuitable, so try the next one.
-                 (when (and space (< space %minimum-disk-space))
-                   (format (current-error-port)
-                           "skipping machine '~a' because it is low \
+  ;;   2. Choose the best machine among those that are left.
+  ;;   3. Release the previously-acquired build slots of the other machines.
+
+  (define machines+slots
+    (filter-map (lambda (machine)
+                  (let ((slot (acquire-build-slot machine)))
+                    (and slot (list machine slot))))
+                (shuffle machines)))
+
+  (define (undecorate pred)
+    (lambda (a b)
+      (match a
+        ((machine1 slot1)
+         (match b
+           ((machine2 slot2)
+            (pred machine1 machine2)))))))
+
+  (define (machine-faster? m1 m2)
+    ;; Return #t if M1 is faster than M2.
+    (> (build-machine-speed m1)
+       (build-machine-speed m2)))
+
+  (let loop ((machines+slots
+              (sort machines+slots (undecorate machine-faster?))))
+    (match machines+slots
+      (((best slot) others ...)
+       ;; Return the best machine unless it's already overloaded.
+       ;; Note: We call 'node-load' only as a last resort because it is
+       ;; too costly to call it once for every machine.
+       (let* ((session (false-if-exception (open-ssh-session best)))
+              (node    (and session (remote-inferior session)))
+              (load    (and node (normalized-load best (node-load node))))
+              (space   (and node (node-free-disk-space node))))
+         (when node (close-inferior node))
+         (when session (disconnect! session))
+         (if (and node (< load 2.) (>= space %minimum-disk-space))
+             (match others
+               (((machines slots) ...)
+                ;; Release slots from the uninteresting machines.
+                (for-each release-build-slot slots)
+
+                ;; The caller must keep SLOT to protect it from GC and to
+                ;; eventually release it.
+                (values best slot)))
+             (begin
+               ;; BEST is unsuitable, so try the next one.
+               (when (and space (< space %minimum-disk-space))
+                 (format (current-error-port)
+                         "skipping machine '~a' because it is low \
 on disk space (~,2f MiB free)~%"
-                           (build-machine-name best)
-                           (/ space (expt 2 20) 1.)))
-                 (release-build-slot slot)
-                 (loop others)))))
-        (()
-         (values #f #f))))))
+                         (build-machine-name best)
+                         (/ space (expt 2 20) 1.)))
+               (release-build-slot slot)
+               (loop others)))))
+      (()
+       (values #f #f)))))
 
 (define (call-with-timeout timeout drv thunk)
   "Call THUNK and leave after TIMEOUT seconds.  If TIMEOUT is #f, simply call