summary refs log tree commit diff
diff options
context:
space:
mode:
authorLudovic Courtès <ludo@gnu.org>2021-07-27 17:58:40 +0200
committerLudovic Courtès <ludo@gnu.org>2021-07-27 18:26:08 +0200
commit78daf9e02e5bc51f91488d8237cab2050cc060cf (patch)
treeaf92643cf7df04fb742756cd3159cfeb94cc751c
parent5dc4fe1004b093cda46dc249012688f560544ada (diff)
downloadguix-78daf9e02e5bc51f91488d8237cab2050cc060cf.tar.gz
derivations: Make 'coalesce-duplicate-inputs' linear in the number of inputs.
Partly fixes <https://issues.guix.gnu.org/49439>.
Reported by Ricardo Wurmus <rekado@elephly.net>.

When running the command:

  guix environment pigx-scrnaseq --search-paths --no-grafts

this change reduces total heap allocations from 1.4GiB to 717MiB (49%)
and wall-clock time from 7.5s to 5.7s (24%).

Without '--no-grafts', heap allocations go from 2.1GiB to 1.4GiB (33%)
and wall-clock time from 12.1s to 10.9s (10%).

* guix/derivations.scm (coalesce-duplicate-inputs): Rewrite using a hash
table to make it O(N) rather than O(N²).
-rw-r--r--guix/derivations.scm49
1 files changed, 23 insertions, 26 deletions
diff --git a/guix/derivations.scm b/guix/derivations.scm
index 2fe684cc18..33f4dc5d9d 100644
--- a/guix/derivations.scm
+++ b/guix/derivations.scm
@@ -241,32 +241,29 @@ the store."
   "Return a list of inputs, such that when INPUTS contains the same DRV twice,
 they are coalesced, with their sub-derivations merged.  This is needed because
 Nix itself keeps only one of them."
-  (define (find pred lst)                         ;inlinable copy of 'find'
-    (let loop ((lst lst))
-      (match lst
-        (() #f)
-        ((head . tail)
-         (if (pred head) head (loop tail))))))
-
-  (fold (lambda (input result)
-          (match input
-            (($ <derivation-input> (= derivation-file-name path) sub-drvs)
-             ;; XXX: quadratic
-             (match (find (match-lambda
-                            (($ <derivation-input> (= derivation-file-name p)
-                                                   s)
-                             (string=? p path)))
-                          result)
-               (#f
-                (cons input result))
-               ((and dup ($ <derivation-input> drv sub-drvs2))
-                ;; Merge DUP with INPUT.
-                (let ((sub-drvs (delete-duplicates
-                                 (append sub-drvs sub-drvs2))))
-                  (cons (make-derivation-input drv (sort sub-drvs string<?))
-                        (delq dup result))))))))
-        '()
-        inputs))
+  (define table
+    (make-hash-table 25))
+
+  (for-each (lambda (input)
+              (let* ((drv (derivation-input-path input))
+                     (sub-drvs (derivation-input-sub-derivations input)))
+                (match (hash-get-handle table drv)
+                  (#f
+                   (hash-set! table drv input))
+                  ((and handle (key . ($ <derivation-input> drv sub-drvs2)))
+                   ;; Merge DUP with INPUT.
+                   (let* ((sub-drvs (delete-duplicates
+                                     (append sub-drvs sub-drvs2)))
+                          (input
+                           (make-derivation-input drv
+                                                  (sort sub-drvs string<?))))
+                     (set-cdr! handle input))))))
+            inputs)
+
+  (hash-fold (lambda (key input lst)
+               (cons input lst))
+             '()
+             table))
 
 (define* (derivation-prerequisites drv #:optional (cut? (const #f)))
   "Return the list of derivation-inputs required to build DRV, recursively.