summary refs log tree commit diff
diff options
context:
space:
mode:
author Ludovic Courtès <ludo@gnu.org> 2020-06-22 12:29:15 +0200
committer Ludovic Courtès <ludo@gnu.org> 2020-06-22 15:42:55 +0200
commit fe5de925aa0f2854a679cebdea02b03cac561c8f (patch)
tree 7c256a5c405b677c7ab7994c06aa20db17c99978
parent a0d419e63955c50c270e0697af3eb74ac321b108 (diff)
download guix-fe5de925aa0f2854a679cebdea02b03cac561c8f.tar.gz
deduplicate: Avoid traversing directories twice.
Until now, we'd call (nar-sha256 file) unconditionally.  Thus, if FILE
was a directory, we would traverse it for no reason, and then call
'deduplicate' on FILE, which would again traverse it.

This change also removes redundant (mkdir-p store) calls from the loop,
and avoids 'lstat' calls by using 'scandir*'.

* guix/store/deduplication.scm (deduplicate): Add named loop.  Move
'mkdir-p' outside the loop.  Use 'scandir*' instead of 'scandir'.  Do
not call 'nar-sha256' when FILE has type 'directory.
-rw-r--r-- guix/store/deduplication.scm 81
1 file changed, 45 insertions, 36 deletions
diff --git a/guix/store/deduplication.scm b/guix/store/deduplication.scm
index 80868692c0..6784ee0b92 100644
--- a/guix/store/deduplication.scm
+++ b/guix/store/deduplication.scm
@@ -23,10 +23,12 @@
 (define-module (guix store deduplication)
   #:use-module (gcrypt hash)
   #:use-module (guix build utils)
+  #:use-module (guix build syscalls)
   #:use-module (guix base32)
   #:use-module (srfi srfi-11)
   #:use-module (rnrs io ports)
   #:use-module (ice-9 ftw)
+  #:use-module (ice-9 match)
   #:use-module (guix serialization)
   #:export (nar-sha256
             deduplicate))
@@ -138,40 +140,47 @@ Note: TARGET, TO-REPLACE, and SWAP-DIRECTORY must be on the same file system."
 replace PATH with a hardlink to the already-existing one.  If not, register
 PATH so that future duplicates can hardlink to it.  PATH is assumed to be
 under STORE."
-  (let* ((links-directory (string-append store "/.links"))
-         (link-file       (string-append links-directory "/"
-                                         (bytevector->nix-base32-string hash))))
-    (mkdir-p links-directory)
-    (if (eq? 'directory (stat:type (lstat path)))
+  (define links-directory
+    (string-append store "/.links"))
+
+  (mkdir-p links-directory)
+  (let loop ((path path)
+             (type (stat:type (lstat path)))
+             (hash hash))
+    (if (eq? 'directory type)
         ;; Can't hardlink directories, so hardlink their atoms.
-        (for-each (lambda (file)
-                    (unless (or (member file '("." ".."))
-                                (and (string=? path store)
-                                     (string=? file ".links")))
-                      (let ((file (string-append path "/" file)))
-                        (deduplicate file (nar-sha256 file)
-                                     #:store store))))
-                  (scandir path))
-        (if (file-exists? link-file)
-            (replace-with-link link-file path
-                               #:swap-directory links-directory)
-            (catch 'system-error
-              (lambda ()
-                (link path link-file))
-              (lambda args
-                (let ((errno (system-error-errno args)))
-                  (cond ((= errno EEXIST)
-                         ;; Someone else put an entry for PATH in
-                         ;; LINKS-DIRECTORY before we could.  Let's use it.
-                         (replace-with-link path link-file
-                                            #:swap-directory links-directory))
-                        ((= errno ENOSPC)
-                         ;; There's not enough room in the directory index for
-                         ;; more entries in .links, but that's fine: we can
-                         ;; just stop.
-                         #f)
-                        ((= errno EMLINK)
-                         ;; PATH has reached the maximum number of links, but
-                         ;; that's OK: we just can't deduplicate it more.
-                         #f)
-                        (else (apply throw args))))))))))
+        (for-each (match-lambda
+                    ((file . properties)
+                     (unless (member file '("." ".."))
+                       (let* ((file (string-append path "/" file))
+                              (type (or (assq-ref properties 'type)
+                                        (stat:type (lstat file)))))
+                         (loop file type
+                               (and (not (eq? 'directory type))
+                                    (nar-sha256 file)))))))
+                  (scandir* path))
+        (let ((link-file (string-append links-directory "/"
+                                        (bytevector->nix-base32-string hash))))
+          (if (file-exists? link-file)
+              (replace-with-link link-file path
+                                 #:swap-directory links-directory)
+              (catch 'system-error
+                (lambda ()
+                  (link path link-file))
+                (lambda args
+                  (let ((errno (system-error-errno args)))
+                    (cond ((= errno EEXIST)
+                           ;; Someone else put an entry for PATH in
+                           ;; LINKS-DIRECTORY before we could.  Let's use it.
+                           (replace-with-link path link-file
+                                              #:swap-directory links-directory))
+                          ((= errno ENOSPC)
+                           ;; There's not enough room in the directory index for
+                           ;; more entries in .links, but that's fine: we can
+                           ;; just stop.
+                           #f)
+                          ((= errno EMLINK)
+                           ;; PATH has reached the maximum number of links, but
+                           ;; that's OK: we just can't deduplicate it more.
+                           #f)
+                          (else (apply throw args)))))))))))