summary refs log tree commit diff
path: root/tests/store-deduplication.scm
diff options
context:
space:
mode:
author: Ludovic Courtès <ludo@gnu.org> 2021-11-13 21:47:15 +0100
committer: Ludovic Courtès <ludo@gnu.org> 2021-11-16 14:34:28 +0100
commit: 472a0e82a52a3d5d841e1dfad6b13e26082a5750 (patch)
tree: f159bd736c66d9cd7f23188fd4110727b0787637 /tests/store-deduplication.scm
parent: f39397b21041fe418247239f27473aff49a203c9 (diff)
download: guix-472a0e82a52a3d5d841e1dfad6b13e26082a5750.tar.gz
daemon: Do not deduplicate files smaller than 8 KiB.
Files smaller than 8 KiB typically represent ~70% of the entries in
/gnu/store/.links but only contribute to ~4% of the space savings
afforded by deduplication.

Not considering these files for deduplication speeds up file insertion
in the store and, more importantly, leaves 'removeUnusedLinks' with
fewer entries to traverse, thereby speeding it up proportionally.

Partly fixes <https://issues.guix.gnu.org/24937>.

* config-daemon.ac: Remove symlink hard link check and CAN_LINK_SYMLINK
definition.
* guix/store/deduplication.scm (%deduplication-minimum-size): New
variable.
(deduplicate)[loop]: Do not recurse when FILE's size is below
%DEDUPLICATION-MINIMUM-SIZE.
(dump-port): New procedure.
(dump-file/deduplicate)[hash]: Turn into...
[dump-and-compute-hash]: ... this thunk.
Call 'deduplicate' only when SIZE is greater than
%DEDUPLICATION-MINIMUM-SIZE; otherwise call 'dump-port'.
* nix/libstore/gc.cc (LocalStore::removeUnusedLinks): Drop files where
st.st_size < deduplicationMinSize.
* nix/libstore/local-store.hh (deduplicationMinSize): New declaration.
* nix/libstore/optimise-store.cc (deduplicationMinSize): New variable.
(LocalStore::optimisePath_): Return when PATH is a symlink or smaller
than 'deduplicationMinSize'.
* tests/derivations.scm ("identical files are deduplicated"): Produce
files bigger than %DEDUPLICATION-MINIMUM-SIZE.
* tests/nar.scm ("restore-file-set with directories (signed, valid)"):
Likewise.
* tests/store-deduplication.scm ("deduplicate, below %deduplication-minimum-size"):
New test.
("deduplicate", "deduplicate, ENOSPC"): Produce files bigger than
%DEDUPLICATION-MINIMUM-SIZE.
* tests/store.scm ("substitute, deduplication"): Likewise.
Diffstat (limited to 'tests/store-deduplication.scm')
-rw-r--r-- tests/store-deduplication.scm 41
1 file changed, 35 insertions, 6 deletions
diff --git a/tests/store-deduplication.scm b/tests/store-deduplication.scm
index b1c2d93bbd..2950fbc1a3 100644
--- a/tests/store-deduplication.scm
+++ b/tests/store-deduplication.scm
@@ -1,5 +1,5 @@
 ;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2018, 2020 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2018, 2020-2021 Ludovic Courtès <ludo@gnu.org>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -30,13 +30,40 @@
 
 (test-begin "store-deduplication")
 
+(test-equal "deduplicate, below %deduplication-minimum-size"
+  (list #t (make-list 5 1))
+
+  (call-with-temporary-directory
+   (lambda (store)
+     ;; Note: DATA must be shorter than %DEDUPLICATION-MINIMUM-SIZE.
+     (let ((data      "Hello, world!")
+           (identical (map (lambda (n)
+                             (string-append store "/" (number->string n)
+                                            "/a/b/c"))
+                           (iota 5))))
+       (for-each (lambda (file)
+                   (mkdir-p (dirname file))
+                   (call-with-output-file file
+                     (lambda (port)
+                       (put-bytevector port (string->utf8 data)))))
+                 identical)
+
+       (deduplicate store (nar-sha256 store) #:store store)
+
+       ;; (system (string-append "ls -lRia " store))
+       (list (= (length (delete-duplicates
+                         (map (compose stat:ino stat) identical)))
+                (length identical))
+             (map (compose stat:nlink stat) identical))))))
+
 (test-equal "deduplicate"
   (cons* #t #f                                    ;inode comparisons
          2 (make-list 5 6))                       ;'nlink' values
 
   (call-with-temporary-directory
    (lambda (store)
-     (let ((data      (string->utf8 "Hello, world!"))
+     ;; Note: DATA must be longer than %DEDUPLICATION-MINIMUM-SIZE.
+     (let ((data      (string-concatenate (make-list 1000 "Hello, world!")))
            (identical (map (lambda (n)
                              (string-append store "/" (number->string n)
                                             "/a/b/c"))
@@ -46,7 +73,7 @@
                    (mkdir-p (dirname file))
                    (call-with-output-file file
                      (lambda (port)
-                       (put-bytevector port data))))
+                       (put-bytevector port (string->utf8 data)))))
                  identical)
        ;; Make the parent of IDENTICAL read-only.  This should not prevent
        ;; deduplication from inserting its hard link.
@@ -54,7 +81,7 @@
 
        (call-with-output-file unique
          (lambda (port)
-           (put-bytevector port (string->utf8 "This is unique."))))
+           (put-bytevector port (string->utf8 (string-reverse data)))))
 
        (deduplicate store (nar-sha256 store) #:store store)
 
@@ -77,8 +104,10 @@
    (lambda (store)
      (let ((true-link link)
            (links     0)
-           (data1     (string->utf8 "Hello, world!"))
-           (data2     (string->utf8 "Hi, world!"))
+           (data1     (string->utf8
+                       (string-concatenate (make-list 1000 "Hello, world!"))))
+           (data2     (string->utf8
+                       (string-concatenate (make-list 1000 "Hi, world!"))))
            (identical (map (lambda (n)
                              (string-append store "/" (number->string n)
                                             "/a/b/c"))