summary refs log tree commit diff
path: root/gnu
diff options
context:
space:
mode:
author Maxim Cournoyer <maxim.cournoyer@gmail.com> 2021-06-17 01:22:35 -0400
committer Maxim Cournoyer <maxim.cournoyer@gmail.com> 2021-06-29 14:53:21 -0400
commit 4f3bdc8f21657dbda857027b3ec8754dd4c7c67b (patch)
tree c2a13dca9fdd301c8042696ea5f8f62dcfe43818 /gnu
parent 6b0e55cde901dd5f6eae72cee10723b7739cadf7 (diff)
download guix-4f3bdc8f21657dbda857027b3ec8754dd4c7c67b.tar.gz
pack: Prevent duplicate files in tar archives.
Tar translates duplicate files in the archive into hard links.  These can cause
problems, as not every tool supports them; for example dpkg doesn't.

* gnu/system/file-systems.scm (reduce-directories): New procedure.
(file-prefix?): Lift the restriction on file prefix.  The procedure can be
useful for comparing relative file names.  Adjust doc.
(file-name-depth): New procedure, extracted from ...
(btrfs-store-subvolume-file-name): ... here.
* guix/scripts/pack.scm (self-contained-tarball/builder): Use
reduce-directories.
* tests/file-systems.scm ("reduce-directories"): New test.
Diffstat (limited to 'gnu')
-rw-r--r-- gnu/system/file-systems.scm 56
1 files changed, 38 insertions, 18 deletions
diff --git a/gnu/system/file-systems.scm b/gnu/system/file-systems.scm
index 464e87cb18..fb87bfc85b 100644
--- a/gnu/system/file-systems.scm
+++ b/gnu/system/file-systems.scm
@@ -55,6 +55,7 @@
             file-system-dependencies
             file-system-location
 
+            reduce-directories
             file-system-type-predicate
             btrfs-subvolume?
             btrfs-store-subvolume-file-name
@@ -231,8 +232,8 @@
   (char-set-complement (char-set #\/)))
 
 (define (file-prefix? file1 file2)
-  "Return #t if FILE1 denotes the name of a file that is a parent of FILE2,
-where both FILE1 and FILE2 are absolute file name.  For example:
+  "Return #t if FILE1 denotes the name of a file that is a parent of FILE2.
+For example:
 
   (file-prefix? \"/gnu\" \"/gnu/store\")
   => #t
@@ -240,19 +241,41 @@ where both FILE1 and FILE2 are absolute file name.  For example:
   (file-prefix? \"/gn\" \"/gnu/store\")
   => #f
 "
-  (and (string-prefix? "/" file1)
-       (string-prefix? "/" file2)
-       (let loop ((file1 (string-tokenize file1 %not-slash))
-                  (file2 (string-tokenize file2 %not-slash)))
-         (match file1
-           (()
-            #t)
-           ((head1 tail1 ...)
-            (match file2
-              ((head2 tail2 ...)
-               (and (string=? head1 head2) (loop tail1 tail2)))
-              (()
-               #f)))))))
+  (let loop ((file1 (string-tokenize file1 %not-slash))
+             (file2 (string-tokenize file2 %not-slash)))
+    (match file1
+      (()
+       #t)
+      ((head1 tail1 ...)
+       (match file2
+         ((head2 tail2 ...)
+          (and (string=? head1 head2) (loop tail1 tail2)))
+         (()
+          #f))))))
+
+(define (file-name-depth file-name)
+  (length (string-tokenize file-name %not-slash)))
+
+(define (reduce-directories file-names)
+  "Eliminate entries in FILE-NAMES that are children of other entries in
+FILE-NAMES.  This is for example useful when passing a list of files to GNU
+tar, which would otherwise descend into each directory passed and archive the
+duplicate files as hard links, which can be undesirable."
+  (let* ((file-names/sorted
+          ;; Ascending sort by file hierarchy depth, then by file name length.
+          (stable-sort (delete-duplicates file-names)
+                       (lambda (f1 f2)
+                         (let ((depth1 (file-name-depth f1))
+                               (depth2 (file-name-depth f2)))
+                           (if (= depth1 depth2)
+                               (string< f1 f2)
+                               (< depth1 depth2)))))))
+    (reverse (fold (lambda (file-name results)
+                     (if (find (cut file-prefix? <> file-name) results)
+                         results        ;parent found -- skipping
+                         (cons file-name results)))
+                   '()
+                   file-names/sorted))))
 
 (define* (file-system-device->string device #:key uuid-type)
   "Return the string representations of the DEVICE field of a <file-system>
@@ -624,9 +647,6 @@ store is located, else #f."
         s
         (string-append "/" s)))
 
-  (define (file-name-depth file-name)
-    (length (string-tokenize file-name %not-slash)))
-
   (and-let* ((btrfs-subvolume-fs (filter btrfs-subvolume? file-systems))
              (btrfs-subvolume-fs*
               (sort btrfs-subvolume-fs