From 12129998689648923b58c426362a1bc875da75f9 Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Fri, 28 Mar 2014 03:54:01 -0400 Subject: union: Rewrite to be faster; handle symlink/directory conflicts. * guix/build/union.scm: Rewrite; only 'file=?' remains unchanged. Remove 'tree-union' and 'delete-duplicate-leaves' exports. Merge inputs in a breadth-first fashion. Follow symlinks for purposes of making decisions about the merge. * tests/union.scm: Remove tests of 'tree-union' and 'delete-duplicate-leaves'. --- guix/build/union.scm | 252 +++++++++++++++++---------------------------------- tests/union.scm | 41 --------- 2 files changed, 85 insertions(+), 208 deletions(-) diff --git a/guix/build/union.scm b/guix/build/union.scm index 6e2b296d81..c65bea4692 100644 --- a/guix/build/union.scm +++ b/guix/build/union.scm @@ -1,5 +1,6 @@ ;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2012, 2013, 2014 Ludovic Courtès +;;; Copyright © 2014 Mark H Weaver ;;; ;;; This file is part of GNU Guix. ;;; @@ -17,16 +18,13 @@ ;;; along with GNU Guix. If not, see . (define-module (guix build union) - #:use-module (ice-9 ftw) #:use-module (ice-9 match) #:use-module (ice-9 format) #:use-module (srfi srfi-1) #:use-module (srfi srfi-26) #:use-module (rnrs bytevectors) #:use-module (rnrs io ports) - #:export (tree-union - delete-duplicate-leaves - union-build)) + #:export (union-build)) ;;; Commentary: ;;; @@ -35,72 +33,20 @@ ;;; ;;; Code: -(define (tree-union trees) - "Return a tree that is the union of the trees listed in TREES. Each -tree has the form (PARENT LEAVES ...) or just LEAF, where each leaf is -itself a tree. " - (let loop ((trees trees)) - (match trees - (() ; nothing left - '()) - (_ - (let ((dirs (filter pair? trees)) - (leaves (remove pair? trees))) - `(,@leaves - ,@(fold (lambda (dir result) - (cons `(,dir - ,@(loop - (concatenate - (filter-map (match-lambda - ((head children ...) - (and (equal? head dir) - children))) - dirs)))) - result)) - '() - (delete-duplicates (map car dirs))))))))) - -(define* (delete-duplicate-leaves tree - #:optional - (leaf=? equal?) - (delete-duplicates (match-lambda - ((head _ ...) head)))) - "Delete duplicate leaves from TREE. Two leaves are considered equal -when LEAF=? applied to them returns #t. Each collision (list of leaves -that are LEAF=?) is passed to DELETE-DUPLICATES, which must return a -single leaf." - (let loop ((tree tree)) - (match tree - ((dir children ...) - (let ((dirs (filter pair? children)) - (leaves (remove pair? children))) - (define collisions - (fold (lambda (leaf result) - (define same? - (cut leaf=? leaf <>)) - - (if (any (cut find same? <>) result) - result - (match (filter same? leaves) - ((_) - result) - ((collision ...) - (cons collision result))))) - '() - leaves)) - - (define non-collisions - (filter (lambda (leaf) - (match (filter (cut leaf=? leaf <>) leaves) - ((_) #t) - ((_ _ ..1) #f))) - leaves)) - - `(,dir - ,@non-collisions - ,@(map delete-duplicates collisions) - ,@(map loop dirs)))) - (leaf leaf)))) +(define (files-in-directory dirname) + (let ((dir (opendir dirname))) + (let loop ((files '())) + (match (readdir dir) + ((or "." "..") + (loop files)) + ((? eof-object?) + (closedir dir) + (sort files string) rest) - (format (current-error-port) "warning: collision encountered: ~{~a ~}~%" - lst) - - ;; TODO: Implement smarter strategies. - (format (current-error-port) "warning: arbitrarily choosing ~a~%" - head)) - head))) +the INPUTS." + + (define (symlink* input output) + (format log-port "`~a' ~~> `~a'~%" input output) + (symlink input output)) + + (define (resolve-collisions output dirs files) + (cond ((null? dirs) + ;; The inputs are all files. + (format (current-error-port) + "warning: collision encountered: ~{~a ~}~%" + files) + + (let ((file (first files))) + ;; TODO: Implement smarter strategies. + (format (current-error-port) + "warning: arbitrarily choosing ~a~%" + file) + + (symlink* file output))) + + (else + ;; The inputs are a mixture of files and directories + (error "union-build: collision between file and directories" + `((files ,files) (dirs ,dirs)))))) + + (define (union output inputs) + (match inputs + ((input) + ;; There's only one input, so just make a link. + (symlink* input output)) + (_ + (call-with-values (lambda () (partition file-is-directory? inputs)) + (match-lambda* + ((dirs ()) + ;; All inputs are directories. Create a new directory + ;; where we will merge the input directories. + (mkdir output) + + ;; Build a hash table mapping each file to a list of input + ;; directories containing that file. + (let ((table (make-hash-table))) + + (define (add-to-table! file dir) + (hash-set! table file (cons dir (hash-ref table file '())))) + + ;; Populate the table. + (for-each (lambda (dir) + (for-each (cut add-to-table! <> dir) + (files-in-directory dir))) + dirs) + + ;; Now iterate over the table and recursively + ;; perform a union for each entry. + (hash-for-each (lambda (file dirs-with-file) + (union (string-append output "/" file) + (map (cut string-append <> "/" file) + (reverse dirs-with-file)))) + table))) + + ((() (file (? (cut file=? <> file)) ...)) + ;; There are no directories, and all files have the same contents, + ;; so there's no conflict. + (symlink* file output)) + + ((dirs files) + (resolve-collisions output dirs files))))))) (setvbuf (current-output-port) _IOLBF) (setvbuf (current-error-port) _IOLBF) (when (file-port? log-port) (setvbuf log-port _IOLBF)) - (mkdir output) - (let loop ((tree (delete-duplicate-leaves - (cons "." - (tree-union - (append-map (compose tree-leaves file-tree) - (delete-duplicates directories)))) - leaf=? - resolve-collision)) - (dir '())) - (match tree - ((? string?) - ;; A leaf: create a symlink. - (let* ((dir (string-join dir "/")) - (target (string-append output "/" dir "/" (basename tree)))) - (format log-port "`~a' ~~> `~a'~%" tree target) - (symlink tree target))) - (((? string? subdir) leaves ...) - ;; A sub-directory: create it in OUTPUT, and iterate over LEAVES. - (unless (string=? subdir ".") - (let ((dir (string-join dir "/"))) - (mkdir (string-append output "/" dir "/" subdir)))) - (for-each (cute loop <> `(,@dir ,subdir)) - leaves)) - ((leaves ...) - ;; A series of leaves: iterate over them. - (for-each (cut loop <> dir) leaves))))) + (union output (delete-duplicates inputs))) ;;; union.scm ends here diff --git a/tests/union.scm b/tests/union.scm index 3ebf483efa..f63329a511 100644 --- a/tests/union.scm +++ b/tests/union.scm @@ -43,47 +43,6 @@ (test-begin "union") -(test-equal "tree-union, empty" - '() - (tree-union '())) - -(test-equal "tree-union, leaves only" - '(a b c d) - (tree-union '(a b c d))) - -(test-equal "tree-union, simple" - '((bin ls touch make awk gawk)) - (tree-union '((bin ls touch) - (bin make) - (bin awk gawk)))) - -(test-equal "tree-union, several levels" - '((share (doc (make README) (coreutils README))) - (bin ls touch make)) - (tree-union '((bin ls touch) - (share (doc (coreutils README))) - (bin make) - (share (doc (make README)))))) - -(test-equal "delete-duplicate-leaves, default" - '(bin make touch ls) - (delete-duplicate-leaves '(bin ls make touch ls))) - -(test-equal "delete-duplicate-leaves, file names" - '("doc" ("info" - "/binutils/ld.info" - "/gcc/gcc.info" - "/binutils/standards.info")) - (let ((leaf=? (lambda (a b) - (string=? (basename a) (basename b))))) - (delete-duplicate-leaves '("doc" - ("info" - "/binutils/ld.info" - "/binutils/standards.info" - "/gcc/gcc.info" - "/gcc/standards.info")) - leaf=?))) - (test-skip (if (and %store (false-if-exception (getaddrinfo "www.gnu.org" "80" AI_NUMERICSERV))) -- cgit 1.4.1