diff options
-rw-r--r-- | guix/build/utils.scm | 57 | ||||
-rw-r--r-- | tests/build-utils.scm | 24 |
2 files changed, 69 insertions, 12 deletions
diff --git a/guix/build/utils.scm b/guix/build/utils.scm index f0ea83085e..76180e67e0 100644 --- a/guix/build/utils.scm +++ b/guix/build/utils.scm @@ -2,7 +2,7 @@ ;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org> ;;; Copyright © 2013 Andreas Enge <andreas@enge.fr> ;;; Copyright © 2013 Nikita Karetnikov <nikita@karetnikov.org> -;;; Copyright © 2015, 2018 Mark H Weaver <mhw@netris.org> +;;; Copyright © 2015, 2018, 2021 Mark H Weaver <mhw@netris.org> ;;; Copyright © 2018 Arun Isaac <arunisaac@systemreboot.net> ;;; Copyright © 2018, 2019 Ricardo Wurmus <rekado@elephly.net> ;;; Copyright © 2020 Efraim Flashner <efraim@flashner.co.il> @@ -784,6 +784,31 @@ PROC's result is returned." (lambda (key . args) (false-if-exception (delete-file template)))))) +(define (unused-private-use-code-point s) + "Find a code point within a Unicode Private Use Area that is not +present in S, and return the corresponding character object. If one +cannot be found, return false." + (define (scan lo hi) + (and (<= lo hi) + (let ((c (integer->char lo))) + (if (string-index s c) + (scan (+ lo 1) hi) + c)))) + (or (scan #xE000 #xF8FF) + (scan #xF0000 #xFFFFD) + (scan #x100000 #x10FFFD))) + +(define (replace-char c1 c2 s) + "Return a string which is equal to S except with all instances of C1 +replaced by C2. If C1 and C2 are equal, return S." + (if (char=? c1 c2) + s + (string-map (lambda (c) + (if (char=? c c1) + c2 + c)) + s))) + (define (substitute file pattern+procs) "PATTERN+PROCS is a list of regexp/two-argument-procedure pairs. For each line of FILE, and for each PATTERN that it matches, call the corresponding @@ -802,16 +827,26 @@ end of a line; by itself it won't match the terminating newline of a line." (let loop ((line (read-line in 'concat))) (if (eof-object? line) #t - (let ((line (fold (lambda (r+p line) - (match r+p - ((regexp . proc) - (match (list-matches regexp line) - ((and m+ (_ _ ...)) - (proc line m+)) - (_ line))))) - line - rx+proc))) - (display line out) + ;; Work around the fact that Guile's regexp-exec does not handle + ;; NUL characters (a limitation of the underlying GNU libc's + ;; regexec) by temporarily replacing them by an unused private + ;; Unicode code point. + ;; TODO: Use SRFI-115 instead, once available in Guile. + (let* ((nul* (or (and (string-index line #\nul) + (unused-private-use-code-point line)) + #\nul)) + (line* (replace-char #\nul nul* line)) + (line1* (fold (lambda (r+p line) + (match r+p + ((regexp . proc) + (match (list-matches regexp line) + ((and m+ (_ _ ...)) + (proc line m+)) + (_ line))))) + line* + rx+proc)) + (line1 (replace-char nul* #\nul line1*))) + (display line1 out) (loop (read-line in 'concat))))))))) diff --git a/tests/build-utils.scm b/tests/build-utils.scm index 654b480ed9..31be7ff80f 100644 --- a/tests/build-utils.scm +++ b/tests/build-utils.scm @@ -1,6 +1,7 @@ ;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2012, 2015, 2016, 2019, 2020 Ludovic Courtès <ludo@gnu.org> ;;; Copyright © 2019 Ricardo Wurmus <rekado@elephly.net> +;;; Copyright © 2021 Maxim Cournoyer <maxim.cournoyer@gmail.com> ;;; ;;; This file is part of GNU Guix. ;;; @@ -18,7 +19,7 @@ ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. -(define-module (test-build-utils) +(define-module (test build-utils) #:use-module (guix tests) #:use-module (guix build utils) #:use-module ((guix utils) @@ -241,4 +242,25 @@ print('hello world')")) "/some/other/path"))) #f))))) +(test-equal "substitute*, text contains a NUL byte, UTF-8" + "c\0d" + (with-fluids ((%default-port-encoding "UTF-8") + (%default-port-conversion-strategy 'error)) + ;; The GNU libc is locale sensitive. Depending on the value of LANG, the + ;; test could fail with "string contains #\\nul character: ~S" or "cannot + ;; convert wide string to output locale". + (setlocale LC_ALL "en_US.UTF-8") + (call-with-temporary-output-file + (lambda (file port) + (format port "a\0b") + (flush-output-port port) + + (substitute* file + (("a") "c") + (("b") "d")) + + (with-input-from-file file + (lambda _ + (get-string-all (current-input-port)))))))) + (test-end) |