summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--guix/build/utils.scm57
-rw-r--r--tests/build-utils.scm24
2 files changed, 69 insertions, 12 deletions
diff --git a/guix/build/utils.scm b/guix/build/utils.scm
index f0ea83085e..76180e67e0 100644
--- a/guix/build/utils.scm
+++ b/guix/build/utils.scm
@@ -2,7 +2,7 @@
 ;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
 ;;; Copyright © 2013 Andreas Enge <andreas@enge.fr>
 ;;; Copyright © 2013 Nikita Karetnikov <nikita@karetnikov.org>
-;;; Copyright © 2015, 2018 Mark H Weaver <mhw@netris.org>
+;;; Copyright © 2015, 2018, 2021 Mark H Weaver <mhw@netris.org>
 ;;; Copyright © 2018 Arun Isaac <arunisaac@systemreboot.net>
 ;;; Copyright © 2018, 2019 Ricardo Wurmus <rekado@elephly.net>
 ;;; Copyright © 2020 Efraim Flashner <efraim@flashner.co.il>
@@ -784,6 +784,31 @@ PROC's result is returned."
       (lambda (key . args)
         (false-if-exception (delete-file template))))))
 
+(define (unused-private-use-code-point s)
+  "Find a code point within a Unicode Private Use Area that is not
+present in S, and return the corresponding character object.  If one
+cannot be found, return false."
+  (define (scan lo hi)
+    (and (<= lo hi)
+         (let ((c (integer->char lo)))
+           (if (string-index s c)
+               (scan (+ lo 1) hi)
+               c))))
+  (or (scan   #xE000   #xF8FF)
+      (scan  #xF0000  #xFFFFD)
+      (scan #x100000 #x10FFFD)))
+
+(define (replace-char c1 c2 s)
+  "Return a string which is equal to S except with all instances of C1
+replaced by C2.  If C1 and C2 are equal, return S."
+  (if (char=? c1 c2)
+      s
+      (string-map (lambda (c)
+                    (if (char=? c c1)
+                        c2
+                        c))
+                  s)))
+
 (define (substitute file pattern+procs)
   "PATTERN+PROCS is a list of regexp/two-argument-procedure pairs.  For each
 line of FILE, and for each PATTERN that it matches, call the corresponding
@@ -802,16 +827,26 @@ end of a line; by itself it won't match the terminating newline of a line."
         (let loop ((line (read-line in 'concat)))
           (if (eof-object? line)
               #t
-              (let ((line (fold (lambda (r+p line)
-                                  (match r+p
-                                    ((regexp . proc)
-                                     (match (list-matches regexp line)
-                                       ((and m+ (_ _ ...))
-                                        (proc line m+))
-                                       (_ line)))))
-                                line
-                                rx+proc)))
-                (display line out)
+              ;; Work around the fact that Guile's regexp-exec does not handle
+              ;; NUL characters (a limitation of the underlying GNU libc's
+              ;; regexec) by temporarily replacing them by an unused private
+              ;; Unicode code point.
+              ;; TODO: Use SRFI-115 instead, once available in Guile.
+              (let* ((nul* (or (and (string-index line #\nul)
+                                    (unused-private-use-code-point line))
+                               #\nul))
+                     (line* (replace-char #\nul nul* line))
+                     (line1* (fold (lambda (r+p line)
+                                     (match r+p
+                                       ((regexp . proc)
+                                        (match (list-matches regexp line)
+                                          ((and m+ (_ _ ...))
+                                           (proc line m+))
+                                          (_ line)))))
+                                   line*
+                                   rx+proc))
+                     (line1 (replace-char nul* #\nul line1*)))
+                (display line1 out)
                 (loop (read-line in 'concat)))))))))
 
 
diff --git a/tests/build-utils.scm b/tests/build-utils.scm
index 654b480ed9..31be7ff80f 100644
--- a/tests/build-utils.scm
+++ b/tests/build-utils.scm
@@ -1,6 +1,7 @@
 ;;; GNU Guix --- Functional package management for GNU
 ;;; Copyright © 2012, 2015, 2016, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
 ;;; Copyright © 2019 Ricardo Wurmus <rekado@elephly.net>
+;;; Copyright © 2021 Maxim Cournoyer <maxim.cournoyer@gmail.com>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -18,7 +19,7 @@
 ;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.
 
 
-(define-module (test-build-utils)
+(define-module (test build-utils)
   #:use-module (guix tests)
   #:use-module (guix build utils)
   #:use-module ((guix utils)
@@ -241,4 +242,25 @@ print('hello world')"))
                                            "/some/other/path")))
          #f)))))
 
+(test-equal "substitute*, text contains a NUL byte, UTF-8"
+  "c\0d"
+  (with-fluids ((%default-port-encoding "UTF-8")
+                (%default-port-conversion-strategy 'error))
+    ;; The GNU libc is locale sensitive.  Depending on the value of LANG, the
+    ;; test could fail with "string contains #\\nul character: ~S" or "cannot
+    ;; convert wide string to output locale".
+    (setlocale LC_ALL "en_US.UTF-8")
+    (call-with-temporary-output-file
+     (lambda (file port)
+       (format port "a\0b")
+       (flush-output-port port)
+
+       (substitute* file
+         (("a") "c")
+         (("b") "d"))
+
+       (with-input-from-file file
+         (lambda _
+           (get-string-all (current-input-port))))))))
+
 (test-end)