summary refs log tree commit diff
diff options
context:
space:
mode:
authorLudovic Courtès <ludo@gnu.org>2015-03-05 22:00:11 +0100
committerLudovic Courtès <ludo@gnu.org>2015-03-05 22:17:36 +0100
commitce72c780746776a86f59747f5eff8731cb4ff39b (patch)
tree72b3bd889fd71a4eeeca33811a56ec63cec52e1f
parent472e4c430343671a6cb4e5ed392beae04ef09da6 (diff)
downloadguix-ce72c780746776a86f59747f5eff8731cb4ff39b.tar.gz
store: Attempt to decode build logs as UTF-8.
* guix/serialization.scm (read-maybe-utf8-string): New procedure.
* guix/store.scm (process-stderr): Use it for the build log and errors.
* tests/store.scm ("current-build-output-port, UTF-8",
  "current-build-output-port, UTF-8 + garbage"): New tests.
-rw-r--r--guix/serialization.scm18
-rw-r--r--guix/store.scm9
-rw-r--r--tests/store.scm37
3 files changed, 60 insertions, 4 deletions
diff --git a/guix/serialization.scm b/guix/serialization.scm
index da01ff39f5..a99f53ee0b 100644
--- a/guix/serialization.scm
+++ b/guix/serialization.scm
@@ -29,7 +29,8 @@
   #:export (write-int read-int
             write-long-long read-long-long
             write-padding
-            write-string read-string read-latin1-string
+            write-string
+            read-string read-latin1-string read-maybe-utf8-string
             write-string-list read-string-list
             write-string-pairs
             write-store-path read-store-path
@@ -130,6 +131,21 @@
     ;; upgraded to Guile >= 2.0.9.
     (list->string (map integer->char (bytevector->u8-list bv)))))
 
+(define (read-maybe-utf8-string p)
+  "Read a serialized string from port P.  Attempt to decode it as UTF-8 and
+substitute invalid byte sequences with question marks.  This is a
+\"permissive\" UTF-8 decoder."
+  ;; XXX: We rely on the port's decoding mechanism to do permissive decoding
+  ;; and substitute invalid byte sequences with question marks, but this is
+  ;; not very efficient.  Eventually Guile may provide a lightweight
+  ;; permissive UTF-8 decoder.
+  (let* ((bv   (read-byte-string p))
+         (port (with-fluids ((%default-port-encoding "UTF-8")
+                             (%default-port-conversion-strategy
+                              'substitute))
+                 (open-bytevector-input-port bv))))
+    (get-string-all port)))
+
 (define (write-string-list l p)
   (write-int (length l) p)
   (for-each (cut write-string <> p) l))
diff --git a/guix/store.scm b/guix/store.scm
index d88fb3ea54..a3f3cbf43b 100644
--- a/guix/store.scm
+++ b/guix/store.scm
@@ -418,15 +418,18 @@ encoding conversion errors."
              (write-padding len p)
              #f))
           ((= k %stderr-next)
-           ;; Log a string.
-           (let ((s (read-latin1-string p)))
+           ;; Log a string.  Build logs are usually UTF-8-encoded, but they
+           ;; may also contain arbitrary byte sequences that should not cause
+           ;; this to fail.  Thus, use the permissive
+           ;; 'read-maybe-utf8-string'.
+           (let ((s (read-maybe-utf8-string p)))
              (display s (current-build-output-port))
              (when (string-any %newlines s)
                (flush-output-port (current-build-output-port)))
              #f))
           ((= k %stderr-error)
            ;; Report an error.
-           (let ((error  (read-latin1-string p))
+           (let ((error  (read-maybe-utf8-string p))
                  ;; Currently the daemon fails to send a status code for early
                  ;; errors like DB schema version mismatches, so check for EOF.
                  (status (if (and (>= (nix-server-minor-version server) 8)
diff --git a/tests/store.scm b/tests/store.scm
index ee783be846..9ed78be085 100644
--- a/tests/store.scm
+++ b/tests/store.scm
@@ -25,6 +25,7 @@
   #:use-module (guix packages)
   #:use-module (guix derivations)
   #:use-module (guix serialization)
+  #:use-module (guix gexp)
   #:use-module (gnu packages)
   #:use-module (gnu packages bootstrap)
   #:use-module (ice-9 match)
@@ -268,6 +269,42 @@
                        (list a b c d w x y)))
            (lset= string=? s1 s3)))))
 
+(test-assert "current-build-output-port, UTF-8"
+  ;; Are UTF-8 strings in the build log properly interpreted?
+  (string-contains
+   (with-fluids ((%default-port-encoding "UTF-8")) ;for the string port
+     (call-with-output-string
+      (lambda (port)
+        (parameterize ((current-build-output-port port))
+          (let* ((s "Here’s a Greek letter: λ.")
+                 (d (build-expression->derivation
+                     %store "foo" `(display ,s)
+                     #:guile-for-build
+                     (package-derivation s %bootstrap-guile (%current-system)))))
+            (guard (c ((nix-protocol-error? c) #t))
+              (build-derivations %store (list d))))))))
+   "Here’s a Greek letter: λ."))
+
+(test-assert "current-build-output-port, UTF-8 + garbage"
+  ;; What about a mixture of UTF-8 + garbage?
+  (string-contains
+   (with-fluids ((%default-port-encoding "UTF-8")) ;for the string port
+     (call-with-output-string
+      (lambda (port)
+        (parameterize ((current-build-output-port port))
+          (let ((d (build-expression->derivation
+                    %store "foo"
+                    `(begin
+                       (use-modules (rnrs io ports))
+                       (display "garbage: ")
+                       (put-bytevector (current-output-port) #vu8(128))
+                       (display "lambda: λ\n"))
+                     #:guile-for-build
+                     (package-derivation %store %bootstrap-guile))))
+            (guard (c ((nix-protocol-error? c) #t))
+              (build-derivations %store (list d))))))))
+   "garbage: ?lambda: λ"))
+
 (test-assert "log-file, derivation"
   (let* ((b (add-text-to-store %store "build" "echo $foo > $out" '()))
          (s (add-to-store %store "bash" #t "sha256"