summary refs log tree commit diff
path: root/gnu/build/file-systems.scm
diff options
context:
space:
mode:
authorDanny Milosavljevic <dannym@scratchpost.org>2020-05-03 21:40:04 +0200
committerDanny Milosavljevic <dannym@scratchpost.org>2020-05-03 23:04:57 +0200
commitbb357c509e1c017e1fef5aa5f4d05beea0c25157 (patch)
treed4c20addb90b3d58a907644e874c4cd7b2086273 /gnu/build/file-systems.scm
parent96e399ee30251a0767a45b2bde3f052b25c62714 (diff)
downloadguix-bb357c509e1c017e1fef5aa5f4d05beea0c25157.tar.gz
file-systems: Fix UTF-16 handling in initrd.
Follow-up to f73f4b3a2d7a313a6cb1667bd69205ea4b09f57c.

* gnu/build/file-systems.scm (bytevector->u16-list): New procedure.
(utf16->string): New procedure.
Diffstat (limited to 'gnu/build/file-systems.scm')
-rw-r--r--gnu/build/file-systems.scm25
1 files changed, 25 insertions, 0 deletions
diff --git a/gnu/build/file-systems.scm b/gnu/build/file-systems.scm
index 4ac672d96f..b920e8fc62 100644
--- a/gnu/build/file-systems.scm
+++ b/gnu/build/file-systems.scm
@@ -110,6 +110,31 @@ NUL terminator, return the size of the bytevector."
               (loop (+ index 2)))
           length))))
 
+(define* (bytevector->u16-list bv endianness #:optional (index 0))
+  (if (< index (bytevector-length bv))
+      (cons (bytevector-u16-ref bv index endianness)
+            (bytevector->u16-list bv endianness (+ index 2)))
+      '()))
+
+;; The initrd doesn't have iconv data, so do the conversion ourselves.
+(define (utf16->string bv endianness)
+  (list->string
+   (map integer->char
+        (reverse
+         (let loop ((remainder (bytevector->u16-list bv endianness))
+                    (result '()))
+             (match remainder
+              (() result)
+              ((a) (cons a result))
+              ((a b x ...)
+               (if (and (>= a #xD800) (< a #xDC00) ; high surrogate
+                        (>= b #xDC00) (< b #xE000)) ; low surrogate
+                   (loop x (cons (+ #x10000
+                                    (* #x400 (- a #xD800))
+                                    (- b #xDC00))
+                                 result))
+                   (loop (cons b x) (cons a result))))))))))
+
 (define (null-terminated-utf16->string bv endianness)
   (utf16->string (sub-bytevector bv 0 (bytevector-utf16-length bv))
                  endianness))