summary refs log tree commit diff
diff options
context:
space:
mode:
authorLudovic Courtès <ludo@gnu.org>2020-06-22 22:57:26 +0200
committerLudovic Courtès <ludo@gnu.org>2020-06-23 00:03:08 +0200
commita524a31de43b330c911fb08ae02fcd880d32aa04 (patch)
treed739815c0597dc159b0b1a41d151c42e42104c23
parent2f562699ea936f9639ccf5deef2e7b458a7426bf (diff)
downloadguix-a524a31de43b330c911fb08ae02fcd880d32aa04.tar.gz
self: Speed up Texinfo cross-reference translation.
Building guix-translated-texinfo.drv goes from 11mn to 1mn50s, most of
which is taken by po4a.

* guix/self.scm (translate-texi-manuals)[build](make-ref-regex): Remove.
(canonicalize-whitespace): New procedure.
(xref-regexp): New variable.
(translate-cross-references): Rewrite to iterate over the
cross-references rather than iterating over the msgids.  Update caller.
-rw-r--r--guix/self.scm105
1 files changed, 67 insertions, 38 deletions
diff --git a/guix/self.scm b/guix/self.scm
index 60fe6e6b01..39dfbaadc0 100644
--- a/guix/self.scm
+++ b/guix/self.scm
@@ -290,6 +290,7 @@ DOMAIN, a gettext domain."
       #~(begin
           (use-modules (guix build utils) (guix build po)
                        (ice-9 match) (ice-9 regex) (ice-9 textual-ports)
+                       (ice-9 vlist)
                        (srfi srfi-1))
 
           (mkdir #$output)
@@ -315,38 +316,69 @@ the result to OUTPUT."
               "-M" "UTF-8" "-L" "UTF-8" "-k" "0" "-f" "texinfo"
               "-m" source "-p" po "-l" output))
 
-          (define (make-ref-regex msgid end)
-            (make-regexp (string-append
-                           "ref\\{"
-                           (string-join (string-split (regexp-quote msgid) #\ )
-                                        "[ \n]+")
-                           end)))
-
-          (define (translate-cross-references content translations)
-            "Take CONTENT, a string representing a .texi file and translate any
-cross-reference in it (@ref, @xref and @pxref) that have a translation in
-TRANSLATIONS, an alist of msgid and msgstr."
-            (fold
-              (lambda (elem content)
-                (match elem
-                  ((msgid . msgstr)
-                   ;; Empty translations and strings containing some special characters
-                   ;; cannot be the name of a section.
-                   (if (or (equal? msgstr "")
-                           (string-any (lambda (chr)
-                                         (member chr '(#\{ #\} #\( #\) #\newline #\,)))
-                                       msgid))
-                       content
-                       ;; Otherwise, they might be the name of a section, so we
-                       ;; need to translate any occurence in @(p?x?)ref{...}.
-                       (let ((regexp1 (make-ref-regex msgid ","))
-                             (regexp2 (make-ref-regex msgid "\\}")))
-                         (regexp-substitute/global
-                           #f regexp2
-                           (regexp-substitute/global
-                             #f regexp1 content 'pre "ref{" msgstr "," 'post)
-                           'pre "ref{" msgstr "}" 'post))))))
-              content translations))
+          (define (canonicalize-whitespace str)
+            ;; Change whitespace (newlines, etc.) in STR to #\space.
+            (string-map (lambda (chr)
+                          (if (char-set-contains? char-set:whitespace chr)
+                              #\space
+                              chr))
+                        str))
+
+          (define xref-regexp
+            ;; Texinfo cross-reference regexp.
+            (make-regexp "@(px|x)?ref\\{([^,}]+)"))
+
+          (define (translate-cross-references texi translations)
+            ;; Translate the cross-references that appear in TEXI, a Texinfo
+            ;; file, using the msgid/msgstr pairs from TRANSLATIONS.
+            (define content
+              (call-with-input-file texi get-string-all))
+
+            (define matches
+              (list-matches xref-regexp content))
+
+            (define translation-map
+              (fold (match-lambda*
+                      (((msgid . str) result)
+                       (vhash-cons msgid str result)))
+                    vlist-null
+                    translations))
+
+            (define translated
+              ;; Iterate over MATCHES and replace cross-references with their
+              ;; translation found in TRANSLATION-MAP.  (We can't use
+              ;; 'substitute*' because matches can span multiple lines.)
+              (let loop ((matches matches)
+                         (offset 0)
+                         (result '()))
+                (match matches
+                  (()
+                   (string-concatenate-reverse
+                    (cons (string-drop content offset) result)))
+                  ((head . tail)
+                   (let ((prefix (match:substring head 1))
+                         (ref    (canonicalize-whitespace (match:substring head 2))))
+                     (define translated
+                       (string-append "@" (or prefix "")
+                                      "ref{"
+                                      (match (vhash-assoc ref translation-map)
+                                        (#f ref)
+                                        ((_ . str) str))))
+
+                     (loop tail
+                           (match:end head)
+                           (append (list translated
+                                         (string-take
+                                          (string-drop content offset)
+                                          (- (match:start head) offset)))
+                                   result)))))))
+
+            (format (current-error-port)
+                    "translated ~a cross-references in '~a'~%"
+                    (length matches) texi)
+            (call-with-output-file texi
+              (lambda (port)
+                (display translated port))))
 
           (define* (translate-texi prefix po lang
                                    #:key (extras '()))
@@ -363,12 +395,9 @@ a list of extra files, such as '(\"contributing\")."
               (for-each (lambda (file)
                           (let* ((texi (string-append file "." lang ".texi"))
                                  (tmp  (string-append texi ".tmp")))
-                            (with-output-to-file texi
-                              (lambda ()
-                                (display
-                                 (translate-cross-references
-                                  (call-with-input-file tmp get-string-all)
-                                  translations))))))
+                            (copy-file tmp texi)
+                            (translate-cross-references texi
+                                                        translations)))
                         (cons prefix extras))))
 
           (define (available-translations directory domain)