summary refs log tree commit diff
path: root/gnu/packages/ocr.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/ocr.scm')
-rw-r--r-- gnu/packages/ocr.scm 47
1 files changed, 28 insertions, 19 deletions
diff --git a/gnu/packages/ocr.scm b/gnu/packages/ocr.scm
index 0382e0d869..99ebc9d4f7 100644
--- a/gnu/packages/ocr.scm
+++ b/gnu/packages/ocr.scm
@@ -5,7 +5,7 @@
 ;;; Copyright © 2019 Alex Vong <alexvong1995@gmail.com>
 ;;; Copyright © 2021 Andy Tai <atai@atai.org>
 ;;; Copyright © 2021, 2022 Nicolas Goaziou <mail@nicolasgoaziou.fr>
-;;; Copyright © 2022 Maxim Cournoyer <maxim.cournoyer@gmail.com>
+;;; Copyright © 2022, 2023 Maxim Cournoyer <maxim.cournoyer@gmail.com>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -31,7 +31,6 @@
   #:use-module (guix build-system cmake)
   #:use-module (guix build-system copy)
   #:use-module (guix build-system gnu)
-  #:use-module (guix build-system python)
   #:use-module (gnu packages)
   #:use-module (gnu packages autotools)
   #:use-module (gnu packages backup)
@@ -89,7 +88,7 @@ it produces text in 8-bit or UTF-8 formats.")
                (base32
                 "1m310cpb87xx8l8q7jy9fvzf6a0m8rm0dmjpbiwhc2mi6w4gn084"))))
     (build-system copy-build-system)
-    (arguments (list #:install-plan #~'(("." "share/tesseract-ocr/tessdata"))
+    (arguments (list #:install-plan #~'(("." "share/tessdata"))
                      #:phases #~(modify-phases %standard-phases
                                   (add-after 'unpack 'delete-broken-links
                                     (lambda _
@@ -104,7 +103,7 @@ models for the Tesseract OCR Engine.")
 (define-public tesseract-ocr
   (package
     (name "tesseract-ocr")
-    (version "5.2.0")
+    (version "5.3.0")
     (source
      (origin
        (method git-fetch)
@@ -114,7 +113,7 @@ models for the Tesseract OCR Engine.")
        (file-name (git-file-name name version))
        (sha256
         (base32
-         "0dai539h07lqj8lyhznd3wbwdpqr78qrsczq78rsmsryqvmdbyaa"))))
+         "0vylcba8w5ljnr6pf3ilc2wjq72k660h71b7mlbjz3a2f0x5kr33"))))
     (build-system gnu-build-system)
     (arguments
      (list
@@ -132,15 +131,6 @@ models for the Tesseract OCR Engine.")
               (substitute* "configure.ac"
                 (("AC_SUBST\\(\\[XML_CATALOG_FILES])")
                  ""))))
-          (add-after 'unpack 'adjust-TESSDATA_PREFIX-macro
-            (lambda _
-              ;; Use a deeper TESSDATA_PREFIX hierarchy so that a more
-              ;; specific search-path than '/share' can be specified.  The
-              ;; build system uses CPPFLAGS for itself, so we can't simply set
-              ;; a make flag.
-              (substitute* "Makefile.am"
-                (("-DTESSDATA_PREFIX='\"@datadir@\"'")
-                 "-DTESSDATA_PREFIX='\"@datadir@/tesseract-ocr\"'"))))
           (add-after 'build 'build-training
             (lambda* (#:key parallel-build? #:allow-other-keys)
               (define n (if parallel-build? (number->string
@@ -156,7 +146,7 @@ models for the Tesseract OCR Engine.")
             ;; extended via TESSDATA_PREFIX.
             (lambda* (#:key native-inputs inputs #:allow-other-keys)
               (define eng.traineddata
-                "/share/tesseract-ocr/tessdata/eng.traineddata")
+                "/share/tessdata/eng.traineddata")
               (install-file (search-input-file (or native-inputs inputs)
                                                eng.traineddata)
                             (dirname (string-append #$output
@@ -184,7 +174,7 @@ models for the Tesseract OCR Engine.")
      (list leptonica))
     (native-search-paths (list (search-path-specification
                                 (variable "TESSDATA_PREFIX")
-                                (files (list "share/tesseract-ocr/tessdata"))
+                                (files (list "share/tessdata"))
                                 (separator #f)))) ;single value
     (home-page "https://github.com/tesseract-ocr/tesseract")
     (synopsis "Optical character recognition engine")
@@ -198,10 +188,29 @@ default.  To add support for more languages, the
 @code{tesseract-ocr-tessdata-fast} package should be installed.")
     (license license:asl2.0)))
 
+(define-public tesseract-ocr-4           ;4.x variant of tesseract-ocr
+  (package
+    (inherit tesseract-ocr)              ;fields not overridden below are inherited
+    (name "tesseract-ocr")               ;same name as the parent package
+    (version "4.1.3")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/tesseract-ocr/tesseract")
+             (commit version)))          ;the git ref is the version string itself
+       (file-name (git-file-name name version))
+       (sha256
+        (base32
+         "1ksds8n26kq0acprjn4ycdr163hr9kmkwij7fc973mrjg7kz0pdi"))))
+    (propagated-inputs                   ;the parent's inputs, except for leptonica
+     (modify-inputs (package-propagated-inputs tesseract-ocr)
+       (replace "leptonica" leptonica-1.80)))))  ;swap in leptonica-1.80
+
 (define-public gimagereader
   (package
     (name "gimagereader")
-    (version "3.4.0")
+    (version "3.4.1")
     (source
      (origin
        (method url-fetch)
@@ -210,7 +219,7 @@ default.  To add support for more languages, the
              "/download/v" version "/"
              "gimagereader-" version ".tar.xz"))
        (sha256
-        (base32 "09glxh7b4ivrd4samm67b8k2p0aljiagr83wb8nvy5ps2a9gwp5m"))))
+        (base32 "1972bvnk2bkgbh70vy2prcmdzf4wlna862p2vja9yjxi2c0scmwc"))))
     (build-system cmake-build-system)
     (arguments
      (list
@@ -227,7 +236,7 @@ default.  To add support for more languages, the
            sane-backends
            qtbase-5
            qtspell
-           quazip-0
+           quazip
            tesseract-ocr))
     (home-page "https://github.com/manisandro/gImageReader")
     (synopsis "Qt front-end to tesseract-ocr")