diff options
Diffstat (limited to 'gnu/packages/ocr.scm')
-rw-r--r-- | gnu/packages/ocr.scm | 47 |
1 files changed, 28 insertions, 19 deletions
diff --git a/gnu/packages/ocr.scm b/gnu/packages/ocr.scm index 0382e0d869..99ebc9d4f7 100644 --- a/gnu/packages/ocr.scm +++ b/gnu/packages/ocr.scm @@ -5,7 +5,7 @@ ;;; Copyright © 2019 Alex Vong <alexvong1995@gmail.com> ;;; Copyright © 2021 Andy Tai <atai@atai.org> ;;; Copyright © 2021, 2022 Nicolas Goaziou <mail@nicolasgoaziou.fr> -;;; Copyright © 2022 Maxim Cournoyer <maxim.cournoyer@gmail.com> +;;; Copyright © 2022, 2023 Maxim Cournoyer <maxim.cournoyer@gmail.com> ;;; ;;; This file is part of GNU Guix. ;;; @@ -31,7 +31,6 @@ #:use-module (guix build-system cmake) #:use-module (guix build-system copy) #:use-module (guix build-system gnu) - #:use-module (guix build-system python) #:use-module (gnu packages) #:use-module (gnu packages autotools) #:use-module (gnu packages backup) @@ -89,7 +88,7 @@ it produces text in 8-bit or UTF-8 formats.") (base32 "1m310cpb87xx8l8q7jy9fvzf6a0m8rm0dmjpbiwhc2mi6w4gn084")))) (build-system copy-build-system) - (arguments (list #:install-plan #~'(("." "share/tesseract-ocr/tessdata")) + (arguments (list #:install-plan #~'(("." "share/tessdata")) #:phases #~(modify-phases %standard-phases (add-after 'unpack 'delete-broken-links (lambda _ @@ -104,7 +103,7 @@ models for the Tesseract OCR Engine.") (define-public tesseract-ocr (package (name "tesseract-ocr") - (version "5.2.0") + (version "5.3.0") (source (origin (method git-fetch) @@ -114,7 +113,7 @@ models for the Tesseract OCR Engine.") (file-name (git-file-name name version)) (sha256 (base32 - "0dai539h07lqj8lyhznd3wbwdpqr78qrsczq78rsmsryqvmdbyaa")))) + "0vylcba8w5ljnr6pf3ilc2wjq72k660h71b7mlbjz3a2f0x5kr33")))) (build-system gnu-build-system) (arguments (list @@ -132,15 +131,6 @@ models for the Tesseract OCR Engine.") (substitute* "configure.ac" (("AC_SUBST\\(\\[XML_CATALOG_FILES])") "")))) - (add-after 'unpack 'adjust-TESSDATA_PREFIX-macro - (lambda _ - ;; Use a deeper TESSDATA_PREFIX hierarchy so that a more - ;; specific search-path than '/share' can be specified. The - ;; build system uses CPPFLAGS for itself, so we can't simply set - ;; a make flag. - (substitute* "Makefile.am" - (("-DTESSDATA_PREFIX='\"@datadir@\"'") - "-DTESSDATA_PREFIX='\"@datadir@/tesseract-ocr\"'")))) (add-after 'build 'build-training (lambda* (#:key parallel-build? #:allow-other-keys) (define n (if parallel-build? (number->string @@ -156,7 +146,7 @@ models for the Tesseract OCR Engine.") ;; extended via TESSDATA_PREFIX. (lambda* (#:key native-inputs inputs #:allow-other-keys) (define eng.traineddata - "/share/tesseract-ocr/tessdata/eng.traineddata") + "/share/tessdata/eng.traineddata") (install-file (search-input-file (or native-inputs inputs) eng.traineddata) (dirname (string-append #$output @@ -184,7 +174,7 @@ models for the Tesseract OCR Engine.") (list leptonica)) (native-search-paths (list (search-path-specification (variable "TESSDATA_PREFIX") - (files (list "share/tesseract-ocr/tessdata")) + (files (list "share/tessdata")) (separator #f)))) ;single value (home-page "https://github.com/tesseract-ocr/tesseract") (synopsis "Optical character recognition engine") @@ -198,10 +188,29 @@ default. To add support for more languages, the @code{tesseract-ocr-tessdata-fast} package should be installed.") (license license:asl2.0))) +(define-public tesseract-ocr-4 + (package + (inherit tesseract-ocr) + (name "tesseract-ocr") + (version "4.1.3") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/tesseract-ocr/tesseract") + (commit version))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "1ksds8n26kq0acprjn4ycdr163hr9kmkwij7fc973mrjg7kz0pdi")))) + (propagated-inputs + (modify-inputs (package-propagated-inputs tesseract-ocr) + (replace "leptonica" leptonica-1.80))))) + (define-public gimagereader (package (name "gimagereader") - (version "3.4.0") + (version "3.4.1") (source (origin (method url-fetch) @@ -210,7 +219,7 @@ default. To add support for more languages, the "/download/v" version "/" "gimagereader-" version ".tar.xz")) (sha256 - (base32 "09glxh7b4ivrd4samm67b8k2p0aljiagr83wb8nvy5ps2a9gwp5m")))) + (base32 "1972bvnk2bkgbh70vy2prcmdzf4wlna862p2vja9yjxi2c0scmwc")))) (build-system cmake-build-system) (arguments (list @@ -227,7 +236,7 @@ default. To add support for more languages, the sane-backends qtbase-5 qtspell - quazip-0 + quazip tesseract-ocr)) (home-page "https://github.com/manisandro/gImageReader") (synopsis "Qt front-end to tesseract-ocr") |