diff options
author | Maxim Cournoyer <maxim.cournoyer@gmail.com> | 2022-08-11 22:59:45 -0400 |
---|---|---|
committer | Maxim Cournoyer <maxim.cournoyer@gmail.com> | 2022-08-12 15:43:48 -0400 |
commit | a6b6b0e89e6a3aefa0241de9f171424ad99be30c (patch) | |
tree | 49f7a18591a9847b1b7478b4aebb7f157d4a0b53 /gnu/packages/ocr.scm | |
parent | ecc2587f1f2c4bd693f1f08a9f681a3d829d0289 (diff) | |
download | guix-a6b6b0e89e6a3aefa0241de9f171424ad99be30c.tar.gz |
gnu: Add tesseract-ocr-tessdata-fast.
* gnu/packages/ocr.scm (tesseract-ocr-tessdata-fast): New variable.
Diffstat (limited to 'gnu/packages/ocr.scm')
-rw-r--r-- | gnu/packages/ocr.scm | 27 |
1 file changed, 27 insertions, 0 deletions
```diff
diff --git a/gnu/packages/ocr.scm b/gnu/packages/ocr.scm
index e28bd17668..e2c9f561cc 100644
--- a/gnu/packages/ocr.scm
+++ b/gnu/packages/ocr.scm
@@ -29,6 +29,7 @@
   #:use-module (guix gexp)
   #:use-module (guix git-download)
   #:use-module (guix build-system cmake)
+  #:use-module (guix build-system copy)
   #:use-module (guix build-system gnu)
   #:use-module (guix build-system python)
   #:use-module (gnu packages)
@@ -74,6 +75,32 @@ feature extraction method. It can read images in PBM, PGM or PPM formats and
 it produces text in 8-bit or UTF-8 formats.")
     (license license:gpl3+)))
 
+(define-public tesseract-ocr-tessdata-fast
+  (package
+    (name "tesseract-ocr-tessdata-fast")
+    (version "4.1.0")
+    (source (origin
+              (method git-fetch)
+              (uri (git-reference
+                    (url "https://github.com/tesseract-ocr/tessdata_fast")
+                    (commit version)))
+              (file-name (git-file-name name version))
+              (sha256
+               (base32
+                "1m310cpb87xx8l8q7jy9fvzf6a0m8rm0dmjpbiwhc2mi6w4gn084"))))
+    (build-system copy-build-system)
+    (arguments (list #:install-plan #~'(("." "share/tesseract-ocr/tessdata"))
+                     #:phases #~(modify-phases %standard-phases
+                                  (add-after 'unpack 'delete-broken-links
+                                    (lambda _
+                                      (delete-file "configs")
+                                      (delete-file "pdf.ttf"))))))
+    (home-page "https://github.com/tesseract-ocr/tessdata_fast")
+    (synopsis "Fast integer versions of trained LSTM models")
+    (description "This repository contains fast integer versions of trained
+models for the Tesseract OCR Engine.")
+    (license license:asl2.0)))
+
 (define-public tesseract-ocr
   (package
     (name "tesseract-ocr")
```