summary refs log tree commit diff
path: root/gnu/packages/machine-learning.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/machine-learning.scm')
-rw-r--r-- gnu/packages/machine-learning.scm 438
1 files changed, 412 insertions, 26 deletions
diff --git a/gnu/packages/machine-learning.scm b/gnu/packages/machine-learning.scm
index a266c70886..7e7bb89943 100644
--- a/gnu/packages/machine-learning.scm
+++ b/gnu/packages/machine-learning.scm
@@ -56,6 +56,7 @@
   #:use-module (gnu packages check)
   #:use-module (gnu packages compression)
   #:use-module (gnu packages cmake)
+  #:use-module (gnu packages cpp)
   #:use-module (gnu packages cran)
   #:use-module (gnu packages databases)
   #:use-module (gnu packages dejagnu)
@@ -64,10 +65,12 @@
   #:use-module (gnu packages graphviz)
   #:use-module (gnu packages gstreamer)
   #:use-module (gnu packages image)
+  #:use-module (gnu packages libffi)
   #:use-module (gnu packages linux)
   #:use-module (gnu packages llvm)
   #:use-module (gnu packages maths)
   #:use-module (gnu packages mpi)
+  #:use-module (gnu packages ninja)
   #:use-module (gnu packages ocaml)
   #:use-module (gnu packages onc-rpc)
   #:use-module (gnu packages parallel)
@@ -86,6 +89,7 @@
   #:use-module (gnu packages statistics)
   #:use-module (gnu packages sqlite)
   #:use-module (gnu packages swig)
+  #:use-module (gnu packages tls)
   #:use-module (gnu packages web)
   #:use-module (gnu packages xml)
   #:use-module (gnu packages xorg)
@@ -660,6 +664,50 @@ standard data types.")
   ;; headers, hence the name change.
   (deprecated-package "python-onnx" onnx))
 
+(define-public onnx-optimizer
+  (package
+    (name "onnx-optimizer")
+    ;; Note: 0.2.x is *more* recent than 1.5.0.
+    (version "0.2.6")
+    (home-page "https://github.com/onnx/optimizer")
+    (source (origin
+              (method git-fetch)
+              (uri (git-reference
+                    (url home-page)
+                    (commit (string-append "v" version))))  ;i.e., "v0.2.6"
+              (sha256
+               (base32
+                "1wkqqdxcxpfbf8zpbdfdd3zz5jkw775g31gyykj11z4y6pp659l6"))
+              (file-name (git-file-name name version))
+              (patches (search-patches "onnx-optimizer-system-library.patch"))
+              (modules '((guix build utils)))
+              (snippet '(delete-file-recursively "third_party")))) ;presumably bundled code
+    (build-system python-build-system)
+    (arguments (package-arguments onnx))          ;reuse build system tweaks
+    (native-inputs
+     `(("cmake" ,cmake)
+       ("python-pytest" ,python-pytest)
+       ("python-pytest-runner" ,python-pytest-runner)
+       ("python-nbval" ,python-nbval)
+       ("python-coverage" ,python-coverage)))
+    (inputs
+     `(("onnx" ,onnx)
+       ("protobuf" ,protobuf)
+       ("pybind11" ,pybind11)))
+    (propagated-inputs
+     `(("python-numpy" ,python-numpy)))
+    (synopsis "Library to optimize ONNX models")
+    (description
+     "This package provides a C++ and Python library for performing arbitrary
+optimizations on ONNX models, as well as a growing list of prepackaged
+optimization passes.
+
+Not all possible optimizations can be directly implemented on ONNX graphs---
+some will need additional backend-specific information---but many can, and the
+aim is to provide all such passes along with ONNX so that they can be re-used
+with a single function call.")
+    (license license:expat)))
+
 (define-public rxcpp
   (package
     (name "rxcpp")
@@ -701,23 +749,22 @@ synchronization, thread-safety, concurrent data structures, and non-blocking
 I/O.")
     (license license:asl2.0)))
 
-(define-public gemmlowp-for-tensorflow
-  ;; The commit hash is taken from "tensorflow/workspace.bzl".
-  (let ((commit "38ebac7b059e84692f53e5938f97a9943c120d98")
-        (revision "2"))
+
+(define-public gemmlowp
+  (let ((commit "f9959600daa42992baace8a49544a00a743ce1b6")
+        (version "0.1")
+        (revision "1"))
     (package
       (name "gemmlowp")
-      (version (git-version "0" revision commit))
+      (version (git-version version revision commit))
+      (home-page "https://github.com/google/gemmlowp")
       (source (origin
-                (method url-fetch)
-                (uri (string-append "https://mirror.bazel.build/"
-                                    "github.com/google/gemmlowp/archive/"
-                                    commit ".zip"))
-                (file-name (string-append "gemmlowp-" version ".zip"))
+                (method git-fetch)
+                (uri (git-reference (url home-page) (commit commit)))
+                (file-name (git-file-name name version))
                 (sha256
                  (base32
-                  "0n56s2g8hrssm4w8qj1v58gfm56a04n9v992ixkmvk6zjiralzxq"))))
-      (build-system cmake-build-system)
+                  "1hzfhlhzcb827aza6a7drydc67dw5fm3qfqilb9ibskan8dsf0c6"))))
       (arguments
        `(#:configure-flags
          (list ,@(match (%current-system)
@@ -737,18 +784,15 @@ I/O.")
                       (inc (string-append out "/include/")))
                  (install-file "../build/libeight_bit_int_gemm.so" lib)
                  (for-each (lambda (dir)
-                             (let ((target (string-append inc "/" dir)))
-                               (mkdir-p target)
+                             (let ((target
+                                    (string-append inc "/gemmlowp/" dir)))
                                (for-each (lambda (h)
                                            (install-file h target))
                                          (find-files (string-append "../" dir)
                                                      "\\.h$"))))
                            '("meta" "profiling" "public" "fixedpoint"
-                             "eight_bit_int_gemm" "internal"))
-                 #t))))))
-      (native-inputs
-       `(("unzip" ,unzip)))
-      (home-page "https://github.com/google/gemmlowp")
+                             "eight_bit_int_gemm" "internal"))))))))
+      (build-system cmake-build-system)
       (synopsis "Small self-contained low-precision GEMM library")
       (description
        "This is a small self-contained low-precision @dfn{general matrix
@@ -758,6 +802,46 @@ at most 8 bits.  To avoid overflow, results are internally accumulated on more
 than 8 bits, and at the end only some significant 8 bits are kept.")
       (license license:asl2.0))))
 
+(define-public gemmlowp-for-tensorflow
+  ;; The commit hash is taken from "tensorflow/workspace.bzl".
+  (let ((commit "38ebac7b059e84692f53e5938f97a9943c120d98")
+        (revision "2"))
+    (package
+      (inherit gemmlowp)                ;fields not listed below come from 'gemmlowp'
+      (version (git-version "0" revision commit))
+      (source (origin
+                (method url-fetch)
+                (uri (string-append "https://mirror.bazel.build/"
+                                    "github.com/google/gemmlowp/archive/"
+                                    commit ".zip"))
+                (file-name (string-append "gemmlowp-" version ".zip"))
+                (sha256
+                 (base32
+                  "0n56s2g8hrssm4w8qj1v58gfm56a04n9v992ixkmvk6zjiralzxq"))))
+      (arguments
+       (substitute-keyword-arguments (package-arguments gemmlowp)
+         ((#:phases phases)
+          `(modify-phases ,phases
+             (replace 'install          ;overrides the 'install' phase of 'gemmlowp'
+               (lambda* (#:key outputs #:allow-other-keys)
+                 (let* ((out (assoc-ref outputs "out"))
+                        (lib (string-append out "/lib/"))
+                        (inc (string-append out "/include/")))
+                   (install-file "../build/libeight_bit_int_gemm.so" lib)
+                   (for-each (lambda (dir)
+                               ;; Note: Install headers straight into
+                               ;; $includedir instead of $includedir/gemmlowp.
+                               (let ((target (string-append inc "/" dir)))
+                                 (for-each (lambda (h)
+                                             (install-file h target))
+                                           (find-files (string-append "../" dir)
+                                                       "\\.h$"))))
+                             '("meta" "profiling" "public" "fixedpoint"
+                               "eight_bit_int_gemm" "internal")))))))))
+      (native-inputs
+       `(("unzip" ,unzip)))             ;the source above is a ".zip" archive
+      (properties '((hidden? . #t))))))
+
 (define-public dlib
   (package
     (name "dlib")
@@ -2030,6 +2114,175 @@ together building blocks and a subclassing API with an imperative style for
 advanced research.")
     (license license:asl2.0)))
 
+(define-public tensorflow-lite
+  (package
+    (name "tensorflow-lite")
+    (version "2.5.0")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/tensorflow/tensorflow")
+             (commit (string-append "v" version))))  ;i.e., "v2.5.0"
+       (file-name (git-file-name name version))
+       (sha256
+        (base32
+         "1jdw2i1rq06zqd6aabh7bbm0avsg4pygnfmd7gviv0blhih9054l"))))
+    (build-system cmake-build-system)
+    (arguments
+     `(#:tests? #false                  ; no "check" target
+       #:build-type "Release"
+       #:configure-flags
+       (list
+        "-DTFLITE_ENABLE_GPU=OFF"
+        "-DTFLITE_ENABLE_RUY=OFF"
+
+        ;; TODO: The build system attempts to build xnnpack from source.  We
+        ;; would like to use our xnnpack package here, but this requires more
+        ;; work.
+        "-DTFLITE_ENABLE_XNNPACK=OFF"
+
+        ;; Pretend we've already fetched abseil.  We won't actually build it
+        ;; but use the existing package.
+        "-Dabseil-cpp_POPULATED=TRUE"
+
+        ;; Don't fetch the sources.  We have already built flatbuffers.
+        "-Dflatbuffers_POPULATED=TRUE"
+
+        "-DFFT2D_SOURCE_DIR=/tmp/fft2d" ;/tmp/* sources: see 'copy-sources' below
+        "-Dneon2sse_SOURCE_DIR=/tmp/neon2sse"
+        "-Dneon2sse_BINARY_DIR=/tmp/neon2sse-bin"
+        "-DFARMHASH_SOURCE_DIR=/tmp/farmhash"
+        "-Dgemmlowp_SOURCE_DIR=/tmp/gemmlowp"
+        (string-append "-DRUY_SOURCE_DIR="
+                       (assoc-ref %build-inputs "ruy-src"))) ;used as is, not copied
+       #:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'chdir
+           (lambda _ (chdir "tensorflow/lite")))
+         (add-after 'chdir 'copy-sources
+           (lambda* (#:key inputs #:allow-other-keys)
+             ;; Use external cmake finders instead of these stubs that won't
+             ;; find anything but the bundled sources.
+             (delete-file "tools/cmake/modules/Findabsl.cmake")
+             (delete-file "tools/cmake/modules/Findeigen.cmake")
+
+             (substitute* "CMakeLists.txt"
+               (("find_package\\(eigen REQUIRED")
+                "find_package(eigen REQUIRED NAMES Eigen3"))
+             (substitute* "tools/cmake/modules/Findflatbuffers.cmake"
+               (("get_target_property.*")
+                (format #false "set(FLATBUFFERS_INCLUDE_DIRS ~a/include)\n"
+                        (assoc-ref inputs "flatbuffers"))))
+
+             ;; Don't fetch source code; we already have everything we need.
+             (substitute* '("tools/cmake/modules/fft2d.cmake"
+                            "tools/cmake/modules/ruy.cmake"
+                            "tools/cmake/modules/farmhash.cmake"
+                            "tools/cmake/modules/neon2sse.cmake"
+                            "tools/cmake/modules/gemmlowp.cmake")
+               (("OverridableFetchContent_Populate.*") ""))
+
+             (mkdir-p "/tmp/farmhash")
+             (with-directory-excursion "/tmp/farmhash"
+               (invoke "tar" "--strip-components=1"
+                       "-xf" (assoc-ref inputs "farmhash-src")))
+
+             (mkdir-p "/tmp/fft2d")
+             (with-directory-excursion "/tmp/fft2d"
+               (invoke "tar" "--strip-components=1"
+                       "-xf" (assoc-ref inputs "fft2d-src")))
+
+             (copy-recursively (assoc-ref inputs "neon2sse-src")
+                               "/tmp/neon2sse/")
+             (copy-recursively (assoc-ref inputs "gemmlowp-src")
+                               "/tmp/gemmlowp/")))
+         (add-after 'copy-sources 'prepare-shared-library-build
+           (lambda _ (chdir "c")))      ;i.e., "tensorflow/lite/c"
+         (replace 'install
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let* ((out (assoc-ref outputs "out"))
+                    (lib (string-append out "/lib"))
+                    (headers (string-append out "/include/tensorflow/lite")))
+               (install-file "../build/libtensorflowlite_c.so" lib)
+               (with-directory-excursion ".."
+                 (for-each
+                  (lambda (file)
+                    (let ((target-dir (string-append headers "/" (dirname file))))
+                      (install-file file target-dir)))
+                  (find-files "." "\\.h$")))))))))
+    (inputs
+     `(("abseil-cpp" ,abseil-cpp)
+       ("eigen" ,eigen-for-tensorflow-lite)
+       ("flatbuffers" ,flatbuffers)
+       ("python" ,python)))
+    (native-inputs
+     `(("pkg-config" ,pkg-config)
+       ("gemmlowp-src"
+        ;; The commit hash is taken from
+        ;; "tensorflow/lite/tools/cmake/modules/gemmlowp.cmake".
+        ,(let ((commit "fda83bdc38b118cc6b56753bd540caa49e570745"))
+           (origin
+             (method git-fetch)
+             (uri (git-reference
+                   (url "https://github.com/google/gemmlowp")
+                   (commit commit)))
+             (file-name (git-file-name "gemmlowp" (string-take commit 8)))
+             (sha256
+              (base32
+               "1sbp8kmr2azwlvfbzryy1frxi99jhsh1nc93bdbxdf8zdgpv0kxl")))))
+       ("neon2sse-src"
+        ,(let ((commit "a1652fd5253afbf3e39357b012974f93511f6108"))
+           (origin
+             (method git-fetch)
+             (uri (git-reference
+                   (url "https://github.com/intel/ARM_NEON_2_x86_SSE")
+                   (commit commit)))
+             (file-name (git-file-name "neon2sse" (string-take commit 8)))
+             (sha256
+              (base32
+               "1q8gkxag9wlnwdwad2pclsrkwzrdjy94hyrkayrsvxyj7szb5y8i")))))
+       ("farmhash-src"
+        ,(let ((commit "816a4ae622e964763ca0862d9dbd19324a1eaf45"))
+           (origin
+             (method url-fetch)
+             (uri (string-append
+                   "https://mirror.bazel.build/github.com/google/farmhash/archive/"
+                   commit ".tar.gz"))
+             (file-name (git-file-name "farmhash" (string-take commit 8)))
+             (sha256
+              (base32
+               "185b2xdxl4d4cnsnv6abg8s22gxvx8673jq2yaq85bz4cdy58q35")))))
+       ("fft2d-src"
+        ,(origin
+           (method url-fetch)
+           (uri (string-append "https://storage.googleapis.com/"
+                               "mirror.tensorflow.org/"
+                               "www.kurims.kyoto-u.ac.jp/~ooura/fft2d.tgz"))
+           (file-name "fft2d.tar.gz")
+           (sha256
+            (base32
+             "1jfflzi74fag9z4qmgwvp90aif4dpbr1657izmxlgvf4hy8fk9xd"))))
+       ("ruy-src"
+        ,(let ((commit "9c56af3fce210a8a103eda19bd6f47c08a9e3d90"))
+           (origin
+             (method git-fetch)
+             (uri (git-reference
+                   (url "https://github.com/google/ruy")
+                   (commit commit)
+                   (recursive? #true))) ;also fetch Git submodules
+             (file-name (git-file-name "ruy" (string-take commit 8)))
+             (sha256
+              (base32
+               "1cfd5gk6kaj8kbl3h98gx1ap8czd59y6p8qq8nr28fklpyzf5cis")))))))
+    (home-page "https://tensorflow.org")
+    (synopsis "Machine learning framework")
+    (description
+     "TensorFlow is a flexible platform for building and training machine
+learning models.  This package provides the \"lite\" variant for mobile
+devices.")
+    (license license:asl2.0)))
+
 (define-public python-iml
   (package
     (name "python-iml")
@@ -2194,8 +2447,8 @@ that:
 
 (define-public gloo
   (let ((version "0.0.0") ; no proper version tag
-        (commit "ca528e32fea9ca8f2b16053cff17160290fc84ce")
-        (revision "0"))
+        (commit "c22a5cfba94edf8ea4f53a174d38aa0c629d070f")
+        (revision "1"))
     (package
       (name "gloo")
       (version (git-version version revision commit))
@@ -2208,18 +2461,20 @@ that:
          (file-name (git-file-name name version))
          (sha256
           (base32
-           "1q9f80zy75f6njrzrqkmhc0g3qxs4gskr7ns2jdqanxa2ww7a99w"))))
+           "1crmqgybzkgkpbmcx16912gsl5qsj49swa0ikx6mhqgph0chrh11"))))
       (build-system cmake-build-system)
       (native-inputs
        `(("googletest" ,googletest)))
+      (inputs
+       `(("openssl" ,openssl)))
       (arguments
        `(#:configure-flags '("-DBUILD_TEST=1")
          #:phases
          (modify-phases %standard-phases
            (replace 'check
-             (lambda _
-               (invoke "make" "gloo_test")
-               #t)))))
+             (lambda* (#:key tests? #:allow-other-keys)
+               (when tests?
+                 (invoke "make" "gloo_test")))))))
       (synopsis "Collective communications library")
       (description
        "Gloo is a collective communications library.  It comes with a
@@ -2265,7 +2520,7 @@ general non-linear dimension reduction.")
     (package
       (name "xnnpack")
       (version (git-version version revision commit))
-      (home-page "https://github.com/google/XNNPACK")
+      (home-page "https://github.com/google/XNNPACK") ;fork of QNNPACK
       (source (origin
                 (method git-fetch)
                 (uri (git-reference (url home-page) (commit commit)))
@@ -2305,3 +2560,134 @@ high-level machine learning frameworks, such as TensorFlow Lite,
 TensorFlow.js, PyTorch, and MediaPipe.")
       (license license:bsd-3))))
 
+(define-public python-pytorch
+  (package
+    (name "python-pytorch")
+    (version "1.9.0")
+    (source (origin
+              (method git-fetch)
+              (uri (git-reference
+                    (url "https://github.com/pytorch/pytorch")
+                    (commit (string-append "v" version))
+                    (recursive? #t)))   ;also fetch Git submodules
+              (file-name (git-file-name name version))
+              (sha256
+               (base32
+                "0cznsh68hwk5761gv7iijb4g6jgjpvs3bbixwpzzmkbkbn2q96c1"))
+              (patches (search-patches "python-pytorch-system-libraries.patch"
+                                       "python-pytorch-runpath.patch"))
+              (modules '((guix build utils)))
+              (snippet
+               '(begin
+                  ;; XXX: Let's be clear: this package is a bundling fest.  We
+                  ;; delete as much as we can, but there's still a lot left.
+                  (for-each (lambda (directory)
+                              (delete-file-recursively
+                               (string-append "third_party/" directory)))
+                            '("benchmark" "cpuinfo" "eigen"
+
+                              ;; FIXME: QNNPACK (of which XNNPACK is a fork)
+                              ;; needs these.
+                              ;; "FP16" "FXdiv" "gemmlowp" "psimd"
+
+                              "gloo" "googletest" "ios-cmake"
+                              "onnx" "protobuf" "pthreadpool"
+                              "pybind11" "python-enum" "python-peachpy"
+                              "python-six" "tbb" "XNNPACK" "zstd"))
+
+                  ;; Adjust references to the onnx-optimizer headers.
+                  (substitute* "caffe2/onnx/backend.cc"
+                    (("onnx/optimizer/")
+                     "onnxoptimizer/"))))))
+    (build-system python-build-system)
+    (arguments
+     '(#:phases (modify-phases %standard-phases
+                  (add-before 'build 'use-system-libraries
+                    (lambda* (#:key outputs #:allow-other-keys) ;NOTE: 'outputs' is unused
+                      ;; Tell 'setup.py' to let 'CMakeLists.txt' know that we
+                      ;; want to use "system libraries" instead of the bundled
+                      ;; ones.
+                      (setenv "USE_SYSTEM_LIBS" "1")
+
+                      ;; XXX: Disable that for simplicity for now.
+                      (setenv "USE_FBGEMM" "0")))
+                  (add-before 'build 'make-things-writable
+                    (lambda _
+                      ;; The 'build_caffe2' function in
+                      ;; 'tools/build_pytorch_libs.py', called from the
+                      ;; top-level 'setup.py', needs write access to this
+                      ;; directory.
+                      (for-each make-file-writable
+                                (find-files "caffe2/proto" "."
+                                            #:directories? #t))))
+                  (replace 'check
+                    (lambda* (#:key inputs outputs tests? #:allow-other-keys)
+                      ;; Run the test suite following the instructions in
+                      ;; 'CONTRIBUTING.md'.  XXX: Unfortunately this doesn't
+                      ;; work, unless you set PYTHONPATH presumably.
+                      (when tests?
+                        (let ((python-site (site-packages inputs outputs)))
+                          (setenv "PYTHONPATH"
+                                  (string-append python-site ":"
+                                                 (getenv "PYTHONPATH"))) ;XXX: #f if unset
+                          (invoke "python" "test/run_test.py")))))
+                  (add-after 'install 'remove-test-executables
+                    (lambda* (#:key inputs outputs #:allow-other-keys)
+                      ;; Remove test executables, but keep other executables
+                      ;; such as 'torch_shm_manager' and .so files such as
+                      ;; 'libtorch_global_deps.so'.
+                      (let ((python-site (site-packages inputs outputs)))
+                        (for-each delete-file
+                                  (find-files python-site
+                                              "(^test_cpp_rpc|_test)$"))))))
+
+       ;; XXX: Tests attempt to download data such as
+       ;; <https://raw.githubusercontent.com/pytorch/test-infra/master/stats/slow-tests.json>.
+       #:tests? #f))
+    (native-inputs
+     `(("cmake" ,cmake)
+       ("ninja" ,ninja)))
+    (inputs
+     `(("eigen" ,eigen)
+       ;; ("fmt" ,fmt)
+       ("fp16" ,fp16)
+       ("gemmlowp" ,gemmlowp)
+       ("googletest" ,googletest)
+       ("googlebenchmark" ,googlebenchmark)
+       ("gloo" ,gloo)
+       ("openblas" ,openblas)
+       ("openmpi" ,openmpi)
+       ("pthreadpool" ,pthreadpool)
+       ("protobuf" ,protobuf)
+       ("pybind11" ,pybind11)
+       ("sleef" ,sleef)
+       ("xnnpack" ,xnnpack)
+       ("zstd" ,zstd)))
+    (propagated-inputs
+     `(("python-astunparse" ,python-astunparse)
+       ("python-numpy" ,python-numpy)
+       ("python-pyyaml" ,python-pyyaml)
+       ("python-cffi" ,python-cffi)
+       ("python-peachpy" ,python-peachpy)
+       ("python-typing-extensions" ,python-typing-extensions)
+       ("python-future" ,python-future)
+       ("python-six" ,python-six)
+       ("python-requests" ,python-requests)
+       ("onnx" ,onnx)                       ;propagated for its Python modules
+       ("onnx-optimizer" ,onnx-optimizer)
+       ("cpuinfo" ,cpuinfo)))
+    (home-page "https://pytorch.org/")
+    (synopsis "Python library for tensor computation and deep neural networks")
+    (description
+     "PyTorch is a Python package that provides two high-level features:
+
+@itemize
+@item tensor computation (like NumPy) with strong GPU acceleration;
+@item deep neural networks (DNNs) built on a tape-based autograd system.
+@end itemize
+
+You can reuse Python packages such as NumPy, SciPy, and Cython to extend
+PyTorch when needed.
+
+Note: currently this package does not provide GPU support.")
+    (license license:bsd-3)))