summary refs log tree commit diff
path: root/gnu/packages/machine-learning.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/machine-learning.scm')
-rw-r--r--gnu/packages/machine-learning.scm451
1 files changed, 451 insertions, 0 deletions
diff --git a/gnu/packages/machine-learning.scm b/gnu/packages/machine-learning.scm
index 5373020a77..00662fb26d 100644
--- a/gnu/packages/machine-learning.scm
+++ b/gnu/packages/machine-learning.scm
@@ -49,6 +49,7 @@
   #:use-module (gnu packages check)
   #:use-module (gnu packages compression)
   #:use-module (gnu packages cran)
+  #:use-module (gnu packages databases)
   #:use-module (gnu packages dejagnu)
   #:use-module (gnu packages gcc)
   #:use-module (gnu packages glib)
@@ -65,7 +66,9 @@
   #:use-module (gnu packages python)
   #:use-module (gnu packages python-web)
   #:use-module (gnu packages python-xyz)
+  #:use-module (gnu packages serialization)
   #:use-module (gnu packages statistics)
+  #:use-module (gnu packages sqlite)
   #:use-module (gnu packages swig)
   #:use-module (gnu packages tls)
   #:use-module (gnu packages web)
@@ -1303,3 +1306,451 @@ for load balancing, tracing, health checking and authentication.  It is also
 applicable in last mile of distributed computing to connect devices, mobile
 applications and browsers to backend services.")
     (license license:asl2.0)))
+
+;; Note that Tensorflow includes a "third_party" directory, which seems to not
+;; only contain modified subsets of upstream library source code, but also
+;; adapter headers provided by Google (such as the fft.h header, which is not
+;; part of the upstream project code).  The Tensorflow code includes headers
+;; from the "third_party" directory.  It does not look like we can replace
+;; these headers with unmodified upstream files, so we keep them.
+(define-public tensorflow
+  (package
+    (name "tensorflow")
+    (version "1.9.0")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/tensorflow/tensorflow.git")
+             (commit (string-append "v" version))))
+       (file-name (string-append "tensorflow-" version "-checkout"))
+       (sha256
+        (base32
+         "0a9kwha395g3wgxfwln5j8vn9nkspmd75xldrlqdq540w996g8xa"))))
+    (build-system cmake-build-system)
+    (arguments
+     `(#:tests? #f                      ; no "check" target
+       #:build-type "Release"
+       #:configure-flags
+       (let ((protobuf (assoc-ref %build-inputs "protobuf"))
+             (protobuf:native (assoc-ref %build-inputs "protobuf:native"))
+             (jsoncpp (assoc-ref %build-inputs "jsoncpp"))
+             (snappy (assoc-ref %build-inputs "snappy"))
+             (sqlite (assoc-ref %build-inputs "sqlite")))
+         (list
+          ;; Use protobuf from Guix
+          (string-append "-Dprotobuf_STATIC_LIBRARIES="
+                         protobuf "/lib/libprotobuf.so")
+          (string-append "-DPROTOBUF_PROTOC_EXECUTABLE="
+                         protobuf:native "/bin/protoc")
+
+          ;; Use snappy from Guix
+          (string-append "-Dsnappy_STATIC_LIBRARIES="
+                         snappy "/lib/libsnappy.so")
+          ;; Yes, this is not actually the include directory but a prefix...
+          (string-append "-Dsnappy_INCLUDE_DIR=" snappy)
+
+          ;; Use jsoncpp from Guix
+          (string-append "-Djsoncpp_STATIC_LIBRARIES="
+                         jsoncpp "/lib/libjsoncpp.so")
+          ;; Yes, this is not actually the include directory but a prefix...
+          (string-append "-Djsoncpp_INCLUDE_DIR=" jsoncpp)
+
+          ;; Use sqlite from Guix
+          (string-append "-Dsqlite_STATIC_LIBRARIES="
+                         sqlite "/lib/libsqlite.a")
+
+          ;; Use system libraries wherever possible.  Currently, this
+          ;; only affects zlib.
+          "-Dsystemlib_ALL=ON"
+          "-Dtensorflow_ENABLE_POSITION_INDEPENDENT_CODE=ON"
+          "-Dtensorflow_BUILD_SHARED_LIB=ON"
+          "-Dtensorflow_OPTIMIZE_FOR_NATIVE_ARCH=OFF"
+          "-Dtensorflow_ENABLE_SSL_SUPPORT=OFF"
+          "-Dtensorflow_BUILD_CONTRIB_KERNELS=OFF"))
+       #:make-flags
+       (list "CC=gcc")
+       #:modules ((ice-9 ftw)
+                  (guix build utils)
+                  (guix build cmake-build-system))
+       #:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'set-source-file-times-to-1980
+           ;; At the end of the tf_python_build_pip_package target, a ZIP
+           ;; archive should be generated via bdist_wheel, but it fails with
+           ;; "ZIP does not support timestamps before 1980".  Luckily,
+           ;; SOURCE_DATE_EPOCH is respected, which we set to some time in
+           ;; 1980.
+           (lambda _ (setenv "SOURCE_DATE_EPOCH" "315532800") #t))
+         ;; See https://github.com/tensorflow/tensorflow/issues/20517#issuecomment-406373913
+         (add-after 'unpack 'python3.7-compatibility
+           (lambda _
+             (substitute* '("tensorflow/python/eager/pywrap_tfe_src.cc"
+                            "tensorflow/python/lib/core/ndarray_tensor.cc"
+                            "tensorflow/python/lib/core/py_func.cc")
+               (("PyUnicode_AsUTF8") "(char *)PyUnicode_AsUTF8"))
+             (substitute* "tensorflow/c/eager/c_api.h"
+               (("unsigned char async")
+                "unsigned char is_async"))
+
+             ;; Remove dependency on tensorboard, a complicated but probably
+             ;; optional package.
+             (substitute* "tensorflow/tools/pip_package/setup.py"
+               ((".*'tensorboard >.*") ""))
+             #t))
+         (add-after 'python3.7-compatibility 'chdir
+           (lambda _ (chdir "tensorflow/contrib/cmake") #t))
+         (add-after 'chdir 'disable-downloads
+           (lambda* (#:key inputs #:allow-other-keys)
+             (substitute* (find-files "external" "\\.cmake$")
+               (("GIT_REPOSITORY.*") "")
+               (("GIT_TAG.*") "")
+               (("PREFIX ")
+                "DOWNLOAD_COMMAND \"\"\nPREFIX "))
+
+             ;; Use packages from Guix
+             (let ((grpc (assoc-ref inputs "grpc")))
+               (substitute* "CMakeLists.txt"
+                 ;; Sqlite
+                 (("include\\(sqlite\\)") "")
+                 (("\\$\\{sqlite_STATIC_LIBRARIES\\}")
+                  (string-append (assoc-ref inputs "sqlite")
+                                 "/lib/libsqlite3.so"))
+                 (("sqlite_copy_headers_to_destination") "")
+
+                 ;; PNG
+                 (("include\\(png\\)") "")
+                 (("\\$\\{png_STATIC_LIBRARIES\\}")
+                  (string-append (assoc-ref inputs "libpng")
+                                 "/lib/libpng16.so"))
+                 (("png_copy_headers_to_destination") "")
+
+                 ;; JPEG
+                 (("include\\(jpeg\\)") "")
+                 (("\\$\\{jpeg_STATIC_LIBRARIES\\}")
+                  (string-append (assoc-ref inputs "libjpeg")
+                                 "/lib/libjpeg.so"))
+                 (("jpeg_copy_headers_to_destination") "")
+
+                 ;; GIF
+                 (("include\\(gif\\)") "")
+                 (("\\$\\{gif_STATIC_LIBRARIES\\}")
+                  (string-append (assoc-ref inputs "giflib")
+                                 "/lib/libgif.so"))
+                 (("gif_copy_headers_to_destination") "")
+
+                 ;; lmdb
+                 (("include\\(lmdb\\)") "")
+                 (("\\$\\{lmdb_STATIC_LIBRARIES\\}")
+                  (string-append (assoc-ref inputs "lmdb")
+                                 "/lib/liblmdb.so"))
+                 (("lmdb_copy_headers_to_destination") "")
+
+                 ;; Protobuf
+                 (("include\\(protobuf\\)") "")
+                 (("protobuf_copy_headers_to_destination") "")
+                 (("^ +protobuf") "")
+
+                 ;; gRPC
+                 (("include\\(grpc\\)")
+                  "find_package(grpc REQUIRED NAMES gRPC)")
+                 (("list\\(APPEND tensorflow_EXTERNAL_DEPENDENCIES grpc\\)") "")
+
+                 ;; Eigen
+                 (("include\\(eigen\\)")
+                  (string-append "find_package(eigen REQUIRED NAMES Eigen3)
+set(eigen_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/external/eigen_archive "
+                                 (assoc-ref inputs "eigen") "/include/eigen3)"))
+                 (("^ +eigen") "")
+
+                 ;; snappy
+                 (("include\\(snappy\\)")
+                  "add_definitions(-DTF_USE_SNAPPY)")
+                 (("list\\(APPEND tensorflow_EXTERNAL_DEPENDENCIES snappy\\)") "")
+
+                 ;; jsoncpp
+                 (("include\\(jsoncpp\\)") "")
+                 (("^ +jsoncpp") ""))
+
+               (substitute* "tf_core_framework.cmake"
+                 ((" grpc") "")
+                 (("\\$\\{GRPC_BUILD\\}/grpc_cpp_plugin")
+                  (which "grpc_cpp_plugin"))
+                 ;; Link with gRPC libraries
+                 (("add_library\\(tf_protos_cc.*" m)
+                  (string-append m
+                                 (format #f "\ntarget_link_libraries(tf_protos_cc PRIVATE \
+~a/lib/libgrpc++_unsecure.a \
+~a/lib/libgrpc_unsecure.a \
+~a/lib/libaddress_sorting.a \
+~a/lib/libgpr.a \
+~a//lib/libcares.so
+)\n"
+                                         grpc grpc grpc grpc
+                                         (assoc-ref inputs "c-ares"))))))
+             (substitute* "tf_tools.cmake"
+               (("add_dependencies\\(\\$\\{proto_text.*") ""))
+             ;; Remove dependency on bundled grpc
+             (substitute* "tf_core_distributed_runtime.cmake"
+               (("tf_core_cpu grpc") "tf_core_cpu"))
+
+             ;; This directory is a dependency of many targets.
+             (mkdir-p "protobuf")
+             #t))
+         (add-after 'configure 'unpack-third-party-sources
+           (lambda* (#:key inputs #:allow-other-keys)
+             ;; This is needed to configure bundled packages properly.
+             (setenv "CONFIG_SHELL" (which "bash"))
+             (for-each
+              (lambda (name)
+                (let* ((what  (assoc-ref inputs (string-append name "-src")))
+                       (name* (string-map (lambda (c)
+                                            (if (char=? c #\-)
+                                                #\_ c)) name))
+                       (where (string-append "../build/" name* "/src/" name*)))
+                  (cond
+                   ((string-suffix? ".zip" what)
+                    (mkdir-p where)
+                    (with-directory-excursion where
+                      (invoke "unzip" what)))
+                   ((string-suffix? ".tar.gz" what)
+                    (mkdir-p where)
+                    (invoke "tar" "xf" what
+                            "-C" where "--strip-components=1"))
+                   (else
+                    (let ((parent (dirname where)))
+                      (mkdir-p parent)
+                      (with-directory-excursion parent
+                        (when (file-exists? name*)
+                          (delete-file-recursively name*))
+                        (copy-recursively what name*)
+                        (map make-file-writable
+                             (find-files name* ".*"))))))))
+              (list "boringssl"
+                    "cub"
+                    "double-conversion"
+                    "farmhash"
+                    "fft2d"
+                    "highwayhash"
+                    "nsync"
+                    "re2"))
+
+             (rename-file "../build/cub/src/cub/cub-1.8.0/"
+                          "../build/cub/src/cub/cub/")
+             #t))
+         (add-after 'unpack 'fix-python-build
+           (lambda* (#:key inputs outputs #:allow-other-keys)
+             (mkdir-p "protobuf-src")
+             (invoke "tar" "xf" (assoc-ref inputs "protobuf:src")
+                     "-C" "protobuf-src" "--strip-components=1")
+             (mkdir-p "eigen-src")
+             (invoke "tar" "xf" (assoc-ref inputs "eigen:src")
+                     "-C" "eigen-src" "--strip-components=1")
+
+             (substitute* "tensorflow/contrib/cmake/tf_python.cmake"
+               ;; Ensure that all Python dependencies can be found at build time.
+               (("PYTHONPATH=\\$\\{CMAKE_CURRENT_BINARY_DIR\\}/tf_python" m)
+                (string-append m ":" (getenv "PYTHONPATH")))
+               ;; Take protobuf source files from our source package.
+               (("\\$\\{CMAKE_CURRENT_BINARY_DIR\\}/protobuf/src/protobuf/src/google")
+                (string-append (getcwd) "/protobuf-src/src/google")))
+
+             (substitute* '("tensorflow/contrib/cmake/tf_shared_lib.cmake"
+                            "tensorflow/contrib/cmake/tf_python.cmake")
+               ;; Take Eigen source files from our source package.
+               (("\\$\\{CMAKE_CURRENT_BINARY_DIR\\}/eigen/src/eigen/")
+                (string-append (getcwd) "/eigen-src/"))
+               ;; Take Eigen headers from our own package.
+               (("\\$\\{CMAKE_CURRENT_BINARY_DIR\\}/external/eigen_archive")
+                (string-append (assoc-ref inputs "eigen") "/include/eigen3")))
+
+             ;; Correct the RUNPATH of ops libraries generated for Python.
+             ;; TODO: this doesn't work :(
+             ;; /gnu/store/...-tensorflow-1.9.0/lib/python3.7/site-packages/tensorflow/contrib/seq2seq/python/ops/lib_beam_search_ops.so:
+             ;; warning: RUNPATH contains bogus entries: ("/tmp/guix-build-tensorflow-1.9.0.drv-0/source/tensorflow/contrib/build")
+             ;; /gnu/store/...-tensorflow-1.9.0/lib/python3.7/site-packages/tensorflow/contrib/seq2seq/python/ops/lib_beam_search_ops.so:
+             ;; error: depends on 'libpywrap_tensorflow_internal.so', which
+             ;; cannot be found in RUNPATH ...
+             (substitute* "tensorflow/contrib/cmake/tf_cc_ops.cmake"
+               (("set_target_properties.*")
+                (string-append "set_target_properties(${_AT_TARGET} PROPERTIES \
+COMPILE_FLAGS ${target_compile_flags} \
+INSTALL_RPATH_USE_LINK_PATH TRUE \
+INSTALL_RPATH " (assoc-ref outputs "out") "/lib)\n")))
+             #t))
+         (add-after 'build 'build-pip-package
+           (lambda* (#:key outputs #:allow-other-keys)
+             (setenv "LDFLAGS"
+                     (string-append "-Wl,-rpath="
+                                    (assoc-ref outputs "out") "/lib"))
+             (invoke "make" "tf_python_build_pip_package")
+             #t))
+         (add-after 'build-pip-package 'install-python
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let ((out (assoc-ref outputs "out"))
+                   (wheel (car (find-files "../build/tf_python/dist/" "\\.whl$"))))
+               (invoke "python" "-m" "pip" "install" wheel
+                       (string-append "--prefix=" out))
+
+               ;; XXX: broken RUNPATH, see fix-python-build phase.
+               (delete-file
+                (string-append
+                 out "/lib/python3.7/site-packages/tensorflow/contrib/"
+                 "seq2seq/python/ops/lib_beam_search_ops.so"))
+               #t))))))
+    (native-inputs
+     `(("pkg-config" ,pkg-config)
+       ("protobuf:native" ,protobuf-next) ; protoc
+       ("protobuf:src" ,(package-source protobuf-next))
+       ("eigen:src" ,(package-source eigen-for-tensorflow))
+       ;; The commit hashes and URLs for third-party source code are taken
+       ;; from "tensorflow/workspace.bzl".
+       ("boringssl-src"
+        ,(let ((commit "ee7aa02")
+               (revision "1"))
+           (origin
+             (method git-fetch)
+             (uri (git-reference
+                   (url "https://boringssl.googlesource.com/boringssl")
+                   (commit commit)))
+             (file-name (string-append "boringssl-0-" revision
+                                       (string-take commit 7)
+                                       "-checkout"))
+             (sha256
+              (base32
+               "1jf693q0nw0adsic6cgmbdx6g7wr4rj4vxa8j1hpn792fqhd8wgw")))))
+       ("cub-src"
+        ,(let ((version "1.8.0"))
+           (origin
+             (method url-fetch)
+             (uri (string-append "https://mirror.bazel.build/github.com/NVlabs/"
+                                 "cub/archive/" version ".zip"))
+             (file-name (string-append "cub-" version ".zip"))
+             (sha256
+              (base32
+               "1hsqikqridb90dkxkjr2918dcry6pfh46ccnwrzawl56aamhdykb")))))
+       ("double-conversion-src"
+        ,(let ((commit "5664746")
+               (revision "1"))
+           (origin
+             (method git-fetch)
+             (uri (git-reference
+                   (url "https://github.com/google/double-conversion.git")
+                   (commit commit)))
+             (file-name
+              (git-file-name "double-conversion"
+                             (string-append "0-" revision "."
+                                            (string-take commit 7))))
+             (sha256
+              (base32
+               "1h5lppqqxcvdg5jq42i5msgwx20ryij3apvmndflngrgdpc04gn1")))))
+       ("farmhash-src"
+        ,(let ((commit "816a4ae622e964763ca0862d9dbd19324a1eaf45"))
+           (origin
+             (method url-fetch)
+             (uri (string-append
+                   "https://mirror.bazel.build/github.com/google/farmhash/archive/"
+                   commit ".tar.gz"))
+             (file-name (string-append "farmhash-0-" (string-take commit 7)
+                                       ".tar.gz"))
+             (sha256
+              (base32
+               "185b2xdxl4d4cnsnv6abg8s22gxvx8673jq2yaq85bz4cdy58q35")))))
+       ;; The license notice on the home page at
+       ;; http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html says:
+       ;;   Copyright Takuya OOURA, 1996-2001
+       ;;
+       ;;   You may use, copy, modify and distribute this code for any purpose
+       ;;   (include commercial use) and without fee. Please refer to this
+       ;;   package when you modify this code.
+       ;;
+       ;; We take the identical tarball from the Bazel mirror, because the URL
+       ;; at the home page is not versioned and might change.
+       ("fft2d-src"
+        ,(origin
+           (method url-fetch)
+           (uri "https://mirror.bazel.build/www.kurims.kyoto-u.ac.jp/~ooura/fft.tgz")
+           (file-name "fft2d.tar.gz")
+           (sha256
+            (base32
+             "15jjkfvhqvl2c0753d2di8hz0pyzn598g74wqy79awdrf1y67fsj"))))
+       ("highwayhash-src"
+        ,(let ((commit "be5edafc2e1a455768e260ccd68ae7317b6690ee")
+               (revision "1"))
+           (origin
+             (method git-fetch)
+             (uri (git-reference
+                   (url "https://github.com/google/highwayhash.git")
+                   (commit commit)))
+             (file-name (string-append "highwayhash-0-" revision
+                                       (string-take commit 7)
+                                       "-checkout"))
+             (sha256
+              (base32
+               "154jwf98cyy54hldr94pgjn85zynly3abpnc1avmb8a18lzwjyb6")))))
+       ("nsync-src"
+        ,(let ((version "0559ce013feac8db639ee1bf776aca0325d28777")
+               (revision "1"))
+           (origin
+             (method url-fetch)
+             (uri (string-append "https://mirror.bazel.build/"
+                                 "github.com/google/nsync/archive/"
+                                 version ".tar.gz"))
+             (file-name (string-append "nsync-0." revision
+                                       "-" (string-take version 7)
+                                       ".tar.gz"))
+             (sha256
+              (base32
+               "0qdkyqym34x739mmzv97ah5r7ph462v5xkxqxvidmcfqbi64b132")))))
+       ("re2-src"
+        ,(let ((commit "e7efc48")
+               (revision "1"))
+           (origin
+             (method git-fetch)
+             (uri (git-reference
+                   (url "https://github.com/google/re2")
+                   (commit commit)))
+             (file-name (string-append "re2-0-" revision
+                                       (string-take commit 7)
+                                       "-checkout"))
+             (sha256
+              (base32
+               "161g9841rjfsy5pn52fcis0s9hdr7rxvb06pad38j5rppfihvign")))))
+       ("googletest" ,googletest)
+       ("swig" ,swig)
+       ("unzip" ,unzip)))
+    (propagated-inputs
+     `(("python-absl-py" ,python-absl-py)
+       ("python-astor" ,python-astor)
+       ("python-gast" ,python-gast)
+       ("python-grpcio" ,python-grpcio)
+       ("python-numpy" ,python-numpy)
+       ("python-protobuf" ,python-protobuf-next)
+       ("python-six" ,python-six)
+       ("python-termcolo" ,python-termcolor)
+       ("python-wheel" ,python-wheel)))
+    (inputs
+     `(("c-ares" ,c-ares-next)
+       ("eigen" ,eigen-for-tensorflow)
+       ("gemmlowp" ,gemmlowp-for-tensorflow)
+       ("lmdb" ,lmdb)
+       ("libjpeg" ,libjpeg)
+       ("libpng" ,libpng)
+       ("giflib" ,giflib)
+       ("grpc" ,grpc)
+       ("jsoncpp" ,jsoncpp-for-tensorflow)
+       ("snappy" ,snappy)
+       ("sqlite" ,sqlite)
+       ("protobuf" ,protobuf-next)
+       ("python" ,python-wrapper)
+       ("zlib" ,zlib)))
+    (home-page "https://tensorflow.org")
+    (synopsis "Machine learning framework")
+    (description
+     "TensorFlow is a flexible platform for building and training machine
+learning models.  It provides a library for high performance numerical
+computation and includes high level Python APIs, including both a sequential
+API for beginners that allows users to build models quickly by plugging
+together building blocks and a subclassing API with an imperative style for
+advanced research.")
+    (license license:asl2.0)))