summary refs log tree commit diff
path: root/gnu/packages/python-science.scm
diff options
context:
space:
mode:
author Marius Bakke <marius@gnu.org> 2021-07-29 22:34:57 +0200
committer Marius Bakke <marius@gnu.org> 2021-07-29 22:34:57 +0200
commit b029be2ee0f81cdcbc14240ff426408085ab0a40 (patch)
tree ed6d0e1bfdadfd28e1eb804e43763b793baa0b42 /gnu/packages/python-science.scm
parent bc55f3091bac4677df0cf020381c554921fea179 (diff)
parent ffb381856d0c6cc1a557b789f6b377cfa17002a0 (diff)
download guix-b029be2ee0f81cdcbc14240ff426408085ab0a40.tar.gz
Merge branch 'master' into core-updates-frozen
 Conflicts:
	gnu/packages/bioinformatics.scm
	gnu/packages/cmake.scm
	gnu/packages/curl.scm
	gnu/packages/emacs-xyz.scm
	gnu/packages/gpodder.scm
	gnu/packages/music.scm
	gnu/packages/patches/glibc-bootstrap-system.patch
	gnu/packages/python-xyz.scm
	gnu/packages/shells.scm
	gnu/packages/statistics.scm
Diffstat (limited to 'gnu/packages/python-science.scm')
-rw-r--r-- gnu/packages/python-science.scm 210
1 file changed, 178 insertions, 32 deletions
diff --git a/gnu/packages/python-science.scm b/gnu/packages/python-science.scm
index 6fce8d2319..738b504e3b 100644
--- a/gnu/packages/python-science.scm
+++ b/gnu/packages/python-science.scm
@@ -35,6 +35,7 @@
   #:use-module (gnu packages)
   #:use-module (gnu packages base)
   #:use-module (gnu packages check)
+  #:use-module (gnu packages databases)
   #:use-module (gnu packages gcc)
   #:use-module (gnu packages image-processing)
   #:use-module (gnu packages machine-learning)
@@ -44,6 +45,7 @@
   #:use-module (gnu packages pkg-config)
   #:use-module (gnu packages python)
   #:use-module (gnu packages python-build)
+  #:use-module (gnu packages python-crypto)
   #:use-module (gnu packages python-check)
   #:use-module (gnu packages python-web)
   #:use-module (gnu packages python-xyz)
@@ -327,49 +329,55 @@ of the SGP4 satellite tracking algorithm.")
 (define-public python-pandas
   (package
     (name "python-pandas")
-    (version "1.0.5")
+    (version "1.3.0")
     (source
      (origin
        (method url-fetch)
        (uri (pypi-uri "pandas" version))
        (sha256
-        (base32 "1a2gv3g6jr6vb5ca43fkwjl5xf86wpfz8y3zcy787adjl0hdkib9"))))
+        (base32 "1qi2cv450m05dwccx3p1s373k5b4ncvwi74plnms2pidrz4ycm65"))))
     (build-system python-build-system)
     (arguments
      `(#:modules ((guix build utils)
                   (guix build python-build-system)
                   (ice-9 ftw)
+                  (srfi srfi-1)
                   (srfi srfi-26))
-       #:phases (modify-phases %standard-phases
-                  (add-after 'unpack 'patch-which
-                    (lambda* (#:key inputs #:allow-other-keys)
-                      (let ((which (assoc-ref inputs "which")))
-                        (substitute* "pandas/io/clipboard/__init__.py"
-                          (("^WHICH_CMD = .*")
-                           (string-append "WHICH_CMD = \"" which "\"\n"))))
-                      #t))
-                  (add-before 'check 'prepare-x
-                    (lambda _
-                      (system "Xvfb &")
-                      (setenv "DISPLAY" ":0")
-                      ;; xsel needs to write a log file.
-                      (setenv "HOME" "/tmp")
-                      #t))
-                  (replace 'check
-                    (lambda _
-                      (let ((build-directory
-                             (string-append
-                              (getcwd) "/build/"
-                              (car (scandir "build"
-                                            (cut string-prefix? "lib." <>))))))
-                        ;; Disable the "strict data files" option which causes
-                        ;; the build to error out if required data files are
-                        ;; not available (as is the case with PyPI archives).
-                        (substitute* "setup.cfg"
-                          (("addopts = --strict-data-files") "addopts = "))
-                        (with-directory-excursion build-directory
-                          (invoke "pytest" "-vv" "pandas" "--skip-slow"
-                                  "--skip-network"))))))))
+       #:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'patch-which
+           (lambda* (#:key inputs #:allow-other-keys)
+             (let ((which (assoc-ref inputs "which")))
+               (substitute* "pandas/io/clipboard/__init__.py"
+                 (("^WHICH_CMD = .*")
+                  (string-append "WHICH_CMD = \"" which "\"\n"))))))
+         (add-before 'check 'prepare-x
+           (lambda _
+             (system "Xvfb &")
+             (setenv "DISPLAY" ":0")
+             ;; xsel needs to write a log file.
+             (setenv "HOME" "/tmp")))
+         (replace 'check
+           (lambda _
+             (let ((build-directory
+                    (string-append
+                     (getcwd) "/build/"
+                     (first (scandir "build"
+                                     (cut string-prefix? "lib." <>))))))
+               (with-directory-excursion build-directory
+                 (invoke "pytest" "-vv" "pandas" "--skip-slow"
+                         "--skip-network"
+                         "-k"
+                         ;; These tests access the internet:
+                         ;; pandas/tests/io/xml/test_xml.py::test_wrong_url[lxml]
+                         ;; pandas/tests/io/xml/test_xml.py::test_wrong_url[etree]
+                         ;; TODO: the excel tests fail for unknown reasons
+                         (string-append "not test_wrong_url"
+                                        " and not test_excelwriter_fspath"
+                                        " and not test_ExcelWriter_dispatch"
+                                        ;; TODO: Missing input
+                                        " and not TestS3"
+                                        " and not s3")))))))))
     (propagated-inputs
      `(("python-jinja2" ,python-jinja2)
        ("python-numpy" ,python-numpy)
@@ -835,3 +843,141 @@ and more
 @end itemize")
     (license license:gpl3)))
 
+(define-public python-distributed
+  (package
+    (name "python-distributed")
+    (version "2021.07.1")
+    (source
+     (origin
+       ;; Fetch from the upstream Git repository, because the archive on
+       ;; PyPI does not include the test files.
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/dask/distributed")
+             (commit version)))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32 "0i55zf3k55sqjxnwlzsyj3h3v1588fn54ng4mj3dfiqzh3nlj0dg"))))
+    (build-system python-build-system)
+    (arguments
+     '(#:phases
+       (modify-phases %standard-phases
+         ;; Rewrite the quoted command names used by the tests so that they
+         ;; point at the executables under this package's "out" output.
+         (add-after 'unpack 'fix-references
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let* ((out (assoc-ref outputs "out"))
+                    ;; Return TOOL as a double-quoted absolute file name.
+                    (quoted-tool
+                     (lambda (tool)
+                       (format #false "\"~a/bin/~a\"" out tool))))
+               (substitute* '("distributed/comm/tests/test_ucx_config.py"
+                              "distributed/tests/test_client.py"
+                              "distributed/tests/test_queues.py"
+                              "distributed/tests/test_variable.py"
+                              "distributed/cli/tests/test_tls_cli.py"
+                              "distributed/cli/tests/test_dask_spec.py"
+                              "distributed/cli/tests/test_dask_worker.py"
+                              "distributed/cli/tests/test_dask_scheduler.py")
+                 (("\"dask-scheduler\"")
+                  (quoted-tool "dask-scheduler"))
+                 (("\"dask-worker\"")
+                  (quoted-tool "dask-worker"))))))
+         (replace 'check
+           (lambda* (#:key tests? #:allow-other-keys)
+             (when tests?
+               (setenv "DISABLE_IPV6" "1")
+               (let ((markers
+                      ;; Marker expression handed to pytest's "-m" option.
+                      (string-join '("not slow"
+                                     "not gpu"
+                                     "not ipython"
+                                     "not avoid_ci")
+                                   " and "))
+                     (deselected
+                      ;; Keyword expression handed to pytest's "-k" option.
+                      ;; TODO: These tests fail for unknown reasons.
+                      (string-join
+                       '(;; Assertion error.
+                         "not test_version_option"
+                         ;; "The 'distributed' distribution was not found"
+                         "not test_register_backend_entrypoint"
+                         ;; "AttributeError: module 'distributed.dashboard'
+                         ;; has no attribute 'scheduler'"
+                         "not test_get_client_functions_spawn_clusters")
+                       " and ")))
+                 (invoke "pytest" "-vv" "distributed"
+                         "-m" markers
+                         "-k" deselected))))))))
+    (propagated-inputs
+     `(("python-click" ,python-click)
+       ("python-cloudpickle" ,python-cloudpickle)
+       ("python-cryptography" ,python-cryptography)
+       ("python-dask" ,python-dask)
+       ("python-msgpack" ,python-msgpack)
+       ("python-psutil" ,python-psutil)
+       ("python-pyyaml" ,python-pyyaml)
+       ("python-setuptools" ,python-setuptools)
+       ("python-sortedcontainers" ,python-sortedcontainers)
+       ("python-tblib" ,python-tblib)
+       ("python-toolz" ,python-toolz)
+       ("python-tornado" ,python-tornado-6)
+       ("python-zict" ,python-zict)))
+    (native-inputs
+     `(("python-pytest" ,python-pytest)))
+    (home-page "https://distributed.dask.org")
+    (synopsis "Distributed scheduler for Dask")
+    (description "Dask.distributed is a lightweight library for distributed
+computing in Python.  It extends both the @code{concurrent.futures} and
+@code{dask} APIs to moderate sized clusters.")
+    (license license:bsd-3)))
+
+(define-public python-modin
+  (package
+    (name "python-modin")
+    (version "0.10.1")
+    (source
+     (origin
+       ;; Use a Git checkout, because the archive on PyPI does not include
+       ;; all required files.
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/modin-project/modin")
+             (commit version)))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32 "128ghfb9ncmnn8km409xjcdppvn9nr9jqw8rkvsfavh7wnwlk509"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         ;; Make every file of the unpacked source tree writable.
+         (add-after 'unpack 'make-files-writable
+           (lambda _
+             (let ((files (find-files ".")))
+               (for-each make-file-writable files))))
+         (replace 'check
+           (lambda* (#:key tests? #:allow-other-keys)
+             (when tests?
+               ;; Run the concat test file once per engine, selecting the
+               ;; engine through the MODIN_ENGINE environment variable.
+               (let ((run-concat-tests
+                      (lambda (engine)
+                        (setenv "MODIN_ENGINE" engine)
+                        (invoke "python" "-m" "pytest"
+                                "modin/pandas/test/test_concat.py"))))
+                 (run-concat-tests "dask")
+                 (run-concat-tests "python"))))))))
+    (propagated-inputs
+     `(("python-cloudpickle" ,python-cloudpickle)
+       ("python-dask" ,python-dask)
+       ("python-distributed" ,python-distributed)
+       ("python-numpy" ,python-numpy)
+       ("python-packaging" ,python-packaging)
+       ("python-pandas" ,python-pandas)))
+    (native-inputs
+     `(("python-coverage" ,python-coverage)
+       ("python-jinja2" ,python-jinja2)
+       ("python-lxml" ,python-lxml)
+       ("python-matplotlib" ,python-matplotlib)
+       ("python-msgpack" ,python-msgpack)
+       ("python-openpyxl" ,python-openpyxl)
+       ("python-psutil" ,python-psutil)
+       ("python-pyarrow" ,python-pyarrow)
+       ("python-pytest" ,python-pytest)
+       ("python-pytest-benchmark" ,python-pytest-benchmark)
+       ("python-pytest-cov" ,python-pytest-cov)
+       ("python-pytest-xdist" ,python-pytest-xdist)
+       ("python-scipy" ,python-scipy)
+       ("python-sqlalchemy" ,python-sqlalchemy)
+       ("python-tables" ,python-tables)
+       ("python-tqdm" ,python-tqdm)
+       ("python-xarray" ,python-xarray)
+       ("python-xlrd" ,python-xlrd)))
+    (home-page "https://github.com/modin-project/modin")
+    (synopsis "Make your pandas code run faster")
+    (description
+     "Modin uses Ray or Dask to provide an effortless way to speed up your
+pandas notebooks, scripts, and libraries.  Unlike other distributed DataFrame
+libraries, Modin provides seamless integration and compatibility with existing
+pandas code.")
+    (license license:asl2.0)))