summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--gnu/packages/python.scm45
1 files changed, 45 insertions, 0 deletions
diff --git a/gnu/packages/python.scm b/gnu/packages/python.scm
index b771452629..1d4b069d09 100644
--- a/gnu/packages/python.scm
+++ b/gnu/packages/python.scm
@@ -14246,3 +14246,48 @@ This Python package wraps the Blosc library.")
     (description "Partd stores key-value pairs.  Values are raw bytes.  We
 append on old values.  Partd excels at shuffling operations.")
     (license license:bsd-3)))
+
+(define-public python-dask
+  (package
+    (name "python-dask")
+    (version "0.19.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "dask" version))
+       (sha256
+        (base32
+         "1pm1163qb6s22p8fnvj0zlfazihvs7hxjn8l2n52bzs7shw6kdz3"))))
+    (build-system python-build-system)
+    ;; A single test out of 5000+ fails.  This test is marked as xfail when
+    ;; pytest-xdist is used.
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'disable-broken-test
+           (lambda _
+             (substitute* "dask/tests/test_threaded.py"
+               (("def test_interrupt\\(\\)" m)
+                (string-append "@pytest.mark.skip(reason=\"Disabled by Guix\")\n"
+                               m)))
+             #t)))))
+    (propagated-inputs
+     `(("python-cloudpickle" ,python-cloudpickle)
+       ("python-numpy" ,python-numpy)
+       ("python-pandas" ,python-pandas)
+       ("python-partd" ,python-partd)
+       ("python-toolz" ,python-toolz)
+       ("python-pyyaml" ,python-pyyaml)))
+    (native-inputs
+     `(("python-pytest" ,python-pytest)
+       ("python-pytest-runner" ,python-pytest-runner)))
+    (home-page "https://github.com/dask/dask/")
+    (synopsis "Parallel computing with task scheduling")
+    (description
+     "Dask is a flexible parallel computing library for analytics.  It
+consists of two components: dynamic task scheduling optimized for computation,
+and large data collections like parallel arrays, dataframes, and lists that
+extend common interfaces like NumPy, Pandas, or Python iterators to
+larger-than-memory or distributed environments.  These parallel collections
+run on top of the dynamic task schedulers. ")
+    (license license:bsd-3)))