From a6a7156aa970b08821a00599d7941740d8a22b70 Mon Sep 17 00:00:00 2001 From: Ricardo Wurmus Date: Wed, 5 Sep 2018 16:00:00 +0200 Subject: gnu: Add python-dask. * gnu/packages/python.scm (python-dask): New variable. --- gnu/packages/python.scm | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'gnu/packages/python.scm') diff --git a/gnu/packages/python.scm b/gnu/packages/python.scm index b771452629..1d4b069d09 100644 --- a/gnu/packages/python.scm +++ b/gnu/packages/python.scm @@ -14246,3 +14246,48 @@ This Python package wraps the Blosc library.") (description "Partd stores key-value pairs. Values are raw bytes. We append on old values. Partd excels at shuffling operations.") (license license:bsd-3))) + +(define-public python-dask + (package + (name "python-dask") + (version "0.19.0") + (source + (origin + (method url-fetch) + (uri (pypi-uri "dask" version)) + (sha256 + (base32 + "1pm1163qb6s22p8fnvj0zlfazihvs7hxjn8l2n52bzs7shw6kdz3")))) + (build-system python-build-system) + ;; A single test out of 5000+ fails. This test is marked as xfail when + ;; pytest-xdist is used. + (arguments + `(#:phases + (modify-phases %standard-phases + (add-after 'unpack 'disable-broken-test + (lambda _ + (substitute* "dask/tests/test_threaded.py" + (("def test_interrupt\\(\\)" m) + (string-append "@pytest.mark.skip(reason=\"Disabled by Guix\")\n" + m))) + #t))))) + (propagated-inputs + `(("python-cloudpickle" ,python-cloudpickle) + ("python-numpy" ,python-numpy) + ("python-pandas" ,python-pandas) + ("python-partd" ,python-partd) + ("python-toolz" ,python-toolz) + ("python-pyyaml" ,python-pyyaml))) + (native-inputs + `(("python-pytest" ,python-pytest) + ("python-pytest-runner" ,python-pytest-runner))) + (home-page "https://github.com/dask/dask/") + (synopsis "Parallel computing with task scheduling") + (description + "Dask is a flexible parallel computing library for analytics. It +consists of two components: dynamic task scheduling optimized for computation, +and large data collections like parallel arrays, dataframes, and lists that +extend common interfaces like NumPy, Pandas, or Python iterators to +larger-than-memory or distributed environments. These parallel collections +run on top of the dynamic task schedulers. ") + (license license:bsd-3))) -- cgit 1.4.1