summary refs log tree commit diff
diff options
context:
space:
mode:
authorLars-Dominik Braun <lars@6xq.net>2021-08-06 09:59:27 +0200
committerLars-Dominik Braun <lars@6xq.net>2021-08-06 09:59:27 +0200
commit196f171c556acb1c94d719b261287eaf23e5116c (patch)
tree38089547360f00ac4004c066d4baa0d55ba1517c
parent0d72f24ac084acf9d69e147a692e5d8bcb2ea21b (diff)
downloadguix-196f171c556acb1c94d719b261287eaf23e5116c.tar.gz
gnu: Add r-datasaurus.
* gnu/packages/statistics.scm (r-datasaurus): New variable.
-rw-r--r--gnu/packages/statistics.scm33
1 files changed, 33 insertions, 0 deletions
diff --git a/gnu/packages/statistics.scm b/gnu/packages/statistics.scm
index 7b50d287f2..5c744f664a 100644
--- a/gnu/packages/statistics.scm
+++ b/gnu/packages/statistics.scm
@@ -13,6 +13,7 @@
 ;;; Copyright © 2018 Alex Branham <alex.branham@gmail.com>
 ;;; Copyright © 2020 Tim Howes <timhowes@lavabit.com>
 ;;; Copyright © 2021 Bonface Munyoki Kilyungi <me@bonfacemunyoki.com>
+;;; Copyright © 2021 Lars-Dominik Braun <lars@6xq.net>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -6310,3 +6311,35 @@ the machinery described in the paper \"Learning interactions via hierarchical
 group-lasso regularization\" (JCGS 2015, Volume 24, Issue 3).
 Michael Lim & Trevor Hastie (2015)")
     (license license:gpl2)))
+
+(define-public r-datasaurus
+  (package
+    (name "r-datasaurus")
+    (version "0.1.4")
+    (source
+      (origin
+        (method url-fetch)
+        (uri (cran-uri "datasauRus" version))
+        (sha256
+          (base32
+            "1w1yhwwrmh95bklacz44wjwynxd8cj3z8b9zvsnzmk18m5a4k0fl"))))
+    (properties `((upstream-name . "datasauRus")))
+    (build-system r-build-system)
+    (native-inputs `(("r-knitr" ,r-knitr)))
+    (home-page
+      "https://github.com/lockedata/datasauRus")
+    (synopsis "Datasets from the Datasaurus Dozen")
+    (description
+     "The Datasaurus Dozen is a set of datasets with the same summary
+statistics.  They retain the same summary statistics despite having radically
+different distributions.  The datasets represent a larger and quirkier object
+lesson that is typically taught via Anscombe's Quartet (available in the
+'datasets' package).  Anscombe's Quartet contains four very different
+distributions with the same summary statistics and as such highlights the value
+of visualisation in understanding data, over and above summary statistics.  As
+well as being an engaging variant on the Quartet, the data is generated in a
+novel way.  The simulated annealing process used to derive datasets from the
+original Datasaurus is detailed in \"Same Stats, Different Graphs: Generating
+Datasets with Varied Appearance and Identical Statistics through Simulated
+Annealing\" @url{doi:10.1145/3025453.3025912}.")
+    (license license:expat)))