summary refs log tree commit diff
diff options
context:
space:
mode:
author Marius Bakke <mbakke@fastmail.com> 2016-08-30 18:49:21 +0100
committer Leo Famulari <leo@famulari.name> 2016-09-10 17:41:38 -0400
commit 84be3b9920120e7cc03095baca06d61b7f3fb741 (patch)
tree 6f249abd59a8913fb09cc274b6ca435990fdd0c2
parent 0e790a7ce0410dc7f991bca4e566fb73004ddd39 (diff)
download guix-84be3b9920120e7cc03095baca06d61b7f3fb741.tar.gz
gnu: Add mash.
* gnu/packages/bioinformatics.scm (mash): New variable.

Signed-off-by: Leo Famulari <leo@famulari.name>
-rw-r--r-- gnu/packages/bioinformatics.scm 57
1 files changed, 57 insertions, 0 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 74360832b0..2c28ac0a24 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -76,6 +76,7 @@
   #:use-module (gnu packages python)
   #:use-module (gnu packages readline)
   #:use-module (gnu packages ruby)
+  #:use-module (gnu packages serialization)
   #:use-module (gnu packages statistics)
   #:use-module (gnu packages tbb)
   #:use-module (gnu packages tex)
@@ -3046,6 +3047,62 @@ sequences).")
               "http://mafft.cbrc.jp/alignment/software/license.txt"
               "BSD-3 with different formatting"))))
 
+(define-public mash  ; Mash 1.1.1, fetched from the upstream GitHub archive
+  (package
+    (name "mash")
+    (version "1.1.1")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "https://github.com/marbl/mash/archive/v"
+                    version ".tar.gz"))
+              (file-name (string-append name "-" version ".tar.gz")) ; mash-1.1.1.tar.gz
+              (sha256
+               (base32
+                "08znbvqq5xknfhmpp3wcj574zvi4p7i8zifi67c9qw9a6ikp42fj"))
+              (modules '((guix build utils)))
+              (snippet
+               ;; Delete bundled kseq; the fix-includes phase uses htslib/kseq.h instead.
+               ;; TODO: Also delete bundled murmurhash and open bloom filter.
+               '(delete-file "src/mash/kseq.h"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:tests? #f ; No tests.
+       #:configure-flags
+       (list  ; point configure at the capnproto and gsl inputs, looked up by label
+        (string-append "--with-capnp=" (assoc-ref %build-inputs "capnproto"))
+        (string-append "--with-gsl=" (assoc-ref %build-inputs "gsl")))
+       #:make-flags (list "CC=gcc") ; presumably no plain cc in the build environment; confirm
+       #:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'fix-includes ; the origin snippet removed src/mash/kseq.h
+           (lambda _
+             (substitute* '("src/mash/Sketch.cpp" "src/mash/CommandFind.cpp")
+               (("^#include \"kseq\\.h\"")
+                "#include \"htslib/kseq.h\""))
+             #t)) ; return #t explicitly as the phase result
+         (add-before 'configure 'autoconf ; presumably no pre-generated configure script; confirm
+           (lambda _ (zero? (system* "autoconf")))))))
+    (native-inputs
+     `(("autoconf" ,autoconf) ; invoked by the autoconf phase above
+       ;; Capnproto and htslib are statically embedded in the final
+       ;; application. Therefore we also list their licenses, below.
+       ("capnproto" ,capnproto)
+       ("htslib" ,htslib)))
+    (inputs
+     `(("gsl" ,gsl) ; passed to configure via --with-gsl above
+       ("zlib" ,zlib)))
+    (supported-systems '("x86_64-linux")) ; NOTE(review): reason for the restriction is not stated here
+    (home-page "https://mash.readthedocs.io")
+    (synopsis "Fast genome and metagenome distance estimation using MinHash")
+    (description "Mash is a fast sequence distance estimator that uses the
+MinHash algorithm and is designed to work with genomes and metagenomes in the
+form of assemblies or reads.")
+    (license (list license:bsd-3          ; Mash
+                   license:expat          ; HTSlib and capnproto
+                   license:public-domain  ; MurmurHash 3
+                   license:cpl1.0))))     ; Open Bloom Filter
+
 (define-public metabat
   (package
     (name "metabat")