summary refs log tree commit diff
path: root/gnu/packages/bioinformatics.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r-- gnu/packages/bioinformatics.scm 636
1 files changed, 595 insertions, 41 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index f5e7285193..1cbf85ff6c 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -27,6 +27,7 @@
   #:use-module (guix utils)
   #:use-module (guix download)
   #:use-module (guix git-download)
+  #:use-module (guix hg-download)
   #:use-module (guix build-system ant)
   #:use-module (guix build-system gnu)
   #:use-module (guix build-system cmake)
@@ -49,18 +50,23 @@
   #:use-module (gnu packages file)
   #:use-module (gnu packages gawk)
   #:use-module (gnu packages gcc)
+  #:use-module (gnu packages gd)
+  #:use-module (gnu packages image)
   #:use-module (gnu packages java)
   #:use-module (gnu packages linux)
+  #:use-module (gnu packages logging)
   #:use-module (gnu packages machine-learning)
   #:use-module (gnu packages maths)
   #:use-module (gnu packages mpi)
   #:use-module (gnu packages ncurses)
   #:use-module (gnu packages pcre)
+  #:use-module (gnu packages pdf)
   #:use-module (gnu packages perl)
   #:use-module (gnu packages pkg-config)
   #:use-module (gnu packages popt)
   #:use-module (gnu packages protobuf)
   #:use-module (gnu packages python)
+  #:use-module (gnu packages readline)
   #:use-module (gnu packages ruby)
   #:use-module (gnu packages statistics)
   #:use-module (gnu packages tbb)
@@ -70,6 +76,7 @@
   #:use-module (gnu packages vim)
   #:use-module (gnu packages web)
   #:use-module (gnu packages xml)
+  #:use-module (gnu packages xorg)
   #:use-module (gnu packages zip)
   #:use-module (srfi srfi-1))
 
@@ -215,7 +222,7 @@ computational cluster.")
 (define-public bedtools
   (package
     (name "bedtools")
-    (version "2.25.0")
+    (version "2.26.0")
     (source (origin
               (method url-fetch)
               (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
@@ -223,7 +230,7 @@ computational cluster.")
               (file-name (string-append name "-" version ".tar.gz"))
               (sha256
                (base32
-                "1ywcy3yfwzhl905b51l0ffjia55h75vv3mw5xkvib04pp6pj548m"))))
+                "0xvri5hnp2iim1cx6mcd5d9f102p5ql41x69rd6106x1c17pinqm"))))
     (build-system gnu-build-system)
     (native-inputs `(("python" ,python-2)))
     (inputs `(("samtools" ,samtools)
@@ -251,6 +258,51 @@ intervals from multiple files in widely-used genomic file formats such as BAM,
 BED, GFF/GTF, VCF.")
     (license license:gpl2)))
 
+;; Later releases of bedtools produce files with more columns than
+;; what Ribotaper expects.
+(define-public bedtools-2.18 ; older bedtools, used as ribotaper's "bedtools" input
+  (package (inherit bedtools) ; fields not overridden below come from bedtools
+    (name "bedtools")
+    (version "2.18.0")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append "https://github.com/arq5x/bedtools2/"
+                                  "archive/v" version ".tar.gz"))
+              (file-name (string-append name "-" version ".tar.gz")) ; i.e. bedtools-2.18.0.tar.gz
+              (sha256
+               (base32
+                "05vrnr8yp7swfagshzpgqmzk1blnwnq8pq5pckzi1m26w98d63vf"))))))
+
+(define-public ribotaper ; RiboTaper pipeline, built with the default gnu-build-system phases
+  (package
+    (name "ribotaper")
+    (version "1.3.1")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append "https://ohlerlab.mdc-berlin.de/"
+                                  "files/RiboTaper/RiboTaper_Version_"
+                                  version ".tar.gz")) ; tarball name embeds the version
+              (sha256
+               (base32
+                "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
+    (build-system gnu-build-system)
+    (inputs
+     `(("bedtools" ,bedtools-2.18) ; pinned: later bedtools emit more columns than Ribotaper expects
+       ("samtools" ,samtools-0.1) ; the samtools-0.1 variant, not the default samtools
+       ("r" ,r)
+       ("r-foreach" ,r-foreach)
+       ("r-xnomial" ,r-xnomial)
+       ("r-domc" ,r-domc)
+       ("r-multitaper" ,r-multitaper)
+       ("r-seqinr" ,r-seqinr)))
+    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
+    (synopsis "Define translated ORFs using ribosome profiling data")
+    (description
+     "Ribotaper is a method for defining translated @dfn{open reading
+frames} (ORFs) using ribosome profiling (ribo-seq) data.  This package
+provides the Ribotaper pipeline.")
+    (license license:gpl3+)))
+
 (define-public bioawk
   (package
     (name "bioawk")
@@ -421,7 +473,7 @@ into separate processes; and more.")
 (define-public blast+
   (package
     (name "blast+")
-    (version "2.2.31")
+    (version "2.4.0")
     (source (origin
               (method url-fetch)
               (uri (string-append
@@ -429,7 +481,7 @@ into separate processes; and more.")
                     version "/ncbi-blast-" version "+-src.tar.gz"))
               (sha256
                (base32
-                "19gq6as4k1jrgsd26158ads6h7v4jca3h4r5dzg1y0m6ya50x5ph"))
+                "14n9jik6vhiwjd3m7bach4xj1pzfn0szbsbyfxybd9l9cc43b6mb"))
               (modules '((guix build utils)))
               (snippet
                '(begin
@@ -1439,6 +1491,40 @@ accessing bigWig files.")
       (native-inputs
        `(("python-setuptools" ,python2-setuptools))))))
 
+(define-public python-dendropy ; DendroPy library; python2-dendropy is its declared Python 2 variant
+  (package
+    (name "python-dendropy")
+    (version "4.1.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "DendroPy" version)) ; project name is spelled "DendroPy" here
+       (sha256
+        (base32
+         "1jfz7gp18wph311w1yygbvjanb3n5mdqal439bb6myw41dwb5m63"))
+       ;; There are two known test failures that will be fixed in the next
+       ;; release after 4.1.0.
+       ;; https://github.com/jeetsukumaran/DendroPy/issues/48
+       (patches (search-patches
+                 "python-dendropy-exclude-failing-tests.patch")))) ; skips the failing tests noted above
+    (build-system python-build-system)
+    (home-page "http://packages.python.org/DendroPy/")
+    (synopsis "Library for phylogenetics and phylogenetic computing")
+    (description
+     "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
+writing, simulation, processing and manipulation of phylogenetic
+trees (phylogenies) and characters.")
+    (license license:bsd-3)
+    (properties `((python2-variant . ,(delay python2-dendropy)))))) ; delayed: python2-dendropy is defined below
+
+(define-public python2-dendropy ; Python 2 counterpart of python-dendropy
+  (let ((base (package-with-python2 (strip-python2-variant python-dendropy)))) ; variant property stripped before conversion
+    (package
+      (inherit base)
+      (native-inputs `(("python2-setuptools" ,python2-setuptools) ; prepended to base's native inputs
+                       ,@(package-native-inputs base))))))
+
+
 (define-public deeptools
   (package
     (name "deeptools")
@@ -1482,7 +1568,7 @@ identify enrichments with functional annotations of the genome.")
 (define-public diamond
   (package
     (name "diamond")
-    (version "0.8.7")
+    (version "0.8.15")
     (source (origin
               (method url-fetch)
               (uri (string-append
@@ -1491,7 +1577,7 @@ identify enrichments with functional annotations of the genome.")
               (file-name (string-append name "-" version ".tar.gz"))
               (sha256
                (base32
-                "15r7gcrqc4pv5d4kvv530zc3xnni92c74y63zrxzidriss7591yx"))))
+                "14n0p28b4i5j8vvz1fl4xj1gxnpg98bj0kr3i90mhn7miwr4pkpw"))))
     (build-system cmake-build-system)
     (arguments
      '(#:tests? #f ; no "check" target
@@ -1517,6 +1603,75 @@ data and settings.")
     (license (license:non-copyleft "file://src/COPYING"
                                    "See src/COPYING in the distribution."))))
 
+(define-public eigensoft ; built from a pinned git commit of DReichLab/EIG
+  (let ((revision "1")
+        (commit "b14d1e202e21e532536ff8004f0419cd5e259dc7"))
+    (package
+      (name "eigensoft")
+      (version (string-append "6.1.2-" ; result: "6.1.2-1.b14d1e202"
+                              revision "."
+                              (string-take commit 9)))
+      (source
+       (origin
+         (method git-fetch)
+         (uri (git-reference
+               (url "https://github.com/DReichLab/EIG.git")
+               (commit commit)))
+         (file-name (string-append "eigensoft-" commit "-checkout")) ; uses the full commit hash
+         (sha256
+          (base32
+           "0f5m6k2j5c16xc3xbywcs989xyc26ncy1zfzp9j9n55n9r4xcaiq"))
+         (modules '((guix build utils)))
+         ;; Remove pre-built binaries.
+         (snippet '(begin
+                     (delete-file-recursively "bin")
+                     (mkdir "bin") ; recreate it empty for the upstream install target
+                     #t))))
+      (build-system gnu-build-system)
+      (arguments
+       `(#:tests? #f                    ; There are no tests.
+         #:make-flags '("CC=gcc")
+         #:phases
+         (modify-phases %standard-phases
+           ;; There is no configure phase, but the Makefile is in a
+           ;; sub-directory.
+           (replace 'configure
+             (lambda _
+               (chdir "src") ; later phases therefore run inside "src"
+               ;; The link flags are incomplete.
+               (substitute* "Makefile"
+                 (("-lgsl") "-lgsl -lm -llapack -llapacke -lpthread"))
+               #t))
+           ;; The provided install target only copies executables to
+           ;; the "bin" directory in the build root.
+           (add-after 'install 'actually-install
+             (lambda* (#:key outputs #:allow-other-keys)
+               (let* ((out (assoc-ref outputs "out"))
+                      (bin  (string-append out "/bin")))
+                 (mkdir-p bin)
+                 (for-each (lambda (file)
+                             (install-file file bin))
+                           (find-files "../bin" ".*")) ; "../bin" because the configure phase entered "src"
+                 #t))))))
+      (inputs
+       `(("gsl" ,gsl)
+         ("lapack" ,lapack)
+         ("openblas" ,openblas)
+         ("perl" ,perl)
+         ("gfortran" ,gfortran "lib"))) ; only the "lib" output of gfortran
+      (home-page "https://github.com/DReichLab/EIG")
+      (synopsis "Tools for population genetics")
+      (description "The EIGENSOFT package provides tools for population
+genetics and stratification correction.  EIGENSOFT implements methods commonly
+used in population genetics analyses such as PCA, computation of Tracy-Widom
+statistics, and finding related individuals in structured populations.  It
+comes with a built-in plotting script and supports multiple file formats and
+quantitative phenotypes.")
+      ;; The license of the eigensoft tools is Expat, but since it's
+      ;; linking with the GNU Scientific Library (GSL) the effective
+      ;; license is the GPL.
+      (license license:gpl3+))))
+
 (define-public edirect
   (package
     (name "edirect")
@@ -2170,7 +2325,7 @@ manipulating HTS data.")
 (define-public htslib
   (package
     (name "htslib")
-    (version "1.2.1")
+    (version "1.3.1")
     (source (origin
               (method url-fetch)
               (uri (string-append
@@ -2178,7 +2333,7 @@ manipulating HTS data.")
                     version "/htslib-" version ".tar.bz2"))
               (sha256
                (base32
-                "1c32ssscbnjwfw3dra140fq7riarp2x990qxybh34nr1p5r17nxx"))))
+                "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))
     (build-system gnu-build-system)
     (arguments
      `(#:phases
@@ -2301,6 +2456,87 @@ command, or queried for specific k-mers with @code{jellyfish query}.")
     ;; files such as lib/jsoncpp.cpp are released under the Expat license.
     (license (list license:gpl3+ license:expat))))
 
+(define-public khmer ; tests run after installation, see 'post-install-check
+  (package
+    (name "khmer")
+    (version "2.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "khmer" version))
+       (sha256
+        (base32
+         "0wb05shqh77v00256qlm68vbbx3kl76fyzihszbz5nhanl4ni33a"))
+       (patches (search-patches "khmer-use-libraries.patch"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'set-paths
+           (lambda* (#:key inputs #:allow-other-keys)
+             ;; Delete bundled libraries.
+             (delete-file-recursively "third-party/zlib")
+             (delete-file-recursively "third-party/bzip2")
+             ;; Replace bundled seqan.
+             (let* ((seqan-all "third-party/seqan")
+                    (seqan-include (string-append
+                                    seqan-all "/core/include")))
+               (delete-file-recursively seqan-all)
+               (copy-recursively (string-append (assoc-ref inputs "seqan")
+                                                "/include/seqan")
+                          (string-append seqan-include "/seqan")))
+             ;; We do not replace the bundled MurmurHash as the canonical
+             ;; repository for this code 'SMHasher' is unsuitable for
+             ;; providing a library.  See
+             ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
+             #t))
+         (add-after 'unpack 'set-cc
+           (lambda _
+             (setenv "CC" "gcc")
+             #t))
+         ;; It is simpler to test after installation.
+         (delete 'check)
+         (add-after 'install 'post-install-check
+           (lambda* (#:key inputs outputs #:allow-other-keys)
+             (let ((out (assoc-ref outputs "out")))
+               (setenv "PATH" ; append the installed bin directory
+                       (string-append
+                        (getenv "PATH")
+                        ":"
+                        out
+                        "/bin"))
+               (setenv "PYTHONPATH" ; append the installed site-packages
+                       (string-append
+                        (getenv "PYTHONPATH")
+                        ":"
+                        out
+                        "/lib/python"
+                        (string-take (string-take-right ; presumably "X.Y" from a path ending in "X.Y.Z" -- confirm
+                                      (assoc-ref inputs "python") 5) 3)
+                        "/site-packages"))
+               (with-directory-excursion "build"
+                 (zero? (system* "nosetests" "khmer" "--attr" ; phase succeeds only if nosetests exits 0
+                                 "!known_failing")))))))))
+    (native-inputs
+     `(("seqan" ,seqan)
+       ("python-nose" ,python-nose)))
+    (inputs
+     `(("zlib" ,zlib)
+       ("bzip2" ,bzip2)
+       ("python-screed" ,python-screed)
+       ("python-bz2file" ,python-bz2file)))
+    (home-page "https://khmer.readthedocs.org/")
+    (synopsis "K-mer counting, filtering and graph traversal library")
+    (description "The khmer software is a set of command-line tools for
+working with DNA shotgun sequencing data from genomes, transcriptomes,
+metagenomes and single cells.  Khmer can make de novo assemblies faster, and
+sometimes better.  Khmer can also identify and fix problems with shotgun
+data.")
+    ;; When building on i686, armhf and mips64el, we get the following error:
+    ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
+    (supported-systems '("x86_64-linux"))
+    (license license:bsd-3)))
+
 (define-public macs
   (package
     (name "macs")
@@ -2333,7 +2569,7 @@ sequencing tag position and orientation.")
 (define-public mafft
   (package
     (name "mafft")
-    (version "7.267")
+    (version "7.299")
     (source (origin
               (method url-fetch)
               (uri (string-append
@@ -2342,7 +2578,7 @@ sequencing tag position and orientation.")
               (file-name (string-append name "-" version ".tgz"))
               (sha256
                (base32
-                "1xl6xq1rfxkws0svrlhyqxhhwbv6r77jwblsdpcyiwzsscw6wlk0"))))
+                "1pwwdy5a17ggx8h9v9y712ilswj27dc3d23r65l56jgjz67y5zc0"))))
     (build-system gnu-build-system)
     (arguments
      `(#:tests? #f ; no automated tests, though there are tests in the read me
@@ -2408,7 +2644,7 @@ sequences).")
 (define-public metabat
   (package
     (name "metabat")
-    (version "0.26.1")
+    (version "0.26.3")
     (source (origin
               (method url-fetch)
               (uri (string-append
@@ -2417,31 +2653,40 @@ sequences).")
               (file-name (string-append name "-" version ".tar.bz2"))
               (sha256
                (base32
-                "0vgrhbaxg4dkxyax2kbigak7w0arhqvw0szwp6gd9wmyilc44kfa"))))
+                "1vpfvgsn8wdsv1g7z73zxcncskx7dy7bw5msg1hhibk25ay11pyg"))))
     (build-system gnu-build-system)
     (arguments
      `(#:phases
        (modify-phases %standard-phases
          (add-after 'unpack 'fix-includes
-                    (lambda _
-                      (substitute* "SConstruct"
-                        (("/include/bam/bam.h")
-                         "/include/samtools/bam.h"))
-                      (substitute* "src/BamUtils.h"
-                        (("^#include \"bam/bam\\.h\"")
-                         "#include \"samtools/bam.h\"")
-                        (("^#include \"bam/sam\\.h\"")
-                         "#include \"samtools/sam.h\""))
-                      (substitute* "src/KseqReader.h"
-                        (("^#include \"bam/kseq\\.h\"")
-                         "#include \"samtools/kseq.h\""))
-                      #t))
+           (lambda _
+             (substitute* "src/BamUtils.h"
+               (("^#include \"bam/bam\\.h\"")
+                "#include \"samtools/bam.h\"")
+               (("^#include \"bam/sam\\.h\"")
+                "#include \"samtools/sam.h\""))
+             (substitute* "src/KseqReader.h"
+               (("^#include \"bam/kseq\\.h\"")
+                "#include \"htslib/kseq.h\""))
+             #t))
          (add-after 'unpack 'fix-scons
-                    (lambda _
-                      (substitute* "SConstruct" ; Do not distribute README
-                        (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)")
-                         ""))
-                      #t))
+            (lambda* (#:key inputs #:allow-other-keys) ; rewrite SConstruct to use store paths from inputs
+              (substitute* "SConstruct"
+                (("^htslib_dir = 'samtools'")
+                 (string-append "htslib_dir = '" ; must keep the variable name htslib_dir
+                                (assoc-ref inputs "htslib")
+                                "'"))
+                (("^samtools_dir = 'samtools'")
+                 (string-append "samtools_dir = '"
+                                (assoc-ref inputs "htslib") ; NOTE(review): htslib path for samtools_dir -- confirm intended
+                                "'"))
+                (("^findStaticOrShared\\('bam', hts_lib")
+                 (string-append "findStaticOrShared('bam', '"
+                                (assoc-ref inputs "samtools")
+                                "/lib'"))
+                ;; Do not distribute README.
+                (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") ""))
+              #t))
          (delete 'configure)
          (replace 'build
                   (lambda* (#:key inputs outputs #:allow-other-keys)
@@ -2451,16 +2696,10 @@ sequences).")
                                      "PREFIX="
                                      (assoc-ref outputs "out"))
                                     (string-append
-                                     "HTSLIB_DIR="
-                                     (assoc-ref inputs "htslib"))
-                                    (string-append
-                                     "SAMTOOLS_DIR="
-                                     (assoc-ref inputs "samtools"))
-                                    (string-append
                                      "BOOST_ROOT="
                                      (assoc-ref inputs "boost"))
                                     "install"))))
-         ;; check and install carried out during build phase
+         ;; Check and install are carried out during build phase.
          (delete 'check)
          (delete 'install))))
     (inputs
@@ -2919,6 +3158,75 @@ while RNA-seq specific modules evaluate sequencing saturation, mapped reads
 distribution, coverage uniformity, strand specificity, etc.")
     (license license:gpl3+)))
 
+(define-public seek
+  ;; There are no release tarballs.  According to the installation
+  ;; instructions at http://seek.princeton.edu/installation.jsp, the latest
+  ;; stable release is identified by this changeset ID.
+  (let ((changeset "2329130")
+        (revision "1"))
+    (package
+      (name "seek")
+      (version (string-append "0-" revision "." changeset)) ; result: "0-1.2329130"
+      (source (origin
+                (method hg-fetch)
+                (uri (hg-reference
+                      (url "https://bitbucket.org/libsleipnir/sleipnir")
+                      (changeset changeset)))
+                (sha256
+                 (base32
+                  "0qrvilwh18dpbhkf92qvxbmay0j75ra3jg2wrhz67gf538zzphsx"))))
+      (build-system gnu-build-system)
+      (arguments
+       `(#:modules ((srfi srfi-1) ; provides `every', used by the extra phases
+                    (guix build gnu-build-system)
+                    (guix build utils))
+         #:phases
+         (let ((dirs '("SeekMiner" ; sub-directories of "tools/" built and installed separately
+                       "SeekEvaluator"
+                       "SeekPrep"
+                       "Distancer"
+                       "Data2DB"
+                       "PCL2Bin")))
+           (modify-phases %standard-phases
+             (add-before 'configure 'bootstrap
+               (lambda _
+                 (zero? (system* "bash" "gen_auto"))))
+             (add-after 'build 'build-additional-tools
+               (lambda* (#:key make-flags #:allow-other-keys)
+                 (every (lambda (dir)
+                          (with-directory-excursion (string-append "tools/" dir)
+                            (zero? (apply system* "make" make-flags))))
+                        dirs)))
+             (add-after 'install 'install-additional-tools
+               (lambda* (#:key make-flags #:allow-other-keys)
+                 (every (lambda (dir)
+                          (with-directory-excursion (string-append "tools/" dir)
+                            ;; Stop at the first tool whose install fails.
+                            (zero? (apply system*
+                                          `("make" ,@make-flags "install")))))
+                        dirs)))))))
+      (inputs
+       `(("gsl" ,gsl)
+         ("boost" ,boost)
+         ("libsvm" ,libsvm)
+         ("readline" ,readline)
+         ("gengetopt" ,gengetopt)
+         ("log4cpp" ,log4cpp)))
+      (native-inputs
+       `(("autoconf" ,autoconf)
+         ("automake" ,automake)
+         ("perl" ,perl)))
+      (home-page "http://seek.princeton.edu")
+      (synopsis "Gene co-expression search engine")
+      (description
+       "SEEK is a computational gene co-expression search engine.  SEEK provides
+biologists with a way to navigate the massive human expression compendium that
+now contains thousands of expression datasets.  SEEK returns a robust ranking
+of co-expressed genes in the biological area of interest defined by the user's
+query genes.  It also prioritizes thousands of expression datasets according
+to the user's query of interest.")
+      (license license:cc-by3.0))))
+
 (define-public samtools
   (package
     (name "samtools")
@@ -3359,6 +3667,44 @@ optimize the sequencing depth, or to screen multiple libraries to avoid low
 complexity samples.")
     (license license:gpl3+)))
 
+(define-public python-screed ; python2-screed is declared as its Python 2 variant below
+  (package
+    (name "python-screed")
+    (version "0.9")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "screed" version))
+       (sha256
+        (base32
+         "18czszp9fkx3j6jr7y5kp6dfialscgddk05mw1zkhh2zhn0jd8i0"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (replace 'check
+           (lambda _
+             (setenv "PYTHONPATH" ; add the source tree itself to the search path
+                     (string-append (getenv "PYTHONPATH") ":."))
+             (zero? (system* "nosetests" "--attr" "!known_failing")))))))
+    (native-inputs `(("python-nose" ,python-nose)))
+    (inputs
+     `(("python-bz2file" ,python-bz2file)))
+    (home-page "https://github.com/dib-lab/screed/")
+    (synopsis "Short read sequence database utilities")
+    (description "Screed parses FASTA and FASTQ files and generates databases.
+Values such as sequence name, sequence description, sequence quality and the
+sequence itself can be retrieved from these databases.")
+    (license license:bsd-3)
+    (properties `((python2-variant . ,(delay python2-screed)))))) ; delayed: python2-screed is defined later
+
+(define-public python2-screed ; Python 2 counterpart of python-screed
+  (let ((base (package-with-python2 (strip-python2-variant python-screed)))) ; variant property stripped before conversion
+    (package
+      (inherit base)
+      (native-inputs `(("python2-setuptools" ,python2-setuptools) ; prepended to base's native inputs
+                       ,@(package-native-inputs base))))))
+
 (define-public sra-tools
   (package
     (name "sra-tools")
@@ -3600,7 +3946,7 @@ application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
 (define-public star
   (package
     (name "star")
-    (version "2.5.1b")
+    (version "2.5.2a")
     (source (origin
               (method url-fetch)
               (uri (string-append "https://github.com/alexdobin/STAR/archive/"
@@ -3608,7 +3954,7 @@ application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
               (file-name (string-append name "-" version ".tar.gz"))
               (sha256
                (base32
-                "0wzcfhkg10apnh0y73xlarfa79xxwxdizicbdl11wb48awk44iq4"))
+                "0xjlsm4p9flln111hv4xx7xy94c2nl53zvdvbk9winmiradjsdra"))
               (modules '((guix build utils)))
               (snippet
                '(begin
@@ -3665,6 +4011,8 @@ by seed clustering and stitching procedure.  In addition to unbiased de novo
 detection of canonical junctions, STAR can discover non-canonical splices and
 chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
 sequences.")
+    ;; Only 64-bit systems are supported according to the README.
+    (supported-systems '("x86_64-linux" "mips64el-linux"))
     ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
     (license license:gpl3+)))
 
@@ -3869,7 +4217,7 @@ data types as well.")
 (define-public vsearch
   (package
     (name "vsearch")
-    (version "2.0.0")
+    (version "2.0.1")
     (source
      (origin
        (method url-fetch)
@@ -3879,7 +4227,7 @@ data types as well.")
        (file-name (string-append name "-" version ".tar.gz"))
        (sha256
         (base32
-         "1sd57abgx077icqrbj36jq9q7pdpzc6dbics2pn1555kisq2jhfh"))
+         "0q7szwbf7r29yxkhb415a8i51vj87kvl5ap7h09w7k9ycb2ywvzw"))
        (modules '((guix build utils)))
        (snippet
         '(begin
@@ -4191,6 +4539,30 @@ In addition, a few low-level concrete subclasses of general interest (e.g.
 S4Vectors package itself.")
     (license license:artistic2.0)))
 
+(define-public r-seqinr ; listed among ribotaper's inputs
+  (package
+    (name "r-seqinr")
+    (version "3.1-3")
+    (source
+      (origin
+        (method url-fetch)
+        (uri (cran-uri "seqinr" version)) ; upstream name passed to cran-uri is "seqinr"
+        (sha256
+          (base32
+            "0bbjfwbqg74wsamb3iz01g0ssdpdpg65gh00y9xlnpk4wb990n4n"))))
+    (build-system r-build-system)
+    (propagated-inputs
+     `(("r-ade4" ,r-ade4))) ; the only propagated dependency
+    (inputs
+     `(("zlib" ,zlib)))
+    (home-page "http://seqinr.r-forge.r-project.org/")
+    (synopsis "Biological sequences retrieval and analysis")
+    (description
+     "This package provides tools for exploratory data analysis and data
+visualization of biological sequence (DNA and protein) data.  It also includes
+utilities for sequence data management under the ACNUC system.")
+    (license license:gpl2+)))
+
 (define-public r-iranges
   (package
     (name "r-iranges")
@@ -4246,6 +4618,31 @@ translation between different chromosome sequence naming conventions (e.g.,
 names in their natural, rather than lexicographic, order.")
     (license license:artistic2.0)))
 
+(define-public r-edger ; propagates r-limma
+  (package
+    (name "r-edger")
+    (version "3.14.0")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "edgeR" version))
+              (sha256
+               (base32
+                "14vrygy7rz5ngaap4kgkvr3j18y5l6m742n79h68plk6iqgmsskn"))))
+    (properties `((upstream-name . "edgeR"))) ; upstream uses mixed case, unlike the package name
+    (build-system r-build-system)
+    (propagated-inputs
+     `(("r-limma" ,r-limma)))
+    (home-page "http://bioinf.wehi.edu.au/edgeR")
+    (synopsis "EdgeR does empirical analysis of digital gene expression data")
+    (description "This package can do differential expression analysis of
+RNA-seq expression profiles with biological replication.  It implements a range
+of statistical methodology based on the negative binomial distributions,
+including empirical Bayes estimation, exact tests, generalized linear models
+and quasi-likelihood tests.  It can also be applied to differential signal
+analysis of other types of genomic data that produce counts, including
+ChIP-seq, SAGE and CAGE.")
+    (license license:gpl2+)))
+
 (define-public r-variantannotation
   (package
     (name "r-variantannotation")
@@ -4278,6 +4675,25 @@ names in their natural, rather than lexicographic, order.")
 coding changes and predict coding outcomes.")
     (license license:artistic2.0)))
 
+(define-public r-limma ; propagated input of r-edger
+  (package
+    (name "r-limma")
+    (version "3.28.14")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "limma" version)) ; upstream name passed to bioconductor-uri is "limma"
+              (sha256
+               (base32
+                "1jgn66ajafhjlqpfcw2p85h6ah8mgmz66znmsw6pcapia7d34akw"))))
+    (build-system r-build-system) ; no inputs or extra arguments are declared
+    (home-page "http://bioinf.wehi.edu.au/limma")
+    (synopsis "Package for linear models for microarray and RNA-seq data")
+    (description "This package can be used for the analysis of gene expression
+studies, especially the use of linear models for analysing designed experiments
+and the assessment of differential expression.  The analysis methods apply to
+different technologies, including microarrays, RNA-seq, and quantitative PCR.")
+    (license license:gpl2+)))
+
 (define-public r-xvector
   (package
     (name "r-xvector")
@@ -5237,6 +5653,115 @@ two-dimensional genome scans.")
 libraries for systems that do not have these available via other means.")
     (license license:artistic2.0)))
 
+(define-public emboss ; configure script is regenerated with autoreconf after patching configure.in
+  (package
+    (name "emboss")
+    (version "6.5.7")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
+                                  (version-major+minor version) ".0/" ; presumably the "6.5.0/" directory -- confirm
+                                  "EMBOSS-" version ".tar.gz"))
+              (sha256
+               (base32
+                "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:configure-flags
+       (list (string-append "--with-hpdf=" ; pass the libharu input's path to configure
+                            (assoc-ref %build-inputs "libharu")))
+       #:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'fix-checks
+           (lambda _
+             ;; The PNGDRIVER tests check for the presence of libgd, libpng
+             ;; and zlib, but assume that they are all found at the same
+             ;; prefix.
+             (substitute* "configure.in"
+               (("CHECK_PNGDRIVER") ; replaced by unconditional link flags and PNG defines
+                "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
+AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
+AM_CONDITIONAL(AMPNG, true)"))
+             #t))
+         (add-after 'unpack 'disable-update-check
+           (lambda _
+             ;; At build time there is no connection to the Internet, so
+             ;; looking for updates will not work.
+             (substitute* "Makefile.am"
+               (("\\$\\(bindir\\)/embossupdate") "")) ; drop the embossupdate invocation
+             #t))
+         (add-before 'configure 'autogen
+           (lambda _ (zero? (system* "autoreconf" "-vif"))))))) ; phase fails unless autoreconf exits 0
+    (inputs
+     `(("perl" ,perl)
+       ("libpng" ,libpng)
+       ("gd" ,gd)
+       ("libx11" ,libx11)
+       ("libharu" ,libharu)
+       ("zlib" ,zlib)))
+    (native-inputs
+     `(("autoconf" ,autoconf)
+       ("automake" ,automake)
+       ("libtool" ,libtool)
+       ("pkg-config" ,pkg-config)))
+    (home-page "http://emboss.sourceforge.net")
+    (synopsis "Molecular biology analysis suite")
+    (description "EMBOSS is the \"European Molecular Biology Open Software
+Suite\".  EMBOSS is an analysis package specially developed for the needs of
+the molecular biology (e.g. EMBnet) user community.  The software
+automatically copes with data in a variety of formats and even allows
+transparent retrieval of sequence data from the web.  It also provides a
+number of libraries for the development of software in the field of molecular
+biology.  EMBOSS also integrates a range of currently available packages and
+tools for sequence analysis into a seamless whole.")
+    (license license:gpl2+)))
+
+(define-public bits ; built from a pinned git commit of arq5x/bits
+  (let ((revision "1")
+        (commit "3cc4567896d9d6442923da944beb704750a08d2d"))
+    (package
+      (name "bits")
+      ;; The version is 2.13.0 even though no release archives have been
+      ;; published as yet.
+      (version (string-append "2.13.0-" revision "." (string-take commit 9))) ; "2.13.0-1.3cc456789"
+      (source (origin
+                (method git-fetch)
+                (uri (git-reference
+                      (url "https://github.com/arq5x/bits.git")
+                      (commit commit)))
+                (file-name (string-append name "-" version "-checkout"))
+                (sha256
+                 (base32
+                  "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
+      (build-system gnu-build-system)
+      (arguments
+       `(#:tests? #f ;no tests included
+         #:phases
+         (modify-phases %standard-phases
+           (delete 'configure) ; the build goes straight to the Makefile
+           (add-after 'unpack 'remove-cuda
+             (lambda _
+               (substitute* "Makefile"
+                 ((".*_cuda") "") ; erase everything up to and including "_cuda" on matching lines
+                 (("(bits_test_intersections) \\\\" _ match) match)) ; drop the backslash after this entry
+               #t))
+           (replace 'install
+             (lambda* (#:key outputs #:allow-other-keys)
+               (copy-recursively ; install by copying the built "bin" directory
+                "bin" (string-append (assoc-ref outputs "out") "/bin"))
+               #t)))))
+      (inputs
+       `(("gsl" ,gsl)
+         ("zlib" ,zlib)))
+      (home-page "https://github.com/arq5x/bits")
+      (synopsis "Implementation of binary interval search algorithm")
+      (description "This package provides an implementation of the
+BITS (Binary Interval Search) algorithm, an approach to interval set
+intersection.  It is especially suited for the comparison of diverse genomic
+datasets and the exploration of large datasets of genome
+intervals (e.g. genes, sequence alignments).")
+      (license license:gpl2))))
+
 (define-public piranha
   ;; There is no release tarball for the latest version.  The latest commit is
   ;; older than one year at the time of this writing.
@@ -5372,3 +5897,32 @@ group or two ChIP groups run under different conditions.")
       (description "This program compares version strings.  It intends to be a
 replacement for strverscmp.")
       (license license:gpl3+))))
+
+(define-public multiqc ; default python-build-system phases, no custom arguments
+  (package
+    (name "multiqc")
+    (version "0.6")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "multiqc" version)) ; upstream name passed to pypi-uri is "multiqc"
+       (sha256
+        (base32
+         "0avw11h63ldpxy5pizc3wl1wa01ha7q10wb240nggsjz3jaqvyiy"))))
+    (build-system python-build-system)
+    (propagated-inputs ; Python libraries declared as propagated dependencies
+     `(("python-jinja2" ,python-jinja2)
+       ("python-simplejson" ,python-simplejson)
+       ("python-pyyaml" ,python-pyyaml)
+       ("python-click" ,python-click)
+       ("python-matplotlib" ,python-matplotlib)
+       ("python-numpy" ,python-numpy)))
+    (native-inputs
+     `(("python-setuptools" ,python-setuptools))) ; the only native input
+    (home-page "http://multiqc.info")
+    (synopsis "Aggregate bioinformatics analysis reports")
+    (description
+     "MultiQC is a tool to aggregate bioinformatics results across many
+samples into a single report.  It contains modules for a large number of
+common bioinformatics tools.")
+    (license license:gpl3)))