summary refs log tree commit diff
path: root/gnu/packages/bioinformatics.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r--gnu/packages/bioinformatics.scm918
1 files changed, 233 insertions, 685 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index d37d93047e..e008ca16fb 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -20,6 +20,7 @@
 ;;; Copyright © 2020 Pierre Langlois <pierre.langlois@gmx.com>
 ;;; Copyright © 2020 Bonface Munyoki Kilyungi <bonfacemunyoki@gmail.com>
 ;;; Copyright © 2021 Tim Howes <timhowes@lavabit.com>
+;;; Copyright © 2021 Hong Li <hli@mdc-berlin.de>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -52,6 +53,7 @@
   #:use-module (guix build-system ocaml)
   #:use-module (guix build-system perl)
   #:use-module (guix build-system python)
+  #:use-module (guix build-system qt)
   #:use-module (guix build-system r)
   #:use-module (guix build-system ruby)
   #:use-module (guix build-system scons)
@@ -125,6 +127,7 @@
   #:use-module (gnu packages python-science)
   #:use-module (gnu packages python-web)
   #:use-module (gnu packages python-xyz)
+  #:use-module (gnu packages qt)
   #:use-module (gnu packages rdf)
   #:use-module (gnu packages readline)
   #:use-module (gnu packages ruby)
@@ -2692,8 +2695,7 @@ trees (phylogenies) and characters.")
 
 (define-public python2-dendropy
   (let ((base (package-with-python2 python-dendropy)))
-    (package
-      (inherit base)
+    (package/inherit base
       (arguments
        `(#:phases
          (modify-phases %standard-phases
@@ -3762,15 +3764,17 @@ particular, reads spanning multiple exons.")
 (define-public hisat2
   (package
     (name "hisat2")
-    (version "2.0.5")
+    (version "2.2.1")
     (source
      (origin
-       (method url-fetch)
-       (uri (string-append "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2"
-                           "/downloads/hisat2-" version "-source.zip"))
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/DaehwanKimLab/hisat2/")
+             (commit (string-append "v" version))))
+       (file-name (git-file-name name version))
        (sha256
         (base32
-         "0lywnr8kijwsc2aw10dwxic0n0yvip6fl3rjlvc8zzwahamy4x7g"))))
+         "0lmzdhzjkvxw7n5w40pbv5fgzd4cz0f9pxczswn3d4cr0k10k754"))))
     (build-system gnu-build-system)
     (arguments
      `(#:tests? #f                      ; no check target
@@ -3783,9 +3787,12 @@ particular, reads spanning multiple exons.")
          (add-after 'unpack 'make-deterministic
            (lambda _
              (substitute* "Makefile"
-               (("`date`") "0"))
-             #t))
+               (("`date`") "0"))))
          (delete 'configure)
+         (add-before 'build 'build-manual
+           (lambda _
+             (mkdir-p "doc")
+             (invoke "make" "doc")))
          (replace 'install
            (lambda* (#:key outputs #:allow-other-keys)
              (let* ((out (assoc-ref outputs "out"))
@@ -3796,13 +3803,13 @@ particular, reads spanning multiple exons.")
                 (find-files "."
                             "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$"))
                (mkdir-p doc)
-               (install-file "doc/manual.inc.html" doc))
-             #t)))))
+               (install-file "doc/manual.inc.html" doc)))))))
     (native-inputs
-     `(("unzip" ,unzip)                 ; needed for archive from ftp
-       ("perl" ,perl)
+     `(("perl" ,perl)
        ("pandoc" ,pandoc)))             ; for documentation
-    (home-page "https://ccb.jhu.edu/software/hisat2/index.shtml")
+    (inputs
+     `(("python" ,python-wrapper)))
+    (home-page "https://daehwankimlab.github.io/hisat2/")
     (synopsis "Graph-based alignment of genomic sequencing reads")
     (description "HISAT2 is a fast and sensitive alignment program for mapping
 next-generation sequencing reads (both DNA and RNA) to a population of human
@@ -6534,7 +6541,7 @@ writing files into the .sra format.")
      `(("source" ,source)
        ("tar" ,tar)
        ("xz" ,xz)))
-    (home-page "http://www.seqan.de")
+    (home-page "https://www.seqan.de")
     (synopsis "Library for nucleotide sequence analysis")
     (description
      "SeqAn is a C++ library of efficient algorithms and data structures for
@@ -6550,7 +6557,7 @@ bioinformatics file formats, sequence alignment, and more.")
     (version "1.4.2")
     (source (origin
               (method url-fetch)
-              (uri (string-append "http://packages.seqan.de/seqan-library/"
+              (uri (string-append "https://packages.seqan.de/seqan-library/"
                                   "seqan-library-" version ".tar.bz2"))
               (sha256
                (base32
@@ -7311,387 +7318,6 @@ includes software to
 ")
       (license license:cc0))))
 
-(define-public r-genefilter
-  (package
-    (name "r-genefilter")
-    (version "1.72.1")
-    (source
-     (origin
-       (method url-fetch)
-       (uri (bioconductor-uri "genefilter" version))
-       (sha256
-        (base32
-         "1c6h3qnjvphs977qhv5vafvsb108r0q7xhaayly6qv6adqfn94rn"))))
-    (build-system r-build-system)
-    (native-inputs
-     `(("gfortran" ,gfortran)
-       ("r-knitr" ,r-knitr)))
-    (propagated-inputs
-     `(("r-annotate" ,r-annotate)
-       ("r-annotationdbi" ,r-annotationdbi)
-       ("r-biobase" ,r-biobase)
-       ("r-biocgenerics" ,r-biocgenerics)
-       ("r-survival" ,r-survival)))
-    (home-page "https://bioconductor.org/packages/genefilter")
-    (synopsis "Filter genes from high-throughput experiments")
-    (description
-     "This package provides basic functions for filtering genes from
-high-throughput sequencing experiments.")
-    (license license:artistic2.0)))
-
-(define-public r-deseq2
-  (package
-    (name "r-deseq2")
-    (version "1.30.0")
-    (source
-     (origin
-       (method url-fetch)
-       (uri (bioconductor-uri "DESeq2" version))
-       (sha256
-        (base32
-         "0q2f9cywrcmp1p7ii8f45g4dk4hsnjflq3yqhsxgnpv9fw338qpp"))))
-    (properties `((upstream-name . "DESeq2")))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-biobase" ,r-biobase)
-       ("r-biocgenerics" ,r-biocgenerics)
-       ("r-biocparallel" ,r-biocparallel)
-       ("r-genefilter" ,r-genefilter)
-       ("r-geneplotter" ,r-geneplotter)
-       ("r-genomicranges" ,r-genomicranges)
-       ("r-ggplot2" ,r-ggplot2)
-       ("r-iranges" ,r-iranges)
-       ("r-locfit" ,r-locfit)
-       ("r-rcpp" ,r-rcpp)
-       ("r-rcpparmadillo" ,r-rcpparmadillo)
-       ("r-s4vectors" ,r-s4vectors)
-       ("r-summarizedexperiment" ,r-summarizedexperiment)))
-    (native-inputs
-     `(("r-knitr" ,r-knitr)))
-    (home-page "https://bioconductor.org/packages/DESeq2")
-    (synopsis "Differential gene expression analysis")
-    (description
-     "This package provides functions to estimate variance-mean dependence in
-count data from high-throughput nucleotide sequencing assays and test for
-differential expression based on a model using the negative binomial
-distribution.")
-    (license license:lgpl3+)))
-
-(define-public r-dexseq
-  (package
-    (name "r-dexseq")
-    (version "1.36.0")
-    (source
-     (origin
-       (method url-fetch)
-       (uri (bioconductor-uri "DEXSeq" version))
-       (sha256
-        (base32
-         "0wfjb42xcr4wjy8a654b74411dky8hp6sp8xdwf0sxqgsxy106qi"))))
-    (properties `((upstream-name . "DEXSeq")))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-annotationdbi" ,r-annotationdbi)
-       ("r-biobase" ,r-biobase)
-       ("r-biocgenerics" ,r-biocgenerics)
-       ("r-biocparallel" ,r-biocparallel)
-       ("r-biomart" ,r-biomart)
-       ("r-deseq2" ,r-deseq2)
-       ("r-genefilter" ,r-genefilter)
-       ("r-geneplotter" ,r-geneplotter)
-       ("r-genomicranges" ,r-genomicranges)
-       ("r-hwriter" ,r-hwriter)
-       ("r-iranges" ,r-iranges)
-       ("r-rcolorbrewer" ,r-rcolorbrewer)
-       ("r-rsamtools" ,r-rsamtools)
-       ("r-s4vectors" ,r-s4vectors)
-       ("r-statmod" ,r-statmod)
-       ("r-stringr" ,r-stringr)
-       ("r-summarizedexperiment" ,r-summarizedexperiment)))
-    (native-inputs
-     `(("r-knitr" ,r-knitr)))
-    (home-page "https://bioconductor.org/packages/DEXSeq")
-    (synopsis "Inference of differential exon usage in RNA-Seq")
-    (description
-     "This package is focused on finding differential exon usage using RNA-seq
-exon counts between samples with different experimental designs.  It provides
-functions that allows the user to make the necessary statistical tests based
-on a model that uses the negative binomial distribution to estimate the
-variance between biological replicates and generalized linear models for
-testing.  The package also provides functions for the visualization and
-exploration of the results.")
-    (license license:gpl3+)))
-
-(define-public r-annotationforge
-  (package
-    (name "r-annotationforge")
-    (version "1.32.0")
-    (source
-     (origin
-       (method url-fetch)
-       (uri (bioconductor-uri "AnnotationForge" version))
-       (sha256
-        (base32
-         "0y3820dkvwz09wlmz9drx6gqpsr9cwppaiz40zafwfxbz65y8px7"))))
-    (properties
-     `((upstream-name . "AnnotationForge")))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-annotationdbi" ,r-annotationdbi)
-       ("r-biobase" ,r-biobase)
-       ("r-biocgenerics" ,r-biocgenerics)
-       ("r-dbi" ,r-dbi)
-       ("r-rcurl" ,r-rcurl)
-       ("r-rsqlite" ,r-rsqlite)
-       ("r-s4vectors" ,r-s4vectors)
-       ("r-xml" ,r-xml)))
-    (native-inputs
-     `(("r-knitr" ,r-knitr)))
-    (home-page "https://bioconductor.org/packages/AnnotationForge")
-    (synopsis "Code for building annotation database packages")
-    (description
-     "This package provides code for generating Annotation packages and their
-databases.  Packages produced are intended to be used with AnnotationDbi.")
-    (license license:artistic2.0)))
-
-(define-public r-rbgl
-  (package
-    (name "r-rbgl")
-    (version "1.66.0")
-    (source
-     (origin
-       (method url-fetch)
-       (uri (bioconductor-uri "RBGL" version))
-       (sha256
-        (base32
-         "016vyzgixb3gjpzi21rbs6ngnnqcxr77krwjjf1ldnzzj8vqrqsz"))))
-    (properties `((upstream-name . "RBGL")))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-bh" ,r-bh)
-       ("r-graph" ,r-graph)))
-    (home-page "https://www.bioconductor.org/packages/RBGL")
-    (synopsis "Interface to the Boost graph library")
-    (description
-     "This package provides a fairly extensive and comprehensive interface to
-the graph algorithms contained in the Boost library.")
-    (license license:artistic2.0)))
-
-(define-public r-gseabase
-  (package
-    (name "r-gseabase")
-    (version "1.52.1")
-    (source
-     (origin
-       (method url-fetch)
-       (uri (bioconductor-uri "GSEABase" version))
-       (sha256
-        (base32
-         "0dawh1kjmf6921jm77j2s2phrq5237pjc4sdh8fkln89gf48zx6i"))))
-    (properties `((upstream-name . "GSEABase")))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-annotate" ,r-annotate)
-       ("r-annotationdbi" ,r-annotationdbi)
-       ("r-biobase" ,r-biobase)
-       ("r-biocgenerics" ,r-biocgenerics)
-       ("r-graph" ,r-graph)
-       ("r-xml" ,r-xml)))
-    (native-inputs
-     `(("r-knitr" ,r-knitr)))
-    (home-page "https://bioconductor.org/packages/GSEABase")
-    (synopsis "Gene set enrichment data structures and methods")
-    (description
-     "This package provides classes and methods to support @dfn{Gene Set
-Enrichment Analysis} (GSEA).")
-    (license license:artistic2.0)))
-
-(define-public r-category
-  (package
-    (name "r-category")
-    (version "2.56.0")
-    (source
-     (origin
-       (method url-fetch)
-       (uri (bioconductor-uri "Category" version))
-       (sha256
-        (base32
-         "0m77wpnica0h2ia9ajdaiga4plgz1s9wls6pdnxzk7kwl8a68wkr"))))
-    (properties `((upstream-name . "Category")))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-annotate" ,r-annotate)
-       ("r-annotationdbi" ,r-annotationdbi)
-       ("r-biobase" ,r-biobase)
-       ("r-biocgenerics" ,r-biocgenerics)
-       ("r-genefilter" ,r-genefilter)
-       ("r-graph" ,r-graph)
-       ("r-gseabase" ,r-gseabase)
-       ("r-matrix" ,r-matrix)
-       ("r-rbgl" ,r-rbgl)
-       ("r-dbi" ,r-dbi)))
-    (home-page "https://bioconductor.org/packages/Category")
-    (synopsis "Category analysis")
-    (description
-     "This package provides a collection of tools for performing category
-analysis.")
-    (license license:artistic2.0)))
-
-(define-public r-gostats
-  (package
-    (name "r-gostats")
-    (version "2.56.0")
-    (source
-     (origin
-       (method url-fetch)
-       (uri (bioconductor-uri "GOstats" version))
-       (sha256
-        (base32
-         "18q8p0fv9fl2r6zjxknfjwqxr69dlyxy6c8amzn6c6dwjq1cxk6j"))))
-    (properties `((upstream-name . "GOstats")))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-annotate" ,r-annotate)
-       ("r-annotationdbi" ,r-annotationdbi)
-       ("r-annotationforge" ,r-annotationforge)
-       ("r-biobase" ,r-biobase)
-       ("r-category" ,r-category)
-       ("r-go-db" ,r-go-db)
-       ("r-graph" ,r-graph)
-       ("r-rgraphviz" ,r-rgraphviz)
-       ("r-rbgl" ,r-rbgl)))
-    (home-page "https://bioconductor.org/packages/GOstats")
-    (synopsis "Tools for manipulating GO and microarrays")
-    (description
-     "This package provides a set of tools for interacting with GO and
-microarray data.  A variety of basic manipulation tools for graphs, hypothesis
-testing and other simple calculations.")
-    (license license:artistic2.0)))
-
-(define-public r-shortread
-  (package
-    (name "r-shortread")
-    (version "1.48.0")
-    (source
-     (origin
-       (method url-fetch)
-       (uri (bioconductor-uri "ShortRead" version))
-       (sha256
-        (base32
-         "0w4m8d3h660mmr2ymp206r1n4aqssxmkv8yxkbr5y1swrahxzfk9"))))
-    (properties `((upstream-name . "ShortRead")))
-    (build-system r-build-system)
-    (inputs
-     `(("zlib" ,zlib)))
-    (propagated-inputs
-     `(("r-biobase" ,r-biobase)
-       ("r-biocgenerics" ,r-biocgenerics)
-       ("r-biocparallel" ,r-biocparallel)
-       ("r-biostrings" ,r-biostrings)
-       ("r-genomeinfodb" ,r-genomeinfodb)
-       ("r-genomicalignments" ,r-genomicalignments)
-       ("r-genomicranges" ,r-genomicranges)
-       ("r-rhtslib" ,r-rhtslib)
-       ("r-hwriter" ,r-hwriter)
-       ("r-iranges" ,r-iranges)
-       ("r-lattice" ,r-lattice)
-       ("r-latticeextra" ,r-latticeextra)
-       ("r-rsamtools" ,r-rsamtools)
-       ("r-s4vectors" ,r-s4vectors)
-       ("r-xvector" ,r-xvector)
-       ("r-zlibbioc" ,r-zlibbioc)))
-    (home-page "https://bioconductor.org/packages/ShortRead")
-    (synopsis "FASTQ input and manipulation tools")
-    (description
-     "This package implements sampling, iteration, and input of FASTQ files.
-It includes functions for filtering and trimming reads, and for generating a
-quality assessment report.  Data are represented as
-@code{DNAStringSet}-derived objects, and easily manipulated for a diversity of
-purposes.  The package also contains legacy support for early single-end,
-ungapped alignment formats.")
-    (license license:artistic2.0)))
-
-(define-public r-systempiper
-  (package
-    (name "r-systempiper")
-    (version "1.24.3")
-    (source
-     (origin
-       (method url-fetch)
-       (uri (bioconductor-uri "systemPipeR" version))
-       (sha256
-        (base32
-         "0ffazyl2q9plbhwlxi04s3fvnli6qj95n7bkjc21535bbi08xfki"))))
-    (properties `((upstream-name . "systemPipeR")))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-annotate" ,r-annotate)
-       ("r-assertthat" ,r-assertthat)
-       ("r-batchtools" ,r-batchtools)
-       ("r-biostrings" ,r-biostrings)
-       ("r-deseq2" ,r-deseq2)
-       ("r-dot" ,r-dot)
-       ("r-edger" ,r-edger)
-       ("r-genomicfeatures" ,r-genomicfeatures)
-       ("r-genomicranges" ,r-genomicranges)
-       ("r-ggplot2" ,r-ggplot2)
-       ("r-go-db" ,r-go-db)
-       ("r-gostats" ,r-gostats)
-       ("r-iranges" ,r-iranges)
-       ("r-limma" ,r-limma)
-       ("r-magrittr" ,r-magrittr)
-       ("r-pheatmap" ,r-pheatmap)
-       ("r-rjson" ,r-rjson)
-       ("r-rsamtools" ,r-rsamtools)
-       ("r-rsvg" ,r-rsvg)
-       ("r-shortread" ,r-shortread)
-       ("r-stringr" ,r-stringr)
-       ("r-summarizedexperiment" ,r-summarizedexperiment)
-       ("r-yaml" ,r-yaml)
-       ("r-variantannotation" ,r-variantannotation)))
-    (native-inputs
-     `(("r-knitr" ,r-knitr)))
-    (home-page "https://github.com/tgirke/systemPipeR")
-    (synopsis "Next generation sequencing workflow and reporting environment")
-    (description
-     "This R package provides tools for building and running automated
-end-to-end analysis workflows for a wide range of @dfn{next generation
-sequence} (NGS) applications such as RNA-Seq, ChIP-Seq, VAR-Seq and Ribo-Seq.
-Important features include a uniform workflow interface across different NGS
-applications, automated report generation, and support for running both R and
-command-line software, such as NGS aligners or peak/variant callers, on local
-computers or compute clusters.  Efficient handling of complex sample sets and
-experimental designs is facilitated by a consistently implemented sample
-annotation infrastructure.")
-    (license license:artistic2.0)))
-
-(define-public r-grohmm
-  (package
-    (name "r-grohmm")
-    (version "1.24.0")
-    (source
-     (origin
-       (method url-fetch)
-       (uri (bioconductor-uri "groHMM" version))
-       (sha256
-        (base32
-         "08pap9wsaxl4jjlc1py0rc019gmi6daa0f9cr3ih1d97wybncanx"))))
-    (properties `((upstream-name . "groHMM")))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-genomeinfodb" ,r-genomeinfodb)
-       ("r-genomicalignments" ,r-genomicalignments)
-       ("r-genomicranges" ,r-genomicranges)
-       ("r-iranges" ,r-iranges)
-       ("r-mass" ,r-mass)
-       ("r-rtracklayer" ,r-rtracklayer)
-       ("r-s4vectors" ,r-s4vectors)))
-    (home-page "https://github.com/Kraus-Lab/groHMM")
-    (synopsis "GRO-seq analysis pipeline")
-    (description
-     "This package provides a pipeline for the analysis of GRO-seq data.")
-    (license license:gpl3+)))
-
 (define-public vsearch
   (package
     (name "vsearch")
@@ -7996,286 +7622,6 @@ including VCF header and contents in RDF and JSON.")
     (home-page "https://github.com/vcflib/bio-vcf")
     (license license:expat)))
 
-(define-public r-biocviews
-  (package
-    (name "r-biocviews")
-    (version "1.58.1")
-    (source (origin
-              (method url-fetch)
-              (uri (bioconductor-uri "biocViews" version))
-              (sha256
-               (base32
-                "1by2639z7n62z84dr8rj9jz12gsd1k8q42zsnxacxbwfwp6h0cl4"))))
-    (properties
-     `((upstream-name . "biocViews")))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-biobase" ,r-biobase)
-       ("r-biocmanager" ,r-biocmanager)
-       ("r-graph" ,r-graph)
-       ("r-rbgl" ,r-rbgl)
-       ("r-rcurl" ,r-rcurl)
-       ("r-xml" ,r-xml)
-       ("r-runit" ,r-runit)))
-    (home-page "https://bioconductor.org/packages/biocViews")
-    (synopsis "Bioconductor package categorization helper")
-    (description "The purpose of biocViews is to create HTML pages that
-categorize packages in a Bioconductor package repository according to keywords,
-also known as views, in a controlled vocabulary.")
-    (license license:artistic2.0)))
-
-(define-public r-biocstyle
-  (package
-    (name "r-biocstyle")
-    (version "2.18.1")
-    (source (origin
-              (method url-fetch)
-              (uri (bioconductor-uri "BiocStyle" version))
-              (sha256
-               (base32
-                "0rsxyna4dd99x42vc82mlkxx774vb9375llpakg53max1hhwkrqp"))))
-    (properties
-     `((upstream-name . "BiocStyle")))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-biocmanager" ,r-biocmanager)
-       ("r-bookdown" ,r-bookdown)
-       ("r-knitr" ,r-knitr)
-       ("r-rmarkdown" ,r-rmarkdown)
-       ("r-yaml" ,r-yaml)))
-    (native-inputs
-     `(("r-knitr" ,r-knitr)))
-    (home-page "https://bioconductor.org/packages/BiocStyle")
-    (synopsis "Bioconductor formatting styles")
-    (description "This package provides standard formatting styles for
-Bioconductor PDF and HTML documents.  Package vignettes illustrate use and
-functionality.")
-    (license license:artistic2.0)))
-
-(define-public r-bioccheck
-  (package
-    (name "r-bioccheck")
-    (version "1.26.0")
-    (source (origin
-              (method url-fetch)
-              (uri (bioconductor-uri "BiocCheck" version))
-              (sha256
-               (base32
-                "1hyncn9zqj432da95k86rm5b28nbwrvzm52jbhisifkxj1j43cib"))))
-    (properties
-     `((upstream-name . "BiocCheck")))
-    (build-system r-build-system)
-    (arguments
-     '(#:phases
-       (modify-phases %standard-phases
-         ;; This package can be used by calling BiocCheck(<package>) from
-         ;; within R, or by running R CMD BiocCheck <package>.  This phase
-         ;; makes sure the latter works.  For this to work, the BiocCheck
-         ;; script must be somewhere on the PATH (not the R bin directory).
-         (add-after 'install 'install-bioccheck-subcommand
-           (lambda* (#:key outputs #:allow-other-keys)
-             (let* ((out (assoc-ref outputs "out"))
-                    (dest-dir (string-append out "/bin"))
-                    (script-dir
-                     (string-append out "/site-library/BiocCheck/script/")))
-               (mkdir-p dest-dir)
-               (symlink (string-append script-dir "/checkBadDeps.R")
-                        (string-append dest-dir "/checkBadDeps.R"))
-               (symlink (string-append script-dir "/BiocCheck")
-                        (string-append dest-dir "/BiocCheck")))
-             #t)))))
-    (propagated-inputs
-     `(("r-codetools" ,r-codetools)
-       ("r-graph" ,r-graph)
-       ("r-httr" ,r-httr)
-       ("r-knitr" ,r-knitr)
-       ("r-optparse" ,r-optparse)
-       ("r-biocmanager" ,r-biocmanager)
-       ("r-biocviews" ,r-biocviews)
-       ("r-stringdist" ,r-stringdist)))
-    (native-inputs
-     `(("r-knitr" ,r-knitr)))
-    (home-page "https://bioconductor.org/packages/BiocCheck")
-    (synopsis "Executes Bioconductor-specific package checks")
-    (description "This package contains tools to perform additional quality
-checks on R packages that are to be submitted to the Bioconductor repository.")
-    (license license:artistic2.0)))
-
-(define-public r-s4vectors
-  (package
-    (name "r-s4vectors")
-    (version "0.28.1")
-    (source (origin
-              (method url-fetch)
-              (uri (bioconductor-uri "S4Vectors" version))
-              (sha256
-               (base32
-                "0fhf4lsfxrim7glazh6ng46ykzaly5ggwpg170vcz4cc24prv0rh"))))
-    (properties
-     `((upstream-name . "S4Vectors")))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-biocgenerics" ,r-biocgenerics)))
-    (home-page "https://bioconductor.org/packages/S4Vectors")
-    (synopsis "S4 implementation of vectors and lists")
-    (description
-     "The S4Vectors package defines the @code{Vector} and @code{List} virtual
-classes and a set of generic functions that extend the semantic of ordinary
-vectors and lists in R.  Package developers can easily implement vector-like
-or list-like objects as concrete subclasses of @code{Vector} or @code{List}.
-In addition, a few low-level concrete subclasses of general interest (e.g.
-@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the
-S4Vectors package itself.")
-    (license license:artistic2.0)))
-
-(define-public r-iranges
-  (package
-    (name "r-iranges")
-    (version "2.24.1")
-    (source (origin
-              (method url-fetch)
-              (uri (bioconductor-uri "IRanges" version))
-              (sha256
-               (base32
-                "01mx46a82vd3gz705pj0kk4wpxg683s8jqxchzjia3gz00b4qw52"))))
-    (properties
-     `((upstream-name . "IRanges")))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-biocgenerics" ,r-biocgenerics)
-       ("r-s4vectors" ,r-s4vectors)))
-    (home-page "https://bioconductor.org/packages/IRanges")
-    (synopsis "Infrastructure for manipulating intervals on sequences")
-    (description
-     "This package provides efficient low-level and highly reusable S4 classes
-for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more
-generally, data that can be organized sequentially (formally defined as
-@code{Vector} objects), as well as views on these @code{Vector} objects.
-Efficient list-like classes are also provided for storing big collections of
-instances of the basic classes.  All classes in the package use consistent
-naming and share the same rich and consistent \"Vector API\" as much as
-possible.")
-    (license license:artistic2.0)))
-
-(define-public r-genomeinfodbdata
-  (package
-    (name "r-genomeinfodbdata")
-    (version "1.2.0")
-    (source (origin
-              (method url-fetch)
-              ;; We cannot use bioconductor-uri here because this tarball is
-              ;; located under "data/annotation/" instead of "bioc/".
-              (uri (string-append "https://bioconductor.org/packages/release/"
-                                  "data/annotation/src/contrib/GenomeInfoDbData_"
-                                  version ".tar.gz"))
-              (sha256
-               (base32
-                "0di6nlqpsyqf693k2na65ayqldih563x3zfrczpqc5q2hl5kg35c"))))
-    (properties
-     `((upstream-name . "GenomeInfoDbData")))
-    (build-system r-build-system)
-    (home-page "https://bioconductor.org/packages/GenomeInfoDbData")
-    (synopsis "Species and taxonomy ID look up tables for GenomeInfoDb")
-    (description "This package contains data for mapping between NCBI taxonomy
-ID and species.  It is used by functions in the GenomeInfoDb package.")
-    (license license:artistic2.0)))
-
-(define-public r-genomeinfodb
-  (package
-    (name "r-genomeinfodb")
-    (version "1.26.2")
-    (source (origin
-              (method url-fetch)
-              (uri (bioconductor-uri "GenomeInfoDb" version))
-              (sha256
-               (base32
-                "092izc49maxjhf6m4b0qx21ad16dz7bmxy5pysp3vkyhdrfa2f7v"))))
-    (properties
-     `((upstream-name . "GenomeInfoDb")))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-biocgenerics" ,r-biocgenerics)
-       ("r-genomeinfodbdata" ,r-genomeinfodbdata)
-       ("r-iranges" ,r-iranges)
-       ("r-rcurl" ,r-rcurl)
-       ("r-s4vectors" ,r-s4vectors)))
-    (native-inputs
-     `(("r-knitr" ,r-knitr)))
-    (home-page "https://bioconductor.org/packages/GenomeInfoDb")
-    (synopsis "Utilities for manipulating chromosome identifiers")
-    (description
-     "This package contains data and functions that define and allow
-translation between different chromosome sequence naming conventions (e.g.,
-\"chr1\" versus \"1\"), including a function that attempts to place sequence
-names in their natural, rather than lexicographic, order.")
-    (license license:artistic2.0)))
-
-(define-public r-edger
-  (package
-    (name "r-edger")
-    (version "3.32.1")
-    (source (origin
-              (method url-fetch)
-              (uri (bioconductor-uri "edgeR" version))
-              (sha256
-               (base32
-                "1gaic8qf6a6sy0bmydh1xzf52w0wnq31aanpvw3a30pfsi218bcp"))))
-    (properties `((upstream-name . "edgeR")))
-    (build-system r-build-system)
-    (propagated-inputs
-     `(("r-limma" ,r-limma)
-       ("r-locfit" ,r-locfit)
-       ("r-rcpp" ,r-rcpp)
-       ("r-statmod" ,r-statmod))) ;for estimateDisp
-    (home-page "http://bioinf.wehi.edu.au/edgeR")
-    (synopsis "EdgeR does empirical analysis of digital gene expression data")
-    (description "This package can do differential expression analysis of
-RNA-seq expression profiles with biological replication.  It implements a range
-of statistical methodology based on the negative binomial distributions,
-including empirical Bayes estimation, exact tests, generalized linear models
-and quasi-likelihood tests.  It be applied to differential signal analysis of
-other types of genomic data that produce counts, including ChIP-seq, SAGE and
-CAGE.")
-    (license license:gpl2+)))
-
-(define-public r-variantannotation
-  (package
-    (name "r-variantannotation")
-    (version "1.36.0")
-    (source (origin
-              (method url-fetch)
-              (uri (bioconductor-uri "VariantAnnotation" version))
-              (sha256
-               (base32
-                "1sl0l6v05lfglj281nszma0h5k234md7rn2pdah8vs2d4iq3kimw"))))
-    (properties
-     `((upstream-name . "VariantAnnotation")))
-    (propagated-inputs
-     `(("r-annotationdbi" ,r-annotationdbi)
-       ("r-biobase" ,r-biobase)
-       ("r-biocgenerics" ,r-biocgenerics)
-       ("r-biostrings" ,r-biostrings)
-       ("r-bsgenome" ,r-bsgenome)
-       ("r-dbi" ,r-dbi)
-       ("r-genomeinfodb" ,r-genomeinfodb)
-       ("r-genomicfeatures" ,r-genomicfeatures)
-       ("r-genomicranges" ,r-genomicranges)
-       ("r-iranges" ,r-iranges)
-       ("r-matrixgenerics" ,r-matrixgenerics)
-       ("r-summarizedexperiment" ,r-summarizedexperiment)
-       ("r-rhtslib" ,r-rhtslib)
-       ("r-rsamtools" ,r-rsamtools)
-       ("r-rtracklayer" ,r-rtracklayer)
-       ("r-s4vectors" ,r-s4vectors)
-       ("r-xvector" ,r-xvector)
-       ("r-zlibbioc" ,r-zlibbioc)))
-    (build-system r-build-system)
-    (home-page "https://bioconductor.org/packages/VariantAnnotation")
-    (synopsis "Package for annotation of genetic variants")
-    (description "This R package can annotate variants, compute amino acid
-coding changes and predict coding outcomes.")
-    (license license:artistic2.0)))
-
 (define-public r-limma
   (package
     (name "r-limma")
@@ -8551,13 +7897,13 @@ tab-delimited (tabix) files.")
 (define-public r-delayedarray
   (package
     (name "r-delayedarray")
-    (version "0.16.1")
+    (version "0.16.2")
     (source (origin
               (method url-fetch)
               (uri (bioconductor-uri "DelayedArray" version))
               (sha256
                (base32
-                "1d75zrhha1v7dhbvjp6a4iap441l5k268w0jjxklpqywbqns7l3d"))))
+                "09lpj951v1afxkrnjvnhzp4qgklq23ykdwlny7k1lyfcdy9q6wm0"))))
     (properties
      `((upstream-name . "DelayedArray")))
     (build-system r-build-system)
@@ -8699,13 +8045,13 @@ as well as query and modify the browser state, such as the current viewport.")
 (define-public r-genomicfeatures
   (package
     (name "r-genomicfeatures")
-    (version "1.42.1")
+    (version "1.42.2")
     (source (origin
               (method url-fetch)
               (uri (bioconductor-uri "GenomicFeatures" version))
               (sha256
                (base32
-                "17dyd9hcw6pw16y353dh55wfhxmkxka99lbsxsp9xyrhffwrxi0s"))))
+                "17ns5hvx5q8mrmkgb6linspwml62mi34i6al5bxlib5xi9d9f04s"))))
     (properties
      `((upstream-name . "GenomicFeatures")))
     (build-system r-build-system)
@@ -10954,7 +10300,7 @@ with narrow binding events such as transcription factor ChIP-seq.")
 (define-public trim-galore
   (package
     (name "trim-galore")
-    (version "0.6.1")
+    (version "0.6.6")
     (source
      (origin
        (method git-fetch)
@@ -10964,7 +10310,7 @@ with narrow binding events such as transcription factor ChIP-seq.")
        (file-name (git-file-name name version))
        (sha256
         (base32
-         "1y31wbxwkm9xqzr5zv1pk5q418whnmlmgmfyxxpnl12h83m2i9iv"))))
+         "0yrwg6325j4sb9vnplvl3jplzab0qdhp92wl480qjinpfq88j4rs"))))
     (build-system gnu-build-system)
     (arguments
      `(#:tests? #f                      ; no tests
@@ -13109,6 +12455,64 @@ concatenates pieces of read sequences to generate the final unitig sequences.
 Thus the per-base error rate is similar to the raw input reads.")
    (license license:expat)))
 
+(define-public bandage
+  (package
+    (name "bandage")
+    (version "0.8.1")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/rrwick/Bandage")
+             (commit (string-append "v" version))))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32 "1bbsn5f5x8wlspg4pbibqz6m5vin8c19nl224f3z3km0pkc97rwv"))))
+    (build-system qt-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (replace 'configure
+           (lambda _
+             (invoke "qmake" "Bandage.pro")))
+         (replace 'check
+           (lambda* (#:key tests? #:allow-other-keys)
+             (when tests?
+               (substitute* "tests/bandage_command_line_tests.sh"
+                 (("^bandagepath=.*")
+                  (string-append "bandagepath=" (getcwd) "/Bandage\n")))
+               (with-directory-excursion "tests"
+                 (setenv "XDG_RUNTIME_DIR" (getcwd))
+                 (invoke "./bandage_command_line_tests.sh")))
+             #t))
+         (replace 'install
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let ((out (assoc-ref outputs "out")))
+               (install-file "Bandage" (string-append out "/bin"))
+               #t))))))
+    (inputs
+     `(("qtbase" ,qtbase)
+       ("qtsvg" ,qtsvg)))
+    (native-inputs
+     `(("imagemagick" ,imagemagick)))
+    (home-page "https://rrwick.github.io/Bandage/")
+    (synopsis
+     "Bioinformatics Application for Navigating De novo Assembly Graphs Easily")
+    (description "Bandage is a program for visualising de novo assembly graphs.
+It allows users to interact with the assembly graphs made by de novo assemblers
+such as Velvet, SPAdes, MEGAHIT and others.  De novo assembly graphs contain not
+only assembled contigs but also the connections between those contigs, which
+were previously not easily accessible.  Bandage visualises assembly graphs, with
+connections, using graph layout algorithms.  Nodes in the drawn graph, which
+represent contigs, can be automatically labelled with their ID, length or depth.
+Users can interact with the graph by moving, labelling and colouring nodes.
+Sequence information can also be extracted directly from the graph viewer.  By
+displaying connections between contigs, Bandage opens up new possibilities for
+analysing and improving de novo assemblies that are not possible by looking at
+contigs alone.")
+    (license (list license:gpl2+        ; bundled ogdf
+                   license:gpl3+))))
+
 (define-public r-circus
   (package
     (name "r-circus")
@@ -13359,6 +12763,89 @@ create connections between analogous cells in different batches without
 altering the counts or PCA space.")
     (license license:expat)))
 
+(define-public python-drep
+  (package
+    (name "python-drep")
+    (version "3.2.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "drep" version))
+       (sha256
+        (base32
+         "08vk0x6v5c5n7afgd5pcjhsvb424absypxy22hw1cm1n9kirbi77"))))
+    (build-system python-build-system)
+    (propagated-inputs
+     `(("python-biopython" ,python-biopython)
+       ("python-matplotlib" ,python-matplotlib)
+       ("python-numpy" ,python-numpy)
+       ("python-pandas" ,python-pandas)
+       ("python-pytest" ,python-pytest)
+       ("python-scikit-learn" ,python-scikit-learn)
+       ("python-seaborn" ,python-seaborn)
+       ("python-tqdm" ,python-tqdm)))
+    (home-page "https://github.com/MrOlm/drep")
+    (synopsis "De-replication of microbial genomes assembled from multiple samples")
+    (description
+     "dRep is a Python program for rapidly comparing large numbers of genomes.
+dRep can also \"de-replicate\" a genome set by identifying groups of highly
+similar genomes and choosing the best representative genome for each genome
+set.")
+    (license license:expat)))
+
+(define-public instrain
+  (package
+    (name "instrain")
+    (version "1.5.2")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "inStrain" version))
+       (sha256
+        (base32
+         "0ykqlpf6yz4caihsaz3ys00cyvlr7wdj4s9a8rh56q5r8xf80ic0"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'patch-relative-imports
+           (lambda _
+             (substitute* "docker/run_instrain.py"
+               (("from s3_utils")
+                "from .s3_utils")
+               (("from job_utils")
+                "from .job_utils")))))))
+    (inputs
+     `(("python-biopython" ,python-biopython)
+       ("python-boto3" ,python-boto3)
+       ("python-h5py" ,python-h5py)
+       ("python-lmfit" ,python-lmfit)
+       ("python-matplotlib" ,python-matplotlib)
+       ("python-networkx" ,python-networkx)
+       ("python-numba" ,python-numba)
+       ("python-numpy" ,python-numpy)
+       ("python-pandas" ,python-pandas)
+       ("python-psutil" ,python-psutil)
+       ("python-pysam" ,python-pysam)
+       ("python-scikit-learn" ,python-scikit-learn)
+       ("python-seaborn" ,python-seaborn)
+       ("python-tqdm" ,python-tqdm)
+       ;; drep is needed for deprecated plot utilities
+       ("python-drep" ,python-drep)))
+    (native-inputs
+     `(("python-pytest" ,python-pytest)))
+    (home-page "https://github.com/MrOlm/inStrain")
+    (synopsis "Calculation of strain-level metrics")
+    (description
+     "inStrain is a Python program for analysis of co-occurring genome
+populations from metagenomes that allows highly accurate genome comparisons,
+analysis of coverage, microdiversity, and linkage, and sensitive SNP detection
+with gene localization and synonymous non-synonymous identification.")
+    ;; The tool itself says that the license is "MIT", but the repository
+    ;; contains a LICENSE file with the GPLv3.
+    ;; See https://github.com/MrOlm/inStrain/issues/51
+    (license license:expat)))
+
 (define-public gffcompare
   (let ((commit "be56ef4349ea3966c12c6397f85e49e047361c41")
         (revision "1"))
@@ -14367,14 +13854,14 @@ is a Cython wrapper for FIt-SNE.")
 (define-public bbmap
   (package
     (name "bbmap")
-    (version "35.82")
+    (version "38.90")
     (source (origin
               (method url-fetch)
               (uri (string-append
                     "mirror://sourceforge/bbmap/BBMap_" version ".tar.gz"))
               (sha256
                (base32
-                "1q4rfhxcb6z3gm8zg2davjz98w22lkf4hm9ikxz9kdl93pil3wkd"))))
+                "1wb94bcc006qq86x77z2rz0lc8m9f1kpnw6gdhjfg9bdaqf56rm3"))))
     (build-system ant-build-system)
     (arguments
      `(#:build-target "dist"
@@ -15785,3 +15272,64 @@ biological processes.  SBML is useful for models of metabolism, cell
 signaling, and more.  It continues to be evolved and expanded by an
 international community.")
     (license license:lgpl2.1+)))
+
+(define-public r-signac
+  (let ((commit "e0512d348adeda4a3f23a2e8f56d1fe09840e03c")
+        (revision "1"))
+    (package
+      (name "r-signac")
+      (version (git-version "1.1.1" revision commit))
+      (source
+       (origin
+         (method git-fetch)
+         (uri (git-reference
+               (url "https://github.com/timoast/signac/")
+               (commit commit)))
+         (file-name (git-file-name name version))
+         (sha256
+          (base32
+           "1yihhrv7zs87ax61la1nb4y12lg3knraw4b20k5digbcwm8488lb"))))
+      (properties `((upstream-name . "Signac")))
+      (build-system r-build-system)
+      (inputs `(("zlib" ,zlib)))
+      (propagated-inputs
+       `(("r-annotationfilter" ,r-annotationfilter)
+         ("r-biocgenerics" ,r-biocgenerics)
+         ("r-biostrings" ,r-biostrings)
+         ("r-biovizbase" ,r-biovizbase)
+         ("r-data-table" ,r-data-table)
+         ("r-dplyr" ,r-dplyr)
+         ("r-fastmatch" ,r-fastmatch)
+         ("r-future" ,r-future)
+         ("r-future-apply" ,r-future-apply)
+         ("r-genomeinfodb" ,r-genomeinfodb)
+         ("r-genomicranges" ,r-genomicranges)
+         ("r-ggbio" ,r-ggbio)
+         ("r-ggforce" ,r-ggforce)
+         ("r-ggplot2" ,r-ggplot2)
+         ("r-ggrepel" ,r-ggrepel)
+         ("r-ggseqlogo" ,r-ggseqlogo)
+         ("r-iranges" ,r-iranges)
+         ("r-irlba" ,r-irlba)
+         ("r-lsa" ,r-lsa)
+         ("r-matrix" ,r-matrix)
+         ("r-patchwork" ,r-patchwork)
+         ("r-pbapply" ,r-pbapply)
+         ("r-rcpp" ,r-rcpp)
+         ("r-rcpproll" ,r-rcpproll)
+         ("r-rsamtools" ,r-rsamtools)
+         ("r-s4vectors" ,r-s4vectors)
+         ("r-scales" ,r-scales)
+         ("r-seurat" ,r-seurat)
+         ("r-seuratobject" ,r-seuratobject)
+         ("r-stringi" ,r-stringi)
+         ("r-tidyr" ,r-tidyr)))
+      (home-page "https://github.com/timoast/signac/")
+      (synopsis "Analysis of single-cell chromatin data")
+      (description
+       "This package provides a framework for the analysis and exploration of
+single-cell chromatin data.  The Signac package contains functions for
+quantifying single-cell chromatin data, computing per-cell quality control
+metrics, dimension reduction and normalization, visualization, and DNA
+sequence motif analysis.")
+      (license license:expat))))