summary refs log tree commit diff
path: root/gnu/packages/bioconductor.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/bioconductor.scm')
-rw-r--r--gnu/packages/bioconductor.scm278
1 files changed, 277 insertions, 1 deletions
diff --git a/gnu/packages/bioconductor.scm b/gnu/packages/bioconductor.scm
index 68d8e5b574..b4be8aee15 100644
--- a/gnu/packages/bioconductor.scm
+++ b/gnu/packages/bioconductor.scm
@@ -1,6 +1,6 @@
 ;;; GNU Guix --- Functional package management for GNU
 ;;; Copyright © 2016, 2017, 2018, 2019, 2020 Ricardo Wurmus <rekado@elephly.net>
-;;; Copyright © 2016, 2017, 2018 Roel Janssen <roel@gnu.org>
+;;; Copyright © 2016, 2017, 2018, 2020 Roel Janssen <roel@gnu.org>
 ;;; Copyright © 2017, 2018, 2019 Tobias Geerinckx-Rice <me@tobias.gr>
 ;;; Copyright © 2019 Simon Tournier <zimon.toutoune@gmail.com>
 ;;;
@@ -493,6 +493,30 @@ annotations for the genome of the model mouse Mus musculus.")
 by UCSC (hg19, February 2009) and stored in Biostrings objects.")
     (license license:artistic2.0)))
 
+(define-public r-bsgenome-hsapiens-ucsc-hg38
+  (package
+    (name "r-bsgenome-hsapiens-ucsc-hg38")
+    (version "1.4.1")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "BSgenome.Hsapiens.UCSC.hg38"
+                                     version 'annotation))
+              (sha256
+               (base32
+                "1ql08pvi4vv0ynvg4qs9kysw1c7s3crkgin6zxvgzqk6fray9mvi"))))
+    (properties
+     `((upstream-name . "BSgenome.Hsapiens.UCSC.hg38")))
+    (build-system r-build-system)
+    (propagated-inputs
+     `(("r-bsgenome" ,r-bsgenome)))
+    (home-page
+     "https://www.bioconductor.org/packages/BSgenome.Hsapiens.UCSC.hg38/")
+    (synopsis "Full genome sequences for Homo sapiens")
+    (description
+     "This package provides full genome sequences for Homo sapiens (Human)
+as provided by UCSC (hg38, Dec. 2013) and stored in Biostrings objects.")
+    (license license:artistic2.0)))
+
 (define-public r-ensdb-hsapiens-v75
   (package
     (name "r-ensdb-hsapiens-v75")
@@ -718,6 +742,30 @@ annotations.")
      "This is a manifest package for Illumina's EPIC methylation arrays.")
     (license license:artistic2.0)))
 
+(define-public r-ideoviz
+  (package
+    (name "r-ideoviz")
+    (version "1.22.0")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "IdeoViz" version))
+              (sha256
+               (base32
+                "0rsz6dawrx5qdrypxs2hgihmx3kbpdg1y73h876yxccgdlabvzil"))))
+    (build-system r-build-system)
+    (propagated-inputs
+     `(("r-biobase" ,r-biobase)
+       ("r-iranges" ,r-iranges)
+       ("r-genomicranges" ,r-genomicranges)
+       ("r-rcolorbrewer" ,r-rcolorbrewer)
+       ("r-rtracklayer" ,r-rtracklayer)
+       ("r-genomeinfodb" ,r-genomeinfodb)))
+    (home-page "https://bioconductor.org/packages/IdeoViz/")
+    (synopsis "Plots data along a chromosomal ideogram")
+    (description "This package provides functions to plot data associated with
+arbitrary genomic intervals along chromosomal ideogram.")
+    (license license:gpl2)))
+
 ;; This is a CRAN package, but it depends on r-bsgenome-hsapiens-ucsc-hg19
 ;; from Bioconductor.
 (define-public r-deconstructsigs
@@ -811,6 +859,32 @@ performing parallel computations on multicore machines.")
 Disease Ontology.")
     (license license:artistic2.0)))
 
+(define-public r-pasilla
+  (package
+    (name "r-pasilla")
+    (version "1.14.0")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "http://bioconductor.org/packages/release/data/experiment"
+                    "/src/contrib/pasilla_" version ".tar.gz"))
+              (sha256
+               (base32
+                "0h124i2fb2lbj2k48zzf1n7ldqa471bs26fbd9vw50299aqx28x0"))))
+    (build-system r-build-system)
+    (propagated-inputs
+     `(("r-biocstyle" ,r-biocstyle)
+       ("r-dexseq" ,r-dexseq)
+       ("r-knitr" ,r-knitr)
+       ("r-rmarkdown" ,r-rmarkdown)))
+    (home-page "https://www.bioconductor.org/packages/pasilla/")
+    (synopsis "Data package with per-exon and per-gene read counts")
+    (description "This package provides per-exon and per-gene read counts
+computed for selected genes from RNA-seq data that were presented in the
+article 'Conservation of an RNA regulatory map between Drosophila and mammals'
+by Brooks et al., Genome Research 2011.")
+    (license license:lgpl2.1+)))
+
 (define-public r-pfam-db
   (package
     (name "r-pfam-db")
@@ -995,6 +1069,63 @@ examples' of Affymetrix data, unlike the artificial examples included in the
 package @code{affy}.")
     (license license:gpl2+)))
 
+(define-public r-coverageview
+  (package
+    (name "r-coverageview")
+    (version "1.24.0")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "CoverageView" version))
+              (sha256
+               (base32
+                "0s47svs7xnr9jkylq0dxidqrigihdddiprcl0951vjr4w7kmb5nf"))))
+    (build-system r-build-system)
+    (propagated-inputs
+     `(("r-s4vectors" ,r-s4vectors)
+       ("r-iranges" ,r-iranges)
+       ("r-genomicranges" ,r-genomicranges)
+       ("r-genomicalignments" ,r-genomicalignments)
+       ("r-rtracklayer" ,r-rtracklayer)
+       ("r-rsamtools" ,r-rsamtools)))
+    (home-page "https://bioconductor.org/packages/CoverageView/")
+    (synopsis "Coverage visualization package for R")
+    (description "This package provides a framework for the visualization of
+genome coverage profiles.  It can be used for ChIP-seq experiments, but it can
+be also used for genome-wide nucleosome positioning experiments or other
+experiment types where it is important to have a framework in order to inspect
+how the coverage distributed across the genome.")
+    (license license:artistic2.0)))
+
+(define-public r-cummerbund
+  (package
+   (name "r-cummerbund")
+   (version "2.28.0")
+   (source (origin
+             (method url-fetch)
+             (uri (bioconductor-uri "cummeRbund" version))
+             (sha256
+              (base32
+               "1fjc3bcclm4gsvw4nq6cv3a1kbrldvrxbkyfb9306708si1n4dwk"))))
+   (build-system r-build-system)
+   (propagated-inputs
+    `(("r-biobase" ,r-biobase)
+      ("r-biocgenerics" ,r-biocgenerics)
+      ("r-fastcluster", r-fastcluster)
+      ("r-ggplot2" ,r-ggplot2)
+      ("r-gviz" ,r-gviz)
+      ("r-plyr" ,r-plyr)
+      ("r-reshape2" ,r-reshape2)
+      ("r-rsqlite" ,r-rsqlite)
+      ("r-rtracklayer" ,r-rtracklayer)
+      ("r-s4vectors" ,r-s4vectors)))
+   (home-page "https://bioconductor.org/packages/cummeRbund/")
+   (synopsis "Analyze Cufflinks high-throughput sequencing data")
+   (description "This package allows for persistent storage, access,
+exploration, and manipulation of Cufflinks high-throughput sequencing
+data.  In addition, provides numerous plotting functions for commonly
+used visualizations.")
+   (license license:artistic2.0)))
+
 (define-public r-curatedtcgadata
   (package
     (name "r-curatedtcgadata")
@@ -2533,6 +2664,41 @@ and regression inferences from RNA-sequencing data.")
 gene and isoform level using RNA-seq data")
     (license license:artistic2.0)))
 
+(define-public r-karyoploter
+  (package
+    (name "r-karyoploter")
+    (version "1.12.4")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "karyoploteR" version))
+              (sha256
+               (base32
+                "03jmfgmw35hrgn3pc5lq6pblzhfx9fp4l6dx50rp303lr7kjxp9v"))))
+    (build-system r-build-system)
+    (propagated-inputs
+     `(("r-regioner" ,r-regioner)
+       ("r-genomicranges" ,r-genomicranges)
+       ("r-iranges" ,r-iranges)
+       ("r-rsamtools" ,r-rsamtools)
+       ("r-memoise" ,r-memoise)
+       ("r-rtracklayer" ,r-rtracklayer)
+       ("r-genomeinfodb" ,r-genomeinfodb)
+       ("r-s4vectors" ,r-s4vectors)
+       ("r-biovizbase" ,r-biovizbase)
+       ("r-digest" ,r-digest)
+       ("r-bezier" ,r-bezier)
+       ("r-bamsignals" ,r-bamsignals)
+       ("r-annotationdbi" ,r-annotationdbi)
+       ("r-variantannotation" ,r-variantannotation)))
+    (home-page "https://bioconductor.org/packages/karyoploteR/")
+    (synopsis "Plot customizable linear genomes displaying arbitrary data")
+    (description "This package creates karyotype plots of arbitrary genomes and
+offers a complete set of functions to plot arbitrary data on them.  It mimicks
+many R base graphics functions coupling them with a coordinate change function
+automatically mapping the chromosome and data coordinates into the plot
+coordinates.")
+    (license license:artistic2.0)))
+
 (define-public r-lpsymphony
   (package
     (name "r-lpsymphony")
@@ -3972,6 +4138,78 @@ database (e.g. JASPAR).  It can also be used to visualize motifs, motif
 distributions, modules and filter motifs.")
     (license license:gpl2)))
 
+(define-public r-motifdb
+  (package
+   (name "r-motifdb")
+   (version "1.28.0")
+   (source (origin
+            (method url-fetch)
+            (uri (bioconductor-uri "MotifDb" version))
+            (sha256
+             (base32 "0m5apkjlvdq9yhjdyds3hivfnkbm6f059hy2bkjhalrlhd2si2jc"))))
+   (properties `((upstream-name . "MotifDb")))
+   (build-system r-build-system)
+   (propagated-inputs
+    `(("r-biocgenerics" ,r-biocgenerics)
+      ("r-biostrings" ,r-biostrings)
+      ("r-iranges" ,r-iranges)
+      ("r-rtracklayer" ,r-rtracklayer)
+      ("r-s4vectors" ,r-s4vectors)
+      ("r-splitstackshape" ,r-splitstackshape)))
+   (home-page "https://www.bioconductor.org/packages/MotifDb/")
+   (synopsis "Annotated collection of protein-DNA binding sequence motifs")
+   (description "This package provides more than 2000 annotated position
+frequency matrices from nine public sources, for multiple organisms.")
+   (license license:artistic2.0)))
+
+(define-public r-motifbreakr
+  (package
+   (name "r-motifbreakr")
+   (version "2.0.0")
+   (source (origin
+            (method url-fetch)
+            (uri (bioconductor-uri "motifbreakR" version))
+            (sha256
+             (base32 "190z8gj393qdpq5wz7gph96k0l8c1j9wd0p0llscysvk5kr1hf9n"))))
+   (properties `((upstream-name . "motifbreakR")))
+   (build-system r-build-system)
+   (propagated-inputs
+    `(("r-grimport" ,r-grimport)
+      ("r-stringr" ,r-stringr)
+      ("r-biocgenerics" ,r-biocgenerics)
+      ("r-s4vectors" ,r-s4vectors)
+      ("r-iranges" ,r-iranges)
+      ("r-genomeinfodb" ,r-genomeinfodb)
+      ("r-genomicranges" ,r-genomicranges)
+      ("r-biostrings" ,r-biostrings)
+      ("r-bsgenome" ,r-bsgenome)
+      ("r-rtracklayer" ,r-rtracklayer)
+      ("r-variantannotation" ,r-variantannotation)
+      ("r-biocparallel" ,r-biocparallel)
+      ("r-motifstack" ,r-motifstack)
+      ("r-gviz" ,r-gviz)
+      ("r-matrixstats" ,r-matrixstats)
+      ("r-tfmpvalue" ,r-tfmpvalue)
+      ("r-motifdb" ,r-motifdb)))
+   (home-page "https://www.bioconductor.org/packages/motifbreakR/")
+   (synopsis "Predicting disruptiveness of single nucleotide polymorphisms")
+   (description "This package allows biologists to judge in the first place
+whether the sequence surrounding the polymorphism is a good match, and in
+the second place how much information is gained or lost in one allele of
+the polymorphism relative to another.  This package gives a choice of
+algorithms for interrogation of genomes with motifs from public sources:
+@enumerate
+@item a weighted-sum probability matrix;
+@item log-probabilities;
+@item weighted by relative entropy.
+@end enumerate
+
+This package can predict effects for novel or previously described variants in
+public databases, making it suitable for tasks beyond the scope of its original
+design.  Lastly, it can be used to interrogate any genome curated within
+Bioconductor.")
+   (license license:gpl2+)))
+
 (define-public r-motifstack
   (package
     (name "r-motifstack")
@@ -6368,6 +6606,44 @@ and parameters of which are trained on a set of aligned reads and a reference
 genome sequence.")
     (license license:lgpl3)))
 
+(define-public r-snplocs-hsapiens-dbsnp144-grch37
+  (package
+    (name "r-snplocs-hsapiens-dbsnp144-grch37")
+    (version "0.99.20")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "SNPlocs.Hsapiens.dbSNP144.GRCh37"
+                                     version 'annotation))
+              (sha256
+               (base32
+                "1z8kx43ki1jvj7ms7pcybakcdimfwr6zpjvspkjmma97bdz093iz"))))
+    (build-system r-build-system)
+    ;; As this package provides little more than a very large data file it
+    ;; doesn't make sense to build substitutes.
+    (arguments `(#:substitutable? #f))
+    (propagated-inputs
+     `(("r-biocgenerics" ,r-biocgenerics)
+       ("r-s4vectors" ,r-s4vectors)
+       ("r-iranges" ,r-iranges)
+       ("r-genomeinfodb" ,r-genomeinfodb)
+       ("r-genomicranges" ,r-genomicranges)
+       ("r-bsgenome" ,r-bsgenome)
+       ("r-biostrings" ,r-biostrings)))
+    (home-page
+     "https://bioconductor.org/packages/SNPlocs.Hsapiens.dbSNP144.GRCh37/")
+    (synopsis "SNP locations for Homo sapiens (dbSNP Build 144)")
+    (description "This package provides SNP locations and alleles for Homo
+sapiens extracted from NCBI dbSNP Build 144.  The source data files used for
+this package were created by NCBI on May 29-30, 2015, and contain SNPs mapped
+to reference genome GRCh37.p13.  Note that the GRCh37.p13 genome is a
+patched version of GRCh37.  However the patch doesn't alter chromosomes 1-22,
+X, Y, MT.  GRCh37 itself is the same as the hg19 genome from UCSC *except* for
+the mitochondrion chromosome.  Therefore, the SNPs in this package can be
+injected in @code{BSgenome.Hsapiens.UCSC.hg19} and they will land at the
+correct position but this injection will exclude chrM (i.e. nothing will be
+injected in that sequence).")
+    (license license:artistic2.0)))
+
 (define-public r-reqon
   (package
     (name "r-reqon")