diff options
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r-- | gnu/packages/bioinformatics.scm | 543 |
1 files changed, 477 insertions, 66 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index 313c70784a..0d09a98dc5 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -3,7 +3,7 @@ ;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com> ;;; Copyright © 2015, 2016, 2018, 2019, 2020 Pjotr Prins <pjotr.guix@thebird.nl> ;;; Copyright © 2015 Andreas Enge <andreas@enge.fr> -;;; Copyright © 2016, 2020 Roel Janssen <roel@gnu.org> +;;; Copyright © 2016, 2020, 2021 Roel Janssen <roel@gnu.org> ;;; Copyright © 2016, 2017, 2018, 2019, 2020, 2021 Efraim Flashner <efraim@flashner.co.il> ;;; Copyright © 2016, 2020 Marius Bakke <mbakke@fastmail.com> ;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com> @@ -130,6 +130,7 @@ #:use-module (gnu packages qt) #:use-module (gnu packages rdf) #:use-module (gnu packages readline) + #:use-module (gnu packages rsync) #:use-module (gnu packages ruby) #:use-module (gnu packages serialization) #:use-module (gnu packages shells) @@ -144,6 +145,7 @@ #:use-module (gnu packages tls) #:use-module (gnu packages vim) #:use-module (gnu packages web) + #:use-module (gnu packages wget) #:use-module (gnu packages xml) #:use-module (gnu packages xorg) #:use-module (srfi srfi-1) @@ -311,7 +313,7 @@ BAM files.") (define-public bcftools (package (name "bcftools") - (version "1.11") + (version "1.12") (source (origin (method url-fetch) (uri (string-append "https://github.com/samtools/bcftools/" @@ -319,11 +321,11 @@ BAM files.") version "/bcftools-" version ".tar.bz2")) (sha256 (base32 - "0r508mp15pqzf8r1269kb4v5naw9zsvbwd3cz8s1yj7carsf9viw")) + "1x94l1hy2pi3lbz0sxlbw0g6q5z5apcrhrlcwda94ns9n4r6a3ks")) (modules '((guix build utils))) (snippet '(begin ;; Delete bundled htslib. 
- (delete-file-recursively "htslib-1.11") + (delete-file-recursively "htslib-1.12") #t)))) (build-system gnu-build-system) (arguments @@ -437,7 +439,7 @@ computational cluster.") (define-public bedtools (package (name "bedtools") - (version "2.29.2") + (version "2.30.0") (source (origin (method url-fetch) (uri (string-append "https://github.com/arq5x/bedtools2/releases/" @@ -445,7 +447,7 @@ computational cluster.") "bedtools-" version ".tar.gz")) (sha256 (base32 - "0m3hk6548846w83a9s5drsczvy67n2azx41kj71n03klb2gbzwg3")))) + "1f2hh79l7dn147c2xyfgf5wfjvlqfw32kjfnnh2n1qy6rpzx2fik")))) (build-system gnu-build-system) (arguments '(#:test-target "test" @@ -457,7 +459,7 @@ computational cluster.") (native-inputs `(("python" ,python-wrapper))) (inputs - `(("samtools" ,samtools-1.9) + `(("samtools" ,samtools) ("zlib" ,zlib))) (home-page "https://github.com/arq5x/bedtools2") (synopsis "Tools for genome analysis and arithmetic") @@ -553,6 +555,40 @@ input and output BAMs must adhere to the PacBio BAM format specification. Non-PacBio BAMs will cause exceptions to be thrown.") (license license:bsd-3))) +(define-public pbgzip + (let ((commit "2b09f97b5f20b6d83c63a5c6b408d152e3982974")) + (package + (name "pbgzip") + (version (git-version "0.0.0" "0" commit)) + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/nh13/pbgzip") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "1mlmq0v96irbz71bgw5zcc43g1x32zwnxx21a5p1f1ch4cikw1yd")))) + (build-system gnu-build-system) + (native-inputs + `(("autoconf" ,autoconf) + ("automake" ,automake))) + (inputs + `(("zlib" ,zlib))) + (home-page "https://github.com/nh13/pbgzip") + (synopsis "Parallel Block GZIP") + (description "This package implements parallel block gzip. 
For many +formats, in particular genomics data formats, data are compressed in +fixed-length blocks such that they can be easily indexed based on a (genomic) +coordinate order, since typically each block is sorted according to this order. +This allows for each block to be individually compressed (deflated), or more +importantly, decompressed (inflated), with the latter enabling random retrieval +of data in large files (gigabytes to terabytes). @code{pbgzip} is not limited +to any particular format, but certain features are tailored to genomics data +formats when enabled. Parallel decompression is somewhat faster, but the true +speedup comes during compression.") + (license license:expat)))) + (define-public blasr-libcpp (package (name "blasr-libcpp") @@ -797,13 +833,13 @@ intended to behave exactly the same as the original BWK awk.") (define-public python-pybedtools (package (name "python-pybedtools") - (version "0.8.1") + (version "0.8.2") (source (origin (method url-fetch) (uri (pypi-uri "pybedtools" version)) (sha256 (base32 - "14w5i40gi25clrr7h4wa2pcpnyipya8hrqi7nq77553zc5wf0df0")))) + "0wc7z8g8prgdx7n5chjva2fdq03wiwhqisjjxzkjg1j5k5ha7151")))) (build-system python-build-system) (arguments `(#:modules ((srfi srfi-26) @@ -814,13 +850,6 @@ intended to behave exactly the same as the original BWK awk.") (modify-phases %standard-phases (add-after 'unpack 'disable-broken-tests (lambda _ - (substitute* "pybedtools/test/test_scripts.py" - ;; This test freezes. - (("def test_intron_exon_reads") - "def _do_not_test_intron_exon_reads") - ;; This test fails in the Python 2 build. - (("def test_venn_mpl") - "def _do_not_test_venn_mpl")) (substitute* "pybedtools/test/test_helpers.py" ;; Requires internet access. (("def test_chromsizes") @@ -1097,6 +1126,31 @@ converted to Alignment objects, and so on. 
This means that the objects provide a coordinated and extensible framework to do computational biology.") (license license:perl-license)))) +(define-public perl-bio-db-hts + (package + (name "perl-bio-db-hts") + (version "3.01") + (source + (origin + (method url-fetch) + (uri (string-append "mirror://cpan/authors/id/A/AV/AVULLO/Bio-DB-HTS-" + version ".tar.gz")) + (sha256 + (base32 + "0hjg0igfkpvh27zdkdr6pa7cqm9n6r7cwz0np74cl4wmawgvr9hj")))) + (build-system perl-build-system) + (native-inputs + `(("perl-module-build" ,perl-module-build) + ("pkg-config" ,pkg-config))) + (propagated-inputs + `(("bioperl-minimal" ,bioperl-minimal) + ("htslib" ,htslib-1.9))) + (home-page "https://metacpan.org/release/Bio-DB-HTS") + (synopsis "Perl interface to HTS library for DNA sequencing") + (description "This is a Perl interface to the HTS library for DNA +sequencing.") + (license license:asl2.0))) + (define-public python-biopython (package (name "python-biopython") @@ -1324,7 +1378,7 @@ package provides command line tools using the Bio++ library.") (define-public blast+ (package (name "blast+") - (version "2.10.1") + (version "2.11.0") (source (origin (method url-fetch) (uri (string-append @@ -1332,7 +1386,7 @@ package provides command line tools using the Bio++ library.") version "/ncbi-blast-" version "+-src.tar.gz")) (sha256 (base32 - "11kvrrl0mcwww6530r55hccpg3x3msmhr3051fwnjbq8rzg2j1qi")) + "0m0r9vkw631ky1za1wilsfk9k9spwqh22nkrb9a57rbwmrc1i3nq")) (modules '((guix build utils))) (snippet '(begin @@ -4421,7 +4475,7 @@ performance.") (define-public htslib (package (name "htslib") - (version "1.11") + (version "1.12") (source (origin (method url-fetch) (uri (string-append @@ -4429,7 +4483,7 @@ performance.") version "/htslib-" version ".tar.bz2")) (sha256 (base32 - "1mrq4mihzx37yqhj3sfz6da6mw49niia808bzsw2gkkgmadxvyng")))) + "1jplnvizgr0fyyvvmkfmnsywrrpqhid3760vw15bllz98qdi9012")))) (build-system gnu-build-system) ;; Let htslib translate "gs://" and "s3://" to regular https links 
with ;; "--enable-gcs" and "--enable-s3". For these options to work, we also @@ -5846,7 +5900,7 @@ to the user's query of interest.") (define-public samtools (package (name "samtools") - (version "1.11") + (version "1.12") (source (origin (method url-fetch) @@ -5855,11 +5909,11 @@ to the user's query of interest.") version "/samtools-" version ".tar.bz2")) (sha256 (base32 - "1dp5wknak4arnw5ghhif9mmljlfnw5bgm91wib7z0j8wdjywx0z2")) + "1jrdj2idpma5ja9cg0rr73b565vdbr9wyy6zig54bidicc2pg8vd")) (modules '((guix build utils))) (snippet '(begin ;; Delete bundled htslib. - (delete-file-recursively "htslib-1.11") + (delete-file-recursively "htslib-1.12") #t)))) (build-system gnu-build-system) (arguments @@ -7175,6 +7229,43 @@ clustering analysis, differential analysis, motif inference and exploration of single cell ATAC-seq sequencing data.") (license license:gpl3))) +(define-public r-shinycell + (let ((commit + "aecbd56e66802f28e397f5ae1f19403aadd12163") + (revision "1")) + (package + (name "r-shinycell") + (version (git-version "2.0.0" revision commit)) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/SGDDNB/ShinyCell") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "13jn2ikmvljnzayk485g1mmq5abcp9m1b8n1djdb1agmn83zaki5")))) + (properties `((upstream-name . 
"ShinyCell"))) + (build-system r-build-system) + (propagated-inputs + `(("r-data-table" ,r-data-table) + ("r-ggplot2" ,r-ggplot2) + ("r-glue" ,r-glue) + ("r-gridextra" ,r-gridextra) + ("r-hdf5r" ,r-hdf5r) + ("r-matrix" ,r-matrix) + ("r-r-utils" ,r-r-utils) + ("r-rcolorbrewer" ,r-rcolorbrewer) + ("r-readr" ,r-readr) + ("r-reticulate" ,r-reticulate))) + (home-page "https://github.com/SGDDNB/ShinyCell") + (synopsis "Shiny interactive web apps for single-cell data") + (description + "This package provides Shiny apps for interactive exploration of +single-cell data.") + (license license:gpl3)))) + (define-public r-archr (let ((commit "46b519ffb6f73edf132497ac31650d19ef055dc1") (revision "1")) @@ -7489,6 +7580,64 @@ Perl and can be helpful if you want to filter, reformat, or trim your sequence data. It also generates basic statistics for your sequences.") (license license:gpl3+))) +(define-public shorah + (package + (name "shorah") + (version "1.99.2") + (source + (origin + (method url-fetch) + (uri (string-append "https://github.com/cbg-ethz/shorah" + "/releases/download/v" version + "/shorah-" version ".tar.xz")) + (sha256 + (base32 + "158dir9qcqspknlnyfr9zwk41x48nrh5wcg10k2grh9cidp9daiq")))) + (build-system gnu-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (add-after 'unpack 'fix-test-wrapper + (lambda* (#:key outputs #:allow-other-keys) + (let ((bin (string-append (assoc-ref outputs "out") "/bin"))) + (substitute* "examples/run_end2end_test" + (("\\$\\{interpreter\\} ../\\$\\{testscript\\}") + (string-append bin "/${testscript}")))))) + (delete 'check) + (add-after 'install 'wrap-programs + (lambda* (#:key outputs #:allow-other-keys) + (let* ((out (assoc-ref outputs "out")) + (site (string-append + out "/lib/python" + ,(version-major+minor + (package-version python)) + "/site-packages")) + (pythonpath (getenv "PYTHONPATH")) + (script (string-append out "/bin/shorah"))) + (chmod script #o555) + (wrap-program script `("PYTHONPATH" ":" 
prefix (,site ,pythonpath)))))) + (add-after 'wrap-programs 'check + (lambda* (#:key tests? #:allow-other-keys) + (when tests? + (invoke "make" "check"))))))) + (inputs + `(("boost" ,boost) + ("htslib" ,htslib) + ("python" ,python) + ("python-biopython" ,python-biopython) + ("python-numpy" ,python-numpy) + ("zlib" ,zlib))) + (native-inputs + `(("pkg-config" ,pkg-config))) + (home-page "https://github.com/cbg-ethz/shorah") + (synopsis "Short reads assembly into haplotypes") + (description + "ShoRAH is a project for the analysis of next generation sequencing data. +It is designed to analyse genetically heterogeneous samples. Its tools +provide error correction, haplotype reconstruction and estimation of the +frequency of the different genetic variants present in a mixed sample.") + (license license:gpl3+))) + (define-public ruby-bio-kseq (package (name "ruby-bio-kseq") @@ -7633,40 +7782,6 @@ including VCF header and contents in RDF and JSON.") (home-page "https://github.com/vcflib/bio-vcf") (license license:expat))) -(define-public r-summarizedexperiment - (package - (name "r-summarizedexperiment") - (version "1.20.0") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "SummarizedExperiment" version)) - (sha256 - (base32 - "04x6d4mcsnvz6glkmf6k2cv3fs8zk03i9rvv0ahpl793n8l411ps")))) - (properties - `((upstream-name . 
"SummarizedExperiment"))) - (build-system r-build-system) - (propagated-inputs - `(("r-biobase" ,r-biobase) - ("r-biocgenerics" ,r-biocgenerics) - ("r-delayedarray" ,r-delayedarray) - ("r-genomeinfodb" ,r-genomeinfodb) - ("r-genomicranges" ,r-genomicranges) - ("r-iranges" ,r-iranges) - ("r-matrix" ,r-matrix) - ("r-matrixgenerics" ,r-matrixgenerics) - ("r-s4vectors" ,r-s4vectors))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/SummarizedExperiment") - (synopsis "Container for representing genomic ranges by sample") - (description - "The SummarizedExperiment container contains one or more assays, each -represented by a matrix-like object of numeric or other mode. The rows -typically represent genomic ranges of interest and the columns represent -samples.") - (license license:artistic2.0))) - (define-public r-genomicalignments (package (name "r-genomicalignments") @@ -9461,14 +9576,14 @@ Shiny-based display methods for Bioconductor objects.") (define-public r-annotationhub (package (name "r-annotationhub") - (version "2.22.0") + (version "2.22.1") (source (origin (method url-fetch) (uri (bioconductor-uri "AnnotationHub" version)) (sha256 (base32 - "1950x654ffqx53b154kbph808zdh2xm5vmj9vzmc5nxc28fi2z5g")))) + "08d7m0n4jkpajsj0bvi5xd4vi1zqczl6lnrh8kqi2fbjkrvwdqp5")))) (properties `((upstream-name . 
"AnnotationHub"))) (build-system r-build-system) (propagated-inputs @@ -9558,14 +9673,14 @@ microarrays or GRanges for sequencing data.") (define-public r-gage (package (name "r-gage") - (version "2.40.1") + (version "2.40.2") (source (origin (method url-fetch) (uri (bioconductor-uri "gage" version)) (sha256 (base32 - "1iawa03dy4bl333my69d4sk7d74cjzfg5dpcxga6q5dglan4sp8r")))) + "1bs0hx8sqiyl08dqn2zx31kbv5aci4xvrs71pplx2yxal3jf5178")))) (build-system r-build-system) (propagated-inputs `(("r-annotationdbi" ,r-annotationdbi) @@ -9691,14 +9806,14 @@ originally made available by Holmes, Harris, and Quince, 2012, PLoS ONE 7(2): (define-public r-ensembldb (package (name "r-ensembldb") - (version "2.14.0") + (version "2.14.1") (source (origin (method url-fetch) (uri (bioconductor-uri "ensembldb" version)) (sha256 (base32 - "04il99gcrqzakvc0bxchdp9gghkn1sp9lpiian0iz4y7r67z3wpy")))) + "1hxwfh19qafpdhzprvw4nr8ks3gz7f0y8gyfhk8yqmmvvnvgqv40")))) (build-system r-build-system) (propagated-inputs `(("r-annotationdbi" ,r-annotationdbi) @@ -11775,7 +11890,7 @@ in an easily configurable manner.") (define-public pigx-bsseq (package (name "pigx-bsseq") - (version "0.1.2") + (version "0.1.3") (source (origin (method url-fetch) (uri (string-append "https://github.com/BIMSBbioinfo/pigx_bsseq/" @@ -11783,7 +11898,7 @@ in an easily configurable manner.") "/pigx_bsseq-" version ".tar.gz")) (sha256 (base32 - "0mpzlay2d5cjpmrcp7knff6rg1c2mqszd638n7lw0mc0cycbp9f8")))) + "0blm0bl5z3ng01n7hh2ffk4rkzvf7vb3nm0crgdzrxr5cahxdxql")))) (build-system gnu-build-system) (arguments `(;; TODO: tests currently require 12+GB of RAM. 
See @@ -11809,8 +11924,11 @@ in an easily configurable manner.") ("r-annotationhub" ,r-annotationhub) ("r-dt" ,r-dt) ("r-genomation" ,r-genomation) + ("r-ggbio" ,r-ggbio) ("r-ggrepel" ,r-ggrepel) + ("r-matrixstats" ,r-matrixstats) ("r-methylkit" ,r-methylkit) + ("r-reshape2" ,r-reshape2) ("r-rtracklayer" ,r-rtracklayer) ("r-rmarkdown" ,r-rmarkdown) ("r-bookdown" ,r-bookdown) @@ -12408,7 +12526,7 @@ in RNA-seq data.") ("python-igraph" ,python-igraph) ("python-joblib" ,python-joblib) ("python-legacy-api-wrap" ,python-legacy-api-wrap) - ("python-louvain" ,python-louvain) + ("python-louvain" ,python-louvain-0.6) ("python-matplotlib" ,python-matplotlib) ("python-natsort" ,python-natsort) ("python-networkx" ,python-networkx) @@ -14979,6 +15097,299 @@ signaling, and more. It continues to be evolved and expanded by an international community.") (license license:lgpl2.1+))) +(define-public kraken2 + (package + (name "kraken2") + (version "2.1.1") + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/DerrickWood/kraken2") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "0h7a7vygd7y5isbrnc6srwq6xj1rmyd33pm8mmcgfkmlxlg5vkg3")))) + (build-system gnu-build-system) + (arguments + `(#:tests? #false ; there are none + #:make-flags (list "-C" "src" + (string-append "KRAKEN2_DIR=" + (assoc-ref %outputs "out") "/bin")) + #:phases + (modify-phases %standard-phases + (delete 'configure) + (add-before 'install 'install-scripts + (lambda* (#:key outputs #:allow-other-keys) + (let* ((bin (string-append (assoc-ref outputs "out") "/bin")) + (replacements `(("KRAKEN2_DIR" . ,bin) + ("VERSION" . ,,version)))) + (mkdir-p bin) + + (with-directory-excursion "scripts" + (let ((scripts (find-files "." 
".*"))) + (substitute* scripts + (("#####=([^=]+)=#####" _ key) + (or (assoc-ref replacements key) + (error (format #false "unknown key: ~a~%" key))))) + (substitute* "kraken2" + (("compression_program = \"bzip2\"") + (string-append "compression_program = \"" + (which "bzip2") + "\"")) + (("compression_program = \"gzip\"") + (string-append "compression_program = \"" + (which "gzip") + "\""))) + (substitute* '("download_genomic_library.sh" + "download_taxonomy.sh" + "16S_gg_installation.sh" + "16S_silva_installation.sh" + "16S_rdp_installation.sh") + (("wget") (which "wget"))) + (substitute* '("download_taxonomy.sh" + "download_genomic_library.sh" + "rsync_from_ncbi.pl") + (("rsync -") + (string-append (which "rsync") " -"))) + (substitute* "mask_low_complexity.sh" + (("which") (which "which"))) + (substitute* '("mask_low_complexity.sh" + "download_genomic_library.sh" + "16S_silva_installation.sh") + (("sed -e ") + (string-append (which "sed") " -e "))) + (substitute* '("rsync_from_ncbi.pl" + "16S_rdp_installation.sh" + "16S_silva_installation.sh" + "16S_gg_installation.sh" + "download_taxonomy.sh" + "download_genomic_library.sh") + (("gunzip") (which "gunzip"))) + (for-each (lambda (script) + (chmod script #o555) + (install-file script bin)) + scripts))))))))) + (inputs + `(("gzip" ,gzip) + ("perl" ,perl) + ("rsync" ,rsync) + ("sed" ,sed) + ("wget" ,wget) + ("which" ,which))) + (home-page "https://github.com/DerrickWood/kraken2") + (synopsis "Taxonomic sequence classification system") + (description "Kraken is a taxonomic sequence classifier that assigns +taxonomic labels to DNA sequences. Kraken examines the k-mers within a query +sequence and uses the information within those k-mers to query a +database. 
That database maps k-mers to the lowest common ancestor (LCA) of all +genomes known to contain a given k-mer.") + (license license:expat))) + +(define-public lofreq + (package + (name "lofreq") + (version "2.1.5") + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/CSB5/lofreq") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "0qssrn3mgjak7df6iqc1rljqd3g3a5syvg0lsv4vds43s3fq23bl")))) + (build-system gnu-build-system) + (arguments + '(#:test-target "bug-tests" + #:tests? #false)) ; test data are not included + (inputs + `(("htslib" ,htslib) + ("python" ,python-wrapper) + ("zlib" ,zlib))) + (native-inputs + `(("autoconf" ,autoconf) + ("automake" ,automake) + ("which" ,which))) + (home-page "https://csb5.github.io/lofreq/") + (synopsis "Sensitive variant calling from sequencing data") + (description "LoFreq is a fast and sensitive variant-caller for inferring +SNVs and indels from next-generation sequencing data. It makes full use of +base-call qualities and other sources of errors inherent in +sequencing (e.g. mapping or base/indel alignment uncertainty), which are +usually ignored by other methods or only used for filtering.") + (license license:expat))) + +(define-public python-pyliftover + (package + (name "python-pyliftover") + (version "0.4") + ;; The version on PyPI does not include test data. + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/konstantint/pyliftover") + (commit version))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "1j8jp9iynv2l3jv5pr0pn0p3azlama1bqg233piglzm6bqh3m2m3")))) + (build-system python-build-system) + (arguments `(#:tests? 
#false)) ; the tests access the web + (native-inputs + `(("python-pytest" ,python-pytest))) + (home-page "https://github.com/konstantint/pyliftover") + (synopsis "Python implementation of UCSC liftOver genome coordinate conversion") + (description + "PyLiftover is a library for quick and easy conversion of genomic (point) +coordinates between different assemblies.") + (license license:expat))) + +(define-public ensembl-vep + (let* ((api-version "103") + (api-module + (lambda (name hash) + (origin (method git-fetch) + (uri (git-reference + (url (string-append "https://github.com/Ensembl/" + name ".git")) + (commit (string-append "release/" api-version)))) + (file-name (string-append name "-" api-version "-checkout")) + (sha256 (base32 hash)))))) + (package + (name "ensembl-vep") + (version (string-append api-version ".1")) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/Ensembl/ensembl-vep.git") + (commit (string-append "release/" version)))) + (sha256 + (base32 + "1iq7p72cv9b38jz2v8a4slzy2n8y0md487943180ym9xc8qvw09c")))) + (build-system gnu-build-system) + (arguments + `(#:modules ((guix build gnu-build-system) + (guix build utils) + (ice-9 match)) + #:phases + (modify-phases %standard-phases + (delete 'configure) + (delete 'build) + ;; Tests need to run after installation + (delete 'check) + (replace 'install + (lambda* (#:key inputs outputs #:allow-other-keys) + (let* ((modules '(("ensembl" "/") + ("ensembl-variation" "/Variation") + ("ensembl-funcgen" "/Funcgen") + ("ensembl-io" "/"))) + (scripts '(("convert_cache.pl" "vep_convert_cache.pl") + ("INSTALL.pl" "vep_install.pl") + ("haplo" #f) + ("variant_recoder" #f) + ("filter_vep" #f) + ("vep" #f))) + (out (assoc-ref outputs "out")) + (bin (string-append out "/bin")) + (perl (string-append out "/lib/perl5/site_perl"))) + (for-each + (match-lambda + ((name path) + (let ((dir (string-append perl "/Bio/EnsEMBL" path))) + (mkdir-p dir) + (copy-recursively + (string-append 
(assoc-ref inputs (string-append "api-module-" name)) + "/modules/Bio/EnsEMBL" path) + dir)))) + modules) + (copy-recursively "modules/" perl) + (mkdir-p bin) + (for-each + (match-lambda + ((script new-name) + (let ((location (string-append bin "/" + (or new-name (basename script))))) + (copy-file script location) + (chmod location #o555) + (wrap-program location + `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB") + ,perl)))))) + scripts) + + ;; Fix path to tools + (with-directory-excursion (string-append perl "/Bio/EnsEMBL") + (substitute* '("Funcgen/RunnableDB/ProbeMapping/PrePipelineChecks.pm" + "VEP/BaseRunner.pm" + "VEP/Utils.pm" + "VEP/AnnotationSource/Cache/VariationTabix.pm" + "VEP/AnnotationSource/Cache/BaseSerialized.pm" + "Variation/Utils/BaseVepTabixPlugin.pm" + "Variation/Utils/VEP.pm" + "Variation/Pipeline/ReleaseDataDumps/PreRunChecks.pm") + (("`which") + (string-append "`" + (assoc-ref inputs "which") + "/bin/which"))))))) + (add-after 'install 'check + (lambda* (#:key tests? inputs outputs #:allow-other-keys) + (when tests? 
+ (setenv "PERL5LIB" + (string-append (getenv "PERL5LIB") + ":" + (assoc-ref outputs "out") + "/lib/perl5/site_perl")) + (copy-recursively (string-append (assoc-ref inputs "source") "/t") + "/tmp/t") + (for-each make-file-writable (find-files "/tmp/t")) + ;; TODO: haplo needs Set/IntervalTree.pm + (invoke "perl" "-e" (string-append " +use Test::Harness; use Test::Exception; +my $dirname = \"/tmp\"; +opendir TEST, \"$dirname\\/t\"; +my @test_files = map {\"$dirname\\/t\\/\".$_} grep {!/^\\./ && /\\.t$/} readdir TEST; closedir TEST; +@test_files = grep {!/Haplo/} @test_files; +runtests(@test_files); +")))))))) + (inputs + `(("bioperl-minimal" ,bioperl-minimal) + ("perl-bio-db-hts" ,perl-bio-db-hts) + ("perl-dbi" ,perl-dbi) + ("perl-dbd-mysql" ,perl-dbd-mysql) + ("perl-libwww" ,perl-libwww) + ("perl-http-tiny" ,perl-http-tiny) + ("perl-json" ,perl-json) + ("which" ,which))) + (propagated-inputs + `(("kentutils" ,kentutils))) + (native-inputs + `(("unzip" ,unzip) + ("perl" ,perl) + ("api-module-ensembl" + ,(api-module "ensembl" + "0s59rj905g72hljzfpvnx5nxwz925b917y4jp912i23f5gwxh14v")) + ("api-module-ensembl-variation" + ,(api-module "ensembl-variation" + "1dvwdzzfjhzymq02b6n4p6j3a9q4jgq0g89hs7hj1apd7zhirgkq")) + ("api-module-ensembl-funcgen" + ,(api-module "ensembl-funcgen" + "1x23pv38dmv0w0gby6rv3wds50qghb4v3v1mf43vk55msfxzry8n")) + ("api-module-ensembl-io" + ,(api-module "ensembl-io" + "14adb2x934lzsq20035mazdkhrkcw0qzb0xhz6zps9vk4wixwaix")) + ("perl-test-harness" ,perl-test-harness) + ("perl-test-exception" ,perl-test-exception))) + (home-page "http://www.ensembl.org/vep") + (synopsis "Predict functional effects of genomic variants") + (description + "This package provides a Variant Effect Predictor, which predicts +the functional effects of genomic variants. 
It also provides +Haplosaurus, which uses phased genotype data to predict +whole-transcript haplotype sequences, and Variant Recoder, which +translates between different variant encodings.") + (license license:asl2.0)))) + (define-public r-signac (let ((commit "e0512d348adeda4a3f23a2e8f56d1fe09840e03c") (revision "1")) |