summary refs log tree commit diff
path: root/gnu/packages/bioinformatics.scm
diff options
context:
space:
mode:
author: Efraim Flashner <efraim@flashner.co.il>, 2021-10-31 12:47:14 +0200
committer: Efraim Flashner <efraim@flashner.co.il>, 2021-10-31 14:49:47 +0200
commit: bc5155b952ae8bdbc56aded4d8d39768b4e2a7d4 (patch)
tree: 6b55475d86c522543384dea7d1ab66bba32af63e /gnu/packages/bioinformatics.scm
parent: dac8d013bd1fc7f57b8ba3582eef6e0e01b23dfd (diff)
parent: 4e5000114ec01b5e92a87c52f2a10f9ba7a601c8 (diff)
download: guix-bc5155b952ae8bdbc56aded4d8d39768b4e2a7d4.tar.gz
Merge remote-tracking branch 'origin/master' into core-updates-frozen
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r-- gnu/packages/bioinformatics.scm 369
1 files changed, 364 insertions, 5 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 229fd34d2a..3ddb91714a 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -106,6 +106,7 @@
   #:use-module (gnu packages java-compression)
   #:use-module (gnu packages jemalloc)
   #:use-module (gnu packages jupyter)
+  #:use-module (gnu packages libffi)
   #:use-module (gnu packages linux)
   #:use-module (gnu packages logging)
   #:use-module (gnu packages lsof)
@@ -1216,14 +1217,14 @@ sequencing.")
 (define-public python-biopython
   (package
     (name "python-biopython")
-    (version "1.73")
+    (version "1.76")
     (source (origin
               (method url-fetch)
               ;; use PyPi rather than biopython.org to ease updating
               (uri (pypi-uri "biopython" version))
               (sha256
                (base32
-                "1q55jhf76z3k6is3psis0ckbki7df26x7dikpcc3vhk1vhkwribh"))))
+                "0wlch9xpa0fpgjzyxi6jsfca6iakaq9a05927xg8vqnmvaccnwrq"))))
     (build-system python-build-system)
     (arguments
      `(#:phases
@@ -1244,6 +1245,18 @@ dealing with alignments; code making it easy to split up parallelizable tasks
 into separate processes; and more.")
     (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
 
+(define-public python-biopython-1.73
+  (package                              ; older release; instrain uses it
+    (inherit python-biopython)          ; only version and source differ
+    (version "1.73")
+    (source (origin
+              (method url-fetch)
+              ;; Use PyPI rather than biopython.org to ease updating.
+              (uri (pypi-uri "biopython" version))
+              (sha256
+               (base32
+                "1q55jhf76z3k6is3psis0ckbki7df26x7dikpcc3vhk1vhkwribh"))))))
+
 (define-public python2-biopython
   (package-with-python2 python-biopython))
 
@@ -1270,6 +1283,133 @@ relying on a complex dependency tree.")
 (define-public python2-fastalite
   (package-with-python2 python-fastalite))
 
+(define-public ciri-long
+  (package
+    (name "ciri-long")
+    (version "1.0.2")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/bioinfo-biols/CIRI-long")
+             (commit (string-append "v" version))))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32
+         "10k88i1fcqchrrjv82rmylwvbwqfba0n51palhig9hsg71xs0dbi"))
+       ;; Delete the bundled binary.
+       (snippet '(delete-file "libs/ccs"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'relax-requirements
+           (lambda _
+             (substitute* "setup.py"
+               (("'argparse[^']*',") "") ; argparse is only needed for python2
+               (("==") ">="))))         ; turn exact version pins into minimums
+         (add-before 'build 'build-libssw ; compile the in-tree libssw.so first
+           (lambda _
+             (with-directory-excursion "libs/striped_smith_waterman"
+               (invoke "make" "libssw.so")))))))
+    (inputs
+     `(("python-biopython" ,python-biopython)
+       ("python-bwapy" ,python-bwapy)
+       ("python-cython" ,python-cython)
+       ("python-levenshtein" ,python-levenshtein)
+       ("python-mappy" ,python-mappy)
+       ("python-numpy" ,python-numpy)
+       ("python-pandas" ,python-pandas)
+       ("python-pysam" ,python-pysam)
+       ("python-pyspoa" ,python-pyspoa)
+       ("python-scikit-learn" ,python-scikit-learn)
+       ("python-scipy" ,python-scipy)))
+    (native-inputs
+     `(("python-nose" ,python-nose)
+       ("python-setuptools" ,python-setuptools)))
+    (home-page "https://ciri-cookbook.readthedocs.io/")
+    (synopsis "Circular RNA identification for Nanopore sequencing")
+    (description "CIRI-long is a package for circular RNA identification using
+long-read sequencing data.")
+    (license license:expat)))
+
+(define-public qtltools
+  (package
+    (name "qtltools")
+    (version "1.3.1")
+    (source (origin
+              (method url-fetch/tarbomb)
+              (uri (string-append "https://qtltools.github.io/qtltools/"
+                                  "binaries/QTLtools_" version
+                                  "_source.tar.gz"))
+              (sha256
+               (base32
+                "13gdry5l43abn3464fmk8qzrxgxnxah2612r66p9dzhhl92j30cd"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:tests? #f                      ; no tests included
+       #:make-flags                     ; point the Makefile at the inputs
+       (list (string-append "BOOST_INC="
+                            (assoc-ref %build-inputs "boost") "/include")
+             (string-append "BOOST_LIB="
+                            (assoc-ref %build-inputs "boost") "/lib")
+             (string-append "HTSLD_INC="
+                            (assoc-ref %build-inputs "htslib") "/include")
+             (string-append "HTSLD_LIB="
+                            (assoc-ref %build-inputs "htslib") "/lib")
+             (string-append "RMATH_INC="
+                            (assoc-ref %build-inputs "rmath-standalone")
+                            "/include")
+             (string-append "RMATH_LIB="
+                            (assoc-ref %build-inputs "rmath-standalone")
+                            "/lib"))
+       #:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'fix-linkage ; link boost dynamically, use openblas
+           (lambda _
+             (substitute* "qtltools/Makefile"
+               (("libboost_iostreams.a")
+                "libboost_iostreams.so")
+               (("libboost_program_options.a")
+                "libboost_program_options.so")
+               (("-lblas") "-lopenblas"))))
+         (add-before 'build 'chdir      ; build and install run from qtltools/
+           (lambda _ (chdir "qtltools")))
+         (replace 'configure            ; NOTE(review): 'fix-linkage already
+           (lambda _                    ; renamed the boost .a files; confirm
+             (substitute* "qtltools/Makefile" ; the LIB_FILES pattern matches.
+               (("LIB_FLAGS=-lz")
+                "LIB_FLAGS=-lz -lcrypto -lssl")
+               (("LIB_FILES=\\$\\(RMATH_LIB\\)/libRmath.a \
+\\$\\(HTSLD_LIB\\)/libhts.a \
+\\$\\(BOOST_LIB\\)/libboost_iostreams.a \
+\\$\\(BOOST_LIB\\)/libboost_program_options.a")
+                "LIB_FILES=$(RMATH_LIB)/libRmath.so \
+$(HTSLD_LIB)/libhts.so \
+$(BOOST_LIB)/libboost_iostreams.so \
+$(BOOST_LIB)/libboost_program_options.so"))))
+         (replace 'install              ; only the QTLtools binary is installed
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
+               (mkdir-p bin)
+               (install-file "bin/QTLtools" bin)))))))
+    (inputs
+     `(("curl" ,curl)
+       ("gsl" ,gsl)
+       ("boost" ,boost)
+       ("rmath-standalone" ,rmath-standalone)
+       ("htslib" ,htslib-1.3)
+       ("openssl" ,openssl)
+       ("openblas" ,openblas)
+       ("zlib" ,zlib)))
+    (home-page "https://qtltools.github.io/qtltools/")
+    (synopsis "Tool set for molecular QTL discovery and analysis")
+    (description "QTLtools is a tool set for molecular QTL discovery
+and analysis.  It allows going from the raw genetic sequence data to a
+collection of molecular @dfn{Quantitative Trait Loci} (QTLs) in a few
+easy-to-perform steps.")
+    (license license:gpl3+)))
+
 (define-public bpp-core
   ;; The last release was in 2014 and the recommended way to install from source
   ;; is to clone the git repository, so we do this.
@@ -7448,6 +7588,33 @@ clustering analysis, differential analysis, motif inference and exploration of
 single cell ATAC-seq sequencing data.")
     (license license:gpl3)))
 
+(define-public r-umi4cpackage
+  (let ((commit "88b07d896a137418ba6c31c2474b9dbe1d86fc20")
+        (revision "1"))
+    (package
+      (name "r-umi4cpackage")
+      (version (git-version "0.0.1" revision commit))
+      (source
+       (origin
+         (method git-fetch)
+         (uri (git-reference
+               (url "https://github.com/tanaylab/umi4cpackage")
+               (commit commit)))
+         (file-name (git-file-name name version))
+         (sha256
+          (base32 "0bjzamdw2lcfhlbzc0vdva87c3wwnij8jsvnrpx4wyyxvpcz13m5"))))
+      (properties `((upstream-name . "umi4cPackage"))) ; upstream capitalization
+      (build-system r-build-system)
+      (propagated-inputs
+       `(("r-misha" ,r-misha)
+         ("r-zoo" ,r-zoo)))
+      (native-inputs `(("r-knitr" ,r-knitr)))
+      (home-page "https://github.com/tanaylab/umi4cpackage")
+      (synopsis "Processing and analysis of UMI-4C contact profiles")
+      (description "This is a package that lets you process UMI-4C data from
+scratch to produce nice plots.")
+      (license license:expat))))
+
 (define-public r-shinycell
   (let ((commit
          "aecbd56e66802f28e397f5ae1f19403aadd12163")
@@ -7576,6 +7743,86 @@ of transcriptional heterogeneity among single cells.")
     ;; See https://github.com/hms-dbmi/scde/issues/38
     (license license:gpl2)))
 
+(define-public r-misha
+  (package
+    (name "r-misha")
+    (version "4.1.0")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/tanaylab/misha")
+             (commit version)))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32
+         "0bgivx3lzjh3173jsfrhb5kvhjsn53br0n4hmyx7i3dwy2cnnp2p"))
+       ;; Delete the bundled executable; kentutils' copy is used instead.
+       (snippet
+        '(delete-file "exec/bigWigToWig"))))
+    (build-system r-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'do-not-use-bundled-bigWigToWig
+           (lambda* (#:key inputs #:allow-other-keys)
+             (substitute* "R/misha.R"   ; hard-code the kentutils bigWigToWig
+               (("get\\(\".GLIBDIR\"\\), \"/exec/bigWigToWig")
+                (string-append "\""
+                               (assoc-ref inputs "kentutils")
+                               "/bin/bigWigToWig"))))))))
+    (inputs
+     `(("kentutils" ,kentutils)))       ; provides bin/bigWigToWig
+    (home-page "https://github.com/tanaylab/misha")
+    (synopsis "Toolkit for analysis of genomic data")
+    (description "This package is intended to help users to efficiently
+analyze genomic data resulting from various experiments.")
+    (license license:gpl2)))
+
+(define-public r-shaman
+  (let ((commit "d6944e8ac7bd1dbd5c6cec646eafc1d19d0ca96f")
+        (release "2.0")                 ; base version passed to git-version
+        (revision "2"))
+    (package
+      (name "r-shaman")
+      (version (git-version release revision commit))
+      (source (origin
+                (method git-fetch)
+                (uri (git-reference
+                      (url "https://github.com/tanaylab/shaman")
+                      (commit commit)))
+                (file-name (git-file-name name version))
+                (sha256
+                 (base32
+                  "03sx138dzpfiq23j49z0m0s4j79855mrg64hpj9c83408wzphxi6"))
+                (snippet
+                 ;; This file will be generated.
+                 '(delete-file "inst/doc/shaman-package.R"))))
+      (build-system r-build-system)
+      (propagated-inputs
+       `(("r-data-table" ,r-data-table)
+         ("r-domc" ,r-domc)
+         ("r-ggplot2" ,r-ggplot2)
+         ("r-gviz" ,r-gviz)
+         ("r-misha" ,r-misha)
+         ("r-plyr" ,r-plyr)
+         ("r-rann" ,r-rann)
+         ("r-rcpp" ,r-rcpp)
+         ("r-reshape2" ,r-reshape2)
+         ;; For vignettes.
+         ("r-rmarkdown" ,r-rmarkdown)
+         ("r-knitr" ,r-knitr)))
+      (home-page "https://github.com/tanaylab/shaman")
+      (synopsis "Sampling HiC contact matrices for a-parametric normalization")
+      (description "The Shaman package implements functions for
+resampling Hi-C matrices in order to generate expected contact
+distributions given constraints on marginal coverage and
+contact-distance probability distributions.  The package also provides
+support for visualizing normalized matrices and statistical analysis
+of contact distributions around selected landmarks.")
+      ;; Any version of the GPL.
+      (license license:gpl3+))))
+
 (define-public r-centipede
   (package
     (name "r-centipede")
@@ -7639,6 +7886,46 @@ includes software to
 ")
       (license license:cc0))))
 
+(define-public gdc-client
+  (package
+    (name "gdc-client")
+    (version "1.6.0")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/NCI-GDC/gdc-client.git")
+             (commit version)))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32
+         "0cagawlzjwj3wam10lv64xgbfx4zcnzxi5sjpsdhq7rn4z24mzc2"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'relax-requirements
+           (lambda _
+             (substitute* "requirements.txt"
+               (("==") ">=")))))))      ; turn exact version pins into minimums
+    (inputs
+     `(("python-cryptography" ,python-cryptography)
+       ("python-intervaltree" ,python-intervaltree)
+       ("python-jsonschema" ,python-jsonschema)
+       ("python-lxml" ,python-lxml)
+       ("python-ndg-httpsclient" ,python-ndg-httpsclient)
+       ("python-progressbar2" ,python-progressbar2)
+       ("python-pyasn1" ,python-pyasn1)
+       ("python-pyopenssl" ,python-pyopenssl)
+       ("python-pyyaml" ,python-pyyaml)
+       ("python-requests" ,python-requests)
+       ("python-termcolor" ,python-termcolor)))
+    (home-page "https://gdc.nci.nih.gov/access-data/gdc-data-transfer-tool")
+    (synopsis "GDC data transfer tool")
+    (description "The gdc-client provides several convenience functions over
+the GDC API which provides general download/upload via HTTPS.")
+    (license license:asl2.0)))
+
 (define-public vsearch
   (package
     (name "vsearch")
@@ -11642,14 +11929,14 @@ set.")
 (define-public instrain
   (package
     (name "instrain")
-    (version "1.5.2")
+    (version "1.5.4")
     (source
      (origin
        (method url-fetch)
        (uri (pypi-uri "inStrain" version))
        (sha256
         (base32
-         "0ykqlpf6yz4caihsaz3ys00cyvlr7wdj4s9a8rh56q5r8xf80ic0"))))
+         "05w1lw75x4lwkzg4qpi055g7hdjp9rnc4ksbxg2hfgksq9djk0hx"))))
     (build-system python-build-system)
     (arguments
      `(#:phases
@@ -11662,7 +11949,7 @@ set.")
                (("from job_utils")
                 "from .job_utils")))))))
     (inputs
-     `(("python-biopython" ,python-biopython)
+     `(("python-biopython" ,python-biopython-1.73)
        ("python-boto3" ,python-boto3)
        ("python-h5py" ,python-h5py)
        ("python-lmfit" ,python-lmfit)
@@ -14995,6 +15282,78 @@ line, interactively explore genomic data within Jupyter environment or web
 browser.")
     (license license:gpl3+)))
 
+(define-public python-pyspoa
+  (package
+    (name "python-pyspoa")
+    (version "0.0.5")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/nanoporetech/pyspoa")
+             (commit (string-append "v" version))
+             (recursive? #true)))       ; also fetch the git submodules
+       (file-name (git-file-name name version))
+       (sha256
+        (base32
+         "1lgf2shzhxkcsircd6vy46h27pjljd5q95fyz1cm3lkk702qbnzx"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-before 'build 'build-libspoa ; run cmake and make in src/build
+           (lambda _
+             (mkdir-p "src/build")
+             (with-directory-excursion "src/build"
+               (invoke "cmake"
+                       "-Dspoa_optimize_for_portability=ON"
+                       "-DCMAKE_BUILD_TYPE=Release"
+                       "-DCMAKE_CXX_FLAGS=\"-I ../vendor/cereal/include/\" -fPIC"
+                       "..")
+               (invoke "make"))))
+         (replace 'check                ; run the test script when tests? is set
+           (lambda* (#:key inputs outputs tests? #:allow-other-keys)
+             (when tests?
+               (add-installed-pythonpath inputs outputs)
+               (invoke "python" "tests/test_pyspoa.py")))))))
+    (propagated-inputs
+     `(("pybind11" ,pybind11)))
+    (native-inputs
+     `(("cmake" ,cmake-minimal)))
+    (home-page "https://github.com/nanoporetech/pyspoa")
+    (synopsis "Python bindings for the SIMD partial order alignment library")
+    (description
+     "This package provides Python bindings for spoa, a C++ implementation of
+the @dfn{partial order alignment} (POA) algorithm (as described in
+10.1093/bioinformatics/18.3.452) which is used to generate consensus
+sequences.")
+    (license license:expat)))
+
+(define-public python-bwapy
+  (package
+    (name "python-bwapy")
+    (version "0.1.4")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "bwapy" version))
+       (sha256
+        (base32 "090qwx3vl729zn3a7sksbviyg04kc71gpbm3nd8dalqp673x1npw"))))
+    (build-system python-build-system)
+    (propagated-inputs
+     `(("python-cffi" ,python-cffi)
+       ("python-setuptools" ,python-setuptools)
+       ("python-wheel" ,python-wheel)))
+    (inputs
+     `(("zlib" ,zlib)))
+    (home-page "https://github.com/ACEnglish/bwapy")
+    (synopsis "Python bindings to bwa aligner")
+    (description "This package provides Python bindings to the bwa mem
+aligner.")
+    ;; These Python bindings are licensed under Mozilla Public License 2.0,
+    ;; bwa itself is licensed under GNU General Public License v3.0.
+    (license license:mpl2.0)))
+
 (define-public scregseg
   (package
     (name "scregseg")