summary refs log tree commit diff
path: root/gnu/packages/bioinformatics.scm
diff options
context:
space:
mode:
author Ricardo Wurmus <rekado@elephly.net> 2019-03-13 15:14:10 +0100
committer Ricardo Wurmus <rekado@elephly.net> 2019-03-13 15:14:10 +0100
commit 0b5d12ccdfdbfe9820e1236420288d1380ed87a3 (patch)
tree 49361d262d3eff49def06b27a166cb423688924a /gnu/packages/bioinformatics.scm
parent 984a8aa64462b6ecf197f756260e0f83016ac575 (diff)
download guix-0b5d12ccdfdbfe9820e1236420288d1380ed87a3.tar.gz
gnu: Add tetoolkit.
* gnu/packages/bioinformatics.scm (tetoolkit): New variable.
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r-- gnu/packages/bioinformatics.scm 66
1 file changed, 66 insertions, 0 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index e29f2a7a39..97d2404bf3 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -1752,6 +1752,72 @@ high-throughput sequencing data – with an emphasis on simplicity.")
 (define-public python2-plastid
   (package-with-python2 python-plastid))
 
+(define-public tetoolkit
+  (package
+    (name "tetoolkit")
+    (version "2.0.3")
+    (source (origin
+              (method git-fetch)
+              (uri (git-reference
+                    (url "https://github.com/mhammell-laboratory/tetoolkit.git")
+                    (commit version)))
+              (file-name (git-file-name name version))
+              (sha256
+               (base32
+                "1yzi0kfpzip8zpjb82x1ik6h22yzfyjiz2dv85v6as2awwqvk807"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:python ,python-2               ; not guaranteed to work with Python 3
+       #:phases
+       (modify-phases %standard-phases
+         ;; Make every file of the unpacked source tree writable.
+         (add-after 'unpack 'make-writable
+           (lambda _
+             (for-each make-file-writable (find-files "."))
+             #t))
+         ;; Replace bare command names with the absolute file names that
+         ;; `which' reports.  Anchored on make-writable: a second clause
+         ;; added after unpack would be inserted ahead of make-writable.
+         (add-after 'make-writable 'patch-invocations
+           (lambda _
+             (substitute* '("bin/TEtranscripts" "bin/TEcount")
+               (("'sort ") (string-append "'" (which "sort") " "))
+               (("'rm -f ") (string-append "'" (which "rm") " -f "))
+               (("'Rscript'") (string-append "'" (which "Rscript") "'")))
+             (substitute* "TEToolkit/IO/ReadInputs.py"
+               (("BamToBED") (which "bamToBed")))
+             (substitute* "TEToolkit/Normalization.py"
+               (("\"Rscript\"") (string-append "\"" (which "Rscript") "\"")))
+             #t))
+         (add-after 'install 'wrap-program
+           (lambda* (#:key outputs #:allow-other-keys)
+             ;; Make sure the executables find R packages: each wrapper sets
+             ;; R_LIBS_SITE to the value it has in the build environment.
+             (let ((out (assoc-ref outputs "out")))
+               (for-each
+                (lambda (script)
+                  (wrap-program (string-append out "/bin/" script)
+                    `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
+                '("TEtranscripts" "TEcount")))
+             #t)))))
+    (inputs
+     `(("coreutils" ,coreutils)
+       ("bedtools" ,bedtools)
+       ("python-argparse" ,python2-argparse)
+       ("python-pysam" ,python2-pysam)
+       ("r-minimal" ,r-minimal)
+       ("r-deseq2" ,r-deseq2)))
+    (home-page "https://github.com/mhammell-laboratory/tetoolkit")
+    (synopsis "Transposable elements in differential enrichment analysis")
+    (description
+     "This is a package for including transposable elements in differential
+enrichment analysis of sequencing datasets.  TEtranscripts and TEcount take
+RNA-seq (and similar data) and annotate reads to both genes and transposable
+elements.  TEtranscripts then performs differential analysis using DESeq2.
+Note that TEtranscripts and TEcount rely on specially curated GTF files, which
+are not included due to their size.")
+    (license license:gpl3+)))
+
 (define-public cd-hit
   (package
     (name "cd-hit")