summary refs log tree commit diff
diff options
context:
space:
mode:
author Ricardo Wurmus <rekado@elephly.net> 2024-01-23 21:26:50 +0100
committer Ricardo Wurmus <rekado@elephly.net> 2024-01-23 21:26:50 +0100
commit ebad9d6c0ebb02ca5825a5984ad2bd795d1cbf59 (patch)
tree 555e1eaa944fa1555c4d67ff48d0245a77d9df0d
parent f7373261e918b9601ae9f18db770fb0b0ac3999c (diff)
download guix-ebad9d6c0ebb02ca5825a5984ad2bd795d1cbf59.tar.gz
gnu: Add python-pyfasta.
* gnu/packages/bioinformatics.scm (python-pyfasta): New variable.

Change-Id: Ib2ab0a6eb250309633d5be33c18409227bce84ba
-rw-r--r-- gnu/packages/bioinformatics.scm 84
1 files changed, 78 insertions, 6 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index ec076b783e..5e30cf1ec7 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -4820,17 +4820,89 @@ gkm-SVM.")
 accessing bigWig files.")
     (license license:expat)))
 
+(define-public python-pyfasta
+  ;; Fetch from Git: the release on PyPI does not contain the test data files.
+  (let ((commit "c2f0611c5311f1b1466f2d56560447898b4a8b03")
+        (revision "1"))                 ; snapshot counter for git-version
+    (package
+      (name "python-pyfasta")
+      (version (git-version "0.5.2" revision commit))
+      (source
+       (origin
+         (method git-fetch)
+         (uri (git-reference
+               (url "https://github.com/brentp/pyfasta")
+               (commit commit)))
+         (file-name (git-file-name name version))
+         (sha256
+          (base32
+           "0a189id3fbv88gssyk6adbmz2ll1mqpmyw8vxmx3fi955gvaq9j7"))))
+      (build-system pyproject-build-system)
+      (arguments
+       (list
+        #:phases
+        '(modify-phases %standard-phases
+           (add-after 'unpack 'python3.10-compat ; port Python 2 idioms
+             (lambda _
+               (substitute* "pyfasta/__init__.py" ; package-qualify imports
+                 (("from fasta import")
+                  "from pyfasta.fasta import")
+                 (("from records import")
+                  "from pyfasta.records import")
+                 (("from split_fasta import")
+                  "from pyfasta.split_fasta import")
+                 (("in f.iteritems")    ; iteritems -> items
+                  "in f.items"))
+               (substitute* "pyfasta/fasta.py"
+                 (("from collections import Mapping")
+                  "from collections.abc import Mapping")
+                 (("from records import")
+                  "from pyfasta.records import"))
+               (substitute* "pyfasta/records.py"
+                 (("cPickle") "pickle") ; Python 3 module name
+                 (("\\(int, long\\)")   ; Python 3 has no `long' type
+                  "(int, int)")
+                 ;; XXX: it's not clear if this is really correct.
+                 (("buffer\\(self\\)")
+                  "memoryview(bytes(str(self), encoding='utf-8'))")
+                 (("sys.maxint") "sys.maxsize"))
+               (substitute* "pyfasta/split_fasta.py"
+                 (("from cStringIO import")
+                  "from io import")
+                 (("in lens.iteritems") "in lens.items"))
+               (substitute* "tests/test_all.py" ; adapt the test suite
+                 (("f.keys\\(\\)\\) == \\['a-extra'")
+                  "list(f.keys())) == ['a-extra'")
+                 (("f.iterkeys\\(\\)") "iter(f.keys())")
+                 (("tests/data/" m)     ; make test data paths absolute
+                  (string-append (getcwd) "/" m))))))))
+      (propagated-inputs (list python-numpy))
+      (native-inputs (list python-nose))
+      (home-page "https://github.com/brentp/pyfasta/")
+      (synopsis "Pythonic access to fasta sequence files")
+      (description
+       "This library provides fast, memory-efficient, pythonic (and
+command-line) access to fasta sequence files.  It stores a flattened version
+of a fasta sequence file without spaces or headers and uses either a
+@code{mmap} in numpy binary format or @code{fseek}/@code{fread} so the
+sequence data is never read into memory.  It saves a pickle (@code{.gdx}) of
+the start and stop (for @code{fseek}/@code{mmap}) locations of each header in
+the fasta file for internal use.
+
+Note that this package has been deprecated in favor of @code{pyfaidx}.")
+      (license license:expat))))
+
 (define-public python-schema-salad
   (package
     (name "python-schema-salad")
     (version "8.2.20211116214159")
     (source
-      (origin
-        (method url-fetch)
-        (uri (pypi-uri "schema-salad" version))
-        (sha256
-         (base32
-          "005dh2y45x92zl8sf2sqjmfvcqr4hrz8dfckgkckv87003v7lwqc"))))
+     (origin
+       (method url-fetch)
+       (uri (pypi-uri "schema-salad" version))
+       (sha256
+        (base32
+         "005dh2y45x92zl8sf2sqjmfvcqr4hrz8dfckgkckv87003v7lwqc"))))
     (build-system pyproject-build-system)
     (arguments
      `(#:phases