From 31def9a9df583dd27f8604302700ff48368c43f3 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Tue, 21 Apr 2020 13:56:33 +0300 Subject: gnu: Add grocsvs. * gnu/packages/bioinformatics.scm (grocsvs): New variable. * gnu/packages/patches/grocsvs-dont-use-admiral.patch: New file. * gnu/local.mk (dist_patch_DATA): Register it. --- gnu/packages/bioinformatics.scm | 42 +++++++++++++ .../patches/grocsvs-dont-use-admiral.patch | 69 ++++++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 gnu/packages/patches/grocsvs-dont-use-admiral.patch (limited to 'gnu/packages') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index 65b44568e0..40f75e9e0c 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -79,6 +79,7 @@ #:use-module (gnu packages golang) #:use-module (gnu packages glib) #:use-module (gnu packages graph) + #:use-module (gnu packages graphviz) #:use-module (gnu packages groff) #:use-module (gnu packages gtk) #:use-module (gnu packages guile) @@ -15853,3 +15854,44 @@ biological processes. SBML is useful for models of metabolism, cell signaling, and more. It continues to be evolved and expanded by an international community.") (license license:lgpl2.1+))) + +(define-public grocsvs + ;; The last release is out of date and new features have been added. + (let ((commit "ecd956a65093a0b2c41849050e4512d46fecea5d") + (revision "1")) + (package + (name "grocsvs") + (version (git-version "0.2.6.1" revision commit)) + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/grocsvs/grocsvs") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 "14505725gr7qxc17cxxf0k6lzcwmgi64pija4mwf29aw70qn35cc")) + (patches (search-patches "grocsvs-dont-use-admiral.patch")))) + (build-system python-build-system) + (arguments + `(#:tests? #f ; No test suite. + #:python ,python-2)) ; Only python-2 supported. + (inputs + `(("python2-h5py" ,python2-h5py) + ("python2-ipython-cluster-helper" ,python2-ipython-cluster-helper) + ("python2-networkx" ,python2-networkx) + ("python2-psutil" ,python2-psutil) + ("python2-pandas" ,python2-pandas) + ("python2-pybedtools" ,python2-pybedtools) + ("python2-pyfaidx" ,python2-pyfaidx) + ("python2-pygraphviz" ,python2-pygraphviz) + ("python2-pysam" ,python2-pysam) + ("python2-scipy" ,python2-scipy))) + (home-page "https://github.com/grocsvs/grocsvs") + (synopsis "Genome-wide reconstruction of complex structural variants") + (description + "@dfn{Genome-wide Reconstruction of Complex Structural Variants} +(GROC-SVs) is a software pipeline for identifying large-scale structural +variants, performing sequence assembly at the breakpoints, and reconstructing +the complex structural variants using the long-fragment information from the +10x Genomics platform.") + (license license:expat)))) diff --git a/gnu/packages/patches/grocsvs-dont-use-admiral.patch b/gnu/packages/patches/grocsvs-dont-use-admiral.patch new file mode 100644 index 0000000000..cb976e19b0 --- /dev/null +++ b/gnu/packages/patches/grocsvs-dont-use-admiral.patch @@ -0,0 +1,69 @@ +python-admiral doesn't have a license +https://github.com/nspies/admiral/issues/3 + +diff --git a/setup.py b/setup.py +index 692b6a0..568f381 100755 +--- a/setup.py ++++ b/setup.py +@@ -20,7 +20,7 @@ setup( + 'console_scripts' : ["grocsvs = grocsvs.main:main"] + }, + +- install_requires = ["admiral", "h5py", "networkx>=2.0", "pandas", "pybedtools", ++ install_requires = ["h5py", "networkx>=2.0", "pandas", "pybedtools", + "pyfaidx", "pysam>=0.10.0", "scipy", "ipython-cluster-helper", + "pygraphviz", "psutil"], + +diff --git a/src/grocsvs/jobmanagers.py b/src/grocsvs/jobmanagers.py +index 6da0b58..112d7ff 100755 +--- a/src/grocsvs/jobmanagers.py ++++ b/src/grocsvs/jobmanagers.py +@@ -41,34 +41,3 @@ class MultiprocessingCluster(Cluster): + pool = multiprocessing.Pool(processes=self.processes) + return pool.map_async(fn, args).get(999999) + +- +-class AdmiralCluster(Cluster): +- def map(self, fn, args): +- from admiral import jobmanagers, remote +- +- cluster_options = self.cluster_settings.cluster_options.copy() +- +- scheduler = cluster_options.pop("scheduler") +- +- jobmanager_class = jobmanagers.get_jobmanager(scheduler) +- jobmanager = jobmanager_class( +- batch_dir=self.batch_dir, log_dir=self.batch_dir) +- +- +- if not "mem" in cluster_options: +- cluster_options["mem"] = "16g" +- if not "time" in cluster_options: +- cluster_options["time"] = "12h" +- +- jobs = [] +- #for i, arg in enumerate(args): +- +- job_name = args[0].__class__.__name__ +- args = [[arg] for arg in args] +- job = remote.run_remote(fn, jobmanager, job_name, args=args, +- array=True, overwrite=True, **cluster_options) +- +- result = jobmanagers.wait_for_jobs([job], wait=5, progress=True) +- +- if not result: +- raise Exception("Some chunks failed to complete") +diff --git a/src/grocsvs/pipeline.py b/src/grocsvs/pipeline.py +index ab1bb2d..350976f 100755 +--- a/src/grocsvs/pipeline.py ++++ b/src/grocsvs/pipeline.py +@@ -8,8 +8,7 @@ from grocsvs import utilities + def make_jobmanager(jobmanager_settings, processes, batch_dir): + jobmanager_classes = {"IPCluster":jobmanagers.IPCluster, + "local": jobmanagers.LocalCluster, +- "multiprocessing": jobmanagers.MultiprocessingCluster, +- "admiral": jobmanagers.AdmiralCluster} ++ "multiprocessing": jobmanagers.MultiprocessingCluster} + + cls = jobmanager_classes[jobmanager_settings.cluster_type] + return cls(processes, jobmanager_settings, batch_dir) -- cgit 1.4.1