From 803fb336d62ea65990e263ce58d8552e04c9c038 Mon Sep 17 00:00:00 2001 From: Maxim Cournoyer Date: Thu, 28 Mar 2019 00:26:01 -0400 Subject: import: pypi: Improve parsing of requirement specifications. The previous solution was fragile and could leave unwanted characters in a requirement name, such as '[' or ']'. Partially fixes . * guix/import/pypi.scm (use-modules): Export SPECIFICATION->REQUIREMENT-NAME (%requirement-name-regexp): New variable. (clean-requirement): Rename to... (specification->requirement-name): this, which now uses %requirement-name-regexp to select the requirement name from the requirement specification. (parse-requires.txt): Adapt. --- guix/import/pypi.scm | 54 ++++++++++++++++++++++++++++++++++++++-------------- tests/pypi.scm | 12 ++++++++++++ 2 files changed, 52 insertions(+), 14 deletions(-) diff --git a/guix/import/pypi.scm b/guix/import/pypi.scm index d9db876222..6a881bda12 100644 --- a/guix/import/pypi.scm +++ b/guix/import/pypi.scm @@ -48,6 +48,7 @@ #:use-module ((guix licenses) #:prefix license:) #:use-module (guix build-system python) #:export (parse-requires.txt + specification->requirement-name guix-package->pypi-name pypi-recursive-import pypi->guix-package @@ -118,22 +119,47 @@ package definition." ((package-inputs ...) `((propagated-inputs (,'quasiquote ,package-inputs)))))) -(define (clean-requirement s) - ;; Given a requirement LINE, as can be found in a setuptools requires.txt - ;; file, remove everything other than the actual name of the required - ;; package, and return it. - (cond - ((string-index s (char-set #\space #\> #\= #\<)) => (cut string-take s <>)) - (else s))) +(define %requirement-name-regexp + ;; Regexp to match the requirement name in a requirement specification. + + ;; Some grammar, taken from PEP-0508 (see: + ;; https://www.python.org/dev/peps/pep-0508/). + + ;; Using this grammar makes the PEP-0508 regexp easier to understand for + ;; humans. The use of a regexp is preferred to more primitive string + ;; manipulations because we can more directly match what upstream uses + ;; (again, per PEP-0508). The regexp approach is also easier to extend, + ;; should we want to implement more completely the grammar of PEP-0508. + + ;; The unified rule can be expressed as: + ;; specification = wsp* ( url_req | name_req ) wsp* + + ;; where url_req is: + ;; url_req = name wsp* extras? wsp* urlspec wsp+ quoted_marker? + + ;; and where name_req is: + ;; name_req = name wsp* extras? wsp* versionspec? wsp* quoted_marker? + + ;; Thus, we need only matching NAME, which is expressed as: + ;; identifer_end = letterOrDigit | (('-' | '_' | '.' )* letterOrDigit) + ;; identifier = letterOrDigit identifier_end* + ;; name = identifier + (let* ((letter-or-digit "[A-Za-z0-9]") + (identifier-end (string-append "(" letter-or-digit "|" + "[-_.]*" letter-or-digit ")")) + (identifier (string-append "^" letter-or-digit identifier-end "*")) + (name identifier)) + (make-regexp name))) + +(define (specification->requirement-name spec) + "Given a specification SPEC, return the requirement name." + (match:substring + (or (regexp-exec %requirement-name-regexp spec) + (error (G_ "Could not extract requirement name in spec:") spec)))) (define (parse-requires.txt requires.txt) "Given REQUIRES.TXT, a Setuptools requires.txt file, return a list of requirement names." - ;; This is a very incomplete parser, whose job is to select the non-optional - ;; dependencies and strip them out of any version information. - ;; Alternatively, we could implement a PEG parser with the (ice-9 peg) - ;; library and the requirements grammar defined by PEP-0508 - ;; (https://www.python.org/dev/peps/pep-0508/). (define (comment? line) ;; Return #t if the given LINE is a comment, #f otherwise. @@ -156,7 +182,7 @@ requirement names." ((or (string-null? line) (comment? line)) (loop result)) (else - (loop (cons (clean-requirement line) + (loop (cons (specification->requirement-name line) result)))))))))) (define (guess-requirements source-url wheel-url tarball) @@ -198,7 +224,7 @@ cannot determine package dependencies")) (hash-ref (list-ref run_requires 0) "requires") '()))) - (map clean-requirement requirements))))) + (map specification->requirement-name requirements))))) (lambda () (delete-file json-file) (rmdir dirname)))))) diff --git a/tests/pypi.scm b/tests/pypi.scm index 03455ba6be..c40be6c21d 100644 --- a/tests/pypi.scm +++ b/tests/pypi.scm @@ -55,6 +55,14 @@ (define test-source-hash "") +(define test-specifications + '("Fizzy [foo, bar]" + "PickyThing<1.6,>1.9,!=1.9.6,<2.0a0,==2.4c1" + "SomethingWithMarker[foo]>1.0;python_version<\"2.7\"" + "requests [security,tests] >= 2.8.1, == 2.8.* ; python_version < \"2.7\"" + "pip @ https://github.com/pypa/pip/archive/1.3.1.zip#\ +sha1=da9234ee9982d4bbb3c72346a6de940a148ea686")) + (define test-requires.txt "\ # A comment # A comment after a space @@ -109,6 +117,10 @@ pytest (>=2.5.0) (uri (list "https://bitheap.org/cram/cram-0.7.tar.gz" (pypi-uri "cram" "0.7")))))))) +(test-equal "specification->requirement-name" + '("Fizzy" "PickyThing" "SomethingWithMarker" "requests" "pip") + (map specification->requirement-name test-specifications)) + (test-equal "parse-requires.txt, with sections" '("foo" "bar") (mock ((ice-9 ports) call-with-input-file -- cgit 1.4.1