summary refs log tree commit diff
diff options
context:
space:
mode:
authorMaxim Cournoyer <maxim.cournoyer@gmail.com>2019-03-28 00:26:01 -0400
committerMaxim Cournoyer <maxim.cournoyer@gmail.com>2019-07-02 10:07:59 +0900
commit803fb336d62ea65990e263ce58d8552e04c9c038 (patch)
tree302306159a166490b7837135ba766960d5be5490
parentc4797121beea74ae93e3ce17677b9e72b8df920d (diff)
downloadguix-803fb336d62ea65990e263ce58d8552e04c9c038.tar.gz
import: pypi: Improve parsing of requirement specifications.
The previous solution was fragile and could leave unwanted characters in a
requirement name, such as '[' or ']'.

Partially fixes <https://bugs.gnu.org/33047>.

* guix/import/pypi.scm (use-modules): Export SPECIFICATION->REQUIREMENT-NAME
(%requirement-name-regexp): New variable.
(clean-requirement): Rename to...
(specification->requirement-name): this, which now uses
%requirement-name-regexp to select the requirement name from the requirement
specification.
(parse-requires.txt): Adapt.
-rw-r--r--guix/import/pypi.scm54
-rw-r--r--tests/pypi.scm12
2 files changed, 52 insertions, 14 deletions
diff --git a/guix/import/pypi.scm b/guix/import/pypi.scm
index d9db876222..6a881bda12 100644
--- a/guix/import/pypi.scm
+++ b/guix/import/pypi.scm
@@ -48,6 +48,7 @@
   #:use-module ((guix licenses) #:prefix license:)
   #:use-module (guix build-system python)
   #:export (parse-requires.txt
+            specification->requirement-name
             guix-package->pypi-name
             pypi-recursive-import
             pypi->guix-package
@@ -118,22 +119,47 @@ package definition."
     ((package-inputs ...)
      `((propagated-inputs (,'quasiquote ,package-inputs))))))
 
-(define (clean-requirement s)
-  ;; Given a requirement LINE, as can be found in a setuptools requires.txt
-  ;; file, remove everything other than the actual name of the required
-  ;; package, and return it.
-  (cond
-   ((string-index s (char-set #\space #\> #\= #\<)) => (cut string-take s <>))
-   (else s)))
+(define %requirement-name-regexp
+  ;; Regexp to match the requirement name in a requirement specification.
+
+  ;; Some grammar, taken from PEP-0508 (see:
+  ;; https://www.python.org/dev/peps/pep-0508/).
+
+  ;; Using this grammar makes the PEP-0508 regexp easier to understand for
+  ;; humans.  The use of a regexp is preferred to more primitive string
+  ;; manipulations because we can more directly match what upstream uses
+  ;; (again, per PEP-0508).  The regexp approach is also easier to extend,
+  ;; should we want to implement more completely the grammar of PEP-0508.
+
+  ;; The unified rule can be expressed as:
+  ;; specification = wsp* ( url_req | name_req ) wsp*
+
+  ;; where url_req is:
+  ;; url_req = name wsp* extras? wsp* urlspec wsp+ quoted_marker?
+
+  ;; and where name_req is:
+  ;; name_req = name wsp* extras? wsp* versionspec? wsp* quoted_marker?
+
+  ;; Thus, we need only matching NAME, which is expressed as:
+  ;; identifer_end = letterOrDigit | (('-' | '_' | '.' )* letterOrDigit)
+  ;; identifier    = letterOrDigit identifier_end*
+  ;; name          = identifier
+  (let* ((letter-or-digit "[A-Za-z0-9]")
+         (identifier-end (string-append "(" letter-or-digit "|"
+                                        "[-_.]*" letter-or-digit ")"))
+         (identifier (string-append "^" letter-or-digit identifier-end "*"))
+         (name identifier))
+    (make-regexp name)))
+
+(define (specification->requirement-name spec)
+  "Given a specification SPEC, return the requirement name."
+  (match:substring
+   (or (regexp-exec %requirement-name-regexp spec)
+       (error (G_ "Could not extract requirement name in spec:") spec))))
 
 (define (parse-requires.txt requires.txt)
   "Given REQUIRES.TXT, a Setuptools requires.txt file, return a list of
 requirement names."
-  ;; This is a very incomplete parser, whose job is to select the non-optional
-  ;; dependencies and strip them out of any version information.
-  ;; Alternatively, we could implement a PEG parser with the (ice-9 peg)
-  ;; library and the requirements grammar defined by PEP-0508
-  ;; (https://www.python.org/dev/peps/pep-0508/).
 
   (define (comment? line)
     ;; Return #t if the given LINE is a comment, #f otherwise.
@@ -156,7 +182,7 @@ requirement names."
                ((or (string-null? line) (comment? line))
                 (loop result))
                (else
-                (loop (cons (clean-requirement line)
+                (loop (cons (specification->requirement-name line)
                             result))))))))))
 
 (define (guess-requirements source-url wheel-url tarball)
@@ -198,7 +224,7 @@ cannot determine package dependencies"))
                                             (hash-ref (list-ref run_requires 0)
                                                        "requires")
                                             '())))
-                     (map clean-requirement requirements)))))
+                     (map specification->requirement-name requirements)))))
              (lambda ()
                (delete-file json-file)
                (rmdir dirname))))))
diff --git a/tests/pypi.scm b/tests/pypi.scm
index 03455ba6be..c40be6c21d 100644
--- a/tests/pypi.scm
+++ b/tests/pypi.scm
@@ -55,6 +55,14 @@
 (define test-source-hash
   "")
 
+(define test-specifications
+  '("Fizzy [foo, bar]"
+    "PickyThing<1.6,>1.9,!=1.9.6,<2.0a0,==2.4c1"
+    "SomethingWithMarker[foo]>1.0;python_version<\"2.7\""
+    "requests [security,tests] >= 2.8.1, == 2.8.* ; python_version < \"2.7\""
+    "pip @ https://github.com/pypa/pip/archive/1.3.1.zip#\
+sha1=da9234ee9982d4bbb3c72346a6de940a148ea686"))
+
 (define test-requires.txt "\
 # A comment
  # A comment after a space
@@ -109,6 +117,10 @@ pytest (>=2.5.0)
                     (uri (list "https://bitheap.org/cram/cram-0.7.tar.gz"
                                (pypi-uri "cram" "0.7"))))))))
 
+(test-equal "specification->requirement-name"
+  '("Fizzy" "PickyThing" "SomethingWithMarker" "requests" "pip")
+  (map specification->requirement-name test-specifications))
+
 (test-equal "parse-requires.txt, with sections"
   '("foo" "bar")
   (mock ((ice-9 ports) call-with-input-file