summary refs log tree commit diff
diff options
context:
space:
mode:
authorLudovic Courtès <ludo@gnu.org>2018-03-16 23:35:07 +0100
committerLudovic Courtès <ludo@gnu.org>2018-03-18 22:35:36 +0100
commite914b398af11f909e88a8bc85eeebb0768aacd54 (patch)
tree2e03d03eddd9e35bd34bce36768c2fad19c112e7
parent675e81a082b47aec7b3c2caa950953edb7c01c1e (diff)
downloadguix-e914b398af11f909e88a8bc85eeebb0768aacd54.tar.gz
glob: Support square brackets in patterns.
* guix/glob.scm (wildcard-indices): Remove.
(parse-bracket): New procedure.
(compile-glob-pattern): Rewrite.  Support square brackets for sets and
ranges.
(glob-match?): Support sets and ranges.
* tests/glob.scm (test-compile-glob-pattern)
(test-glob-match): New macros.
Use them to rewrite the existing tests, and add new tests.
-rw-r--r--guix/glob.scm95
-rw-r--r--tests/glob.scm67
2 files changed, 99 insertions, 63 deletions
diff --git a/guix/glob.scm b/guix/glob.scm
index 4fc5173ac0..29c335ca1d 100644
--- a/guix/glob.scm
+++ b/guix/glob.scm
@@ -25,20 +25,17 @@
 ;;;
 ;;; This is a minimal implementation of "glob patterns" (info "(libc)
 ;;; Globbbing").  It is currently limited to simple patterns and does not
-;;; support braces and square brackets, for instance.
+;;; support braces, for instance.
 ;;;
 ;;; Code:
 
-(define (wildcard-indices str)
-  "Return the list of indices in STR where wildcards can be found."
-  (let loop ((index 0)
-             (result '()))
-    (if (= index (string-length str))
-        (reverse result)
-        (loop (+ 1 index)
-              (case (string-ref str index)
-                ((#\? #\*) (cons index result))
-                (else      result))))))
+(define (parse-bracket chars)
+  "Parse CHARS, a list of characters that extracted from a '[...]' sequence."
+  (match chars
+    ((start #\- end)
+     `(range ,start ,end))
+    (lst
+     `(set ,@lst))))
 
 (define (compile-glob-pattern str)
   "Return an sexp that represents the compiled form of STR, a glob pattern
@@ -48,29 +45,43 @@ such as \"foo*\" or \"foo??bar\"."
       (((? string? str)) str)
       (x x)))
 
-  (let loop ((index   0)
-             (indices (wildcard-indices str))
+  (define (cons-string chars lst)
+    (match chars
+      (() lst)
+      (_ (cons (list->string (reverse chars)) lst))))
+
+  (let loop ((chars   (string->list str))
+             (pending '())
+             (brackets 0)
              (result '()))
-    (match indices
+    (match chars
       (()
-       (flatten (cond ((zero? index)
-                       (list str))
-                      ((= index (string-length str))
-                       (reverse result))
-                      (else
-                       (reverse (cons (string-drop str index)
-                                      result))))))
-      ((wildcard-index . rest)
-       (let ((wildcard (match (string-ref str wildcard-index)
+       (flatten (reverse (if (null? pending)
+                             result
+                             (cons-string pending result)))))
+      (((and chr (or #\? #\*)) . rest)
+       (let ((wildcard (match chr
                          (#\? '?)
                          (#\* '*))))
-         (match (substring str index wildcard-index)
-           (""  (loop (+ 1 wildcard-index)
-                      rest
-                      (cons wildcard result)))
-           (str (loop (+ 1 wildcard-index)
-                      rest
-                      (cons* wildcard str result)))))))))
+         (if (zero? brackets)
+             (loop rest '() 0
+                   (cons* wildcard (cons-string pending result)))
+             (loop rest (cons chr pending) brackets result))))
+      ((#\[ . rest)
+       (if (zero? brackets)
+           (loop rest '() (+ 1 brackets)
+                 (cons-string pending result))
+           (loop rest (cons #\[ pending) (+ 1 brackets) result)))
+      ((#\] . rest)
+       (cond ((zero? brackets)
+              (error "unexpected closing bracket" str))
+             ((= 1 brackets)
+              (loop rest '() 0
+                    (cons (parse-bracket (reverse pending)) result)))
+             (else
+              (loop rest (cons #\] pending) (- brackets 1) result))))
+      ((chr . rest)
+       (loop rest (cons chr pending) brackets result)))))
 
 (define (glob-match? pattern str)
   "Return true if STR matches PATTERN, a compiled glob pattern as returned by
@@ -78,11 +89,12 @@ such as \"foo*\" or \"foo??bar\"."
   (let loop ((pattern pattern)
              (str str))
    (match pattern
-     ((? string? literal) (string=? literal str))
-     (((? string? one))   (string=? one str))
-     (('*)  #t)
-     (('?) (= 1 (string-length str)))
-     (()    #t)
+     ((? string? literal)
+      (string=? literal str))
+     (()
+      (string-null? str))
+     (('*)
+      #t)
      (('* suffix . rest)
       (match (string-contains str suffix)
         (#f    #f)
@@ -92,6 +104,19 @@ such as \"foo*\" or \"foo??bar\"."
      (('? . rest)
       (and (>= (string-length str) 1)
            (loop rest (string-drop str 1))))
+     ((('range start end) . rest)
+      (and (>= (string-length str) 1)
+           (let ((chr (string-ref str 0)))
+             (and (char-set-contains? (ucs-range->char-set
+                                       (char->integer start)
+                                       (+ 1 (char->integer end)))
+                                      chr)
+                  (loop rest (string-drop str 1))))))
+     ((('set . chars) . rest)
+      (and (>= (string-length str) 1)
+           (let ((chr (string-ref str 0)))
+             (and (char-set-contains? (list->char-set chars) chr)
+                  (loop rest (string-drop str 1))))))
      ((prefix . rest)
       (and (string-prefix? prefix str)
            (loop rest (string-drop str (string-length prefix))))))))
diff --git a/tests/glob.scm b/tests/glob.scm
index 033eeb10fe..71e2d3fce0 100644
--- a/tests/glob.scm
+++ b/tests/glob.scm
@@ -23,36 +23,47 @@
 
 (test-begin "glob")
 
-(test-equal "compile-glob-pattern, no wildcards"
-  "foo"
-  (compile-glob-pattern "foo"))
+(define-syntax test-compile-glob-pattern
+  (syntax-rules (=>)
+    ((_ pattern => result rest ...)
+     (begin
+       (test-equal (format #f "compile-glob-pattern, ~s" pattern)
+         result
+         (compile-glob-pattern pattern))
+       (test-compile-glob-pattern rest ...)))
+    ((_)
+     #t)))
 
-(test-equal "compile-glob-pattern, Kleene star"
-  '("foo" * "bar")
-  (compile-glob-pattern "foo*bar"))
+(define-syntax test-glob-match
+  (syntax-rules (matches and not)
+    ((_ (pattern-string matches strings ... (and not others ...)) rest ...)
+     (begin
+       (test-assert (format #f "glob-match? ~s" pattern-string)
+         (let ((pattern (compile-glob-pattern pattern-string)))
+           (and (glob-match? pattern strings) ...
+                (not (glob-match? pattern others)) ...)))
+       (test-glob-match rest ...)))
+    ((_)
+     #t)))
 
-(test-equal "compile-glob-pattern, question mark"
-  '(? "foo" *)
-  (compile-glob-pattern "?foo*"))
+(test-compile-glob-pattern
+ "foo" => "foo"
+ "?foo*" => '(? "foo" *)
+ "foo[1-5]" => '("foo" (range #\1 #\5))
+ "foo[abc]bar" => '("foo" (set #\a #\b #\c) "bar")
+ "foo[a[b]c]bar" => '("foo" (set #\a #\[ #\b #\] #\c) "bar")
+ "[123]x" => '((set #\1 #\2 #\3) "x")
+ "[a-z]" => '((range #\a #\z)))
 
-(test-assert "literal match"
-  (let ((pattern (compile-glob-pattern "foo")))
-    (and (glob-match? pattern "foo")
-         (not (glob-match? pattern "foobar"))
-         (not (glob-match? pattern "barfoo")))))
-
-(test-assert "trailing star"
-  (let ((pattern (compile-glob-pattern "foo*")))
-    (and (glob-match? pattern "foo")
-         (glob-match? pattern "foobar")
-         (not (glob-match? pattern "xfoo")))))
-
-(test-assert "question marks"
-  (let ((pattern (compile-glob-pattern "foo??bar")))
-    (and (glob-match? pattern "fooxxbar")
-         (glob-match? pattern "fooZZbar")
-         (not (glob-match? pattern "foobar"))
-         (not (glob-match? pattern "fooxxxbar"))
-         (not (glob-match? pattern "fooxxbarzz")))))
+(test-glob-match
+ ("foo" matches "foo" (and not "foobar" "barfoo"))
+ ("foo*" matches "foo" "foobar" (and not "xfoo"))
+ ("foo??bar" matches "fooxxbar" "fooZZbar"
+  (and not "foobar" "fooxxxbar" "fooxxbarzz"))
+ ("foo?" matches "foox" (and not "fooxx"))
+ ("ab[0-9]c" matches "ab0c" "ab7c" "ab9c"
+  (and not "ab-c" "ab00c" "ab3"))
+ ("ab[cdefg]" matches "abc" "abd" "abg"
+  (and not "abh" "abcd" "ab[")))
 
 (test-end "glob")