summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--guix/download.scm1
-rw-r--r--guix/lint.scm62
-rw-r--r--tests/lint.scm34
3 files changed, 89 insertions, 8 deletions
diff --git a/guix/download.scm b/guix/download.scm
index 72094e7318..b6eb97e6fa 100644
--- a/guix/download.scm
+++ b/guix/download.scm
@@ -35,6 +35,7 @@
   #:use-module (srfi srfi-1)
   #:use-module (srfi srfi-26)
   #:export (%mirrors
+            %disarchive-mirrors
             (url-fetch* . url-fetch)
             url-fetch/executable
             url-fetch/tarbomb
diff --git a/guix/lint.scm b/guix/lint.scm
index 1bebfe03d3..a2d6418b85 100644
--- a/guix/lint.scm
+++ b/guix/lint.scm
@@ -30,6 +30,7 @@
 
 (define-module (guix lint)
   #:use-module (guix store)
+  #:autoload   (guix base16) (bytevector->base16-string)
   #:use-module (guix base32)
   #:use-module (guix diagnostics)
   #:use-module (guix download)
@@ -1227,6 +1228,43 @@ upstream releases")
                             #:field 'source)))))))
 
 
+(define (lookup-disarchive-spec hash)
+  "If Disarchive mirrors have a spec for HASH, return the list of SWH
+directory identifiers the spec refers to.  Otherwise return #f."
+  (define (extract-swh-id spec)
+    ;; Return the list of SWH directory identifiers SPEC refers to, where SPEC
+    ;; is a Disarchive sexp.  Instead of attempting to parse it, traverse it
+    ;; in a pretty unintelligent fashion.
+    (let loop ((sexp spec)
+               (ids '()))
+      (match sexp
+        ((? string? str)
+         (let ((prefix "swh:1:dir:"))
+           (if (string-prefix? prefix str)
+               (cons (string-drop str (string-length prefix)) ids)
+               ids)))
+        ((head tail ...)
+         (loop tail (loop head ids)))
+        (_ ids))))
+
+  (any (lambda (mirror)
+         (with-networking-fail-safe
+          (format #f (G_ "failed to access Disarchive database at ~a")
+                  mirror)
+          #f
+          (guard (c ((http-get-error? c) #f))
+            (let* ((url (string-append mirror
+                                       (symbol->string
+                                        (content-hash-algorithm hash))
+                                       "/"
+                                       (bytevector->base16-string
+                                        (content-hash-value hash))))
+                   (port (http-fetch (string->uri url) #:text? #t))
+                   (spec (read port)))
+              (close-port port)
+              (extract-swh-id spec)))))
+       %disarchive-mirrors))
+
 (define (check-archival package)
   "Check whether PACKAGE's source code is archived on Software Heritage.  If
 it's not, and if its source code is a VCS snapshot, then send a \"save\"
@@ -1302,10 +1340,26 @@ try again later")
                                         (symbol->string
                                          (content-hash-algorithm hash)))
                    (#f
-                    (list (make-warning package
-                                        (G_ "source not archived on Software \
-Heritage")
-                                        #:field 'source)))
+                    ;; If SWH doesn't have HASH as is, it may be because it's
+                    ;; a hand-crafted tarball.  In that case, check whether
+                    ;; the Disarchive database has an entry for that tarball.
+                    (match (lookup-disarchive-spec hash)
+                      (#f
+                       (list (make-warning package
+                                           (G_ "source not archived on Software \
+Heritage and missing from the Disarchive database")
+                                           #:field 'source)))
+                      (directory-ids
+                       (match (find (lambda (id)
+                                      (not (lookup-directory id)))
+                                    directory-ids)
+                         (#f '())
+                         (id
+                          (list (make-warning package
+                                              (G_ "
+Disarchive entry refers to non-existent SWH directory '~a'")
+                                              (list id)
+                                              #:field 'source)))))))
                    ((? content?)
                     '())))
                '()))))
diff --git a/tests/lint.scm b/tests/lint.scm
index a2c8665142..d54fafc1d2 100644
--- a/tests/lint.scm
+++ b/tests/lint.scm
@@ -1,7 +1,7 @@
 ;;; GNU Guix --- Functional package management for GNU
 ;;; Copyright © 2012, 2013 Cyril Roelandt <tipecaml@gmail.com>
 ;;; Copyright © 2014, 2015, 2016 Eric Bavier <bavier@member.fsf.org>
-;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
 ;;; Copyright © 2015, 2016 Mathieu Lirzin <mthl@gnu.org>
 ;;; Copyright © 2016 Hartmut Goebel <h.goebel@crazy-compilers.com>
 ;;; Copyright © 2017 Alex Kost <alezost@gmail.com>
@@ -1008,10 +1008,13 @@
                      (method url-fetch)
                      (uri "http://example.org/foo.tgz")
                      (sha256 (make-bytevector 32))))
-         (warnings (with-http-server '((404 "Not archived."))
+         (warnings (with-http-server '((404 "Not archived.")
+                                       (404 "Not in Disarchive database."))
                      (parameterize ((%swh-base-url (%local-url)))
-                       (check-archival (dummy-package "x"
-                                                      (source origin)))))))
+                       (mock ((guix download) %disarchive-mirrors
+                              (list (%local-url)))
+                             (check-archival (dummy-package "x"
+                                                            (source origin))))))))
     (warning-contains? "not archived" warnings)))
 
 (test-equal "archival: content available"
@@ -1027,6 +1030,29 @@
       (parameterize ((%swh-base-url (%local-url)))
         (check-archival (dummy-package "x" (source origin)))))))
 
+(test-equal "archival: content unavailable but disarchive available"
+  '()
+  (let* ((origin   (origin
+                     (method url-fetch)
+                     (uri "http://example.org/foo.tgz")
+                     (sha256 (make-bytevector 32))))
+         (disarchive (object->string
+                      '(disarchive (version 0)
+                                   ...
+                                   "swh:1:dir:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")))
+         ;; https://archive.softwareheritage.org/api/1/directory/
+         (directory "[ { \"checksums\": {},
+                         \"dir_id\": \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\",
+                         \"type\": \"file\",
+                         \"name\": \"README\"
+                         \"length\": 42 } ]"))
+    (with-http-server `((404 "")                  ;lookup-content
+                        (200 ,disarchive)         ;Disarchive database lookup
+                        (200 ,directory))         ;lookup-directory
+      (mock ((guix download) %disarchive-mirrors (list (%local-url)))
+            (parameterize ((%swh-base-url (%local-url)))
+              (check-archival (dummy-package "x" (source origin))))))))
+
 (test-assert "archival: missing revision"
   (let* ((origin   (origin
                      (method git-fetch)