summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--doc/guix.texi25
-rw-r--r--guix/lint.scm96
-rw-r--r--tests/lint.scm81
3 files changed, 201 insertions, 1 deletions
diff --git a/doc/guix.texi b/doc/guix.texi
index 0510f57c23..de02ad8687 100644
--- a/doc/guix.texi
+++ b/doc/guix.texi
@@ -9249,6 +9249,31 @@ Parse the @code{source} URL to determine if a tarball from GitHub is
 autogenerated or if it is a release tarball.  Unfortunately GitHub's
 autogenerated tarballs are sometimes regenerated.
 
+@item archival
+@cindex Software Heritage, source code archive
+@cindex archival of source code, Software Heritage
+Checks whether the package's source code is archived at
+@uref{https://www.softwareheritage.org, Software Heritage}.
+
+When the source code that is not archived comes from a version-control system
+(VCS)---e.g., it's obtained with @code{git-fetch}, send Software Heritage a
+``save'' request so that it eventually archives it.  This ensures that the
+source will remain available in the long term, and that Guix can fall back to
+Software Heritage should the source code disappear from its original host.
+The status of recent ``save'' requests can be
+@uref{https://archive.softwareheritage.org/save/#requests, viewed on-line}.
+
+When source code is a tarball obtained with @code{url-fetch}, simply print a
+message when it is not archived.  As of this writing, Software Heritage does
+not allow requests to save arbitrary tarballs; we are working on ways to
+ensure that non-VCS source code is also archived.
+
+Software Heritage
+@uref{https://archive.softwareheritage.org/api/#rate-limiting, limits the
+request rate per IP address}.  When the limit is reached, @command{guix lint}
+prints a message and the @code{archival} checker stops doing anything until
+that limit has been reset.
+
 @item cve
 @cindex security vulnerabilities
 @cindex CVE, Common Vulnerabilities and Exposures
diff --git a/guix/lint.scm b/guix/lint.scm
index 254f4e2830..ba38bef806 100644
--- a/guix/lint.scm
+++ b/guix/lint.scm
@@ -44,6 +44,8 @@
   #:use-module ((guix ui) #:select (texi->plain-text fill-paragraph))
   #:use-module (guix gnu-maintenance)
   #:use-module (guix cve)
+  #:use-module ((guix swh) #:hide (origin?))
+  #:autoload   (guix git-download) (git-reference?)
   #:use-module (ice-9 match)
   #:use-module (ice-9 regex)
   #:use-module (ice-9 format)
@@ -80,6 +82,7 @@
             check-vulnerabilities
             check-for-updates
             check-formatting
+            check-archival
 
             lint-warning
             lint-warning?
@@ -1033,6 +1036,93 @@ the NIST server non-fatal."
          '()))
     (#f '()))) ; cannot find newer upstream release
 
+
+(define (check-archival package)
+  "Check whether PACKAGE's source code is archived on Software Heritage.  If
+it's not, and if its source code is a VCS snapshot, then send a \"save\"
+request to Software Heritage.
+
+Software Heritage imposes limits on the request rate per client IP address.
+This checker prints a notice and stops doing anything once that limit has been
+reached."
+  (define (response->warning url method response)
+    (if (request-rate-limit-reached? url method)
+        (list (make-warning package
+                            (G_ "Software Heritage rate limit reached; \
+try again later")
+                            #:field 'source))
+        (list (make-warning package
+                            (G_ "'~a' returned ~a")
+                            (list url (response-code response))
+                            #:field 'source))))
+
+  (define skip-key (gensym "skip-archival-check"))
+
+  (define (skip-when-limit-reached url method)
+    (or (not (request-rate-limit-reached? url method))
+        (throw skip-key #t)))
+
+  (parameterize ((%allow-request? skip-when-limit-reached))
+    (catch #t
+      (lambda ()
+        (match (and (origin? (package-source package))
+                    (package-source package))
+          (#f                                     ;no source
+           '())
+          ((= origin-uri (? git-reference? reference))
+           (define url
+             (git-reference-url reference))
+           (define commit
+             (git-reference-commit reference))
+
+           (match (if (commit-id? commit)
+                      (or (lookup-revision commit)
+                          (lookup-origin-revision url commit))
+                      (lookup-origin-revision url commit))
+             ((? revision? revision)
+              '())
+             (#f
+              ;; Revision is missing from the archive, attempt to save it.
+              (catch 'swh-error
+                (lambda ()
+                  (save-origin (git-reference-url reference) "git")
+                  (list (make-warning
+                         package
+                         ;; TRANSLATORS: "Software Heritage" is a proper noun
+                         ;; that must remain untranslated.  See
+                         ;; <https://www.softwareheritage.org>.
+                         (G_ "scheduled Software Heritage archival")
+                         #:field 'source)))
+                (lambda (key url method response . _)
+                  (cond ((= 429 (response-code response))
+                         (list (make-warning
+                                package
+                                (G_ "archival rate limit exceeded; \
+try again later")
+                                #:field 'source)))
+                        (else
+                         (response->warning url method response))))))))
+          ((? origin? origin)
+           ;; Since "save" origins are not supported for non-VCS source, all
+           ;; we can do is tell whether a given tarball is available or not.
+           (if (origin-sha256 origin)             ;XXX: for ungoogled-chromium
+               (match (lookup-content (origin-sha256 origin) "sha256")
+                 (#f
+                  (list (make-warning package
+                                      (G_ "source not archived on Software \
+Heritage")
+                                      #:field 'source)))
+                 ((? content?)
+                  '()))
+               '()))))
+      (match-lambda*
+        ((key url method response)
+         (response->warning url method response))
+        ((key . args)
+         (if (eq? key skip-key)
+             '()
+             (apply throw key args)))))))
+
 
 ;;;
 ;;; Source code formatting.
@@ -1237,7 +1327,11 @@ or a list thereof")
    (lint-checker
      (name        'refresh)
      (description "Check the package for new upstream releases")
-     (check       check-for-updates))))
+     (check       check-for-updates))
+   (lint-checker
+     (name        'archival)
+     (description "Ensure source code archival on Software Heritage")
+     (check       check-archival))))
 
 (define %all-checkers
   (append %local-checkers
diff --git a/tests/lint.scm b/tests/lint.scm
index c8b88136f4..1b92f02b85 100644
--- a/tests/lint.scm
+++ b/tests/lint.scm
@@ -35,6 +35,7 @@
   #:use-module (guix packages)
   #:use-module (guix lint)
   #:use-module (guix ui)
+  #:use-module (guix swh)
   #:use-module (gnu packages)
   #:use-module (gnu packages glib)
   #:use-module (gnu packages pkg-config)
@@ -47,6 +48,7 @@
   #:use-module (ice-9 regex)
   #:use-module (ice-9 getopt-long)
   #:use-module (ice-9 pretty-print)
+  #:use-module (rnrs bytevectors)
   #:use-module (srfi srfi-1)
   #:use-module (srfi srfi-9 gnu)
   #:use-module (srfi srfi-26)
@@ -859,6 +861,85 @@
   '()
   (check-formatting (dummy-package "x")))
 
+(test-assert "archival: missing content"
+  (let* ((origin   (origin
+                     (method url-fetch)
+                     (uri "http://example.org/foo.tgz")
+                     (sha256 (make-bytevector 32))))
+         (warnings (with-http-server '((404 "Not archived."))
+                     (parameterize ((%swh-base-url (%local-url)))
+                       (check-archival (dummy-package "x"
+                                                      (source origin)))))))
+    (warning-contains? "not archived" warnings)))
+
+(test-equal "archival: content available"
+  '()
+  (let* ((origin   (origin
+                     (method url-fetch)
+                     (uri "http://example.org/foo.tgz")
+                     (sha256 (make-bytevector 32))))
+         ;; https://archive.softwareheritage.org/api/1/content/
+         (content  "{ \"checksums\": {}, \"data_url\": \"xyz\",
+                      \"length\": 42 }"))
+    (with-http-server `((200 ,content))
+      (parameterize ((%swh-base-url (%local-url)))
+        (check-archival (dummy-package "x" (source origin)))))))
+
+(test-assert "archival: missing revision"
+  (let* ((origin   (origin
+                     (method git-fetch)
+                     (uri (git-reference
+                           (url "http://example.org/foo.git")
+                           (commit "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")))
+                     (sha256 (make-bytevector 32))))
+         ;; https://archive.softwareheritage.org/api/1/origin/save/
+         (save     "{ \"origin_url\": \"http://example.org/foo.git\",
+                      \"save_request_date\": \"2014-11-17T22:09:38+01:00\",
+                      \"save_request_status\": \"accepted\",
+                      \"save_task_status\": \"scheduled\" }")
+         (warnings (with-http-server `((404 "No revision.") ;lookup-revision
+                                       (404 "No origin.")   ;lookup-origin
+                                       (200 ,save))         ;save-origin
+                     (parameterize ((%swh-base-url (%local-url)))
+                       (check-archival (dummy-package "x" (source origin)))))))
+    (warning-contains? "scheduled" warnings)))
+
+(test-equal "archival: revision available"
+  '()
+  (let* ((origin   (origin
+                     (method git-fetch)
+                     (uri (git-reference
+                           (url "http://example.org/foo.git")
+                           (commit "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")))
+                     (sha256 (make-bytevector 32))))
+         ;; https://archive.softwareheritage.org/api/1/revision/
+         (revision "{ \"author\": {}, \"parents\": [],
+                      \"date\": \"2014-11-17T22:09:38+01:00\" }"))
+    (with-http-server `((200 ,revision))
+      (parameterize ((%swh-base-url (%local-url)))
+        (check-archival (dummy-package "x" (source origin)))))))
+
+(test-assert "archival: rate limit reached"
+  ;; We should get a single warning stating that the rate limit was reached,
+  ;; and nothing more, in particular no other HTTP requests.
+  (let* ((origin   (origin
+                     (method url-fetch)
+                     (uri "http://example.org/foo.tgz")
+                     (sha256 (make-bytevector 32))))
+         (too-many (build-response
+                    #:code 429
+                    #:reason-phrase "Too many requests"
+                    #:headers '((x-ratelimit-remaining . "0")
+                                (x-ratelimit-reset . "3000000000"))))
+         (warnings (with-http-server `((,too-many "Rate limit reached."))
+                     (parameterize ((%swh-base-url (%local-url)))
+                       (append-map (lambda (name)
+                                     (check-archival
+                                      (dummy-package name (source origin))))
+                                   '("x" "y" "z"))))))
+    (string-contains (single-lint-warning-message warnings)
+                     "rate limit reached")))
+
 (test-end "lint")
 
 ;; Local Variables: