summary refs log tree commit diff
diff options
context:
space:
mode:
author Ludovic Courtès <ludo@gnu.org> 2020-10-24 16:31:18 +0200
committer Ludovic Courtès <ludo@gnu.org> 2020-10-28 16:24:47 +0100
commit ecaa102a58ad3ab0b42e04a3d10d7c761c05ec98 (patch)
tree e89b3791dec3f3fada0db1768b109414b4b56cf1
parent 6d1fd37182f17e4178e2950a22a5aed0ba135587 (diff)
download guix-ecaa102a58ad3ab0b42e04a3d10d7c761c05ec98.tar.gz
publish: Add '--cache-bypass-threshold'.
* guix/scripts/publish.scm (show-help, %options): Add
'--cache-bypass-threshold'.
(low-compression): New procedure.
(cache-bypass-threshold): New parameter.
(bypass-cache?): New procedure.
(render-narinfo/cached): Call 'render-narinfo' when 'bypass-cache?'
returns true.
(render-nar/cached): Call 'render-nar' when 'bypass-cache?' returns
true.
(guix-publish): Parameterize 'cache-bypass-threshold'.
* tests/publish.scm ("with cache", "with cache, lzip + gzip")
("with cache, uncompressed"): Pass '--cache-bypass-threshold=0'.
("with cache, vanishing item"): Expect 200 for RESPONSE.
("with cache, cache bypass"): New test.
-rw-r--r-- doc/guix.texi 24
-rw-r--r-- guix/scripts/publish.scm 87
-rw-r--r-- tests/publish.scm 43
3 files changed, 131 insertions, 23 deletions
diff --git a/doc/guix.texi b/doc/guix.texi
index ba7bb9612e..22bddf10e3 100644
--- a/doc/guix.texi
+++ b/doc/guix.texi
@@ -12086,13 +12086,20 @@ in advance, so @command{guix publish} does not add a
 prevents clients from knowing the amount of data being downloaded.
 
 Conversely, when @option{--cache} is used, the first request for a store
-item (@i{via} a @code{.narinfo} URL) returns 404 and triggers a
+item (@i{via} a @code{.narinfo} URL) triggers a
 background process to @dfn{bake} the archive---computing its
 @code{.narinfo} and compressing the archive, if needed.  Once the
 archive is cached in @var{directory}, subsequent requests succeed and
 are served directly from the cache, which guarantees that clients get
 the best possible bandwidth.
 
+That first @code{.narinfo} request nonetheless returns 200, provided the
+requested store item is ``small enough'', below the cache bypass
+threshold---see @option{--cache-bypass-threshold} below.  That way,
+clients do not have to wait until the archive is baked.  For larger
+store items, the first @code{.narinfo} request returns 404, meaning that
+clients have to wait until the archive is baked.
+
 The ``baking'' process is performed by worker threads.  By default, one
 thread per CPU core is created, but this can be customized.  See
 @option{--workers} below.
@@ -12118,6 +12125,21 @@ Additionally, when @option{--cache} is used, cached entries that have
 not been accessed for @var{ttl} and that no longer have a corresponding
 item in the store, may be deleted.
 
+@item --cache-bypass-threshold=@var{size}
+When used in conjunction with @option{--cache}, store items smaller than
+@var{size} are immediately available, even when they are not yet in
+cache.  @var{size} is a size in bytes, or it can be suffixed by @code{M}
+for megabytes and so on.  The default is @code{10M}.
+
+``Cache bypass'' allows you to reduce the publication delay for clients
+at the expense of possibly additional I/O and CPU use on the server
+side: depending on the client access patterns, those store items can end
+up being baked several times until a copy is available in cache.
+
+Increasing the threshold may be useful for sites that have few users, or
+to guarantee that users get substitutes even for store items that are
+not popular.
+
 @item --nar-path=@var{path}
 Use @var{path} as the prefix for the URLs of ``nar'' files
 (@pxref{Invoking guix archive, normalized archives}).
diff --git a/guix/scripts/publish.scm b/guix/scripts/publish.scm
index 1741b93309..9706b52844 100644
--- a/guix/scripts/publish.scm
+++ b/guix/scripts/publish.scm
@@ -83,6 +83,9 @@ Publish ~a over HTTP.\n") %store-directory)
   (display (G_ "
   -c, --cache=DIRECTORY  cache published items to DIRECTORY"))
   (display (G_ "
+      --cache-bypass-threshold=SIZE
+                         serve store items below SIZE even when not cached"))
+  (display (G_ "
       --workers=N        use N workers to bake items"))
   (display (G_ "
       --ttl=TTL          announce narinfos can be cached for TTL seconds"))
@@ -135,6 +138,12 @@ if ITEM is already compressed."
       (list %no-compression)
       requested))
 
+(define (low-compression c)
+  "Return <compression> of the same type as C, but optimized for low CPU
+usage."
+  (compression (compression-type c)
+               (min (compression-level c) 2)))
+
 (define %options
   (list (option '(#\h "help") #f #f
                 (lambda _
@@ -185,6 +194,10 @@ if ITEM is already compressed."
         (option '(#\c "cache") #t #f
                 (lambda (opt name arg result)
                   (alist-cons 'cache arg result)))
+        (option '("cache-bypass-threshold") #t #f
+                (lambda (opt name arg result)
+                  (alist-cons 'cache-bypass-threshold (size->number arg)
+                              result)))
         (option '("workers") #t #f
                 (lambda (opt name arg result)
                   (alist-cons 'workers (string->number* arg)
@@ -435,7 +448,7 @@ items.  Failing that, we could eventually have to recompute them and return
             (expiration-time file))))))
 
 (define (hash-part->path* store hash cache)
-  "Like 'hash-part->path' but cached results under CACHE.  This ensures we can
+  "Like 'hash-part->path' but cache results under CACHE.  This ensures we can
 still map HASH to the corresponding store file name, even if said store item
 vanished from the store in the meantime."
   (let ((cached (hash-part-mapping-cache-file cache hash)))
@@ -455,6 +468,18 @@ vanished from the store in the meantime."
                result))
             (apply throw args))))))
 
+(define cache-bypass-threshold
+  ;; Maximum size of a store item that may be served by the '/cached' handlers
+  ;; below even when not in cache.
+  (make-parameter (* 10 (expt 2 20))))
+
+(define (bypass-cache? store item)
+  "Return true if we allow ITEM to be downloaded before it is cached.  ITEM is
+interpreted as the basename of a store item."
+  (guard (c ((store-error? c) #f))
+    (< (path-info-nar-size (query-path-info store item))
+       (cache-bypass-threshold))))
+
 (define* (render-narinfo/cached store request hash
                                 #:key ttl (compressions (list %no-compression))
                                 (nar-path "nar")
@@ -514,9 +539,20 @@ requested using POOL."
                                                      (nar-expiration-time ttl)
                                                      #:delete-entry delete-entry
                                                      #:cleanup-period ttl))))
-           (not-found request
-                      #:phrase "We're baking it"
-                      #:ttl 300))              ;should be available within 5m
+
+           ;; If ITEM passes 'bypass-cache?', render a temporary narinfo right
+           ;; away, with a short TTL.  The narinfo is temporary because it
+           ;; lacks 'FileSize', for instance, which the cached narinfo will
+           ;; have.  Chances are that the nar will be baked by the time the
+           ;; client asks for it.
+           (if (bypass-cache? store item)
+               (render-narinfo store request hash
+                               #:ttl 300          ;temporary
+                               #:nar-path nar-path
+                               #:compressions compressions)
+               (not-found request
+                          #:phrase "We're baking it"
+                          #:ttl 300)))          ;should be available within 5m
           (else
            (not-found request #:phrase "")))))
 
@@ -628,19 +664,31 @@ return it; otherwise, return 404.  When TTL is true, use it as the
 'Cache-Control' expiration time."
   (let ((cached (nar-cache-file cache store-item
                                 #:compression compression)))
-    (if (file-exists? cached)
-        (values `((content-type . (application/octet-stream
-                                   (charset . "ISO-8859-1")))
-                  ,@(if ttl
-                        `((cache-control (max-age . ,ttl)))
-                        '())
-
-                  ;; XXX: We're not returning the actual contents, deferring
-                  ;; instead to 'http-write'.  This is a hack to work around
-                  ;; <http://bugs.gnu.org/21093>.
-                  (x-raw-file . ,cached))
-                #f)
-        (not-found request))))
+    (cond ((file-exists? cached)
+           (values `((content-type . (application/octet-stream
+                                      (charset . "ISO-8859-1")))
+                     ,@(if ttl
+                           `((cache-control (max-age . ,ttl)))
+                           '())
+
+                     ;; XXX: We're not returning the actual contents, deferring
+                     ;; instead to 'http-write'.  This is a hack to work around
+                     ;; <http://bugs.gnu.org/21093>.
+                     (x-raw-file . ,cached))
+                   #f))
+          ((let* ((hash (and=> (string-index store-item #\-)
+                               (cut string-take store-item <>)))
+                  (item (and hash
+                             (guard (c ((store-error? c) #f))
+                               (hash-part->path store hash)))))
+             (and item (bypass-cache? store item)))
+           ;; Render STORE-ITEM live.  We reach this because STORE-ITEM is
+           ;; being baked but clients are already asking for it.  Thus, we're
+           ;; duplicating work, but doing so allows us to reduce delays.
+           (render-nar store request store-item
+                       #:compression (low-compression compression)))
+          (else
+           (not-found request)))))
 
 (define (render-content-addressed-file store request
                                        name algo hash)
@@ -1077,7 +1125,10 @@ methods, return the applicable compression."
 consider using the '--user' option!~%")))
 
       (parameterize ((%public-key public-key)
-                     (%private-key private-key))
+                     (%private-key private-key)
+                     (cache-bypass-threshold
+                      (or (assoc-ref opts 'cache-bypass-threshold)
+                          (cache-bypass-threshold))))
         (info (G_ "publishing ~a on ~a, port ~d~%")
               %store-directory
               (inet-ntop (sockaddr:fam address) (sockaddr:addr address))
diff --git a/tests/publish.scm b/tests/publish.scm
index 13f667aa7e..84aa6e5d73 100644
--- a/tests/publish.scm
+++ b/tests/publish.scm
@@ -413,7 +413,8 @@ References: ~%"
                     (call-with-new-thread
                      (lambda ()
                        (guix-publish "--port=6797" "-C2"
-                                     (string-append "--cache=" cache)))))))
+                                     (string-append "--cache=" cache)
+                                     "--cache-bypass-threshold=0"))))))
        (wait-until-ready 6797)
        (let* ((base     "http://localhost:6797/")
               (part     (store-path-hash-part %item))
@@ -462,7 +463,8 @@ References: ~%"
                     (call-with-new-thread
                      (lambda ()
                        (guix-publish "--port=6794" "-Cgzip:2" "-Clzip:2"
-                                     (string-append "--cache=" cache)))))))
+                                     (string-append "--cache=" cache)
+                                     "--cache-bypass-threshold=0"))))))
        (wait-until-ready 6794)
        (let* ((base     "http://localhost:6794/")
               (part     (store-path-hash-part %item))
@@ -517,7 +519,8 @@ References: ~%"
                       (call-with-new-thread
                        (lambda ()
                          (guix-publish "--port=6796" "-C2" "--ttl=42h"
-                                       (string-append "--cache=" cache)))))))
+                                       (string-append "--cache=" cache)
+                                       "--cache-bypass-threshold=0"))))))
          (wait-until-ready 6796)
          (let* ((base     "http://localhost:6796/")
                 (part     (store-path-hash-part item))
@@ -581,12 +584,44 @@ References: ~%"
                                        (basename item)
                                        ".narinfo"))
               (response (http-get url)))
-         (and (= 404 (response-code response))
+         (and (= 200 (response-code response))    ;we're below the threshold
               (wait-for-file cached)
               (begin
                 (delete-paths %store (list item))
                 (response-code (pk 'response (http-get url))))))))))
 
+(test-equal "with cache, cache bypass"
+  200
+  (call-with-temporary-directory
+   (lambda (cache)
+     (let ((thread (with-separate-output-ports
+                    (call-with-new-thread
+                     (lambda ()
+                       (guix-publish "--port=6788" "-C" "gzip"
+                                     (string-append "--cache=" cache)))))))
+       (wait-until-ready 6788)
+
+       (let* ((base     "http://localhost:6788/")
+              (item     (add-text-to-store %store "random" (random-text)))
+              (part     (store-path-hash-part item))
+              (narinfo  (string-append base part ".narinfo"))
+              (nar      (string-append base "nar/gzip/" (basename item)))
+              (cached   (string-append cache "/gzip/" (basename item)
+                                       ".narinfo")))
+         ;; We're below the default cache bypass threshold, so NAR and NARINFO
+         ;; should immediately return 200.  The NARINFO request should trigger
+         ;; caching, and the next request to NAR should return 200 as well.
+         (and (let ((response (pk 'r1 (http-get nar))))
+                (and (= 200 (response-code response))
+                     (not (response-content-length response)))) ;not known
+              (= 200 (response-code (http-get narinfo)))
+              (begin
+                (wait-for-file cached)
+                (let ((response (pk 'r2 (http-get nar))))
+                  (and (> (response-content-length response)
+                          (stat:size (stat item)))
+                       (response-code response))))))))))
+
 (test-equal "/log/NAME"
   `(200 #t application/x-bzip2)
   (let ((drv (run-with-store %store