summary refs log tree commit diff
diff options
context:
space:
mode:
author Eelco Dolstra <e.dolstra@tudelft.nl> 2010-02-03 21:22:57 +0000
committer Eelco Dolstra <e.dolstra@tudelft.nl> 2010-02-03 21:22:57 +0000
commit f859a8d3c33cc275f41d983bfeff2a21a9f88f1b (patch)
tree 8f242f4fc9d094a93b416f03b829d0f58af37bf0
parent c45de33c67a7273fc87a654696d15010df0f5c6d (diff)
download guix-f859a8d3c33cc275f41d983bfeff2a21a9f88f1b.tar.gz
* While waiting for a lock, print a sign of life every 5 minutes.
  This prevents remote builders from being killed by the
  `max-silent-time' inactivity monitor while they are waiting for a
  long garbage collection to finish.  This happens fairly often in the
  Hydra build farm.

-rw-r--r-- src/libmain/shared.cc 13
-rw-r--r-- src/libstore/pathlocks.cc 14
-rw-r--r-- src/libstore/pathlocks.hh 3
3 files changed, 27 insertions, 3 deletions
diff --git a/src/libmain/shared.cc b/src/libmain/shared.cc
index d9cf9a8626..d48e2ad696 100644
--- a/src/libmain/shared.cc
+++ b/src/libmain/shared.cc
@@ -31,6 +31,11 @@ static void sigintHandler(int signo)
 }
 
 
+static void sigalrmHandler(int signo)
+{
+}
+
+
 Path makeRootName(const Path & gcRoot, int & counter)
 {
     counter++;
@@ -160,6 +165,14 @@ static void initAndRun(int argc, char * * argv)
     if (sigaction(SIGPIPE, &act, 0))
         throw SysError("ignoring SIGPIPE");
 
+    /* Catch SIGALRM with an empty handler (we just need it to get an
+       EINTR from blocking system calls). */
+    act.sa_handler = sigalrmHandler;
+    sigfillset(&act.sa_mask);
+    act.sa_flags = 0;
+    if (sigaction(SIGALRM, &act, 0))
+        throw SysError("installing handler for SIGALRM");
+
     /* Reset SIGCHLD to its default. */
     act.sa_handler = SIG_DFL;
     act.sa_flags = 0;
diff --git a/src/libstore/pathlocks.cc b/src/libstore/pathlocks.cc
index d8290815c4..fe872ceede 100644
--- a/src/libstore/pathlocks.cc
+++ b/src/libstore/pathlocks.cc
@@ -37,7 +37,8 @@ void deleteLockFile(const Path & path, int fd)
 }
 
 
-bool lockFile(int fd, LockType lockType, bool wait)
+bool lockFile(int fd, LockType lockType, bool wait,
+    unsigned int progressInterval)
 {
     struct flock lock;
     if (lockType == ltRead) lock.l_type = F_RDLCK;
@@ -49,11 +50,20 @@ bool lockFile(int fd, LockType lockType, bool wait)
     lock.l_len = 0; /* entire file */
 
     if (wait) {
-        while (fcntl(fd, F_SETLKW, &lock) != 0) {
+        /* Wait until we acquire the lock.  If `progressInterval' is
+           non-zero, then print a message every `progressInterval'
+           seconds.  This is mostly to make sure that remote builders
+           aren't killed due to the `max-silent-time' inactivity
+           monitor while waiting for the garbage collector lock. */
+        while (1) {
+            if (progressInterval) alarm(progressInterval);
+            if (fcntl(fd, F_SETLKW, &lock) == 0) break;
             checkInterrupt();
             if (errno != EINTR)
                 throw SysError(format("acquiring/releasing lock"));
+            if (progressInterval) printMsg(lvlError, "still waiting for lock...");
         }
+        alarm(0);
     } else {
         while (fcntl(fd, F_SETLK, &lock) != 0) {
             checkInterrupt();
diff --git a/src/libstore/pathlocks.hh b/src/libstore/pathlocks.hh
index 57ca1584a6..8c6ac6a03f 100644
--- a/src/libstore/pathlocks.hh
+++ b/src/libstore/pathlocks.hh
@@ -17,7 +17,8 @@ void deleteLockFile(const Path & path, int fd);
 
 enum LockType { ltRead, ltWrite, ltNone };
 
-bool lockFile(int fd, LockType lockType, bool wait);
+bool lockFile(int fd, LockType lockType, bool wait,
+    unsigned int progressInterval = 300);
 
 
 class PathLocks