summary refs log tree commit diff
path: root/src/libstore
diff options
context:
space:
mode:
authorEelco Dolstra <eelco.dolstra@logicblox.com>2012-12-20 17:32:15 +0100
committerEelco Dolstra <eelco.dolstra@logicblox.com>2012-12-20 17:32:15 +0100
commit06f62defe640517267a6a16dd222076c822f3123 (patch)
tree61e890a69c3cde04d2f354702470231e1b7e18ff /src/libstore
parent9c29a2ed35d884cda182cea74aee2a7ee614de93 (diff)
downloadguix-06f62defe640517267a6a16dd222076c822f3123.tar.gz
Yet another rewrite of the garbage collector
But this time it's *obviously* correct!  No more segfaults due to
infinite recursions for sure, etc.

Also, move directories to /nix/store/trash instead of renaming them to
/nix/store/bla-gc-<pid>.  Then we can just delete /nix/store/trash at
the end.
Diffstat (limited to 'src/libstore')
-rw-r--r--src/libstore/gc.cc263
-rw-r--r--src/libstore/local-store.hh6
2 files changed, 138 insertions, 131 deletions
diff --git a/src/libstore/gc.cc b/src/libstore/gc.cc
index fbf842350c..4385e4456f 100644
--- a/src/libstore/gc.cc
+++ b/src/libstore/gc.cc
@@ -396,24 +396,17 @@ struct LocalStore::GCState
     GCResults & results;
     PathSet roots;
     PathSet tempRoots;
-    PathSet deleted;
-    PathSet live;
-    PathSet busy;
-    PathSet invalidated;
+    PathSet dead;
+    PathSet alive;
     bool gcKeepOutputs;
     bool gcKeepDerivations;
     unsigned long long bytesInvalidated;
+    Path trashDir;
+    bool shouldDelete;
     GCState(GCResults & results_) : results(results_), bytesInvalidated(0) { }
 };
 
 
-static bool shouldDelete(GCOptions::GCAction action)
-{
-    return action == GCOptions::gcDeleteDead
-        || action == GCOptions::gcDeleteSpecific;
-}
-
-
 bool LocalStore::isActiveTempFile(const GCState & state,
     const Path & path, const string & suffix)
 {
@@ -424,152 +417,148 @@ bool LocalStore::isActiveTempFile(const GCState & state,
 
 void LocalStore::deleteGarbage(GCState & state, const Path & path)
 {
-    printMsg(lvlInfo, format("deleting `%1%'") % path);
     unsigned long long bytesFreed;
     deletePathWrapped(path, bytesFreed);
     state.results.bytesFreed += bytesFreed;
 }
 
 
-bool LocalStore::tryToDelete(GCState & state, const Path & path)
+void LocalStore::deletePathRecursive(GCState & state, const Path & path)
 {
     checkInterrupt();
 
-    if (path == linksDir) return true;
+    unsigned long long size = 0;
+
+    if (isValidPath(path)) {
+        PathSet referrers;
+        queryReferrers(path, referrers);
+        foreach (PathSet::iterator, i, referrers)
+            if (*i != path) deletePathRecursive(state, *i);
+        size = queryPathInfo(path).narSize;
+        invalidatePathChecked(path);
+    }
 
     struct stat st;
     if (lstat(path.c_str(), &st)) {
-        if (errno == ENOENT) return true;
+        if (errno == ENOENT) return;
         throw SysError(format("getting status of %1%") % path);
     }
 
-    if (state.deleted.find(path) != state.deleted.end()) return true;
-    if (state.live.find(path) != state.live.end()) return false;
+    printMsg(lvlInfo, format("deleting `%1%'") % path);
 
-    startNest(nest, lvlDebug, format("considering whether to delete `%1%'") % path);
+    /* If the path is not a regular file or symlink, move it to the
+       trash directory.  The move is to ensure that later (when we're
+       not holding the global GC lock) we can delete the path without
+       being afraid that the path has become alive again.  Otherwise
+       delete it right away. */
+    if (S_ISDIR(st.st_mode)) {
+        // Estimate the amount freed using the narSize field.  FIXME:
+        // if the path was not valid, need to determine the actual
+        // size.
+        state.bytesInvalidated += size;
+        makeMutable(path.c_str());
+        // Mac OS X cannot rename directories if they are read-only.
+        if (chmod(path.c_str(), st.st_mode | S_IWUSR) == -1)
+            throw SysError(format("making `%1%' writable") % path);
+        Path tmp = state.trashDir + "/" + baseNameOf(path);
+        if (rename(path.c_str(), tmp.c_str()))
+            throw SysError(format("unable to rename `%1%' to `%2%'") % path % tmp);
+    } else
+        deleteGarbage(state, path);
+
+    if (state.results.bytesFreed + state.bytesInvalidated > state.options.maxFreed) {
+        printMsg(lvlInfo, format("deleted or invalidated more than %1% bytes; stopping") % state.options.maxFreed);
+        throw GCLimitReached();
+    }
+}
 
-    /* If gc-keep-outputs and gc-keep-derivations are both set, we can
-       have cycles in the liveness graph, so we need to treat such
-       strongly connected components as a single unit (‘paths’).  That
-       is, we can delete the elements of ‘paths’ only if all referrers
-       of ‘paths’ are garbage. */
-    PathSet paths, referrers;
-    Paths pathsSorted;
 
-    if (isValidPath(path)) {
+bool LocalStore::canReachRoot(GCState & state, PathSet & visited, const Path & path)
+{
+    if (visited.find(path) != visited.end()) return false;
 
-        /* Add derivers and outputs of ‘path’ to ‘paths’. */
-        PathSet todo;
-        todo.insert(path);
-        while (!todo.empty()) {
-            Path p = *todo.begin();
-            assertStorePath(p);
-            todo.erase(p);
-            if (paths.find(p) != paths.end()) continue;
-            paths.insert(p);
-            /* If gc-keep-derivations is set and this is a derivation,
-               then don't delete the derivation if any of the outputs
-               are live. */
-            if (state.gcKeepDerivations && isDerivation(p)) {
-                PathSet outputs = queryDerivationOutputs(p);
-                foreach (PathSet::iterator, i, outputs)
-                    if (isValidPath(*i) && queryDeriver(*i) == p) todo.insert(*i);
-            }
-            /* If gc-keep-outputs is set, then don't delete this path
-               if there are derivers of this path that are not
-               garbage. */
-            if (state.gcKeepOutputs) {
-                PathSet derivers = queryValidDerivers(p);
-                foreach (PathSet::iterator, i, derivers) todo.insert(*i);
-            }
-        }
+    if (state.alive.find(path) != state.alive.end()) {
+        return true;
     }
 
-    else {
-        /* A lock file belonging to a path that we're building right
-           now isn't garbage. */
-        if (isActiveTempFile(state, path, ".lock")) return false;
-
-        /* Don't delete .chroot directories for derivations that are
-           currently being built. */
-        if (isActiveTempFile(state, path, ".chroot")) return false;
+    if (state.dead.find(path) != state.dead.end()) {
+        return false;
+    }
 
-        paths.insert(path);
+    if (state.roots.find(path) != state.roots.end()) {
+        printMsg(lvlDebug, format("cannot delete `%1%' because it's a root") % path);
+        state.alive.insert(path);
+        return true;
     }
 
-    /* Check if any path in ‘paths’ is a root. */
-    foreach (PathSet::iterator, i, paths)
-        if (state.roots.find(*i) != state.roots.end()) {
-            printMsg(lvlDebug, format("cannot delete `%1%' because it's a root") % *i);
-            goto isLive;
-        }
+    visited.insert(path);
 
-    /* Recursively try to delete the referrers of this strongly
-       connected component.  If any referrer can't be deleted, then
-       these paths can't be deleted either. */
-    foreach (PathSet::iterator, i, paths)
-        if (isValidPath(*i)) queryReferrers(*i, referrers);
+    if (!isValidPath(path)) return false;
 
-    foreach (PathSet::iterator, i, referrers)
-        if (paths.find(*i) == paths.end() && !tryToDelete(state, *i)) {
-            printMsg(lvlDebug, format("cannot delete `%1%' because it has live referrers") % *i);
-            goto isLive;
-        }
+    PathSet incoming;
 
-    /* The paths are garbage, so delete them. */
-    pathsSorted = topoSortPaths(*this, paths);
-    foreach (Paths::iterator, i, pathsSorted) {
-        if (shouldDelete(state.options.action)) {
-
-            /* If it's a valid path that's not a regular file or
-               symlink, invalidate it, rename it, and schedule it for
-               deletion.  The renaming is to ensure that later (when
-               we're not holding the global GC lock) we can delete the
-               path without being afraid that the path has become
-               alive again.  Otherwise delete it right away. */
-            if (isValidPath(*i)) {
-                if (S_ISDIR(st.st_mode)) {
-                    printMsg(lvlInfo, format("invalidating `%1%'") % *i);
-                    // Estimate the amount freed using the narSize field.
-                    state.bytesInvalidated += queryPathInfo(*i).narSize;
-                    invalidatePathChecked(*i);
-                    makeMutable(i->c_str());
-                    // Mac OS X cannot rename directories if they are read-only.
-                    if (chmod(i->c_str(), st.st_mode | S_IWUSR) == -1)
-                        throw SysError(format("making `%1%' writable") % *i);
-                    Path tmp = (format("%1%-gc-%2%") % *i % getpid()).str();
-                    if (rename(i->c_str(), tmp.c_str()))
-                        throw SysError(format("unable to rename `%1%' to `%2%'") % *i % tmp);
-                    state.invalidated.insert(tmp);
-                } else {
-                    invalidatePathChecked(*i);
-                    deleteGarbage(state, *i);
-                }
-            } else
-                deleteGarbage(state, *i);
+    /* Don't delete this path if any of its referrers are alive. */
+    queryReferrers(path, incoming);
 
-            if (state.results.bytesFreed + state.bytesInvalidated > state.options.maxFreed) {
-                printMsg(lvlInfo, format("deleted or invalidated more than %1% bytes; stopping") % state.options.maxFreed);
-                throw GCLimitReached();
+    /* If gc-keep-derivations is set and this is a derivation, then
+       don't delete the derivation if any of the outputs are alive. */
+    if (state.gcKeepDerivations && isDerivation(path)) {
+        PathSet outputs = queryDerivationOutputs(path);
+        foreach (PathSet::iterator, i, outputs)
+            if (isValidPath(*i) && queryDeriver(*i) == path)
+                incoming.insert(*i);
+    }
+
+    /* If gc-keep-outputs is set, then don't delete this path if there
+       are derivers of this path that are not garbage. */
+    if (state.gcKeepOutputs) {
+        PathSet derivers = queryValidDerivers(path);
+        foreach (PathSet::iterator, i, derivers)
+            incoming.insert(*i);
+    }
+
+    foreach (PathSet::iterator, i, incoming)
+        if (*i != path)
+            if (canReachRoot(state, visited, *i)) {
+                state.alive.insert(path);
+                return true;
             }
 
-        } else
-            printMsg(lvlTalkative, format("would delete `%1%'") % *i);
+    return false;
+}
 
-        state.deleted.insert(*i);
-        if (state.options.action != GCOptions::gcReturnLive)
-            state.results.paths.insert(*i);
-    }
 
-    return true;
+void LocalStore::tryToDelete(GCState & state, const Path & path)
+{
+    checkInterrupt();
 
- isLive:
-    foreach (PathSet::iterator, i, paths) {
-        state.live.insert(*i);
-        if (state.options.action == GCOptions::gcReturnLive)
-            state.results.paths.insert(*i);
+    if (path == linksDir || path == state.trashDir) return;
+
+    startNest(nest, lvlDebug, format("considering whether to delete `%1%'") % path);
+
+    if (!isValidPath(path)) {
+        /* A lock file belonging to a path that we're building right
+           now isn't garbage. */
+        if (isActiveTempFile(state, path, ".lock")) return;
+
+        /* Don't delete .chroot directories for derivations that are
+           currently being built. */
+        if (isActiveTempFile(state, path, ".chroot")) return;
+    }
+
+    PathSet visited;
+
+    if (canReachRoot(state, visited, path)) {
+        printMsg(lvlDebug, format("cannot delete `%1%' because it's still reachable") % path);
+    } else {
+        /* No path we visited was a root, so everything is garbage.
+           But we only delete ‘path’ and its referrers here so that
+           ‘nix-store --delete’ doesn't have the unexpected effect of
+           recursing into derivations and outputs. */
+        state.dead.insert(visited.begin(), visited.end());
+        if (state.shouldDelete)
+            deletePathRecursive(state, path);
     }
-    return false;
 }
 
 
@@ -625,7 +614,7 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
 {
     GCState state(results);
     state.options = options;
-
+    state.trashDir = settings.nixStore + "/trash";
     state.gcKeepOutputs = settings.gcKeepOutputs;
     state.gcKeepDerivations = settings.gcKeepDerivations;
 
@@ -638,6 +627,8 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
         state.gcKeepDerivations = false;
     }
 
+    state.shouldDelete = options.action == GCOptions::gcDeleteDead || options.action == GCOptions::gcDeleteSpecific;
+
     /* Acquire the global GC root.  This prevents
        a) New roots from being added.
        b) Processes from creating new temporary root files. */
@@ -668,6 +659,8 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
        increase, since we hold locks on everything.  So everything
        that is not reachable from `roots'. */
 
+    if (state.shouldDelete) createDirs(state.trashDir);
+
     /* Now either delete all garbage paths, or just the specified
        paths (for gcDeleteSpecific). */
 
@@ -675,13 +668,14 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
 
         foreach (PathSet::iterator, i, options.pathsToDelete) {
             assertStorePath(*i);
-            if (!tryToDelete(state, *i))
+            tryToDelete(state, *i);
+            if (state.dead.find(*i) == state.dead.end())
                 throw Error(format("cannot delete path `%1%' since it is still alive") % *i);
         }
 
     } else if (options.maxFreed > 0) {
 
-        if (shouldDelete(state.options.action))
+        if (state.shouldDelete)
             printMsg(lvlError, format("deleting garbage..."));
         else
             printMsg(lvlError, format("determining live/dead paths..."));
@@ -727,13 +721,22 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
         }
     }
 
+    if (state.options.action == GCOptions::gcReturnLive) {
+        state.results.paths = state.alive;
+        return;
+    }
+
+    if (state.options.action == GCOptions::gcReturnDead) {
+        state.results.paths = state.dead;
+        return;
+    }
+
     /* Allow other processes to add to the store from here on. */
     fdGCLock.close();
 
-    /* Delete the invalidated paths now that the lock has been
-       released. */
-    foreach (PathSet::iterator, i, state.invalidated)
-        deleteGarbage(state, *i);
+    /* Delete the trash directory. */
+    printMsg(lvlInfo, format("deleting `%1%'") % state.trashDir);
+    deleteGarbage(state, state.trashDir);
 
     /* Clean up the links directory. */
     if (options.action == GCOptions::gcDeleteDead || options.action == GCOptions::gcDeleteSpecific) {
diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh
index ebf3f6e2b4..bb5a9143ce 100644
--- a/src/libstore/local-store.hh
+++ b/src/libstore/local-store.hh
@@ -276,7 +276,11 @@ private:
 
     void deleteGarbage(GCState & state, const Path & path);
 
-    bool tryToDelete(GCState & state, const Path & path);
+    void tryToDelete(GCState & state, const Path & path);
+
+    bool canReachRoot(GCState & state, PathSet & visited, const Path & path);
+
+    void deletePathRecursive(GCState & state, const Path & path);
 
     bool isActiveTempFile(const GCState & state,
         const Path & path, const string & suffix);