From 259c79963f12d18b42d5455babf69b8ffdbb6b08 Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Thu, 2 Jun 2022 14:15:06 -0400 Subject: Improve robustness of `podman system reset` Firstly, reset is now managed by the runtime itself as a part of initialization. This ensures that it can be used even with runtimes that would otherwise fail to be created - most notably, when the user has changed a core path (runroot/root/tmpdir/staticdir). Secondly, we now attempt a best-effort removal even if the store completely fails to be configured. Thirdly, we now hold the alive lock for the entire reset operation. This ensures that no other Podman process can start while we are running a system reset, and removes any possibility of a race where a user tries to create containers or pull images while we are trying to perform a reset. [NO NEW TESTS NEEDED] We do not test reset, last I checked. Fixes #9075 Signed-off-by: Matthew Heon --- libpod/reset.go | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) (limited to 'libpod/reset.go') diff --git a/libpod/reset.go b/libpod/reset.go index 28d0ee3f6..30eab50fb 100644 --- a/libpod/reset.go +++ b/libpod/reset.go @@ -17,8 +17,78 @@ import ( "github.com/sirupsen/logrus" ) +// removeAllDirs removes all Podman storage directories. It is intended to be +// used as a backup for reset() when that function cannot be used due to +// failures in initializing libpod. +// It does not expect that all the directories match what is in use by Podman, +// as this is a common failure point for `system reset`. As such, our ability to +// interface with containers and pods is somewhat limited. +// This function assumes that we do not have a working c/storage store. +func (r *Runtime) removeAllDirs() error { + var lastErr error + + // Grab the runtime alive lock. 
+ // This ensures that no other Podman process can run while we are doing + // a reset, so no race conditions with containers/pods/etc being created + // while we are resetting storage. + // TODO: maybe want a helper for getting the path? This is duped from + // runtime.go + runtimeAliveLock := filepath.Join(r.config.Engine.TmpDir, "alive.lck") + aliveLock, err := storage.GetLockfile(runtimeAliveLock) + if err != nil { + logrus.Errorf("Lock runtime alive lock %s: %v", runtimeAliveLock, err) + } else { + aliveLock.Lock() + defer aliveLock.Unlock() + } + + // We do not have a store - so we can't really try and remove containers + // or pods or volumes... + // Try and remove the directories, in hopes that they are unmounted. + // This is likely to fail but it's the best we can do. + + // Volume path + if err := os.RemoveAll(r.config.Engine.VolumePath); err != nil { + lastErr = errors.Wrapf(err, "removing volume path") + } + + // Tmpdir + if err := os.RemoveAll(r.config.Engine.TmpDir); err != nil { + if lastErr != nil { + logrus.Errorf("Reset: %v", lastErr) + } + lastErr = errors.Wrapf(err, "removing tmp dir") + } + + // Runroot + if err := os.RemoveAll(r.storageConfig.RunRoot); err != nil { + if lastErr != nil { + logrus.Errorf("Reset: %v", lastErr) + } + lastErr = errors.Wrapf(err, "removing run root") + } + + // Static dir + if err := os.RemoveAll(r.config.Engine.StaticDir); err != nil { + if lastErr != nil { + logrus.Errorf("Reset: %v", lastErr) + } + lastErr = errors.Wrapf(err, "removing static dir") + } + + // Graph root + if err := os.RemoveAll(r.storageConfig.GraphRoot); err != nil { + if lastErr != nil { + logrus.Errorf("Reset: %v", lastErr) + } + lastErr = errors.Wrapf(err, "removing graph root") + } + + return lastErr +} + // Reset removes all storage -func (r *Runtime) Reset(ctx context.Context) error { +func (r *Runtime) reset(ctx context.Context) error { var timeout *uint pods, err := r.GetAllPods() if err != nil { -- cgit v1.2.3-54-g00ecf