From 259c79963f12d18b42d5455babf69b8ffdbb6b08 Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Thu, 2 Jun 2022 14:15:06 -0400 Subject: Improve robustness of `podman system reset` Firstly, reset is now managed by the runtime itself as a part of initialization. This ensures that it can be used even with runtimes that would otherwise fail to be created - most notably, when the user has changed a core path (runroot/root/tmpdir/staticdir). Secondly, we now attempt a best-effort removal even if the store completely fails to be configured. Third, we now hold the alive lock for the entire reset operation. This ensures that no other Podman process can start while we are running a system reset, and removes any possibility of a race where a user tries to create containers or pull images while we are trying to perform a reset. [NO NEW TESTS NEEDED] we do not test reset last I checked. Fixes #9075 Signed-off-by: Matthew Heon --- libpod/options.go | 15 ++++++++++++ libpod/reset.go | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- libpod/runtime.go | 60 ++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 144 insertions(+), 3 deletions(-) (limited to 'libpod') diff --git a/libpod/options.go b/libpod/options.go index a02c05537..4b6803c3f 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -435,6 +435,21 @@ func WithDefaultInfraCommand(cmd string) RuntimeOption { } } +// WithReset instructs libpod to reset all storage to factory defaults. +// All containers, pods, volumes, images, and networks will be removed. +// All directories created by Libpod will be removed. +func WithReset() RuntimeOption { + return func(rt *Runtime) error { + if rt.valid { + return define.ErrRuntimeFinalized + } + + rt.doReset = true + + return nil + } +} + // WithRenumber instructs libpod to perform a lock renumbering while // initializing. This will handle migrations from early versions of libpod with // file locks to newer versions with SHM locking, as well as changes in the diff --git a/libpod/reset.go b/libpod/reset.go index 28d0ee3f6..30eab50fb 100644 --- a/libpod/reset.go +++ b/libpod/reset.go @@ -17,8 +17,78 @@ import ( "github.com/sirupsen/logrus" ) +// removeAllDirs removes all Podman storage directories. It is intended to be +// used as a backup for reset() when that function cannot be used due to +// failures in initializing libpod. +// It does not expect that all the directories match what is in use by Podman, +// as this is a common failure point for `system reset`. As such, our ability to +// interface with containers and pods is somewhat limited. +// This function assumes that we do not have a working c/storage store. +func (r *Runtime) removeAllDirs() error { + var lastErr error + + // Grab the runtime alive lock. + // This ensures that no other Podman process can run while we are doing + // a reset, so no race conditions with containers/pods/etc being created + // while we are resetting storage. + // TODO: maybe want a helper for getting the path? This is duped from + // runtime.go + runtimeAliveLock := filepath.Join(r.config.Engine.TmpDir, "alive.lck") + aliveLock, err := storage.GetLockfile(runtimeAliveLock) + if err != nil { + logrus.Errorf("Lock runtime alive lock %s: %v", runtimeAliveLock, err) + } else { + aliveLock.Lock() + defer aliveLock.Unlock() + } + + // We do not have a store - so we can't really try and remove containers + // or pods or volumes... + // Try and remove the directories, in hopes that they are unmounted. + // This is likely to fail but it's the best we can do. + + // Volume path + if err := os.RemoveAll(r.config.Engine.VolumePath); err != nil { + lastErr = errors.Wrapf(err, "removing volume path") + } + + // Tmpdir + if err := os.RemoveAll(r.config.Engine.TmpDir); err != nil { + if lastErr != nil { + logrus.Errorf("Reset: %v", lastErr) + } + lastErr = errors.Wrapf(err, "removing tmp dir") + } + + // Runroot + if err := os.RemoveAll(r.storageConfig.RunRoot); err != nil { + if lastErr != nil { + logrus.Errorf("Reset: %v", lastErr) + } + lastErr = errors.Wrapf(err, "removing run root") + } + + // Static dir + if err := os.RemoveAll(r.config.Engine.StaticDir); err != nil { + if lastErr != nil { + logrus.Errorf("Reset: %v", lastErr) + } + lastErr = errors.Wrapf(err, "removing static dir") + } + + // Graph root + if err := os.RemoveAll(r.storageConfig.GraphRoot); err != nil { + if lastErr != nil { + logrus.Errorf("Reset: %v", lastErr) + } + lastErr = errors.Wrapf(err, "removing graph root") + } + + return lastErr +} + // Reset removes all storage -func (r *Runtime) Reset(ctx context.Context) error { +func (r *Runtime) reset(ctx context.Context) error { var timeout *uint pods, err := r.GetAllPods() if err != nil { diff --git a/libpod/runtime.go b/libpod/runtime.go index e268c2d17..10c6d21c5 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -96,6 +96,10 @@ type Runtime struct { // This bool is just needed so that we can set it for netavark interface. syslog bool + // doReset indicates that the runtime should perform a system reset. + // All Podman files will be removed. + doReset bool + // doRenumber indicates that the runtime should perform a lock renumber // during initialization. // Once the runtime has been initialized and returned, this variable is @@ -235,6 +239,11 @@ func newRuntimeFromConfig(conf *config.Config, options ...RuntimeOption) (*Runti runtime.config.CheckCgroupsAndAdjustConfig() + // If resetting storage, do *not* return a runtime. + if runtime.doReset { + return nil, nil + } + return runtime, nil } @@ -305,6 +314,13 @@ func makeRuntime(runtime *Runtime) (retErr error) { } runtime.conmonPath = cPath + if runtime.noStore && runtime.doReset { + return errors.Wrapf(define.ErrInvalidArg, "cannot perform system reset if runtime is not creating a store") + } + if runtime.doReset && runtime.doRenumber { + return errors.Wrapf(define.ErrInvalidArg, "cannot perform system reset while renumbering locks") + } + // Make the static files directory if it does not exist if err := os.MkdirAll(runtime.config.Engine.StaticDir, 0700); err != nil { // The directory is allowed to exist @@ -339,6 +355,20 @@ func makeRuntime(runtime *Runtime) (retErr error) { // Grab config from the database so we can reset some defaults dbConfig, err := runtime.state.GetDBConfig() if err != nil { + if runtime.doReset { + // We can at least delete the DB and the static files + // directory. + // Can't safely touch anything else because we aren't + // sure of other directories. + if err := runtime.state.Close(); err != nil { + logrus.Errorf("Closing database connection: %v", err) + } else { + if err := os.RemoveAll(runtime.config.Engine.StaticDir); err != nil { + logrus.Errorf("Removing static files directory %v: %v", runtime.config.Engine.StaticDir, err) + } + } + } + return errors.Wrapf(err, "error retrieving runtime configuration from database") } @@ -372,7 +402,13 @@ func makeRuntime(runtime *Runtime) (retErr error) { // Validate our config against the database, now that we've set our // final storage configuration if err := runtime.state.ValidateDBConfig(runtime); err != nil { - return err + // If we are performing a storage reset: continue on with a + // warning. Otherwise we can't `system reset` after a change to + // the core paths. + if !runtime.doReset { + return err + } + logrus.Errorf("Runtime paths differ from those stored in database, storage reset may not remove all files") } if err := runtime.state.SetNamespace(runtime.config.Engine.Namespace); err != nil { @@ -394,6 +430,14 @@ func makeRuntime(runtime *Runtime) (retErr error) { } else if runtime.noStore { logrus.Debug("No store required. Not opening container store.") } else if err := runtime.configureStore(); err != nil { + // Make a best-effort attempt to clean up if performing a + // storage reset. + if runtime.doReset { + if err := runtime.removeAllDirs(); err != nil { + logrus.Errorf("Removing libpod directories: %v", err) + } + } + return err } defer func() { @@ -575,6 +619,18 @@ func makeRuntime(runtime *Runtime) (retErr error) { return err } + // If we're resetting storage, do it now. + // We will not return a valid runtime. + // TODO: Plumb this context out so it can be set. + if runtime.doReset { + // Mark the runtime as valid, so normal functionality "mostly" + // works and we can use regular functions to remove + // ctrs/pods/etc + runtime.valid = true + + return runtime.reset(context.Background()) + } + // If we're renumbering locks, do it now. // It breaks out of normal runtime init, and will not return a valid // runtime. @@ -818,7 +874,7 @@ func (r *Runtime) DeferredShutdown(force bool) { // still containers running or mounted func (r *Runtime) Shutdown(force bool) error { if !r.valid { - return define.ErrRuntimeStopped + return nil } if r.workerChannel != nil { -- cgit v1.2.3-54-g00ecf