diff options
author | Matthew Heon <mheon@redhat.com> | 2022-06-02 14:15:06 -0400 |
---|---|---|
committer | Matthew Heon <mheon@redhat.com> | 2022-06-03 12:54:08 -0400 |
commit | 259c79963f12d18b42d5455babf69b8ffdbb6b08 (patch) | |
tree | 48a52da8cfe34022b9284d3018916a14363b8405 | |
parent | 2039445763f418720b08983b40949480e8754f9a (diff) | |
download | podman-259c79963f12d18b42d5455babf69b8ffdbb6b08.tar.gz podman-259c79963f12d18b42d5455babf69b8ffdbb6b08.tar.bz2 podman-259c79963f12d18b42d5455babf69b8ffdbb6b08.zip |
Improve robustness of `podman system reset`
Firstly, reset is now managed by the runtime itself as a part of
initialization. This ensures that it can be used even with
runtimes that would otherwise fail to be created - most notably,
when the user has changed a core path
(runroot/root/tmpdir/staticdir).
Secondly, we now attempt a best-effort removal even if the store
completely fails to be configured.
Third, we now hold the alive lock for the entire reset operation.
This ensures that no other Podman process can start while we are
running a system reset, and removes any possibility of a race
where a user tries to create containers or pull images while we
are trying to perform a reset.
[NO NEW TESTS NEEDED] we do not test reset last I checked.
Fixes #9075
Signed-off-by: Matthew Heon <mheon@redhat.com>
-rw-r--r-- | cmd/podman/system/reset.go | 14 | ||||
-rw-r--r-- | libpod/options.go | 15 | ||||
-rw-r--r-- | libpod/reset.go | 72 | ||||
-rw-r--r-- | libpod/runtime.go | 60 | ||||
-rw-r--r-- | pkg/domain/infra/abi/system.go | 2 | ||||
-rw-r--r-- | pkg/domain/infra/runtime_abi.go | 2 | ||||
-rw-r--r-- | pkg/domain/infra/runtime_libpod.go | 21 |
7 files changed, 170 insertions, 16 deletions
diff --git a/cmd/podman/system/reset.go b/cmd/podman/system/reset.go index 176573bf6..20f15a34f 100644 --- a/cmd/podman/system/reset.go +++ b/cmd/podman/system/reset.go @@ -91,18 +91,10 @@ func reset(cmd *cobra.Command, args []string) { registry.ContainerEngine().Shutdown(registry.Context()) registry.ImageEngine().Shutdown(registry.Context()) - engine, err := infra.NewSystemEngine(entities.ResetMode, registry.PodmanConfig()) - if err != nil { - logrus.Error(err) - os.Exit(define.ExecErrorCodeGeneric) - } - defer engine.Shutdown(registry.Context()) - - if err := engine.Reset(registry.Context()); err != nil { + // Do not try to shut the engine down, as a Reset engine is not valid + // after its creation. + if _, err := infra.NewSystemEngine(entities.ResetMode, registry.PodmanConfig()); err != nil { logrus.Error(err) - // FIXME change this to return the error like other commands - // defer will never run on os.Exit() - //nolint:gocritic os.Exit(define.ExecErrorCodeGeneric) } diff --git a/libpod/options.go b/libpod/options.go index a02c05537..4b6803c3f 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -435,6 +435,21 @@ func WithDefaultInfraCommand(cmd string) RuntimeOption { } } +// WithReset instructs libpod to reset all storage to factory defaults. +// All containers, pods, volumes, images, and networks will be removed. +// All directories created by Libpod will be removed. +func WithReset() RuntimeOption { + return func(rt *Runtime) error { + if rt.valid { + return define.ErrRuntimeFinalized + } + + rt.doReset = true + + return nil + } +} + // WithRenumber instructs libpod to perform a lock renumbering while // initializing. This will handle migrations from early versions of libpod with // file locks to newer versions with SHM locking, as well as changes in the diff --git a/libpod/reset.go b/libpod/reset.go index 28d0ee3f6..30eab50fb 100644 --- a/libpod/reset.go +++ b/libpod/reset.go @@ -17,8 +17,78 @@ import ( "github.com/sirupsen/logrus" ) +// removeAllDirs removes all Podman storage directories. It is intended to be +// used as a backup for reset() when that function cannot be used due to +// failures in initializing libpod. +// It does not expect that all the directories match what is in use by Podman, +// as this is a common failure point for `system reset`. As such, our ability to +// interface with containers and pods is somewhat limited. +// This function assumes that we do not have a working c/storage store. +func (r *Runtime) removeAllDirs() error { + var lastErr error + + // Grab the runtime alive lock. + // This ensures that no other Podman process can run while we are doing + // a reset, so no race conditions with containers/pods/etc being created + // while we are resetting storage. + // TODO: maybe want a helper for getting the path? This is duped from + // runtime.go + runtimeAliveLock := filepath.Join(r.config.Engine.TmpDir, "alive.lck") + aliveLock, err := storage.GetLockfile(runtimeAliveLock) + if err != nil { + logrus.Errorf("Lock runtime alive lock %s: %v", runtimeAliveLock, err) + } else { + aliveLock.Lock() + defer aliveLock.Unlock() + } + + // We do not have a store - so we can't really try and remove containers + // or pods or volumes... + // Try and remove the directories, in hopes that they are unmounted. + // This is likely to fail but it's the best we can do. + + // Volume path + if err := os.RemoveAll(r.config.Engine.VolumePath); err != nil { + lastErr = errors.Wrapf(err, "removing volume path") + } + + // Tmpdir + if err := os.RemoveAll(r.config.Engine.TmpDir); err != nil { + if lastErr != nil { + logrus.Errorf("Reset: %v", lastErr) + } + lastErr = errors.Wrapf(err, "removing tmp dir") + } + + // Runroot + if err := os.RemoveAll(r.storageConfig.RunRoot); err != nil { + if lastErr != nil { + logrus.Errorf("Reset: %v", lastErr) + } + lastErr = errors.Wrapf(err, "removing run root") + } + + // Static dir + if err := os.RemoveAll(r.config.Engine.StaticDir); err != nil { + if lastErr != nil { + logrus.Errorf("Reset: %v", lastErr) + } + lastErr = errors.Wrapf(err, "removing static dir") + } + + // Graph root + if err := os.RemoveAll(r.storageConfig.GraphRoot); err != nil { + if lastErr != nil { + logrus.Errorf("Reset: %v", lastErr) + } + lastErr = errors.Wrapf(err, "removing graph root") + } + + return lastErr +} + // Reset removes all storage -func (r *Runtime) Reset(ctx context.Context) error { +func (r *Runtime) reset(ctx context.Context) error { var timeout *uint pods, err := r.GetAllPods() if err != nil { diff --git a/libpod/runtime.go b/libpod/runtime.go index e268c2d17..10c6d21c5 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -96,6 +96,10 @@ type Runtime struct { // This bool is just needed so that we can set it for netavark interface. syslog bool + // doReset indicates that the runtime should perform a system reset. + // All Podman files will be removed. + doReset bool + // doRenumber indicates that the runtime should perform a lock renumber // during initialization. // Once the runtime has been initialized and returned, this variable is @@ -235,6 +239,11 @@ func newRuntimeFromConfig(conf *config.Config, options ...RuntimeOption) (*Runti runtime.config.CheckCgroupsAndAdjustConfig() + // If resetting storage, do *not* return a runtime. + if runtime.doReset { + return nil, nil + } + return runtime, nil } @@ -305,6 +314,13 @@ func makeRuntime(runtime *Runtime) (retErr error) { } runtime.conmonPath = cPath + if runtime.noStore && runtime.doReset { + return errors.Wrapf(define.ErrInvalidArg, "cannot perform system reset if runtime is not creating a store") + } + if runtime.doReset && runtime.doRenumber { + return errors.Wrapf(define.ErrInvalidArg, "cannot perform system reset while renumbering locks") + } + // Make the static files directory if it does not exist if err := os.MkdirAll(runtime.config.Engine.StaticDir, 0700); err != nil { // The directory is allowed to exist @@ -339,6 +355,20 @@ func makeRuntime(runtime *Runtime) (retErr error) { // Grab config from the database so we can reset some defaults dbConfig, err := runtime.state.GetDBConfig() if err != nil { + if runtime.doReset { + // We can at least delete the DB and the static files + // directory. + // Can't safely touch anything else because we aren't + // sure of other directories. + if err := runtime.state.Close(); err != nil { + logrus.Errorf("Closing database connection: %v", err) + } else { + if err := os.RemoveAll(runtime.config.Engine.StaticDir); err != nil { + logrus.Errorf("Removing static files directory %v: %v", runtime.config.Engine.StaticDir, err) + } + } + } + return errors.Wrapf(err, "error retrieving runtime configuration from database") } @@ -372,7 +402,13 @@ func makeRuntime(runtime *Runtime) (retErr error) { // Validate our config against the database, now that we've set our // final storage configuration if err := runtime.state.ValidateDBConfig(runtime); err != nil { - return err + // If we are performing a storage reset: continue on with a + // warning. Otherwise we can't `system reset` after a change to + // the core paths. + if !runtime.doReset { + return err + } + logrus.Errorf("Runtime paths differ from those stored in database, storage reset may not remove all files") } if err := runtime.state.SetNamespace(runtime.config.Engine.Namespace); err != nil { @@ -394,6 +430,14 @@ func makeRuntime(runtime *Runtime) (retErr error) { } else if runtime.noStore { logrus.Debug("No store required. Not opening container store.") } else if err := runtime.configureStore(); err != nil { + // Make a best-effort attempt to clean up if performing a + // storage reset. + if runtime.doReset { + if err := runtime.removeAllDirs(); err != nil { + logrus.Errorf("Removing libpod directories: %v", err) + } + } + return err } defer func() { @@ -575,6 +619,18 @@ func makeRuntime(runtime *Runtime) (retErr error) { return err } + // If we're resetting storage, do it now. + // We will not return a valid runtime. + // TODO: Plumb this context out so it can be set. + if runtime.doReset { + // Mark the runtime as valid, so normal functionality "mostly" + // works and we can use regular functions to remove + // ctrs/pods/etc + runtime.valid = true + + return runtime.reset(context.Background()) + } + // If we're renumbering locks, do it now. // It breaks out of normal runtime init, and will not return a valid // runtime. @@ -818,7 +874,7 @@ func (r *Runtime) DeferredShutdown(force bool) { // still containers running or mounted func (r *Runtime) Shutdown(force bool) error { if !r.valid { - return define.ErrRuntimeStopped + return nil } if r.workerChannel != nil { diff --git a/pkg/domain/infra/abi/system.go b/pkg/domain/infra/abi/system.go index 2ce190464..762f0d79a 100644 --- a/pkg/domain/infra/abi/system.go +++ b/pkg/domain/infra/abi/system.go @@ -328,7 +328,7 @@ func (ic *ContainerEngine) SystemDf(ctx context.Context, options entities.System } func (se *SystemEngine) Reset(ctx context.Context) error { - return se.Libpod.Reset(ctx) + return nil } func (se *SystemEngine) Renumber(ctx context.Context, flags *pflag.FlagSet, config *entities.PodmanConfig) error { diff --git a/pkg/domain/infra/runtime_abi.go b/pkg/domain/infra/runtime_abi.go index 39989c96b..7b5198d2f 100644 --- a/pkg/domain/infra/runtime_abi.go +++ b/pkg/domain/infra/runtime_abi.go @@ -53,7 +53,7 @@ func NewSystemEngine(setup entities.EngineSetup, facts *entities.PodmanConfig) ( case entities.RenumberMode: r, err = GetRuntimeRenumber(context.Background(), facts.FlagSet, facts) case entities.ResetMode: - r, err = GetRuntimeRenumber(context.Background(), facts.FlagSet, facts) + r, err = GetRuntimeReset(context.Background(), facts.FlagSet, facts) case entities.MigrateMode: name, flagErr := facts.FlagSet.GetString("new-runtime") if flagErr != nil { diff --git a/pkg/domain/infra/runtime_libpod.go b/pkg/domain/infra/runtime_libpod.go index ac557e9de..6eeec43fd 100644 --- a/pkg/domain/infra/runtime_libpod.go +++ b/pkg/domain/infra/runtime_libpod.go @@ -37,6 +37,7 @@ type engineOpts struct { migrate bool noStore bool withFDS bool + reset bool config *entities.PodmanConfig } @@ -48,6 +49,7 @@ func GetRuntimeMigrate(ctx context.Context, fs *flag.FlagSet, cfg *entities.Podm migrate: true, noStore: false, withFDS: true, + reset: false, config: cfg, }) } @@ -59,6 +61,7 @@ func GetRuntimeDisableFDs(ctx context.Context, fs *flag.FlagSet, cfg *entities.P migrate: false, noStore: false, withFDS: false, + reset: false, config: cfg, }) } @@ -70,6 +73,7 @@ func GetRuntimeRenumber(ctx context.Context, fs *flag.FlagSet, cfg *entities.Pod migrate: false, noStore: false, withFDS: true, + reset: false, config: cfg, }) } @@ -82,6 +86,7 @@ func GetRuntime(ctx context.Context, flags *flag.FlagSet, cfg *entities.PodmanCo migrate: false, noStore: false, withFDS: true, + reset: false, config: cfg, }) }) @@ -95,6 +100,18 @@ func GetRuntimeNoStore(ctx context.Context, fs *flag.FlagSet, cfg *entities.Podm migrate: false, noStore: true, withFDS: true, + reset: false, + config: cfg, + }) +} + +func GetRuntimeReset(ctx context.Context, fs *flag.FlagSet, cfg *entities.PodmanConfig) (*libpod.Runtime, error) { + return getRuntime(ctx, fs, &engineOpts{ + renumber: false, + migrate: false, + noStore: false, + withFDS: true, + reset: true, config: cfg, }) } @@ -161,6 +178,10 @@ func getRuntime(ctx context.Context, fs *flag.FlagSet, opts *engineOpts) (*libpo } } + if opts.reset { + options = append(options, libpod.WithReset()) + } + if opts.renumber { options = append(options, libpod.WithRenumber()) } |