diff options
author | Matthew Heon <mheon@redhat.com> | 2022-05-23 09:34:37 -0400 |
---|---|---|
committer | Matthew Heon <mheon@redhat.com> | 2022-06-14 13:56:42 -0400 |
commit | ed9ae4cad88a828d3235d126ed31ac9aa9117a13 (patch) | |
tree | 778c3dce7d03f64c14bda1460a527df1480cb3cc /libpod | |
parent | 7c2f8733edae8b1bc55ad9bb1011592869b630df (diff) | |
download | podman-ed9ae4cad88a828d3235d126ed31ac9aa9117a13.tar.gz podman-ed9ae4cad88a828d3235d126ed31ac9aa9117a13.tar.bz2 podman-ed9ae4cad88a828d3235d126ed31ac9aa9117a13.zip |
Instead of erroring, clean up after dangling IDs in DB
For various (mostly legacy) reasons, Podman presently maintains a
unified namespace for pods and containers - IE, we cannot have
both a pod and a container named "test" at the same time. To
implement this, we use a global database table of every pod and
container ID (and another of every pod and container name).
These entries should be added when containers/pods are added, and
removed when containers/pods are removed, with the database's
transactional integrity providing a guarantee that this is
batched with the overall removal and that the DB should remain
sane and consistent no matter what. As such, we treat a dangling
ID as a hard error that stops the use of Podman.
Unfortunately, we have someone run into this last Friday. I'm
still not certain how exactly their DB got into this state, but
without further clarification there, we can consider removing the
error and making Podman instead clean up and remove any dangling
IDs, which should restore Podman to a serviceable state. Drop an
error message if we do this, though, because people should know
that the DB is in a bad state.
[NO NEW TESTS NEEDED] it is deliberately impossible to produce a
configuration that would test this without hex-editing the DB
file.
Signed-off-by: Matthew Heon <mheon@redhat.com>
Diffstat (limited to 'libpod')
-rw-r--r-- | libpod/boltdb_state.go | 36 |
1 files changed, 34 insertions, 2 deletions
diff --git a/libpod/boltdb_state.go b/libpod/boltdb_state.go index 9745121c7..c3db6152a 100644 --- a/libpod/boltdb_state.go +++ b/libpod/boltdb_state.go @@ -162,6 +162,11 @@ func (s *BoltState) Refresh() error { return err } + namesBucket, err := getNamesBucket(tx) + if err != nil { + return err + } + ctrsBucket, err := getCtrBucket(tx) if err != nil { return err @@ -192,6 +197,7 @@ func (s *BoltState) Refresh() error { // PID, mountpoint, and state for all of them // Then save the modified state // Also clear all network namespaces + toRemoveIDs := []string{} err = idBucket.ForEach(func(id, name []byte) error { ctrBkt := ctrsBucket.Bucket(id) if ctrBkt == nil { @@ -199,8 +205,16 @@ func (s *BoltState) Refresh() error { podBkt := podsBucket.Bucket(id) if podBkt == nil { // This is neither a pod nor a container - // Error out on the dangling ID - return errors.Wrapf(define.ErrInternal, "id %s is not a pod or a container", string(id)) + // Something is seriously wrong, but + // continue on and try to clean up the + // state and become consistent. + // Just note what needs to be removed + // for now - ForEach says you shouldn't + // remove things from the table during + // it. + logrus.Errorf("Database issue: dangling ID %s found (not a pod or container) - removing", string(id)) + toRemoveIDs = append(toRemoveIDs, string(id)) + return nil } // Get the state @@ -285,6 +299,24 @@ func (s *BoltState) Refresh() error { return err } + // Remove dangling IDs. + for _, id := range toRemoveIDs { + // Look up the ID to see if we also have a dangling name + // in the DB. + name := idBucket.Get([]byte(id)) + if name != nil { + if testID := namesBucket.Get(name); testID != nil { + logrus.Infof("Found dangling name %s (ID %s) in database", string(name), id) + if err := namesBucket.Delete(name); err != nil { + return errors.Wrapf(err, "error removing dangling name %s (ID %s) from database", string(name), id) + } + } + } + if err := idBucket.Delete([]byte(id)); err != nil { + return errors.Wrapf(err, "error removing dangling ID %s from database", id) + } + } + // Now refresh volumes err = allVolsBucket.ForEach(func(id, name []byte) error { dbVol := volBucket.Bucket(id) |