summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Matthew Heon <mheon@redhat.com> 2022-05-23 09:34:37 -0400
committer Matthew Heon <mheon@redhat.com> 2022-06-14 13:56:42 -0400
commit ed9ae4cad88a828d3235d126ed31ac9aa9117a13 (patch)
tree 778c3dce7d03f64c14bda1460a527df1480cb3cc
parent 7c2f8733edae8b1bc55ad9bb1011592869b630df (diff)
download podman-ed9ae4cad88a828d3235d126ed31ac9aa9117a13.tar.gz
podman-ed9ae4cad88a828d3235d126ed31ac9aa9117a13.tar.bz2
podman-ed9ae4cad88a828d3235d126ed31ac9aa9117a13.zip
Instead of erroring, clean up after dangling IDs in DB
For various (mostly legacy) reasons, Podman presently maintains a unified namespace for pods and containers - i.e., we cannot have both a pod and a container named "test" at the same time. To implement this, we use a global database table of every pod and container ID (and another of every pod and container name). These entries should be added when containers/pods are added, and removed when containers/pods are removed, with the database's transactional integrity providing a guarantee that this is batched with the overall removal and that the DB should remain sane and consistent no matter what. As such, we treat a dangling ID as a hard error that stops the use of Podman. Unfortunately, we had someone run into this last Friday. I'm still not certain how exactly their DB got into this state, but without further clarification there, we can consider removing the error and making Podman instead clean up and remove any dangling IDs, which should restore Podman to a serviceable state. Log an error message if we do this, though, because people should know that the DB is in a bad state. [NO NEW TESTS NEEDED] It is deliberately impossible to produce a configuration that would test this without hex-editing the DB file. Signed-off-by: Matthew Heon <mheon@redhat.com>
-rw-r--r--libpod/boltdb_state.go36
1 files changed, 34 insertions, 2 deletions
diff --git a/libpod/boltdb_state.go b/libpod/boltdb_state.go
index 9745121c7..c3db6152a 100644
--- a/libpod/boltdb_state.go
+++ b/libpod/boltdb_state.go
@@ -162,6 +162,11 @@ func (s *BoltState) Refresh() error {
return err
}
+ namesBucket, err := getNamesBucket(tx)
+ if err != nil {
+ return err
+ }
+
ctrsBucket, err := getCtrBucket(tx)
if err != nil {
return err
@@ -192,6 +197,7 @@ func (s *BoltState) Refresh() error {
// PID, mountpoint, and state for all of them
// Then save the modified state
// Also clear all network namespaces
+ toRemoveIDs := []string{}
err = idBucket.ForEach(func(id, name []byte) error {
ctrBkt := ctrsBucket.Bucket(id)
if ctrBkt == nil {
@@ -199,8 +205,16 @@ func (s *BoltState) Refresh() error {
podBkt := podsBucket.Bucket(id)
if podBkt == nil {
// This is neither a pod nor a container
- // Error out on the dangling ID
- return errors.Wrapf(define.ErrInternal, "id %s is not a pod or a container", string(id))
+ // Something is seriously wrong, but
+ // continue on and try to clean up the
+ // state and become consistent.
+ // Just note what needs to be removed
+ // for now - ForEach says you shouldn't
+ // remove things from the table during
+ // it.
+ logrus.Errorf("Database issue: dangling ID %s found (not a pod or container) - removing", string(id))
+ toRemoveIDs = append(toRemoveIDs, string(id))
+ return nil
}
// Get the state
@@ -285,6 +299,24 @@ func (s *BoltState) Refresh() error {
return err
}
+ // Remove dangling IDs.
+ for _, id := range toRemoveIDs {
+ // Look up the ID to see if we also have a dangling name
+ // in the DB.
+ name := idBucket.Get([]byte(id))
+ if name != nil {
+ if testID := namesBucket.Get(name); testID != nil {
+ logrus.Infof("Found dangling name %s (ID %s) in database", string(name), id)
+ if err := namesBucket.Delete(name); err != nil {
+ return errors.Wrapf(err, "error removing dangling name %s (ID %s) from database", string(name), id)
+ }
+ }
+ }
+ if err := idBucket.Delete([]byte(id)); err != nil {
+ return errors.Wrapf(err, "error removing dangling ID %s from database", id)
+ }
+ }
+
// Now refresh volumes
err = allVolsBucket.ForEach(func(id, name []byte) error {
dbVol := volBucket.Bucket(id)