From 3a90b5224df686e6efbf78e6b6cbb5333115ea82 Mon Sep 17 00:00:00 2001 From: Daniel J Walsh Date: Tue, 19 Jun 2018 07:50:30 -0400 Subject: Returning joining namespace error should not be fatal I got my database into a bad state by killing a hanging container. It did not set up the network namespace correctly, so listing/removing bad containers becomes impossible. podman run alpine/nginx ^c got me in this state. I got into a state in the database where podman ps -a was returning errors and I could not get out of it. Making joining the network namespace a non-fatal error fixes the issue. Signed-off-by: Daniel J Walsh Closes: #918 Approved by: mheon --- libpod/boltdb_state.go | 8 +++++--- libpod/boltdb_state_internal.go | 11 +++++++---- libpod/runtime_ctr.go | 8 +++++--- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/libpod/boltdb_state.go b/libpod/boltdb_state.go index 3ee24122a..d7c950fa0 100644 --- a/libpod/boltdb_state.go +++ b/libpod/boltdb_state.go @@ -455,10 +455,12 @@ func (s *BoltState) UpdateContainer(ctr *Container) error { // Open the new network namespace ns, err := joinNetNS(netNSPath) - if err != nil { - return errors.Wrapf(err, "error joining network namespace for container %s", ctr.ID()) + if err == nil { + newState.NetNS = ns + } else { + logrus.Errorf("error joining network namespace for container %s", ctr.ID()) + ctr.valid = false } - newState.NetNS = ns } } else { // The container no longer has a network namespace diff --git a/libpod/boltdb_state_internal.go b/libpod/boltdb_state_internal.go index 407d2758a..44f764be1 100644 --- a/libpod/boltdb_state_internal.go +++ b/libpod/boltdb_state_internal.go @@ -200,6 +200,7 @@ func getRuntimeConfigBucket(tx *bolt.Tx) (*bolt.Bucket, error) { } func (s *BoltState) getContainerFromDB(id []byte, ctr *Container, ctrsBkt *bolt.Bucket) error { + valid := true ctrBkt := ctrsBkt.Bucket(id) if ctrBkt == nil { return errors.Wrapf(ErrNoSuchCtr, "container %s not found in DB", string(id)) @@ -230,10 
+231,12 @@ func (s *BoltState) getContainerFromDB(id []byte, ctr *Container, ctrsBkt *bolt. if netNSBytes != nil { nsPath := string(netNSBytes) netNS, err := joinNetNS(nsPath) - if err != nil { - return errors.Wrapf(err, "error joining network namespace for container %s", string(id)) + if err == nil { + ctr.state.NetNS = netNS + } else { + logrus.Errorf("error joining network namespace for container %s", ctr.ID()) + valid = false } - ctr.state.NetNS = netNS } // Get the lock @@ -245,7 +248,7 @@ func (s *BoltState) getContainerFromDB(id []byte, ctr *Container, ctrsBkt *bolt. ctr.lock = lock ctr.runtime = s.runtime - ctr.valid = true + ctr.valid = valid return nil } diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index 932bebdef..82a2fed19 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -165,9 +165,11 @@ func (r *Runtime) RemoveContainer(ctx context.Context, c *Container, force bool) // Locks the container, but does not lock the runtime func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool) error { if !c.valid { - // Container probably already removed - // Or was never in the runtime to begin with - return nil + if ok, _ := r.HasContainer(c.ID()); !ok { + // Container probably already removed + // Or was never in the runtime to begin with + return nil + } } // We need to lock the pod before we lock the container -- cgit v1.2.3-54-g00ecf