diff options
-rw-r--r-- | go.mod | 2 | ||||
-rw-r--r-- | go.sum | 4 | ||||
-rw-r--r-- | libpod/container_api.go | 10 | ||||
-rw-r--r-- | libpod/container_internal.go | 43 | ||||
-rw-r--r-- | libpod/define/containerstate.go | 5 | ||||
-rw-r--r-- | libpod/rootless_cni_linux.go | 2 | ||||
-rw-r--r-- | test/system/050-stop.bats | 28 | ||||
-rw-r--r-- | vendor/github.com/cri-o/ocicni/pkg/ocicni/ocicni.go | 32 | ||||
-rw-r--r-- | vendor/modules.txt | 2 |
9 files changed, 103 insertions, 25 deletions
@@ -17,7 +17,7 @@ require ( github.com/containers/psgo v1.5.2 github.com/containers/storage v1.24.5 github.com/coreos/go-systemd/v22 v22.1.0 - github.com/cri-o/ocicni v0.2.1-0.20201125151022-df072ea5421c + github.com/cri-o/ocicni v0.2.1-0.20201204103948-b6cbe99b9756 github.com/cyphar/filepath-securejoin v0.2.2 github.com/davecgh/go-spew v1.1.1 github.com/docker/distribution v2.7.1+incompatible @@ -131,8 +131,8 @@ github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:ma github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/creack/pty v1.1.9 h1:uDmaGzcdjhF4i/plgjmEsriH11Y0o7RKapEf/LDaM3w= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= -github.com/cri-o/ocicni v0.2.1-0.20201125151022-df072ea5421c h1:iGaCU6d3oVT0pl8tmvyDhoA/vTDL3IX08akfsKZIy9o= -github.com/cri-o/ocicni v0.2.1-0.20201125151022-df072ea5421c/go.mod h1:vingr1ztOAzP2WyTgGbpMov9dFhbjNxdLtDv0+PhAvY= +github.com/cri-o/ocicni v0.2.1-0.20201204103948-b6cbe99b9756 h1:4T3rzrCSvMgVTR+fm526d+Ed0BurAHGjOaaNFOVoK6E= +github.com/cri-o/ocicni v0.2.1-0.20201204103948-b6cbe99b9756/go.mod h1:vingr1ztOAzP2WyTgGbpMov9dFhbjNxdLtDv0+PhAvY= github.com/cyphar/filepath-securejoin v0.2.2 h1:jCwT2GTP+PY5nBz3c/YL5PAIbusElVrPujOBSCj8xRg= github.com/cyphar/filepath-securejoin v0.2.2/go.mod h1:FpkQEhXnPnOthhzymB7CGsFk2G9VLXONKD9G7QGMM+4= github.com/d2g/dhcp4 v0.0.0-20170904100407-a1d1b6c41b1c/go.mod h1:Ct2BUK8SB0YC1SMSibvLzxjeJLnrYEVLULFNiHY9YfQ= diff --git a/libpod/container_api.go b/libpod/container_api.go index 87ff764e3..0d62a2dd7 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -210,7 +210,13 @@ func (c *Container) Kill(signal uint) error { } // TODO: Is killing a paused container OK? - if c.state.State != define.ContainerStateRunning { + switch c.state.State { + case define.ContainerStateRunning, define.ContainerStateStopping: + // Note that killing containers in "stopping" state is okay. + // In that state, the Podman is waiting for the runtime to + // stop the container and if that is taking too long, a user + // may have decided to kill the container after all. + default: return errors.Wrapf(define.ErrCtrStateInvalid, "can only kill running containers. %s is in state %s", c.ID(), c.state.State.String()) } @@ -539,7 +545,7 @@ func (c *Container) Cleanup(ctx context.Context) error { } // Check if state is good - if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateExited) { + if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateStopping, define.ContainerStateExited) { return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is running or paused, refusing to clean up", c.ID()) } diff --git a/libpod/container_internal.go b/libpod/container_internal.go index f4cdb749f..6d4d3ade0 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -764,7 +764,7 @@ func (c *Container) isStopped() (bool, error) { return true, err } - return !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused), nil + return !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused, define.ContainerStateStopping), nil } // save container state to the database @@ -1290,10 +1290,49 @@ func (c *Container) stop(timeout uint) error { return err } + // Set the container state to "stopping" and unlock the container + // before handing it over to conmon to unblock other commands. #8501 + // demonstrates nicely that a high stop timeout will block even simple + // commands such as `podman ps` from progressing if the container lock + // is held when busy-waiting for the container to be stopped. + c.state.State = define.ContainerStateStopping + if err := c.save(); err != nil { + return errors.Wrapf(err, "error saving container %s state before stopping", c.ID()) + } + if !c.batched { + c.lock.Unlock() + } + if err := c.ociRuntime.StopContainer(c, timeout, all); err != nil { return err } + if !c.batched { + c.lock.Lock() + if err := c.syncContainer(); err != nil { + switch errors.Cause(err) { + // If the container has already been removed (e.g., via + // the cleanup process), there's nothing left to do. + case define.ErrNoSuchCtr, define.ErrCtrRemoved: + return nil + default: + return err + } + } + } + + // Since we're now subject to a race condition with other processes who + // may have altered the state (and other data), let's check if the + // state has changed. If so, we should return immediately and log a + // warning. + if c.state.State != define.ContainerStateStopping { + logrus.Warnf( + "Container %q state changed from %q to %q while waiting for it to be stopped: discontinuing stop procedure as another process interfered", + c.ID(), define.ContainerStateStopping, c.state.State, + ) + return nil + } + c.newContainerEvent(events.Stop) c.state.PID = 0 @@ -2116,7 +2155,7 @@ func (c *Container) sortUserVolumes(ctrSpec *spec.Spec) ([]*ContainerNamedVolume // Check for an exit file, and handle one if present func (c *Container) checkExitFile() error { // If the container's not running, nothing to do. - if !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) { + if !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused, define.ContainerStateStopping) { return nil } diff --git a/libpod/define/containerstate.go b/libpod/define/containerstate.go index 825e77387..5d2bc9099 100644 --- a/libpod/define/containerstate.go +++ b/libpod/define/containerstate.go @@ -28,6 +28,9 @@ const ( // ContainerStateRemoving indicates the container is in the process of // being removed. ContainerStateRemoving ContainerStatus = iota + // ContainerStateStopping indicates the container is in the process of + // being stopped. + ContainerStateStopping ContainerStatus = iota ) // ContainerStatus returns a string representation for users @@ -50,6 +53,8 @@ func (t ContainerStatus) String() string { return "exited" case ContainerStateRemoving: return "removing" + case ContainerStateStopping: + return "stopping" } return "bad state" } diff --git a/libpod/rootless_cni_linux.go b/libpod/rootless_cni_linux.go index ce8a87759..9a980750f 100644 --- a/libpod/rootless_cni_linux.go +++ b/libpod/rootless_cni_linux.go @@ -110,6 +110,8 @@ func DeallocRootlessCNI(ctx context.Context, c *Container) error { logrus.Warn(err) } logrus.Debugf("rootless CNI: removing infra container %q", infra.ID()) + infra.lock.Lock() + defer infra.lock.Unlock() if err := c.runtime.removeContainer(ctx, infra, true, false, true); err != nil { return err } diff --git a/test/system/050-stop.bats b/test/system/050-stop.bats index f604ea2e2..548fd56ee 100644 --- a/test/system/050-stop.bats +++ b/test/system/050-stop.bats @@ -67,4 +67,32 @@ load helpers done } +# Regression test for #8501 +@test "podman stop - unlock while waiting for timeout" { + # Test that the container state transitions to "stopping" and that other + # commands can get the container's lock. To do that, run a container that + # ingores SIGTERM such that the Podman would wait 20 seconds for the stop + # to finish. This gives us enough time to try some commands and inspect + # the container's status. + + run_podman run --name stopme -d $IMAGE sh -c \ + "trap 'echo Received SIGTERM, ignoring' SIGTERM; echo READY; while :; do sleep 1; done" + + # Stop the container in the background + $PODMAN stop -t 20 stopme & + + # Other commands can acquire the lock + run_podman ps -a + + # The container state transitioned to "stopping" + run_podman inspect --format '{{.State.Status}}' stopme + is "$output" "stopping" "Status of container should be 'stopping'" + + run_podman kill stopme + + # Exit code should be 137 as it was killed + run_podman inspect --format '{{.State.ExitCode}}' stopme + is "$output" "137" "Exit code of killed container" +} + # vim: filetype=sh diff --git a/vendor/github.com/cri-o/ocicni/pkg/ocicni/ocicni.go b/vendor/github.com/cri-o/ocicni/pkg/ocicni/ocicni.go index 7a9f97d1c..d9c1d37db 100644 --- a/vendor/github.com/cri-o/ocicni/pkg/ocicni/ocicni.go +++ b/vendor/github.com/cri-o/ocicni/pkg/ocicni/ocicni.go @@ -102,7 +102,7 @@ func (plugin *cniNetworkPlugin) podUnlock(podNetwork PodNetwork) { fullPodName := buildFullPodName(podNetwork) lock, ok := plugin.pods[fullPodName] if !ok { - logrus.Warningf("Unbalanced pod lock unref for %s", fullPodName) + logrus.Errorf("Cannot find reference in refcount map for %s. Refcount cannot be determined.", fullPodName) return } else if lock.refcount == 0 { // This should never ever happen, but handle it anyway @@ -121,12 +121,12 @@ func newWatcher(confDir string) (*fsnotify.Watcher, error) { // Ensure plugin directory exists, because the following monitoring logic // relies on that. if err := os.MkdirAll(confDir, 0755); err != nil { - return nil, fmt.Errorf("failed to create %q: %v", confDir, err) + return nil, fmt.Errorf("failed to create directory %q: %v", confDir, err) } watcher, err := fsnotify.NewWatcher() if err != nil { - return nil, fmt.Errorf("could not create new watcher %v", err) + return nil, fmt.Errorf("failed to create new watcher %v", err) } defer func() { // Close watcher on error @@ -275,13 +275,13 @@ func loadNetworks(confDir string, cni *libcni.CNIConfig) (map[string]*cniNetwork if strings.HasSuffix(confFile, ".conflist") { confList, err = libcni.ConfListFromFile(confFile) if err != nil { - logrus.Warningf("Error loading CNI config list file %s: %v", confFile, err) + logrus.Errorf("Error loading CNI config list file %s: %v", confFile, err) continue } } else { conf, err := libcni.ConfFromFile(confFile) if err != nil { - logrus.Warningf("Error loading CNI config file %s: %v", confFile, err) + logrus.Errorf("Error loading CNI config file %s: %v", confFile, err) continue } if conf.Network.Type == "" { @@ -290,7 +290,7 @@ func loadNetworks(confDir string, cni *libcni.CNIConfig) (map[string]*cniNetwork } confList, err = libcni.ConfListFromConf(conf) if err != nil { - logrus.Warningf("Error converting CNI config file %s to list: %v", confFile, err) + logrus.Errorf("Error converting CNI config file %s to list: %v", confFile, err) continue } } @@ -321,7 +321,7 @@ func loadNetworks(confDir string, cni *libcni.CNIConfig) (map[string]*cniNetwork if _, ok := networks[confList.Name]; !ok { networks[confList.Name] = cniNet } else { - logrus.Infof("Ignore CNI network %s (type=%v) at %s because already exists", confList.Name, confList.Plugins[0].Network.Type, confFile) + logrus.Infof("Ignored CNI network %s (type=%v) at %s because already exists", confList.Name, confList.Plugins[0].Network.Type, confFile) } if defaultNetName == "" { @@ -348,7 +348,7 @@ func (plugin *cniNetworkPlugin) syncNetworkConfig() error { // Update defaultNetName if it is changeable if plugin.defaultNetName.changeable { plugin.defaultNetName.name = defaultNetName - logrus.Infof("Update default CNI network name to %s", defaultNetName) + logrus.Infof("Updated default CNI network name to %s", defaultNetName) } else { logrus.Debugf("Default CNI network name %s is unchangeable", plugin.defaultNetName.name) } @@ -479,8 +479,8 @@ func (plugin *cniNetworkPlugin) forEachNetwork(podNetwork *PodNetwork, fromCache var newRt *libcni.RuntimeConf cniNet, newRt, err = plugin.loadNetworkFromCache(network.Name, rt) if err != nil { - logrus.Debugf("error loading cached network config: %v", err) - logrus.Debugf("falling back to loading from existing plugins on disk") + logrus.Errorf("error loading cached network config: %v", err) + logrus.Warningf("falling back to loading from existing plugins on disk") } else { // Use the updated RuntimeConf rt = newRt @@ -570,7 +570,7 @@ func (plugin *cniNetworkPlugin) getCachedNetworkInfo(containerID string) ([]NetA cacheFile := filepath.Join(dirPath, fname) bytes, err := ioutil.ReadFile(cacheFile) if err != nil { - logrus.Warningf("failed to read CNI cache file %s: %v", cacheFile, err) + logrus.Errorf("failed to read CNI cache file %s: %v", cacheFile, err) continue } @@ -582,7 +582,7 @@ func (plugin *cniNetworkPlugin) getCachedNetworkInfo(containerID string) ([]NetA }{} if err := json.Unmarshal(bytes, &cachedInfo); err != nil { - logrus.Warningf("failed to unmarshal CNI cache file %s: %v", cacheFile, err) + logrus.Errorf("failed to unmarshal CNI cache file %s: %v", cacheFile, err) continue } if cachedInfo.Kind != libcni.CNICacheV1 { @@ -632,13 +632,12 @@ func (plugin *cniNetworkPlugin) TearDownPodWithContext(ctx context.Context, podN if err := tearDownLoopback(podNetwork.NetNS); err != nil { // ignore error - logrus.Errorf("Ignoring error tearing down loopback interface: %v", err) + logrus.Warningf("Ignoring error tearing down loopback interface: %v", err) } return plugin.forEachNetwork(&podNetwork, true, func(network *cniNetwork, podNetwork *PodNetwork, rt *libcni.RuntimeConf) error { if err := network.deleteFromNetwork(ctx, rt, plugin.cniConfig); err != nil { - logrus.Errorf("Error while removing pod from CNI network %q: %s", network.name, err) - return err + return fmt.Errorf("Error while removing pod from CNI network %q: %s", network.name, err) } return nil }) @@ -718,7 +717,7 @@ func (network *cniNetwork) checkNetwork(ctx context.Context, rt *libcni.RuntimeC result, err = cni.GetNetworkListCachedResult(network.config, rt) if err != nil { - logrus.Errorf("Error GetNetworkListCachedResult: %v", err) + logrus.Errorf("Error getting network list cached result: %v", err) return nil, err } else if result != nil { return result, nil @@ -771,7 +770,6 @@ func (network *cniNetwork) checkNetwork(ctx context.Context, rt *libcni.RuntimeC func (network *cniNetwork) deleteFromNetwork(ctx context.Context, rt *libcni.RuntimeConf, cni *libcni.CNIConfig) error { logrus.Infof("About to del CNI network %s (type=%v)", network.name, network.config.Plugins[0].Network.Type) if err := cni.DelNetworkList(ctx, network.config, rt); err != nil { - logrus.Errorf("Error deleting network: %v", err) return err } return nil diff --git a/vendor/modules.txt b/vendor/modules.txt index be8adbd46..88ee67917 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -224,7 +224,7 @@ github.com/coreos/go-systemd/v22/dbus github.com/coreos/go-systemd/v22/internal/dlopen github.com/coreos/go-systemd/v22/journal github.com/coreos/go-systemd/v22/sdjournal -# github.com/cri-o/ocicni v0.2.1-0.20201125151022-df072ea5421c +# github.com/cri-o/ocicni v0.2.1-0.20201204103948-b6cbe99b9756 github.com/cri-o/ocicni/pkg/ocicni # github.com/cyphar/filepath-securejoin v0.2.2 github.com/cyphar/filepath-securejoin |