From f02a9cd97547630a944df83e7e02eac11e8a7021 Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Tue, 27 Feb 2018 15:01:29 -0500 Subject: Handle removing containers with active exec sessions For containers without --force set, an error will be returned For containers with --force, all pids in the container will be stopped, first with SIGTERM and then with SIGKILL after a timeout (this mimics the behavior of stopping a container). Signed-off-by: Matthew Heon Closes: #412 Approved by: baude --- libpod/oci.go | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++ libpod/runtime_ctr.go | 11 ++++++ libpod/runtime_pod.go | 11 ++++++ 3 files changed, 115 insertions(+) diff --git a/libpod/oci.go b/libpod/oci.go index f25c6bdb9..63aff9487 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -147,6 +147,40 @@ func waitContainerStop(ctr *Container, timeout time.Duration) error { } } +// Wait for a set of given PIDs to stop +func waitPidsStop(pids []int, timeout time.Duration) error { + done := make(chan struct{}) + chControl := make(chan struct{}) + go func() { + for { + select { + case <-chControl: + return + default: + allClosed := true + for _, pid := range pids { + if err := unix.Kill(pid, 0); err != unix.ESRCH { + allClosed = false + break + } + } + if allClosed { + close(done) + return + } + time.Sleep(100 * time.Millisecond) + } + } + }() + select { + case <-done: + return nil + case <-time.After(timeout): + close(chControl) + return errors.Errorf("given PIDs did not die within timeout") + } +} + // CreateContainer creates a container in the OCI runtime // TODO terminal support for container // Presently just ignoring conmon opts related to it @@ -524,3 +558,62 @@ func (r *OCIRuntime) execContainer(c *Container, cmd, capAdd, env []string, tty return execCmd, nil } + +// execStopContainer stops all active exec sessions in a container +// It will also stop all other processes in the container. 
It is only intended +// to be used to assist in cleanup when removing a container. +// SIGTERM is used by default to stop processes. If SIGTERM fails, SIGKILL will be used. +func (r *OCIRuntime) execStopContainer(ctr *Container, timeout uint) error { + // Do we have active exec sessions? + if len(ctr.state.ExecSessions) == 0 { + return nil + } + + // Get a list of active exec sessions + execSessions := []int{} + for _, pid := range ctr.state.ExecSessions { + // Ping the PID with signal 0 to see if it still exists + if err := unix.Kill(pid, 0); err == unix.ESRCH { + continue + } + + execSessions = append(execSessions, pid) + } + + // All the sessions may be dead + // If they are, just return + if len(execSessions) == 0 { + return nil + } + + // If timeout is 0, just use SIGKILL + if timeout > 0 { + // Stop the processes with SIGTERM first + // Use SIGKILL after a timeout + logrus.Debugf("Killing all processes in container %s with SIGTERM", ctr.ID()) + if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.path, "kill", "--all", ctr.ID(), "TERM"); err != nil { + return errors.Wrapf(err, "error sending SIGTERM to container %s processes", ctr.ID()) + } + + // Wait for all processes to stop + if err := waitPidsStop(execSessions, time.Duration(timeout)*time.Second); err != nil { + logrus.Warnf("Timed out stopping container %s exec sessions", ctr.ID()) + } else { + // No error, all exec sessions are dead + return nil + } + } + + // Send SIGKILL + logrus.Debugf("Killing all processes in container %s with SIGKILL", ctr.ID()) + if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.path, "kill", "--all", ctr.ID(), "KILL"); err != nil { + return errors.Wrapf(err, "error sending SIGKILL to container %s processes", ctr.ID()) + } + + // Give the processes a few seconds to go down + if err := waitPidsStop(execSessions, killContainerTimeout); err != nil { + return errors.Wrapf(err, "failed to kill container %s exec sessions", ctr.ID()) + } + + return nil 
+} diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index 4f8587186..51c2001d0 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -165,6 +165,17 @@ func (r *Runtime) removeContainer(c *Container, force bool) error { return errors.Wrapf(ErrCtrStateInvalid, "cannot remove container %s as it is %s - running or paused containers cannot be removed", c.ID(), c.state.State.String()) } + // Check that all of our exec sessions have finished + if len(c.state.ExecSessions) != 0 { + if force { + if err := r.ociRuntime.execStopContainer(c, c.StopTimeout()); err != nil { + return err + } + } else { + return errors.Wrapf(ErrCtrStateInvalid, "cannot remove container %s as it has active exec sessions", c.ID()) + } + } + // Check that no other containers depend on the container deps, err := r.state.ContainerInUse(c) if err != nil { diff --git a/libpod/runtime_pod.go b/libpod/runtime_pod.go index 248cadf09..0debb7924 100644 --- a/libpod/runtime_pod.go +++ b/libpod/runtime_pod.go @@ -101,6 +101,11 @@ func (r *Runtime) RemovePod(p *Pod, removeCtrs, force bool) error { return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains container %s which is running", p.ID(), ctr.ID()) } + // If the container has active exec sessions and force is not set we can't do anything + if len(ctr.state.ExecSessions) != 0 && !force { + return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains container %s which has active exec sessions", p.ID(), ctr.ID()) + } + deps, err := r.state.ContainerInUse(ctr) if err != nil { return err @@ -134,6 +139,12 @@ func (r *Runtime) RemovePod(p *Pod, removeCtrs, force bool) error { return err } } + // If the container has active exec sessions, stop them now + if len(ctr.state.ExecSessions) != 0 { + if err := r.ociRuntime.execStopContainer(ctr, ctr.StopTimeout()); err != nil { + return err + } + } } } -- cgit v1.2.3-54-g00ecf