From 831e2c30d479a92c203d2caf82106cb85a6cdfc8 Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Thu, 23 Nov 2017 14:06:43 -0500 Subject: Add ability to kill and stop containers Also migrates kpod kill and kpod stop to libpod to use the new code Fixes force removing containers, and actually deletes containers in runc when removing them Start is now capable of starting even when the container is unmounted Signed-off-by: Matthew Heon Closes: #68 Approved by: rhatdan --- cmd/kpod/kill.go | 48 +++++++++++++----------- cmd/kpod/stop.go | 32 ++++++++-------- libpod/container.go | 87 +++++++++++++++++++++++++++++++++++++----- libpod/oci.go | 102 ++++++++++++++++++++++++++++++++++++++++++++++++-- libpod/runtime_ctr.go | 13 ++++++- 5 files changed, 233 insertions(+), 49 deletions(-) diff --git a/cmd/kpod/kill.go b/cmd/kpod/kill.go index 9fab7cc88..776c7ef20 100644 --- a/cmd/kpod/kill.go +++ b/cmd/kpod/kill.go @@ -3,10 +3,10 @@ package main import ( "fmt" "os" + "syscall" "github.com/docker/docker/pkg/signal" "github.com/pkg/errors" - "github.com/projectatomic/libpod/libkpod" "github.com/urfave/cli" ) @@ -38,36 +38,42 @@ func killCmd(c *cli.Context) error { if err := validateFlags(c, killFlags); err != nil { return err } - config, err := getConfig(c) - if err != nil { - return errors.Wrapf(err, "could not get config") - } - server, err := libkpod.New(config) - if err != nil { - return errors.Wrapf(err, "could not get container server") - } - killSignal := c.String("signal") - // Check if the signalString provided by the user is valid - // Invalid signals will return err - sysSignal, err := signal.ParseSignal(killSignal) + + runtime, err := getRuntime(c) if err != nil { - return err + return errors.Wrapf(err, "could not get runtime") } - defer server.Shutdown() - err = server.Update() - if err != nil { - return errors.Wrapf(err, "could not update list of containers") + defer runtime.Shutdown(false) + + var killSignal uint = uint(syscall.SIGTERM) + if c.String("signal") != "" { + // Check if the signalString provided by the user is valid + // Invalid signals will return err + sysSignal, err := signal.ParseSignal(c.String("signal")) + if err != nil { + return err + } + killSignal = uint(sysSignal) } + var lastError error for _, container := range c.Args() { - id, err := server.ContainerKill(container, sysSignal) + ctr, err := runtime.LookupContainer(container) if err != nil { if lastError != nil { fmt.Fprintln(os.Stderr, lastError) } - lastError = errors.Wrapf(err, "unable to kill %v", container) + lastError = errors.Wrapf(err, "unable to find container %v", container) + continue + } + + if err := ctr.Kill(killSignal); err != nil { + if lastError != nil { + fmt.Fprintln(os.Stderr, lastError) + } + lastError = errors.Wrapf(err, "unable to find container %v", container) } else { - fmt.Println(id) + fmt.Println(ctr.ID()) } } return lastError diff --git a/cmd/kpod/stop.go b/cmd/kpod/stop.go index 79325da5f..78ed1216c 100644 --- a/cmd/kpod/stop.go +++ b/cmd/kpod/stop.go @@ -5,9 +5,8 @@ import ( "os" "github.com/pkg/errors" - "github.com/projectatomic/libpod/libkpod" + "github.com/sirupsen/logrus" "github.com/urfave/cli" - "golang.org/x/net/context" ) var ( @@ -47,29 +46,32 @@ func stopCmd(c *cli.Context) error { return err } - config, err := getConfig(c) + runtime, err := getRuntime(c) if err != nil { - return errors.Wrapf(err, "could not get config") - } - server, err := libkpod.New(config) - if err != nil { - return errors.Wrapf(err, "could not get container server") - } - defer server.Shutdown() - err = server.Update() - if err != nil { - return errors.Wrapf(err, "could not update list of containers") + return errors.Wrapf(err, "could not get runtime") } + defer runtime.Shutdown(false) + + logrus.Debugf("Stopping containers with timeout %d", stopTimeout) + var lastError error for _, container := range c.Args() { - cid, err := server.ContainerStop(context.Background(), container, stopTimeout) + ctr, err := runtime.LookupContainer(container) if err != nil { if lastError != nil { fmt.Fprintln(os.Stderr, lastError) } lastError = errors.Wrapf(err, "failed to stop container %v", container) + continue + } + + if err := ctr.Stop(stopTimeout); err != nil { + if lastError != nil { + fmt.Fprintln(os.Stderr, lastError) + } + lastError = errors.Wrapf(err, "failed to stop container %v", container) } else { - fmt.Println(cid) + fmt.Println(ctr.ID()) } } diff --git a/libpod/container.go b/libpod/container.go index b58c156a0..3de0ed02e 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -106,7 +106,7 @@ type ContainerConfig struct { // reboot StaticDir string `json:"staticDir"` // Whether to keep container STDIN open - Stdin bool + Stdin bool `json:"stdin,omitempty"` // Pod the container belongs to Pod string `json:"pod,omitempty"` // Labels is a set of key-value pairs providing additional information @@ -381,6 +381,7 @@ func (c *Container) teardownStorage() error { } c.state.Mounted = false + c.state.Mountpoint = "" } if err := c.runtime.storageService.DeleteContainer(c.ID()); err != nil { @@ -474,6 +475,29 @@ func (c *Container) Start() error { return errors.Wrapf(ErrCtrStateInvalid, "container %s must be in Created or Stopped state to be started", c.ID()) } + // If the container isn't mounted, mount it + if !c.state.Mounted { + mountPoint, err := c.runtime.storageService.StartContainer(c.ID()) + if err != nil { + return errors.Wrapf(err, "error mounting storage for container %s", c.ID()) + } + c.state.Mounted = true + c.state.Mountpoint = mountPoint + + logrus.Debugf("Created root filesystem for container %s at %s", c.ID(), c.state.Mountpoint) + + defer func() { + if err != nil { + if err2 := c.runtime.storageService.StopContainer(c.ID()); err2 != nil { + logrus.Errorf("Error unmounting storage for container %s: %v", c.ID(), err2) + } + + c.state.Mounted = false + c.state.Mountpoint = "" + } + }() + } + if err := c.runtime.ociRuntime.startContainer(c); err != nil { return err } @@ -492,14 +516,63 @@ func (c *Container) Start() error { return nil } -// Stop stops a container -func (c *Container) Stop() error { - return ErrNotImplemented +// Stop uses the container's requested stop signal (or SIGTERM if no signal was +// specified) to stop the container, and if it has not stopped after the given +// timeout (in seconds), uses SIGKILL to attempt to forcibly stop the container +// If timeout is 0, SIGKILL will be used immediately +func (c *Container) Stop(timeout int64) error { + c.lock.Lock() + defer c.lock.Unlock() + + if err := c.syncContainer(); err != nil { + return err + } + + if c.state.State == ContainerStateConfigured || + c.state.State == ContainerStateUnknown || + c.state.State == ContainerStatePaused { + return errors.Wrapf(ErrCtrStateInvalid, "can only stop created, running, or stopped containers") + } + + if err := c.runtime.ociRuntime.stopContainer(c, timeout); err != nil { + return err + } + + // Sync the container's state to pick up return code + if err := c.runtime.ociRuntime.updateContainerStatus(c); err != nil { + return err + } + + // Also unmount storage + if err := c.runtime.storageService.StopContainer(c.ID()); err != nil { + return errors.Wrapf(err, "error unmounting container %s root filesystem", c.ID()) + } + c.state.Mountpoint = "" + c.state.Mounted = false + + if err := c.runtime.state.SaveContainer(c); err != nil { + return errors.Wrapf(err, "error saving container %s state", c.ID()) + } + + return nil } // Kill sends a signal to a container func (c *Container) Kill(signal uint) error { - return ErrNotImplemented + c.lock.Lock() + defer c.lock.Unlock() + + if err := c.syncContainer(); err != nil { + return err + } + + if c.state.State == ContainerStateUnknown || + c.state.State == ContainerStateConfigured || + c.state.State == ContainerStatePaused { + return errors.Wrapf(ErrCtrStateInvalid, "can only kill created, running, or stopped containers") + } + + return c.runtime.ociRuntime.killContainer(c, signal) } // Exec starts a new process inside the container @@ -515,10 +588,6 @@ func (c *Container) Attach(noStdin bool, keys string, attached chan<- bool) erro return err } - if c.state.State == ContainerStateRunning || c.state.State == ContainerStatePaused { - return errors.Wrapf(ErrCtrStateInvalid, "cannot remove storage for container %s as it is running or paused", c.ID()) - } - // TODO is it valid to attach to a frozen container? if c.state.State == ContainerStateUnknown || c.state.State == ContainerStateConfigured || diff --git a/libpod/oci.go b/libpod/oci.go index eb8bd0d93..750e94fe7 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -34,6 +34,9 @@ const ( // ContainerCreateTimeout represents the value of container creating timeout ContainerCreateTimeout = 240 * time.Second + + // Timeout before declaring that runc has failed to kill a given container + killContainerTimeout = 5 * time.Second ) // OCIRuntime represents an OCI-compatible runtime that libpod can call into @@ -109,10 +112,39 @@ func createUnitName(prefix string, name string) string { return fmt.Sprintf("%s-%s.scope", prefix, name) } +// Wait for a container which has been sent a signal to stop +func waitContainerStop(ctr *Container, timeout time.Duration) error { + done := make(chan struct{}) + chControl := make(chan struct{}) + go func() { + for { + select { + case <-chControl: + return + default: + // Check if the process is still around + err := unix.Kill(ctr.state.PID, 0) + if err == unix.ESRCH { + close(done) + return + } + time.Sleep(100 * time.Millisecond) + } + } + }() + select { + case <-done: + return nil + case <-time.After(timeout): + close(chControl) + return errors.Errorf("container %s did not die within timeout", ctr.ID()) + } +} + // CreateContainer creates a container in the OCI runtime // TODO terminal support for container // Presently just ignoring conmon opts related to it -func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) error { +func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) (err error) { var stderrBuf bytes.Buffer parentPipe, childPipe, err := newPipe() @@ -221,8 +253,13 @@ func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) error return err } - // TODO should do a defer r.deleteContainer(ctr) here if err != nil - // Need deleteContainer to be working first, though... + defer func() { + if err != nil { + if err2 := r.deleteContainer(ctr); err2 != nil { + logrus.Errorf("Error removing container %s from runc after creation failed", ctr.ID()) + } + } + }() // Wait to get container pid from conmon type syncStruct struct { @@ -346,6 +383,65 @@ func (r *OCIRuntime) startContainer(ctr *Container) error { return nil } +// killContainer sends the given signal to the given container +func (r *OCIRuntime) killContainer(ctr *Container, signal uint) error { + if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.path, "kill", ctr.ID(), fmt.Sprintf("%d", signal)); err != nil { + return errors.Wrapf(err, "error sending signal to container %s", ctr.ID()) + } + + return nil +} + +// stopContainer stops a container, first using its given stop signal (or +// SIGTERM if no signal was specified), then using SIGKILL +// Timeout is given in seconds. If timeout is 0, the container will be +// immediately kill with SIGKILL +// Does not set finished time for container, assumes you will run updateStatus +// after to pull the exit code +func (r *OCIRuntime) stopContainer(ctr *Container, timeout int64) error { + // Ping the container to see if it's alive + // If it's not, it's already stopped, return + err := unix.Kill(ctr.state.PID, 0) + if err == unix.ESRCH { + return nil + } + + stopSignal := ctr.config.StopSignal + if stopSignal == 0 { + stopSignal = uint(syscall.SIGTERM) + } + + if timeout > 0 { + if err := r.killContainer(ctr, stopSignal); err != nil { + return err + } + + if err := waitContainerStop(ctr, time.Duration(timeout)*time.Second); err != nil { + logrus.Warnf("Timed out stopping container %s, resorting to SIGKILL", ctr.ID()) + } else { + // No error, the container is dead + return nil + } + } + + if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.path, "kill", "--all", ctr.ID(), "KILL"); err != nil { + return errors.Wrapf(err, "error sending SIGKILL to container %s", ctr.ID()) + } + + // Give runc a few seconds to make it happen + if err := waitContainerStop(ctr, killContainerTimeout); err != nil { + return err + } + + return nil +} + +// deleteContainer deletes a container from runc +func (r *OCIRuntime) deleteContainer(ctr *Container) error { + _, err := utils.ExecCmd(r.path, "delete", "--force", ctr.ID()) + return err +} + // pauseContainer pauses the given container func (r *OCIRuntime) pauseContainer(ctr *Container) error { return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.path, "pause", ctr.ID()) diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index fd92a3411..9a91e0a48 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -7,6 +7,8 @@ import ( "github.com/sirupsen/logrus" ) +const ctrRemoveTimeout = 10 + // Contains the public Runtime API for containers // A CtrCreateOption is a functional option which alters the Container created @@ -97,7 +99,11 @@ func (r *Runtime) RemoveContainer(c *Container, force bool) error { } // Check that the container's in a good state to be removed - if !(c.state.State == ContainerStateConfigured || + if c.state.State == ContainerStateRunning && force { + if err := r.ociRuntime.stopContainer(c, ctrRemoveTimeout); err != nil { + return errors.Wrapf(err, "cannot remove container %s as it could not be stopped", c.ID()) + } + } else if !(c.state.State == ContainerStateConfigured || c.state.State == ContainerStateCreated || c.state.State == ContainerStateStopped) { return errors.Wrapf(ErrCtrStateInvalid, "cannot remove container %s as it is running or paused", c.ID()) @@ -112,6 +118,11 @@ func (r *Runtime) RemoveContainer(c *Container, force bool) error { return errors.Wrapf(err, "error removing container from state") } + // Delete the container + if err := r.ociRuntime.deleteContainer(c); err != nil { + return errors.Wrapf(err, "error removing container %s from runc", c.ID()) + } + // Set container as invalid so it can no longer be used c.valid = false -- cgit v1.2.3-54-g00ecf