From 7789284cbe8f5068d66c90c257b9784505d4a027 Mon Sep 17 00:00:00 2001
From: haircommander
Date: Mon, 23 Jul 2018 15:56:12 -0400
Subject: Added pod.Restart() functionality to libpod.

Moved the contents of RestartWithTimeout to restartWithTimeout in
container_internal so that restart can be called without locking inside the
function. Refactored startNode to be able to either start or restart a node.
Built pod Restart() on the new startNode with restart set to true.

Signed-off-by: haircommander

Closes: #1152
Approved by: rhatdan
---
 libpod/container_api.go      |  34 +-------
 libpod/container_internal.go |  37 ++++++++
 libpod/pod.go                | 196 +++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 225 insertions(+), 42 deletions(-)

diff --git a/libpod/container_api.go b/libpod/container_api.go
index bb9727ec1..b5104048e 100644
--- a/libpod/container_api.go
+++ b/libpod/container_api.go
@@ -717,39 +717,7 @@ func (c *Container) RestartWithTimeout(ctx context.Context, timeout uint) (err e
 		depString := strings.Join(notRunning, ",")
 		return errors.Wrapf(ErrCtrStateInvalid, "some dependencies of container %s are not started: %s", c.ID(), depString)
 	}
-	if c.state.State == ContainerStateUnknown || c.state.State == ContainerStatePaused {
-		return errors.Wrapf(ErrCtrStateInvalid, "unable to restart a container in a paused or unknown state")
-	}
-
-	if c.state.State == ContainerStateRunning {
-		if err := c.stop(timeout); err != nil {
-			return err
-		}
-	}
-	if err := c.prepare(); err != nil {
-		return err
-	}
-	defer func() {
-		if err != nil {
-			if err2 := c.cleanup(); err2 != nil {
-				logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2)
-			}
-		}
-	}()
-
-	if c.state.State == ContainerStateStopped {
-		// Reinitialize the container if we need to
-		if err := c.reinit(ctx); err != nil {
-			return err
-		}
-	} else if c.state.State == ContainerStateConfigured {
-		// Initialize the container if it has never been initialized
-		if err := c.init(ctx); err != nil {
-			return err
-		}
-	}
-
-	return c.start()
+	return c.restartWithTimeout(ctx, timeout)
 }
 
 // Refresh refreshes a container's state in the database, restarting the
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index 55fd7369d..8a96af0ab 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -718,6 +718,43 @@ func (c *Container) unpause() error {
 	return c.save()
 }
 
+// Internal, non-locking function to restart a container
+func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (err error) {
+	if c.state.State == ContainerStateUnknown || c.state.State == ContainerStatePaused {
+		return errors.Wrapf(ErrCtrStateInvalid, "unable to restart a container in a paused or unknown state")
+	}
+
+	if c.state.State == ContainerStateRunning {
+		if err := c.stop(timeout); err != nil {
+			return err
+		}
+	}
+	if err := c.prepare(); err != nil {
+		return err
+	}
+	defer func() {
+		if err != nil {
+			if err2 := c.cleanup(); err2 != nil {
+				logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2)
+			}
+		}
+	}()
+
+	if c.state.State == ContainerStateStopped {
+		// Reinitialize the container if we need to
+		if err := c.reinit(ctx); err != nil {
+			return err
+		}
+	} else if c.state.State == ContainerStateConfigured {
+		// Initialize the container if it has never been initialized
+		if err := c.init(ctx); err != nil {
+			return err
+		}
+	}
+
+	return c.start()
+}
+
 // mountStorage sets up the container's root filesystem
 // It mounts the image and any other requested mounts
 // TODO: Add ability to override mount label so we can use this for Mount() too
diff --git a/libpod/pod.go b/libpod/pod.go
index a5b87f8b5..7cb96a270 100644
--- a/libpod/pod.go
+++ b/libpod/pod.go
@@ -212,15 +212,15 @@ func (p *Pod) Start(ctx context.Context) (map[string]error, error) {
 
 	// Traverse the graph beginning at nodes with no dependencies
 	for _, node := range graph.noDepNodes {
-		startNode(ctx, node, false, ctrErrors, ctrsVisited)
+		startNode(ctx, node, false, ctrErrors, ctrsVisited, false)
 	}
 
 	return ctrErrors, nil
 }
 
 // Visit a node on a container graph and start the container, or set an error if
-// a dependency failed to start
-func startNode(ctx context.Context, node *containerNode, setError bool, ctrErrors map[string]error, ctrsVisited map[string]bool) {
+// a dependency failed to start. If restart is true, startNode will restart the node instead of starting it.
+func startNode(ctx context.Context, node *containerNode, setError bool, ctrErrors map[string]error, ctrsVisited map[string]bool, restart bool) {
 	// First, check if we have already visited the node
 	if ctrsVisited[node.id] {
 		return
@@ -235,7 +235,7 @@ func startNode(ctx context.Context, node *containerNode, setError bool, ctrError
 
 		// Hit anyone who depends on us, and set errors on them too
 		for _, successor := range node.dependedOn {
-			startNode(ctx, successor, true, ctrErrors, ctrsVisited)
+			startNode(ctx, successor, true, ctrErrors, ctrsVisited, restart)
 		}
 
 		return
@@ -287,10 +287,18 @@ func startNode(ctx context.Context, node *containerNode, setError bool, ctrError
 	}
 
 	// Start the container (only if it is not running)
-	if !ctrErrored && node.container.state.State != ContainerStateRunning {
-		if err := node.container.initAndStart(ctx); err != nil {
-			ctrErrored = true
-			ctrErrors[node.id] = err
+	if !ctrErrored {
+		if !restart && node.container.state.State != ContainerStateRunning {
+			if err := node.container.initAndStart(ctx); err != nil {
+				ctrErrored = true
+				ctrErrors[node.id] = err
+			}
+		}
+		if restart && node.container.state.State != ContainerStatePaused && node.container.state.State != ContainerStateUnknown {
+			if err := node.container.restartWithTimeout(ctx, node.container.config.StopTimeout); err != nil {
+				ctrErrored = true
+				ctrErrors[node.id] = err
+			}
 		}
 	}
 
@@ -298,7 +306,7 @@ func startNode(ctx context.Context, node *containerNode, setError bool, ctrError
 
 	// Recurse to anyone who depends on us and start them
 	for _, successor := range node.dependedOn {
-		startNode(ctx, successor, ctrErrored, ctrErrors, ctrsVisited)
+		startNode(ctx, successor, ctrErrored, ctrErrors, ctrsVisited, restart)
 	}
 
 	return
@@ -375,6 +383,176 @@ func (p *Pod) Stop(cleanup bool) (map[string]error, error) {
 	return nil, nil
 }
 
+// Restart restarts all containers within a pod that are not paused or in an error state.
+// It combines the effects of Stop() and Start() on a container.
+// Each container will use its own stop timeout.
+// All containers are started independently, in order dictated by their
+// dependencies. An error restarting one container
+// will not prevent other containers being restarted.
+// An error and a map[string]error are returned
+// If the error is not nil and the map is nil, an error was encountered before
+// any containers were restarted
+// If map is not nil, an error was encountered when restarting one or more
+// containers. The container ID is mapped to the error encountered. The error is
+// set to ErrCtrExists
+// If both error and the map are nil, all containers were restarted without error
+func (p *Pod) Restart(ctx context.Context) (map[string]error, error) {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+
+	if !p.valid {
+		return nil, ErrPodRemoved
+	}
+	allCtrs, err := p.runtime.state.PodContainers(p)
+	if err != nil {
+		return nil, err
+	}
+
+	// Build a dependency graph of containers in the pod
+	graph, err := buildContainerGraph(allCtrs)
+	if err != nil {
+		return nil, errors.Wrapf(err, "error generating dependency graph for pod %s", p.ID())
+	}
+
+	ctrErrors := make(map[string]error)
+	ctrsVisited := make(map[string]bool)
+
+	// If there are no containers without dependencies, we can't start
+	// Error out
+	if len(graph.noDepNodes) == 0 {
+		return nil, errors.Wrapf(ErrNoSuchCtr, "no containers in pod %s have no dependencies, cannot start pod", p.ID())
+	}
+
+	// Traverse the graph beginning at nodes with no dependencies
+	for _, node := range graph.noDepNodes {
+		startNode(ctx, node, false, ctrErrors, ctrsVisited, true)
+	}
+
+	if len(ctrErrors) > 0 {
+		return ctrErrors, errors.Wrapf(ErrCtrExists, "error restarting some containers")
+	}
+
+	return nil, nil
+}
+
+// Pause pauses all containers within a pod that are running.
+// Only running containers will be paused. Paused, stopped, or created
+// containers will be ignored.
+// All containers are paused independently. An error pausing one container
+// will not prevent other containers being paused.
+// An error and a map[string]error are returned
+// If the error is not nil and the map is nil, an error was encountered before
+// any containers were paused
+// If map is not nil, an error was encountered when pausing one or more
+// containers. The container ID is mapped to the error encountered. The error is
+// set to ErrCtrExists
+// If both error and the map are nil, all containers were paused without error
+func (p *Pod) Pause() (map[string]error, error) {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+
+	if !p.valid {
+		return nil, ErrPodRemoved
+	}
+
+	allCtrs, err := p.runtime.state.PodContainers(p)
+	if err != nil {
+		return nil, err
+	}
+
+	ctrErrors := make(map[string]error)
+
+	// Pause all containers
+	for _, ctr := range allCtrs {
+		ctr.lock.Lock()
+
+		if err := ctr.syncContainer(); err != nil {
+			ctr.lock.Unlock()
+			ctrErrors[ctr.ID()] = err
+			continue
+		}
+
+		// Ignore containers that are not running
+		if ctr.state.State != ContainerStateRunning {
+			ctr.lock.Unlock()
+			continue
+		}
+
+		if err := ctr.pause(); err != nil {
+			ctr.lock.Unlock()
+			ctrErrors[ctr.ID()] = err
+			continue
+		}
+
+		ctr.lock.Unlock()
+	}
+
+	if len(ctrErrors) > 0 {
+		return ctrErrors, errors.Wrapf(ErrCtrExists, "error pausing some containers")
+	}
+
+	return nil, nil
+}
+
+// Unpause unpauses all containers within a pod that are paused.
+// Only paused containers will be unpaused. Running, stopped, or created
+// containers will be ignored.
+// All containers are unpaused independently. An error unpausing one container
+// will not prevent other containers being unpaused.
+// An error and a map[string]error are returned
+// If the error is not nil and the map is nil, an error was encountered before
+// any containers were unpaused
+// If map is not nil, an error was encountered when unpausing one or more
+// containers. The container ID is mapped to the error encountered. The error is
+// set to ErrCtrExists
+// If both error and the map are nil, all containers were unpaused without error
+func (p *Pod) Unpause() (map[string]error, error) {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+
+	if !p.valid {
+		return nil, ErrPodRemoved
+	}
+
+	allCtrs, err := p.runtime.state.PodContainers(p)
+	if err != nil {
+		return nil, err
+	}
+
+	ctrErrors := make(map[string]error)
+
+	// Unpause all containers
+	for _, ctr := range allCtrs {
+		ctr.lock.Lock()
+
+		if err := ctr.syncContainer(); err != nil {
+			ctr.lock.Unlock()
+			ctrErrors[ctr.ID()] = err
+			continue
+		}
+
+		// Ignore containers that are not paused
+		if ctr.state.State != ContainerStatePaused {
+			ctr.lock.Unlock()
+			continue
+		}
+
+		if err := ctr.unpause(); err != nil {
+			ctr.lock.Unlock()
+			ctrErrors[ctr.ID()] = err
+			continue
+		}
+
+		ctr.lock.Unlock()
+	}
+
+	if len(ctrErrors) > 0 {
+		return ctrErrors, errors.Wrapf(ErrCtrExists, "error unpausing some containers")
+	}
+
+	return nil, nil
+}
+
 // Kill sends a signal to all running containers within a pod
 // Signals will only be sent to running containers. Containers that are not
 // running will be ignored. All signals are sent independently, and sending will
-- 
cgit v1.2.3-54-g00ecf
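
Note for reviewers: a minimal sketch of how a caller might consume the (map[string]error, error) contract documented on the new Pod.Restart(). The package, the restartPod helper, the import path (projectatomic-era layout), and obtaining the *libpod.Pod (for example via Runtime.LookupPod) are assumptions for illustration only and are not part of this patch.

package example

import (
	"context"
	"fmt"

	"github.com/projectatomic/libpod/libpod" // assumed import path, not confirmed by this patch
)

// restartPod is a hypothetical helper showing the documented return contract:
// a nil map with a non-nil error means nothing was restarted, while a non-nil
// map carries per-container failures keyed by container ID, with the returned
// error wrapping ErrCtrExists.
func restartPod(ctx context.Context, pod *libpod.Pod) error {
	ctrErrs, err := pod.Restart(ctx)
	if ctrErrs == nil && err != nil {
		// Failed before any container was restarted (e.g. the pod was removed).
		return err
	}
	for id, ctrErr := range ctrErrs {
		// Report each container that failed to restart.
		fmt.Printf("container %s failed to restart: %v\n", id, ctrErr)
	}
	return err
}

Pod.Pause() and Pod.Unpause() follow the same map-plus-error shape, so the same handling pattern applies to them.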