author    Peter Hunt <pehunt@redhat.com>  2019-02-15 16:39:24 -0500
committer Peter Hunt <pehunt@redhat.com>  2019-02-15 16:39:24 -0500
commit    81804fc4641d279fec8f9bf48b21b22fc90cb891
tree      b997e2f6e703b722cc955d14b26937fcecf45828 /libpod
parent    0a521e139faa0c7f1d9b6c9e647c350c1b7c4e04
pod infra container is started before a container in a pod is run, started, or attached.
Previously, a pod had to be started immediately when it was created, which led to confusion about what a pod's state should be right after creation. The problem was that podman run --pod ... would error out if the infra container wasn't started (as it is a dependency of the new container).

Fix this by allowing recursive start, where each of the container's dependencies is started before the new container itself. This is applied only in the case where a new container is attached to a pod.

Also rework the container_api Start, StartAndAttach, and Init functions, as there was some duplicated code; consolidating it made the problem easier to address.

Signed-off-by: Peter Hunt <pehunt@redhat.com>
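As a rough illustration of the new API surface, here is a minimal sketch (not part of this commit; the Runtime lookup helper is assumed from libpod's existing public API) of a caller opting into recursive start:

	package main

	import (
		"context"

		"github.com/containers/libpod/libpod"
	)

	// startWithDeps starts a container and, recursively, its dependencies.
	// rt is assumed to be an initialized *libpod.Runtime.
	func startWithDeps(ctx context.Context, rt *libpod.Runtime, ctrID string) error {
		ctr, err := rt.LookupContainer(ctrID)
		if err != nil {
			return err
		}
		// recursive=true: dependencies such as a pod's infra container
		// are started before the container itself.
		return ctr.Start(ctx, true)
	}

Callers that pass recursive=false keep the old fail-fast behavior: the start errors out if any dependency is not already running.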
Diffstat (limited to 'libpod')
-rw-r--r--  libpod/container_api.go      | 167
-rw-r--r--  libpod/container_graph.go    |  97
-rw-r--r--  libpod/container_internal.go | 143
-rw-r--r--  libpod/pod_internal.go       |  96
-rw-r--r--  libpod/runtime_pod_linux.go  |   9
5 files changed, 279 insertions(+), 233 deletions(-)
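Taken together with the runtime_pod_linux.go change below, the flow becomes: NewPod no longer starts the infra container, which therefore stays in Created state until a container joining the pod is started with recursive=true (as podman run --pod now does). A hedged sketch, with the WithInfraContainer option name assumed from libpod's pod-creation options rather than taken from this diff:

	// createPodLazyInfra creates a pod whose infra container is created
	// but, after this commit, no longer started at creation time.
	func createPodLazyInfra(ctx context.Context, rt *libpod.Runtime) (*libpod.Pod, error) {
		return rt.NewPod(ctx, libpod.WithInfraContainer())
	}

A later recursive Start of any container attached to the pod then brings the infra container up first, as one of that container's dependencies.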
diff --git a/libpod/container_api.go b/libpod/container_api.go
index 149867759..abe0df610 100644
--- a/libpod/container_api.go
+++ b/libpod/container_api.go
@@ -7,7 +7,6 @@ import (
"io/ioutil"
"os"
"strconv"
- "strings"
"time"
"github.com/containers/libpod/libpod/driver"
@@ -38,24 +37,15 @@ func (c *Container) Init(ctx context.Context) (err error) {
return errors.Wrapf(ErrCtrExists, "container %s has already been created in runtime", c.ID())
}
- notRunning, err := c.checkDependenciesRunning()
- if err != nil {
- return errors.Wrapf(err, "error checking dependencies for container %s", c.ID())
- }
- if len(notRunning) > 0 {
- depString := strings.Join(notRunning, ",")
- return errors.Wrapf(ErrCtrStateInvalid, "some dependencies of container %s are not started: %s", c.ID(), depString)
+ // don't recursively start dependencies; error out if they aren't running
+ if err := c.checkDependenciesAndHandleError(ctx); err != nil {
+ return err
}
- defer func() {
- if err != nil {
- if err2 := c.cleanup(ctx); err2 != nil {
- logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2)
- }
- }
- }()
-
if err := c.prepare(); err != nil {
+ if err2 := c.cleanup(ctx); err2 != nil {
+ logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2)
+ }
return err
}
@@ -68,13 +58,14 @@ func (c *Container) Init(ctx context.Context) (err error) {
return c.init(ctx)
}
-// Start starts a container
-// Start can start configured, created or stopped containers
+// Start starts a container.
+// Start can start configured, created or stopped containers.
// For configured containers, the container will be initialized first, then
-// started
+// started.
// Stopped containers will be deleted and re-created in runc, undergoing a fresh
-// Init()
-func (c *Container) Start(ctx context.Context) (err error) {
+// Init().
+// If recursive is set, Start will also start all containers this container depends on.
+func (c *Container) Start(ctx context.Context, recursive bool) (err error) {
if !c.batched {
c.lock.Lock()
defer c.lock.Unlock()
@@ -83,64 +74,26 @@ func (c *Container) Start(ctx context.Context) (err error) {
return err
}
}
-
- // Container must be created or stopped to be started
- if !(c.state.State == ContainerStateConfigured ||
- c.state.State == ContainerStateCreated ||
- c.state.State == ContainerStateStopped ||
- c.state.State == ContainerStateExited) {
- return errors.Wrapf(ErrCtrStateInvalid, "container %s must be in Created or Stopped state to be started", c.ID())
- }
-
- notRunning, err := c.checkDependenciesRunning()
- if err != nil {
- return errors.Wrapf(err, "error checking dependencies for container %s", c.ID())
- }
- if len(notRunning) > 0 {
- depString := strings.Join(notRunning, ",")
- return errors.Wrapf(ErrCtrStateInvalid, "some dependencies of container %s are not started: %s", c.ID(), depString)
- }
-
- defer func() {
- if err != nil {
- if err2 := c.cleanup(ctx); err2 != nil {
- logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2)
- }
- }
- }()
-
- if err := c.prepare(); err != nil {
+ if err := c.prepareToStart(ctx, recursive); err != nil {
return err
}
- if c.state.State == ContainerStateStopped {
- // Reinitialize the container if we need to
- if err := c.reinit(ctx); err != nil {
- return err
- }
- } else if c.state.State == ContainerStateConfigured ||
- c.state.State == ContainerStateExited {
- // Or initialize it if necessary
- if err := c.init(ctx); err != nil {
- return err
- }
- }
-
// Start the container
return c.start()
}
-// StartAndAttach starts a container and attaches to it
-// StartAndAttach can start configured, created or stopped containers
+// StartAndAttach starts a container and attaches to it.
+// StartAndAttach can start configured, created or stopped containers.
// For configured containers, the container will be initialized first, then
-// started
+// started.
// Stopped containers will be deleted and re-created in runc, undergoing a fresh
-// Init()
+// Init().
// If successful, an error channel will be returned containing the result of the
// attach call.
// The channel will be closed automatically after the result of attach has been
-// sent
-func (c *Container) StartAndAttach(ctx context.Context, streams *AttachStreams, keys string, resize <-chan remotecommand.TerminalSize) (attachResChan <-chan error, err error) {
+// sent.
+// If recursive is set, StartAndAttach will also start all containers this container depends on.
+func (c *Container) StartAndAttach(ctx context.Context, streams *AttachStreams, keys string, resize <-chan remotecommand.TerminalSize, recursive bool) (attachResChan <-chan error, err error) {
if !c.batched {
c.lock.Lock()
defer c.lock.Unlock()
@@ -150,48 +103,10 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *AttachStreams,
}
}
- // Container must be created or stopped to be started
- if !(c.state.State == ContainerStateConfigured ||
- c.state.State == ContainerStateCreated ||
- c.state.State == ContainerStateStopped ||
- c.state.State == ContainerStateExited) {
- return nil, errors.Wrapf(ErrCtrStateInvalid, "container %s must be in Created or Stopped state to be started", c.ID())
- }
-
- notRunning, err := c.checkDependenciesRunning()
- if err != nil {
- return nil, errors.Wrapf(err, "error checking dependencies for container %s", c.ID())
- }
- if len(notRunning) > 0 {
- depString := strings.Join(notRunning, ",")
- return nil, errors.Wrapf(ErrCtrStateInvalid, "some dependencies of container %s are not started: %s", c.ID(), depString)
- }
-
- defer func() {
- if err != nil {
- if err2 := c.cleanup(ctx); err2 != nil {
- logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2)
- }
- }
- }()
-
- if err := c.prepare(); err != nil {
+ if err := c.prepareToStart(ctx, recursive); err != nil {
return nil, err
}
- if c.state.State == ContainerStateStopped {
- // Reinitialize the container if we need to
- if err := c.reinit(ctx); err != nil {
- return nil, err
- }
- } else if c.state.State == ContainerStateConfigured ||
- c.state.State == ContainerStateExited {
- // Or initialize it if necessary
- if err := c.init(ctx); err != nil {
- return nil, err
- }
- }
-
attachChan := make(chan error)
// Attach to the container before starting it
@@ -205,6 +120,24 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *AttachStreams,
return attachChan, nil
}
+// RestartWithTimeout restarts a running container, using the given timeout (in seconds, as a uint) to stop it
+func (c *Container) RestartWithTimeout(ctx context.Context, timeout uint) (err error) {
+ if !c.batched {
+ c.lock.Lock()
+ defer c.lock.Unlock()
+
+ if err := c.syncContainer(); err != nil {
+ return err
+ }
+ }
+
+ if err = c.checkDependenciesAndHandleError(ctx); err != nil {
+ return err
+ }
+
+ return c.restartWithTimeout(ctx, timeout)
+}
+
// Stop uses the container's stop signal (or SIGTERM if no signal was specified)
// to stop the container, and if it has not stopped after container's stop
// timeout, SIGKILL is used to attempt to forcibly stop the container
@@ -730,28 +663,6 @@ func (c *Container) Sync() error {
return nil
}
-// RestartWithTimeout restarts a running container and takes a given timeout in uint
-func (c *Container) RestartWithTimeout(ctx context.Context, timeout uint) (err error) {
- if !c.batched {
- c.lock.Lock()
- defer c.lock.Unlock()
-
- if err := c.syncContainer(); err != nil {
- return err
- }
- }
-
- notRunning, err := c.checkDependenciesRunning()
- if err != nil {
- return errors.Wrapf(err, "error checking dependencies for container %s", c.ID())
- }
- if len(notRunning) > 0 {
- depString := strings.Join(notRunning, ",")
- return errors.Wrapf(ErrCtrStateInvalid, "some dependencies of container %s are not started: %s", c.ID(), depString)
- }
- return c.restartWithTimeout(ctx, timeout)
-}
-
// Refresh refreshes a container's state in the database, restarting the
// container if it is running
func (c *Container) Refresh(ctx context.Context) error {
diff --git a/libpod/container_graph.go b/libpod/container_graph.go
index 44a1f1736..da93be77d 100644
--- a/libpod/container_graph.go
+++ b/libpod/container_graph.go
@@ -1,6 +1,9 @@
package libpod
import (
+ "context"
+ "strings"
+
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
@@ -169,3 +172,97 @@ func detectCycles(graph *containerGraph) (bool, error) {
return false, nil
}
+
+// Visit a node on a container graph and start the container, or set an error if
+// a dependency failed to start. If restart is true, startNode restarts the node instead of starting it.
+func startNode(ctx context.Context, node *containerNode, setError bool, ctrErrors map[string]error, ctrsVisited map[string]bool, restart bool) {
+ // First, check if we have already visited the node
+ if ctrsVisited[node.id] {
+ return
+ }
+
+ // If setError is true, one of our dependencies failed to start
+ // Mark us as failed and recurse
+ if setError {
+ // Mark us as visited, and set an error
+ ctrsVisited[node.id] = true
+ ctrErrors[node.id] = errors.Wrapf(ErrCtrStateInvalid, "a dependency of container %s failed to start", node.id)
+
+ // Hit anyone who depends on us, and set errors on them too
+ for _, successor := range node.dependedOn {
+ startNode(ctx, successor, true, ctrErrors, ctrsVisited, restart)
+ }
+
+ return
+ }
+
+ // Have all our dependencies started?
+ // If not, don't visit the node yet
+ depsVisited := true
+ for _, dep := range node.dependsOn {
+ depsVisited = depsVisited && ctrsVisited[dep.id]
+ }
+ if !depsVisited {
+ // Don't visit us yet; not all dependencies are up
+ // We'll hit the dependencies eventually, and when we do it will
+ // recurse here
+ return
+ }
+
+ // Going to try to start the container, mark us as visited
+ ctrsVisited[node.id] = true
+
+ ctrErrored := false
+
+ // Check if dependencies are running
+ // Graph traversal means we should have started them
+ // But they could have died before we got here
+ // Does not require that the container be locked, we only need to lock
+ // the dependencies
+ depsStopped, err := node.container.checkDependenciesRunning()
+ if err != nil {
+ ctrErrors[node.id] = err
+ ctrErrored = true
+ } else if len(depsStopped) > 0 {
+ // Our dependencies are not running
+ depsList := strings.Join(depsStopped, ",")
+ ctrErrors[node.id] = errors.Wrapf(ErrCtrStateInvalid, "the following dependencies of container %s are not running: %s", node.id, depsList)
+ ctrErrored = true
+ }
+
+ // Lock before we start
+ node.container.lock.Lock()
+
+ // Sync the container to pick up current state
+ if !ctrErrored {
+ if err := node.container.syncContainer(); err != nil {
+ ctrErrored = true
+ ctrErrors[node.id] = err
+ }
+ }
+
+ // Start the container (only if it is not running)
+ if !ctrErrored {
+ if !restart && node.container.state.State != ContainerStateRunning {
+ if err := node.container.initAndStart(ctx); err != nil {
+ ctrErrored = true
+ ctrErrors[node.id] = err
+ }
+ }
+ if restart && node.container.state.State != ContainerStatePaused && node.container.state.State != ContainerStateUnknown {
+ if err := node.container.restartWithTimeout(ctx, node.container.config.StopTimeout); err != nil {
+ ctrErrored = true
+ ctrErrors[node.id] = err
+ }
+ }
+ }
+
+ node.container.lock.Unlock()
+
+ // Recurse to anyone who depends on us and start them
+ for _, successor := range node.dependedOn {
+ startNode(ctx, successor, ctrErrored, ctrErrors, ctrsVisited, restart)
+ }
+
+ return
+}
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index b2ebad777..a54613443 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -559,6 +559,149 @@ func (c *Container) save() error {
return nil
}
+// Checks that the container is in the right state, then initializes it in preparation for starting.
+// If recursive is true, each of the container's dependencies will be started.
+// Otherwise, this function returns an error if any of the container's dependencies aren't running.
+func (c *Container) prepareToStart(ctx context.Context, recursive bool) (err error) {
+ // Container must be created or stopped to be started
+ if !(c.state.State == ContainerStateConfigured ||
+ c.state.State == ContainerStateCreated ||
+ c.state.State == ContainerStateStopped ||
+ c.state.State == ContainerStateExited) {
+ return errors.Wrapf(ErrCtrStateInvalid, "container %s must be in Created or Stopped state to be started", c.ID())
+ }
+
+ if !recursive {
+ if err := c.checkDependenciesAndHandleError(ctx); err != nil {
+ return err
+ }
+ } else {
+ if err := c.startDependencies(ctx); err != nil {
+ return err
+ }
+ }
+
+ defer func() {
+ if err != nil {
+ if err2 := c.cleanup(ctx); err2 != nil {
+ logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2)
+ }
+ }
+ }()
+
+ if err := c.prepare(); err != nil {
+ return err
+ }
+
+ if c.state.State == ContainerStateStopped {
+ // Reinitialize the container if we need to
+ if err := c.reinit(ctx); err != nil {
+ return err
+ }
+ } else if c.state.State == ContainerStateConfigured ||
+ c.state.State == ContainerStateExited {
+ // Or initialize it if necessary
+ if err := c.init(ctx); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// checkDependenciesAndHandleError checks that all of the container's dependencies are running and, if not, returns a descriptive error
+func (c *Container) checkDependenciesAndHandleError(ctx context.Context) error {
+ notRunning, err := c.checkDependenciesRunning()
+ if err != nil {
+ return errors.Wrapf(err, "error checking dependencies for container %s", c.ID())
+ }
+ if len(notRunning) > 0 {
+ depString := strings.Join(notRunning, ",")
+ return errors.Wrapf(ErrCtrStateInvalid, "some dependencies of container %s are not started: %s", c.ID(), depString)
+ }
+
+ return nil
+}
+
+// Recursively start all dependencies of a container so the container can be started.
+func (c *Container) startDependencies(ctx context.Context) error {
+ depCtrIDs := c.Dependencies()
+ if len(depCtrIDs) == 0 {
+ return nil
+ }
+
+ depVisitedCtrs := make(map[string]*Container)
+ if err := c.getAllDependencies(depVisitedCtrs); err != nil {
+ return errors.Wrapf(err, "error starting dependency for container %s", c.ID())
+ }
+
+ // Because Go passes slice headers by value, a slice grown inside recursive calls isn't
+ // visible to the caller without clunky syntax. Circumvent this by collecting dependencies
+ // in a map and translating it to a slice for buildContainerGraph
+ depCtrs := make([]*Container, 0)
+ for _, ctr := range depVisitedCtrs {
+ depCtrs = append(depCtrs, ctr)
+ }
+
+ // Build a dependency graph of containers
+ graph, err := buildContainerGraph(depCtrs)
+ if err != nil {
+ return errors.Wrapf(err, "error generating dependency graph for container %s", c.ID())
+ }
+
+ ctrErrors := make(map[string]error)
+ // reset ctrsVisited for the next round of recursion
+ ctrsVisited := make(map[string]bool)
+
+ // If there are no containers without dependencies, we can't start
+ // Error out
+ if len(graph.noDepNodes) == 0 {
+ return errors.Wrapf(ErrNoSuchCtr, "All dependencies have dependencies of %s", c.ID())
+ }
+
+ // Traverse the graph beginning at nodes with no dependencies
+ for _, node := range graph.noDepNodes {
+ startNode(ctx, node, false, ctrErrors, ctrsVisited, true)
+ }
+
+ if len(ctrErrors) > 0 {
+ logrus.Errorf("error starting some container dependencies")
+ for _, e := range ctrErrors {
+ logrus.Errorf("%q", e)
+ }
+ return errors.Wrapf(ErrInternal, "error starting some containers")
+ }
+ return nil
+}
+
+// getAllDependencies is a precursor to starting dependencies.
+// To start a container with all of its dependencies, we need to recursively find all dependencies
+// a container has, as well as each of those containers' dependencies, and so on
+// To do so, keep track of containers already visited (so there aren't redundant state lookups),
+// and recursively search until we have reached the leaves of every dependency node.
+// Since we need to start all dependencies for our original container to successfully start, we propagate any errors
+// in looking up dependencies.
+// Note: this function is currently meant as a robust solution to a narrow problem: start an infra-container when
+// a container in the pod is run. It has not been tested for performance past one level, so expansion of recursive start
+// must be tested first.
+func (c *Container) getAllDependencies(visited map[string]*Container) error {
+ depIDs := c.Dependencies()
+ if len(depIDs) == 0 {
+ return nil
+ }
+ for _, depID := range depIDs {
+ if _, ok := visited[depID]; !ok {
+ dep, err := c.runtime.state.LookupContainer(depID)
+ if err != nil {
+ return err
+ }
+ visited[depID] = dep
+ if err := dep.getAllDependencies(visited); err != nil {
+ return err
+ }
+ }
+ }
+ return nil
+}
+
// Check if a container's dependencies are running
// Returns a []string containing the IDs of dependencies that are not running
func (c *Container) checkDependenciesRunning() ([]string, error) {
diff --git a/libpod/pod_internal.go b/libpod/pod_internal.go
index 0f1f115e8..25e4e77d7 100644
--- a/libpod/pod_internal.go
+++ b/libpod/pod_internal.go
@@ -1,10 +1,8 @@
package libpod
import (
- "context"
"fmt"
"path/filepath"
- "strings"
"time"
"github.com/containers/storage/pkg/stringid"
@@ -85,97 +83,3 @@ func (p *Pod) refresh() error {
// Save changes
return p.save()
}
-
-// Visit a node on a container graph and start the container, or set an error if
-// a dependency failed to start. if restart is true, startNode will restart the node instead of starting it.
-func startNode(ctx context.Context, node *containerNode, setError bool, ctrErrors map[string]error, ctrsVisited map[string]bool, restart bool) {
- // First, check if we have already visited the node
- if ctrsVisited[node.id] {
- return
- }
-
- // If setError is true, a dependency of us failed
- // Mark us as failed and recurse
- if setError {
- // Mark us as visited, and set an error
- ctrsVisited[node.id] = true
- ctrErrors[node.id] = errors.Wrapf(ErrCtrStateInvalid, "a dependency of container %s failed to start", node.id)
-
- // Hit anyone who depends on us, and set errors on them too
- for _, successor := range node.dependedOn {
- startNode(ctx, successor, true, ctrErrors, ctrsVisited, restart)
- }
-
- return
- }
-
- // Have all our dependencies started?
- // If not, don't visit the node yet
- depsVisited := true
- for _, dep := range node.dependsOn {
- depsVisited = depsVisited && ctrsVisited[dep.id]
- }
- if !depsVisited {
- // Don't visit us yet, all dependencies are not up
- // We'll hit the dependencies eventually, and when we do it will
- // recurse here
- return
- }
-
- // Going to try to start the container, mark us as visited
- ctrsVisited[node.id] = true
-
- ctrErrored := false
-
- // Check if dependencies are running
- // Graph traversal means we should have started them
- // But they could have died before we got here
- // Does not require that the container be locked, we only need to lock
- // the dependencies
- depsStopped, err := node.container.checkDependenciesRunning()
- if err != nil {
- ctrErrors[node.id] = err
- ctrErrored = true
- } else if len(depsStopped) > 0 {
- // Our dependencies are not running
- depsList := strings.Join(depsStopped, ",")
- ctrErrors[node.id] = errors.Wrapf(ErrCtrStateInvalid, "the following dependencies of container %s are not running: %s", node.id, depsList)
- ctrErrored = true
- }
-
- // Lock before we start
- node.container.lock.Lock()
-
- // Sync the container to pick up current state
- if !ctrErrored {
- if err := node.container.syncContainer(); err != nil {
- ctrErrored = true
- ctrErrors[node.id] = err
- }
- }
-
- // Start the container (only if it is not running)
- if !ctrErrored {
- if !restart && node.container.state.State != ContainerStateRunning {
- if err := node.container.initAndStart(ctx); err != nil {
- ctrErrored = true
- ctrErrors[node.id] = err
- }
- }
- if restart && node.container.state.State != ContainerStatePaused && node.container.state.State != ContainerStateUnknown {
- if err := node.container.restartWithTimeout(ctx, node.container.config.StopTimeout); err != nil {
- ctrErrored = true
- ctrErrors[node.id] = err
- }
- }
- }
-
- node.container.lock.Unlock()
-
- // Recurse to anyone who depends on us and start them
- for _, successor := range node.dependedOn {
- startNode(ctx, successor, ctrErrored, ctrErrors, ctrsVisited, restart)
- }
-
- return
-}
diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go
index c6d497c0c..c378d18e4 100644
--- a/libpod/runtime_pod_linux.go
+++ b/libpod/runtime_pod_linux.go
@@ -117,15 +117,6 @@ func (r *Runtime) NewPod(ctx context.Context, options ...PodCreateOption) (*Pod,
if err := pod.save(); err != nil {
return nil, err
}
-
- // Once the pod infra container has been created, we start it
- if err := ctr.Start(ctx); err != nil {
- // If the infra container does not start, we need to tear the pod down.
- if err2 := r.removePod(ctx, pod, true, true); err2 != nil {
- logrus.Errorf("Error removing pod after infra container failed to start: %v", err2)
- }
- return nil, errors.Wrapf(err, "error starting Infra Container")
- }
}
return pod, nil