diff options
author | Peter Hunt <pehunt@redhat.com> | 2019-02-15 16:39:24 -0500 |
---|---|---|
committer | Peter Hunt <pehunt@redhat.com> | 2019-02-15 16:39:24 -0500 |
commit | 81804fc4641d279fec8f9bf48b21b22fc90cb891 (patch) | |
tree | b997e2f6e703b722cc955d14b26937fcecf45828 /libpod | |
parent | 0a521e139faa0c7f1d9b6c9e647c350c1b7c4e04 (diff) | |
download | podman-81804fc4641d279fec8f9bf48b21b22fc90cb891.tar.gz podman-81804fc4641d279fec8f9bf48b21b22fc90cb891.tar.bz2 podman-81804fc4641d279fec8f9bf48b21b22fc90cb891.zip |
pod infra container is started before a container in a pod is run, started, or attached.
Prior, a pod would have to be started immediately when created, leading to confusion about what a pod state should be immediately after creation. The problem was podman run --pod ... would error out if the infra container wasn't started (as it is a dependency). Fix this by allowing for recursive start, where each of the container's dependencies are started prior to the new container. This is only applied to the case where a new container is attached to a pod.
Also rework container_api Start, StartAndAttach, and Init functions, as there was some duplicated code, which made addressing the problem easier to fix.
Signed-off-by: Peter Hunt <pehunt@redhat.com>
Diffstat (limited to 'libpod')
-rw-r--r-- | libpod/container_api.go | 167 | ||||
-rw-r--r-- | libpod/container_graph.go | 97 | ||||
-rw-r--r-- | libpod/container_internal.go | 143 | ||||
-rw-r--r-- | libpod/pod_internal.go | 96 | ||||
-rw-r--r-- | libpod/runtime_pod_linux.go | 9 |
5 files changed, 279 insertions, 233 deletions
diff --git a/libpod/container_api.go b/libpod/container_api.go index 149867759..abe0df610 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -7,7 +7,6 @@ import ( "io/ioutil" "os" "strconv" - "strings" "time" "github.com/containers/libpod/libpod/driver" @@ -38,24 +37,15 @@ func (c *Container) Init(ctx context.Context) (err error) { return errors.Wrapf(ErrCtrExists, "container %s has already been created in runtime", c.ID()) } - notRunning, err := c.checkDependenciesRunning() - if err != nil { - return errors.Wrapf(err, "error checking dependencies for container %s", c.ID()) - } - if len(notRunning) > 0 { - depString := strings.Join(notRunning, ",") - return errors.Wrapf(ErrCtrStateInvalid, "some dependencies of container %s are not started: %s", c.ID(), depString) + // don't recursively start + if err := c.checkDependenciesAndHandleError(ctx); err != nil { + return err } - defer func() { - if err != nil { - if err2 := c.cleanup(ctx); err2 != nil { - logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2) - } - } - }() - if err := c.prepare(); err != nil { + if err2 := c.cleanup(ctx); err2 != nil { + logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2) + } return err } @@ -68,13 +58,14 @@ func (c *Container) Init(ctx context.Context) (err error) { return c.init(ctx) } -// Start starts a container -// Start can start configured, created or stopped containers +// Start starts a container. +// Start can start configured, created or stopped containers. // For configured containers, the container will be initialized first, then -// started +// started. // Stopped containers will be deleted and re-created in runc, undergoing a fresh -// Init() -func (c *Container) Start(ctx context.Context) (err error) { +// Init(). +// If recursive is set, Start will also start all containers this container depends on. +func (c *Container) Start(ctx context.Context, recursive bool) (err error) { if !c.batched { c.lock.Lock() defer c.lock.Unlock() @@ -83,64 +74,26 @@ func (c *Container) Start(ctx context.Context) (err error) { return err } } - - // Container must be created or stopped to be started - if !(c.state.State == ContainerStateConfigured || - c.state.State == ContainerStateCreated || - c.state.State == ContainerStateStopped || - c.state.State == ContainerStateExited) { - return errors.Wrapf(ErrCtrStateInvalid, "container %s must be in Created or Stopped state to be started", c.ID()) - } - - notRunning, err := c.checkDependenciesRunning() - if err != nil { - return errors.Wrapf(err, "error checking dependencies for container %s", c.ID()) - } - if len(notRunning) > 0 { - depString := strings.Join(notRunning, ",") - return errors.Wrapf(ErrCtrStateInvalid, "some dependencies of container %s are not started: %s", c.ID(), depString) - } - - defer func() { - if err != nil { - if err2 := c.cleanup(ctx); err2 != nil { - logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2) - } - } - }() - - if err := c.prepare(); err != nil { + if err := c.prepareToStart(ctx, recursive); err != nil { return err } - if c.state.State == ContainerStateStopped { - // Reinitialize the container if we need to - if err := c.reinit(ctx); err != nil { - return err - } - } else if c.state.State == ContainerStateConfigured || - c.state.State == ContainerStateExited { - // Or initialize it if necessary - if err := c.init(ctx); err != nil { - return err - } - } - // Start the container return c.start() } -// StartAndAttach starts a container and attaches to it -// StartAndAttach can start configured, created or stopped containers +// StartAndAttach starts a container and attaches to it. +// StartAndAttach can start configured, created or stopped containers. // For configured containers, the container will be initialized first, then -// started +// started. // Stopped containers will be deleted and re-created in runc, undergoing a fresh -// Init() +// Init(). // If successful, an error channel will be returned containing the result of the // attach call. // The channel will be closed automatically after the result of attach has been -// sent -func (c *Container) StartAndAttach(ctx context.Context, streams *AttachStreams, keys string, resize <-chan remotecommand.TerminalSize) (attachResChan <-chan error, err error) { +// sent. +// If recursive is set, StartAndAttach will also start all containers this container depends on. +func (c *Container) StartAndAttach(ctx context.Context, streams *AttachStreams, keys string, resize <-chan remotecommand.TerminalSize, recursive bool) (attachResChan <-chan error, err error) { if !c.batched { c.lock.Lock() defer c.lock.Unlock() @@ -150,48 +103,10 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *AttachStreams, } } - // Container must be created or stopped to be started - if !(c.state.State == ContainerStateConfigured || - c.state.State == ContainerStateCreated || - c.state.State == ContainerStateStopped || - c.state.State == ContainerStateExited) { - return nil, errors.Wrapf(ErrCtrStateInvalid, "container %s must be in Created or Stopped state to be started", c.ID()) - } - - notRunning, err := c.checkDependenciesRunning() - if err != nil { - return nil, errors.Wrapf(err, "error checking dependencies for container %s", c.ID()) - } - if len(notRunning) > 0 { - depString := strings.Join(notRunning, ",") - return nil, errors.Wrapf(ErrCtrStateInvalid, "some dependencies of container %s are not started: %s", c.ID(), depString) - } - - defer func() { - if err != nil { - if err2 := c.cleanup(ctx); err2 != nil { - logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2) - } - } - }() - - if err := c.prepare(); err != nil { + if err := c.prepareToStart(ctx, recursive); err != nil { return nil, err } - if c.state.State == ContainerStateStopped { - // Reinitialize the container if we need to - if err := c.reinit(ctx); err != nil { - return nil, err - } - } else if c.state.State == ContainerStateConfigured || - c.state.State == ContainerStateExited { - // Or initialize it if necessary - if err := c.init(ctx); err != nil { - return nil, err - } - } - attachChan := make(chan error) // Attach to the container before starting it @@ -205,6 +120,24 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *AttachStreams, return attachChan, nil } +// RestartWithTimeout restarts a running container and takes a given timeout in uint +func (c *Container) RestartWithTimeout(ctx context.Context, timeout uint) (err error) { + if !c.batched { + c.lock.Lock() + defer c.lock.Unlock() + + if err := c.syncContainer(); err != nil { + return err + } + } + + if err = c.checkDependenciesAndHandleError(ctx); err != nil { + return err + } + + return c.restartWithTimeout(ctx, timeout) +} + // Stop uses the container's stop signal (or SIGTERM if no signal was specified) // to stop the container, and if it has not stopped after container's stop // timeout, SIGKILL is used to attempt to forcibly stop the container @@ -730,28 +663,6 @@ func (c *Container) Sync() error { return nil } -// RestartWithTimeout restarts a running container and takes a given timeout in uint -func (c *Container) RestartWithTimeout(ctx context.Context, timeout uint) (err error) { - if !c.batched { - c.lock.Lock() - defer c.lock.Unlock() - - if err := c.syncContainer(); err != nil { - return err - } - } - - notRunning, err := c.checkDependenciesRunning() - if err != nil { - return errors.Wrapf(err, "error checking dependencies for container %s", c.ID()) - } - if len(notRunning) > 0 { - depString := strings.Join(notRunning, ",") - return errors.Wrapf(ErrCtrStateInvalid, "some dependencies of container %s are not started: %s", c.ID(), depString) - } - return c.restartWithTimeout(ctx, timeout) -} - // Refresh refreshes a container's state in the database, restarting the // container if it is running func (c *Container) Refresh(ctx context.Context) error { diff --git a/libpod/container_graph.go b/libpod/container_graph.go index 44a1f1736..da93be77d 100644 --- a/libpod/container_graph.go +++ b/libpod/container_graph.go @@ -1,6 +1,9 @@ package libpod import ( + "context" + "strings" + "github.com/pkg/errors" "github.com/sirupsen/logrus" ) @@ -169,3 +172,97 @@ func detectCycles(graph *containerGraph) (bool, error) { return false, nil } + +// Visit a node on a container graph and start the container, or set an error if +// a dependency failed to start. if restart is true, startNode will restart the node instead of starting it. +func startNode(ctx context.Context, node *containerNode, setError bool, ctrErrors map[string]error, ctrsVisited map[string]bool, restart bool) { + // First, check if we have already visited the node + if ctrsVisited[node.id] { + return + } + + // If setError is true, a dependency of us failed + // Mark us as failed and recurse + if setError { + // Mark us as visited, and set an error + ctrsVisited[node.id] = true + ctrErrors[node.id] = errors.Wrapf(ErrCtrStateInvalid, "a dependency of container %s failed to start", node.id) + + // Hit anyone who depends on us, and set errors on them too + for _, successor := range node.dependedOn { + startNode(ctx, successor, true, ctrErrors, ctrsVisited, restart) + } + + return + } + + // Have all our dependencies started? + // If not, don't visit the node yet + depsVisited := true + for _, dep := range node.dependsOn { + depsVisited = depsVisited && ctrsVisited[dep.id] + } + if !depsVisited { + // Don't visit us yet, all dependencies are not up + // We'll hit the dependencies eventually, and when we do it will + // recurse here + return + } + + // Going to try to start the container, mark us as visited + ctrsVisited[node.id] = true + + ctrErrored := false + + // Check if dependencies are running + // Graph traversal means we should have started them + // But they could have died before we got here + // Does not require that the container be locked, we only need to lock + // the dependencies + depsStopped, err := node.container.checkDependenciesRunning() + if err != nil { + ctrErrors[node.id] = err + ctrErrored = true + } else if len(depsStopped) > 0 { + // Our dependencies are not running + depsList := strings.Join(depsStopped, ",") + ctrErrors[node.id] = errors.Wrapf(ErrCtrStateInvalid, "the following dependencies of container %s are not running: %s", node.id, depsList) + ctrErrored = true + } + + // Lock before we start + node.container.lock.Lock() + + // Sync the container to pick up current state + if !ctrErrored { + if err := node.container.syncContainer(); err != nil { + ctrErrored = true + ctrErrors[node.id] = err + } + } + + // Start the container (only if it is not running) + if !ctrErrored { + if !restart && node.container.state.State != ContainerStateRunning { + if err := node.container.initAndStart(ctx); err != nil { + ctrErrored = true + ctrErrors[node.id] = err + } + } + if restart && node.container.state.State != ContainerStatePaused && node.container.state.State != ContainerStateUnknown { + if err := node.container.restartWithTimeout(ctx, node.container.config.StopTimeout); err != nil { + ctrErrored = true + ctrErrors[node.id] = err + } + } + } + + node.container.lock.Unlock() + + // Recurse to anyone who depends on us and start them + for _, successor := range node.dependedOn { + startNode(ctx, successor, ctrErrored, ctrErrors, ctrsVisited, restart) + } + + return +} diff --git a/libpod/container_internal.go b/libpod/container_internal.go index b2ebad777..a54613443 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -559,6 +559,149 @@ func (c *Container) save() error { return nil } +// Checks the container is in the right state, then initializes the container in preparation to start the container. +// If recursive is true, each of the containers dependencies will be started. +// Otherwise, this function will return with error if there are dependencies of this container that aren't running. +func (c *Container) prepareToStart(ctx context.Context, recursive bool) (err error) { + // Container must be created or stopped to be started + if !(c.state.State == ContainerStateConfigured || + c.state.State == ContainerStateCreated || + c.state.State == ContainerStateStopped || + c.state.State == ContainerStateExited) { + return errors.Wrapf(ErrCtrStateInvalid, "container %s must be in Created or Stopped state to be started", c.ID()) + } + + if !recursive { + if err := c.checkDependenciesAndHandleError(ctx); err != nil { + return err + } + } else { + if err := c.startDependencies(ctx); err != nil { + return err + } + } + + defer func() { + if err != nil { + if err2 := c.cleanup(ctx); err2 != nil { + logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2) + } + } + }() + + if err := c.prepare(); err != nil { + return err + } + + if c.state.State == ContainerStateStopped { + // Reinitialize the container if we need to + if err := c.reinit(ctx); err != nil { + return err + } + } else if c.state.State == ContainerStateConfigured || + c.state.State == ContainerStateExited { + // Or initialize it if necessary + if err := c.init(ctx); err != nil { + return err + } + } + return nil +} + +// checks dependencies are running and prints a helpful message +func (c *Container) checkDependenciesAndHandleError(ctx context.Context) error { + notRunning, err := c.checkDependenciesRunning() + if err != nil { + return errors.Wrapf(err, "error checking dependencies for container %s", c.ID()) + } + if len(notRunning) > 0 { + depString := strings.Join(notRunning, ",") + return errors.Wrapf(ErrCtrStateInvalid, "some dependencies of container %s are not started: %s", c.ID(), depString) + } + + return nil +} + +// Recursively start all dependencies of a container so the container can be started. +func (c *Container) startDependencies(ctx context.Context) error { + depCtrIDs := c.Dependencies() + if len(depCtrIDs) == 0 { + return nil + } + + depVisitedCtrs := make(map[string]*Container) + if err := c.getAllDependencies(depVisitedCtrs); err != nil { + return errors.Wrapf(err, "error starting dependency for container %s", c.ID()) + } + + // Because of how Go handles passing slices through functions, a slice cannot grow between function calls + // without clunky syntax. Circumnavigate this by translating the map to a slice for buildContainerGraph + depCtrs := make([]*Container, 0) + for _, ctr := range depVisitedCtrs { + depCtrs = append(depCtrs, ctr) + } + + // Build a dependency graph of containers + graph, err := buildContainerGraph(depCtrs) + if err != nil { + return errors.Wrapf(err, "error generating dependency graph for container %s", c.ID()) + } + + ctrErrors := make(map[string]error) + // reset ctrsVisisted for next round of recursion + ctrsVisited := make(map[string]bool) + + // If there are no containers without dependencies, we can't start + // Error out + if len(graph.noDepNodes) == 0 { + return errors.Wrapf(ErrNoSuchCtr, "All dependencies have dependencies of %s", c.ID()) + } + + // Traverse the graph beginning at nodes with no dependencies + for _, node := range graph.noDepNodes { + startNode(ctx, node, false, ctrErrors, ctrsVisited, true) + } + + if len(ctrErrors) > 0 { + logrus.Errorf("error starting some container dependencies") + for _, e := range ctrErrors { + logrus.Errorf("%q", e) + } + return errors.Wrapf(ErrInternal, "error starting some containers") + } + return nil +} + +// getAllDependencies is a precursor to starting dependencies. +// To start a container with all of its dependencies, we need to recursively find all dependencies +// a container has, as well as each of those containers' dependencies, and so on +// To do so, keep track of containers already visisted (so there aren't redundant state lookups), +// and recursively search until we have reached the leafs of every dependency node. +// Since we need to start all dependencies for our original container to successfully start, we propegate any errors +// in looking up dependencies. +// Note: this function is currently meant as a robust solution to a narrow problem: start an infra-container when +// a container in the pod is run. It has not been tested for performance past one level, so expansion of recursive start +// must be tested first. +func (c *Container) getAllDependencies(visited map[string]*Container) error { + depIDs := c.Dependencies() + if len(depIDs) == 0 { + return nil + } + for _, depID := range depIDs { + if _, ok := visited[depID]; !ok { + dep, err := c.runtime.state.LookupContainer(depID) + if err != nil { + return err + } + visited[depID] = dep + if err := dep.getAllDependencies(visited); err != nil { + return err + } + } + } + return nil +} + // Check if a container's dependencies are running // Returns a []string containing the IDs of dependencies that are not running func (c *Container) checkDependenciesRunning() ([]string, error) { diff --git a/libpod/pod_internal.go b/libpod/pod_internal.go index 0f1f115e8..25e4e77d7 100644 --- a/libpod/pod_internal.go +++ b/libpod/pod_internal.go @@ -1,10 +1,8 @@ package libpod import ( - "context" "fmt" "path/filepath" - "strings" "time" "github.com/containers/storage/pkg/stringid" @@ -85,97 +83,3 @@ func (p *Pod) refresh() error { // Save changes return p.save() } - -// Visit a node on a container graph and start the container, or set an error if -// a dependency failed to start. if restart is true, startNode will restart the node instead of starting it. -func startNode(ctx context.Context, node *containerNode, setError bool, ctrErrors map[string]error, ctrsVisited map[string]bool, restart bool) { - // First, check if we have already visited the node - if ctrsVisited[node.id] { - return - } - - // If setError is true, a dependency of us failed - // Mark us as failed and recurse - if setError { - // Mark us as visited, and set an error - ctrsVisited[node.id] = true - ctrErrors[node.id] = errors.Wrapf(ErrCtrStateInvalid, "a dependency of container %s failed to start", node.id) - - // Hit anyone who depends on us, and set errors on them too - for _, successor := range node.dependedOn { - startNode(ctx, successor, true, ctrErrors, ctrsVisited, restart) - } - - return - } - - // Have all our dependencies started? - // If not, don't visit the node yet - depsVisited := true - for _, dep := range node.dependsOn { - depsVisited = depsVisited && ctrsVisited[dep.id] - } - if !depsVisited { - // Don't visit us yet, all dependencies are not up - // We'll hit the dependencies eventually, and when we do it will - // recurse here - return - } - - // Going to try to start the container, mark us as visited - ctrsVisited[node.id] = true - - ctrErrored := false - - // Check if dependencies are running - // Graph traversal means we should have started them - // But they could have died before we got here - // Does not require that the container be locked, we only need to lock - // the dependencies - depsStopped, err := node.container.checkDependenciesRunning() - if err != nil { - ctrErrors[node.id] = err - ctrErrored = true - } else if len(depsStopped) > 0 { - // Our dependencies are not running - depsList := strings.Join(depsStopped, ",") - ctrErrors[node.id] = errors.Wrapf(ErrCtrStateInvalid, "the following dependencies of container %s are not running: %s", node.id, depsList) - ctrErrored = true - } - - // Lock before we start - node.container.lock.Lock() - - // Sync the container to pick up current state - if !ctrErrored { - if err := node.container.syncContainer(); err != nil { - ctrErrored = true - ctrErrors[node.id] = err - } - } - - // Start the container (only if it is not running) - if !ctrErrored { - if !restart && node.container.state.State != ContainerStateRunning { - if err := node.container.initAndStart(ctx); err != nil { - ctrErrored = true - ctrErrors[node.id] = err - } - } - if restart && node.container.state.State != ContainerStatePaused && node.container.state.State != ContainerStateUnknown { - if err := node.container.restartWithTimeout(ctx, node.container.config.StopTimeout); err != nil { - ctrErrored = true - ctrErrors[node.id] = err - } - } - } - - node.container.lock.Unlock() - - // Recurse to anyone who depends on us and start them - for _, successor := range node.dependedOn { - startNode(ctx, successor, ctrErrored, ctrErrors, ctrsVisited, restart) - } - - return -} diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go index c6d497c0c..c378d18e4 100644 --- a/libpod/runtime_pod_linux.go +++ b/libpod/runtime_pod_linux.go @@ -117,15 +117,6 @@ func (r *Runtime) NewPod(ctx context.Context, options ...PodCreateOption) (*Pod, if err := pod.save(); err != nil { return nil, err } - - // Once the pod infra container has been created, we start it - if err := ctr.Start(ctx); err != nil { - // If the infra container does not start, we need to tear the pod down. - if err2 := r.removePod(ctx, pod, true, true); err2 != nil { - logrus.Errorf("Error removing pod after infra container failed to start: %v", err2) - } - return nil, errors.Wrapf(err, "error starting Infra Container") - } } return pod, nil |