From 81804fc4641d279fec8f9bf48b21b22fc90cb891 Mon Sep 17 00:00:00 2001 From: Peter Hunt Date: Fri, 15 Feb 2019 16:39:24 -0500 Subject: pod infra container is started before a container in a pod is run, started, or attached. Prior, a pod would have to be started immediately when created, leading to confusion about what a pod state should be immediately after creation. The problem was podman run --pod ... would error out if the infra container wasn't started (as it is a dependency). Fix this by allowing for recursive start, where each of the container's dependencies are started prior to the new container. This is only applied to the case where a new container is attached to a pod. Also rework container_api Start, StartAndAttach, and Init functions, as there was some duplicated code, which made addressing the problem easier to fix. Signed-off-by: Peter Hunt --- libpod/container_graph.go | 97 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) (limited to 'libpod/container_graph.go') diff --git a/libpod/container_graph.go b/libpod/container_graph.go index 44a1f1736..da93be77d 100644 --- a/libpod/container_graph.go +++ b/libpod/container_graph.go @@ -1,6 +1,9 @@ package libpod import ( + "context" + "strings" + "github.com/pkg/errors" "github.com/sirupsen/logrus" ) @@ -169,3 +172,97 @@ func detectCycles(graph *containerGraph) (bool, error) { return false, nil } + +// Visit a node on a container graph and start the container, or set an error if +// a dependency failed to start. if restart is true, startNode will restart the node instead of starting it. +func startNode(ctx context.Context, node *containerNode, setError bool, ctrErrors map[string]error, ctrsVisited map[string]bool, restart bool) { + // First, check if we have already visited the node + if ctrsVisited[node.id] { + return + } + + // If setError is true, a dependency of us failed + // Mark us as failed and recurse + if setError { + // Mark us as visited, and set an error + ctrsVisited[node.id] = true + ctrErrors[node.id] = errors.Wrapf(ErrCtrStateInvalid, "a dependency of container %s failed to start", node.id) + + // Hit anyone who depends on us, and set errors on them too + for _, successor := range node.dependedOn { + startNode(ctx, successor, true, ctrErrors, ctrsVisited, restart) + } + + return + } + + // Have all our dependencies started? + // If not, don't visit the node yet + depsVisited := true + for _, dep := range node.dependsOn { + depsVisited = depsVisited && ctrsVisited[dep.id] + } + if !depsVisited { + // Don't visit us yet, all dependencies are not up + // We'll hit the dependencies eventually, and when we do it will + // recurse here + return + } + + // Going to try to start the container, mark us as visited + ctrsVisited[node.id] = true + + ctrErrored := false + + // Check if dependencies are running + // Graph traversal means we should have started them + // But they could have died before we got here + // Does not require that the container be locked, we only need to lock + // the dependencies + depsStopped, err := node.container.checkDependenciesRunning() + if err != nil { + ctrErrors[node.id] = err + ctrErrored = true + } else if len(depsStopped) > 0 { + // Our dependencies are not running + depsList := strings.Join(depsStopped, ",") + ctrErrors[node.id] = errors.Wrapf(ErrCtrStateInvalid, "the following dependencies of container %s are not running: %s", node.id, depsList) + ctrErrored = true + } + + // Lock before we start + node.container.lock.Lock() + + // Sync the container to pick up current state + if !ctrErrored { + if err := node.container.syncContainer(); err != nil { + ctrErrored = true + ctrErrors[node.id] = err + } + } + + // Start the container (only if it is not running) + if !ctrErrored { + if !restart && node.container.state.State != ContainerStateRunning { + if err := node.container.initAndStart(ctx); err != nil { + ctrErrored = true + ctrErrors[node.id] = err + } + } + if restart && node.container.state.State != ContainerStatePaused && node.container.state.State != ContainerStateUnknown { + if err := node.container.restartWithTimeout(ctx, node.container.config.StopTimeout); err != nil { + ctrErrored = true + ctrErrors[node.id] = err + } + } + } + + node.container.lock.Unlock() + + // Recurse to anyone who depends on us and start them + for _, successor := range node.dependedOn { + startNode(ctx, successor, ctrErrored, ctrErrors, ctrsVisited, restart) + } + + return +} -- cgit v1.2.3-54-g00ecf