From 40d302be8f9e3d1832d851cb016b2f568780a950 Mon Sep 17 00:00:00 2001
From: Matthew Heon
Date: Mon, 12 Mar 2018 15:32:10 -0400
Subject: Modify pod API to move Init() into Start()

Separating Init() and Start() does not make sense on the pod side, where
we may have to start containers in order to initialize others due to
dependency ordering.

Also adjusts the internal container API for more code sharing.

Signed-off-by: Matthew Heon

Closes: #478
Approved by: rhatdan
---
 libpod/container_api.go      |  69 +++++++++++++-------------
 libpod/container_internal.go | 107 +++++++++++++++++++++++++++++++++++++---
 libpod/pod.go                | 115 ++++++++++++++++++++++++++++++++++++-------
 3 files changed, 234 insertions(+), 57 deletions(-)

(limited to 'libpod')

diff --git a/libpod/container_api.go b/libpod/container_api.go
index 0a27da7c9..7e2197614 100644
--- a/libpod/container_api.go
+++ b/libpod/container_api.go
@@ -59,35 +59,11 @@ func (c *Container) Init() (err error) {
 		}
 	}()
 
-	if err := c.makeBindMounts(); err != nil {
-		return err
-	}
-
-	// Generate the OCI spec
-	spec, err := c.generateSpec()
-	if err != nil {
-		return err
-	}
-
-	// Save the OCI spec to disk
-	if err := c.saveSpec(spec); err != nil {
-		return err
-	}
-
-	// With the spec complete, do an OCI create
-	if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent); err != nil {
-		return err
-	}
-
-	logrus.Debugf("Created container %s in OCI runtime", c.ID())
-
-	c.state.State = ContainerStateCreated
-
-	return c.save()
+	return c.init()
 }
 
 // Start starts a container
-func (c *Container) Start() error {
+func (c *Container) Start() (err error) {
 	if !c.locked {
 		c.lock.Lock()
 		defer c.lock.Unlock()
@@ -107,20 +83,33 @@ func (c *Container) Start() error {
 		return errors.Wrapf(ErrNotImplemented, "restarting a stopped container is not yet supported")
 	}
 
-	// Mount storage for the container
+	// Mount storage for the container if necessary
 	if err := c.mountStorage(); err != nil {
 		return err
 	}
+	defer func() {
+		if err != nil {
+			if err2 := c.cleanupStorage(); err2 != nil {
+				logrus.Errorf("Error cleaning up storage for container %s: %v", c.ID(), err2)
+			}
+		}
+	}()
 
-	if err := c.runtime.ociRuntime.startContainer(c); err != nil {
-		return err
+	// Create a network namespace if necessary
+	if c.config.CreateNetNS && c.state.NetNS == nil {
+		if err := c.runtime.createNetNS(c); err != nil {
+			return err
+		}
 	}
+	defer func() {
+		if err != nil {
+			if err2 := c.runtime.teardownNetNS(c); err2 != nil {
+				logrus.Errorf("Error tearing down network namespace for container %s: %v", c.ID(), err2)
+			}
+		}
+	}()
 
-	logrus.Debugf("Started container %s", c.ID())
-
-	c.state.State = ContainerStateRunning
-
-	return c.save()
+	return c.start()
 }
 
 // Stop uses the container's stop signal (or SIGTERM if no signal was specified)
@@ -138,6 +127,12 @@ func (c *Container) Stop() error {
 		}
 	}
 
+	if c.state.State == ContainerStateConfigured ||
+		c.state.State == ContainerStateUnknown ||
+		c.state.State == ContainerStatePaused {
+		return errors.Wrapf(ErrCtrStateInvalid, "can only stop created, running, or stopped containers")
+	}
+
 	return c.stop(c.config.StopTimeout)
 }
 
@@ -154,6 +149,12 @@ func (c *Container) StopWithTimeout(timeout uint) error {
 		}
 	}
 
+	if c.state.State == ContainerStateConfigured ||
+		c.state.State == ContainerStateUnknown ||
+		c.state.State == ContainerStatePaused {
+		return errors.Wrapf(ErrCtrStateInvalid, "can only stop created, running, or stopped containers")
+	}
+
 	return c.stop(timeout)
 }
 
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index 3c7637717..ba11dd2c6 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -313,16 +313,111 @@ func (c *Container) save() error {
 	return nil
 }
 
+// Initialize a container, creating it in the runtime
+func (c *Container) init() error {
+	if err := c.makeBindMounts(); err != nil {
+		return err
+	}
+
+	// Generate the OCI spec
+	spec, err := c.generateSpec()
+	if err != nil {
+		return err
+	}
+
+	// Save the OCI spec to disk
+	if err := c.saveSpec(spec); err != nil {
+		return err
+	}
+
+	// With the spec complete, do an OCI create
+	if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent); err != nil {
+		return err
+	}
+
+	logrus.Debugf("Created container %s in OCI runtime", c.ID())
+
+	c.state.State = ContainerStateCreated
+
+	return c.save()
+}
+
+// Initialize (if necessary) and start a container
+// Performs all necessary steps to start a container that is not running
+// Does not lock or check validity
+func (c *Container) initAndStart() (err error) {
+	// If we are ContainerStateUnknown, throw an error
+	if c.state.State == ContainerStateUnknown {
+		return errors.Wrapf(ErrCtrStateInvalid, "container %s is in an unknown state", c.ID())
+	}
+
+	// If we are running, do nothing
+	if c.state.State == ContainerStateRunning {
+		return nil
+	}
+	// If we are paused, throw an error
+	if c.state.State == ContainerStatePaused {
+		return errors.Wrapf(ErrCtrStateInvalid, "cannot start paused container %s", c.ID())
+	}
+	// TODO remove this once we can restart containers
+	if c.state.State == ContainerStateStopped {
+		return errors.Wrapf(ErrNotImplemented, "restarting containers is not yet implemented")
+	}
+
+	// Mount if necessary
+	if err := c.mountStorage(); err != nil {
+		return err
+	}
+	defer func() {
+		if err != nil {
+			if err2 := c.cleanupStorage(); err2 != nil {
+				logrus.Errorf("Error cleaning up storage for container %s: %v", c.ID(), err2)
+			}
+		}
+	}()
+
+	// Create a network namespace if necessary
+	if c.config.CreateNetNS && c.state.NetNS == nil {
+		if err := c.runtime.createNetNS(c); err != nil {
+			return err
+		}
+	}
+	defer func() {
+		if err != nil {
+			if err2 := c.runtime.teardownNetNS(c); err2 != nil {
+				logrus.Errorf("Error tearing down network namespace for container %s: %v", c.ID(), err2)
+			}
+		}
+	}()
+
+	// If we are ContainerStateConfigured we need to init()
+	if c.state.State == ContainerStateConfigured {
+		if err := c.init(); err != nil {
+			return err
+		}
+	}
+
+	// Now start the container
+	return c.start()
+}
+
+// Internal, non-locking function to start a container
+func (c *Container) start() error {
+	if err := c.runtime.ociRuntime.startContainer(c); err != nil {
+		return err
+	}
+
+	logrus.Debugf("Started container %s", c.ID())
+
+	c.state.State = ContainerStateRunning
+
+	return c.save()
+}
+
 // Internal, non-locking function to stop container
 func (c *Container) stop(timeout uint) error {
 	logrus.Debugf("Stopping ctr %s with timeout %d", c.ID(), timeout)
 
-	if c.state.State == ContainerStateConfigured ||
-		c.state.State == ContainerStateUnknown ||
-		c.state.State == ContainerStatePaused {
-		return errors.Wrapf(ErrCtrStateInvalid, "can only stop created, running, or stopped containers")
-	}
-
 	if err := c.runtime.ociRuntime.stopContainer(c, timeout); err != nil {
 		return err
 	}
diff --git a/libpod/pod.go b/libpod/pod.go
index a5b05d090..148093912 100644
--- a/libpod/pod.go
+++ b/libpod/pod.go
@@ -68,12 +68,13 @@ func newPod(lockDir string, runtime *Runtime) (*Pod, error) {
 	return pod, nil
 }
 
-// Init initializes all containers within a pod that have not been initialized
-func (p *Pod) Init() error {
-	return ErrNotImplemented
-}
+// TODO: need function to produce a directed graph of containers
+// This would allow us to properly determine stop/start order
 
 // Start starts all containers within a pod
+// It combines the effects of Init() and Start() on a container
+// If a container has already been initialized it will be started,
+// otherwise it will be initialized then started.
 // Containers that are already running or have been paused are ignored
 // All containers are started independently, in order dictated by their
 // dependencies.
@@ -97,6 +98,11 @@ func (p *Pod) Start() (map[string]error, error) {
 		return nil, err
 	}
 
+	// Maintain a map of containers still to start
+	ctrsToStart := make(map[string]*Container)
+	// Maintain a map of all containers so we can easily look up dependencies
+	allCtrsMap := make(map[string]*Container)
+
 	// We need to lock all the containers
 	for _, ctr := range allCtrs {
 		ctr.lock.Lock()
@@ -105,10 +111,80 @@
 		if err := ctr.syncContainer(); err != nil {
 			return nil, err
 		}
+
+		if ctr.state.State == ContainerStateConfigured ||
+			ctr.state.State == ContainerStateCreated ||
+			ctr.state.State == ContainerStateStopped {
+			ctrsToStart[ctr.ID()] = ctr
+		}
+		allCtrsMap[ctr.ID()] = ctr
 	}
 
 	ctrErrors := make(map[string]error)
 
+	// Loop at most 10 times, to prevent potential infinite loops in
+	// dependencies
+	loopCounter := 10
+
+	// Loop while we still have containers to start
+	for len(ctrsToStart) > 0 {
+		// Loop through all containers, attempting to start them
+		for id, ctr := range ctrsToStart {
+			// TODO remove this when we support restarting containers
+			if ctr.state.State == ContainerStateStopped {
+				ctrErrors[id] = errors.Wrapf(ErrNotImplemented, "starting stopped containers is not yet supported")
+
+				delete(ctrsToStart, id)
+				continue
+			}
+
+			// TODO should we only do a dependencies check if we are not ContainerStateCreated?
+			depsOK := true
+			var depErr error
+			// Check container dependencies
+			for _, depID := range ctr.Dependencies() {
+				depCtr := allCtrsMap[depID]
+				if depCtr.state.State != ContainerStateRunning &&
+					depCtr.state.State != ContainerStatePaused {
+					// We are definitely not OK to init, a dependency is not up
+					depsOK = false
+					// Check to see if the dependency errored
+					// If it did, error here too
+					if _, ok := ctrErrors[depID]; ok {
+						depErr = errors.Wrapf(ErrCtrStateInvalid, "dependency %s of container %s failed to start", depID, id)
+					}
+
+					break
+				}
+			}
+			if !depsOK {
+				// Only if one of the containers dependencies failed should we stop trying
+				// Otherwise, assume it's just yet to start, retry starting this container later
+				if depErr != nil {
+					ctrErrors[id] = depErr
+					delete(ctrsToStart, id)
+				}
+				continue
+			}
+
+			// Initialize and start the container
+			if err := ctr.initAndStart(); err != nil {
+				ctrErrors[id] = err
+			}
+			delete(ctrsToStart, id)
+		}
+
+		loopCounter = loopCounter - 1
+		if loopCounter == 0 {
+			// Loop through all remaining containers and add an error
+			for id := range ctrsToStart {
+				ctrErrors[id] = errors.Wrapf(ErrInternal, "exceeded maximum attempts trying to start container %s", id)
+			}
+
+			break
+		}
+	}
+
 	// Start all containers
 	for _, ctr := range allCtrs {
 		// Ignore containers that are not created or stopped
@@ -148,6 +224,8 @@ func (p *Pod) Start() (map[string]error, error) {
 // Each container will use its own stop timeout
 // Only running containers will be stopped. Paused, stopped, or created
 // containers will be ignored.
+// If cleanup is true, mounts and network namespaces will be cleaned up after
+// the container is stopped.
 // All containers are stopped independently. An error stopping one container
 // will not prevent other containers being stopped.
 // An error and a map[string]error are returned
@@ -157,7 +235,7 @@ func (p *Pod) Start() (map[string]error, error) {
 // containers. The container ID is mapped to the error encountered. The error is
 // set to ErrCtrExists
 // If both error and the map are nil, all containers were stopped without error
-func (p *Pod) Stop() (map[string]error, error) {
+func (p *Pod) Stop(cleanup bool) (map[string]error, error) {
 	p.lock.Lock()
 	defer p.lock.Unlock()
 
@@ -182,6 +260,9 @@ func (p *Pod) Stop() (map[string]error, error) {
 		return nil, err
 	}
 
 	ctrErrors := make(map[string]error)
 
+	// TODO: There may be cases where it makes sense to order stops based on
+	// dependencies. Should we bother with this?
+
 	// Stop to all containers
 	for _, ctr := range allCtrs {
 		// Ignore containers that are not running
@@ -189,22 +270,22 @@ func (p *Pod) Stop() (map[string]error, error) {
 			continue
 		}
 
-		if err := ctr.runtime.ociRuntime.stopContainer(ctr, ctr.config.StopTimeout); err != nil {
-			ctrErrors[ctr.ID()] = err
-			continue
-		}
-
-		logrus.Debugf("Stopped container %s", ctr.ID())
-
-		// Sync container state to pick up return code
-		if err := ctr.runtime.ociRuntime.updateContainerStatus(ctr); err != nil {
+		if err := ctr.stop(ctr.config.StopTimeout); err != nil {
 			ctrErrors[ctr.ID()] = err
 			continue
 		}
 
-		// Clean up storage to ensure we don't leave dangling mounts
-		if err := ctr.cleanupStorage(); err != nil {
-			ctrErrors[ctr.ID()] = err
+		if cleanup {
+			// Clean up storage to ensure we don't leave dangling mounts
+			if err := ctr.cleanupStorage(); err != nil {
+				ctrErrors[ctr.ID()] = err
+				continue
+			}
+
+			// Clean up network namespace
+			if err := ctr.cleanupNetwork(); err != nil {
+				ctrErrors[ctr.ID()] = err
+			}
 		}
 	}
 
-- 
cgit v1.2.3-54-g00ecf
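
Usage sketch (illustrative, not part of the commit): with this change a
caller no longer initializes a pod's containers separately. Pod.Start()
initializes any container that still needs it and starts containers in
dependency order, returning a per-container error map, while Pod.Stop()
now takes a cleanup flag that also unmounts storage and tears down
network namespaces. The helper and package below are hypothetical, and
the import path is assumed to be the one in use at the time
(github.com/projectatomic/libpod/libpod).

package example

import (
	"github.com/projectatomic/libpod/libpod"
	"github.com/sirupsen/logrus"
)

// startThenStopPod starts every container in a pod, reports any
// per-container failures, then stops the pod with cleanup enabled.
func startThenStopPod(pod *libpod.Pod) error {
	// Start() now covers Init(): configured containers are created in
	// the OCI runtime first, then started in dependency order.
	ctrErrs, err := pod.Start()
	if err != nil {
		// Pod-level failure
		return err
	}
	for id, ctrErr := range ctrErrs {
		// Per-container failures are reported in the returned map
		logrus.Errorf("Error starting container %s: %v", id, ctrErr)
	}

	// Stop(true) also cleans up mounts and network namespaces
	ctrErrs, err = pod.Stop(true)
	if err != nil {
		return err
	}
	for id, ctrErr := range ctrErrs {
		logrus.Errorf("Error stopping container %s: %v", id, ctrErr)
	}
	return nil
}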