author     OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com>  2019-05-03 23:14:12 +0200
committer  GitHub <noreply@github.com>  2019-05-03 23:14:12 +0200
commit     4aa90145bf611a7bc08ddff7e061a630154e8b40 (patch)
tree       01dc345c05b6ef2c5416eb169f0388c3afb4feb1 /libpod
parent     2658e870d21dc03096740f17fa869463136d3fae (diff)
parent     d3286952e6f99b3c1f8ba177d8caddc9544adea4 (diff)
Merge pull request #2826 from mheon/restart_policy
Add restart policy for containers
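
This merge adds three container-level policies to libpod: "no"/"" (the default, take no action), "on-failure" (restart on a non-zero exit code, with an optional retry limit), and "always" (restart on any exit). A container opts in at creation time through the new WithRestartPolicy and WithRestartRetries options added in libpod/options.go. The sketch below is illustrative only: the createWithRestartPolicy wrapper, the import paths, and the rt.NewContainer call are assumptions for the example, not part of this change.

// Illustrative sketch, not part of this commit: attaching the new
// restart-policy options when creating a container through libpod.
// The NewContainer call and import paths are assumed for the example.
package main

import (
	"context"

	"github.com/containers/libpod/libpod"
	spec "github.com/opencontainers/runtime-spec/specs-go"
)

func createWithRestartPolicy(ctx context.Context, rt *libpod.Runtime, s *spec.Spec) (*libpod.Container, error) {
	opts := []libpod.CtrCreateOption{
		// Restart only when the container exits with a non-zero code...
		libpod.WithRestartPolicy(libpod.RestartPolicyOnFailure),
		// ...and give up after 5 attempts (0 means unlimited retries).
		libpod.WithRestartRetries(5),
	}
	return rt.NewContainer(ctx, s, opts...)
}

Enforcement itself lives in libpod/container_internal.go below: syncContainer() records when a policy should fire, and Cleanup() acts on it through the new handleRestartPolicy().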
Diffstat (limited to 'libpod')
-rw-r--r--  libpod/container.go          |  61
-rw-r--r--  libpod/container_api.go      |  29
-rw-r--r--  libpod/container_inspect.go  |   1
-rw-r--r--  libpod/container_internal.go | 126
-rw-r--r--  libpod/events/config.go      |   2
-rw-r--r--  libpod/events/events.go      |   2
-rw-r--r--  libpod/options.go            |  35
-rw-r--r--  libpod/pod_api.go            |   7
8 files changed, 250 insertions, 13 deletions
diff --git a/libpod/container.go b/libpod/container.go
index 4bf9a1ba9..c07f4c78d 100644
--- a/libpod/container.go
+++ b/libpod/container.go
@@ -102,6 +102,20 @@ func (ns LinuxNS) String() string {
 	}
 }
 
+// Valid restart policy types.
+const (
+	// RestartPolicyNone indicates that no restart policy has been requested
+	// by a container.
+	RestartPolicyNone = ""
+	// RestartPolicyNo is identical in function to RestartPolicyNone.
+	RestartPolicyNo = "no"
+	// RestartPolicyAlways unconditionally restarts the container.
+	RestartPolicyAlways = "always"
+	// RestartPolicyOnFailure restarts the container on non-0 exit code,
+	// with an optional maximum number of retries.
+	RestartPolicyOnFailure = "on-failure"
+)
+
 // Container is a single OCI container.
 // All operations on a Container that access state must begin with a call to
 // syncContainer().
@@ -179,6 +193,16 @@ type ContainerState struct {
 	// This maps the path the file will be mounted to in the container to
 	// the path of the file on disk outside the container
 	BindMounts map[string]string `json:"bindMounts,omitempty"`
+	// StoppedByUser indicates whether the container was stopped by an
+	// explicit call to the Stop() API.
+	StoppedByUser bool `json:"stoppedByUser,omitempty"`
+	// RestartPolicyMatch indicates whether the conditions for restart
+	// policy have been met.
+	RestartPolicyMatch bool `json:"restartPolicyMatch,omitempty"`
+	// RestartCount is how many times the container was restarted by its
+	// restart policy. This is NOT incremented by normal container restarts
+	// (only by restart policy).
+	RestartCount uint `json:"restartCount,omitempty"`
 
 	// ExtensionStageHooks holds hooks which will be executed by libpod
 	// and not delegated to the OCI runtime.
@@ -346,6 +370,17 @@ type ContainerConfig struct {
 	LogPath string `json:"logPath"`
 	// File containing the conmon PID
 	ConmonPidFile string `json:"conmonPidFile,omitempty"`
+	// RestartPolicy indicates what action the container will take upon
+	// exiting naturally.
+	// Allowed options are "no" (take no action), "on-failure" (restart on
+	// non-zero exit code, up an a maximum of RestartRetries times),
+	// and "always" (always restart the container on any exit code).
+	// The empty string is treated as the default ("no")
+	RestartPolicy string `json:"restart_policy,omitempty"`
+	// RestartRetries indicates the number of attempts that will be made to
+	// restart the container. Used only if RestartPolicy is set to
+	// "on-failure".
+	RestartRetries uint `json:"restart_retries,omitempty"`
 	// TODO log options for log drivers
 
 	PostConfigureNetNS bool `json:"postConfigureNetNS"`
@@ -729,6 +764,17 @@ func (c *Container) LogPath() string {
 	return c.config.LogPath
 }
 
+// RestartPolicy returns the container's restart policy.
+func (c *Container) RestartPolicy() string {
+	return c.config.RestartPolicy
+}
+
+// RestartRetries returns the number of retries that will be attempted when
+// using the "on-failure" restart policy
+func (c *Container) RestartRetries() uint {
+	return c.config.RestartRetries
+}
+
 // RuntimeName returns the name of the runtime
 func (c *Container) RuntimeName() string {
 	return c.runtime.ociRuntime.name
@@ -1003,6 +1049,21 @@ func (c *Container) BindMounts() (map[string]string, error) {
 	return newMap, nil
 }
 
+// StoppedByUser returns whether the container was last stopped by an explicit
+// call to the Stop() API, or whether it exited naturally.
+func (c *Container) StoppedByUser() (bool, error) {
+	if !c.batched {
+		c.lock.Lock()
+		defer c.lock.Unlock()
+
+		if err := c.syncContainer(); err != nil {
+			return false, err
+		}
+	}
+
+	return c.state.StoppedByUser, nil
+}
+
 // Misc Accessors
 
 // Most will require locking
diff --git a/libpod/container_api.go b/libpod/container_api.go
index 5bfd869b3..5bb610aab 100644
--- a/libpod/container_api.go
+++ b/libpod/container_api.go
@@ -57,11 +57,11 @@ func (c *Container) Init(ctx context.Context) (err error) {
 
 	if c.state.State == ContainerStateStopped {
 		// Reinitialize the container
-		return c.reinit(ctx)
+		return c.reinit(ctx, false)
 	}
 
 	// Initialize the container for the first time
-	return c.init(ctx)
+	return c.init(ctx, false)
 }
 
 // Start starts a container.
@@ -199,8 +199,15 @@ func (c *Container) Kill(signal uint) error {
 	if c.state.State != ContainerStateRunning {
 		return errors.Wrapf(ErrCtrStateInvalid, "can only kill running containers")
 	}
+	defer c.newContainerEvent(events.Kill)
 
-	return c.runtime.ociRuntime.killContainer(c, signal)
+	if err := c.runtime.ociRuntime.killContainer(c, signal); err != nil {
+		return err
+	}
+
+	c.state.StoppedByUser = true
+
+	return c.save()
 }
 
 // Exec starts a new process inside the container
@@ -583,6 +590,7 @@ func (c *Container) Cleanup(ctx context.Context) error {
 	if !c.batched {
 		c.lock.Lock()
 		defer c.lock.Unlock()
+
 		if err := c.syncContainer(); err != nil {
 			return err
 		}
@@ -593,6 +601,19 @@
 		return errors.Wrapf(ErrCtrStateInvalid, "container %s is running or paused, refusing to clean up", c.ID())
 	}
 
+	// Handle restart policy.
+	// Returns a bool indicating whether we actually restarted.
+	// If we did, don't proceed to cleanup - just exit.
+	didRestart, err := c.handleRestartPolicy(ctx)
+	if err != nil {
+		return err
+	}
+	if didRestart {
+		return nil
+	}
+
+	// If we didn't restart, we perform a normal cleanup
+
 	// Check if we have active exec sessions
 	if len(c.state.ExecSessions) != 0 {
 		return errors.Wrapf(ErrCtrStateInvalid, "container %s has active exec sessions, refusing to clean up", c.ID())
@@ -754,7 +775,7 @@ func (c *Container) Refresh(ctx context.Context) error {
 		if err := c.prepare(); err != nil {
 			return err
 		}
-		if err := c.init(ctx); err != nil {
+		if err := c.init(ctx, false); err != nil {
 			return err
 		}
 	}
diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go
index aa3a07888..a7369bfdd 100644
--- a/libpod/container_inspect.go
+++ b/libpod/container_inspect.go
@@ -95,6 +95,7 @@ func (c *Container) getContainerInspectData(size bool, driverData *inspect.Data)
 		LogPath:       config.LogPath,
 		ConmonPidFile: config.ConmonPidFile,
 		Name:          config.Name,
+		RestartCount:  int32(runtimeInfo.RestartCount),
 		Driver:        driverData.Name,
 		MountLabel:    config.MountLabel,
 		ProcessLabel:  config.ProcessLabel,
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index a791df491..0b4e5763e 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -210,6 +210,90 @@ func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
 	return nil
 }
 
+// Handle container restart policy.
+// This is called when a container has exited, and was not explicitly stopped by
+// an API call to stop the container or pod it is in.
+func (c *Container) handleRestartPolicy(ctx context.Context) (restarted bool, err error) {
+	// If we did not get a restart policy match, exit immediately.
+	// Do the same if we're not a policy that restarts.
+	if !c.state.RestartPolicyMatch ||
+		c.config.RestartPolicy == RestartPolicyNo ||
+		c.config.RestartPolicy == RestartPolicyNone {
+		return false, nil
+	}
+
+	// If we're RestartPolicyOnFailure, we need to check retries and exit
+	// code.
+	if c.config.RestartPolicy == RestartPolicyOnFailure {
+		if c.state.ExitCode == 0 {
+			return false, nil
+		}
+
+		// If we don't have a max retries set, continue
+		if c.config.RestartRetries > 0 {
+			if c.state.RestartCount < c.config.RestartRetries {
+				logrus.Debugf("Container %s restart policy trigger: on retry %d (of %d)",
+					c.ID(), c.state.RestartCount, c.config.RestartRetries)
+			} else {
+				logrus.Debugf("Container %s restart policy trigger: retries exhausted", c.ID())
+				return false, nil
+			}
+		}
+	}
+
+	logrus.Debugf("Restarting container %s due to restart policy %s", c.ID(), c.config.RestartPolicy)
+
+	// Need to check if dependencies are alive.
+	if err = c.checkDependenciesAndHandleError(ctx); err != nil {
+		return false, err
+	}
+
+	// Is the container running again?
+	// If so, we don't have to do anything
+	if c.state.State == ContainerStateRunning || c.state.State == ContainerStatePaused {
+		return false, nil
+	} else if c.state.State == ContainerStateUnknown {
+		return false, errors.Wrapf(ErrInternal, "invalid container state encountered in restart attempt!")
+	}
+
+	c.newContainerEvent(events.Restart)
+
+	// Increment restart count
+	c.state.RestartCount = c.state.RestartCount + 1
+	logrus.Debugf("Container %s now on retry %d", c.ID(), c.state.RestartCount)
+	if err := c.save(); err != nil {
+		return false, err
+	}
+
+	defer func() {
+		if err != nil {
+			if err2 := c.cleanup(ctx); err2 != nil {
+				logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2)
+			}
+		}
+	}()
+	if err := c.prepare(); err != nil {
+		return false, err
+	}
+
+	if c.state.State == ContainerStateStopped {
+		// Reinitialize the container if we need to
+		if err := c.reinit(ctx, true); err != nil {
+			return false, err
+		}
+	} else if c.state.State == ContainerStateConfigured ||
+		c.state.State == ContainerStateExited {
+		// Initialize the container
+		if err := c.init(ctx, true); err != nil {
+			return false, err
+		}
+	}
+	if err := c.start(); err != nil {
+		return false, err
+	}
+	return true, nil
+}
+
 // Sync this container with on-disk state and runtime status
 // Should only be called with container lock held
 // This function should suffice to ensure a container's state is accurate and
@@ -230,6 +314,14 @@
 		}
 		// Only save back to DB if state changed
 		if c.state.State != oldState {
+			// Check for a restart policy match
+			if c.config.RestartPolicy != RestartPolicyNone && c.config.RestartPolicy != RestartPolicyNo &&
+				(oldState == ContainerStateRunning || oldState == ContainerStatePaused) &&
+				(c.state.State == ContainerStateStopped || c.state.State == ContainerStateExited) &&
+				!c.state.StoppedByUser {
+				c.state.RestartPolicyMatch = true
+			}
+
 			if err := c.save(); err != nil {
 				return err
 			}
@@ -376,6 +468,9 @@ func resetState(state *ContainerState) error {
 	state.ExecSessions = make(map[string]*ExecSession)
 	state.NetworkStatus = nil
 	state.BindMounts = make(map[string]string)
+	state.StoppedByUser = false
+	state.RestartPolicyMatch = false
+	state.RestartCount = 0
 
 	return nil
 }
@@ -569,13 +664,13 @@ func (c *Container) prepareToStart(ctx context.Context, recursive bool) (err err
 
 	if c.state.State == ContainerStateStopped {
 		// Reinitialize the container if we need to
-		if err := c.reinit(ctx); err != nil {
+		if err := c.reinit(ctx, false); err != nil {
 			return err
 		}
 	} else if c.state.State == ContainerStateConfigured ||
 		c.state.State == ContainerStateExited {
 		// Or initialize it if necessary
-		if err := c.init(ctx); err != nil {
+		if err := c.init(ctx, false); err != nil {
 			return err
 		}
 	}
@@ -763,7 +858,7 @@ func (c *Container) completeNetworkSetup() error {
 }
 
 // Initialize a container, creating it in the runtime
-func (c *Container) init(ctx context.Context) error {
+func (c *Container) init(ctx context.Context, retainRetries bool) error {
 	span, _ := opentracing.StartSpanFromContext(ctx, "init")
 	span.SetTag("struct", "container")
 	defer span.Finish()
@@ -789,6 +884,12 @@
 	c.state.ExitCode = 0
 	c.state.Exited = false
 	c.state.State = ContainerStateCreated
+	c.state.StoppedByUser = false
+	c.state.RestartPolicyMatch = false
+
+	if !retainRetries {
+		c.state.RestartCount = 0
+	}
 
 	if err := c.save(); err != nil {
 		return err
@@ -851,7 +952,7 @@ func (c *Container) cleanupRuntime(ctx context.Context) error {
 // Should only be done on ContainerStateStopped containers.
 // Not necessary for ContainerStateExited - the container has already been
 // removed from the runtime, so init() can proceed freely.
-func (c *Container) reinit(ctx context.Context) error {
+func (c *Container) reinit(ctx context.Context, retainRetries bool) error {
 	span, _ := opentracing.StartSpanFromContext(ctx, "reinit")
 	span.SetTag("struct", "container")
 	defer span.Finish()
@@ -863,7 +964,7 @@
 	}
 
 	// Initialize the container again
-	return c.init(ctx)
+	return c.init(ctx, retainRetries)
 }
 
 // Initialize (if necessary) and start a container
@@ -901,12 +1002,12 @@ func (c *Container) initAndStart(ctx context.Context) (err error) {
 
 	if c.state.State == ContainerStateStopped {
 		logrus.Debugf("Recreating container %s in OCI runtime", c.ID())
-		if err := c.reinit(ctx); err != nil {
+		if err := c.reinit(ctx, false); err != nil {
 			return err
 		}
 	} else if c.state.State == ContainerStateConfigured ||
 		c.state.State == ContainerStateExited {
-		if err := c.init(ctx); err != nil {
+		if err := c.init(ctx, false); err != nil {
 			return err
 		}
 	}
@@ -950,6 +1051,11 @@ func (c *Container) stop(timeout uint) error {
 		return err
 	}
 
+	c.state.StoppedByUser = true
+	if err := c.save(); err != nil {
+		return errors.Wrapf(err, "error saving container %s state after stopping", c.ID())
+	}
+
 	// Wait until we have an exit file, and sync once we do
 	return c.waitForExitFileAndSync()
 }
@@ -986,6 +1092,8 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (err e
 		return errors.Wrapf(ErrCtrStateInvalid, "unable to restart a container in a paused or unknown state")
 	}
 
+	c.newContainerEvent(events.Restart)
+
 	if c.state.State == ContainerStateRunning {
 		if err := c.stop(timeout); err != nil {
 			return err
@@ -1004,13 +1112,13 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (err e
 
 	if c.state.State == ContainerStateStopped {
 		// Reinitialize the container if we need to
-		if err := c.reinit(ctx); err != nil {
+		if err := c.reinit(ctx, false); err != nil {
 			return err
 		}
 	} else if c.state.State == ContainerStateConfigured ||
 		c.state.State == ContainerStateExited {
 		// Initialize the container
-		if err := c.init(ctx); err != nil {
+		if err := c.init(ctx, false); err != nil {
 			return err
 		}
 	}
diff --git a/libpod/events/config.go b/libpod/events/config.go
index 36387e835..810988205 100644
--- a/libpod/events/config.go
+++ b/libpod/events/config.go
@@ -134,6 +134,8 @@ const (
 	// Renumber indicates that lock numbers were reallocated at user
 	// request.
 	Renumber Status = "renumber"
+	// Restart indicates the target was restarted via an API call.
+	Restart Status = "restart"
 	// Restore ...
 	Restore Status = "restore"
 	// Save ...
diff --git a/libpod/events/events.go b/libpod/events/events.go
index 202c9db4e..650a47bfb 100644
--- a/libpod/events/events.go
+++ b/libpod/events/events.go
@@ -144,6 +144,8 @@ func StringToStatus(name string) (Status, error) {
 		return Remove, nil
 	case Renumber.String():
 		return Renumber, nil
+	case Restart.String():
+		return Restart, nil
 	case Restore.String():
 		return Restore, nil
 	case Save.String():
diff --git a/libpod/options.go b/libpod/options.go
index 86c04db09..7ec7dfe63 100644
--- a/libpod/options.go
+++ b/libpod/options.go
@@ -1239,6 +1239,41 @@ func WithUseImageHosts() CtrCreateOption {
 	}
 }
 
+// WithRestartPolicy sets the container's restart policy. Valid values are
+// "no", "on-failure", and "always". The empty string is allowed, and will be
+// equivalent to "no".
+func WithRestartPolicy(policy string) CtrCreateOption {
+	return func(ctr *Container) error {
+		if ctr.valid {
+			return ErrCtrFinalized
+		}
+
+		switch policy {
+		case RestartPolicyNone, RestartPolicyNo, RestartPolicyOnFailure, RestartPolicyAlways:
+			ctr.config.RestartPolicy = policy
+		default:
+			return errors.Wrapf(ErrInvalidArg, "%q is not a valid restart policy", policy)
+		}
+
+		return nil
+	}
+}
+
+// WithRestartRetries sets the number of retries to use when restarting a
+// container with the "on-failure" restart policy.
+// 0 is an allowed value, and indicates infinite retries.
+func WithRestartRetries(tries uint) CtrCreateOption {
+	return func(ctr *Container) error {
+		if ctr.valid {
+			return ErrCtrFinalized
+		}
+
+		ctr.config.RestartRetries = tries
+
+		return nil
+	}
+}
+
 // withIsInfra sets the container to be an infra container. This means the container will be sometimes hidden
 // and expected to be the first container in the pod.
 func withIsInfra() CtrCreateOption {
diff --git a/libpod/pod_api.go b/libpod/pod_api.go
index 9a6baf23e..9ed5c88eb 100644
--- a/libpod/pod_api.go
+++ b/libpod/pod_api.go
@@ -364,6 +364,13 @@ func (p *Pod) Kill(signal uint) (map[string]error, error) {
 		}
 
 		logrus.Debugf("Killed container %s with signal %d", ctr.ID(), signal)
+
+		ctr.state.StoppedByUser = true
+		if err := ctr.save(); err != nil {
+			ctrErrors[ctr.ID()] = err
+		}
+
+		ctr.lock.Unlock()
 	}
 
 	if len(ctrErrors) > 0 {
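
Taken together, the change has three moving parts: Stop(), Kill(), and Pod.Kill() now record StoppedByUser; syncContainer() sets RestartPolicyMatch when a container moves from Running/Paused to Stopped/Exited without that flag; and Cleanup() calls handleRestartPolicy(), which re-runs prepare/init/start and passes retainRetries=true so RestartCount survives policy-driven restarts. The decision itself reduces to a small predicate; the function below is an illustrative, standalone restatement of those checks with a hypothetical name and parameters, not code from the commit.

// shouldRestart is an illustrative restatement of the checks made by
// syncContainer() and handleRestartPolicy() above; the function and its
// parameters are hypothetical.
func shouldRestart(policy string, stoppedByUser bool, exitCode int32, restartCount, maxRetries uint) bool {
	// An explicit Stop()/Kill() sets StoppedByUser, which keeps the
	// restart-policy match from ever being recorded.
	if stoppedByUser {
		return false
	}
	switch policy {
	case "", "no":
		// RestartPolicyNone / RestartPolicyNo: never restart.
		return false
	case "always":
		// RestartPolicyAlways: restart on any exit code.
		return true
	case "on-failure":
		// RestartPolicyOnFailure: only restart on failure, and only while
		// retries remain (RestartRetries == 0 means no limit).
		if exitCode == 0 {
			return false
		}
		return maxRetries == 0 || restartCount < maxRetries
	default:
		return false
	}
}

Because handleRestartPolicy() runs from Cleanup() and re-initializes with retainRetries=true, RestartCount accumulates only across policy-driven restarts; an ordinary init()/reinit() (retainRetries=false) resets it to zero, which is also what keeps the reported RestartCount in container inspect meaningful.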