aboutsummaryrefslogtreecommitdiff
path: root/libpod
diff options
context:
space:
mode:
authorOpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com>2019-05-03 23:14:12 +0200
committerGitHub <noreply@github.com>2019-05-03 23:14:12 +0200
commit4aa90145bf611a7bc08ddff7e061a630154e8b40 (patch)
tree01dc345c05b6ef2c5416eb169f0388c3afb4feb1 /libpod
parent2658e870d21dc03096740f17fa869463136d3fae (diff)
parentd3286952e6f99b3c1f8ba177d8caddc9544adea4 (diff)
downloadpodman-4aa90145bf611a7bc08ddff7e061a630154e8b40.tar.gz
podman-4aa90145bf611a7bc08ddff7e061a630154e8b40.tar.bz2
podman-4aa90145bf611a7bc08ddff7e061a630154e8b40.zip
Merge pull request #2826 from mheon/restart_policy
Add restart policy for containers
Diffstat (limited to 'libpod')
-rw-r--r--libpod/container.go61
-rw-r--r--libpod/container_api.go29
-rw-r--r--libpod/container_inspect.go1
-rw-r--r--libpod/container_internal.go126
-rw-r--r--libpod/events/config.go2
-rw-r--r--libpod/events/events.go2
-rw-r--r--libpod/options.go35
-rw-r--r--libpod/pod_api.go7
8 files changed, 250 insertions, 13 deletions
diff --git a/libpod/container.go b/libpod/container.go
index 4bf9a1ba9..c07f4c78d 100644
--- a/libpod/container.go
+++ b/libpod/container.go
@@ -102,6 +102,20 @@ func (ns LinuxNS) String() string {
}
}
+// Valid restart policy types.
+const (
+ // RestartPolicyNone indicates that no restart policy has been requested
+ // by a container.
+ RestartPolicyNone = ""
+ // RestartPolicyNo is identical in function to RestartPolicyNone.
+ RestartPolicyNo = "no"
+ // RestartPolicyAlways unconditionally restarts the container.
+ RestartPolicyAlways = "always"
+ // RestartPolicyOnFailure restarts the container on non-0 exit code,
+ // with an optional maximum number of retries.
+ RestartPolicyOnFailure = "on-failure"
+)
+
// Container is a single OCI container.
// All operations on a Container that access state must begin with a call to
// syncContainer().
@@ -179,6 +193,16 @@ type ContainerState struct {
// This maps the path the file will be mounted to in the container to
// the path of the file on disk outside the container
BindMounts map[string]string `json:"bindMounts,omitempty"`
+ // StoppedByUser indicates whether the container was stopped by an
+ // explicit call to the Stop() or Kill() API.
+ StoppedByUser bool `json:"stoppedByUser,omitempty"`
+ // RestartPolicyMatch indicates whether the conditions for restart
+ // policy have been met.
+ RestartPolicyMatch bool `json:"restartPolicyMatch,omitempty"`
+ // RestartCount is how many times the container was restarted by its
+ // restart policy. This is NOT incremented by normal container restarts
+ // (only by restart policy).
+ RestartCount uint `json:"restartCount,omitempty"`
// ExtensionStageHooks holds hooks which will be executed by libpod
// and not delegated to the OCI runtime.
@@ -346,6 +370,17 @@ type ContainerConfig struct {
LogPath string `json:"logPath"`
// File containing the conmon PID
ConmonPidFile string `json:"conmonPidFile,omitempty"`
+ // RestartPolicy indicates what action the container will take upon
+ // exiting naturally.
+ // Allowed options are "no" (take no action), "on-failure" (restart on
+ // non-zero exit code, up to a maximum of RestartRetries times),
+ // and "always" (always restart the container on any exit code).
+ // The empty string is treated as the default ("no").
+ RestartPolicy string `json:"restart_policy,omitempty"`
+ // RestartRetries indicates the number of attempts that will be made to
+ // restart the container. Used only if RestartPolicy is set to
+ // "on-failure".
+ RestartRetries uint `json:"restart_retries,omitempty"`
// TODO log options for log drivers
PostConfigureNetNS bool `json:"postConfigureNetNS"`
@@ -729,6 +764,17 @@ func (c *Container) LogPath() string {
return c.config.LogPath
}
+// RestartPolicy returns the container's restart policy.
+func (c *Container) RestartPolicy() string {
+ return c.config.RestartPolicy
+}
+
+// RestartRetries returns the number of retries that will be attempted when
+// using the "on-failure" restart policy
+func (c *Container) RestartRetries() uint {
+ return c.config.RestartRetries
+}
+
// RuntimeName returns the name of the runtime
func (c *Container) RuntimeName() string {
return c.runtime.ociRuntime.name
@@ -1003,6 +1049,21 @@ func (c *Container) BindMounts() (map[string]string, error) {
return newMap, nil
}
+// StoppedByUser returns whether the container was last stopped by an explicit
+// call to the Stop() API, or whether it exited naturally.
+func (c *Container) StoppedByUser() (bool, error) {
+ if !c.batched {
+ c.lock.Lock()
+ defer c.lock.Unlock()
+
+ if err := c.syncContainer(); err != nil {
+ return false, err
+ }
+ }
+
+ return c.state.StoppedByUser, nil
+}
+
// Misc Accessors
// Most will require locking
diff --git a/libpod/container_api.go b/libpod/container_api.go
index 5bfd869b3..5bb610aab 100644
--- a/libpod/container_api.go
+++ b/libpod/container_api.go
@@ -57,11 +57,11 @@ func (c *Container) Init(ctx context.Context) (err error) {
if c.state.State == ContainerStateStopped {
// Reinitialize the container
- return c.reinit(ctx)
+ return c.reinit(ctx, false)
}
// Initialize the container for the first time
- return c.init(ctx)
+ return c.init(ctx, false)
}
// Start starts a container.
@@ -199,8 +199,15 @@ func (c *Container) Kill(signal uint) error {
if c.state.State != ContainerStateRunning {
return errors.Wrapf(ErrCtrStateInvalid, "can only kill running containers")
}
+
defer c.newContainerEvent(events.Kill)
- return c.runtime.ociRuntime.killContainer(c, signal)
+ if err := c.runtime.ociRuntime.killContainer(c, signal); err != nil {
+ return err
+ }
+
+ c.state.StoppedByUser = true
+
+ return c.save()
}
// Exec starts a new process inside the container
@@ -583,6 +590,7 @@ func (c *Container) Cleanup(ctx context.Context) error {
if !c.batched {
c.lock.Lock()
defer c.lock.Unlock()
+
if err := c.syncContainer(); err != nil {
return err
}
@@ -593,6 +601,19 @@ func (c *Container) Cleanup(ctx context.Context) error {
return errors.Wrapf(ErrCtrStateInvalid, "container %s is running or paused, refusing to clean up", c.ID())
}
+ // Handle restart policy.
+ // Returns a bool indicating whether we actually restarted.
+ // If we did, don't proceed to cleanup - just exit.
+ didRestart, err := c.handleRestartPolicy(ctx)
+ if err != nil {
+ return err
+ }
+ if didRestart {
+ return nil
+ }
+
+ // If we didn't restart, we perform a normal cleanup
+
// Check if we have active exec sessions
if len(c.state.ExecSessions) != 0 {
return errors.Wrapf(ErrCtrStateInvalid, "container %s has active exec sessions, refusing to clean up", c.ID())
@@ -754,7 +775,7 @@ func (c *Container) Refresh(ctx context.Context) error {
if err := c.prepare(); err != nil {
return err
}
- if err := c.init(ctx); err != nil {
+ if err := c.init(ctx, false); err != nil {
return err
}
}
diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go
index aa3a07888..a7369bfdd 100644
--- a/libpod/container_inspect.go
+++ b/libpod/container_inspect.go
@@ -95,6 +95,7 @@ func (c *Container) getContainerInspectData(size bool, driverData *inspect.Data)
LogPath: config.LogPath,
ConmonPidFile: config.ConmonPidFile,
Name: config.Name,
+ RestartCount: int32(runtimeInfo.RestartCount),
Driver: driverData.Name,
MountLabel: config.MountLabel,
ProcessLabel: config.ProcessLabel,
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index a791df491..0b4e5763e 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -210,6 +210,90 @@ func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
return nil
}
+// Handle container restart policy.
+// This is called when a container has exited, and was not explicitly stopped by
+// an API call to stop the container or pod it is in.
+func (c *Container) handleRestartPolicy(ctx context.Context) (restarted bool, err error) {
+ // If we did not get a restart policy match, exit immediately.
+ // Do the same if we're not a policy that restarts.
+ if !c.state.RestartPolicyMatch ||
+ c.config.RestartPolicy == RestartPolicyNo ||
+ c.config.RestartPolicy == RestartPolicyNone {
+ return false, nil
+ }
+
+ // If we're RestartPolicyOnFailure, we need to check retries and exit
+ // code.
+ if c.config.RestartPolicy == RestartPolicyOnFailure {
+ if c.state.ExitCode == 0 {
+ return false, nil
+ }
+
+ // Only enforce the retry limit if a maximum is set (0 means unlimited)
+ if c.config.RestartRetries > 0 {
+ if c.state.RestartCount < c.config.RestartRetries {
+ logrus.Debugf("Container %s restart policy trigger: on retry %d (of %d)",
+ c.ID(), c.state.RestartCount, c.config.RestartRetries)
+ } else {
+ logrus.Debugf("Container %s restart policy trigger: retries exhausted", c.ID())
+ return false, nil
+ }
+ }
+ }
+
+ logrus.Debugf("Restarting container %s due to restart policy %s", c.ID(), c.config.RestartPolicy)
+
+ // Need to check if dependencies are alive.
+ if err = c.checkDependenciesAndHandleError(ctx); err != nil {
+ return false, err
+ }
+
+ // Is the container running again?
+ // If so, we don't have to do anything
+ if c.state.State == ContainerStateRunning || c.state.State == ContainerStatePaused {
+ return false, nil
+ } else if c.state.State == ContainerStateUnknown {
+ return false, errors.Wrapf(ErrInternal, "invalid container state encountered in restart attempt!")
+ }
+
+ c.newContainerEvent(events.Restart)
+
+ // Increment restart count
+ c.state.RestartCount = c.state.RestartCount + 1
+ logrus.Debugf("Container %s now on retry %d", c.ID(), c.state.RestartCount)
+ if err := c.save(); err != nil {
+ return false, err
+ }
+
+ defer func() {
+ if err != nil {
+ if err2 := c.cleanup(ctx); err2 != nil {
+ logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2)
+ }
+ }
+ }()
+ if err := c.prepare(); err != nil {
+ return false, err
+ }
+
+ if c.state.State == ContainerStateStopped {
+ // Reinitialize the container if we need to
+ if err := c.reinit(ctx, true); err != nil {
+ return false, err
+ }
+ } else if c.state.State == ContainerStateConfigured ||
+ c.state.State == ContainerStateExited {
+ // Initialize the container
+ if err := c.init(ctx, true); err != nil {
+ return false, err
+ }
+ }
+ if err := c.start(); err != nil {
+ return false, err
+ }
+ return true, nil
+}
+
// Sync this container with on-disk state and runtime status
// Should only be called with container lock held
// This function should suffice to ensure a container's state is accurate and
@@ -230,6 +314,14 @@ func (c *Container) syncContainer() error {
}
// Only save back to DB if state changed
if c.state.State != oldState {
+ // Check for a restart policy match
+ if c.config.RestartPolicy != RestartPolicyNone && c.config.RestartPolicy != RestartPolicyNo &&
+ (oldState == ContainerStateRunning || oldState == ContainerStatePaused) &&
+ (c.state.State == ContainerStateStopped || c.state.State == ContainerStateExited) &&
+ !c.state.StoppedByUser {
+ c.state.RestartPolicyMatch = true
+ }
+
if err := c.save(); err != nil {
return err
}
@@ -376,6 +468,9 @@ func resetState(state *ContainerState) error {
state.ExecSessions = make(map[string]*ExecSession)
state.NetworkStatus = nil
state.BindMounts = make(map[string]string)
+ state.StoppedByUser = false
+ state.RestartPolicyMatch = false
+ state.RestartCount = 0
return nil
}
@@ -569,13 +664,13 @@ func (c *Container) prepareToStart(ctx context.Context, recursive bool) (err err
if c.state.State == ContainerStateStopped {
// Reinitialize the container if we need to
- if err := c.reinit(ctx); err != nil {
+ if err := c.reinit(ctx, false); err != nil {
return err
}
} else if c.state.State == ContainerStateConfigured ||
c.state.State == ContainerStateExited {
// Or initialize it if necessary
- if err := c.init(ctx); err != nil {
+ if err := c.init(ctx, false); err != nil {
return err
}
}
@@ -763,7 +858,7 @@ func (c *Container) completeNetworkSetup() error {
}
// Initialize a container, creating it in the runtime
-func (c *Container) init(ctx context.Context) error {
+func (c *Container) init(ctx context.Context, retainRetries bool) error {
span, _ := opentracing.StartSpanFromContext(ctx, "init")
span.SetTag("struct", "container")
defer span.Finish()
@@ -789,6 +884,12 @@ func (c *Container) init(ctx context.Context) error {
c.state.ExitCode = 0
c.state.Exited = false
c.state.State = ContainerStateCreated
+ c.state.StoppedByUser = false
+ c.state.RestartPolicyMatch = false
+
+ if !retainRetries {
+ c.state.RestartCount = 0
+ }
if err := c.save(); err != nil {
return err
@@ -851,7 +952,7 @@ func (c *Container) cleanupRuntime(ctx context.Context) error {
// Should only be done on ContainerStateStopped containers.
// Not necessary for ContainerStateExited - the container has already been
// removed from the runtime, so init() can proceed freely.
-func (c *Container) reinit(ctx context.Context) error {
+func (c *Container) reinit(ctx context.Context, retainRetries bool) error {
span, _ := opentracing.StartSpanFromContext(ctx, "reinit")
span.SetTag("struct", "container")
defer span.Finish()
@@ -863,7 +964,7 @@ func (c *Container) reinit(ctx context.Context) error {
}
// Initialize the container again
- return c.init(ctx)
+ return c.init(ctx, retainRetries)
}
// Initialize (if necessary) and start a container
@@ -901,12 +1002,12 @@ func (c *Container) initAndStart(ctx context.Context) (err error) {
if c.state.State == ContainerStateStopped {
logrus.Debugf("Recreating container %s in OCI runtime", c.ID())
- if err := c.reinit(ctx); err != nil {
+ if err := c.reinit(ctx, false); err != nil {
return err
}
} else if c.state.State == ContainerStateConfigured ||
c.state.State == ContainerStateExited {
- if err := c.init(ctx); err != nil {
+ if err := c.init(ctx, false); err != nil {
return err
}
}
@@ -950,6 +1051,11 @@ func (c *Container) stop(timeout uint) error {
return err
}
+ c.state.StoppedByUser = true
+ if err := c.save(); err != nil {
+ return errors.Wrapf(err, "error saving container %s state after stopping", c.ID())
+ }
+
// Wait until we have an exit file, and sync once we do
return c.waitForExitFileAndSync()
}
@@ -986,6 +1092,8 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (err e
return errors.Wrapf(ErrCtrStateInvalid, "unable to restart a container in a paused or unknown state")
}
+ c.newContainerEvent(events.Restart)
+
if c.state.State == ContainerStateRunning {
if err := c.stop(timeout); err != nil {
return err
@@ -1004,13 +1112,13 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (err e
if c.state.State == ContainerStateStopped {
// Reinitialize the container if we need to
- if err := c.reinit(ctx); err != nil {
+ if err := c.reinit(ctx, false); err != nil {
return err
}
} else if c.state.State == ContainerStateConfigured ||
c.state.State == ContainerStateExited {
// Initialize the container
- if err := c.init(ctx); err != nil {
+ if err := c.init(ctx, false); err != nil {
return err
}
}
diff --git a/libpod/events/config.go b/libpod/events/config.go
index 36387e835..810988205 100644
--- a/libpod/events/config.go
+++ b/libpod/events/config.go
@@ -134,6 +134,8 @@ const (
// Renumber indicates that lock numbers were reallocated at user
// request.
Renumber Status = "renumber"
+ // Restart indicates the target was restarted, via an API call or by its restart policy.
+ Restart Status = "restart"
// Restore ...
Restore Status = "restore"
// Save ...
diff --git a/libpod/events/events.go b/libpod/events/events.go
index 202c9db4e..650a47bfb 100644
--- a/libpod/events/events.go
+++ b/libpod/events/events.go
@@ -144,6 +144,8 @@ func StringToStatus(name string) (Status, error) {
return Remove, nil
case Renumber.String():
return Renumber, nil
+ case Restart.String():
+ return Restart, nil
case Restore.String():
return Restore, nil
case Save.String():
diff --git a/libpod/options.go b/libpod/options.go
index 86c04db09..7ec7dfe63 100644
--- a/libpod/options.go
+++ b/libpod/options.go
@@ -1239,6 +1239,41 @@ func WithUseImageHosts() CtrCreateOption {
}
}
+// WithRestartPolicy sets the container's restart policy. Valid values are
+// "no", "on-failure", and "always". The empty string is allowed, and will be
+// equivalent to "no".
+func WithRestartPolicy(policy string) CtrCreateOption {
+ return func(ctr *Container) error {
+ if ctr.valid {
+ return ErrCtrFinalized
+ }
+
+ switch policy {
+ case RestartPolicyNone, RestartPolicyNo, RestartPolicyOnFailure, RestartPolicyAlways:
+ ctr.config.RestartPolicy = policy
+ default:
+ return errors.Wrapf(ErrInvalidArg, "%q is not a valid restart policy", policy)
+ }
+
+ return nil
+ }
+}
+
+// WithRestartRetries sets the number of retries to use when restarting a
+// container with the "on-failure" restart policy.
+// 0 is an allowed value, and indicates infinite retries.
+func WithRestartRetries(tries uint) CtrCreateOption {
+ return func(ctr *Container) error {
+ if ctr.valid {
+ return ErrCtrFinalized
+ }
+
+ ctr.config.RestartRetries = tries
+
+ return nil
+ }
+}
+
// withIsInfra sets the container to be an infra container. This means the container will be sometimes hidden
// and expected to be the first container in the pod.
func withIsInfra() CtrCreateOption {
diff --git a/libpod/pod_api.go b/libpod/pod_api.go
index 9a6baf23e..9ed5c88eb 100644
--- a/libpod/pod_api.go
+++ b/libpod/pod_api.go
@@ -364,6 +364,13 @@ func (p *Pod) Kill(signal uint) (map[string]error, error) {
}
logrus.Debugf("Killed container %s with signal %d", ctr.ID(), signal)
+
+ ctr.state.StoppedByUser = true
+ if err := ctr.save(); err != nil {
+ ctrErrors[ctr.ID()] = err
+ }
+
+ ctr.lock.Unlock()
}
if len(ctrErrors) > 0 {