From e996bb583d92c746dea36c71c3f747077271b75f Mon Sep 17 00:00:00 2001
From: Matthew Heon
Date: Thu, 2 Jul 2020 12:59:10 -0400
Subject: Print errors from individual containers in pods

The infra/abi code for pods was written in a flawed way, assuming that
the map[string]error containing individual container errors was only
set when the global error for the pod function was nil; that is not
accurate, and we are actually *guaranteed* to set the global error
whenever an operation on any individual container fails. Thus, we'd
never actually include individual container errors, because the
infra/abi code assumed that err being set meant everything failed and
no container operations were attempted.

We were originally setting the cause of the error to something
nonsensical ("container already exists"), so I made a new error
indicating that some containers in the pod failed. We can then ignore
that error when building the report on the pod operation and actually
return errors from individual containers.

Unfortunately, this exposed another weakness of the infra/abi code,
which was discarding the container IDs. Errors from individual
containers are not guaranteed to identify which container they came
from, hence the use of map[string]error in the Pod API functions.
Rather than restructuring the structs we return from pkg/infra, I just
wrapped the returned errors with a message including the ID of the
container.

Signed-off-by: Matthew Heon --- libpod/define/errors.go | 4 ++++ libpod/pod_api.go | 12 ++++++------ pkg/domain/infra/abi/pods.go | 36 ++++++++++++++++++------------------ 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/libpod/define/errors.go b/libpod/define/errors.go index 98dc603d1..200c7e3d4 100644 --- a/libpod/define/errors.go +++ b/libpod/define/errors.go @@ -70,6 +70,10 @@ var ( // ErrInternal indicates an internal library error ErrInternal = errors.New("internal libpod error") + // ErrPodPartialFail indicates that a pod operation was only partially + // successful, and some containers within the pod failed. + ErrPodPartialFail = errors.New("some containers failed") + // ErrDetach indicates that an attach session was manually detached by // the user. ErrDetach = utils.ErrDetach diff --git a/libpod/pod_api.go b/libpod/pod_api.go index 98f4cad73..c65890d66 100644 --- a/libpod/pod_api.go +++ b/libpod/pod_api.go @@ -59,7 +59,7 @@ func (p *Pod) Start(ctx context.Context) (map[string]error, error) { } if len(ctrErrors) > 0 { - return ctrErrors, errors.Wrapf(define.ErrCtrExists, "error starting some containers") + return ctrErrors, errors.Wrapf(define.ErrPodPartialFail, "error starting some containers") } defer p.newPodEvent(events.Start) return nil, nil @@ -139,7 +139,7 @@ func (p *Pod) StopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m } if len(ctrErrors) > 0 { - return ctrErrors, errors.Wrapf(define.ErrCtrExists, "error stopping some containers") + return ctrErrors, errors.Wrapf(define.ErrPodPartialFail, "error stopping some containers") } defer p.newPodEvent(events.Stop) return nil, nil @@ -208,7 +208,7 @@ func (p *Pod) Pause() (map[string]error, error) { } if len(ctrErrors) > 0 { - return ctrErrors, errors.Wrapf(define.ErrCtrExists, "error pausing some containers") + return ctrErrors, errors.Wrapf(define.ErrPodPartialFail, "error pausing some containers") } defer p.newPodEvent(events.Pause) return nil, nil @@ -267,7 +267,7 @@ 
func (p *Pod) Unpause() (map[string]error, error) { } if len(ctrErrors) > 0 { - return ctrErrors, errors.Wrapf(define.ErrCtrExists, "error unpausing some containers") + return ctrErrors, errors.Wrapf(define.ErrPodPartialFail, "error unpausing some containers") } defer p.newPodEvent(events.Unpause) @@ -321,7 +321,7 @@ func (p *Pod) Restart(ctx context.Context) (map[string]error, error) { } if len(ctrErrors) > 0 { - return ctrErrors, errors.Wrapf(define.ErrCtrExists, "error stopping some containers") + return ctrErrors, errors.Wrapf(define.ErrPodPartialFail, "error stopping some containers") } p.newPodEvent(events.Stop) p.newPodEvent(events.Start) @@ -387,7 +387,7 @@ func (p *Pod) Kill(signal uint) (map[string]error, error) { } if len(ctrErrors) > 0 { - return ctrErrors, errors.Wrapf(define.ErrCtrExists, "error killing some containers") + return ctrErrors, errors.Wrapf(define.ErrPodPartialFail, "error killing some containers") } defer p.newPodEvent(events.Kill) return nil, nil diff --git a/pkg/domain/infra/abi/pods.go b/pkg/domain/infra/abi/pods.go index 4a122f54d..055a37b3e 100644 --- a/pkg/domain/infra/abi/pods.go +++ b/pkg/domain/infra/abi/pods.go @@ -67,14 +67,14 @@ func (ic *ContainerEngine) PodKill(ctx context.Context, namesOrIds []string, opt for _, p := range pods { report := entities.PodKillReport{Id: p.ID()} conErrs, err := p.Kill(uint(sig)) - if err != nil { + if err != nil && errors.Cause(err) != define.ErrPodPartialFail { report.Errs = []error{err} reports = append(reports, &report) continue } if len(conErrs) > 0 { - for _, err := range conErrs { - report.Errs = append(report.Errs, err) + for id, err := range conErrs { + report.Errs = append(report.Errs, errors.Wrapf(err, "error killing container %s", id)) } reports = append(reports, &report) continue @@ -93,13 +93,13 @@ func (ic *ContainerEngine) PodPause(ctx context.Context, namesOrIds []string, op for _, p := range pods { report := entities.PodPauseReport{Id: p.ID()} errs, err := p.Pause() - if err != 
nil { + if err != nil && errors.Cause(err) != define.ErrPodPartialFail { report.Errs = []error{err} continue } if len(errs) > 0 { - for _, v := range errs { - report.Errs = append(report.Errs, v) + for id, v := range errs { + report.Errs = append(report.Errs, errors.Wrapf(v, "error pausing container %s", id)) } reports = append(reports, &report) continue @@ -118,13 +118,13 @@ func (ic *ContainerEngine) PodUnpause(ctx context.Context, namesOrIds []string, for _, p := range pods { report := entities.PodUnpauseReport{Id: p.ID()} errs, err := p.Unpause() - if err != nil { + if err != nil && errors.Cause(err) != define.ErrPodPartialFail { report.Errs = []error{err} continue } if len(errs) > 0 { - for _, v := range errs { - report.Errs = append(report.Errs, v) + for id, v := range errs { + report.Errs = append(report.Errs, errors.Wrapf(v, "error unpausing container %s", id)) } reports = append(reports, &report) continue @@ -143,13 +143,13 @@ func (ic *ContainerEngine) PodStop(ctx context.Context, namesOrIds []string, opt for _, p := range pods { report := entities.PodStopReport{Id: p.ID()} errs, err := p.StopWithTimeout(ctx, false, options.Timeout) - if err != nil { + if err != nil && errors.Cause(err) != define.ErrPodPartialFail { report.Errs = []error{err} continue } if len(errs) > 0 { - for _, v := range errs { - report.Errs = append(report.Errs, v) + for id, v := range errs { + report.Errs = append(report.Errs, errors.Wrapf(v, "error stopping container %s", id)) } reports = append(reports, &report) continue @@ -168,14 +168,14 @@ func (ic *ContainerEngine) PodRestart(ctx context.Context, namesOrIds []string, for _, p := range pods { report := entities.PodRestartReport{Id: p.ID()} errs, err := p.Restart(ctx) - if err != nil { + if err != nil && errors.Cause(err) != define.ErrPodPartialFail { report.Errs = []error{err} reports = append(reports, &report) continue } if len(errs) > 0 { - for _, v := range errs { - report.Errs = append(report.Errs, v) + for id, v := range 
errs { + report.Errs = append(report.Errs, errors.Wrapf(v, "error restarting container %s", id)) } reports = append(reports, &report) continue @@ -195,14 +195,14 @@ func (ic *ContainerEngine) PodStart(ctx context.Context, namesOrIds []string, op for _, p := range pods { report := entities.PodStartReport{Id: p.ID()} errs, err := p.Start(ctx) - if err != nil { + if err != nil && errors.Cause(err) != define.ErrPodPartialFail { report.Errs = []error{err} reports = append(reports, &report) continue } if len(errs) > 0 { - for _, v := range errs { - report.Errs = append(report.Errs, v) + for id, v := range errs { + report.Errs = append(report.Errs, errors.Wrapf(v, "error starting container %s", id)) } reports = append(reports, &report) continue -- cgit v1.2.3-54-g00ecf