diff options
author | OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com> | 2022-09-26 13:37:40 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-26 13:37:40 +0200 |
commit | 1d63d9f488091c4bb82622d7bc22bb81d5c4eed2 (patch) | |
tree | 3d0fd12c05a9c08bcba5672d898bfaf17b8e9fff | |
parent | 17f3756884f2f65a1da753e5b58895dc0b9145e8 (diff) | |
parent | 4a053a821aab8891498cb5dd3f01ce3437fdf0ef (diff) | |
download | podman-1d63d9f488091c4bb82622d7bc22bb81d5c4eed2.tar.gz podman-1d63d9f488091c4bb82622d7bc22bb81d5c4eed2.tar.bz2 podman-1d63d9f488091c4bb82622d7bc22bb81d5c4eed2.zip |
Merge pull request #15820 from vrothberg/fix-15800
kube: notifyproxy: fix lost READY message
-rw-r--r-- | pkg/domain/infra/abi/play.go | 22 | ||||
-rw-r--r-- | pkg/systemd/notifyproxy/notifyproxy.go | 98 |
2 files changed, 82 insertions, 38 deletions
diff --git a/pkg/domain/infra/abi/play.go b/pkg/domain/infra/abi/play.go index 847e81e69..bd9117f72 100644 --- a/pkg/domain/infra/abi/play.go +++ b/pkg/domain/infra/abi/play.go @@ -10,6 +10,7 @@ import ( "path/filepath" "strconv" "strings" + "sync" buildahDefine "github.com/containers/buildah/define" "github.com/containers/common/libimage" @@ -698,9 +699,24 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY fmt.Println(playKubePod.ContainerErrors) } - // Wait for each proxy to receive a READY message. - for _, proxy := range sdNotifyProxies { - if err := proxy.WaitAndClose(); err != nil { + // Wait for each proxy to receive a READY message. Use a wait + // group to prevent the potential for ABBA kinds of deadlocks. + var wg sync.WaitGroup + errors := make([]error, len(sdNotifyProxies)) + for i := range sdNotifyProxies { + wg.Add(1) + go func(i int) { + err := sdNotifyProxies[i].WaitAndClose() + if err != nil { + err = fmt.Errorf("waiting for sd-notify proxy: %w", err) + } + errors[i] = err + wg.Done() + }(i) + } + wg.Wait() + for _, err := range errors { + if err != nil { return nil, err } } diff --git a/pkg/systemd/notifyproxy/notifyproxy.go b/pkg/systemd/notifyproxy/notifyproxy.go index ea1522bb3..4b92d9e6c 100644 --- a/pkg/systemd/notifyproxy/notifyproxy.go +++ b/pkg/systemd/notifyproxy/notifyproxy.go @@ -1,6 +1,7 @@ package notifyproxy import ( + "context" "errors" "fmt" "io" @@ -109,48 +110,75 @@ func (p *NotifyProxy) WaitAndClose() error { } }() - const bufferSize = 1024 - sBuilder := strings.Builder{} - for { - // Set a read deadline of one second such that we achieve a - // non-blocking read and can check if the container has already - // stopped running; in that case no READY message will be send - // and we're done. - if err := p.connection.SetReadDeadline(time.Now().Add(time.Second)); err != nil { - return err - } - + // Since reading from the connection is blocking, we need to spin up two + // goroutines. One waiting for the `READY` message, the other waiting + // for the container to stop running. + errorChan := make(chan error, 1) + readyChan := make(chan bool, 1) + + go func() { + // Read until the `READY` message is received or the connection + // is closed. + const bufferSize = 1024 + sBuilder := strings.Builder{} for { - buffer := make([]byte, bufferSize) - num, err := p.connection.Read(buffer) - if err != nil { - if !errors.Is(err, os.ErrDeadlineExceeded) && !errors.Is(err, io.EOF) { - return err + for { + buffer := make([]byte, bufferSize) + num, err := p.connection.Read(buffer) + if err != nil { + if !errors.Is(err, io.EOF) { + errorChan <- err + return + } + } + sBuilder.Write(buffer[:num]) + if num != bufferSize || buffer[num-1] == '\n' { + // Break as we read an entire line that + // we can inspect for the `READY` + // message. + break } } - sBuilder.Write(buffer[:num]) - if num != bufferSize || buffer[num-1] == '\n' { - break - } - } - for _, line := range strings.Split(sBuilder.String(), "\n") { - if line == daemon.SdNotifyReady { - return nil + for _, line := range strings.Split(sBuilder.String(), "\n") { + if line == daemon.SdNotifyReady { + readyChan <- true + return + } } + sBuilder.Reset() } - sBuilder.Reset() + }() - if p.container == nil { - continue - } + if p.container != nil { + // Create a cancellable context to make sure the goroutine + // below terminates. + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go func() { + select { + case <-ctx.Done(): + return + default: + state, err := p.container.State() + if err != nil { + errorChan <- err + return + } + if state != define.ContainerStateRunning { + errorChan <- fmt.Errorf("%w: %s", ErrNoReadyMessage, p.container.ID()) + return + } + time.Sleep(time.Second) + } + }() + } - state, err := p.container.State() - if err != nil { - return err - } - if state != define.ContainerStateRunning { - return fmt.Errorf("%w: %s", ErrNoReadyMessage, p.container.ID()) - } + // Wait for the ready/error channel. + select { + case <-readyChan: + return nil + case err := <-errorChan: + return err } } |