summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com> 2022-09-26 13:37:40 +0200
committer: GitHub <noreply@github.com> 2022-09-26 13:37:40 +0200
commit: 1d63d9f488091c4bb82622d7bc22bb81d5c4eed2 (patch)
tree: 3d0fd12c05a9c08bcba5672d898bfaf17b8e9fff
parent: 17f3756884f2f65a1da753e5b58895dc0b9145e8 (diff)
parent: 4a053a821aab8891498cb5dd3f01ce3437fdf0ef (diff)
download: podman-1d63d9f488091c4bb82622d7bc22bb81d5c4eed2.tar.gz
podman-1d63d9f488091c4bb82622d7bc22bb81d5c4eed2.tar.bz2
podman-1d63d9f488091c4bb82622d7bc22bb81d5c4eed2.zip
Merge pull request #15820 from vrothberg/fix-15800
kube: notifyproxy: fix lost READY message
-rw-r--r-- pkg/domain/infra/abi/play.go | 22
-rw-r--r-- pkg/systemd/notifyproxy/notifyproxy.go | 98
2 files changed, 82 insertions, 38 deletions
diff --git a/pkg/domain/infra/abi/play.go b/pkg/domain/infra/abi/play.go
index 847e81e69..bd9117f72 100644
--- a/pkg/domain/infra/abi/play.go
+++ b/pkg/domain/infra/abi/play.go
@@ -10,6 +10,7 @@ import (
"path/filepath"
"strconv"
"strings"
+ "sync"
buildahDefine "github.com/containers/buildah/define"
"github.com/containers/common/libimage"
@@ -698,9 +699,24 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
fmt.Println(playKubePod.ContainerErrors)
}
- // Wait for each proxy to receive a READY message.
- for _, proxy := range sdNotifyProxies {
- if err := proxy.WaitAndClose(); err != nil {
+ // Wait for each proxy to receive a READY message. Use a wait
+ // group to prevent the potential for ABBA kinds of deadlocks.
+ var wg sync.WaitGroup
+ errors := make([]error, len(sdNotifyProxies))
+ for i := range sdNotifyProxies {
+ wg.Add(1)
+ go func(i int) {
+ err := sdNotifyProxies[i].WaitAndClose()
+ if err != nil {
+ err = fmt.Errorf("waiting for sd-notify proxy: %w", err)
+ }
+ errors[i] = err
+ wg.Done()
+ }(i)
+ }
+ wg.Wait()
+ for _, err := range errors {
+ if err != nil {
return nil, err
}
}
diff --git a/pkg/systemd/notifyproxy/notifyproxy.go b/pkg/systemd/notifyproxy/notifyproxy.go
index ea1522bb3..4b92d9e6c 100644
--- a/pkg/systemd/notifyproxy/notifyproxy.go
+++ b/pkg/systemd/notifyproxy/notifyproxy.go
@@ -1,6 +1,7 @@
package notifyproxy
import (
+ "context"
"errors"
"fmt"
"io"
@@ -109,48 +110,75 @@ func (p *NotifyProxy) WaitAndClose() error {
}
}()
- const bufferSize = 1024
- sBuilder := strings.Builder{}
- for {
- // Set a read deadline of one second such that we achieve a
- // non-blocking read and can check if the container has already
- // stopped running; in that case no READY message will be send
- // and we're done.
- if err := p.connection.SetReadDeadline(time.Now().Add(time.Second)); err != nil {
- return err
- }
-
+ // Since reading from the connection is blocking, we need to spin up two
+ // goroutines. One waiting for the `READY` message, the other waiting
+ // for the container to stop running.
+ errorChan := make(chan error, 1)
+ readyChan := make(chan bool, 1)
+
+ go func() {
+ // Read until the `READY` message is received or the connection
+ // is closed.
+ const bufferSize = 1024
+ sBuilder := strings.Builder{}
for {
- buffer := make([]byte, bufferSize)
- num, err := p.connection.Read(buffer)
- if err != nil {
- if !errors.Is(err, os.ErrDeadlineExceeded) && !errors.Is(err, io.EOF) {
- return err
+ for {
+ buffer := make([]byte, bufferSize)
+ num, err := p.connection.Read(buffer)
+ if err != nil {
+ if !errors.Is(err, io.EOF) {
+ errorChan <- err
+ return
+ }
+ }
+ sBuilder.Write(buffer[:num])
+ if num != bufferSize || buffer[num-1] == '\n' {
+ // Break as we read an entire line that
+ // we can inspect for the `READY`
+ // message.
+ break
}
}
- sBuilder.Write(buffer[:num])
- if num != bufferSize || buffer[num-1] == '\n' {
- break
- }
- }
- for _, line := range strings.Split(sBuilder.String(), "\n") {
- if line == daemon.SdNotifyReady {
- return nil
+ for _, line := range strings.Split(sBuilder.String(), "\n") {
+ if line == daemon.SdNotifyReady {
+ readyChan <- true
+ return
+ }
}
+ sBuilder.Reset()
}
- sBuilder.Reset()
+ }()
- if p.container == nil {
- continue
- }
+ if p.container != nil {
+ // Create a cancellable context to make sure the goroutine
+ // below terminates.
+ ctx, cancel := context.WithCancel(context.Background())
+ defer cancel()
+ go func() {
+ select {
+ case <-ctx.Done():
+ return
+ default:
+ state, err := p.container.State()
+ if err != nil {
+ errorChan <- err
+ return
+ }
+ if state != define.ContainerStateRunning {
+ errorChan <- fmt.Errorf("%w: %s", ErrNoReadyMessage, p.container.ID())
+ return
+ }
+ time.Sleep(time.Second)
+ }
+ }()
+ }
- state, err := p.container.State()
- if err != nil {
- return err
- }
- if state != define.ContainerStateRunning {
- return fmt.Errorf("%w: %s", ErrNoReadyMessage, p.container.ID())
- }
+ // Wait for the ready/error channel.
+ select {
+ case <-readyChan:
+ return nil
+ case err := <-errorChan:
+ return err
}
}