author     Valentin Rothberg <vrothberg@redhat.com>    2022-05-25 10:08:17 +0200
committer  Valentin Rothberg <vrothberg@redhat.com>    2022-05-25 10:17:46 +0200
commit     4a447a2133ac7e4cc42bcf5057c704ce07d31d6f
tree       1f6d3a8f12c5b23932635c34709892cf5cf48463
parent     b13184dfb4fbef640dc3fe421d32de068672d4ed
work queue: simplify and use a wait group
Simplify the work-queue implementation by using a wait group. Once all queued work items are done, the channel can be closed.

The system tests revealed a flake (i.e., #14351) indicating that the service container does not always get stopped, which suggests a race condition when queuing items. Those items are queued in a goroutine to prevent potential deadlocks if the queue ever filled up too quickly. The race condition in question is that if a work item queues another, the goroutine doing the queuing may not be scheduled fast enough before the runtime shuts down; this seems to happen fairly easily on the slow CI machines. The wait group fixes this race and allows for simplifying the code.

Also increase the queue's buffer size to 10 to make things slightly faster.

[NO NEW TESTS NEEDED] as we are fixing a flake.

Fixes: #14351
Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
-rw-r--r--  libpod/runtime.go         14
-rw-r--r--  libpod/runtime_worker.go  33
2 files changed, 11 insertions(+), 36 deletions(-)
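
For context before reading the diff, here is a minimal, self-contained sketch of the pattern this patch introduces. The workQueue type, its shutdown method, and the main function are illustrative stand-ins for libpod's Runtime and its Shutdown method; the channel and wait-group logic mirrors the diff below.

package main

import (
	"fmt"
	"sync"
)

// workQueue is an illustrative stand-in for the worker fields on libpod's
// Runtime: a buffered channel drained by a single goroutine, plus a
// sync.WaitGroup that tracks queued-but-unfinished work items.
type workQueue struct {
	workerChannel chan func()
	workerGroup   sync.WaitGroup
}

func (q *workQueue) startWorker() {
	q.workerChannel = make(chan func(), 10)
	go func() {
		for w := range q.workerChannel {
			w()
			q.workerGroup.Done()
		}
	}()
}

func (q *workQueue) queueWork(f func()) {
	// Add(1) runs synchronously, so shutdown's Wait() accounts for this item
	// even if the sending goroutine below has not been scheduled yet.
	q.workerGroup.Add(1)
	go func() {
		q.workerChannel <- f
	}()
}

func (q *workQueue) shutdown() {
	// Wait until every queued item has run, then close the channel so the
	// worker goroutine exits its range loop.
	q.workerGroup.Wait()
	close(q.workerChannel)
}

func main() {
	q := &workQueue{}
	q.startWorker()
	q.queueWork(func() {
		fmt.Println("first item")
		// A work item may queue follow-up work; the wait group still counts
		// it because Add(1) happens before the first item's Done().
		q.queueWork(func() { fmt.Println("follow-up item") })
	})
	q.shutdown() // blocks until both items have run
}

The practical difference from the pre-patch code is that shutdown no longer needs a separate workerShutdown channel or a polling loop: draining and termination both fall out of Wait() followed by close().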
diff --git a/libpod/runtime.go b/libpod/runtime.go
index 4efa7b8e8..00fa2fe88 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -11,6 +11,7 @@ import (
"regexp"
"strconv"
"strings"
+ "sync"
"syscall"
"time"
@@ -87,8 +88,8 @@ type Runtime struct {
lockManager lock.Manager
// Worker
- workerShutdown chan bool
- workerChannel chan func()
+ workerChannel chan func()
+ workerGroup sync.WaitGroup
// syslog describes whenever logrus should log to the syslog as well.
// Note that the syslog hook will be enabled early in cmd/podman/syslog_linux.go
@@ -823,12 +824,9 @@ func (r *Runtime) Shutdown(force bool) error {
return define.ErrRuntimeStopped
}
- if r.workerShutdown != nil {
- // Signal the worker routine to shutdown. The routine will
- // process all pending work items and then read from the
- // channel; we're blocked until all work items have been
- // processed.
- r.workerShutdown <- true
+ if r.workerChannel != nil {
+ r.workerGroup.Wait()
+ close(r.workerChannel)
}
r.valid = false
diff --git a/libpod/runtime_worker.go b/libpod/runtime_worker.go
index ca44a27f7..9d41321b2 100644
--- a/libpod/runtime_worker.go
+++ b/libpod/runtime_worker.go
@@ -1,40 +1,17 @@
package libpod
-import (
- "time"
-)
-
func (r *Runtime) startWorker() {
- if r.workerChannel == nil {
- r.workerChannel = make(chan func(), 1)
- r.workerShutdown = make(chan bool)
- }
+ r.workerChannel = make(chan func(), 10)
go func() {
- for {
- // Make sure to read all workers before
- // checking if we're about to shutdown.
- for len(r.workerChannel) > 0 {
- w := <-r.workerChannel
- w()
- }
-
- select {
- // We'll read from the shutdown channel only when all
- // items above have been processed.
- //
- // (*Runtime).Shutdown() will block until until the
- // item is read.
- case <-r.workerShutdown:
- return
-
- default:
- time.Sleep(100 * time.Millisecond)
- }
+ for w := range r.workerChannel {
+ w()
+ r.workerGroup.Done()
}
}()
}
func (r *Runtime) queueWork(f func()) {
+ r.workerGroup.Add(1)
go func() {
r.workerChannel <- f
}()
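
A note on the ordering in queueWork above, extending the hedged workQueue sketch shown after the commit message: the WaitGroup increment has to happen synchronously, before the goroutine that performs the potentially blocking channel send. A hypothetical variant that moves Add(1) into the goroutine would reintroduce exactly the race described in the commit message:

// Hypothetical racy variant (not part of the patch): if shutdown() runs
// before this goroutine is scheduled, workerGroup.Wait() can return while
// the counter is still zero, the channel gets closed, and the pending send
// either panics on the closed channel or the item never runs.
func (q *workQueue) queueWorkRacy(f func()) {
	go func() {
		q.workerGroup.Add(1) // too late: Wait() may already have returned
		q.workerChannel <- f
	}()
}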