summary | refs | log | tree | commit | diff
path: root/pkg/parallel
diff options
context:
space:
mode:
author: Matthew Heon <matthew.heon@pm.me> 2020-08-19 16:15:35 -0400
committer: Matthew Heon <mheon@redhat.com> 2020-10-07 10:00:11 -0400
commit: 2bb2425704cc7181c5eb924400b351b3a2d9a592 (patch)
tree: 0a850a365ef085fce496354a241649b7e779877a /pkg/parallel
parent: a7500e54a4646c7db477349e2530ac13df77b8fa (diff)
download: podman-2bb2425704cc7181c5eb924400b351b3a2d9a592.tar.gz
podman-2bb2425704cc7181c5eb924400b351b3a2d9a592.tar.bz2
podman-2bb2425704cc7181c5eb924400b351b3a2d9a592.zip
Move pod jobs to parallel execution

Make Podman pod operations that do not involve starting containers (which needs to be done in a specific order) use the same parallel operation code we use to make `podman stop` on large numbers of containers fast. We were previously stopping containers in a pod serially, which could take up to the timeout (default 15 seconds) for each container - stopping 100 containers that do not respond to SIGTERM would take 25 minutes.

To do this, refactor the parallel operation code a bit to remove its dependency on libpod (damn circular import restrictions...) and use parallel functions that just re-use the standard container API operations - maximizes code reuse (previously each pod handler had a separate implementation of the container function it performed).

This is a bit of a palate cleanser after fighting CI for two days - nice to be able to return to a land of sanity.

Signed-off-by: Matthew Heon <matthew.heon@pm.me>

Diffstat (limited to 'pkg/parallel')
-rw-r--r-- pkg/parallel/ctr/ctr.go (renamed from pkg/parallel/parallel_linux.go) 45
-rw-r--r-- pkg/parallel/parallel.go 30
2 files changed, 44 insertions, 31 deletions
diff --git a/pkg/parallel/parallel_linux.go b/pkg/parallel/ctr/ctr.go
index 442db1502..e8c1292b8 100644
--- a/pkg/parallel/parallel_linux.go
+++ b/pkg/parallel/ctr/ctr.go
@@ -1,11 +1,10 @@
-package parallel
+package ctr
import (
"context"
- "sync"
"github.com/containers/podman/v2/libpod"
- "github.com/pkg/errors"
+ "github.com/containers/podman/v2/pkg/parallel"
"github.com/sirupsen/logrus"
)
@@ -14,44 +13,28 @@ import (
// If no error is returned, each container specified in ctrs will have an entry
// in the resulting map; containers with no error will be set to nil.
func ContainerOp(ctx context.Context, ctrs []*libpod.Container, applyFunc func(*libpod.Container) error) (map[*libpod.Container]error, error) {
- jobControlLock.RLock()
- defer jobControlLock.RUnlock()
-
// We could use a sync.Map but given Go's lack of generic I'd rather
// just use a lock on a normal map...
// The expectation is that most of the time is spent in applyFunc
// anyways.
var (
- errMap = make(map[*libpod.Container]error)
- errLock sync.Mutex
- allDone sync.WaitGroup
+ errMap = make(map[*libpod.Container]<-chan error)
)
for _, ctr := range ctrs {
- // Block until a thread is available
- if err := jobControl.Acquire(ctx, 1); err != nil {
- return nil, errors.Wrapf(err, "error acquiring job control semaphore")
- }
-
- allDone.Add(1)
-
c := ctr
- go func() {
- logrus.Debugf("Launching job on container %s", c.ID())
-
- err := applyFunc(c)
- errLock.Lock()
- errMap[c] = err
- errLock.Unlock()
-
- allDone.Done()
- jobControl.Release(1)
- }()
+ logrus.Debugf("Starting parallel job on container %s", c.ID())
+ errChan := parallel.Enqueue(ctx, func() error {
+ return applyFunc(c)
+ })
+ errMap[c] = errChan
}
- allDone.Wait()
+ finalErr := make(map[*libpod.Container]error)
+ for ctr, errChan := range errMap {
+ err := <-errChan
+ finalErr[ctr] = err
+ }
- return errMap, nil
+ return finalErr, nil
}
-
-// TODO: Add an Enqueue() function that returns a promise
diff --git a/pkg/parallel/parallel.go b/pkg/parallel/parallel.go
index c9e4da50d..4da7e0f89 100644
--- a/pkg/parallel/parallel.go
+++ b/pkg/parallel/parallel.go
@@ -1,6 +1,7 @@
package parallel
import (
+ "context"
"sync"
"github.com/pkg/errors"
@@ -42,3 +43,32 @@ func SetMaxThreads(threads uint) error {
func GetMaxThreads() uint {
return numThreads
}
+
+// Enqueue adds a single function to the parallel jobs queue. This function will
+// be run when an unused thread is available.
+// Returns a receive-only error channel that will return the error (if any) from
+// the provided function fn when fn has finished executing. The channel will be
+// closed after this.
+func Enqueue(ctx context.Context, fn func() error) <-chan error {
+ retChan := make(chan error)
+
+ go func() {
+ jobControlLock.RLock()
+ defer jobControlLock.RUnlock()
+
+ defer close(retChan)
+
+ if err := jobControl.Acquire(ctx, 1); err != nil {
+ retChan <- errors.Wrapf(err, "error acquiring job control semaphore")
+ return
+ }
+
+ err := fn()
+
+ jobControl.Release(1)
+
+ retChan <- err
+ }()
+
+ return retChan
+}