Diffstat (limited to 'libpod')
-rw-r--r--  libpod/boltdb_state.go             | 248
-rw-r--r--  libpod/boltdb_state_internal.go    |  22
-rw-r--r--  libpod/container_api.go            |  97
-rw-r--r--  libpod/container_config.go         |   1
-rw-r--r--  libpod/container_exec.go           |  21
-rw-r--r--  libpod/container_internal.go       |  84
-rw-r--r--  libpod/container_internal_linux.go |   3
-rw-r--r--  libpod/define/errors.go            |   4
-rw-r--r--  libpod/define/volume_inspect.go    |   6
-rw-r--r--  libpod/events.go                   |  13
-rw-r--r--  libpod/events/config.go            |   4
-rw-r--r--  libpod/events/events.go            |   4
-rw-r--r--  libpod/events/journal_linux.go     |   2
-rw-r--r--  libpod/healthcheck.go              |  15
-rw-r--r--  libpod/oci_conmon_linux.go         | 229
-rw-r--r--  libpod/plugin/volume_api.go        |  18
-rw-r--r--  libpod/pod_internal.go             |   2
-rw-r--r--  libpod/runtime_ctr.go              |  12
-rw-r--r--  libpod/runtime_pod_linux.go        |  35
-rw-r--r--  libpod/runtime_volume.go           |   2
-rw-r--r--  libpod/runtime_volume_linux.go     | 100
-rw-r--r--  libpod/state.go                    |   9
-rw-r--r--  libpod/stats.go                    |  24
-rw-r--r--  libpod/util_linux.go               |  27
-rw-r--r--  libpod/volume_internal.go          |   6
25 files changed, 826 insertions, 162 deletions
diff --git a/libpod/boltdb_state.go b/libpod/boltdb_state.go
index c3db6152a..471f64b84 100644
--- a/libpod/boltdb_state.go
+++ b/libpod/boltdb_state.go
@@ -5,8 +5,10 @@ import (
"fmt"
"net"
"os"
+ "strconv"
"strings"
"sync"
+ "time"
"github.com/containers/common/libnetwork/types"
"github.com/containers/podman/v4/libpod/define"
@@ -63,6 +65,13 @@ type BoltState struct {
// initially created the database. This must match for any further instances
// that access the database, to ensure that state mismatches with
// containers/storage do not occur.
+// - exitCodeBucket/exitCodeTimeStampBucket: (#14559) exit codes must be part
+// of the database to resolve a previous race condition when one process waits
+// for the exit file to be written and another process removes it along with
+// the container during auto-removal. The same race would happen trying to
+// read the exit code from the containers bucket. Hence, exit codes go into
+// their own bucket. To avoid the rather expensive JSON (un)marshaling, we
+// have two buckets: one for the exit codes, the other for the timestamps.
// NewBoltState creates a new bolt-backed state database
func NewBoltState(path string, runtime *Runtime) (State, error) {
@@ -98,6 +107,8 @@ func NewBoltState(path string, runtime *Runtime) (State, error) {
allVolsBkt,
execBkt,
runtimeConfigBkt,
+ exitCodeBkt,
+ exitCodeTimeStampBkt,
}
// Does the DB need an update?
@@ -192,6 +203,45 @@ func (s *BoltState) Refresh() error {
return err
}
+ exitCodeBucket, err := getExitCodeBucket(tx)
+ if err != nil {
+ return err
+ }
+
+ timeStampBucket, err := getExitCodeTimeStampBucket(tx)
+ if err != nil {
+ return err
+ }
+
+ // Clear all exec exit codes
+ toRemoveExitCodes := []string{}
+ err = exitCodeBucket.ForEach(func(id, _ []byte) error {
+ toRemoveExitCodes = append(toRemoveExitCodes, string(id))
+ return nil
+ })
+ if err != nil {
+ return errors.Wrapf(err, "error reading exit codes bucket")
+ }
+ for _, id := range toRemoveExitCodes {
+ if err := exitCodeBucket.Delete([]byte(id)); err != nil {
+ return errors.Wrapf(err, "error removing exit code for ID %s", id)
+ }
+ }
+
+ toRemoveTimeStamps := []string{}
+ err = timeStampBucket.ForEach(func(id, _ []byte) error {
+ toRemoveTimeStamps = append(toRemoveTimeStamps, string(id))
+ return nil
+ })
+ if err != nil {
+ return errors.Wrapf(err, "reading timestamps bucket")
+ }
+ for _, id := range toRemoveTimeStamps {
+ if err := timeStampBucket.Delete([]byte(id)); err != nil {
+ return errors.Wrapf(err, "removing timestamp for ID %s", id)
+ }
+ }
+
// Iterate through all IDs. Check if they are containers.
// If they are, unmarshal their state, and then clear
// PID, mountpoint, and state for all of them
@@ -1341,6 +1391,204 @@ func (s *BoltState) GetContainerConfig(id string) (*ContainerConfig, error) {
return config, nil
}
+// AddContainerExitCode adds the exit code for the specified container to the database.
+func (s *BoltState) AddContainerExitCode(id string, exitCode int32) error {
+ if len(id) == 0 {
+ return define.ErrEmptyID
+ }
+
+ if !s.valid {
+ return define.ErrDBClosed
+ }
+
+ db, err := s.getDBCon()
+ if err != nil {
+ return err
+ }
+ defer s.deferredCloseDBCon(db)
+
+ rawID := []byte(id)
+ rawExitCode := []byte(strconv.Itoa(int(exitCode)))
+ rawTimeStamp, err := time.Now().MarshalText()
+ if err != nil {
+ return fmt.Errorf("marshaling exit-code time stamp: %w", err)
+ }
+
+ return db.Update(func(tx *bolt.Tx) error {
+ exitCodeBucket, err := getExitCodeBucket(tx)
+ if err != nil {
+ return err
+ }
+ timeStampBucket, err := getExitCodeTimeStampBucket(tx)
+ if err != nil {
+ return err
+ }
+
+ if err := exitCodeBucket.Put(rawID, rawExitCode); err != nil {
+ return fmt.Errorf("adding exit code of container %s to DB: %w", id, err)
+ }
+ if err := timeStampBucket.Put(rawID, rawTimeStamp); err != nil {
+ if rmErr := exitCodeBucket.Delete(rawID); rmErr != nil {
+ logrus.Errorf("Removing exit code of container %s from DB: %v", id, rmErr)
+ }
+ return fmt.Errorf("adding exit-code time stamp of container %s to DB: %w", id, err)
+ }
+
+ return nil
+ })
+}
+
+// GetContainerExitCode returns the exit code for the specified container.
+func (s *BoltState) GetContainerExitCode(id string) (int32, error) {
+ if len(id) == 0 {
+ return -1, define.ErrEmptyID
+ }
+
+ if !s.valid {
+ return -1, define.ErrDBClosed
+ }
+
+ db, err := s.getDBCon()
+ if err != nil {
+ return -1, err
+ }
+ defer s.deferredCloseDBCon(db)
+
+ rawID := []byte(id)
+ result := int32(-1)
+ return result, db.View(func(tx *bolt.Tx) error {
+ exitCodeBucket, err := getExitCodeBucket(tx)
+ if err != nil {
+ return err
+ }
+
+ rawExitCode := exitCodeBucket.Get(rawID)
+ if rawExitCode == nil {
+ return fmt.Errorf("getting exit code of container %s from DB: %w", id, define.ErrNoSuchExitCode)
+ }
+
+ exitCode, err := strconv.Atoi(string(rawExitCode))
+ if err != nil {
+ return fmt.Errorf("converting raw exit code %v of container %s: %w", rawExitCode, id, err)
+ }
+
+ result = int32(exitCode)
+ return nil
+ })
+}
+
+// GetContainerExitCodeTimeStamp returns the time stamp when the exit code of
+// the specified container was added to the database.
+func (s *BoltState) GetContainerExitCodeTimeStamp(id string) (*time.Time, error) {
+ if len(id) == 0 {
+ return nil, define.ErrEmptyID
+ }
+
+ if !s.valid {
+ return nil, define.ErrDBClosed
+ }
+
+ db, err := s.getDBCon()
+ if err != nil {
+ return nil, err
+ }
+ defer s.deferredCloseDBCon(db)
+
+ rawID := []byte(id)
+ var result time.Time
+ return &result, db.View(func(tx *bolt.Tx) error {
+ timeStampBucket, err := getExitCodeTimeStampBucket(tx)
+ if err != nil {
+ return err
+ }
+
+ rawTimeStamp := timeStampBucket.Get(rawID)
+ if rawTimeStamp == nil {
+ return fmt.Errorf("getting exit-code time stamp of container %s from DB: %w", id, define.ErrNoSuchExitCode)
+ }
+
+ if err := result.UnmarshalText(rawTimeStamp); err != nil {
+ return fmt.Errorf("converting raw time stamp %v of container %s from DB: %w", rawTimeStamp, id, err)
+ }
+
+ return nil
+ })
+}
+
+// PruneContainerExitCodes removes exit codes older than 5 minutes.
+func (s *BoltState) PruneContainerExitCodes() error {
+ if !s.valid {
+ return define.ErrDBClosed
+ }
+
+ db, err := s.getDBCon()
+ if err != nil {
+ return err
+ }
+ defer s.deferredCloseDBCon(db)
+
+ toRemoveIDs := []string{}
+
+ threshold := time.Minute * 5
+ err = db.View(func(tx *bolt.Tx) error {
+ timeStampBucket, err := getExitCodeTimeStampBucket(tx)
+ if err != nil {
+ return err
+ }
+
+ return timeStampBucket.ForEach(func(rawID, rawTimeStamp []byte) error {
+ var timeStamp time.Time
+ if err := timeStamp.UnmarshalText(rawTimeStamp); err != nil {
+ return fmt.Errorf("converting raw time stamp %v of container %s from DB: %w", rawTimeStamp, string(rawID), err)
+ }
+ if time.Since(timeStamp) > threshold {
+ toRemoveIDs = append(toRemoveIDs, string(rawID))
+ }
+ return nil
+ })
+ })
+ if err != nil {
+ return errors.Wrapf(err, "reading exit codes to prune")
+ }
+
+ if len(toRemoveIDs) > 0 {
+ err = db.Update(func(tx *bolt.Tx) error {
+ exitCodeBucket, err := getExitCodeBucket(tx)
+ if err != nil {
+ return err
+ }
+ timeStampBucket, err := getExitCodeTimeStampBucket(tx)
+ if err != nil {
+ return err
+ }
+
+ var finalErr error
+ for _, id := range toRemoveIDs {
+ rawID := []byte(id)
+ if err := exitCodeBucket.Delete(rawID); err != nil {
+ if finalErr != nil {
+ logrus.Error(finalErr)
+ }
+ finalErr = fmt.Errorf("removing exit code of container %s from DB: %w", id, err)
+ }
+ if err := timeStampBucket.Delete(rawID); err != nil {
+ if finalErr != nil {
+ logrus.Error(finalErr)
+ }
+ finalErr = fmt.Errorf("removing exit code timestamp of container %s from DB: %w", id, err)
+ }
+ }
+
+ return finalErr
+ })
+ if err != nil {
+ return errors.Wrapf(err, "pruning exit codes")
+ }
+ }
+
+ return nil
+}
+
// AddExecSession adds an exec session to the state.
func (s *BoltState) AddExecSession(ctr *Container, session *ExecSession) error {
if !s.valid {
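The two new buckets deliberately store plain strings instead of JSON: the exit code as a decimal string and the timestamp via time.Time.MarshalText. A minimal stdlib-only sketch of that round trip and the 5-minute pruning check (the bolt transaction plumbing is omitted):

    package main

    import (
        "fmt"
        "strconv"
        "time"
    )

    func main() {
        // Encode the values the way the exit-code buckets store them.
        rawExitCode := []byte(strconv.Itoa(int(int32(137))))
        rawTimeStamp, err := time.Now().MarshalText() // RFC 3339 text
        if err != nil {
            panic(err)
        }

        // Decode on the read side, as GetContainerExitCode and
        // GetContainerExitCodeTimeStamp do.
        exitCode, err := strconv.Atoi(string(rawExitCode))
        if err != nil {
            panic(err)
        }
        var stamp time.Time
        if err := stamp.UnmarshalText(rawTimeStamp); err != nil {
            panic(err)
        }

        // PruneContainerExitCodes drops entries older than this threshold.
        const threshold = 5 * time.Minute
        fmt.Printf("exit code %d, prune=%v\n", int32(exitCode), time.Since(stamp) > threshold)
    }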
diff --git a/libpod/boltdb_state_internal.go b/libpod/boltdb_state_internal.go
index d6f035af9..edba78d6d 100644
--- a/libpod/boltdb_state_internal.go
+++ b/libpod/boltdb_state_internal.go
@@ -29,6 +29,9 @@ const (
aliasesName = "aliases"
runtimeConfigName = "runtime-config"
+ exitCodeName = "exit-code"
+ exitCodeTimeStampName = "exit-code-time-stamp"
+
configName = "config"
stateName = "state"
dependenciesName = "dependencies"
@@ -65,6 +68,9 @@ var (
volDependenciesBkt = []byte(volCtrDependencies)
networksBkt = []byte(networksName)
+ exitCodeBkt = []byte(exitCodeName)
+ exitCodeTimeStampBkt = []byte(exitCodeTimeStampName)
+
configKey = []byte(configName)
stateKey = []byte(stateName)
netNSKey = []byte(netNSName)
@@ -362,6 +368,22 @@ func getRuntimeConfigBucket(tx *bolt.Tx) (*bolt.Bucket, error) {
return bkt, nil
}
+func getExitCodeBucket(tx *bolt.Tx) (*bolt.Bucket, error) {
+ bkt := tx.Bucket(exitCodeBkt)
+ if bkt == nil {
+ return nil, errors.Wrapf(define.ErrDBBadConfig, "exit-code container bucket not found in DB")
+ }
+ return bkt, nil
+}
+
+func getExitCodeTimeStampBucket(tx *bolt.Tx) (*bolt.Bucket, error) {
+ bkt := tx.Bucket(exitCodeTimeStampBkt)
+ if bkt == nil {
+ return nil, errors.Wrapf(define.ErrDBBadConfig, "exit-code time stamp bucket not found in DB")
+ }
+ return bkt, nil
+}
+
func (s *BoltState) getContainerConfigFromDB(id []byte, config *ContainerConfig, ctrsBkt *bolt.Bucket) error {
ctrBkt := ctrsBkt.Bucket(id)
if ctrBkt == nil {
diff --git a/libpod/container_api.go b/libpod/container_api.go
index b064d3528..c14fe95b0 100644
--- a/libpod/container_api.go
+++ b/libpod/container_api.go
@@ -2,6 +2,7 @@ package libpod
import (
"context"
+ "fmt"
"io"
"io/ioutil"
"net/http"
@@ -490,41 +491,84 @@ func (c *Container) RemoveArtifact(name string) error {
// Wait blocks until the container exits and returns its exit code.
func (c *Container) Wait(ctx context.Context) (int32, error) {
- return c.WaitWithInterval(ctx, DefaultWaitInterval)
+ return c.WaitForExit(ctx, DefaultWaitInterval)
}
-// WaitWithInterval blocks until the container to exit and returns its exit
-// code. The argument is the interval at which checks the container's status.
-func (c *Container) WaitWithInterval(ctx context.Context, waitTimeout time.Duration) (int32, error) {
+// WaitForExit blocks until the container exits and returns its exit code. The
+// argument is the interval at which checks the container's status.
+func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration) (int32, error) {
if !c.valid {
return -1, define.ErrCtrRemoved
}
- exitFile, err := c.exitFilePath()
- if err != nil {
- return -1, err
- }
- chWait := make(chan error, 1)
+ id := c.ID()
+ var conmonTimer time.Timer
+ conmonTimerSet := false
- go func() {
- <-ctx.Done()
- chWait <- define.ErrCanceled
- }()
+ getExitCode := func() (bool, int32, error) {
+ containerRemoved := false
+ if !c.batched {
+ c.lock.Lock()
+ defer c.lock.Unlock()
+ }
- for {
- // ignore errors here (with exception of cancellation), it is only used to avoid waiting
- // too long.
- _, e := WaitForFile(exitFile, chWait, waitTimeout)
- if e == define.ErrCanceled {
- return -1, define.ErrCanceled
+ if err := c.syncContainer(); err != nil {
+ if !errors.Is(err, define.ErrNoSuchCtr) {
+ return false, -1, err
+ }
+ containerRemoved = true
+ }
+
+ // If conmon is no longer alive, set a timer to make sure we still
+ // return even if conmon has been forcefully killed.
+ if !conmonTimerSet && !containerRemoved {
+ conmonAlive, err := c.ociRuntime.CheckConmonRunning(c)
+ switch {
+ case errors.Is(err, define.ErrNoSuchCtr):
+ containerRemoved = true
+ case err != nil:
+ return false, -1, err
+ case !conmonAlive:
+ timerDuration := time.Second * 20
+ conmonTimer = *time.NewTimer(timerDuration)
+ conmonTimerSet = true
+ }
+ }
+
+ if !containerRemoved {
+ // If conmon is dead for more than $timerDuration or if the
+ // container has exited properly, try to look up the exit code.
+ select {
+ case <-conmonTimer.C:
+ logrus.Debugf("Exceeded conmon timeout waiting for container %s to exit", id)
+ default:
+ if !c.ensureState(define.ContainerStateExited, define.ContainerStateConfigured) {
+ return false, -1, nil
+ }
+ }
}
- stopped, code, err := c.isStopped()
+ exitCode, err := c.runtime.state.GetContainerExitCode(id)
+ if err != nil {
+ return true, -1, err
+ }
+
+ return true, exitCode, nil
+ }
+
+ for {
+ hasExited, exitCode, err := getExitCode()
+ if hasExited {
+ return exitCode, err
+ }
if err != nil {
return -1, err
}
- if stopped {
- return code, nil
+ select {
+ case <-ctx.Done():
+ return -1, fmt.Errorf("waiting for exit code of container %s canceled", id)
+ default:
+ time.Sleep(pollInterval)
}
}
}
@@ -551,11 +595,12 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
wantedStates := make(map[define.ContainerStatus]bool, len(conditions))
for _, condition := range conditions {
- if condition == define.ContainerStateStopped || condition == define.ContainerStateExited {
+ switch condition {
+ case define.ContainerStateExited, define.ContainerStateStopped:
waitForExit = true
- continue
+ default:
+ wantedStates[condition] = true
}
- wantedStates[condition] = true
}
trySend := func(code int32, err error) {
@@ -572,7 +617,7 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
go func() {
defer wg.Done()
- code, err := c.WaitWithInterval(ctx, waitTimeout)
+ code, err := c.WaitForExit(ctx, waitTimeout)
trySend(code, err)
}()
}
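WaitForExit replaces the exit-file watch with a polling loop: check container state, read the exit code from the database once the container has exited (or once conmon has been gone longer than the 20-second timer), and bail out when the context is canceled. A stripped-down sketch of that loop shape; pollUntil and the check callback are illustrative helpers, not libpod API:

    package main

    import (
        "context"
        "errors"
        "fmt"
        "time"
    )

    // pollUntil calls check every pollInterval until it reports completion or
    // the context is canceled, mirroring the structure of WaitForExit.
    func pollUntil(ctx context.Context, pollInterval time.Duration, check func() (bool, int32, error)) (int32, error) {
        for {
            done, code, err := check()
            if done {
                return code, err
            }
            if err != nil {
                return -1, err
            }
            select {
            case <-ctx.Done():
                return -1, errors.New("waiting for exit code canceled")
            default:
                time.Sleep(pollInterval)
            }
        }
    }

    func main() {
        ctx, cancel := context.WithTimeout(context.Background(), time.Second)
        defer cancel()

        exited := time.Now().Add(300 * time.Millisecond)
        code, err := pollUntil(ctx, 50*time.Millisecond, func() (bool, int32, error) {
            // Stand-in for "container has exited; read the code from the DB".
            if time.Now().After(exited) {
                return true, 0, nil
            }
            return false, -1, nil
        })
        fmt.Println(code, err)
    }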
diff --git a/libpod/container_config.go b/libpod/container_config.go
index 45ff03d58..544c45a8c 100644
--- a/libpod/container_config.go
+++ b/libpod/container_config.go
@@ -424,7 +424,6 @@ type InfraInherit struct {
CapDrop []string `json:"cap_drop,omitempty"`
HostDeviceList []spec.LinuxDevice `json:"host_device_list,omitempty"`
ImageVolumes []*specgen.ImageVolume `json:"image_volumes,omitempty"`
- InfraResources *spec.LinuxResources `json:"resource_limits,omitempty"`
Mounts []spec.Mount `json:"mounts,omitempty"`
NoNewPrivileges bool `json:"no_new_privileges,omitempty"`
OverlayVolumes []*specgen.OverlayVolume `json:"overlay_volumes,omitempty"`
diff --git a/libpod/container_exec.go b/libpod/container_exec.go
index be00c6fbe..b112273d0 100644
--- a/libpod/container_exec.go
+++ b/libpod/container_exec.go
@@ -277,9 +277,13 @@ func (c *Container) ExecStart(sessionID string) error {
return c.save()
}
+func (c *Container) ExecStartAndAttach(sessionID string, streams *define.AttachStreams, newSize *define.TerminalSize) error {
+ return c.execStartAndAttach(sessionID, streams, newSize, false)
+}
+
// ExecStartAndAttach starts and attaches to an exec session in a container.
// newSize resizes the tty to this size before the process is started, must be nil if the exec session has no tty
-func (c *Container) ExecStartAndAttach(sessionID string, streams *define.AttachStreams, newSize *define.TerminalSize) error {
+func (c *Container) execStartAndAttach(sessionID string, streams *define.AttachStreams, newSize *define.TerminalSize, isHealthcheck bool) error {
if !c.batched {
c.lock.Lock()
defer c.lock.Unlock()
@@ -315,7 +319,12 @@ func (c *Container) ExecStartAndAttach(sessionID string, streams *define.AttachS
return err
}
- c.newContainerEvent(events.Exec)
+ if isHealthcheck {
+ c.newContainerEvent(events.HealthStatus)
+ } else {
+ c.newContainerEvent(events.Exec)
+ }
+
logrus.Debugf("Successfully started exec session %s in container %s", session.ID(), c.ID())
var lastErr error
@@ -743,10 +752,14 @@ func (c *Container) ExecResize(sessionID string, newSize define.TerminalSize) er
return c.ociRuntime.ExecAttachResize(c, sessionID, newSize)
}
+func (c *Container) Exec(config *ExecConfig, streams *define.AttachStreams, resize <-chan define.TerminalSize) (int, error) {
+ return c.exec(config, streams, resize, false)
+}
+
// Exec emulates the old Libpod exec API, providing a single call to create,
// run, and remove an exec session. Returns exit code and error. Exit code is
// not guaranteed to be set sanely if error is not nil.
-func (c *Container) Exec(config *ExecConfig, streams *define.AttachStreams, resize <-chan define.TerminalSize) (int, error) {
+func (c *Container) exec(config *ExecConfig, streams *define.AttachStreams, resize <-chan define.TerminalSize, isHealthcheck bool) (int, error) {
sessionID, err := c.ExecCreate(config)
if err != nil {
return -1, err
@@ -780,7 +793,7 @@ func (c *Container) Exec(config *ExecConfig, streams *define.AttachStreams, resi
}()
}
- if err := c.ExecStartAndAttach(sessionID, streams, size); err != nil {
+ if err := c.execStartAndAttach(sessionID, streams, size, isHealthcheck); err != nil {
return -1, err
}
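The exported Exec and ExecStartAndAttach keep their signatures and now delegate to internal variants that carry an isHealthcheck flag, whose only job is to pick which event type gets emitted. A tiny sketch of that wrapper pattern (types and names here are illustrative, not the libpod ones):

    package main

    import "fmt"

    type eventStatus string

    const (
        execEvent         eventStatus = "exec"
        healthStatusEvent eventStatus = "health_status"
    )

    // exec is the internal variant; the flag decides which event is written.
    func exec(cmd []string, isHealthcheck bool) (int, eventStatus) {
        // ... create the session, start, attach, wait ...
        if isHealthcheck {
            return 0, healthStatusEvent
        }
        return 0, execEvent
    }

    // Exec is the stable public entry point; health checks call exec directly.
    func Exec(cmd []string) (int, eventStatus) {
        return exec(cmd, false)
    }

    func main() {
        _, ev := Exec([]string{"true"})
        fmt.Println(ev) // exec
        _, ev = exec([]string{"healthcheck"}, true)
        fmt.Println(ev) // health_status
    }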
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index ce48987f6..64696cc27 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -21,6 +21,7 @@ import (
"github.com/containers/common/pkg/cgroups"
"github.com/containers/common/pkg/chown"
"github.com/containers/common/pkg/config"
+ cutil "github.com/containers/common/pkg/util"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/libpod/events"
"github.com/containers/podman/v4/pkg/ctime"
@@ -219,7 +220,7 @@ func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
// Write an event for the container's death
c.newContainerExitedEvent(c.state.ExitCode)
- return nil
+ return c.runtime.state.AddContainerExitCode(c.ID(), c.state.ExitCode)
}
func (c *Container) shouldRestart() bool {
@@ -784,20 +785,6 @@ func (c *Container) getArtifactPath(name string) string {
return filepath.Join(c.config.StaticDir, artifactsDir, name)
}
-// Used with Wait() to determine if a container has exited
-func (c *Container) isStopped() (bool, int32, error) {
- if !c.batched {
- c.lock.Lock()
- defer c.lock.Unlock()
- }
- err := c.syncContainer()
- if err != nil {
- return true, -1, err
- }
-
- return !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused, define.ContainerStateStopping), c.state.ExitCode, nil
-}
-
// save container state to the database
func (c *Container) save() error {
if err := c.runtime.state.SaveContainer(c); err != nil {
@@ -1282,13 +1269,6 @@ func (c *Container) stop(timeout uint) error {
}
}
- // Check if conmon is still alive.
- // If it is not, we won't be getting an exit file.
- conmonAlive, err := c.ociRuntime.CheckConmonRunning(c)
- if err != nil {
- return err
- }
-
// Set the container state to "stopping" and unlock the container
// before handing it over to conmon to unblock other commands. #8501
// demonstrates nicely that a high stop timeout will block even simple
@@ -1341,21 +1321,18 @@ func (c *Container) stop(timeout uint) error {
}
c.newContainerEvent(events.Stop)
-
- c.state.PID = 0
- c.state.ConmonPID = 0
c.state.StoppedByUser = true
+ conmonAlive, err := c.ociRuntime.CheckConmonRunning(c)
+ if err != nil {
+ return err
+ }
if !conmonAlive {
- // Conmon is dead, so we can't expect an exit code.
- c.state.ExitCode = -1
- c.state.FinishedTime = time.Now()
- c.state.State = define.ContainerStateStopped
- if err := c.save(); err != nil {
- logrus.Errorf("Saving container %s status: %v", c.ID(), err)
+ if err := c.checkExitFile(); err != nil {
+ return err
}
- return errors.Wrapf(define.ErrConmonDead, "container %s conmon process missing, cannot retrieve exit code", c.ID())
+ return c.save()
}
if err := c.save(); err != nil {
@@ -1663,30 +1640,16 @@ func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string)
if err := vol.update(); err != nil {
return nil, err
}
- if vol.state.NeedsCopyUp {
+ _, hasNoCopy := vol.config.Options["nocopy"]
+ if vol.state.NeedsCopyUp && !cutil.StringInSlice("nocopy", v.Options) && !hasNoCopy {
logrus.Debugf("Copying up contents from container %s to volume %s", c.ID(), vol.Name())
- // If the volume is not empty, we should not copy up.
- volMount := vol.mountPoint()
- contents, err := ioutil.ReadDir(volMount)
- if err != nil {
- return nil, errors.Wrapf(err, "error listing contents of volume %s mountpoint when copying up from container %s", vol.Name(), c.ID())
- }
- if len(contents) > 0 {
- // The volume is not empty. It was likely modified
- // outside of Podman. For safety, let's not copy up into
- // it. Fixes CVE-2020-1726.
- return vol, nil
- }
-
srcDir, err := securejoin.SecureJoin(mountpoint, v.Dest)
if err != nil {
return nil, errors.Wrapf(err, "error calculating destination path to copy up container %s volume %s", c.ID(), vol.Name())
}
// Do a manual stat on the source directory to verify existence.
// Skip the rest if it exists.
- // TODO: Should this be stat or lstat? I'm using lstat because I
- // think copy-up doesn't happen when the source is a link.
srcStat, err := os.Lstat(srcDir)
if err != nil {
if os.IsNotExist(err) {
@@ -1712,6 +1675,19 @@ func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string)
return vol, nil
}
+ // If the volume is not empty, we should not copy up.
+ volMount := vol.mountPoint()
+ contents, err := ioutil.ReadDir(volMount)
+ if err != nil {
+ return nil, errors.Wrapf(err, "error listing contents of volume %s mountpoint when copying up from container %s", vol.Name(), c.ID())
+ }
+ if len(contents) > 0 {
+ // The volume is not empty. It was likely modified
+ // outside of Podman. For safety, let's not copy up into
+ // it. Fixes CVE-2020-1726.
+ return vol, nil
+ }
+
// Set NeedsCopyUp to false since we are about to do first copy
// Do not copy second time.
vol.state.NeedsCopyUp = false
@@ -1939,6 +1915,18 @@ func (c *Container) cleanup(ctx context.Context) error {
}
}
+ // Prune the exit codes of other containers during cleanup.
+ // Since Podman has no daemon, we have to clean them up somewhere,
+ // and cleanup is a good place since it is not performance
+ // critical.
+ if err := c.runtime.state.PruneContainerExitCodes(); err != nil {
+ if lastError == nil {
+ lastError = err
+ } else {
+ logrus.Errorf("Pruning container exit codes: %v", err)
+ }
+ }
+
return lastError
}
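Copy-up is now skipped when "nocopy" was requested either on the volume itself or in the container's mount options, in addition to the existing non-empty-volume guard. A small sketch of that check (field names are illustrative, not the libpod ones):

    package main

    import "fmt"

    // shouldCopyUp mirrors the new guard: no copy-up once it already happened,
    // and none when "nocopy" is set on the volume or the mount.
    func shouldCopyUp(needsCopyUp bool, volOptions map[string]string, mountOptions []string) bool {
        if !needsCopyUp {
            return false
        }
        if _, hasNoCopy := volOptions["nocopy"]; hasNoCopy {
            return false
        }
        for _, o := range mountOptions {
            if o == "nocopy" {
                return false
            }
        }
        return true
    }

    func main() {
        fmt.Println(shouldCopyUp(true, map[string]string{}, []string{"rw"}))      // true
        fmt.Println(shouldCopyUp(true, map[string]string{"nocopy": ""}, nil))     // false
        fmt.Println(shouldCopyUp(true, map[string]string{}, []string{"nocopy"})) // false
    }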
diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go
index 77b598b16..0f4bf0f55 100644
--- a/libpod/container_internal_linux.go
+++ b/libpod/container_internal_linux.go
@@ -870,6 +870,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
if err != nil {
return nil, err
}
+
g.SetLinuxCgroupsPath(cgroupPath)
// Warning: CDI may alter g.Config in place.
@@ -1141,7 +1142,7 @@ func (c *Container) addCheckpointImageMetadata(importBuilder *buildah.Builder) e
return fmt.Errorf("getting host info: %v", err)
}
- criuVersion, err := criu.GetCriuVestion()
+ criuVersion, err := criu.GetCriuVersion()
if err != nil {
return fmt.Errorf("getting criu version: %v", err)
}
diff --git a/libpod/define/errors.go b/libpod/define/errors.go
index f5a7c73e5..9757a85b1 100644
--- a/libpod/define/errors.go
+++ b/libpod/define/errors.go
@@ -24,6 +24,10 @@ var (
// not exist.
ErrNoSuchExecSession = errors.New("no such exec session")
+ // ErrNoSuchExitCode indicates that the requested container exit code
+ // does not exist.
+ ErrNoSuchExitCode = errors.New("no such exit code")
+
// ErrDepExists indicates that the current object has dependencies and
// cannot be removed before them.
ErrDepExists = errors.New("dependency exists")
diff --git a/libpod/define/volume_inspect.go b/libpod/define/volume_inspect.go
index fac179176..aaa23b4fc 100644
--- a/libpod/define/volume_inspect.go
+++ b/libpod/define/volume_inspect.go
@@ -57,3 +57,9 @@ type InspectVolumeData struct {
// UID/GID.
NeedsChown bool `json:"NeedsChown,omitempty"`
}
+
+type VolumeReload struct {
+ Added []string
+ Removed []string
+ Errors []error
+}
diff --git a/libpod/events.go b/libpod/events.go
index f09d8402a..bb50df92d 100644
--- a/libpod/events.go
+++ b/libpod/events.go
@@ -33,6 +33,16 @@ func (c *Container) newContainerEvent(status events.Status) {
Attributes: c.Labels(),
}
+ // If the current event is a HealthStatus event, we need to get the current
+ // status of the container to pass to the event.
+ if status == events.HealthStatus {
+ containerHealthStatus, err := c.healthCheckStatus()
+ if err != nil {
+ e.HealthStatus = fmt.Sprintf("%v", err)
+ } else {
+ e.HealthStatus = containerHealthStatus
+ }
+ }
+
if err := c.runtime.eventer.Write(e); err != nil {
logrus.Errorf("Unable to write pod event: %q", err)
}
@@ -151,6 +161,9 @@ func (r *Runtime) GetEvents(ctx context.Context, filters []string) ([]*events.Ev
// GetLastContainerEvent takes a container name or ID and an event status and returns
// the last occurrence of the container event
func (r *Runtime) GetLastContainerEvent(ctx context.Context, nameOrID string, containerEvent events.Status) (*events.Event, error) {
+ // FIXME: events should be read in reverse order!
+ // https://github.com/containers/podman/issues/14579
+
// check to make sure the event.Status is valid
if _, err := events.StringToStatus(containerEvent.String()); err != nil {
return nil, err
diff --git a/libpod/events/config.go b/libpod/events/config.go
index 2e7016136..a678baa2d 100644
--- a/libpod/events/config.go
+++ b/libpod/events/config.go
@@ -40,6 +40,8 @@ type Event struct {
Time time.Time
// Type of event that occurred
Type Type
+ // Health status of the current container
+ HealthStatus string `json:"health_status,omitempty"`
Details
}
@@ -141,6 +143,8 @@ const (
Exited Status = "died"
// Export ...
Export Status = "export"
+ // HealthStatus ...
+ HealthStatus Status = "health_status"
// History ...
History Status = "history"
// Import ...
diff --git a/libpod/events/events.go b/libpod/events/events.go
index a30e0f1ca..a8001ab95 100644
--- a/libpod/events/events.go
+++ b/libpod/events/events.go
@@ -76,7 +76,7 @@ func (e *Event) ToHumanReadable(truncate bool) string {
}
switch e.Type {
case Container, Pod:
- humanFormat = fmt.Sprintf("%s %s %s %s (image=%s, name=%s", e.Time, e.Type, e.Status, id, e.Image, e.Name)
+ humanFormat = fmt.Sprintf("%s %s %s %s (image=%s, name=%s, health_status=%s", e.Time, e.Type, e.Status, id, e.Image, e.Name, e.HealthStatus)
// check if the container has labels and add it to the output
if len(e.Attributes) > 0 {
for k, v := range e.Attributes {
@@ -168,6 +168,8 @@ func StringToStatus(name string) (Status, error) {
return Exited, nil
case Export.String():
return Export, nil
+ case HealthStatus.String():
+ return HealthStatus, nil
case History.String():
return History, nil
case Import.String():
diff --git a/libpod/events/journal_linux.go b/libpod/events/journal_linux.go
index d21b60c68..036638d34 100644
--- a/libpod/events/journal_linux.go
+++ b/libpod/events/journal_linux.go
@@ -58,6 +58,7 @@ func (e EventJournalD) Write(ee Event) error {
}
m["PODMAN_LABELS"] = string(b)
}
+ m["PODMAN_HEALTH_STATUS"] = ee.HealthStatus
case Network:
m["PODMAN_ID"] = ee.ID
m["PODMAN_NETWORK_NAME"] = ee.Network
@@ -213,6 +214,7 @@ func newEventFromJournalEntry(entry *sdjournal.JournalEntry) (*Event, error) {
newEvent.Details = Details{Attributes: labels}
}
}
+ newEvent.HealthStatus = entry.Fields["PODMAN_HEALTH_STATUS"]
case Network:
newEvent.ID = entry.Fields["PODMAN_ID"]
newEvent.Network = entry.Fields["PODMAN_NETWORK_NAME"]
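Health status now travels with the event through all three paths: the in-memory Event struct, the journald fields (PODMAN_HEALTH_STATUS), and the human-readable line. A minimal sketch of what the extended human-readable container line looks like; these are stand-alone stand-in types, and the real formatter appends any labels before closing the parenthesis:

    package main

    import (
        "fmt"
        "time"
    )

    // event is a pared-down stand-in for libpod's events.Event.
    type event struct {
        Time         time.Time
        Type         string
        Status       string
        ID           string
        Image        string
        Name         string
        HealthStatus string
    }

    func (e event) String() string {
        return fmt.Sprintf("%s %s %s %s (image=%s, name=%s, health_status=%s)",
            e.Time.Format(time.RFC3339), e.Type, e.Status, e.ID, e.Image, e.Name, e.HealthStatus)
    }

    func main() {
        fmt.Println(event{
            Time:         time.Now(),
            Type:         "container",
            Status:       "health_status",
            ID:           "0123456789ab",
            Image:        "quay.io/libpod/alpine",
            Name:         "web",
            HealthStatus: "healthy",
        })
    }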
diff --git a/libpod/healthcheck.go b/libpod/healthcheck.go
index bd77e98c6..95c70b60e 100644
--- a/libpod/healthcheck.go
+++ b/libpod/healthcheck.go
@@ -90,7 +90,7 @@ func (c *Container) runHealthCheck() (define.HealthCheckStatus, error) {
hcResult := define.HealthCheckSuccess
config := new(ExecConfig)
config.Command = newCommand
- exitCode, hcErr := c.Exec(config, streams, nil)
+ exitCode, hcErr := c.exec(config, streams, nil, true)
if hcErr != nil {
errCause := errors.Cause(hcErr)
hcResult = define.HealthCheckFailure
@@ -232,18 +232,27 @@ func (c *Container) getHealthCheckLog() (define.HealthCheckResults, error) {
// HealthCheckStatus returns the current state of a container with a healthcheck
func (c *Container) HealthCheckStatus() (string, error) {
+ c.lock.Lock()
+ defer c.lock.Unlock()
+ return c.healthCheckStatus()
+}
+
+// Internal function to return the current state of a container with a healthcheck.
+// This function does not lock the container.
+func (c *Container) healthCheckStatus() (string, error) {
if !c.HasHealthCheck() {
return "", errors.Errorf("container %s has no defined healthcheck", c.ID())
}
- c.lock.Lock()
- defer c.lock.Unlock()
+
if err := c.syncContainer(); err != nil {
return "", err
}
+
results, err := c.getHealthCheckLog()
if err != nil {
return "", errors.Wrapf(err, "unable to get healthcheck log for %s", c.ID())
}
+
return results.Status, nil
}
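HealthCheckStatus is split into a self-locking public method and a lowercase healthCheckStatus that assumes the caller already holds the container lock, which is what lets newContainerEvent query the status while locked. A generic sketch of that pattern, with sync.Mutex standing in for the container lock:

    package main

    import (
        "fmt"
        "sync"
    )

    type container struct {
        mu     sync.Mutex
        status string
    }

    // HealthCheckStatus is the public, self-locking entry point.
    func (c *container) HealthCheckStatus() (string, error) {
        c.mu.Lock()
        defer c.mu.Unlock()
        return c.healthCheckStatus()
    }

    // healthCheckStatus must only be called with the lock already held.
    func (c *container) healthCheckStatus() (string, error) {
        return c.status, nil
    }

    func main() {
        c := &container{status: "healthy"}
        s, _ := c.HealthCheckStatus()
        fmt.Println(s)
    }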
diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go
index fde8624b0..7a9ae7ee5 100644
--- a/libpod/oci_conmon_linux.go
+++ b/libpod/oci_conmon_linux.go
@@ -23,6 +23,9 @@ import (
"text/template"
"time"
+ runcconfig "github.com/opencontainers/runc/libcontainer/configs"
+ "github.com/opencontainers/runc/libcontainer/devices"
+
"github.com/containers/common/pkg/cgroups"
"github.com/containers/common/pkg/config"
conmonConfig "github.com/containers/conmon/runner/config"
@@ -264,11 +267,6 @@ func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *Conta
// status, but will instead only check for the existence of the conmon exit file
// and update state to stopped if it exists.
func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container) error {
- exitFile, err := r.ExitFilePath(ctr)
- if err != nil {
- return err
- }
-
runtimeDir, err := util.GetRuntimeDir()
if err != nil {
return err
@@ -340,22 +338,10 @@ func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container) error {
// Only grab exit status if we were not already stopped
// If we were, it should already be in the database
if ctr.state.State == define.ContainerStateStopped && oldState != define.ContainerStateStopped {
- var fi os.FileInfo
- chWait := make(chan error)
- defer close(chWait)
-
- _, err := WaitForFile(exitFile, chWait, time.Second*5)
- if err == nil {
- fi, err = os.Stat(exitFile)
- }
- if err != nil {
- ctr.state.ExitCode = -1
- ctr.state.FinishedTime = time.Now()
- logrus.Errorf("No exit file for container %s found: %v", ctr.ID(), err)
- return nil
+ if _, err := ctr.Wait(context.Background()); err != nil {
+ logrus.Errorf("Waiting for container %s to exit: %v", ctr.ID(), err)
}
-
- return ctr.handleExitFile(exitFile, fi)
+ return nil
}
// Handle ContainerStateStopping - keep it unless the container
@@ -1166,7 +1152,6 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
}).Debugf("running conmon: %s", r.conmonPath)
cmd := exec.Command(r.conmonPath, args...)
- cmd.Dir = ctr.bundlePath()
cmd.SysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
}
@@ -1354,8 +1339,6 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
logDriverArg = define.NoLogging
case define.PassthroughLogging:
logDriverArg = define.PassthroughLogging
- case define.JSONLogging:
- fallthrough
//lint:ignore ST1015 the default case has to be here
default: //nolint:stylecheck,gocritic
// No case here should happen except JSONLogging, but keep this here in case the options are extended
@@ -1365,6 +1348,8 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
// to get here, either a user would specify `--log-driver ""`, or this came from another place in libpod
// since the former case is obscure, and the latter case isn't an error, let's silently fallthrough
fallthrough
+ case define.JSONLogging:
+ fallthrough
case define.KubernetesLogging:
logDriverArg = fmt.Sprintf("%s:%s", define.KubernetesLogging, logPath)
}
@@ -1451,9 +1436,14 @@ func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec
// TODO: This should be a switch - we are not guaranteed that
// there are only 2 valid cgroup managers
cgroupParent := ctr.CgroupParent()
+ cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon")
+ Resource := ctr.Spec().Linux.Resources
+ cgroupResources, err := GetLimits(Resource)
+ if err != nil {
+ logrus.StandardLogger().Log(logLevel, "Could not get ctr resources")
+ }
if ctr.CgroupManager() == config.SystemdCgroupsManager {
unitName := createUnitName("libpod-conmon", ctr.ID())
-
realCgroupParent := cgroupParent
splitParent := strings.Split(cgroupParent, "/")
if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 {
@@ -1465,8 +1455,7 @@ func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec
logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to systemd sandbox cgroup: %v", err)
}
} else {
- cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon")
- control, err := cgroups.New(cgroupPath, &spec.LinuxResources{})
+ control, err := cgroups.New(cgroupPath, &cgroupResources)
if err != nil {
logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to cgroupfs sandbox cgroup: %v", err)
} else if err := control.AddPid(cmd.Process.Pid); err != nil {
@@ -1748,3 +1737,191 @@ func httpAttachNonTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter,
}
}
}
+
+// GetLimits converts spec resource limits to cgroup consumable limits
+func GetLimits(resource *spec.LinuxResources) (runcconfig.Resources, error) {
+ if resource == nil {
+ resource = &spec.LinuxResources{}
+ }
+ final := &runcconfig.Resources{}
+ devs := []*devices.Rule{}
+
+ // Devices
+ for _, entry := range resource.Devices {
+ if entry.Major == nil || entry.Minor == nil {
+ continue
+ }
+ runeType := 'a'
+ switch entry.Type {
+ case "b":
+ runeType = 'b'
+ case "c":
+ runeType = 'c'
+ }
+
+ devs = append(devs, &devices.Rule{
+ Type: devices.Type(runeType),
+ Major: *entry.Major,
+ Minor: *entry.Minor,
+ Permissions: devices.Permissions(entry.Access),
+ Allow: entry.Allow,
+ })
+ }
+ final.Devices = devs
+
+ // HugepageLimits
+ pageLimits := []*runcconfig.HugepageLimit{}
+ for _, entry := range resource.HugepageLimits {
+ pageLimits = append(pageLimits, &runcconfig.HugepageLimit{
+ Pagesize: entry.Pagesize,
+ Limit: entry.Limit,
+ })
+ }
+ final.HugetlbLimit = pageLimits
+
+ // Networking
+ netPriorities := []*runcconfig.IfPrioMap{}
+ if resource.Network != nil {
+ for _, entry := range resource.Network.Priorities {
+ netPriorities = append(netPriorities, &runcconfig.IfPrioMap{
+ Interface: entry.Name,
+ Priority: int64(entry.Priority),
+ })
+ }
+ }
+ final.NetPrioIfpriomap = netPriorities
+ rdma := make(map[string]runcconfig.LinuxRdma)
+ for name, entry := range resource.Rdma {
+ rdma[name] = runcconfig.LinuxRdma{HcaHandles: entry.HcaHandles, HcaObjects: entry.HcaObjects}
+ }
+ final.Rdma = rdma
+
+ // Memory
+ if resource.Memory != nil {
+ if resource.Memory.Limit != nil {
+ final.Memory = *resource.Memory.Limit
+ }
+ if resource.Memory.Reservation != nil {
+ final.MemoryReservation = *resource.Memory.Reservation
+ }
+ if resource.Memory.Swap != nil {
+ final.MemorySwap = *resource.Memory.Swap
+ }
+ if resource.Memory.Swappiness != nil {
+ final.MemorySwappiness = resource.Memory.Swappiness
+ }
+ }
+
+ // CPU
+ if resource.CPU != nil {
+ if resource.CPU.Period != nil {
+ final.CpuPeriod = *resource.CPU.Period
+ }
+ if resource.CPU.Quota != nil {
+ final.CpuQuota = *resource.CPU.Quota
+ }
+ if resource.CPU.RealtimePeriod != nil {
+ final.CpuRtPeriod = *resource.CPU.RealtimePeriod
+ }
+ if resource.CPU.RealtimeRuntime != nil {
+ final.CpuRtRuntime = *resource.CPU.RealtimeRuntime
+ }
+ if resource.CPU.Shares != nil {
+ final.CpuShares = *resource.CPU.Shares
+ }
+ final.CpusetCpus = resource.CPU.Cpus
+ final.CpusetMems = resource.CPU.Mems
+ }
+
+ // BlkIO
+ if resource.BlockIO != nil {
+ if len(resource.BlockIO.ThrottleReadBpsDevice) > 0 {
+ for _, entry := range resource.BlockIO.ThrottleReadBpsDevice {
+ throttle := &runcconfig.ThrottleDevice{}
+ dev := &runcconfig.BlockIODevice{
+ Major: entry.Major,
+ Minor: entry.Minor,
+ }
+ throttle.BlockIODevice = *dev
+ throttle.Rate = entry.Rate
+ final.BlkioThrottleReadBpsDevice = append(final.BlkioThrottleReadBpsDevice, throttle)
+ }
+ }
+ if len(resource.BlockIO.ThrottleWriteBpsDevice) > 0 {
+ for _, entry := range resource.BlockIO.ThrottleWriteBpsDevice {
+ throttle := &runcconfig.ThrottleDevice{}
+ dev := &runcconfig.BlockIODevice{
+ Major: entry.Major,
+ Minor: entry.Minor,
+ }
+ throttle.BlockIODevice = *dev
+ throttle.Rate = entry.Rate
+ final.BlkioThrottleWriteBpsDevice = append(final.BlkioThrottleWriteBpsDevice, throttle)
+ }
+ }
+ if len(resource.BlockIO.ThrottleReadIOPSDevice) > 0 {
+ for _, entry := range resource.BlockIO.ThrottleReadIOPSDevice {
+ throttle := &runcconfig.ThrottleDevice{}
+ dev := &runcconfig.BlockIODevice{
+ Major: entry.Major,
+ Minor: entry.Minor,
+ }
+ throttle.BlockIODevice = *dev
+ throttle.Rate = entry.Rate
+ final.BlkioThrottleReadIOPSDevice = append(final.BlkioThrottleReadIOPSDevice, throttle)
+ }
+ }
+ if len(resource.BlockIO.ThrottleWriteIOPSDevice) > 0 {
+ for _, entry := range resource.BlockIO.ThrottleWriteIOPSDevice {
+ throttle := &runcconfig.ThrottleDevice{}
+ dev := &runcconfig.BlockIODevice{
+ Major: entry.Major,
+ Minor: entry.Minor,
+ }
+ throttle.BlockIODevice = *dev
+ throttle.Rate = entry.Rate
+ final.BlkioThrottleWriteIOPSDevice = append(final.BlkioThrottleWriteIOPSDevice, throttle)
+ }
+ }
+ if resource.BlockIO.LeafWeight != nil {
+ final.BlkioLeafWeight = *resource.BlockIO.LeafWeight
+ }
+ if resource.BlockIO.Weight != nil {
+ final.BlkioWeight = *resource.BlockIO.Weight
+ }
+ if len(resource.BlockIO.WeightDevice) > 0 {
+ for _, entry := range resource.BlockIO.WeightDevice {
+ weight := &runcconfig.WeightDevice{}
+ dev := &runcconfig.BlockIODevice{
+ Major: entry.Major,
+ Minor: entry.Minor,
+ }
+ if entry.Weight != nil {
+ weight.Weight = *entry.Weight
+ }
+ if entry.LeafWeight != nil {
+ weight.LeafWeight = *entry.LeafWeight
+ }
+ weight.BlockIODevice = *dev
+ final.BlkioWeightDevice = append(final.BlkioWeightDevice, weight)
+ }
+ }
+ }
+
+ // Pids
+ if resource.Pids != nil {
+ final.PidsLimit = resource.Pids.Limit
+ }
+
+ // Networking
+ if resource.Network != nil {
+ if resource.Network.ClassID != nil {
+ final.NetClsClassid = *resource.Network.ClassID
+ }
+ }
+
+ // Unified state
+ final.Unified = resource.Unified
+
+ return *final, nil
+}
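GetLimits is the bridge between the OCI spec resources on the container and the runc libcontainer Resources struct that the conmon cgroup is now created with. A hedged usage sketch, assuming the libpod package can be imported directly (Linux-only build constraints and fuller error handling trimmed):

    package main

    import (
        "fmt"

        "github.com/containers/podman/v4/libpod"
        spec "github.com/opencontainers/runtime-spec/specs-go"
    )

    func main() {
        memLimit := int64(256 * 1024 * 1024) // 256 MiB
        shares := uint64(512)

        res := &spec.LinuxResources{
            Memory: &spec.LinuxMemory{Limit: &memLimit},
            CPU:    &spec.LinuxCPU{Shares: &shares},
        }

        limits, err := libpod.GetLimits(res)
        if err != nil {
            panic(err)
        }
        // The converted struct is what cgroups.New receives for the conmon cgroup.
        fmt.Println(limits.Memory, limits.CpuShares) // 268435456 512
    }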
diff --git a/libpod/plugin/volume_api.go b/libpod/plugin/volume_api.go
index f997ccf22..2de7db32c 100644
--- a/libpod/plugin/volume_api.go
+++ b/libpod/plugin/volume_api.go
@@ -197,13 +197,13 @@ func (p *VolumePlugin) verifyReachable() error {
// Send a request to the volume plugin for handling.
// Callers *MUST* close the response when they are done.
-func (p *VolumePlugin) sendRequest(toJSON interface{}, hasBody bool, endpoint string) (*http.Response, error) {
+func (p *VolumePlugin) sendRequest(toJSON interface{}, endpoint string) (*http.Response, error) {
var (
reqJSON []byte
err error
)
- if hasBody {
+ if toJSON != nil {
reqJSON, err = json.Marshal(toJSON)
if err != nil {
return nil, errors.Wrapf(err, "error marshalling request JSON for volume plugin %s endpoint %s", p.Name, endpoint)
@@ -274,7 +274,7 @@ func (p *VolumePlugin) CreateVolume(req *volume.CreateRequest) error {
logrus.Infof("Creating volume %s using plugin %s", req.Name, p.Name)
- resp, err := p.sendRequest(req, true, createPath)
+ resp, err := p.sendRequest(req, createPath)
if err != nil {
return err
}
@@ -291,7 +291,7 @@ func (p *VolumePlugin) ListVolumes() ([]*volume.Volume, error) {
logrus.Infof("Listing volumes using plugin %s", p.Name)
- resp, err := p.sendRequest(nil, false, listPath)
+ resp, err := p.sendRequest(nil, listPath)
if err != nil {
return nil, err
}
@@ -326,7 +326,7 @@ func (p *VolumePlugin) GetVolume(req *volume.GetRequest) (*volume.Volume, error)
logrus.Infof("Getting volume %s using plugin %s", req.Name, p.Name)
- resp, err := p.sendRequest(req, true, getPath)
+ resp, err := p.sendRequest(req, getPath)
if err != nil {
return nil, err
}
@@ -361,7 +361,7 @@ func (p *VolumePlugin) RemoveVolume(req *volume.RemoveRequest) error {
logrus.Infof("Removing volume %s using plugin %s", req.Name, p.Name)
- resp, err := p.sendRequest(req, true, removePath)
+ resp, err := p.sendRequest(req, removePath)
if err != nil {
return err
}
@@ -382,7 +382,7 @@ func (p *VolumePlugin) GetVolumePath(req *volume.PathRequest) (string, error) {
logrus.Infof("Getting volume %s path using plugin %s", req.Name, p.Name)
- resp, err := p.sendRequest(req, true, hostVirtualPath)
+ resp, err := p.sendRequest(req, hostVirtualPath)
if err != nil {
return "", err
}
@@ -419,7 +419,7 @@ func (p *VolumePlugin) MountVolume(req *volume.MountRequest) (string, error) {
logrus.Infof("Mounting volume %s using plugin %s for container %s", req.Name, p.Name, req.ID)
- resp, err := p.sendRequest(req, true, mountPath)
+ resp, err := p.sendRequest(req, mountPath)
if err != nil {
return "", err
}
@@ -455,7 +455,7 @@ func (p *VolumePlugin) UnmountVolume(req *volume.UnmountRequest) error {
logrus.Infof("Unmounting volume %s using plugin %s for container %s", req.Name, p.Name, req.ID)
- resp, err := p.sendRequest(req, true, unmountPath)
+ resp, err := p.sendRequest(req, unmountPath)
if err != nil {
return err
}
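sendRequest drops the separate hasBody flag and simply marshals a body whenever a payload was passed. A small sketch of that rule in isolation:

    package main

    import (
        "bytes"
        "encoding/json"
        "fmt"
    )

    // buildBody marshals toJSON only when a payload is present, the same
    // decision sendRequest now makes from toJSON != nil.
    func buildBody(toJSON interface{}) (*bytes.Reader, error) {
        var reqJSON []byte
        if toJSON != nil {
            var err error
            reqJSON, err = json.Marshal(toJSON)
            if err != nil {
                return nil, err
            }
        }
        return bytes.NewReader(reqJSON), nil
    }

    func main() {
        withBody, _ := buildBody(map[string]string{"Name": "myvol"})
        noBody, _ := buildBody(nil)
        fmt.Println(withBody.Len() > 0, noBody.Len() == 0) // true true
    }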
diff --git a/libpod/pod_internal.go b/libpod/pod_internal.go
index 41f745e6c..1502bcb06 100644
--- a/libpod/pod_internal.go
+++ b/libpod/pod_internal.go
@@ -69,7 +69,7 @@ func (p *Pod) refresh() error {
if p.config.UsePodCgroup {
switch p.runtime.config.Engine.CgroupManager {
case config.SystemdCgroupsManager:
- cgroupPath, err := systemdSliceFromPath(p.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", p.ID()))
+ cgroupPath, err := systemdSliceFromPath(p.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", p.ID()), p.ResourceLim())
if err != nil {
logrus.Errorf("Creating Cgroup for pod %s: %v", p.ID(), err)
}
diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index a9ae9d1db..fafec5e12 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -502,7 +502,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
volOptions = append(volOptions, parsedOptions...)
}
}
- newVol, err := r.newVolume(volOptions...)
+ newVol, err := r.newVolume(false, volOptions...)
if err != nil {
return nil, errors.Wrapf(err, "error creating named volume %q", vol.Name)
}
@@ -664,9 +664,6 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
}
if c.state.State == define.ContainerStatePaused {
- if err := c.ociRuntime.KillContainer(c, 9, false); err != nil {
- return err
- }
isV2, err := cgroups.IsCgroup2UnifiedMode()
if err != nil {
return err
@@ -677,6 +674,9 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
return err
}
}
+ if err := c.ociRuntime.KillContainer(c, 9, false); err != nil {
+ return err
+ }
// Need to update container state to make sure we know it's stopped
if err := c.waitForExitFileAndSync(); err != nil {
return err
@@ -805,7 +805,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
if !volume.Anonymous() {
continue
}
- if err := runtime.removeVolume(ctx, volume, false, timeout); err != nil && errors.Cause(err) != define.ErrNoSuchVolume {
+ if err := runtime.removeVolume(ctx, volume, false, timeout, false); err != nil && errors.Cause(err) != define.ErrNoSuchVolume {
if errors.Cause(err) == define.ErrVolumeBeingUsed {
// Ignore error, since podman will report original error
volumesFrom, _ := c.volumesFrom()
@@ -963,7 +963,7 @@ func (r *Runtime) evictContainer(ctx context.Context, idOrName string, removeVol
if !volume.Anonymous() {
continue
}
- if err := r.removeVolume(ctx, volume, false, timeout); err != nil && err != define.ErrNoSuchVolume && err != define.ErrVolumeBeingUsed {
+ if err := r.removeVolume(ctx, volume, false, timeout, false); err != nil && err != define.ErrNoSuchVolume && err != define.ErrVolumeBeingUsed {
logrus.Errorf("Cleaning up volume (%s): %v", v, err)
}
}
diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go
index dcc3a044f..1f9ebe724 100644
--- a/libpod/runtime_pod_linux.go
+++ b/libpod/runtime_pod_linux.go
@@ -17,7 +17,7 @@ import (
"github.com/containers/podman/v4/libpod/events"
"github.com/containers/podman/v4/pkg/rootless"
"github.com/containers/podman/v4/pkg/specgen"
- spec "github.com/opencontainers/runtime-spec/specs-go"
+ runcconfig "github.com/opencontainers/runc/libcontainer/configs"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
@@ -66,6 +66,7 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option
case config.CgroupfsCgroupsManager:
canUseCgroup := !rootless.IsRootless() || isRootlessCgroupSet(pod.config.CgroupParent)
if canUseCgroup {
+ // Need to actually create the cgroup parent here.
if pod.config.CgroupParent == "" {
pod.config.CgroupParent = CgroupfsDefaultCgroupParent
} else if strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
@@ -73,12 +74,29 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option
}
// If we are set to use pod cgroups, set the cgroup parent that
// all containers in the pod will share
- // No need to create it with cgroupfs - the first container to
- // launch should do it for us
if pod.config.UsePodCgroup {
pod.state.CgroupPath = filepath.Join(pod.config.CgroupParent, pod.ID())
if p.InfraContainerSpec != nil {
p.InfraContainerSpec.CgroupParent = pod.state.CgroupPath
+ // cgroupfs + rootless = permission denied when creating the cgroup.
+ if !rootless.IsRootless() {
+ res, err := GetLimits(p.InfraContainerSpec.ResourceLimits)
+ if err != nil {
+ return nil, err
+ }
+ // We need to both create and update the cgroup:
+ // rather than adding a new pod-cgroup creation path in c/common,
+ // create it as if it were a container and then update it, which
+ // populates the resource limits at the pod level.
+ cgc, err := cgroups.New(pod.state.CgroupPath, &res)
+ if err != nil {
+ return nil, err
+ }
+ err = cgc.Update(&res)
+ if err != nil {
+ return nil, err
+ }
+ }
}
}
}
@@ -95,7 +113,7 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option
// If we are set to use pod cgroups, set the cgroup parent that
// all containers in the pod will share
if pod.config.UsePodCgroup {
- cgroupPath, err := systemdSliceFromPath(pod.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", pod.ID()))
+ cgroupPath, err := systemdSliceFromPath(pod.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", pod.ID()), p.InfraContainerSpec.ResourceLimits)
if err != nil {
return nil, errors.Wrapf(err, "unable to create pod cgroup for pod %s", pod.ID())
}
@@ -239,9 +257,8 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
}
// New resource limits
- resLimits := new(spec.LinuxResources)
- resLimits.Pids = new(spec.LinuxPids)
- resLimits.Pids.Limit = 1 // Inhibit forks with very low pids limit
+ resLimits := new(runcconfig.Resources)
+ resLimits.PidsLimit = 1 // Inhibit forks with very low pids limit
// Don't try if we failed to retrieve the cgroup
if err == nil {
@@ -301,7 +318,7 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
if !volume.Anonymous() {
continue
}
- if err := r.removeVolume(ctx, volume, false, timeout); err != nil {
+ if err := r.removeVolume(ctx, volume, false, timeout, false); err != nil {
if errors.Cause(err) == define.ErrNoSuchVolume || errors.Cause(err) == define.ErrVolumeRemoved {
continue
}
@@ -321,7 +338,7 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
switch p.runtime.config.Engine.CgroupManager {
case config.SystemdCgroupsManager:
- if err := deleteSystemdCgroup(p.state.CgroupPath); err != nil {
+ if err := deleteSystemdCgroup(p.state.CgroupPath, p.ResourceLim()); err != nil {
if removalErr == nil {
removalErr = errors.Wrapf(err, "error removing pod %s cgroup", p.ID())
} else {
diff --git a/libpod/runtime_volume.go b/libpod/runtime_volume.go
index 21bf8aefc..6872db21d 100644
--- a/libpod/runtime_volume.go
+++ b/libpod/runtime_volume.go
@@ -33,7 +33,7 @@ func (r *Runtime) RemoveVolume(ctx context.Context, v *Volume, force bool, timeo
return nil
}
}
- return r.removeVolume(ctx, v, force, timeout)
+ return r.removeVolume(ctx, v, force, timeout, false)
}
// GetVolume retrieves a volume given its full name.
diff --git a/libpod/runtime_volume_linux.go b/libpod/runtime_volume_linux.go
index f8788e183..17a48be86 100644
--- a/libpod/runtime_volume_linux.go
+++ b/libpod/runtime_volume_linux.go
@@ -5,6 +5,7 @@ package libpod
import (
"context"
+ "fmt"
"os"
"path/filepath"
"strings"
@@ -25,11 +26,13 @@ func (r *Runtime) NewVolume(ctx context.Context, options ...VolumeCreateOption)
if !r.valid {
return nil, define.ErrRuntimeStopped
}
- return r.newVolume(options...)
+ return r.newVolume(false, options...)
}
-// newVolume creates a new empty volume
-func (r *Runtime) newVolume(options ...VolumeCreateOption) (_ *Volume, deferredErr error) {
+// newVolume creates a new empty volume with the given options.
+// Setting noCreatePluginVolume to true skips creating the volume in the volume plugin;
+// this is required for the UpdateVolumePlugins() function. If you are not sure, set it to false.
+func (r *Runtime) newVolume(noCreatePluginVolume bool, options ...VolumeCreateOption) (_ *Volume, deferredErr error) {
volume := newVolume(r)
for _, option := range options {
if err := option(volume); err != nil {
@@ -73,7 +76,7 @@ func (r *Runtime) newVolume(options ...VolumeCreateOption) (_ *Volume, deferredE
return nil, errors.Wrapf(err, "invalid volume option %s for driver 'local'", key)
}
}
- case "o", "type", "uid", "gid", "size", "inodes", "noquota":
+ case "o", "type", "uid", "gid", "size", "inodes", "noquota", "copy", "nocopy":
// Do nothing, valid keys
default:
return nil, errors.Wrapf(define.ErrInvalidArg, "invalid mount option %s for driver 'local'", key)
@@ -83,7 +86,7 @@ func (r *Runtime) newVolume(options ...VolumeCreateOption) (_ *Volume, deferredE
// Now we get conditional: we either need to make the volume in the
// volume plugin, or on disk if not using a plugin.
- if volume.plugin != nil {
+ if volume.plugin != nil && !noCreatePluginVolume {
// We can't chown, or relabel, or similar the path the volume is
// using, because it's not managed by us.
// TODO: reevaluate this once we actually have volume plugins in
@@ -164,6 +167,85 @@ func (r *Runtime) newVolume(options ...VolumeCreateOption) (_ *Volume, deferredE
return volume, nil
}
+// UpdateVolumePlugins reads all volumes from all configured volume plugins and
+// imports them into the libpod db. It also checks whether existing libpod volumes
+// were removed in the plugin; in that case we try to remove them from libpod as well.
+// On errors we continue and try to do as much as possible; all errors are
+// returned as an array in the returned struct.
+// This function is inherently racy: it is best effort and cannot guarantee
+// a perfect state since plugins can be modified from the outside at any time.
+func (r *Runtime) UpdateVolumePlugins(ctx context.Context) *define.VolumeReload {
+ var (
+ added []string
+ removed []string
+ errs []error
+ allPluginVolumes = map[string]struct{}{}
+ )
+
+ for driverName, socket := range r.config.Engine.VolumePlugins {
+ driver, err := volplugin.GetVolumePlugin(driverName, socket)
+ if err != nil {
+ errs = append(errs, err)
+ continue
+ }
+ vols, err := driver.ListVolumes()
+ if err != nil {
+ errs = append(errs, fmt.Errorf("failed to read volumes from plugin %q: %w", driverName, err))
+ continue
+ }
+ for _, vol := range vols {
+ allPluginVolumes[vol.Name] = struct{}{}
+ if _, err := r.newVolume(true, WithVolumeName(vol.Name), WithVolumeDriver(driverName)); err != nil {
+ // If the volume exists this is not an error, just ignore it and log. It is very likely
+ // that the volume from the plugin was already in our db.
+ if !errors.Is(err, define.ErrVolumeExists) {
+ errs = append(errs, err)
+ continue
+ }
+ logrus.Infof("Volume %q already exists: %v", vol.Name, err)
+ continue
+ }
+ added = append(added, vol.Name)
+ }
+ }
+
+ libpodVolumes, err := r.state.AllVolumes()
+ if err != nil {
+ errs = append(errs, fmt.Errorf("cannot delete dangling plugin volumes: failed to read libpod volumes: %w", err))
+ }
+ for _, vol := range libpodVolumes {
+ if vol.UsesVolumeDriver() {
+ if _, ok := allPluginVolumes[vol.Name()]; !ok {
+ // The volume is no longer in the plugin, let's remove it from the libpod db.
+ if err := r.removeVolume(ctx, vol, false, nil, true); err != nil {
+ if errors.Is(err, define.ErrVolumeBeingUsed) {
+ // Volume is still used by at least one container. This is very bad,
+ // the plugin no longer has this but we still need it.
+ errs = append(errs, fmt.Errorf("volume was removed from the plugin %q but containers still require it: %w", vol.config.Driver, err))
+ continue
+ }
+ if errors.Is(err, define.ErrNoSuchVolume) || errors.Is(err, define.ErrVolumeRemoved) || errors.Is(err, define.ErrMissingPlugin) {
+ // Volume was already removed, no problem just ignore it and continue.
+ continue
+ }
+
+ // some other error
+ errs = append(errs, err)
+ continue
+ }
+ // Volume was successfully removed
+ removed = append(removed, vol.Name())
+ }
+ }
+ }
+
+ return &define.VolumeReload{
+ Added: added,
+ Removed: removed,
+ Errors: errs,
+ }
+}
+
// makeVolumeInPluginIfNotExist makes a volume in the given volume plugin if it
// does not already exist.
func makeVolumeInPluginIfNotExist(name string, options map[string]string, plugin *volplugin.VolumePlugin) error {
@@ -197,8 +279,10 @@ func makeVolumeInPluginIfNotExist(name string, options map[string]string, plugin
return nil
}
-// removeVolume removes the specified volume from state as well tears down its mountpoint and storage
-func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool, timeout *uint) error {
+// removeVolume removes the specified volume from state, as well as tearing down its mountpoint and storage.
+// ignoreVolumePlugin is used to only remove the volume from the db and not the plugin;
+// this is required when the volume was already removed from the plugin, as in UpdateVolumePlugins().
+func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool, timeout *uint, ignoreVolumePlugin bool) error {
if !v.valid {
if ok, _ := r.state.HasVolume(v.Name()); !ok {
return nil
@@ -263,7 +347,7 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool, timeo
var removalErr error
// If we use a volume plugin, we need to remove from the plugin.
- if v.UsesVolumeDriver() {
+ if v.UsesVolumeDriver() && !ignoreVolumePlugin {
canRemove := true
// Do we have a volume driver?
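The result of UpdateVolumePlugins is the new define.VolumeReload struct, so callers can report what was imported, what was dropped, and which plugins misbehaved. A hedged sketch of a caller, assuming an already constructed *libpod.Runtime (how it is obtained is elided):

    package main

    import (
        "context"
        "fmt"

        "github.com/containers/podman/v4/libpod"
    )

    // reportVolumeReload consumes the VolumeReload returned by the runtime.
    func reportVolumeReload(ctx context.Context, rt *libpod.Runtime) {
        reload := rt.UpdateVolumePlugins(ctx)
        for _, name := range reload.Added {
            fmt.Println("imported from plugin:", name)
        }
        for _, name := range reload.Removed {
            fmt.Println("removed, no longer in plugin:", name)
        }
        for _, err := range reload.Errors {
            fmt.Println("reload error:", err)
        }
    }

    func main() {}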
diff --git a/libpod/state.go b/libpod/state.go
index 471023769..4fbd3c302 100644
--- a/libpod/state.go
+++ b/libpod/state.go
@@ -111,6 +111,15 @@ type State interface {
// Return a container config from the database by full ID
GetContainerConfig(id string) (*ContainerConfig, error)
+ // Add the exit code for the specified container to the database.
+ AddContainerExitCode(id string, exitCode int32) error
+
+ // Return the exit code for the specified container.
+ GetContainerExitCode(id string) (int32, error)
+
+ // Remove exit codes older than 5 minutes.
+ PruneContainerExitCodes() error
+
// Add creates a reference to an exec session in the database.
// The container the exec session is attached to will be recorded.
// The container state will not be modified.
diff --git a/libpod/stats.go b/libpod/stats.go
index d2ffc3b32..eaac9d7d0 100644
--- a/libpod/stats.go
+++ b/libpod/stats.go
@@ -9,6 +9,8 @@ import (
"syscall"
"time"
+ runccgroup "github.com/opencontainers/runc/libcontainer/cgroups"
+
"github.com/containers/common/pkg/cgroups"
"github.com/containers/podman/v4/libpod/define"
"github.com/pkg/errors"
@@ -69,29 +71,29 @@ func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*de
// If the current total usage in the cgroup is less than what was previously
// recorded then it means the container was restarted and runs in a new cgroup
- if previousStats.Duration > cgroupStats.CPU.Usage.Total {
+ if previousStats.Duration > cgroupStats.CpuStats.CpuUsage.TotalUsage {
previousStats = &define.ContainerStats{}
}
previousCPU := previousStats.CPUNano
now := uint64(time.Now().UnixNano())
- stats.Duration = cgroupStats.CPU.Usage.Total
+ stats.Duration = cgroupStats.CpuStats.CpuUsage.TotalUsage
stats.UpTime = time.Duration(stats.Duration)
stats.CPU = calculateCPUPercent(cgroupStats, previousCPU, now, previousStats.SystemNano)
// calc the average cpu usage for the time the container is running
stats.AvgCPU = calculateCPUPercent(cgroupStats, 0, now, uint64(c.state.StartedTime.UnixNano()))
- stats.MemUsage = cgroupStats.Memory.Usage.Usage
+ stats.MemUsage = cgroupStats.MemoryStats.Usage.Usage
stats.MemLimit = c.getMemLimit()
stats.MemPerc = (float64(stats.MemUsage) / float64(stats.MemLimit)) * 100
stats.PIDs = 0
if conState == define.ContainerStateRunning || conState == define.ContainerStatePaused {
- stats.PIDs = cgroupStats.Pids.Current
+ stats.PIDs = cgroupStats.PidsStats.Current
}
stats.BlockInput, stats.BlockOutput = calculateBlockIO(cgroupStats)
- stats.CPUNano = cgroupStats.CPU.Usage.Total
- stats.CPUSystemNano = cgroupStats.CPU.Usage.Kernel
+ stats.CPUNano = cgroupStats.CpuStats.CpuUsage.TotalUsage
+ stats.CPUSystemNano = cgroupStats.CpuStats.CpuUsage.UsageInKernelmode
stats.SystemNano = now
- stats.PerCPU = cgroupStats.CPU.Usage.PerCPU
+ stats.PerCPU = cgroupStats.CpuStats.CpuUsage.PercpuUsage
// Handle case where the container is not in a network namespace
if netStats != nil {
stats.NetInput = netStats.TxBytes
@@ -133,10 +135,10 @@ func (c *Container) getMemLimit() uint64 {
// previousCPU is the last value of stats.CPU.Usage.Total measured at the time previousSystem.
// (now - previousSystem) is the time delta in nanoseconds, between the measurement in previousCPU
// and the updated value in stats.
-func calculateCPUPercent(stats *cgroups.Metrics, previousCPU, now, previousSystem uint64) float64 {
+func calculateCPUPercent(stats *runccgroup.Stats, previousCPU, now, previousSystem uint64) float64 {
var (
cpuPercent = 0.0
- cpuDelta = float64(stats.CPU.Usage.Total - previousCPU)
+ cpuDelta = float64(stats.CpuStats.CpuUsage.TotalUsage - previousCPU)
systemDelta = float64(now - previousSystem)
)
if systemDelta > 0.0 && cpuDelta > 0.0 {
@@ -146,8 +148,8 @@ func calculateCPUPercent(stats *cgroups.Metrics, previousCPU, now, previousSyste
return cpuPercent
}
-func calculateBlockIO(stats *cgroups.Metrics) (read uint64, write uint64) {
- for _, blkIOEntry := range stats.Blkio.IoServiceBytesRecursive {
+func calculateBlockIO(stats *runccgroup.Stats) (read uint64, write uint64) {
+ for _, blkIOEntry := range stats.BlkioStats.IoServiceBytesRecursive {
switch strings.ToLower(blkIOEntry.Op) {
case "read":
read += blkIOEntry.Value
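The stats code now reads the runc cgroup Stats struct directly, but the percentage math is unchanged: a delta of CPU nanoseconds over a delta of wall-clock nanoseconds. A worked example of that formula (the helper is a sketch, not the libpod function):

    package main

    import "fmt"

    // cpuPercent applies the usual delta formula used by calculateCPUPercent:
    // CPU time consumed between two samples divided by elapsed wall time.
    func cpuPercent(totalUsage, previousCPU, now, previousSystem uint64) float64 {
        cpuDelta := float64(totalUsage - previousCPU)
        systemDelta := float64(now - previousSystem)
        if systemDelta > 0.0 && cpuDelta > 0.0 {
            return (cpuDelta / systemDelta) * 100
        }
        return 0.0
    }

    func main() {
        // 150 ms of CPU time over a 1 s sampling window -> 15%.
        fmt.Printf("%.1f%%\n", cpuPercent(1_150_000_000, 1_000_000_000, 3_000_000_000, 2_000_000_000))
    }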
diff --git a/libpod/util_linux.go b/libpod/util_linux.go
index fe98056dc..414d1bff9 100644
--- a/libpod/util_linux.go
+++ b/libpod/util_linux.go
@@ -11,6 +11,7 @@ import (
"github.com/containers/common/pkg/cgroups"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/pkg/rootless"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
@@ -20,7 +21,7 @@ import (
// systemdSliceFromPath makes a new systemd slice under the given parent with
// the given name.
// The parent must be a slice. The name must NOT include ".slice"
-func systemdSliceFromPath(parent, name string) (string, error) {
+func systemdSliceFromPath(parent, name string, resources *spec.LinuxResources) (string, error) {
cgroupPath, err := assembleSystemdCgroupName(parent, name)
if err != nil {
return "", err
@@ -28,7 +29,7 @@ func systemdSliceFromPath(parent, name string) (string, error) {
logrus.Debugf("Created cgroup path %s for parent %s and name %s", cgroupPath, parent, name)
- if err := makeSystemdCgroup(cgroupPath); err != nil {
+ if err := makeSystemdCgroup(cgroupPath, resources); err != nil {
return "", errors.Wrapf(err, "error creating cgroup %s", cgroupPath)
}
@@ -45,8 +46,12 @@ func getDefaultSystemdCgroup() string {
}
// makeSystemdCgroup creates a systemd Cgroup at the given location.
-func makeSystemdCgroup(path string) error {
- controller, err := cgroups.NewSystemd(getDefaultSystemdCgroup())
+func makeSystemdCgroup(path string, resources *spec.LinuxResources) error {
+ res, err := GetLimits(resources)
+ if err != nil {
+ return err
+ }
+ controller, err := cgroups.NewSystemd(getDefaultSystemdCgroup(), &res)
if err != nil {
return err
}
@@ -54,12 +59,20 @@ func makeSystemdCgroup(path string) error {
if rootless.IsRootless() {
return controller.CreateSystemdUserUnit(path, rootless.GetRootlessUID())
}
- return controller.CreateSystemdUnit(path)
+ err = controller.CreateSystemdUnit(path)
+ if err != nil {
+ return err
+ }
+ return nil
}
// deleteSystemdCgroup deletes the systemd cgroup at the given location
-func deleteSystemdCgroup(path string) error {
- controller, err := cgroups.NewSystemd(getDefaultSystemdCgroup())
+func deleteSystemdCgroup(path string, resources *spec.LinuxResources) error {
+ res, err := GetLimits(resources)
+ if err != nil {
+ return err
+ }
+ controller, err := cgroups.NewSystemd(getDefaultSystemdCgroup(), &res)
if err != nil {
return err
}
diff --git a/libpod/volume_internal.go b/libpod/volume_internal.go
index e0ebb729d..24522c0f9 100644
--- a/libpod/volume_internal.go
+++ b/libpod/volume_internal.go
@@ -55,6 +55,12 @@ func (v *Volume) needsMount() bool {
if _, ok := v.config.Options["NOQUOTA"]; ok {
index++
}
+ if _, ok := v.config.Options["nocopy"]; ok {
+ index++
+ }
+ if _, ok := v.config.Options["copy"]; ok {
+ index++
+ }
// when uid or gid is set there is also the "o" option
// set so we have to ignore this one as well
if index > 0 {
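needsMount now also discounts the "copy" and "nocopy" keys, so a local volume that only carries these metadata-style options is not actually mounted. A sketch of that counting logic, under the assumption that the function returns true only when options beyond the discounted set remain:

    package main

    import "fmt"

    // needsMount is a sketch: count the options that do not require a real
    // mount ("o" is implied whenever any of them is present) and mount only
    // if something else is left over.
    func needsMount(options map[string]string) bool {
        index := 0
        for _, key := range []string{"UID", "GID", "NOQUOTA", "nocopy", "copy"} {
            if _, ok := options[key]; ok {
                index++
            }
        }
        // When uid/gid etc. are set, the "o" option is set as well, so
        // discount it alongside them.
        if index > 0 {
            index++
        }
        return len(options) > index
    }

    func main() {
        fmt.Println(needsMount(map[string]string{"UID": "1000", "o": "uid=1000"}))   // false
        fmt.Println(needsMount(map[string]string{"type": "tmpfs", "o": "size=10m"})) // true
    }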