Diffstat (limited to 'libpod')
-rw-r--r--  libpod/container_graph.go            91
-rw-r--r--  libpod/container_inspect.go         297
-rw-r--r--  libpod/container_inspect_freebsd.go  17
-rw-r--r--  libpod/container_inspect_linux.go   306
-rw-r--r--  libpod/pod_api.go                     2
-rw-r--r--  libpod/runtime_ctr.go                52
-rw-r--r--  libpod/runtime_img.go                 2
-rw-r--r--  libpod/runtime_pod_linux.go          92
-rw-r--r--  libpod/runtime_volume_linux.go        2
9 files changed, 518 insertions, 343 deletions
diff --git a/libpod/container_graph.go b/libpod/container_graph.go
index 96d61b756..d43579e4a 100644
--- a/libpod/container_graph.go
+++ b/libpod/container_graph.go
@@ -281,3 +281,94 @@ func startNode(ctx context.Context, node *containerNode, setError bool, ctrError
startNode(ctx, successor, ctrErrored, ctrErrors, ctrsVisited, restart)
}
}
+
+// Visit a node on the container graph and remove it, or set an error if it
+// failed to remove. Only intended for use in pod removal; do *not* use when
+// removing individual containers.
+// All containers are assumed to be *UNLOCKED* when this function is called.
+// Container locks will be acquired as necessary.
+// The pod is optional; if one is given, it must be *LOCKED*.
+func removeNode(ctx context.Context, node *containerNode, pod *Pod, force bool, timeout *uint, setError bool, ctrErrors map[string]error, ctrsVisited map[string]bool, ctrNamedVolumes map[string]*ContainerNamedVolume) {
+ // If we already visited this node, we're done.
+ if ctrsVisited[node.id] {
+ return
+ }
+
+ // Someone who depends on us failed.
+ // Mark us as failed and recurse.
+ if setError {
+ ctrsVisited[node.id] = true
+ ctrErrors[node.id] = fmt.Errorf("a container that depends on container %s could not be removed: %w", node.id, define.ErrCtrStateInvalid)
+
+ // Hit anyone who depends on us, set errors there as well.
+ for _, successor := range node.dependsOn {
+ removeNode(ctx, successor, pod, force, timeout, true, ctrErrors, ctrsVisited, ctrNamedVolumes)
+ }
+ }
+
+ // Does anyone still depend on us?
+ // We cannot be removed while they do. Once every container that
+ // depends on us has been removed, we can be removed as well.
+ for _, dep := range node.dependedOn {
+ // A container that depends on us hasn't been removed yet.
+ // Defer our removal; we will be revisited once it is gone.
+ if ok := ctrsVisited[dep.id]; !ok {
+ return
+ }
+ }
+
+ // We are going to try to remove the node, so mark it as visited.
+ ctrsVisited[node.id] = true
+
+ ctrErrored := false
+
+ // Verify that all containers that depend on us are gone.
+ // Graph traversal should guarantee this, but the check is cheap
+ // and it's better to be safe.
+ for _, dep := range node.dependedOn {
+ if _, err := node.container.runtime.GetContainer(dep.id); err == nil {
+ ctrErrored = true
+ ctrErrors[node.id] = fmt.Errorf("a container that depends on container %s still exists: %w", node.id, define.ErrDepExists)
+ }
+ }
+
+ // Lock the container
+ node.container.lock.Lock()
+
+ // Gate all subsequent bits behind a ctrErrored check - we don't want to
+ // proceed if a previous step failed.
+ if !ctrErrored {
+ if err := node.container.syncContainer(); err != nil {
+ ctrErrored = true
+ ctrErrors[node.id] = err
+ }
+ }
+
+ if !ctrErrored {
+ for _, vol := range node.container.config.NamedVolumes {
+ ctrNamedVolumes[vol.Name] = vol
+ }
+
+ if pod != nil && pod.state.InfraContainerID == node.id {
+ pod.state.InfraContainerID = ""
+ if err := pod.save(); err != nil {
+ ctrErrored = true
+ ctrErrors[node.id] = fmt.Errorf("error removing infra container %s from pod %s: %w", node.id, pod.ID(), err)
+ }
+ }
+ }
+
+ if !ctrErrored {
+ if err := node.container.runtime.removeContainer(ctx, node.container, force, false, true, false, timeout); err != nil {
+ ctrErrored = true
+ ctrErrors[node.id] = err
+ }
+ }
+
+ node.container.lock.Unlock()
+
+ // Recurse into the containers we depend on and remove them.
+ for _, successor := range node.dependsOn {
+ removeNode(ctx, successor, pod, force, timeout, ctrErrored, ctrErrors, ctrsVisited, ctrNamedVolumes)
+ }
+}
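For orientation, here is a minimal standalone sketch of the traversal order removeNode implements: a node is only processed once every node that depends on it has been visited, after which the traversal recurses into the node's own dependencies. The `node` type and `remove` function are hypothetical stand-ins for containerNode and removeNode, stripped of locking and error handling.

```go
package main

import "fmt"

// node is a simplified stand-in for libpod's containerNode: an ID plus
// edges in both directions.
type node struct {
	id         string
	dependsOn  []*node // nodes this one needs
	dependedOn []*node // nodes that need this one
}

// remove visits nodes in reverse-dependency order: a node is skipped
// while any dependent is unvisited, then recorded, then the traversal
// recurses into its own dependencies - mirroring removeNode above.
func remove(n *node, visited map[string]bool, order *[]string) {
	if visited[n.id] {
		return
	}
	for _, dep := range n.dependedOn {
		if !visited[dep.id] {
			return // a dependent still exists; it will revisit us later
		}
	}
	visited[n.id] = true
	*order = append(*order, n.id)
	for _, succ := range n.dependsOn {
		remove(succ, visited, order)
	}
}

func main() {
	infra := &node{id: "infra"}
	app := &node{id: "app", dependsOn: []*node{infra}}
	infra.dependedOn = []*node{app}

	visited := map[string]bool{}
	var order []string
	// Start from nodes nothing depends on, as removePod does with
	// graph.notDependedOnNodes.
	remove(app, visited, &order)
	fmt.Println(order) // [app infra]
}
```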
diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go
index b72d843b6..e4089efa6 100644
--- a/libpod/container_inspect.go
+++ b/libpod/container_inspect.go
@@ -3,20 +3,15 @@ package libpod
import (
"errors"
"fmt"
- "sort"
"strings"
- "github.com/containers/common/pkg/config"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/libpod/driver"
"github.com/containers/podman/v4/pkg/util"
"github.com/containers/storage/types"
units "github.com/docker/go-units"
spec "github.com/opencontainers/runtime-spec/specs-go"
- "github.com/opencontainers/runtime-tools/generate"
- "github.com/opencontainers/runtime-tools/validate"
"github.com/sirupsen/logrus"
- "github.com/syndtr/gocapability/capability"
)
// inspectLocked inspects a container for low-level information.
@@ -163,8 +158,6 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver
Driver: driverData.Name,
MountLabel: config.MountLabel,
ProcessLabel: config.ProcessLabel,
- EffectiveCaps: ctrSpec.Process.Capabilities.Effective,
- BoundingCaps: ctrSpec.Process.Capabilities.Bounding,
AppArmorProfile: ctrSpec.Process.ApparmorProfile,
ExecIDs: execIDs,
GraphDriver: driverData,
@@ -173,6 +166,10 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver
IsInfra: c.IsInfra(),
IsService: c.IsService(),
}
+ if ctrSpec.Process.Capabilities != nil {
+ data.EffectiveCaps = ctrSpec.Process.Capabilities.Effective
+ data.BoundingCaps = ctrSpec.Process.Capabilities.Bounding
+ }
if c.state.ConfigPath != "" {
data.OCIConfigPath = c.state.ConfigPath
@@ -484,11 +481,6 @@ func (c *Container) generateInspectContainerHostConfig(ctrSpec *spec.Spec, named
hostConfig.ShmSize = c.config.ShmSize
hostConfig.Runtime = "oci"
- // This is very expensive to initialize.
- // So we don't want to initialize it unless we absolutely have to - IE,
- // there are things that require a major:minor to path translation.
- var deviceNodes map[string]string
-
// Annotations
if ctrSpec.Annotations != nil {
hostConfig.ContainerIDFile = ctrSpec.Annotations[define.InspectAnnotationCIDFile]
@@ -506,109 +498,8 @@ func (c *Container) generateInspectContainerHostConfig(ctrSpec *spec.Spec, named
}
}
- // Resource limits
- if ctrSpec.Linux != nil {
- if ctrSpec.Linux.Resources != nil {
- if ctrSpec.Linux.Resources.CPU != nil {
- if ctrSpec.Linux.Resources.CPU.Shares != nil {
- hostConfig.CpuShares = *ctrSpec.Linux.Resources.CPU.Shares
- }
- if ctrSpec.Linux.Resources.CPU.Period != nil {
- hostConfig.CpuPeriod = *ctrSpec.Linux.Resources.CPU.Period
- }
- if ctrSpec.Linux.Resources.CPU.Quota != nil {
- hostConfig.CpuQuota = *ctrSpec.Linux.Resources.CPU.Quota
- }
- if ctrSpec.Linux.Resources.CPU.RealtimePeriod != nil {
- hostConfig.CpuRealtimePeriod = *ctrSpec.Linux.Resources.CPU.RealtimePeriod
- }
- if ctrSpec.Linux.Resources.CPU.RealtimeRuntime != nil {
- hostConfig.CpuRealtimeRuntime = *ctrSpec.Linux.Resources.CPU.RealtimeRuntime
- }
- hostConfig.CpusetCpus = ctrSpec.Linux.Resources.CPU.Cpus
- hostConfig.CpusetMems = ctrSpec.Linux.Resources.CPU.Mems
- }
- if ctrSpec.Linux.Resources.Memory != nil {
- if ctrSpec.Linux.Resources.Memory.Limit != nil {
- hostConfig.Memory = *ctrSpec.Linux.Resources.Memory.Limit
- }
- if ctrSpec.Linux.Resources.Memory.Reservation != nil {
- hostConfig.MemoryReservation = *ctrSpec.Linux.Resources.Memory.Reservation
- }
- if ctrSpec.Linux.Resources.Memory.Swap != nil {
- hostConfig.MemorySwap = *ctrSpec.Linux.Resources.Memory.Swap
- }
- if ctrSpec.Linux.Resources.Memory.Swappiness != nil {
- hostConfig.MemorySwappiness = int64(*ctrSpec.Linux.Resources.Memory.Swappiness)
- } else {
- // Swappiness has a default of -1
- hostConfig.MemorySwappiness = -1
- }
- if ctrSpec.Linux.Resources.Memory.DisableOOMKiller != nil {
- hostConfig.OomKillDisable = *ctrSpec.Linux.Resources.Memory.DisableOOMKiller
- }
- }
- if ctrSpec.Linux.Resources.Pids != nil {
- hostConfig.PidsLimit = ctrSpec.Linux.Resources.Pids.Limit
- }
- hostConfig.CgroupConf = ctrSpec.Linux.Resources.Unified
- if ctrSpec.Linux.Resources.BlockIO != nil {
- if ctrSpec.Linux.Resources.BlockIO.Weight != nil {
- hostConfig.BlkioWeight = *ctrSpec.Linux.Resources.BlockIO.Weight
- }
- hostConfig.BlkioWeightDevice = []define.InspectBlkioWeightDevice{}
- for _, dev := range ctrSpec.Linux.Resources.BlockIO.WeightDevice {
- key := fmt.Sprintf("%d:%d", dev.Major, dev.Minor)
- // TODO: how do we handle LeafWeight vs
- // Weight? For now, ignore anything
- // without Weight set.
- if dev.Weight == nil {
- logrus.Infof("Ignoring weight device %s as it lacks a weight", key)
- continue
- }
- if deviceNodes == nil {
- nodes, err := util.FindDeviceNodes()
- if err != nil {
- return nil, err
- }
- deviceNodes = nodes
- }
- path, ok := deviceNodes[key]
- if !ok {
- logrus.Infof("Could not locate weight device %s in system devices", key)
- continue
- }
- weightDev := define.InspectBlkioWeightDevice{}
- weightDev.Path = path
- weightDev.Weight = *dev.Weight
- hostConfig.BlkioWeightDevice = append(hostConfig.BlkioWeightDevice, weightDev)
- }
-
- readBps, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleReadBpsDevice)
- if err != nil {
- return nil, err
- }
- hostConfig.BlkioDeviceReadBps = readBps
-
- writeBps, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleWriteBpsDevice)
- if err != nil {
- return nil, err
- }
- hostConfig.BlkioDeviceWriteBps = writeBps
-
- readIops, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleReadIOPSDevice)
- if err != nil {
- return nil, err
- }
- hostConfig.BlkioDeviceReadIOps = readIops
-
- writeIops, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleWriteIOPSDevice)
- if err != nil {
- return nil, err
- }
- hostConfig.BlkioDeviceWriteIOps = writeIops
- }
- }
+ if err := c.platformInspectContainerHostConfig(ctrSpec, hostConfig); err != nil {
+ return nil, err
}
// NanoCPUs.
@@ -659,182 +550,6 @@ func (c *Container) generateInspectContainerHostConfig(ctrSpec *spec.Spec, named
hostConfig.PortBindings = make(map[string][]define.InspectHostPort)
}
- // Cap add and cap drop.
- // We need a default set of capabilities to compare against.
- // The OCI generate package has one, and is commonly used, so we'll
- // use it.
- // Problem: there are 5 sets of capabilities.
- // Use the bounding set for this computation, it's the most encompassing
- // (but still not perfect).
- capAdd := []string{}
- capDrop := []string{}
- // No point in continuing if we got a spec without a Process block...
- if ctrSpec.Process != nil {
- // Max an O(1) lookup table for default bounding caps.
- boundingCaps := make(map[string]bool)
- g, err := generate.New("linux")
- if err != nil {
- return nil, err
- }
- if !hostConfig.Privileged {
- for _, cap := range g.Config.Process.Capabilities.Bounding {
- boundingCaps[cap] = true
- }
- } else {
- // If we are privileged, use all caps.
- for _, cap := range capability.List() {
- if g.HostSpecific && cap > validate.LastCap() {
- continue
- }
- boundingCaps[fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String()))] = true
- }
- }
- // Iterate through spec caps.
- // If it's not in default bounding caps, it was added.
- // If it is, delete from the default set. Whatever remains after
- // we finish are the dropped caps.
- for _, cap := range ctrSpec.Process.Capabilities.Bounding {
- if _, ok := boundingCaps[cap]; ok {
- delete(boundingCaps, cap)
- } else {
- capAdd = append(capAdd, cap)
- }
- }
- for cap := range boundingCaps {
- capDrop = append(capDrop, cap)
- }
- // Sort CapDrop so it displays in consistent order (GH #9490)
- sort.Strings(capDrop)
- }
- hostConfig.CapAdd = capAdd
- hostConfig.CapDrop = capDrop
- switch {
- case c.config.IPCNsCtr != "":
- hostConfig.IpcMode = fmt.Sprintf("container:%s", c.config.IPCNsCtr)
- case ctrSpec.Linux != nil:
- // Locate the spec's IPC namespace.
- // If there is none, it's ipc=host.
- // If there is one and it has a path, it's "ns:".
- // If no path, it's default - the empty string.
- for _, ns := range ctrSpec.Linux.Namespaces {
- if ns.Type == spec.IPCNamespace {
- if ns.Path != "" {
- hostConfig.IpcMode = fmt.Sprintf("ns:%s", ns.Path)
- } else {
- break
- }
- }
- }
- case c.config.NoShm:
- hostConfig.IpcMode = "none"
- case c.config.NoShmShare:
- hostConfig.IpcMode = "private"
- }
- if hostConfig.IpcMode == "" {
- hostConfig.IpcMode = "shareable"
- }
-
- // Cgroup namespace mode
- cgroupMode := ""
- if c.config.CgroupNsCtr != "" {
- cgroupMode = fmt.Sprintf("container:%s", c.config.CgroupNsCtr)
- } else if ctrSpec.Linux != nil {
- // Locate the spec's cgroup namespace
- // If there is none, it's cgroup=host.
- // If there is one and it has a path, it's "ns:".
- // If there is no path, it's private.
- for _, ns := range ctrSpec.Linux.Namespaces {
- if ns.Type == spec.CgroupNamespace {
- if ns.Path != "" {
- cgroupMode = fmt.Sprintf("ns:%s", ns.Path)
- } else {
- cgroupMode = "private"
- }
- }
- }
- if cgroupMode == "" {
- cgroupMode = "host"
- }
- }
- hostConfig.CgroupMode = cgroupMode
-
- // Cgroup parent
- // Need to check if it's the default, and not print if so.
- defaultCgroupParent := ""
- switch c.CgroupManager() {
- case config.CgroupfsCgroupsManager:
- defaultCgroupParent = CgroupfsDefaultCgroupParent
- case config.SystemdCgroupsManager:
- defaultCgroupParent = SystemdDefaultCgroupParent
- }
- if c.config.CgroupParent != defaultCgroupParent {
- hostConfig.CgroupParent = c.config.CgroupParent
- }
- hostConfig.CgroupManager = c.CgroupManager()
-
- // PID namespace mode
- pidMode := ""
- if c.config.PIDNsCtr != "" {
- pidMode = fmt.Sprintf("container:%s", c.config.PIDNsCtr)
- } else if ctrSpec.Linux != nil {
- // Locate the spec's PID namespace.
- // If there is none, it's pid=host.
- // If there is one and it has a path, it's "ns:".
- // If there is no path, it's default - the empty string.
- for _, ns := range ctrSpec.Linux.Namespaces {
- if ns.Type == spec.PIDNamespace {
- if ns.Path != "" {
- pidMode = fmt.Sprintf("ns:%s", ns.Path)
- } else {
- pidMode = "private"
- }
- break
- }
- }
- if pidMode == "" {
- pidMode = "host"
- }
- }
- hostConfig.PidMode = pidMode
-
- // UTS namespace mode
- utsMode := c.NamespaceMode(spec.UTSNamespace, ctrSpec)
-
- hostConfig.UTSMode = utsMode
-
- // User namespace mode
- usernsMode := ""
- if c.config.UserNsCtr != "" {
- usernsMode = fmt.Sprintf("container:%s", c.config.UserNsCtr)
- } else if ctrSpec.Linux != nil {
- // Locate the spec's user namespace.
- // If there is none, it's default - the empty string.
- // If there is one, it's "private" if no path, or "ns:" if
- // there's a path.
-
- for _, ns := range ctrSpec.Linux.Namespaces {
- if ns.Type == spec.UserNamespace {
- if ns.Path != "" {
- usernsMode = fmt.Sprintf("ns:%s", ns.Path)
- } else {
- usernsMode = "private"
- }
- }
- }
- }
- hostConfig.UsernsMode = usernsMode
- if c.config.IDMappings.UIDMap != nil && c.config.IDMappings.GIDMap != nil {
- hostConfig.IDMappings = generateIDMappings(c.config.IDMappings)
- }
- // Devices
- // Do not include if privileged - assumed that all devices will be
- // included.
- var err error
- hostConfig.Devices, err = c.GetDevices(hostConfig.Privileged, *ctrSpec, deviceNodes)
- if err != nil {
- return nil, err
- }
-
// Ulimits
hostConfig.Ulimits = []define.InspectUlimit{}
if ctrSpec.Process != nil {
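The refactor above leans on Go's implicit GOOS build constraints: a file named container_inspect_linux.go compiles only on Linux, container_inspect_freebsd.go only on FreeBSD, so each can define its own platformInspectContainerHostConfig for the shared code in container_inspect.go to call. A minimal sketch of the pattern, using a hypothetical hostconfig package (not part of libpod):

```go
// hostconfig_linux.go - the _linux filename suffix is an implicit
// build constraint, so this file and a sibling hostconfig_freebsd.go
// can each define platformInspect for their own GOOS without conflict.
package hostconfig

type Container struct{}

// platformInspect fills in the fields that only make sense on Linux;
// the FreeBSD sibling file would supply its own version.
func (c *Container) platformInspect(out map[string]string) error {
	out["CgroupMode"] = "private" // cgroup namespaces exist only on Linux
	return nil
}
```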
diff --git a/libpod/container_inspect_freebsd.go b/libpod/container_inspect_freebsd.go
new file mode 100644
index 000000000..8b4e8df87
--- /dev/null
+++ b/libpod/container_inspect_freebsd.go
@@ -0,0 +1,17 @@
+package libpod
+
+import (
+ "github.com/containers/podman/v4/libpod/define"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+func (c *Container) platformInspectContainerHostConfig(ctrSpec *spec.Spec, hostConfig *define.InspectContainerHostConfig) error {
+ // FreeBSD jails take pids from the global pool but can only see
+ // their own, so "host" is the closest available match for PidMode.
+ hostConfig.PidMode = "host"
+
+ // UTS namespace mode
+ hostConfig.UTSMode = c.NamespaceMode(spec.UTSNamespace, ctrSpec)
+
+ return nil
+}
diff --git a/libpod/container_inspect_linux.go b/libpod/container_inspect_linux.go
new file mode 100644
index 000000000..355690d70
--- /dev/null
+++ b/libpod/container_inspect_linux.go
@@ -0,0 +1,306 @@
+package libpod
+
+import (
+ "fmt"
+ "sort"
+ "strings"
+
+ "github.com/containers/common/pkg/config"
+ "github.com/containers/podman/v4/libpod/define"
+ "github.com/containers/podman/v4/pkg/util"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/opencontainers/runtime-tools/generate"
+ "github.com/opencontainers/runtime-tools/validate"
+ "github.com/sirupsen/logrus"
+ "github.com/syndtr/gocapability/capability"
+)
+
+func (c *Container) platformInspectContainerHostConfig(ctrSpec *spec.Spec, hostConfig *define.InspectContainerHostConfig) error {
+ // This is very expensive to initialize, so we don't want to do it
+ // unless we absolutely have to - i.e., something requires a
+ // major:minor to path translation.
+ var deviceNodes map[string]string
+
+ // Resource limits
+ if ctrSpec.Linux != nil {
+ if ctrSpec.Linux.Resources != nil {
+ if ctrSpec.Linux.Resources.CPU != nil {
+ if ctrSpec.Linux.Resources.CPU.Shares != nil {
+ hostConfig.CpuShares = *ctrSpec.Linux.Resources.CPU.Shares
+ }
+ if ctrSpec.Linux.Resources.CPU.Period != nil {
+ hostConfig.CpuPeriod = *ctrSpec.Linux.Resources.CPU.Period
+ }
+ if ctrSpec.Linux.Resources.CPU.Quota != nil {
+ hostConfig.CpuQuota = *ctrSpec.Linux.Resources.CPU.Quota
+ }
+ if ctrSpec.Linux.Resources.CPU.RealtimePeriod != nil {
+ hostConfig.CpuRealtimePeriod = *ctrSpec.Linux.Resources.CPU.RealtimePeriod
+ }
+ if ctrSpec.Linux.Resources.CPU.RealtimeRuntime != nil {
+ hostConfig.CpuRealtimeRuntime = *ctrSpec.Linux.Resources.CPU.RealtimeRuntime
+ }
+ hostConfig.CpusetCpus = ctrSpec.Linux.Resources.CPU.Cpus
+ hostConfig.CpusetMems = ctrSpec.Linux.Resources.CPU.Mems
+ }
+ if ctrSpec.Linux.Resources.Memory != nil {
+ if ctrSpec.Linux.Resources.Memory.Limit != nil {
+ hostConfig.Memory = *ctrSpec.Linux.Resources.Memory.Limit
+ }
+ if ctrSpec.Linux.Resources.Memory.Reservation != nil {
+ hostConfig.MemoryReservation = *ctrSpec.Linux.Resources.Memory.Reservation
+ }
+ if ctrSpec.Linux.Resources.Memory.Swap != nil {
+ hostConfig.MemorySwap = *ctrSpec.Linux.Resources.Memory.Swap
+ }
+ if ctrSpec.Linux.Resources.Memory.Swappiness != nil {
+ hostConfig.MemorySwappiness = int64(*ctrSpec.Linux.Resources.Memory.Swappiness)
+ } else {
+ // Swappiness has a default of -1
+ hostConfig.MemorySwappiness = -1
+ }
+ if ctrSpec.Linux.Resources.Memory.DisableOOMKiller != nil {
+ hostConfig.OomKillDisable = *ctrSpec.Linux.Resources.Memory.DisableOOMKiller
+ }
+ }
+ if ctrSpec.Linux.Resources.Pids != nil {
+ hostConfig.PidsLimit = ctrSpec.Linux.Resources.Pids.Limit
+ }
+ hostConfig.CgroupConf = ctrSpec.Linux.Resources.Unified
+ if ctrSpec.Linux.Resources.BlockIO != nil {
+ if ctrSpec.Linux.Resources.BlockIO.Weight != nil {
+ hostConfig.BlkioWeight = *ctrSpec.Linux.Resources.BlockIO.Weight
+ }
+ hostConfig.BlkioWeightDevice = []define.InspectBlkioWeightDevice{}
+ for _, dev := range ctrSpec.Linux.Resources.BlockIO.WeightDevice {
+ key := fmt.Sprintf("%d:%d", dev.Major, dev.Minor)
+ // TODO: how do we handle LeafWeight vs
+ // Weight? For now, ignore anything
+ // without Weight set.
+ if dev.Weight == nil {
+ logrus.Infof("Ignoring weight device %s as it lacks a weight", key)
+ continue
+ }
+ if deviceNodes == nil {
+ nodes, err := util.FindDeviceNodes()
+ if err != nil {
+ return err
+ }
+ deviceNodes = nodes
+ }
+ path, ok := deviceNodes[key]
+ if !ok {
+ logrus.Infof("Could not locate weight device %s in system devices", key)
+ continue
+ }
+ weightDev := define.InspectBlkioWeightDevice{}
+ weightDev.Path = path
+ weightDev.Weight = *dev.Weight
+ hostConfig.BlkioWeightDevice = append(hostConfig.BlkioWeightDevice, weightDev)
+ }
+
+ readBps, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleReadBpsDevice)
+ if err != nil {
+ return err
+ }
+ hostConfig.BlkioDeviceReadBps = readBps
+
+ writeBps, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleWriteBpsDevice)
+ if err != nil {
+ return err
+ }
+ hostConfig.BlkioDeviceWriteBps = writeBps
+
+ readIops, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleReadIOPSDevice)
+ if err != nil {
+ return err
+ }
+ hostConfig.BlkioDeviceReadIOps = readIops
+
+ writeIops, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleWriteIOPSDevice)
+ if err != nil {
+ return err
+ }
+ hostConfig.BlkioDeviceWriteIOps = writeIops
+ }
+ }
+ }
+
+ // Cap add and cap drop.
+ // We need a default set of capabilities to compare against.
+ // The OCI generate package has one, and is commonly used, so we'll
+ // use it.
+ // Problem: there are 5 sets of capabilities.
+ // Use the bounding set for this computation, it's the most encompassing
+ // (but still not perfect).
+ capAdd := []string{}
+ capDrop := []string{}
+ // No point in continuing if we got a spec without a Process block...
+ if ctrSpec.Process != nil {
+ // Make an O(1) lookup table for default bounding caps.
+ boundingCaps := make(map[string]bool)
+ g, err := generate.New("linux")
+ if err != nil {
+ return err
+ }
+ if !hostConfig.Privileged {
+ for _, cap := range g.Config.Process.Capabilities.Bounding {
+ boundingCaps[cap] = true
+ }
+ } else {
+ // If we are privileged, use all caps.
+ for _, cap := range capability.List() {
+ if g.HostSpecific && cap > validate.LastCap() {
+ continue
+ }
+ boundingCaps[fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String()))] = true
+ }
+ }
+ // Iterate through spec caps.
+ // If it's not in default bounding caps, it was added.
+ // If it is, delete from the default set. Whatever remains after
+ // we finish are the dropped caps.
+ for _, cap := range ctrSpec.Process.Capabilities.Bounding {
+ if _, ok := boundingCaps[cap]; ok {
+ delete(boundingCaps, cap)
+ } else {
+ capAdd = append(capAdd, cap)
+ }
+ }
+ for cap := range boundingCaps {
+ capDrop = append(capDrop, cap)
+ }
+ // Sort CapDrop so it displays in consistent order (GH #9490)
+ sort.Strings(capDrop)
+ }
+ hostConfig.CapAdd = capAdd
+ hostConfig.CapDrop = capDrop
+ switch {
+ case c.config.IPCNsCtr != "":
+ hostConfig.IpcMode = fmt.Sprintf("container:%s", c.config.IPCNsCtr)
+ case ctrSpec.Linux != nil:
+ // Locate the spec's IPC namespace.
+ // If there is none, it's ipc=host.
+ // If there is one and it has a path, it's "ns:".
+ // If no path, it's default - the empty string.
+ for _, ns := range ctrSpec.Linux.Namespaces {
+ if ns.Type == spec.IPCNamespace {
+ if ns.Path != "" {
+ hostConfig.IpcMode = fmt.Sprintf("ns:%s", ns.Path)
+ } else {
+ break
+ }
+ }
+ }
+ case c.config.NoShm:
+ hostConfig.IpcMode = "none"
+ case c.config.NoShmShare:
+ hostConfig.IpcMode = "private"
+ }
+ if hostConfig.IpcMode == "" {
+ hostConfig.IpcMode = "shareable"
+ }
+
+ // Cgroup namespace mode
+ cgroupMode := ""
+ if c.config.CgroupNsCtr != "" {
+ cgroupMode = fmt.Sprintf("container:%s", c.config.CgroupNsCtr)
+ } else if ctrSpec.Linux != nil {
+ // Locate the spec's cgroup namespace
+ // If there is none, it's cgroup=host.
+ // If there is one and it has a path, it's "ns:".
+ // If there is no path, it's private.
+ for _, ns := range ctrSpec.Linux.Namespaces {
+ if ns.Type == spec.CgroupNamespace {
+ if ns.Path != "" {
+ cgroupMode = fmt.Sprintf("ns:%s", ns.Path)
+ } else {
+ cgroupMode = "private"
+ }
+ }
+ }
+ if cgroupMode == "" {
+ cgroupMode = "host"
+ }
+ }
+ hostConfig.CgroupMode = cgroupMode
+
+ // Cgroup parent
+ // Need to check if it's the default, and not print if so.
+ defaultCgroupParent := ""
+ switch c.CgroupManager() {
+ case config.CgroupfsCgroupsManager:
+ defaultCgroupParent = CgroupfsDefaultCgroupParent
+ case config.SystemdCgroupsManager:
+ defaultCgroupParent = SystemdDefaultCgroupParent
+ }
+ if c.config.CgroupParent != defaultCgroupParent {
+ hostConfig.CgroupParent = c.config.CgroupParent
+ }
+ hostConfig.CgroupManager = c.CgroupManager()
+
+ // PID namespace mode
+ pidMode := ""
+ if c.config.PIDNsCtr != "" {
+ pidMode = fmt.Sprintf("container:%s", c.config.PIDNsCtr)
+ } else if ctrSpec.Linux != nil {
+ // Locate the spec's PID namespace.
+ // If there is none, it's pid=host.
+ // If there is one and it has a path, it's "ns:".
+ // If there is no path, it's default - the empty string.
+ for _, ns := range ctrSpec.Linux.Namespaces {
+ if ns.Type == spec.PIDNamespace {
+ if ns.Path != "" {
+ pidMode = fmt.Sprintf("ns:%s", ns.Path)
+ } else {
+ pidMode = "private"
+ }
+ break
+ }
+ }
+ if pidMode == "" {
+ pidMode = "host"
+ }
+ }
+ hostConfig.PidMode = pidMode
+
+ // UTS namespace mode
+ utsMode := c.NamespaceMode(spec.UTSNamespace, ctrSpec)
+
+ hostConfig.UTSMode = utsMode
+
+ // User namespace mode
+ usernsMode := ""
+ if c.config.UserNsCtr != "" {
+ usernsMode = fmt.Sprintf("container:%s", c.config.UserNsCtr)
+ } else if ctrSpec.Linux != nil {
+ // Locate the spec's user namespace.
+ // If there is none, it's default - the empty string.
+ // If there is one, it's "private" if no path, or "ns:" if
+ // there's a path.
+
+ for _, ns := range ctrSpec.Linux.Namespaces {
+ if ns.Type == spec.UserNamespace {
+ if ns.Path != "" {
+ usernsMode = fmt.Sprintf("ns:%s", ns.Path)
+ } else {
+ usernsMode = "private"
+ }
+ }
+ }
+ }
+ hostConfig.UsernsMode = usernsMode
+ if c.config.IDMappings.UIDMap != nil && c.config.IDMappings.GIDMap != nil {
+ hostConfig.IDMappings = generateIDMappings(c.config.IDMappings)
+ }
+ // Devices
+ // Do not include if privileged - assumed that all devices will be
+ // included.
+ var err error
+ hostConfig.Devices, err = c.GetDevices(hostConfig.Privileged, *ctrSpec, deviceNodes)
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
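The CapAdd/CapDrop computation above is a set difference against a default bounding set: anything in the spec's bounding set but not in the defaults was added, and whatever default remains unmatched was dropped. A simplified, self-contained sketch of the same logic (diffCaps and the sample capability lists are illustrative, not libpod API):

```go
package main

import (
	"fmt"
	"sort"
)

// diffCaps reproduces the CapAdd/CapDrop computation: spec caps not in
// the defaults were added; defaults left unmatched were dropped.
func diffCaps(defaults, bounding []string) (capAdd, capDrop []string) {
	remaining := make(map[string]bool, len(defaults))
	for _, c := range defaults {
		remaining[c] = true
	}
	for _, c := range bounding {
		if remaining[c] {
			delete(remaining, c)
		} else {
			capAdd = append(capAdd, c)
		}
	}
	for c := range remaining {
		capDrop = append(capDrop, c)
	}
	sort.Strings(capDrop) // stable display order (GH #9490)
	return capAdd, capDrop
}

func main() {
	defaults := []string{"CAP_CHOWN", "CAP_KILL", "CAP_NET_BIND_SERVICE"}
	bounding := []string{"CAP_CHOWN", "CAP_SYS_ADMIN"}
	add, drop := diffCaps(defaults, bounding)
	fmt.Println(add, drop) // [CAP_SYS_ADMIN] [CAP_KILL CAP_NET_BIND_SERVICE]
}
```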
diff --git a/libpod/pod_api.go b/libpod/pod_api.go
index 1bd686ddc..924d43436 100644
--- a/libpod/pod_api.go
+++ b/libpod/pod_api.go
@@ -40,7 +40,7 @@ func (p *Pod) startInitContainers(ctx context.Context) error {
icLock := initCon.lock
icLock.Lock()
var time *uint
- if err := p.runtime.removeContainer(ctx, initCon, false, false, true, time); err != nil {
+ if err := p.runtime.removeContainer(ctx, initCon, false, false, true, false, time); err != nil {
icLock.Unlock()
return fmt.Errorf("failed to remove once init container %s: %w", initCon.ID(), err)
}
diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index 1f032dd6b..7b3cbadfa 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -581,7 +581,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
// be removed also if and only if the container is the sole user
// Otherwise, RemoveContainer will return an error if the container is running
func (r *Runtime) RemoveContainer(ctx context.Context, c *Container, force bool, removeVolume bool, timeout *uint) error {
- return r.removeContainer(ctx, c, force, removeVolume, false, timeout)
+ return r.removeContainer(ctx, c, force, removeVolume, false, false, timeout)
}
// Internal function to remove a container.
@@ -589,7 +589,9 @@ func (r *Runtime) RemoveContainer(ctx context.Context, c *Container, force bool,
// removePod is used only when removing pods. It instructs Podman to ignore
// infra container protections, and *not* remove from the database (as pod
// remove will handle that).
-func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, removeVolume, removePod bool, timeout *uint) error {
+// ignoreDeps is *DANGEROUS* and should not be used outside of a very specific
+// context (alternate pod removal code, where graph traversal is not possible).
+func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, removeVolume, removePod, ignoreDeps bool, timeout *uint) error {
if !c.valid {
if ok, _ := r.state.HasContainer(c.ID()); !ok {
// Container probably already removed
@@ -618,25 +620,27 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
// pod.
var pod *Pod
runtime := c.runtime
- if c.config.Pod != "" && !removePod {
+ if c.config.Pod != "" {
pod, err = r.state.Pod(c.config.Pod)
if err != nil {
return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), pod.ID(), err)
}
- // Lock the pod while we're removing container
- if pod.config.LockID == c.config.LockID {
- return fmt.Errorf("container %s and pod %s share lock ID %d: %w", c.ID(), pod.ID(), c.config.LockID, define.ErrWillDeadlock)
- }
- pod.lock.Lock()
- defer pod.lock.Unlock()
- if err := pod.updatePod(); err != nil {
- return err
- }
+ if !removePod {
+ // Lock the pod while we're removing container
+ if pod.config.LockID == c.config.LockID {
+ return fmt.Errorf("container %s and pod %s share lock ID %d: %w", c.ID(), pod.ID(), c.config.LockID, define.ErrWillDeadlock)
+ }
+ pod.lock.Lock()
+ defer pod.lock.Unlock()
+ if err := pod.updatePod(); err != nil {
+ return err
+ }
- infraID := pod.state.InfraContainerID
- if c.ID() == infraID {
- return fmt.Errorf("container %s is the infra container of pod %s and cannot be removed without removing the pod", c.ID(), pod.ID())
+ infraID := pod.state.InfraContainerID
+ if c.ID() == infraID {
+ return fmt.Errorf("container %s is the infra container of pod %s and cannot be removed without removing the pod", c.ID(), pod.ID())
+ }
}
}
@@ -696,7 +700,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
// Check that no other containers depend on the container.
// Only used if not removing a pod - pods guarantee that all
// deps will be evicted at the same time.
- if !removePod {
+ if !ignoreDeps {
deps, err := r.state.ContainerInUse(c)
if err != nil {
return err
@@ -777,13 +781,11 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
if c.config.Pod != "" {
// If we're removing the pod, the container will be evicted
// from the state elsewhere
- if !removePod {
- if err := r.state.RemoveContainerFromPod(pod, c); err != nil {
- if cleanupErr == nil {
- cleanupErr = err
- } else {
- logrus.Errorf("Removing container %s from database: %v", c.ID(), err)
- }
+ if err := r.state.RemoveContainerFromPod(pod, c); err != nil {
+ if cleanupErr == nil {
+ cleanupErr = err
+ } else {
+ logrus.Errorf("Removing container %s from database: %v", c.ID(), err)
}
}
} else {
@@ -872,7 +874,7 @@ func (r *Runtime) evictContainer(ctx context.Context, idOrName string, removeVol
if err == nil {
logrus.Infof("Container %s successfully retrieved from state, attempting normal removal", id)
// Assume force = true for the evict case
- err = r.removeContainer(ctx, tmpCtr, true, removeVolume, false, timeout)
+ err = r.removeContainer(ctx, tmpCtr, true, removeVolume, false, false, timeout)
if !tmpCtr.valid {
// If the container is marked invalid, remove succeeded
// in kicking it out of the state - no need to continue.
@@ -1034,7 +1036,7 @@ func (r *Runtime) RemoveDepend(ctx context.Context, rmCtr *Container, force bool
}
report := reports.RmReport{Id: rmCtr.ID(), RawInput: rmCtr.ID()}
- report.Err = r.removeContainer(ctx, rmCtr, force, removeVolume, false, timeout)
+ report.Err = r.removeContainer(ctx, rmCtr, force, removeVolume, false, false, timeout)
return append(rmReports, &report), nil
}
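With the new ignoreDeps parameter, removeContainer now takes four positional booleans, which makes call sites easy to misread. A small stub showing the flag order (the stub body and container ID are illustrative only, not libpod code):

```go
package main

import (
	"context"
	"fmt"
)

// Stub with the same boolean layout as the new removeContainer
// signature: force, removeVolume, removePod, ignoreDeps.
func removeContainer(ctx context.Context, id string, force, removeVolume, removePod, ignoreDeps bool, timeout *uint) error {
	if !ignoreDeps {
		// Normal path: refuse removal if other containers depend on this one.
		fmt.Println("checking dependencies of", id)
	}
	return nil
}

func main() {
	// External callers keep ignoreDeps=false; only the malformed-pod
	// fallback in removePod passes true.
	_ = removeContainer(context.Background(), "abc123", true, false, false, false, nil)
}
```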
diff --git a/libpod/runtime_img.go b/libpod/runtime_img.go
index 5510b2af6..dacbd752f 100644
--- a/libpod/runtime_img.go
+++ b/libpod/runtime_img.go
@@ -47,7 +47,7 @@ func (r *Runtime) RemoveContainersForImageCallback(ctx context.Context) libimage
return fmt.Errorf("removing image %s: container %s using image could not be removed: %w", imageID, ctr.ID(), err)
}
} else {
- if err := r.removeContainer(ctx, ctr, true, false, false, timeout); err != nil {
+ if err := r.removeContainer(ctx, ctr, true, false, false, false, timeout); err != nil {
return fmt.Errorf("removing image %s: container %s using image could not be removed: %w", imageID, ctr.ID(), err)
}
}
diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go
index 3eeef69d8..24e9f3da7 100644
--- a/libpod/runtime_pod_linux.go
+++ b/libpod/runtime_pod_linux.go
@@ -17,6 +17,7 @@ import (
"github.com/containers/podman/v4/libpod/events"
"github.com/containers/podman/v4/pkg/rootless"
"github.com/containers/podman/v4/pkg/specgen"
+ "github.com/hashicorp/go-multierror"
"github.com/sirupsen/logrus"
)
@@ -191,29 +192,9 @@ func (r *Runtime) SavePod(pod *Pod) error {
return nil
}
-func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, timeout *uint) error {
- if err := p.updatePod(); err != nil {
- return err
- }
-
- ctrs, err := r.state.PodContainers(p)
- if err != nil {
- return err
- }
- numCtrs := len(ctrs)
-
- // If the only running container in the pod is the pause container, remove the pod and container unconditionally.
- pauseCtrID := p.state.InfraContainerID
- if numCtrs == 1 && ctrs[0].ID() == pauseCtrID {
- removeCtrs = true
- force = true
- }
- if !removeCtrs && numCtrs > 0 {
- return fmt.Errorf("pod %s contains containers and cannot be removed: %w", p.ID(), define.ErrCtrExists)
- }
-
- ctrNamedVolumes := make(map[string]*ContainerNamedVolume)
-
+// DO NOT USE THIS FUNCTION DIRECTLY. Use removePod(), below. It will call
+// removeMalformedPod() if necessary.
+func (r *Runtime) removeMalformedPod(ctx context.Context, p *Pod, ctrs []*Container, force bool, timeout *uint, ctrNamedVolumes map[string]*ContainerNamedVolume) error {
var removalErr error
for _, ctr := range ctrs {
err := func() error {
@@ -231,7 +212,7 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
ctrNamedVolumes[vol.Name] = vol
}
- return r.removeContainer(ctx, ctr, force, false, true, timeout)
+ return r.removeContainer(ctx, ctr, force, false, true, true, timeout)
}()
if removalErr == nil {
@@ -261,6 +242,69 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
return err
}
+ return nil
+}
+
+func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, timeout *uint) error {
+ if err := p.updatePod(); err != nil {
+ return err
+ }
+
+ ctrs, err := r.state.PodContainers(p)
+ if err != nil {
+ return err
+ }
+ numCtrs := len(ctrs)
+
+ // If the only container in the pod is the infra (pause) container, remove the pod and container unconditionally.
+ pauseCtrID := p.state.InfraContainerID
+ if numCtrs == 1 && ctrs[0].ID() == pauseCtrID {
+ removeCtrs = true
+ force = true
+ }
+ if !removeCtrs && numCtrs > 0 {
+ return fmt.Errorf("pod %s contains containers and cannot be removed: %w", p.ID(), define.ErrCtrExists)
+ }
+
+ var removalErr error
+ ctrNamedVolumes := make(map[string]*ContainerNamedVolume)
+
+ // Build a graph of all containers in the pod.
+ graph, err := BuildContainerGraph(ctrs)
+ if err != nil {
+ // We have to allow the pod to be removed.
+ // But let's only do it if force is set.
+ if !force {
+ return fmt.Errorf("cannot create container graph for pod %s: %w", p.ID(), err)
+ }
+
+ removalErr = fmt.Errorf("creating container graph for pod %s failed, fell back to loop removal: %w", p.ID(), err)
+
+ if err := r.removeMalformedPod(ctx, p, ctrs, force, timeout, ctrNamedVolumes); err != nil {
+ logrus.Errorf("Error creating container graph for pod %s: %v. Falling back to loop removal.", p.ID(), err)
+ return err
+ }
+ } else {
+ ctrErrors := make(map[string]error)
+ ctrsVisited := make(map[string]bool)
+
+ for _, node := range graph.notDependedOnNodes {
+ removeNode(ctx, node, p, force, timeout, false, ctrErrors, ctrsVisited, ctrNamedVolumes)
+ }
+
+ // This is gross, but I don't want to change the signature on
+ // removePod - especially since any change here eventually has
+ // to map down to one error unless we want to make a breaking
+ // API change.
+ if len(ctrErrors) > 0 {
+ var allErrs error
+ for id, err := range ctrErrors {
+ allErrs = multierror.Append(allErrs, fmt.Errorf("removing container %s from pod %s: %w", id, p.ID(), err))
+ }
+ return allErrs
+ }
+ }
+
for volName := range ctrNamedVolumes {
volume, err := r.state.Volume(volName)
if err != nil && !errors.Is(err, define.ErrNoSuchVolume) {
diff --git a/libpod/runtime_volume_linux.go b/libpod/runtime_volume_linux.go
index c9a4a7dc1..08fdbf977 100644
--- a/libpod/runtime_volume_linux.go
+++ b/libpod/runtime_volume_linux.go
@@ -324,7 +324,7 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool, timeo
logrus.Debugf("Removing container %s (depends on volume %q)", ctr.ID(), v.Name())
- if err := r.removeContainer(ctx, ctr, force, false, false, timeout); err != nil {
+ if err := r.removeContainer(ctx, ctr, force, false, false, false, timeout); err != nil {
return fmt.Errorf("removing container %s that depends on volume %s: %w", ctr.ID(), v.Name(), err)
}
}