diff options
Diffstat (limited to 'libpod')
-rw-r--r-- | libpod/container_graph.go | 91 | ||||
-rw-r--r-- | libpod/container_inspect.go | 297 | ||||
-rw-r--r-- | libpod/container_inspect_freebsd.go | 17 | ||||
-rw-r--r-- | libpod/container_inspect_linux.go | 306 | ||||
-rw-r--r-- | libpod/pod_api.go | 2 | ||||
-rw-r--r-- | libpod/runtime_ctr.go | 52 | ||||
-rw-r--r-- | libpod/runtime_img.go | 2 | ||||
-rw-r--r-- | libpod/runtime_pod_linux.go | 92 | ||||
-rw-r--r-- | libpod/runtime_volume_linux.go | 2 |
9 files changed, 518 insertions, 343 deletions
diff --git a/libpod/container_graph.go b/libpod/container_graph.go index 96d61b756..d43579e4a 100644 --- a/libpod/container_graph.go +++ b/libpod/container_graph.go @@ -281,3 +281,94 @@ func startNode(ctx context.Context, node *containerNode, setError bool, ctrError startNode(ctx, successor, ctrErrored, ctrErrors, ctrsVisited, restart) } } + +// Visit a node on the container graph and remove it, or set an error if it +// failed to remove. Only intended for use in pod removal; do *not* use when +// removing individual containers. +// All containers are assumed to be *UNLOCKED* on running this function. +// Container locks will be acquired as necessary. +// The pod is optional. If a pod is given it must be *LOCKED*. +func removeNode(ctx context.Context, node *containerNode, pod *Pod, force bool, timeout *uint, setError bool, ctrErrors map[string]error, ctrsVisited map[string]bool, ctrNamedVolumes map[string]*ContainerNamedVolume) { + // If we already visited this node, we're done. + if ctrsVisited[node.id] { + return + } + + // Someone who depends on us failed. + // Mark us as failed and recurse. + if setError { + ctrsVisited[node.id] = true + ctrErrors[node.id] = fmt.Errorf("a container that depends on container %s could not be removed: %w", node.id, define.ErrCtrStateInvalid) + + // Hit anyone we depend on, set errors there as well. + for _, successor := range node.dependsOn { + removeNode(ctx, successor, pod, force, timeout, true, ctrErrors, ctrsVisited, ctrNamedVolumes) + } + } + + // Does anyone still depend on us? + // Cannot remove if true. Once all containers that depend on us have been + // removed, we will be removed. + for _, dep := range node.dependedOn { + // The container that depends on us hasn't been removed yet. 
+ // OK to continue on + if ok := ctrsVisited[dep.id]; !ok { + return + } + } + + // Going to try to remove the node, mark us as visited + ctrsVisited[node.id] = true + + ctrErrored := false + + // Verify that all that depend on us are gone. + // Graph traversal should guarantee this is true, but this isn't that + // expensive, and it's better to be safe. + for _, dep := range node.dependedOn { + if _, err := node.container.runtime.GetContainer(dep.id); err == nil { + ctrErrored = true + ctrErrors[node.id] = fmt.Errorf("a container that depends on container %s still exists: %w", node.id, define.ErrDepExists) + } + } + + // Lock the container + node.container.lock.Lock() + + // Gate all subsequent bits behind a ctrErrored check - we don't want to + // proceed if a previous step failed. + if !ctrErrored { + if err := node.container.syncContainer(); err != nil { + ctrErrored = true + ctrErrors[node.id] = err + } + } + + if !ctrErrored { + for _, vol := range node.container.config.NamedVolumes { + ctrNamedVolumes[vol.Name] = vol + } + + if pod != nil && pod.state.InfraContainerID == node.id { + pod.state.InfraContainerID = "" + if err := pod.save(); err != nil { + ctrErrored = true + ctrErrors[node.id] = fmt.Errorf("error removing infra container %s from pod %s: %w", node.id, pod.ID(), err) + } + } + } + + if !ctrErrored { + if err := node.container.runtime.removeContainer(ctx, node.container, force, false, true, false, timeout); err != nil { + ctrErrored = true + ctrErrors[node.id] = err + } + } + + node.container.lock.Unlock() + + // Recurse to anyone who we depend on and remove them + for _, successor := range node.dependsOn { + removeNode(ctx, successor, pod, force, timeout, ctrErrored, ctrErrors, ctrsVisited, ctrNamedVolumes) + } +} diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go index b72d843b6..e4089efa6 100644 --- a/libpod/container_inspect.go +++ b/libpod/container_inspect.go @@ -3,20 +3,15 @@ package libpod import ( "errors" "fmt" - 
"sort" "strings" - "github.com/containers/common/pkg/config" "github.com/containers/podman/v4/libpod/define" "github.com/containers/podman/v4/libpod/driver" "github.com/containers/podman/v4/pkg/util" "github.com/containers/storage/types" units "github.com/docker/go-units" spec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/opencontainers/runtime-tools/generate" - "github.com/opencontainers/runtime-tools/validate" "github.com/sirupsen/logrus" - "github.com/syndtr/gocapability/capability" ) // inspectLocked inspects a container for low-level information. @@ -163,8 +158,6 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver Driver: driverData.Name, MountLabel: config.MountLabel, ProcessLabel: config.ProcessLabel, - EffectiveCaps: ctrSpec.Process.Capabilities.Effective, - BoundingCaps: ctrSpec.Process.Capabilities.Bounding, AppArmorProfile: ctrSpec.Process.ApparmorProfile, ExecIDs: execIDs, GraphDriver: driverData, @@ -173,6 +166,10 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver IsInfra: c.IsInfra(), IsService: c.IsService(), } + if ctrSpec.Process.Capabilities != nil { + data.EffectiveCaps = ctrSpec.Process.Capabilities.Effective + data.BoundingCaps = ctrSpec.Process.Capabilities.Bounding + } if c.state.ConfigPath != "" { data.OCIConfigPath = c.state.ConfigPath @@ -484,11 +481,6 @@ func (c *Container) generateInspectContainerHostConfig(ctrSpec *spec.Spec, named hostConfig.ShmSize = c.config.ShmSize hostConfig.Runtime = "oci" - // This is very expensive to initialize. - // So we don't want to initialize it unless we absolutely have to - IE, - // there are things that require a major:minor to path translation. 
- var deviceNodes map[string]string - // Annotations if ctrSpec.Annotations != nil { hostConfig.ContainerIDFile = ctrSpec.Annotations[define.InspectAnnotationCIDFile] @@ -506,109 +498,8 @@ func (c *Container) generateInspectContainerHostConfig(ctrSpec *spec.Spec, named } } - // Resource limits - if ctrSpec.Linux != nil { - if ctrSpec.Linux.Resources != nil { - if ctrSpec.Linux.Resources.CPU != nil { - if ctrSpec.Linux.Resources.CPU.Shares != nil { - hostConfig.CpuShares = *ctrSpec.Linux.Resources.CPU.Shares - } - if ctrSpec.Linux.Resources.CPU.Period != nil { - hostConfig.CpuPeriod = *ctrSpec.Linux.Resources.CPU.Period - } - if ctrSpec.Linux.Resources.CPU.Quota != nil { - hostConfig.CpuQuota = *ctrSpec.Linux.Resources.CPU.Quota - } - if ctrSpec.Linux.Resources.CPU.RealtimePeriod != nil { - hostConfig.CpuRealtimePeriod = *ctrSpec.Linux.Resources.CPU.RealtimePeriod - } - if ctrSpec.Linux.Resources.CPU.RealtimeRuntime != nil { - hostConfig.CpuRealtimeRuntime = *ctrSpec.Linux.Resources.CPU.RealtimeRuntime - } - hostConfig.CpusetCpus = ctrSpec.Linux.Resources.CPU.Cpus - hostConfig.CpusetMems = ctrSpec.Linux.Resources.CPU.Mems - } - if ctrSpec.Linux.Resources.Memory != nil { - if ctrSpec.Linux.Resources.Memory.Limit != nil { - hostConfig.Memory = *ctrSpec.Linux.Resources.Memory.Limit - } - if ctrSpec.Linux.Resources.Memory.Reservation != nil { - hostConfig.MemoryReservation = *ctrSpec.Linux.Resources.Memory.Reservation - } - if ctrSpec.Linux.Resources.Memory.Swap != nil { - hostConfig.MemorySwap = *ctrSpec.Linux.Resources.Memory.Swap - } - if ctrSpec.Linux.Resources.Memory.Swappiness != nil { - hostConfig.MemorySwappiness = int64(*ctrSpec.Linux.Resources.Memory.Swappiness) - } else { - // Swappiness has a default of -1 - hostConfig.MemorySwappiness = -1 - } - if ctrSpec.Linux.Resources.Memory.DisableOOMKiller != nil { - hostConfig.OomKillDisable = *ctrSpec.Linux.Resources.Memory.DisableOOMKiller - } - } - if ctrSpec.Linux.Resources.Pids != nil { - hostConfig.PidsLimit = 
ctrSpec.Linux.Resources.Pids.Limit - } - hostConfig.CgroupConf = ctrSpec.Linux.Resources.Unified - if ctrSpec.Linux.Resources.BlockIO != nil { - if ctrSpec.Linux.Resources.BlockIO.Weight != nil { - hostConfig.BlkioWeight = *ctrSpec.Linux.Resources.BlockIO.Weight - } - hostConfig.BlkioWeightDevice = []define.InspectBlkioWeightDevice{} - for _, dev := range ctrSpec.Linux.Resources.BlockIO.WeightDevice { - key := fmt.Sprintf("%d:%d", dev.Major, dev.Minor) - // TODO: how do we handle LeafWeight vs - // Weight? For now, ignore anything - // without Weight set. - if dev.Weight == nil { - logrus.Infof("Ignoring weight device %s as it lacks a weight", key) - continue - } - if deviceNodes == nil { - nodes, err := util.FindDeviceNodes() - if err != nil { - return nil, err - } - deviceNodes = nodes - } - path, ok := deviceNodes[key] - if !ok { - logrus.Infof("Could not locate weight device %s in system devices", key) - continue - } - weightDev := define.InspectBlkioWeightDevice{} - weightDev.Path = path - weightDev.Weight = *dev.Weight - hostConfig.BlkioWeightDevice = append(hostConfig.BlkioWeightDevice, weightDev) - } - - readBps, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleReadBpsDevice) - if err != nil { - return nil, err - } - hostConfig.BlkioDeviceReadBps = readBps - - writeBps, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleWriteBpsDevice) - if err != nil { - return nil, err - } - hostConfig.BlkioDeviceWriteBps = writeBps - - readIops, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleReadIOPSDevice) - if err != nil { - return nil, err - } - hostConfig.BlkioDeviceReadIOps = readIops - - writeIops, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleWriteIOPSDevice) - if err != nil { - return nil, err - } - hostConfig.BlkioDeviceWriteIOps = writeIops - } - } + if err := c.platformInspectContainerHostConfig(ctrSpec, hostConfig); err != nil { + 
return nil, err } // NanoCPUs. @@ -659,182 +550,6 @@ func (c *Container) generateInspectContainerHostConfig(ctrSpec *spec.Spec, named hostConfig.PortBindings = make(map[string][]define.InspectHostPort) } - // Cap add and cap drop. - // We need a default set of capabilities to compare against. - // The OCI generate package has one, and is commonly used, so we'll - // use it. - // Problem: there are 5 sets of capabilities. - // Use the bounding set for this computation, it's the most encompassing - // (but still not perfect). - capAdd := []string{} - capDrop := []string{} - // No point in continuing if we got a spec without a Process block... - if ctrSpec.Process != nil { - // Max an O(1) lookup table for default bounding caps. - boundingCaps := make(map[string]bool) - g, err := generate.New("linux") - if err != nil { - return nil, err - } - if !hostConfig.Privileged { - for _, cap := range g.Config.Process.Capabilities.Bounding { - boundingCaps[cap] = true - } - } else { - // If we are privileged, use all caps. - for _, cap := range capability.List() { - if g.HostSpecific && cap > validate.LastCap() { - continue - } - boundingCaps[fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String()))] = true - } - } - // Iterate through spec caps. - // If it's not in default bounding caps, it was added. - // If it is, delete from the default set. Whatever remains after - // we finish are the dropped caps. - for _, cap := range ctrSpec.Process.Capabilities.Bounding { - if _, ok := boundingCaps[cap]; ok { - delete(boundingCaps, cap) - } else { - capAdd = append(capAdd, cap) - } - } - for cap := range boundingCaps { - capDrop = append(capDrop, cap) - } - // Sort CapDrop so it displays in consistent order (GH #9490) - sort.Strings(capDrop) - } - hostConfig.CapAdd = capAdd - hostConfig.CapDrop = capDrop - switch { - case c.config.IPCNsCtr != "": - hostConfig.IpcMode = fmt.Sprintf("container:%s", c.config.IPCNsCtr) - case ctrSpec.Linux != nil: - // Locate the spec's IPC namespace. 
- // If there is none, it's ipc=host. - // If there is one and it has a path, it's "ns:". - // If no path, it's default - the empty string. - for _, ns := range ctrSpec.Linux.Namespaces { - if ns.Type == spec.IPCNamespace { - if ns.Path != "" { - hostConfig.IpcMode = fmt.Sprintf("ns:%s", ns.Path) - } else { - break - } - } - } - case c.config.NoShm: - hostConfig.IpcMode = "none" - case c.config.NoShmShare: - hostConfig.IpcMode = "private" - } - if hostConfig.IpcMode == "" { - hostConfig.IpcMode = "shareable" - } - - // Cgroup namespace mode - cgroupMode := "" - if c.config.CgroupNsCtr != "" { - cgroupMode = fmt.Sprintf("container:%s", c.config.CgroupNsCtr) - } else if ctrSpec.Linux != nil { - // Locate the spec's cgroup namespace - // If there is none, it's cgroup=host. - // If there is one and it has a path, it's "ns:". - // If there is no path, it's private. - for _, ns := range ctrSpec.Linux.Namespaces { - if ns.Type == spec.CgroupNamespace { - if ns.Path != "" { - cgroupMode = fmt.Sprintf("ns:%s", ns.Path) - } else { - cgroupMode = "private" - } - } - } - if cgroupMode == "" { - cgroupMode = "host" - } - } - hostConfig.CgroupMode = cgroupMode - - // Cgroup parent - // Need to check if it's the default, and not print if so. - defaultCgroupParent := "" - switch c.CgroupManager() { - case config.CgroupfsCgroupsManager: - defaultCgroupParent = CgroupfsDefaultCgroupParent - case config.SystemdCgroupsManager: - defaultCgroupParent = SystemdDefaultCgroupParent - } - if c.config.CgroupParent != defaultCgroupParent { - hostConfig.CgroupParent = c.config.CgroupParent - } - hostConfig.CgroupManager = c.CgroupManager() - - // PID namespace mode - pidMode := "" - if c.config.PIDNsCtr != "" { - pidMode = fmt.Sprintf("container:%s", c.config.PIDNsCtr) - } else if ctrSpec.Linux != nil { - // Locate the spec's PID namespace. - // If there is none, it's pid=host. - // If there is one and it has a path, it's "ns:". - // If there is no path, it's default - the empty string. 
- for _, ns := range ctrSpec.Linux.Namespaces { - if ns.Type == spec.PIDNamespace { - if ns.Path != "" { - pidMode = fmt.Sprintf("ns:%s", ns.Path) - } else { - pidMode = "private" - } - break - } - } - if pidMode == "" { - pidMode = "host" - } - } - hostConfig.PidMode = pidMode - - // UTS namespace mode - utsMode := c.NamespaceMode(spec.UTSNamespace, ctrSpec) - - hostConfig.UTSMode = utsMode - - // User namespace mode - usernsMode := "" - if c.config.UserNsCtr != "" { - usernsMode = fmt.Sprintf("container:%s", c.config.UserNsCtr) - } else if ctrSpec.Linux != nil { - // Locate the spec's user namespace. - // If there is none, it's default - the empty string. - // If there is one, it's "private" if no path, or "ns:" if - // there's a path. - - for _, ns := range ctrSpec.Linux.Namespaces { - if ns.Type == spec.UserNamespace { - if ns.Path != "" { - usernsMode = fmt.Sprintf("ns:%s", ns.Path) - } else { - usernsMode = "private" - } - } - } - } - hostConfig.UsernsMode = usernsMode - if c.config.IDMappings.UIDMap != nil && c.config.IDMappings.GIDMap != nil { - hostConfig.IDMappings = generateIDMappings(c.config.IDMappings) - } - // Devices - // Do not include if privileged - assumed that all devices will be - // included. - var err error - hostConfig.Devices, err = c.GetDevices(hostConfig.Privileged, *ctrSpec, deviceNodes) - if err != nil { - return nil, err - } - // Ulimits hostConfig.Ulimits = []define.InspectUlimit{} if ctrSpec.Process != nil { diff --git a/libpod/container_inspect_freebsd.go b/libpod/container_inspect_freebsd.go new file mode 100644 index 000000000..8b4e8df87 --- /dev/null +++ b/libpod/container_inspect_freebsd.go @@ -0,0 +1,17 @@ +package libpod + +import ( + "github.com/containers/podman/v4/libpod/define" + spec "github.com/opencontainers/runtime-spec/specs-go" +) + +func (c *Container) platformInspectContainerHostConfig(ctrSpec *spec.Spec, hostConfig *define.InspectContainerHostConfig) error { + // Not sure what to put here. 
FreeBSD jails use pids from the + // global pool but can only see their own pids. + hostConfig.PidMode = "host" + + // UTS namespace mode + hostConfig.UTSMode = c.NamespaceMode(spec.UTSNamespace, ctrSpec) + + return nil +} diff --git a/libpod/container_inspect_linux.go b/libpod/container_inspect_linux.go new file mode 100644 index 000000000..355690d70 --- /dev/null +++ b/libpod/container_inspect_linux.go @@ -0,0 +1,306 @@ +package libpod + +import ( + "fmt" + "sort" + "strings" + + "github.com/containers/common/pkg/config" + "github.com/containers/podman/v4/libpod/define" + "github.com/containers/podman/v4/pkg/util" + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" + "github.com/opencontainers/runtime-tools/validate" + "github.com/sirupsen/logrus" + "github.com/syndtr/gocapability/capability" +) + +func (c *Container) platformInspectContainerHostConfig(ctrSpec *spec.Spec, hostConfig *define.InspectContainerHostConfig) error { + // This is very expensive to initialize. + // So we don't want to initialize it unless we absolutely have to - IE, + // there are things that require a major:minor to path translation. 
+ var deviceNodes map[string]string + + // Resource limits + if ctrSpec.Linux != nil { + if ctrSpec.Linux.Resources != nil { + if ctrSpec.Linux.Resources.CPU != nil { + if ctrSpec.Linux.Resources.CPU.Shares != nil { + hostConfig.CpuShares = *ctrSpec.Linux.Resources.CPU.Shares + } + if ctrSpec.Linux.Resources.CPU.Period != nil { + hostConfig.CpuPeriod = *ctrSpec.Linux.Resources.CPU.Period + } + if ctrSpec.Linux.Resources.CPU.Quota != nil { + hostConfig.CpuQuota = *ctrSpec.Linux.Resources.CPU.Quota + } + if ctrSpec.Linux.Resources.CPU.RealtimePeriod != nil { + hostConfig.CpuRealtimePeriod = *ctrSpec.Linux.Resources.CPU.RealtimePeriod + } + if ctrSpec.Linux.Resources.CPU.RealtimeRuntime != nil { + hostConfig.CpuRealtimeRuntime = *ctrSpec.Linux.Resources.CPU.RealtimeRuntime + } + hostConfig.CpusetCpus = ctrSpec.Linux.Resources.CPU.Cpus + hostConfig.CpusetMems = ctrSpec.Linux.Resources.CPU.Mems + } + if ctrSpec.Linux.Resources.Memory != nil { + if ctrSpec.Linux.Resources.Memory.Limit != nil { + hostConfig.Memory = *ctrSpec.Linux.Resources.Memory.Limit + } + if ctrSpec.Linux.Resources.Memory.Reservation != nil { + hostConfig.MemoryReservation = *ctrSpec.Linux.Resources.Memory.Reservation + } + if ctrSpec.Linux.Resources.Memory.Swap != nil { + hostConfig.MemorySwap = *ctrSpec.Linux.Resources.Memory.Swap + } + if ctrSpec.Linux.Resources.Memory.Swappiness != nil { + hostConfig.MemorySwappiness = int64(*ctrSpec.Linux.Resources.Memory.Swappiness) + } else { + // Swappiness has a default of -1 + hostConfig.MemorySwappiness = -1 + } + if ctrSpec.Linux.Resources.Memory.DisableOOMKiller != nil { + hostConfig.OomKillDisable = *ctrSpec.Linux.Resources.Memory.DisableOOMKiller + } + } + if ctrSpec.Linux.Resources.Pids != nil { + hostConfig.PidsLimit = ctrSpec.Linux.Resources.Pids.Limit + } + hostConfig.CgroupConf = ctrSpec.Linux.Resources.Unified + if ctrSpec.Linux.Resources.BlockIO != nil { + if ctrSpec.Linux.Resources.BlockIO.Weight != nil { + hostConfig.BlkioWeight = 
*ctrSpec.Linux.Resources.BlockIO.Weight + } + hostConfig.BlkioWeightDevice = []define.InspectBlkioWeightDevice{} + for _, dev := range ctrSpec.Linux.Resources.BlockIO.WeightDevice { + key := fmt.Sprintf("%d:%d", dev.Major, dev.Minor) + // TODO: how do we handle LeafWeight vs + // Weight? For now, ignore anything + // without Weight set. + if dev.Weight == nil { + logrus.Infof("Ignoring weight device %s as it lacks a weight", key) + continue + } + if deviceNodes == nil { + nodes, err := util.FindDeviceNodes() + if err != nil { + return err + } + deviceNodes = nodes + } + path, ok := deviceNodes[key] + if !ok { + logrus.Infof("Could not locate weight device %s in system devices", key) + continue + } + weightDev := define.InspectBlkioWeightDevice{} + weightDev.Path = path + weightDev.Weight = *dev.Weight + hostConfig.BlkioWeightDevice = append(hostConfig.BlkioWeightDevice, weightDev) + } + + readBps, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleReadBpsDevice) + if err != nil { + return err + } + hostConfig.BlkioDeviceReadBps = readBps + + writeBps, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleWriteBpsDevice) + if err != nil { + return err + } + hostConfig.BlkioDeviceWriteBps = writeBps + + readIops, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleReadIOPSDevice) + if err != nil { + return err + } + hostConfig.BlkioDeviceReadIOps = readIops + + writeIops, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleWriteIOPSDevice) + if err != nil { + return err + } + hostConfig.BlkioDeviceWriteIOps = writeIops + } + } + } + + // Cap add and cap drop. + // We need a default set of capabilities to compare against. + // The OCI generate package has one, and is commonly used, so we'll + // use it. + // Problem: there are 5 sets of capabilities. + // Use the bounding set for this computation, it's the most encompassing + // (but still not perfect). 
+ capAdd := []string{} + capDrop := []string{} + // No point in continuing if we got a spec without a Process block... + if ctrSpec.Process != nil { + // Make an O(1) lookup table for default bounding caps. + boundingCaps := make(map[string]bool) + g, err := generate.New("linux") + if err != nil { + return err + } + if !hostConfig.Privileged { + for _, cap := range g.Config.Process.Capabilities.Bounding { + boundingCaps[cap] = true + } + } else { + // If we are privileged, use all caps. + for _, cap := range capability.List() { + if g.HostSpecific && cap > validate.LastCap() { + continue + } + boundingCaps[fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String()))] = true + } + } + // Iterate through spec caps. + // If it's not in default bounding caps, it was added. + // If it is, delete from the default set. Whatever remains after + // we finish are the dropped caps. + for _, cap := range ctrSpec.Process.Capabilities.Bounding { + if _, ok := boundingCaps[cap]; ok { + delete(boundingCaps, cap) + } else { + capAdd = append(capAdd, cap) + } + } + for cap := range boundingCaps { + capDrop = append(capDrop, cap) + } + // Sort CapDrop so it displays in consistent order (GH #9490) + sort.Strings(capDrop) + } + hostConfig.CapAdd = capAdd + hostConfig.CapDrop = capDrop + switch { + case c.config.IPCNsCtr != "": + hostConfig.IpcMode = fmt.Sprintf("container:%s", c.config.IPCNsCtr) + case ctrSpec.Linux != nil: + // Locate the spec's IPC namespace. + // If there is none, it's ipc=host. + // If there is one and it has a path, it's "ns:". + // If no path, it's default - the empty string. 
+ for _, ns := range ctrSpec.Linux.Namespaces { + if ns.Type == spec.IPCNamespace { + if ns.Path != "" { + hostConfig.IpcMode = fmt.Sprintf("ns:%s", ns.Path) + } else { + break + } + } + } + case c.config.NoShm: + hostConfig.IpcMode = "none" + case c.config.NoShmShare: + hostConfig.IpcMode = "private" + } + if hostConfig.IpcMode == "" { + hostConfig.IpcMode = "shareable" + } + + // Cgroup namespace mode + cgroupMode := "" + if c.config.CgroupNsCtr != "" { + cgroupMode = fmt.Sprintf("container:%s", c.config.CgroupNsCtr) + } else if ctrSpec.Linux != nil { + // Locate the spec's cgroup namespace + // If there is none, it's cgroup=host. + // If there is one and it has a path, it's "ns:". + // If there is no path, it's private. + for _, ns := range ctrSpec.Linux.Namespaces { + if ns.Type == spec.CgroupNamespace { + if ns.Path != "" { + cgroupMode = fmt.Sprintf("ns:%s", ns.Path) + } else { + cgroupMode = "private" + } + } + } + if cgroupMode == "" { + cgroupMode = "host" + } + } + hostConfig.CgroupMode = cgroupMode + + // Cgroup parent + // Need to check if it's the default, and not print if so. + defaultCgroupParent := "" + switch c.CgroupManager() { + case config.CgroupfsCgroupsManager: + defaultCgroupParent = CgroupfsDefaultCgroupParent + case config.SystemdCgroupsManager: + defaultCgroupParent = SystemdDefaultCgroupParent + } + if c.config.CgroupParent != defaultCgroupParent { + hostConfig.CgroupParent = c.config.CgroupParent + } + hostConfig.CgroupManager = c.CgroupManager() + + // PID namespace mode + pidMode := "" + if c.config.PIDNsCtr != "" { + pidMode = fmt.Sprintf("container:%s", c.config.PIDNsCtr) + } else if ctrSpec.Linux != nil { + // Locate the spec's PID namespace. + // If there is none, it's pid=host. + // If there is one and it has a path, it's "ns:". + // If there is no path, it's default - the empty string. 
+ for _, ns := range ctrSpec.Linux.Namespaces { + if ns.Type == spec.PIDNamespace { + if ns.Path != "" { + pidMode = fmt.Sprintf("ns:%s", ns.Path) + } else { + pidMode = "private" + } + break + } + } + if pidMode == "" { + pidMode = "host" + } + } + hostConfig.PidMode = pidMode + + // UTS namespace mode + utsMode := c.NamespaceMode(spec.UTSNamespace, ctrSpec) + + hostConfig.UTSMode = utsMode + + // User namespace mode + usernsMode := "" + if c.config.UserNsCtr != "" { + usernsMode = fmt.Sprintf("container:%s", c.config.UserNsCtr) + } else if ctrSpec.Linux != nil { + // Locate the spec's user namespace. + // If there is none, it's default - the empty string. + // If there is one, it's "private" if no path, or "ns:" if + // there's a path. + + for _, ns := range ctrSpec.Linux.Namespaces { + if ns.Type == spec.UserNamespace { + if ns.Path != "" { + usernsMode = fmt.Sprintf("ns:%s", ns.Path) + } else { + usernsMode = "private" + } + } + } + } + hostConfig.UsernsMode = usernsMode + if c.config.IDMappings.UIDMap != nil && c.config.IDMappings.GIDMap != nil { + hostConfig.IDMappings = generateIDMappings(c.config.IDMappings) + } + // Devices + // Do not include if privileged - assumed that all devices will be + // included. 
+ var err error + hostConfig.Devices, err = c.GetDevices(hostConfig.Privileged, *ctrSpec, deviceNodes) + if err != nil { + return err + } + + return nil +} diff --git a/libpod/pod_api.go b/libpod/pod_api.go index 1bd686ddc..924d43436 100644 --- a/libpod/pod_api.go +++ b/libpod/pod_api.go @@ -40,7 +40,7 @@ func (p *Pod) startInitContainers(ctx context.Context) error { icLock := initCon.lock icLock.Lock() var time *uint - if err := p.runtime.removeContainer(ctx, initCon, false, false, true, time); err != nil { + if err := p.runtime.removeContainer(ctx, initCon, false, false, true, false, time); err != nil { icLock.Unlock() return fmt.Errorf("failed to remove once init container %s: %w", initCon.ID(), err) } diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index 1f032dd6b..7b3cbadfa 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -581,7 +581,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai // be removed also if and only if the container is the sole user // Otherwise, RemoveContainer will return an error if the container is running func (r *Runtime) RemoveContainer(ctx context.Context, c *Container, force bool, removeVolume bool, timeout *uint) error { - return r.removeContainer(ctx, c, force, removeVolume, false, timeout) + return r.removeContainer(ctx, c, force, removeVolume, false, false, timeout) } // Internal function to remove a container. @@ -589,7 +589,9 @@ func (r *Runtime) RemoveContainer(ctx context.Context, c *Container, force bool, // removePod is used only when removing pods. It instructs Podman to ignore // infra container protections, and *not* remove from the database (as pod // remove will handle that). 
-func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, removeVolume, removePod bool, timeout *uint) error { +// ignoreDeps is *DANGEROUS* and should not be used outside of a very specific +// context (alternate pod removal code, where graph traversal is not possible). +func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, removeVolume, removePod, ignoreDeps bool, timeout *uint) error { if !c.valid { if ok, _ := r.state.HasContainer(c.ID()); !ok { // Container probably already removed @@ -618,25 +620,27 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo // pod. var pod *Pod runtime := c.runtime - if c.config.Pod != "" && !removePod { + if c.config.Pod != "" { pod, err = r.state.Pod(c.config.Pod) if err != nil { return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), pod.ID(), err) } - // Lock the pod while we're removing container - if pod.config.LockID == c.config.LockID { - return fmt.Errorf("container %s and pod %s share lock ID %d: %w", c.ID(), pod.ID(), c.config.LockID, define.ErrWillDeadlock) - } - pod.lock.Lock() - defer pod.lock.Unlock() - if err := pod.updatePod(); err != nil { - return err - } + if !removePod { + // Lock the pod while we're removing container + if pod.config.LockID == c.config.LockID { + return fmt.Errorf("container %s and pod %s share lock ID %d: %w", c.ID(), pod.ID(), c.config.LockID, define.ErrWillDeadlock) + } + pod.lock.Lock() + defer pod.lock.Unlock() + if err := pod.updatePod(); err != nil { + return err + } - infraID := pod.state.InfraContainerID - if c.ID() == infraID { - return fmt.Errorf("container %s is the infra container of pod %s and cannot be removed without removing the pod", c.ID(), pod.ID()) + infraID := pod.state.InfraContainerID + if c.ID() == infraID { + return fmt.Errorf("container %s is the infra container of pod %s and cannot be removed without removing the pod", c.ID(), pod.ID()) + } } } @@ -696,7 +700,7 @@ 
func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo // Check that no other containers depend on the container. // Only used if not removing a pod - pods guarantee that all // deps will be evicted at the same time. - if !removePod { + if !ignoreDeps { deps, err := r.state.ContainerInUse(c) if err != nil { return err @@ -777,13 +781,11 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo if c.config.Pod != "" { // If we're removing the pod, the container will be evicted // from the state elsewhere - if !removePod { - if err := r.state.RemoveContainerFromPod(pod, c); err != nil { - if cleanupErr == nil { - cleanupErr = err - } else { - logrus.Errorf("Removing container %s from database: %v", c.ID(), err) - } + if err := r.state.RemoveContainerFromPod(pod, c); err != nil { + if cleanupErr == nil { + cleanupErr = err + } else { + logrus.Errorf("Removing container %s from database: %v", c.ID(), err) } } } else { @@ -872,7 +874,7 @@ func (r *Runtime) evictContainer(ctx context.Context, idOrName string, removeVol if err == nil { logrus.Infof("Container %s successfully retrieved from state, attempting normal removal", id) // Assume force = true for the evict case - err = r.removeContainer(ctx, tmpCtr, true, removeVolume, false, timeout) + err = r.removeContainer(ctx, tmpCtr, true, removeVolume, false, false, timeout) if !tmpCtr.valid { // If the container is marked invalid, remove succeeded // in kicking it out of the state - no need to continue. 
@@ -1034,7 +1036,7 @@ func (r *Runtime) RemoveDepend(ctx context.Context, rmCtr *Container, force bool } report := reports.RmReport{Id: rmCtr.ID(), RawInput: rmCtr.ID()} - report.Err = r.removeContainer(ctx, rmCtr, force, removeVolume, false, timeout) + report.Err = r.removeContainer(ctx, rmCtr, force, removeVolume, false, false, timeout) return append(rmReports, &report), nil } diff --git a/libpod/runtime_img.go b/libpod/runtime_img.go index 5510b2af6..dacbd752f 100644 --- a/libpod/runtime_img.go +++ b/libpod/runtime_img.go @@ -47,7 +47,7 @@ func (r *Runtime) RemoveContainersForImageCallback(ctx context.Context) libimage return fmt.Errorf("removing image %s: container %s using image could not be removed: %w", imageID, ctr.ID(), err) } } else { - if err := r.removeContainer(ctx, ctr, true, false, false, timeout); err != nil { + if err := r.removeContainer(ctx, ctr, true, false, false, false, timeout); err != nil { return fmt.Errorf("removing image %s: container %s using image could not be removed: %w", imageID, ctr.ID(), err) } } diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go index 3eeef69d8..24e9f3da7 100644 --- a/libpod/runtime_pod_linux.go +++ b/libpod/runtime_pod_linux.go @@ -17,6 +17,7 @@ import ( "github.com/containers/podman/v4/libpod/events" "github.com/containers/podman/v4/pkg/rootless" "github.com/containers/podman/v4/pkg/specgen" + "github.com/hashicorp/go-multierror" "github.com/sirupsen/logrus" ) @@ -191,29 +192,9 @@ func (r *Runtime) SavePod(pod *Pod) error { return nil } -func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, timeout *uint) error { - if err := p.updatePod(); err != nil { - return err - } - - ctrs, err := r.state.PodContainers(p) - if err != nil { - return err - } - numCtrs := len(ctrs) - - // If the only running container in the pod is the pause container, remove the pod and container unconditionally. 
- pauseCtrID := p.state.InfraContainerID - if numCtrs == 1 && ctrs[0].ID() == pauseCtrID { - removeCtrs = true - force = true - } - if !removeCtrs && numCtrs > 0 { - return fmt.Errorf("pod %s contains containers and cannot be removed: %w", p.ID(), define.ErrCtrExists) - } - - ctrNamedVolumes := make(map[string]*ContainerNamedVolume) - +// DO NOT USE THIS FUNCTION DIRECTLY. Use removePod(), below. It will call +// removeMalformedPod() if necessary. +func (r *Runtime) removeMalformedPod(ctx context.Context, p *Pod, ctrs []*Container, force bool, timeout *uint, ctrNamedVolumes map[string]*ContainerNamedVolume) error { var removalErr error for _, ctr := range ctrs { err := func() error { @@ -231,7 +212,7 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, ctrNamedVolumes[vol.Name] = vol } - return r.removeContainer(ctx, ctr, force, false, true, timeout) + return r.removeContainer(ctx, ctr, force, false, true, true, timeout) }() if removalErr == nil { @@ -261,6 +242,69 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, return err } + return nil +} + +func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, timeout *uint) error { + if err := p.updatePod(); err != nil { + return err + } + + ctrs, err := r.state.PodContainers(p) + if err != nil { + return err + } + numCtrs := len(ctrs) + + // If the only running container in the pod is the pause container, remove the pod and container unconditionally. + pauseCtrID := p.state.InfraContainerID + if numCtrs == 1 && ctrs[0].ID() == pauseCtrID { + removeCtrs = true + force = true + } + if !removeCtrs && numCtrs > 0 { + return fmt.Errorf("pod %s contains containers and cannot be removed: %w", p.ID(), define.ErrCtrExists) + } + + var removalErr error + ctrNamedVolumes := make(map[string]*ContainerNamedVolume) + + // Build a graph of all containers in the pod. 
+ graph, err := BuildContainerGraph(ctrs) + if err != nil { + // We have to allow the pod to be removed. + // But let's only do it if force is set. + if !force { + return fmt.Errorf("cannot create container graph for pod %s: %w", p.ID(), err) + } + + removalErr = fmt.Errorf("creating container graph for pod %s failed, fell back to loop removal: %w", p.ID(), err) + + if err := r.removeMalformedPod(ctx, p, ctrs, force, timeout, ctrNamedVolumes); err != nil { + logrus.Errorf("Error creating container graph for pod %s: %v. Falling back to loop removal.", p.ID(), err) + return err + } + } else { + ctrErrors := make(map[string]error) + ctrsVisited := make(map[string]bool) + + for _, node := range graph.notDependedOnNodes { + removeNode(ctx, node, p, force, timeout, false, ctrErrors, ctrsVisited, ctrNamedVolumes) + } + + // This is gross, but I don't want to change the signature on + // removePod - especially since any change here eventually has + // to map down to one error unless we want to make a breaking + // API change. 
+ if len(ctrErrors) > 0 { + var allErrs error + for id, err := range ctrErrors { + allErrs = multierror.Append(allErrs, fmt.Errorf("removing container %s from pod %s: %w", id, p.ID(), err)) + } + return allErrs + } + } + for volName := range ctrNamedVolumes { volume, err := r.state.Volume(volName) if err != nil && !errors.Is(err, define.ErrNoSuchVolume) { diff --git a/libpod/runtime_volume_linux.go b/libpod/runtime_volume_linux.go index c9a4a7dc1..08fdbf977 100644 --- a/libpod/runtime_volume_linux.go +++ b/libpod/runtime_volume_linux.go @@ -324,7 +324,7 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool, timeo logrus.Debugf("Removing container %s (depends on volume %q)", ctr.ID(), v.Name()) - if err := r.removeContainer(ctx, ctr, force, false, false, timeout); err != nil { + if err := r.removeContainer(ctx, ctr, force, false, false, false, timeout); err != nil { return fmt.Errorf("removing container %s that depends on volume %s: %w", ctr.ID(), v.Name(), err) } } |