diff options
author | Doug Rabson <dfr@rabson.org> | 2022-08-17 11:15:37 +0100 |
---|---|---|
committer | Doug Rabson <dfr@rabson.org> | 2022-08-18 08:05:42 +0100 |
commit | d43fac20f3025096cdfe45ae32f41886b39e4659 (patch) | |
tree | ae2a25dce29cec52471dac9944811eab7eb6af80 | |
parent | 93bad904864aa71c45b6b72d217a752c05eb254b (diff) | |
download | podman-d43fac20f3025096cdfe45ae32f41886b39e4659.tar.gz podman-d43fac20f3025096cdfe45ae32f41886b39e4659.tar.bz2 podman-d43fac20f3025096cdfe45ae32f41886b39e4659.zip |
libpod: Move moveConmonToCgroupAndSignal and GetLimits to oci_conmon_linux.go
[NO NEW TESTS NEEDED]
Signed-off-by: Doug Rabson <dfr@rabson.org>
-rw-r--r-- | libpod/oci_conmon_common.go | 261 |
-rw-r--r-- | libpod/oci_conmon_linux.go | 267 |
2 files changed, 267 insertions, 261 deletions
diff --git a/libpod/oci_conmon_common.go b/libpod/oci_conmon_common.go index aee0c36c8..222fec9ca 100644 --- a/libpod/oci_conmon_common.go +++ b/libpod/oci_conmon_common.go @@ -23,10 +23,6 @@ import ( "text/template" "time" - runcconfig "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/devices" - - "github.com/containers/common/pkg/cgroups" "github.com/containers/common/pkg/config" "github.com/containers/common/pkg/resize" cutil "github.com/containers/common/pkg/util" @@ -1338,75 +1334,6 @@ func startCommand(cmd *exec.Cmd, ctr *Container) error { return cmd.Start() } -// moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup -// it then signals for conmon to start by sending nonce data down the start fd -func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File) error { - mustCreateCgroup := true - - if ctr.config.NoCgroups { - mustCreateCgroup = false - } - - // If cgroup creation is disabled - just signal. - switch ctr.config.CgroupsMode { - case "disabled", "no-conmon", cgroupSplit: - mustCreateCgroup = false - } - - // $INVOCATION_ID is set by systemd when running as a service. - if ctr.runtime.RemoteURI() == "" && os.Getenv("INVOCATION_ID") != "" { - mustCreateCgroup = false - } - - if mustCreateCgroup { - // Usually rootless users are not allowed to configure cgroupfs. - // There are cases though, where it is allowed, e.g. if the cgroup - // is manually configured and chowned). Avoid detecting all - // such cases and simply use a lower log level. 
- logLevel := logrus.WarnLevel - if rootless.IsRootless() { - logLevel = logrus.InfoLevel - } - // TODO: This should be a switch - we are not guaranteed that - // there are only 2 valid cgroup managers - cgroupParent := ctr.CgroupParent() - cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon") - Resource := ctr.Spec().Linux.Resources - cgroupResources, err := GetLimits(Resource) - if err != nil { - logrus.StandardLogger().Log(logLevel, "Could not get ctr resources") - } - if ctr.CgroupManager() == config.SystemdCgroupsManager { - unitName := createUnitName("libpod-conmon", ctr.ID()) - realCgroupParent := cgroupParent - splitParent := strings.Split(cgroupParent, "/") - if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 { - realCgroupParent = splitParent[len(splitParent)-1] - } - - logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName) - if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil { - logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to systemd sandbox cgroup: %v", err) - } - } else { - control, err := cgroups.New(cgroupPath, &cgroupResources) - if err != nil { - logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to cgroupfs sandbox cgroup: %v", err) - } else if err := control.AddPid(cmd.Process.Pid); err != nil { - // we need to remove this defer and delete the cgroup once conmon exits - // maybe need a conmon monitor? - logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to cgroupfs sandbox cgroup: %v", err) - } - } - } - - /* We set the cgroup, now the child can start creating children */ - if err := writeConmonPipeData(startFd); err != nil { - return err - } - return nil -} - // newPipe creates a unix socket pair for communication. // Returns two files - first is parent, second is child. 
func newPipe() (*os.File, *os.File, error) { @@ -1671,191 +1598,3 @@ func httpAttachNonTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter, } } } - -// GetLimits converts spec resource limits to cgroup consumable limits -func GetLimits(resource *spec.LinuxResources) (runcconfig.Resources, error) { - if resource == nil { - resource = &spec.LinuxResources{} - } - final := &runcconfig.Resources{} - devs := []*devices.Rule{} - - // Devices - for _, entry := range resource.Devices { - if entry.Major == nil || entry.Minor == nil { - continue - } - runeType := 'a' - switch entry.Type { - case "b": - runeType = 'b' - case "c": - runeType = 'c' - } - - devs = append(devs, &devices.Rule{ - Type: devices.Type(runeType), - Major: *entry.Major, - Minor: *entry.Minor, - Permissions: devices.Permissions(entry.Access), - Allow: entry.Allow, - }) - } - final.Devices = devs - - // HugepageLimits - pageLimits := []*runcconfig.HugepageLimit{} - for _, entry := range resource.HugepageLimits { - pageLimits = append(pageLimits, &runcconfig.HugepageLimit{ - Pagesize: entry.Pagesize, - Limit: entry.Limit, - }) - } - final.HugetlbLimit = pageLimits - - // Networking - netPriorities := []*runcconfig.IfPrioMap{} - if resource.Network != nil { - for _, entry := range resource.Network.Priorities { - netPriorities = append(netPriorities, &runcconfig.IfPrioMap{ - Interface: entry.Name, - Priority: int64(entry.Priority), - }) - } - } - final.NetPrioIfpriomap = netPriorities - rdma := make(map[string]runcconfig.LinuxRdma) - for name, entry := range resource.Rdma { - rdma[name] = runcconfig.LinuxRdma{HcaHandles: entry.HcaHandles, HcaObjects: entry.HcaObjects} - } - final.Rdma = rdma - - // Memory - if resource.Memory != nil { - if resource.Memory.Limit != nil { - final.Memory = *resource.Memory.Limit - } - if resource.Memory.Reservation != nil { - final.MemoryReservation = *resource.Memory.Reservation - } - if resource.Memory.Swap != nil { - final.MemorySwap = *resource.Memory.Swap - } - if 
resource.Memory.Swappiness != nil { - final.MemorySwappiness = resource.Memory.Swappiness - } - } - - // CPU - if resource.CPU != nil { - if resource.CPU.Period != nil { - final.CpuPeriod = *resource.CPU.Period - } - if resource.CPU.Quota != nil { - final.CpuQuota = *resource.CPU.Quota - } - if resource.CPU.RealtimePeriod != nil { - final.CpuRtPeriod = *resource.CPU.RealtimePeriod - } - if resource.CPU.RealtimeRuntime != nil { - final.CpuRtRuntime = *resource.CPU.RealtimeRuntime - } - if resource.CPU.Shares != nil { - final.CpuShares = *resource.CPU.Shares - } - final.CpusetCpus = resource.CPU.Cpus - final.CpusetMems = resource.CPU.Mems - } - - // BlkIO - if resource.BlockIO != nil { - if len(resource.BlockIO.ThrottleReadBpsDevice) > 0 { - for _, entry := range resource.BlockIO.ThrottleReadBpsDevice { - throttle := &runcconfig.ThrottleDevice{} - dev := &runcconfig.BlockIODevice{ - Major: entry.Major, - Minor: entry.Minor, - } - throttle.BlockIODevice = *dev - throttle.Rate = entry.Rate - final.BlkioThrottleReadBpsDevice = append(final.BlkioThrottleReadBpsDevice, throttle) - } - } - if len(resource.BlockIO.ThrottleWriteBpsDevice) > 0 { - for _, entry := range resource.BlockIO.ThrottleWriteBpsDevice { - throttle := &runcconfig.ThrottleDevice{} - dev := &runcconfig.BlockIODevice{ - Major: entry.Major, - Minor: entry.Minor, - } - throttle.BlockIODevice = *dev - throttle.Rate = entry.Rate - final.BlkioThrottleWriteBpsDevice = append(final.BlkioThrottleWriteBpsDevice, throttle) - } - } - if len(resource.BlockIO.ThrottleReadIOPSDevice) > 0 { - for _, entry := range resource.BlockIO.ThrottleReadIOPSDevice { - throttle := &runcconfig.ThrottleDevice{} - dev := &runcconfig.BlockIODevice{ - Major: entry.Major, - Minor: entry.Minor, - } - throttle.BlockIODevice = *dev - throttle.Rate = entry.Rate - final.BlkioThrottleReadIOPSDevice = append(final.BlkioThrottleReadIOPSDevice, throttle) - } - } - if len(resource.BlockIO.ThrottleWriteIOPSDevice) > 0 { - for _, entry := range 
resource.BlockIO.ThrottleWriteIOPSDevice { - throttle := &runcconfig.ThrottleDevice{} - dev := &runcconfig.BlockIODevice{ - Major: entry.Major, - Minor: entry.Minor, - } - throttle.BlockIODevice = *dev - throttle.Rate = entry.Rate - final.BlkioThrottleWriteIOPSDevice = append(final.BlkioThrottleWriteIOPSDevice, throttle) - } - } - if resource.BlockIO.LeafWeight != nil { - final.BlkioLeafWeight = *resource.BlockIO.LeafWeight - } - if resource.BlockIO.Weight != nil { - final.BlkioWeight = *resource.BlockIO.Weight - } - if len(resource.BlockIO.WeightDevice) > 0 { - for _, entry := range resource.BlockIO.WeightDevice { - weight := &runcconfig.WeightDevice{} - dev := &runcconfig.BlockIODevice{ - Major: entry.Major, - Minor: entry.Minor, - } - if entry.Weight != nil { - weight.Weight = *entry.Weight - } - if entry.LeafWeight != nil { - weight.LeafWeight = *entry.LeafWeight - } - weight.BlockIODevice = *dev - final.BlkioWeightDevice = append(final.BlkioWeightDevice, weight) - } - } - } - - // Pids - if resource.Pids != nil { - final.PidsLimit = resource.Pids.Limit - } - - // Networking - if resource.Network != nil { - if resource.Network.ClassID != nil { - final.NetClsClassid = *resource.Network.ClassID - } - } - - // Unified state - final.Unified = resource.Unified - - return *final, nil -} diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index ce6eaf32a..0964d4ea3 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -3,11 +3,21 @@ package libpod import ( "fmt" "os" + "os/exec" + "path/filepath" "runtime" "strings" + runcconfig "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/devices" + + "github.com/containers/common/pkg/cgroups" + "github.com/containers/common/pkg/config" "github.com/containers/podman/v4/pkg/errorhandling" + "github.com/containers/podman/v4/pkg/rootless" + "github.com/containers/podman/v4/utils" pmount "github.com/containers/storage/pkg/mount" + spec 
"github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/selinux/go-selinux/label" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" @@ -89,3 +99,260 @@ func (r *ConmonOCIRuntime) withContainerSocketLabel(ctr *Container, closure func } return err } + +// moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup +// it then signals for conmon to start by sending nonce data down the start fd +func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File) error { + mustCreateCgroup := true + + if ctr.config.NoCgroups { + mustCreateCgroup = false + } + + // If cgroup creation is disabled - just signal. + switch ctr.config.CgroupsMode { + case "disabled", "no-conmon", cgroupSplit: + mustCreateCgroup = false + } + + // $INVOCATION_ID is set by systemd when running as a service. + if ctr.runtime.RemoteURI() == "" && os.Getenv("INVOCATION_ID") != "" { + mustCreateCgroup = false + } + + if mustCreateCgroup { + // Usually rootless users are not allowed to configure cgroupfs. + // There are cases though, where it is allowed, e.g. if the cgroup + // is manually configured and chowned). Avoid detecting all + // such cases and simply use a lower log level. 
+ logLevel := logrus.WarnLevel + if rootless.IsRootless() { + logLevel = logrus.InfoLevel + } + // TODO: This should be a switch - we are not guaranteed that + // there are only 2 valid cgroup managers + cgroupParent := ctr.CgroupParent() + cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon") + Resource := ctr.Spec().Linux.Resources + cgroupResources, err := GetLimits(Resource) + if err != nil { + logrus.StandardLogger().Log(logLevel, "Could not get ctr resources") + } + if ctr.CgroupManager() == config.SystemdCgroupsManager { + unitName := createUnitName("libpod-conmon", ctr.ID()) + realCgroupParent := cgroupParent + splitParent := strings.Split(cgroupParent, "/") + if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 { + realCgroupParent = splitParent[len(splitParent)-1] + } + + logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName) + if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil { + logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to systemd sandbox cgroup: %v", err) + } + } else { + control, err := cgroups.New(cgroupPath, &cgroupResources) + if err != nil { + logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + } else if err := control.AddPid(cmd.Process.Pid); err != nil { + // we need to remove this defer and delete the cgroup once conmon exits + // maybe need a conmon monitor? 
+ logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + } + } + } + + /* We set the cgroup, now the child can start creating children */ + if err := writeConmonPipeData(startFd); err != nil { + return err + } + return nil +} + +// GetLimits converts spec resource limits to cgroup consumable limits +func GetLimits(resource *spec.LinuxResources) (runcconfig.Resources, error) { + if resource == nil { + resource = &spec.LinuxResources{} + } + final := &runcconfig.Resources{} + devs := []*devices.Rule{} + + // Devices + for _, entry := range resource.Devices { + if entry.Major == nil || entry.Minor == nil { + continue + } + runeType := 'a' + switch entry.Type { + case "b": + runeType = 'b' + case "c": + runeType = 'c' + } + + devs = append(devs, &devices.Rule{ + Type: devices.Type(runeType), + Major: *entry.Major, + Minor: *entry.Minor, + Permissions: devices.Permissions(entry.Access), + Allow: entry.Allow, + }) + } + final.Devices = devs + + // HugepageLimits + pageLimits := []*runcconfig.HugepageLimit{} + for _, entry := range resource.HugepageLimits { + pageLimits = append(pageLimits, &runcconfig.HugepageLimit{ + Pagesize: entry.Pagesize, + Limit: entry.Limit, + }) + } + final.HugetlbLimit = pageLimits + + // Networking + netPriorities := []*runcconfig.IfPrioMap{} + if resource.Network != nil { + for _, entry := range resource.Network.Priorities { + netPriorities = append(netPriorities, &runcconfig.IfPrioMap{ + Interface: entry.Name, + Priority: int64(entry.Priority), + }) + } + } + final.NetPrioIfpriomap = netPriorities + rdma := make(map[string]runcconfig.LinuxRdma) + for name, entry := range resource.Rdma { + rdma[name] = runcconfig.LinuxRdma{HcaHandles: entry.HcaHandles, HcaObjects: entry.HcaObjects} + } + final.Rdma = rdma + + // Memory + if resource.Memory != nil { + if resource.Memory.Limit != nil { + final.Memory = *resource.Memory.Limit + } + if resource.Memory.Reservation != nil { + final.MemoryReservation = 
*resource.Memory.Reservation + } + if resource.Memory.Swap != nil { + final.MemorySwap = *resource.Memory.Swap + } + if resource.Memory.Swappiness != nil { + final.MemorySwappiness = resource.Memory.Swappiness + } + } + + // CPU + if resource.CPU != nil { + if resource.CPU.Period != nil { + final.CpuPeriod = *resource.CPU.Period + } + if resource.CPU.Quota != nil { + final.CpuQuota = *resource.CPU.Quota + } + if resource.CPU.RealtimePeriod != nil { + final.CpuRtPeriod = *resource.CPU.RealtimePeriod + } + if resource.CPU.RealtimeRuntime != nil { + final.CpuRtRuntime = *resource.CPU.RealtimeRuntime + } + if resource.CPU.Shares != nil { + final.CpuShares = *resource.CPU.Shares + } + final.CpusetCpus = resource.CPU.Cpus + final.CpusetMems = resource.CPU.Mems + } + + // BlkIO + if resource.BlockIO != nil { + if len(resource.BlockIO.ThrottleReadBpsDevice) > 0 { + for _, entry := range resource.BlockIO.ThrottleReadBpsDevice { + throttle := &runcconfig.ThrottleDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + throttle.BlockIODevice = *dev + throttle.Rate = entry.Rate + final.BlkioThrottleReadBpsDevice = append(final.BlkioThrottleReadBpsDevice, throttle) + } + } + if len(resource.BlockIO.ThrottleWriteBpsDevice) > 0 { + for _, entry := range resource.BlockIO.ThrottleWriteBpsDevice { + throttle := &runcconfig.ThrottleDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + throttle.BlockIODevice = *dev + throttle.Rate = entry.Rate + final.BlkioThrottleWriteBpsDevice = append(final.BlkioThrottleWriteBpsDevice, throttle) + } + } + if len(resource.BlockIO.ThrottleReadIOPSDevice) > 0 { + for _, entry := range resource.BlockIO.ThrottleReadIOPSDevice { + throttle := &runcconfig.ThrottleDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + throttle.BlockIODevice = *dev + throttle.Rate = entry.Rate + final.BlkioThrottleReadIOPSDevice = 
append(final.BlkioThrottleReadIOPSDevice, throttle) + } + } + if len(resource.BlockIO.ThrottleWriteIOPSDevice) > 0 { + for _, entry := range resource.BlockIO.ThrottleWriteIOPSDevice { + throttle := &runcconfig.ThrottleDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + throttle.BlockIODevice = *dev + throttle.Rate = entry.Rate + final.BlkioThrottleWriteIOPSDevice = append(final.BlkioThrottleWriteIOPSDevice, throttle) + } + } + if resource.BlockIO.LeafWeight != nil { + final.BlkioLeafWeight = *resource.BlockIO.LeafWeight + } + if resource.BlockIO.Weight != nil { + final.BlkioWeight = *resource.BlockIO.Weight + } + if len(resource.BlockIO.WeightDevice) > 0 { + for _, entry := range resource.BlockIO.WeightDevice { + weight := &runcconfig.WeightDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + if entry.Weight != nil { + weight.Weight = *entry.Weight + } + if entry.LeafWeight != nil { + weight.LeafWeight = *entry.LeafWeight + } + weight.BlockIODevice = *dev + final.BlkioWeightDevice = append(final.BlkioWeightDevice, weight) + } + } + } + + // Pids + if resource.Pids != nil { + final.PidsLimit = resource.Pids.Limit + } + + // Networking + if resource.Network != nil { + if resource.Network.ClassID != nil { + final.NetClsClassid = *resource.Network.ClassID + } + } + + // Unified state + final.Unified = resource.Unified + + return *final, nil +} |