diff options
Diffstat (limited to 'libpod')
-rw-r--r-- | libpod/container_config.go | 1 | ||||
-rw-r--r-- | libpod/container_internal_linux.go | 1 | ||||
-rw-r--r-- | libpod/oci_conmon_linux.go | 201 | ||||
-rw-r--r-- | libpod/pod_internal.go | 2 | ||||
-rw-r--r-- | libpod/runtime_pod_linux.go | 30 | ||||
-rw-r--r-- | libpod/stats.go | 24 | ||||
-rw-r--r-- | libpod/util_linux.go | 27 |
7 files changed, 255 insertions, 31 deletions
diff --git a/libpod/container_config.go b/libpod/container_config.go index 45ff03d58..544c45a8c 100644 --- a/libpod/container_config.go +++ b/libpod/container_config.go @@ -424,7 +424,6 @@ type InfraInherit struct { CapDrop []string `json:"cap_drop,omitempty"` HostDeviceList []spec.LinuxDevice `json:"host_device_list,omitempty"` ImageVolumes []*specgen.ImageVolume `json:"image_volumes,omitempty"` - InfraResources *spec.LinuxResources `json:"resource_limits,omitempty"` Mounts []spec.Mount `json:"mounts,omitempty"` NoNewPrivileges bool `json:"no_new_privileges,omitempty"` OverlayVolumes []*specgen.OverlayVolume `json:"overlay_volumes,omitempty"` diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 77b598b16..10f4eeec1 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -870,6 +870,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { if err != nil { return nil, err } + g.SetLinuxCgroupsPath(cgroupPath) // Warning: CDI may alter g.Config in place. diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index d417626dc..7a9ae7ee5 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -23,6 +23,9 @@ import ( "text/template" "time" + runcconfig "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/devices" + "github.com/containers/common/pkg/cgroups" "github.com/containers/common/pkg/config" conmonConfig "github.com/containers/conmon/runner/config" @@ -1433,9 +1436,14 @@ func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec // TODO: This should be a switch - we are not guaranteed that // there are only 2 valid cgroup managers cgroupParent := ctr.CgroupParent() + cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon") + Resource := ctr.Spec().Linux.Resources + cgroupResources, err := GetLimits(Resource) + if err != nil { + logrus.StandardLogger().Log(logLevel, "Could not get ctr resources") + } if ctr.CgroupManager() == config.SystemdCgroupsManager { unitName := createUnitName("libpod-conmon", ctr.ID()) - realCgroupParent := cgroupParent splitParent := strings.Split(cgroupParent, "/") if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 { @@ -1447,8 +1455,7 @@ func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to systemd sandbox cgroup: %v", err) } } else { - cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon") - control, err := cgroups.New(cgroupPath, &spec.LinuxResources{}) + control, err := cgroups.New(cgroupPath, &cgroupResources) if err != nil { logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to cgroupfs sandbox cgroup: %v", err) } else if err := control.AddPid(cmd.Process.Pid); err != nil { @@ -1730,3 +1737,191 @@ func httpAttachNonTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter, } } } + +// GetLimits converts spec resource limits to cgroup consumable limits +func GetLimits(resource *spec.LinuxResources) (runcconfig.Resources, error) { + if resource == nil { + resource = &spec.LinuxResources{} + } + final := &runcconfig.Resources{} + devs := []*devices.Rule{} + + // Devices + for _, entry := range resource.Devices { + if entry.Major == nil || entry.Minor == nil { + continue + } + runeType := 'a' + switch entry.Type { + case "b": + runeType = 'b' + case "c": + runeType = 'c' + } + + devs = append(devs, &devices.Rule{ + Type: devices.Type(runeType), + Major: *entry.Major, + Minor: *entry.Minor, + Permissions: devices.Permissions(entry.Access), + Allow: entry.Allow, + }) + } + final.Devices = devs + + // HugepageLimits + pageLimits := []*runcconfig.HugepageLimit{} + for _, entry := range resource.HugepageLimits { + pageLimits = append(pageLimits, &runcconfig.HugepageLimit{ + Pagesize: entry.Pagesize, + Limit: entry.Limit, + }) + } + final.HugetlbLimit = pageLimits + + // Networking + netPriorities := []*runcconfig.IfPrioMap{} + if resource.Network != nil { + for _, entry := range resource.Network.Priorities { + netPriorities = append(netPriorities, &runcconfig.IfPrioMap{ + Interface: entry.Name, + Priority: int64(entry.Priority), + }) + } + } + final.NetPrioIfpriomap = netPriorities + rdma := make(map[string]runcconfig.LinuxRdma) + for name, entry := range resource.Rdma { + rdma[name] = runcconfig.LinuxRdma{HcaHandles: entry.HcaHandles, HcaObjects: entry.HcaObjects} + } + final.Rdma = rdma + + // Memory + if resource.Memory != nil { + if resource.Memory.Limit != nil { + final.Memory = *resource.Memory.Limit + } + if resource.Memory.Reservation != nil { + final.MemoryReservation = *resource.Memory.Reservation + } + if resource.Memory.Swap != nil { + final.MemorySwap = *resource.Memory.Swap + } + if resource.Memory.Swappiness != nil { + final.MemorySwappiness = resource.Memory.Swappiness + } + } + + // CPU + if resource.CPU != nil { + if resource.CPU.Period != nil { + final.CpuPeriod = *resource.CPU.Period + } + if resource.CPU.Quota != nil { + final.CpuQuota = *resource.CPU.Quota + } + if resource.CPU.RealtimePeriod != nil { + final.CpuRtPeriod = *resource.CPU.RealtimePeriod + } + if resource.CPU.RealtimeRuntime != nil { + final.CpuRtRuntime = *resource.CPU.RealtimeRuntime + } + if resource.CPU.Shares != nil { + final.CpuShares = *resource.CPU.Shares + } + final.CpusetCpus = resource.CPU.Cpus + final.CpusetMems = resource.CPU.Mems + } + + // BlkIO + if resource.BlockIO != nil { + if len(resource.BlockIO.ThrottleReadBpsDevice) > 0 { + for _, entry := range resource.BlockIO.ThrottleReadBpsDevice { + throttle := &runcconfig.ThrottleDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + throttle.BlockIODevice = *dev + throttle.Rate = entry.Rate + final.BlkioThrottleReadBpsDevice = append(final.BlkioThrottleReadBpsDevice, throttle) + } + } + if len(resource.BlockIO.ThrottleWriteBpsDevice) > 0 { + for _, entry := range resource.BlockIO.ThrottleWriteBpsDevice { + throttle := &runcconfig.ThrottleDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + throttle.BlockIODevice = *dev + throttle.Rate = entry.Rate + final.BlkioThrottleWriteBpsDevice = append(final.BlkioThrottleWriteBpsDevice, throttle) + } + } + if len(resource.BlockIO.ThrottleReadIOPSDevice) > 0 { + for _, entry := range resource.BlockIO.ThrottleReadIOPSDevice { + throttle := &runcconfig.ThrottleDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + throttle.BlockIODevice = *dev + throttle.Rate = entry.Rate + final.BlkioThrottleReadIOPSDevice = append(final.BlkioThrottleReadIOPSDevice, throttle) + } + } + if len(resource.BlockIO.ThrottleWriteIOPSDevice) > 0 { + for _, entry := range resource.BlockIO.ThrottleWriteIOPSDevice { + throttle := &runcconfig.ThrottleDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + throttle.BlockIODevice = *dev + throttle.Rate = entry.Rate + final.BlkioThrottleWriteIOPSDevice = append(final.BlkioThrottleWriteIOPSDevice, throttle) + } + } + if resource.BlockIO.LeafWeight != nil { + final.BlkioLeafWeight = *resource.BlockIO.LeafWeight + } + if resource.BlockIO.Weight != nil { + final.BlkioWeight = *resource.BlockIO.Weight + } + if len(resource.BlockIO.WeightDevice) > 0 { + for _, entry := range resource.BlockIO.WeightDevice { + weight := &runcconfig.WeightDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + if entry.Weight != nil { + weight.Weight = *entry.Weight + } + if entry.LeafWeight != nil { + weight.LeafWeight = *entry.LeafWeight + } + weight.BlockIODevice = *dev + final.BlkioWeightDevice = append(final.BlkioWeightDevice, weight) + } + } + } + + // Pids + if resource.Pids != nil { + final.PidsLimit = resource.Pids.Limit + } + + // Networking + if resource.Network != nil { + if resource.Network.ClassID != nil { + final.NetClsClassid = *resource.Network.ClassID + } + } + + // Unified state + final.Unified = resource.Unified + + return *final, nil +} diff --git a/libpod/pod_internal.go b/libpod/pod_internal.go index 41f745e6c..1502bcb06 100644 --- a/libpod/pod_internal.go +++ b/libpod/pod_internal.go @@ -69,7 +69,7 @@ func (p *Pod) refresh() error { if p.config.UsePodCgroup { switch p.runtime.config.Engine.CgroupManager { case config.SystemdCgroupsManager: - cgroupPath, err := systemdSliceFromPath(p.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", p.ID())) + cgroupPath, err := systemdSliceFromPath(p.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", p.ID()), p.ResourceLim()) if err != nil { logrus.Errorf("Creating Cgroup for pod %s: %v", p.ID(), err) } diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go index dcc3a044f..d75ac2971 100644 --- a/libpod/runtime_pod_linux.go +++ b/libpod/runtime_pod_linux.go @@ -17,7 +17,7 @@ import ( "github.com/containers/podman/v4/libpod/events" "github.com/containers/podman/v4/pkg/rootless" "github.com/containers/podman/v4/pkg/specgen" - spec "github.com/opencontainers/runtime-spec/specs-go" + runcconfig "github.com/opencontainers/runc/libcontainer/configs" "github.com/pkg/errors" "github.com/sirupsen/logrus" ) @@ -66,6 +66,7 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option case config.CgroupfsCgroupsManager: canUseCgroup := !rootless.IsRootless() || isRootlessCgroupSet(pod.config.CgroupParent) if canUseCgroup { + // need to actually create parent here if pod.config.CgroupParent == "" { pod.config.CgroupParent = CgroupfsDefaultCgroupParent } else if strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") { @@ -73,12 +74,26 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option } // If we are set to use pod cgroups, set the cgroup parent that // all containers in the pod will share - // No need to create it with cgroupfs - the first container to - // launch should do it for us if pod.config.UsePodCgroup { pod.state.CgroupPath = filepath.Join(pod.config.CgroupParent, pod.ID()) if p.InfraContainerSpec != nil { p.InfraContainerSpec.CgroupParent = pod.state.CgroupPath + res, err := GetLimits(p.InfraContainerSpec.ResourceLimits) + if err != nil { + return nil, err + } + // Need to both create and update the cgroup + // rather than create a new path in c/common for pod cgroup creation + // just create as if it is a ctr and then update figures out that we need to + // populate the resource limits on the pod level + cgc, err := cgroups.New(pod.state.CgroupPath, &res) + if err != nil { + return nil, err + } + err = cgc.Update(&res) + if err != nil { + return nil, err + } } } } @@ -95,7 +110,7 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option // If we are set to use pod cgroups, set the cgroup parent that // all containers in the pod will share if pod.config.UsePodCgroup { - cgroupPath, err := systemdSliceFromPath(pod.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", pod.ID())) + cgroupPath, err := systemdSliceFromPath(pod.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", pod.ID()), p.InfraContainerSpec.ResourceLimits) if err != nil { return nil, errors.Wrapf(err, "unable to create pod cgroup for pod %s", pod.ID()) } @@ -239,9 +254,8 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, } // New resource limits - resLimits := new(spec.LinuxResources) - resLimits.Pids = new(spec.LinuxPids) - resLimits.Pids.Limit = 1 // Inhibit forks with very low pids limit + resLimits := new(runcconfig.Resources) + resLimits.PidsLimit = 1 // Inhibit forks with very low pids limit // Don't try if we failed to retrieve the cgroup if err == nil { @@ -321,7 +335,7 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, switch p.runtime.config.Engine.CgroupManager { case config.SystemdCgroupsManager: - if err := deleteSystemdCgroup(p.state.CgroupPath); err != nil { + if err := deleteSystemdCgroup(p.state.CgroupPath, p.ResourceLim()); err != nil { if removalErr == nil { removalErr = errors.Wrapf(err, "error removing pod %s cgroup", p.ID()) } else { diff --git a/libpod/stats.go b/libpod/stats.go index d2ffc3b32..eaac9d7d0 100644 --- a/libpod/stats.go +++ b/libpod/stats.go @@ -9,6 +9,8 @@ import ( "syscall" "time" + runccgroup "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/containers/common/pkg/cgroups" "github.com/containers/podman/v4/libpod/define" "github.com/pkg/errors" @@ -69,29 +71,29 @@ func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*de // If the current total usage in the cgroup is less than what was previously // recorded then it means the container was restarted and runs in a new cgroup - if previousStats.Duration > cgroupStats.CPU.Usage.Total { + if previousStats.Duration > cgroupStats.CpuStats.CpuUsage.TotalUsage { previousStats = &define.ContainerStats{} } previousCPU := previousStats.CPUNano now := uint64(time.Now().UnixNano()) - stats.Duration = cgroupStats.CPU.Usage.Total + stats.Duration = cgroupStats.CpuStats.CpuUsage.TotalUsage stats.UpTime = time.Duration(stats.Duration) stats.CPU = calculateCPUPercent(cgroupStats, previousCPU, now, previousStats.SystemNano) // calc the average cpu usage for the time the container is running stats.AvgCPU = calculateCPUPercent(cgroupStats, 0, now, uint64(c.state.StartedTime.UnixNano())) - stats.MemUsage = cgroupStats.Memory.Usage.Usage + stats.MemUsage = cgroupStats.MemoryStats.Usage.Usage stats.MemLimit = c.getMemLimit() stats.MemPerc = (float64(stats.MemUsage) / float64(stats.MemLimit)) * 100 stats.PIDs = 0 if conState == define.ContainerStateRunning || conState == define.ContainerStatePaused { - stats.PIDs = cgroupStats.Pids.Current + stats.PIDs = cgroupStats.PidsStats.Current } stats.BlockInput, stats.BlockOutput = calculateBlockIO(cgroupStats) - stats.CPUNano = cgroupStats.CPU.Usage.Total - stats.CPUSystemNano = cgroupStats.CPU.Usage.Kernel + stats.CPUNano = cgroupStats.CpuStats.CpuUsage.TotalUsage + stats.CPUSystemNano = cgroupStats.CpuStats.CpuUsage.UsageInKernelmode stats.SystemNano = now - stats.PerCPU = cgroupStats.CPU.Usage.PerCPU + stats.PerCPU = cgroupStats.CpuStats.CpuUsage.PercpuUsage // Handle case where the container is not in a network namespace if netStats != nil { stats.NetInput = netStats.TxBytes @@ -133,10 +135,10 @@ func (c *Container) getMemLimit() uint64 { // previousCPU is the last value of stats.CPU.Usage.Total measured at the time previousSystem. // (now - previousSystem) is the time delta in nanoseconds, between the measurement in previousCPU // and the updated value in stats. -func calculateCPUPercent(stats *cgroups.Metrics, previousCPU, now, previousSystem uint64) float64 { +func calculateCPUPercent(stats *runccgroup.Stats, previousCPU, now, previousSystem uint64) float64 { var ( cpuPercent = 0.0 - cpuDelta = float64(stats.CPU.Usage.Total - previousCPU) + cpuDelta = float64(stats.CpuStats.CpuUsage.TotalUsage - previousCPU) systemDelta = float64(now - previousSystem) ) if systemDelta > 0.0 && cpuDelta > 0.0 { @@ -146,8 +148,8 @@ func calculateCPUPercent(stats *cgroups.Metrics, previousCPU, now, previousSyste return cpuPercent } -func calculateBlockIO(stats *cgroups.Metrics) (read uint64, write uint64) { - for _, blkIOEntry := range stats.Blkio.IoServiceBytesRecursive { +func calculateBlockIO(stats *runccgroup.Stats) (read uint64, write uint64) { + for _, blkIOEntry := range stats.BlkioStats.IoServiceBytesRecursive { switch strings.ToLower(blkIOEntry.Op) { case "read": read += blkIOEntry.Value diff --git a/libpod/util_linux.go b/libpod/util_linux.go index fe98056dc..414d1bff9 100644 --- a/libpod/util_linux.go +++ b/libpod/util_linux.go @@ -11,6 +11,7 @@ import ( "github.com/containers/common/pkg/cgroups" "github.com/containers/podman/v4/libpod/define" "github.com/containers/podman/v4/pkg/rootless" + spec "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/selinux/go-selinux/label" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -20,7 +21,7 @@ import ( // systemdSliceFromPath makes a new systemd slice under the given parent with // the given name. // The parent must be a slice. The name must NOT include ".slice" -func systemdSliceFromPath(parent, name string) (string, error) { +func systemdSliceFromPath(parent, name string, resources *spec.LinuxResources) (string, error) { cgroupPath, err := assembleSystemdCgroupName(parent, name) if err != nil { return "", err @@ -28,7 +29,7 @@ func systemdSliceFromPath(parent, name string) (string, error) { logrus.Debugf("Created cgroup path %s for parent %s and name %s", cgroupPath, parent, name) - if err := makeSystemdCgroup(cgroupPath); err != nil { + if err := makeSystemdCgroup(cgroupPath, resources); err != nil { return "", errors.Wrapf(err, "error creating cgroup %s", cgroupPath) } @@ -45,8 +46,12 @@ func getDefaultSystemdCgroup() string { } // makeSystemdCgroup creates a systemd Cgroup at the given location. -func makeSystemdCgroup(path string) error { - controller, err := cgroups.NewSystemd(getDefaultSystemdCgroup()) +func makeSystemdCgroup(path string, resources *spec.LinuxResources) error { + res, err := GetLimits(resources) + if err != nil { + return err + } + controller, err := cgroups.NewSystemd(getDefaultSystemdCgroup(), &res) if err != nil { return err } @@ -54,12 +59,20 @@ func makeSystemdCgroup(path string) error { if rootless.IsRootless() { return controller.CreateSystemdUserUnit(path, rootless.GetRootlessUID()) } - return controller.CreateSystemdUnit(path) + err = controller.CreateSystemdUnit(path) + if err != nil { + return err + } + return nil } // deleteSystemdCgroup deletes the systemd cgroup at the given location -func deleteSystemdCgroup(path string) error { - controller, err := cgroups.NewSystemd(getDefaultSystemdCgroup()) +func deleteSystemdCgroup(path string, resources *spec.LinuxResources) error { + res, err := GetLimits(resources) + if err != nil { + return err + } + controller, err := cgroups.NewSystemd(getDefaultSystemdCgroup(), &res) if err != nil { return err } |