diff options
Diffstat (limited to 'libpod')
-rw-r--r-- | libpod/container_config.go | 1 | ||||
-rw-r--r-- | libpod/container_exec.go | 21 | ||||
-rw-r--r-- | libpod/container_internal_linux.go | 3 | ||||
-rw-r--r-- | libpod/define/volume_inspect.go | 6 | ||||
-rw-r--r-- | libpod/events.go | 10 | ||||
-rw-r--r-- | libpod/events/config.go | 4 | ||||
-rw-r--r-- | libpod/events/events.go | 4 | ||||
-rw-r--r-- | libpod/events/journal_linux.go | 2 | ||||
-rw-r--r-- | libpod/healthcheck.go | 15 | ||||
-rw-r--r-- | libpod/oci_conmon_linux.go | 201 | ||||
-rw-r--r-- | libpod/plugin/volume_api.go | 18 | ||||
-rw-r--r-- | libpod/pod_internal.go | 2 | ||||
-rw-r--r-- | libpod/runtime_ctr.go | 6 | ||||
-rw-r--r-- | libpod/runtime_pod_linux.go | 32 | ||||
-rw-r--r-- | libpod/runtime_volume.go | 2 | ||||
-rw-r--r-- | libpod/runtime_volume_linux.go | 98 | ||||
-rw-r--r-- | libpod/stats.go | 24 | ||||
-rw-r--r-- | libpod/util_linux.go | 27 |
18 files changed, 415 insertions, 61 deletions
diff --git a/libpod/container_config.go b/libpod/container_config.go index 45ff03d58..544c45a8c 100644 --- a/libpod/container_config.go +++ b/libpod/container_config.go @@ -424,7 +424,6 @@ type InfraInherit struct { CapDrop []string `json:"cap_drop,omitempty"` HostDeviceList []spec.LinuxDevice `json:"host_device_list,omitempty"` ImageVolumes []*specgen.ImageVolume `json:"image_volumes,omitempty"` - InfraResources *spec.LinuxResources `json:"resource_limits,omitempty"` Mounts []spec.Mount `json:"mounts,omitempty"` NoNewPrivileges bool `json:"no_new_privileges,omitempty"` OverlayVolumes []*specgen.OverlayVolume `json:"overlay_volumes,omitempty"` diff --git a/libpod/container_exec.go b/libpod/container_exec.go index be00c6fbe..b112273d0 100644 --- a/libpod/container_exec.go +++ b/libpod/container_exec.go @@ -277,9 +277,13 @@ func (c *Container) ExecStart(sessionID string) error { return c.save() } +func (c *Container) ExecStartAndAttach(sessionID string, streams *define.AttachStreams, newSize *define.TerminalSize) error { + return c.execStartAndAttach(sessionID, streams, newSize, false) +} + // ExecStartAndAttach starts and attaches to an exec session in a container. // newSize resizes the tty to this size before the process is started, must be nil if the exec session has no tty -func (c *Container) ExecStartAndAttach(sessionID string, streams *define.AttachStreams, newSize *define.TerminalSize) error { +func (c *Container) execStartAndAttach(sessionID string, streams *define.AttachStreams, newSize *define.TerminalSize, isHealthcheck bool) error { if !c.batched { c.lock.Lock() defer c.lock.Unlock() @@ -315,7 +319,12 @@ func (c *Container) ExecStartAndAttach(sessionID string, streams *define.AttachS return err } - c.newContainerEvent(events.Exec) + if isHealthcheck { + c.newContainerEvent(events.HealthStatus) + } else { + c.newContainerEvent(events.Exec) + } + logrus.Debugf("Successfully started exec session %s in container %s", session.ID(), c.ID()) var lastErr error @@ -743,10 +752,14 @@ func (c *Container) ExecResize(sessionID string, newSize define.TerminalSize) er return c.ociRuntime.ExecAttachResize(c, sessionID, newSize) } +func (c *Container) Exec(config *ExecConfig, streams *define.AttachStreams, resize <-chan define.TerminalSize) (int, error) { + return c.exec(config, streams, resize, false) +} + // Exec emulates the old Libpod exec API, providing a single call to create, // run, and remove an exec session. Returns exit code and error. Exit code is // not guaranteed to be set sanely if error is not nil. -func (c *Container) Exec(config *ExecConfig, streams *define.AttachStreams, resize <-chan define.TerminalSize) (int, error) { +func (c *Container) exec(config *ExecConfig, streams *define.AttachStreams, resize <-chan define.TerminalSize, isHealthcheck bool) (int, error) { sessionID, err := c.ExecCreate(config) if err != nil { return -1, err @@ -780,7 +793,7 @@ func (c *Container) Exec(config *ExecConfig, streams *define.AttachStreams, resi }() } - if err := c.ExecStartAndAttach(sessionID, streams, size); err != nil { + if err := c.execStartAndAttach(sessionID, streams, size, isHealthcheck); err != nil { return -1, err } diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 77b598b16..0f4bf0f55 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -870,6 +870,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { if err != nil { return nil, err } + g.SetLinuxCgroupsPath(cgroupPath) // Warning: CDI may alter g.Config in place. @@ -1141,7 +1142,7 @@ func (c *Container) addCheckpointImageMetadata(importBuilder *buildah.Builder) e return fmt.Errorf("getting host info: %v", err) } - criuVersion, err := criu.GetCriuVestion() + criuVersion, err := criu.GetCriuVersion() if err != nil { return fmt.Errorf("getting criu version: %v", err) } diff --git a/libpod/define/volume_inspect.go b/libpod/define/volume_inspect.go index fac179176..aaa23b4fc 100644 --- a/libpod/define/volume_inspect.go +++ b/libpod/define/volume_inspect.go @@ -57,3 +57,9 @@ type InspectVolumeData struct { // UID/GID. NeedsChown bool `json:"NeedsChown,omitempty"` } + +type VolumeReload struct { + Added []string + Removed []string + Errors []error +} diff --git a/libpod/events.go b/libpod/events.go index 021b3b53c..bb50df92d 100644 --- a/libpod/events.go +++ b/libpod/events.go @@ -33,6 +33,16 @@ func (c *Container) newContainerEvent(status events.Status) { Attributes: c.Labels(), } + // if the current event is a HealthStatus event, we need to get the current + // status of the container to pass to the event + if status == events.HealthStatus { + containerHealthStatus, err := c.healthCheckStatus() + if err != nil { + e.HealthStatus = fmt.Sprintf("%v", err) + } + e.HealthStatus = containerHealthStatus + } + if err := c.runtime.eventer.Write(e); err != nil { logrus.Errorf("Unable to write pod event: %q", err) } diff --git a/libpod/events/config.go b/libpod/events/config.go index 2e7016136..a678baa2d 100644 --- a/libpod/events/config.go +++ b/libpod/events/config.go @@ -40,6 +40,8 @@ type Event struct { Time time.Time // Type of event that occurred Type Type + // Health status of the current container + HealthStatus string `json:"health_status,omitempty"` Details } @@ -141,6 +143,8 @@ const ( Exited Status = "died" // Export ... Export Status = "export" + // HealthStatus ... + HealthStatus Status = "health_status" // History ... History Status = "history" // Import ... diff --git a/libpod/events/events.go b/libpod/events/events.go index a30e0f1ca..a8001ab95 100644 --- a/libpod/events/events.go +++ b/libpod/events/events.go @@ -76,7 +76,7 @@ func (e *Event) ToHumanReadable(truncate bool) string { } switch e.Type { case Container, Pod: - humanFormat = fmt.Sprintf("%s %s %s %s (image=%s, name=%s", e.Time, e.Type, e.Status, id, e.Image, e.Name) + humanFormat = fmt.Sprintf("%s %s %s %s (image=%s, name=%s, health_status=%s", e.Time, e.Type, e.Status, id, e.Image, e.Name, e.HealthStatus) // check if the container has labels and add it to the output if len(e.Attributes) > 0 { for k, v := range e.Attributes { @@ -168,6 +168,8 @@ func StringToStatus(name string) (Status, error) { return Exited, nil case Export.String(): return Export, nil + case HealthStatus.String(): + return HealthStatus, nil case History.String(): return History, nil case Import.String(): diff --git a/libpod/events/journal_linux.go b/libpod/events/journal_linux.go index d21b60c68..036638d34 100644 --- a/libpod/events/journal_linux.go +++ b/libpod/events/journal_linux.go @@ -58,6 +58,7 @@ func (e EventJournalD) Write(ee Event) error { } m["PODMAN_LABELS"] = string(b) } + m["PODMAN_HEALTH_STATUS"] = ee.HealthStatus case Network: m["PODMAN_ID"] = ee.ID m["PODMAN_NETWORK_NAME"] = ee.Network @@ -213,6 +214,7 @@ func newEventFromJournalEntry(entry *sdjournal.JournalEntry) (*Event, error) { newEvent.Details = Details{Attributes: labels} } } + newEvent.HealthStatus = entry.Fields["PODMAN_HEALTH_STATUS"] case Network: newEvent.ID = entry.Fields["PODMAN_ID"] newEvent.Network = entry.Fields["PODMAN_NETWORK_NAME"] diff --git a/libpod/healthcheck.go b/libpod/healthcheck.go index bd77e98c6..95c70b60e 100644 --- a/libpod/healthcheck.go +++ b/libpod/healthcheck.go @@ -90,7 +90,7 @@ func (c *Container) runHealthCheck() (define.HealthCheckStatus, error) { hcResult := define.HealthCheckSuccess config := new(ExecConfig) config.Command = newCommand - exitCode, hcErr := c.Exec(config, streams, nil) + exitCode, hcErr := c.exec(config, streams, nil, true) if hcErr != nil { errCause := errors.Cause(hcErr) hcResult = define.HealthCheckFailure @@ -232,18 +232,27 @@ func (c *Container) getHealthCheckLog() (define.HealthCheckResults, error) { // HealthCheckStatus returns the current state of a container with a healthcheck func (c *Container) HealthCheckStatus() (string, error) { + c.lock.Lock() + defer c.lock.Unlock() + return c.healthCheckStatus() +} + +// Internal function to return the current state of a container with a healthcheck. +// This function does not lock the container. +func (c *Container) healthCheckStatus() (string, error) { if !c.HasHealthCheck() { return "", errors.Errorf("container %s has no defined healthcheck", c.ID()) } - c.lock.Lock() - defer c.lock.Unlock() + if err := c.syncContainer(); err != nil { return "", err } + results, err := c.getHealthCheckLog() if err != nil { return "", errors.Wrapf(err, "unable to get healthcheck log for %s", c.ID()) } + return results.Status, nil } diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index d417626dc..7a9ae7ee5 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -23,6 +23,9 @@ import ( "text/template" "time" + runcconfig "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/devices" + "github.com/containers/common/pkg/cgroups" "github.com/containers/common/pkg/config" conmonConfig "github.com/containers/conmon/runner/config" @@ -1433,9 +1436,14 @@ func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec // TODO: This should be a switch - we are not guaranteed that // there are only 2 valid cgroup managers cgroupParent := ctr.CgroupParent() + cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon") + Resource := ctr.Spec().Linux.Resources + cgroupResources, err := GetLimits(Resource) + if err != nil { + logrus.StandardLogger().Log(logLevel, "Could not get ctr resources") + } if ctr.CgroupManager() == config.SystemdCgroupsManager { unitName := createUnitName("libpod-conmon", ctr.ID()) - realCgroupParent := cgroupParent splitParent := strings.Split(cgroupParent, "/") if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 { @@ -1447,8 +1455,7 @@ func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to systemd sandbox cgroup: %v", err) } } else { - cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon") - control, err := cgroups.New(cgroupPath, &spec.LinuxResources{}) + control, err := cgroups.New(cgroupPath, &cgroupResources) if err != nil { logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to cgroupfs sandbox cgroup: %v", err) } else if err := control.AddPid(cmd.Process.Pid); err != nil { @@ -1730,3 +1737,191 @@ func httpAttachNonTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter, } } } + +// GetLimits converts spec resource limits to cgroup consumable limits +func GetLimits(resource *spec.LinuxResources) (runcconfig.Resources, error) { + if resource == nil { + resource = &spec.LinuxResources{} + } + final := &runcconfig.Resources{} + devs := []*devices.Rule{} + + // Devices + for _, entry := range resource.Devices { + if entry.Major == nil || entry.Minor == nil { + continue + } + runeType := 'a' + switch entry.Type { + case "b": + runeType = 'b' + case "c": + runeType = 'c' + } + + devs = append(devs, &devices.Rule{ + Type: devices.Type(runeType), + Major: *entry.Major, + Minor: *entry.Minor, + Permissions: devices.Permissions(entry.Access), + Allow: entry.Allow, + }) + } + final.Devices = devs + + // HugepageLimits + pageLimits := []*runcconfig.HugepageLimit{} + for _, entry := range resource.HugepageLimits { + pageLimits = append(pageLimits, &runcconfig.HugepageLimit{ + Pagesize: entry.Pagesize, + Limit: entry.Limit, + }) + } + final.HugetlbLimit = pageLimits + + // Networking + netPriorities := []*runcconfig.IfPrioMap{} + if resource.Network != nil { + for _, entry := range resource.Network.Priorities { + netPriorities = append(netPriorities, &runcconfig.IfPrioMap{ + Interface: entry.Name, + Priority: int64(entry.Priority), + }) + } + } + final.NetPrioIfpriomap = netPriorities + rdma := make(map[string]runcconfig.LinuxRdma) + for name, entry := range resource.Rdma { + rdma[name] = runcconfig.LinuxRdma{HcaHandles: entry.HcaHandles, HcaObjects: entry.HcaObjects} + } + final.Rdma = rdma + + // Memory + if resource.Memory != nil { + if resource.Memory.Limit != nil { + final.Memory = *resource.Memory.Limit + } + if resource.Memory.Reservation != nil { + final.MemoryReservation = *resource.Memory.Reservation + } + if resource.Memory.Swap != nil { + final.MemorySwap = *resource.Memory.Swap + } + if resource.Memory.Swappiness != nil { + final.MemorySwappiness = resource.Memory.Swappiness + } + } + + // CPU + if resource.CPU != nil { + if resource.CPU.Period != nil { + final.CpuPeriod = *resource.CPU.Period + } + if resource.CPU.Quota != nil { + final.CpuQuota = *resource.CPU.Quota + } + if resource.CPU.RealtimePeriod != nil { + final.CpuRtPeriod = *resource.CPU.RealtimePeriod + } + if resource.CPU.RealtimeRuntime != nil { + final.CpuRtRuntime = *resource.CPU.RealtimeRuntime + } + if resource.CPU.Shares != nil { + final.CpuShares = *resource.CPU.Shares + } + final.CpusetCpus = resource.CPU.Cpus + final.CpusetMems = resource.CPU.Mems + } + + // BlkIO + if resource.BlockIO != nil { + if len(resource.BlockIO.ThrottleReadBpsDevice) > 0 { + for _, entry := range resource.BlockIO.ThrottleReadBpsDevice { + throttle := &runcconfig.ThrottleDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + throttle.BlockIODevice = *dev + throttle.Rate = entry.Rate + final.BlkioThrottleReadBpsDevice = append(final.BlkioThrottleReadBpsDevice, throttle) + } + } + if len(resource.BlockIO.ThrottleWriteBpsDevice) > 0 { + for _, entry := range resource.BlockIO.ThrottleWriteBpsDevice { + throttle := &runcconfig.ThrottleDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + throttle.BlockIODevice = *dev + throttle.Rate = entry.Rate + final.BlkioThrottleWriteBpsDevice = append(final.BlkioThrottleWriteBpsDevice, throttle) + } + } + if len(resource.BlockIO.ThrottleReadIOPSDevice) > 0 { + for _, entry := range resource.BlockIO.ThrottleReadIOPSDevice { + throttle := &runcconfig.ThrottleDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + throttle.BlockIODevice = *dev + throttle.Rate = entry.Rate + final.BlkioThrottleReadIOPSDevice = append(final.BlkioThrottleReadIOPSDevice, throttle) + } + } + if len(resource.BlockIO.ThrottleWriteIOPSDevice) > 0 { + for _, entry := range resource.BlockIO.ThrottleWriteIOPSDevice { + throttle := &runcconfig.ThrottleDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + throttle.BlockIODevice = *dev + throttle.Rate = entry.Rate + final.BlkioThrottleWriteIOPSDevice = append(final.BlkioThrottleWriteIOPSDevice, throttle) + } + } + if resource.BlockIO.LeafWeight != nil { + final.BlkioLeafWeight = *resource.BlockIO.LeafWeight + } + if resource.BlockIO.Weight != nil { + final.BlkioWeight = *resource.BlockIO.Weight + } + if len(resource.BlockIO.WeightDevice) > 0 { + for _, entry := range resource.BlockIO.WeightDevice { + weight := &runcconfig.WeightDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + if entry.Weight != nil { + weight.Weight = *entry.Weight + } + if entry.LeafWeight != nil { + weight.LeafWeight = *entry.LeafWeight + } + weight.BlockIODevice = *dev + final.BlkioWeightDevice = append(final.BlkioWeightDevice, weight) + } + } + } + + // Pids + if resource.Pids != nil { + final.PidsLimit = resource.Pids.Limit + } + + // Networking + if resource.Network != nil { + if resource.Network.ClassID != nil { + final.NetClsClassid = *resource.Network.ClassID + } + } + + // Unified state + final.Unified = resource.Unified + + return *final, nil +} diff --git a/libpod/plugin/volume_api.go b/libpod/plugin/volume_api.go index f997ccf22..2de7db32c 100644 --- a/libpod/plugin/volume_api.go +++ b/libpod/plugin/volume_api.go @@ -197,13 +197,13 @@ func (p *VolumePlugin) verifyReachable() error { // Send a request to the volume plugin for handling. // Callers *MUST* close the response when they are done. -func (p *VolumePlugin) sendRequest(toJSON interface{}, hasBody bool, endpoint string) (*http.Response, error) { +func (p *VolumePlugin) sendRequest(toJSON interface{}, endpoint string) (*http.Response, error) { var ( reqJSON []byte err error ) - if hasBody { + if toJSON != nil { reqJSON, err = json.Marshal(toJSON) if err != nil { return nil, errors.Wrapf(err, "error marshalling request JSON for volume plugin %s endpoint %s", p.Name, endpoint) @@ -274,7 +274,7 @@ func (p *VolumePlugin) CreateVolume(req *volume.CreateRequest) error { logrus.Infof("Creating volume %s using plugin %s", req.Name, p.Name) - resp, err := p.sendRequest(req, true, createPath) + resp, err := p.sendRequest(req, createPath) if err != nil { return err } @@ -291,7 +291,7 @@ func (p *VolumePlugin) ListVolumes() ([]*volume.Volume, error) { logrus.Infof("Listing volumes using plugin %s", p.Name) - resp, err := p.sendRequest(nil, false, listPath) + resp, err := p.sendRequest(nil, listPath) if err != nil { return nil, err } @@ -326,7 +326,7 @@ func (p *VolumePlugin) GetVolume(req *volume.GetRequest) (*volume.Volume, error) logrus.Infof("Getting volume %s using plugin %s", req.Name, p.Name) - resp, err := p.sendRequest(req, true, getPath) + resp, err := p.sendRequest(req, getPath) if err != nil { return nil, err } @@ -361,7 +361,7 @@ func (p *VolumePlugin) RemoveVolume(req *volume.RemoveRequest) error { logrus.Infof("Removing volume %s using plugin %s", req.Name, p.Name) - resp, err := p.sendRequest(req, true, removePath) + resp, err := p.sendRequest(req, removePath) if err != nil { return err } @@ -382,7 +382,7 @@ func (p *VolumePlugin) GetVolumePath(req *volume.PathRequest) (string, error) { logrus.Infof("Getting volume %s path using plugin %s", req.Name, p.Name) - resp, err := p.sendRequest(req, true, hostVirtualPath) + resp, err := p.sendRequest(req, hostVirtualPath) if err != nil { return "", err } @@ -419,7 +419,7 @@ func (p *VolumePlugin) MountVolume(req *volume.MountRequest) (string, error) { logrus.Infof("Mounting volume %s using plugin %s for container %s", req.Name, p.Name, req.ID) - resp, err := p.sendRequest(req, true, mountPath) + resp, err := p.sendRequest(req, mountPath) if err != nil { return "", err } @@ -455,7 +455,7 @@ func (p *VolumePlugin) UnmountVolume(req *volume.UnmountRequest) error { logrus.Infof("Unmounting volume %s using plugin %s for container %s", req.Name, p.Name, req.ID) - resp, err := p.sendRequest(req, true, unmountPath) + resp, err := p.sendRequest(req, unmountPath) if err != nil { return err } diff --git a/libpod/pod_internal.go b/libpod/pod_internal.go index 41f745e6c..1502bcb06 100644 --- a/libpod/pod_internal.go +++ b/libpod/pod_internal.go @@ -69,7 +69,7 @@ func (p *Pod) refresh() error { if p.config.UsePodCgroup { switch p.runtime.config.Engine.CgroupManager { case config.SystemdCgroupsManager: - cgroupPath, err := systemdSliceFromPath(p.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", p.ID())) + cgroupPath, err := systemdSliceFromPath(p.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", p.ID()), p.ResourceLim()) if err != nil { logrus.Errorf("Creating Cgroup for pod %s: %v", p.ID(), err) } diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index a9ae9d1db..459514d47 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -502,7 +502,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai volOptions = append(volOptions, parsedOptions...) } } - newVol, err := r.newVolume(volOptions...) + newVol, err := r.newVolume(false, volOptions...) if err != nil { return nil, errors.Wrapf(err, "error creating named volume %q", vol.Name) } @@ -805,7 +805,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo if !volume.Anonymous() { continue } - if err := runtime.removeVolume(ctx, volume, false, timeout); err != nil && errors.Cause(err) != define.ErrNoSuchVolume { + if err := runtime.removeVolume(ctx, volume, false, timeout, false); err != nil && errors.Cause(err) != define.ErrNoSuchVolume { if errors.Cause(err) == define.ErrVolumeBeingUsed { // Ignore error, since podman will report original error volumesFrom, _ := c.volumesFrom() @@ -963,7 +963,7 @@ func (r *Runtime) evictContainer(ctx context.Context, idOrName string, removeVol if !volume.Anonymous() { continue } - if err := r.removeVolume(ctx, volume, false, timeout); err != nil && err != define.ErrNoSuchVolume && err != define.ErrVolumeBeingUsed { + if err := r.removeVolume(ctx, volume, false, timeout, false); err != nil && err != define.ErrNoSuchVolume && err != define.ErrVolumeBeingUsed { logrus.Errorf("Cleaning up volume (%s): %v", v, err) } } diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go index dcc3a044f..00017ca21 100644 --- a/libpod/runtime_pod_linux.go +++ b/libpod/runtime_pod_linux.go @@ -17,7 +17,7 @@ import ( "github.com/containers/podman/v4/libpod/events" "github.com/containers/podman/v4/pkg/rootless" "github.com/containers/podman/v4/pkg/specgen" - spec "github.com/opencontainers/runtime-spec/specs-go" + runcconfig "github.com/opencontainers/runc/libcontainer/configs" "github.com/pkg/errors" "github.com/sirupsen/logrus" ) @@ -66,6 +66,7 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option case config.CgroupfsCgroupsManager: canUseCgroup := !rootless.IsRootless() || isRootlessCgroupSet(pod.config.CgroupParent) if canUseCgroup { + // need to actually create parent here if pod.config.CgroupParent == "" { pod.config.CgroupParent = CgroupfsDefaultCgroupParent } else if strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") { @@ -73,12 +74,26 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option } // If we are set to use pod cgroups, set the cgroup parent that // all containers in the pod will share - // No need to create it with cgroupfs - the first container to - // launch should do it for us if pod.config.UsePodCgroup { pod.state.CgroupPath = filepath.Join(pod.config.CgroupParent, pod.ID()) if p.InfraContainerSpec != nil { p.InfraContainerSpec.CgroupParent = pod.state.CgroupPath + res, err := GetLimits(p.InfraContainerSpec.ResourceLimits) + if err != nil { + return nil, err + } + // Need to both create and update the cgroup + // rather than create a new path in c/common for pod cgroup creation + // just create as if it is a ctr and then update figures out that we need to + // populate the resource limits on the pod level + cgc, err := cgroups.New(pod.state.CgroupPath, &res) + if err != nil { + return nil, err + } + err = cgc.Update(&res) + if err != nil { + return nil, err + } } } } @@ -95,7 +110,7 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option // If we are set to use pod cgroups, set the cgroup parent that // all containers in the pod will share if pod.config.UsePodCgroup { - cgroupPath, err := systemdSliceFromPath(pod.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", pod.ID())) + cgroupPath, err := systemdSliceFromPath(pod.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", pod.ID()), p.InfraContainerSpec.ResourceLimits) if err != nil { return nil, errors.Wrapf(err, "unable to create pod cgroup for pod %s", pod.ID()) } @@ -239,9 +254,8 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, } // New resource limits - resLimits := new(spec.LinuxResources) - resLimits.Pids = new(spec.LinuxPids) - resLimits.Pids.Limit = 1 // Inhibit forks with very low pids limit + resLimits := new(runcconfig.Resources) + resLimits.PidsLimit = 1 // Inhibit forks with very low pids limit // Don't try if we failed to retrieve the cgroup if err == nil { @@ -301,7 +315,7 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, if !volume.Anonymous() { continue } - if err := r.removeVolume(ctx, volume, false, timeout); err != nil { + if err := r.removeVolume(ctx, volume, false, timeout, false); err != nil { if errors.Cause(err) == define.ErrNoSuchVolume || errors.Cause(err) == define.ErrVolumeRemoved { continue } @@ -321,7 +335,7 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, switch p.runtime.config.Engine.CgroupManager { case config.SystemdCgroupsManager: - if err := deleteSystemdCgroup(p.state.CgroupPath); err != nil { + if err := deleteSystemdCgroup(p.state.CgroupPath, p.ResourceLim()); err != nil { if removalErr == nil { removalErr = errors.Wrapf(err, "error removing pod %s cgroup", p.ID()) } else { diff --git a/libpod/runtime_volume.go b/libpod/runtime_volume.go index 21bf8aefc..6872db21d 100644 --- a/libpod/runtime_volume.go +++ b/libpod/runtime_volume.go @@ -33,7 +33,7 @@ func (r *Runtime) RemoveVolume(ctx context.Context, v *Volume, force bool, timeo return nil } } - return r.removeVolume(ctx, v, force, timeout) + return r.removeVolume(ctx, v, force, timeout, false) } // GetVolume retrieves a volume given its full name. diff --git a/libpod/runtime_volume_linux.go b/libpod/runtime_volume_linux.go index f8788e183..877f3a1fd 100644 --- a/libpod/runtime_volume_linux.go +++ b/libpod/runtime_volume_linux.go @@ -5,6 +5,7 @@ package libpod import ( "context" + "fmt" "os" "path/filepath" "strings" @@ -25,11 +26,13 @@ func (r *Runtime) NewVolume(ctx context.Context, options ...VolumeCreateOption) if !r.valid { return nil, define.ErrRuntimeStopped } - return r.newVolume(options...) + return r.newVolume(false, options...) } -// newVolume creates a new empty volume -func (r *Runtime) newVolume(options ...VolumeCreateOption) (_ *Volume, deferredErr error) { +// newVolume creates a new empty volume with the given options. +// The createPluginVolume can be set to true to make it not create the volume in the volume plugin, +// this is required for the UpdateVolumePlugins() function. If you are not sure set this to false. +func (r *Runtime) newVolume(noCreatePluginVolume bool, options ...VolumeCreateOption) (_ *Volume, deferredErr error) { volume := newVolume(r) for _, option := range options { if err := option(volume); err != nil { @@ -83,7 +86,7 @@ func (r *Runtime) newVolume(options ...VolumeCreateOption) (_ *Volume, deferredE // Now we get conditional: we either need to make the volume in the // volume plugin, or on disk if not using a plugin. - if volume.plugin != nil { + if volume.plugin != nil && !noCreatePluginVolume { // We can't chown, or relabel, or similar the path the volume is // using, because it's not managed by us. // TODO: reevaluate this once we actually have volume plugins in @@ -164,6 +167,85 @@ func (r *Runtime) newVolume(options ...VolumeCreateOption) (_ *Volume, deferredE return volume, nil } +// UpdateVolumePlugins reads all volumes from all configured volume plugins and +// imports them into the libpod db. It also checks if existing libpod volumes +// are removed in the plugin, in this case we try to remove it from libpod. +// On errors we continue and try to do as much as possible. all errors are +// returned as array in the returned struct. +// This function has many race conditions, it is best effort but cannot guarantee +// a perfect state since plugins can be modified from the outside at any time. +func (r *Runtime) UpdateVolumePlugins(ctx context.Context) *define.VolumeReload { + var ( + added []string + removed []string + errs []error + allPluginVolumes = map[string]struct{}{} + ) + + for driverName, socket := range r.config.Engine.VolumePlugins { + driver, err := volplugin.GetVolumePlugin(driverName, socket) + if err != nil { + errs = append(errs, err) + continue + } + vols, err := driver.ListVolumes() + if err != nil { + errs = append(errs, fmt.Errorf("failed to read volumes from plugin %q: %w", driverName, err)) + continue + } + for _, vol := range vols { + allPluginVolumes[vol.Name] = struct{}{} + if _, err := r.newVolume(true, WithVolumeName(vol.Name), WithVolumeDriver(driverName)); err != nil { + // If the volume exists this is not an error, just ignore it and log. It is very likely + // that the volume from the plugin was already in our db. + if !errors.Is(err, define.ErrVolumeExists) { + errs = append(errs, err) + continue + } + logrus.Infof("Volume %q already exists: %v", vol.Name, err) + continue + } + added = append(added, vol.Name) + } + } + + libpodVolumes, err := r.state.AllVolumes() + if err != nil { + errs = append(errs, fmt.Errorf("cannot delete dangling plugin volumes: failed to read libpod volumes: %w", err)) + } + for _, vol := range libpodVolumes { + if vol.UsesVolumeDriver() { + if _, ok := allPluginVolumes[vol.Name()]; !ok { + // The volume is no longer in the plugin, lets remove it from the libpod db. + if err := r.removeVolume(ctx, vol, false, nil, true); err != nil { + if errors.Is(err, define.ErrVolumeBeingUsed) { + // Volume is still used by at least one container. This is very bad, + // the plugin no longer has this but we still need it. + errs = append(errs, fmt.Errorf("volume was removed from the plugin %q but containers still require it: %w", vol.config.Driver, err)) + continue + } + if errors.Is(err, define.ErrNoSuchVolume) || errors.Is(err, define.ErrVolumeRemoved) || errors.Is(err, define.ErrMissingPlugin) { + // Volume was already removed, no problem just ignore it and continue. + continue + } + + // some other error + errs = append(errs, err) + continue + } + // Volume was successfully removed + removed = append(removed, vol.Name()) + } + } + } + + return &define.VolumeReload{ + Added: added, + Removed: removed, + Errors: errs, + } +} + // makeVolumeInPluginIfNotExist makes a volume in the given volume plugin if it // does not already exist. func makeVolumeInPluginIfNotExist(name string, options map[string]string, plugin *volplugin.VolumePlugin) error { @@ -197,8 +279,10 @@ func makeVolumeInPluginIfNotExist(name string, options map[string]string, plugin return nil } -// removeVolume removes the specified volume from state as well tears down its mountpoint and storage -func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool, timeout *uint) error { +// removeVolume removes the specified volume from state as well tears down its mountpoint and storage. +// ignoreVolumePlugin is used to only remove the volume from the db and not the plugin, +// this is required when the volume was already removed from the plugin, i.e. in UpdateVolumePlugins(). +func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool, timeout *uint, ignoreVolumePlugin bool) error { if !v.valid { if ok, _ := r.state.HasVolume(v.Name()); !ok { return nil @@ -263,7 +347,7 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool, timeo var removalErr error // If we use a volume plugin, we need to remove from the plugin. - if v.UsesVolumeDriver() { + if v.UsesVolumeDriver() && !ignoreVolumePlugin { canRemove := true // Do we have a volume driver? diff --git a/libpod/stats.go b/libpod/stats.go index d2ffc3b32..eaac9d7d0 100644 --- a/libpod/stats.go +++ b/libpod/stats.go @@ -9,6 +9,8 @@ import ( "syscall" "time" + runccgroup "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/containers/common/pkg/cgroups" "github.com/containers/podman/v4/libpod/define" "github.com/pkg/errors" @@ -69,29 +71,29 @@ func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*de // If the current total usage in the cgroup is less than what was previously // recorded then it means the container was restarted and runs in a new cgroup - if previousStats.Duration > cgroupStats.CPU.Usage.Total { + if previousStats.Duration > cgroupStats.CpuStats.CpuUsage.TotalUsage { previousStats = &define.ContainerStats{} } previousCPU := previousStats.CPUNano now := uint64(time.Now().UnixNano()) - stats.Duration = cgroupStats.CPU.Usage.Total + stats.Duration = cgroupStats.CpuStats.CpuUsage.TotalUsage stats.UpTime = time.Duration(stats.Duration) stats.CPU = calculateCPUPercent(cgroupStats, previousCPU, now, previousStats.SystemNano) // calc the average cpu usage for the time the container is running stats.AvgCPU = calculateCPUPercent(cgroupStats, 0, now, uint64(c.state.StartedTime.UnixNano())) - stats.MemUsage = cgroupStats.Memory.Usage.Usage + stats.MemUsage = cgroupStats.MemoryStats.Usage.Usage stats.MemLimit = c.getMemLimit() stats.MemPerc = (float64(stats.MemUsage) / float64(stats.MemLimit)) * 100 stats.PIDs = 0 if conState == define.ContainerStateRunning || conState == define.ContainerStatePaused { - stats.PIDs = cgroupStats.Pids.Current + stats.PIDs = cgroupStats.PidsStats.Current } stats.BlockInput, stats.BlockOutput = calculateBlockIO(cgroupStats) - stats.CPUNano = cgroupStats.CPU.Usage.Total - stats.CPUSystemNano = cgroupStats.CPU.Usage.Kernel + stats.CPUNano = cgroupStats.CpuStats.CpuUsage.TotalUsage + stats.CPUSystemNano = cgroupStats.CpuStats.CpuUsage.UsageInKernelmode stats.SystemNano = now - stats.PerCPU = cgroupStats.CPU.Usage.PerCPU + stats.PerCPU = cgroupStats.CpuStats.CpuUsage.PercpuUsage // Handle case where the container is not in a network namespace if netStats != nil { stats.NetInput = netStats.TxBytes @@ -133,10 +135,10 @@ func (c *Container) getMemLimit() uint64 { // previousCPU is the last value of stats.CPU.Usage.Total measured at the time previousSystem. // (now - previousSystem) is the time delta in nanoseconds, between the measurement in previousCPU // and the updated value in stats. -func calculateCPUPercent(stats *cgroups.Metrics, previousCPU, now, previousSystem uint64) float64 { +func calculateCPUPercent(stats *runccgroup.Stats, previousCPU, now, previousSystem uint64) float64 { var ( cpuPercent = 0.0 - cpuDelta = float64(stats.CPU.Usage.Total - previousCPU) + cpuDelta = float64(stats.CpuStats.CpuUsage.TotalUsage - previousCPU) systemDelta = float64(now - previousSystem) ) if systemDelta > 0.0 && cpuDelta > 0.0 { @@ -146,8 +148,8 @@ func calculateCPUPercent(stats *cgroups.Metrics, previousCPU, now, previousSyste return cpuPercent } -func calculateBlockIO(stats *cgroups.Metrics) (read uint64, write uint64) { - for _, blkIOEntry := range stats.Blkio.IoServiceBytesRecursive { +func calculateBlockIO(stats *runccgroup.Stats) (read uint64, write uint64) { + for _, blkIOEntry := range stats.BlkioStats.IoServiceBytesRecursive { switch strings.ToLower(blkIOEntry.Op) { case "read": read += blkIOEntry.Value diff --git a/libpod/util_linux.go b/libpod/util_linux.go index fe98056dc..414d1bff9 100644 --- a/libpod/util_linux.go +++ b/libpod/util_linux.go @@ -11,6 +11,7 @@ import ( "github.com/containers/common/pkg/cgroups" "github.com/containers/podman/v4/libpod/define" "github.com/containers/podman/v4/pkg/rootless" + spec "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/selinux/go-selinux/label" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -20,7 +21,7 @@ import ( // systemdSliceFromPath makes a new systemd slice under the given parent with // the given name. // The parent must be a slice. The name must NOT include ".slice" -func systemdSliceFromPath(parent, name string) (string, error) { +func systemdSliceFromPath(parent, name string, resources *spec.LinuxResources) (string, error) { cgroupPath, err := assembleSystemdCgroupName(parent, name) if err != nil { return "", err @@ -28,7 +29,7 @@ func systemdSliceFromPath(parent, name string) (string, error) { logrus.Debugf("Created cgroup path %s for parent %s and name %s", cgroupPath, parent, name) - if err := makeSystemdCgroup(cgroupPath); err != nil { + if err := makeSystemdCgroup(cgroupPath, resources); err != nil { return "", errors.Wrapf(err, "error creating cgroup %s", cgroupPath) } @@ -45,8 +46,12 @@ func getDefaultSystemdCgroup() string { } // makeSystemdCgroup creates a systemd Cgroup at the given location. -func makeSystemdCgroup(path string) error { - controller, err := cgroups.NewSystemd(getDefaultSystemdCgroup()) +func makeSystemdCgroup(path string, resources *spec.LinuxResources) error { + res, err := GetLimits(resources) + if err != nil { + return err + } + controller, err := cgroups.NewSystemd(getDefaultSystemdCgroup(), &res) if err != nil { return err } @@ -54,12 +59,20 @@ func makeSystemdCgroup(path string) error { if rootless.IsRootless() { return controller.CreateSystemdUserUnit(path, rootless.GetRootlessUID()) } - return controller.CreateSystemdUnit(path) + err = controller.CreateSystemdUnit(path) + if err != nil { + return err + } + return nil } // deleteSystemdCgroup deletes the systemd cgroup at the given location -func deleteSystemdCgroup(path string) error { - controller, err := cgroups.NewSystemd(getDefaultSystemdCgroup()) +func deleteSystemdCgroup(path string, resources *spec.LinuxResources) error { + res, err := GetLimits(resources) + if err != nil { + return err + } + controller, err := cgroups.NewSystemd(getDefaultSystemdCgroup(), &res) if err != nil { return err } |