diff options
author | Doug Rabson <dfr@rabson.org> | 2022-08-26 10:04:18 +0100 |
---|---|---|
committer | Doug Rabson <dfr@rabson.org> | 2022-08-30 11:23:58 +0100 |
commit | 39880670cdde8482baf69e6a0f98b33bf34669f3 (patch) | |
tree | 248bd711f63e9a7ee8a3f53502483351b1f74f8c | |
parent | 0166feef2a0205967eb95867a7d953f9171b27b7 (diff) | |
download | podman-39880670cdde8482baf69e6a0f98b33bf34669f3.tar.gz podman-39880670cdde8482baf69e6a0f98b33bf34669f3.tar.bz2 podman-39880670cdde8482baf69e6a0f98b33bf34669f3.zip |
specgen/generate: Move SpecGenToOCI, WeightDevices to oci_linux.go and add stubs.
Almost all of SpecGenToOCI deals with linux-specific aspects of the
runtime spec. Rather than try to factor this out piecemeal, I think it
is cleaner to move the whole function along with its implementation
helper functions. This also means we don't need non-linux stubs for
functions called from oci_linux.go.
[NO NEW TESTS NEEDED]
Signed-off-by: Doug Rabson <dfr@rabson.org>
-rw-r--r-- | pkg/specgen/generate/config_unsupported.go | 29 | ||||
-rw-r--r-- | pkg/specgen/generate/oci.go | 317 | ||||
-rw-r--r-- | pkg/specgen/generate/oci_linux.go | 331 | ||||
-rw-r--r-- | pkg/specgen/generate/oci_unsupported.go | 24 |
4 files changed, 355 insertions, 346 deletions
diff --git a/pkg/specgen/generate/config_unsupported.go b/pkg/specgen/generate/config_unsupported.go deleted file mode 100644 index a97ae0709..000000000 --- a/pkg/specgen/generate/config_unsupported.go +++ /dev/null @@ -1,29 +0,0 @@ -//go:build !linux -// +build !linux - -package generate - -import ( - "errors" - - "github.com/containers/common/libimage" - "github.com/containers/podman/v4/pkg/specgen" - spec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/opencontainers/runtime-tools/generate" -) - -// DevicesFromPath computes a list of devices -func DevicesFromPath(g *generate.Generator, devicePath string) error { - return errors.New("unsupported DevicesFromPath") -} - -func BlockAccessToKernelFilesystems(privileged, pidModeIsHost bool, mask, unmask []string, g *generate.Generator) { -} - -func supportAmbientCapabilities() bool { - return false -} - -func getSeccompConfig(s *specgen.SpecGenerator, configSpec *spec.Spec, img *libimage.Image) (*spec.LinuxSeccomp, error) { - return nil, errors.New("not implemented getSeccompConfig") -} diff --git a/pkg/specgen/generate/oci.go b/pkg/specgen/generate/oci.go index a531494c9..3ac1a9b3f 100644 --- a/pkg/specgen/generate/oci.go +++ b/pkg/specgen/generate/oci.go @@ -1,37 +1,19 @@ package generate import ( - "context" - "encoding/json" "fmt" - "path" "strings" "github.com/containers/common/libimage" - "github.com/containers/common/pkg/cgroups" "github.com/containers/common/pkg/config" - "github.com/containers/podman/v4/libpod" "github.com/containers/podman/v4/libpod/define" "github.com/containers/podman/v4/pkg/rootless" "github.com/containers/podman/v4/pkg/specgen" - spec "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-tools/generate" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) -func setProcOpts(s *specgen.SpecGenerator, g *generate.Generator) { - if s.ProcOpts == nil { - return - } - for i := range g.Config.Mounts { - if g.Config.Mounts[i].Destination == 
"/proc" { - g.Config.Mounts[i].Options = s.ProcOpts - return - } - } -} - func addRlimits(s *specgen.SpecGenerator, g *generate.Generator) { var ( isRootless = rootless.IsRootless() @@ -133,302 +115,3 @@ func makeCommand(s *specgen.SpecGenerator, imageData *libimage.ImageData, rtc *c return finalCommand, nil } - -// canMountSys is a best-effort heuristic to detect whether mounting a new sysfs is permitted in the container -func canMountSys(isRootless, isNewUserns bool, s *specgen.SpecGenerator) bool { - if s.NetNS.IsHost() && (isRootless || isNewUserns) { - return false - } - if isNewUserns { - switch s.NetNS.NSMode { - case specgen.Slirp, specgen.Private, specgen.NoNetwork, specgen.Bridge: - return true - default: - return false - } - } - return true -} - -func getCgroupPermissons(unmask []string) string { - ro := "ro" - rw := "rw" - cgroup := "/sys/fs/cgroup" - - cgroupv2, _ := cgroups.IsCgroup2UnifiedMode() - if !cgroupv2 { - return ro - } - - if unmask != nil && unmask[0] == "ALL" { - return rw - } - - for _, p := range unmask { - if path.Clean(p) == cgroup { - return rw - } - } - return ro -} - -// SpecGenToOCI returns the base configuration for the container. 
-func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runtime, rtc *config.Config, newImage *libimage.Image, mounts []spec.Mount, pod *libpod.Pod, finalCmd []string, compatibleOptions *libpod.InfraInherit) (*spec.Spec, error) { - cgroupPerm := getCgroupPermissons(s.Unmask) - - g, err := generate.New("linux") - if err != nil { - return nil, err - } - // Remove the default /dev/shm mount to ensure we overwrite it - g.RemoveMount("/dev/shm") - g.HostSpecific = true - addCgroup := true - - isRootless := rootless.IsRootless() - isNewUserns := s.UserNS.IsContainer() || s.UserNS.IsPath() || s.UserNS.IsPrivate() - - canMountSys := canMountSys(isRootless, isNewUserns, s) - - if s.Privileged && canMountSys { - cgroupPerm = "rw" - g.RemoveMount("/sys") - sysMnt := spec.Mount{ - Destination: "/sys", - Type: "sysfs", - Source: "sysfs", - Options: []string{"rprivate", "nosuid", "noexec", "nodev", "rw"}, - } - g.AddMount(sysMnt) - } - if !canMountSys { - addCgroup = false - g.RemoveMount("/sys") - r := "ro" - if s.Privileged { - r = "rw" - } - sysMnt := spec.Mount{ - Destination: "/sys", - Type: "bind", // should we use a constant for this, like createconfig? - Source: "/sys", - Options: []string{"rprivate", "nosuid", "noexec", "nodev", r, "rbind"}, - } - g.AddMount(sysMnt) - if !s.Privileged && isRootless { - g.AddLinuxMaskedPaths("/sys/kernel") - } - } - gid5Available := true - if isRootless { - nGids, err := rootless.GetAvailableGids() - if err != nil { - return nil, err - } - gid5Available = nGids >= 5 - } - // When using a different user namespace, check that the GID 5 is mapped inside - // the container. 
- if gid5Available && (s.IDMappings != nil && len(s.IDMappings.GIDMap) > 0) { - mappingFound := false - for _, r := range s.IDMappings.GIDMap { - if r.ContainerID <= 5 && 5 < r.ContainerID+r.Size { - mappingFound = true - break - } - } - if !mappingFound { - gid5Available = false - } - } - if !gid5Available { - // If we have no GID mappings, the gid=5 default option would fail, so drop it. - g.RemoveMount("/dev/pts") - devPts := spec.Mount{ - Destination: "/dev/pts", - Type: "devpts", - Source: "devpts", - Options: []string{"rprivate", "nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620"}, - } - g.AddMount(devPts) - } - - inUserNS := isRootless || isNewUserns - - if inUserNS && s.IpcNS.IsHost() { - g.RemoveMount("/dev/mqueue") - devMqueue := spec.Mount{ - Destination: "/dev/mqueue", - Type: "bind", // constant ? - Source: "/dev/mqueue", - Options: []string{"bind", "nosuid", "noexec", "nodev"}, - } - g.AddMount(devMqueue) - } - if inUserNS && s.PidNS.IsHost() { - g.RemoveMount("/proc") - procMount := spec.Mount{ - Destination: "/proc", - Type: define.TypeBind, - Source: "/proc", - Options: []string{"rbind", "nosuid", "noexec", "nodev"}, - } - g.AddMount(procMount) - } - - if addCgroup { - cgroupMnt := spec.Mount{ - Destination: "/sys/fs/cgroup", - Type: "cgroup", - Source: "cgroup", - Options: []string{"rprivate", "nosuid", "noexec", "nodev", "relatime", cgroupPerm}, - } - g.AddMount(cgroupMnt) - } - - g.Config.Linux.Personality = s.Personality - - g.SetProcessCwd(s.WorkDir) - - g.SetProcessArgs(finalCmd) - - g.SetProcessTerminal(s.Terminal) - - for key, val := range s.Annotations { - g.AddAnnotation(key, val) - } - - if s.ResourceLimits != nil { - out, err := json.Marshal(s.ResourceLimits) - if err != nil { - return nil, err - } - err = json.Unmarshal(out, g.Config.Linux.Resources) - if err != nil { - return nil, err - } - g.Config.Linux.Resources = s.ResourceLimits - } - - weightDevices, err := WeightDevices(s.WeightDevice) - if err != nil { - return 
nil, err - } - if len(weightDevices) > 0 { - for _, dev := range weightDevices { - g.AddLinuxResourcesBlockIOWeightDevice(dev.Major, dev.Minor, *dev.Weight) - } - } - - // Devices - // set the default rule at the beginning of device configuration - if !inUserNS && !s.Privileged { - g.AddLinuxResourcesDevice(false, "", nil, nil, "rwm") - } - - var userDevices []spec.LinuxDevice - - if !s.Privileged { - // add default devices from containers.conf - for _, device := range rtc.Containers.Devices { - if err = DevicesFromPath(&g, device); err != nil { - return nil, err - } - } - if len(compatibleOptions.HostDeviceList) > 0 && len(s.Devices) == 0 { - userDevices = compatibleOptions.HostDeviceList - } else { - userDevices = s.Devices - } - // add default devices specified by caller - for _, device := range userDevices { - if err = DevicesFromPath(&g, device.Path); err != nil { - return nil, err - } - } - } - s.HostDeviceList = userDevices - - // set the devices cgroup when not running in a user namespace - if !inUserNS && !s.Privileged { - for _, dev := range s.DeviceCgroupRule { - g.AddLinuxResourcesDevice(true, dev.Type, dev.Major, dev.Minor, dev.Access) - } - } - - BlockAccessToKernelFilesystems(s.Privileged, s.PidNS.IsHost(), s.Mask, s.Unmask, &g) - - g.ClearProcessEnv() - for name, val := range s.Env { - g.AddProcessEnv(name, val) - } - - addRlimits(s, &g) - - // NAMESPACES - if err := specConfigureNamespaces(s, &g, rt, pod); err != nil { - return nil, err - } - configSpec := g.Config - - if err := securityConfigureGenerator(s, &g, newImage, rtc); err != nil { - return nil, err - } - - // BIND MOUNTS - configSpec.Mounts = SupersedeUserMounts(mounts, configSpec.Mounts) - // Process mounts to ensure correct options - if err := InitFSMounts(configSpec.Mounts); err != nil { - return nil, err - } - - // Add annotations - if configSpec.Annotations == nil { - configSpec.Annotations = make(map[string]string) - } - - if s.Remove { - 
configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseTrue - } else { - configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseFalse - } - - if len(s.VolumesFrom) > 0 { - configSpec.Annotations[define.InspectAnnotationVolumesFrom] = strings.Join(s.VolumesFrom, ",") - } - - if s.Privileged { - configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseTrue - } else { - configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseFalse - } - - if s.Init { - configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseTrue - } else { - configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseFalse - } - - if s.OOMScoreAdj != nil { - g.SetProcessOOMScoreAdj(*s.OOMScoreAdj) - } - setProcOpts(s, &g) - - return configSpec, nil -} - -func WeightDevices(wtDevices map[string]spec.LinuxWeightDevice) ([]spec.LinuxWeightDevice, error) { - devs := []spec.LinuxWeightDevice{} - for k, v := range wtDevices { - statT := unix.Stat_t{} - if err := unix.Stat(k, &statT); err != nil { - return nil, fmt.Errorf("failed to inspect '%s' in --blkio-weight-device: %w", k, err) - } - dev := new(spec.LinuxWeightDevice) - dev.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert - dev.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert - dev.Weight = v.Weight - devs = append(devs, *dev) - } - return devs, nil -} diff --git a/pkg/specgen/generate/oci_linux.go b/pkg/specgen/generate/oci_linux.go new file mode 100644 index 000000000..341853de5 --- /dev/null +++ b/pkg/specgen/generate/oci_linux.go @@ -0,0 +1,331 @@ +package generate + +import ( + "context" + "encoding/json" + "fmt" + "path" + "strings" + + "github.com/containers/common/libimage" + "github.com/containers/common/pkg/cgroups" + "github.com/containers/common/pkg/config" + "github.com/containers/podman/v4/libpod" + "github.com/containers/podman/v4/libpod/define" + 
"github.com/containers/podman/v4/pkg/rootless" + "github.com/containers/podman/v4/pkg/specgen" + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" + "golang.org/x/sys/unix" +) + +func setProcOpts(s *specgen.SpecGenerator, g *generate.Generator) { + if s.ProcOpts == nil { + return + } + for i := range g.Config.Mounts { + if g.Config.Mounts[i].Destination == "/proc" { + g.Config.Mounts[i].Options = s.ProcOpts + return + } + } +} + +// canMountSys is a best-effort heuristic to detect whether mounting a new sysfs is permitted in the container +func canMountSys(isRootless, isNewUserns bool, s *specgen.SpecGenerator) bool { + if s.NetNS.IsHost() && (isRootless || isNewUserns) { + return false + } + if isNewUserns { + switch s.NetNS.NSMode { + case specgen.Slirp, specgen.Private, specgen.NoNetwork, specgen.Bridge: + return true + default: + return false + } + } + return true +} + +func getCgroupPermissons(unmask []string) string { + ro := "ro" + rw := "rw" + cgroup := "/sys/fs/cgroup" + + cgroupv2, _ := cgroups.IsCgroup2UnifiedMode() + if !cgroupv2 { + return ro + } + + if unmask != nil && unmask[0] == "ALL" { + return rw + } + + for _, p := range unmask { + if path.Clean(p) == cgroup { + return rw + } + } + return ro +} + +// SpecGenToOCI returns the base configuration for the container. 
+func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runtime, rtc *config.Config, newImage *libimage.Image, mounts []spec.Mount, pod *libpod.Pod, finalCmd []string, compatibleOptions *libpod.InfraInherit) (*spec.Spec, error) { + cgroupPerm := getCgroupPermissons(s.Unmask) + + g, err := generate.New("linux") + if err != nil { + return nil, err + } + // Remove the default /dev/shm mount to ensure we overwrite it + g.RemoveMount("/dev/shm") + g.HostSpecific = true + addCgroup := true + + isRootless := rootless.IsRootless() + isNewUserns := s.UserNS.IsContainer() || s.UserNS.IsPath() || s.UserNS.IsPrivate() + + canMountSys := canMountSys(isRootless, isNewUserns, s) + + if s.Privileged && canMountSys { + cgroupPerm = "rw" + g.RemoveMount("/sys") + sysMnt := spec.Mount{ + Destination: "/sys", + Type: "sysfs", + Source: "sysfs", + Options: []string{"rprivate", "nosuid", "noexec", "nodev", "rw"}, + } + g.AddMount(sysMnt) + } + if !canMountSys { + addCgroup = false + g.RemoveMount("/sys") + r := "ro" + if s.Privileged { + r = "rw" + } + sysMnt := spec.Mount{ + Destination: "/sys", + Type: "bind", // should we use a constant for this, like createconfig? + Source: "/sys", + Options: []string{"rprivate", "nosuid", "noexec", "nodev", r, "rbind"}, + } + g.AddMount(sysMnt) + if !s.Privileged && isRootless { + g.AddLinuxMaskedPaths("/sys/kernel") + } + } + gid5Available := true + if isRootless { + nGids, err := rootless.GetAvailableGids() + if err != nil { + return nil, err + } + gid5Available = nGids >= 5 + } + // When using a different user namespace, check that the GID 5 is mapped inside + // the container. 
+ if gid5Available && (s.IDMappings != nil && len(s.IDMappings.GIDMap) > 0) { + mappingFound := false + for _, r := range s.IDMappings.GIDMap { + if r.ContainerID <= 5 && 5 < r.ContainerID+r.Size { + mappingFound = true + break + } + } + if !mappingFound { + gid5Available = false + } + } + if !gid5Available { + // If we have no GID mappings, the gid=5 default option would fail, so drop it. + g.RemoveMount("/dev/pts") + devPts := spec.Mount{ + Destination: "/dev/pts", + Type: "devpts", + Source: "devpts", + Options: []string{"rprivate", "nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620"}, + } + g.AddMount(devPts) + } + + inUserNS := isRootless || isNewUserns + + if inUserNS && s.IpcNS.IsHost() { + g.RemoveMount("/dev/mqueue") + devMqueue := spec.Mount{ + Destination: "/dev/mqueue", + Type: "bind", // constant ? + Source: "/dev/mqueue", + Options: []string{"bind", "nosuid", "noexec", "nodev"}, + } + g.AddMount(devMqueue) + } + if inUserNS && s.PidNS.IsHost() { + g.RemoveMount("/proc") + procMount := spec.Mount{ + Destination: "/proc", + Type: define.TypeBind, + Source: "/proc", + Options: []string{"rbind", "nosuid", "noexec", "nodev"}, + } + g.AddMount(procMount) + } + + if addCgroup { + cgroupMnt := spec.Mount{ + Destination: "/sys/fs/cgroup", + Type: "cgroup", + Source: "cgroup", + Options: []string{"rprivate", "nosuid", "noexec", "nodev", "relatime", cgroupPerm}, + } + g.AddMount(cgroupMnt) + } + + g.Config.Linux.Personality = s.Personality + + g.SetProcessCwd(s.WorkDir) + + g.SetProcessArgs(finalCmd) + + g.SetProcessTerminal(s.Terminal) + + for key, val := range s.Annotations { + g.AddAnnotation(key, val) + } + + if s.ResourceLimits != nil { + out, err := json.Marshal(s.ResourceLimits) + if err != nil { + return nil, err + } + err = json.Unmarshal(out, g.Config.Linux.Resources) + if err != nil { + return nil, err + } + g.Config.Linux.Resources = s.ResourceLimits + } + + weightDevices, err := WeightDevices(s.WeightDevice) + if err != nil { + return 
nil, err + } + if len(weightDevices) > 0 { + for _, dev := range weightDevices { + g.AddLinuxResourcesBlockIOWeightDevice(dev.Major, dev.Minor, *dev.Weight) + } + } + + // Devices + // set the default rule at the beginning of device configuration + if !inUserNS && !s.Privileged { + g.AddLinuxResourcesDevice(false, "", nil, nil, "rwm") + } + + var userDevices []spec.LinuxDevice + + if !s.Privileged { + // add default devices from containers.conf + for _, device := range rtc.Containers.Devices { + if err = DevicesFromPath(&g, device); err != nil { + return nil, err + } + } + if len(compatibleOptions.HostDeviceList) > 0 && len(s.Devices) == 0 { + userDevices = compatibleOptions.HostDeviceList + } else { + userDevices = s.Devices + } + // add default devices specified by caller + for _, device := range userDevices { + if err = DevicesFromPath(&g, device.Path); err != nil { + return nil, err + } + } + } + s.HostDeviceList = userDevices + + // set the devices cgroup when not running in a user namespace + if !inUserNS && !s.Privileged { + for _, dev := range s.DeviceCgroupRule { + g.AddLinuxResourcesDevice(true, dev.Type, dev.Major, dev.Minor, dev.Access) + } + } + + BlockAccessToKernelFilesystems(s.Privileged, s.PidNS.IsHost(), s.Mask, s.Unmask, &g) + + g.ClearProcessEnv() + for name, val := range s.Env { + g.AddProcessEnv(name, val) + } + + addRlimits(s, &g) + + // NAMESPACES + if err := specConfigureNamespaces(s, &g, rt, pod); err != nil { + return nil, err + } + configSpec := g.Config + + if err := securityConfigureGenerator(s, &g, newImage, rtc); err != nil { + return nil, err + } + + // BIND MOUNTS + configSpec.Mounts = SupersedeUserMounts(mounts, configSpec.Mounts) + // Process mounts to ensure correct options + if err := InitFSMounts(configSpec.Mounts); err != nil { + return nil, err + } + + // Add annotations + if configSpec.Annotations == nil { + configSpec.Annotations = make(map[string]string) + } + + if s.Remove { + 
configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseTrue + } else { + configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseFalse + } + + if len(s.VolumesFrom) > 0 { + configSpec.Annotations[define.InspectAnnotationVolumesFrom] = strings.Join(s.VolumesFrom, ",") + } + + if s.Privileged { + configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseTrue + } else { + configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseFalse + } + + if s.Init { + configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseTrue + } else { + configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseFalse + } + + if s.OOMScoreAdj != nil { + g.SetProcessOOMScoreAdj(*s.OOMScoreAdj) + } + setProcOpts(s, &g) + + return configSpec, nil +} + +func WeightDevices(wtDevices map[string]spec.LinuxWeightDevice) ([]spec.LinuxWeightDevice, error) { + devs := []spec.LinuxWeightDevice{} + for k, v := range wtDevices { + statT := unix.Stat_t{} + if err := unix.Stat(k, &statT); err != nil { + return nil, fmt.Errorf("failed to inspect '%s' in --blkio-weight-device: %w", k, err) + } + dev := new(spec.LinuxWeightDevice) + dev.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert + dev.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert + dev.Weight = v.Weight + devs = append(devs, *dev) + } + return devs, nil +} diff --git a/pkg/specgen/generate/oci_unsupported.go b/pkg/specgen/generate/oci_unsupported.go new file mode 100644 index 000000000..3902f9c9f --- /dev/null +++ b/pkg/specgen/generate/oci_unsupported.go @@ -0,0 +1,24 @@ +//go:build !linux +// +build !linux + +package generate + +import ( + "context" + "errors" + + "github.com/containers/common/libimage" + "github.com/containers/common/pkg/config" + "github.com/containers/podman/v4/libpod" + "github.com/containers/podman/v4/pkg/specgen" + spec 
"github.com/opencontainers/runtime-spec/specs-go" +) + +// SpecGenToOCI returns the base configuration for the container. +func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runtime, rtc *config.Config, newImage *libimage.Image, mounts []spec.Mount, pod *libpod.Pod, finalCmd []string, compatibleOptions *libpod.InfraInherit) (*spec.Spec, error) { + return nil, errors.New("unsupported SpecGenToOCI") +} + +func WeightDevices(wtDevices map[string]spec.LinuxWeightDevice) ([]spec.LinuxWeightDevice, error) { + return []spec.LinuxWeightDevice{}, errors.New("unsupported WeightDevices") +} |