diff options
Diffstat (limited to 'pkg/spec/spec.go')
-rw-r--r-- | pkg/spec/spec.go | 593 |
1 files changed, 0 insertions, 593 deletions
diff --git a/pkg/spec/spec.go b/pkg/spec/spec.go deleted file mode 100644 index 81620997f..000000000 --- a/pkg/spec/spec.go +++ /dev/null @@ -1,593 +0,0 @@ -package createconfig - -import ( - "strings" - - "github.com/containers/common/pkg/capabilities" - cconfig "github.com/containers/common/pkg/config" - "github.com/containers/common/pkg/sysinfo" - "github.com/containers/podman/v2/libpod" - "github.com/containers/podman/v2/libpod/define" - "github.com/containers/podman/v2/pkg/cgroups" - "github.com/containers/podman/v2/pkg/env" - "github.com/containers/podman/v2/pkg/rootless" - "github.com/containers/podman/v2/pkg/util" - "github.com/docker/go-units" - "github.com/opencontainers/runc/libcontainer/user" - spec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/opencontainers/runtime-tools/generate" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -const CpuPeriod = 100000 - -func GetAvailableGids() (int64, error) { - idMap, err := user.ParseIDMapFile("/proc/self/gid_map") - if err != nil { - return 0, err - } - count := int64(0) - for _, r := range idMap { - count += r.Count - } - return count, nil -} - -// CreateConfigToOCISpec parses information needed to create a container into an OCI runtime spec -func (config *CreateConfig) createConfigToOCISpec(runtime *libpod.Runtime, userMounts []spec.Mount) (*spec.Spec, error) { - cgroupPerm := "ro" - g, err := generate.New("linux") - if err != nil { - return nil, err - } - // Remove the default /dev/shm mount to ensure we overwrite it - g.RemoveMount("/dev/shm") - g.HostSpecific = true - addCgroup := true - canMountSys := true - - isRootless := rootless.IsRootless() - inUserNS := config.User.InNS(isRootless) - - if inUserNS && config.Network.NetMode.IsHost() { - canMountSys = false - } - - if config.Security.Privileged && canMountSys { - cgroupPerm = "rw" - g.RemoveMount("/sys") - sysMnt := spec.Mount{ - Destination: "/sys", - Type: "sysfs", - Source: "sysfs", - Options: []string{"rprivate", "nosuid", "noexec", "nodev", "rw"}, - } - g.AddMount(sysMnt) - } else if !canMountSys { - addCgroup = false - g.RemoveMount("/sys") - r := "ro" - if config.Security.Privileged { - r = "rw" - } - sysMnt := spec.Mount{ - Destination: "/sys", - Type: TypeBind, - Source: "/sys", - Options: []string{"rprivate", "nosuid", "noexec", "nodev", r, "rbind"}, - } - g.AddMount(sysMnt) - if !config.Security.Privileged && isRootless { - g.AddLinuxMaskedPaths("/sys/kernel") - } - } - var runtimeConfig *cconfig.Config - - if runtime != nil { - runtimeConfig, err = runtime.GetConfig() - if err != nil { - return nil, err - } - g.Config.Process.Capabilities.Bounding = runtimeConfig.Containers.DefaultCapabilities - sysctls, err := util.ValidateSysctls(runtimeConfig.Containers.DefaultSysctls) - if err != nil { - return nil, err - } - - for name, val := range config.Security.Sysctl { - sysctls[name] = val - } - config.Security.Sysctl = sysctls - if !util.StringInSlice("host", config.Resources.Ulimit) { - config.Resources.Ulimit = append(runtimeConfig.Containers.DefaultUlimits, config.Resources.Ulimit...) - } - if config.Resources.PidsLimit < 0 && !config.cgroupDisabled() { - config.Resources.PidsLimit = runtimeConfig.Containers.PidsLimit - } - - } else { - g.Config.Process.Capabilities.Bounding = cconfig.DefaultCapabilities - if config.Resources.PidsLimit < 0 && !config.cgroupDisabled() { - config.Resources.PidsLimit = cconfig.DefaultPidsLimit - } - } - - gid5Available := true - if isRootless { - nGids, err := GetAvailableGids() - if err != nil { - return nil, err - } - gid5Available = nGids >= 5 - } - // When using a different user namespace, check that the GID 5 is mapped inside - // the container. - if gid5Available && len(config.User.IDMappings.GIDMap) > 0 { - mappingFound := false - for _, r := range config.User.IDMappings.GIDMap { - if r.ContainerID <= 5 && 5 < r.ContainerID+r.Size { - mappingFound = true - break - } - } - if !mappingFound { - gid5Available = false - } - - } - if !gid5Available { - // If we have no GID mappings, the gid=5 default option would fail, so drop it. - g.RemoveMount("/dev/pts") - devPts := spec.Mount{ - Destination: "/dev/pts", - Type: "devpts", - Source: "devpts", - Options: []string{"rprivate", "nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620"}, - } - g.AddMount(devPts) - } - - if inUserNS && config.Ipc.IpcMode.IsHost() { - g.RemoveMount("/dev/mqueue") - devMqueue := spec.Mount{ - Destination: "/dev/mqueue", - Type: TypeBind, - Source: "/dev/mqueue", - Options: []string{"bind", "nosuid", "noexec", "nodev"}, - } - g.AddMount(devMqueue) - } - if inUserNS && config.Pid.PidMode.IsHost() { - g.RemoveMount("/proc") - procMount := spec.Mount{ - Destination: "/proc", - Type: TypeBind, - Source: "/proc", - Options: []string{"rbind", "nosuid", "noexec", "nodev"}, - } - g.AddMount(procMount) - } - - if addCgroup { - cgroupMnt := spec.Mount{ - Destination: "/sys/fs/cgroup", - Type: "cgroup", - Source: "cgroup", - Options: []string{"rprivate", "nosuid", "noexec", "nodev", "relatime", cgroupPerm}, - } - g.AddMount(cgroupMnt) - } - g.SetProcessCwd(config.WorkDir) - - ProcessArgs := make([]string, 0) - // We need to iterate the input for entrypoint because it is a []string - // but "" is a legit json input, which translates into a []string with an - // empty position. This messes up the eventual command being executed - // in the container - for _, a := range config.Entrypoint { - if len(a) > 0 { - ProcessArgs = append(ProcessArgs, a) - } - } - // Same issue as explained above for config.Entrypoint. - for _, a := range config.Command { - if len(a) > 0 { - ProcessArgs = append(ProcessArgs, a) - } - } - - g.SetProcessArgs(ProcessArgs) - g.SetProcessTerminal(config.Tty) - - for key, val := range config.Annotations { - g.AddAnnotation(key, val) - } - - addedResources := false - - // RESOURCES - MEMORY - if config.Resources.Memory != 0 { - g.SetLinuxResourcesMemoryLimit(config.Resources.Memory) - // If a swap limit is not explicitly set, also set a swap limit - // Default to double the memory limit - if config.Resources.MemorySwap == 0 { - g.SetLinuxResourcesMemorySwap(2 * config.Resources.Memory) - } - addedResources = true - } - if config.Resources.MemoryReservation != 0 { - g.SetLinuxResourcesMemoryReservation(config.Resources.MemoryReservation) - addedResources = true - } - if config.Resources.MemorySwap != 0 { - g.SetLinuxResourcesMemorySwap(config.Resources.MemorySwap) - addedResources = true - } - if config.Resources.KernelMemory != 0 { - g.SetLinuxResourcesMemoryKernel(config.Resources.KernelMemory) - addedResources = true - } - if config.Resources.MemorySwappiness != -1 { - g.SetLinuxResourcesMemorySwappiness(uint64(config.Resources.MemorySwappiness)) - addedResources = true - } - g.SetLinuxResourcesMemoryDisableOOMKiller(config.Resources.DisableOomKiller) - g.SetProcessOOMScoreAdj(config.Resources.OomScoreAdj) - - // RESOURCES - CPU - if config.Resources.CPUShares != 0 { - g.SetLinuxResourcesCPUShares(config.Resources.CPUShares) - addedResources = true - } - if config.Resources.CPUQuota != 0 { - g.SetLinuxResourcesCPUQuota(config.Resources.CPUQuota) - addedResources = true - } - if config.Resources.CPUPeriod != 0 { - g.SetLinuxResourcesCPUPeriod(config.Resources.CPUPeriod) - addedResources = true - } - if config.Resources.CPUs != 0 { - g.SetLinuxResourcesCPUPeriod(CpuPeriod) - g.SetLinuxResourcesCPUQuota(int64(config.Resources.CPUs * CpuPeriod)) - addedResources = true - } - if config.Resources.CPURtRuntime != 0 { - g.SetLinuxResourcesCPURealtimeRuntime(config.Resources.CPURtRuntime) - addedResources = true - } - if config.Resources.CPURtPeriod != 0 { - g.SetLinuxResourcesCPURealtimePeriod(config.Resources.CPURtPeriod) - addedResources = true - } - if config.Resources.CPUsetCPUs != "" { - g.SetLinuxResourcesCPUCpus(config.Resources.CPUsetCPUs) - addedResources = true - } - if config.Resources.CPUsetMems != "" { - g.SetLinuxResourcesCPUMems(config.Resources.CPUsetMems) - addedResources = true - } - - // Devices - if config.Security.Privileged { - // If privileged, we need to add all the host devices to the - // spec. We do not add the user provided ones because we are - // already adding them all. - if err := AddPrivilegedDevices(&g); err != nil { - return nil, err - } - } else { - for _, devicePath := range config.Devices { - if err := DevicesFromPath(&g, devicePath); err != nil { - return nil, err - } - } - if len(config.Resources.DeviceCgroupRules) != 0 { - if err := deviceCgroupRules(&g, config.Resources.DeviceCgroupRules); err != nil { - return nil, err - } - addedResources = true - } - } - - g.SetProcessNoNewPrivileges(config.Security.NoNewPrivs) - - if !config.Security.Privileged { - g.SetProcessApparmorProfile(config.Security.ApparmorProfile) - } - - // Unless already set via the CLI, check if we need to disable process - // labels or set the defaults. - if len(config.Security.LabelOpts) == 0 && runtimeConfig != nil { - if !runtimeConfig.Containers.EnableLabeling { - // Disabled in the config. - config.Security.LabelOpts = append(config.Security.LabelOpts, "disable") - } else if err := config.Security.SetLabelOpts(runtime, &config.Pid, &config.Ipc); err != nil { - // Defaults! - return nil, err - } - } - - BlockAccessToKernelFilesystems(config.Security.Privileged, config.Pid.PidMode.IsHost(), &g) - - // RESOURCES - PIDS - if config.Resources.PidsLimit > 0 { - // if running on rootless on a cgroupv1 machine or using the cgroupfs manager, pids - // limit is not supported. If the value is still the default - // then ignore the settings. If the caller asked for a - // non-default, then try to use it. - setPidLimit := true - if rootless.IsRootless() { - cgroup2, err := cgroups.IsCgroup2UnifiedMode() - if err != nil { - return nil, err - } - if (!cgroup2 || (runtimeConfig != nil && runtimeConfig.Engine.CgroupManager != cconfig.SystemdCgroupsManager)) && config.Resources.PidsLimit == sysinfo.GetDefaultPidsLimit() { - setPidLimit = false - } - } - if setPidLimit { - g.SetLinuxResourcesPidsLimit(config.Resources.PidsLimit) - addedResources = true - } - } - - // Make sure to always set the default variables unless overridden in the - // config. - var defaultEnv map[string]string - if runtimeConfig == nil { - defaultEnv = env.DefaultEnvVariables() - } else { - defaultEnv, err = env.ParseSlice(runtimeConfig.Containers.Env) - if err != nil { - return nil, errors.Wrap(err, "Env fields in containers.conf failed to parse") - } - defaultEnv = env.Join(env.DefaultEnvVariables(), defaultEnv) - } - - if err := addRlimits(config, &g); err != nil { - return nil, err - } - - // NAMESPACES - - if err := config.Pid.ConfigureGenerator(&g); err != nil { - return nil, err - } - - if err := config.User.ConfigureGenerator(&g); err != nil { - return nil, err - } - - if err := config.Network.ConfigureGenerator(&g); err != nil { - return nil, err - } - - if err := config.Uts.ConfigureGenerator(&g, &config.Network, runtime); err != nil { - return nil, err - } - - if err := config.Ipc.ConfigureGenerator(&g); err != nil { - return nil, err - } - - if err := config.Cgroup.ConfigureGenerator(&g); err != nil { - return nil, err - } - - config.Env = env.Join(defaultEnv, config.Env) - for name, val := range config.Env { - g.AddProcessEnv(name, val) - } - configSpec := g.Config - - // If the container image specifies an label with a - // capabilities.ContainerImageLabel then split the comma separated list - // of capabilities and record them. This list indicates the only - // capabilities, required to run the container. - var capRequired []string - for key, val := range config.Labels { - if util.StringInSlice(key, capabilities.ContainerImageLabels) { - capRequired = strings.Split(val, ",") - } - } - config.Security.CapRequired = capRequired - - if err := config.Security.ConfigureGenerator(&g, &config.User); err != nil { - return nil, err - } - - // BIND MOUNTS - configSpec.Mounts = SupercedeUserMounts(userMounts, configSpec.Mounts) - // Process mounts to ensure correct options - if err := InitFSMounts(configSpec.Mounts); err != nil { - return nil, err - } - - // BLOCK IO - blkio, err := config.CreateBlockIO() - if err != nil { - return nil, errors.Wrapf(err, "error creating block io") - } - if blkio != nil { - configSpec.Linux.Resources.BlockIO = blkio - addedResources = true - } - - if rootless.IsRootless() { - cgroup2, err := cgroups.IsCgroup2UnifiedMode() - if err != nil { - return nil, err - } - if !addedResources { - configSpec.Linux.Resources = &spec.LinuxResources{} - } - - canUseResources := cgroup2 && runtimeConfig != nil && (runtimeConfig.Engine.CgroupManager == cconfig.SystemdCgroupsManager) - - if addedResources && !canUseResources { - return nil, errors.New("invalid configuration, cannot specify resource limits without cgroups v2 and --cgroup-manager=systemd") - } - if !canUseResources { - // Force the resources block to be empty instead of having default values. - configSpec.Linux.Resources = &spec.LinuxResources{} - } - } - - switch config.Cgroup.Cgroups { - case "disabled": - if addedResources { - return nil, errors.New("cannot specify resource limits when cgroups are disabled is specified") - } - configSpec.Linux.Resources = &spec.LinuxResources{} - case "enabled", "no-conmon", "": - // Do nothing - default: - return nil, errors.New("unrecognized option for cgroups; supported are 'default', 'disabled', 'no-conmon'") - } - - // Add annotations - if configSpec.Annotations == nil { - configSpec.Annotations = make(map[string]string) - } - - if config.CidFile != "" { - configSpec.Annotations[define.InspectAnnotationCIDFile] = config.CidFile - } - - if config.Rm { - configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseTrue - } else { - configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseFalse - } - - if len(config.VolumesFrom) > 0 { - configSpec.Annotations[define.InspectAnnotationVolumesFrom] = strings.Join(config.VolumesFrom, ",") - } - - if config.Security.Privileged { - configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseTrue - } else { - configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseFalse - } - - if config.Init { - configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseTrue - } else { - configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseFalse - } - - return configSpec, nil -} - -func (config *CreateConfig) cgroupDisabled() bool { - return config.Cgroup.Cgroups == "disabled" -} - -func BlockAccessToKernelFilesystems(privileged, pidModeIsHost bool, g *generate.Generator) { - if !privileged { - for _, mp := range []string{ - "/proc/acpi", - "/proc/kcore", - "/proc/keys", - "/proc/latency_stats", - "/proc/timer_list", - "/proc/timer_stats", - "/proc/sched_debug", - "/proc/scsi", - "/sys/firmware", - "/sys/fs/selinux", - } { - g.AddLinuxMaskedPaths(mp) - } - - if pidModeIsHost && rootless.IsRootless() { - return - } - - for _, rp := range []string{ - "/proc/asound", - "/proc/bus", - "/proc/fs", - "/proc/irq", - "/proc/sys", - "/proc/sysrq-trigger", - } { - g.AddLinuxReadonlyPaths(rp) - } - } -} - -func addRlimits(config *CreateConfig, g *generate.Generator) error { - var ( - isRootless = rootless.IsRootless() - nofileSet = false - nprocSet = false - ) - - for _, u := range config.Resources.Ulimit { - if u == "host" { - if len(config.Resources.Ulimit) != 1 { - return errors.New("ulimit can use host only once") - } - g.Config.Process.Rlimits = nil - break - } - - ul, err := units.ParseUlimit(u) - if err != nil { - return errors.Wrapf(err, "ulimit option %q requires name=SOFT:HARD, failed to be parsed", u) - } - - if ul.Name == "nofile" { - nofileSet = true - } else if ul.Name == "nproc" { - nprocSet = true - } - - g.AddProcessRlimits("RLIMIT_"+strings.ToUpper(ul.Name), uint64(ul.Hard), uint64(ul.Soft)) - } - - // If not explicitly overridden by the user, default number of open - // files and number of processes to the maximum they can be set to - // (without overriding a sysctl) - if !nofileSet { - max := define.RLimitDefaultValue - current := define.RLimitDefaultValue - if isRootless { - var rlimit unix.Rlimit - if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &rlimit); err != nil { - logrus.Warnf("failed to return RLIMIT_NOFILE ulimit %q", err) - } - if rlimit.Cur < current { - current = rlimit.Cur - } - if rlimit.Max < max { - max = rlimit.Max - } - } - g.AddProcessRlimits("RLIMIT_NOFILE", max, current) - } - if !nprocSet { - max := define.RLimitDefaultValue - current := define.RLimitDefaultValue - if isRootless { - var rlimit unix.Rlimit - if err := unix.Getrlimit(unix.RLIMIT_NPROC, &rlimit); err != nil { - logrus.Warnf("failed to return RLIMIT_NPROC ulimit %q", err) - } - if rlimit.Cur < current { - current = rlimit.Cur - } - if rlimit.Max < max { - max = rlimit.Max - } - } - g.AddProcessRlimits("RLIMIT_NPROC", max, current) - } - - return nil -} |