package generate import ( "context" "strings" "github.com/containers/common/pkg/config" "github.com/containers/libpod/v2/libpod" "github.com/containers/libpod/v2/libpod/define" "github.com/containers/libpod/v2/libpod/image" "github.com/containers/libpod/v2/pkg/rootless" "github.com/containers/libpod/v2/pkg/specgen" "github.com/opencontainers/runc/libcontainer/user" spec "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-tools/generate" "github.com/pkg/errors" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) func addRlimits(s *specgen.SpecGenerator, g *generate.Generator) error { var ( kernelMax uint64 = 1048576 isRootless = rootless.IsRootless() nofileSet = false nprocSet = false ) if s.Rlimits == nil { g.Config.Process.Rlimits = nil return nil } for _, u := range s.Rlimits { name := "RLIMIT_" + strings.ToUpper(u.Type) if name == "RLIMIT_NOFILE" { nofileSet = true } else if name == "RLIMIT_NPROC" { nprocSet = true } g.AddProcessRlimits(name, u.Hard, u.Soft) } // If not explicitly overridden by the user, default number of open // files and number of processes to the maximum they can be set to // (without overriding a sysctl) if !nofileSet { max := kernelMax current := kernelMax if isRootless { var rlimit unix.Rlimit if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &rlimit); err != nil { logrus.Warnf("failed to return RLIMIT_NOFILE ulimit %q", err) } if rlimit.Cur < current { current = rlimit.Cur } if rlimit.Max < max { max = rlimit.Max } } g.AddProcessRlimits("RLIMIT_NOFILE", max, current) } if !nprocSet { max := kernelMax current := kernelMax if isRootless { var rlimit unix.Rlimit if err := unix.Getrlimit(unix.RLIMIT_NPROC, &rlimit); err != nil { logrus.Warnf("failed to return RLIMIT_NPROC ulimit %q", err) } if rlimit.Cur < current { current = rlimit.Cur } if rlimit.Max < max { max = rlimit.Max } } g.AddProcessRlimits("RLIMIT_NPROC", max, current) } return nil } // Produce the final command for the container. func makeCommand(ctx context.Context, s *specgen.SpecGenerator, img *image.Image, rtc *config.Config) ([]string, error) { finalCommand := []string{} entrypoint := s.Entrypoint if len(entrypoint) == 0 && img != nil { newEntry, err := img.Entrypoint(ctx) if err != nil { return nil, err } entrypoint = newEntry } finalCommand = append(finalCommand, entrypoint...) command := s.Command if command == nil && img != nil { newCmd, err := img.Cmd(ctx) if err != nil { return nil, err } command = newCmd } finalCommand = append(finalCommand, command...) if len(finalCommand) == 0 { return nil, errors.Errorf("no command or entrypoint provided, and no CMD or ENTRYPOINT from image") } if s.Init { initPath := s.InitPath if initPath == "" && rtc != nil { initPath = rtc.Engine.InitPath } if initPath == "" { return nil, errors.Errorf("no path to init binary found but container requested an init") } finalCommand = append([]string{"/dev/init", "--"}, finalCommand...) } return finalCommand, nil } func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runtime, rtc *config.Config, newImage *image.Image, mounts []spec.Mount, pod *libpod.Pod) (*spec.Spec, error) { var ( inUserNS bool ) cgroupPerm := "ro" g, err := generate.New("linux") if err != nil { return nil, err } // Remove the default /dev/shm mount to ensure we overwrite it g.RemoveMount("/dev/shm") g.HostSpecific = true addCgroup := true canMountSys := true isRootless := rootless.IsRootless() if isRootless { inUserNS = true } if !s.UserNS.IsHost() { if s.UserNS.IsContainer() || s.UserNS.IsPath() { inUserNS = true } if s.UserNS.IsPrivate() { inUserNS = true } } if inUserNS && s.NetNS.IsHost() { canMountSys = false } if s.Privileged && canMountSys { cgroupPerm = "rw" g.RemoveMount("/sys") sysMnt := spec.Mount{ Destination: "/sys", Type: "sysfs", Source: "sysfs", Options: []string{"rprivate", "nosuid", "noexec", "nodev", "rw"}, } g.AddMount(sysMnt) } else if !canMountSys { addCgroup = false g.RemoveMount("/sys") r := "ro" if s.Privileged { r = "rw" } sysMnt := spec.Mount{ Destination: "/sys", Type: "bind", // should we use a constant for this, like createconfig? Source: "/sys", Options: []string{"rprivate", "nosuid", "noexec", "nodev", r, "rbind"}, } g.AddMount(sysMnt) if !s.Privileged && isRootless { g.AddLinuxMaskedPaths("/sys/kernel") } } gid5Available := true if isRootless { nGids, err := GetAvailableGids() if err != nil { return nil, err } gid5Available = nGids >= 5 } // When using a different user namespace, check that the GID 5 is mapped inside // the container. if gid5Available && (s.IDMappings != nil && len(s.IDMappings.GIDMap) > 0) { mappingFound := false for _, r := range s.IDMappings.GIDMap { if r.ContainerID <= 5 && 5 < r.ContainerID+r.Size { mappingFound = true break } } if !mappingFound { gid5Available = false } } if !gid5Available { // If we have no GID mappings, the gid=5 default option would fail, so drop it. g.RemoveMount("/dev/pts") devPts := spec.Mount{ Destination: "/dev/pts", Type: "devpts", Source: "devpts", Options: []string{"rprivate", "nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620"}, } g.AddMount(devPts) } if inUserNS && s.IpcNS.IsHost() { g.RemoveMount("/dev/mqueue") devMqueue := spec.Mount{ Destination: "/dev/mqueue", Type: "bind", // constant ? Source: "/dev/mqueue", Options: []string{"bind", "nosuid", "noexec", "nodev"}, } g.AddMount(devMqueue) } if inUserNS && s.PidNS.IsHost() { g.RemoveMount("/proc") procMount := spec.Mount{ Destination: "/proc", Type: TypeBind, Source: "/proc", Options: []string{"rbind", "nosuid", "noexec", "nodev"}, } g.AddMount(procMount) } if addCgroup { cgroupMnt := spec.Mount{ Destination: "/sys/fs/cgroup", Type: "cgroup", Source: "cgroup", Options: []string{"rprivate", "nosuid", "noexec", "nodev", "relatime", cgroupPerm}, } g.AddMount(cgroupMnt) } g.SetProcessCwd(s.WorkDir) finalCmd, err := makeCommand(ctx, s, newImage, rtc) if err != nil { return nil, err } g.SetProcessArgs(finalCmd) g.SetProcessTerminal(s.Terminal) for key, val := range s.Annotations { g.AddAnnotation(key, val) } g.AddProcessEnv("container", "podman") g.Config.Linux.Resources = s.ResourceLimits // Devices if s.Privileged { // If privileged, we need to add all the host devices to the // spec. We do not add the user provided ones because we are // already adding them all. if err := addPrivilegedDevices(&g); err != nil { return nil, err } } else { // add default devices from containers.conf for _, device := range rtc.Containers.Devices { if err := DevicesFromPath(&g, device); err != nil { return nil, err } } // add default devices specified by caller for _, device := range s.Devices { if err := DevicesFromPath(&g, device.Path); err != nil { return nil, err } } } // SECURITY OPTS g.SetProcessNoNewPrivileges(s.NoNewPrivileges) if !s.Privileged { g.SetProcessApparmorProfile(s.ApparmorProfile) } BlockAccessToKernelFilesystems(s.Privileged, s.PidNS.IsHost(), &g) for name, val := range s.Env { g.AddProcessEnv(name, val) } if err := addRlimits(s, &g); err != nil { return nil, err } // NAMESPACES if err := specConfigureNamespaces(s, &g, rt, pod); err != nil { return nil, err } configSpec := g.Config if err := securityConfigureGenerator(s, &g, newImage, rtc); err != nil { return nil, err } // BIND MOUNTS configSpec.Mounts = SupercedeUserMounts(mounts, configSpec.Mounts) // Process mounts to ensure correct options if err := InitFSMounts(configSpec.Mounts); err != nil { return nil, err } // Add annotations if configSpec.Annotations == nil { configSpec.Annotations = make(map[string]string) } if s.Remove { configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseTrue } else { configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseFalse } if len(s.VolumesFrom) > 0 { configSpec.Annotations[define.InspectAnnotationVolumesFrom] = strings.Join(s.VolumesFrom, ",") } if s.Privileged { configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseTrue } else { configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseFalse } if s.Init { configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseTrue } else { configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseFalse } return configSpec, nil } func GetAvailableGids() (int64, error) { idMap, err := user.ParseIDMapFile("/proc/self/gid_map") if err != nil { return 0, err } count := int64(0) for _, r := range idMap { count += r.Count } return count, nil }