diff options
author | Doug Rabson <dfr@rabson.org> | 2022-08-27 11:59:44 +0100 |
---|---|---|
committer | Doug Rabson <dfr@rabson.org> | 2022-09-05 10:17:49 +0100 |
commit | e101f4350bdbaad6a70669977a0a254b1bfcb1d3 (patch) | |
tree | 96981d14039eabe7846365ce0cb75222cbf7f46a /libpod | |
parent | 6e4b5b00755c52f56962856b846725e4c45f1fb6 (diff) | |
download | podman-e101f4350bdbaad6a70669977a0a254b1bfcb1d3.tar.gz podman-e101f4350bdbaad6a70669977a0a254b1bfcb1d3.tar.bz2 podman-e101f4350bdbaad6a70669977a0a254b1bfcb1d3.zip |
libpod: Move getOverlayUpperAndWorkDir and generateSpec to container_internal_common.go
[NO NEW TESTS NEEDED]
Signed-off-by: Doug Rabson <dfr@rabson.org>
Diffstat (limited to 'libpod')
-rw-r--r-- | libpod/container_internal_common.go | 615 | ||||
-rw-r--r-- | libpod/container_internal_freebsd.go | 477 | ||||
-rw-r--r-- | libpod/container_internal_linux.go | 616 |
3 files changed, 629 insertions, 1079 deletions
diff --git a/libpod/container_internal_common.go b/libpod/container_internal_common.go new file mode 100644 index 000000000..50d7375cd --- /dev/null +++ b/libpod/container_internal_common.go @@ -0,0 +1,615 @@ +//go:build linux || freebsd +// +build linux freebsd + +package libpod + +import ( + "context" + "errors" + "fmt" + "math" + "os" + "strconv" + "strings" + "time" + + cdi "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" + "github.com/containers/buildah/pkg/overlay" + butil "github.com/containers/buildah/util" + "github.com/containers/common/pkg/apparmor" + cutil "github.com/containers/common/pkg/util" + "github.com/containers/podman/v4/pkg/annotations" + "github.com/containers/podman/v4/pkg/lookup" + "github.com/containers/podman/v4/pkg/rootless" + "github.com/containers/podman/v4/pkg/util" + "github.com/containers/storage/pkg/idtools" + securejoin "github.com/cyphar/filepath-securejoin" + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" + "github.com/opencontainers/selinux/go-selinux/label" + "github.com/sirupsen/logrus" +) + +// Internal only function which returns upper and work dir from +// overlay options. +func getOverlayUpperAndWorkDir(options []string) (string, string, error) { + upperDir := "" + workDir := "" + for _, o := range options { + if strings.HasPrefix(o, "upperdir") { + splitOpt := strings.SplitN(o, "=", 2) + if len(splitOpt) > 1 { + upperDir = splitOpt[1] + if upperDir == "" { + return "", "", errors.New("cannot accept empty value for upperdir") + } + } + } + if strings.HasPrefix(o, "workdir") { + splitOpt := strings.SplitN(o, "=", 2) + if len(splitOpt) > 1 { + workDir = splitOpt[1] + if workDir == "" { + return "", "", errors.New("cannot accept empty value for workdir") + } + } + } + } + if (upperDir != "" && workDir == "") || (upperDir == "" && workDir != "") { + return "", "", errors.New("must specify both upperdir and workdir") + } + return upperDir, workDir, nil +} + +// Generate spec for a container +// Accepts a map of the container's dependencies +func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { + overrides := c.getUserOverrides() + execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, overrides) + if err != nil { + if cutil.StringInSlice(c.config.User, c.config.HostUsers) { + execUser, err = lookupHostUser(c.config.User) + } + if err != nil { + return nil, err + } + } + + // NewFromSpec() is deprecated according to its comment + // however the recommended replace just causes a nil map panic + //nolint:staticcheck + g := generate.NewFromSpec(c.config.Spec) + + // If the flag to mount all devices is set for a privileged container, add + // all the devices from the host's machine into the container + if c.config.MountAllDevices { + if err := util.AddPrivilegedDevices(&g); err != nil { + return nil, err + } + } + + // If network namespace was requested, add it now + if c.config.CreateNetNS { + if c.config.PostConfigureNetNS { + if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), ""); err != nil { + return nil, err + } + } else { + if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), c.state.NetNS.Path()); err != nil { + return nil, err + } + } + } + + // Apply AppArmor checks and load the default profile if needed. + if len(c.config.Spec.Process.ApparmorProfile) > 0 { + updatedProfile, err := apparmor.CheckProfileAndLoadDefault(c.config.Spec.Process.ApparmorProfile) + if err != nil { + return nil, err + } + g.SetProcessApparmorProfile(updatedProfile) + } + + if err := c.makeBindMounts(); err != nil { + return nil, err + } + + if err := c.mountNotifySocket(g); err != nil { + return nil, err + } + + // Get host UID and GID based on the container process UID and GID. + hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid)) + if err != nil { + return nil, err + } + + // Add named volumes + for _, namedVol := range c.config.NamedVolumes { + volume, err := c.runtime.GetVolume(namedVol.Name) + if err != nil { + return nil, fmt.Errorf("error retrieving volume %s to add to container %s: %w", namedVol.Name, c.ID(), err) + } + mountPoint, err := volume.MountPoint() + if err != nil { + return nil, err + } + + overlayFlag := false + upperDir := "" + workDir := "" + for _, o := range namedVol.Options { + if o == "O" { + overlayFlag = true + upperDir, workDir, err = getOverlayUpperAndWorkDir(namedVol.Options) + if err != nil { + return nil, err + } + } + } + + if overlayFlag { + var overlayMount spec.Mount + var overlayOpts *overlay.Options + contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID()) + if err != nil { + return nil, err + } + + overlayOpts = &overlay.Options{RootUID: c.RootUID(), + RootGID: c.RootGID(), + UpperDirOptionFragment: upperDir, + WorkDirOptionFragment: workDir, + GraphOpts: c.runtime.store.GraphOptions(), + } + + overlayMount, err = overlay.MountWithOptions(contentDir, mountPoint, namedVol.Dest, overlayOpts) + if err != nil { + return nil, fmt.Errorf("mounting overlay failed %q: %w", mountPoint, err) + } + + for _, o := range namedVol.Options { + if o == "U" { + if err := c.ChangeHostPathOwnership(mountPoint, true, int(hostUID), int(hostGID)); err != nil { + return nil, err + } + + if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil { + return nil, err + } + } + } + g.AddMount(overlayMount) + } else { + volMount := spec.Mount{ + Type: "bind", + Source: mountPoint, + Destination: namedVol.Dest, + Options: namedVol.Options, + } + g.AddMount(volMount) + } + } + + // Check if the spec file mounts contain the options z, Z or U. + // If they have z or Z, relabel the source directory and then remove the option. + // If they have U, chown the source directory and them remove the option. + for i := range g.Config.Mounts { + m := &g.Config.Mounts[i] + var options []string + for _, o := range m.Options { + switch o { + case "U": + if m.Type == "tmpfs" { + options = append(options, []string{fmt.Sprintf("uid=%d", execUser.Uid), fmt.Sprintf("gid=%d", execUser.Gid)}...) + } else { + // only chown on initial creation of container + if err := c.ChangeHostPathOwnership(m.Source, true, int(hostUID), int(hostGID)); err != nil { + return nil, err + } + } + case "z": + fallthrough + case "Z": + if err := c.relabel(m.Source, c.MountLabel(), label.IsShared(o)); err != nil { + return nil, err + } + + default: + options = append(options, o) + } + } + m.Options = options + } + + g.SetProcessSelinuxLabel(c.ProcessLabel()) + g.SetLinuxMountLabel(c.MountLabel()) + + // Add bind mounts to container + for dstPath, srcPath := range c.state.BindMounts { + newMount := spec.Mount{ + Type: "bind", + Source: srcPath, + Destination: dstPath, + Options: []string{"bind", "rprivate"}, + } + if c.IsReadOnly() && dstPath != "/dev/shm" { + newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev") + } + if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir { + newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev") + } + if !MountExists(g.Mounts(), dstPath) { + g.AddMount(newMount) + } else { + logrus.Infof("User mount overriding libpod mount at %q", dstPath) + } + } + + // Add overlay volumes + for _, overlayVol := range c.config.OverlayVolumes { + upperDir, workDir, err := getOverlayUpperAndWorkDir(overlayVol.Options) + if err != nil { + return nil, err + } + contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID()) + if err != nil { + return nil, err + } + overlayOpts := &overlay.Options{RootUID: c.RootUID(), + RootGID: c.RootGID(), + UpperDirOptionFragment: upperDir, + WorkDirOptionFragment: workDir, + GraphOpts: c.runtime.store.GraphOptions(), + } + + overlayMount, err := overlay.MountWithOptions(contentDir, overlayVol.Source, overlayVol.Dest, overlayOpts) + if err != nil { + return nil, fmt.Errorf("mounting overlay failed %q: %w", overlayVol.Source, err) + } + + // Check overlay volume options + for _, o := range overlayVol.Options { + if o == "U" { + if err := c.ChangeHostPathOwnership(overlayVol.Source, true, int(hostUID), int(hostGID)); err != nil { + return nil, err + } + + if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil { + return nil, err + } + } + } + + g.AddMount(overlayMount) + } + + // Add image volumes as overlay mounts + for _, volume := range c.config.ImageVolumes { + // Mount the specified image. + img, _, err := c.runtime.LibimageRuntime().LookupImage(volume.Source, nil) + if err != nil { + return nil, fmt.Errorf("error creating image volume %q:%q: %w", volume.Source, volume.Dest, err) + } + mountPoint, err := img.Mount(ctx, nil, "") + if err != nil { + return nil, fmt.Errorf("error mounting image volume %q:%q: %w", volume.Source, volume.Dest, err) + } + + contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID()) + if err != nil { + return nil, fmt.Errorf("failed to create TempDir in the %s directory: %w", c.config.StaticDir, err) + } + + var overlayMount spec.Mount + if volume.ReadWrite { + overlayMount, err = overlay.Mount(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions()) + } else { + overlayMount, err = overlay.MountReadOnly(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions()) + } + if err != nil { + return nil, fmt.Errorf("creating overlay mount for image %q failed: %w", volume.Source, err) + } + g.AddMount(overlayMount) + } + + hasHomeSet := false + for _, s := range c.config.Spec.Process.Env { + if strings.HasPrefix(s, "HOME=") { + hasHomeSet = true + break + } + } + if !hasHomeSet && execUser.Home != "" { + c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", execUser.Home)) + } + + if c.config.User != "" { + // User and Group must go together + g.SetProcessUID(uint32(execUser.Uid)) + g.SetProcessGID(uint32(execUser.Gid)) + g.AddProcessAdditionalGid(uint32(execUser.Gid)) + } + + if c.config.Umask != "" { + decVal, err := strconv.ParseUint(c.config.Umask, 8, 32) + if err != nil { + return nil, fmt.Errorf("invalid Umask Value: %w", err) + } + umask := uint32(decVal) + g.Config.Process.User.Umask = &umask + } + + // Add addition groups if c.config.GroupAdd is not empty + if len(c.config.Groups) > 0 { + gids, err := lookup.GetContainerGroups(c.config.Groups, c.state.Mountpoint, overrides) + if err != nil { + return nil, fmt.Errorf("error looking up supplemental groups for container %s: %w", c.ID(), err) + } + for _, gid := range gids { + g.AddProcessAdditionalGid(gid) + } + } + + if c.Systemd() { + if err := c.setupSystemd(g.Mounts(), g); err != nil { + return nil, fmt.Errorf("error adding systemd-specific mounts: %w", err) + } + } + + // Look up and add groups the user belongs to, if a group wasn't directly specified + if !strings.Contains(c.config.User, ":") { + // the gidMappings that are present inside the container user namespace + var gidMappings []idtools.IDMap + + switch { + case len(c.config.IDMappings.GIDMap) > 0: + gidMappings = c.config.IDMappings.GIDMap + case rootless.IsRootless(): + // Check whether the current user namespace has enough gids available. + availableGids, err := rootless.GetAvailableGids() + if err != nil { + return nil, fmt.Errorf("cannot read number of available GIDs: %w", err) + } + gidMappings = []idtools.IDMap{{ + ContainerID: 0, + HostID: 0, + Size: int(availableGids), + }} + default: + gidMappings = []idtools.IDMap{{ + ContainerID: 0, + HostID: 0, + Size: math.MaxInt32, + }} + } + for _, gid := range execUser.Sgids { + isGIDAvailable := false + for _, m := range gidMappings { + if gid >= m.ContainerID && gid < m.ContainerID+m.Size { + isGIDAvailable = true + break + } + } + if isGIDAvailable { + g.AddProcessAdditionalGid(uint32(gid)) + } else { + logrus.Warnf("Additional gid=%d is not present in the user namespace, skip setting it", gid) + } + } + } + + // Add shared namespaces from other containers + if c.config.IPCNsCtr != "" { + if err := c.addNamespaceContainer(&g, IPCNS, c.config.IPCNsCtr, spec.IPCNamespace); err != nil { + return nil, err + } + } + if c.config.MountNsCtr != "" { + if err := c.addNamespaceContainer(&g, MountNS, c.config.MountNsCtr, spec.MountNamespace); err != nil { + return nil, err + } + } + if c.config.NetNsCtr != "" { + if err := c.addNamespaceContainer(&g, NetNS, c.config.NetNsCtr, spec.NetworkNamespace); err != nil { + return nil, err + } + } + if c.config.PIDNsCtr != "" { + if err := c.addNamespaceContainer(&g, PIDNS, c.config.PIDNsCtr, spec.PIDNamespace); err != nil { + return nil, err + } + } + if c.config.UserNsCtr != "" { + if err := c.addNamespaceContainer(&g, UserNS, c.config.UserNsCtr, spec.UserNamespace); err != nil { + return nil, err + } + if len(g.Config.Linux.UIDMappings) == 0 { + // runc complains if no mapping is specified, even if we join another ns. So provide a dummy mapping + g.AddLinuxUIDMapping(uint32(0), uint32(0), uint32(1)) + g.AddLinuxGIDMapping(uint32(0), uint32(0), uint32(1)) + } + } + + availableUIDs, availableGIDs, err := rootless.GetAvailableIDMaps() + if err != nil { + if os.IsNotExist(err) { + // The kernel-provided files only exist if user namespaces are supported + logrus.Debugf("User or group ID mappings not available: %s", err) + } else { + return nil, err + } + } else { + g.Config.Linux.UIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.UIDMappings, availableUIDs) + g.Config.Linux.GIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.GIDMappings, availableGIDs) + } + + // Hostname handling: + // If we have a UTS namespace, set Hostname in the OCI spec. + // Set the HOSTNAME environment variable unless explicitly overridden by + // the user (already present in OCI spec). If we don't have a UTS ns, + // set it to the host's hostname instead. + hostname := c.Hostname() + foundUTS := false + + for _, i := range c.config.Spec.Linux.Namespaces { + if i.Type == spec.UTSNamespace && i.Path == "" { + foundUTS = true + g.SetHostname(hostname) + break + } + } + if !foundUTS { + tmpHostname, err := os.Hostname() + if err != nil { + return nil, err + } + hostname = tmpHostname + } + needEnv := true + for _, checkEnv := range g.Config.Process.Env { + if strings.SplitN(checkEnv, "=", 2)[0] == "HOSTNAME" { + needEnv = false + break + } + } + if needEnv { + g.AddProcessEnv("HOSTNAME", hostname) + } + + if c.config.UTSNsCtr != "" { + if err := c.addNamespaceContainer(&g, UTSNS, c.config.UTSNsCtr, spec.UTSNamespace); err != nil { + return nil, err + } + } + if c.config.CgroupNsCtr != "" { + if err := c.addNamespaceContainer(&g, CgroupNS, c.config.CgroupNsCtr, spec.CgroupNamespace); err != nil { + return nil, err + } + } + + if c.config.UserNsCtr == "" && c.config.IDMappings.AutoUserNs { + if err := g.AddOrReplaceLinuxNamespace(string(spec.UserNamespace), ""); err != nil { + return nil, err + } + g.ClearLinuxUIDMappings() + for _, uidmap := range c.config.IDMappings.UIDMap { + g.AddLinuxUIDMapping(uint32(uidmap.HostID), uint32(uidmap.ContainerID), uint32(uidmap.Size)) + } + g.ClearLinuxGIDMappings() + for _, gidmap := range c.config.IDMappings.GIDMap { + g.AddLinuxGIDMapping(uint32(gidmap.HostID), uint32(gidmap.ContainerID), uint32(gidmap.Size)) + } + } + + g.SetRootPath(c.state.Mountpoint) + g.AddAnnotation(annotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano)) + g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal)) + + if _, exists := g.Config.Annotations[annotations.ContainerManager]; !exists { + g.AddAnnotation(annotations.ContainerManager, annotations.ContainerManagerLibpod) + } + + cgroupPath, err := c.getOCICgroupPath() + if err != nil { + return nil, err + } + + g.SetLinuxCgroupsPath(cgroupPath) + + // Warning: CDI may alter g.Config in place. + if len(c.config.CDIDevices) > 0 { + registry := cdi.GetRegistry( + cdi.WithAutoRefresh(false), + ) + if err := registry.Refresh(); err != nil { + logrus.Debugf("The following error was triggered when refreshing the CDI registry: %v", err) + } + _, err := registry.InjectDevices(g.Config, c.config.CDIDevices...) + if err != nil { + return nil, fmt.Errorf("error setting up CDI devices: %w", err) + } + } + + // Mounts need to be sorted so paths will not cover other paths + mounts := sortMounts(g.Mounts()) + g.ClearMounts() + + // Determine property of RootPropagation based on volume properties. If + // a volume is shared, then keep root propagation shared. This should + // work for slave and private volumes too. + // + // For slave volumes, it can be either [r]shared/[r]slave. + // + // For private volumes any root propagation value should work. + rootPropagation := "" + for _, m := range mounts { + // We need to remove all symlinks from tmpfs mounts. + // Runc and other runtimes may choke on them. + // Easy solution: use securejoin to do a scoped evaluation of + // the links, then trim off the mount prefix. + if m.Type == "tmpfs" { + finalPath, err := securejoin.SecureJoin(c.state.Mountpoint, m.Destination) + if err != nil { + return nil, fmt.Errorf("error resolving symlinks for mount destination %s: %w", m.Destination, err) + } + trimmedPath := strings.TrimPrefix(finalPath, strings.TrimSuffix(c.state.Mountpoint, "/")) + m.Destination = trimmedPath + } + g.AddMount(m) + for _, opt := range m.Options { + switch opt { + case MountShared, MountRShared: + if rootPropagation != MountShared && rootPropagation != MountRShared { + rootPropagation = MountShared + } + case MountSlave, MountRSlave: + if rootPropagation != MountShared && rootPropagation != MountRShared && rootPropagation != MountSlave && rootPropagation != MountRSlave { + rootPropagation = MountRSlave + } + } + } + } + + if rootPropagation != "" { + logrus.Debugf("Set root propagation to %q", rootPropagation) + if err := g.SetLinuxRootPropagation(rootPropagation); err != nil { + return nil, err + } + } + + // Warning: precreate hooks may alter g.Config in place. + if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil { + return nil, fmt.Errorf("error setting up OCI Hooks: %w", err) + } + if len(c.config.EnvSecrets) > 0 { + manager, err := c.runtime.SecretsManager() + if err != nil { + return nil, err + } + if err != nil { + return nil, err + } + for name, secr := range c.config.EnvSecrets { + _, data, err := manager.LookupSecretData(secr.Name) + if err != nil { + return nil, err + } + g.AddProcessEnv(name, string(data)) + } + } + + // Pass down the LISTEN_* environment (see #10443). + for _, key := range []string{"LISTEN_PID", "LISTEN_FDS", "LISTEN_FDNAMES"} { + if val, ok := os.LookupEnv(key); ok { + // Force the PID to `1` since we cannot rely on (all + // versions of) all runtimes to do it for us. + if key == "LISTEN_PID" { + val = "1" + } + g.AddProcessEnv(key, val) + } + } + + return g.Config, nil +} diff --git a/libpod/container_internal_freebsd.go b/libpod/container_internal_freebsd.go index 6ff2618f2..9cfef5c70 100644 --- a/libpod/container_internal_freebsd.go +++ b/libpod/container_internal_freebsd.go @@ -350,483 +350,6 @@ func lookupHostUser(name string) (*runcuser.ExecUser, error) { return &execUser, nil } -// Generate spec for a container -// Accepts a map of the container's dependencies -func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { - overrides := c.getUserOverrides() - execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, overrides) - if err != nil { - if cutil.StringInSlice(c.config.User, c.config.HostUsers) { - execUser, err = lookupHostUser(c.config.User) - } - if err != nil { - return nil, err - } - } - - // NewFromSpec() is deprecated according to its comment - // however the recommended replace just causes a nil map panic - //nolint:staticcheck - g := generate.NewFromSpec(c.config.Spec) - - // If network namespace was requested, add it now - if c.config.CreateNetNS { - g.AddAnnotation("org.freebsd.parentJail", c.state.NetworkJail) - } - - if err := c.makeBindMounts(); err != nil { - return nil, err - } - - if err := c.mountNotifySocket(g); err != nil { - return nil, err - } - - // Get host UID and GID based on the container process UID and GID. - hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid)) - if err != nil { - return nil, err - } - - // Add named volumes - for _, namedVol := range c.config.NamedVolumes { - volume, err := c.runtime.GetVolume(namedVol.Name) - if err != nil { - return nil, fmt.Errorf("error retrieving volume %s to add to container %s: %w", namedVol.Name, c.ID(), err) - } - mountPoint, err := volume.MountPoint() - if err != nil { - return nil, err - } - - overlayFlag := false - upperDir := "" - workDir := "" - for _, o := range namedVol.Options { - if o == "O" { - overlayFlag = true - } - if overlayFlag && strings.Contains(o, "upperdir") { - splitOpt := strings.SplitN(o, "=", 2) - if len(splitOpt) > 1 { - upperDir = splitOpt[1] - if upperDir == "" { - return nil, errors.New("cannot accept empty value for upperdir") - } - } - } - if overlayFlag && strings.Contains(o, "workdir") { - splitOpt := strings.SplitN(o, "=", 2) - if len(splitOpt) > 1 { - workDir = splitOpt[1] - if workDir == "" { - return nil, errors.New("cannot accept empty value for workdir") - } - } - } - } - - if overlayFlag { - var overlayMount spec.Mount - var overlayOpts *overlay.Options - contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID()) - if err != nil { - return nil, err - } - - if (upperDir != "" && workDir == "") || (upperDir == "" && workDir != "") { - return nil, errors.New("must specify both upperdir and workdir") - } - - overlayOpts = &overlay.Options{RootUID: c.RootUID(), - RootGID: c.RootGID(), - UpperDirOptionFragment: upperDir, - WorkDirOptionFragment: workDir, - GraphOpts: c.runtime.store.GraphOptions(), - } - - overlayMount, err = overlay.MountWithOptions(contentDir, mountPoint, namedVol.Dest, overlayOpts) - if err != nil { - return nil, fmt.Errorf("mounting overlay failed %q: %w", mountPoint, err) - } - - for _, o := range namedVol.Options { - switch o { - case "U": - if err := c.ChangeHostPathOwnership(mountPoint, true, int(hostUID), int(hostGID)); err != nil { - return nil, err - } - - if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil { - return nil, err - } - } - } - g.AddMount(overlayMount) - } else { - volMount := spec.Mount{ - Type: "nullfs", - Source: mountPoint, - Destination: namedVol.Dest, - Options: namedVol.Options, - } - g.AddMount(volMount) - } - } - - // Check if the spec file mounts contain the options z, Z or U. - // If they have z or Z, relabel the source directory and then remove the option. - // If they have U, chown the source directory and them remove the option. - for i := range g.Config.Mounts { - m := &g.Config.Mounts[i] - var options []string - for _, o := range m.Options { - switch o { - case "U": - if m.Type == "tmpfs" { - options = append(options, []string{fmt.Sprintf("uid=%d", execUser.Uid), fmt.Sprintf("gid=%d", execUser.Gid)}...) - } else { - // only chown on initial creation of container - if err := c.ChangeHostPathOwnership(m.Source, true, int(hostUID), int(hostGID)); err != nil { - return nil, err - } - } - case "z": - fallthrough - case "Z": - if err := c.relabel(m.Source, c.MountLabel(), label.IsShared(o)); err != nil { - return nil, err - } - - default: - options = append(options, o) - } - } - m.Options = options - } - - // Add bind mounts to container - if false { - for dstPath, srcPath := range c.state.BindMounts { - newMount := spec.Mount{ - Type: "nullfs", - Source: srcPath, - Destination: dstPath, - Options: []string{}, - } - if !MountExists(g.Mounts(), dstPath) { - g.AddMount(newMount) - } else { - logrus.Infof("User mount overriding libpod mount at %q", dstPath) - } - } - } - - // Add bind mounts to container - for dstPath, srcPath := range c.state.BindMounts { - newMount := spec.Mount{ - Type: "nullfs", - Source: srcPath, - Destination: dstPath, - Options: []string{}, - } - if !MountExists(g.Mounts(), dstPath) { - g.AddMount(newMount) - } else { - logrus.Infof("User mount overriding libpod mount at %q", dstPath) - } - } - - // Add overlay volumes - for _, overlayVol := range c.config.OverlayVolumes { - contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID()) - if err != nil { - return nil, err - } - overlayMount, err := overlay.Mount(contentDir, overlayVol.Source, overlayVol.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions()) - if err != nil { - return nil, fmt.Errorf("mounting overlay failed %q: %w", overlayVol.Source, err) - } - - // Check overlay volume options - for _, o := range overlayVol.Options { - switch o { - case "U": - if err := c.ChangeHostPathOwnership(overlayVol.Source, true, int(hostUID), int(hostGID)); err != nil { - return nil, err - } - - if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil { - return nil, err - } - } - } - - g.AddMount(overlayMount) - } - - // Add image volumes as overlay mounts - for _, volume := range c.config.ImageVolumes { - // Mount the specified image. - img, _, err := c.runtime.LibimageRuntime().LookupImage(volume.Source, nil) - if err != nil { - return nil, fmt.Errorf("error creating image volume %q:%q: %w", volume.Source, volume.Dest, err) - } - mountPoint, err := img.Mount(ctx, nil, "") - if err != nil { - return nil, fmt.Errorf("error mounting image volume %q:%q: %w", volume.Source, volume.Dest, err) - } - - contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID()) - if err != nil { - return nil, fmt.Errorf("failed to create TempDir in the %s directory: %w", c.config.StaticDir, err) - } - - var overlayMount spec.Mount - if volume.ReadWrite { - overlayMount, err = overlay.Mount(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions()) - } else { - overlayMount, err = overlay.MountReadOnly(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions()) - } - if err != nil { - return nil, fmt.Errorf("creating overlay mount for image %q failed: %w", volume.Source, err) - } - g.AddMount(overlayMount) - } - - hasHomeSet := false - for _, s := range c.config.Spec.Process.Env { - if strings.HasPrefix(s, "HOME=") { - hasHomeSet = true - break - } - } - if !hasHomeSet && execUser.Home != "" { - c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", execUser.Home)) - } - - if c.config.User != "" { - // User and Group must go together - g.SetProcessUID(uint32(execUser.Uid)) - g.SetProcessGID(uint32(execUser.Gid)) - } - - if c.config.Umask != "" { - decVal, err := strconv.ParseUint(c.config.Umask, 8, 32) - if err != nil { - return nil, fmt.Errorf("invalid Umask Value: %w", err) - } - umask := uint32(decVal) - g.Config.Process.User.Umask = &umask - } - - // Add addition groups if c.config.GroupAdd is not empty - if len(c.config.Groups) > 0 { - gids, err := lookup.GetContainerGroups(c.config.Groups, c.state.Mountpoint, overrides) - if err != nil { - return nil, fmt.Errorf("error looking up supplemental groups for container %s: %w", c.ID(), err) - } - for _, gid := range gids { - g.AddProcessAdditionalGid(gid) - } - } - - // Look up and add groups the user belongs to, if a group wasn't directly specified - if !strings.Contains(c.config.User, ":") { - // the gidMappings that are present inside the container user namespace - var gidMappings []idtools.IDMap - - switch { - case len(c.config.IDMappings.GIDMap) > 0: - gidMappings = c.config.IDMappings.GIDMap - case rootless.IsRootless(): - // Check whether the current user namespace has enough gids available. - availableGids, err := rootless.GetAvailableGids() - if err != nil { - return nil, fmt.Errorf("cannot read number of available GIDs: %w", err) - } - gidMappings = []idtools.IDMap{{ - ContainerID: 0, - HostID: 0, - Size: int(availableGids), - }} - default: - gidMappings = []idtools.IDMap{{ - ContainerID: 0, - HostID: 0, - Size: math.MaxInt32, - }} - } - for _, gid := range execUser.Sgids { - isGIDAvailable := false - for _, m := range gidMappings { - if gid >= m.ContainerID && gid < m.ContainerID+m.Size { - isGIDAvailable = true - break - } - } - if isGIDAvailable { - g.AddProcessAdditionalGid(uint32(gid)) - } else { - logrus.Warnf("Additional gid=%d is not present in the user namespace, skip setting it", gid) - } - } - } - - // Add shared namespaces from other containers - if c.config.NetNsCtr != "" { - if err := c.addNetworkContainer(&g, c.config.NetNsCtr); err != nil { - return nil, err - } - } - - availableUIDs, availableGIDs, err := rootless.GetAvailableIDMaps() - if err != nil { - if os.IsNotExist(err) { - // The kernel-provided files only exist if user namespaces are supported - logrus.Debugf("User or group ID mappings not available: %s", err) - } else { - return nil, err - } - } else { - g.Config.Linux.UIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.UIDMappings, availableUIDs) - g.Config.Linux.GIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.GIDMappings, availableGIDs) - } - - // Hostname handling: - // If we have a UTS namespace, set Hostname in the OCI spec. - // Set the HOSTNAME environment variable unless explicitly overridden by - // the user (already present in OCI spec). If we don't have a UTS ns, - // set it to the host's hostname instead. - hostname := c.Hostname() - foundUTS := false - - // TODO: make this optional, needs progress on adding FreeBSD section to the spec - foundUTS = true - g.SetHostname(hostname) - - if !foundUTS { - tmpHostname, err := os.Hostname() - if err != nil { - return nil, err - } - hostname = tmpHostname - } - needEnv := true - for _, checkEnv := range g.Config.Process.Env { - if strings.SplitN(checkEnv, "=", 2)[0] == "HOSTNAME" { - needEnv = false - break - } - } - if needEnv { - g.AddProcessEnv("HOSTNAME", hostname) - } - - g.SetRootPath(c.state.Mountpoint) - g.AddAnnotation(annotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano)) - g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal)) - - if _, exists := g.Config.Annotations[annotations.ContainerManager]; !exists { - g.AddAnnotation(annotations.ContainerManager, annotations.ContainerManagerLibpod) - } - - // Warning: CDI may alter g.Config in place. - if len(c.config.CDIDevices) > 0 { - registry := cdi.GetRegistry() - if errs := registry.GetErrors(); len(errs) > 0 { - logrus.Debugf("The following errors were triggered when creating the CDI registry: %v", errs) - } - _, err := registry.InjectDevices(g.Config, c.config.CDIDevices...) - if err != nil { - return nil, fmt.Errorf("error setting up CDI devices: %w", err) - } - } - - // Mounts need to be sorted so paths will not cover other paths - mounts := sortMounts(g.Mounts()) - g.ClearMounts() - - // Determine property of RootPropagation based on volume properties. If - // a volume is shared, then keep root propagation shared. This should - // work for slave and private volumes too. - // - // For slave volumes, it can be either [r]shared/[r]slave. - // - // For private volumes any root propagation value should work. - rootPropagation := "" - for _, m := range mounts { - // We need to remove all symlinks from tmpfs mounts. - // Runc and other runtimes may choke on them. - // Easy solution: use securejoin to do a scoped evaluation of - // the links, then trim off the mount prefix. - if m.Type == "tmpfs" { - finalPath, err := securejoin.SecureJoin(c.state.Mountpoint, m.Destination) - if err != nil { - return nil, fmt.Errorf("error resolving symlinks for mount destination %s: %w", m.Destination, err) - } - trimmedPath := strings.TrimPrefix(finalPath, strings.TrimSuffix(c.state.Mountpoint, "/")) - m.Destination = trimmedPath - } - g.AddMount(m) - /* - for _, opt := range m.Options { - switch opt { - case MountShared, MountRShared: - if rootPropagation != MountShared && rootPropagation != MountRShared { - rootPropagation = MountShared - } - case MountSlave, MountRSlave: - if rootPropagation != MountShared && rootPropagation != MountRShared && rootPropagation != MountSlave && rootPropagation != MountRSlave { - rootPropagation = MountRSlave - } - } - }*/ - } - - if rootPropagation != "" { - logrus.Debugf("Set root propagation to %q", rootPropagation) - if err := g.SetLinuxRootPropagation(rootPropagation); err != nil { - return nil, err - } - } - - // Warning: precreate hooks may alter g.Config in place. - if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil { - return nil, fmt.Errorf("error setting up OCI Hooks: %w", err) - } - if len(c.config.EnvSecrets) > 0 { - manager, err := c.runtime.SecretsManager() - if err != nil { - return nil, err - } - if err != nil { - return nil, err - } - for name, secr := range c.config.EnvSecrets { - _, data, err := manager.LookupSecretData(secr.Name) - if err != nil { - return nil, err - } - g.AddProcessEnv(name, string(data)) - } - } - - // Pass down the LISTEN_* environment (see #10443). - for _, key := range []string{"LISTEN_PID", "LISTEN_FDS", "LISTEN_FDNAMES"} { - if val, ok := os.LookupEnv(key); ok { - // Force the PID to `1` since we cannot rely on (all - // versions of) all runtimes to do it for us. - if key == "LISTEN_PID" { - val = "1" - } - g.AddProcessEnv(key, val) - } - } - - return g.Config, nil -} - // mountNotifySocket mounts the NOTIFY_SOCKET into the container if it's set // and if the sdnotify mode is set to container. It also sets c.notifySocket // to avoid redundantly looking up the env variable. diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index bb50ddc43..5ec9d8e43 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -9,7 +9,6 @@ import ( "fmt" "io" "io/ioutil" - "math" "os" "os/user" "path" @@ -22,26 +21,21 @@ import ( metadata "github.com/checkpoint-restore/checkpointctl/lib" "github.com/checkpoint-restore/go-criu/v5/stats" - cdi "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" "github.com/containernetworking/plugins/pkg/ns" "github.com/containers/buildah" "github.com/containers/buildah/pkg/chrootuser" "github.com/containers/buildah/pkg/overlay" - butil "github.com/containers/buildah/util" "github.com/containers/common/libnetwork/etchosts" "github.com/containers/common/libnetwork/resolvconf" "github.com/containers/common/libnetwork/types" - "github.com/containers/common/pkg/apparmor" "github.com/containers/common/pkg/cgroups" "github.com/containers/common/pkg/chown" "github.com/containers/common/pkg/config" "github.com/containers/common/pkg/subscriptions" "github.com/containers/common/pkg/umask" - cutil "github.com/containers/common/pkg/util" is "github.com/containers/image/v5/storage" "github.com/containers/podman/v4/libpod/define" "github.com/containers/podman/v4/libpod/events" - "github.com/containers/podman/v4/pkg/annotations" "github.com/containers/podman/v4/pkg/checkpoint/crutils" "github.com/containers/podman/v4/pkg/criu" "github.com/containers/podman/v4/pkg/lookup" @@ -384,590 +378,6 @@ func lookupHostUser(name string) (*runcuser.ExecUser, error) { return &execUser, nil } -// Internal only function which returns upper and work dir from -// overlay options. -func getOverlayUpperAndWorkDir(options []string) (string, string, error) { - upperDir := "" - workDir := "" - for _, o := range options { - if strings.HasPrefix(o, "upperdir") { - splitOpt := strings.SplitN(o, "=", 2) - if len(splitOpt) > 1 { - upperDir = splitOpt[1] - if upperDir == "" { - return "", "", errors.New("cannot accept empty value for upperdir") - } - } - } - if strings.HasPrefix(o, "workdir") { - splitOpt := strings.SplitN(o, "=", 2) - if len(splitOpt) > 1 { - workDir = splitOpt[1] - if workDir == "" { - return "", "", errors.New("cannot accept empty value for workdir") - } - } - } - } - if (upperDir != "" && workDir == "") || (upperDir == "" && workDir != "") { - return "", "", errors.New("must specify both upperdir and workdir") - } - return upperDir, workDir, nil -} - -// Generate spec for a container -// Accepts a map of the container's dependencies -func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { - overrides := c.getUserOverrides() - execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, overrides) - if err != nil { - if cutil.StringInSlice(c.config.User, c.config.HostUsers) { - execUser, err = lookupHostUser(c.config.User) - } - if err != nil { - return nil, err - } - } - - // NewFromSpec() is deprecated according to its comment - // however the recommended replace just causes a nil map panic - //nolint:staticcheck - g := generate.NewFromSpec(c.config.Spec) - - // If the flag to mount all devices is set for a privileged container, add - // all the devices from the host's machine into the container - if c.config.MountAllDevices { - if err := util.AddPrivilegedDevices(&g); err != nil { - return nil, err - } - } - - // If network namespace was requested, add it now - if c.config.CreateNetNS { - if c.config.PostConfigureNetNS { - if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), ""); err != nil { - return nil, err - } - } else { - if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), c.state.NetNS.Path()); err != nil { - return nil, err - } - } - } - - // Apply AppArmor checks and load the default profile if needed. - if len(c.config.Spec.Process.ApparmorProfile) > 0 { - updatedProfile, err := apparmor.CheckProfileAndLoadDefault(c.config.Spec.Process.ApparmorProfile) - if err != nil { - return nil, err - } - g.SetProcessApparmorProfile(updatedProfile) - } - - if err := c.makeBindMounts(); err != nil { - return nil, err - } - - if err := c.mountNotifySocket(g); err != nil { - return nil, err - } - - // Get host UID and GID based on the container process UID and GID. - hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid)) - if err != nil { - return nil, err - } - - // Add named volumes - for _, namedVol := range c.config.NamedVolumes { - volume, err := c.runtime.GetVolume(namedVol.Name) - if err != nil { - return nil, fmt.Errorf("error retrieving volume %s to add to container %s: %w", namedVol.Name, c.ID(), err) - } - mountPoint, err := volume.MountPoint() - if err != nil { - return nil, err - } - - overlayFlag := false - upperDir := "" - workDir := "" - for _, o := range namedVol.Options { - if o == "O" { - overlayFlag = true - upperDir, workDir, err = getOverlayUpperAndWorkDir(namedVol.Options) - if err != nil { - return nil, err - } - } - } - - if overlayFlag { - var overlayMount spec.Mount - var overlayOpts *overlay.Options - contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID()) - if err != nil { - return nil, err - } - - overlayOpts = &overlay.Options{RootUID: c.RootUID(), - RootGID: c.RootGID(), - UpperDirOptionFragment: upperDir, - WorkDirOptionFragment: workDir, - GraphOpts: c.runtime.store.GraphOptions(), - } - - overlayMount, err = overlay.MountWithOptions(contentDir, mountPoint, namedVol.Dest, overlayOpts) - if err != nil { - return nil, fmt.Errorf("mounting overlay failed %q: %w", mountPoint, err) - } - - for _, o := range namedVol.Options { - if o == "U" { - if err := c.ChangeHostPathOwnership(mountPoint, true, int(hostUID), int(hostGID)); err != nil { - return nil, err - } - - if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil { - return nil, err - } - } - } - g.AddMount(overlayMount) - } else { - volMount := spec.Mount{ - Type: "bind", - Source: mountPoint, - Destination: namedVol.Dest, - Options: namedVol.Options, - } - g.AddMount(volMount) - } - } - - // Check if the spec file mounts contain the options z, Z or U. - // If they have z or Z, relabel the source directory and then remove the option. - // If they have U, chown the source directory and them remove the option. - for i := range g.Config.Mounts { - m := &g.Config.Mounts[i] - var options []string - for _, o := range m.Options { - switch o { - case "U": - if m.Type == "tmpfs" { - options = append(options, []string{fmt.Sprintf("uid=%d", execUser.Uid), fmt.Sprintf("gid=%d", execUser.Gid)}...) - } else { - // only chown on initial creation of container - if err := c.ChangeHostPathOwnership(m.Source, true, int(hostUID), int(hostGID)); err != nil { - return nil, err - } - } - case "z": - fallthrough - case "Z": - if err := c.relabel(m.Source, c.MountLabel(), label.IsShared(o)); err != nil { - return nil, err - } - - default: - options = append(options, o) - } - } - m.Options = options - } - - g.SetProcessSelinuxLabel(c.ProcessLabel()) - g.SetLinuxMountLabel(c.MountLabel()) - - // Add bind mounts to container - for dstPath, srcPath := range c.state.BindMounts { - newMount := spec.Mount{ - Type: "bind", - Source: srcPath, - Destination: dstPath, - Options: []string{"bind", "rprivate"}, - } - if c.IsReadOnly() && dstPath != "/dev/shm" { - newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev") - } - if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir { - newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev") - } - if !MountExists(g.Mounts(), dstPath) { - g.AddMount(newMount) - } else { - logrus.Infof("User mount overriding libpod mount at %q", dstPath) - } - } - - // Add overlay volumes - for _, overlayVol := range c.config.OverlayVolumes { - upperDir, workDir, err := getOverlayUpperAndWorkDir(overlayVol.Options) - if err != nil { - return nil, err - } - contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID()) - if err != nil { - return nil, err - } - overlayOpts := &overlay.Options{RootUID: c.RootUID(), - RootGID: c.RootGID(), - UpperDirOptionFragment: upperDir, - WorkDirOptionFragment: workDir, - GraphOpts: c.runtime.store.GraphOptions(), - } - - overlayMount, err := overlay.MountWithOptions(contentDir, overlayVol.Source, overlayVol.Dest, overlayOpts) - if err != nil { - return nil, fmt.Errorf("mounting overlay failed %q: %w", overlayVol.Source, err) - } - - // Check overlay volume options - for _, o := range overlayVol.Options { - if o == "U" { - if err := c.ChangeHostPathOwnership(overlayVol.Source, true, int(hostUID), int(hostGID)); err != nil { - return nil, err - } - - if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil { - return nil, err - } - } - } - - g.AddMount(overlayMount) - } - - // Add image volumes as overlay mounts - for _, volume := range c.config.ImageVolumes { - // Mount the specified image. - img, _, err := c.runtime.LibimageRuntime().LookupImage(volume.Source, nil) - if err != nil { - return nil, fmt.Errorf("error creating image volume %q:%q: %w", volume.Source, volume.Dest, err) - } - mountPoint, err := img.Mount(ctx, nil, "") - if err != nil { - return nil, fmt.Errorf("error mounting image volume %q:%q: %w", volume.Source, volume.Dest, err) - } - - contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID()) - if err != nil { - return nil, fmt.Errorf("failed to create TempDir in the %s directory: %w", c.config.StaticDir, err) - } - - var overlayMount spec.Mount - if volume.ReadWrite { - overlayMount, err = overlay.Mount(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions()) - } else { - overlayMount, err = overlay.MountReadOnly(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions()) - } - if err != nil { - return nil, fmt.Errorf("creating overlay mount for image %q failed: %w", volume.Source, err) - } - g.AddMount(overlayMount) - } - - hasHomeSet := false - for _, s := range c.config.Spec.Process.Env { - if strings.HasPrefix(s, "HOME=") { - hasHomeSet = true - break - } - } - if !hasHomeSet && execUser.Home != "" { - c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", execUser.Home)) - } - - if c.config.User != "" { - // User and Group must go together - g.SetProcessUID(uint32(execUser.Uid)) - g.SetProcessGID(uint32(execUser.Gid)) - g.AddProcessAdditionalGid(uint32(execUser.Gid)) - } - - if c.config.Umask != "" { - decVal, err := strconv.ParseUint(c.config.Umask, 8, 32) - if err != nil { - return nil, fmt.Errorf("invalid Umask Value: %w", err) - } - umask := uint32(decVal) - g.Config.Process.User.Umask = &umask - } - - // Add addition groups if c.config.GroupAdd is not empty - if len(c.config.Groups) > 0 { - gids, err := lookup.GetContainerGroups(c.config.Groups, c.state.Mountpoint, overrides) - if err != nil { - return nil, fmt.Errorf("error looking up supplemental groups for container %s: %w", c.ID(), err) - } - for _, gid := range gids { - g.AddProcessAdditionalGid(gid) - } - } - - if c.Systemd() { - if err := c.setupSystemd(g.Mounts(), g); err != nil { - return nil, fmt.Errorf("error adding systemd-specific mounts: %w", err) - } - } - - // Look up and add groups the user belongs to, if a group wasn't directly specified - if !strings.Contains(c.config.User, ":") { - // the gidMappings that are present inside the container user namespace - var gidMappings []idtools.IDMap - - switch { - case len(c.config.IDMappings.GIDMap) > 0: - gidMappings = c.config.IDMappings.GIDMap - case rootless.IsRootless(): - // Check whether the current user namespace has enough gids available. - availableGids, err := rootless.GetAvailableGids() - if err != nil { - return nil, fmt.Errorf("cannot read number of available GIDs: %w", err) - } - gidMappings = []idtools.IDMap{{ - ContainerID: 0, - HostID: 0, - Size: int(availableGids), - }} - default: - gidMappings = []idtools.IDMap{{ - ContainerID: 0, - HostID: 0, - Size: math.MaxInt32, - }} - } - for _, gid := range execUser.Sgids { - isGIDAvailable := false - for _, m := range gidMappings { - if gid >= m.ContainerID && gid < m.ContainerID+m.Size { - isGIDAvailable = true - break - } - } - if isGIDAvailable { - g.AddProcessAdditionalGid(uint32(gid)) - } else { - logrus.Warnf("Additional gid=%d is not present in the user namespace, skip setting it", gid) - } - } - } - - // Add shared namespaces from other containers - if c.config.IPCNsCtr != "" { - if err := c.addNamespaceContainer(&g, IPCNS, c.config.IPCNsCtr, spec.IPCNamespace); err != nil { - return nil, err - } - } - if c.config.MountNsCtr != "" { - if err := c.addNamespaceContainer(&g, MountNS, c.config.MountNsCtr, spec.MountNamespace); err != nil { - return nil, err - } - } - if c.config.NetNsCtr != "" { - if err := c.addNamespaceContainer(&g, NetNS, c.config.NetNsCtr, spec.NetworkNamespace); err != nil { - return nil, err - } - } - if c.config.PIDNsCtr != "" { - if err := c.addNamespaceContainer(&g, PIDNS, c.config.PIDNsCtr, spec.PIDNamespace); err != nil { - return nil, err - } - } - if c.config.UserNsCtr != "" { - if err := c.addNamespaceContainer(&g, UserNS, c.config.UserNsCtr, spec.UserNamespace); err != nil { - return nil, err - } - if len(g.Config.Linux.UIDMappings) == 0 { - // runc complains if no mapping is specified, even if we join another ns. So provide a dummy mapping - g.AddLinuxUIDMapping(uint32(0), uint32(0), uint32(1)) - g.AddLinuxGIDMapping(uint32(0), uint32(0), uint32(1)) - } - } - - availableUIDs, availableGIDs, err := rootless.GetAvailableIDMaps() - if err != nil { - if os.IsNotExist(err) { - // The kernel-provided files only exist if user namespaces are supported - logrus.Debugf("User or group ID mappings not available: %s", err) - } else { - return nil, err - } - } else { - g.Config.Linux.UIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.UIDMappings, availableUIDs) - g.Config.Linux.GIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.GIDMappings, availableGIDs) - } - - // Hostname handling: - // If we have a UTS namespace, set Hostname in the OCI spec. - // Set the HOSTNAME environment variable unless explicitly overridden by - // the user (already present in OCI spec). If we don't have a UTS ns, - // set it to the host's hostname instead. - hostname := c.Hostname() - foundUTS := false - - for _, i := range c.config.Spec.Linux.Namespaces { - if i.Type == spec.UTSNamespace && i.Path == "" { - foundUTS = true - g.SetHostname(hostname) - break - } - } - if !foundUTS { - tmpHostname, err := os.Hostname() - if err != nil { - return nil, err - } - hostname = tmpHostname - } - needEnv := true - for _, checkEnv := range g.Config.Process.Env { - if strings.SplitN(checkEnv, "=", 2)[0] == "HOSTNAME" { - needEnv = false - break - } - } - if needEnv { - g.AddProcessEnv("HOSTNAME", hostname) - } - - if c.config.UTSNsCtr != "" { - if err := c.addNamespaceContainer(&g, UTSNS, c.config.UTSNsCtr, spec.UTSNamespace); err != nil { - return nil, err - } - } - if c.config.CgroupNsCtr != "" { - if err := c.addNamespaceContainer(&g, CgroupNS, c.config.CgroupNsCtr, spec.CgroupNamespace); err != nil { - return nil, err - } - } - - if c.config.UserNsCtr == "" && c.config.IDMappings.AutoUserNs { - if err := g.AddOrReplaceLinuxNamespace(string(spec.UserNamespace), ""); err != nil { - return nil, err - } - g.ClearLinuxUIDMappings() - for _, uidmap := range c.config.IDMappings.UIDMap { - g.AddLinuxUIDMapping(uint32(uidmap.HostID), uint32(uidmap.ContainerID), uint32(uidmap.Size)) - } - g.ClearLinuxGIDMappings() - for _, gidmap := range c.config.IDMappings.GIDMap { - g.AddLinuxGIDMapping(uint32(gidmap.HostID), uint32(gidmap.ContainerID), uint32(gidmap.Size)) - } - } - - g.SetRootPath(c.state.Mountpoint) - g.AddAnnotation(annotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano)) - g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal)) - - if _, exists := g.Config.Annotations[annotations.ContainerManager]; !exists { - g.AddAnnotation(annotations.ContainerManager, annotations.ContainerManagerLibpod) - } - - cgroupPath, err := c.getOCICgroupPath() - if err != nil { - return nil, err - } - - g.SetLinuxCgroupsPath(cgroupPath) - - // Warning: CDI may alter g.Config in place. - if len(c.config.CDIDevices) > 0 { - registry := cdi.GetRegistry( - cdi.WithAutoRefresh(false), - ) - if err := registry.Refresh(); err != nil { - logrus.Debugf("The following error was triggered when refreshing the CDI registry: %v", err) - } - _, err := registry.InjectDevices(g.Config, c.config.CDIDevices...) - if err != nil { - return nil, fmt.Errorf("error setting up CDI devices: %w", err) - } - } - - // Mounts need to be sorted so paths will not cover other paths - mounts := sortMounts(g.Mounts()) - g.ClearMounts() - - // Determine property of RootPropagation based on volume properties. If - // a volume is shared, then keep root propagation shared. This should - // work for slave and private volumes too. - // - // For slave volumes, it can be either [r]shared/[r]slave. - // - // For private volumes any root propagation value should work. - rootPropagation := "" - for _, m := range mounts { - // We need to remove all symlinks from tmpfs mounts. - // Runc and other runtimes may choke on them. - // Easy solution: use securejoin to do a scoped evaluation of - // the links, then trim off the mount prefix. - if m.Type == "tmpfs" { - finalPath, err := securejoin.SecureJoin(c.state.Mountpoint, m.Destination) - if err != nil { - return nil, fmt.Errorf("error resolving symlinks for mount destination %s: %w", m.Destination, err) - } - trimmedPath := strings.TrimPrefix(finalPath, strings.TrimSuffix(c.state.Mountpoint, "/")) - m.Destination = trimmedPath - } - g.AddMount(m) - for _, opt := range m.Options { - switch opt { - case MountShared, MountRShared: - if rootPropagation != MountShared && rootPropagation != MountRShared { - rootPropagation = MountShared - } - case MountSlave, MountRSlave: - if rootPropagation != MountShared && rootPropagation != MountRShared && rootPropagation != MountSlave && rootPropagation != MountRSlave { - rootPropagation = MountRSlave - } - } - } - } - - if rootPropagation != "" { - logrus.Debugf("Set root propagation to %q", rootPropagation) - if err := g.SetLinuxRootPropagation(rootPropagation); err != nil { - return nil, err - } - } - - // Warning: precreate hooks may alter g.Config in place. - if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil { - return nil, fmt.Errorf("error setting up OCI Hooks: %w", err) - } - if len(c.config.EnvSecrets) > 0 { - manager, err := c.runtime.SecretsManager() - if err != nil { - return nil, err - } - if err != nil { - return nil, err - } - for name, secr := range c.config.EnvSecrets { - _, data, err := manager.LookupSecretData(secr.Name) - if err != nil { - return nil, err - } - g.AddProcessEnv(name, string(data)) - } - } - - // Pass down the LISTEN_* environment (see #10443). - for _, key := range []string{"LISTEN_PID", "LISTEN_FDS", "LISTEN_FDNAMES"} { - if val, ok := os.LookupEnv(key); ok { - // Force the PID to `1` since we cannot rely on (all - // versions of) all runtimes to do it for us. - if key == "LISTEN_PID" { - val = "1" - } - g.AddProcessEnv(key, val) - } - } - - return g.Config, nil -} - // mountNotifySocket mounts the NOTIFY_SOCKET into the container if it's set // and if the sdnotify mode is set to container. It also sets c.notifySocket // to avoid redundantly looking up the env variable. @@ -2553,11 +1963,12 @@ func (c *Container) bindMountRootFile(source, dest string) error { // generateGroupEntry generates an entry or entries into /etc/group as // required by container configuration. // Generally speaking, we will make an entry under two circumstances: -// 1. The container is started as a specific user:group, and that group is both -// numeric, and does not already exist in /etc/group. -// 2. It is requested that Libpod add the group that launched Podman to -// /etc/group via AddCurrentUserPasswdEntry (though this does not trigger if -// the group in question already exists in /etc/passwd). +// 1. The container is started as a specific user:group, and that group is both +// numeric, and does not already exist in /etc/group. +// 2. It is requested that Libpod add the group that launched Podman to +// /etc/group via AddCurrentUserPasswdEntry (though this does not trigger if +// the group in question already exists in /etc/passwd). +// // Returns group entry (as a string that can be appended to /etc/group) and any // error that occurred. func (c *Container) generateGroupEntry() (string, error) { @@ -2660,13 +2071,14 @@ func (c *Container) generateUserGroupEntry(addedGID int) (string, error) { // generatePasswdEntry generates an entry or entries into /etc/passwd as // required by container configuration. // Generally speaking, we will make an entry under two circumstances: -// 1. The container is started as a specific user who is not in /etc/passwd. -// This only triggers if the user is given as a *numeric* ID. -// 2. It is requested that Libpod add the user that launched Podman to -// /etc/passwd via AddCurrentUserPasswdEntry (though this does not trigger if -// the user in question already exists in /etc/passwd) or the UID to be added -// is 0). -// 3. The user specified additional host user accounts to add the the /etc/passwd file +// 1. The container is started as a specific user who is not in /etc/passwd. +// This only triggers if the user is given as a *numeric* ID. +// 2. It is requested that Libpod add the user that launched Podman to +// /etc/passwd via AddCurrentUserPasswdEntry (though this does not trigger if +// the user in question already exists in /etc/passwd) or the UID to be added +// is 0). +// 3. The user specified additional host user accounts to add the the /etc/passwd file +// // Returns password entry (as a string that can be appended to /etc/passwd) and // any error that occurred. func (c *Container) generatePasswdEntry() (string, error) { |