diff options
author | Doug Rabson <dfr@rabson.org> | 2022-08-27 11:59:44 +0100 |
---|---|---|
committer | Doug Rabson <dfr@rabson.org> | 2022-09-05 10:17:49 +0100 |
commit | e101f4350bdbaad6a70669977a0a254b1bfcb1d3 (patch) | |
tree | 96981d14039eabe7846365ce0cb75222cbf7f46a /libpod/container_internal_common.go | |
parent | 6e4b5b00755c52f56962856b846725e4c45f1fb6 (diff) | |
download | podman-e101f4350bdbaad6a70669977a0a254b1bfcb1d3.tar.gz podman-e101f4350bdbaad6a70669977a0a254b1bfcb1d3.tar.bz2 podman-e101f4350bdbaad6a70669977a0a254b1bfcb1d3.zip |
libpod: Move getOverlayUpperAndWorkDir and generateSpec to container_internal_common.go
[NO NEW TESTS NEEDED]
Signed-off-by: Doug Rabson <dfr@rabson.org>
Diffstat (limited to 'libpod/container_internal_common.go')
-rw-r--r-- | libpod/container_internal_common.go | 615 |
1 files changed, 615 insertions, 0 deletions
diff --git a/libpod/container_internal_common.go b/libpod/container_internal_common.go new file mode 100644 index 000000000..50d7375cd --- /dev/null +++ b/libpod/container_internal_common.go @@ -0,0 +1,615 @@ +//go:build linux || freebsd +// +build linux freebsd + +package libpod + +import ( + "context" + "errors" + "fmt" + "math" + "os" + "strconv" + "strings" + "time" + + cdi "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" + "github.com/containers/buildah/pkg/overlay" + butil "github.com/containers/buildah/util" + "github.com/containers/common/pkg/apparmor" + cutil "github.com/containers/common/pkg/util" + "github.com/containers/podman/v4/pkg/annotations" + "github.com/containers/podman/v4/pkg/lookup" + "github.com/containers/podman/v4/pkg/rootless" + "github.com/containers/podman/v4/pkg/util" + "github.com/containers/storage/pkg/idtools" + securejoin "github.com/cyphar/filepath-securejoin" + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" + "github.com/opencontainers/selinux/go-selinux/label" + "github.com/sirupsen/logrus" +) + +// Internal only function which returns upper and work dir from +// overlay options. +func getOverlayUpperAndWorkDir(options []string) (string, string, error) { + upperDir := "" + workDir := "" + for _, o := range options { + if strings.HasPrefix(o, "upperdir") { + splitOpt := strings.SplitN(o, "=", 2) + if len(splitOpt) > 1 { + upperDir = splitOpt[1] + if upperDir == "" { + return "", "", errors.New("cannot accept empty value for upperdir") + } + } + } + if strings.HasPrefix(o, "workdir") { + splitOpt := strings.SplitN(o, "=", 2) + if len(splitOpt) > 1 { + workDir = splitOpt[1] + if workDir == "" { + return "", "", errors.New("cannot accept empty value for workdir") + } + } + } + } + if (upperDir != "" && workDir == "") || (upperDir == "" && workDir != "") { + return "", "", errors.New("must specify both upperdir and workdir") + } + return upperDir, workDir, nil +} + +// Generate spec for a container +// Accepts a map of the container's dependencies +func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { + overrides := c.getUserOverrides() + execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, overrides) + if err != nil { + if cutil.StringInSlice(c.config.User, c.config.HostUsers) { + execUser, err = lookupHostUser(c.config.User) + } + if err != nil { + return nil, err + } + } + + // NewFromSpec() is deprecated according to its comment + // however the recommended replace just causes a nil map panic + //nolint:staticcheck + g := generate.NewFromSpec(c.config.Spec) + + // If the flag to mount all devices is set for a privileged container, add + // all the devices from the host's machine into the container + if c.config.MountAllDevices { + if err := util.AddPrivilegedDevices(&g); err != nil { + return nil, err + } + } + + // If network namespace was requested, add it now + if c.config.CreateNetNS { + if c.config.PostConfigureNetNS { + if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), ""); err != nil { + return nil, err + } + } else { + if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), c.state.NetNS.Path()); err != nil { + return nil, err + } + } + } + + // Apply AppArmor checks and load the default profile if needed. + if len(c.config.Spec.Process.ApparmorProfile) > 0 { + updatedProfile, err := apparmor.CheckProfileAndLoadDefault(c.config.Spec.Process.ApparmorProfile) + if err != nil { + return nil, err + } + g.SetProcessApparmorProfile(updatedProfile) + } + + if err := c.makeBindMounts(); err != nil { + return nil, err + } + + if err := c.mountNotifySocket(g); err != nil { + return nil, err + } + + // Get host UID and GID based on the container process UID and GID. + hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid)) + if err != nil { + return nil, err + } + + // Add named volumes + for _, namedVol := range c.config.NamedVolumes { + volume, err := c.runtime.GetVolume(namedVol.Name) + if err != nil { + return nil, fmt.Errorf("error retrieving volume %s to add to container %s: %w", namedVol.Name, c.ID(), err) + } + mountPoint, err := volume.MountPoint() + if err != nil { + return nil, err + } + + overlayFlag := false + upperDir := "" + workDir := "" + for _, o := range namedVol.Options { + if o == "O" { + overlayFlag = true + upperDir, workDir, err = getOverlayUpperAndWorkDir(namedVol.Options) + if err != nil { + return nil, err + } + } + } + + if overlayFlag { + var overlayMount spec.Mount + var overlayOpts *overlay.Options + contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID()) + if err != nil { + return nil, err + } + + overlayOpts = &overlay.Options{RootUID: c.RootUID(), + RootGID: c.RootGID(), + UpperDirOptionFragment: upperDir, + WorkDirOptionFragment: workDir, + GraphOpts: c.runtime.store.GraphOptions(), + } + + overlayMount, err = overlay.MountWithOptions(contentDir, mountPoint, namedVol.Dest, overlayOpts) + if err != nil { + return nil, fmt.Errorf("mounting overlay failed %q: %w", mountPoint, err) + } + + for _, o := range namedVol.Options { + if o == "U" { + if err := c.ChangeHostPathOwnership(mountPoint, true, int(hostUID), int(hostGID)); err != nil { + return nil, err + } + + if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil { + return nil, err + } + } + } + g.AddMount(overlayMount) + } else { + volMount := spec.Mount{ + Type: "bind", + Source: mountPoint, + Destination: namedVol.Dest, + Options: namedVol.Options, + } + g.AddMount(volMount) + } + } + + // Check if the spec file mounts contain the options z, Z or U. + // If they have z or Z, relabel the source directory and then remove the option. + // If they have U, chown the source directory and them remove the option. + for i := range g.Config.Mounts { + m := &g.Config.Mounts[i] + var options []string + for _, o := range m.Options { + switch o { + case "U": + if m.Type == "tmpfs" { + options = append(options, []string{fmt.Sprintf("uid=%d", execUser.Uid), fmt.Sprintf("gid=%d", execUser.Gid)}...) + } else { + // only chown on initial creation of container + if err := c.ChangeHostPathOwnership(m.Source, true, int(hostUID), int(hostGID)); err != nil { + return nil, err + } + } + case "z": + fallthrough + case "Z": + if err := c.relabel(m.Source, c.MountLabel(), label.IsShared(o)); err != nil { + return nil, err + } + + default: + options = append(options, o) + } + } + m.Options = options + } + + g.SetProcessSelinuxLabel(c.ProcessLabel()) + g.SetLinuxMountLabel(c.MountLabel()) + + // Add bind mounts to container + for dstPath, srcPath := range c.state.BindMounts { + newMount := spec.Mount{ + Type: "bind", + Source: srcPath, + Destination: dstPath, + Options: []string{"bind", "rprivate"}, + } + if c.IsReadOnly() && dstPath != "/dev/shm" { + newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev") + } + if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir { + newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev") + } + if !MountExists(g.Mounts(), dstPath) { + g.AddMount(newMount) + } else { + logrus.Infof("User mount overriding libpod mount at %q", dstPath) + } + } + + // Add overlay volumes + for _, overlayVol := range c.config.OverlayVolumes { + upperDir, workDir, err := getOverlayUpperAndWorkDir(overlayVol.Options) + if err != nil { + return nil, err + } + contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID()) + if err != nil { + return nil, err + } + overlayOpts := &overlay.Options{RootUID: c.RootUID(), + RootGID: c.RootGID(), + UpperDirOptionFragment: upperDir, + WorkDirOptionFragment: workDir, + GraphOpts: c.runtime.store.GraphOptions(), + } + + overlayMount, err := overlay.MountWithOptions(contentDir, overlayVol.Source, overlayVol.Dest, overlayOpts) + if err != nil { + return nil, fmt.Errorf("mounting overlay failed %q: %w", overlayVol.Source, err) + } + + // Check overlay volume options + for _, o := range overlayVol.Options { + if o == "U" { + if err := c.ChangeHostPathOwnership(overlayVol.Source, true, int(hostUID), int(hostGID)); err != nil { + return nil, err + } + + if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil { + return nil, err + } + } + } + + g.AddMount(overlayMount) + } + + // Add image volumes as overlay mounts + for _, volume := range c.config.ImageVolumes { + // Mount the specified image. + img, _, err := c.runtime.LibimageRuntime().LookupImage(volume.Source, nil) + if err != nil { + return nil, fmt.Errorf("error creating image volume %q:%q: %w", volume.Source, volume.Dest, err) + } + mountPoint, err := img.Mount(ctx, nil, "") + if err != nil { + return nil, fmt.Errorf("error mounting image volume %q:%q: %w", volume.Source, volume.Dest, err) + } + + contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID()) + if err != nil { + return nil, fmt.Errorf("failed to create TempDir in the %s directory: %w", c.config.StaticDir, err) + } + + var overlayMount spec.Mount + if volume.ReadWrite { + overlayMount, err = overlay.Mount(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions()) + } else { + overlayMount, err = overlay.MountReadOnly(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions()) + } + if err != nil { + return nil, fmt.Errorf("creating overlay mount for image %q failed: %w", volume.Source, err) + } + g.AddMount(overlayMount) + } + + hasHomeSet := false + for _, s := range c.config.Spec.Process.Env { + if strings.HasPrefix(s, "HOME=") { + hasHomeSet = true + break + } + } + if !hasHomeSet && execUser.Home != "" { + c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", execUser.Home)) + } + + if c.config.User != "" { + // User and Group must go together + g.SetProcessUID(uint32(execUser.Uid)) + g.SetProcessGID(uint32(execUser.Gid)) + g.AddProcessAdditionalGid(uint32(execUser.Gid)) + } + + if c.config.Umask != "" { + decVal, err := strconv.ParseUint(c.config.Umask, 8, 32) + if err != nil { + return nil, fmt.Errorf("invalid Umask Value: %w", err) + } + umask := uint32(decVal) + g.Config.Process.User.Umask = &umask + } + + // Add addition groups if c.config.GroupAdd is not empty + if len(c.config.Groups) > 0 { + gids, err := lookup.GetContainerGroups(c.config.Groups, c.state.Mountpoint, overrides) + if err != nil { + return nil, fmt.Errorf("error looking up supplemental groups for container %s: %w", c.ID(), err) + } + for _, gid := range gids { + g.AddProcessAdditionalGid(gid) + } + } + + if c.Systemd() { + if err := c.setupSystemd(g.Mounts(), g); err != nil { + return nil, fmt.Errorf("error adding systemd-specific mounts: %w", err) + } + } + + // Look up and add groups the user belongs to, if a group wasn't directly specified + if !strings.Contains(c.config.User, ":") { + // the gidMappings that are present inside the container user namespace + var gidMappings []idtools.IDMap + + switch { + case len(c.config.IDMappings.GIDMap) > 0: + gidMappings = c.config.IDMappings.GIDMap + case rootless.IsRootless(): + // Check whether the current user namespace has enough gids available. + availableGids, err := rootless.GetAvailableGids() + if err != nil { + return nil, fmt.Errorf("cannot read number of available GIDs: %w", err) + } + gidMappings = []idtools.IDMap{{ + ContainerID: 0, + HostID: 0, + Size: int(availableGids), + }} + default: + gidMappings = []idtools.IDMap{{ + ContainerID: 0, + HostID: 0, + Size: math.MaxInt32, + }} + } + for _, gid := range execUser.Sgids { + isGIDAvailable := false + for _, m := range gidMappings { + if gid >= m.ContainerID && gid < m.ContainerID+m.Size { + isGIDAvailable = true + break + } + } + if isGIDAvailable { + g.AddProcessAdditionalGid(uint32(gid)) + } else { + logrus.Warnf("Additional gid=%d is not present in the user namespace, skip setting it", gid) + } + } + } + + // Add shared namespaces from other containers + if c.config.IPCNsCtr != "" { + if err := c.addNamespaceContainer(&g, IPCNS, c.config.IPCNsCtr, spec.IPCNamespace); err != nil { + return nil, err + } + } + if c.config.MountNsCtr != "" { + if err := c.addNamespaceContainer(&g, MountNS, c.config.MountNsCtr, spec.MountNamespace); err != nil { + return nil, err + } + } + if c.config.NetNsCtr != "" { + if err := c.addNamespaceContainer(&g, NetNS, c.config.NetNsCtr, spec.NetworkNamespace); err != nil { + return nil, err + } + } + if c.config.PIDNsCtr != "" { + if err := c.addNamespaceContainer(&g, PIDNS, c.config.PIDNsCtr, spec.PIDNamespace); err != nil { + return nil, err + } + } + if c.config.UserNsCtr != "" { + if err := c.addNamespaceContainer(&g, UserNS, c.config.UserNsCtr, spec.UserNamespace); err != nil { + return nil, err + } + if len(g.Config.Linux.UIDMappings) == 0 { + // runc complains if no mapping is specified, even if we join another ns. So provide a dummy mapping + g.AddLinuxUIDMapping(uint32(0), uint32(0), uint32(1)) + g.AddLinuxGIDMapping(uint32(0), uint32(0), uint32(1)) + } + } + + availableUIDs, availableGIDs, err := rootless.GetAvailableIDMaps() + if err != nil { + if os.IsNotExist(err) { + // The kernel-provided files only exist if user namespaces are supported + logrus.Debugf("User or group ID mappings not available: %s", err) + } else { + return nil, err + } + } else { + g.Config.Linux.UIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.UIDMappings, availableUIDs) + g.Config.Linux.GIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.GIDMappings, availableGIDs) + } + + // Hostname handling: + // If we have a UTS namespace, set Hostname in the OCI spec. + // Set the HOSTNAME environment variable unless explicitly overridden by + // the user (already present in OCI spec). If we don't have a UTS ns, + // set it to the host's hostname instead. + hostname := c.Hostname() + foundUTS := false + + for _, i := range c.config.Spec.Linux.Namespaces { + if i.Type == spec.UTSNamespace && i.Path == "" { + foundUTS = true + g.SetHostname(hostname) + break + } + } + if !foundUTS { + tmpHostname, err := os.Hostname() + if err != nil { + return nil, err + } + hostname = tmpHostname + } + needEnv := true + for _, checkEnv := range g.Config.Process.Env { + if strings.SplitN(checkEnv, "=", 2)[0] == "HOSTNAME" { + needEnv = false + break + } + } + if needEnv { + g.AddProcessEnv("HOSTNAME", hostname) + } + + if c.config.UTSNsCtr != "" { + if err := c.addNamespaceContainer(&g, UTSNS, c.config.UTSNsCtr, spec.UTSNamespace); err != nil { + return nil, err + } + } + if c.config.CgroupNsCtr != "" { + if err := c.addNamespaceContainer(&g, CgroupNS, c.config.CgroupNsCtr, spec.CgroupNamespace); err != nil { + return nil, err + } + } + + if c.config.UserNsCtr == "" && c.config.IDMappings.AutoUserNs { + if err := g.AddOrReplaceLinuxNamespace(string(spec.UserNamespace), ""); err != nil { + return nil, err + } + g.ClearLinuxUIDMappings() + for _, uidmap := range c.config.IDMappings.UIDMap { + g.AddLinuxUIDMapping(uint32(uidmap.HostID), uint32(uidmap.ContainerID), uint32(uidmap.Size)) + } + g.ClearLinuxGIDMappings() + for _, gidmap := range c.config.IDMappings.GIDMap { + g.AddLinuxGIDMapping(uint32(gidmap.HostID), uint32(gidmap.ContainerID), uint32(gidmap.Size)) + } + } + + g.SetRootPath(c.state.Mountpoint) + g.AddAnnotation(annotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano)) + g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal)) + + if _, exists := g.Config.Annotations[annotations.ContainerManager]; !exists { + g.AddAnnotation(annotations.ContainerManager, annotations.ContainerManagerLibpod) + } + + cgroupPath, err := c.getOCICgroupPath() + if err != nil { + return nil, err + } + + g.SetLinuxCgroupsPath(cgroupPath) + + // Warning: CDI may alter g.Config in place. + if len(c.config.CDIDevices) > 0 { + registry := cdi.GetRegistry( + cdi.WithAutoRefresh(false), + ) + if err := registry.Refresh(); err != nil { + logrus.Debugf("The following error was triggered when refreshing the CDI registry: %v", err) + } + _, err := registry.InjectDevices(g.Config, c.config.CDIDevices...) + if err != nil { + return nil, fmt.Errorf("error setting up CDI devices: %w", err) + } + } + + // Mounts need to be sorted so paths will not cover other paths + mounts := sortMounts(g.Mounts()) + g.ClearMounts() + + // Determine property of RootPropagation based on volume properties. If + // a volume is shared, then keep root propagation shared. This should + // work for slave and private volumes too. + // + // For slave volumes, it can be either [r]shared/[r]slave. + // + // For private volumes any root propagation value should work. + rootPropagation := "" + for _, m := range mounts { + // We need to remove all symlinks from tmpfs mounts. + // Runc and other runtimes may choke on them. + // Easy solution: use securejoin to do a scoped evaluation of + // the links, then trim off the mount prefix. + if m.Type == "tmpfs" { + finalPath, err := securejoin.SecureJoin(c.state.Mountpoint, m.Destination) + if err != nil { + return nil, fmt.Errorf("error resolving symlinks for mount destination %s: %w", m.Destination, err) + } + trimmedPath := strings.TrimPrefix(finalPath, strings.TrimSuffix(c.state.Mountpoint, "/")) + m.Destination = trimmedPath + } + g.AddMount(m) + for _, opt := range m.Options { + switch opt { + case MountShared, MountRShared: + if rootPropagation != MountShared && rootPropagation != MountRShared { + rootPropagation = MountShared + } + case MountSlave, MountRSlave: + if rootPropagation != MountShared && rootPropagation != MountRShared && rootPropagation != MountSlave && rootPropagation != MountRSlave { + rootPropagation = MountRSlave + } + } + } + } + + if rootPropagation != "" { + logrus.Debugf("Set root propagation to %q", rootPropagation) + if err := g.SetLinuxRootPropagation(rootPropagation); err != nil { + return nil, err + } + } + + // Warning: precreate hooks may alter g.Config in place. + if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil { + return nil, fmt.Errorf("error setting up OCI Hooks: %w", err) + } + if len(c.config.EnvSecrets) > 0 { + manager, err := c.runtime.SecretsManager() + if err != nil { + return nil, err + } + if err != nil { + return nil, err + } + for name, secr := range c.config.EnvSecrets { + _, data, err := manager.LookupSecretData(secr.Name) + if err != nil { + return nil, err + } + g.AddProcessEnv(name, string(data)) + } + } + + // Pass down the LISTEN_* environment (see #10443). + for _, key := range []string{"LISTEN_PID", "LISTEN_FDS", "LISTEN_FDNAMES"} { + if val, ok := os.LookupEnv(key); ok { + // Force the PID to `1` since we cannot rely on (all + // versions of) all runtimes to do it for us. + if key == "LISTEN_PID" { + val = "1" + } + g.AddProcessEnv(key, val) + } + } + + return g.Config, nil +} |