summaryrefslogtreecommitdiff
path: root/libpod/container_internal_common.go
diff options
context:
space:
mode:
Diffstat (limited to 'libpod/container_internal_common.go')
-rw-r--r--libpod/container_internal_common.go615
1 files changed, 615 insertions, 0 deletions
diff --git a/libpod/container_internal_common.go b/libpod/container_internal_common.go
new file mode 100644
index 000000000..50d7375cd
--- /dev/null
+++ b/libpod/container_internal_common.go
@@ -0,0 +1,615 @@
+//go:build linux || freebsd
+// +build linux freebsd
+
+package libpod
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "math"
+ "os"
+ "strconv"
+ "strings"
+ "time"
+
+ cdi "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
+ "github.com/containers/buildah/pkg/overlay"
+ butil "github.com/containers/buildah/util"
+ "github.com/containers/common/pkg/apparmor"
+ cutil "github.com/containers/common/pkg/util"
+ "github.com/containers/podman/v4/pkg/annotations"
+ "github.com/containers/podman/v4/pkg/lookup"
+ "github.com/containers/podman/v4/pkg/rootless"
+ "github.com/containers/podman/v4/pkg/util"
+ "github.com/containers/storage/pkg/idtools"
+ securejoin "github.com/cyphar/filepath-securejoin"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/opencontainers/runtime-tools/generate"
+ "github.com/opencontainers/selinux/go-selinux/label"
+ "github.com/sirupsen/logrus"
+)
+
+// Internal only function which returns upper and work dir from
+// overlay options.
+func getOverlayUpperAndWorkDir(options []string) (string, string, error) {
+ upperDir := ""
+ workDir := ""
+ for _, o := range options {
+ if strings.HasPrefix(o, "upperdir") {
+ splitOpt := strings.SplitN(o, "=", 2)
+ if len(splitOpt) > 1 {
+ upperDir = splitOpt[1]
+ if upperDir == "" {
+ return "", "", errors.New("cannot accept empty value for upperdir")
+ }
+ }
+ }
+ if strings.HasPrefix(o, "workdir") {
+ splitOpt := strings.SplitN(o, "=", 2)
+ if len(splitOpt) > 1 {
+ workDir = splitOpt[1]
+ if workDir == "" {
+ return "", "", errors.New("cannot accept empty value for workdir")
+ }
+ }
+ }
+ }
+ if (upperDir != "" && workDir == "") || (upperDir == "" && workDir != "") {
+ return "", "", errors.New("must specify both upperdir and workdir")
+ }
+ return upperDir, workDir, nil
+}
+
+// Generate spec for a container
+// Accepts a map of the container's dependencies
+func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
+ overrides := c.getUserOverrides()
+ execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, overrides)
+ if err != nil {
+ if cutil.StringInSlice(c.config.User, c.config.HostUsers) {
+ execUser, err = lookupHostUser(c.config.User)
+ }
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ // NewFromSpec() is deprecated according to its comment
+ // however the recommended replace just causes a nil map panic
+ //nolint:staticcheck
+ g := generate.NewFromSpec(c.config.Spec)
+
+ // If the flag to mount all devices is set for a privileged container, add
+ // all the devices from the host's machine into the container
+ if c.config.MountAllDevices {
+ if err := util.AddPrivilegedDevices(&g); err != nil {
+ return nil, err
+ }
+ }
+
+ // If network namespace was requested, add it now
+ if c.config.CreateNetNS {
+ if c.config.PostConfigureNetNS {
+ if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), ""); err != nil {
+ return nil, err
+ }
+ } else {
+ if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), c.state.NetNS.Path()); err != nil {
+ return nil, err
+ }
+ }
+ }
+
+ // Apply AppArmor checks and load the default profile if needed.
+ if len(c.config.Spec.Process.ApparmorProfile) > 0 {
+ updatedProfile, err := apparmor.CheckProfileAndLoadDefault(c.config.Spec.Process.ApparmorProfile)
+ if err != nil {
+ return nil, err
+ }
+ g.SetProcessApparmorProfile(updatedProfile)
+ }
+
+ if err := c.makeBindMounts(); err != nil {
+ return nil, err
+ }
+
+ if err := c.mountNotifySocket(g); err != nil {
+ return nil, err
+ }
+
+ // Get host UID and GID based on the container process UID and GID.
+ hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid))
+ if err != nil {
+ return nil, err
+ }
+
+ // Add named volumes
+ for _, namedVol := range c.config.NamedVolumes {
+ volume, err := c.runtime.GetVolume(namedVol.Name)
+ if err != nil {
+ return nil, fmt.Errorf("error retrieving volume %s to add to container %s: %w", namedVol.Name, c.ID(), err)
+ }
+ mountPoint, err := volume.MountPoint()
+ if err != nil {
+ return nil, err
+ }
+
+ overlayFlag := false
+ upperDir := ""
+ workDir := ""
+ for _, o := range namedVol.Options {
+ if o == "O" {
+ overlayFlag = true
+ upperDir, workDir, err = getOverlayUpperAndWorkDir(namedVol.Options)
+ if err != nil {
+ return nil, err
+ }
+ }
+ }
+
+ if overlayFlag {
+ var overlayMount spec.Mount
+ var overlayOpts *overlay.Options
+ contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
+ if err != nil {
+ return nil, err
+ }
+
+ overlayOpts = &overlay.Options{RootUID: c.RootUID(),
+ RootGID: c.RootGID(),
+ UpperDirOptionFragment: upperDir,
+ WorkDirOptionFragment: workDir,
+ GraphOpts: c.runtime.store.GraphOptions(),
+ }
+
+ overlayMount, err = overlay.MountWithOptions(contentDir, mountPoint, namedVol.Dest, overlayOpts)
+ if err != nil {
+ return nil, fmt.Errorf("mounting overlay failed %q: %w", mountPoint, err)
+ }
+
+ for _, o := range namedVol.Options {
+ if o == "U" {
+ if err := c.ChangeHostPathOwnership(mountPoint, true, int(hostUID), int(hostGID)); err != nil {
+ return nil, err
+ }
+
+ if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
+ return nil, err
+ }
+ }
+ }
+ g.AddMount(overlayMount)
+ } else {
+ volMount := spec.Mount{
+ Type: "bind",
+ Source: mountPoint,
+ Destination: namedVol.Dest,
+ Options: namedVol.Options,
+ }
+ g.AddMount(volMount)
+ }
+ }
+
+ // Check if the spec file mounts contain the options z, Z or U.
+ // If they have z or Z, relabel the source directory and then remove the option.
+ // If they have U, chown the source directory and them remove the option.
+ for i := range g.Config.Mounts {
+ m := &g.Config.Mounts[i]
+ var options []string
+ for _, o := range m.Options {
+ switch o {
+ case "U":
+ if m.Type == "tmpfs" {
+ options = append(options, []string{fmt.Sprintf("uid=%d", execUser.Uid), fmt.Sprintf("gid=%d", execUser.Gid)}...)
+ } else {
+ // only chown on initial creation of container
+ if err := c.ChangeHostPathOwnership(m.Source, true, int(hostUID), int(hostGID)); err != nil {
+ return nil, err
+ }
+ }
+ case "z":
+ fallthrough
+ case "Z":
+ if err := c.relabel(m.Source, c.MountLabel(), label.IsShared(o)); err != nil {
+ return nil, err
+ }
+
+ default:
+ options = append(options, o)
+ }
+ }
+ m.Options = options
+ }
+
+ g.SetProcessSelinuxLabel(c.ProcessLabel())
+ g.SetLinuxMountLabel(c.MountLabel())
+
+ // Add bind mounts to container
+ for dstPath, srcPath := range c.state.BindMounts {
+ newMount := spec.Mount{
+ Type: "bind",
+ Source: srcPath,
+ Destination: dstPath,
+ Options: []string{"bind", "rprivate"},
+ }
+ if c.IsReadOnly() && dstPath != "/dev/shm" {
+ newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
+ }
+ if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
+ newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
+ }
+ if !MountExists(g.Mounts(), dstPath) {
+ g.AddMount(newMount)
+ } else {
+ logrus.Infof("User mount overriding libpod mount at %q", dstPath)
+ }
+ }
+
+ // Add overlay volumes
+ for _, overlayVol := range c.config.OverlayVolumes {
+ upperDir, workDir, err := getOverlayUpperAndWorkDir(overlayVol.Options)
+ if err != nil {
+ return nil, err
+ }
+ contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
+ if err != nil {
+ return nil, err
+ }
+ overlayOpts := &overlay.Options{RootUID: c.RootUID(),
+ RootGID: c.RootGID(),
+ UpperDirOptionFragment: upperDir,
+ WorkDirOptionFragment: workDir,
+ GraphOpts: c.runtime.store.GraphOptions(),
+ }
+
+ overlayMount, err := overlay.MountWithOptions(contentDir, overlayVol.Source, overlayVol.Dest, overlayOpts)
+ if err != nil {
+ return nil, fmt.Errorf("mounting overlay failed %q: %w", overlayVol.Source, err)
+ }
+
+ // Check overlay volume options
+ for _, o := range overlayVol.Options {
+ if o == "U" {
+ if err := c.ChangeHostPathOwnership(overlayVol.Source, true, int(hostUID), int(hostGID)); err != nil {
+ return nil, err
+ }
+
+ if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
+ return nil, err
+ }
+ }
+ }
+
+ g.AddMount(overlayMount)
+ }
+
+ // Add image volumes as overlay mounts
+ for _, volume := range c.config.ImageVolumes {
+ // Mount the specified image.
+ img, _, err := c.runtime.LibimageRuntime().LookupImage(volume.Source, nil)
+ if err != nil {
+ return nil, fmt.Errorf("error creating image volume %q:%q: %w", volume.Source, volume.Dest, err)
+ }
+ mountPoint, err := img.Mount(ctx, nil, "")
+ if err != nil {
+ return nil, fmt.Errorf("error mounting image volume %q:%q: %w", volume.Source, volume.Dest, err)
+ }
+
+ contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
+ if err != nil {
+ return nil, fmt.Errorf("failed to create TempDir in the %s directory: %w", c.config.StaticDir, err)
+ }
+
+ var overlayMount spec.Mount
+ if volume.ReadWrite {
+ overlayMount, err = overlay.Mount(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
+ } else {
+ overlayMount, err = overlay.MountReadOnly(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
+ }
+ if err != nil {
+ return nil, fmt.Errorf("creating overlay mount for image %q failed: %w", volume.Source, err)
+ }
+ g.AddMount(overlayMount)
+ }
+
+ hasHomeSet := false
+ for _, s := range c.config.Spec.Process.Env {
+ if strings.HasPrefix(s, "HOME=") {
+ hasHomeSet = true
+ break
+ }
+ }
+ if !hasHomeSet && execUser.Home != "" {
+ c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", execUser.Home))
+ }
+
+ if c.config.User != "" {
+ // User and Group must go together
+ g.SetProcessUID(uint32(execUser.Uid))
+ g.SetProcessGID(uint32(execUser.Gid))
+ g.AddProcessAdditionalGid(uint32(execUser.Gid))
+ }
+
+ if c.config.Umask != "" {
+ decVal, err := strconv.ParseUint(c.config.Umask, 8, 32)
+ if err != nil {
+ return nil, fmt.Errorf("invalid Umask Value: %w", err)
+ }
+ umask := uint32(decVal)
+ g.Config.Process.User.Umask = &umask
+ }
+
+ // Add addition groups if c.config.GroupAdd is not empty
+ if len(c.config.Groups) > 0 {
+ gids, err := lookup.GetContainerGroups(c.config.Groups, c.state.Mountpoint, overrides)
+ if err != nil {
+ return nil, fmt.Errorf("error looking up supplemental groups for container %s: %w", c.ID(), err)
+ }
+ for _, gid := range gids {
+ g.AddProcessAdditionalGid(gid)
+ }
+ }
+
+ if c.Systemd() {
+ if err := c.setupSystemd(g.Mounts(), g); err != nil {
+ return nil, fmt.Errorf("error adding systemd-specific mounts: %w", err)
+ }
+ }
+
+ // Look up and add groups the user belongs to, if a group wasn't directly specified
+ if !strings.Contains(c.config.User, ":") {
+ // the gidMappings that are present inside the container user namespace
+ var gidMappings []idtools.IDMap
+
+ switch {
+ case len(c.config.IDMappings.GIDMap) > 0:
+ gidMappings = c.config.IDMappings.GIDMap
+ case rootless.IsRootless():
+ // Check whether the current user namespace has enough gids available.
+ availableGids, err := rootless.GetAvailableGids()
+ if err != nil {
+ return nil, fmt.Errorf("cannot read number of available GIDs: %w", err)
+ }
+ gidMappings = []idtools.IDMap{{
+ ContainerID: 0,
+ HostID: 0,
+ Size: int(availableGids),
+ }}
+ default:
+ gidMappings = []idtools.IDMap{{
+ ContainerID: 0,
+ HostID: 0,
+ Size: math.MaxInt32,
+ }}
+ }
+ for _, gid := range execUser.Sgids {
+ isGIDAvailable := false
+ for _, m := range gidMappings {
+ if gid >= m.ContainerID && gid < m.ContainerID+m.Size {
+ isGIDAvailable = true
+ break
+ }
+ }
+ if isGIDAvailable {
+ g.AddProcessAdditionalGid(uint32(gid))
+ } else {
+ logrus.Warnf("Additional gid=%d is not present in the user namespace, skip setting it", gid)
+ }
+ }
+ }
+
+ // Add shared namespaces from other containers
+ if c.config.IPCNsCtr != "" {
+ if err := c.addNamespaceContainer(&g, IPCNS, c.config.IPCNsCtr, spec.IPCNamespace); err != nil {
+ return nil, err
+ }
+ }
+ if c.config.MountNsCtr != "" {
+ if err := c.addNamespaceContainer(&g, MountNS, c.config.MountNsCtr, spec.MountNamespace); err != nil {
+ return nil, err
+ }
+ }
+ if c.config.NetNsCtr != "" {
+ if err := c.addNamespaceContainer(&g, NetNS, c.config.NetNsCtr, spec.NetworkNamespace); err != nil {
+ return nil, err
+ }
+ }
+ if c.config.PIDNsCtr != "" {
+ if err := c.addNamespaceContainer(&g, PIDNS, c.config.PIDNsCtr, spec.PIDNamespace); err != nil {
+ return nil, err
+ }
+ }
+ if c.config.UserNsCtr != "" {
+ if err := c.addNamespaceContainer(&g, UserNS, c.config.UserNsCtr, spec.UserNamespace); err != nil {
+ return nil, err
+ }
+ if len(g.Config.Linux.UIDMappings) == 0 {
+ // runc complains if no mapping is specified, even if we join another ns. So provide a dummy mapping
+ g.AddLinuxUIDMapping(uint32(0), uint32(0), uint32(1))
+ g.AddLinuxGIDMapping(uint32(0), uint32(0), uint32(1))
+ }
+ }
+
+ availableUIDs, availableGIDs, err := rootless.GetAvailableIDMaps()
+ if err != nil {
+ if os.IsNotExist(err) {
+ // The kernel-provided files only exist if user namespaces are supported
+ logrus.Debugf("User or group ID mappings not available: %s", err)
+ } else {
+ return nil, err
+ }
+ } else {
+ g.Config.Linux.UIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.UIDMappings, availableUIDs)
+ g.Config.Linux.GIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.GIDMappings, availableGIDs)
+ }
+
+ // Hostname handling:
+ // If we have a UTS namespace, set Hostname in the OCI spec.
+ // Set the HOSTNAME environment variable unless explicitly overridden by
+ // the user (already present in OCI spec). If we don't have a UTS ns,
+ // set it to the host's hostname instead.
+ hostname := c.Hostname()
+ foundUTS := false
+
+ for _, i := range c.config.Spec.Linux.Namespaces {
+ if i.Type == spec.UTSNamespace && i.Path == "" {
+ foundUTS = true
+ g.SetHostname(hostname)
+ break
+ }
+ }
+ if !foundUTS {
+ tmpHostname, err := os.Hostname()
+ if err != nil {
+ return nil, err
+ }
+ hostname = tmpHostname
+ }
+ needEnv := true
+ for _, checkEnv := range g.Config.Process.Env {
+ if strings.SplitN(checkEnv, "=", 2)[0] == "HOSTNAME" {
+ needEnv = false
+ break
+ }
+ }
+ if needEnv {
+ g.AddProcessEnv("HOSTNAME", hostname)
+ }
+
+ if c.config.UTSNsCtr != "" {
+ if err := c.addNamespaceContainer(&g, UTSNS, c.config.UTSNsCtr, spec.UTSNamespace); err != nil {
+ return nil, err
+ }
+ }
+ if c.config.CgroupNsCtr != "" {
+ if err := c.addNamespaceContainer(&g, CgroupNS, c.config.CgroupNsCtr, spec.CgroupNamespace); err != nil {
+ return nil, err
+ }
+ }
+
+ if c.config.UserNsCtr == "" && c.config.IDMappings.AutoUserNs {
+ if err := g.AddOrReplaceLinuxNamespace(string(spec.UserNamespace), ""); err != nil {
+ return nil, err
+ }
+ g.ClearLinuxUIDMappings()
+ for _, uidmap := range c.config.IDMappings.UIDMap {
+ g.AddLinuxUIDMapping(uint32(uidmap.HostID), uint32(uidmap.ContainerID), uint32(uidmap.Size))
+ }
+ g.ClearLinuxGIDMappings()
+ for _, gidmap := range c.config.IDMappings.GIDMap {
+ g.AddLinuxGIDMapping(uint32(gidmap.HostID), uint32(gidmap.ContainerID), uint32(gidmap.Size))
+ }
+ }
+
+ g.SetRootPath(c.state.Mountpoint)
+ g.AddAnnotation(annotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano))
+ g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal))
+
+ if _, exists := g.Config.Annotations[annotations.ContainerManager]; !exists {
+ g.AddAnnotation(annotations.ContainerManager, annotations.ContainerManagerLibpod)
+ }
+
+ cgroupPath, err := c.getOCICgroupPath()
+ if err != nil {
+ return nil, err
+ }
+
+ g.SetLinuxCgroupsPath(cgroupPath)
+
+ // Warning: CDI may alter g.Config in place.
+ if len(c.config.CDIDevices) > 0 {
+ registry := cdi.GetRegistry(
+ cdi.WithAutoRefresh(false),
+ )
+ if err := registry.Refresh(); err != nil {
+ logrus.Debugf("The following error was triggered when refreshing the CDI registry: %v", err)
+ }
+ _, err := registry.InjectDevices(g.Config, c.config.CDIDevices...)
+ if err != nil {
+ return nil, fmt.Errorf("error setting up CDI devices: %w", err)
+ }
+ }
+
+ // Mounts need to be sorted so paths will not cover other paths
+ mounts := sortMounts(g.Mounts())
+ g.ClearMounts()
+
+ // Determine property of RootPropagation based on volume properties. If
+ // a volume is shared, then keep root propagation shared. This should
+ // work for slave and private volumes too.
+ //
+ // For slave volumes, it can be either [r]shared/[r]slave.
+ //
+ // For private volumes any root propagation value should work.
+ rootPropagation := ""
+ for _, m := range mounts {
+ // We need to remove all symlinks from tmpfs mounts.
+ // Runc and other runtimes may choke on them.
+ // Easy solution: use securejoin to do a scoped evaluation of
+ // the links, then trim off the mount prefix.
+ if m.Type == "tmpfs" {
+ finalPath, err := securejoin.SecureJoin(c.state.Mountpoint, m.Destination)
+ if err != nil {
+ return nil, fmt.Errorf("error resolving symlinks for mount destination %s: %w", m.Destination, err)
+ }
+ trimmedPath := strings.TrimPrefix(finalPath, strings.TrimSuffix(c.state.Mountpoint, "/"))
+ m.Destination = trimmedPath
+ }
+ g.AddMount(m)
+ for _, opt := range m.Options {
+ switch opt {
+ case MountShared, MountRShared:
+ if rootPropagation != MountShared && rootPropagation != MountRShared {
+ rootPropagation = MountShared
+ }
+ case MountSlave, MountRSlave:
+ if rootPropagation != MountShared && rootPropagation != MountRShared && rootPropagation != MountSlave && rootPropagation != MountRSlave {
+ rootPropagation = MountRSlave
+ }
+ }
+ }
+ }
+
+ if rootPropagation != "" {
+ logrus.Debugf("Set root propagation to %q", rootPropagation)
+ if err := g.SetLinuxRootPropagation(rootPropagation); err != nil {
+ return nil, err
+ }
+ }
+
+ // Warning: precreate hooks may alter g.Config in place.
+ if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil {
+ return nil, fmt.Errorf("error setting up OCI Hooks: %w", err)
+ }
+ if len(c.config.EnvSecrets) > 0 {
+ manager, err := c.runtime.SecretsManager()
+ if err != nil {
+ return nil, err
+ }
+ if err != nil {
+ return nil, err
+ }
+ for name, secr := range c.config.EnvSecrets {
+ _, data, err := manager.LookupSecretData(secr.Name)
+ if err != nil {
+ return nil, err
+ }
+ g.AddProcessEnv(name, string(data))
+ }
+ }
+
+ // Pass down the LISTEN_* environment (see #10443).
+ for _, key := range []string{"LISTEN_PID", "LISTEN_FDS", "LISTEN_FDNAMES"} {
+ if val, ok := os.LookupEnv(key); ok {
+ // Force the PID to `1` since we cannot rely on (all
+ // versions of) all runtimes to do it for us.
+ if key == "LISTEN_PID" {
+ val = "1"
+ }
+ g.AddProcessEnv(key, val)
+ }
+ }
+
+ return g.Config, nil
+}