summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/source/markdown/podman-container-inspect.1.md2
-rw-r--r--docs/source/markdown/podman-create.1.md14
-rw-r--r--docs/source/markdown/podman-run.1.md4
-rw-r--r--go.mod2
-rw-r--r--go.sum10
-rw-r--r--libpod/container.go7
-rw-r--r--libpod/container_config.go4
-rw-r--r--libpod/container_inspect.go27
-rw-r--r--libpod/container_internal.go38
-rw-r--r--libpod/container_internal_linux.go6
-rw-r--r--libpod/options.go24
-rw-r--r--libpod/runtime_ctr.go4
-rw-r--r--pkg/specgen/generate/container.go7
-rw-r--r--pkg/specgen/generate/namespaces.go11
-rw-r--r--pkg/specgen/generate/security.go2
-rw-r--r--pkg/specgenutil/specgen.go2
-rw-r--r--test/system/190-run-ipcns.bats70
-rw-r--r--troubleshooting.md257
-rw-r--r--vendor/github.com/containerd/stargz-snapshotter/estargz/build.go44
-rw-r--r--vendor/github.com/containerd/stargz-snapshotter/estargz/estargz.go5
-rw-r--r--vendor/github.com/containerd/stargz-snapshotter/estargz/testutil.go5
-rw-r--r--vendor/github.com/containers/storage/drivers/overlay/check.go54
-rw-r--r--vendor/github.com/containers/storage/drivers/overlay/idmapped_utils.go160
-rw-r--r--vendor/github.com/containers/storage/drivers/overlay/overlay.go141
-rw-r--r--vendor/github.com/containers/storage/go.mod4
-rw-r--r--vendor/github.com/containers/storage/go.sum8
-rw-r--r--vendor/github.com/containers/storage/layers.go65
-rw-r--r--vendor/github.com/containers/storage/store.go10
-rw-r--r--vendor/github.com/containers/storage/utils.go8
-rw-r--r--vendor/github.com/moby/sys/mountinfo/mounted_unix.go7
-rw-r--r--vendor/modules.txt6
31 files changed, 791 insertions, 217 deletions
diff --git a/docs/source/markdown/podman-container-inspect.1.md b/docs/source/markdown/podman-container-inspect.1.md
index 9945fca7c..4e45bcc40 100644
--- a/docs/source/markdown/podman-container-inspect.1.md
+++ b/docs/source/markdown/podman-container-inspect.1.md
@@ -219,7 +219,7 @@ $ podman container inspect foobar
"DnsSearch": [],
"ExtraHosts": [],
"GroupAdd": [],
- "IpcMode": "private",
+ "IpcMode": "shareable",
"Cgroup": "",
"Cgroups": "default",
"Links": null,
diff --git a/docs/source/markdown/podman-create.1.md b/docs/source/markdown/podman-create.1.md
index 9af7174fc..45d0d0b3e 100644
--- a/docs/source/markdown/podman-create.1.md
+++ b/docs/source/markdown/podman-create.1.md
@@ -504,10 +504,16 @@ To specify multiple static IPv6 addresses per container, set multiple networks u
#### **--ipc**=*ipc*
-Default is to create a private IPC namespace (POSIX SysV IPC) for the container
- `container:<name|id>`: reuses another container shared memory, semaphores and message queues
- `host`: use the host shared memory,semaphores and message queues inside the container. Note: the host mode gives the container full access to local shared memory and is therefore considered insecure.
- `ns:<path>` path to an IPC namespace to join.
+Set the IPC namespace mode for a container. The default is to create
+a private IPC namespace.
+
+- "": Use Podman's default, defined in containers.conf.
+- **container:**_id_: reuses another container's shared memory, semaphores, and message queues
+- **host**: use the host's shared memory, semaphores, and message queues inside the container. Note: the host mode gives the container full access to local shared memory and is therefore considered insecure.
+- **none**: private IPC namespace, with /dev/shm not mounted.
+- **ns:**_path_: path to an IPC namespace to join.
+- **private**: private IPC namespace.
+- **shareable**: private IPC namespace with a possibility to share it with other containers.
#### **--label**, **-l**=*label*
diff --git a/docs/source/markdown/podman-run.1.md b/docs/source/markdown/podman-run.1.md
index ecb24ba6d..5c276c04a 100644
--- a/docs/source/markdown/podman-run.1.md
+++ b/docs/source/markdown/podman-run.1.md
@@ -528,9 +528,13 @@ To specify multiple static IPv6 addresses per container, set multiple networks u
Set the IPC namespace mode for a container. The default is to create
a private IPC namespace.
+- "": Use Podman's default, defined in containers.conf.
- **container:**_id_: reuses another container's shared memory, semaphores, and message queues
- **host**: use the host's shared memory, semaphores, and message queues inside the container. Note: the host mode gives the container full access to local shared memory and is therefore considered insecure.
+- **none**: private IPC namespace, with /dev/shm not mounted.
- **ns:**_path_: path to an IPC namespace to join.
+- **private**: private IPC namespace.
+- **shareable**: private IPC namespace with a possibility to share it with other containers.
#### **--label**, **-l**=*key*=*value*
diff --git a/go.mod b/go.mod
index f1317118c..e41b6982d 100644
--- a/go.mod
+++ b/go.mod
@@ -17,7 +17,7 @@ require (
github.com/containers/image/v5 v5.21.1-0.20220405081457-d1b64686e1d0
github.com/containers/ocicrypt v1.1.3
github.com/containers/psgo v1.7.2
- github.com/containers/storage v1.39.1-0.20220412073713-ea4008e14877
+ github.com/containers/storage v1.39.1-0.20220414183333-eea4e0f5f1f9
github.com/coreos/go-systemd/v22 v22.3.2
github.com/coreos/stream-metadata-go v0.0.0-20210225230131-70edb9eb47b3
github.com/cyphar/filepath-securejoin v0.2.3
diff --git a/go.sum b/go.sum
index 1f22ff339..66aa40eb6 100644
--- a/go.sum
+++ b/go.sum
@@ -325,8 +325,9 @@ github.com/containerd/stargz-snapshotter/estargz v0.9.0/go.mod h1:aE5PCyhFMwR8sb
github.com/containerd/stargz-snapshotter/estargz v0.10.1/go.mod h1:aE5PCyhFMwR8sbrErO5eM2GcvkyXTTJremG883D4qF0=
github.com/containerd/stargz-snapshotter/estargz v0.11.0/go.mod h1:/KsZXsJRllMbTKFfG0miFQWViQKdI9+9aSXs+HN0+ac=
github.com/containerd/stargz-snapshotter/estargz v0.11.1/go.mod h1:6VoPcf4M1wvnogWxqc4TqBWWErCS+R+ucnPZId2VbpQ=
-github.com/containerd/stargz-snapshotter/estargz v0.11.3 h1:k2kN16Px6LYuv++qFqK+JTcYqc8bEVxzGpf8/gFBL5M=
github.com/containerd/stargz-snapshotter/estargz v0.11.3/go.mod h1:7vRJIcImfY8bpifnMjt+HTJoQxASq7T28MYbP15/Nf0=
+github.com/containerd/stargz-snapshotter/estargz v0.11.4 h1:LjrYUZpyOhiSaU7hHrdR82/RBoxfGWSaC0VeSSMXqnk=
+github.com/containerd/stargz-snapshotter/estargz v0.11.4/go.mod h1:7vRJIcImfY8bpifnMjt+HTJoQxASq7T28MYbP15/Nf0=
github.com/containerd/ttrpc v0.0.0-20190828154514-0e0f228740de/go.mod h1:PvCDdDGpgqzQIzDW1TphrGLssLDZp2GuS+X5DkEJB8o=
github.com/containerd/ttrpc v0.0.0-20190828172938-92c8520ef9f8/go.mod h1:PvCDdDGpgqzQIzDW1TphrGLssLDZp2GuS+X5DkEJB8o=
github.com/containerd/ttrpc v0.0.0-20191028202541-4f1b8fe65a5c/go.mod h1:LPm1u0xBw8r8NOKoOdNMeVHSawSsltak+Ihv+etqsE8=
@@ -379,8 +380,8 @@ github.com/containers/storage v1.38.2/go.mod h1:INP0RPLHWBxx+pTsO5uiHlDUGHDFvWZP
github.com/containers/storage v1.38.3-0.20220301151551-d06b0f81c0aa/go.mod h1:LkkL34WRi4dI4jt9Cp+ImdZi/P5i36glSHimT5CP5zM=
github.com/containers/storage v1.39.0/go.mod h1:UAD0cKLouN4BOQRgZut/nMjrh/EnTCjSNPgp4ZuGWMs=
github.com/containers/storage v1.39.1-0.20220330193934-f3200eb5a5d9/go.mod h1:IMa2AfBI+Fxxk2hQqLTGhpJX6z2pZS1/I785QJeUwUY=
-github.com/containers/storage v1.39.1-0.20220412073713-ea4008e14877 h1:V3aVdbQt9qU6tu4HHAJtro4H8+Hnv6X/hrUNba8dll0=
-github.com/containers/storage v1.39.1-0.20220412073713-ea4008e14877/go.mod h1:UuYvGSKIdmzkjHbT/PENtxLRVGQ974nyhMbYp0KP19w=
+github.com/containers/storage v1.39.1-0.20220414183333-eea4e0f5f1f9 h1:cB2AvqxpfyqyyffXtDN0txJhD0lIaZWktbSRI92WpN4=
+github.com/containers/storage v1.39.1-0.20220414183333-eea4e0f5f1f9/go.mod h1:hFiHLMgNU0r3MiUpE97hEBaEKCN8fEIuEEBXoFC9eN0=
github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk=
github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
github.com/coreos/etcd v3.3.13+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
@@ -1005,8 +1006,9 @@ github.com/moby/sys/mount v0.2.0/go.mod h1:aAivFE2LB3W4bACsUXChRHQ0qKWsetY4Y9V7s
github.com/moby/sys/mountinfo v0.4.0/go.mod h1:rEr8tzG/lsIZHBtN/JjGG+LMYx9eXgW2JI+6q0qou+A=
github.com/moby/sys/mountinfo v0.4.1/go.mod h1:rEr8tzG/lsIZHBtN/JjGG+LMYx9eXgW2JI+6q0qou+A=
github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU=
-github.com/moby/sys/mountinfo v0.6.0 h1:gUDhXQx58YNrpHlK4nSL+7y2pxFZkUcXqzFDKWdC0Oo=
github.com/moby/sys/mountinfo v0.6.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU=
+github.com/moby/sys/mountinfo v0.6.1 h1:+H/KnGEAGRpTrEAqNVQ2AM3SiwMgJUt/TXj+Z8cmCIc=
+github.com/moby/sys/mountinfo v0.6.1/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU=
github.com/moby/sys/signal v0.6.0/go.mod h1:GQ6ObYZfqacOwTtlXvcmh9A26dVRul/hbOZn88Kg8Tg=
github.com/moby/sys/symlink v0.1.0/go.mod h1:GGDODQmbFOjFsXvfLVn3+ZRxkch54RkSiGqsZeMYowQ=
github.com/moby/sys/symlink v0.2.0/go.mod h1:7uZVF2dqJjG/NsClqul95CqKOBRQyYSNnJ6BMgR/gFs=
diff --git a/libpod/container.go b/libpod/container.go
index 578f16905..bc3cab439 100644
--- a/libpod/container.go
+++ b/libpod/container.go
@@ -291,6 +291,13 @@ func (c *Container) Config() *ContainerConfig {
return returnConfig
}
+// ConfigNoCopy returns the configuration used by the container.
+// Note that the returned value is not a copy and must hence
+// only be used in a reading fashion.
+func (c *Container) ConfigNoCopy() *ContainerConfig {
+ return c.config
+}
+
// DeviceHostSrc returns the user supplied device to be passed down in the pod
func (c *Container) DeviceHostSrc() []spec.LinuxDevice {
return c.config.DeviceHostSrc
diff --git a/libpod/container_config.go b/libpod/container_config.go
index 8500c6db9..371a1dec0 100644
--- a/libpod/container_config.go
+++ b/libpod/container_config.go
@@ -120,6 +120,10 @@ type ContainerRootFSConfig struct {
// with the size specified in ShmSize and populate this with the path of
// said tmpfs.
ShmDir string `json:"ShmDir,omitempty"`
+ // NoShmShare, when true, prevents other containers from joining this container's IPC namespace and sharing its /dev/shm
+ NoShmShare bool `json:"NOShmShare,omitempty"`
+ // NoShm, when true, means no tmpfs is created and mounted on /dev/shm
+ NoShm bool `json:"NoShm,omitempty"`
// ShmSize is the size of the container's SHM. Only used if ShmDir was
// not set manually at time of creation.
ShmSize int64 `json:"shmSize"`
diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go
index c9d0b8a6c..14290ca0d 100644
--- a/libpod/container_inspect.go
+++ b/libpod/container_inspect.go
@@ -703,32 +703,31 @@ func (c *Container) generateInspectContainerHostConfig(ctrSpec *spec.Spec, named
}
hostConfig.CapAdd = capAdd
hostConfig.CapDrop = capDrop
-
- // IPC Namespace mode
- ipcMode := ""
- if c.config.IPCNsCtr != "" {
- ipcMode = fmt.Sprintf("container:%s", c.config.IPCNsCtr)
- } else if ctrSpec.Linux != nil {
+ switch {
+ case c.config.IPCNsCtr != "":
+ hostConfig.IpcMode = fmt.Sprintf("container:%s", c.config.IPCNsCtr)
+ case ctrSpec.Linux != nil:
// Locate the spec's IPC namespace.
// If there is none, it's ipc=host.
// If there is one and it has a path, it's "ns:".
// If no path, it's default - the empty string.
-
for _, ns := range ctrSpec.Linux.Namespaces {
if ns.Type == spec.IPCNamespace {
if ns.Path != "" {
- ipcMode = fmt.Sprintf("ns:%s", ns.Path)
+ hostConfig.IpcMode = fmt.Sprintf("ns:%s", ns.Path)
} else {
- ipcMode = "private"
+ break
}
- break
}
}
- if ipcMode == "" {
- ipcMode = "host"
- }
+ case c.config.NoShm:
+ hostConfig.IpcMode = "none"
+ case c.config.NoShmShare:
+ hostConfig.IpcMode = "private"
+ }
+ if hostConfig.IpcMode == "" {
+ hostConfig.IpcMode = "shareable"
}
- hostConfig.IpcMode = ipcMode
// Cgroup namespace mode
cgroupMode := ""
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index f1f467879..c7567a55e 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -1507,26 +1507,28 @@ func (c *Container) mountStorage() (_ string, deferredErr error) {
return c.state.Mountpoint, nil
}
- mounted, err := mount.Mounted(c.config.ShmDir)
- if err != nil {
- return "", errors.Wrapf(err, "unable to determine if %q is mounted", c.config.ShmDir)
- }
-
- if !mounted && !MountExists(c.config.Spec.Mounts, "/dev/shm") {
- shmOptions := fmt.Sprintf("mode=1777,size=%d", c.config.ShmSize)
- if err := c.mountSHM(shmOptions); err != nil {
- return "", err
- }
- if err := os.Chown(c.config.ShmDir, c.RootUID(), c.RootGID()); err != nil {
- return "", errors.Wrapf(err, "failed to chown %s", c.config.ShmDir)
+ if !c.config.NoShm {
+ mounted, err := mount.Mounted(c.config.ShmDir)
+ if err != nil {
+ return "", errors.Wrapf(err, "unable to determine if %q is mounted", c.config.ShmDir)
}
- defer func() {
- if deferredErr != nil {
- if err := c.unmountSHM(c.config.ShmDir); err != nil {
- logrus.Errorf("Unmounting SHM for container %s after mount error: %v", c.ID(), err)
- }
+
+ if !mounted && !MountExists(c.config.Spec.Mounts, "/dev/shm") {
+ shmOptions := fmt.Sprintf("mode=1777,size=%d", c.config.ShmSize)
+ if err := c.mountSHM(shmOptions); err != nil {
+ return "", err
}
- }()
+ if err := os.Chown(c.config.ShmDir, c.RootUID(), c.RootGID()); err != nil {
+ return "", errors.Wrapf(err, "failed to chown %s", c.config.ShmDir)
+ }
+ defer func() {
+ if deferredErr != nil {
+ if err := c.unmountSHM(c.config.ShmDir); err != nil {
+ logrus.Errorf("Unmounting SHM for container %s after mount error: %v", c.ID(), err)
+ }
+ }
+ }()
+ }
}
// We need to mount the container before volumes - to ensure the copyup
diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go
index 9369b746c..9991003d6 100644
--- a/libpod/container_internal_linux.go
+++ b/libpod/container_internal_linux.go
@@ -1963,8 +1963,10 @@ func (c *Container) makeBindMounts() error {
}
}
- // SHM is always added when we mount the container
- c.state.BindMounts["/dev/shm"] = c.config.ShmDir
+ if c.config.ShmDir != "" {
+ // If ShmDir has a value SHM is always added when we mount the container
+ c.state.BindMounts["/dev/shm"] = c.config.ShmDir
+ }
if c.config.Passwd == nil || *c.config.Passwd {
newPasswd, newGroup, err := c.generatePasswdAndGroup()
diff --git a/libpod/options.go b/libpod/options.go
index 6c4b4cc42..ffd0e6037 100644
--- a/libpod/options.go
+++ b/libpod/options.go
@@ -559,6 +559,30 @@ func WithShmDir(dir string) CtrCreateOption {
}
}
+// WithNoShm tells libpod whether to skip mounting /dev/shm (true means /dev/shm is not mounted)
+func WithNoShm(mount bool) CtrCreateOption {
+ return func(ctr *Container) error {
+ if ctr.valid {
+ return define.ErrCtrFinalized
+ }
+
+ ctr.config.NoShm = mount
+ return nil
+ }
+}
+
+// WithNoShmShare tells libpod whether to forbid sharing the container's /dev/shm with other containers (true means it cannot be shared)
+func WithNoShmShare(share bool) CtrCreateOption {
+ return func(ctr *Container) error {
+ if ctr.valid {
+ return define.ErrCtrFinalized
+ }
+
+ ctr.config.NoShmShare = share
+ return nil
+ }
+}
+
// WithSystemd turns on systemd mode in the container
func WithSystemd() CtrCreateOption {
return func(ctr *Container) error {
diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index f92898b1c..7edd49fd1 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -174,6 +174,8 @@ func (r *Runtime) initContainerVariables(rSpec *spec.Spec, config *ContainerConf
return nil, errors.Wrapf(err, "converting containers.conf ShmSize %s to an int", r.config.Containers.ShmSize)
}
ctr.config.ShmSize = size
+ ctr.config.NoShm = false
+ ctr.config.NoShmShare = false
ctr.config.StopSignal = 15
ctr.config.StopTimeout = r.config.Engine.StopTimeout
@@ -514,7 +516,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
}
}
- if !MountExists(ctr.config.Spec.Mounts, "/dev/shm") && ctr.config.ShmDir == "" {
+ if !MountExists(ctr.config.Spec.Mounts, "/dev/shm") && ctr.config.ShmDir == "" && !ctr.config.NoShm {
ctr.config.ShmDir = filepath.Join(ctr.bundlePath(), "shm")
if err := os.MkdirAll(ctr.config.ShmDir, 0700); err != nil {
if !os.IsExist(err) {
diff --git a/pkg/specgen/generate/container.go b/pkg/specgen/generate/container.go
index b38b0e695..f7ea2edfa 100644
--- a/pkg/specgen/generate/container.go
+++ b/pkg/specgen/generate/container.go
@@ -428,9 +428,12 @@ func ConfigToSpec(rt *libpod.Runtime, specg *specgen.SpecGenerator, contaierID s
case "cgroup":
specg.CgroupNS = specgen.Namespace{NSMode: specgen.Default} //default
case "ipc":
- if conf.ShmDir == "/dev/shm" {
+ switch conf.ShmDir {
+ case "/dev/shm":
specg.IpcNS = specgen.Namespace{NSMode: specgen.Host}
- } else {
+ case "":
+ specg.IpcNS = specgen.Namespace{NSMode: specgen.None}
+ default:
specg.IpcNS = specgen.Namespace{NSMode: specgen.Default} //default
}
case "uts":
diff --git a/pkg/specgen/generate/namespaces.go b/pkg/specgen/generate/namespaces.go
index 9ce45aaf0..05c2d1741 100644
--- a/pkg/specgen/generate/namespaces.go
+++ b/pkg/specgen/generate/namespaces.go
@@ -134,8 +134,17 @@ func namespaceOptions(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.
if err != nil {
return nil, errors.Wrapf(err, "error looking up container to share ipc namespace with")
}
+ if ipcCtr.ConfigNoCopy().NoShmShare {
+ return nil, errors.Errorf("joining IPC of container %s is not allowed: non-shareable IPC (hint: use IpcMode:shareable for the donor container)", ipcCtr.ID())
+ }
toReturn = append(toReturn, libpod.WithIPCNSFrom(ipcCtr))
- toReturn = append(toReturn, libpod.WithShmDir(ipcCtr.ShmDir()))
+ if !ipcCtr.ConfigNoCopy().NoShm {
+ toReturn = append(toReturn, libpod.WithShmDir(ipcCtr.ShmDir()))
+ }
+ case specgen.None:
+ toReturn = append(toReturn, libpod.WithNoShm(true))
+ case specgen.Private:
+ toReturn = append(toReturn, libpod.WithNoShmShare(true))
}
// UTS
diff --git a/pkg/specgen/generate/security.go b/pkg/specgen/generate/security.go
index 988c29832..ec52164ab 100644
--- a/pkg/specgen/generate/security.go
+++ b/pkg/specgen/generate/security.go
@@ -222,7 +222,7 @@ func securityConfigureGenerator(s *specgen.SpecGenerator, g *generate.Generator,
for sysctlKey, sysctlVal := range defaultSysctls {
// Ignore mqueue sysctls if --ipc=host
if noUseIPC && strings.HasPrefix(sysctlKey, "fs.mqueue.") {
- logrus.Infof("Sysctl %s=%s ignored in containers.conf, since IPC Namespace set to host", sysctlKey, sysctlVal)
+ logrus.Infof("Sysctl %s=%s ignored in containers.conf, since IPC Namespace set to %q", sysctlKey, sysctlVal, s.IpcNS.NSMode)
continue
}
diff --git a/pkg/specgenutil/specgen.go b/pkg/specgenutil/specgen.go
index 42b66d909..c86af7295 100644
--- a/pkg/specgenutil/specgen.go
+++ b/pkg/specgenutil/specgen.go
@@ -190,7 +190,7 @@ func setNamespaces(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions)
}
}
if c.IPC != "" {
- s.IpcNS, err = specgen.ParseNamespace(c.IPC)
+ s.IpcNS, err = specgen.ParseIPCNamespace(c.IPC)
if err != nil {
return err
}
diff --git a/test/system/190-run-ipcns.bats b/test/system/190-run-ipcns.bats
new file mode 100644
index 000000000..9327d8ec7
--- /dev/null
+++ b/test/system/190-run-ipcns.bats
@@ -0,0 +1,70 @@
+#!/usr/bin/env bats -*- bats -*-
+# shellcheck disable=SC2096
+#
+# Tests for podman run --ipc
+#
+
+load helpers
+
+@test "podman --ipc=host" {
+ run readlink /proc/self/ns/ipc
+ hostipc=$output
+ run_podman run --rm --ipc=host $IMAGE readlink /proc/self/ns/ipc
+ is "$output" "$hostipc" "HostIPC and container IPC should be same"
+}
+
+@test "podman --ipc=none" {
+ run readlink /proc/self/ns/ipc
+ hostipc=$output
+ run_podman run --rm --ipc=none $IMAGE readlink /proc/self/ns/ipc
+ if [[ $output == "$hostipc" ]]; then
+ die "hostipc and containeripc should be different"
+ fi
+ run_podman 1 run --rm --ipc=none $IMAGE ls /dev/shm
+ is "$output" "ls: /dev/shm: No such file or directory" "Should fail with missing /dev/shm"
+}
+
+@test "podman --ipc=private" {
+ run readlink /proc/self/ns/ipc
+ hostipc=$output
+ run_podman run -d --ipc=private --name test $IMAGE sleep 100
+ if [[ $output == "$hostipc" ]]; then
+ die "hostipc and containeripc should be different"
+ fi
+ run_podman 125 run --ipc=container:test --rm $IMAGE readlink /proc/self/ns/ipc
+ is "$output" ".*is not allowed: non-shareable IPC (hint: use IpcMode:shareable for the donor container)" "Containers should not share private ipc namespace"
+ run_podman stop -t 0 test
+ run_podman rm test
+}
+
+@test "podman --ipc=shareable" {
+ run readlink /proc/self/ns/ipc
+ hostipc=$output
+ run_podman run -d --ipc=shareable --name test $IMAGE sleep 100
+ if [[ $output == "$hostipc" ]]; then
+ die "hostipc and containeripc should be different"
+ fi
+ run_podman run --ipc=container:test --rm $IMAGE readlink /proc/self/ns/ipc
+ if [[ $output == "$hostipc" ]]; then
+ die "hostipc and containeripc should be different"
+ fi
+ run_podman stop -t 0 test
+ run_podman rm test
+}
+
+@test "podman --ipc=container@test" {
+ run readlink /proc/self/ns/ipc
+ hostipc=$output
+ run_podman run -d --name test $IMAGE sleep 100
+ run_podman exec test readlink /proc/self/ns/ipc
+ if [[ $output == "$hostipc" ]]; then
+ die "hostipc and containeripc should be different"
+ fi
+ testipc=$output
+ run_podman run --ipc=container:test --rm $IMAGE readlink /proc/self/ns/ipc
+ is "$output" "$testipc" "Containers should share ipc namespace"
+ run_podman stop -t 0 test
+ run_podman rm test
+}
+
+# vim: filetype=sh
diff --git a/troubleshooting.md b/troubleshooting.md
index 941d1a322..cf554654b 100644
--- a/troubleshooting.md
+++ b/troubleshooting.md
@@ -18,8 +18,10 @@ and retry your command before reporting the issue.
---
### 2) Can't use volume mount, get permission denied
+```console
$ podman run -v ~/mycontent:/content fedora touch /content/file
touch: cannot touch '/content/file': Permission denied
+```
#### Solution
@@ -38,14 +40,18 @@ content label. Shared volume labels allow all containers to read/write content.
The **Z** option tells Podman to label the content with a private unshared label.
Only the current container can use a private volume.
+```console
$ podman run -v ~/mycontent:/content:Z fedora touch /content/file
+```
Make sure the content is private for the container. Do not relabel system directories and content.
Relabeling system content might cause other confined services on your machine to fail. For these
types of containers we recommend having SELinux separation disabled. The option `--security-opt label=disable`
will disable SELinux separation for the container.
+```console
$ podman run --security-opt label=disable -v ~:/home/user fedora touch /home/user/file
+```
In cases where the container image runs as a specific, non-root user, though, the
solution is to fix the user namespace. This would include container images such as
@@ -53,7 +59,9 @@ the Jupyter Notebook image (which runs as "jovyan") and the Postgres image (whic
as "postgres"). In either case, use the `--userns` switch to map user namespaces,
most of the time by using the **keep-id** option.
+```console
$ podman run -v "$PWD":/home/jovyan/work --userns=keep-id jupyter/scipy-notebook
+```
---
### 3) No such image or Bare keys cannot contain ':'
@@ -131,8 +139,11 @@ It is most likely necessary to enable unprivileged pings on the host.
Be sure the UID of the user is part of the range in the
`/proc/sys/net/ipv4/ping_group_range` file.
-To change its value you can use something like: `sysctl -w
-"net.ipv4.ping_group_range=0 2000000"`.
+To change its value you can use something like:
+
+```console
+# sysctl -w "net.ipv4.ping_group_range=0 2000000"
+```
To make the change persistent, you'll need to add a file in
`/etc/sysctl.d` that contains `net.ipv4.ping_group_range=0 $MAX_UID`.
@@ -159,8 +170,8 @@ When rootless Podman attempts to execute a container on a non exec home director
If you are running Podman or Buildah on a home directory that is mounted noexec,
then they will fail with a message like:
-```
-podman run centos:7
+```console
+$ podman run centos:7
standard_init_linux.go:203: exec user process caused "permission denied"
```
@@ -170,8 +181,8 @@ Since the administrator of the system set up your home directory to be noexec, y
For example
-```
-cat ~/.config/containers/storage.conf
+```console
+$ cat ~/.config/containers/storage.conf
[storage]
driver = "overlay"
runroot = "/run/user/1000"
@@ -203,7 +214,9 @@ container processes to write to the cgroup file system. Turn on this boolean,
on SELinux separated systems, to allow systemd to run properly in the container.
Only do this on systems running older versions of Podman.
-`setsebool -P container_manage_cgroup true`
+```console
+# setsebool -P container_manage_cgroup true
+```
### 9) Newuidmap missing when running rootless Podman commands
@@ -214,8 +227,8 @@ Rootless Podman requires the newuidmap and newgidmap programs to be installed.
If you are running Podman or Buildah as a rootless user, you get an error complaining about
a missing newuidmap executable.
-```
-podman run -ti fedora sh
+```console
+$ podman run -ti fedora sh
command required for rootless mode with multiple IDs: exec: "newuidmap": executable file not found in $PATH
```
@@ -231,8 +244,8 @@ Rootless Podman requires the user running it to have a range of UIDs listed in /
A user, either via --user or through the default configured for the image, is not mapped inside the namespace.
-```
-podman run --rm -ti --user 1000000 alpine echo hi
+```console
+$ podman run --rm -ti --user 1000000 alpine echo hi
Error: container create failed: container_linux.go:344: starting container process caused "setup user: invalid argument"
```
@@ -240,38 +253,38 @@ Error: container create failed: container_linux.go:344: starting container proce
Update the /etc/subuid and /etc/subgid with fields for users that look like:
-```
-cat /etc/subuid
+```console
+$ cat /etc/subuid
johndoe:100000:65536
test:165536:65536
```
-The format of this file is USERNAME:UID:RANGE
+The format of this file is `USERNAME:UID:RANGE`
-* username as listed in /etc/passwd or getpwent.
+* username as listed in `/etc/passwd` or `getpwent`.
* The initial uid allocated for the user.
* The size of the range of UIDs allocated for the user.
-This means johndoe is allocated UIDS 100000-165535 as well as his standard UID in the
-/etc/passwd file.
+This means johndoe is allocated UIDs 100000-165535 as well as his standard UID in the
+`/etc/passwd` file.
-You should ensure that each user has a unique range of uids, because overlapping UIDs,
+You should ensure that each user has a unique range of UIDs, because overlapping UIDs
would potentially allow one user to attack another user. In addition, make sure
-that the range of uids you allocate can cover all uids that the container
-requires. For example, if the container has a user with uid 10000, ensure you
+that the range of UIDs you allocate can cover all UIDs that the container
+requires. For example, if the container has a user with UID 10000, ensure you
have at least 10001 subuids, and if the container needs to be run as a user with
-uid 1000000, ensure you have at least 1000001 subuids.
+UID 1000000, ensure you have at least 1000001 subuids.
-You could also use the usermod program to assign UIDs to a user.
+You could also use the `usermod` program to assign UIDs to a user.
-If you update either the /etc/subuid or /etc/subgid file, you need to
+If you update either the `/etc/subuid` or `/etc/subgid` file, you need to
stop all running containers and kill the pause process. This is done
automatically by the `system migrate` command, which can also be used
to stop all the containers and kill the pause process.
-```
-usermod --add-subuids 200000-201000 --add-subgids 200000-201000 johndoe
-grep johndoe /etc/subuid /etc/subgid
+```console
+# usermod --add-subuids 200000-201000 --add-subgids 200000-201000 johndoe
+# grep johndoe /etc/subuid /etc/subgid
/etc/subuid:johndoe:200000:1001
/etc/subgid:johndoe:200000:1001
```
@@ -281,7 +294,7 @@ grep johndoe /etc/subuid /etc/subgid
When I change the graphroot storage location in storage.conf, the next time I
run Podman, I get an error like:
-```
+```console
# podman run -p 5000:5000 -it centos bash
bash: error while loading shared libraries: /lib64/libc.so.6: cannot apply additional memory protection after relocation: Permission denied
@@ -293,9 +306,9 @@ and points storage.conf at this directory.
#### Symptom
-SELinux blocks containers from using random locations for overlay storage.
+SELinux blocks containers from using arbitrary locations for overlay storage.
These directories need to be labeled with the same labels as if the content was
-under /var/lib/containers/storage.
+under `/var/lib/containers/storage`.
#### Solution
@@ -303,9 +316,9 @@ Tell SELinux about the new containers storage by setting up an equivalence recor
This tells SELinux to label content under the new path, as if it was stored
under `/var/lib/containers/storage`.
-```
-semanage fcontext -a -e /var/lib/containers /srv/containers
-restorecon -R -v /srv/containers
+```console
+# semanage fcontext -a -e /var/lib/containers /srv/containers
+# restorecon -R -v /srv/containers
```
The semanage command above tells SELinux to setup the default labeling of
@@ -326,8 +339,8 @@ If you pull an anonymous image, one that should not require credentials, you can
an `invalid username/password` error if you have credentials established in the
authentication file for the target container registry that are no longer valid.
-```
-podman run -it --rm docker://docker.io/library/alpine:latest ls
+```console
+$ podman run -it --rm docker://docker.io/library/alpine:latest ls
Trying to pull docker://docker.io/library/alpine:latest...ERRO[0000] Error pulling image ref //alpine:latest: Error determining manifest MIME type for docker://alpine:latest: unable to retrieve auth token: invalid username/password
Failed
Error: unable to pull docker://docker.io/library/alpine:latest: unable to pull image: Error determining manifest MIME type for docker://alpine:latest: unable to retrieve auth token: invalid username/password
@@ -376,7 +389,7 @@ error creating build container: Error committing the finished image: error addin
Choose one of the following:
* Setup containers/storage in a different directory, not on an NFS share.
* Create a directory on a local file system.
- * Edit `~/.config/containers/containers.conf` and point the `volume_path` option to that local directory. (Copy /usr/share/containers/containers.conf if ~/.config/containers/containers.conf does not exist)
+ * Edit `~/.config/containers/containers.conf` and point the `volume_path` option to that local directory. (Copy `/usr/share/containers/containers.conf` if `~/.config/containers/containers.conf` does not exist)
* Otherwise just run Podman as root, via `sudo podman`
### 15) Rootless 'podman build' fails when using OverlayFS:
@@ -386,7 +399,7 @@ when extracting an image. However, a rootless user does not have the privileges
#### Symptom
```console
-podman build --storage-driver overlay .
+$ podman build --storage-driver overlay .
STEP 1: FROM docker.io/ubuntu:xenial
Getting image source signatures
Copying blob edf72af6d627 done
@@ -411,13 +424,12 @@ Choose one of the following:
### 16) RHEL 7 and CentOS 7 based `init` images don't work with cgroup v2
-The systemd version shipped in RHEL 7 and CentOS 7 doesn't have support for cgroup v2. Support for cgroup V2 requires version 230 of systemd or newer, which
+The systemd version shipped in RHEL 7 and CentOS 7 doesn't have support for cgroup v2. Support for cgroup v2 requires version 230 of systemd or newer, which
was never shipped or supported on RHEL 7 or CentOS 7.
#### Symptom
```console
-
-sh# podman run --name test -d registry.access.redhat.com/rhel7-init:latest && sleep 10 && podman exec test systemctl status
+# podman run --name test -d registry.access.redhat.com/rhel7-init:latest && sleep 10 && podman exec test systemctl status
c8567461948439bce72fad3076a91ececfb7b14d469bfa5fbc32c6403185beff
Failed to get D-Bus connection: Operation not permitted
Error: non zero exit code: 1: OCI runtime error
@@ -426,10 +438,9 @@ Error: non zero exit code: 1: OCI runtime error
#### Solution
You'll need to either:
-* configure the host to use cgroup v1
+* configure the host to use cgroup v1. On Fedora you can do:
-```
-On Fedora you can do:
+```console
# dnf install -y grubby
# grubby --update-kernel=ALL --args="systemd.unified_cgroup_hierarchy=0"
# reboot
@@ -449,11 +460,9 @@ Once the user logs out all the containers exit.
#### Solution
You'll need to enable lingering for your user:
-* loginctl enable-linger $UID
-
-or as root if your user has not enough privileges.
-
-* sudo loginctl enable-linger $UID
+```console
+# loginctl enable-linger $UID
+```
### 18) `podman run` fails with "bpf create: permission denied error"
@@ -488,7 +497,7 @@ $ podman system migrate
Original command now returns
-```
+```console
$ podman unshare cat /proc/self/uid_map
0 1000 1
1 100000 65536
@@ -507,12 +516,13 @@ Any access inside the container is rejected with "Permission denied".
#### Solution
-The runtime uses `setgroups(2)` hence the process looses all additional groups
+The runtime uses `setgroups(2)` hence the process loses all additional groups
the non-root user has. Use the `--group-add keep-groups` flag to pass the
user's supplementary group access into the container. Currently only available
with the `crun` OCI runtime.
### 21) A rootless container running in detached mode is closed at logout
+<!-- This is the same as section 17 above and should be deleted -->
When running a container with a command like `podman run --detach httpd` as
a rootless user, the container is closed upon logout and is not kept running.
@@ -535,14 +545,14 @@ LOGINCTL(1), SYSTEMD(1)
### 22) Containers default detach keys conflict with shell history navigation
Podman defaults to `ctrl-p,ctrl-q` to detach from a running container. The
-bash and zsh shells default to ctrl-p for the displaying of the previous
+bash and zsh shells default to `ctrl-p` for the displaying of the previous
command. This causes issues when running a shell inside of a container.
#### Symptom
With the default detach key combo ctrl-p,ctrl-q, shell history navigation
(tested in bash and zsh) using ctrl-p to access the previous command will not
-display this previous command. Or anything else. Conmon is waiting for an
+display this previous command, or anything else. Conmon is waiting for an
additional character to see if the user wants to detach from the container.
Adding additional characters to the command will cause it to be displayed along
with the additional character. If the user types ctrl-p a second time the shell
@@ -553,23 +563,23 @@ display the 2nd to last command.
The solution to this is to change the default detach_keys. For example in order
to change the defaults to `ctrl-q,ctrl-q` use the `--detach-keys` option.
-```
-podman run -ti --detach-keys ctrl-q,ctrl-q fedora sh
+```console
+$ podman run -ti --detach-keys ctrl-q,ctrl-q fedora sh
```
To make this change the default for all containers, users can modify the
containers.conf file. This can be done simply in your home directory, by adding the
following lines to the user's containers.conf:
-```
-$ cat >> ~/.config/containers/containers.conf < _eof
+```console
+$ cat >> ~/.config/containers/containers.conf << _eof
[engine]
detach_keys="ctrl-q,ctrl-q"
_eof
```
In order to affect root running containers and all users, modify the system
-wide defaults in /etc/containers/containers.conf
+wide defaults in `/etc/containers/containers.conf`.
### 23) Container with exposed ports won't run in a pod
@@ -579,7 +589,7 @@ can not be run within a pod.
#### Symptom
-```
+```console
$ podman pod create --name srcview -p 127.0.0.1:3434:3434 -p 127.0.0.1:7080:7080 -p 127.0.0.1:3370:3370 4b2f4611fa2cbd60b3899b936368c2b3f4f0f68bc8e6593416e0ab8ecb0a3f1d
$ podman run --pod srcview --name src-expose -p 3434:3434 -v "${PWD}:/var/opt/localrepo":Z,ro sourcegraph/src-expose:latest serve /var/opt/localrepo
@@ -601,7 +611,7 @@ In the example from the symptom section, dropping the `-p 3434:3434` would allow
`podman run` command to complete, and the container as part of the pod would still have
access to that port. For example:
-```
+```console
$ podman run --pod srcview --name src-expose -v "${PWD}:/var/opt/localrepo":Z,ro sourcegraph/src-expose:latest serve /var/opt/localrepo
```
@@ -615,6 +625,7 @@ before they will run with the fuse filesystem in play.
When trying to run the container images found at quay.io/podman, quay.io/containers,
registry.access.redhat.com/ubi8 or other locations, an error will sometimes be returned:
+<!-- this would be better if it showed the command being run, and use ```console markup -->
```
ERRO error unmounting /var/lib/containers/storage/overlay/30c058cdadc888177361dd14a7ed7edab441c58525b341df321f07bc11440e68/merged: invalid argument
error mounting container "1ae176ca72b3da7c70af31db7434bcf6f94b07dbc0328bc7e4e8fc9579d0dc2e": error mounting build container "1ae176ca72b3da7c70af31db7434bcf6f94b07dbc0328bc7e4e8fc9579d0dc2e": error creating overlay mount to /var/lib/containers/storage/overlay/30c058cdadc888177361dd14a7ed7edab441c58525b341df321f07bc11440e68/merged: using mount program /usr/bin/fuse-overlayfs: fuse: device not found, try 'modprobe fuse' first
@@ -646,7 +657,7 @@ to mount volumes on them.
Run the container once in read/write mode, Podman will generate all of the FDs on the rootfs, and
from that point forward you can run with a read-only rootfs.
-```
+```console
$ podman run --rm --rootfs /path/to/rootfs true
```
@@ -654,13 +665,13 @@ The command above will create all the missing directories needed to run the cont
After that, it can be used in read only mode, by multiple containers at the same time:
-```
+```console
$ podman run --read-only --rootfs /path/to/rootfs ....
```
Another option is to use an Overlay Rootfs Mount:
-```
+```console
$ podman run --rootfs /path/to/rootfs:O ....
```
@@ -685,7 +696,9 @@ This means that CPU limit delegation is not enabled for the current user.
You can verify whether CPU limit delegation is enabled by running the following command:
- cat "/sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/cgroup.controllers"
+```console
+$ cat "/sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/cgroup.controllers"
+```
Example output might be:
@@ -697,8 +710,10 @@ not have permission to set CPU limits.
If you want to enable CPU limit delegation for all users, you can create the
file `/etc/systemd/system/user@.service.d/delegate.conf` with the contents:
- [Service]
- Delegate=memory pids cpu io
+```ini
+[Service]
+Delegate=memory pids cpu io
+```
After logging out and logging back in, you should have permission to set CPU
limits.
@@ -724,26 +739,33 @@ You can confirm this is the case by attempting to connect to the host via `podma
Create a new key using a supported algorithm e.g. ecdsa:
-`ssh-keygen -t ecdsa -f ~/.ssh/podman`
+```console
+$ ssh-keygen -t ecdsa -f ~/.ssh/podman
+```
Then copy the new id over:
-`ssh-copy-id -i ~/.ssh/podman.pub user@host`
+```console
+$ ssh-copy-id -i ~/.ssh/podman.pub user@host
+```
And then re-add the connection (removing the old one if necessary):
-`podman-remote system connection add myuser --identity ~/.ssh/podman ssh://user@host/run/user/1000/podman/podman.sock`
+```console
+$ podman-remote system connection add myuser --identity ~/.ssh/podman ssh://user@host/run/user/1000/podman/podman.sock
+```
And now this should work:
-`podman-remote info`
+```console
+$ podman-remote info
+```
----
### 28) Rootless CNI networking fails in RHEL with Podman v2.2.1 to v3.0.1.
A failure is encountered when trying to use networking on a rootless
container in Podman v2.2.1 through v3.0.1 on RHEL. This error does not
-occur on other Linux Distributions.
+occur on other Linux distributions.
#### Symptom
@@ -757,6 +779,7 @@ an Infra container image for CNI-in-slirp4netns must be created. The
instructions for building the Infra container image can be found for
v2.2.1 [here](https://github.com/containers/podman/tree/v2.2.1-rhel/contrib/rootless-cni-infra),
and for v3.0.1 [here](https://github.com/containers/podman/tree/v3.0.1-rhel/contrib/rootless-cni-infra).
+
### 29) Container related firewall rules are lost after reloading firewalld
Container network can't be reached after `firewall-cmd --reload` and `systemctl restart firewalld`. Running `podman network reload` will fix it, but it has to be done manually.
@@ -767,7 +790,7 @@ The firewall rules created by podman are lost when the firewall is reloaded.
[@ranjithrajaram](https://github.com/containers/podman/issues/5431#issuecomment-847758377) has created a systemd-hook to fix this issue
1) For "firewall-cmd --reload", create a systemd unit file with the following
-```
+```ini
[Unit]
Description=firewalld reload hook - run a hook script on firewalld reload
Wants=dbus.service
@@ -780,8 +803,9 @@ ExecStart=/bin/bash -c '/bin/busctl monitor --system --match "interface=org.fedo
[Install]
WantedBy=default.target
```
+
2) For "systemctl restart firewalld", create a systemd unit file with the following
-```
+```ini
[Unit]
Description=podman network reload
Wants=firewalld.service
@@ -796,11 +820,12 @@ ExecStart=/usr/bin/podman network reload --all
[Install]
WantedBy=default.target
```
-However, If you use busctl monitor then you can't get machine-readable output on `RHEL 8`.
+
+However, if you use `busctl monitor`, you can't get machine-readable output on RHEL 8,
since it doesn't have `busctl -j`, as mentioned here by [@yrro](https://github.com/containers/podman/issues/5431#issuecomment-896943018).
For RHEL 8, you can use the following one-liner bash script.
-```
+```ini
[Unit]
Description=Redo podman NAT rules after firewalld starts or reloads
Wants=dbus.service
@@ -815,13 +840,13 @@ Restart=Always
[Install]
WantedBy=default.target
```
-`busctl-monitor` is almost usable in `RHEL 8`, except that it always outputs two bogus events when it starts up,
+`busctl-monitor` is almost usable in RHEL 8, except that it always outputs two bogus events when it starts up,
one of which is (in its only machine-readable format) indistinguishable from the `NameOwnerChanged` that you get when firewalld starts up.
This means you would get an extra `podman network reload --all` when this unit starts.
Apart from this, you can use the following systemd service with the python3 code.
-```
+```ini
[Unit]
Description=Redo podman NAT rules after firewalld starts or reloads
Wants=dbus.service
@@ -837,7 +862,7 @@ Restart=always
WantedBy=default.target
```
The code reloads podman network twice when you use `systemctl restart firewalld`.
-```
+```python3
import dbus
from gi.repository import GLib
from dbus.mainloop.glib import DBusGMainLoop
@@ -892,6 +917,7 @@ def signal_listener():
if __name__ == "__main__":
signal_listener()
```
+
### 30) Podman run fails with `ERRO[0000] XDG_RUNTIME_DIR directory "/run/user/0" is not owned by the current user` or `Error: error creating tmpdir: mkdir /run/user/1000: permission denied`.
A failure is encountered when performing `podman run` with a warning `XDG_RUNTIME_DIR is pointing to a path which is not writable. Most likely podman will fail.`
@@ -901,30 +927,30 @@ A failure is encountered when performing `podman run` with a warning `XDG_RUNTIM
A rootless container is being invoked with cgroup configuration as `cgroupv2` for user with missing or invalid **systemd session**.
Example cases
-```bash
+```console
# su user1 -c 'podman images'
ERRO[0000] XDG_RUNTIME_DIR directory "/run/user/0" is not owned by the current user
```
-```bash
+```console
# su - user1 -c 'podman images'
Error: error creating tmpdir: mkdir /run/user/1000: permission denied
```
#### Solution
-Podman expects a valid login session for the `rootless+cgroupv2` use-case. Podman execution is expected to fail if the login session is not present. In most cases, podman will figure out a solution on its own but if `XDG_RUNTIME_DIR` is pointing to a path that is not writable execution will most fail. Typical scenarios of such cases are seen when users are trying to use Podman with `su - <user> -c '<podman-command>`, or `sudo -l` and badly configured systemd session.
+Podman expects a valid login session for the `rootless+cgroupv2` use-case. Podman execution is expected to fail if the login session is not present. In most cases, podman will figure out a solution on its own, but if `XDG_RUNTIME_DIR` is pointing to a path that is not writable, execution will most likely fail. Typical scenarios of such cases are seen when users are trying to use Podman with `su - <user> -c '<podman-command>'`, or `sudo -l` and a badly configured systemd session.
Alternatives:
* Execute Podman via __systemd-run__ that will first start a systemd login session:
- ```
- sudo systemd-run --machine=username@ --quiet --user --collect --pipe --wait podman run --rm docker.io/library/alpine echo hello
+ ```console
+ $ sudo systemd-run --machine=username@ --quiet --user --collect --pipe --wait podman run --rm docker.io/library/alpine echo hello
```
* Start an interactive shell in a systemd login session with the command `machinectl shell <username>@`
and then run Podman
- ```
+ ```console
$ sudo -i
# machinectl shell username@
Connected to the local host. Press ^] three times within 1s to exit session.
@@ -977,7 +1003,7 @@ from the user's subordinate UID and GID ranges on the host system.
An example
-```Text
+```console
$ mkdir dir1
$ chmod 777 dir1
$ podman run --rm -v ./dir1:/dir1:Z \
@@ -995,8 +1021,8 @@ If you want to read, chown, or remove such a file, enter a user
namespace. Instead of running commands such as `less dir1/a` or `rm dir1/a`, you
need to prepend the command-line with `podman unshare`, i.e.,
`podman unshare less dir1/a` or `podman unshare rm dir1/a`. To change the ownership
-of the file _dir1/a_ to your regular user's UID and GID, run `podman unshare chown 0:0 dir1/a`.
-A file having the ownership _0:0_ in the user namespace is owned by the regular
+of the file `dir1/a` to your regular user's UID and GID, run `podman unshare chown 0:0 dir1/a`.
+A file having the ownership `0:0` in the user namespace is owned by the regular
user on the host. To use Bash features, such as variable expansion and
globbing, you need to wrap the command with `bash -c`, e.g.
`podman unshare bash -c 'ls $HOME/dir1/a*'`.
@@ -1008,41 +1034,41 @@ between the container and the host. Let's try it out.
In the example above `ls -l` shows the UID 102002 and GID 102002. Set shell variables
-```Text
-$ uid_from_ls = 102002
-$ gid_from_ls = 102002
+```console
+$ uid_from_ls=102002
+$ gid_from_ls=102002
```
Set shell variables to the lowest subordinate UID and GID
-```Text
+```console
$ lowest_subuid=$(podman info --format "{{ (index .Host.IDMappings.UIDMap 1).HostID }}")
$ lowest_subgid=$(podman info --format "{{ (index .Host.IDMappings.GIDMap 1).HostID }}")
```
Compute the UID and GID inside the container that map to the owner of the created file on the host.
-```Text
+```console
$ uid=$(( $uid_from_ls - $lowest_subuid + 1))
$ gid=$(( $gid_from_ls - $lowest_subgid + 1))
```
(In the computation it was assumed that there is only one subuid range and one subgid range)
-```Text
+```console
$ echo $uid
2003
$ echo $gid
2003
```
-The computation shows that the UID is _2003_ and the GID is _2003_ inside the container.
+The computation shows that the UID is `2003` and the GID is `2003` inside the container.
This comes as no surprise as this is what was specified before with `--user=2003:2003`,
but the same computation could be used whenever a username is specified
-or the __--user__ option is not used.
+or the `--user` option is not used.
Run the container again but now with UIDs and GIDs mapped
-```Text
+```console
$ subuidSize=$(( $(podman info --format "{{ range .Host.IDMappings.UIDMap }}+{{.Size }}{{end }}" ) - 1 ))
$ subgidSize=$(( $(podman info --format "{{ range .Host.IDMappings.GIDMap }}+{{.Size }}{{end }}" ) - 1 ))
$ mkdir dir1
@@ -1066,18 +1092,17 @@ $ ls -l dir1/a
$
```
-In this example the __--user__ option specified a rootless user in the container.
-As the rootless user could also have been specified in the container image, e.g.,
+In this example the `--user` option specified a rootless user in the container.
+As the rootless user could also have been specified in the container image, e.g.
-```Text
+```console
$ podman image inspect --format "user: {{.User}}" IMAGE
user: hpc
-$
```
-the same problem could also occur even without specifying __--user__.
+the same problem could also occur even without specifying `--user`.
Another variant of the same problem could occur when using
-__--user=root:root__ (the default), but where the root user creates non-root owned files
+`--user=root:root` (the default), but where the root user creates non-root owned files
in some way (e.g. by creating them itself, or by switching the effective UID to
a rootless user and then creating files).
@@ -1097,7 +1122,7 @@ permissions of the regular user of the host.
for files, directories or devices passed in to the container
with `--device=..`,`--volume=..` or `--mount=..`, e.g.
-```Text
+```console
$ mkdir dir1
$ chmod 700 dir1
$ podman run --rm -v ./dir1:/dir1:Z \
@@ -1110,7 +1135,9 @@ ls: cannot open directory '/dir1': Permission denied
We follow essentially the same solution as in the previous
troubleshooting tip:
- "_Container creates a file that is not owned by the regular UID_"
+
+ Container creates a file that is not owned by the regular UID
+
but for this problem the container UID and GID can't be as
easily computed by mere addition and subtraction.
@@ -1118,42 +1145,41 @@ In other words, it might be more challenging to find out the UID and
the GID inside the container that we want to map to the regular
user on the host.
-If the __--user__ option is used together with a numerical UID and GID
+If the `--user` option is used together with a numerical UID and GID
to specify a rootless user, we already know the answer.
-If the __--user__ option is used together with a username and groupname,
-we could look up the UID and GID in the file _/etc/passwd_ of the container.
+If the `--user` option is used together with a username and groupname,
+we could look up the UID and GID in the file `/etc/passwd` of the container.
-If the container user is not set via __--user__ but instead from the
+If the container user is not set via `--user` but instead from the
container image, we could inspect the container image
-```Text
+```console
$ podman image inspect --format "user: {{.User}}" IMAGE
user: hpc
-$
```
-and then look it up in _/etc/passwd_ of the container.
+and then look it up in `/etc/passwd` of the container.
If the problem occurs in a container that is started to run as root but later
switches to an effective UID of a rootless user, it might be less
straightforward to find out the UID and the GID. Reading the
-_Containerfile_, _Dockerfile_ or the _/etc/passwd_ could give a clue.
+`Containerfile`, `Dockerfile` or the `/etc/passwd` could give a clue.
To run the container with the rootless container UID and GID mapped to the
user's regular UID and GID on the host follow these steps:
-Set the _uid_ and _gid_ shell variables in a Bash shell to the UID and GID
+Set the `uid` and `gid` shell variables in a Bash shell to the UID and GID
of the user that will be running inside the container, e.g.
-```Text
+```console
$ uid=2003
$ gid=2003
```
and run
-```Text
+```console
$ mkdir dir1
$ echo hello > dir1/file.txt
$ chmod 700 dir1/file.txt
@@ -1170,13 +1196,12 @@ $ podman run --rm \
--gidmap $(($gid+1)):$(($gid+1)):$(($subgidSize-$gid)) \
docker.io/library/alpine cat /dir1/file.txt
hello
-$
```
A side-note: Using [__--userns=keep-id__](https://docs.podman.io/en/latest/markdown/podman-run.1.html#userns-mode)
can sometimes be an alternative solution, but it forces the regular
user's host UID to be mapped to the same UID inside the container
-so it provides less flexibility than using __--uidmap__ and __--gidmap__.
+so it provides less flexibility than using `--uidmap` and `--gidmap`.
### 35) Images in the additional stores can be deleted even if there are containers using them
diff --git a/vendor/github.com/containerd/stargz-snapshotter/estargz/build.go b/vendor/github.com/containerd/stargz-snapshotter/estargz/build.go
index 9ee97fc91..0da3efe4c 100644
--- a/vendor/github.com/containerd/stargz-snapshotter/estargz/build.go
+++ b/vendor/github.com/containerd/stargz-snapshotter/estargz/build.go
@@ -26,10 +26,10 @@ import (
"archive/tar"
"bytes"
"compress/gzip"
+ "context"
"errors"
"fmt"
"io"
- "io/ioutil"
"os"
"path"
"runtime"
@@ -48,6 +48,7 @@ type options struct {
prioritizedFiles []string
missedPrioritizedFiles *[]string
compression Compression
+ ctx context.Context
}
type Option func(o *options) error
@@ -104,6 +105,14 @@ func WithCompression(compression Compression) Option {
}
}
+// WithContext returns an Option that stores ctx in the build options so that
+// it can be used for clean cancellation.
+func WithContext(ctx context.Context) Option {
+ return func(o *options) error {
+ o.ctx = ctx
+ return nil
+ }
+}
+
// Blob is an eStargz blob.
type Blob struct {
io.ReadCloser
@@ -139,12 +148,29 @@ func Build(tarBlob *io.SectionReader, opt ...Option) (_ *Blob, rErr error) {
opts.compression = newGzipCompressionWithLevel(opts.compressionLevel)
}
layerFiles := newTempFiles()
+ ctx := opts.ctx
+ if ctx == nil {
+ ctx = context.Background()
+ }
+ done := make(chan struct{})
+ defer close(done)
+ go func() {
+ select {
+ case <-done:
+ // nop
+ case <-ctx.Done():
+ layerFiles.CleanupAll()
+ }
+ }()
defer func() {
if rErr != nil {
if err := layerFiles.CleanupAll(); err != nil {
rErr = fmt.Errorf("failed to cleanup tmp files: %v: %w", err, rErr)
}
}
+ if cErr := ctx.Err(); cErr != nil {
+ rErr = fmt.Errorf("error from context %q: %w", cErr, rErr)
+ }
}()
tarBlob, err := decompressBlob(tarBlob, layerFiles)
if err != nil {
@@ -506,12 +532,13 @@ func newTempFiles() *tempFiles {
}
type tempFiles struct {
- files []*os.File
- filesMu sync.Mutex
+ files []*os.File
+ filesMu sync.Mutex
+ cleanupOnce sync.Once
}
func (tf *tempFiles) TempFile(dir, pattern string) (*os.File, error) {
- f, err := ioutil.TempFile(dir, pattern)
+ f, err := os.CreateTemp(dir, pattern)
if err != nil {
return nil, err
}
@@ -521,7 +548,14 @@ func (tf *tempFiles) TempFile(dir, pattern string) (*os.File, error) {
return f, nil
}
-func (tf *tempFiles) CleanupAll() error {
+// CleanupAll runs cleanupAll at most once, guarded by cleanupOnce, and returns
+// the error from that run. Calls after the first do nothing and return nil.
+func (tf *tempFiles) CleanupAll() (err error) {
+ tf.cleanupOnce.Do(func() {
+ err = tf.cleanupAll()
+ })
+ return
+}
+
+func (tf *tempFiles) cleanupAll() error {
tf.filesMu.Lock()
defer tf.filesMu.Unlock()
var allErr []error
diff --git a/vendor/github.com/containerd/stargz-snapshotter/estargz/estargz.go b/vendor/github.com/containerd/stargz-snapshotter/estargz/estargz.go
index 4b655c145..921e59ec6 100644
--- a/vendor/github.com/containerd/stargz-snapshotter/estargz/estargz.go
+++ b/vendor/github.com/containerd/stargz-snapshotter/estargz/estargz.go
@@ -31,7 +31,6 @@ import (
"fmt"
"hash"
"io"
- "io/ioutil"
"os"
"path"
"sort"
@@ -579,7 +578,7 @@ func (fr *fileReader) ReadAt(p []byte, off int64) (n int, err error) {
return 0, fmt.Errorf("fileReader.ReadAt.decompressor.Reader: %v", err)
}
defer dr.Close()
- if n, err := io.CopyN(ioutil.Discard, dr, off); n != off || err != nil {
+ if n, err := io.CopyN(io.Discard, dr, off); n != off || err != nil {
return 0, fmt.Errorf("discard of %d bytes = %v, %v", off, n, err)
}
return io.ReadFull(dr, p)
@@ -933,7 +932,7 @@ func (w *Writer) appendTar(r io.Reader, lossless bool) error {
}
}
}
- remainDest := ioutil.Discard
+ remainDest := io.Discard
if lossless {
remainDest = dst // Preserve the remaining bytes in lossless mode
}
diff --git a/vendor/github.com/containerd/stargz-snapshotter/estargz/testutil.go b/vendor/github.com/containerd/stargz-snapshotter/estargz/testutil.go
index 1de13a470..8f27dfb3e 100644
--- a/vendor/github.com/containerd/stargz-snapshotter/estargz/testutil.go
+++ b/vendor/github.com/containerd/stargz-snapshotter/estargz/testutil.go
@@ -31,7 +31,6 @@ import (
"errors"
"fmt"
"io"
- "io/ioutil"
"os"
"reflect"
"sort"
@@ -287,11 +286,11 @@ func isSameTarGz(t *testing.T, controller TestingController, a, b []byte) bool {
return false
}
- aFile, err := ioutil.ReadAll(aTar)
+ aFile, err := io.ReadAll(aTar)
if err != nil {
t.Fatal("failed to read tar payload of A")
}
- bFile, err := ioutil.ReadAll(bTar)
+ bFile, err := io.ReadAll(bTar)
if err != nil {
t.Fatal("failed to read tar payload of B")
}
diff --git a/vendor/github.com/containers/storage/drivers/overlay/check.go b/vendor/github.com/containers/storage/drivers/overlay/check.go
index 44b3515a8..48fb7a550 100644
--- a/vendor/github.com/containers/storage/drivers/overlay/check.go
+++ b/vendor/github.com/containers/storage/drivers/overlay/check.go
@@ -1,3 +1,4 @@
+//go:build linux
// +build linux
package overlay
@@ -11,6 +12,7 @@ import (
"syscall"
"github.com/containers/storage/pkg/archive"
+ "github.com/containers/storage/pkg/idtools"
"github.com/containers/storage/pkg/ioutils"
"github.com/containers/storage/pkg/mount"
"github.com/containers/storage/pkg/system"
@@ -218,3 +220,55 @@ func doesVolatile(d string) (bool, error) {
}()
return true, nil
}
+
+// supportsIdmappedLowerLayers reports whether the kernel can mount overlay on
+// top of an idmapped lower layer. It probes by creating a scratch directory
+// under home, idmapping an empty lower directory and attempting an overlay
+// mount that uses it; everything created is unmounted and removed on return.
+func supportsIdmappedLowerLayers(home string) (bool, error) {
+	scratch, err := ioutil.TempDir(home, "compat")
+	if err != nil {
+		return false, err
+	}
+	defer func() {
+		_ = os.RemoveAll(scratch)
+	}()
+
+	var (
+		merged      = filepath.Join(scratch, "merged")
+		lower       = filepath.Join(scratch, "lower")
+		lowerMapped = filepath.Join(scratch, "lower-mapped")
+		upper       = filepath.Join(scratch, "upper")
+		work        = filepath.Join(scratch, "work")
+	)
+
+	// Directory creation errors are deliberately ignored here.
+	for _, dir := range []string{merged, lower, lowerMapped, upper, work} {
+		_ = idtools.MkdirAs(dir, 0700, 0, 0)
+	}
+
+	// A single-entry mapping: ID 0 in the namespace maps to ID 0 on the host.
+	rootOnly := []idtools.IDMap{
+		{ContainerID: 0, HostID: 0, Size: 1},
+	}
+	pid, stopUserns, err := createUsernsProcess(rootOnly, rootOnly)
+	if err != nil {
+		return false, err
+	}
+	defer stopUserns()
+
+	if err := createIDMappedMount(lower, lowerMapped, int(pid)); err != nil {
+		return false, errors.Wrapf(err, "create mapped mount")
+	}
+	defer unix.Unmount(lowerMapped, unix.MNT_DETACH)
+
+	mountData := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lowerMapped, upper, work)
+	if err := unix.Mount("overlay", merged, "overlay", uintptr(0), mountData); err != nil {
+		return false, err
+	}
+	defer func() {
+		_ = unix.Unmount(merged, unix.MNT_DETACH)
+	}()
+	return true, nil
+}
diff --git a/vendor/github.com/containers/storage/drivers/overlay/idmapped_utils.go b/vendor/github.com/containers/storage/drivers/overlay/idmapped_utils.go
new file mode 100644
index 000000000..34fdc5790
--- /dev/null
+++ b/vendor/github.com/containers/storage/drivers/overlay/idmapped_utils.go
@@ -0,0 +1,160 @@
+//go:build linux
+// +build linux
+
+package overlay
+
+import (
+ "fmt"
+ "io/ioutil"
+ "os"
+ "syscall"
+ "unsafe"
+
+ "github.com/containers/storage/pkg/idtools"
+ "github.com/pkg/errors"
+ "golang.org/x/sys/unix"
+)
+
+// attr is the argument block passed by pointer to the mount_setattr syscall
+// (see mountSetAttr).
+// NOTE(review): the field layout presumably mirrors the kernel's
+// struct mount_attr — confirm against the kernel headers.
+type attr struct {
+ attrSet uint64
+ attrClr uint64
+ propagation uint64
+ userNs uint64 // user namespace file descriptor; filled in by createIDMappedMount
+}
+
+// Flag values for the mount syscalls wrapped in this file.
+const (
+ // _MOUNT_ATTR_IDMAP - Idmap mount to @userns_fd in struct mount_attr
+ _MOUNT_ATTR_IDMAP = 0x00100000 //nolint:golint
+
+ // _OPEN_TREE_CLONE - Clone the source path mount
+ _OPEN_TREE_CLONE = 0x00000001 //nolint:golint
+
+ // _MOVE_MOUNT_F_EMPTY_PATH - Move the path referenced by the fd
+ _MOVE_MOUNT_F_EMPTY_PATH = 0x00000004 //nolint:golint
+)
+
+// openTree is a wrapper for the open_tree syscall.
+//
+// It invokes open_tree with a directory descriptor argument of 0, the given
+// path and flags, and returns the file descriptor produced by the kernel.
+// If path cannot be converted to a C string, or if the syscall fails, a
+// non-nil error is returned and the descriptor must not be used.
+func openTree(path string, flags int) (fd int, err error) {
+	var _p0 *byte
+
+	if _p0, err = syscall.BytePtrFromString(path); err != nil {
+		return 0, err
+	}
+
+	r, _, e1 := syscall.Syscall6(uintptr(unix.SYS_OPEN_TREE), uintptr(0), uintptr(unsafe.Pointer(_p0)),
+		uintptr(flags), 0, 0, 0)
+	if e1 != 0 {
+		// Report the errno to the caller. Previously it was assigned to err
+		// but then discarded by an unconditional "return int(r), nil", so
+		// callers treated a failed open_tree as success.
+		return -1, e1
+	}
+	return int(r), nil
+}
+
+// moveMount is a wrapper for the move_mount syscall. It moves the mount
+// referenced by fdTree to target, passing an empty source path together with
+// _MOVE_MOUNT_F_EMPTY_PATH. The errno is returned as the error on failure.
+func moveMount(fdTree int, target string) (err error) {
+	targetPtr, err := syscall.BytePtrFromString(target)
+	if err != nil {
+		return err
+	}
+	emptyPtr, err := syscall.BytePtrFromString("")
+	if err != nil {
+		return err
+	}
+
+	_, _, errno := syscall.Syscall6(uintptr(unix.SYS_MOVE_MOUNT),
+		uintptr(fdTree), uintptr(unsafe.Pointer(emptyPtr)),
+		0, uintptr(unsafe.Pointer(targetPtr)), uintptr(_MOVE_MOUNT_F_EMPTY_PATH), 0)
+	if errno != 0 {
+		return errno
+	}
+	return nil
+}
+
+// mountSetAttr is a wrapper for the mount_setattr syscall. dfd, path, flags,
+// attr and size are handed to the kernel as given; when the call fails the
+// errno is returned as the error.
+func mountSetAttr(dfd int, path string, flags uint, attr *attr, size uint) (err error) {
+	pathPtr, err := syscall.BytePtrFromString(path)
+	if err != nil {
+		return err
+	}
+
+	_, _, errno := syscall.Syscall6(uintptr(unix.SYS_MOUNT_SETATTR), uintptr(dfd), uintptr(unsafe.Pointer(pathPtr)),
+		uintptr(flags), uintptr(unsafe.Pointer(attr)), uintptr(size), 0)
+	if errno != 0 {
+		return errno
+	}
+	return nil
+}
+
+// createIDMappedMount creates an ID-mapped bind mount of source at target,
+// using the user namespace of the process identified by pid. target is
+// created with mode 0700 if it does not exist yet.
+func createIDMappedMount(source, target string, pid int) error {
+	nsPath := fmt.Sprintf("/proc/%d/ns/user", pid)
+	nsFile, err := os.Open(nsPath)
+	if err != nil {
+		return errors.Wrapf(err, "unable to get user ns file descriptor for %q", nsPath)
+	}
+	defer nsFile.Close()
+
+	// Only the idmap attribute is set; nothing is cleared and propagation is
+	// left at zero.
+	mountAttr := attr{
+		attrSet: _MOUNT_ATTR_IDMAP,
+		userNs:  uint64(nsFile.Fd()),
+	}
+
+	// Clone the source mount (recursively) and keep a descriptor for the clone.
+	treeFd, err := openTree(source, _OPEN_TREE_CLONE|unix.AT_RECURSIVE)
+	if err != nil {
+		return err
+	}
+	defer unix.Close(treeFd)
+
+	err = mountSetAttr(treeFd, "", unix.AT_EMPTY_PATH|unix.AT_RECURSIVE,
+		&mountAttr, uint(unsafe.Sizeof(mountAttr)))
+	if err != nil {
+		return err
+	}
+	if err := os.Mkdir(target, 0700); err != nil && !os.IsExist(err) {
+		return err
+	}
+	return moveMount(treeFd, target)
+}
+
+// createUsernsProcess forks the current process and creates a user namespace using the specified
+// mappings. It returns the pid of the new process and a cleanup function that kills and reaps
+// that process; the caller is expected to invoke it once the namespace is no longer needed.
+// On failure it returns -1, a nil cleanup function and the error.
+func createUsernsProcess(uidMaps []idtools.IDMap, gidMaps []idtools.IDMap) (int, func(), error) {
+ // Raw clone(2) with CLONE_NEWUSER; SIGCHLD is passed as the termination signal.
+ pid, _, err := syscall.Syscall6(uintptr(unix.SYS_CLONE), unix.CLONE_NEWUSER|uintptr(unix.SIGCHLD), 0, 0, 0, 0, 0)
+ if err != 0 {
+ return -1, nil, err
+ }
+ if pid == 0 {
+ // Child branch: ask to be sent SIGKILL when the parent dies, then block forever.
+ _ = unix.Prctl(unix.PR_SET_PDEATHSIG, uintptr(unix.SIGKILL), 0, 0, 0)
+ // just wait for the SIGKILL
+ for {
+ syscall.Syscall6(uintptr(unix.SYS_PAUSE), 0, 0, 0, 0, 0, 0)
+ }
+ }
+ // Parent branch from here on. cleanupFunc kills the child and waits for it.
+ cleanupFunc := func() {
+ unix.Kill(int(pid), unix.SIGKILL)
+ _, _ = unix.Wait4(int(pid), nil, 0, nil)
+ }
+ // writeMappings writes one "containerID hostID size" line per mapping to /proc/<pid>/<fname>.
+ writeMappings := func(fname string, idmap []idtools.IDMap) error {
+ mappings := ""
+ for _, m := range idmap {
+ mappings = mappings + fmt.Sprintf("%d %d %d\n", m.ContainerID, m.HostID, m.Size)
+ }
+ return ioutil.WriteFile(fmt.Sprintf("/proc/%d/%s", pid, fname), []byte(mappings), 0600)
+ }
+ // If either map cannot be written, the child is torn down before returning the error.
+ if err := writeMappings("uid_map", uidMaps); err != nil {
+ cleanupFunc()
+ return -1, nil, err
+ }
+ if err := writeMappings("gid_map", gidMaps); err != nil {
+ cleanupFunc()
+ return -1, nil, err
+ }
+
+ return int(pid), cleanupFunc, nil
+}
diff --git a/vendor/github.com/containers/storage/drivers/overlay/overlay.go b/vendor/github.com/containers/storage/drivers/overlay/overlay.go
index a780ef5da..c911acb5c 100644
--- a/vendor/github.com/containers/storage/drivers/overlay/overlay.go
+++ b/vendor/github.com/containers/storage/drivers/overlay/overlay.go
@@ -39,7 +39,6 @@ import (
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
- "github.com/vbatts/tar-split/tar/storage"
"golang.org/x/sys/unix"
)
@@ -121,6 +120,8 @@ type Driver struct {
supportsVolatile *bool
usingMetacopy bool
locker *locker.Locker
+
+ supportsIDMappedMounts *bool
}
type additionalLayerStore struct {
@@ -205,6 +206,26 @@ func checkSupportVolatile(home, runhome string) (bool, error) {
return usingVolatile, nil
}
+// checkAndRecordIDMappedSupport checks and stores if the kernel supports mounting overlay on top of an
+// idmapped lower layer. A result previously recorded for runhome (looked up through cachedFeatureCheck) is returned without probing again; a recorded negative result is returned as false plus an error built from the recorded text.
+func checkAndRecordIDMappedSupport(home, runhome string) (bool, error) {
+ feature := "idmapped-lower-dir"
+ overlayCacheResult, overlayCacheText, err := cachedFeatureCheck(runhome, feature)
+ if err == nil { // a recorded result exists: use it instead of probing
+ if overlayCacheResult {
+ logrus.Debugf("Cached value indicated that idmapped mounts for overlay are supported")
+ return true, nil
+ }
+ logrus.Debugf("Cached value indicated that idmapped mounts for overlay are not supported")
+ return false, errors.New(overlayCacheText)
+ }
+ supportsIDMappedMounts, err := supportsIdmappedLowerLayers(home) // no recorded result: probe, then record the outcome below
+ if err2 := cachedFeatureRecord(runhome, feature, supportsIDMappedMounts, ""); err2 != nil { // NOTE(review): an empty text is recorded, so a cached negative result presumably yields an empty error message - confirm
+ return false, errors.Wrap(err2, "recording overlay idmapped mounts support status")
+ }
+ return supportsIDMappedMounts, err // the probe's own error (if any) is passed through to the caller
+}
+
func checkAndRecordOverlaySupport(fsMagic graphdriver.FsMagic, home, runhome string) (bool, error) {
var supportsDType bool
@@ -1485,6 +1506,51 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
}
}
+ if d.supportsIDmappedMounts() && len(options.UidMaps) > 0 && len(options.GidMaps) > 0 {
+ var newAbsDir []string
+ mappedRoot := filepath.Join(d.home, id, "mapped")
+ if err := os.MkdirAll(mappedRoot, 0700); err != nil {
+ return "", err
+ }
+
+ pid, cleanupFunc, err := createUsernsProcess(options.UidMaps, options.GidMaps)
+ if err != nil {
+ return "", err
+ }
+ defer cleanupFunc()
+
+ idMappedMounts := make(map[string]string)
+
+ // rewrite the lower dirs to their idmapped mount.
+ c := 0
+ for _, absLower := range absLowers {
+ mappedMountSrc := getMappedMountRoot(absLower)
+
+ root, found := idMappedMounts[mappedMountSrc]
+ if !found {
+ root = filepath.Join(mappedRoot, fmt.Sprintf("%d", c))
+ c++
+ if err := createIDMappedMount(mappedMountSrc, root, int(pid)); err != nil {
+ return "", errors.Wrapf(err, "create mapped mount for %q on %q", mappedMountSrc, root)
+ }
+ idMappedMounts[mappedMountSrc] = root
+
+ // overlay takes a reference on the mount, so it is safe to unmount
+ // the mapped idmounts as soon as the final overlay file system is mounted.
+ defer unix.Unmount(root, unix.MNT_DETACH)
+ }
+
+ // relative path to the layer through the id mapped mount
+ rel, err := filepath.Rel(mappedMountSrc, absLower)
+ if err != nil {
+ return "", err
+ }
+
+ newAbsDir = append(newAbsDir, filepath.Join(root, rel))
+ }
+ absLowers = newAbsDir
+ }
+
var opts string
if readWrite {
opts = fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", strings.Join(absLowers, ":"), diffDir, workdir)
@@ -1587,6 +1653,18 @@ func (d *Driver) Put(id string) error {
unmounted := false
+ mappedRoot := filepath.Join(d.home, id, "mapped")
+ // It should not happen, but cleanup any mapped mount if it was leaked.
+ if _, err := os.Stat(mappedRoot); err == nil {
+ mounts, err := ioutil.ReadDir(mappedRoot)
+ if err == nil {
+ // Go through all of the mapped mounts.
+ for _, m := range mounts {
+ _ = unix.Unmount(filepath.Join(mappedRoot, m.Name()), unix.MNT_DETACH)
+ }
+ }
+ }
+
if d.options.mountProgram != "" {
// Attempt to unmount the FUSE mount using either fusermount or fusermount3.
// If they fail, fallback to unix.Unmount
@@ -1664,11 +1742,24 @@ func (d *Driver) getWhiteoutFormat() archive.WhiteoutFormat {
return whiteoutFormat
}
-type fileGetNilCloser struct {
- storage.FileGetter
+type overlayFileGetter struct { // overlayFileGetter looks files up across several diff directories
+ diffDirs []string // directories tried by Get, in slice order
+}
+// Get opens path relative to each entry of diffDirs in order and returns the first file that opens successfully.
+func (g *overlayFileGetter) Get(path string) (io.ReadCloser, error) {
+ for _, d := range g.diffDirs {
+ f, err := os.Open(filepath.Join(d, path))
+ if err == nil {
+ return f, nil // the caller owns f and is responsible for closing it
+ }
+ }
+ if len(g.diffDirs) > 0 { // nothing opened: retry the first directory so the caller receives that open's result (normally its error)
+ return os.Open(filepath.Join(g.diffDirs[0], path))
+ }
+ return nil, fmt.Errorf("%s: %w", path, os.ErrNotExist) // no directories configured at all
}
-func (f fileGetNilCloser) Close() error {
+func (g *overlayFileGetter) Close() error { // the getter itself holds no open resources, so there is nothing to release
return nil
}
@@ -1677,13 +1768,18 @@ func (d *Driver) getStagingDir() string {
}
// DiffGetter returns a FileGetCloser that can read files from the directory that
-// contains files for the layer differences. Used for direct access for tar-split.
+// contains files for the layer differences, either for this layer, or one of our
+// lowers if we're just a template directory. Used for direct access for tar-split. Errors from getDiffPath or getLowerDiffPaths are returned unchanged.
func (d *Driver) DiffGetter(id string) (graphdriver.FileGetCloser, error) {
p, err := d.getDiffPath(id)
if err != nil {
return nil, err
}
- return fileGetNilCloser{storage.NewPathFileGetter(p)}, nil
+ paths, err := d.getLowerDiffPaths(id) // presumably the diff directories of the lower layers - confirm against getLowerDiffPaths
+ if err != nil {
+ return nil, err
+ }
+ return &overlayFileGetter{diffDirs: append([]string{p}, paths...)}, nil // this layer's own diff path comes first, so it is tried before the paths from getLowerDiffPaths
}
// CleanupStagingDirectory cleanups the staging directory.
@@ -1958,12 +2054,31 @@ func (d *Driver) UpdateLayerIDMap(id string, toContainer, toHost *idtools.IDMapp
return nil
}
+// supportsIDmappedMounts returns whether the kernel supports using idmapped mounts with
+// overlay lower layers. The first call runs checkAndRecordIDMappedSupport and stores its boolean result in d.supportsIDMappedMounts; later calls return the stored value.
+func (d *Driver) supportsIDmappedMounts() bool {
+ if d.supportsIDMappedMounts != nil { // already determined by an earlier call
+ return *d.supportsIDMappedMounts
+ }
+
+ supportsIDMappedMounts, err := checkAndRecordIDMappedSupport(d.home, d.runhome)
+ d.supportsIDMappedMounts = &supportsIDMappedMounts // stored even when err != nil, so the check is not repeated on later calls
+ if err == nil {
+ return supportsIDMappedMounts
+ }
+ logrus.Debugf("Check for idmapped mounts support %v", err) // a failed check is only logged at debug level and reported as "not supported"
+ return false
+}
+
// SupportsShifting tells whether the driver support shifting of the UIDs/GIDs in an userNS
func (d *Driver) SupportsShifting() bool {
if os.Getenv("_TEST_FORCE_SUPPORT_SHIFTING") == "yes-please" {
return true
}
- return d.options.mountProgram != ""
+ if d.options.mountProgram != "" { // a configured mount program is reported as supporting shifting
+ return true
+ }
+ return d.supportsIDmappedMounts() // without a mount program, shifting support depends on idmapped mount support
}
// dumbJoin is more or less a dumber version of filepath.Join, but one which
@@ -2132,3 +2247,15 @@ func redirectDiffIfAdditionalLayer(diffPath string) (string, error) {
}
return diffPath, nil
}
+
+// getMappedMountRoot is a heuristic that calculates the parent directory where
+// the idmapped mount should be applied: the directory containing path, or that directory's parent when the containing directory is named linkDir.
+// It is useful to minimize the number of idmapped mounts and at the same time use
+// a common path as long as possible to reduce the length of the mount data argument.
+func getMappedMountRoot(path string) string {
+ dirName := filepath.Dir(path)
+ if filepath.Base(dirName) == linkDir { // path sits directly inside a linkDir directory: go one level further up
+ return filepath.Dir(dirName)
+ }
+ return dirName
+}
diff --git a/vendor/github.com/containers/storage/go.mod b/vendor/github.com/containers/storage/go.mod
index 8645bbf60..1915ea65d 100644
--- a/vendor/github.com/containers/storage/go.mod
+++ b/vendor/github.com/containers/storage/go.mod
@@ -6,7 +6,7 @@ require (
github.com/BurntSushi/toml v1.1.0
github.com/Microsoft/go-winio v0.5.2
github.com/Microsoft/hcsshim v0.9.2
- github.com/containerd/stargz-snapshotter/estargz v0.11.3
+ github.com/containerd/stargz-snapshotter/estargz v0.11.4
github.com/cyphar/filepath-securejoin v0.2.3
github.com/docker/go-units v0.4.0
github.com/google/go-intervals v0.0.2
@@ -16,7 +16,7 @@ require (
github.com/klauspost/pgzip v1.2.5
github.com/mattn/go-shellwords v1.0.12
github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible
- github.com/moby/sys/mountinfo v0.6.0
+ github.com/moby/sys/mountinfo v0.6.1
github.com/opencontainers/go-digest v1.0.0
github.com/opencontainers/runc v1.1.1
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417
diff --git a/vendor/github.com/containers/storage/go.sum b/vendor/github.com/containers/storage/go.sum
index 34924ada5..cd5bf3b97 100644
--- a/vendor/github.com/containers/storage/go.sum
+++ b/vendor/github.com/containers/storage/go.sum
@@ -176,8 +176,8 @@ github.com/containerd/nri v0.0.0-20201007170849-eb1350a75164/go.mod h1:+2wGSDGFY
github.com/containerd/nri v0.0.0-20210316161719-dbaa18c31c14/go.mod h1:lmxnXF6oMkbqs39FiCt1s0R2HSMhcLel9vNL3m4AaeY=
github.com/containerd/nri v0.1.0/go.mod h1:lmxnXF6oMkbqs39FiCt1s0R2HSMhcLel9vNL3m4AaeY=
github.com/containerd/stargz-snapshotter/estargz v0.4.1/go.mod h1:x7Q9dg9QYb4+ELgxmo4gBUeJB0tl5dqH1Sdz0nJU1QM=
-github.com/containerd/stargz-snapshotter/estargz v0.11.3 h1:k2kN16Px6LYuv++qFqK+JTcYqc8bEVxzGpf8/gFBL5M=
-github.com/containerd/stargz-snapshotter/estargz v0.11.3/go.mod h1:7vRJIcImfY8bpifnMjt+HTJoQxASq7T28MYbP15/Nf0=
+github.com/containerd/stargz-snapshotter/estargz v0.11.4 h1:LjrYUZpyOhiSaU7hHrdR82/RBoxfGWSaC0VeSSMXqnk=
+github.com/containerd/stargz-snapshotter/estargz v0.11.4/go.mod h1:7vRJIcImfY8bpifnMjt+HTJoQxASq7T28MYbP15/Nf0=
github.com/containerd/ttrpc v0.0.0-20190828154514-0e0f228740de/go.mod h1:PvCDdDGpgqzQIzDW1TphrGLssLDZp2GuS+X5DkEJB8o=
github.com/containerd/ttrpc v0.0.0-20190828172938-92c8520ef9f8/go.mod h1:PvCDdDGpgqzQIzDW1TphrGLssLDZp2GuS+X5DkEJB8o=
github.com/containerd/ttrpc v0.0.0-20191028202541-4f1b8fe65a5c/go.mod h1:LPm1u0xBw8r8NOKoOdNMeVHSawSsltak+Ihv+etqsE8=
@@ -467,8 +467,8 @@ github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQ
github.com/moby/sys/mountinfo v0.4.0/go.mod h1:rEr8tzG/lsIZHBtN/JjGG+LMYx9eXgW2JI+6q0qou+A=
github.com/moby/sys/mountinfo v0.4.1/go.mod h1:rEr8tzG/lsIZHBtN/JjGG+LMYx9eXgW2JI+6q0qou+A=
github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU=
-github.com/moby/sys/mountinfo v0.6.0 h1:gUDhXQx58YNrpHlK4nSL+7y2pxFZkUcXqzFDKWdC0Oo=
-github.com/moby/sys/mountinfo v0.6.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU=
+github.com/moby/sys/mountinfo v0.6.1 h1:+H/KnGEAGRpTrEAqNVQ2AM3SiwMgJUt/TXj+Z8cmCIc=
+github.com/moby/sys/mountinfo v0.6.1/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU=
github.com/moby/sys/symlink v0.1.0/go.mod h1:GGDODQmbFOjFsXvfLVn3+ZRxkch54RkSiGqsZeMYowQ=
github.com/moby/term v0.0.0-20200312100748-672ec06f55cd/go.mod h1:DdlQx2hp0Ss5/fLikoLlEeIYiATotOjgB//nb973jeo=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
diff --git a/vendor/github.com/containers/storage/layers.go b/vendor/github.com/containers/storage/layers.go
index 8a5616dfc..5e9930ea7 100644
--- a/vendor/github.com/containers/storage/layers.go
+++ b/vendor/github.com/containers/storage/layers.go
@@ -725,12 +725,32 @@ func (r *layerStore) Put(id string, parentLayer *Layer, names []string, mountLab
parent = parentLayer.ID
}
var parentMappings, templateIDMappings, oldMappings *idtools.IDMappings
+ var (
+ templateMetadata string
+ templateCompressedDigest digest.Digest
+ templateCompressedSize int64
+ templateUncompressedDigest digest.Digest
+ templateUncompressedSize int64
+ templateCompressionType archive.Compression
+ templateUIDs, templateGIDs []uint32
+ templateTSdata []byte
+ )
if moreOptions.TemplateLayer != "" {
+ var tserr error
templateLayer, ok := r.lookup(moreOptions.TemplateLayer)
if !ok {
return nil, -1, ErrLayerUnknown
}
+ templateMetadata = templateLayer.Metadata
templateIDMappings = idtools.NewIDMappingsFromMaps(templateLayer.UIDMap, templateLayer.GIDMap)
+ templateCompressedDigest, templateCompressedSize = templateLayer.CompressedDigest, templateLayer.CompressedSize
+ templateUncompressedDigest, templateUncompressedSize = templateLayer.UncompressedDigest, templateLayer.UncompressedSize
+ templateCompressionType = templateLayer.CompressionType
+ templateUIDs, templateGIDs = append([]uint32{}, templateLayer.UIDs...), append([]uint32{}, templateLayer.GIDs...)
+ templateTSdata, tserr = ioutil.ReadFile(r.tspath(templateLayer.ID))
+ if tserr != nil && !os.IsNotExist(tserr) {
+ return nil, -1, tserr
+ }
} else {
templateIDMappings = &idtools.IDMappings{}
}
@@ -775,17 +795,43 @@ func (r *layerStore) Put(id string, parentLayer *Layer, names []string, mountLab
return nil, -1, err
}
}
+ if len(templateTSdata) > 0 {
+ if err := os.MkdirAll(filepath.Dir(r.tspath(id)), 0o700); err != nil {
+ // We don't have a record of this layer, but at least
+ // try to clean it up underneath us.
+ if err2 := r.driver.Remove(id); err2 != nil {
+ logrus.Errorf("While recovering from a failure creating in UpdateLayerIDMap, error deleting layer %#v: %v", id, err2)
+ }
+ return nil, -1, err
+ }
+ if err = ioutils.AtomicWriteFile(r.tspath(id), templateTSdata, 0o600); err != nil {
+ // We don't have a record of this layer, but at least
+ // try to clean it up underneath us.
+ if err2 := r.driver.Remove(id); err2 != nil {
+ logrus.Errorf("While recovering from a failure creating in UpdateLayerIDMap, error deleting layer %#v: %v", id, err2)
+ }
+ return nil, -1, err
+ }
+ }
if err == nil {
layer = &Layer{
- ID: id,
- Parent: parent,
- Names: names,
- MountLabel: mountLabel,
- Created: time.Now().UTC(),
- Flags: make(map[string]interface{}),
- UIDMap: copyIDMap(moreOptions.UIDMap),
- GIDMap: copyIDMap(moreOptions.GIDMap),
- BigDataNames: []string{},
+ ID: id,
+ Parent: parent,
+ Names: names,
+ MountLabel: mountLabel,
+ Metadata: templateMetadata,
+ Created: time.Now().UTC(),
+ CompressedDigest: templateCompressedDigest,
+ CompressedSize: templateCompressedSize,
+ UncompressedDigest: templateUncompressedDigest,
+ UncompressedSize: templateUncompressedSize,
+ CompressionType: templateCompressionType,
+ UIDs: templateUIDs,
+ GIDs: templateGIDs,
+ Flags: make(map[string]interface{}),
+ UIDMap: copyIDMap(moreOptions.UIDMap),
+ GIDMap: copyIDMap(moreOptions.GIDMap),
+ BigDataNames: []string{},
}
r.layers = append(r.layers, layer)
r.idindex.Add(id)
@@ -872,7 +918,6 @@ func (r *layerStore) Mounted(id string) (int, error) {
}
func (r *layerStore) Mount(id string, options drivers.MountOpts) (string, error) {
-
// check whether options include ro option
hasReadOnlyOpt := func(opts []string) bool {
for _, item := range opts {
diff --git a/vendor/github.com/containers/storage/store.go b/vendor/github.com/containers/storage/store.go
index 6b40b68ca..30d3e8715 100644
--- a/vendor/github.com/containers/storage/store.go
+++ b/vendor/github.com/containers/storage/store.go
@@ -2452,6 +2452,10 @@ func (s *store) DeleteImage(id string, commit bool) (layers []string, err error)
}
layer := image.TopLayer
layersToRemoveMap := make(map[string]struct{})
+ layersToRemove = append(layersToRemove, image.MappedTopLayers...)
+ for _, mappedTopLayer := range image.MappedTopLayers {
+ layersToRemoveMap[mappedTopLayer] = struct{}{}
+ }
for layer != "" {
if rcstore.Exists(layer) {
break
@@ -2483,12 +2487,6 @@ func (s *store) DeleteImage(id string, commit bool) (layers []string, err error)
if hasChildrenNotBeingRemoved() {
break
}
- if layer == image.TopLayer {
- layersToRemove = append(layersToRemove, image.MappedTopLayers...)
- for _, mappedTopLayer := range image.MappedTopLayers {
- layersToRemoveMap[mappedTopLayer] = struct{}{}
- }
- }
layersToRemove = append(layersToRemove, layer)
layersToRemoveMap[layer] = struct{}{}
layer = parent
diff --git a/vendor/github.com/containers/storage/utils.go b/vendor/github.com/containers/storage/utils.go
index cec377f26..37d4b79b0 100644
--- a/vendor/github.com/containers/storage/utils.go
+++ b/vendor/github.com/containers/storage/utils.go
@@ -42,13 +42,14 @@ func validateMountOptions(mountOptions []string) error {
}
func applyNameOperation(oldNames []string, opParameters []string, op updateNameOperation) ([]string, error) {
- result := make([]string, 0)
+ var result []string
switch op {
case setNames:
// ignore all old names and just return new names
- return dedupeNames(opParameters), nil
+ result = opParameters
case removeNames:
// remove given names from old names
+ result = make([]string, 0, len(oldNames))
for _, name := range oldNames {
// only keep names in final result which do not intersect with input names
// basically `result = oldNames - opParameters`
@@ -62,11 +63,10 @@ func applyNameOperation(oldNames []string, opParameters []string, op updateNameO
result = append(result, name)
}
}
- return dedupeNames(result), nil
case addNames:
+ result = make([]string, 0, len(opParameters)+len(oldNames))
result = append(result, opParameters...)
result = append(result, oldNames...)
- return dedupeNames(result), nil
default:
return result, errInvalidUpdateNameOperation
}
diff --git a/vendor/github.com/moby/sys/mountinfo/mounted_unix.go b/vendor/github.com/moby/sys/mountinfo/mounted_unix.go
index 45ddad236..242f82cc7 100644
--- a/vendor/github.com/moby/sys/mountinfo/mounted_unix.go
+++ b/vendor/github.com/moby/sys/mountinfo/mounted_unix.go
@@ -4,7 +4,6 @@
package mountinfo
import (
- "fmt"
"os"
"path/filepath"
@@ -33,13 +32,13 @@ func mountedByStat(path string) (bool, error) {
func normalizePath(path string) (realPath string, err error) {
if realPath, err = filepath.Abs(path); err != nil {
- return "", fmt.Errorf("unable to get absolute path for %q: %w", path, err)
+ return "", err // the filepath.Abs error is returned as-is, without added context
}
if realPath, err = filepath.EvalSymlinks(realPath); err != nil {
- return "", fmt.Errorf("failed to canonicalise path for %q: %w", path, err)
+ return "", err // the filepath.EvalSymlinks error is returned as-is, without added context
}
if _, err := os.Stat(realPath); err != nil {
- return "", fmt.Errorf("failed to stat target of %q: %w", path, err)
+ return "", err // the os.Stat error is returned as-is, without added context
}
return realPath, nil
}
diff --git a/vendor/modules.txt b/vendor/modules.txt
index d6d26b561..1f878d2ef 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -69,7 +69,7 @@ github.com/containerd/containerd/log
github.com/containerd/containerd/pkg/userns
github.com/containerd/containerd/platforms
github.com/containerd/containerd/sys
-# github.com/containerd/stargz-snapshotter/estargz v0.11.3
+# github.com/containerd/stargz-snapshotter/estargz v0.11.4
github.com/containerd/stargz-snapshotter/estargz
github.com/containerd/stargz-snapshotter/estargz/errorutil
# github.com/containernetworking/cni v1.0.1
@@ -233,7 +233,7 @@ github.com/containers/psgo/internal/dev
github.com/containers/psgo/internal/host
github.com/containers/psgo/internal/proc
github.com/containers/psgo/internal/process
-# github.com/containers/storage v1.39.1-0.20220412073713-ea4008e14877
+# github.com/containers/storage v1.39.1-0.20220414183333-eea4e0f5f1f9
## explicit
github.com/containers/storage
github.com/containers/storage/drivers
@@ -479,7 +479,7 @@ github.com/mistifyio/go-zfs
github.com/mitchellh/mapstructure
# github.com/moby/sys/mount v0.2.0
github.com/moby/sys/mount
-# github.com/moby/sys/mountinfo v0.6.0
+# github.com/moby/sys/mountinfo v0.6.1
github.com/moby/sys/mountinfo
# github.com/moby/term v0.0.0-20210619224110-3f7ff695adc6
## explicit