diff options
author | Giuseppe Scrivano <gscrivan@redhat.com> | 2019-03-21 12:18:42 +0100 |
---|---|---|
committer | Giuseppe Scrivano <gscrivan@redhat.com> | 2019-03-29 14:04:44 +0100 |
commit | 849548ffb8e958e901317eceffdcc2d918cafd8d (patch) | |
tree | df4ed8253470a4598d4c2a5561b60305f5401c8a | |
parent | f7e72bc86aff2ff986290f190309deceb7f22099 (diff) | |
download | podman-849548ffb8e958e901317eceffdcc2d918cafd8d.tar.gz podman-849548ffb8e958e901317eceffdcc2d918cafd8d.tar.bz2 podman-849548ffb8e958e901317eceffdcc2d918cafd8d.zip |
userns: do not use an intermediate mount namespace
We have an issue in the current implementation where the cleanup
process is not able to umount the storage as it is running in a
separate namespace.
Simplify the implementation for user namespaces by not using an
intermediate mount namespace. For doing it, we need to relax the
permissions on the parent directories and allow browsing
them. Containers that are running without a user namespace, will still
maintain mode 0700 on their directory.
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
-rw-r--r-- | libpod/container.go | 12 | ||||
-rw-r--r-- | libpod/container_internal.go | 47 | ||||
-rw-r--r-- | libpod/container_internal_linux.go | 25 | ||||
-rw-r--r-- | libpod/oci_linux.go | 91 | ||||
-rw-r--r-- | libpod/runtime_ctr.go | 6 |
5 files changed, 43 insertions, 138 deletions
diff --git a/libpod/container.go b/libpod/container.go index 6d80a9bf4..739406e42 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -146,18 +146,12 @@ type ContainerState struct { ConfigPath string `json:"configPath,omitempty"` // RunDir is a per-boot directory for container content RunDir string `json:"runDir,omitempty"` - // DestinationRunDir is where the files in RunDir will be accessible for the container. - // It is different than RunDir when using userNS - DestinationRunDir string `json:"destinationRunDir,omitempty"` // Mounted indicates whether the container's storage has been mounted // for use Mounted bool `json:"mounted,omitempty"` // Mountpoint contains the path to the container's mounted storage as given - // by containers/storage. It can be different than RealMountpoint when - // usernamespaces are used + // by containers/storage. Mountpoint string `json:"mountPoint,omitempty"` - // RealMountpoint contains the path to the container's mounted storage - RealMountpoint string `json:"realMountPoint,omitempty"` // StartedTime is the time the container was started StartedTime time.Time `json:"startedTime,omitempty"` // FinishedTime is the time the container finished executing @@ -186,10 +180,6 @@ type ContainerState struct { // the path of the file on disk outside the container BindMounts map[string]string `json:"bindMounts,omitempty"` - // UserNSRoot is the directory used as root for the container when using - // user namespaces. - UserNSRoot string `json:"userNSRoot,omitempty"` - // ExtensionStageHooks holds hooks which will be executed by libpod // and not delegated to the OCI runtime. ExtensionStageHooks map[string][]spec.Hook `json:"extensionStageHooks,omitempty"` diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 7a90bc7d4..daa32007a 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -310,23 +310,12 @@ func (c *Container) setupStorage(ctx context.Context) error { } if !rootless.IsRootless() && (len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0) { - info, err := os.Stat(c.runtime.config.TmpDir) - if err != nil { - return errors.Wrapf(err, "cannot stat `%s`", c.runtime.config.TmpDir) - } - if err := os.Chmod(c.runtime.config.TmpDir, info.Mode()|0111); err != nil { - return errors.Wrapf(err, "cannot chmod `%s`", c.runtime.config.TmpDir) - } - root := filepath.Join(c.runtime.config.TmpDir, "containers-root", c.ID()) - if err := os.MkdirAll(root, 0755); err != nil { - return errors.Wrapf(err, "error creating userNS tmpdir for container %s", c.ID()) - } - if err := os.Chown(root, c.RootUID(), c.RootGID()); err != nil { + if err := os.Chown(containerInfo.RunDir, c.RootUID(), c.RootGID()); err != nil { return err } - c.state.UserNSRoot, err = filepath.EvalSymlinks(root) - if err != nil { - return errors.Wrapf(err, "failed to eval symlinks for %s", root) + + if err := os.Chown(containerInfo.Dir, c.RootUID(), c.RootGID()); err != nil { + return err } } @@ -334,10 +323,6 @@ func (c *Container) setupStorage(ctx context.Context) error { c.config.MountLabel = containerInfo.MountLabel c.config.StaticDir = containerInfo.Dir c.state.RunDir = containerInfo.RunDir - c.state.DestinationRunDir = c.state.RunDir - if c.state.UserNSRoot != "" { - c.state.DestinationRunDir = filepath.Join(c.state.UserNSRoot, "rundir") - } // Set the default Entrypoint and Command if containerInfo.Config != nil { @@ -372,12 +357,6 @@ func (c *Container) teardownStorage() error { return errors.Wrapf(err, "failed to cleanup container %s storage", c.ID()) } - if c.state.UserNSRoot != "" { - if err := os.RemoveAll(c.state.UserNSRoot); err != nil { - return errors.Wrapf(err, "error removing userns root %q", c.state.UserNSRoot) - } - } - if err := c.runtime.storageService.DeleteContainer(c.ID()); err != nil { // If the container has already been removed, warn but do not // error - we wanted it gone, it is already gone. @@ -432,6 +411,7 @@ func (c *Container) refresh() error { if err != nil { return errors.Wrapf(err, "error retrieving temporary directory for container %s", c.ID()) } + c.state.RunDir = dir if len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0 { info, err := os.Stat(c.runtime.config.TmpDir) @@ -448,16 +428,6 @@ func (c *Container) refresh() error { if err := os.Chown(root, c.RootUID(), c.RootGID()); err != nil { return err } - c.state.UserNSRoot, err = filepath.EvalSymlinks(root) - if err != nil { - return errors.Wrapf(err, "failed to eval symlinks for %s", root) - } - } - - c.state.RunDir = dir - c.state.DestinationRunDir = c.state.RunDir - if c.state.UserNSRoot != "" { - c.state.DestinationRunDir = filepath.Join(c.state.UserNSRoot, "rundir") } // We need to pick up a new lock @@ -1260,7 +1230,7 @@ func (c *Container) writeStringToRundir(destFile, output string) (string, error) return "", err } - return filepath.Join(c.state.DestinationRunDir, destFile), nil + return filepath.Join(c.state.RunDir, destFile), nil } // appendStringToRundir appends the provided string to the runtimedir file @@ -1277,7 +1247,7 @@ func (c *Container) appendStringToRundir(destFile, output string) (string, error return "", errors.Wrapf(err, "unable to write %s", destFileName) } - return filepath.Join(c.state.DestinationRunDir, destFile), nil + return filepath.Join(c.state.RunDir, destFile), nil } // Save OCI spec to disk, replacing any existing specs for the container @@ -1410,6 +1380,9 @@ func (c *Container) mount() (string, error) { if err != nil { return "", errors.Wrapf(err, "error resolving storage path for container %s", c.ID()) } + if err := os.Chown(mountPoint, c.RootUID(), c.RootGID()); err != nil { + return "", errors.Wrapf(err, "cannot chown %s to %d:%d", mountPoint, c.RootUID(), c.RootGID()) + } return mountPoint, nil } diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 23de1aa01..504d6c135 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -25,7 +25,6 @@ import ( "github.com/containers/libpod/pkg/lookup" "github.com/containers/libpod/pkg/resolvconf" "github.com/containers/libpod/pkg/rootless" - "github.com/containers/storage/pkg/idtools" "github.com/cyphar/filepath-securejoin" "github.com/opencontainers/runc/libcontainer/user" spec "github.com/opencontainers/runtime-spec/specs-go" @@ -99,11 +98,6 @@ func (c *Container) prepare() (err error) { // Finish up mountStorage c.state.Mounted = true c.state.Mountpoint = mountPoint - if c.state.UserNSRoot == "" { - c.state.RealMountpoint = c.state.Mountpoint - } else { - c.state.RealMountpoint = filepath.Join(c.state.UserNSRoot, "mountpoint") - } logrus.Debugf("Created root filesystem for container %s at %s", c.ID(), c.state.Mountpoint) }() @@ -220,13 +214,6 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { } } m.Options = options - - // If we are using a user namespace, we will use an intermediate - // directory to bind mount volumes - if c.state.UserNSRoot != "" && strings.HasPrefix(m.Source, c.runtime.config.VolumePath) { - newSourceDir := filepath.Join(c.state.UserNSRoot, "volumes") - m.Source = strings.Replace(m.Source, c.runtime.config.VolumePath, newSourceDir, 1) - } } g.SetProcessSelinuxLabel(c.ProcessLabel()) @@ -313,13 +300,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { } } - if c.config.Rootfs == "" { - if err := idtools.MkdirAllAs(c.state.RealMountpoint, 0700, c.RootUID(), c.RootGID()); err != nil { - return nil, err - } - } - - g.SetRootPath(c.state.RealMountpoint) + g.SetRootPath(c.state.Mountpoint) g.AddAnnotation(crioAnnotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano)) g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal)) @@ -820,7 +801,7 @@ func (c *Container) makeBindMounts() error { } // Add Secret Mounts - secretMounts := secrets.SecretMountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.DefaultMountsFile, c.state.DestinationRunDir, c.RootUID(), c.RootGID(), rootless.IsRootless()) + secretMounts := secrets.SecretMountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.DefaultMountsFile, c.state.RunDir, c.RootUID(), c.RootGID(), rootless.IsRootless()) for _, mount := range secretMounts { if _, ok := c.state.BindMounts[mount.Destination]; !ok { c.state.BindMounts[mount.Destination] = mount.Source @@ -907,7 +888,7 @@ func (c *Container) generateResolvConf() (string, error) { return "", err } - return filepath.Join(c.state.DestinationRunDir, "resolv.conf"), nil + return filepath.Join(c.state.RunDir, "resolv.conf"), nil } // generateHosts creates a containers hosts file diff --git a/libpod/oci_linux.go b/libpod/oci_linux.go index f85c5ee62..8c0abad80 100644 --- a/libpod/oci_linux.go +++ b/libpod/oci_linux.go @@ -3,17 +3,14 @@ package libpod import ( - "fmt" "os" "os/exec" "path/filepath" - "runtime" "strings" - "sync" + "syscall" "github.com/containerd/cgroups" "github.com/containers/libpod/utils" - "github.com/containers/storage/pkg/idtools" spec "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" @@ -62,72 +59,40 @@ func newPipe() (parent *os.File, child *os.File, err error) { return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil } -// CreateContainer creates a container in the OCI runtime -// TODO terminal support for container -// Presently just ignoring conmon opts related to it -func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreOptions *ContainerCheckpointOptions) (err error) { - if ctr.state.UserNSRoot == "" { - // no need of an intermediate mount ns - return r.createOCIContainer(ctr, cgroupParent, restoreOptions) - } - var wg sync.WaitGroup - wg.Add(1) - go func() { - defer wg.Done() - runtime.LockOSThread() - - var fd *os.File - fd, err = os.Open(fmt.Sprintf("/proc/%d/task/%d/ns/mnt", os.Getpid(), unix.Gettid())) +// makeAccessible changes the path permission and each parent directory to have --x--x--x +func makeAccessible(path string, uid, gid int) error { + for ; path != "/"; path = filepath.Dir(path) { + st, err := os.Stat(path) if err != nil { - return - } - defer fd.Close() - - // create a new mountns on the current thread - if err = unix.Unshare(unix.CLONE_NEWNS); err != nil { - return - } - defer unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS) - - // don't spread our mounts around - err = unix.Mount("/", "/", "none", unix.MS_REC|unix.MS_SLAVE, "") - if err != nil { - return - } - err = unix.Mount(ctr.state.Mountpoint, ctr.state.RealMountpoint, "none", unix.MS_BIND, "") - if err != nil { - return + if os.IsNotExist(err) { + return nil + } + return err } - if err := idtools.MkdirAllAs(ctr.state.DestinationRunDir, 0700, ctr.RootUID(), ctr.RootGID()); err != nil { - return + if int(st.Sys().(*syscall.Stat_t).Uid) == uid && int(st.Sys().(*syscall.Stat_t).Gid) == gid { + continue } - - err = unix.Mount(ctr.state.RunDir, ctr.state.DestinationRunDir, "none", unix.MS_BIND, "") - if err != nil { - return + if st.Mode()&0111 != 0111 { + if err := os.Chmod(path, os.FileMode(st.Mode()|0111)); err != nil { + return err + } } + } + return nil +} - if ctr.state.UserNSRoot != "" { - _, err := os.Stat(ctr.runtime.config.VolumePath) - if err != nil && !os.IsNotExist(err) { - return - } - if err == nil { - volumesTarget := filepath.Join(ctr.state.UserNSRoot, "volumes") - if err := idtools.MkdirAs(volumesTarget, 0700, ctr.RootUID(), ctr.RootGID()); err != nil { - return - } - if err = unix.Mount(ctr.runtime.config.VolumePath, volumesTarget, "none", unix.MS_BIND, ""); err != nil { - return - } +// CreateContainer creates a container in the OCI runtime +// TODO terminal support for container +// Presently just ignoring conmon opts related to it +func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreOptions *ContainerCheckpointOptions) (err error) { + if len(ctr.config.IDMappings.UIDMap) != 0 || len(ctr.config.IDMappings.GIDMap) != 0 { + for _, i := range []string{ctr.state.RunDir, ctr.runtime.config.TmpDir, ctr.config.StaticDir, ctr.state.Mountpoint, ctr.runtime.config.VolumePath} { + if err := makeAccessible(i, ctr.RootUID(), ctr.RootGID()); err != nil { + return err } } - - err = r.createOCIContainer(ctr, cgroupParent, restoreOptions) - }() - wg.Wait() - - return err + } + return r.createOCIContainer(ctr, cgroupParent, restoreOptions) } func rpmVersion(path string) string { diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index 17340f56d..506aee477 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -201,11 +201,7 @@ func (r *Runtime) newContainer(ctx context.Context, rSpec *spec.Spec, options .. } if !MountExists(ctr.config.Spec.Mounts, "/dev/shm") && ctr.config.ShmDir == "" { - if ctr.state.UserNSRoot == "" { - ctr.config.ShmDir = filepath.Join(ctr.bundlePath(), "shm") - } else { - ctr.config.ShmDir = filepath.Join(ctr.state.UserNSRoot, "shm") - } + ctr.config.ShmDir = filepath.Join(ctr.bundlePath(), "shm") if err := os.MkdirAll(ctr.config.ShmDir, 0700); err != nil { if !os.IsExist(err) { return nil, errors.Wrapf(err, "unable to create shm %q dir", ctr.config.ShmDir) |