From 522a7197a88ab4e3730387df33f22e445f0f8f3c Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Tue, 24 Apr 2018 16:41:42 +0200 Subject: podman, userNS: configure an intermediate mount namespace Signed-off-by: Giuseppe Scrivano Closes: #690 Approved by: mheon --- libpod/container.go | 13 +++++- libpod/container_internal.go | 101 +++++++++++++++++++++++++++++++++++++------ libpod/oci.go | 50 +++++++++++++++++++++ libpod/runtime.go | 8 ++++ libpod/runtime_ctr.go | 6 ++- pkg/secrets/secrets.go | 10 ++--- 6 files changed, 168 insertions(+), 20 deletions(-) diff --git a/libpod/container.go b/libpod/container.go index 5fa27af58..e35a620ed 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -115,11 +115,18 @@ type containerState struct { ConfigPath string `json:"configPath,omitempty"` // RunDir is a per-boot directory for container content RunDir string `json:"runDir,omitempty"` + // DestinationRunDir is where the files in RunDir will be accessible for the container. + // It is different than RunDir when using userNS + DestinationRunDir string `json:"destinationRunDir,omitempty"` // Mounted indicates whether the container's storage has been mounted // for use Mounted bool `json:"mounted,omitempty"` - // MountPoint contains the path to the container's mounted storage + // Mountpoint contains the path to the container's mounted storage as given + // by containers/storage. 
It can be different than RealMountpoint when + // usernamespaces are used Mountpoint string `json:"mountPoint,omitempty"` + // RealMountpoint contains the path to the container's mounted storage + RealMountpoint string `json:"realMountPoint,omitempty"` // StartedTime is the time the container was started StartedTime time.Time `json:"startedTime,omitempty"` // FinishedTime is the time the container finished executing @@ -152,6 +159,10 @@ type containerState struct { // This maps the path the file will be mounted to in the container to // the path of the file on disk outside the container BindMounts map[string]string `json:"bindMounts,omitempty"` + + // UserNSRoot is the directory used as root for the container when using + // user namespaces. + UserNSRoot string `json:"userNSRoot,omitempty"` } // ExecSession contains information on an active exec session diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 18b56e23c..5417c8a4f 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -16,6 +16,7 @@ import ( "github.com/containers/storage" "github.com/containers/storage/pkg/archive" "github.com/containers/storage/pkg/chrootarchive" + "github.com/containers/storage/pkg/idtools" "github.com/docker/docker/pkg/mount" "github.com/docker/docker/pkg/stringid" spec "github.com/opencontainers/runtime-spec/specs-go" @@ -196,8 +197,33 @@ func (c *Container) setupStorage(ctx context.Context) error { return errors.Wrapf(err, "error creating container storage") } + if len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0 { + info, err := os.Stat(c.runtime.config.TmpDir) + if err != nil { + return errors.Wrapf(err, "cannot stat `%s`", c.runtime.config.TmpDir) + } + if err := os.Chmod(c.runtime.config.TmpDir, info.Mode()|0111); err != nil { + return errors.Wrapf(err, "cannot chmod `%s`", c.runtime.config.TmpDir) + } + root := filepath.Join(c.runtime.config.TmpDir, "containers-root", c.ID()) + if err := 
os.MkdirAll(root, 0755); err != nil { + return errors.Wrapf(err, "error creating userNS tmpdir for container %s", c.ID()) + } + if err := os.Chown(root, c.RootUID(), c.RootGID()); err != nil { + return err + } + c.state.UserNSRoot, err = filepath.EvalSymlinks(root) + if err != nil { + return errors.Wrapf(err, "failed to eval symlinks for %s", root) + } + } + c.config.StaticDir = containerInfo.Dir c.state.RunDir = containerInfo.RunDir + c.state.DestinationRunDir = c.state.RunDir + if c.state.UserNSRoot != "" { + c.state.DestinationRunDir = filepath.Join(c.state.UserNSRoot, "rundir") + } // Set the default Entrypoint and Command c.config.Entrypoint = containerInfo.Config.Config.Entrypoint @@ -230,6 +256,12 @@ func (c *Container) teardownStorage() error { return errors.Wrapf(err, "failed to cleanup container %s storage", c.ID()) } + if c.state.UserNSRoot != "" { + if err := os.RemoveAll(c.state.UserNSRoot); err != nil { + return errors.Wrapf(err, "error removing userns root %q", c.state.UserNSRoot) + } + } + if err := c.runtime.storageService.DeleteContainer(c.ID()); err != nil { // If the container has already been removed, warn but do not // error - we wanted it gone, it is already gone. 
@@ -261,9 +293,35 @@ func (c *Container) refresh() error { if err != nil { return errors.Wrapf(err, "error retrieving temporary directory for container %s", c.ID()) } + + if len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0 { + info, err := os.Stat(c.runtime.config.TmpDir) + if err != nil { + return errors.Wrapf(err, "cannot stat `%s`", c.runtime.config.TmpDir) + } + if err := os.Chmod(c.runtime.config.TmpDir, info.Mode()|0111); err != nil { + return errors.Wrapf(err, "cannot chmod `%s`", c.runtime.config.TmpDir) + } + root := filepath.Join(c.runtime.config.TmpDir, "containers-root", c.ID()) + if err := os.MkdirAll(root, 0755); err != nil { + return errors.Wrapf(err, "error creating userNS tmpdir for container %s", c.ID()) + } + if err := os.Chown(root, c.RootUID(), c.RootGID()); err != nil { + return err + } + c.state.UserNSRoot, err = filepath.EvalSymlinks(root) + if err != nil { + return errors.Wrapf(err, "failed to eval symlinks for %s", root) + } + } + c.state.RunDir = dir + c.state.DestinationRunDir = c.state.RunDir + if c.state.UserNSRoot != "" { + c.state.DestinationRunDir = filepath.Join(c.state.UserNSRoot, "rundir") + } - if err := c.runtime.state.SaveContainer(c); err != nil { + if err := c.save(); err != nil { return errors.Wrapf(err, "error refreshing state for container %s", c.ID()) } @@ -600,6 +658,10 @@ func (c *Container) mountStorage() (err error) { return errors.Wrapf(err, "unable to determine if %q is mounted", c.config.ShmDir) } + if err := os.Chown(c.config.ShmDir, c.RootUID(), c.RootGID()); err != nil { + return err + } + if !mounted { shmOptions := fmt.Sprintf("mode=1777,size=%d", c.config.ShmSize) if err := unix.Mount("shm", c.config.ShmDir, "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, @@ -607,7 +669,7 @@ func (c *Container) mountStorage() (err error) { return errors.Wrapf(err, "failed to mount shm tmpfs %q", c.config.ShmDir) } if err := os.Chown(c.config.ShmDir, c.RootUID(), c.RootGID()); err != nil { - 
return err + return errors.Wrapf(err, "failed to chown %s", c.config.ShmDir) } } @@ -617,6 +679,11 @@ func (c *Container) mountStorage() (err error) { } c.state.Mounted = true c.state.Mountpoint = mountPoint + if c.state.UserNSRoot == "" { + c.state.RealMountpoint = c.state.Mountpoint + } else { + c.state.RealMountpoint = filepath.Join(c.state.UserNSRoot, "mountpoint") + } logrus.Debugf("Created root filesystem for container %s at %s", c.ID(), c.state.Mountpoint) @@ -716,6 +783,10 @@ func (c *Container) cleanup() error { // Make standard bind mounts to include in the container func (c *Container) makeBindMounts() error { + if err := os.Chown(c.state.RunDir, c.RootUID(), c.RootGID()); err != nil { + return errors.Wrapf(err, "error chown %s", c.state.RunDir) + } + if c.state.BindMounts == nil { c.state.BindMounts = make(map[string]string) } @@ -724,11 +795,8 @@ func (c *Container) makeBindMounts() error { c.state.BindMounts["/dev/shm"] = c.config.ShmDir // Make /etc/resolv.conf - if path, ok := c.state.BindMounts["/etc/resolv.conf"]; ok { + if _, ok := c.state.BindMounts["/etc/resolv.conf"]; ok { // If it already exists, delete so we can recreate - if err := os.Remove(path); err != nil { - return errors.Wrapf(err, "error removing resolv.conf for container %s", c.ID()) - } delete(c.state.BindMounts, "/etc/resolv.conf") } newResolv, err := c.generateResolvConf() @@ -738,11 +806,8 @@ func (c *Container) makeBindMounts() error { c.state.BindMounts["/etc/resolv.conf"] = newResolv // Make /etc/hosts - if path, ok := c.state.BindMounts["/etc/hosts"]; ok { + if _, ok := c.state.BindMounts["/etc/hosts"]; ok { // If it already exists, delete so we can recreate - if err := os.Remove(path); err != nil { - return errors.Wrapf(err, "error removing hosts file for container %s", c.ID()) - } delete(c.state.BindMounts, "/etc/hosts") } newHosts, err := c.generateHosts() @@ -773,7 +838,7 @@ func (c *Container) makeBindMounts() error { } // Add Secret Mounts - secretMounts := 
secrets.SecretMountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.DefaultMountsFile, c.RootUID(), c.RootGID()) + secretMounts := secrets.SecretMountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.DefaultMountsFile, c.state.DestinationRunDir, c.RootUID(), c.RootGID()) for _, mount := range secretMounts { if _, ok := c.state.BindMounts[mount.Destination]; !ok { c.state.BindMounts[mount.Destination] = mount.Source @@ -786,6 +851,11 @@ func (c *Container) makeBindMounts() error { // writeStringToRundir copies the provided file to the runtimedir func (c *Container) writeStringToRundir(destFile, output string) (string, error) { destFileName := filepath.Join(c.state.RunDir, destFile) + + if err := os.Remove(destFileName); err != nil && !os.IsNotExist(err) { + return "", errors.Wrapf(err, "error removing %s for container %s", destFile, c.ID()) + } + f, err := os.Create(destFileName) if err != nil { return "", errors.Wrapf(err, "unable to create %s", destFileName) @@ -802,7 +872,8 @@ func (c *Container) writeStringToRundir(destFile, output string) (string, error) if err := label.Relabel(destFileName, c.config.MountLabel, false); err != nil { return "", err } - return destFileName, nil + + return filepath.Join(c.state.DestinationRunDir, destFile), nil } type resolvConf struct { @@ -1035,7 +1106,11 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { } } - g.SetRootPath(c.state.Mountpoint) + if err := idtools.MkdirAllAs(c.state.RealMountpoint, 0700, c.RootUID(), c.RootGID()); err != nil { + return nil, err + } + + g.SetRootPath(c.state.RealMountpoint) g.AddAnnotation(crioAnnotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano)) g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal)) diff --git a/libpod/oci.go b/libpod/oci.go index b59525de7..9c842f2c9 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -8,11 +8,14 @@ import ( "os" "os/exec" "path/filepath" + "runtime" 
"strconv"
+	"sync"
 	"syscall"
 	"time"
 
 	"github.com/containerd/cgroups"
+	"github.com/containers/storage/pkg/idtools"
 	"github.com/coreos/go-systemd/activation"
 	spec "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/pkg/errors"
@@ -186,6 +189,53 @@ func waitPidsStop(pids []int, timeout time.Duration) error {
 // TODO terminal support for container
 // Presently just ignoring conmon opts related to it
 func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) (err error) {
+	if ctr.state.UserNSRoot == "" {
+		// no need of an intermediate mount ns
+		return r.createOCIContainer(ctr, cgroupParent)
+	}
+	// the userNS bind mounts are set up in a private, per-thread mount ns
+	var wg sync.WaitGroup
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		runtime.LockOSThread()
+		// assign (never :=) the outer err so failures reach the caller
+		var fd *os.File
+		fd, err = os.Open(fmt.Sprintf("/proc/%d/task/%d/ns/mnt", os.Getpid(), unix.Gettid()))
+		if err != nil {
+			return
+		}
+		defer fd.Close()
+		// create a new mountns on the current thread
+		if err = unix.Unshare(unix.CLONE_NEWNS); err != nil {
+			return
+		}
+		defer unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS)
+
+		// don't spread our mounts around
+		err = unix.Mount("/", "/", "none", unix.MS_REC|unix.MS_SLAVE, "")
+		if err != nil {
+			return
+		}
+		err = unix.Mount(ctr.state.Mountpoint, ctr.state.RealMountpoint, "none", unix.MS_BIND, "")
+		if err != nil {
+			return
+		}
+		if err = idtools.MkdirAllAs(ctr.state.DestinationRunDir, 0700, ctr.RootUID(), ctr.RootGID()); err != nil {
+			return
+		}
+		err = unix.Mount(ctr.state.RunDir, ctr.state.DestinationRunDir, "none", unix.MS_BIND, "")
+		if err != nil {
+			return
+		}
+		err = r.createOCIContainer(ctr, cgroupParent)
+	}()
+	wg.Wait()
+
+	return err
+}
+
+func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string) (err error) {
 	var stderrBuf bytes.Buffer
 
 	parentPipe, childPipe, err := newPipe()
diff --git a/libpod/runtime.go b/libpod/runtime.go
index 9de70da1b..dde81a59d 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -340,6 +340,14 @@ func makeRuntime(runtime *Runtime) error {
SignaturePolicyPath: runtime.config.SignaturePolicyPath, } + // Create the tmpDir + if err := os.MkdirAll(runtime.config.TmpDir, 0751); err != nil { + // The directory is allowed to exist + if !os.IsExist(err) { + return errors.Wrapf(err, "error creating tmpdir %s", runtime.config.TmpDir) + } + } + // Make an OCI runtime to perform container operations ociRuntime, err := newOCIRuntime("runc", runtime.ociRuntimePath, runtime.conmonPath, runtime.config.ConmonEnvVars, diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index 3f27df5b9..4708e0c8f 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -76,7 +76,11 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options .. ctr.config.LogPath = filepath.Join(ctr.config.StaticDir, "ctr.log") } if ctr.config.ShmDir == "" { - ctr.config.ShmDir = filepath.Join(ctr.bundlePath(), "shm") + if ctr.state.UserNSRoot == "" { + ctr.config.ShmDir = filepath.Join(ctr.bundlePath(), "shm") + } else { + ctr.config.ShmDir = filepath.Join(ctr.state.UserNSRoot, "shm") + } if err := os.MkdirAll(ctr.config.ShmDir, 0700); err != nil { if !os.IsExist(err) { return nil, errors.Wrapf(err, "unable to create shm %q dir", ctr.config.ShmDir) diff --git a/pkg/secrets/secrets.go b/pkg/secrets/secrets.go index 29ccd4592..0e48c7a12 100644 --- a/pkg/secrets/secrets.go +++ b/pkg/secrets/secrets.go @@ -128,11 +128,11 @@ func getMountsMap(path string) (string, string, error) { // SecretMounts copies, adds, and mounts the secrets to the container root filesystem func SecretMounts(mountLabel, containerWorkingDir, mountFile string) []rspec.Mount { - return SecretMountsWithUIDGID(mountLabel, containerWorkingDir, mountFile, 0, 0) + return SecretMountsWithUIDGID(mountLabel, containerWorkingDir, mountFile, containerWorkingDir, 0, 0) } // SecretMountsWithUIDGID specifies the uid/gid of the owner -func SecretMountsWithUIDGID(mountLabel, containerWorkingDir, mountFile string, uid, gid int) []rspec.Mount { +func 
SecretMountsWithUIDGID(mountLabel, containerWorkingDir, mountFile, mountPrefix string, uid, gid int) []rspec.Mount { var ( secretMounts []rspec.Mount mountFiles []string @@ -146,7 +146,7 @@ func SecretMountsWithUIDGID(mountLabel, containerWorkingDir, mountFile string, u mountFiles = append(mountFiles, mountFile) } for _, file := range mountFiles { - mounts, err := addSecretsFromMountsFile(file, mountLabel, containerWorkingDir, uid, gid) + mounts, err := addSecretsFromMountsFile(file, mountLabel, containerWorkingDir, mountPrefix, uid, gid) if err != nil { logrus.Warnf("error mounting secrets, skipping: %v", err) } @@ -175,7 +175,7 @@ func rchown(chowndir string, uid, gid int) error { // addSecretsFromMountsFile copies the contents of host directory to container directory // and returns a list of mounts -func addSecretsFromMountsFile(filePath, mountLabel, containerWorkingDir string, uid, gid int) ([]rspec.Mount, error) { +func addSecretsFromMountsFile(filePath, mountLabel, containerWorkingDir, mountPrefix string, uid, gid int) ([]rspec.Mount, error) { var mounts []rspec.Mount defaultMountsPaths := getMounts(filePath) for _, path := range defaultMountsPaths { @@ -226,7 +226,7 @@ func addSecretsFromMountsFile(filePath, mountLabel, containerWorkingDir string, } m := rspec.Mount{ - Source: ctrDirOnHost, + Source: filepath.Join(mountPrefix, ctrDir), Destination: ctrDir, Type: "bind", Options: []string{"bind"}, -- cgit v1.2.3-54-g00ecf