From a090301bbb10424ce4f99e40c97959f0e8664718 Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Tue, 2 Mar 2021 09:20:53 +0100 Subject: podman cp: support copying on tmpfs mounts Traditionally, the path resolution for containers has been resolved on the *host*; relative to the container's mount point or relative to specified bind mounts or volumes. While this works nicely for non-running containers, it poses a problem for running ones. In that case, certain kinds of mounts (e.g., tmpfs) will not resolve correctly. A tmpfs is held in memory and hence cannot be resolved relatively to the container's mount point. A copy operation will succeed but the data will not show up inside the container. To support these kinds of mounts, we need to join the *running* container's mount namespace (and PID namespace) when copying. Note that this change implies moving the copy and stat logic into `libpod` since we need to keep the container locked to avoid race conditions. The immediate benefit is that all logic is now inside `libpod`; the code isn't scattered anymore. Further note that Docker does not support copying to tmpfs mounts. Tests have been extended to cover *both* path resolutions for running and created containers. New tests have been added to exercise the tmpfs-mount case. For the record: Some tests could be improved by using `start -a` instead of a start-exec sequence. Unfortunately, `start -a` is flaky in the CI which forced me to use the more expensive start-exec option. Signed-off-by: Valentin Rothberg --- libpod/container_copy_linux.go | 266 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 266 insertions(+) create mode 100644 libpod/container_copy_linux.go (limited to 'libpod/container_copy_linux.go') diff --git a/libpod/container_copy_linux.go b/libpod/container_copy_linux.go new file mode 100644 index 000000000..66ccd2f1f --- /dev/null +++ b/libpod/container_copy_linux.go @@ -0,0 +1,266 @@ +// +build linux + +package libpod + +import ( + "context" + "io" + "os" + "path/filepath" + "runtime" + "strings" + + buildahCopiah "github.com/containers/buildah/copier" + "github.com/containers/buildah/pkg/chrootuser" + "github.com/containers/buildah/util" + "github.com/containers/podman/v3/libpod/define" + "github.com/containers/storage" + "github.com/containers/storage/pkg/idtools" + "github.com/docker/docker/pkg/archive" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +func (c *Container) copyFromArchive(ctx context.Context, path string, reader io.Reader) (func() error, error) { + var ( + mountPoint string + resolvedRoot string + resolvedPath string + unmount func() + err error + ) + + // Make sure that "/" copies the *contents* of the mount point and not + // the directory. + if path == "/" { + path = "/." + } + + // Optimization: only mount if the container is not already. + if c.state.Mounted { + mountPoint = c.state.Mountpoint + unmount = func() {} + } else { + // NOTE: make sure to unmount in error paths. + mountPoint, err = c.mount() + if err != nil { + return nil, err + } + unmount = func() { c.unmount(false) } + } + + if c.state.State == define.ContainerStateRunning { + resolvedRoot = "/" + resolvedPath = c.pathAbs(path) + } else { + resolvedRoot, resolvedPath, err = c.resolvePath(mountPoint, path) + if err != nil { + unmount() + return nil, err + } + } + + decompressed, err := archive.DecompressStream(reader) + if err != nil { + unmount() + return nil, err + } + + idMappings, idPair, err := getIDMappingsAndPair(c, mountPoint) + if err != nil { + decompressed.Close() + unmount() + return nil, err + } + + logrus.Debugf("Container copy *to* %q (resolved: %q) on container %q (ID: %s)", path, resolvedPath, c.Name(), c.ID()) + + return func() error { + defer unmount() + defer decompressed.Close() + putOptions := buildahCopiah.PutOptions{ + UIDMap: idMappings.UIDMap, + GIDMap: idMappings.GIDMap, + ChownDirs: idPair, + ChownFiles: idPair, + } + + return c.joinMountAndExec(ctx, + func() error { + return buildahCopiah.Put(resolvedRoot, resolvedPath, putOptions, decompressed) + }, + ) + }, nil +} + +func (c *Container) copyToArchive(ctx context.Context, path string, writer io.Writer) (func() error, error) { + var ( + mountPoint string + unmount func() + err error + ) + + // Optimization: only mount if the container is not already. + if c.state.Mounted { + mountPoint = c.state.Mountpoint + unmount = func() {} + } else { + // NOTE: make sure to unmount in error paths. + mountPoint, err = c.mount() + if err != nil { + return nil, err + } + unmount = func() { c.unmount(false) } + } + + statInfo, resolvedRoot, resolvedPath, err := c.stat(ctx, mountPoint, path) + if err != nil { + unmount() + return nil, err + } + + idMappings, idPair, err := getIDMappingsAndPair(c, mountPoint) + if err != nil { + unmount() + return nil, err + } + + logrus.Debugf("Container copy *from* %q (resolved: %q) on container %q (ID: %s)", path, resolvedPath, c.Name(), c.ID()) + + return func() error { + defer unmount() + getOptions := buildahCopiah.GetOptions{ + // Unless the specified points to ".", we want to copy the base directory. + KeepDirectoryNames: statInfo.IsDir && filepath.Base(path) != ".", + UIDMap: idMappings.UIDMap, + GIDMap: idMappings.GIDMap, + ChownDirs: idPair, + ChownFiles: idPair, + Excludes: []string{"dev", "proc", "sys"}, + } + return c.joinMountAndExec(ctx, + func() error { + return buildahCopiah.Get(resolvedRoot, "", getOptions, []string{resolvedPath}, writer) + }, + ) + }, nil +} + +// getIDMappingsAndPair returns the ID mappings for the container and the host +// ID pair. +func getIDMappingsAndPair(container *Container, containerMount string) (*storage.IDMappingOptions, *idtools.IDPair, error) { + user, err := getContainerUser(container, containerMount) + if err != nil { + return nil, nil, err + } + + idMappingOpts, err := container.IDMappings() + if err != nil { + return nil, nil, err + } + + hostUID, hostGID, err := util.GetHostIDs(idtoolsToRuntimeSpec(idMappingOpts.UIDMap), idtoolsToRuntimeSpec(idMappingOpts.GIDMap), user.UID, user.GID) + if err != nil { + return nil, nil, err + } + + idPair := idtools.IDPair{UID: int(hostUID), GID: int(hostGID)} + return &idMappingOpts, &idPair, nil +} + +// getContainerUser returns the specs.User of the container. +func getContainerUser(container *Container, mountPoint string) (specs.User, error) { + userspec := container.Config().User + + uid, gid, _, err := chrootuser.GetUser(mountPoint, userspec) + u := specs.User{ + UID: uid, + GID: gid, + Username: userspec, + } + + if !strings.Contains(userspec, ":") { + groups, err2 := chrootuser.GetAdditionalGroupsForUser(mountPoint, uint64(u.UID)) + if err2 != nil { + if errors.Cause(err2) != chrootuser.ErrNoSuchUser && err == nil { + err = err2 + } + } else { + u.AdditionalGids = groups + } + } + + return u, err +} + +// idtoolsToRuntimeSpec converts idtools ID mapping to the one of the runtime spec. +func idtoolsToRuntimeSpec(idMaps []idtools.IDMap) (convertedIDMap []specs.LinuxIDMapping) { + for _, idmap := range idMaps { + tempIDMap := specs.LinuxIDMapping{ + ContainerID: uint32(idmap.ContainerID), + HostID: uint32(idmap.HostID), + Size: uint32(idmap.Size), + } + convertedIDMap = append(convertedIDMap, tempIDMap) + } + return convertedIDMap +} + +// joinMountAndExec executes the specified function `f` inside the container's +// mount and PID namespace. That allows for having the exact view on the +// container's file system. +// +// Note, if the container is not running `f()` will be executed as is. +func (c *Container) joinMountAndExec(ctx context.Context, f func() error) error { + if c.state.State != define.ContainerStateRunning { + return f() + } + + // Container's running, so we need to execute `f()` inside its mount NS. + errChan := make(chan error) + go func() { + runtime.LockOSThread() + + // Join the mount and PID NS of the container. + getFD := func(ns LinuxNS) (*os.File, error) { + nsPath, err := c.namespacePath(ns) + if err != nil { + return nil, err + } + return os.Open(nsPath) + } + + mountFD, err := getFD(MountNS) + if err != nil { + errChan <- err + return + } + defer mountFD.Close() + + pidFD, err := getFD(PIDNS) + if err != nil { + errChan <- err + return + } + defer pidFD.Close() + if err := unix.Unshare(unix.CLONE_NEWNS); err != nil { + errChan <- err + return + } + if err := unix.Setns(int(pidFD.Fd()), unix.CLONE_NEWPID); err != nil { + errChan <- err + return + } + + if err := unix.Setns(int(mountFD.Fd()), unix.CLONE_NEWNS); err != nil { + errChan <- err + return + } + + // Last but not least, execute the workload. + errChan <- f() + }() + return <-errChan +} -- cgit v1.2.3-54-g00ecf