diff options
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/adapter/images.go | 34 | ||||
-rw-r--r-- | pkg/adapter/images_remote.go | 32 | ||||
-rw-r--r-- | pkg/adapter/runtime.go | 73 | ||||
-rw-r--r-- | pkg/adapter/runtime_remote.go | 14 | ||||
-rw-r--r-- | pkg/annotations/annotations.go | 27 | ||||
-rw-r--r-- | pkg/rootless/rootless.go | 9 | ||||
-rw-r--r-- | pkg/rootless/rootless_linux.c | 41 | ||||
-rw-r--r-- | pkg/rootless/rootless_linux.go | 277 | ||||
-rw-r--r-- | pkg/rootless/rootless_unsupported.go | 45 | ||||
-rw-r--r-- | pkg/spec/createconfig.go | 130 | ||||
-rw-r--r-- | pkg/spec/spec.go | 40 | ||||
-rw-r--r-- | pkg/varlinkapi/images.go | 38 |
12 files changed, 362 insertions, 398 deletions
diff --git a/pkg/adapter/images.go b/pkg/adapter/images.go new file mode 100644 index 000000000..c8ea1cdea --- /dev/null +++ b/pkg/adapter/images.go @@ -0,0 +1,34 @@ +// +build !remoteclient + +package adapter + +import ( + "github.com/containers/libpod/cmd/podman/cliconfig" + "github.com/containers/libpod/libpod/image" + "github.com/pkg/errors" +) + +// Tree ... +func (r *LocalRuntime) Tree(c *cliconfig.TreeValues) (*image.InfoImage, map[string]*image.LayerInfo, *ContainerImage, error) { + img, err := r.NewImageFromLocal(c.InputArgs[0]) + if err != nil { + return nil, nil, nil, err + } + + // Fetch map of image-layers, which is used for printing output. + layerInfoMap, err := image.GetLayersMapWithImageInfo(r.Runtime.ImageRuntime()) + if err != nil { + return nil, nil, nil, errors.Wrapf(err, "error while retrieving layers of image %q", img.InputName) + } + + // Create an imageInfo and fill the image and layer info + imageInfo := &image.InfoImage{ + ID: img.ID(), + Tags: img.Names(), + } + + if err := image.BuildImageHierarchyMap(imageInfo, layerInfoMap, img.TopLayer()); err != nil { + return nil, nil, nil, err + } + return imageInfo, layerInfoMap, img, nil +} diff --git a/pkg/adapter/images_remote.go b/pkg/adapter/images_remote.go index e7b38dccc..722058d4a 100644 --- a/pkg/adapter/images_remote.go +++ b/pkg/adapter/images_remote.go @@ -6,8 +6,11 @@ import ( "context" "encoding/json" + "github.com/containers/libpod/cmd/podman/cliconfig" iopodman "github.com/containers/libpod/cmd/podman/varlink" + "github.com/containers/libpod/libpod/image" "github.com/containers/libpod/pkg/inspect" + "github.com/pkg/errors" ) // Inspect returns returns an ImageData struct from over a varlink connection @@ -22,3 +25,32 @@ func (i *ContainerImage) Inspect(ctx context.Context) (*inspect.ImageData, error } return &data, nil } + +// Tree ... +func (r *LocalRuntime) Tree(c *cliconfig.TreeValues) (*image.InfoImage, map[string]*image.LayerInfo, *ContainerImage, error) { + layerInfoMap := make(map[string]*image.LayerInfo) + imageInfo := &image.InfoImage{} + + img, err := r.NewImageFromLocal(c.InputArgs[0]) + if err != nil { + return nil, nil, nil, err + } + + reply, err := iopodman.GetLayersMapWithImageInfo().Call(r.Conn) + if err != nil { + return nil, nil, nil, errors.Wrap(err, "failed to obtain image layers") + } + if err := json.Unmarshal([]byte(reply), &layerInfoMap); err != nil { + return nil, nil, nil, errors.Wrap(err, "failed to unmarshal image layers") + } + + reply, err = iopodman.BuildImageHierarchyMap().Call(r.Conn, c.InputArgs[0]) + if err != nil { + return nil, nil, nil, errors.Wrap(err, "failed to get build image map") + } + if err := json.Unmarshal([]byte(reply), imageInfo); err != nil { + return nil, nil, nil, errors.Wrap(err, "failed to unmarshal build image map") + } + + return imageInfo, layerInfoMap, img, nil +} diff --git a/pkg/adapter/runtime.go b/pkg/adapter/runtime.go index dd51c7233..182a04044 100644 --- a/pkg/adapter/runtime.go +++ b/pkg/adapter/runtime.go @@ -8,7 +8,6 @@ import ( "io" "io/ioutil" "os" - "strconv" "text/template" "github.com/containers/buildah" @@ -124,38 +123,6 @@ func (r *LocalRuntime) Export(name string, path string) error { if err != nil { return errors.Wrapf(err, "error looking up container %q", name) } - if os.Geteuid() != 0 { - state, err := ctr.State() - if err != nil { - return errors.Wrapf(err, "cannot read container state %q", ctr.ID()) - } - if state == libpod.ContainerStateRunning || state == libpod.ContainerStatePaused { - data, err := ioutil.ReadFile(ctr.Config().ConmonPidFile) - if err != nil { - return errors.Wrapf(err, "cannot read conmon PID file %q", ctr.Config().ConmonPidFile) - } - conmonPid, err := strconv.Atoi(string(data)) - if err != nil { - return errors.Wrapf(err, "cannot parse PID %q", data) - } - became, ret, err := rootless.JoinDirectUserAndMountNS(uint(conmonPid)) - if err != nil { - return err - } - if became { - os.Exit(ret) - } - } else { - became, ret, err := rootless.BecomeRootInUserNS() - if err != nil { - return err - } - if became { - os.Exit(ret) - } - } - } - return ctr.Export(path) } @@ -343,46 +310,6 @@ func (r *LocalRuntime) HealthCheck(c *cliconfig.HealthCheckValues) (libpod.Healt return r.Runtime.HealthCheck(c.InputArgs[0]) } -// JoinOrCreateRootlessPod joins the specified pod if it is running or it creates a new user namespace -// if the pod is stopped -func (r *LocalRuntime) JoinOrCreateRootlessPod(pod *Pod) (bool, int, error) { - if os.Geteuid() == 0 { - return false, 0, nil - } - opts := rootless.Opts{ - Argument: pod.ID(), - } - - inspect, err := pod.Inspect() - if err != nil { - return false, 0, err - } - for _, ctr := range inspect.Containers { - prevCtr, err := r.LookupContainer(ctr.ID) - if err != nil { - return false, -1, err - } - s, err := prevCtr.State() - if err != nil { - return false, -1, err - } - if s != libpod.ContainerStateRunning && s != libpod.ContainerStatePaused { - continue - } - data, err := ioutil.ReadFile(prevCtr.Config().ConmonPidFile) - if err != nil { - return false, -1, errors.Wrapf(err, "cannot read conmon PID file %q", prevCtr.Config().ConmonPidFile) - } - conmonPid, err := strconv.Atoi(string(data)) - if err != nil { - return false, -1, errors.Wrapf(err, "cannot parse PID %q", data) - } - return rootless.JoinDirectUserAndMountNSWithOpts(uint(conmonPid), &opts) - } - - return rootless.BecomeRootInUserNSWithOpts(&opts) -} - // Events is a wrapper to libpod to obtain libpod/podman events func (r *LocalRuntime) Events(c *cliconfig.EventValues) error { var ( diff --git a/pkg/adapter/runtime_remote.go b/pkg/adapter/runtime_remote.go index c3a4f322d..807a9ad8f 100644 --- a/pkg/adapter/runtime_remote.go +++ b/pkg/adapter/runtime_remote.go @@ -82,6 +82,7 @@ type remoteImage struct { Digest digest.Digest isParent bool Runtime *LocalRuntime + TopLayer string } // Container ... @@ -147,6 +148,7 @@ func imageInListToContainerImage(i iopodman.Image, name string, runtime *LocalRu Names: i.RepoTags, isParent: i.IsParent, Runtime: runtime, + TopLayer: i.TopLayer, } return &ContainerImage{ri}, nil } @@ -280,6 +282,11 @@ func (ci *ContainerImage) Dangling() bool { return len(ci.Names()) == 0 } +// TopLayer returns an images top layer as a string +func (ci *ContainerImage) TopLayer() string { + return ci.remoteImage.TopLayer +} + // TagImage ... func (ci *ContainerImage) TagImage(tag string) error { _, err := iopodman.TagImage().Call(ci.Runtime.Conn, ci.ID(), tag) @@ -755,13 +762,6 @@ func (r *LocalRuntime) HealthCheck(c *cliconfig.HealthCheckValues) (libpod.Healt return -1, libpod.ErrNotImplemented } -// JoinOrCreateRootlessPod joins the specified pod if it is running or it creates a new user namespace -// if the pod is stopped -func (r *LocalRuntime) JoinOrCreateRootlessPod(pod *Pod) (bool, int, error) { - // Nothing to do in the remote case - return true, 0, nil -} - // Events monitors libpod/podman events over a varlink connection func (r *LocalRuntime) Events(c *cliconfig.EventValues) error { var more uint64 diff --git a/pkg/annotations/annotations.go b/pkg/annotations/annotations.go index 008cca7ee..fe2591a0c 100644 --- a/pkg/annotations/annotations.go +++ b/pkg/annotations/annotations.go @@ -19,9 +19,18 @@ const ( // HostName is the container host name annotation HostName = "io.kubernetes.cri-o.HostName" + // CgroupParent is the sandbox cgroup parent + CgroupParent = "io.kubernetes.cri-o.CgroupParent" + // IP is the container ipv4 or ipv6 address IP = "io.kubernetes.cri-o.IP" + // NamespaceOptions store the options for namespaces + NamespaceOptions = "io.kubernetes.cri-o.NamespaceOptions" + + // SeccompProfilePath is the node seccomp profile path + SeccompProfilePath = "io.kubernetes.cri-o.SeccompProfilePath" + // Image is the container image ID annotation Image = "io.kubernetes.cri-o.Image" @@ -34,6 +43,9 @@ const ( // KubeName is the kubernetes name annotation KubeName = "io.kubernetes.cri-o.KubeName" + // PortMappings holds the port mappings for the sandbox + PortMappings = "io.kubernetes.cri-o.PortMappings" + // Labels are the kubernetes labels annotation Labels = "io.kubernetes.cri-o.Labels" @@ -46,6 +58,9 @@ const ( // Name is the pod name annotation Name = "io.kubernetes.cri-o.Name" + // Namespace is the pod namespace annotation + Namespace = "io.kubernetes.cri-o.Namespace" + // PrivilegedRuntime is the annotation for the privileged runtime path PrivilegedRuntime = "io.kubernetes.cri-o.PrivilegedRuntime" @@ -67,8 +82,8 @@ const ( // MountPoint is the mount point of the container rootfs MountPoint = "io.kubernetes.cri-o.MountPoint" - // TrustedSandbox is the annotation for trusted sandboxes - TrustedSandbox = "io.kubernetes.cri-o.TrustedSandbox" + // RuntimeHandler is the annotation for runtime handler + RuntimeHandler = "io.kubernetes.cri-o.RuntimeHandler" // TTY is the terminal path annotation TTY = "io.kubernetes.cri-o.TTY" @@ -79,8 +94,14 @@ const ( // StdinOnce is the stdin_once annotation StdinOnce = "io.kubernetes.cri-o.StdinOnce" - // Volumes is the volumes annotation + // Volumes is the volumes annotatoin Volumes = "io.kubernetes.cri-o.Volumes" + + // HostNetwork indicates whether the host network namespace is used or not + HostNetwork = "io.kubernetes.cri-o.HostNetwork" + + // CNIResult is the JSON string representation of the Result from CNI + CNIResult = "io.kubernetes.cri-o.CNIResult" ) // ContainerType values diff --git a/pkg/rootless/rootless.go b/pkg/rootless/rootless.go deleted file mode 100644 index a531e43ce..000000000 --- a/pkg/rootless/rootless.go +++ /dev/null @@ -1,9 +0,0 @@ -package rootless - -// Opts allows to customize how re-execing to a rootless process is done -type Opts struct { - // Argument overrides the arguments on the command line - // for the re-execed process. The process in the namespace - // must use rootless.Argument() to read its value. - Argument string -} diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c index 2e2c3acac..9cb79ed4d 100644 --- a/pkg/rootless/rootless_linux.c +++ b/pkg/rootless/rootless_linux.c @@ -13,10 +13,36 @@ #include <sys/wait.h> #include <string.h> #include <stdbool.h> +#include <sys/types.h> +#include <sys/prctl.h> +#include <dirent.h> static const char *_max_user_namespaces = "/proc/sys/user/max_user_namespaces"; static const char *_unprivileged_user_namespaces = "/proc/sys/kernel/unprivileged_userns_clone"; +static int n_files; + +static void __attribute__((constructor)) init() +{ + DIR *d; + + /* Store how many FDs were open before the Go runtime kicked in. */ + d = opendir ("/proc/self/fd"); + if (d) + { + struct dirent *ent; + + for (ent = readdir (d); ent; ent = readdir (d)) + { + int fd = atoi (ent->d_name); + if (fd > n_files && fd != dirfd (d)) + n_files = fd; + } + closedir (d); + } +} + + static int syscall_setresuid (uid_t ruid, uid_t euid, uid_t suid) { @@ -133,12 +159,25 @@ reexec_userns_join (int userns, int mountns) pid = fork (); if (pid < 0) fprintf (stderr, "cannot fork: %s\n", strerror (errno)); + if (pid) - return pid; + { + /* We passed down these fds, close them. */ + int f; + for (f = 3; f < n_files; f++) + close (f); + return pid; + } setenv ("_CONTAINERS_USERNS_CONFIGURED", "init", 1); setenv ("_CONTAINERS_ROOTLESS_UID", uid, 1); + if (prctl (PR_SET_PDEATHSIG, SIGTERM, 0, 0, 0) < 0) + { + fprintf (stderr, "cannot prctl(PR_SET_PDEATHSIG): %s\n", strerror (errno)); + _exit (EXIT_FAILURE); + } + if (setns (userns, 0) < 0) { fprintf (stderr, "cannot setns: %s\n", strerror (errno)); diff --git a/pkg/rootless/rootless_linux.go b/pkg/rootless/rootless_linux.go index 0be0e08bf..1d1b1713d 100644 --- a/pkg/rootless/rootless_linux.go +++ b/pkg/rootless/rootless_linux.go @@ -46,25 +46,6 @@ func IsRootless() bool { return isRootless } -var ( - skipStorageSetup = false -) - -// SetSkipStorageSetup tells the runtime to not setup containers/storage -func SetSkipStorageSetup(v bool) { - skipStorageSetup = v -} - -// SkipStorageSetup tells if we should skip the containers/storage setup -func SkipStorageSetup() bool { - return skipStorageSetup -} - -// Argument returns the argument that was set for the rootless session. -func Argument() string { - return os.Getenv("_CONTAINERS_ROOTLESS_ARG") -} - // GetRootlessUID returns the UID of the user in the parent userNS func GetRootlessUID() int { uidEnv := os.Getenv("_CONTAINERS_ROOTLESS_UID") @@ -104,51 +85,86 @@ func tryMappingTool(tool string, pid int, hostID int, mappings []idtools.IDMap) return nil } -// JoinNS re-exec podman in a new userNS and join the user namespace of the specified -// PID. -func JoinNS(pid uint, preserveFDs int) (bool, int, error) { - if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { - return false, -1, nil +func readUserNs(path string) (string, error) { + b := make([]byte, 256) + _, err := syscall.Readlink(path, b) + if err != nil { + return "", err } + return string(b), nil +} - userNS, err := getUserNSForPid(pid) +func readUserNsFd(fd uintptr) (string, error) { + return readUserNs(fmt.Sprintf("/proc/self/fd/%d", fd)) +} + +func getParentUserNs(fd uintptr) (uintptr, error) { + const nsGetParent = 0xb702 + ret, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(nsGetParent), 0) + if errno != 0 { + return 0, errno + } + return (uintptr)(unsafe.Pointer(ret)), nil +} + +// getUserNSFirstChild returns an open FD for the first direct child user namespace that created the process +// Each container creates a new user namespace where the runtime runs. The current process in the container +// might have created new user namespaces that are child of the initial namespace we created. +// This function finds the initial namespace created for the container that is a child of the current namespace. +// +// current ns +// / \ +// TARGET -> a [other containers] +// / +// b +// / +// NS READ USING THE PID -> c +func getUserNSFirstChild(fd uintptr) (*os.File, error) { + currentNS, err := readUserNs("/proc/self/ns/user") if err != nil { - return false, -1, err + return nil, err } - defer userNS.Close() - pidC := C.reexec_userns_join(C.int(userNS.Fd()), -1) - if int(pidC) < 0 { - return false, -1, errors.Errorf("cannot re-exec process") + ns, err := readUserNsFd(fd) + if err != nil { + return nil, errors.Wrapf(err, "cannot read user namespace") } - if preserveFDs > 0 { - for fd := 3; fd < 3+preserveFDs; fd++ { - // These fds were passed down to the runtime. Close them - // and not interfere - os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd)).Close() - } + if ns == currentNS { + return nil, errors.New("process running in the same user namespace") } - ret := C.reexec_in_user_namespace_wait(pidC) - if ret < 0 { - return false, -1, errors.New("error waiting for the re-exec process") - } + for { + nextFd, err := getParentUserNs(fd) + if err != nil { + return nil, errors.Wrapf(err, "cannot get parent user namespace") + } - return true, int(ret), nil -} + ns, err = readUserNsFd(nextFd) + if err != nil { + return nil, errors.Wrapf(err, "cannot read user namespace") + } -// JoinDirectUserAndMountNS re-exec podman in a new userNS and join the user and mount -// namespace of the specified PID without looking up its parent. Useful to join directly -// the conmon process. It is a convenience function for JoinDirectUserAndMountNSWithOpts -// with a default configuration. -func JoinDirectUserAndMountNS(pid uint) (bool, int, error) { - return JoinDirectUserAndMountNSWithOpts(pid, nil) + if ns == currentNS { + syscall.Close(int(nextFd)) + + // Drop O_CLOEXEC for the fd. + _, _, errno := syscall.Syscall(syscall.SYS_FCNTL, fd, syscall.F_SETFD, 0) + if errno != 0 { + syscall.Close(int(fd)) + return nil, errno + } + + return os.NewFile(fd, "userns child"), nil + } + syscall.Close(int(fd)) + fd = nextFd + } } -// JoinDirectUserAndMountNSWithOpts re-exec podman in a new userNS and join the user and -// mount namespace of the specified PID without looking up its parent. Useful to join -// directly the conmon process. -func JoinDirectUserAndMountNSWithOpts(pid uint, opts *Opts) (bool, int, error) { +// JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount +// namespace of the specified PID without looking up its parent. Useful to join directly +// the conmon process. +func JoinUserAndMountNS(pid uint) (bool, int, error) { if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { return false, -1, nil } @@ -165,39 +181,11 @@ func JoinDirectUserAndMountNSWithOpts(pid uint, opts *Opts) (bool, int, error) { } defer userNS.Close() - if opts != nil && opts.Argument != "" { - if err := os.Setenv("_CONTAINERS_ROOTLESS_ARG", opts.Argument); err != nil { - return false, -1, err - } - } - - pidC := C.reexec_userns_join(C.int(userNS.Fd()), C.int(mountNS.Fd())) - if int(pidC) < 0 { - return false, -1, errors.Errorf("cannot re-exec process") - } - - ret := C.reexec_in_user_namespace_wait(pidC) - if ret < 0 { - return false, -1, errors.New("error waiting for the re-exec process") - } - - return true, int(ret), nil -} - -// JoinNSPath re-exec podman in a new userNS and join the owner user namespace of the -// specified path. -func JoinNSPath(path string) (bool, int, error) { - if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { - return false, -1, nil - } - - userNS, err := getUserNSForPath(path) + fd, err := getUserNSFirstChild(userNS.Fd()) if err != nil { return false, -1, err } - defer userNS.Close() - - pidC := C.reexec_userns_join(C.int(userNS.Fd()), -1) + pidC := C.reexec_userns_join(C.int(fd.Fd()), C.int(mountNS.Fd())) if int(pidC) < 0 { return false, -1, errors.Errorf("cannot re-exec process") } @@ -213,16 +201,8 @@ func JoinNSPath(path string) (bool, int, error) { // BecomeRootInUserNS re-exec podman in a new userNS. It returns whether podman was re-executed // into a new user namespace and the return code from the re-executed podman process. // If podman was re-executed the caller needs to propagate the error code returned by the child -// process. It is a convenience function for BecomeRootInUserNSWithOpts with a default configuration. -func BecomeRootInUserNS() (bool, int, error) { - return BecomeRootInUserNSWithOpts(nil) -} - -// BecomeRootInUserNSWithOpts re-exec podman in a new userNS. It returns whether podman was -// re-execute into a new user namespace and the return code from the re-executed podman process. -// If podman was re-executed the caller needs to propagate the error code returned by the child // process. -func BecomeRootInUserNSWithOpts(opts *Opts) (bool, int, error) { +func BecomeRootInUserNS() (bool, int, error) { if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { if os.Getenv("_CONTAINERS_USERNS_CONFIGURED") == "init" { return false, 0, runInUser() @@ -241,12 +221,6 @@ func BecomeRootInUserNSWithOpts(opts *Opts) (bool, int, error) { defer w.Close() defer w.Write([]byte("0")) - if opts != nil && opts.Argument != "" { - if err := os.Setenv("_CONTAINERS_ROOTLESS_ARG", opts.Argument); err != nil { - return false, -1, err - } - } - pidC := C.reexec_in_user_namespace(C.int(r.Fd())) pid := int(pidC) if pid < 0 { @@ -328,112 +302,3 @@ func BecomeRootInUserNSWithOpts(opts *Opts) (bool, int, error) { return true, int(ret), nil } - -func readUserNs(path string) (string, error) { - b := make([]byte, 256) - _, err := syscall.Readlink(path, b) - if err != nil { - return "", err - } - return string(b), nil -} - -func readUserNsFd(fd uintptr) (string, error) { - return readUserNs(fmt.Sprintf("/proc/self/fd/%d", fd)) -} - -func getOwner(fd uintptr) (uintptr, error) { - const nsGetUserns = 0xb701 - ret, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(nsGetUserns), 0) - if errno != 0 { - return 0, errno - } - return (uintptr)(unsafe.Pointer(ret)), nil -} - -func getParentUserNs(fd uintptr) (uintptr, error) { - const nsGetParent = 0xb702 - ret, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(nsGetParent), 0) - if errno != 0 { - return 0, errno - } - return (uintptr)(unsafe.Pointer(ret)), nil -} - -func getUserNSForPath(path string) (*os.File, error) { - u, err := os.Open(path) - if err != nil { - return nil, errors.Wrapf(err, "cannot open %s", path) - } - defer u.Close() - fd, err := getOwner(u.Fd()) - if err != nil { - return nil, err - } - - return getUserNSFirstChild(fd) -} - -func getUserNSForPid(pid uint) (*os.File, error) { - path := fmt.Sprintf("/proc/%d/ns/user", pid) - u, err := os.Open(path) - if err != nil { - return nil, errors.Wrapf(err, "cannot open %s", path) - } - - return getUserNSFirstChild(u.Fd()) -} - -// getUserNSFirstChild returns an open FD for the first direct child user namespace that created the process -// Each container creates a new user namespace where the runtime runs. The current process in the container -// might have created new user namespaces that are child of the initial namespace we created. -// This function finds the initial namespace created for the container that is a child of the current namespace. -// -// current ns -// / \ -// TARGET -> a [other containers] -// / -// b -// / -// NS READ USING THE PID -> c -func getUserNSFirstChild(fd uintptr) (*os.File, error) { - currentNS, err := readUserNs("/proc/self/ns/user") - if err != nil { - return nil, err - } - - ns, err := readUserNsFd(fd) - if err != nil { - return nil, errors.Wrapf(err, "cannot read user namespace") - } - if ns == currentNS { - return nil, errors.New("process running in the same user namespace") - } - - for { - nextFd, err := getParentUserNs(fd) - if err != nil { - return nil, errors.Wrapf(err, "cannot get parent user namespace") - } - - ns, err = readUserNsFd(nextFd) - if err != nil { - return nil, errors.Wrapf(err, "cannot read user namespace") - } - - if ns == currentNS { - syscall.Close(int(nextFd)) - - // Drop O_CLOEXEC for the fd. - _, _, errno := syscall.Syscall(syscall.SYS_FCNTL, fd, syscall.F_SETFD, 0) - if errno != 0 { - syscall.Close(int(fd)) - return nil, errno - } - - return os.NewFile(fd, "userns child"), nil - } - syscall.Close(int(fd)) - fd = nextFd - } -} diff --git a/pkg/rootless/rootless_unsupported.go b/pkg/rootless/rootless_unsupported.go index e01d7855c..47b5dd7cc 100644 --- a/pkg/rootless/rootless_unsupported.go +++ b/pkg/rootless/rootless_unsupported.go @@ -19,54 +19,15 @@ func BecomeRootInUserNS() (bool, int, error) { return false, -1, errors.New("this function is not supported on this os") } -// BecomeRootInUserNS is a stub function that always returns false and an -// error on unsupported OS's -func BecomeRootInUserNSWithOpts(opts *Opts) (bool, int, error) { - return false, -1, errors.New("this function is not supported on this os") -} - // GetRootlessUID returns the UID of the user in the parent userNS func GetRootlessUID() int { return -1 } -// SetSkipStorageSetup tells the runtime to not setup containers/storage -func SetSkipStorageSetup(bool) { -} - -// SkipStorageSetup tells if we should skip the containers/storage setup -func SkipStorageSetup() bool { - return false -} - -// JoinNS re-exec podman in a new userNS and join the user namespace of the specified -// PID. -func JoinNS(pid uint, preserveFDs int) (bool, int, error) { - return false, -1, errors.New("this function is not supported on this os") -} - -// JoinNSPath re-exec podman in a new userNS and join the owner user namespace of the -// specified path. -func JoinNSPath(path string) (bool, int, error) { - return false, -1, errors.New("this function is not supported on this os") -} - -// JoinDirectUserAndMountNSWithOpts re-exec podman in a new userNS and join the user and -// mount namespace of the specified PID without looking up its parent. Useful to join -// directly the conmon process. -func JoinDirectUserAndMountNSWithOpts(pid uint, opts *Opts) (bool, int, error) { - return false, -1, errors.New("this function is not supported on this os") -} - -// JoinDirectUserAndMountNS re-exec podman in a new userNS and join the user and mount +// JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount // namespace of the specified PID without looking up its parent. Useful to join directly -// the conmon process. It is a convenience function for JoinDirectUserAndMountNSWithOpts +// the conmon process. It is a convenience function for JoinUserAndMountNSWithOpts // with a default configuration. -func JoinDirectUserAndMountNS(pid uint) (bool, int, error) { +func JoinUserAndMountNS(pid uint) (bool, int, error) { return false, -1, errors.New("this function is not supported on this os") } - -// Argument returns the argument that was set for the rootless session. -func Argument() string { - return "" -} diff --git a/pkg/spec/createconfig.go b/pkg/spec/createconfig.go index 0a12e3dca..e71d9d3db 100644 --- a/pkg/spec/createconfig.go +++ b/pkg/spec/createconfig.go @@ -1,7 +1,6 @@ package createconfig import ( - "encoding/json" "fmt" "net" "os" @@ -12,7 +11,6 @@ import ( "github.com/containers/image/manifest" "github.com/containers/libpod/libpod" "github.com/containers/libpod/pkg/namespaces" - "github.com/containers/libpod/pkg/rootless" "github.com/containers/storage" "github.com/containers/storage/pkg/stringid" "github.com/cri-o/ocicni/pkg/ocicni" @@ -24,18 +22,16 @@ import ( "golang.org/x/sys/unix" ) -type mountType string - // Type constants const ( bps = iota iops // TypeBind is the type for mounting host dir - TypeBind mountType = "bind" + TypeBind = "bind" // TypeVolume is the type for remote storage volumes - // TypeVolume mountType = "volume" // re-enable upon use + // TypeVolume = "volume" // re-enable upon use // TypeTmpfs is the type for mounting tmpfs - TypeTmpfs mountType = "tmpfs" + TypeTmpfs = "tmpfs" ) // CreateResourceConfig represents resource elements in CreateConfig @@ -131,15 +127,15 @@ type CreateConfig struct { Mounts []spec.Mount //mounts Volumes []string //volume VolumesFrom []string - WorkDir string //workdir - LabelOpts []string //SecurityOpts - NoNewPrivs bool //SecurityOpts - ApparmorProfile string //SecurityOpts - SeccompProfilePath string //SecurityOpts + NamedVolumes []*libpod.ContainerNamedVolume // Filled in by CreateConfigToOCISpec + WorkDir string //workdir + LabelOpts []string //SecurityOpts + NoNewPrivs bool //SecurityOpts + ApparmorProfile string //SecurityOpts + SeccompProfilePath string //SecurityOpts SecurityOpts []string Rootfs string - LocalVolumes []spec.Mount //Keeps track of the built-in volumes of container used in the --volumes-from flag - Syslog bool // Whether to enable syslog on exit commands + Syslog bool // Whether to enable syslog on exit commands } func u32Ptr(i int64) *uint32 { u := uint32(i); return &u } @@ -173,9 +169,9 @@ func (c *CreateConfig) AddContainerInitBinary(path string) error { c.Command = append([]string{"/dev/init", "--"}, c.Command...) c.Mounts = append(c.Mounts, spec.Mount{ Destination: "/dev/init", - Type: "bind", + Type: TypeBind, Source: path, - Options: []string{"bind", "ro"}, + Options: []string{TypeBind, "ro"}, }) return nil } @@ -218,9 +214,9 @@ func (c *CreateConfig) initFSMounts() []spec.Mount { return mounts } -//GetVolumeMounts takes user provided input for bind mounts and creates Mount structs +// GetVolumeMounts takes user provided input for bind mounts and creates Mount structs func (c *CreateConfig) GetVolumeMounts(specMounts []spec.Mount) ([]spec.Mount, error) { - m := c.LocalVolumes + m := []spec.Mount{} for _, i := range c.Volumes { var options []string spliti := strings.Split(i, ":") @@ -256,9 +252,11 @@ func (c *CreateConfig) GetVolumeMounts(specMounts []spec.Mount) ([]spec.Mount, e mount.Source = "tmpfs" mount.Options = append(mount.Options, "tmpcopyup") } else { + // TODO: Move support for this and tmpfs into libpod + // Should tmpfs also be handled as named volumes? Wouldn't be hard // This will cause a new local Volume to be created on your system mount.Source = stringid.GenerateNonCryptoID() - mount.Options = append(mount.Options, "bind") + mount.Options = append(mount.Options, TypeBind) } m = append(m, mount) } @@ -269,13 +267,12 @@ func (c *CreateConfig) GetVolumeMounts(specMounts []spec.Mount) ([]spec.Mount, e // GetVolumesFrom reads the create-config artifact of the container to get volumes from // and adds it to c.Volumes of the current container. func (c *CreateConfig) GetVolumesFrom() error { - var options string - - if rootless.SkipStorageSetup() { + if os.Geteuid() != 0 { return nil } for _, vol := range c.VolumesFrom { + options := "" splitVol := strings.SplitN(vol, ":", 2) if len(splitVol) == 2 { options = splitVol[1] @@ -284,41 +281,60 @@ func (c *CreateConfig) GetVolumesFrom() error { if err != nil { return errors.Wrapf(err, "error looking up container %q", splitVol[0]) } - inspect, err := ctr.Inspect(false) - if err != nil { - return errors.Wrapf(err, "error inspecting %q", splitVol[0]) - } - var createArtifact CreateConfig - artifact, err := ctr.GetArtifact("create-config") - if err != nil { - return errors.Wrapf(err, "error getting create-config artifact for %q", splitVol[0]) + + logrus.Debugf("Adding volumes from container %s", ctr.ID()) + + // Look up the container's user volumes. This gets us the + // destinations of all mounts the user added to the container. + userVolumesArr := ctr.UserVolumes() + + // We're going to need to access them a lot, so convert to a map + // to reduce looping. + // We'll also use the map to indicate if we missed any volumes along the way. + userVolumes := make(map[string]bool) + for _, dest := range userVolumesArr { + userVolumes[dest] = false } - if err := json.Unmarshal(artifact, &createArtifact); err != nil { - return err + + // Now we get the container's spec and loop through its volumes + // and append them in if we can find them. + spec := ctr.Spec() + if spec == nil { + return errors.Errorf("error retrieving container %s spec", ctr.ID()) } - for key := range createArtifact.BuiltinImgVolumes { - for _, m := range inspect.Mounts { - if m.Destination == key { - c.LocalVolumes = append(c.LocalVolumes, m) - break + for _, mnt := range spec.Mounts { + if mnt.Type != TypeBind { + continue + } + if _, exists := userVolumes[mnt.Destination]; exists { + userVolumes[mnt.Destination] = true + localOptions := options + if localOptions == "" { + localOptions = strings.Join(mnt.Options, ",") } + c.Volumes = append(c.Volumes, fmt.Sprintf("%s:%s:%s", mnt.Source, mnt.Destination, localOptions)) } } - for _, i := range createArtifact.Volumes { - // Volumes format is host-dir:ctr-dir[:options], so get the host and ctr dir - // and add on the options given by the user to the flag. - spliti := strings.SplitN(i, ":", 3) - // Throw error if mounting volume from container with Z option (private label) - // Override this by adding 'z' to options. - if len(spliti) > 2 && strings.Contains(spliti[2], "Z") && !strings.Contains(options, "z") { - return errors.Errorf("volume mounted with private option 'Z' in %q. Use option 'z' to mount in current container", ctr.ID()) + // We're done with the spec mounts. Add named volumes. + // Add these unconditionally - none of them are automatically + // part of the container, as some spec mounts are. + namedVolumes := ctr.NamedVolumes() + for _, namedVol := range namedVolumes { + if _, exists := userVolumes[namedVol.Dest]; exists { + userVolumes[namedVol.Dest] = true } - if options == "" { - // Mount the volumes with the default options - c.Volumes = append(c.Volumes, createArtifact.Volumes...) - } else { - c.Volumes = append(c.Volumes, spliti[0]+":"+spliti[1]+":"+options) + localOptions := options + if localOptions == "" { + localOptions = strings.Join(namedVol.Options, ",") + } + c.Volumes = append(c.Volumes, fmt.Sprintf("%s:%s:%s", namedVol.Name, namedVol.Dest, localOptions)) + } + + // Check if we missed any volumes + for volDest, found := range userVolumes { + if !found { + logrus.Warnf("Unable to match volume %s from container %s for volumes-from", volDest, ctr.ID()) } } } @@ -418,14 +434,20 @@ func (c *CreateConfig) GetContainerCreateOptions(runtime *libpod.Runtime, pod *l // others, if they are included volumes := make([]string, 0, len(c.Volumes)) for _, vol := range c.Volumes { - volumes = append(volumes, strings.SplitN(vol, ":", 2)[0]) + // We always want the volume destination + splitVol := strings.SplitN(vol, ":", 3) + if len(splitVol) > 1 { + volumes = append(volumes, splitVol[1]) + } else { + volumes = append(volumes, splitVol[0]) + } } options = append(options, libpod.WithUserVolumes(volumes)) } - if len(c.LocalVolumes) != 0 { - options = append(options, libpod.WithLocalVolumes(c.LocalVolumes)) + if len(c.NamedVolumes) != 0 { + options = append(options, libpod.WithNamedVolumes(c.NamedVolumes)) } if len(c.Command) != 0 { @@ -539,7 +561,7 @@ func (c *CreateConfig) GetContainerCreateOptions(runtime *libpod.Runtime, pod *l options = append(options, libpod.WithPrivileged(c.Privileged)) - useImageVolumes := c.ImageVolumeType == "bind" + useImageVolumes := c.ImageVolumeType == TypeBind // Gather up the options for NewContainer which consist of With... funcs options = append(options, libpod.WithRootFSFromImage(c.ImageID, c.Image, useImageVolumes)) options = append(options, libpod.WithSecLabels(c.LabelOpts)) diff --git a/pkg/spec/spec.go b/pkg/spec/spec.go index a61741f73..9b6bd089e 100644 --- a/pkg/spec/spec.go +++ b/pkg/spec/spec.go @@ -6,6 +6,7 @@ import ( "path/filepath" "strings" + "github.com/containers/libpod/libpod" "github.com/containers/libpod/pkg/rootless" "github.com/containers/storage/pkg/mount" pmount "github.com/containers/storage/pkg/mount" @@ -48,6 +49,33 @@ func supercedeUserMounts(mounts []spec.Mount, configMount []spec.Mount) []spec.M return configMount } +// Split named volumes from normal volumes +func splitNamedVolumes(mounts []spec.Mount) ([]spec.Mount, []*libpod.ContainerNamedVolume) { + newMounts := make([]spec.Mount, 0) + namedVolumes := make([]*libpod.ContainerNamedVolume, 0) + for _, mount := range mounts { + // If it's not a named volume, append unconditionally + if mount.Type != TypeBind { + newMounts = append(newMounts, mount) + continue + } + // Volumes that are not named volumes must be an absolute or + // relative path. + // Volume names may not begin with a non-alphanumeric character + // so the HasPrefix() check is safe here. + if strings.HasPrefix(mount.Source, "/") || strings.HasPrefix(mount.Source, ".") { + newMounts = append(newMounts, mount) + } else { + namedVolume := new(libpod.ContainerNamedVolume) + namedVolume.Name = mount.Source + namedVolume.Dest = mount.Destination + namedVolume.Options = mount.Options + namedVolumes = append(namedVolumes, namedVolume) + } + } + return newMounts, namedVolumes +} + func getAvailableGids() (int64, error) { idMap, err := user.ParseIDMapFile("/proc/self/gid_map") if err != nil { @@ -99,7 +127,7 @@ func CreateConfigToOCISpec(config *CreateConfig) (*spec.Spec, error) { //nolint } sysMnt := spec.Mount{ Destination: "/sys", - Type: "bind", + Type: TypeBind, Source: "/sys", Options: []string{"rprivate", "nosuid", "noexec", "nodev", r, "rbind"}, } @@ -126,7 +154,7 @@ func CreateConfigToOCISpec(config *CreateConfig) (*spec.Spec, error) { //nolint g.RemoveMount("/dev/mqueue") devMqueue := spec.Mount{ Destination: "/dev/mqueue", - Type: "bind", + Type: TypeBind, Source: "/dev/mqueue", Options: []string{"bind", "nosuid", "noexec", "nodev"}, } @@ -136,7 +164,7 @@ func CreateConfigToOCISpec(config *CreateConfig) (*spec.Spec, error) { //nolint g.RemoveMount("/proc") procMount := spec.Mount{ Destination: "/proc", - Type: "bind", + Type: TypeBind, Source: "/proc", Options: []string{"rbind", "nosuid", "noexec", "nodev"}, } @@ -377,6 +405,12 @@ func CreateConfigToOCISpec(config *CreateConfig) (*spec.Spec, error) { //nolint configSpec.Mounts = supercedeUserMounts(volumeMounts, configSpec.Mounts) //--mount configSpec.Mounts = supercedeUserMounts(config.initFSMounts(), configSpec.Mounts) + + // Split normal mounts and named volumes + newMounts, namedVolumes := splitNamedVolumes(configSpec.Mounts) + configSpec.Mounts = newMounts + config.NamedVolumes = namedVolumes + // BLOCK IO blkio, err := config.CreateBlockIO() if err != nil { diff --git a/pkg/varlinkapi/images.go b/pkg/varlinkapi/images.go index 63d500204..8cd13e251 100644 --- a/pkg/varlinkapi/images.go +++ b/pkg/varlinkapi/images.go @@ -103,6 +103,7 @@ func (i *LibpodAPI) GetImage(call iopodman.VarlinkCall, id string) error { VirtualSize: newImage.VirtualSize, Containers: int64(len(containers)), Labels: labels, + TopLayer: newImage.TopLayer(), } return call.ReplyGetImage(il) } @@ -923,3 +924,40 @@ func (i *LibpodAPI) Diff(call iopodman.VarlinkCall, name string) error { } return call.ReplyDiff(response) } + +// GetLayersMapWithImageInfo is a development only endpoint to obtain layer information for an image. +func (i *LibpodAPI) GetLayersMapWithImageInfo(call iopodman.VarlinkCall) error { + layerInfo, err := image.GetLayersMapWithImageInfo(i.Runtime.ImageRuntime()) + if err != nil { + return call.ReplyErrorOccurred(err.Error()) + } + b, err := json.Marshal(layerInfo) + if err != nil { + return call.ReplyErrorOccurred(err.Error()) + } + return call.ReplyGetLayersMapWithImageInfo(string(b)) +} + +// BuildImageHierarchyMap ... +func (i *LibpodAPI) BuildImageHierarchyMap(call iopodman.VarlinkCall, name string) error { + img, err := i.Runtime.ImageRuntime().NewFromLocal(name) + if err != nil { + return call.ReplyErrorOccurred(err.Error()) + } + imageInfo := &image.InfoImage{ + ID: img.ID(), + Tags: img.Names(), + } + layerInfo, err := image.GetLayersMapWithImageInfo(i.Runtime.ImageRuntime()) + if err != nil { + return call.ReplyErrorOccurred(err.Error()) + } + if err := image.BuildImageHierarchyMap(imageInfo, layerInfo, img.TopLayer()); err != nil { + return call.ReplyErrorOccurred(err.Error()) + } + b, err := json.Marshal(imageInfo) + if err != nil { + return call.ReplyErrorOccurred(err.Error()) + } + return call.ReplyBuildImageHierarchyMap(string(b)) +} |