From 5813c8246eb32205cc3e68a293c6cf3eb2ba291d Mon Sep 17 00:00:00 2001 From: Gabi Beyer Date: Wed, 12 Jun 2019 17:31:18 +0000 Subject: rootless: Rearrange setup of rootless containers In order to run Podman with VM-based runtimes unprivileged, the network must be set up prior to the container creation. Therefore this commit modifies Podman to run rootless containers by: 1. create a network namespace 2. pass the netns persistent mount path to the slirp4netns to create the tap interface 3. pass the netns path to the OCI spec, so the runtime can enter the netns Closes #2897 Signed-off-by: Gabi Beyer --- libpod/container_internal_linux.go | 14 ++++++--- libpod/networking_linux.go | 62 ++++++++++++++++++++++++-------------- libpod/oci_internal_linux.go | 11 +++++-- libpod/runtime_pod_infra_linux.go | 4 ++- pkg/netns/netns_linux.go | 30 ++++++++++++++++-- pkg/spec/createconfig.go | 2 +- 6 files changed, 89 insertions(+), 34 deletions(-) diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 230b5b888..e7b4b2b22 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -78,15 +78,21 @@ func (c *Container) prepare() (Err error) { // Set up network namespace if not already set up if c.config.CreateNetNS && c.state.NetNS == nil && !c.config.PostConfigureNetNS { netNS, networkStatus, createNetNSErr = c.runtime.createNetNS(c) + if createNetNSErr != nil { + return + } tmpStateLock.Lock() defer tmpStateLock.Unlock() // Assign NetNS attributes to container - if createNetNSErr == nil { - c.state.NetNS = netNS - c.state.NetworkStatus = networkStatus - } + c.state.NetNS = netNS + c.state.NetworkStatus = networkStatus + } + + // handle rootless network namespace setup + if c.state.NetNS != nil && c.config.NetMode == "slirp4netns" && !c.config.PostConfigureNetNS { + createNetNSErr = c.runtime.setupRootlessNetNS(c) } }() // Mount storage if not mounted diff --git a/libpod/networking_linux.go 
b/libpod/networking_linux.go index 67dd0150b..d854a2de6 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -90,9 +90,6 @@ func (r *Runtime) configureNetNS(ctr *Container, ctrNS ns.NetNS) ([]*cnitypes.Re // Create and configure a new network namespace for a container func (r *Runtime) createNetNS(ctr *Container) (n ns.NetNS, q []*cnitypes.Result, err error) { - if rootless.IsRootless() { - return nil, nil, errors.New("cannot configure a new network namespace in rootless mode, only --network=slirp4netns is supported") - } ctrNS, err := netns.NewNS() if err != nil { return nil, nil, errors.Wrapf(err, "error creating network namespace for container %s", ctr.ID()) @@ -110,7 +107,10 @@ func (r *Runtime) createNetNS(ctr *Container) (n ns.NetNS, q []*cnitypes.Result, logrus.Debugf("Made network namespace at %s for container %s", ctrNS.Path(), ctr.ID()) - networkStatus, err := r.configureNetNS(ctr, ctrNS) + networkStatus := []*cnitypes.Result{} + if !rootless.IsRootless() { + networkStatus, err = r.configureNetNS(ctr, ctrNS) + } return ctrNS, networkStatus, err } @@ -138,9 +138,6 @@ func checkSlirpFlags(path string) (bool, bool, bool, error) { // Configure the network namespace for a rootless container func (r *Runtime) setupRootlessNetNS(ctr *Container) (err error) { - defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncR) - defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncW) - path := r.config.NetworkCmdPath if path == "" { @@ -164,7 +161,7 @@ func (r *Runtime) setupRootlessNetNS(ctr *Container) (err error) { cmdArgs := []string{} if havePortMapping { - cmdArgs = append(cmdArgs, "--api-socket", apiSocket, fmt.Sprintf("%d", ctr.state.PID)) + cmdArgs = append(cmdArgs, "--api-socket", apiSocket) } dhp, mtu, sandbox, err := checkSlirpFlags(path) if err != nil { @@ -179,13 +176,32 @@ func (r *Runtime) setupRootlessNetNS(ctr *Container) (err error) { if sandbox { cmdArgs = append(cmdArgs, "--enable-sandbox") } - cmdArgs = append(cmdArgs, "-c", "-e", 
"3", "-r", "4", fmt.Sprintf("%d", ctr.state.PID), "tap0") - cmd := exec.Command(path, cmdArgs...) + // the slirp4netns arguments being passed are describes as follows: + // from the slirp4netns documentation: https://github.com/rootless-containers/slirp4netns + // -c, --configure Brings up the tap interface + // -e, --exit-fd=FD specify the FD for terminating slirp4netns + // -r, --ready-fd=FD specify the FD to write to when the initialization steps are finished + cmdArgs = append(cmdArgs, "-c", "-e", "3", "-r", "4") + if !ctr.config.PostConfigureNetNS { + ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe() + if err != nil { + return errors.Wrapf(err, "failed to create rootless network sync pipe") + } + cmdArgs = append(cmdArgs, "--netns-type=path", ctr.state.NetNS.Path(), "tap0") + } else { + defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncR) + defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncW) + cmdArgs = append(cmdArgs, fmt.Sprintf("%d", ctr.state.PID), "tap0") + } + cmd := exec.Command(path, cmdArgs...) 
+ logrus.Debugf("slirp4netns command: %s", strings.Join(cmd.Args, " ")) cmd.SysProcAttr = &syscall.SysProcAttr{ Setpgid: true, } + + // Leak one end of the pipe in slirp4netns, the other will be sent to conmon cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessSlirpSyncR, syncW) if err := cmd.Start(); err != nil { @@ -388,20 +404,22 @@ func (r *Runtime) teardownNetNS(ctr *Container) error { logrus.Debugf("Tearing down network namespace at %s for container %s", ctr.state.NetNS.Path(), ctr.ID()) - var requestedIP net.IP - if ctr.requestedIP != nil { - requestedIP = ctr.requestedIP - // cancel request for a specific IP in case the container is reused later - ctr.requestedIP = nil - } else { - requestedIP = ctr.config.StaticIP - } + // rootless containers do not use the CNI plugin + if !rootless.IsRootless() { + var requestedIP net.IP + if ctr.requestedIP != nil { + requestedIP = ctr.requestedIP + // cancel request for a specific IP in case the container is reused later + ctr.requestedIP = nil + } else { + requestedIP = ctr.config.StaticIP + } - podNetwork := r.getPodNetwork(ctr.ID(), ctr.Name(), ctr.state.NetNS.Path(), ctr.config.Networks, ctr.config.PortMappings, requestedIP) + podNetwork := r.getPodNetwork(ctr.ID(), ctr.Name(), ctr.state.NetNS.Path(), ctr.config.Networks, ctr.config.PortMappings, requestedIP) - // The network may have already been torn down, so don't fail here, just log - if err := r.netPlugin.TearDownPod(podNetwork); err != nil { - return errors.Wrapf(err, "error tearing down CNI namespace configuration for container %s", ctr.ID()) + if err := r.netPlugin.TearDownPod(podNetwork); err != nil { + return errors.Wrapf(err, "error tearing down CNI namespace configuration for container %s", ctr.ID()) + } } // First unmount the namespace diff --git a/libpod/oci_internal_linux.go b/libpod/oci_internal_linux.go index a90af32ed..a5cce795b 100644 --- a/libpod/oci_internal_linux.go +++ b/libpod/oci_internal_linux.go @@ -131,9 +131,14 @@ func (r *OCIRuntime) 
createOCIContainer(ctr *Container, restoreOptions *Containe } if ctr.config.NetMode.IsSlirp4netns() { - ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe() - if err != nil { - return errors.Wrapf(err, "failed to create rootless network sync pipe") + if ctr.config.PostConfigureNetNS { + ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe() + if err != nil { + return errors.Wrapf(err, "failed to create rootless network sync pipe") + } + } else { + defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncR) + defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncW) } // Leak one end in conmon, the other one will be leaked into slirp4netns cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessSlirpSyncW) diff --git a/libpod/runtime_pod_infra_linux.go b/libpod/runtime_pod_infra_linux.go index ad6662f03..6a27c2800 100644 --- a/libpod/runtime_pod_infra_linux.go +++ b/libpod/runtime_pod_infra_linux.go @@ -99,7 +99,9 @@ func (r *Runtime) makeInfraContainer(ctx context.Context, p *Pod, imgName, imgID if isRootless { netmode = "slirp4netns" } - options = append(options, WithNetNS(p.config.InfraContainer.PortBindings, isRootless, netmode, networks)) + // PostConfigureNetNS should not be set since user namespace sharing is not implemented + // and rootless networking no longer supports post configuration setup + options = append(options, WithNetNS(p.config.InfraContainer.PortBindings, false, netmode, networks)) return r.newContainer(ctx, g.Config, options...) 
} diff --git a/pkg/netns/netns_linux.go b/pkg/netns/netns_linux.go index 1d6fb873c..18d638809 100644 --- a/pkg/netns/netns_linux.go +++ b/pkg/netns/netns_linux.go @@ -23,23 +23,42 @@ import ( "fmt" "os" "path" + "path/filepath" "runtime" "strings" "sync" "github.com/containernetworking/plugins/pkg/ns" + "github.com/containers/libpod/pkg/rootless" + "github.com/containers/libpod/pkg/util" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) -const nsRunDir = "/var/run/netns" +// get NSRunDir returns the dir of where to create the netNS. When running +// rootless, it needs to be at a location writable by user. +func getNSRunDir() (string, error) { + if rootless.IsRootless() { + rootlessDir, err := util.GetRuntimeDir() + if err != nil { + return "", err + } + return filepath.Join(rootlessDir, "netns"), nil + } + return "/var/run/netns", nil +} // NewNS creates a new persistent (bind-mounted) network namespace and returns // an object representing that namespace, without switching to it. func NewNS() (ns.NetNS, error) { + nsRunDir, err := getNSRunDir() + if err != nil { + return nil, err + } + b := make([]byte, 16) - _, err := rand.Reader.Read(b) + _, err = rand.Reader.Read(b) if err != nil { return nil, fmt.Errorf("failed to generate random netns name: %v", err) } @@ -127,7 +146,7 @@ func NewNS() (ns.NetNS, error) { // Put this thread back to the orig ns, since it might get reused (pre go1.10) defer func() { if err := origNS.Set(); err != nil { - logrus.Errorf("unable to set namespace: %q", err) + logrus.Warnf("unable to set namespace: %q", err) } }() @@ -150,6 +169,11 @@ func NewNS() (ns.NetNS, error) { // UnmountNS unmounts the NS held by the netns object func UnmountNS(ns ns.NetNS) error { + nsRunDir, err := getNSRunDir() + if err != nil { + return err + } + nsPath := ns.Path() // Only unmount if it's been bind-mounted (don't touch namespaces in /proc...) 
if strings.HasPrefix(nsPath, nsRunDir) { diff --git a/pkg/spec/createconfig.go b/pkg/spec/createconfig.go index c17172016..7c3195be4 100644 --- a/pkg/spec/createconfig.go +++ b/pkg/spec/createconfig.go @@ -275,7 +275,7 @@ func (c *CreateConfig) getContainerCreateOptions(runtime *libpod.Runtime, pod *l options = append(options, libpod.WithNetNSFrom(connectedCtr)) } else if !c.NetMode.IsHost() && !c.NetMode.IsNone() { hasUserns := c.UsernsMode.IsContainer() || c.UsernsMode.IsNS() || len(c.IDMappings.UIDMap) > 0 || len(c.IDMappings.GIDMap) > 0 - postConfigureNetNS := c.NetMode.IsSlirp4netns() || (hasUserns && !c.UsernsMode.IsHost()) + postConfigureNetNS := hasUserns && !c.UsernsMode.IsHost() options = append(options, libpod.WithNetNS(portBindings, postConfigureNetNS, string(c.NetMode), networks)) } -- cgit v1.2.3-54-g00ecf From f50ba201c4d54aa51dc53af89d53caf552a4b5bb Mon Sep 17 00:00:00 2001 From: gabi beyer Date: Tue, 27 Aug 2019 18:21:14 +0000 Subject: Add Kata Containers support Update documentation to show Kata Containers support is no longer a limitation with merging of commit 486a5b9 Signed-off-by: gabi beyer --- rootless.md | 1 - 1 file changed, 1 deletion(-) diff --git a/rootless.md b/rootless.md index 53463dccc..8cccb86eb 100644 --- a/rootless.md +++ b/rootless.md @@ -27,7 +27,6 @@ can easily fail * Can not use overlayfs driver, but does support fuse-overlayfs * Ubuntu supports non root overlay, but no other Linux distros do. * Only other supported driver is VFS. -* No KATA Container support * No CNI Support * CNI wants to modify IPTables, plus other network manipulation that requires CAP_SYS_ADMIN. 
* There is potential we could probably do some sort of blacklisting of the relevant plugins, and add a new plugin for rootless networking - slirp4netns as one example and there may be others -- cgit v1.2.3-54-g00ecf From 0115be19f5a7e1e29b78b13b769c6b33149e1437 Mon Sep 17 00:00:00 2001 From: gabi beyer Date: Fri, 23 Aug 2019 17:52:45 +0000 Subject: Make netns bind mount shared To 'avoid unknown FS magic on "/run/user/1000/netns/...": 1021994' make the network namespace bind-mount recursively shared, so the mount is back-propagated to the host. Signed-off-by: gabi beyer --- pkg/netns/netns_linux.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/netns/netns_linux.go b/pkg/netns/netns_linux.go index 18d638809..a62296549 100644 --- a/pkg/netns/netns_linux.go +++ b/pkg/netns/netns_linux.go @@ -152,8 +152,9 @@ func NewNS() (ns.NetNS, error) { // bind mount the netns from the current thread (from /proc) onto the // mount point. This causes the namespace to persist, even when there - // are no threads in the ns. - err = unix.Mount(getCurrentThreadNetNSPath(), nsPath, "none", unix.MS_BIND, "") + // are no threads in the ns. Make this a shared mount; it needs to be + // back-propogated to the host + err = unix.Mount(getCurrentThreadNetNSPath(), nsPath, "none", unix.MS_BIND|unix.MS_SHARED|unix.MS_REC, "") if err != nil { err = fmt.Errorf("failed to bind mount ns at %s: %v", nsPath, err) } -- cgit v1.2.3-54-g00ecf From 65d5a9823e0544141b356c77f56cd49cd599e0ba Mon Sep 17 00:00:00 2001 From: gabi beyer Date: Wed, 4 Sep 2019 18:34:08 +0000 Subject: add list mount tests Add two unit tests to determine whether mounts are being listed correctly. One tests that a created container is not listed until mounted. The second checks that running containers are mounted, and then no longer listed as mounted when they stop running. The final test creates three containers, mounts two, and checks that mount correctly only lists the two mounted. 
Signed-off-by: gabi beyer --- test/e2e/mount_test.go | 120 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/test/e2e/mount_test.go b/test/e2e/mount_test.go index 3197aa655..dda83ba31 100644 --- a/test/e2e/mount_test.go +++ b/test/e2e/mount_test.go @@ -156,4 +156,124 @@ var _ = Describe("Podman mount", func() { umount.WaitWithDefaultTimeout() Expect(umount.ExitCode()).To(Equal(0)) }) + + It("podman list mounted container", func() { + setup := podmanTest.Podman([]string{"create", ALPINE, "ls"}) + setup.WaitWithDefaultTimeout() + Expect(setup.ExitCode()).To(Equal(0)) + cid := setup.OutputToString() + + lmount := podmanTest.Podman([]string{"mount", "--notruncate"}) + lmount.WaitWithDefaultTimeout() + Expect(lmount.ExitCode()).To(Equal(0)) + Expect(lmount.OutputToString()).To(Equal("")) + + mount := podmanTest.Podman([]string{"mount", cid}) + mount.WaitWithDefaultTimeout() + Expect(mount.ExitCode()).To(Equal(0)) + + lmount = podmanTest.Podman([]string{"mount", "--notruncate"}) + lmount.WaitWithDefaultTimeout() + Expect(lmount.ExitCode()).To(Equal(0)) + Expect(lmount.OutputToString()).To(ContainSubstring(cid)) + + umount := podmanTest.Podman([]string{"umount", cid}) + umount.WaitWithDefaultTimeout() + Expect(umount.ExitCode()).To(Equal(0)) + }) + + It("podman list running container", func() { + SkipIfRootless() + + setup := podmanTest.Podman([]string{"run", "-dt", ALPINE, "top"}) + setup.WaitWithDefaultTimeout() + Expect(setup.ExitCode()).To(Equal(0)) + cid := setup.OutputToString() + + lmount := podmanTest.Podman([]string{"mount", "--notruncate"}) + lmount.WaitWithDefaultTimeout() + Expect(lmount.ExitCode()).To(Equal(0)) + Expect(lmount.OutputToString()).To(ContainSubstring(cid)) + + stop := podmanTest.Podman([]string{"stop", cid}) + stop.WaitWithDefaultTimeout() + Expect(stop.ExitCode()).To(Equal(0)) + + lmount = podmanTest.Podman([]string{"mount", "--notruncate"}) + lmount.WaitWithDefaultTimeout() + 
Expect(lmount.ExitCode()).To(Equal(0)) + Expect(lmount.OutputToString()).To(Equal("")) + }) + + It("podman list mulitple mounted containers", func() { + SkipIfRootless() + + setup := podmanTest.Podman([]string{"create", ALPINE, "ls"}) + setup.WaitWithDefaultTimeout() + Expect(setup.ExitCode()).To(Equal(0)) + cid1 := setup.OutputToString() + + setup = podmanTest.Podman([]string{"create", ALPINE, "ls"}) + setup.WaitWithDefaultTimeout() + Expect(setup.ExitCode()).To(Equal(0)) + cid2 := setup.OutputToString() + + setup = podmanTest.Podman([]string{"create", ALPINE, "ls"}) + setup.WaitWithDefaultTimeout() + Expect(setup.ExitCode()).To(Equal(0)) + cid3 := setup.OutputToString() + + lmount := podmanTest.Podman([]string{"mount", "--notruncate"}) + lmount.WaitWithDefaultTimeout() + Expect(lmount.ExitCode()).To(Equal(0)) + Expect(lmount.OutputToString()).To(Equal("")) + + mount := podmanTest.Podman([]string{"mount", cid1, cid3}) + mount.WaitWithDefaultTimeout() + Expect(mount.ExitCode()).To(Equal(0)) + + lmount = podmanTest.Podman([]string{"mount", "--notruncate"}) + lmount.WaitWithDefaultTimeout() + Expect(lmount.ExitCode()).To(Equal(0)) + Expect(lmount.OutputToString()).To(ContainSubstring(cid1)) + Expect(lmount.OutputToString()).ToNot(ContainSubstring(cid2)) + Expect(lmount.OutputToString()).To(ContainSubstring(cid3)) + + umount := podmanTest.Podman([]string{"umount", cid1, cid3}) + umount.WaitWithDefaultTimeout() + Expect(umount.ExitCode()).To(Equal(0)) + + lmount = podmanTest.Podman([]string{"mount", "--notruncate"}) + lmount.WaitWithDefaultTimeout() + Expect(lmount.ExitCode()).To(Equal(0)) + Expect(lmount.OutputToString()).To(Equal("")) + + }) + + It("podman list mounted container", func() { + SkipIfRootless() + + setup := podmanTest.Podman([]string{"create", ALPINE, "ls"}) + setup.WaitWithDefaultTimeout() + Expect(setup.ExitCode()).To(Equal(0)) + cid := setup.OutputToString() + + lmount := podmanTest.Podman([]string{"mount", "--notruncate"}) + 
lmount.WaitWithDefaultTimeout() + Expect(lmount.ExitCode()).To(Equal(0)) + Expect(lmount.OutputToString()).To(Equal("")) + + mount := podmanTest.Podman([]string{"mount", cid}) + mount.WaitWithDefaultTimeout() + Expect(mount.ExitCode()).To(Equal(0)) + + lmount = podmanTest.Podman([]string{"mount", "--notruncate"}) + lmount.WaitWithDefaultTimeout() + Expect(lmount.ExitCode()).To(Equal(0)) + Expect(lmount.OutputToString()).To(ContainSubstring(cid)) + + umount := podmanTest.Podman([]string{"umount", cid}) + umount.WaitWithDefaultTimeout() + Expect(umount.ExitCode()).To(Equal(0)) + }) }) -- cgit v1.2.3-54-g00ecf