diff options
-rw-r--r-- | libpod/container_linux.go | 14 | ||||
-rw-r--r-- | libpod/networking_linux.go | 46 | ||||
-rw-r--r-- | pkg/netns/netns_linux.go | 161 |
3 files changed, 198 insertions, 23 deletions
diff --git a/libpod/container_linux.go b/libpod/container_linux.go index 2330f27a7..1b1b3a1d9 100644 --- a/libpod/container_linux.go +++ b/libpod/container_linux.go @@ -21,9 +21,10 @@ func (ctr *Container) setNamespace(netNSPath string, newState *containerState) e if ctr.state.NetNS != nil && netNSPath == ctr.state.NetNS.Path() { newState.NetNS = ctr.state.NetNS } else { - // Tear down the existing namespace - if err := ctr.runtime.teardownNetNS(ctr); err != nil { - logrus.Warnf(err.Error()) + // Close the existing namespace. + // Whoever removed it from the database already tore it down. + if err := ctr.runtime.closeNetNS(ctr); err != nil { + return err } // Open the new network namespace @@ -37,9 +38,10 @@ func (ctr *Container) setNamespace(netNSPath string, newState *containerState) e } } else { // The container no longer has a network namespace - // Tear down the old one - if err := ctr.runtime.teardownNetNS(ctr); err != nil { - logrus.Warnf(err.Error()) + // Close the old one, whoever removed it from the DB should have + // cleaned it up already. + if err := ctr.runtime.closeNetNS(ctr); err != nil { + return err } } return nil diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go index d9eb87572..dbc68e04b 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -9,13 +9,13 @@ import ( "path/filepath" "strconv" "strings" - "syscall" cnitypes "github.com/containernetworking/cni/pkg/types/current" "github.com/containernetworking/plugins/pkg/ns" "github.com/cri-o/ocicni/pkg/ocicni" "github.com/pkg/errors" "github.com/projectatomic/libpod/pkg/inspect" + "github.com/projectatomic/libpod/pkg/netns" "github.com/projectatomic/libpod/utils" "github.com/sirupsen/logrus" "github.com/vishvananda/netlink" @@ -77,7 +77,7 @@ func (r *Runtime) configureNetNS(ctr *Container, ctrNS ns.NetNS) (err error) { // Create and configure a new network namespace for a container func (r *Runtime) createNetNS(ctr *Container) (err error) { - ctrNS, err := ns.NewNS() + ctrNS, err := netns.NewNS() if err != nil { return errors.Wrapf(err, "error creating network namespace for container %s", ctr.ID()) } @@ -148,7 +148,27 @@ func joinNetNS(path string) (ns.NetNS, error) { return ns, nil } -// Tear down a network namespace +// Close a network namespace. +// Differs from teardownNetNS() in that it will not attempt to undo the setup of +// the namespace, but will instead only close the open file descriptor +func (r *Runtime) closeNetNS(ctr *Container) error { + if ctr.state.NetNS == nil { + // The container has no network namespace, we're set + return nil + } + + if err := ctr.state.NetNS.Close(); err != nil { + return errors.Wrapf(err, "error closing network namespace for container %s", ctr.ID()) + } + + ctr.state.NetNS = nil + + return nil +} + +// Tear down a network namespace, undoing all state associated with it. +// The CNI firewall rules will be removed, the namespace will be unmounted, +// and the file descriptor associated with it closed. func (r *Runtime) teardownNetNS(ctr *Container) error { if ctr.state.NetNS == nil { // The container has no network namespace, we're set @@ -173,27 +193,19 @@ func (r *Runtime) teardownNetNS(ctr *Container) error { // The network may have already been torn down, so don't fail here, just log if err := r.netPlugin.TearDownPod(podNetwork); err != nil { - logrus.Errorf("Failed to tear down network namespace for container %s: %v", ctr.ID(), err) + return errors.Wrapf(err, "error tearing down CNI namespace configuration for container %s", ctr.ID()) } - nsPath := ctr.state.NetNS.Path() + // First unmount the namespace + if err := netns.UnmountNS(ctr.state.NetNS); err != nil { + return errors.Wrapf(err, "error unmounting network namespace for container %s", ctr.ID()) + } + // Now close the open file descriptor if err := ctr.state.NetNS.Close(); err != nil { return errors.Wrapf(err, "error closing network namespace for container %s", ctr.ID()) } - // We need to unconditionally try to unmount/remove the namespace - // because we may be in a separate process from the one that created the - // namespace, and Close() will only do that if it is the same process. - if err := unix.Unmount(nsPath, unix.MNT_DETACH); err != nil { - if err != syscall.EINVAL && err != syscall.ENOENT { - return errors.Wrapf(err, "error unmounting network namespace %s for container %s", nsPath, ctr.ID()) - } - } - if err := os.RemoveAll(nsPath); err != nil && !os.IsNotExist(err) { - return errors.Wrapf(err, "error removing network namespace %s for container %s", nsPath, ctr.ID()) - } - ctr.state.NetNS = nil return nil diff --git a/pkg/netns/netns_linux.go b/pkg/netns/netns_linux.go new file mode 100644 index 000000000..f88939f11 --- /dev/null +++ b/pkg/netns/netns_linux.go @@ -0,0 +1,161 @@ +// Copyright 2018 CNI authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file was originally a part of the containernetworking/plugins +// repository. +// It was copied here and modified for local use by the libpod maintainers. + +package netns + +import ( + "crypto/rand" + "fmt" + "os" + "path" + "runtime" + "strings" + "sync" + + "github.com/containernetworking/plugins/pkg/ns" + "golang.org/x/sys/unix" +) + +const nsRunDir = "/var/run/netns" + +// Creates a new persistent (bind-mounted) network namespace and returns an object +// representing that namespace, without switching to it. +func NewNS() (ns.NetNS, error) { + + b := make([]byte, 16) + _, err := rand.Reader.Read(b) + if err != nil { + return nil, fmt.Errorf("failed to generate random netns name: %v", err) + } + + // Create the directory for mounting network namespaces + // This needs to be a shared mountpoint in case it is mounted in to + // other namespaces (containers) + err = os.MkdirAll(nsRunDir, 0755) + if err != nil { + return nil, err + } + + // Remount the namespace directory shared. This will fail if it is not + // already a mountpoint, so bind-mount it on to itself to "upgrade" it + // to a mountpoint. + err = unix.Mount("", nsRunDir, "none", unix.MS_SHARED|unix.MS_REC, "") + if err != nil { + if err != unix.EINVAL { + return nil, fmt.Errorf("mount --make-rshared %s failed: %q", nsRunDir, err) + } + + // Recursively remount /var/run/netns on itself. The recursive flag is + // so that any existing netns bindmounts are carried over. + err = unix.Mount(nsRunDir, nsRunDir, "none", unix.MS_BIND|unix.MS_REC, "") + if err != nil { + return nil, fmt.Errorf("mount --rbind %s %s failed: %q", nsRunDir, nsRunDir, err) + } + + // Now we can make it shared + err = unix.Mount("", nsRunDir, "none", unix.MS_SHARED|unix.MS_REC, "") + if err != nil { + return nil, fmt.Errorf("mount --make-rshared %s failed: %q", nsRunDir, err) + } + + } + + nsName := fmt.Sprintf("cni-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:]) + + // create an empty file at the mount point + nsPath := path.Join(nsRunDir, nsName) + mountPointFd, err := os.Create(nsPath) + if err != nil { + return nil, err + } + mountPointFd.Close() + + // Ensure the mount point is cleaned up on errors; if the namespace + // was successfully mounted this will have no effect because the file + // is in-use + defer os.RemoveAll(nsPath) + + var wg sync.WaitGroup + wg.Add(1) + + // do namespace work in a dedicated goroutine, so that we can safely + // Lock/Unlock OSThread without upsetting the lock/unlock state of + // the caller of this function + go (func() { + defer wg.Done() + runtime.LockOSThread() + // Don't unlock. By not unlocking, golang will kill the OS thread when the + // goroutine is done (for go1.10+) + + var origNS ns.NetNS + origNS, err = ns.GetNS(getCurrentThreadNetNSPath()) + if err != nil { + return + } + defer origNS.Close() + + // create a new netns on the current thread + err = unix.Unshare(unix.CLONE_NEWNET) + if err != nil { + return + } + + // Put this thread back to the orig ns, since it might get reused (pre go1.10) + defer origNS.Set() + + // bind mount the netns from the current thread (from /proc) onto the + // mount point. This causes the namespace to persist, even when there + // are no threads in the ns. + err = unix.Mount(getCurrentThreadNetNSPath(), nsPath, "none", unix.MS_BIND, "") + if err != nil { + err = fmt.Errorf("failed to bind mount ns at %s: %v", nsPath, err) + } + })() + wg.Wait() + + if err != nil { + return nil, fmt.Errorf("failed to create namespace: %v", err) + } + + return ns.GetNS(nsPath) +} + +// UnmountNS unmounts the NS held by the netns object +func UnmountNS(ns ns.NetNS) error { + nsPath := ns.Path() + // Only unmount if it's been bind-mounted (don't touch namespaces in /proc...) + if strings.HasPrefix(nsPath, nsRunDir) { + if err := unix.Unmount(nsPath, unix.MNT_DETACH); err != nil { + return fmt.Errorf("failed to unmount NS: at %s: %v", nsPath, err) + } + + if err := os.Remove(nsPath); err != nil { + return fmt.Errorf("failed to remove ns path %s: %v", nsPath, err) + } + } + + return nil +} + +// getCurrentThreadNetNSPath copied from pkg/ns +func getCurrentThreadNetNSPath() string { + // /proc/self/ns/net returns the namespace of the main thread, not + // of whatever thread this goroutine is running on. Make sure we + // use the thread's net namespace since the thread is switching around + return fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid()) +} |