summaryrefslogtreecommitdiff
path: root/libpod
diff options
context:
space:
mode:
authorAkihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>2019-11-28 23:33:42 +0900
committerAkihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>2020-01-08 19:35:17 +0900
commitda7595a69fc15d131c9d8123d0a165bdde4232b6 (patch)
tree57985f4d9fbc903610f31f3076011cd413d82fdf /libpod
parentc41fd09a8da3a96bc0e58f9f29f87b9bdf30264d (diff)
downloadpodman-da7595a69fc15d131c9d8123d0a165bdde4232b6.tar.gz
podman-da7595a69fc15d131c9d8123d0a165bdde4232b6.tar.bz2
podman-da7595a69fc15d131c9d8123d0a165bdde4232b6.zip
rootless: use RootlessKit port forwarder
RootlessKit port forwarder has a lot of advantages over the slirp4netns port forwarder: * Very high throughput. Benchmark result on Travis: socat: 5.2 Gbps, slirp4netns: 8.3 Gbps, RootlessKit: 27.3 Gbps (https://travis-ci.org/rootless-containers/rootlesskit/builds/597056377) * Connections from the host are treated as 127.0.0.1 rather than 10.0.2.2 in the namespace. No UDP issue (#4586) * No tcp_rmem issue (#4537) * Probably works with IPv6. Even if not, it is trivial to support IPv6. (#4311) * Easily extensible for future support of SCTP * Easily extensible for future support of `lxc-user-nic` SUID network RootlessKit port forwarder has been already adopted as the default port forwarder by Rootless Docker/Moby, and no issue has been reported AFAIK. As the port forwarder is imported as a Go package, no `rootlesskit` binary is required for Podman. Fix #4586 May-fix #4559 Fix #4537 May-fix #4311 See https://github.com/rootless-containers/rootlesskit/blob/v0.7.0/pkg/port/builtin/builtin.go Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
Diffstat (limited to 'libpod')
-rw-r--r--libpod/container.go3
-rw-r--r--libpod/networking_linux.go205
-rw-r--r--libpod/oci_conmon_linux.go9
3 files changed, 115 insertions, 102 deletions
diff --git a/libpod/container.go b/libpod/container.go
index 2693190b5..edf72f4ee 100644
--- a/libpod/container.go
+++ b/libpod/container.go
@@ -135,6 +135,9 @@ type Container struct {
rootlessSlirpSyncR *os.File
rootlessSlirpSyncW *os.File
+ rootlessPortSyncR *os.File
+ rootlessPortSyncW *os.File
+
// A restored container should have the same IP address as before
// being checkpointed. If requestedIP is set it will be used instead
// of config.StaticIP.
diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go
index a68338dbb..06b3fe957 100644
--- a/libpod/networking_linux.go
+++ b/libpod/networking_linux.go
@@ -3,8 +3,10 @@
package libpod
import (
+ "bytes"
"crypto/rand"
"fmt"
+ "io"
"io/ioutil"
"net"
"os"
@@ -20,6 +22,7 @@ import (
"github.com/containers/libpod/pkg/errorhandling"
"github.com/containers/libpod/pkg/netns"
"github.com/containers/libpod/pkg/rootless"
+ "github.com/containers/libpod/pkg/rootlessport"
"github.com/cri-o/ocicni/pkg/ocicni"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
@@ -151,19 +154,6 @@ func (r *Runtime) createNetNS(ctr *Container) (n ns.NetNS, q []*cnitypes.Result,
return ctrNS, networkStatus, err
}
-type slirp4netnsCmdArg struct {
- Proto string `json:"proto,omitempty"`
- HostAddr string `json:"host_addr"`
- HostPort int32 `json:"host_port"`
- GuestAddr string `json:"guest_addr"`
- GuestPort int32 `json:"guest_port"`
-}
-
-type slirp4netnsCmd struct {
- Execute string `json:"execute"`
- Args slirp4netnsCmdArg `json:"arguments"`
-}
-
func checkSlirpFlags(path string) (bool, bool, bool, error) {
cmd := exec.Command(path, "--help")
out, err := cmd.CombinedOutput()
@@ -194,13 +184,9 @@ func (r *Runtime) setupRootlessNetNS(ctr *Container) (err error) {
defer errorhandling.CloseQuiet(syncW)
havePortMapping := len(ctr.Config().PortMappings) > 0
- apiSocket := filepath.Join(ctr.runtime.config.TmpDir, fmt.Sprintf("%s.net", ctr.config.ID))
logPath := filepath.Join(ctr.runtime.config.TmpDir, fmt.Sprintf("slirp4netns-%s.log", ctr.config.ID))
cmdArgs := []string{}
- if havePortMapping {
- cmdArgs = append(cmdArgs, "--api-socket", apiSocket)
- }
dhp, mtu, sandbox, err := checkSlirpFlags(path)
if err != nil {
return errors.Wrapf(err, "error checking slirp4netns binary %s: %q", path, err)
@@ -221,15 +207,19 @@ func (r *Runtime) setupRootlessNetNS(ctr *Container) (err error) {
// -e, --exit-fd=FD specify the FD for terminating slirp4netns
// -r, --ready-fd=FD specify the FD to write to when the initialization steps are finished
cmdArgs = append(cmdArgs, "-c", "-e", "3", "-r", "4")
+ netnsPath := ""
if !ctr.config.PostConfigureNetNS {
ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe()
if err != nil {
return errors.Wrapf(err, "failed to create rootless network sync pipe")
}
- cmdArgs = append(cmdArgs, "--netns-type=path", ctr.state.NetNS.Path(), "tap0")
+ netnsPath = ctr.state.NetNS.Path()
+ cmdArgs = append(cmdArgs, "--netns-type=path", netnsPath, "tap0")
} else {
defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncR)
defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncW)
+ netnsPath = fmt.Sprintf("/proc/%d/ns/net", ctr.state.PID)
+ // we don't use --netns-path here (unavailable for slirp4netns < v0.4)
cmdArgs = append(cmdArgs, fmt.Sprintf("%d", ctr.state.PID), "tap0")
}
@@ -269,11 +259,27 @@ func (r *Runtime) setupRootlessNetNS(ctr *Container) (err error) {
}
}()
+ if err := waitForSync(syncR, cmd, logFile, 1*time.Second); err != nil {
+ return err
+ }
+
+ if havePortMapping {
+ return r.setupRootlessPortMapping(ctr, netnsPath)
+ }
+ return nil
+}
+
+func waitForSync(syncR *os.File, cmd *exec.Cmd, logFile io.ReadSeeker, timeout time.Duration) error {
+ prog := filepath.Base(cmd.Path)
+ if len(cmd.Args) > 0 {
+ prog = cmd.Args[0]
+ }
b := make([]byte, 16)
for {
- if err := syncR.SetDeadline(time.Now().Add(1 * time.Second)); err != nil {
- return errors.Wrapf(err, "error setting slirp4netns pipe timeout")
+ if err := syncR.SetDeadline(time.Now().Add(timeout)); err != nil {
+ return errors.Wrapf(err, "error setting %s pipe timeout", prog)
}
+ // FIXME: return err as soon as proc exits, without waiting for timeout
if _, err := syncR.Read(b); err == nil {
break
} else {
@@ -282,7 +288,7 @@ func (r *Runtime) setupRootlessNetNS(ctr *Container) (err error) {
var status syscall.WaitStatus
pid, err := syscall.Wait4(cmd.Process.Pid, &status, syscall.WNOHANG, nil)
if err != nil {
- return errors.Wrapf(err, "failed to read slirp4netns process status")
+ return errors.Wrapf(err, "failed to read %s process status", prog)
}
if pid != cmd.Process.Pid {
continue
@@ -294,100 +300,86 @@ func (r *Runtime) setupRootlessNetNS(ctr *Container) (err error) {
}
logContent, err := ioutil.ReadAll(logFile)
if err != nil {
- return errors.Wrapf(err, "slirp4netns failed")
+ return errors.Wrapf(err, "%s failed", prog)
}
- return errors.Errorf("slirp4netns failed: %q", logContent)
+ return errors.Errorf("%s failed: %q", prog, logContent)
}
if status.Signaled() {
- return errors.New("slirp4netns killed by signal")
+ return errors.Errorf("%s killed by signal", prog)
}
continue
}
- return errors.Wrapf(err, "failed to read from slirp4netns sync pipe")
+ return errors.Wrapf(err, "failed to read from %s sync pipe", prog)
}
}
+ return nil
+}
- if havePortMapping {
- const pidWaitTimeout = 60 * time.Second
- chWait := make(chan error)
- go func() {
- interval := 25 * time.Millisecond
- for i := time.Duration(0); i < pidWaitTimeout; i += interval {
- // Check if the process is still running.
- var status syscall.WaitStatus
- pid, err := syscall.Wait4(cmd.Process.Pid, &status, syscall.WNOHANG, nil)
- if err != nil {
- break
- }
- if pid != cmd.Process.Pid {
- continue
- }
- if status.Exited() || status.Signaled() {
- chWait <- fmt.Errorf("slirp4netns exited with status %d", status.ExitStatus())
- }
- time.Sleep(interval)
- }
- }()
- defer close(chWait)
+func (r *Runtime) setupRootlessPortMapping(ctr *Container, netnsPath string) (err error) {
+ syncR, syncW, err := os.Pipe()
+ if err != nil {
+ return errors.Wrapf(err, "failed to open pipe")
+ }
+ defer errorhandling.CloseQuiet(syncR)
+ defer errorhandling.CloseQuiet(syncW)
- // wait that API socket file appears before trying to use it.
- if _, err := WaitForFile(apiSocket, chWait, pidWaitTimeout); err != nil {
- return errors.Wrapf(err, "waiting for slirp4nets to create the api socket file %s", apiSocket)
- }
+ logPath := filepath.Join(ctr.runtime.config.TmpDir, fmt.Sprintf("rootlessport-%s.log", ctr.config.ID))
+ logFile, err := os.Create(logPath)
+ if err != nil {
+ return errors.Wrapf(err, "failed to open rootlessport log file %s", logPath)
+ }
+ defer logFile.Close()
+ // Unlink immediately the file so we won't need to worry about cleaning it up later.
+ // It is still accessible through the open fd logFile.
+ if err := os.Remove(logPath); err != nil {
+ return errors.Wrapf(err, "delete file %s", logPath)
+ }
- // for each port we want to add we need to open a connection to the slirp4netns control socket
- // and send the add_hostfwd command.
- for _, i := range ctr.config.PortMappings {
- conn, err := net.Dial("unix", apiSocket)
- if err != nil {
- return errors.Wrapf(err, "cannot open connection to %s", apiSocket)
- }
- defer func() {
- if err := conn.Close(); err != nil {
- logrus.Errorf("unable to close connection: %q", err)
- }
- }()
- hostIP := i.HostIP
- if hostIP == "" {
- hostIP = "0.0.0.0"
- }
- cmd := slirp4netnsCmd{
- Execute: "add_hostfwd",
- Args: slirp4netnsCmdArg{
- Proto: i.Protocol,
- HostAddr: hostIP,
- HostPort: i.HostPort,
- GuestPort: i.ContainerPort,
- },
- }
- // create the JSON payload and send it. Mark the end of request shutting down writes
- // to the socket, as requested by slirp4netns.
- data, err := json.Marshal(&cmd)
- if err != nil {
- return errors.Wrapf(err, "cannot marshal JSON for slirp4netns")
- }
- if _, err := conn.Write([]byte(fmt.Sprintf("%s\n", data))); err != nil {
- return errors.Wrapf(err, "cannot write to control socket %s", apiSocket)
- }
- if err := conn.(*net.UnixConn).CloseWrite(); err != nil {
- return errors.Wrapf(err, "cannot shutdown the socket %s", apiSocket)
- }
- buf := make([]byte, 2048)
- readLength, err := conn.Read(buf)
- if err != nil {
- return errors.Wrapf(err, "cannot read from control socket %s", apiSocket)
- }
- // if there is no 'error' key in the received JSON data, then the operation was
- // successful.
- var y map[string]interface{}
- if err := json.Unmarshal(buf[0:readLength], &y); err != nil {
- return errors.Wrapf(err, "error parsing error status from slirp4netns")
- }
- if e, found := y["error"]; found {
- return errors.Errorf("error from slirp4netns while setting up port redirection: %v", e)
- }
+ ctr.rootlessPortSyncR, ctr.rootlessPortSyncW, err = os.Pipe()
+ if err != nil {
+ return errors.Wrapf(err, "failed to create rootless port sync pipe")
+ }
+ cfg := rootlessport.Config{
+ Mappings: ctr.config.PortMappings,
+ NetNSPath: netnsPath,
+ ExitFD: 3,
+ ReadyFD: 4,
+ }
+ cfgJSON, err := json.Marshal(cfg)
+ if err != nil {
+ return err
+ }
+ cfgR := bytes.NewReader(cfgJSON)
+ var stdout bytes.Buffer
+ cmd := exec.Command(fmt.Sprintf("/proc/%d/exe", os.Getpid()))
+ cmd.Args = []string{rootlessport.ReexecKey}
+ // Leak one end of the pipe in rootlessport process, the other will be sent to conmon
+ cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessPortSyncR, syncW)
+ cmd.Stdin = cfgR
+ // stdout is for human-readable error, stderr is for debug log
+ cmd.Stdout = &stdout
+ cmd.Stderr = io.MultiWriter(logFile, &logrusDebugWriter{"rootlessport: "})
+ cmd.SysProcAttr = &syscall.SysProcAttr{
+ Setpgid: true,
+ }
+ if err := cmd.Start(); err != nil {
+ return errors.Wrapf(err, "failed to start rootlessport process")
+ }
+ defer func() {
+ if err := cmd.Process.Release(); err != nil {
+ logrus.Errorf("unable to release rootlessport process: %q", err)
}
+ }()
+ if err := waitForSync(syncR, cmd, logFile, 3*time.Second); err != nil {
+ stdoutStr := stdout.String()
+ if stdoutStr != "" {
+ // err contains full debug log and too verbose, so return stdoutStr
+ logrus.Debug(err)
+ return errors.Errorf("failed to expose ports via rootlessport: %q", stdoutStr)
+ }
+ return err
}
+ logrus.Debug("rootlessport is ready")
return nil
}
@@ -587,3 +579,12 @@ func (c *Container) getContainerNetworkInfo(data *InspectContainerData) *Inspect
}
return data
}
+
+type logrusDebugWriter struct {
+ prefix string
+}
+
+func (w *logrusDebugWriter) Write(p []byte) (int, error) {
+ logrus.Debugf("%s%s", w.prefix, string(p))
+ return len(p), nil
+}
diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go
index 37aa71cbb..ef881802c 100644
--- a/libpod/oci_conmon_linux.go
+++ b/libpod/oci_conmon_linux.go
@@ -1000,6 +1000,15 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
}
// Leak one end in conmon, the other one will be leaked into slirp4netns
cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessSlirpSyncW)
+
+ if ctr.rootlessPortSyncR != nil {
+ defer errorhandling.CloseQuiet(ctr.rootlessPortSyncR)
+ }
+ if ctr.rootlessPortSyncW != nil {
+ defer errorhandling.CloseQuiet(ctr.rootlessPortSyncW)
+ // Leak one end in conmon, the other one will be leaked into rootlessport
+ cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessPortSyncW)
+ }
}
err = startCommandGivenSelinux(cmd)