From da7595a69fc15d131c9d8123d0a165bdde4232b6 Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Thu, 28 Nov 2019 23:33:42 +0900 Subject: rootless: use RootlessKit port forwarder RootlessKit port forwarder has a lot of advantages over the slirp4netns port forwarder: * Very high throughput. Benchmark result on Travis: socat: 5.2 Gbps, slirp4netns: 8.3 Gbps, RootlessKit: 27.3 Gbps (https://travis-ci.org/rootless-containers/rootlesskit/builds/597056377) * Connections from the host are treated as 127.0.0.1 rather than 10.0.2.2 in the namespace. No UDP issue (#4586) * No tcp_rmem issue (#4537) * Probably works with IPv6. Even if not, it is trivial to support IPv6. (#4311) * Easily extensible for future support of SCTP * Easily extensible for future support of `lxc-user-nic` SUID network RootlessKit port forwarder has been already adopted as the default port forwarder by Rootless Docker/Moby, and no issue has been reported AFAIK. As the port forwarder is imported as a Go package, no `rootlesskit` binary is required for Podman. Fix #4586 May-fix #4559 Fix #4537 May-fix #4311 See https://github.com/rootless-containers/rootlesskit/blob/v0.7.0/pkg/port/builtin/builtin.go Signed-off-by: Akihiro Suda --- pkg/rootlessport/rootlessport_linux.go | 262 +++++++++++++++++++++++++++++++++ 1 file changed, 262 insertions(+) create mode 100644 pkg/rootlessport/rootlessport_linux.go (limited to 'pkg') diff --git a/pkg/rootlessport/rootlessport_linux.go b/pkg/rootlessport/rootlessport_linux.go new file mode 100644 index 000000000..655d1a448 --- /dev/null +++ b/pkg/rootlessport/rootlessport_linux.go @@ -0,0 +1,262 @@ +// +build linux + +// Package rootlessport provides reexec for RootlessKit-based port forwarder. +// +// init() contains reexec.Register() for ReexecKey . +// +// The reexec requires Config to be provided via stdin. +// +// The reexec writes human-readable error message on stdout on error. +// +// Debug log is printed on stderr. +package rootlessport + +import ( + "context" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "os" + "os/exec" + "syscall" + + "github.com/containernetworking/plugins/pkg/ns" + "github.com/containers/storage/pkg/reexec" + "github.com/cri-o/ocicni/pkg/ocicni" + "github.com/pkg/errors" + rkport "github.com/rootless-containers/rootlesskit/pkg/port" + rkbuiltin "github.com/rootless-containers/rootlesskit/pkg/port/builtin" + rkportutil "github.com/rootless-containers/rootlesskit/pkg/port/portutil" + "github.com/sirupsen/logrus" +) + +const ( + // ReexecKey is the reexec key for the parent process. + ReexecKey = "containers-rootlessport" + // reexecChildKey is used internally for the second reexec + reexecChildKey = "containers-rootlessport-child" + reexecChildEnvOpaque = "_CONTAINERS_ROOTLESSPORT_CHILD_OPAQUE" +) + +// Config needs to be provided to the process via stdin as a JSON string. +// stdin needs to be closed after the message has been written. +type Config struct { + Mappings []ocicni.PortMapping + NetNSPath string + ExitFD int + ReadyFD int +} + +func init() { + reexec.Register(ReexecKey, func() { + if err := parent(); err != nil { + fmt.Println(err) + os.Exit(1) + } + }) + reexec.Register(reexecChildKey, func() { + if err := child(); err != nil { + fmt.Println(err) + os.Exit(1) + } + }) + +} + +func loadConfig(r io.Reader) (*Config, io.ReadCloser, io.WriteCloser, error) { + stdin, err := ioutil.ReadAll(r) + if err != nil { + return nil, nil, nil, err + } + var cfg Config + if err := json.Unmarshal(stdin, &cfg); err != nil { + return nil, nil, nil, err + } + if cfg.NetNSPath == "" { + return nil, nil, nil, errors.New("missing NetNSPath") + } + if cfg.ExitFD <= 0 { + return nil, nil, nil, errors.New("missing ExitFD") + } + exitFile := os.NewFile(uintptr(cfg.ExitFD), "exitfile") + if exitFile == nil { + return nil, nil, nil, errors.New("invalid ExitFD") + } + if cfg.ReadyFD <= 0 { + return nil, nil, nil, errors.New("missing ReadyFD") + } + readyFile := os.NewFile(uintptr(cfg.ReadyFD), "readyfile") + if readyFile == nil { + return nil, nil, nil, errors.New("invalid ReadyFD") + } + return &cfg, exitFile, readyFile, nil +} + +func parent() error { + // load config from stdin + cfg, exitR, readyW, err := loadConfig(os.Stdin) + if err != nil { + return err + } + + // create the parent driver + stateDir, err := ioutil.TempDir("", "rootlessport") + if err != nil { + return err + } + driver, err := rkbuiltin.NewParentDriver(&logrusWriter{prefix: "parent: "}, stateDir) + if err != nil { + return err + } + initComplete := make(chan struct{}) + quit := make(chan struct{}) + errCh := make(chan error) + // start the parent driver. initComplete will be closed when the child connected to the parent. + logrus.Infof("starting parent driver") + go func() { + driverErr := driver.RunParentDriver(initComplete, quit, nil) + if driverErr != nil { + logrus.WithError(driverErr).Warn("parent driver exited") + } + errCh <- driverErr + }() + opaque := driver.OpaqueForChild() + logrus.Infof("opaque=%+v", opaque) + opaqueJSON, err := json.Marshal(opaque) + if err != nil { + return err + } + childQuitR, childQuitW, err := os.Pipe() + if err != nil { + return err + } + defer func() { + // stop the child + logrus.Info("stopping child driver") + if err := childQuitW.Close(); err != nil { + logrus.WithError(err).Warn("unable to close childQuitW") + } + }() + + // reexec the child process in the child netns + cmd := exec.Command(fmt.Sprintf("/proc/%d/exe", os.Getpid())) + cmd.Args = []string{reexecChildKey} + cmd.Stdin = childQuitR + cmd.Stdout = &logrusWriter{prefix: "child"} + cmd.Stderr = cmd.Stdout + cmd.Env = append(os.Environ(), reexecChildEnvOpaque+"="+string(opaqueJSON)) + cmd.SysProcAttr = &syscall.SysProcAttr{ + Pdeathsig: syscall.SIGTERM, + } + childNS, err := ns.GetNS(cfg.NetNSPath) + if err != nil { + return err + } + if err := childNS.Do(func(_ ns.NetNS) error { + logrus.Infof("starting child driver in child netns (%q %v)", cmd.Path, cmd.Args) + return cmd.Start() + }); err != nil { + return err + } + + logrus.Info("waiting for initComplete") + // wait for the child to connect to the parent + select { + case <-initComplete: + logrus.Infof("initComplete is closed; parent and child established the communication channel") + case err := <-errCh: + return err + } + defer func() { + logrus.Info("stopping parent driver") + quit <- struct{}{} + if err := <-errCh; err != nil { + logrus.WithError(err).Warn("parent driver returned error on exit") + } + }() + + // let parent expose ports + logrus.Infof("exposing ports %v", cfg.Mappings) + if err := exposePorts(driver, cfg.Mappings); err != nil { + return err + } + + // write and close ReadyFD (convention is same as slirp4netns --ready-fd) + logrus.Info("ready") + if _, err := readyW.Write([]byte("1")); err != nil { + return err + } + if err := readyW.Close(); err != nil { + return err + } + + // wait for ExitFD to be closed + logrus.Info("waiting for exitfd to be closed") + if _, err := ioutil.ReadAll(exitR); err != nil { + return err + } + return nil +} + +func exposePorts(pm rkport.Manager, portMappings []ocicni.PortMapping) error { + ctx := context.TODO() + for _, i := range portMappings { + hostIP := i.HostIP + if hostIP == "" { + hostIP = "0.0.0.0" + } + spec := rkport.Spec{ + Proto: i.Protocol, + ParentIP: hostIP, + ParentPort: int(i.HostPort), + ChildPort: int(i.ContainerPort), + } + if err := rkportutil.ValidatePortSpec(spec, nil); err != nil { + return err + } + if _, err := pm.AddPort(ctx, spec); err != nil { + return err + } + } + return nil +} + +func child() error { + // load the config from the parent + var opaque map[string]string + if err := json.Unmarshal([]byte(os.Getenv(reexecChildEnvOpaque)), &opaque); err != nil { + return err + } + + // start the child driver + quit := make(chan struct{}) + errCh := make(chan error) + go func() { + d := rkbuiltin.NewChildDriver(os.Stderr) + dErr := d.RunChildDriver(opaque, quit) + errCh <- dErr + }() + defer func() { + logrus.Info("stopping child driver") + quit <- struct{}{} + if err := <-errCh; err != nil { + logrus.WithError(err).Warn("child driver returned error on exit") + } + }() + + // wait for stdin to be closed + if _, err := ioutil.ReadAll(os.Stdin); err != nil { + return err + } + return nil +} + +type logrusWriter struct { + prefix string +} + +func (w *logrusWriter) Write(p []byte) (int, error) { + logrus.Infof("%s%s", w.prefix, string(p)) + return len(p), nil +} -- cgit v1.2.3-54-g00ecf