summaryrefslogtreecommitdiff
path: root/vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent
diff options
context:
space:
mode:
authorAkihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>2019-11-28 23:33:42 +0900
committerAkihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>2020-01-08 19:35:17 +0900
commitda7595a69fc15d131c9d8123d0a165bdde4232b6 (patch)
tree57985f4d9fbc903610f31f3076011cd413d82fdf /vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent
parentc41fd09a8da3a96bc0e58f9f29f87b9bdf30264d (diff)
downloadpodman-da7595a69fc15d131c9d8123d0a165bdde4232b6.tar.gz
podman-da7595a69fc15d131c9d8123d0a165bdde4232b6.tar.bz2
podman-da7595a69fc15d131c9d8123d0a165bdde4232b6.zip
rootless: use RootlessKit port forwarder
RootlessKit port forwarder has a lot of advantages over the slirp4netns port forwarder: * Very high throughput. Benchmark result on Travis: socat: 5.2 Gbps, slirp4netns: 8.3 Gbps, RootlessKit: 27.3 Gbps (https://travis-ci.org/rootless-containers/rootlesskit/builds/597056377) * Connections from the host are treated as 127.0.0.1 rather than 10.0.2.2 in the namespace. No UDP issue (#4586) * No tcp_rmem issue (#4537) * Probably works with IPv6. Even if not, it is trivial to support IPv6. (#4311) * Easily extensible for future support of SCTP * Easily extensible for future support of `lxc-user-nic` SUID network RootlessKit port forwarder has been already adopted as the default port forwarder by Rootless Docker/Moby, and no issue has been reported AFAIK. As the port forwarder is imported as a Go package, no `rootlesskit` binary is required for Podman. Fix #4586 May-fix #4559 Fix #4537 May-fix #4311 See https://github.com/rootless-containers/rootlesskit/blob/v0.7.0/pkg/port/builtin/builtin.go Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
Diffstat (limited to 'vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent')
-rw-r--r--vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/parent.go145
-rw-r--r--vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/tcp/tcp.go104
-rw-r--r--vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/udp/udp.go60
-rw-r--r--vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/udp/udpproxy/udp_proxy.go150
4 files changed, 459 insertions, 0 deletions
diff --git a/vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/parent.go b/vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/parent.go
new file mode 100644
index 000000000..893bf1da9
--- /dev/null
+++ b/vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/parent.go
@@ -0,0 +1,145 @@
+package parent
+
+import (
+ "context"
+ "io"
+ "io/ioutil"
+ "net"
+ "os"
+ "path/filepath"
+ "sync"
+ "syscall"
+
+ "github.com/pkg/errors"
+
+ "github.com/rootless-containers/rootlesskit/pkg/port"
+ "github.com/rootless-containers/rootlesskit/pkg/port/builtin/msg"
+ "github.com/rootless-containers/rootlesskit/pkg/port/builtin/opaque"
+ "github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/tcp"
+ "github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/udp"
+ "github.com/rootless-containers/rootlesskit/pkg/port/portutil"
+)
+
+// NewDriver for builtin driver.
+func NewDriver(logWriter io.Writer, stateDir string) (port.ParentDriver, error) {
+ // TODO: consider using socketpair FD instead of socket file
+ socketPath := filepath.Join(stateDir, ".bp.sock")
+ childReadyPipePath := filepath.Join(stateDir, ".bp-ready.pipe")
+ // remove the path just in case the previous rootlesskit instance crashed
+ if err := os.RemoveAll(childReadyPipePath); err != nil {
+ return nil, errors.Wrapf(err, "cannot remove %s", childReadyPipePath)
+ }
+ if err := syscall.Mkfifo(childReadyPipePath, 0600); err != nil {
+ return nil, errors.Wrapf(err, "cannot mkfifo %s", childReadyPipePath)
+ }
+ d := driver{
+ logWriter: logWriter,
+ socketPath: socketPath,
+ childReadyPipePath: childReadyPipePath,
+ ports: make(map[int]*port.Status, 0),
+ stoppers: make(map[int]func() error, 0),
+ nextID: 1,
+ }
+ return &d, nil
+}
+
+type driver struct {
+ logWriter io.Writer
+ socketPath string
+ childReadyPipePath string
+ mu sync.Mutex
+ ports map[int]*port.Status
+ stoppers map[int]func() error
+ nextID int
+}
+
+func (d *driver) OpaqueForChild() map[string]string {
+ return map[string]string{
+ opaque.SocketPath: d.socketPath,
+ opaque.ChildReadyPipePath: d.childReadyPipePath,
+ }
+}
+
+func (d *driver) RunParentDriver(initComplete chan struct{}, quit <-chan struct{}, _ *port.ChildContext) error {
+ childReadyPipeR, err := os.OpenFile(d.childReadyPipePath, os.O_RDONLY, os.ModeNamedPipe)
+ if err != nil {
+ return err
+ }
+ if _, err = ioutil.ReadAll(childReadyPipeR); err != nil {
+ return err
+ }
+ childReadyPipeR.Close()
+ var dialer net.Dialer
+ conn, err := dialer.Dial("unix", d.socketPath)
+ if err != nil {
+ return err
+ }
+ err = msg.Initiate(conn.(*net.UnixConn))
+ conn.Close()
+ if err != nil {
+ return err
+ }
+ initComplete <- struct{}{}
+ <-quit
+ return nil
+}
+
+func (d *driver) AddPort(ctx context.Context, spec port.Spec) (*port.Status, error) {
+ d.mu.Lock()
+ err := portutil.ValidatePortSpec(spec, d.ports)
+ d.mu.Unlock()
+ if err != nil {
+ return nil, err
+ }
+ routineStopCh := make(chan struct{})
+ routineStop := func() error {
+ close(routineStopCh)
+ return nil // FIXME
+ }
+ switch spec.Proto {
+ case "tcp":
+ err = tcp.Run(d.socketPath, spec, routineStopCh, d.logWriter)
+ case "udp":
+ err = udp.Run(d.socketPath, spec, routineStopCh, d.logWriter)
+ default:
+ // NOTREACHED
+ return nil, errors.New("spec was not validated?")
+ }
+ if err != nil {
+ return nil, err
+ }
+ d.mu.Lock()
+ id := d.nextID
+ st := port.Status{
+ ID: id,
+ Spec: spec,
+ }
+ d.ports[id] = &st
+ d.stoppers[id] = routineStop
+ d.nextID++
+ d.mu.Unlock()
+ return &st, nil
+}
+
+func (d *driver) ListPorts(ctx context.Context) ([]port.Status, error) {
+ var ports []port.Status
+ d.mu.Lock()
+ for _, p := range d.ports {
+ ports = append(ports, *p)
+ }
+ d.mu.Unlock()
+ return ports, nil
+}
+
+func (d *driver) RemovePort(ctx context.Context, id int) error {
+ d.mu.Lock()
+ defer d.mu.Unlock()
+ stop, ok := d.stoppers[id]
+ if !ok {
+ return errors.Errorf("unknown id: %d", id)
+ }
+ err := stop()
+ delete(d.stoppers, id)
+ delete(d.ports, id)
+ return err
+}
diff --git a/vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/tcp/tcp.go b/vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/tcp/tcp.go
new file mode 100644
index 000000000..b9f2d1802
--- /dev/null
+++ b/vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/tcp/tcp.go
@@ -0,0 +1,104 @@
+package tcp
+
+import (
+ "fmt"
+ "io"
+ "net"
+ "os"
+ "sync"
+
+ "github.com/rootless-containers/rootlesskit/pkg/port"
+ "github.com/rootless-containers/rootlesskit/pkg/port/builtin/msg"
+)
+
+func Run(socketPath string, spec port.Spec, stopCh <-chan struct{}, logWriter io.Writer) error {
+ ln, err := net.Listen("tcp", fmt.Sprintf("%s:%d", spec.ParentIP, spec.ParentPort))
+ if err != nil {
+ fmt.Fprintf(logWriter, "listen: %v\n", err)
+ return err
+ }
+ newConns := make(chan net.Conn)
+ go func() {
+ for {
+ c, err := ln.Accept()
+ if err != nil {
+ fmt.Fprintf(logWriter, "accept: %v\n", err)
+ close(newConns)
+ return
+ }
+ newConns <- c
+ }
+ }()
+ go func() {
+ defer ln.Close()
+ for {
+ select {
+ case c, ok := <-newConns:
+ if !ok {
+ return
+ }
+ go func() {
+ if err := copyConnToChild(c, socketPath, spec, stopCh); err != nil {
+ fmt.Fprintf(logWriter, "copyConnToChild: %v\n", err)
+ return
+ }
+ }()
+ case <-stopCh:
+ return
+ }
+ }
+ }()
+ // no wait
+ return nil
+}
+
+func copyConnToChild(c net.Conn, socketPath string, spec port.Spec, stopCh <-chan struct{}) error {
+ defer c.Close()
+ // get fd from the child as an SCM_RIGHTS cmsg
+ fd, err := msg.ConnectToChildWithRetry(socketPath, spec, 10)
+ if err != nil {
+ return err
+ }
+ f := os.NewFile(uintptr(fd), "")
+ defer f.Close()
+ fc, err := net.FileConn(f)
+ if err != nil {
+ return err
+ }
+ defer fc.Close()
+ bicopy(c, fc, stopCh)
+ return nil
+}
+
+// bicopy is based on libnetwork/cmd/proxy/tcp_proxy.go .
+// NOTE: sendfile(2) cannot be used for sockets
+func bicopy(x, y net.Conn, quit <-chan struct{}) {
+ var wg sync.WaitGroup
+ var broker = func(to, from net.Conn) {
+ io.Copy(to, from)
+ if fromTCP, ok := from.(*net.TCPConn); ok {
+ fromTCP.CloseRead()
+ }
+ if toTCP, ok := to.(*net.TCPConn); ok {
+ toTCP.CloseWrite()
+ }
+ wg.Done()
+ }
+
+ wg.Add(2)
+ go broker(x, y)
+ go broker(y, x)
+ finish := make(chan struct{})
+ go func() {
+ wg.Wait()
+ close(finish)
+ }()
+
+ select {
+ case <-quit:
+ case <-finish:
+ }
+ x.Close()
+ y.Close()
+ <-finish
+}
diff --git a/vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/udp/udp.go b/vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/udp/udp.go
new file mode 100644
index 000000000..d8f646b5d
--- /dev/null
+++ b/vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/udp/udp.go
@@ -0,0 +1,60 @@
+package udp
+
+import (
+ "fmt"
+ "io"
+ "net"
+ "os"
+
+ "github.com/pkg/errors"
+
+ "github.com/rootless-containers/rootlesskit/pkg/port"
+ "github.com/rootless-containers/rootlesskit/pkg/port/builtin/msg"
+ "github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/udp/udpproxy"
+)
+
+func Run(socketPath string, spec port.Spec, stopCh <-chan struct{}, logWriter io.Writer) error {
+ addr, err := net.ResolveUDPAddr("udp", fmt.Sprintf("%s:%d", spec.ParentIP, spec.ParentPort))
+ if err != nil {
+ return err
+ }
+ c, err := net.ListenUDP("udp", addr)
+ if err != nil {
+ return err
+ }
+ udpp := &udpproxy.UDPProxy{
+ LogWriter: logWriter,
+ Listener: c,
+ BackendDial: func() (*net.UDPConn, error) {
+ // get fd from the child as an SCM_RIGHTS cmsg
+ fd, err := msg.ConnectToChildWithRetry(socketPath, spec, 10)
+ if err != nil {
+ return nil, err
+ }
+ f := os.NewFile(uintptr(fd), "")
+ defer f.Close()
+ fc, err := net.FileConn(f)
+ if err != nil {
+ return nil, err
+ }
+ uc, ok := fc.(*net.UDPConn)
+ if !ok {
+ return nil, errors.Errorf("file conn doesn't implement *net.UDPConn: %+v", fc)
+ }
+ return uc, nil
+ },
+ }
+ go udpp.Run()
+ go func() {
+ for {
+ select {
+ case <-stopCh:
+ // udpp.Close closes ln as well
+ udpp.Close()
+ return
+ }
+ }
+ }()
+ // no wait
+ return nil
+}
diff --git a/vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/udp/udpproxy/udp_proxy.go b/vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/udp/udpproxy/udp_proxy.go
new file mode 100644
index 000000000..af7b7d5d9
--- /dev/null
+++ b/vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/udp/udpproxy/udp_proxy.go
@@ -0,0 +1,150 @@
+// Package udpproxy is from https://raw.githubusercontent.com/docker/libnetwork/fec6476dfa21380bf8ee4d74048515d968c1ee63/cmd/proxy/udp_proxy.go
+package udpproxy
+
+import (
+ "encoding/binary"
+ "fmt"
+ "io"
+ "net"
+ "strings"
+ "sync"
+ "syscall"
+ "time"
+)
+
+const (
+ // UDPConnTrackTimeout is the timeout used for UDP connection tracking
+ UDPConnTrackTimeout = 90 * time.Second
+ // UDPBufSize is the buffer size for the UDP proxy
+ UDPBufSize = 65507
+)
+
+// A net.Addr where the IP is split into two fields so you can use it as a key
+// in a map:
+type connTrackKey struct {
+ IPHigh uint64
+ IPLow uint64
+ Port int
+}
+
+func newConnTrackKey(addr *net.UDPAddr) *connTrackKey {
+ if len(addr.IP) == net.IPv4len {
+ return &connTrackKey{
+ IPHigh: 0,
+ IPLow: uint64(binary.BigEndian.Uint32(addr.IP)),
+ Port: addr.Port,
+ }
+ }
+ return &connTrackKey{
+ IPHigh: binary.BigEndian.Uint64(addr.IP[:8]),
+ IPLow: binary.BigEndian.Uint64(addr.IP[8:]),
+ Port: addr.Port,
+ }
+}
+
+type connTrackMap map[connTrackKey]*net.UDPConn
+
+// UDPProxy is proxy for which handles UDP datagrams.
+// From libnetwork udp_proxy.go .
+type UDPProxy struct {
+ LogWriter io.Writer
+ Listener *net.UDPConn
+ BackendDial func() (*net.UDPConn, error)
+ connTrackTable connTrackMap
+ connTrackLock sync.Mutex
+}
+
+func (proxy *UDPProxy) replyLoop(proxyConn *net.UDPConn, clientAddr *net.UDPAddr, clientKey *connTrackKey) {
+ defer func() {
+ proxy.connTrackLock.Lock()
+ delete(proxy.connTrackTable, *clientKey)
+ proxy.connTrackLock.Unlock()
+ proxyConn.Close()
+ }()
+
+ readBuf := make([]byte, UDPBufSize)
+ for {
+ proxyConn.SetReadDeadline(time.Now().Add(UDPConnTrackTimeout))
+ again:
+ read, err := proxyConn.Read(readBuf)
+ if err != nil {
+ if err, ok := err.(*net.OpError); ok && err.Err == syscall.ECONNREFUSED {
+ // This will happen if the last write failed
+ // (e.g: nothing is actually listening on the
+ // proxied port on the container), ignore it
+ // and continue until UDPConnTrackTimeout
+ // expires:
+ goto again
+ }
+ return
+ }
+ for i := 0; i != read; {
+ written, err := proxy.Listener.WriteToUDP(readBuf[i:read], clientAddr)
+ if err != nil {
+ return
+ }
+ i += written
+ }
+ }
+}
+
+// Run starts forwarding the traffic using UDP.
+func (proxy *UDPProxy) Run() {
+ proxy.connTrackTable = make(connTrackMap)
+ readBuf := make([]byte, UDPBufSize)
+ for {
+ read, from, err := proxy.Listener.ReadFromUDP(readBuf)
+ if err != nil {
+ // NOTE: Apparently ReadFrom doesn't return
+ // ECONNREFUSED like Read do (see comment in
+ // UDPProxy.replyLoop)
+ if !isClosedError(err) {
+ fmt.Fprintf(proxy.LogWriter, "Stopping proxy on udp: %v\n", err)
+ }
+ break
+ }
+
+ fromKey := newConnTrackKey(from)
+ proxy.connTrackLock.Lock()
+ proxyConn, hit := proxy.connTrackTable[*fromKey]
+ if !hit {
+ proxyConn, err = proxy.BackendDial()
+ if err != nil {
+ fmt.Fprintf(proxy.LogWriter, "Can't proxy a datagram to udp: %v\n", err)
+ proxy.connTrackLock.Unlock()
+ continue
+ }
+ proxy.connTrackTable[*fromKey] = proxyConn
+ go proxy.replyLoop(proxyConn, from, fromKey)
+ }
+ proxy.connTrackLock.Unlock()
+ for i := 0; i != read; {
+ written, err := proxyConn.Write(readBuf[i:read])
+ if err != nil {
+ fmt.Fprintf(proxy.LogWriter, "Can't proxy a datagram to udp: %v\n", err)
+ break
+ }
+ i += written
+ }
+ }
+}
+
+// Close stops forwarding the traffic.
+func (proxy *UDPProxy) Close() {
+ proxy.Listener.Close()
+ proxy.connTrackLock.Lock()
+ defer proxy.connTrackLock.Unlock()
+ for _, conn := range proxy.connTrackTable {
+ conn.Close()
+ }
+}
+
+func isClosedError(err error) bool {
+ /* This comparison is ugly, but unfortunately, net.go doesn't export errClosing.
+ * See:
+ * http://golang.org/src/pkg/net/net.go
+ * https://code.google.com/p/go/issues/detail?id=4337
+ * https://groups.google.com/forum/#!msg/golang-nuts/0_aaCvBmOcM/SptmDyX1XJMJ
+ */
+ return strings.HasSuffix(err.Error(), "use of closed network connection")
+}