summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Paul Holzinger <pholzing@redhat.com>, 2021-09-02 14:14:59 +0200
committer: Paul Holzinger <pholzing@redhat.com>, 2021-10-12 21:43:11 +0200
commit: 3ba69dccf78194792a4b0156db8c69417b20a713 (patch)
tree: 325afc80df95c0b5eb49b74d175cdd0ba1a480ff
parent: c90beedbe160eb6e8094b492091231f3c5838006 (diff)
download: podman-3ba69dccf78194792a4b0156db8c69417b20a713.tar.gz
podman-3ba69dccf78194792a4b0156db8c69417b20a713.tar.bz2
podman-3ba69dccf78194792a4b0156db8c69417b20a713.zip
rootlessport: reduce memory usage of the process
Don't use reexec for the rootlessport process; instead, make it a separate binary to reduce the memory usage. The problem with reexec is that it imports all packages that podman uses and therefore loads a lot of stuff into the heap. The rootlessport process, however, only needs the rootlesskit library. The memory usage is a concern since the rootlessport process will spawn two processes per container that has ports forwarded. The processes stay until the container dies. On my laptop the current reexec version uses 47800 KB RSS. The new separate binary only uses 4540 KB RSS. This is more than a 90% improvement. The Makefile has been updated to compile the new binary and install it to the libexec directory. Fixes #10790 [NO TESTS NEEDED] Signed-off-by: Paul Holzinger <pholzing@redhat.com>
-rw-r--r-- Makefile 17
-rw-r--r-- cmd/rootlessport/main.go 353
-rwxr-xr-x contrib/cirrus/setup_environment.sh 12
-rw-r--r-- contrib/spec/podman.spec.in 12
-rw-r--r-- libpod/networking_slirp4netns.go 10
-rw-r--r-- nix/default-arm64.nix 2
-rw-r--r-- nix/default.nix 2
-rw-r--r-- pkg/rootlessport/rootlessport_linux.go 347
8 files changed, 394 insertions, 361 deletions
diff --git a/Makefile b/Makefile
index 8af9cfd1b..7c8c6f1c5 100644
--- a/Makefile
+++ b/Makefile
@@ -39,6 +39,7 @@ LIBPOD_INSTANCE := libpod_dev
PREFIX ?= /usr/local
BINDIR ?= ${PREFIX}/bin
LIBEXECDIR ?= ${PREFIX}/libexec
+LIBEXECPODMAN ?= ${LIBEXECDIR}/podman
MANDIR ?= ${PREFIX}/share/man
SHAREDIR_CONTAINERS ?= ${PREFIX}/share/containers
ETCDIR ?= ${PREFIX}/etc
@@ -186,7 +187,7 @@ default: all
all: binaries docs
.PHONY: binaries
-binaries: podman podman-remote ## Build podman and podman-remote binaries
+binaries: podman podman-remote rootlessport ## Build podman, podman-remote and rootlessport binaries
# Extract text following double-# for targets, as their description for
# the `help` target. Otherwise These simple-substitutions are resolved
@@ -355,6 +356,15 @@ podman-remote-darwin: ## Build podman-remote for macOS
GOARCH=$(GOARCH) \
bin/darwin/podman
+bin/rootlessport: .gopathok $(SOURCES) go.mod go.sum
+ CGO_ENABLED=$(CGO_ENABLED) \
+ $(GO) build \
+ $(BUILDFLAGS) \
+ -o $@ ./cmd/rootlessport
+
+.PHONY: rootlessport
+rootlessport: bin/rootlessport
+
###
### Secondary binary-build targets
###
@@ -718,11 +728,14 @@ install.bin-nobuild:
install ${SELINUXOPT} -d -m 755 $(DESTDIR)$(BINDIR)
install ${SELINUXOPT} -m 755 bin/podman $(DESTDIR)$(BINDIR)/podman
test -z "${SELINUXOPT}" || chcon --verbose --reference=$(DESTDIR)$(BINDIR)/podman bin/podman
+ install ${SELINUXOPT} -d -m 755 $(DESTDIR)$(LIBEXECPODMAN)
+ install ${SELINUXOPT} -m 755 bin/rootlessport $(DESTDIR)$(LIBEXECPODMAN)/rootlessport
+ test -z "${SELINUXOPT}" || chcon --verbose --reference=$(DESTDIR)$(LIBEXECPODMAN)/rootlessport bin/rootlessport
install ${SELINUXOPT} -m 755 -d ${DESTDIR}${TMPFILESDIR}
install ${SELINUXOPT} -m 644 contrib/tmpfile/podman.conf ${DESTDIR}${TMPFILESDIR}/podman.conf
.PHONY: install.bin
-install.bin: podman install.bin-nobuild
+install.bin: podman rootlessport install.bin-nobuild
.PHONY: install.man-nobuild
install.man-nobuild:
diff --git a/cmd/rootlessport/main.go b/cmd/rootlessport/main.go
new file mode 100644
index 000000000..feb9f5c06
--- /dev/null
+++ b/cmd/rootlessport/main.go
@@ -0,0 +1,353 @@
+package main
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "net"
+ "os"
+ "os/exec"
+ "path/filepath"
+
+ "github.com/containernetworking/plugins/pkg/ns"
+ "github.com/containers/podman/v3/libpod/network/types"
+ "github.com/containers/podman/v3/pkg/rootlessport"
+ "github.com/pkg/errors"
+ rkport "github.com/rootless-containers/rootlesskit/pkg/port"
+ rkbuiltin "github.com/rootless-containers/rootlesskit/pkg/port/builtin"
+ rkportutil "github.com/rootless-containers/rootlesskit/pkg/port/portutil"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
+)
+
+const (
+ // ReexecChildKey is used internally for the second reexec
+ ReexecChildKey = "rootlessport-child"
+ reexecChildEnvOpaque = "_CONTAINERS_ROOTLESSPORT_CHILD_OPAQUE"
+)
+
+func main() {
+ if len(os.Args) > 1 {
+ fmt.Fprintln(os.Stderr, `too many arguments, rootlessport expects a json config via STDIN`)
+ os.Exit(1)
+ }
+ var err error
+ if os.Args[0] == ReexecChildKey {
+ err = child()
+ } else {
+ err = parent()
+ }
+ if err != nil {
+ fmt.Println(err)
+ os.Exit(1)
+ }
+}
+
+func loadConfig(r io.Reader) (*rootlessport.Config, io.ReadCloser, io.WriteCloser, error) {
+ stdin, err := ioutil.ReadAll(r)
+ if err != nil {
+ return nil, nil, nil, err
+ }
+ var cfg rootlessport.Config
+ if err := json.Unmarshal(stdin, &cfg); err != nil {
+ return nil, nil, nil, err
+ }
+ if cfg.NetNSPath == "" {
+ return nil, nil, nil, errors.New("missing NetNSPath")
+ }
+ if cfg.ExitFD <= 0 {
+ return nil, nil, nil, errors.New("missing ExitFD")
+ }
+ exitFile := os.NewFile(uintptr(cfg.ExitFD), "exitfile")
+ if exitFile == nil {
+ return nil, nil, nil, errors.New("invalid ExitFD")
+ }
+ if cfg.ReadyFD <= 0 {
+ return nil, nil, nil, errors.New("missing ReadyFD")
+ }
+ readyFile := os.NewFile(uintptr(cfg.ReadyFD), "readyfile")
+ if readyFile == nil {
+ return nil, nil, nil, errors.New("invalid ReadyFD")
+ }
+ return &cfg, exitFile, readyFile, nil
+}
+
+func parent() error {
+ // load config from stdin
+ cfg, exitR, readyW, err := loadConfig(os.Stdin)
+ if err != nil {
+ return err
+ }
+
+ socketDir := filepath.Join(cfg.TmpDir, "rp")
+ err = os.MkdirAll(socketDir, 0700)
+ if err != nil {
+ return err
+ }
+
+ // create the parent driver
+ stateDir, err := ioutil.TempDir(cfg.TmpDir, "rootlessport")
+ if err != nil {
+ return err
+ }
+ defer os.RemoveAll(stateDir)
+ driver, err := rkbuiltin.NewParentDriver(&logrusWriter{prefix: "parent: "}, stateDir)
+ if err != nil {
+ return err
+ }
+ initComplete := make(chan struct{})
+ quit := make(chan struct{})
+ errCh := make(chan error)
+ // start the parent driver. initComplete will be closed when the child connected to the parent.
+ logrus.Infof("Starting parent driver")
+ go func() {
+ driverErr := driver.RunParentDriver(initComplete, quit, nil)
+ if driverErr != nil {
+ logrus.WithError(driverErr).Warn("Parent driver exited")
+ }
+ errCh <- driverErr
+ close(errCh)
+ }()
+ opaque := driver.OpaqueForChild()
+ logrus.Infof("opaque=%+v", opaque)
+ opaqueJSON, err := json.Marshal(opaque)
+ if err != nil {
+ return err
+ }
+ childQuitR, childQuitW, err := os.Pipe()
+ if err != nil {
+ return err
+ }
+ defer func() {
+ // stop the child
+ logrus.Info("Stopping child driver")
+ if err := childQuitW.Close(); err != nil {
+ logrus.WithError(err).Warn("Unable to close childQuitW")
+ }
+ }()
+
+ // reexec the child process in the child netns
+ cmd := exec.Command("/proc/self/exe")
+ cmd.Args = []string{ReexecChildKey}
+ cmd.Stdin = childQuitR
+ cmd.Stdout = &logrusWriter{prefix: "child"}
+ cmd.Stderr = cmd.Stdout
+ cmd.Env = append(os.Environ(), reexecChildEnvOpaque+"="+string(opaqueJSON))
+ childNS, err := ns.GetNS(cfg.NetNSPath)
+ if err != nil {
+ return err
+ }
+ if err := childNS.Do(func(_ ns.NetNS) error {
+ logrus.Infof("Starting child driver in child netns (%q %v)", cmd.Path, cmd.Args)
+ return cmd.Start()
+ }); err != nil {
+ return err
+ }
+
+ childErrCh := make(chan error)
+ go func() {
+ err := cmd.Wait()
+ childErrCh <- err
+ close(childErrCh)
+ }()
+
+ defer func() {
+ if err := unix.Kill(cmd.Process.Pid, unix.SIGTERM); err != nil {
+ logrus.WithError(err).Warn("Kill child process")
+ }
+ }()
+
+ logrus.Info("Waiting for initComplete")
+ // wait for the child to connect to the parent
+outer:
+ for {
+ select {
+ case <-initComplete:
+ logrus.Infof("initComplete is closed; parent and child established the communication channel")
+ break outer
+ case err := <-childErrCh:
+ if err != nil {
+ return err
+ }
+ case err := <-errCh:
+ if err != nil {
+ return err
+ }
+ }
+ }
+
+ defer func() {
+ logrus.Info("Stopping parent driver")
+ quit <- struct{}{}
+ if err := <-errCh; err != nil {
+ logrus.WithError(err).Warn("Parent driver returned error on exit")
+ }
+ }()
+
+ // let parent expose ports
+ logrus.Infof("Exposing ports %v", cfg.Mappings)
+ if err := exposePorts(driver, cfg.Mappings, cfg.ChildIP); err != nil {
+ return err
+ }
+
+ // we only need to have a socket to reload ports when we run under rootless cni
+ if cfg.RootlessCNI {
+ socketfile := filepath.Join(socketDir, cfg.ContainerID)
+ // make sure to remove the file if it exists to prevent EADDRINUSE
+ _ = os.Remove(socketfile)
+ // workaround to bypass the 108 char socket path limit
+ // open the fd and use the path to the fd as bind argument
+ fd, err := unix.Open(socketDir, unix.O_PATH, 0)
+ if err != nil {
+ return err
+ }
+ socket, err := net.ListenUnix("unixpacket", &net.UnixAddr{Name: fmt.Sprintf("/proc/self/fd/%d/%s", fd, cfg.ContainerID), Net: "unixpacket"})
+ if err != nil {
+ return err
+ }
+ err = unix.Close(fd)
+ // remove the socket file on exit
+ defer os.Remove(socketfile)
+ if err != nil {
+ logrus.Warnf("Failed to close the socketDir fd: %v", err)
+ }
+ defer socket.Close()
+ go serve(socket, driver)
+ }
+
+ logrus.Info("Ready")
+
+ // https://github.com/containers/podman/issues/11248
+ // Copy /dev/null to stdout and stderr to prevent SIGPIPE errors
+ if f, err := os.OpenFile("/dev/null", os.O_WRONLY, 0755); err == nil {
+ unix.Dup2(int(f.Fd()), 1) // nolint:errcheck
+ unix.Dup2(int(f.Fd()), 2) // nolint:errcheck
+ f.Close()
+ }
+ // write and close ReadyFD (convention is same as slirp4netns --ready-fd)
+ if _, err := readyW.Write([]byte("1")); err != nil {
+ return err
+ }
+ if err := readyW.Close(); err != nil {
+ return err
+ }
+
+ // wait for ExitFD to be closed
+ logrus.Info("Waiting for exitfd to be closed")
+ if _, err := ioutil.ReadAll(exitR); err != nil {
+ return err
+ }
+ return nil
+}
+
+func serve(listener net.Listener, pm rkport.Manager) {
+ for {
+ conn, err := listener.Accept()
+ if err != nil {
+ // we cannot log this error, stderr is already closed
+ continue
+ }
+ ctx := context.TODO()
+ err = handler(ctx, conn, pm)
+ if err != nil {
+ conn.Write([]byte(err.Error()))
+ } else {
+ conn.Write([]byte("OK"))
+ }
+ conn.Close()
+ }
+}
+
+func handler(ctx context.Context, conn io.Reader, pm rkport.Manager) error {
+ var childIP string
+ dec := json.NewDecoder(conn)
+ err := dec.Decode(&childIP)
+ if err != nil {
+ return errors.Wrap(err, "rootless port failed to decode ports")
+ }
+ portStatus, err := pm.ListPorts(ctx)
+ if err != nil {
+ return errors.Wrap(err, "rootless port failed to list ports")
+ }
+ for _, status := range portStatus {
+ err = pm.RemovePort(ctx, status.ID)
+ if err != nil {
+ return errors.Wrap(err, "rootless port failed to remove port")
+ }
+ }
+ // add the ports with the new child IP
+ for _, status := range portStatus {
+ // set the new child IP
+ status.Spec.ChildIP = childIP
+ _, err = pm.AddPort(ctx, status.Spec)
+ if err != nil {
+ return errors.Wrap(err, "rootless port failed to add port")
+ }
+ }
+ return nil
+}
+
+func exposePorts(pm rkport.Manager, portMappings []types.OCICNIPortMapping, childIP string) error {
+ ctx := context.TODO()
+ for _, i := range portMappings {
+ hostIP := i.HostIP
+ if hostIP == "" {
+ hostIP = "0.0.0.0"
+ }
+ spec := rkport.Spec{
+ Proto: i.Protocol,
+ ParentIP: hostIP,
+ ParentPort: int(i.HostPort),
+ ChildPort: int(i.ContainerPort),
+ ChildIP: childIP,
+ }
+ if err := rkportutil.ValidatePortSpec(spec, nil); err != nil {
+ return err
+ }
+ if _, err := pm.AddPort(ctx, spec); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func child() error {
+ // load the config from the parent
+ var opaque map[string]string
+ if err := json.Unmarshal([]byte(os.Getenv(reexecChildEnvOpaque)), &opaque); err != nil {
+ return err
+ }
+
+ // start the child driver
+ quit := make(chan struct{})
+ errCh := make(chan error)
+ go func() {
+ d := rkbuiltin.NewChildDriver(os.Stderr)
+ dErr := d.RunChildDriver(opaque, quit)
+ errCh <- dErr
+ }()
+ defer func() {
+ logrus.Info("Stopping child driver")
+ quit <- struct{}{}
+ if err := <-errCh; err != nil {
+ logrus.WithError(err).Warn("Child driver returned error on exit")
+ }
+ }()
+
+ // wait for stdin to be closed
+ if _, err := ioutil.ReadAll(os.Stdin); err != nil {
+ return err
+ }
+ return nil
+}
+
+type logrusWriter struct {
+ prefix string
+}
+
+func (w *logrusWriter) Write(p []byte) (int, error) {
+ logrus.Infof("%s%s", w.prefix, string(p))
+ return len(p), nil
+}
diff --git a/contrib/cirrus/setup_environment.sh b/contrib/cirrus/setup_environment.sh
index f2afbfef5..3786054a7 100755
--- a/contrib/cirrus/setup_environment.sh
+++ b/contrib/cirrus/setup_environment.sh
@@ -236,9 +236,19 @@ case "$TEST_FLAVOR" in
# Use existing host bits when testing is to happen inside a container
# since this script will run again in that environment.
# shellcheck disable=SC2154
- if ((CONTAINER==0)) && [[ "$TEST_ENVIRON" == "host" ]]; then
+ if [[ "$TEST_ENVIRON" == "host" ]]; then
+ if ((CONTAINER)); then
+ die "Refusing to config. host-test in container";
+ fi
remove_packaged_podman_files
make install PREFIX=/usr ETCDIR=/etc
+ elif [[ "$TEST_ENVIRON" == "container" ]]; then
+ if ((CONTAINER)); then
+ remove_packaged_podman_files
+ make install PREFIX=/usr ETCDIR=/etc
+ fi
+ else
+ die "Invalid value for $$TEST_ENVIRON=$TEST_ENVIRON"
fi
install_test_configs
diff --git a/contrib/spec/podman.spec.in b/contrib/spec/podman.spec.in
index 3a4026038..295a953ef 100644
--- a/contrib/spec/podman.spec.in
+++ b/contrib/spec/podman.spec.in
@@ -3,18 +3,8 @@
%global with_check 0
%global with_unit_test 0
%bcond_without doc
-%bcond_without debug
-%if %{with debug}
-%global _find_debuginfo_dwz_opts %{nil}
-%global _dwz_low_mem_die_limit 0
-%else
%global debug_package %{nil}
-%endif
-
-%if ! 0%{?gobuild:1}
-%define gobuild(o:) go build -buildmode pie -compiler gc -tags="rpm_crashtraceback ${BUILDTAGS:-}" -ldflags "${LDFLAGS:-} -B 0x$(head -c20 /dev/urandom|od -An -tx1|tr -d ' \n') -extldflags '-Wl,-z,relro -Wl,--as-needed -Wl,-z,now -specs=/usr/lib/rpm/redhat/redhat-hardened-ld '" -a -v -x %{?**};
-%endif
# podman hack directory
%define hackdir %{_builddir}/%{repo}-%{shortcommit0}
@@ -536,6 +526,8 @@ export GOPATH=%{buildroot}/%{gopath}:$(pwd)/vendor:%{gopath}
%{_usr}/lib/systemd/user/podman-auto-update.timer
%{_usr}/lib/systemd/user/podman-restart.service
%{_usr}/lib/tmpfiles.d/podman.conf
+%dir %{_libexecdir}/%{name}
+%{_libexecdir}/%{name}/rootlessport
%if 0%{?with_devel}
%files -n libpod-devel -f devel.file-list
diff --git a/libpod/networking_slirp4netns.go b/libpod/networking_slirp4netns.go
index 46cda89a9..ffd53ec2b 100644
--- a/libpod/networking_slirp4netns.go
+++ b/libpod/networking_slirp4netns.go
@@ -484,10 +484,14 @@ func (r *Runtime) setupRootlessPortMappingViaRLK(ctr *Container, netnsPath strin
}
cfgR := bytes.NewReader(cfgJSON)
var stdout bytes.Buffer
- cmd := exec.Command(fmt.Sprintf("/proc/%d/exe", os.Getpid()))
- cmd.Args = []string{rootlessport.ReexecKey}
- // Leak one end of the pipe in rootlessport process, the other will be sent to conmon
+ path, err := r.config.FindHelperBinary(rootlessport.BinaryName, false)
+ if err != nil {
+ return err
+ }
+ cmd := exec.Command(path)
+ cmd.Args = []string{rootlessport.BinaryName}
+ // Leak one end of the pipe in rootlessport process, the other will be sent to conmon
if ctr.rootlessPortSyncR != nil {
defer errorhandling.CloseQuiet(ctr.rootlessPortSyncR)
}
diff --git a/nix/default-arm64.nix b/nix/default-arm64.nix
index 8868788ae..bb958a193 100644
--- a/nix/default-arm64.nix
+++ b/nix/default-arm64.nix
@@ -77,10 +77,12 @@ let
patchShebangs .
make bin/podman
make bin/podman-remote
+ make bin/rootlessport
'';
installPhase = ''
install -Dm755 bin/podman $out/bin/podman
install -Dm755 bin/podman-remote $out/bin/podman-remote
+ install -Dm755 bin/rootlessport $out/libexec/podman/rootlessport
'';
};
in
diff --git a/nix/default.nix b/nix/default.nix
index 4d15532c2..1dc6f92b6 100644
--- a/nix/default.nix
+++ b/nix/default.nix
@@ -75,10 +75,12 @@ let
patchShebangs .
make bin/podman
make bin/podman-remote
+ make bin/rootlessport
'';
installPhase = ''
install -Dm755 bin/podman $out/bin/podman
install -Dm755 bin/podman-remote $out/bin/podman-remote
+ install -Dm755 bin/rootlessport $out/libexec/podman/rootlessport
'';
};
in
diff --git a/pkg/rootlessport/rootlessport_linux.go b/pkg/rootlessport/rootlessport_linux.go
index 37fb7ce79..7b9e5bbfa 100644
--- a/pkg/rootlessport/rootlessport_linux.go
+++ b/pkg/rootlessport/rootlessport_linux.go
@@ -12,33 +12,12 @@
package rootlessport
import (
- "context"
- "encoding/json"
- "fmt"
- "io"
- "io/ioutil"
- "net"
- "os"
- "os/exec"
- "path/filepath"
-
- "github.com/containernetworking/plugins/pkg/ns"
"github.com/containers/podman/v3/libpod/network/types"
- "github.com/containers/storage/pkg/reexec"
- "github.com/pkg/errors"
- rkport "github.com/rootless-containers/rootlesskit/pkg/port"
- rkbuiltin "github.com/rootless-containers/rootlesskit/pkg/port/builtin"
- rkportutil "github.com/rootless-containers/rootlesskit/pkg/port/portutil"
- "github.com/sirupsen/logrus"
- "golang.org/x/sys/unix"
)
const (
- // ReexecKey is the reexec key for the parent process.
- ReexecKey = "containers-rootlessport"
- // reexecChildKey is used internally for the second reexec
- reexecChildKey = "containers-rootlessport-child"
- reexecChildEnvOpaque = "_CONTAINERS_ROOTLESSPORT_CHILD_OPAQUE"
+ // BinaryName is the binary name for the parent process.
+ BinaryName = "rootlessport"
)
// Config needs to be provided to the process via stdin as a JSON string.
@@ -53,325 +32,3 @@ type Config struct {
ContainerID string
RootlessCNI bool
}
-
-func init() {
- reexec.Register(ReexecKey, func() {
- if err := parent(); err != nil {
- fmt.Println(err)
- os.Exit(1)
- }
- })
- reexec.Register(reexecChildKey, func() {
- if err := child(); err != nil {
- fmt.Println(err)
- os.Exit(1)
- }
- })
-}
-
-func loadConfig(r io.Reader) (*Config, io.ReadCloser, io.WriteCloser, error) {
- stdin, err := ioutil.ReadAll(r)
- if err != nil {
- return nil, nil, nil, err
- }
- var cfg Config
- if err := json.Unmarshal(stdin, &cfg); err != nil {
- return nil, nil, nil, err
- }
- if cfg.NetNSPath == "" {
- return nil, nil, nil, errors.New("missing NetNSPath")
- }
- if cfg.ExitFD <= 0 {
- return nil, nil, nil, errors.New("missing ExitFD")
- }
- exitFile := os.NewFile(uintptr(cfg.ExitFD), "exitfile")
- if exitFile == nil {
- return nil, nil, nil, errors.New("invalid ExitFD")
- }
- if cfg.ReadyFD <= 0 {
- return nil, nil, nil, errors.New("missing ReadyFD")
- }
- readyFile := os.NewFile(uintptr(cfg.ReadyFD), "readyfile")
- if readyFile == nil {
- return nil, nil, nil, errors.New("invalid ReadyFD")
- }
- return &cfg, exitFile, readyFile, nil
-}
-
-func parent() error {
- // load config from stdin
- cfg, exitR, readyW, err := loadConfig(os.Stdin)
- if err != nil {
- return err
- }
-
- socketDir := filepath.Join(cfg.TmpDir, "rp")
- err = os.MkdirAll(socketDir, 0700)
- if err != nil {
- return err
- }
-
- // create the parent driver
- stateDir, err := ioutil.TempDir(cfg.TmpDir, "rootlessport")
- if err != nil {
- return err
- }
- defer os.RemoveAll(stateDir)
- driver, err := rkbuiltin.NewParentDriver(&logrusWriter{prefix: "parent: "}, stateDir)
- if err != nil {
- return err
- }
- initComplete := make(chan struct{})
- quit := make(chan struct{})
- errCh := make(chan error)
- // start the parent driver. initComplete will be closed when the child connected to the parent.
- logrus.Infof("Starting parent driver")
- go func() {
- driverErr := driver.RunParentDriver(initComplete, quit, nil)
- if driverErr != nil {
- logrus.WithError(driverErr).Warn("Parent driver exited")
- }
- errCh <- driverErr
- close(errCh)
- }()
- opaque := driver.OpaqueForChild()
- logrus.Infof("Opaque=%+v", opaque)
- opaqueJSON, err := json.Marshal(opaque)
- if err != nil {
- return err
- }
- childQuitR, childQuitW, err := os.Pipe()
- if err != nil {
- return err
- }
- defer func() {
- // stop the child
- logrus.Info("Stopping child driver")
- if err := childQuitW.Close(); err != nil {
- logrus.WithError(err).Warn("Unable to close childQuitW")
- }
- }()
-
- // reexec the child process in the child netns
- cmd := exec.Command("/proc/self/exe")
- cmd.Args = []string{reexecChildKey}
- cmd.Stdin = childQuitR
- cmd.Stdout = &logrusWriter{prefix: "child"}
- cmd.Stderr = cmd.Stdout
- cmd.Env = append(os.Environ(), reexecChildEnvOpaque+"="+string(opaqueJSON))
- childNS, err := ns.GetNS(cfg.NetNSPath)
- if err != nil {
- return err
- }
- if err := childNS.Do(func(_ ns.NetNS) error {
- logrus.Infof("Starting child driver in child netns (%q %v)", cmd.Path, cmd.Args)
- return cmd.Start()
- }); err != nil {
- return err
- }
-
- childErrCh := make(chan error)
- go func() {
- err := cmd.Wait()
- childErrCh <- err
- close(childErrCh)
- }()
-
- defer func() {
- if err := unix.Kill(cmd.Process.Pid, unix.SIGTERM); err != nil {
- logrus.WithError(err).Warn("Kill child process")
- }
- }()
-
- logrus.Info("Waiting for initComplete")
- // wait for the child to connect to the parent
-outer:
- for {
- select {
- case <-initComplete:
- logrus.Infof("initComplete is closed; parent and child established the communication channel")
- break outer
- case err := <-childErrCh:
- if err != nil {
- return err
- }
- case err := <-errCh:
- if err != nil {
- return err
- }
- }
- }
-
- defer func() {
- logrus.Info("Stopping parent driver")
- quit <- struct{}{}
- if err := <-errCh; err != nil {
- logrus.WithError(err).Warn("Parent driver returned error on exit")
- }
- }()
-
- // let parent expose ports
- logrus.Infof("Exposing ports %v", cfg.Mappings)
- if err := exposePorts(driver, cfg.Mappings, cfg.ChildIP); err != nil {
- return err
- }
-
- // we only need to have a socket to reload ports when we run under rootless cni
- if cfg.RootlessCNI {
- socketfile := filepath.Join(socketDir, cfg.ContainerID)
- // make sure to remove the file if it exists to prevent EADDRINUSE
- _ = os.Remove(socketfile)
- // workaround to bypass the 108 char socket path limit
- // open the fd and use the path to the fd as bind argument
- fd, err := unix.Open(socketDir, unix.O_PATH, 0)
- if err != nil {
- return err
- }
- socket, err := net.ListenUnix("unixpacket", &net.UnixAddr{Name: fmt.Sprintf("/proc/self/fd/%d/%s", fd, cfg.ContainerID), Net: "unixpacket"})
- if err != nil {
- return err
- }
- err = unix.Close(fd)
- // remove the socket file on exit
- defer os.Remove(socketfile)
- if err != nil {
- logrus.Warnf("Failed to close the socketDir fd: %v", err)
- }
- defer socket.Close()
- go serve(socket, driver)
- }
-
- logrus.Info("Ready")
-
- // https://github.com/containers/podman/issues/11248
- // Copy /dev/null to stdout and stderr to prevent SIGPIPE errors
- if f, err := os.OpenFile("/dev/null", os.O_WRONLY, 0755); err == nil {
- unix.Dup2(int(f.Fd()), 1) // nolint:errcheck
- unix.Dup2(int(f.Fd()), 2) // nolint:errcheck
- f.Close()
- }
- // write and close ReadyFD (convention is same as slirp4netns --ready-fd)
- if _, err := readyW.Write([]byte("1")); err != nil {
- return err
- }
- if err := readyW.Close(); err != nil {
- return err
- }
-
- // wait for ExitFD to be closed
- logrus.Info("Waiting for exitfd to be closed")
- if _, err := ioutil.ReadAll(exitR); err != nil {
- return err
- }
- return nil
-}
-
-func serve(listener net.Listener, pm rkport.Manager) {
- for {
- conn, err := listener.Accept()
- if err != nil {
- // we cannot log this error, stderr is already closed
- continue
- }
- ctx := context.TODO()
- err = handler(ctx, conn, pm)
- if err != nil {
- conn.Write([]byte(err.Error()))
- } else {
- conn.Write([]byte("OK"))
- }
- conn.Close()
- }
-}
-
-func handler(ctx context.Context, conn io.Reader, pm rkport.Manager) error {
- var childIP string
- dec := json.NewDecoder(conn)
- err := dec.Decode(&childIP)
- if err != nil {
- return errors.Wrap(err, "rootless port failed to decode ports")
- }
- portStatus, err := pm.ListPorts(ctx)
- if err != nil {
- return errors.Wrap(err, "rootless port failed to list ports")
- }
- for _, status := range portStatus {
- err = pm.RemovePort(ctx, status.ID)
- if err != nil {
- return errors.Wrap(err, "rootless port failed to remove port")
- }
- }
- // add the ports with the new child IP
- for _, status := range portStatus {
- // set the new child IP
- status.Spec.ChildIP = childIP
- _, err = pm.AddPort(ctx, status.Spec)
- if err != nil {
- return errors.Wrap(err, "rootless port failed to add port")
- }
- }
- return nil
-}
-
-func exposePorts(pm rkport.Manager, portMappings []types.OCICNIPortMapping, childIP string) error {
- ctx := context.TODO()
- for _, i := range portMappings {
- hostIP := i.HostIP
- if hostIP == "" {
- hostIP = "0.0.0.0"
- }
- spec := rkport.Spec{
- Proto: i.Protocol,
- ParentIP: hostIP,
- ParentPort: int(i.HostPort),
- ChildPort: int(i.ContainerPort),
- ChildIP: childIP,
- }
- if err := rkportutil.ValidatePortSpec(spec, nil); err != nil {
- return err
- }
- if _, err := pm.AddPort(ctx, spec); err != nil {
- return err
- }
- }
- return nil
-}
-
-func child() error {
- // load the config from the parent
- var opaque map[string]string
- if err := json.Unmarshal([]byte(os.Getenv(reexecChildEnvOpaque)), &opaque); err != nil {
- return err
- }
-
- // start the child driver
- quit := make(chan struct{})
- errCh := make(chan error)
- go func() {
- d := rkbuiltin.NewChildDriver(os.Stderr)
- dErr := d.RunChildDriver(opaque, quit)
- errCh <- dErr
- }()
- defer func() {
- logrus.Info("Stopping child driver")
- quit <- struct{}{}
- if err := <-errCh; err != nil {
- logrus.WithError(err).Warn("Child driver returned error on exit")
- }
- }()
-
- // wait for stdin to be closed
- if _, err := ioutil.ReadAll(os.Stdin); err != nil {
- return err
- }
- return nil
-}
-
-type logrusWriter struct {
- prefix string
-}
-
-func (w *logrusWriter) Write(p []byte) (int, error) {
- logrus.Infof("%s%s", w.prefix, string(p))
- return len(p), nil
-}