diff options
Diffstat (limited to 'libpod')
-rw-r--r-- | libpod/container_api.go | 20 | ||||
-rw-r--r-- | libpod/container_copy_linux.go | 3 | ||||
-rw-r--r-- | libpod/container_internal.go | 1 | ||||
-rw-r--r-- | libpod/container_internal_linux.go | 125 | ||||
-rw-r--r-- | libpod/container_log_linux.go | 19 | ||||
-rw-r--r-- | libpod/define/pod_inspect.go | 2 | ||||
-rw-r--r-- | libpod/diff.go | 3 | ||||
-rw-r--r-- | libpod/network/network.go | 6 | ||||
-rw-r--r-- | libpod/networking_linux.go | 389 | ||||
-rw-r--r-- | libpod/oci_conmon_exec_linux.go | 3 | ||||
-rw-r--r-- | libpod/oci_conmon_linux.go | 24 | ||||
-rw-r--r-- | libpod/options.go | 46 | ||||
-rw-r--r-- | libpod/pod.go | 8 | ||||
-rw-r--r-- | libpod/pod_api.go | 1 | ||||
-rw-r--r-- | libpod/runtime_ctr.go | 26 | ||||
-rw-r--r-- | libpod/runtime_pod_infra_linux.go | 18 | ||||
-rw-r--r-- | libpod/runtime_pod_linux.go | 1 |
17 files changed, 480 insertions, 215 deletions
diff --git a/libpod/container_api.go b/libpod/container_api.go index 390bba7bb..637f5b686 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -780,6 +780,16 @@ type ContainerCheckpointOptions struct { // Compression tells the API which compression to use for // the exported checkpoint archive. Compression archive.Compression + // If Pod is set the container should be restored into the + // given Pod. If Pod is empty it is a restore without a Pod. + // Restoring a non Pod container into a Pod or a Pod container + // without a Pod is theoretically possible, but will + // probably not work if a PID namespace is shared. + // A shared PID namespace means that a Pod container has PID 1 + // in the infrastructure container, but without the infrastructure + // container no PID 1 will be in the namespace and that is not + // possible. + Pod string } // Checkpoint checkpoints a container @@ -811,7 +821,11 @@ func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointO // Restore restores a container func (c *Container) Restore(ctx context.Context, options ContainerCheckpointOptions) error { - logrus.Debugf("Trying to restore container %s", c.ID()) + if options.Pod == "" { + logrus.Debugf("Trying to restore container %s", c.ID()) + } else { + logrus.Debugf("Trying to restore container %s into pod %s", c.ID(), options.Pod) + } if !c.batched { c.lock.Lock() defer c.lock.Unlock() @@ -840,7 +854,7 @@ func (c *Container) ShouldRestart(ctx context.Context) bool { // CopyFromArchive copies the contents from the specified tarStream to path // *inside* the container. -func (c *Container) CopyFromArchive(ctx context.Context, containerPath string, chown bool, tarStream io.Reader) (func() error, error) { +func (c *Container) CopyFromArchive(ctx context.Context, containerPath string, chown bool, rename map[string]string, tarStream io.Reader) (func() error, error) { if !c.batched { c.lock.Lock() defer c.lock.Unlock() @@ -850,7 +864,7 @@ func (c *Container) CopyFromArchive(ctx context.Context, containerPath string, c } } - return c.copyFromArchive(ctx, containerPath, chown, tarStream) + return c.copyFromArchive(ctx, containerPath, chown, rename, tarStream) } // CopyToArchive copies the contents from the specified path *inside* the diff --git a/libpod/container_copy_linux.go b/libpod/container_copy_linux.go index 01e7ecacb..a35824289 100644 --- a/libpod/container_copy_linux.go +++ b/libpod/container_copy_linux.go @@ -23,7 +23,7 @@ import ( "golang.org/x/sys/unix" ) -func (c *Container) copyFromArchive(ctx context.Context, path string, chown bool, reader io.Reader) (func() error, error) { +func (c *Container) copyFromArchive(ctx context.Context, path string, chown bool, rename map[string]string, reader io.Reader) (func() error, error) { var ( mountPoint string resolvedRoot string @@ -89,6 +89,7 @@ func (c *Container) copyFromArchive(ctx context.Context, path string, chown bool GIDMap: c.config.IDMappings.GIDMap, ChownDirs: idPair, ChownFiles: idPair, + Rename: rename, } return c.joinMountAndExec(ctx, diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 2555f15ec..e7694227a 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -420,7 +420,6 @@ func (c *Container) setupStorage(ctx context.Context) error { if c.config.Rootfs == "" && (c.config.RootfsImageID == "" || c.config.RootfsImageName == "") { return errors.Wrapf(define.ErrInvalidArg, "must provide image ID and image name to use an image") } - options := storage.ContainerOptions{ IDMappingOptions: storage.IDMappingOptions{ HostUIDMapping: true, diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 850af235f..bff64aa95 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -901,8 +901,27 @@ func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr } func (c *Container) exportCheckpoint(options ContainerCheckpointOptions) error { - if len(c.Dependencies()) > 0 { - return errors.Errorf("Cannot export checkpoints of containers with dependencies") + if len(c.Dependencies()) == 1 { + // Check if the dependency is an infra container. If it is we can checkpoint + // the container out of the Pod. + if c.config.Pod == "" { + return errors.Errorf("cannot export checkpoints of containers with dependencies") + } + + pod, err := c.runtime.state.Pod(c.config.Pod) + if err != nil { + return errors.Wrapf(err, "container %s is in pod %s, but pod cannot be retrieved", c.ID(), c.config.Pod) + } + infraID, err := pod.InfraContainerID() + if err != nil { + return errors.Wrapf(err, "cannot retrieve infra container ID for pod %s", c.config.Pod) + } + if c.Dependencies()[0] != infraID { + return errors.Errorf("cannot export checkpoints of containers with dependencies") + } + } + if len(c.Dependencies()) > 1 { + return errors.Errorf("cannot export checkpoints of containers with dependencies") } logrus.Debugf("Exporting checkpoint image of container %q to %q", c.ID(), options.TargetFile) @@ -1021,9 +1040,9 @@ func (c *Container) exportCheckpoint(options ContainerCheckpointOptions) error { return nil } -func (c *Container) checkpointRestoreSupported() error { - if !criu.CheckForCriu() { - return errors.Errorf("checkpoint/restore requires at least CRIU %d", criu.MinCriuVersion) +func (c *Container) checkpointRestoreSupported(version int) error { + if !criu.CheckForCriu(version) { + return errors.Errorf("checkpoint/restore requires at least CRIU %d", version) } if !c.ociRuntime.SupportsCheckpoint() { return errors.Errorf("configured runtime does not support checkpoint/restore") @@ -1032,7 +1051,7 @@ func (c *Container) checkpointRestoreSupported() error { } func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) error { - if err := c.checkpointRestoreSupported(); err != nil { + if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil { return err } @@ -1136,10 +1155,20 @@ func (c *Container) importPreCheckpoint(input string) error { } func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (retErr error) { - if err := c.checkpointRestoreSupported(); err != nil { + minCriuVersion := func() int { + if options.Pod == "" { + return criu.MinCriuVersion + } + return criu.PodCriuVersion + }() + if err := c.checkpointRestoreSupported(minCriuVersion); err != nil { return err } + if options.Pod != "" && !crutils.CRRuntimeSupportsPodCheckpointRestore(c.ociRuntime.Path()) { + return errors.Errorf("runtime %s does not support pod restore", c.ociRuntime.Path()) + } + if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) { return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is running or paused, cannot restore", c.ID()) } @@ -1247,6 +1276,83 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti } } + if options.Pod != "" { + // Running in a Pod means that we have to change all namespace settings to + // the ones from the infrastructure container. + pod, err := c.runtime.LookupPod(options.Pod) + if err != nil { + return errors.Wrapf(err, "pod %q cannot be retrieved", options.Pod) + } + + infraContainer, err := pod.InfraContainer() + if err != nil { + return errors.Wrapf(err, "cannot retrieved infra container from pod %q", options.Pod) + } + + infraContainer.lock.Lock() + if err := infraContainer.syncContainer(); err != nil { + infraContainer.lock.Unlock() + return errors.Wrapf(err, "Error syncing infrastructure container %s status", infraContainer.ID()) + } + if infraContainer.state.State != define.ContainerStateRunning { + if err := infraContainer.initAndStart(ctx); err != nil { + infraContainer.lock.Unlock() + return errors.Wrapf(err, "Error starting infrastructure container %s status", infraContainer.ID()) + } + } + infraContainer.lock.Unlock() + + if c.config.IPCNsCtr != "" { + nsPath, err := infraContainer.namespacePath(IPCNS) + if err != nil { + return errors.Wrapf(err, "cannot retrieve IPC namespace path for Pod %q", options.Pod) + } + if err := g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), nsPath); err != nil { + return err + } + } + + if c.config.NetNsCtr != "" { + nsPath, err := infraContainer.namespacePath(NetNS) + if err != nil { + return errors.Wrapf(err, "cannot retrieve network namespace path for Pod %q", options.Pod) + } + if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), nsPath); err != nil { + return err + } + } + + if c.config.PIDNsCtr != "" { + nsPath, err := infraContainer.namespacePath(PIDNS) + if err != nil { + return errors.Wrapf(err, "cannot retrieve PID namespace path for Pod %q", options.Pod) + } + if err := g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), nsPath); err != nil { + return err + } + } + + if c.config.UTSNsCtr != "" { + nsPath, err := infraContainer.namespacePath(UTSNS) + if err != nil { + return errors.Wrapf(err, "cannot retrieve UTS namespace path for Pod %q", options.Pod) + } + if err := g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), nsPath); err != nil { + return err + } + } + + if c.config.CgroupNsCtr != "" { + nsPath, err := infraContainer.namespacePath(CgroupNS) + if err != nil { + return errors.Wrapf(err, "cannot retrieve Cgroup namespace path for Pod %q", options.Pod) + } + if err := g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), nsPath); err != nil { + return err + } + } + } + if err := c.makeBindMounts(); err != nil { return err } @@ -2490,6 +2596,11 @@ func (c *Container) fixVolumePermissions(v *ContainerNamedVolume) error { // https://github.com/containers/podman/issues/10188 st, err := os.Lstat(filepath.Join(c.state.Mountpoint, v.Dest)) if err == nil { + if stat, ok := st.Sys().(*syscall.Stat_t); ok { + if err := os.Lchown(mountPoint, int(stat.Uid), int(stat.Gid)); err != nil { + return err + } + } if err := os.Chmod(mountPoint, st.Mode()|0111); err != nil { return err } diff --git a/libpod/container_log_linux.go b/libpod/container_log_linux.go index 9f9dd3b0d..d4afaa52a 100644 --- a/libpod/container_log_linux.go +++ b/libpod/container_log_linux.go @@ -97,8 +97,6 @@ func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOption } }() - beforeTimeStamp := true - afterTimeStamp := false // needed for options.Since tailQueue := []*logs.LogLine{} // needed for options.Tail doTail := options.Tail > 0 for { @@ -150,21 +148,10 @@ func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOption return } - if !afterTimeStamp { - entryTime := time.Unix(0, int64(entry.RealtimeTimestamp)*int64(time.Microsecond)) - if entryTime.Before(options.Since) { - continue - } - afterTimeStamp = true - } - if beforeTimeStamp { - entryTime := time.Unix(0, int64(entry.RealtimeTimestamp)*int64(time.Microsecond)) - if entryTime.Before(options.Until) || !options.Until.IsZero() { - continue - } - beforeTimeStamp = false + entryTime := time.Unix(0, int64(entry.RealtimeTimestamp)*int64(time.Microsecond)) + if (entryTime.Before(options.Since) && !options.Since.IsZero()) || (entryTime.After(options.Until) && !options.Until.IsZero()) { + continue } - // If we're reading an event and the container exited/died, // then we're done and can return. event, ok := entry.Fields["PODMAN_EVENT"] diff --git a/libpod/define/pod_inspect.go b/libpod/define/pod_inspect.go index 67f075b3c..a17304875 100644 --- a/libpod/define/pod_inspect.go +++ b/libpod/define/pod_inspect.go @@ -103,6 +103,8 @@ type InspectPodInfraConfig struct { CPUQuota int64 `json:"cpu_quota,omitempty"` // CPUSetCPUs contains linux specific CPU data for the container CPUSetCPUs string `json:"cpuset_cpus,omitempty"` + // Pid is the PID namespace mode of the pod's infra container + PidNS string `json:"pid_ns,omitempty"` } // InspectPodContainerInfo contains information on a container in a pod. diff --git a/libpod/diff.go b/libpod/diff.go index 5bd162e7b..cdd5e79cb 100644 --- a/libpod/diff.go +++ b/libpod/diff.go @@ -1,7 +1,6 @@ package libpod import ( - "github.com/containers/common/libimage" "github.com/containers/podman/v3/libpod/define" "github.com/containers/podman/v3/libpod/layers" "github.com/containers/storage/pkg/archive" @@ -53,7 +52,7 @@ func (r *Runtime) GetDiff(from, to string, diffType define.DiffType) ([]archive. func (r *Runtime) getLayerID(id string, diffType define.DiffType) (string, error) { var lastErr error if diffType&define.DiffImage == define.DiffImage { - toImage, _, err := r.libimageRuntime.LookupImage(id, &libimage.LookupImageOptions{IgnorePlatform: true}) + toImage, _, err := r.libimageRuntime.LookupImage(id, nil) if err == nil { return toImage.TopLayer(), nil } diff --git a/libpod/network/network.go b/libpod/network/network.go index ed4e6388a..805988432 100644 --- a/libpod/network/network.go +++ b/libpod/network/network.go @@ -111,8 +111,10 @@ func allocatorToIPNets(networks []*allocator.Net) []*net.IPNet { if len(network.IPAM.Ranges) > 0 { // this is the new IPAM range style // append each subnet from ipam the rangeset - for _, r := range network.IPAM.Ranges[0] { - nets = append(nets, newIPNetFromSubnet(r.Subnet)) + for _, allocatorRange := range network.IPAM.Ranges { + for _, r := range allocatorRange { + nets = append(nets, newIPNetFromSubnet(r.Subnet)) + } } } else { // looks like the old, deprecated style diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go index 48b0c495c..0f3e03e06 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -108,7 +108,7 @@ func (r *Runtime) getPodNetwork(id, name, nsPath string, networks []string, port type RootlessCNI struct { ns ns.NetNS dir string - lock lockfile.Locker + Lock lockfile.Locker } // getPath will join the given path to the rootless cni dir @@ -177,6 +177,21 @@ func (r *RootlessCNI) Do(toRun func() error) error { if err != nil { return err } + logrus.Debugf("The actual path of /etc/resolv.conf on the host is %q", resolvePath) + // When /etc/resolv.conf on the host is a symlink to /run/systemd/resolve/stub-resolv.conf, + // we have to mount an empty filesystem on /run/systemd/resolve in the child namespace, + // so as to isolate the directory from the host mount namespace. + // + // Otherwise our bind-mount for /run/systemd/resolve/stub-resolv.conf is unmounted + // when systemd-resolved unlinks and recreates /run/systemd/resolve/stub-resolv.conf on the host. + // see: https://github.com/containers/podman/issues/10929 + if strings.HasPrefix(resolvePath, "/run/systemd/resolve/") { + rsr := r.getPath("/run/systemd/resolve") + err = unix.Mount("", rsr, "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, "") + if err != nil { + return errors.Wrapf(err, "failed to mount tmpfs on %q for rootless cni", rsr) + } + } if strings.HasPrefix(resolvePath, "/run/") { resolvePath = r.getPath(resolvePath) err = os.MkdirAll(filepath.Dir(resolvePath), 0700) @@ -234,16 +249,17 @@ func (r *RootlessCNI) Do(toRun func() error) error { return err } -// Cleanup the rootless cni namespace if needed -// check if we have running containers with the bridge network mode +// Cleanup the rootless cni namespace if needed. +// It checks if we have running containers with the bridge network mode. +// Cleanup() will try to lock RootlessCNI, therefore you have to call it with an unlocked func (r *RootlessCNI) Cleanup(runtime *Runtime) error { _, err := os.Stat(r.dir) if os.IsNotExist(err) { // the directory does not exists no need for cleanup return nil } - r.lock.Lock() - defer r.lock.Unlock() + r.Lock.Lock() + defer r.Lock.Unlock() running := func(c *Container) bool { // we cannot use c.state() because it will try to lock the container // using c.state.State directly should be good enough for this use case @@ -301,205 +317,216 @@ func (r *RootlessCNI) Cleanup(runtime *Runtime) error { // GetRootlessCNINetNs returns the rootless cni object. If create is set to true // the rootless cni namespace will be created if it does not exists already. +// If called as root it returns always nil. +// On success the returned RootlessCNI lock is locked and must be unlocked by the caller. func (r *Runtime) GetRootlessCNINetNs(new bool) (*RootlessCNI, error) { + if !rootless.IsRootless() { + return nil, nil + } var rootlessCNINS *RootlessCNI - if rootless.IsRootless() { - runDir, err := util.GetRuntimeDir() - if err != nil { - return nil, err + runDir, err := util.GetRuntimeDir() + if err != nil { + return nil, err + } + + lfile := filepath.Join(runDir, "rootless-cni.lock") + lock, err := lockfile.GetLockfile(lfile) + if err != nil { + return nil, errors.Wrap(err, "failed to get rootless-cni lockfile") + } + lock.Lock() + defer func() { + // In case of an error (early exit) rootlessCNINS will be nil. + // Make sure to unlock otherwise we could deadlock. + if rootlessCNINS == nil { + lock.Unlock() } - cniDir := filepath.Join(runDir, "rootless-cni") - err = os.MkdirAll(cniDir, 0700) + }() + + cniDir := filepath.Join(runDir, "rootless-cni") + err = os.MkdirAll(cniDir, 0700) + if err != nil { + return nil, errors.Wrap(err, "could not create rootless-cni directory") + } + + nsDir, err := netns.GetNSRunDir() + if err != nil { + return nil, err + } + path := filepath.Join(nsDir, rootlessCNINSName) + ns, err := ns.GetNS(path) + if err != nil { + if !new { + // return a error if we could not get the namespace and should no create one + return nil, errors.Wrap(err, "error getting rootless cni network namespace") + } + // create a new namespace + logrus.Debug("creating rootless cni network namespace") + ns, err = netns.NewNSWithName(rootlessCNINSName) if err != nil { - return nil, errors.Wrap(err, "could not create rootless-cni directory") + return nil, errors.Wrap(err, "error creating rootless cni network namespace") + } + // setup slirp4netns here + path := r.config.Engine.NetworkCmdPath + if path == "" { + var err error + path, err = exec.LookPath("slirp4netns") + if err != nil { + return nil, err + } } - lfile := filepath.Join(cniDir, "rootless-cni.lck") - lock, err := lockfile.GetLockfile(lfile) + syncR, syncW, err := os.Pipe() if err != nil { - return nil, errors.Wrap(err, "failed to get rootless-cni lockfile") + return nil, errors.Wrapf(err, "failed to open pipe") } - lock.Lock() - defer lock.Unlock() + defer errorhandling.CloseQuiet(syncR) + defer errorhandling.CloseQuiet(syncW) - nsDir, err := netns.GetNSRunDir() + netOptions, err := parseSlirp4netnsNetworkOptions(r, nil) if err != nil { return nil, err } - path := filepath.Join(nsDir, rootlessCNINSName) - ns, err := ns.GetNS(path) + slirpFeatures, err := checkSlirpFlags(path) if err != nil { - if new { - // create a new namespace - logrus.Debug("creating rootless cni network namespace") - ns, err = netns.NewNSWithName(rootlessCNINSName) - if err != nil { - return nil, errors.Wrap(err, "error creating rootless cni network namespace") - } - // setup slirp4netns here - path := r.config.Engine.NetworkCmdPath - if path == "" { - var err error - path, err = exec.LookPath("slirp4netns") - if err != nil { - return nil, err - } - } - - syncR, syncW, err := os.Pipe() - if err != nil { - return nil, errors.Wrapf(err, "failed to open pipe") - } - defer errorhandling.CloseQuiet(syncR) - defer errorhandling.CloseQuiet(syncW) - - netOptions, err := parseSlirp4netnsNetworkOptions(r, nil) - if err != nil { - return nil, err - } - slirpFeatures, err := checkSlirpFlags(path) - if err != nil { - return nil, errors.Wrapf(err, "error checking slirp4netns binary %s: %q", path, err) - } - cmdArgs, err := createBasicSlirp4netnsCmdArgs(netOptions, slirpFeatures) - if err != nil { - return nil, err - } - // Note we do not use --exit-fd, we kill this process by pid - cmdArgs = append(cmdArgs, "-c", "-r", "3") - cmdArgs = append(cmdArgs, "--netns-type=path", ns.Path(), "tap0") - - cmd := exec.Command(path, cmdArgs...) - logrus.Debugf("slirp4netns command: %s", strings.Join(cmd.Args, " ")) - cmd.SysProcAttr = &syscall.SysProcAttr{ - Setpgid: true, - } - - // workaround for https://github.com/rootless-containers/slirp4netns/pull/153 - if !netOptions.noPivotRoot && slirpFeatures.HasEnableSandbox { - cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWNS - cmd.SysProcAttr.Unshareflags = syscall.CLONE_NEWNS - } - - // Leak one end of the pipe in slirp4netns - cmd.ExtraFiles = append(cmd.ExtraFiles, syncW) + return nil, errors.Wrapf(err, "error checking slirp4netns binary %s: %q", path, err) + } + cmdArgs, err := createBasicSlirp4netnsCmdArgs(netOptions, slirpFeatures) + if err != nil { + return nil, err + } + // Note we do not use --exit-fd, we kill this process by pid + cmdArgs = append(cmdArgs, "-c", "-r", "3") + cmdArgs = append(cmdArgs, "--netns-type=path", ns.Path(), "tap0") + + cmd := exec.Command(path, cmdArgs...) + logrus.Debugf("slirp4netns command: %s", strings.Join(cmd.Args, " ")) + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + } - logPath := filepath.Join(r.config.Engine.TmpDir, "slirp4netns-rootless-cni.log") - logFile, err := os.Create(logPath) - if err != nil { - return nil, errors.Wrapf(err, "failed to open slirp4netns log file %s", logPath) - } - defer logFile.Close() - // Unlink immediately the file so we won't need to worry about cleaning it up later. - // It is still accessible through the open fd logFile. - if err := os.Remove(logPath); err != nil { - return nil, errors.Wrapf(err, "delete file %s", logPath) - } - cmd.Stdout = logFile - cmd.Stderr = logFile - if err := cmd.Start(); err != nil { - return nil, errors.Wrapf(err, "failed to start slirp4netns process") - } - // create pid file for the slirp4netns process - // this is need to kill the process in the cleanup - pid := strconv.Itoa(cmd.Process.Pid) - err = ioutil.WriteFile(filepath.Join(cniDir, "rootless-cni-slirp4netns.pid"), []byte(pid), 0700) - if err != nil { - errors.Wrap(err, "unable to write rootless-cni slirp4netns pid file") - } + // workaround for https://github.com/rootless-containers/slirp4netns/pull/153 + if !netOptions.noPivotRoot && slirpFeatures.HasEnableSandbox { + cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWNS + cmd.SysProcAttr.Unshareflags = syscall.CLONE_NEWNS + } - defer func() { - if err := cmd.Process.Release(); err != nil { - logrus.Errorf("unable to release command process: %q", err) - } - }() + // Leak one end of the pipe in slirp4netns + cmd.ExtraFiles = append(cmd.ExtraFiles, syncW) - if err := waitForSync(syncR, cmd, logFile, 1*time.Second); err != nil { - return nil, err - } + logPath := filepath.Join(r.config.Engine.TmpDir, "slirp4netns-rootless-cni.log") + logFile, err := os.Create(logPath) + if err != nil { + return nil, errors.Wrapf(err, "failed to open slirp4netns log file %s", logPath) + } + defer logFile.Close() + // Unlink immediately the file so we won't need to worry about cleaning it up later. + // It is still accessible through the open fd logFile. + if err := os.Remove(logPath); err != nil { + return nil, errors.Wrapf(err, "delete file %s", logPath) + } + cmd.Stdout = logFile + cmd.Stderr = logFile + if err := cmd.Start(); err != nil { + return nil, errors.Wrapf(err, "failed to start slirp4netns process") + } + // create pid file for the slirp4netns process + // this is need to kill the process in the cleanup + pid := strconv.Itoa(cmd.Process.Pid) + err = ioutil.WriteFile(filepath.Join(cniDir, "rootless-cni-slirp4netns.pid"), []byte(pid), 0700) + if err != nil { + errors.Wrap(err, "unable to write rootless-cni slirp4netns pid file") + } - // build a new resolv.conf file which uses the slirp4netns dns server address - resolveIP, err := GetSlirp4netnsDNS(nil) - if err != nil { - return nil, errors.Wrap(err, "failed to determine default slirp4netns DNS address") - } + defer func() { + if err := cmd.Process.Release(); err != nil { + logrus.Errorf("unable to release command process: %q", err) + } + }() - if netOptions.cidr != "" { - _, cidr, err := net.ParseCIDR(netOptions.cidr) - if err != nil { - return nil, errors.Wrap(err, "failed to parse slirp4netns cidr") - } - resolveIP, err = GetSlirp4netnsDNS(cidr) - if err != nil { - return nil, errors.Wrapf(err, "failed to determine slirp4netns DNS address from cidr: %s", cidr.String()) - } - } - conf, err := resolvconf.Get() - if err != nil { - return nil, err - } - conf, err = resolvconf.FilterResolvDNS(conf.Content, netOptions.enableIPv6, true) - if err != nil { - return nil, err - } - searchDomains := resolvconf.GetSearchDomains(conf.Content) - dnsOptions := resolvconf.GetOptions(conf.Content) - nameServers := resolvconf.GetNameservers(conf.Content) + if err := waitForSync(syncR, cmd, logFile, 1*time.Second); err != nil { + return nil, err + } - _, err = resolvconf.Build(filepath.Join(cniDir, "resolv.conf"), append([]string{resolveIP.String()}, nameServers...), searchDomains, dnsOptions) - if err != nil { - return nil, errors.Wrap(err, "failed to create rootless cni resolv.conf") - } + // build a new resolv.conf file which uses the slirp4netns dns server address + resolveIP, err := GetSlirp4netnsDNS(nil) + if err != nil { + return nil, errors.Wrap(err, "failed to determine default slirp4netns DNS address") + } - // create cni directories to store files - // they will be bind mounted to the correct location in a extra mount ns - err = os.MkdirAll(filepath.Join(cniDir, strings.TrimPrefix(persistentCNIDir, "/")), 0700) - if err != nil { - return nil, errors.Wrap(err, "could not create rootless-cni var directory") - } - runDir := filepath.Join(cniDir, "run") - err = os.MkdirAll(runDir, 0700) - if err != nil { - return nil, errors.Wrap(err, "could not create rootless-cni run directory") - } - // relabel the new run directory to the iptables /run label - // this is important, otherwise the iptables command will fail - err = label.Relabel(runDir, "system_u:object_r:iptables_var_run_t:s0", false) - if err != nil { - return nil, errors.Wrap(err, "could not create relabel rootless-cni run directory") - } - // create systemd run directory - err = os.MkdirAll(filepath.Join(runDir, "systemd"), 0700) - if err != nil { - return nil, errors.Wrap(err, "could not create rootless-cni systemd directory") - } - // create the directory for the netns files at the same location - // relative to the rootless-cni location - err = os.MkdirAll(filepath.Join(cniDir, nsDir), 0700) - if err != nil { - return nil, errors.Wrap(err, "could not create rootless-cni netns directory") - } - } else { - // return a error if we could not get the namespace and should no create one - return nil, errors.Wrap(err, "error getting rootless cni network namespace") + if netOptions.cidr != "" { + _, cidr, err := net.ParseCIDR(netOptions.cidr) + if err != nil { + return nil, errors.Wrap(err, "failed to parse slirp4netns cidr") + } + resolveIP, err = GetSlirp4netnsDNS(cidr) + if err != nil { + return nil, errors.Wrapf(err, "failed to determine slirp4netns DNS address from cidr: %s", cidr.String()) } } + conf, err := resolvconf.Get() + if err != nil { + return nil, err + } + conf, err = resolvconf.FilterResolvDNS(conf.Content, netOptions.enableIPv6, true) + if err != nil { + return nil, err + } + searchDomains := resolvconf.GetSearchDomains(conf.Content) + dnsOptions := resolvconf.GetOptions(conf.Content) + nameServers := resolvconf.GetNameservers(conf.Content) - // The CNI plugins need access to iptables in $PATH. As it turns out debian doesn't put - // /usr/sbin in $PATH for rootless users. This will break rootless cni completely. - // We might break existing users and we cannot expect everyone to change their $PATH so - // lets add /usr/sbin to $PATH ourselves. - path = os.Getenv("PATH") - if !strings.Contains(path, "/usr/sbin") { - path = path + ":/usr/sbin" - os.Setenv("PATH", path) + _, err = resolvconf.Build(filepath.Join(cniDir, "resolv.conf"), append([]string{resolveIP.String()}, nameServers...), searchDomains, dnsOptions) + if err != nil { + return nil, errors.Wrap(err, "failed to create rootless cni resolv.conf") } - rootlessCNINS = &RootlessCNI{ - ns: ns, - dir: cniDir, - lock: lock, + // create cni directories to store files + // they will be bind mounted to the correct location in a extra mount ns + err = os.MkdirAll(filepath.Join(cniDir, strings.TrimPrefix(persistentCNIDir, "/")), 0700) + if err != nil { + return nil, errors.Wrap(err, "could not create rootless-cni var directory") + } + runDir := filepath.Join(cniDir, "run") + err = os.MkdirAll(runDir, 0700) + if err != nil { + return nil, errors.Wrap(err, "could not create rootless-cni run directory") + } + // relabel the new run directory to the iptables /run label + // this is important, otherwise the iptables command will fail + err = label.Relabel(runDir, "system_u:object_r:iptables_var_run_t:s0", false) + if err != nil { + return nil, errors.Wrap(err, "could not create relabel rootless-cni run directory") + } + // create systemd run directory + err = os.MkdirAll(filepath.Join(runDir, "systemd"), 0700) + if err != nil { + return nil, errors.Wrap(err, "could not create rootless-cni systemd directory") } + // create the directory for the netns files at the same location + // relative to the rootless-cni location + err = os.MkdirAll(filepath.Join(cniDir, nsDir), 0700) + if err != nil { + return nil, errors.Wrap(err, "could not create rootless-cni netns directory") + } + } + + // The CNI plugins need access to iptables in $PATH. As it turns out debian doesn't put + // /usr/sbin in $PATH for rootless users. This will break rootless cni completely. + // We might break existing users and we cannot expect everyone to change their $PATH so + // lets add /usr/sbin to $PATH ourselves. + path = os.Getenv("PATH") + if !strings.Contains(path, "/usr/sbin") { + path = path + ":/usr/sbin" + os.Setenv("PATH", path) + } + + // Important set rootlessCNINS as last step. + // Do not return any errors after this. + rootlessCNINS = &RootlessCNI{ + ns: ns, + dir: cniDir, + Lock: lock, } return rootlessCNINS, nil } @@ -548,9 +575,8 @@ func (r *Runtime) setUpOCICNIPod(podNetwork ocicni.PodNetwork) ([]ocicni.NetResu // rootlessCNINS is nil if we are root if rootlessCNINS != nil { // execute the cni setup in the rootless net ns - rootlessCNINS.lock.Lock() err = rootlessCNINS.Do(setUpPod) - rootlessCNINS.lock.Unlock() + rootlessCNINS.Lock.Unlock() } else { err = setUpPod() } @@ -762,9 +788,8 @@ func (r *Runtime) teardownOCICNIPod(podNetwork ocicni.PodNetwork) error { // rootlessCNINS is nil if we are root if rootlessCNINS != nil { // execute the cni setup in the rootless net ns - rootlessCNINS.lock.Lock() err = rootlessCNINS.Do(tearDownPod) - rootlessCNINS.lock.Unlock() + rootlessCNINS.Lock.Unlock() if err == nil { err = rootlessCNINS.Cleanup(r) } diff --git a/libpod/oci_conmon_exec_linux.go b/libpod/oci_conmon_exec_linux.go index 09d3d1833..05a4e19b0 100644 --- a/libpod/oci_conmon_exec_linux.go +++ b/libpod/oci_conmon_exec_linux.go @@ -610,6 +610,9 @@ func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.Resp _, err := utils.CopyDetachable(conn, httpBuf, detachKeys) logrus.Debugf("STDIN copy completed") stdinChan <- err + if connErr := conn.CloseWrite(); connErr != nil { + logrus.Errorf("Unable to close conn: %v", connErr) + } }() } diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index 2914bd1a1..846d3815a 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -1064,6 +1064,30 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co if restoreOptions.TCPEstablished { args = append(args, "--runtime-opt", "--tcp-established") } + if restoreOptions.Pod != "" { + mountLabel := ctr.config.MountLabel + processLabel := ctr.config.ProcessLabel + if mountLabel != "" { + args = append( + args, + "--runtime-opt", + fmt.Sprintf( + "--lsm-mount-context=%s", + mountLabel, + ), + ) + } + if processLabel != "" { + args = append( + args, + "--runtime-opt", + fmt.Sprintf( + "--lsm-profile=selinux:%s", + processLabel, + ), + ) + } + } } logrus.WithFields(logrus.Fields{ diff --git a/libpod/options.go b/libpod/options.go index b12153512..17a36008d 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -16,6 +16,7 @@ import ( "github.com/containers/podman/v3/libpod/events" "github.com/containers/podman/v3/pkg/namespaces" "github.com/containers/podman/v3/pkg/rootless" + "github.com/containers/podman/v3/pkg/specgen" "github.com/containers/podman/v3/pkg/util" "github.com/containers/storage" "github.com/containers/storage/pkg/idtools" @@ -458,6 +459,19 @@ func WithDefaultInfraCommand(cmd string) RuntimeOption { } } +// WithDefaultInfraName sets the infra container name for a single pod. +func WithDefaultInfraName(name string) RuntimeOption { + return func(rt *Runtime) error { + if rt.valid { + return define.ErrRuntimeFinalized + } + + rt.config.Engine.InfraImage = name + + return nil + } +} + // WithRenumber instructs libpod to perform a lock renumbering while // initializing. This will handle migrations from early versions of libpod with // file locks to newer versions with SHM locking, as well as changes in the @@ -1786,6 +1800,19 @@ func WithInfraCommand(cmd []string) PodCreateOption { } } +// WithInfraName sets the infra container name for a single pod. +func WithInfraName(name string) PodCreateOption { + return func(pod *Pod) error { + if pod.valid { + return define.ErrPodFinalized + } + + pod.config.InfraContainer.InfraName = name + + return nil + } +} + // WithPodName sets the name of the pod. func WithPodName(name string) PodCreateOption { return func(pod *Pod) error { @@ -2397,3 +2424,22 @@ func WithPodCPUSetCPUs(inp string) PodCreateOption { return nil } } + +func WithPodPidNS(inp specgen.Namespace) PodCreateOption { + return func(p *Pod) error { + if p.valid { + return define.ErrPodFinalized + } + if p.config.UsePodPID { + switch inp.NSMode { + case "container": + return errors.Wrap(define.ErrInvalidArg, "Cannot take container in a different NS as an argument") + case "host": + p.config.UsePodPID = false + } + p.config.InfraContainer.PidNS = inp + } + + return nil + } +} diff --git a/libpod/pod.go b/libpod/pod.go index d7a9b15d9..62f5c9e5b 100644 --- a/libpod/pod.go +++ b/libpod/pod.go @@ -7,6 +7,7 @@ import ( "github.com/containers/podman/v3/libpod/define" "github.com/containers/podman/v3/libpod/lock" + "github.com/containers/podman/v3/pkg/specgen" "github.com/cri-o/ocicni/pkg/ocicni" "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" @@ -97,6 +98,7 @@ type InfraContainerConfig struct { HasInfraContainer bool `json:"makeInfraContainer"` NoNetwork bool `json:"noNetwork,omitempty"` HostNetwork bool `json:"infraHostNetwork,omitempty"` + PidNS specgen.Namespace `json:"infraPid,omitempty"` PortBindings []ocicni.PortMapping `json:"infraPortBindings"` StaticIP net.IP `json:"staticIP,omitempty"` StaticMAC net.HardwareAddr `json:"staticMAC,omitempty"` @@ -110,6 +112,7 @@ type InfraContainerConfig struct { ExitCommand []string `json:"exitCommand,omitempty"` InfraImage string `json:"infraImage,omitempty"` InfraCommand []string `json:"infraCommand,omitempty"` + InfraName string `json:"infraName,omitempty"` Slirp4netns bool `json:"slirp4netns,omitempty"` NetworkOptions map[string][]string `json:"network_options,omitempty"` ResourceLimits *specs.LinuxResources `json:"resource_limits,omitempty"` @@ -170,6 +173,11 @@ func (p *Pod) CPUQuota() int64 { return 0 } +// PidMode returns the PID mode given by the user ex: pod, private... +func (p *Pod) PidMode() string { + return string(p.config.InfraContainer.PidNS.NSMode) +} + // Labels returns the pod's labels func (p *Pod) Labels() map[string]string { labels := make(map[string]string) diff --git a/libpod/pod_api.go b/libpod/pod_api.go index d8f5d15f8..1ab012a8b 100644 --- a/libpod/pod_api.go +++ b/libpod/pod_api.go @@ -541,6 +541,7 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) { infraConfig.CPUPeriod = p.CPUPeriod() infraConfig.CPUQuota = p.CPUQuota() infraConfig.CPUSetCPUs = p.ResourceLim().CPU.Cpus + infraConfig.PidNS = p.PidMode() if len(p.config.InfraContainer.DNSServer) > 0 { infraConfig.DNSServer = make([]string, 0, len(p.config.InfraContainer.DNSServer)) diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index 6c69d1b72..ce4c5d758 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -47,6 +47,32 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options .. return r.newContainer(ctx, rSpec, options...) } +func (r *Runtime) PrepareVolumeOnCreateContainer(ctx context.Context, ctr *Container) error { + // Copy the content from the underlying image into the newly created + // volume if configured to do so. + if !r.config.Containers.PrepareVolumeOnCreate { + return nil + } + + defer func() { + if err := ctr.cleanupStorage(); err != nil { + logrus.Errorf("error cleaning up container storage %s: %v", ctr.ID(), err) + } + }() + + mountPoint, err := ctr.mountStorage() + if err == nil { + // Finish up mountStorage + ctr.state.Mounted = true + ctr.state.Mountpoint = mountPoint + if err = ctr.save(); err != nil { + logrus.Errorf("Error saving container %s state: %v", ctr.ID(), err) + } + } + + return err +} + // RestoreContainer re-creates a container from an imported checkpoint func (r *Runtime) RestoreContainer(ctx context.Context, rSpec *spec.Spec, config *ContainerConfig) (*Container, error) { r.lock.Lock() diff --git a/libpod/runtime_pod_infra_linux.go b/libpod/runtime_pod_infra_linux.go index 6b002f65a..d4f861118 100644 --- a/libpod/runtime_pod_infra_linux.go +++ b/libpod/runtime_pod_infra_linux.go @@ -145,6 +145,18 @@ func (r *Runtime) makeInfraContainer(ctx context.Context, p *Pod, imgName, rawIm if len(p.config.InfraContainer.ExitCommand) > 0 { options = append(options, WithExitCommand(p.config.InfraContainer.ExitCommand)) } + + if p.config.UsePodPID && p.config.InfraContainer.PidNS.NSMode != "host" { + g.AddOrReplaceLinuxNamespace(string(spec.LinuxNamespaceType("pid")), p.config.InfraContainer.PidNS.Value) + } else if p.config.InfraContainer.PidNS.NSMode == "host" { + newNS := []spec.LinuxNamespace{} + for _, entry := range g.Config.Linux.Namespaces { + if entry.Type != spec.LinuxNamespaceType("pid") { + newNS = append(newNS, entry) + } + } + g.Config.Linux.Namespaces = newNS + } } g.SetRootReadonly(true) g.SetProcessArgs(infraCtrCommand) @@ -189,7 +201,11 @@ func (r *Runtime) makeInfraContainer(ctx context.Context, p *Pod, imgName, rawIm g.AddLinuxSysctl(sysctlKey, sysctlVal) } - containerName := p.ID()[:IDTruncLength] + "-infra" + containerName := p.config.InfraContainer.InfraName + if containerName == "" { + containerName = p.ID()[:IDTruncLength] + "-infra" + } + logrus.Infof("Infra container name %s", containerName) options = append(options, r.WithPod(p)) options = append(options, WithRootFSFromImage(imgID, imgName, rawImageName)) options = append(options, WithName(containerName)) diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go index 4ede23cac..fce3f38a7 100644 --- a/libpod/runtime_pod_linux.go +++ b/libpod/runtime_pod_linux.go @@ -116,6 +116,7 @@ func (r *Runtime) NewPod(ctx context.Context, options ...PodCreateOption) (_ *Po if pod.config.UsePodCgroup { logrus.Debugf("Got pod cgroup as %s", pod.state.CgroupPath) } + if !pod.HasInfraContainer() && pod.SharesNamespaces() { return nil, errors.Errorf("Pods must have an infra container to share namespaces") } |