diff options
-rwxr-xr-x | API.md | 2 | ||||
-rw-r--r-- | cmd/podman/common.go | 4 | ||||
-rw-r--r-- | cmd/podman/shared/create.go | 18 | ||||
-rw-r--r-- | cmd/podman/shared/intermediate.go | 1 | ||||
-rw-r--r-- | cmd/podman/varlink/io.podman.varlink | 9 | ||||
-rwxr-xr-x | contrib/cirrus/notice_branch_failure.sh | 2 | ||||
-rw-r--r-- | docs/podman-create.1.md | 8 | ||||
-rw-r--r-- | docs/podman-run.1.md | 8 | ||||
-rw-r--r-- | libpod/container_api.go | 9 | ||||
-rw-r--r-- | pkg/adapter/containers_remote.go | 1 | ||||
-rw-r--r-- | pkg/namespaces/namespaces.go | 57 | ||||
-rw-r--r-- | pkg/spec/config_linux.go | 41 | ||||
-rw-r--r-- | pkg/spec/createconfig.go | 19 | ||||
-rw-r--r-- | pkg/spec/spec.go | 27 | ||||
-rw-r--r-- | pkg/varlinkapi/containers.go | 54 | ||||
-rw-r--r-- | pkg/varlinkapi/util.go | 2 | ||||
-rw-r--r-- | test/e2e/run_device_test.go | 9 | ||||
-rw-r--r-- | test/e2e/run_ns_test.go | 9 |
18 files changed, 237 insertions, 43 deletions
@@ -1894,6 +1894,8 @@ pod [?bool](#?bool) quiet [?bool](#?bool) +size [?bool](#?bool) + sort [?string](#?string) sync [?bool](#?bool) diff --git a/cmd/podman/common.go b/cmd/podman/common.go index 15f753d55..1e9092bd6 100644 --- a/cmd/podman/common.go +++ b/cmd/podman/common.go @@ -130,6 +130,10 @@ func getCreateFlags(c *cliconfig.PodmanCommand) { "Drop capabilities from the container", ) createFlags.String( + "cgroupns", "host", + "cgroup namespace to use", + ) + createFlags.String( "cgroup-parent", "", "Optional parent cgroup for the container", ) diff --git a/cmd/podman/shared/create.go b/cmd/podman/shared/create.go index 5c37a1875..b14ce431d 100644 --- a/cmd/podman/shared/create.go +++ b/cmd/podman/shared/create.go @@ -400,11 +400,12 @@ func ParseCreateOpts(ctx context.Context, c *GenericCLIResults, runtime *libpod. namespaceNet = c.String("net") } namespaces = map[string]string{ - "pid": c.String("pid"), - "net": namespaceNet, - "ipc": c.String("ipc"), - "user": c.String("userns"), - "uts": c.String("uts"), + "cgroup": c.String("cgroupns"), + "pid": c.String("pid"), + "net": namespaceNet, + "ipc": c.String("ipc"), + "user": c.String("userns"), + "uts": c.String("uts"), } originalPodName := c.String("pod") @@ -462,6 +463,11 @@ func ParseCreateOpts(ctx context.Context, c *GenericCLIResults, runtime *libpod. return nil, errors.Errorf("--uts %q is not valid", namespaces["uts"]) } + cgroupMode := ns.CgroupMode(namespaces["cgroup"]) + if !cgroupMode.Valid() { + return nil, errors.Errorf("--cgroup %q is not valid", namespaces["cgroup"]) + } + ipcMode := ns.IpcMode(namespaces["ipc"]) if !cc.Valid(string(ipcMode), ipcMode) { return nil, errors.Errorf("--ipc %q is not valid", ipcMode) @@ -652,6 +658,7 @@ func ParseCreateOpts(ctx context.Context, c *GenericCLIResults, runtime *libpod. CapAdd: c.StringSlice("cap-add"), CapDrop: c.StringSlice("cap-drop"), CidFile: c.String("cidfile"), + Cgroupns: c.String("cgroupns"), CgroupParent: c.String("cgroup-parent"), Command: command, Detach: c.Bool("detach"), @@ -687,6 +694,7 @@ func ParseCreateOpts(ctx context.Context, c *GenericCLIResults, runtime *libpod. NetMode: netMode, UtsMode: utsMode, PidMode: pidMode, + CgroupMode: cgroupMode, Pod: podName, Privileged: c.Bool("privileged"), Publish: c.StringSlice("publish"), diff --git a/cmd/podman/shared/intermediate.go b/cmd/podman/shared/intermediate.go index 8337dc647..4062ac48a 100644 --- a/cmd/podman/shared/intermediate.go +++ b/cmd/podman/shared/intermediate.go @@ -370,6 +370,7 @@ func NewIntermediateLayer(c *cliconfig.PodmanCommand, remote bool) GenericCLIRes m["blkio-weight-device"] = newCRStringSlice(c, "blkio-weight-device") m["cap-add"] = newCRStringSlice(c, "cap-add") m["cap-drop"] = newCRStringSlice(c, "cap-drop") + m["cgroupns"] = newCRString(c, "cgroupns") m["cgroup-parent"] = newCRString(c, "cgroup-parent") m["cidfile"] = newCRString(c, "cidfile") m["conmon-pidfile"] = newCRString(c, "conmon-pidfile") diff --git a/cmd/podman/varlink/io.podman.varlink b/cmd/podman/varlink/io.podman.varlink index 9410b9459..0bf236b77 100644 --- a/cmd/podman/varlink/io.podman.varlink +++ b/cmd/podman/varlink/io.podman.varlink @@ -144,10 +144,11 @@ type PsOpts ( last: ?int, latest: ?bool, noTrunc: ?bool, - pod: ?bool, - quiet: ?bool, - sort: ?string, - sync: ?bool + pod: ?bool, + quiet: ?bool, + size: ?bool, + sort: ?string, + sync: ?bool ) type PsContainer ( diff --git a/contrib/cirrus/notice_branch_failure.sh b/contrib/cirrus/notice_branch_failure.sh index 423231dfd..f030c12e5 100755 --- a/contrib/cirrus/notice_branch_failure.sh +++ b/contrib/cirrus/notice_branch_failure.sh @@ -12,7 +12,7 @@ NOR="$(echo -n -e '\x0f')" if [[ "$CIRRUS_BRANCH" = "$DEST_BRANCH" ]] then BURL="https://cirrus-ci.com/build/$CIRRUS_BUILD_ID" - ircmsg "${RED}[Action Recommended]: ${NOR}Post-merge testing on ${RED}$CIRRUS_BRANCH failed${NOR} in $CIRRUS_TASK_NAME on $(OS_RELEASE_ID)-$(OS_RELEASE_VER): $BURL. Please investigate, and re-run if appropriate." + ircmsg "${RED}[Action Recommended]: ${NOR}Post-merge testing on ${RED}$CIRRUS_BRANCH failed${NOR} in $CIRRUS_TASK_NAME on ${OS_RELEASE_ID}-${OS_RELEASE_VER}: $BURL. Please investigate, and re-run if appropriate." fi # This script assumed to be executed on failure diff --git a/docs/podman-create.1.md b/docs/podman-create.1.md index b9380f434..4008b64e6 100644 --- a/docs/podman-create.1.md +++ b/docs/podman-create.1.md @@ -63,6 +63,14 @@ Add Linux capabilities Drop Linux capabilities +**--cgroupns**=*mode* + +Set the cgroup namespace mode for the container, by default **host** is used. + **host**: use the host's cgroup namespace inside the container. + **container:<NAME|ID>**: join the namespace of the specified container. + **private**: create a new cgroup namespace. + **ns:<PATH>**: join the namespace at the specified path. + **--cgroup-parent**=*path* Path to cgroups under which the cgroup for the container will be created. If the path is not absolute, the path is considered to be relative to the cgroups path of the init process. Cgroups will be created if they do not already exist. diff --git a/docs/podman-run.1.md b/docs/podman-run.1.md index 035820b3f..6d95d6045 100644 --- a/docs/podman-run.1.md +++ b/docs/podman-run.1.md @@ -77,6 +77,14 @@ Add Linux capabilities Drop Linux capabilities +**--cgroupns**=*mode* + +Set the cgroup namespace mode for the container, by default **host** is used. + **host**: use the host's cgroup namespace inside the container. + **container:<NAME|ID>**: join the namespace of the specified container. + **private**: create a new cgroup namespace. + **ns:<PATH>**: join the namespace at the specified path. + **--cgroup-parent**=*cgroup* Path to cgroups under which the cgroup for the container will be created. If the path is not absolute, the path is considered to be relative to the cgroups path of the init process. Cgroups will be created if they do not already exist. diff --git a/libpod/container_api.go b/libpod/container_api.go index 3577b8e8c..ae181887e 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -305,6 +305,11 @@ func (c *Container) Exec(tty, privileged bool, env, cmd []string, user, workDir if err != nil { if exited { // If the runtime exited, propagate the error we got from the process. + // We need to remove PID files to ensure no memory leaks + if err2 := os.Remove(pidFile); err2 != nil { + logrus.Errorf("Error removing exit file for container %s exec session %s: %v", c.ID(), sessionID, err2) + } + return err } return errors.Wrapf(err, "timed out waiting for runtime to create pidfile for exec session in container %s", c.ID()) @@ -312,6 +317,10 @@ func (c *Container) Exec(tty, privileged bool, env, cmd []string, user, workDir // Pidfile exists, read it contents, err := ioutil.ReadFile(pidFile) + // We need to remove PID files to ensure no memory leaks + if err2 := os.Remove(pidFile); err2 != nil { + logrus.Errorf("Error removing exit file for container %s exec session %s: %v", c.ID(), sessionID, err2) + } if err != nil { // We don't know the PID of the exec session // However, it may still be alive diff --git a/pkg/adapter/containers_remote.go b/pkg/adapter/containers_remote.go index 5836d0788..c816fd980 100644 --- a/pkg/adapter/containers_remote.go +++ b/pkg/adapter/containers_remote.go @@ -493,6 +493,7 @@ func (r *LocalRuntime) Ps(c *cliconfig.PsValues, opts shared.PsOptions) ([]share NoTrunc: &c.NoTrunct, Pod: &c.Pod, Quiet: &c.Quiet, + Size: &c.Size, Sort: &c.Sort, Sync: &c.Sync, } diff --git a/pkg/namespaces/namespaces.go b/pkg/namespaces/namespaces.go index ec9276344..7ed95bd0f 100644 --- a/pkg/namespaces/namespaces.go +++ b/pkg/namespaces/namespaces.go @@ -4,6 +4,63 @@ import ( "strings" ) +// CgroupMode represents cgroup mode in the container. +type CgroupMode string + +// IsHost indicates whether the container uses the host's cgroup. +func (n CgroupMode) IsHost() bool { + return n == "host" +} + +// IsNS indicates a cgroup namespace passed in by path (ns:<path>) +func (n CgroupMode) IsNS() bool { + return strings.HasPrefix(string(n), "ns:") +} + +// NS gets the path associated with a ns:<path> cgroup ns +func (n CgroupMode) NS() string { + parts := strings.SplitN(string(n), ":", 2) + if len(parts) > 1 { + return parts[1] + } + return "" +} + +// IsContainer indicates whether the container uses a new cgroup namespace. +func (n CgroupMode) IsContainer() bool { + parts := strings.SplitN(string(n), ":", 2) + return len(parts) > 1 && parts[0] == "container" +} + +// Container returns the name of the container whose cgroup namespace is going to be used. +func (n CgroupMode) Container() string { + parts := strings.SplitN(string(n), ":", 2) + if len(parts) > 1 { + return parts[1] + } + return "" +} + +// IsPrivate indicates whether the container uses the a private cgroup. +func (n CgroupMode) IsPrivate() bool { + return n == "private" +} + +// Valid indicates whether the Cgroup namespace is valid. +func (n CgroupMode) Valid() bool { + parts := strings.Split(string(n), ":") + switch mode := parts[0]; mode { + case "", "host", "private", "ns": + case "container": + if len(parts) != 2 || parts[1] == "" { + return false + } + default: + return false + } + return true +} + // UsernsMode represents userns mode in the container. type UsernsMode string diff --git a/pkg/spec/config_linux.go b/pkg/spec/config_linux.go index 9f6a4a058..a84e9a72f 100644 --- a/pkg/spec/config_linux.go +++ b/pkg/spec/config_linux.go @@ -8,6 +8,7 @@ import ( "path/filepath" "strings" + "github.com/containers/libpod/pkg/rootless" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/devices" spec "github.com/opencontainers/runtime-spec/specs-go" @@ -118,8 +119,44 @@ func (c *CreateConfig) addPrivilegedDevices(g *generate.Generator) error { return err } g.ClearLinuxDevices() - for _, d := range hostDevices { - g.AddDevice(Device(d)) + + if rootless.IsRootless() { + mounts := make(map[string]interface{}) + for _, m := range g.Mounts() { + mounts[m.Destination] = true + } + newMounts := []spec.Mount{} + for _, d := range hostDevices { + devMnt := spec.Mount{ + Destination: d.Path, + Type: TypeBind, + Source: d.Path, + Options: []string{"slave", "nosuid", "noexec", "rw", "rbind"}, + } + if d.Path == "/dev/ptmx" || strings.HasPrefix(d.Path, "/dev/tty") { + continue + } + if _, found := mounts[d.Path]; found { + continue + } + st, err := os.Stat(d.Path) + if err != nil { + if err == unix.EPERM { + continue + } + return errors.Wrapf(err, "stat %s", d.Path) + } + // Skip devices that the user has not access to. + if st.Mode()&0007 == 0 { + continue + } + newMounts = append(newMounts, devMnt) + } + g.Config.Mounts = append(newMounts, g.Config.Mounts...) + } else { + for _, d := range hostDevices { + g.AddDevice(Device(d)) + } } // Add resources device - need to clear the existing one first. diff --git a/pkg/spec/createconfig.go b/pkg/spec/createconfig.go index 0042ed401..1fb1f829b 100644 --- a/pkg/spec/createconfig.go +++ b/pkg/spec/createconfig.go @@ -63,6 +63,7 @@ type CreateConfig struct { CapDrop []string // cap-drop CidFile string ConmonPidFile string + Cgroupns string CgroupParent string // cgroup-parent Command []string Detach bool // detach @@ -101,6 +102,7 @@ type CreateConfig struct { NetworkAlias []string //network-alias PidMode namespaces.PidMode //pid Pod string //pod + CgroupMode namespaces.CgroupMode //cgroup PortBindings nat.PortMap Privileged bool //privileged Publish []string //publish @@ -268,6 +270,23 @@ func (c *CreateConfig) getContainerCreateOptions(runtime *libpod.Runtime, pod *l options = append(options, libpod.WithNetNS(portBindings, postConfigureNetNS, string(c.NetMode), networks)) } + if c.CgroupMode.IsNS() { + ns := c.CgroupMode.NS() + if ns == "" { + return nil, errors.Errorf("invalid empty user-defined network namespace") + } + _, err := os.Stat(ns) + if err != nil { + return nil, err + } + } else if c.CgroupMode.IsContainer() { + connectedCtr, err := runtime.LookupContainer(c.CgroupMode.Container()) + if err != nil { + return nil, errors.Wrapf(err, "container %q not found", c.CgroupMode.Container()) + } + options = append(options, libpod.WithCgroupNSFrom(connectedCtr)) + } + if c.PidMode.IsContainer() { connectedCtr, err := runtime.LookupContainer(c.PidMode.Container()) if err != nil { diff --git a/pkg/spec/spec.go b/pkg/spec/spec.go index 41054633f..824c99025 100644 --- a/pkg/spec/spec.go +++ b/pkg/spec/spec.go @@ -264,10 +264,8 @@ func (config *CreateConfig) createConfigToOCISpec(runtime *libpod.Runtime, userM // If privileged, we need to add all the host devices to the // spec. We do not add the user provided ones because we are // already adding them all. - if !rootless.IsRootless() { - if err := config.AddPrivilegedDevices(&g); err != nil { - return nil, err - } + if err := config.AddPrivilegedDevices(&g); err != nil { + return nil, err } } else { for _, devicePath := range config.Devices { @@ -325,6 +323,10 @@ func (config *CreateConfig) createConfigToOCISpec(runtime *libpod.Runtime, userM if err := addIpcNS(config, &g); err != nil { return nil, err } + + if err := addCgroupNS(config, &g); err != nil { + return nil, err + } configSpec := g.Config // HANDLE CAPABILITIES @@ -622,6 +624,23 @@ func addIpcNS(config *CreateConfig, g *generate.Generator) error { return nil } +func addCgroupNS(config *CreateConfig, g *generate.Generator) error { + cgroupMode := config.CgroupMode + if cgroupMode.IsNS() { + return g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), NS(string(cgroupMode))) + } + if cgroupMode.IsHost() { + return g.RemoveLinuxNamespace(spec.CgroupNamespace) + } + if cgroupMode.IsPrivate() { + return g.AddOrReplaceLinuxNamespace(spec.CgroupNamespace, "") + } + if cgroupMode.IsContainer() { + logrus.Debug("Using container cgroup mode") + } + return nil +} + func addRlimits(config *CreateConfig, g *generate.Generator) error { var ( kernelMax uint64 = 1048576 diff --git a/pkg/varlinkapi/containers.go b/pkg/varlinkapi/containers.go index 700e02b0c..6f6909fac 100644 --- a/pkg/varlinkapi/containers.go +++ b/pkg/varlinkapi/containers.go @@ -65,32 +65,34 @@ func (i *LibpodAPI) Ps(call iopodman.VarlinkCall, opts iopodman.PsOpts) error { for _, ctr := range psContainerOutputs { container := iopodman.PsContainer{ - Id: ctr.ID, - Image: ctr.Image, - Command: ctr.Command, - Created: ctr.Created, - Ports: ctr.Ports, - Names: ctr.Names, - IsInfra: ctr.IsInfra, - Status: ctr.Status, - State: ctr.State.String(), - PidNum: int64(ctr.Pid), - RootFsSize: ctr.Size.RootFsSize, - RwSize: ctr.Size.RwSize, - Pod: ctr.Pod, - CreatedAt: ctr.CreatedAt.Format(time.RFC3339Nano), - ExitedAt: ctr.ExitedAt.Format(time.RFC3339Nano), - StartedAt: ctr.StartedAt.Format(time.RFC3339Nano), - Labels: ctr.Labels, - NsPid: ctr.PID, - Cgroup: ctr.Cgroup, - Ipc: ctr.Cgroup, - Mnt: ctr.MNT, - Net: ctr.NET, - PidNs: ctr.PIDNS, - User: ctr.User, - Uts: ctr.UTS, - Mounts: ctr.Mounts, + Id: ctr.ID, + Image: ctr.Image, + Command: ctr.Command, + Created: ctr.Created, + Ports: ctr.Ports, + Names: ctr.Names, + IsInfra: ctr.IsInfra, + Status: ctr.Status, + State: ctr.State.String(), + PidNum: int64(ctr.Pid), + Pod: ctr.Pod, + CreatedAt: ctr.CreatedAt.Format(time.RFC3339Nano), + ExitedAt: ctr.ExitedAt.Format(time.RFC3339Nano), + StartedAt: ctr.StartedAt.Format(time.RFC3339Nano), + Labels: ctr.Labels, + NsPid: ctr.PID, + Cgroup: ctr.Cgroup, + Ipc: ctr.Cgroup, + Mnt: ctr.MNT, + Net: ctr.NET, + PidNs: ctr.PIDNS, + User: ctr.User, + Uts: ctr.UTS, + Mounts: ctr.Mounts, + } + if ctr.Size != nil { + container.RootFsSize = ctr.Size.RootFsSize + container.RwSize = ctr.Size.RwSize } containers = append(containers, container) } diff --git a/pkg/varlinkapi/util.go b/pkg/varlinkapi/util.go index e8f74e6aa..d3a41f7ab 100644 --- a/pkg/varlinkapi/util.go +++ b/pkg/varlinkapi/util.go @@ -191,7 +191,7 @@ func makePsOpts(inOpts iopodman.PsOpts) shared.PsOptions { Latest: derefBool(inOpts.Latest), NoTrunc: derefBool(inOpts.NoTrunc), Pod: derefBool(inOpts.Pod), - Size: true, + Size: derefBool(inOpts.Size), Sort: derefString(inOpts.Sort), Namespace: true, Sync: derefBool(inOpts.Sync), diff --git a/test/e2e/run_device_test.go b/test/e2e/run_device_test.go index 2e537a9f9..cf7ce9cdf 100644 --- a/test/e2e/run_device_test.go +++ b/test/e2e/run_device_test.go @@ -85,4 +85,13 @@ var _ = Describe("Podman run device", func() { session.WaitWithDefaultTimeout() Expect(session.ExitCode()).To(Equal(0)) }) + + It("podman run device host device with --privileged", func() { + if _, err := os.Stat("/dev/kvm"); err != nil { + Skip("/dev/kvm not available") + } + session := podmanTest.Podman([]string{"run", "--privileged", ALPINE, "ls", "/dev/kvm"}) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To(Equal(0)) + }) }) diff --git a/test/e2e/run_ns_test.go b/test/e2e/run_ns_test.go index 6ba0d1aba..e3e86fc66 100644 --- a/test/e2e/run_ns_test.go +++ b/test/e2e/run_ns_test.go @@ -51,6 +51,15 @@ var _ = Describe("Podman run ns", func() { Expect(session.ExitCode()).To(Not(Equal(0))) }) + It("podman run --cgroup private test", func() { + session := podmanTest.Podman([]string{"run", "--cgroupns=private", fedoraMinimal, "cat", "/proc/self/cgroup"}) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To(Equal(0)) + + output := session.OutputToString() + Expect(output).ToNot(ContainSubstring("slice")) + }) + It("podman run ipcns test", func() { setup := SystemExec("ls", []string{"--inode", "-d", "/dev/shm"}) Expect(setup.ExitCode()).To(Equal(0)) |