diff options
author | Matthew Heon <mheon@redhat.com> | 2020-05-08 17:41:50 -0400 |
---|---|---|
committer | Matthew Heon <mheon@redhat.com> | 2020-05-08 18:00:42 -0400 |
commit | c57c560d9014b38040df8529f09208ba7743f794 (patch) | |
tree | 94a9db7cd4402b0778071ff3fc3daa0487dbc0b8 | |
parent | 7f8b31f5fb59d8b66a26dcd822863fbc18687905 (diff) | |
download | podman-c57c560d9014b38040df8529f09208ba7743f794.tar.gz podman-c57c560d9014b38040df8529f09208ba7743f794.tar.bz2 podman-c57c560d9014b38040df8529f09208ba7743f794.zip |
Fix bug where pods would unintentionally share cgroupns
This one was a massive pain to track down.
The original symptom was an error message from rootless Podman
trying to make a container in a pod. I unfortunately did not look
at the error message closely enough to realize that the namespace
in question was the cgroup namespace (the reproducer pod was
explicitly set to only share the network namespace), else this
would have been quite a bit shorter.
I spent considerable effort trying to track down differences
between the inspect output of the two containers, and when that
failed I was forced to resort to diffing the OCI specs. That
finally proved fruitful, and I was able to determine what should
have been obvious all along: the container was joining the cgroup
namespace of the infra container when it really ought not to
have.
From there, I discovered a variable collision in pod config. The
UsePodCgroup variable means "create a parent cgroup for the pod
and join containers in the pod to it". Unfortunately, it is very
similar to UsePodUTS, UsePodNet, etc, which mean "the pod shares
this namespace", so an accessor was accidentally added for it
that indicated the pod shared the cgroup namespace when it really
did not. Once I realized that, it was a quick fix - add a bool to
the pod's configuration to indicate whether the cgroup ns was
shared (distinct from UsePodCgroup) and use that for the
accessor.
Also included are fixes for `podman inspect` and
`podman pod inspect` that fix them to actually display the state
of the cgroup namespace (for container inspect) and what
namespaces are shared (for pod inspect). Either of those would
have made tracking this down considerably quicker.
Fixes #6149
Signed-off-by: Matthew Heon <mheon@redhat.com>
-rw-r--r-- | libpod/container_inspect.go | 24 | ||||
-rw-r--r-- | libpod/define/container_inspect.go | 7 | ||||
-rw-r--r-- | libpod/options.go | 16 | ||||
-rw-r--r-- | libpod/pod.go | 15 | ||||
-rw-r--r-- | libpod/pod_api.go | 20 |
5 files changed, 74 insertions, 8 deletions
diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go index ae28dde94..a42b50609 100644 --- a/libpod/container_inspect.go +++ b/libpod/container_inspect.go @@ -698,6 +698,30 @@ func (c *Container) generateInspectContainerHostConfig(ctrSpec *spec.Spec, named } hostConfig.IpcMode = ipcMode + // Cgroup namespace mode + cgroupMode := "" + if c.config.CgroupNsCtr != "" { + cgroupMode = fmt.Sprintf("container:%s", c.config.CgroupNsCtr) + } else if ctrSpec.Linux != nil { + // Locate the spec's cgroup namespace + // If there is none, it's cgroup=host. + // If there is one and it has a path, it's "ns:". + // If there is no path, it's private. + for _, ns := range ctrSpec.Linux.Namespaces { + if ns.Type == spec.CgroupNamespace { + if ns.Path != "" { + cgroupMode = fmt.Sprintf("ns:%s", ns.Path) + } else { + cgroupMode = "private" + } + } + } + if cgroupMode == "" { + cgroupMode = "host" + } + } + hostConfig.CgroupMode = cgroupMode + // CGroup parent // Need to check if it's the default, and not print if so. defaultCgroupParent := "" diff --git a/libpod/define/container_inspect.go b/libpod/define/container_inspect.go index e6a19e5b4..27ada8706 100644 --- a/libpod/define/container_inspect.go +++ b/libpod/define/container_inspect.go @@ -228,6 +228,13 @@ type InspectContainerHostConfig struct { // include a Mounts field in inspect. // Format: <src>:<destination>[:<comma-separated options>] Binds []string `json:"Binds"` + // CgroupMode is the configuration of the container's cgroup namespace. + // Populated as follows: + // private - a cgroup namespace has been created + // host - No cgroup namespace created + // container:<id> - Using another container's cgroup namespace + // ns:<path> - A path to a cgroup namespace has been specified + CgroupMode string `json:"CgroupMode"` // ContainerIDFile is a file created during container creation to hold // the ID of the created container. // This is not handled within libpod and is stored in an annotation. diff --git a/libpod/options.go b/libpod/options.go index 33b423bce..05241baf3 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -1692,6 +1692,22 @@ func WithPodUTS() PodCreateOption { } } +// WithPodCgroup tells containers in this pod to use the cgroup namespace +// created for this pod. +// Containers in a pod will inherit the kernel namespaces from the first +// container added. +func WithPodCgroup() PodCreateOption { + return func(pod *Pod) error { + if pod.valid { + return define.ErrPodFinalized + } + + pod.config.UsePodCgroupNS = true + + return nil + } +} + // WithInfraContainer tells the pod to create a pause container func WithInfraContainer() PodCreateOption { return func(pod *Pod) error { diff --git a/libpod/pod.go b/libpod/pod.go index 8eb06ae2f..34ceef5ef 100644 --- a/libpod/pod.go +++ b/libpod/pod.go @@ -51,12 +51,13 @@ type PodConfig struct { // The following UsePod{kernelNamespace} indicate whether the containers // in the pod will inherit the namespace from the first container in the pod. - UsePodPID bool `json:"sharesPid,omitempty"` - UsePodIPC bool `json:"sharesIpc,omitempty"` - UsePodNet bool `json:"sharesNet,omitempty"` - UsePodMount bool `json:"sharesMnt,omitempty"` - UsePodUser bool `json:"sharesUser,omitempty"` - UsePodUTS bool `json:"sharesUts,omitempty"` + UsePodPID bool `json:"sharesPid,omitempty"` + UsePodIPC bool `json:"sharesIpc,omitempty"` + UsePodNet bool `json:"sharesNet,omitempty"` + UsePodMount bool `json:"sharesMnt,omitempty"` + UsePodUser bool `json:"sharesUser,omitempty"` + UsePodUTS bool `json:"sharesUts,omitempty"` + UsePodCgroupNS bool `json:"sharesCgroupNS,omitempty"` InfraContainer *InfraContainerConfig `json:"infraConfig"` @@ -167,7 +168,7 @@ func (p *Pod) SharesUTS() bool { // SharesCgroup returns whether containers in the pod will default to this pod's // cgroup instead of the default libpod parent func (p *Pod) SharesCgroup() bool { - return p.config.UsePodCgroup + return p.config.UsePodCgroupNS } // CgroupPath returns the path to the pod's CGroup diff --git a/libpod/pod_api.go b/libpod/pod_api.go index 45aa5cb8d..0be9f2573 100644 --- a/libpod/pod_api.go +++ b/libpod/pod_api.go @@ -466,6 +466,24 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) { if err != nil { return nil, err } + + namespaces := map[string]bool{ + "pid": p.config.UsePodPID, + "ipc": p.config.UsePodIPC, + "net": p.config.UsePodNet, + "mount": p.config.UsePodMount, + "user": p.config.UsePodUser, + "uts": p.config.UsePodUTS, + "cgroup": p.config.UsePodCgroupNS, + } + + sharesNS := []string{} + for nsStr, include := range namespaces { + if include { + sharesNS = append(sharesNS, nsStr) + } + } + inspectData := define.InspectPodData{ ID: p.ID(), Name: p.Name(), @@ -480,7 +498,7 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) { CreateInfra: false, InfraContainerID: p.state.InfraContainerID, InfraConfig: nil, - SharedNamespaces: nil, + SharedNamespaces: sharesNS, NumContainers: uint(len(containers)), Containers: ctrs, } |