From ad63aa20db09b3d4b8f9d5312a0022e50c417341 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Thu, 8 Sep 2022 16:23:37 +0200 Subject: kube: plug HostUsers in the pod spec map HostUsers=false to userns=auto. One difference with the current implementation in the Kubelet is that the podman default size is 1024 while the Kubelet uses 65536. This is done on purpose, because 65536 is a problem for rootless as the entire IDs space would be allocated to a single pod. Signed-off-by: Giuseppe Scrivano --- pkg/domain/infra/abi/play.go | 5 +++++ pkg/k8s.io/api/core/v1/types.go | 12 ++++++++++++ test/e2e/play_kube_test.go | 33 +++++++++++++++++++++++++++++++-- 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/pkg/domain/infra/abi/play.go b/pkg/domain/infra/abi/play.go index 12786afcd..57d795682 100644 --- a/pkg/domain/infra/abi/play.go +++ b/pkg/domain/infra/abi/play.go @@ -355,6 +355,11 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY if options.Userns == "" { options.Userns = "host" + if podYAML.Spec.HostUsers != nil && !*podYAML.Spec.HostUsers { + options.Userns = "auto" + } + } else if podYAML.Spec.HostUsers != nil { + logrus.Info("overriding the user namespace mode in the pod spec") } // Validate the userns modes supported. diff --git a/pkg/k8s.io/api/core/v1/types.go b/pkg/k8s.io/api/core/v1/types.go index d47178878..6f20cd351 100644 --- a/pkg/k8s.io/api/core/v1/types.go +++ b/pkg/k8s.io/api/core/v1/types.go @@ -1984,6 +1984,18 @@ type PodSpec struct { // Default to false. // +optional SetHostnameAsFQDN *bool `json:"setHostnameAsFQDN,omitempty"` + // Use the host's user namespace. + // Optional: Default to true. + // If set to true or not present, the pod will be run in the host user namespace, useful + // for when the pod needs a feature only available to the host user namespace, such as + // loading a kernel module with CAP_SYS_MODULE. + // When set to false, a new userns is created for the pod. Setting false is useful for + // mitigating container breakout vulnerabilities even allowing users to run their + // containers as root without actually having root privileges on the host. + // This field is alpha-level and is only honored by servers that enable the UserNamespacesSupport feature. + // +k8s:conversion-gen=false + // +optional + HostUsers *bool `json:"hostUsers,omitempty"` } type UnsatisfiableConstraintAction string diff --git a/test/e2e/play_kube_test.go b/test/e2e/play_kube_test.go index d1eb960cd..baa74cb51 100644 --- a/test/e2e/play_kube_test.go +++ b/test/e2e/play_kube_test.go @@ -380,6 +380,9 @@ spec: restartPolicy: {{ .RestartPolicy }} hostname: {{ .Hostname }} hostNetwork: {{ .HostNetwork }} +{{ if .HostUsers }} + hostUsers: {{ .HostUsers }} +{{ end }} hostAliases: {{ range .HostAliases }} - hostnames: @@ -844,6 +847,7 @@ type Pod struct { RestartPolicy string Hostname string HostNetwork bool + HostUsers *bool HostAliases []HostAlias Ctrs []*Ctr InitCtrs []*Ctr @@ -968,6 +972,12 @@ func withHostNetwork() podOption { } } +func withHostUsers(val bool) podOption { + return func(pod *Pod) { + pod.HostUsers = &val + } +} + // Deployment describes the options a kube yaml can be configured at deployment level type Deployment struct { Name string @@ -3783,8 +3793,7 @@ ENV OPENJ9_JAVA_OPTIONS=%q Expect((inspect.InspectContainerToJSON()[0]).HostConfig.LogConfig.Tag).To(Equal("{{.ImageName}}")) }) - // Check that --userns=auto creates a user namespace - It("podman play kube --userns=auto", func() { + It("podman play kube using a user namespace", func() { u, err := user.Current() Expect(err).To(BeNil()) name := u.Name @@ -3831,6 +3840,26 @@ ENV OPENJ9_JAVA_OPTIONS=%q usernsInCtr.WaitWithDefaultTimeout() Expect(usernsInCtr).Should(Exit(0)) Expect(string(usernsInCtr.Out.Contents())).To(Not(Equal(string(initialUsernsConfig)))) + + // Now try with hostUsers in the pod spec + for _, hostUsers := range []bool{true, false} { + pod = getPod(withHostUsers(hostUsers)) + err = generateKubeYaml("pod", pod, kubeYaml) + Expect(err).To(BeNil()) + + kube = podmanTest.PodmanNoCache([]string{"play", "kube", "--replace", kubeYaml}) + kube.WaitWithDefaultTimeout() + Expect(kube).Should(Exit(0)) + + usernsInCtr = podmanTest.Podman([]string{"exec", getCtrNameInPod(pod), "cat", "/proc/self/uid_map"}) + usernsInCtr.WaitWithDefaultTimeout() + Expect(usernsInCtr).Should(Exit(0)) + if hostUsers { + Expect(string(usernsInCtr.Out.Contents())).To(Equal(string(initialUsernsConfig))) + } else { + Expect(string(usernsInCtr.Out.Contents())).To(Not(Equal(string(initialUsernsConfig)))) + } + } }) // Check the block devices are exposed inside container -- cgit v1.2.3-54-g00ecf