diff options
Diffstat (limited to 'pkg/specgen/generate')
-rw-r--r-- | pkg/specgen/generate/config_linux.go | 14 | ||||
-rw-r--r-- | pkg/specgen/generate/container_create.go | 31 | ||||
-rw-r--r-- | pkg/specgen/generate/kube/kube.go | 326 | ||||
-rw-r--r-- | pkg/specgen/generate/kube/play_test.go | 172 | ||||
-rw-r--r-- | pkg/specgen/generate/kube/seccomp.go | 84 | ||||
-rw-r--r-- | pkg/specgen/generate/kube/volume.go | 124 | ||||
-rw-r--r-- | pkg/specgen/generate/namespaces.go | 13 | ||||
-rw-r--r-- | pkg/specgen/generate/security.go | 10 | ||||
-rw-r--r-- | pkg/specgen/generate/storage.go | 61 | ||||
-rw-r--r-- | pkg/specgen/generate/validate.go | 65 |
10 files changed, 858 insertions, 42 deletions
diff --git a/pkg/specgen/generate/config_linux.go b/pkg/specgen/generate/config_linux.go index fac02ad01..2d40dba8f 100644 --- a/pkg/specgen/generate/config_linux.go +++ b/pkg/specgen/generate/config_linux.go @@ -52,7 +52,7 @@ func addPrivilegedDevices(g *generate.Generator) error { if err == unix.EPERM { continue } - return errors.Wrapf(err, "stat %s", d.Path) + return err } // Skip devices that the user has not access to. if st.Mode()&0007 == 0 { @@ -90,7 +90,7 @@ func DevicesFromPath(g *generate.Generator, devicePath string) error { } st, err := os.Stat(resolvedDevicePath) if err != nil { - return errors.Wrapf(err, "cannot stat device path %s", devicePath) + return err } if st.IsDir() { found := false @@ -231,10 +231,7 @@ func addDevice(g *generate.Generator, device string) error { } if rootless.IsRootless() { if _, err := os.Stat(src); err != nil { - if os.IsNotExist(err) { - return errors.Wrapf(err, "the specified device %s doesn't exist", src) - } - return errors.Wrapf(err, "stat device %s exist", src) + return err } perm := "ro" if strings.Contains(permissions, "w") { @@ -353,3 +350,8 @@ func deviceFromPath(path string) (*spec.LinuxDevice, error) { Minor: int64(unix.Minor(devNumber)), }, nil } + +func supportAmbientCapabilities() bool { + err := unix.Prctl(unix.PR_CAP_AMBIENT, unix.PR_CAP_AMBIENT_IS_SET, 0, 0, 0) + return err == nil +} diff --git a/pkg/specgen/generate/container_create.go b/pkg/specgen/generate/container_create.go index f051537de..45a374216 100644 --- a/pkg/specgen/generate/container_create.go +++ b/pkg/specgen/generate/container_create.go @@ -111,7 +111,7 @@ func MakeContainer(ctx context.Context, rt *libpod.Runtime, s *specgen.SpecGener return nil, errors.Wrap(err, "invalid config provided") } - finalMounts, finalVolumes, err := finalizeMounts(ctx, s, rt, rtc, newImage) + finalMounts, finalVolumes, finalOverlays, err := finalizeMounts(ctx, s, rt, rtc, newImage) if err != nil { return nil, err } @@ -121,7 +121,7 @@ func MakeContainer(ctx context.Context, rt *libpod.Runtime, s *specgen.SpecGener return nil, err } - opts, err := createContainerOptions(ctx, rt, s, pod, finalVolumes, newImage, command) + opts, err := createContainerOptions(ctx, rt, s, pod, finalVolumes, finalOverlays, newImage, command) if err != nil { return nil, err } @@ -133,6 +133,10 @@ func MakeContainer(ctx context.Context, rt *libpod.Runtime, s *specgen.SpecGener } options = append(options, libpod.WithExitCommand(exitCommandArgs)) + if len(s.Aliases) > 0 { + options = append(options, libpod.WithNetworkAliases(s.Aliases)) + } + runtimeSpec, err := SpecGenToOCI(ctx, s, rt, rtc, newImage, finalMounts, pod, command) if err != nil { return nil, err @@ -140,7 +144,7 @@ func MakeContainer(ctx context.Context, rt *libpod.Runtime, s *specgen.SpecGener return rt.NewContainer(ctx, runtimeSpec, options...) } -func createContainerOptions(ctx context.Context, rt *libpod.Runtime, s *specgen.SpecGenerator, pod *libpod.Pod, volumes []*specgen.NamedVolume, img *image.Image, command []string) ([]libpod.CtrCreateOption, error) { +func createContainerOptions(ctx context.Context, rt *libpod.Runtime, s *specgen.SpecGenerator, pod *libpod.Pod, volumes []*specgen.NamedVolume, overlays []*specgen.OverlayVolume, img *image.Image, command []string) ([]libpod.CtrCreateOption, error) { var options []libpod.CtrCreateOption var err error @@ -220,9 +224,12 @@ func createContainerOptions(ctx context.Context, rt *libpod.Runtime, s *specgen. for _, volume := range volumes { destinations = append(destinations, volume.Dest) } - for _, overlayVolume := range s.OverlayVolumes { + for _, overlayVolume := range overlays { destinations = append(destinations, overlayVolume.Destination) } + for _, imageVolume := range s.ImageVolumes { + destinations = append(destinations, imageVolume.Destination) + } options = append(options, libpod.WithUserVolumes(destinations)) if len(volumes) != 0 { @@ -237,9 +244,9 @@ func createContainerOptions(ctx context.Context, rt *libpod.Runtime, s *specgen. options = append(options, libpod.WithNamedVolumes(vols)) } - if len(s.OverlayVolumes) != 0 { + if len(overlays) != 0 { var vols []*libpod.ContainerOverlayVolume - for _, v := range s.OverlayVolumes { + for _, v := range overlays { vols = append(vols, &libpod.ContainerOverlayVolume{ Dest: v.Destination, Source: v.Source, @@ -248,6 +255,18 @@ func createContainerOptions(ctx context.Context, rt *libpod.Runtime, s *specgen. options = append(options, libpod.WithOverlayVolumes(vols)) } + if len(s.ImageVolumes) != 0 { + var vols []*libpod.ContainerImageVolume + for _, v := range s.ImageVolumes { + vols = append(vols, &libpod.ContainerImageVolume{ + Dest: v.Destination, + Source: v.Source, + ReadWrite: v.ReadWrite, + }) + } + options = append(options, libpod.WithImageVolumes(vols)) + } + if s.Command != nil { options = append(options, libpod.WithCommand(s.Command)) } diff --git a/pkg/specgen/generate/kube/kube.go b/pkg/specgen/generate/kube/kube.go new file mode 100644 index 000000000..5f72d28bb --- /dev/null +++ b/pkg/specgen/generate/kube/kube.go @@ -0,0 +1,326 @@ +package kube + +import ( + "context" + "fmt" + "strings" + + "github.com/containers/buildah/pkg/parse" + "github.com/containers/podman/v2/libpod/image" + ann "github.com/containers/podman/v2/pkg/annotations" + "github.com/containers/podman/v2/pkg/specgen" + "github.com/containers/podman/v2/pkg/util" + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" +) + +func ToPodGen(ctx context.Context, podName string, podYAML *v1.PodTemplateSpec) (*specgen.PodSpecGenerator, error) { + p := specgen.NewPodSpecGenerator() + p.Name = podName + p.Labels = podYAML.ObjectMeta.Labels + // TODO we only configure Process namespace. We also need to account for Host{IPC,Network,PID} + // which is not currently possible with pod create + if podYAML.Spec.ShareProcessNamespace != nil && *podYAML.Spec.ShareProcessNamespace { + p.SharedNamespaces = append(p.SharedNamespaces, "pid") + } + p.Hostname = podYAML.Spec.Hostname + if p.Hostname == "" { + p.Hostname = podName + } + if podYAML.Spec.HostNetwork { + p.NetNS.Value = "host" + } + if podYAML.Spec.HostAliases != nil { + hosts := make([]string, 0, len(podYAML.Spec.HostAliases)) + for _, hostAlias := range podYAML.Spec.HostAliases { + for _, host := range hostAlias.Hostnames { + hosts = append(hosts, host+":"+hostAlias.IP) + } + } + p.HostAdd = hosts + } + podPorts := getPodPorts(podYAML.Spec.Containers) + p.PortMappings = podPorts + + return p, nil +} + +func ToSpecGen(ctx context.Context, containerYAML v1.Container, iid string, newImage *image.Image, volumes map[string]*KubeVolume, podID, podName, infraID string, configMaps []v1.ConfigMap, seccompPaths *KubeSeccompPaths, restartPolicy string) (*specgen.SpecGenerator, error) { + s := specgen.NewSpecGenerator(iid, false) + + // podName should be non-empty for Deployment objects to be able to create + // multiple pods having containers with unique names + if len(podName) < 1 { + return nil, errors.Errorf("kubeContainerToCreateConfig got empty podName") + } + + s.Name = fmt.Sprintf("%s-%s", podName, containerYAML.Name) + + s.Terminal = containerYAML.TTY + + s.Pod = podID + + setupSecurityContext(s, containerYAML) + + // Since we prefix the container name with pod name to work-around the uniqueness requirement, + // the seccomp profile should reference the actual container name from the YAML + // but apply to the containers with the prefixed name + s.SeccompProfilePath = seccompPaths.FindForContainer(containerYAML.Name) + + s.ResourceLimits = &spec.LinuxResources{} + milliCPU, err := quantityToInt64(containerYAML.Resources.Limits.Cpu()) + if err != nil { + return nil, errors.Wrap(err, "Failed to set CPU quota") + } + if milliCPU > 0 { + period, quota := util.CoresToPeriodAndQuota(float64(milliCPU) / 1000) + s.ResourceLimits.CPU = &spec.LinuxCPU{ + Quota: "a, + Period: &period, + } + } + + limit, err := quantityToInt64(containerYAML.Resources.Limits.Memory()) + if err != nil { + return nil, errors.Wrap(err, "Failed to set memory limit") + } + + memoryRes, err := quantityToInt64(containerYAML.Resources.Requests.Memory()) + if err != nil { + return nil, errors.Wrap(err, "Failed to set memory reservation") + } + + if limit > 0 || memoryRes > 0 { + s.ResourceLimits.Memory = &spec.LinuxMemory{} + } + + if limit > 0 { + s.ResourceLimits.Memory.Limit = &limit + } + + if memoryRes > 0 { + s.ResourceLimits.Memory.Reservation = &memoryRes + } + + // TODO: We dont understand why specgen does not take of this, but + // integration tests clearly pointed out that it was required. + s.Command = []string{} + imageData, err := newImage.Inspect(ctx) + if err != nil { + return nil, err + } + s.WorkDir = "/" + if imageData != nil && imageData.Config != nil { + if imageData.Config.WorkingDir != "" { + s.WorkDir = imageData.Config.WorkingDir + } + s.Command = imageData.Config.Entrypoint + s.Labels = imageData.Config.Labels + if len(imageData.Config.StopSignal) > 0 { + stopSignal, err := util.ParseSignal(imageData.Config.StopSignal) + if err != nil { + return nil, err + } + s.StopSignal = &stopSignal + } + } + if len(containerYAML.Command) != 0 { + s.Command = containerYAML.Command + } + // doc https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#notes + if len(containerYAML.Args) != 0 { + s.Command = append(s.Command, containerYAML.Args...) + } + // FIXME, + // we are currently ignoring imageData.Config.ExposedPorts + if containerYAML.WorkingDir != "" { + s.WorkDir = containerYAML.WorkingDir + } + + annotations := make(map[string]string) + if infraID != "" { + annotations[ann.SandboxID] = infraID + annotations[ann.ContainerType] = ann.ContainerTypeContainer + } + s.Annotations = annotations + + // Environment Variables + envs := map[string]string{} + for _, env := range containerYAML.Env { + value := envVarValue(env, configMaps) + + envs[env.Name] = value + } + for _, envFrom := range containerYAML.EnvFrom { + cmEnvs := envVarsFromConfigMap(envFrom, configMaps) + + for k, v := range cmEnvs { + envs[k] = v + } + } + s.Env = envs + + for _, volume := range containerYAML.VolumeMounts { + volumeSource, exists := volumes[volume.Name] + if !exists { + return nil, errors.Errorf("Volume mount %s specified for container but not configured in volumes", volume.Name) + } + switch volumeSource.Type { + case KubeVolumeTypeBindMount: + if err := parse.ValidateVolumeCtrDir(volume.MountPath); err != nil { + return nil, errors.Wrapf(err, "error in parsing MountPath") + } + mount := spec.Mount{ + Destination: volume.MountPath, + Source: volumeSource.Source, + Type: "bind", + } + if volume.ReadOnly { + mount.Options = []string{"ro"} + } + s.Mounts = append(s.Mounts, mount) + case KubeVolumeTypeNamed: + namedVolume := specgen.NamedVolume{ + Dest: volume.MountPath, + Name: volumeSource.Source, + } + if volume.ReadOnly { + namedVolume.Options = []string{"ro"} + } + s.Volumes = append(s.Volumes, &namedVolume) + default: + return nil, errors.Errorf("Unsupported volume source type") + } + } + + s.RestartPolicy = restartPolicy + + return s, nil +} + +func setupSecurityContext(s *specgen.SpecGenerator, containerYAML v1.Container) { + if containerYAML.SecurityContext == nil { + return + } + if containerYAML.SecurityContext.ReadOnlyRootFilesystem != nil { + s.ReadOnlyFilesystem = *containerYAML.SecurityContext.ReadOnlyRootFilesystem + } + if containerYAML.SecurityContext.Privileged != nil { + s.Privileged = *containerYAML.SecurityContext.Privileged + } + + if containerYAML.SecurityContext.AllowPrivilegeEscalation != nil { + s.NoNewPrivileges = !*containerYAML.SecurityContext.AllowPrivilegeEscalation + } + + if seopt := containerYAML.SecurityContext.SELinuxOptions; seopt != nil { + if seopt.User != "" { + s.SelinuxOpts = append(s.SelinuxOpts, fmt.Sprintf("role:%s", seopt.User)) + } + if seopt.Role != "" { + s.SelinuxOpts = append(s.SelinuxOpts, fmt.Sprintf("role:%s", seopt.Role)) + } + if seopt.Type != "" { + s.SelinuxOpts = append(s.SelinuxOpts, fmt.Sprintf("role:%s", seopt.Type)) + } + if seopt.Level != "" { + s.SelinuxOpts = append(s.SelinuxOpts, fmt.Sprintf("role:%s", seopt.Level)) + } + } + if caps := containerYAML.SecurityContext.Capabilities; caps != nil { + for _, capability := range caps.Add { + s.CapAdd = append(s.CapAdd, string(capability)) + } + for _, capability := range caps.Drop { + s.CapDrop = append(s.CapDrop, string(capability)) + } + } + if containerYAML.SecurityContext.RunAsUser != nil { + s.User = fmt.Sprintf("%d", *containerYAML.SecurityContext.RunAsUser) + } + if containerYAML.SecurityContext.RunAsGroup != nil { + if s.User == "" { + s.User = "0" + } + s.User = fmt.Sprintf("%s:%d", s.User, *containerYAML.SecurityContext.RunAsGroup) + } +} + +func quantityToInt64(quantity *resource.Quantity) (int64, error) { + if i, ok := quantity.AsInt64(); ok { + return i, nil + } + + if i, ok := quantity.AsDec().Unscaled(); ok { + return i, nil + } + + return 0, errors.Errorf("Quantity cannot be represented as int64: %v", quantity) +} + +// envVarsFromConfigMap returns all key-value pairs as env vars from a configMap that matches the envFrom setting of a container +func envVarsFromConfigMap(envFrom v1.EnvFromSource, configMaps []v1.ConfigMap) map[string]string { + envs := map[string]string{} + + if envFrom.ConfigMapRef != nil { + cmName := envFrom.ConfigMapRef.Name + + for _, c := range configMaps { + if cmName == c.Name { + envs = c.Data + break + } + } + } + + return envs +} + +// envVarValue returns the environment variable value configured within the container's env setting. +// It gets the value from a configMap if specified, otherwise returns env.Value +func envVarValue(env v1.EnvVar, configMaps []v1.ConfigMap) string { + for _, c := range configMaps { + if env.ValueFrom != nil { + if env.ValueFrom.ConfigMapKeyRef != nil { + if env.ValueFrom.ConfigMapKeyRef.Name == c.Name { + if value, ok := c.Data[env.ValueFrom.ConfigMapKeyRef.Key]; ok { + return value + } + } + } + } + } + + return env.Value +} + +// getPodPorts converts a slice of kube container descriptions to an +// array of portmapping +func getPodPorts(containers []v1.Container) []specgen.PortMapping { + var infraPorts []specgen.PortMapping + for _, container := range containers { + for _, p := range container.Ports { + if p.HostPort != 0 && p.ContainerPort == 0 { + p.ContainerPort = p.HostPort + } + if p.Protocol == "" { + p.Protocol = "tcp" + } + portBinding := specgen.PortMapping{ + HostPort: uint16(p.HostPort), + ContainerPort: uint16(p.ContainerPort), + Protocol: strings.ToLower(string(p.Protocol)), + HostIP: p.HostIP, + } + // only hostPort is utilized in podman context, all container ports + // are accessible inside the shared network namespace + if p.HostPort != 0 { + infraPorts = append(infraPorts, portBinding) + } + + } + } + return infraPorts +} diff --git a/pkg/specgen/generate/kube/play_test.go b/pkg/specgen/generate/kube/play_test.go new file mode 100644 index 000000000..148540e9f --- /dev/null +++ b/pkg/specgen/generate/kube/play_test.go @@ -0,0 +1,172 @@ +package kube + +import ( + "testing" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + v12 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestEnvVarsFromConfigMap(t *testing.T) { + tests := []struct { + name string + envFrom v1.EnvFromSource + configMapList []v1.ConfigMap + expected map[string]string + }{ + { + "ConfigMapExists", + v1.EnvFromSource{ + ConfigMapRef: &v1.ConfigMapEnvSource{ + LocalObjectReference: v1.LocalObjectReference{ + Name: "foo", + }, + }, + }, + configMapList, + map[string]string{ + "myvar": "foo", + }, + }, + { + "ConfigMapDoesNotExist", + v1.EnvFromSource{ + ConfigMapRef: &v1.ConfigMapEnvSource{ + LocalObjectReference: v1.LocalObjectReference{ + Name: "doesnotexist", + }, + }, + }, + configMapList, + map[string]string{}, + }, + { + "EmptyConfigMapList", + v1.EnvFromSource{ + ConfigMapRef: &v1.ConfigMapEnvSource{ + LocalObjectReference: v1.LocalObjectReference{ + Name: "foo", + }, + }, + }, + []v1.ConfigMap{}, + map[string]string{}, + }, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + result := envVarsFromConfigMap(test.envFrom, test.configMapList) + assert.Equal(t, test.expected, result) + }) + } +} + +func TestEnvVarValue(t *testing.T) { + tests := []struct { + name string + envVar v1.EnvVar + configMapList []v1.ConfigMap + expected string + }{ + { + "ConfigMapExists", + v1.EnvVar{ + Name: "FOO", + ValueFrom: &v1.EnvVarSource{ + ConfigMapKeyRef: &v1.ConfigMapKeySelector{ + LocalObjectReference: v1.LocalObjectReference{ + Name: "foo", + }, + Key: "myvar", + }, + }, + }, + configMapList, + "foo", + }, + { + "ContainerKeyDoesNotExistInConfigMap", + v1.EnvVar{ + Name: "FOO", + ValueFrom: &v1.EnvVarSource{ + ConfigMapKeyRef: &v1.ConfigMapKeySelector{ + LocalObjectReference: v1.LocalObjectReference{ + Name: "foo", + }, + Key: "doesnotexist", + }, + }, + }, + configMapList, + "", + }, + { + "ConfigMapDoesNotExist", + v1.EnvVar{ + Name: "FOO", + ValueFrom: &v1.EnvVarSource{ + ConfigMapKeyRef: &v1.ConfigMapKeySelector{ + LocalObjectReference: v1.LocalObjectReference{ + Name: "doesnotexist", + }, + Key: "myvar", + }, + }, + }, + configMapList, + "", + }, + { + "EmptyConfigMapList", + v1.EnvVar{ + Name: "FOO", + ValueFrom: &v1.EnvVarSource{ + ConfigMapKeyRef: &v1.ConfigMapKeySelector{ + LocalObjectReference: v1.LocalObjectReference{ + Name: "foo", + }, + Key: "myvar", + }, + }, + }, + []v1.ConfigMap{}, + "", + }, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + result := envVarValue(test.envVar, test.configMapList) + assert.Equal(t, test.expected, result) + }) + } +} + +var configMapList = []v1.ConfigMap{ + { + TypeMeta: v12.TypeMeta{ + Kind: "ConfigMap", + }, + ObjectMeta: v12.ObjectMeta{ + Name: "bar", + }, + Data: map[string]string{ + "myvar": "bar", + }, + }, + { + TypeMeta: v12.TypeMeta{ + Kind: "ConfigMap", + }, + ObjectMeta: v12.ObjectMeta{ + Name: "foo", + }, + Data: map[string]string{ + "myvar": "foo", + }, + }, +} diff --git a/pkg/specgen/generate/kube/seccomp.go b/pkg/specgen/generate/kube/seccomp.go new file mode 100644 index 000000000..4cbdf6e2e --- /dev/null +++ b/pkg/specgen/generate/kube/seccomp.go @@ -0,0 +1,84 @@ +package kube + +import ( + "path/filepath" + "strings" + + "github.com/containers/podman/v2/libpod" + "github.com/pkg/errors" + v1 "k8s.io/api/core/v1" +) + +// KubeSeccompPaths holds information about a pod YAML's seccomp configuration +// it holds both container and pod seccomp paths +type KubeSeccompPaths struct { + containerPaths map[string]string + podPath string +} + +// FindForContainer checks whether a container has a seccomp path configured for it +// if not, it returns the podPath, which should always have a value +func (k *KubeSeccompPaths) FindForContainer(ctrName string) string { + if path, ok := k.containerPaths[ctrName]; ok { + return path + } + return k.podPath +} + +// InitializeSeccompPaths takes annotations from the pod object metadata and finds annotations pertaining to seccomp +// it parses both pod and container level +// if the annotation is of the form "localhost/%s", the seccomp profile will be set to profileRoot/%s +func InitializeSeccompPaths(annotations map[string]string, profileRoot string) (*KubeSeccompPaths, error) { + seccompPaths := &KubeSeccompPaths{containerPaths: make(map[string]string)} + var err error + if annotations != nil { + for annKeyValue, seccomp := range annotations { + // check if it is prefaced with container.seccomp.security.alpha.kubernetes.io/ + prefixAndCtr := strings.Split(annKeyValue, "/") + if prefixAndCtr[0]+"/" != v1.SeccompContainerAnnotationKeyPrefix { + continue + } else if len(prefixAndCtr) != 2 { + // this could be caused by a user inputting either of + // container.seccomp.security.alpha.kubernetes.io{,/} + // both of which are invalid + return nil, errors.Errorf("Invalid seccomp path: %s", prefixAndCtr[0]) + } + + path, err := verifySeccompPath(seccomp, profileRoot) + if err != nil { + return nil, err + } + seccompPaths.containerPaths[prefixAndCtr[1]] = path + } + + podSeccomp, ok := annotations[v1.SeccompPodAnnotationKey] + if ok { + seccompPaths.podPath, err = verifySeccompPath(podSeccomp, profileRoot) + } else { + seccompPaths.podPath, err = libpod.DefaultSeccompPath() + } + if err != nil { + return nil, err + } + } + return seccompPaths, nil +} + +// verifySeccompPath takes a path and checks whether it is a default, unconfined, or a path +// the available options are parsed as defined in https://kubernetes.io/docs/concepts/policy/pod-security-policy/#seccomp +func verifySeccompPath(path string, profileRoot string) (string, error) { + switch path { + case v1.DeprecatedSeccompProfileDockerDefault: + fallthrough + case v1.SeccompProfileRuntimeDefault: + return libpod.DefaultSeccompPath() + case "unconfined": + return path, nil + default: + parts := strings.Split(path, "/") + if parts[0] == "localhost" { + return filepath.Join(profileRoot, parts[1]), nil + } + return "", errors.Errorf("invalid seccomp path: %s", path) + } +} diff --git a/pkg/specgen/generate/kube/volume.go b/pkg/specgen/generate/kube/volume.go new file mode 100644 index 000000000..2ef0f4c23 --- /dev/null +++ b/pkg/specgen/generate/kube/volume.go @@ -0,0 +1,124 @@ +package kube + +import ( + "os" + + "github.com/containers/buildah/pkg/parse" + "github.com/containers/podman/v2/libpod" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + v1 "k8s.io/api/core/v1" +) + +const ( + // https://kubernetes.io/docs/concepts/storage/volumes/#hostpath + kubeDirectoryPermission = 0755 + // https://kubernetes.io/docs/concepts/storage/volumes/#hostpath + kubeFilePermission = 0644 +) + +type KubeVolumeType int + +const ( + KubeVolumeTypeBindMount KubeVolumeType = iota + KubeVolumeTypeNamed KubeVolumeType = iota +) + +type KubeVolume struct { + // Type of volume to create + Type KubeVolumeType + // Path for bind mount or volume name for named volume + Source string +} + +// Create a KubeVolume from an HostPathVolumeSource +func VolumeFromHostPath(hostPath *v1.HostPathVolumeSource) (*KubeVolume, error) { + if hostPath.Type != nil { + switch *hostPath.Type { + case v1.HostPathDirectoryOrCreate: + if _, err := os.Stat(hostPath.Path); os.IsNotExist(err) { + if err := os.Mkdir(hostPath.Path, kubeDirectoryPermission); err != nil { + return nil, err + } + } + // Label a newly created volume + if err := libpod.LabelVolumePath(hostPath.Path); err != nil { + return nil, errors.Wrapf(err, "error giving %s a label", hostPath.Path) + } + case v1.HostPathFileOrCreate: + if _, err := os.Stat(hostPath.Path); os.IsNotExist(err) { + f, err := os.OpenFile(hostPath.Path, os.O_RDONLY|os.O_CREATE, kubeFilePermission) + if err != nil { + return nil, errors.Wrap(err, "error creating HostPath") + } + if err := f.Close(); err != nil { + logrus.Warnf("Error in closing newly created HostPath file: %v", err) + } + } + // unconditionally label a newly created volume + if err := libpod.LabelVolumePath(hostPath.Path); err != nil { + return nil, errors.Wrapf(err, "error giving %s a label", hostPath.Path) + } + case v1.HostPathSocket: + st, err := os.Stat(hostPath.Path) + if err != nil { + return nil, errors.Wrap(err, "error checking HostPathSocket") + } + if st.Mode()&os.ModeSocket != os.ModeSocket { + return nil, errors.Errorf("error checking HostPathSocket: path %s is not a socket", hostPath.Path) + } + + case v1.HostPathDirectory: + case v1.HostPathFile: + case v1.HostPathUnset: + // do nothing here because we will verify the path exists in validateVolumeHostDir + break + default: + return nil, errors.Errorf("Invalid HostPath type %v", hostPath.Type) + } + } + + if err := parse.ValidateVolumeHostDir(hostPath.Path); err != nil { + return nil, errors.Wrapf(err, "error in parsing HostPath in YAML") + } + + return &KubeVolume{ + Type: KubeVolumeTypeBindMount, + Source: hostPath.Path, + }, nil +} + +// Create a KubeVolume from a PersistentVolumeClaimVolumeSource +func VolumeFromPersistentVolumeClaim(claim *v1.PersistentVolumeClaimVolumeSource) (*KubeVolume, error) { + return &KubeVolume{ + Type: KubeVolumeTypeNamed, + Source: claim.ClaimName, + }, nil +} + +// Create a KubeVolume from one of the supported VolumeSource +func VolumeFromSource(volumeSource v1.VolumeSource) (*KubeVolume, error) { + if volumeSource.HostPath != nil { + return VolumeFromHostPath(volumeSource.HostPath) + } else if volumeSource.PersistentVolumeClaim != nil { + return VolumeFromPersistentVolumeClaim(volumeSource.PersistentVolumeClaim) + } else { + return nil, errors.Errorf("HostPath and PersistentVolumeClaim are currently the conly supported VolumeSource") + } +} + +// Create a map of volume name to KubeVolume +func InitializeVolumes(specVolumes []v1.Volume) (map[string]*KubeVolume, error) { + volumes := make(map[string]*KubeVolume) + + for _, specVolume := range specVolumes { + volume, err := VolumeFromSource(specVolume.VolumeSource) + if err != nil { + return nil, err + } + + volumes[specVolume.Name] = volume + } + + return volumes, nil +} diff --git a/pkg/specgen/generate/namespaces.go b/pkg/specgen/generate/namespaces.go index 7e4f09dc4..ddc73ca61 100644 --- a/pkg/specgen/generate/namespaces.go +++ b/pkg/specgen/generate/namespaces.go @@ -127,6 +127,7 @@ func namespaceOptions(ctx context.Context, s *specgen.SpecGenerator, rt *libpod. return nil, errNoInfra } toReturn = append(toReturn, libpod.WithIPCNSFrom(infraCtr)) + toReturn = append(toReturn, libpod.WithShmDir(infraCtr.ShmDir())) case specgen.FromContainer: ipcCtr, err := rt.LookupContainer(s.IpcNS.Value) if err != nil { @@ -278,7 +279,7 @@ func specConfigureNamespaces(s *specgen.SpecGenerator, g *generate.Generator, rt switch s.PidNS.NSMode { case specgen.Path: if _, err := os.Stat(s.PidNS.Value); err != nil { - return errors.Wrapf(err, "cannot find specified PID namespace path %q", s.PidNS.Value) + return errors.Wrap(err, "cannot find specified PID namespace path") } if err := g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), s.PidNS.Value); err != nil { return err @@ -297,7 +298,7 @@ func specConfigureNamespaces(s *specgen.SpecGenerator, g *generate.Generator, rt switch s.IpcNS.NSMode { case specgen.Path: if _, err := os.Stat(s.IpcNS.Value); err != nil { - return errors.Wrapf(err, "cannot find specified IPC namespace path %q", s.IpcNS.Value) + return errors.Wrap(err, "cannot find specified IPC namespace path") } if err := g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), s.IpcNS.Value); err != nil { return err @@ -316,7 +317,7 @@ func specConfigureNamespaces(s *specgen.SpecGenerator, g *generate.Generator, rt switch s.UtsNS.NSMode { case specgen.Path: if _, err := os.Stat(s.UtsNS.Value); err != nil { - return errors.Wrapf(err, "cannot find specified UTS namespace path %q", s.UtsNS.Value) + return errors.Wrap(err, "cannot find specified UTS namespace path") } if err := g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), s.UtsNS.Value); err != nil { return err @@ -367,7 +368,7 @@ func specConfigureNamespaces(s *specgen.SpecGenerator, g *generate.Generator, rt switch s.UserNS.NSMode { case specgen.Path: if _, err := os.Stat(s.UserNS.Value); err != nil { - return errors.Wrapf(err, "cannot find specified user namespace path %s", s.UserNS.Value) + return errors.Wrap(err, "cannot find specified user namespace path") } if err := g.AddOrReplaceLinuxNamespace(string(spec.UserNamespace), s.UserNS.Value); err != nil { return err @@ -410,7 +411,7 @@ func specConfigureNamespaces(s *specgen.SpecGenerator, g *generate.Generator, rt switch s.CgroupNS.NSMode { case specgen.Path: if _, err := os.Stat(s.CgroupNS.Value); err != nil { - return errors.Wrapf(err, "cannot find specified cgroup namespace path %s", s.CgroupNS.Value) + return errors.Wrap(err, "cannot find specified cgroup namespace path") } if err := g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), s.CgroupNS.Value); err != nil { return err @@ -429,7 +430,7 @@ func specConfigureNamespaces(s *specgen.SpecGenerator, g *generate.Generator, rt switch s.NetNS.NSMode { case specgen.Path: if _, err := os.Stat(s.NetNS.Value); err != nil { - return errors.Wrapf(err, "cannot find specified network namespace path %s", s.NetNS.Value) + return errors.Wrap(err, "cannot find specified network namespace path") } if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), s.NetNS.Value); err != nil { return err diff --git a/pkg/specgen/generate/security.go b/pkg/specgen/generate/security.go index d17cd4a9a..dee140282 100644 --- a/pkg/specgen/generate/security.go +++ b/pkg/specgen/generate/security.go @@ -135,7 +135,9 @@ func securityConfigureGenerator(s *specgen.SpecGenerator, g *generate.Generator, configSpec.Process.Capabilities.Bounding = caplist configSpec.Process.Capabilities.Inheritable = caplist - if s.User == "" || s.User == "root" || s.User == "0" { + user := strings.Split(s.User, ":")[0] + + if (user == "" && s.UserNS.NSMode != specgen.KeepID) || user == "root" || user == "0" { configSpec.Process.Capabilities.Effective = caplist configSpec.Process.Capabilities.Permitted = caplist } else { @@ -145,6 +147,12 @@ func securityConfigureGenerator(s *specgen.SpecGenerator, g *generate.Generator, } configSpec.Process.Capabilities.Effective = userCaps configSpec.Process.Capabilities.Permitted = userCaps + + // Ambient capabilities were added to Linux 4.3. Set ambient + // capabilities only when the kernel supports them. + if supportAmbientCapabilities() { + configSpec.Process.Capabilities.Ambient = userCaps + } } g.SetProcessNoNewPrivileges(s.NoNewPrivileges) diff --git a/pkg/specgen/generate/storage.go b/pkg/specgen/generate/storage.go index b225f79ee..331a5c5bf 100644 --- a/pkg/specgen/generate/storage.go +++ b/pkg/specgen/generate/storage.go @@ -33,17 +33,17 @@ var ( ) // Produce final mounts and named volumes for a container -func finalizeMounts(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runtime, rtc *config.Config, img *image.Image) ([]spec.Mount, []*specgen.NamedVolume, error) { +func finalizeMounts(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runtime, rtc *config.Config, img *image.Image) ([]spec.Mount, []*specgen.NamedVolume, []*specgen.OverlayVolume, error) { // Get image volumes baseMounts, baseVolumes, err := getImageVolumes(ctx, img, s) if err != nil { - return nil, nil, err + return nil, nil, nil, err } // Get volumes-from mounts volFromMounts, volFromVolumes, err := getVolumesFrom(s.VolumesFrom, rt) if err != nil { - return nil, nil, err + return nil, nil, nil, err } // Supersede from --volumes-from. @@ -57,19 +57,53 @@ func finalizeMounts(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Ru // Need to make map forms of specgen mounts/volumes. unifiedMounts := map[string]spec.Mount{} unifiedVolumes := map[string]*specgen.NamedVolume{} + unifiedOverlays := map[string]*specgen.OverlayVolume{} + + // Need to make map forms of specgen mounts/volumes. + commonMounts, commonVolumes, commonOverlayVolumes, err := specgen.GenVolumeMounts(rtc.Volumes()) + if err != nil { + return nil, nil, nil, err + } + for _, m := range s.Mounts { if _, ok := unifiedMounts[m.Destination]; ok { - return nil, nil, errors.Wrapf(errDuplicateDest, "conflict in specified mounts - multiple mounts at %q", m.Destination) + return nil, nil, nil, errors.Wrapf(errDuplicateDest, "conflict in specified mounts - multiple mounts at %q", m.Destination) } unifiedMounts[m.Destination] = m } + + for _, m := range commonMounts { + if _, ok := unifiedMounts[m.Destination]; !ok { + unifiedMounts[m.Destination] = m + } + } + for _, v := range s.Volumes { if _, ok := unifiedVolumes[v.Dest]; ok { - return nil, nil, errors.Wrapf(errDuplicateDest, "conflict in specified volumes - multiple volumes at %q", v.Dest) + return nil, nil, nil, errors.Wrapf(errDuplicateDest, "conflict in specified volumes - multiple volumes at %q", v.Dest) } unifiedVolumes[v.Dest] = v } + for _, v := range commonVolumes { + if _, ok := unifiedVolumes[v.Dest]; !ok { + unifiedVolumes[v.Dest] = v + } + } + + for _, v := range s.OverlayVolumes { + if _, ok := unifiedOverlays[v.Destination]; ok { + return nil, nil, nil, errors.Wrapf(errDuplicateDest, "conflict in specified volumes - multiple volumes at %q", v.Destination) + } + unifiedOverlays[v.Destination] = v + } + + for _, v := range commonOverlayVolumes { + if _, ok := unifiedOverlays[v.Destination]; ok { + unifiedOverlays[v.Destination] = v + } + } + // If requested, add container init binary if s.Init { initPath := s.InitPath @@ -78,10 +112,10 @@ func finalizeMounts(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Ru } initMount, err := addContainerInitBinary(s, initPath) if err != nil { - return nil, nil, err + return nil, nil, nil, err } if _, ok := unifiedMounts[initMount.Destination]; ok { - return nil, nil, errors.Wrapf(errDuplicateDest, "conflict with mount added by --init to %q", initMount.Destination) + return nil, nil, nil, errors.Wrapf(errDuplicateDest, "conflict with mount added by --init to %q", initMount.Destination) } unifiedMounts[initMount.Destination] = initMount } @@ -115,12 +149,12 @@ func finalizeMounts(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Ru // Check for conflicts between named volumes and mounts for dest := range baseMounts { if _, ok := baseVolumes[dest]; ok { - return nil, nil, errors.Wrapf(errDuplicateDest, "conflict at mount destination %v", dest) + return nil, nil, nil, errors.Wrapf(errDuplicateDest, "conflict at mount destination %v", dest) } } for dest := range baseVolumes { if _, ok := baseMounts[dest]; ok { - return nil, nil, errors.Wrapf(errDuplicateDest, "conflict at mount destination %v", dest) + return nil, nil, nil, errors.Wrapf(errDuplicateDest, "conflict at mount destination %v", dest) } } // Final step: maps to arrays @@ -129,7 +163,7 @@ func finalizeMounts(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Ru if mount.Type == TypeBind { absSrc, err := filepath.Abs(mount.Source) if err != nil { - return nil, nil, errors.Wrapf(err, "error getting absolute path of %s", mount.Source) + return nil, nil, nil, errors.Wrapf(err, "error getting absolute path of %s", mount.Source) } mount.Source = absSrc } @@ -140,7 +174,12 @@ func finalizeMounts(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Ru finalVolumes = append(finalVolumes, volume) } - return finalMounts, finalVolumes, nil + finalOverlays := make([]*specgen.OverlayVolume, 0, len(unifiedOverlays)) + for _, volume := range unifiedOverlays { + finalOverlays = append(finalOverlays, volume) + } + + return finalMounts, finalVolumes, finalOverlays, nil } // Get image volumes from the given image diff --git a/pkg/specgen/generate/validate.go b/pkg/specgen/generate/validate.go index ed337321b..f0ab4b994 100644 --- a/pkg/specgen/generate/validate.go +++ b/pkg/specgen/generate/validate.go @@ -1,22 +1,20 @@ package generate import ( + "os" + "path/filepath" + "github.com/containers/common/pkg/sysinfo" "github.com/containers/podman/v2/pkg/cgroups" "github.com/containers/podman/v2/pkg/specgen" + "github.com/containers/podman/v2/utils" "github.com/pkg/errors" ) -// Verify resource limits are sanely set, removing any limits that are not -// possible with the current cgroups config. -func verifyContainerResources(s *specgen.SpecGenerator) ([]string, error) { +// Verify resource limits are sanely set when running on cgroup v1. +func verifyContainerResourcesCgroupV1(s *specgen.SpecGenerator) ([]string, error) { warnings := []string{} - cgroup2, err := cgroups.IsCgroup2UnifiedMode() - if err != nil || cgroup2 { - return warnings, err - } - sysInfo := sysinfo.New(true) if s.ResourceLimits == nil { @@ -24,9 +22,7 @@ func verifyContainerResources(s *specgen.SpecGenerator) ([]string, error) { } if s.ResourceLimits.Unified != nil { - if !cgroup2 { - return nil, errors.New("Cannot use --cgroup-conf without cgroup v2") - } + return nil, errors.New("Cannot use --cgroup-conf without cgroup v2") } // Memory checks @@ -38,7 +34,7 @@ func verifyContainerResources(s *specgen.SpecGenerator) ([]string, error) { memory.Swap = nil } if memory.Limit != nil && memory.Swap != nil && !sysInfo.SwapLimit { - warnings = append(warnings, "Your kernel does not support swap limit capabilities,or the cgroup is not mounted. Memory limited without swap.") + warnings = append(warnings, "Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap.") memory.Swap = nil } if memory.Limit != nil && memory.Swap != nil && *memory.Swap < *memory.Limit { @@ -163,3 +159,48 @@ func verifyContainerResources(s *specgen.SpecGenerator) ([]string, error) { return warnings, nil } + +// Verify resource limits are sanely set when running on cgroup v2. +func verifyContainerResourcesCgroupV2(s *specgen.SpecGenerator) ([]string, error) { + warnings := []string{} + + if s.ResourceLimits == nil { + return warnings, nil + } + + if s.ResourceLimits.Memory != nil && s.ResourceLimits.Memory.Swap != nil { + own, err := utils.GetOwnCgroup() + if err != nil { + return warnings, err + } + memoryMax := filepath.Join("/sys/fs/cgroup", own, "memory.max") + memorySwapMax := filepath.Join("/sys/fs/cgroup", own, "memory.swap.max") + _, errMemoryMax := os.Stat(memoryMax) + _, errMemorySwapMax := os.Stat(memorySwapMax) + // Differently than cgroup v1, the memory.*max files are not present in the + // root directory, so we cannot query directly that, so as best effort use + // the current cgroup. + // Check whether memory.max exists in the current cgroup and memory.swap.max + // does not. In this case we can be sure memory swap is not enabled. + // If both files don't exist, the memory controller might not be enabled + // for the current cgroup. + if errMemoryMax == nil && errMemorySwapMax != nil { + warnings = append(warnings, "Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap.") + s.ResourceLimits.Memory.Swap = nil + } + } + return warnings, nil +} + +// Verify resource limits are sanely set, removing any limits that are not +// possible with the current cgroups config. +func verifyContainerResources(s *specgen.SpecGenerator) ([]string, error) { + cgroup2, err := cgroups.IsCgroup2UnifiedMode() + if err != nil { + return []string{}, err + } + if cgroup2 { + return verifyContainerResourcesCgroupV2(s) + } + return verifyContainerResourcesCgroupV1(s) +} |