summaryrefslogtreecommitdiff
path: root/server/sandbox_run.go
diff options
context:
space:
mode:
Diffstat (limited to 'server/sandbox_run.go')
-rw-r--r--server/sandbox_run.go615
1 files changed, 0 insertions, 615 deletions
diff --git a/server/sandbox_run.go b/server/sandbox_run.go
deleted file mode 100644
index 72afdb229..000000000
--- a/server/sandbox_run.go
+++ /dev/null
@@ -1,615 +0,0 @@
-package server
-
-import (
- "encoding/json"
- "fmt"
- "io/ioutil"
- "os"
- "path"
- "path/filepath"
- "regexp"
- "strconv"
- "strings"
- "time"
-
- "github.com/containers/storage"
- "github.com/kubernetes-incubator/cri-o/libkpod/sandbox"
- "github.com/kubernetes-incubator/cri-o/oci"
- "github.com/kubernetes-incubator/cri-o/pkg/annotations"
- runtimespec "github.com/opencontainers/runtime-spec/specs-go"
- "github.com/opencontainers/runtime-tools/generate"
- "github.com/opencontainers/selinux/go-selinux/label"
- "github.com/pkg/errors"
- "github.com/sirupsen/logrus"
- "golang.org/x/net/context"
- "golang.org/x/sys/unix"
- "k8s.io/kubernetes/pkg/api/v1"
- pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
- "k8s.io/kubernetes/pkg/kubelet/leaky"
- "k8s.io/kubernetes/pkg/kubelet/network/hostport"
- "k8s.io/kubernetes/pkg/kubelet/types"
-)
-
// PodInfraOOMAdj is the value that we set for oom score adj for
// the pod infra container.
// TODO: Remove this const once this value is provided over CRI
// See https://github.com/kubernetes/kubernetes/issues/47938
const PodInfraOOMAdj int = -998

// PodInfraCPUshares is default cpu shares for sandbox container.
const PodInfraCPUshares = 2
-
-// privilegedSandbox returns true if the sandbox configuration
-// requires additional host privileges for the sandbox.
-func (s *Server) privilegedSandbox(req *pb.RunPodSandboxRequest) bool {
- securityContext := req.GetConfig().GetLinux().GetSecurityContext()
- if securityContext == nil {
- return false
- }
-
- if securityContext.Privileged {
- return true
- }
-
- namespaceOptions := securityContext.GetNamespaceOptions()
- if namespaceOptions == nil {
- return false
- }
-
- if namespaceOptions.HostNetwork ||
- namespaceOptions.HostPid ||
- namespaceOptions.HostIpc {
- return true
- }
-
- return false
-}
-
-// trustedSandbox returns true if the sandbox will run trusted workloads.
-func (s *Server) trustedSandbox(req *pb.RunPodSandboxRequest) bool {
- kubeAnnotations := req.GetConfig().GetAnnotations()
-
- trustedAnnotation, ok := kubeAnnotations[annotations.TrustedSandbox]
- if !ok {
- // A sandbox is trusted by default.
- return true
- }
-
- return isTrue(trustedAnnotation)
-}
-
-func (s *Server) runContainer(container *oci.Container, cgroupParent string) error {
- if err := s.Runtime().CreateContainer(container, cgroupParent); err != nil {
- return err
- }
- return s.Runtime().StartContainer(container)
-}
-
// conflictRE extracts the ID of the pod that already holds a name from a
// name-reservation error message; the ID is the first capture group.
var conflictRE = regexp.MustCompile(`already reserved for pod "([0-9a-z]+)"`)
-
-// RunPodSandbox creates and runs a pod-level sandbox.
-func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (resp *pb.RunPodSandboxResponse, err error) {
- s.updateLock.RLock()
- defer s.updateLock.RUnlock()
-
- logrus.Debugf("RunPodSandboxRequest %+v", req)
- var processLabel, mountLabel, resolvPath string
- // process req.Name
- kubeName := req.GetConfig().GetMetadata().Name
- if kubeName == "" {
- return nil, fmt.Errorf("PodSandboxConfig.Name should not be empty")
- }
-
- namespace := req.GetConfig().GetMetadata().Namespace
- attempt := req.GetConfig().GetMetadata().Attempt
-
- id, name, err := s.generatePodIDandName(req.GetConfig())
- if err != nil {
- if strings.Contains(err.Error(), "already reserved for pod") {
- matches := conflictRE.FindStringSubmatch(err.Error())
- if len(matches) != 2 {
- return nil, err
- }
- dupID := matches[1]
- if _, err := s.StopPodSandbox(ctx, &pb.StopPodSandboxRequest{PodSandboxId: dupID}); err != nil {
- return nil, err
- }
- if _, err := s.RemovePodSandbox(ctx, &pb.RemovePodSandboxRequest{PodSandboxId: dupID}); err != nil {
- return nil, err
- }
- id, name, err = s.generatePodIDandName(req.GetConfig())
- if err != nil {
- return nil, err
- }
- } else {
- return nil, err
- }
- }
-
- defer func() {
- if err != nil {
- s.ReleasePodName(name)
- }
- }()
-
- _, containerName, err := s.generateContainerIDandNameForSandbox(req.GetConfig())
- if err != nil {
- return nil, err
- }
-
- defer func() {
- if err != nil {
- s.ReleaseContainerName(containerName)
- }
- }()
-
- podContainer, err := s.StorageRuntimeServer().CreatePodSandbox(s.ImageContext(),
- name, id,
- s.config.PauseImage, "",
- containerName,
- req.GetConfig().GetMetadata().Name,
- req.GetConfig().GetMetadata().Uid,
- namespace,
- attempt,
- nil)
- if errors.Cause(err) == storage.ErrDuplicateName {
- return nil, fmt.Errorf("pod sandbox with name %q already exists", name)
- }
- if err != nil {
- return nil, fmt.Errorf("error creating pod sandbox with name %q: %v", name, err)
- }
- defer func() {
- if err != nil {
- if err2 := s.StorageRuntimeServer().RemovePodSandbox(id); err2 != nil {
- logrus.Warnf("couldn't cleanup pod sandbox %q: %v", id, err2)
- }
- }
- }()
-
- // TODO: factor generating/updating the spec into something other projects can vendor
-
- // creates a spec Generator with the default spec.
- g := generate.New()
-
- // setup defaults for the pod sandbox
- g.SetRootReadonly(true)
- if s.config.PauseCommand == "" {
- if podContainer.Config != nil {
- g.SetProcessArgs(podContainer.Config.Config.Cmd)
- } else {
- g.SetProcessArgs([]string{sandbox.PodInfraCommand})
- }
- } else {
- g.SetProcessArgs([]string{s.config.PauseCommand})
- }
-
- // set DNS options
- if req.GetConfig().GetDnsConfig() != nil {
- dnsServers := req.GetConfig().GetDnsConfig().Servers
- dnsSearches := req.GetConfig().GetDnsConfig().Searches
- dnsOptions := req.GetConfig().GetDnsConfig().Options
- resolvPath = fmt.Sprintf("%s/resolv.conf", podContainer.RunDir)
- err = parseDNSOptions(dnsServers, dnsSearches, dnsOptions, resolvPath)
- if err != nil {
- err1 := removeFile(resolvPath)
- if err1 != nil {
- err = err1
- return nil, fmt.Errorf("%v; failed to remove %s: %v", err, resolvPath, err1)
- }
- return nil, err
- }
- if err := label.Relabel(resolvPath, mountLabel, true); err != nil && err != unix.ENOTSUP {
- return nil, err
- }
-
- g.AddBindMount(resolvPath, "/etc/resolv.conf", []string{"ro"})
- }
-
- // add metadata
- metadata := req.GetConfig().GetMetadata()
- metadataJSON, err := json.Marshal(metadata)
- if err != nil {
- return nil, err
- }
-
- // add labels
- labels := req.GetConfig().GetLabels()
-
- // Add special container name label for the infra container
- labelsJSON := []byte{}
- if labels != nil {
- labels[types.KubernetesContainerNameLabel] = leaky.PodInfraContainerName
- labelsJSON, err = json.Marshal(labels)
- if err != nil {
- return nil, err
- }
- }
-
- // add annotations
- kubeAnnotations := req.GetConfig().GetAnnotations()
- kubeAnnotationsJSON, err := json.Marshal(kubeAnnotations)
- if err != nil {
- return nil, err
- }
-
- // set log directory
- logDir := req.GetConfig().LogDirectory
- if logDir == "" {
- logDir = filepath.Join(s.config.LogDir, id)
- }
- if err = os.MkdirAll(logDir, 0700); err != nil {
- return nil, err
- }
- // This should always be absolute from k8s.
- if !filepath.IsAbs(logDir) {
- return nil, fmt.Errorf("requested logDir for sbox id %s is a relative path: %s", id, logDir)
- }
-
- privileged := s.privilegedSandbox(req)
-
- securityContext := req.GetConfig().GetLinux().GetSecurityContext()
- if securityContext == nil {
- logrus.Warn("no security context found in config.")
- }
-
- processLabel, mountLabel, err = getSELinuxLabels(securityContext.GetSelinuxOptions(), privileged)
- if err != nil {
- return nil, err
- }
-
- // Don't use SELinux separation with Host Pid or IPC Namespace or privileged.
- if securityContext.GetNamespaceOptions().GetHostPid() || securityContext.GetNamespaceOptions().GetHostIpc() {
- processLabel, mountLabel = "", ""
- }
- g.SetProcessSelinuxLabel(processLabel)
- g.SetLinuxMountLabel(mountLabel)
-
- // create shm mount for the pod containers.
- var shmPath string
- if securityContext.GetNamespaceOptions().GetHostIpc() {
- shmPath = "/dev/shm"
- } else {
- shmPath, err = setupShm(podContainer.RunDir, mountLabel)
- if err != nil {
- return nil, err
- }
- defer func() {
- if err != nil {
- if err2 := unix.Unmount(shmPath, unix.MNT_DETACH); err2 != nil {
- logrus.Warnf("failed to unmount shm for pod: %v", err2)
- }
- }
- }()
- }
-
- err = s.setPodSandboxMountLabel(id, mountLabel)
- if err != nil {
- return nil, err
- }
-
- if err = s.CtrIDIndex().Add(id); err != nil {
- return nil, err
- }
-
- defer func() {
- if err != nil {
- if err2 := s.CtrIDIndex().Delete(id); err2 != nil {
- logrus.Warnf("couldn't delete ctr id %s from idIndex", id)
- }
- }
- }()
-
- // set log path inside log directory
- logPath := filepath.Join(logDir, id+".log")
-
- // Handle https://issues.k8s.io/44043
- if err := ensureSaneLogPath(logPath); err != nil {
- return nil, err
- }
-
- hostNetwork := securityContext.GetNamespaceOptions().GetHostNetwork()
-
- hostname, err := getHostname(id, req.GetConfig().Hostname, hostNetwork)
- if err != nil {
- return nil, err
- }
- g.SetHostname(hostname)
-
- trusted := s.trustedSandbox(req)
- g.AddAnnotation(annotations.Metadata, string(metadataJSON))
- g.AddAnnotation(annotations.Labels, string(labelsJSON))
- g.AddAnnotation(annotations.Annotations, string(kubeAnnotationsJSON))
- g.AddAnnotation(annotations.LogPath, logPath)
- g.AddAnnotation(annotations.Name, name)
- g.AddAnnotation(annotations.ContainerType, annotations.ContainerTypeSandbox)
- g.AddAnnotation(annotations.SandboxID, id)
- g.AddAnnotation(annotations.ContainerName, containerName)
- g.AddAnnotation(annotations.ContainerID, id)
- g.AddAnnotation(annotations.ShmPath, shmPath)
- g.AddAnnotation(annotations.PrivilegedRuntime, fmt.Sprintf("%v", privileged))
- g.AddAnnotation(annotations.TrustedSandbox, fmt.Sprintf("%v", trusted))
- g.AddAnnotation(annotations.ResolvPath, resolvPath)
- g.AddAnnotation(annotations.HostName, hostname)
- g.AddAnnotation(annotations.KubeName, kubeName)
- if podContainer.Config.Config.StopSignal != "" {
- // this key is defined in image-spec conversion document at https://github.com/opencontainers/image-spec/pull/492/files#diff-8aafbe2c3690162540381b8cdb157112R57
- g.AddAnnotation("org.opencontainers.image.stopSignal", podContainer.Config.Config.StopSignal)
- }
-
- created := time.Now()
- g.AddAnnotation(annotations.Created, created.Format(time.RFC3339Nano))
-
- portMappings := convertPortMappings(req.GetConfig().GetPortMappings())
-
- // setup cgroup settings
- cgroupParent := req.GetConfig().GetLinux().GetCgroupParent()
- if cgroupParent != "" {
- if s.config.CgroupManager == oci.SystemdCgroupsManager {
- if len(cgroupParent) <= 6 || !strings.HasSuffix(path.Base(cgroupParent), ".slice") {
- return nil, fmt.Errorf("cri-o configured with systemd cgroup manager, but did not receive slice as parent: %s", cgroupParent)
- }
- cgPath, err := convertCgroupFsNameToSystemd(cgroupParent)
- if err != nil {
- return nil, err
- }
- g.SetLinuxCgroupsPath(cgPath + ":" + "crio" + ":" + id)
- cgroupParent = cgPath
- } else {
- if strings.HasSuffix(path.Base(cgroupParent), ".slice") {
- return nil, fmt.Errorf("cri-o configured with cgroupfs cgroup manager, but received systemd slice as parent: %s", cgroupParent)
- }
- cgPath := filepath.Join(cgroupParent, scopePrefix+"-"+id)
- g.SetLinuxCgroupsPath(cgPath)
- }
- }
-
- sb, err := sandbox.New(id, namespace, name, kubeName, logDir, labels, kubeAnnotations, processLabel, mountLabel, metadata, shmPath, cgroupParent, privileged, trusted, resolvPath, hostname, portMappings)
- if err != nil {
- return nil, err
- }
-
- s.addSandbox(sb)
- defer func() {
- if err != nil {
- s.removeSandbox(id)
- }
- }()
-
- if err = s.PodIDIndex().Add(id); err != nil {
- return nil, err
- }
-
- defer func() {
- if err != nil {
- if err := s.PodIDIndex().Delete(id); err != nil {
- logrus.Warnf("couldn't delete pod id %s from idIndex", id)
- }
- }
- }()
-
- for k, v := range kubeAnnotations {
- g.AddAnnotation(k, v)
- }
- for k, v := range labels {
- g.AddAnnotation(k, v)
- }
-
- // extract linux sysctls from annotations and pass down to oci runtime
- safe, unsafe, err := SysctlsFromPodAnnotations(kubeAnnotations)
- if err != nil {
- return nil, err
- }
- for _, sysctl := range safe {
- g.AddLinuxSysctl(sysctl.Name, sysctl.Value)
- }
- for _, sysctl := range unsafe {
- g.AddLinuxSysctl(sysctl.Name, sysctl.Value)
- }
-
- // Set OOM score adjust of the infra container to be very low
- // so it doesn't get killed.
- g.SetProcessOOMScoreAdj(PodInfraOOMAdj)
-
- g.SetLinuxResourcesCPUShares(PodInfraCPUshares)
-
- // set up namespaces
- if hostNetwork {
- err = g.RemoveLinuxNamespace(string(runtimespec.NetworkNamespace))
- if err != nil {
- return nil, err
- }
- } else {
- // Create the sandbox network namespace
- if err = sb.NetNsCreate(); err != nil {
- return nil, err
- }
-
- defer func() {
- if err == nil {
- return
- }
-
- if netnsErr := sb.NetNsRemove(); netnsErr != nil {
- logrus.Warnf("Failed to remove networking namespace: %v", netnsErr)
- }
- }()
-
- // Pass the created namespace path to the runtime
- err = g.AddOrReplaceLinuxNamespace(string(runtimespec.NetworkNamespace), sb.NetNsPath())
- if err != nil {
- return nil, err
- }
- }
-
- if securityContext.GetNamespaceOptions().GetHostPid() {
- err = g.RemoveLinuxNamespace(string(runtimespec.PIDNamespace))
- if err != nil {
- return nil, err
- }
- }
-
- if securityContext.GetNamespaceOptions().GetHostIpc() {
- err = g.RemoveLinuxNamespace(string(runtimespec.IPCNamespace))
- if err != nil {
- return nil, err
- }
- }
-
- if !s.seccompEnabled {
- g.Spec().Linux.Seccomp = nil
- }
-
- saveOptions := generate.ExportOptions{}
- mountPoint, err := s.StorageRuntimeServer().StartContainer(id)
- if err != nil {
- return nil, fmt.Errorf("failed to mount container %s in pod sandbox %s(%s): %v", containerName, sb.Name(), id, err)
- }
- g.AddAnnotation(annotations.MountPoint, mountPoint)
- g.SetRootPath(mountPoint)
-
- hostnamePath := fmt.Sprintf("%s/hostname", podContainer.RunDir)
- if err := ioutil.WriteFile(hostnamePath, []byte(hostname+"\n"), 0644); err != nil {
- return nil, err
- }
- if err := label.Relabel(hostnamePath, mountLabel, true); err != nil && err != unix.ENOTSUP {
- return nil, err
- }
- g.AddBindMount(hostnamePath, "/etc/hostname", []string{"ro"})
- g.AddAnnotation(annotations.HostnamePath, hostnamePath)
- sb.AddHostnamePath(hostnamePath)
-
- container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.NetNs(), labels, g.Spec().Annotations, kubeAnnotations, "", "", "", nil, id, false, false, false, sb.Privileged(), sb.Trusted(), podContainer.Dir, created, podContainer.Config.Config.StopSignal)
- if err != nil {
- return nil, err
- }
- container.SetSpec(g.Spec())
- container.SetMountPoint(mountPoint)
-
- sb.SetInfraContainer(container)
-
- var ip string
- ip, err = s.networkStart(hostNetwork, sb)
- if err != nil {
- return nil, err
- }
- defer func() {
- if err != nil {
- s.networkStop(hostNetwork, sb)
- }
- }()
-
- g.AddAnnotation(annotations.IP, ip)
- sb.AddIP(ip)
-
- err = g.SaveToFile(filepath.Join(podContainer.Dir, "config.json"), saveOptions)
- if err != nil {
- return nil, fmt.Errorf("failed to save template configuration for pod sandbox %s(%s): %v", sb.Name(), id, err)
- }
- if err = g.SaveToFile(filepath.Join(podContainer.RunDir, "config.json"), saveOptions); err != nil {
- return nil, fmt.Errorf("failed to write runtime configuration for pod sandbox %s(%s): %v", sb.Name(), id, err)
- }
-
- if err = s.runContainer(container, sb.CgroupParent()); err != nil {
- return nil, err
- }
-
- s.addInfraContainer(container)
-
- s.ContainerStateToDisk(container)
-
- resp = &pb.RunPodSandboxResponse{PodSandboxId: id}
- logrus.Debugf("RunPodSandboxResponse: %+v", resp)
- return resp, nil
-}
-
-func convertPortMappings(in []*pb.PortMapping) []*hostport.PortMapping {
- if in == nil {
- return nil
- }
- out := make([]*hostport.PortMapping, len(in))
- for i, v := range in {
- out[i] = &hostport.PortMapping{
- HostPort: v.HostPort,
- ContainerPort: v.ContainerPort,
- Protocol: v1.Protocol(v.Protocol.String()),
- HostIP: v.HostIp,
- }
- }
- return out
-}
-
// getHostname picks the hostname for a sandbox.
//
// A non-empty hostname is always returned unchanged. Otherwise, with
// hostNetwork set, the host's own name from os.Hostname is used (and its
// error, if any, is returned). Without hostNetwork the name is derived from
// id: its first 12 bytes, or the whole id when it is shorter than that, so a
// short id no longer causes a slice-bounds panic.
func getHostname(id, hostname string, hostNetwork bool) (string, error) {
	// An explicitly requested hostname always wins.
	if hostname != "" {
		return hostname, nil
	}

	if hostNetwork {
		h, err := os.Hostname()
		if err != nil {
			return "", err
		}
		return h, nil
	}

	// Length of the id prefix used as the default hostname.
	const shortIDLen = 12
	if len(id) > shortIDLen {
		return id[:shortIDLen], nil
	}
	return id, nil
}
-
-func (s *Server) setPodSandboxMountLabel(id, mountLabel string) error {
- storageMetadata, err := s.StorageRuntimeServer().GetContainerMetadata(id)
- if err != nil {
- return err
- }
- storageMetadata.SetMountLabel(mountLabel)
- return s.StorageRuntimeServer().SetContainerMetadata(id, storageMetadata)
-}
-
-func getSELinuxLabels(selinuxOptions *pb.SELinuxOption, privileged bool) (processLabel string, mountLabel string, err error) {
- if privileged {
- return "", "", nil
- }
- labels := []string{}
- if selinuxOptions != nil {
- if selinuxOptions.User != "" {
- labels = append(labels, "user:"+selinuxOptions.User)
- }
- if selinuxOptions.Role != "" {
- labels = append(labels, "role:"+selinuxOptions.Role)
- }
- if selinuxOptions.Type != "" {
- labels = append(labels, "type:"+selinuxOptions.Type)
- }
- if selinuxOptions.Level != "" {
- labels = append(labels, "level:"+selinuxOptions.Level)
- }
- }
- return label.InitLabels(labels)
-}
-
-func setupShm(podSandboxRunDir, mountLabel string) (shmPath string, err error) {
- shmPath = filepath.Join(podSandboxRunDir, "shm")
- if err = os.Mkdir(shmPath, 0700); err != nil {
- return "", err
- }
- shmOptions := "mode=1777,size=" + strconv.Itoa(sandbox.DefaultShmSize)
- if err = unix.Mount("shm", shmPath, "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV,
- label.FormatMountLabel(shmOptions, mountLabel)); err != nil {
- return "", fmt.Errorf("failed to mount shm tmpfs for pod: %v", err)
- }
- return shmPath, nil
-}
-
// convertCgroupFsNameToSystemd converts an expanded cgroupfs name to its systemd name.
// For example, test.slice/test-a.slice/test-a-b.slice becomes test-a-b.slice.
// The conversion relies on the simple assumption that the last path element
// encodes the whole hierarchy, per systemd convention; no further validation
// is done and the returned error is always nil.
func convertCgroupFsNameToSystemd(cgroupfsName string) (string, error) {
	// TODO: see if libcontainer systemd implementation could use something similar, and if so, move
	// this function up to that library. At that time, it would most likely do validation specific to systemd.
	systemdName := path.Base(cgroupfsName)
	return systemdName, nil
}