Diffstat (limited to 'libpod')
-rw-r--r--   libpod/container_internal.go        |  26
-rw-r--r--   libpod/container_internal_linux.go  |   1
-rw-r--r--   libpod/container_log_linux.go       |  46
-rw-r--r--   libpod/info.go                      |   2
-rw-r--r--   libpod/kube.go                      | 142
-rw-r--r--   libpod/lock/shm/shm_lock.go         |  11
-rw-r--r--   libpod/network/cni/cni_types.go     |   2
-rw-r--r--   libpod/network/cni/run.go           |   6
-rw-r--r--   libpod/networking_slirp4netns.go    |  39
-rw-r--r--   libpod/runtime.go                   |   2
-rw-r--r--   libpod/runtime_ctr.go               |   9
-rw-r--r--   libpod/runtime_pod_linux.go         |  52
12 files changed, 266 insertions, 72 deletions
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index 2ca49758d..54d6b1303 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -479,9 +479,27 @@ func (c *Container) setupStorage(ctx context.Context) error {
 
 	c.setupStorageMapping(&options.IDMappingOptions, &c.config.IDMappings)
 
-	containerInfo, err := c.runtime.storageService.CreateContainerStorage(ctx, c.runtime.imageContext, c.config.RootfsImageName, c.config.RootfsImageID, c.config.Name, c.config.ID, options)
-	if err != nil {
-		return errors.Wrapf(err, "error creating container storage")
+	// Unless the user has specified a name, use a randomly generated one.
+	// Note that name conflicts may occur (see #11735), so we need to loop.
+	generateName := c.config.Name == ""
+	var containerInfo ContainerInfo
+	var containerInfoErr error
+	for {
+		if generateName {
+			name, err := c.runtime.generateName()
+			if err != nil {
+				return err
+			}
+			c.config.Name = name
+		}
+		containerInfo, containerInfoErr = c.runtime.storageService.CreateContainerStorage(ctx, c.runtime.imageContext, c.config.RootfsImageName, c.config.RootfsImageID, c.config.Name, c.config.ID, options)
+
+		if !generateName || errors.Cause(containerInfoErr) != storage.ErrDuplicateName {
+			break
+		}
+	}
+	if containerInfoErr != nil {
+		return errors.Wrapf(containerInfoErr, "error creating container storage")
 	}
 
 	c.config.IDMappings.UIDMap = containerInfo.UIDMap
@@ -2100,7 +2118,7 @@ func (c *Container) checkReadyForRemoval() error {
 		return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is in invalid state", c.ID())
 	}
 
-	if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
+	if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) && !c.IsInfra() {
 		return errors.Wrapf(define.ErrCtrStateInvalid, "cannot remove container %s as it is %s - running or paused containers cannot be removed without force", c.ID(), c.state.State.String())
 	}
 
diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go
index 310110679..208e089b5 100644
--- a/libpod/container_internal_linux.go
+++ b/libpod/container_internal_linux.go
@@ -1483,6 +1483,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
 	logrus.Debugf("Restored container %s", c.ID())
 
 	c.state.State = define.ContainerStateRunning
+	c.state.Checkpointed = false
 
 	if !options.Keep {
 		// Delete all checkpoint related files. At this point, in theory, all files
diff --git a/libpod/container_log_linux.go b/libpod/container_log_linux.go
index ca1e11ef5..b6b780bab 100644
--- a/libpod/container_log_linux.go
+++ b/libpod/container_log_linux.go
@@ -91,8 +91,12 @@ func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOption
 	var cursorError error
 	for i := 1; i <= 3; i++ {
 		cursor, cursorError = journal.GetCursor()
+		hundreds := 1
+		for j := 1; j < i; j++ {
+			hundreds *= 2
+		}
 		if cursorError != nil {
-			time.Sleep(time.Duration(i*100) * time.Millisecond)
+			time.Sleep(time.Duration(hundreds*100) * time.Millisecond)
 			continue
 		}
 		break
@@ -117,8 +121,26 @@ func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOption
 	}()
 
 	tailQueue := []*logs.LogLine{} // needed for options.Tail
-	doTail := options.Tail > 0
+	doTail := options.Tail >= 0
+	doTailFunc := func() {
+		// Flush *once* we hit the end of the journal.
+		startIndex := int64(len(tailQueue))
+		outputLines := int64(0)
+		for startIndex > 0 && outputLines < options.Tail {
+			startIndex--
+			for startIndex > 0 && tailQueue[startIndex].Partial() {
+				startIndex--
+			}
+			outputLines++
+		}
+		for i := startIndex; i < int64(len(tailQueue)); i++ {
+			logChannel <- tailQueue[i]
+		}
+		tailQueue = nil
+		doTail = false
+	}
 	lastReadCursor := ""
+	partial := ""
 	for {
 		select {
 		case <-ctx.Done():
@@ -148,16 +170,7 @@ func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOption
 		// Hit the end of the journal (so far?).
 		if cursor == lastReadCursor {
 			if doTail {
-				// Flush *once* we hit the end of the journal.
-				startIndex := int64(len(tailQueue)-1) - options.Tail
-				if startIndex < 0 {
-					startIndex = 0
-				}
-				for i := startIndex; i < int64(len(tailQueue)); i++ {
-					logChannel <- tailQueue[i]
-				}
-				tailQueue = nil
-				doTail = false
+				doTailFunc()
 			}
 			// Unless we follow, quit.
 			if !options.Follow {
@@ -190,6 +203,9 @@ func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOption
 				return
 			}
 			if status == events.Exited {
+				if doTail {
+					doTailFunc()
+				}
 				return
 			}
 			continue
 		}
@@ -214,6 +230,12 @@ func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOption
 			logrus.Errorf("Failed parse log line: %v", err)
 			return
 		}
+		if logLine.Partial() {
+			partial += logLine.Msg
+			continue
+		}
+		logLine.Msg = partial + logLine.Msg
+		partial = ""
 		if doTail {
 			tailQueue = append(tailQueue, logLine)
 			continue
diff --git a/libpod/info.go b/libpod/info.go
index 2eba4bbff..47fda0725 100644
--- a/libpod/info.go
+++ b/libpod/info.go
@@ -333,7 +333,7 @@ func readKernelVersion() (string, error) {
 		return "", err
 	}
 	f := bytes.Fields(buf)
-	if len(f) < 2 {
+	if len(f) < 3 {
 		return string(bytes.TrimSpace(buf)), nil
 	}
 	return string(f[2]), nil
diff --git a/libpod/kube.go b/libpod/kube.go
index f5291ce60..b34734513 100644
--- a/libpod/kube.go
+++ b/libpod/kube.go
@@ -166,9 +166,83 @@ func (v *Volume) GenerateForKube() *v1.PersistentVolumeClaim {
 	}
 }
 
+// YAMLPodSpec represents the same k8s API core PodSpec struct with a small
+// change and that is having Containers as a pointer to YAMLContainer.
+// Because Go doesn't omit empty struct and we want to omit Status in YAML
+// if it's empty. Fixes: GH-11998
+type YAMLPodSpec struct {
+	v1.PodSpec
+	Containers []*YAMLContainer `json:"containers"`
+}
+
+// YAMLPod represents the same k8s API core Pod struct with a small
+// change and that is having Spec as a pointer to YAMLPodSpec and
+// Status as a pointer to k8s API core PodStatus.
+// Because Go doesn't omit empty struct and we want to omit Status in YAML
+// if it's empty. Fixes: GH-11998
+type YAMLPod struct {
+	v1.Pod
+	Spec   *YAMLPodSpec  `json:"spec,omitempty"`
+	Status *v1.PodStatus `json:"status,omitempty"`
+}
+
+// YAMLService represents the same k8s API core Service struct with a small
+// change and that is having Status as a pointer to k8s API core ServiceStatus.
+// Because Go doesn't omit empty struct and we want to omit Status in YAML
+// if it's empty. Fixes: GH-11998
+type YAMLService struct {
+	v1.Service
+	Status *v1.ServiceStatus `json:"status,omitempty"`
+}
+
+// YAMLContainer represents the same k8s API core Container struct with a small
+// change and that is having Resources as a pointer to k8s API core ResourceRequirements.
+// Because Go doesn't omit empty struct and we want to omit Status in YAML
+// if it's empty. Fixes: GH-11998
+type YAMLContainer struct {
+	v1.Container
+	Resources *v1.ResourceRequirements `json:"resources,omitempty"`
+}
+
+// ConvertV1PodToYAMLPod takes k8s API core Pod and returns a pointer to YAMLPod
+func ConvertV1PodToYAMLPod(pod *v1.Pod) *YAMLPod {
+	cs := []*YAMLContainer{}
+	for _, cc := range pod.Spec.Containers {
+		var res *v1.ResourceRequirements = nil
+		if len(cc.Resources.Limits) > 0 || len(cc.Resources.Requests) > 0 {
+			res = &cc.Resources
+		}
+		cs = append(cs, &YAMLContainer{Container: cc, Resources: res})
+	}
+	mpo := &YAMLPod{Pod: *pod}
+	mpo.Spec = &YAMLPodSpec{PodSpec: (*pod).Spec, Containers: cs}
+	for _, ctr := range pod.Spec.Containers {
+		if ctr.SecurityContext == nil || ctr.SecurityContext.SELinuxOptions == nil {
+			continue
+		}
+		selinuxOpts := ctr.SecurityContext.SELinuxOptions
+		if selinuxOpts.User == "" && selinuxOpts.Role == "" && selinuxOpts.Type == "" && selinuxOpts.Level == "" {
+			ctr.SecurityContext.SELinuxOptions = nil
+		}
+	}
+	dnsCfg := pod.Spec.DNSConfig
+	if dnsCfg != nil && (len(dnsCfg.Nameservers)+len(dnsCfg.Searches)+len(dnsCfg.Options) > 0) {
+		mpo.Spec.DNSConfig = dnsCfg
+	}
+	status := pod.Status
+	if status.Phase != "" || len(status.Conditions) > 0 ||
+		status.Message != "" || status.Reason != "" ||
+		status.NominatedNodeName != "" || status.HostIP != "" ||
+		status.PodIP != "" || status.StartTime != nil ||
+		len(status.InitContainerStatuses) > 0 || len(status.ContainerStatuses) > 0 || status.QOSClass != "" || len(status.EphemeralContainerStatuses) > 0 {
+		mpo.Status = &status
+	}
+	return mpo
+}
+
 // GenerateKubeServiceFromV1Pod creates a v1 service object from a v1 pod object
-func GenerateKubeServiceFromV1Pod(pod *v1.Pod, servicePorts []v1.ServicePort) v1.Service {
-	service := v1.Service{}
+func GenerateKubeServiceFromV1Pod(pod *v1.Pod, servicePorts []v1.ServicePort) YAMLService {
+	service := YAMLService{}
 	selector := make(map[string]string)
 	selector["app"] = pod.Labels["app"]
 	ports := servicePorts
@@ -332,7 +406,7 @@ func newPodObject(podName string, annotations map[string]string, initCtrs, conta
 		InitContainers: initCtrs,
 		Volumes:        volumes,
 	}
-	if dnsOptions != nil {
+	if dnsOptions != nil && (len(dnsOptions.Nameservers)+len(dnsOptions.Searches)+len(dnsOptions.Options) > 0) {
 		ps.DNSConfig = dnsOptions
 	}
 	p := v1.Pod{
@@ -447,11 +521,6 @@ func containerToV1Container(ctx context.Context, c *Container) (v1.Container, []
 		kubeVolumes = append(kubeVolumes, volumes...)
 	}
 
-	envVariables, err := libpodEnvVarsToKubeEnvVars(c.config.Spec.Process.Env)
-	if err != nil {
-		return kubeContainer, kubeVolumes, nil, annotations, err
-	}
-
 	portmappings, err := c.PortMappings()
 	if err != nil {
 		return kubeContainer, kubeVolumes, nil, annotations, err
@@ -489,15 +558,23 @@ func containerToV1Container(ctx context.Context, c *Container) (v1.Container, []
 		kubeContainer.Command = nil
 	}
 
+	if c.WorkingDir() != "/" && imgData.Config.WorkingDir != c.WorkingDir() {
+		kubeContainer.WorkingDir = c.WorkingDir()
+	}
+
 	if imgData.User == c.User() {
 		kubeSec.RunAsGroup, kubeSec.RunAsUser = nil, nil
 	}
 
-	kubeContainer.WorkingDir = c.WorkingDir()
+	envVariables, err := libpodEnvVarsToKubeEnvVars(c.config.Spec.Process.Env, imgData.Config.Env)
+	if err != nil {
+		return kubeContainer, kubeVolumes, nil, annotations, err
+	}
+	kubeContainer.Env = envVariables
+
 	kubeContainer.Ports = ports
 	// This should not be applicable
 	//container.EnvFromSource =
-	kubeContainer.Env = envVariables
 	kubeContainer.SecurityContext = kubeSec
 	kubeContainer.StdinOnce = false
 	kubeContainer.TTY = c.config.Spec.Process.Terminal
@@ -600,9 +677,14 @@ func ocicniPortMappingToContainerPort(portMappings []ocicni.PortMapping) ([]v1.C
 }
 
 // libpodEnvVarsToKubeEnvVars converts a key=value string slice to []v1.EnvVar
-func libpodEnvVarsToKubeEnvVars(envs []string) ([]v1.EnvVar, error) {
+func libpodEnvVarsToKubeEnvVars(envs []string, imageEnvs []string) ([]v1.EnvVar, error) {
 	defaultEnv := env.DefaultEnvVariables()
 	envVars := make([]v1.EnvVar, 0, len(envs))
+	imageMap := make(map[string]string, len(imageEnvs))
+	for _, ie := range imageEnvs {
+		split := strings.SplitN(ie, "=", 2)
+		imageMap[split[0]] = split[1]
+	}
 	for _, e := range envs {
 		split := strings.SplitN(e, "=", 2)
 		if len(split) != 2 {
@@ -611,6 +693,9 @@ func libpodEnvVarsToKubeEnvVars(envs []string) ([]v1.EnvVar, error) {
 		if defaultEnv[split[0]] == split[1] {
 			continue
 		}
+		if imageMap[split[0]] == split[1] {
+			continue
+		}
 		ev := v1.EnvVar{
 			Name:  split[0],
 			Value: split[1],
@@ -808,33 +893,42 @@ func generateKubeSecurityContext(c *Container) (*v1.SecurityContext, error) {
 		capabilities = newCaps
 	}
 
+	sc := v1.SecurityContext{
+		// RunAsNonRoot is an optional parameter; our first implementations should be root only; however
+		// I'm leaving this as a bread-crumb for later
+		//RunAsNonRoot: &nonRoot,
+	}
+	if capabilities != nil {
+		sc.Capabilities = capabilities
+	}
 	var selinuxOpts v1.SELinuxOptions
 	opts := strings.SplitN(c.config.Spec.Annotations[define.InspectAnnotationLabel], ":", 2)
-	if len(opts) == 2 {
+	switch len(opts) {
+	case 2:
 		switch opts[0] {
 		case "type":
 			selinuxOpts.Type = opts[1]
+			sc.SELinuxOptions = &selinuxOpts
 		case "level":
 			selinuxOpts.Level = opts[1]
+			sc.SELinuxOptions = &selinuxOpts
 		}
-	}
-	if len(opts) == 1 {
+	case 1:
 		if opts[0] == "disable" {
 			selinuxOpts.Type = "spc_t"
+			sc.SELinuxOptions = &selinuxOpts
 		}
 	}
 
-	sc := v1.SecurityContext{
-		Capabilities:   capabilities,
-		Privileged:     &privileged,
-		SELinuxOptions: &selinuxOpts,
-		// RunAsNonRoot is an optional parameter; our first implementations should be root only; however
-		// I'm leaving this as a bread-crumb for later
-		//RunAsNonRoot: &nonRoot,
-		ReadOnlyRootFilesystem:   &ro,
-		AllowPrivilegeEscalation: &allowPrivEscalation,
+	if !allowPrivEscalation {
+		sc.AllowPrivilegeEscalation = &allowPrivEscalation
+	}
+	if privileged {
+		sc.Privileged = &privileged
+	}
+	if ro {
+		sc.ReadOnlyRootFilesystem = &ro
 	}
-
 	if c.User() != "" {
 		if !c.batched {
 			c.lock.Lock()
diff --git a/libpod/lock/shm/shm_lock.go b/libpod/lock/shm/shm_lock.go
index 322e92a8f..fea02a619 100644
--- a/libpod/lock/shm/shm_lock.go
+++ b/libpod/lock/shm/shm_lock.go
@@ -130,8 +130,17 @@ func (locks *SHMLocks) AllocateSemaphore() (uint32, error) {
 	// semaphore indexes, and can still return error codes.
 	retCode := C.allocate_semaphore(locks.lockStruct)
 	if retCode < 0 {
+		var err = syscall.Errno(-1 * retCode)
 		// Negative errno returned
-		return 0, syscall.Errno(-1 * retCode)
+		if errors.Is(err, syscall.ENOSPC) {
+			// ENOSPC expands to "no space left on device". While it is technically true
+			// that there's no room in the SHM inn for this lock, this tends to send normal people
+			// down the path of checking disk-space which is not actually their problem.
+			// Give a clue that it's actually due to num_locks filling up.
+			var errFull = errors.Errorf("allocation failed; exceeded num_locks (%d)", locks.maxLocks)
+			return uint32(retCode), errFull
+		}
+		return uint32(retCode), syscall.Errno(-1 * retCode)
 	}
 
 	return uint32(retCode), nil
diff --git a/libpod/network/cni/cni_types.go b/libpod/network/cni/cni_types.go
index 91fd1c27b..35548f4f9 100644
--- a/libpod/network/cni/cni_types.go
+++ b/libpod/network/cni/cni_types.go
@@ -175,7 +175,7 @@ func newIPAMLocalHostRange(subnet types.IPNet, leaseRange *types.LeaseRange, gw
 		Subnet: subnet.String(),
 	}
 
-	// an user provided a range, we add it here
+	// a user provided a range, we add it here
 	if leaseRange != nil {
 		if leaseRange.StartIP != nil {
 			hostRange.RangeStart = leaseRange.StartIP.String()
diff --git a/libpod/network/cni/run.go b/libpod/network/cni/run.go
index 14634262c..3f78d79a4 100644
--- a/libpod/network/cni/run.go
+++ b/libpod/network/cni/run.go
@@ -199,10 +199,8 @@ func getRuntimeConfig(netns, conName, conID, networkName string, ports []cniPort
 		IfName: opts.InterfaceName,
 		Args: [][2]string{
 			{"IgnoreUnknown", "1"},
-			// FIXME: Should we set the K8S args?
-			//{"K8S_POD_NAMESPACE", conName},
-			//{"K8S_POD_INFRA_CONTAINER_ID", conID},
-			// K8S_POD_NAME is used by dnsname to get the container name
+			// Do not set the K8S env vars, see https://github.com/containers/podman/issues/12083.
+			// Only K8S_POD_NAME is used by dnsname to get the container name.
			{"K8S_POD_NAME", conName},
 		},
 		CapabilityArgs: map[string]interface{}{},
diff --git a/libpod/networking_slirp4netns.go b/libpod/networking_slirp4netns.go
index c06d215e1..e24b54032 100644
--- a/libpod/networking_slirp4netns.go
+++ b/libpod/networking_slirp4netns.go
@@ -16,6 +16,7 @@ import (
 	"syscall"
 	"time"
 
+	"github.com/containernetworking/plugins/pkg/ns"
 	"github.com/containers/podman/v3/pkg/errorhandling"
 	"github.com/containers/podman/v3/pkg/rootless"
 	"github.com/containers/podman/v3/pkg/rootlessport"
@@ -58,6 +59,8 @@ type slirp4netnsNetworkOptions struct {
 	outboundAddr6 string
 }
 
+const ipv6ConfDefaultAcceptDadSysctl = "/proc/sys/net/ipv6/conf/default/accept_dad"
+
 func checkSlirpFlags(path string) (*slirpFeatures, error) {
 	cmd := exec.Command(path, "--help")
 	out, err := cmd.CombinedOutput()
@@ -297,6 +300,39 @@ func (r *Runtime) setupSlirp4netns(ctr *Container) error {
 	}
 	cmd.Stdout = logFile
 	cmd.Stderr = logFile
+
+	var slirpReadyChan (chan struct{})
+
+	if netOptions.enableIPv6 {
+		slirpReadyChan = make(chan struct{})
+		defer close(slirpReadyChan)
+		go func() {
+			err := ns.WithNetNSPath(netnsPath, func(_ ns.NetNS) error {
+				// Duplicate Address Detection slows the ipv6 setup down for 1-2 seconds.
+				// Since slirp4netns is run in its own namespace and not directly routed
+				// we can skip this to make the ipv6 address immediately available.
+				// We change the default to make sure the slirp tap interface gets the
+				// correct value assigned so DAD is disabled for it
+				// Also make sure to change this value back to the original after slirp4netns
+				// is ready in case users rely on this sysctl.
+				orgValue, err := ioutil.ReadFile(ipv6ConfDefaultAcceptDadSysctl)
+				if err != nil {
+					return err
+				}
+				err = ioutil.WriteFile(ipv6ConfDefaultAcceptDadSysctl, []byte("0"), 0644)
+				if err != nil {
+					return err
+				}
+				// wait for slirp to finish setup
+				<-slirpReadyChan
+				return ioutil.WriteFile(ipv6ConfDefaultAcceptDadSysctl, orgValue, 0644)
+			})
+			if err != nil {
+				logrus.Warnf("failed to set net.ipv6.conf.default.accept_dad sysctl: %v", err)
+			}
+		}()
+	}
+
 	if err := cmd.Start(); err != nil {
 		return errors.Wrapf(err, "failed to start slirp4netns process")
 	}
@@ -310,6 +346,9 @@ func (r *Runtime) setupSlirp4netns(ctr *Container) error {
 	if err := waitForSync(syncR, cmd, logFile, 1*time.Second); err != nil {
 		return err
 	}
+	if slirpReadyChan != nil {
+		slirpReadyChan <- struct{}{}
+	}
 
 	// Set a default slirp subnet. Parsing a string with the net helper is easier than building the struct myself
 	_, ctr.slirp4netnsSubnet, _ = net.ParseCIDR(defaultSlirp4netnsSubnet)
diff --git a/libpod/runtime.go b/libpod/runtime.go
index c22d87324..a208b6db4 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -541,6 +541,8 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (retErr error) {
 			return err
 		}
 		if became {
+			// Check if the pause process was created. If it was created, then
+			// move it to its own systemd scope.
 			utils.MovePauseProcessToScope(pausePid)
 			os.Exit(ret)
 		}
diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index 7d3891f6e..f27e854a4 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -322,15 +322,6 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
 		}
 	}
 
-	if ctr.config.Name == "" {
-		name, err := r.generateName()
-		if err != nil {
-			return nil, err
-		}
-
-		ctr.config.Name = name
-	}
-
 	// Check CGroup parent sanity, and set it if it was not set.
 	// Only if we're actually configuring CGroups.
 	if !ctr.config.NoCgroups {
diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go
index 7571fdfff..de00eb8e0 100644
--- a/libpod/runtime_pod_linux.go
+++ b/libpod/runtime_pod_linux.go
@@ -43,18 +43,6 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option
 		}
 	}
 
-	if pod.config.Name == "" {
-		name, err := r.generateName()
-		if err != nil {
-			return nil, err
-		}
-		pod.config.Name = name
-	}
-
-	if p.InfraContainerSpec != nil && p.InfraContainerSpec.Hostname == "" {
-		p.InfraContainerSpec.Hostname = pod.config.Name
-	}
-
 	// Allocate a lock for the pod
 	lock, err := r.lockManager.AllocateLock()
 	if err != nil {
@@ -131,9 +119,33 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option
 		logrus.Infof("Pod has an infra container, but shares no namespaces")
 	}
 
-	if err := r.state.AddPod(pod); err != nil {
-		return nil, errors.Wrapf(err, "error adding pod to state")
+	// Unless the user has specified a name, use a randomly generated one.
+	// Note that name conflicts may occur (see #11735), so we need to loop.
+	generateName := pod.config.Name == ""
+	var addPodErr error
+	for {
+		if generateName {
+			name, err := r.generateName()
+			if err != nil {
+				return nil, err
+			}
+			pod.config.Name = name
+		}
+
+		if p.InfraContainerSpec != nil && p.InfraContainerSpec.Hostname == "" {
+			p.InfraContainerSpec.Hostname = pod.config.Name
+		}
+		if addPodErr = r.state.AddPod(pod); addPodErr == nil {
+			return pod, nil
+		}
+		if !generateName || (errors.Cause(addPodErr) != define.ErrPodExists && errors.Cause(addPodErr) != define.ErrCtrExists) {
+			break
+		}
 	}
+	if addPodErr != nil {
+		return nil, errors.Wrapf(addPodErr, "error adding pod to state")
+	}
+
 	return pod, nil
 }
@@ -177,10 +189,9 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 	if err != nil {
 		return err
 	}
 	numCtrs := len(ctrs)
-
-	// If the only container in the pod is the pause container, remove the pod and container unconditionally.
+	// If the only running container in the pod is the pause container, remove the pod and container unconditionally.
 	pauseCtrID := p.state.InfraContainerID
 	if numCtrs == 1 && ctrs[0].ID() == pauseCtrID {
 		removeCtrs = true
@@ -264,6 +275,15 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 		}
 	}
 
+	// Clear infra container ID before we remove the infra container.
+	// There is a potential issue if we don't do that, and removal is
+	// interrupted between RemoveAllContainers() below and the pod's removal
+	// later - we end up with a reference to a nonexistent infra container.
+	p.state.InfraContainerID = ""
+	if err := p.save(); err != nil {
+		return err
+	}
+
 	// Remove all containers in the pod from the state.
 	if err := r.state.RemovePodContainers(p); err != nil {
 		// If this fails, there isn't much more we can do.
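The common thread in these changes is that setupStorage in container_internal.go and NewPod in runtime_pod_linux.go now generate the random name inside the creation loop and retry only when the backend reports a name collision (#11735), while the old up-front name generation is dropped from runtime_ctr.go and runtime_pod_linux.go. Below is a minimal standalone sketch of that retry pattern; randomName, create and errDuplicateName are hypothetical stand-ins for podman's generateName helper, the storage/state call, and storage.ErrDuplicateName / define.ErrPodExists, not the real API.

package main

import (
	"errors"
	"fmt"
	"math/rand"
)

// errDuplicateName stands in for storage.ErrDuplicateName / define.ErrPodExists.
var errDuplicateName = errors.New("name is already in use")

// taken simulates names already present in the store.
var taken = map[string]bool{"pod_1": true}

// randomName stands in for runtime.generateName().
func randomName() string {
	return fmt.Sprintf("pod_%d", rand.Intn(3))
}

// create stands in for CreateContainerStorage / state.AddPod.
func create(name string) error {
	if taken[name] {
		return errDuplicateName
	}
	taken[name] = true
	return nil
}

// createWithRetry retries only when the name was auto-generated and the
// failure really is a name collision; user-supplied names and any other
// error fail immediately, mirroring the loops added in this changeset.
func createWithRetry(requested string) (string, error) {
	autoName := requested == ""
	name := requested
	for {
		if autoName {
			name = randomName()
		}
		err := create(name)
		if err == nil {
			return name, nil
		}
		if !autoName || !errors.Is(err, errDuplicateName) {
			return "", err
		}
	}
}

func main() {
	name, err := createWithRetry("")
	fmt.Println(name, err)
}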
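The YAMLPod, YAMLService and YAMLContainer wrappers in kube.go all lean on the same marshaling detail: Go encoders never treat a non-pointer struct value as "empty", so a zero Status or Resources is always serialized, but an embedded type can be shadowed by a pointer field at a shallower depth, and a nil pointer with omitempty is dropped. A small sketch of the idea, using encoding/json and illustrative types (not the k8s API ones); the same shadowing applies when the output is produced by a YAML library that honors json tags, which is how these kube types are tagged.

package main

import (
	"encoding/json"
	"fmt"
)

// Status mimics a k8s-style status struct.
type Status struct {
	Phase string `json:"phase,omitempty"`
}

// Pod mimics the upstream shape: a value field is emitted as {} even when
// empty, because omitempty has no effect on a non-pointer struct.
type Pod struct {
	Name   string `json:"name"`
	Status Status `json:"status,omitempty"`
}

// WrappedPod embeds Pod and shadows Status with a pointer at a shallower
// depth; the encoder prefers the shallower field, and a nil pointer with
// omitempty is left out of the output entirely.
type WrappedPod struct {
	Pod
	Status *Status `json:"status,omitempty"`
}

func main() {
	p := Pod{Name: "demo"}

	plain, _ := json.Marshal(p)
	fmt.Println(string(plain)) // {"name":"demo","status":{}}

	wrapped, _ := json.Marshal(WrappedPod{Pod: p})
	fmt.Println(string(wrapped)) // {"name":"demo"} (empty status omitted)
}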