diff options
Diffstat (limited to 'libpod')
-rw-r--r-- | libpod/container.go | 32 | ||||
-rw-r--r-- | libpod/container_inspect.go | 11 | ||||
-rw-r--r-- | libpod/container_internal.go | 46 | ||||
-rw-r--r-- | libpod/container_internal_linux.go | 13 | ||||
-rw-r--r-- | libpod/container_log_linux.go | 26 | ||||
-rw-r--r-- | libpod/container_top_linux.go | 18 | ||||
-rw-r--r-- | libpod/define/container_inspect.go | 1 | ||||
-rw-r--r-- | libpod/kube.go | 65 | ||||
-rw-r--r-- | libpod/oci_conmon_linux.go | 7 | ||||
-rw-r--r-- | libpod/stats.go | 24 |
10 files changed, 202 insertions, 41 deletions
diff --git a/libpod/container.go b/libpod/container.go index a4bbb5dd0..5ae07f602 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -6,10 +6,12 @@ import ( "io/ioutil" "net" "os" + "strings" "time" "github.com/containernetworking/cni/pkg/types" cnitypes "github.com/containernetworking/cni/pkg/types/current" + "github.com/containers/common/pkg/config" "github.com/containers/common/pkg/secrets" "github.com/containers/image/v5/manifest" "github.com/containers/podman/v3/libpod/define" @@ -253,6 +255,8 @@ type ContainerSecret struct { GID uint32 // Mode is the mode of the secret file Mode uint32 + // Secret target inside container + Target string } // ContainerNetworkDescriptions describes the relationship between the CNI @@ -973,6 +977,11 @@ func (c *Container) cGroupPath() (string, error) { procPath := fmt.Sprintf("/proc/%d/cgroup", c.state.PID) lines, err := ioutil.ReadFile(procPath) if err != nil { + // If the file doesn't exist, it means the container could have been terminated + // so report it. + if os.IsNotExist(err) { + return "", errors.Wrapf(define.ErrCtrStopped, "cannot get cgroup path unless container %s is running", c.ID()) + } return "", err } @@ -999,6 +1008,29 @@ func (c *Container) cGroupPath() (string, error) { return "", errors.Errorf("could not find any cgroup in %q", procPath) } + cgroupManager := c.CgroupManager() + switch { + case c.config.CgroupsMode == cgroupSplit: + name := fmt.Sprintf("/libpod-payload-%s/", c.ID()) + if index := strings.LastIndex(cgroupPath, name); index >= 0 { + return cgroupPath[:index+len(name)-1], nil + } + case cgroupManager == config.CgroupfsCgroupsManager: + name := fmt.Sprintf("/libpod-%s/", c.ID()) + if index := strings.LastIndex(cgroupPath, name); index >= 0 { + return cgroupPath[:index+len(name)-1], nil + } + case cgroupManager == config.SystemdCgroupsManager: + // When running under systemd, try to detect the scope that was requested + // to be created. It improves the heuristic since we report the first + // cgroup that was created instead of the cgroup where PID 1 might have + // moved to. + name := fmt.Sprintf("/libpod-%s.scope/", c.ID()) + if index := strings.LastIndex(cgroupPath, name); index >= 0 { + return cgroupPath[:index+len(name)-1], nil + } + } + return cgroupPath, nil } diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go index 9fcf13d53..e3b4b5bb2 100644 --- a/libpod/container_inspect.go +++ b/libpod/container_inspect.go @@ -97,6 +97,16 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver return nil, err } + cgroupPath, err := c.cGroupPath() + if err != nil { + // Handle the case where the container is not running or has no cgroup. + if errors.Is(err, define.ErrNoCgroups) || errors.Is(err, define.ErrCtrStopped) { + cgroupPath = "" + } else { + return nil, err + } + } + data := &define.InspectContainerData{ ID: config.ID, Created: config.CreatedTime, @@ -116,6 +126,7 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver StartedAt: runtimeInfo.StartedTime, FinishedAt: runtimeInfo.FinishedTime, Checkpointed: runtimeInfo.Checkpointed, + CgroupPath: cgroupPath, }, Image: config.RootfsImageID, ImageName: config.RootfsImageName, diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 54d6b1303..c6809ad86 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -36,6 +36,7 @@ import ( "github.com/opencontainers/selinux/go-selinux/label" "github.com/pkg/errors" "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" ) const ( @@ -1581,14 +1582,49 @@ func (c *Container) mountStorage() (_ string, deferredErr error) { }() } + rootUID, rootGID := c.RootUID(), c.RootGID() + + dirfd, err := unix.Open(mountPoint, unix.O_RDONLY|unix.O_PATH, 0) + if err != nil { + return "", errors.Wrap(err, "open mount point") + } + defer unix.Close(dirfd) + + err = unix.Mkdirat(dirfd, "etc", 0755) + if err != nil && !os.IsExist(err) { + return "", errors.Wrap(err, "create /etc") + } + // If the etc directory was created, chown it to root in the container + if err == nil && (rootUID != 0 || rootGID != 0) { + err = unix.Fchownat(dirfd, "etc", rootUID, rootGID, unix.AT_SYMLINK_NOFOLLOW) + if err != nil { + return "", errors.Wrap(err, "chown /etc") + } + } + + etcInTheContainerPath, err := securejoin.SecureJoin(mountPoint, "etc") + if err != nil { + return "", errors.Wrap(err, "resolve /etc in the container") + } + + etcInTheContainerFd, err := unix.Open(etcInTheContainerPath, unix.O_RDONLY|unix.O_PATH, 0) + if err != nil { + return "", errors.Wrap(err, "open /etc in the container") + } + defer unix.Close(etcInTheContainerFd) + // If /etc/mtab does not exist in container image, then we need to // create it, so that mount command within the container will work. - mtab := filepath.Join(mountPoint, "/etc/mtab") - if err := idtools.MkdirAllAs(filepath.Dir(mtab), 0755, c.RootUID(), c.RootGID()); err != nil { - return "", errors.Wrap(err, "error creating mtab directory") + err = unix.Symlinkat("/proc/mounts", etcInTheContainerFd, "mtab") + if err != nil && !os.IsExist(err) { + return "", errors.Wrap(err, "creating /etc/mtab symlink") } - if err = os.Symlink("/proc/mounts", mtab); err != nil && !os.IsExist(err) { - return "", err + // If the symlink was created, then also chown it to root in the container + if err == nil && (rootUID != 0 || rootGID != 0) { + err = unix.Fchownat(etcInTheContainerFd, "mtab", rootUID, rootGID, unix.AT_SYMLINK_NOFOLLOW) + if err != nil { + return "", errors.Wrap(err, "chown /etc/mtab") + } } // Request a mount of all named volumes diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 89869e2f5..8bd433427 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -1777,8 +1777,17 @@ rootless=%d return errors.Wrapf(err, "error creating secrets mount") } for _, secret := range c.Secrets() { + secretFileName := secret.Name + base := "/run/secrets" + if secret.Target != "" { + secretFileName = secret.Target + //If absolute path for target given remove base. + if filepath.IsAbs(secretFileName) { + base = "" + } + } src := filepath.Join(c.config.SecretsPath, secret.Name) - dest := filepath.Join("/run/secrets", secret.Name) + dest := filepath.Join(base, secretFileName) c.state.BindMounts[dest] = src } } @@ -2503,7 +2512,7 @@ func (c *Container) getOCICgroupPath() (string, error) { if err != nil { return "", err } - return filepath.Join(selfCgroup, "container"), nil + return filepath.Join(selfCgroup, fmt.Sprintf("libpod-payload-%s", c.ID())), nil case cgroupManager == config.SystemdCgroupsManager: // When the OCI runtime is set to use Systemd as a cgroup manager, it // expects cgroups to be passed as follows: diff --git a/libpod/container_log_linux.go b/libpod/container_log_linux.go index b6b780bab..90da8053d 100644 --- a/libpod/container_log_linux.go +++ b/libpod/container_log_linux.go @@ -37,13 +37,21 @@ func (c *Container) initializeJournal(ctx context.Context) error { m := make(map[string]string) m["SYSLOG_IDENTIFIER"] = "podman" m["PODMAN_ID"] = c.ID() - m["CONTAINER_ID_FULL"] = c.ID() history := events.History m["PODMAN_EVENT"] = history.String() + container := events.Container + m["PODMAN_TYPE"] = container.String() + m["PODMAN_TIME"] = time.Now().Format(time.RFC3339Nano) return journal.Send("", journal.PriInfo, m) } func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOptions, logChannel chan *logs.LogLine) error { + // We need the container's events in the same journal to guarantee + // consistency, see #10323. + if options.Follow && c.runtime.config.Engine.EventsLogger != "journald" { + return errors.Errorf("using --follow with the journald --log-driver but without the journald --events-backend (%s) is not supported", c.runtime.config.Engine.EventsLogger) + } + journal, err := sdjournal.NewJournal() if err != nil { return err @@ -89,6 +97,7 @@ func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOption // exponential backoff. var cursor string var cursorError error + var containerCouldBeLogging bool for i := 1; i <= 3; i++ { cursor, cursorError = journal.GetCursor() hundreds := 1 @@ -105,12 +114,6 @@ func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOption return errors.Wrap(cursorError, "initial journal cursor") } - // We need the container's events in the same journal to guarantee - // consistency, see #10323. - if options.Follow && c.runtime.config.Engine.EventsLogger != "journald" { - return errors.Errorf("using --follow with the journald --log-driver but without the journald --events-backend (%s) is not supported", c.runtime.config.Engine.EventsLogger) - } - options.WaitGroup.Add(1) go func() { defer func() { @@ -173,7 +176,7 @@ func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOption doTailFunc() } // Unless we follow, quit. - if !options.Follow { + if !options.Follow || !containerCouldBeLogging { return } // Sleep until something's happening on the journal. @@ -202,11 +205,14 @@ func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOption logrus.Errorf("Failed to translate event: %v", err) return } - if status == events.Exited { + switch status { + case events.History, events.Init, events.Start, events.Restart: + containerCouldBeLogging = true + case events.Exited: + containerCouldBeLogging = false if doTail { doTailFunc() } - return } continue } diff --git a/libpod/container_top_linux.go b/libpod/container_top_linux.go index 0d4cba85e..d4f4ddfc1 100644 --- a/libpod/container_top_linux.go +++ b/libpod/container_top_linux.go @@ -4,6 +4,7 @@ package libpod import ( "bufio" + "fmt" "os" "strconv" "strings" @@ -11,6 +12,7 @@ import ( "github.com/containers/podman/v3/libpod/define" "github.com/containers/podman/v3/pkg/rootless" "github.com/containers/psgo" + "github.com/google/shlex" "github.com/pkg/errors" "github.com/sirupsen/logrus" ) @@ -51,7 +53,21 @@ func (c *Container) Top(descriptors []string) ([]string, error) { return nil, psgoErr } - output, err = c.execPS(descriptors) + // Note that the descriptors to ps(1) must be shlexed (see #12452). + psDescriptors := []string{} + for _, d := range descriptors { + shSplit, err := shlex.Split(d) + if err != nil { + return nil, fmt.Errorf("parsing ps args: %v", err) + } + for _, s := range shSplit { + if s != "" { + psDescriptors = append(psDescriptors, s) + } + } + } + + output, err = c.execPS(psDescriptors) if err != nil { return nil, errors.Wrapf(err, "error executing ps(1) in the container") } diff --git a/libpod/define/container_inspect.go b/libpod/define/container_inspect.go index 90703a807..7a5d4e572 100644 --- a/libpod/define/container_inspect.go +++ b/libpod/define/container_inspect.go @@ -204,6 +204,7 @@ type InspectContainerState struct { FinishedAt time.Time `json:"FinishedAt"` Healthcheck HealthCheckResults `json:"Healthcheck,omitempty"` Checkpointed bool `json:"Checkpointed,omitempty"` + CgroupPath string `json:"CgroupPath,omitempty"` } // HealthCheckResults describes the results/logs from a healthcheck diff --git a/libpod/kube.go b/libpod/kube.go index b34734513..12d1c5be8 100644 --- a/libpod/kube.go +++ b/libpod/kube.go @@ -78,7 +78,11 @@ func (p *Pod) GenerateForKube(ctx context.Context) (*v1.Pod, []v1.ServicePort, e if err != nil { return nil, servicePorts, err } - servicePorts = containerPortsToServicePorts(ports) + spState := newServicePortState() + servicePorts, err = spState.containerPortsToServicePorts(ports) + if err != nil { + return nil, servicePorts, err + } hostNetwork = infraContainer.NetworkMode() == string(namespaces.NetworkMode(specgen.Host)) } pod, err := p.podWithContainers(ctx, allContainers, ports, hostNetwork) @@ -241,13 +245,17 @@ func ConvertV1PodToYAMLPod(pod *v1.Pod) *YAMLPod { } // GenerateKubeServiceFromV1Pod creates a v1 service object from a v1 pod object -func GenerateKubeServiceFromV1Pod(pod *v1.Pod, servicePorts []v1.ServicePort) YAMLService { +func GenerateKubeServiceFromV1Pod(pod *v1.Pod, servicePorts []v1.ServicePort) (YAMLService, error) { service := YAMLService{} selector := make(map[string]string) selector["app"] = pod.Labels["app"] ports := servicePorts if len(ports) == 0 { - ports = containersToServicePorts(pod.Spec.Containers) + p, err := containersToServicePorts(pod.Spec.Containers) + if err != nil { + return service, err + } + ports = p } serviceSpec := v1.ServiceSpec{ Ports: ports, @@ -261,15 +269,43 @@ func GenerateKubeServiceFromV1Pod(pod *v1.Pod, servicePorts []v1.ServicePort) YA APIVersion: pod.TypeMeta.APIVersion, } service.TypeMeta = tm - return service + return service, nil +} + +// servicePortState allows calling containerPortsToServicePorts for a single service +type servicePortState struct { + // A program using the shared math/rand state with the default seed will produce the same sequence of pseudo-random numbers + // for each execution. Use a private RNG state not to interfere with other users. + rng *rand.Rand + usedPorts map[int]struct{} +} + +func newServicePortState() servicePortState { + return servicePortState{ + rng: rand.New(rand.NewSource(time.Now().UnixNano())), + usedPorts: map[int]struct{}{}, + } } // containerPortsToServicePorts takes a slice of containerports and generates a // slice of service ports -func containerPortsToServicePorts(containerPorts []v1.ContainerPort) []v1.ServicePort { +func (state *servicePortState) containerPortsToServicePorts(containerPorts []v1.ContainerPort) ([]v1.ServicePort, error) { sps := make([]v1.ServicePort, 0, len(containerPorts)) for _, cp := range containerPorts { - nodePort := 30000 + rand.Intn(32767-30000+1) + var nodePort int + attempt := 0 + for { + // Legal nodeport range is 30000-32767 + nodePort = 30000 + state.rng.Intn(32767-30000+1) + if _, found := state.usedPorts[nodePort]; !found { + state.usedPorts[nodePort] = struct{}{} + break + } + attempt++ + if attempt >= 100 { + return nil, fmt.Errorf("too many attempts trying to generate a unique NodePort number") + } + } servicePort := v1.ServicePort{ Protocol: cp.Protocol, Port: cp.ContainerPort, @@ -279,21 +315,22 @@ func containerPortsToServicePorts(containerPorts []v1.ContainerPort) []v1.Servic } sps = append(sps, servicePort) } - return sps + return sps, nil } // containersToServicePorts takes a slice of v1.Containers and generates an // inclusive list of serviceports to expose -func containersToServicePorts(containers []v1.Container) []v1.ServicePort { - // Without the call to rand.Seed, a program will produce the same sequence of pseudo-random numbers - // for each execution. Legal nodeport range is 30000-32767 - rand.Seed(time.Now().UnixNano()) - +func containersToServicePorts(containers []v1.Container) ([]v1.ServicePort, error) { + state := newServicePortState() sps := make([]v1.ServicePort, 0, len(containers)) for _, ctr := range containers { - sps = append(sps, containerPortsToServicePorts(ctr.Ports)...) + ports, err := state.containerPortsToServicePorts(ctr.Ports) + if err != nil { + return nil, err + } + sps = append(sps, ports...) } - return sps + return sps, nil } func (p *Pod) podWithContainers(ctx context.Context, containers []*Container, ports []v1.ContainerPort, hostNetwork bool) (*v1.Pod, error) { diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index ea0ef842d..9403b286c 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -289,7 +289,7 @@ func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container) error { if err2 != nil { return errors.Wrapf(err, "error getting container %s state", ctr.ID()) } - if strings.Contains(string(out), "does not exist") { + if strings.Contains(string(out), "does not exist") || strings.Contains(string(out), "No such file") { if err := ctr.removeConmonFiles(); err != nil { logrus.Debugf("unable to remove conmon files for container %s", ctr.ID()) } @@ -399,6 +399,11 @@ func (r *ConmonOCIRuntime) KillContainer(ctr *Container, signal uint, all bool) args = append(args, "kill", ctr.ID(), fmt.Sprintf("%d", signal)) } if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...); err != nil { + // try updating container state but ignore errors we cant do anything if this fails. + r.UpdateContainerStatus(ctr) + if ctr.state.State == define.ContainerStateExited { + return nil + } return errors.Wrapf(err, "error sending signal to container %s", ctr.ID()) } diff --git a/libpod/stats.go b/libpod/stats.go index 975152535..cc1250e83 100644 --- a/libpod/stats.go +++ b/libpod/stats.go @@ -3,6 +3,7 @@ package libpod import ( + "math" "strings" "syscall" "time" @@ -68,7 +69,7 @@ func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*de stats.AvgCPU = calculateAvgCPU(stats.CPU, previousStats.AvgCPU, previousStats.DataPoints) stats.DataPoints = previousStats.DataPoints + 1 stats.MemUsage = cgroupStats.Memory.Usage.Usage - stats.MemLimit = getMemLimit(cgroupStats.Memory.Usage.Limit) + stats.MemLimit = c.getMemLimit() stats.MemPerc = (float64(stats.MemUsage) / float64(stats.MemLimit)) * 100 stats.PIDs = 0 if conState == define.ContainerStateRunning || conState == define.ContainerStatePaused { @@ -91,22 +92,29 @@ func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*de return stats, nil } -// getMemory limit returns the memory limit for a given cgroup -// If the configured memory limit is larger than the total memory on the sys, the -// physical system memory size is returned -func getMemLimit(cgroupLimit uint64) uint64 { +// getMemory limit returns the memory limit for a container +func (c *Container) getMemLimit() uint64 { + memLimit := uint64(math.MaxUint64) + + if c.config.Spec.Linux != nil && c.config.Spec.Linux.Resources != nil && + c.config.Spec.Linux.Resources.Memory != nil && c.config.Spec.Linux.Resources.Memory.Limit != nil { + memLimit = uint64(*c.config.Spec.Linux.Resources.Memory.Limit) + } + si := &syscall.Sysinfo_t{} err := syscall.Sysinfo(si) if err != nil { - return cgroupLimit + return memLimit } //nolint:unconvert physicalLimit := uint64(si.Totalram) - if cgroupLimit > physicalLimit { + + if memLimit <= 0 || memLimit > physicalLimit { return physicalLimit } - return cgroupLimit + + return memLimit } // calculateCPUPercent calculates the cpu usage using the latest measurement in stats. |