summaryrefslogtreecommitdiff
path: root/libpod
diff options
context:
space:
mode:
Diffstat (limited to 'libpod')
-rw-r--r--libpod/container.go32
-rw-r--r--libpod/container_inspect.go11
-rw-r--r--libpod/container_internal.go46
-rw-r--r--libpod/container_internal_linux.go13
-rw-r--r--libpod/container_log_linux.go26
-rw-r--r--libpod/container_top_linux.go18
-rw-r--r--libpod/define/container_inspect.go1
-rw-r--r--libpod/kube.go65
-rw-r--r--libpod/oci_conmon_linux.go7
-rw-r--r--libpod/stats.go24
10 files changed, 202 insertions, 41 deletions
diff --git a/libpod/container.go b/libpod/container.go
index a4bbb5dd0..5ae07f602 100644
--- a/libpod/container.go
+++ b/libpod/container.go
@@ -6,10 +6,12 @@ import (
"io/ioutil"
"net"
"os"
+ "strings"
"time"
"github.com/containernetworking/cni/pkg/types"
cnitypes "github.com/containernetworking/cni/pkg/types/current"
+ "github.com/containers/common/pkg/config"
"github.com/containers/common/pkg/secrets"
"github.com/containers/image/v5/manifest"
"github.com/containers/podman/v3/libpod/define"
@@ -253,6 +255,8 @@ type ContainerSecret struct {
GID uint32
// Mode is the mode of the secret file
Mode uint32
+ // Secret target inside container
+ Target string
}
// ContainerNetworkDescriptions describes the relationship between the CNI
@@ -973,6 +977,11 @@ func (c *Container) cGroupPath() (string, error) {
procPath := fmt.Sprintf("/proc/%d/cgroup", c.state.PID)
lines, err := ioutil.ReadFile(procPath)
if err != nil {
+ // If the file doesn't exist, it means the container could have been terminated
+ // so report it.
+ if os.IsNotExist(err) {
+ return "", errors.Wrapf(define.ErrCtrStopped, "cannot get cgroup path unless container %s is running", c.ID())
+ }
return "", err
}
@@ -999,6 +1008,29 @@ func (c *Container) cGroupPath() (string, error) {
return "", errors.Errorf("could not find any cgroup in %q", procPath)
}
+ cgroupManager := c.CgroupManager()
+ switch {
+ case c.config.CgroupsMode == cgroupSplit:
+ name := fmt.Sprintf("/libpod-payload-%s/", c.ID())
+ if index := strings.LastIndex(cgroupPath, name); index >= 0 {
+ return cgroupPath[:index+len(name)-1], nil
+ }
+ case cgroupManager == config.CgroupfsCgroupsManager:
+ name := fmt.Sprintf("/libpod-%s/", c.ID())
+ if index := strings.LastIndex(cgroupPath, name); index >= 0 {
+ return cgroupPath[:index+len(name)-1], nil
+ }
+ case cgroupManager == config.SystemdCgroupsManager:
+ // When running under systemd, try to detect the scope that was requested
+ // to be created. It improves the heuristic since we report the first
+ // cgroup that was created instead of the cgroup where PID 1 might have
+ // moved to.
+ name := fmt.Sprintf("/libpod-%s.scope/", c.ID())
+ if index := strings.LastIndex(cgroupPath, name); index >= 0 {
+ return cgroupPath[:index+len(name)-1], nil
+ }
+ }
+
return cgroupPath, nil
}
diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go
index 9fcf13d53..e3b4b5bb2 100644
--- a/libpod/container_inspect.go
+++ b/libpod/container_inspect.go
@@ -97,6 +97,16 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver
return nil, err
}
+ cgroupPath, err := c.cGroupPath()
+ if err != nil {
+ // Handle the case where the container is not running or has no cgroup.
+ if errors.Is(err, define.ErrNoCgroups) || errors.Is(err, define.ErrCtrStopped) {
+ cgroupPath = ""
+ } else {
+ return nil, err
+ }
+ }
+
data := &define.InspectContainerData{
ID: config.ID,
Created: config.CreatedTime,
@@ -116,6 +126,7 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver
StartedAt: runtimeInfo.StartedTime,
FinishedAt: runtimeInfo.FinishedTime,
Checkpointed: runtimeInfo.Checkpointed,
+ CgroupPath: cgroupPath,
},
Image: config.RootfsImageID,
ImageName: config.RootfsImageName,
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index 54d6b1303..c6809ad86 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -36,6 +36,7 @@ import (
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
)
const (
@@ -1581,14 +1582,49 @@ func (c *Container) mountStorage() (_ string, deferredErr error) {
}()
}
+ rootUID, rootGID := c.RootUID(), c.RootGID()
+
+ dirfd, err := unix.Open(mountPoint, unix.O_RDONLY|unix.O_PATH, 0)
+ if err != nil {
+ return "", errors.Wrap(err, "open mount point")
+ }
+ defer unix.Close(dirfd)
+
+ err = unix.Mkdirat(dirfd, "etc", 0755)
+ if err != nil && !os.IsExist(err) {
+ return "", errors.Wrap(err, "create /etc")
+ }
+ // If the etc directory was created, chown it to root in the container
+ if err == nil && (rootUID != 0 || rootGID != 0) {
+ err = unix.Fchownat(dirfd, "etc", rootUID, rootGID, unix.AT_SYMLINK_NOFOLLOW)
+ if err != nil {
+ return "", errors.Wrap(err, "chown /etc")
+ }
+ }
+
+ etcInTheContainerPath, err := securejoin.SecureJoin(mountPoint, "etc")
+ if err != nil {
+ return "", errors.Wrap(err, "resolve /etc in the container")
+ }
+
+ etcInTheContainerFd, err := unix.Open(etcInTheContainerPath, unix.O_RDONLY|unix.O_PATH, 0)
+ if err != nil {
+ return "", errors.Wrap(err, "open /etc in the container")
+ }
+ defer unix.Close(etcInTheContainerFd)
+
// If /etc/mtab does not exist in container image, then we need to
// create it, so that mount command within the container will work.
- mtab := filepath.Join(mountPoint, "/etc/mtab")
- if err := idtools.MkdirAllAs(filepath.Dir(mtab), 0755, c.RootUID(), c.RootGID()); err != nil {
- return "", errors.Wrap(err, "error creating mtab directory")
+ err = unix.Symlinkat("/proc/mounts", etcInTheContainerFd, "mtab")
+ if err != nil && !os.IsExist(err) {
+ return "", errors.Wrap(err, "creating /etc/mtab symlink")
}
- if err = os.Symlink("/proc/mounts", mtab); err != nil && !os.IsExist(err) {
- return "", err
+ // If the symlink was created, then also chown it to root in the container
+ if err == nil && (rootUID != 0 || rootGID != 0) {
+ err = unix.Fchownat(etcInTheContainerFd, "mtab", rootUID, rootGID, unix.AT_SYMLINK_NOFOLLOW)
+ if err != nil {
+ return "", errors.Wrap(err, "chown /etc/mtab")
+ }
}
// Request a mount of all named volumes
diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go
index 89869e2f5..8bd433427 100644
--- a/libpod/container_internal_linux.go
+++ b/libpod/container_internal_linux.go
@@ -1777,8 +1777,17 @@ rootless=%d
return errors.Wrapf(err, "error creating secrets mount")
}
for _, secret := range c.Secrets() {
+ secretFileName := secret.Name
+ base := "/run/secrets"
+ if secret.Target != "" {
+ secretFileName = secret.Target
+ //If absolute path for target given remove base.
+ if filepath.IsAbs(secretFileName) {
+ base = ""
+ }
+ }
src := filepath.Join(c.config.SecretsPath, secret.Name)
- dest := filepath.Join("/run/secrets", secret.Name)
+ dest := filepath.Join(base, secretFileName)
c.state.BindMounts[dest] = src
}
}
@@ -2503,7 +2512,7 @@ func (c *Container) getOCICgroupPath() (string, error) {
if err != nil {
return "", err
}
- return filepath.Join(selfCgroup, "container"), nil
+ return filepath.Join(selfCgroup, fmt.Sprintf("libpod-payload-%s", c.ID())), nil
case cgroupManager == config.SystemdCgroupsManager:
// When the OCI runtime is set to use Systemd as a cgroup manager, it
// expects cgroups to be passed as follows:
diff --git a/libpod/container_log_linux.go b/libpod/container_log_linux.go
index b6b780bab..90da8053d 100644
--- a/libpod/container_log_linux.go
+++ b/libpod/container_log_linux.go
@@ -37,13 +37,21 @@ func (c *Container) initializeJournal(ctx context.Context) error {
m := make(map[string]string)
m["SYSLOG_IDENTIFIER"] = "podman"
m["PODMAN_ID"] = c.ID()
- m["CONTAINER_ID_FULL"] = c.ID()
history := events.History
m["PODMAN_EVENT"] = history.String()
+ container := events.Container
+ m["PODMAN_TYPE"] = container.String()
+ m["PODMAN_TIME"] = time.Now().Format(time.RFC3339Nano)
return journal.Send("", journal.PriInfo, m)
}
func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOptions, logChannel chan *logs.LogLine) error {
+ // We need the container's events in the same journal to guarantee
+ // consistency, see #10323.
+ if options.Follow && c.runtime.config.Engine.EventsLogger != "journald" {
+ return errors.Errorf("using --follow with the journald --log-driver but without the journald --events-backend (%s) is not supported", c.runtime.config.Engine.EventsLogger)
+ }
+
journal, err := sdjournal.NewJournal()
if err != nil {
return err
@@ -89,6 +97,7 @@ func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOption
// exponential backoff.
var cursor string
var cursorError error
+ var containerCouldBeLogging bool
for i := 1; i <= 3; i++ {
cursor, cursorError = journal.GetCursor()
hundreds := 1
@@ -105,12 +114,6 @@ func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOption
return errors.Wrap(cursorError, "initial journal cursor")
}
- // We need the container's events in the same journal to guarantee
- // consistency, see #10323.
- if options.Follow && c.runtime.config.Engine.EventsLogger != "journald" {
- return errors.Errorf("using --follow with the journald --log-driver but without the journald --events-backend (%s) is not supported", c.runtime.config.Engine.EventsLogger)
- }
-
options.WaitGroup.Add(1)
go func() {
defer func() {
@@ -173,7 +176,7 @@ func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOption
doTailFunc()
}
// Unless we follow, quit.
- if !options.Follow {
+ if !options.Follow || !containerCouldBeLogging {
return
}
// Sleep until something's happening on the journal.
@@ -202,11 +205,14 @@ func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOption
logrus.Errorf("Failed to translate event: %v", err)
return
}
- if status == events.Exited {
+ switch status {
+ case events.History, events.Init, events.Start, events.Restart:
+ containerCouldBeLogging = true
+ case events.Exited:
+ containerCouldBeLogging = false
if doTail {
doTailFunc()
}
- return
}
continue
}
diff --git a/libpod/container_top_linux.go b/libpod/container_top_linux.go
index 0d4cba85e..d4f4ddfc1 100644
--- a/libpod/container_top_linux.go
+++ b/libpod/container_top_linux.go
@@ -4,6 +4,7 @@ package libpod
import (
"bufio"
+ "fmt"
"os"
"strconv"
"strings"
@@ -11,6 +12,7 @@ import (
"github.com/containers/podman/v3/libpod/define"
"github.com/containers/podman/v3/pkg/rootless"
"github.com/containers/psgo"
+ "github.com/google/shlex"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
@@ -51,7 +53,21 @@ func (c *Container) Top(descriptors []string) ([]string, error) {
return nil, psgoErr
}
- output, err = c.execPS(descriptors)
+ // Note that the descriptors to ps(1) must be shlexed (see #12452).
+ psDescriptors := []string{}
+ for _, d := range descriptors {
+ shSplit, err := shlex.Split(d)
+ if err != nil {
+ return nil, fmt.Errorf("parsing ps args: %v", err)
+ }
+ for _, s := range shSplit {
+ if s != "" {
+ psDescriptors = append(psDescriptors, s)
+ }
+ }
+ }
+
+ output, err = c.execPS(psDescriptors)
if err != nil {
return nil, errors.Wrapf(err, "error executing ps(1) in the container")
}
diff --git a/libpod/define/container_inspect.go b/libpod/define/container_inspect.go
index 90703a807..7a5d4e572 100644
--- a/libpod/define/container_inspect.go
+++ b/libpod/define/container_inspect.go
@@ -204,6 +204,7 @@ type InspectContainerState struct {
FinishedAt time.Time `json:"FinishedAt"`
Healthcheck HealthCheckResults `json:"Healthcheck,omitempty"`
Checkpointed bool `json:"Checkpointed,omitempty"`
+ CgroupPath string `json:"CgroupPath,omitempty"`
}
// HealthCheckResults describes the results/logs from a healthcheck
diff --git a/libpod/kube.go b/libpod/kube.go
index b34734513..12d1c5be8 100644
--- a/libpod/kube.go
+++ b/libpod/kube.go
@@ -78,7 +78,11 @@ func (p *Pod) GenerateForKube(ctx context.Context) (*v1.Pod, []v1.ServicePort, e
if err != nil {
return nil, servicePorts, err
}
- servicePorts = containerPortsToServicePorts(ports)
+ spState := newServicePortState()
+ servicePorts, err = spState.containerPortsToServicePorts(ports)
+ if err != nil {
+ return nil, servicePorts, err
+ }
hostNetwork = infraContainer.NetworkMode() == string(namespaces.NetworkMode(specgen.Host))
}
pod, err := p.podWithContainers(ctx, allContainers, ports, hostNetwork)
@@ -241,13 +245,17 @@ func ConvertV1PodToYAMLPod(pod *v1.Pod) *YAMLPod {
}
// GenerateKubeServiceFromV1Pod creates a v1 service object from a v1 pod object
-func GenerateKubeServiceFromV1Pod(pod *v1.Pod, servicePorts []v1.ServicePort) YAMLService {
+func GenerateKubeServiceFromV1Pod(pod *v1.Pod, servicePorts []v1.ServicePort) (YAMLService, error) {
service := YAMLService{}
selector := make(map[string]string)
selector["app"] = pod.Labels["app"]
ports := servicePorts
if len(ports) == 0 {
- ports = containersToServicePorts(pod.Spec.Containers)
+ p, err := containersToServicePorts(pod.Spec.Containers)
+ if err != nil {
+ return service, err
+ }
+ ports = p
}
serviceSpec := v1.ServiceSpec{
Ports: ports,
@@ -261,15 +269,43 @@ func GenerateKubeServiceFromV1Pod(pod *v1.Pod, servicePorts []v1.ServicePort) YA
APIVersion: pod.TypeMeta.APIVersion,
}
service.TypeMeta = tm
- return service
+ return service, nil
+}
+
+// servicePortState allows calling containerPortsToServicePorts for a single service
+type servicePortState struct {
+ // A program using the shared math/rand state with the default seed will produce the same sequence of pseudo-random numbers
+ // for each execution. Use a private RNG state not to interfere with other users.
+ rng *rand.Rand
+ usedPorts map[int]struct{}
+}
+
+func newServicePortState() servicePortState {
+ return servicePortState{
+ rng: rand.New(rand.NewSource(time.Now().UnixNano())),
+ usedPorts: map[int]struct{}{},
+ }
}
// containerPortsToServicePorts takes a slice of containerports and generates a
// slice of service ports
-func containerPortsToServicePorts(containerPorts []v1.ContainerPort) []v1.ServicePort {
+func (state *servicePortState) containerPortsToServicePorts(containerPorts []v1.ContainerPort) ([]v1.ServicePort, error) {
sps := make([]v1.ServicePort, 0, len(containerPorts))
for _, cp := range containerPorts {
- nodePort := 30000 + rand.Intn(32767-30000+1)
+ var nodePort int
+ attempt := 0
+ for {
+ // Legal nodeport range is 30000-32767
+ nodePort = 30000 + state.rng.Intn(32767-30000+1)
+ if _, found := state.usedPorts[nodePort]; !found {
+ state.usedPorts[nodePort] = struct{}{}
+ break
+ }
+ attempt++
+ if attempt >= 100 {
+ return nil, fmt.Errorf("too many attempts trying to generate a unique NodePort number")
+ }
+ }
servicePort := v1.ServicePort{
Protocol: cp.Protocol,
Port: cp.ContainerPort,
@@ -279,21 +315,22 @@ func containerPortsToServicePorts(containerPorts []v1.ContainerPort) []v1.Servic
}
sps = append(sps, servicePort)
}
- return sps
+ return sps, nil
}
// containersToServicePorts takes a slice of v1.Containers and generates an
// inclusive list of serviceports to expose
-func containersToServicePorts(containers []v1.Container) []v1.ServicePort {
- // Without the call to rand.Seed, a program will produce the same sequence of pseudo-random numbers
- // for each execution. Legal nodeport range is 30000-32767
- rand.Seed(time.Now().UnixNano())
-
+func containersToServicePorts(containers []v1.Container) ([]v1.ServicePort, error) {
+ state := newServicePortState()
sps := make([]v1.ServicePort, 0, len(containers))
for _, ctr := range containers {
- sps = append(sps, containerPortsToServicePorts(ctr.Ports)...)
+ ports, err := state.containerPortsToServicePorts(ctr.Ports)
+ if err != nil {
+ return nil, err
+ }
+ sps = append(sps, ports...)
}
- return sps
+ return sps, nil
}
func (p *Pod) podWithContainers(ctx context.Context, containers []*Container, ports []v1.ContainerPort, hostNetwork bool) (*v1.Pod, error) {
diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go
index ea0ef842d..9403b286c 100644
--- a/libpod/oci_conmon_linux.go
+++ b/libpod/oci_conmon_linux.go
@@ -289,7 +289,7 @@ func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container) error {
if err2 != nil {
return errors.Wrapf(err, "error getting container %s state", ctr.ID())
}
- if strings.Contains(string(out), "does not exist") {
+ if strings.Contains(string(out), "does not exist") || strings.Contains(string(out), "No such file") {
if err := ctr.removeConmonFiles(); err != nil {
logrus.Debugf("unable to remove conmon files for container %s", ctr.ID())
}
@@ -399,6 +399,11 @@ func (r *ConmonOCIRuntime) KillContainer(ctr *Container, signal uint, all bool)
args = append(args, "kill", ctr.ID(), fmt.Sprintf("%d", signal))
}
if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...); err != nil {
+ // try updating container state but ignore errors we cant do anything if this fails.
+ r.UpdateContainerStatus(ctr)
+ if ctr.state.State == define.ContainerStateExited {
+ return nil
+ }
return errors.Wrapf(err, "error sending signal to container %s", ctr.ID())
}
diff --git a/libpod/stats.go b/libpod/stats.go
index 975152535..cc1250e83 100644
--- a/libpod/stats.go
+++ b/libpod/stats.go
@@ -3,6 +3,7 @@
package libpod
import (
+ "math"
"strings"
"syscall"
"time"
@@ -68,7 +69,7 @@ func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*de
stats.AvgCPU = calculateAvgCPU(stats.CPU, previousStats.AvgCPU, previousStats.DataPoints)
stats.DataPoints = previousStats.DataPoints + 1
stats.MemUsage = cgroupStats.Memory.Usage.Usage
- stats.MemLimit = getMemLimit(cgroupStats.Memory.Usage.Limit)
+ stats.MemLimit = c.getMemLimit()
stats.MemPerc = (float64(stats.MemUsage) / float64(stats.MemLimit)) * 100
stats.PIDs = 0
if conState == define.ContainerStateRunning || conState == define.ContainerStatePaused {
@@ -91,22 +92,29 @@ func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*de
return stats, nil
}
-// getMemory limit returns the memory limit for a given cgroup
-// If the configured memory limit is larger than the total memory on the sys, the
-// physical system memory size is returned
-func getMemLimit(cgroupLimit uint64) uint64 {
+// getMemory limit returns the memory limit for a container
+func (c *Container) getMemLimit() uint64 {
+ memLimit := uint64(math.MaxUint64)
+
+ if c.config.Spec.Linux != nil && c.config.Spec.Linux.Resources != nil &&
+ c.config.Spec.Linux.Resources.Memory != nil && c.config.Spec.Linux.Resources.Memory.Limit != nil {
+ memLimit = uint64(*c.config.Spec.Linux.Resources.Memory.Limit)
+ }
+
si := &syscall.Sysinfo_t{}
err := syscall.Sysinfo(si)
if err != nil {
- return cgroupLimit
+ return memLimit
}
//nolint:unconvert
physicalLimit := uint64(si.Totalram)
- if cgroupLimit > physicalLimit {
+
+ if memLimit <= 0 || memLimit > physicalLimit {
return physicalLimit
}
- return cgroupLimit
+
+ return memLimit
}
// calculateCPUPercent calculates the cpu usage using the latest measurement in stats.