diff options
Diffstat (limited to 'libpod')
-rw-r--r-- | libpod/container_exec.go | 16 | ||||
-rw-r--r-- | libpod/container_internal.go | 62 | ||||
-rw-r--r-- | libpod/container_internal_linux.go | 84 | ||||
-rw-r--r-- | libpod/container_internal_unsupported.go | 5 | ||||
-rw-r--r-- | libpod/container_log.go | 24 | ||||
-rw-r--r-- | libpod/container_validate.go | 1 | ||||
-rw-r--r-- | libpod/define/containerstate.go | 10 | ||||
-rw-r--r-- | libpod/define/pod_inspect.go | 12 | ||||
-rw-r--r-- | libpod/networking_linux.go | 38 | ||||
-rw-r--r-- | libpod/oci.go | 7 | ||||
-rw-r--r-- | libpod/oci_attach_linux.go | 13 | ||||
-rw-r--r-- | libpod/oci_conmon_exec_linux.go | 19 | ||||
-rw-r--r-- | libpod/oci_missing.go | 5 | ||||
-rw-r--r-- | libpod/options.go | 48 | ||||
-rw-r--r-- | libpod/pod.go | 104 | ||||
-rw-r--r-- | libpod/pod_api.go | 6 | ||||
-rw-r--r-- | libpod/runtime.go | 7 | ||||
-rw-r--r-- | libpod/runtime_pod_infra_linux.go | 8 | ||||
-rw-r--r-- | libpod/stats.go | 10 |
19 files changed, 355 insertions, 124 deletions
diff --git a/libpod/container_exec.go b/libpod/container_exec.go index 737bf74ad..5d4bcb422 100644 --- a/libpod/container_exec.go +++ b/libpod/container_exec.go @@ -277,9 +277,10 @@ func (c *Container) ExecStart(sessionID string) error { } // ExecStartAndAttach starts and attaches to an exec session in a container. +// newSize resizes the tty to this size before the process is started, must be nil if the exec session has no tty // TODO: Should we include detach keys in the signature to allow override? // TODO: How do we handle AttachStdin/AttachStdout/AttachStderr? -func (c *Container) ExecStartAndAttach(sessionID string, streams *define.AttachStreams) error { +func (c *Container) ExecStartAndAttach(sessionID string, streams *define.AttachStreams, newSize *define.TerminalSize) error { if !c.batched { c.lock.Lock() defer c.lock.Unlock() @@ -310,7 +311,7 @@ func (c *Container) ExecStartAndAttach(sessionID string, streams *define.AttachS return err } - pid, attachChan, err := c.ociRuntime.ExecContainer(c, session.ID(), opts, streams) + pid, attachChan, err := c.ociRuntime.ExecContainer(c, session.ID(), opts, streams, newSize) if err != nil { return err } @@ -373,7 +374,9 @@ func (c *Container) ExecStartAndAttach(sessionID string, streams *define.AttachS } // ExecHTTPStartAndAttach starts and performs an HTTP attach to an exec session. -func (c *Container) ExecHTTPStartAndAttach(sessionID string, r *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, detachKeys *string, cancel <-chan bool, hijackDone chan<- bool) error { +// newSize resizes the tty to this size before the process is started, must be nil if the exec session has no tty +func (c *Container) ExecHTTPStartAndAttach(sessionID string, r *http.Request, w http.ResponseWriter, + streams *HTTPAttachStreams, detachKeys *string, cancel <-chan bool, hijackDone chan<- bool, newSize *define.TerminalSize) error { // TODO: How do we combine streams with the default streams set in the exec session? 
// Ensure that we don't leak a goroutine here @@ -431,7 +434,7 @@ func (c *Container) ExecHTTPStartAndAttach(sessionID string, r *http.Request, w close(holdConnOpen) }() - pid, attachChan, err := c.ociRuntime.ExecContainerHTTP(c, session.ID(), execOpts, r, w, streams, cancel, hijackDone, holdConnOpen) + pid, attachChan, err := c.ociRuntime.ExecContainerHTTP(c, session.ID(), execOpts, r, w, streams, cancel, hijackDone, holdConnOpen, newSize) if err != nil { session.State = define.ExecStateStopped session.ExitCode = define.TranslateExecErrorToExitCode(define.ExecErrorCodeGeneric, err) @@ -719,7 +722,10 @@ func (c *Container) Exec(config *ExecConfig, streams *define.AttachStreams, resi // API there. // TODO: Refactor so this is closed here, before we remove the exec // session. + var size *define.TerminalSize if resize != nil { + s := <-resize + size = &s go func() { logrus.Debugf("Sending resize events to exec session %s", sessionID) for resizeRequest := range resize { @@ -737,7 +743,7 @@ func (c *Container) Exec(config *ExecConfig, streams *define.AttachStreams, resi }() } - if err := c.ExecStartAndAttach(sessionID, streams); err != nil { + if err := c.ExecStartAndAttach(sessionID, streams, size); err != nil { return -1, err } diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 3e4eea003..2555f15ec 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -1062,7 +1062,7 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error { } for _, v := range c.config.NamedVolumes { - if err := c.chownVolume(v.Name); err != nil { + if err := c.fixVolumePermissions(v); err != nil { return err } } @@ -1535,7 +1535,7 @@ func (c *Container) mountStorage() (_ string, deferredErr error) { // If /etc/mtab does not exist in container image, then we need to // create it, so that mount command within the container will work. 
mtab := filepath.Join(mountPoint, "/etc/mtab") - if err := os.MkdirAll(filepath.Dir(mtab), 0755); err != nil { + if err := idtools.MkdirAllAs(filepath.Dir(mtab), 0755, c.RootUID(), c.RootGID()); err != nil { return "", errors.Wrap(err, "error creating mtab directory") } if err = os.Symlink("/proc/mounts", mtab); err != nil && !os.IsExist(err) { @@ -1681,64 +1681,6 @@ func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string) return vol, nil } -// Chown the specified volume if necessary. -func (c *Container) chownVolume(volumeName string) error { - vol, err := c.runtime.state.Volume(volumeName) - if err != nil { - return errors.Wrapf(err, "error retrieving named volume %s for container %s", volumeName, c.ID()) - } - - vol.lock.Lock() - defer vol.lock.Unlock() - - // The volume may need a copy-up. Check the state. - if err := vol.update(); err != nil { - return err - } - - // TODO: For now, I've disabled chowning volumes owned by non-Podman - // drivers. This may be safe, but it's really going to be a case-by-case - // thing, I think - safest to leave disabled now and re-enable later if - // there is a demand. 
- if vol.state.NeedsChown && !vol.UsesVolumeDriver() { - vol.state.NeedsChown = false - - uid := int(c.config.Spec.Process.User.UID) - gid := int(c.config.Spec.Process.User.GID) - - if c.config.IDMappings.UIDMap != nil { - p := idtools.IDPair{ - UID: uid, - GID: gid, - } - mappings := idtools.NewIDMappingsFromMaps(c.config.IDMappings.UIDMap, c.config.IDMappings.GIDMap) - newPair, err := mappings.ToHost(p) - if err != nil { - return errors.Wrapf(err, "error mapping user %d:%d", uid, gid) - } - uid = newPair.UID - gid = newPair.GID - } - - vol.state.UIDChowned = uid - vol.state.GIDChowned = gid - - if err := vol.save(); err != nil { - return err - } - - mountPoint, err := vol.MountPoint() - if err != nil { - return err - } - - if err := os.Lchown(mountPoint, uid, gid); err != nil { - return err - } - } - return nil -} - // cleanupStorage unmounts and cleans up the container's root filesystem func (c *Container) cleanupStorage() error { if !c.state.Mounted { diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index ddfccb999..25db3cff0 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -1661,9 +1661,13 @@ func (c *Container) generateResolvConf() (string, error) { // check if systemd-resolved is used, assume it is used when 127.0.0.53 is the only nameserver if len(ns) == 1 && ns[0] == "127.0.0.53" { // read the actual resolv.conf file for systemd-resolved - contents, err = ioutil.ReadFile("/run/systemd/resolve/resolv.conf") + resolvedContents, err := ioutil.ReadFile("/run/systemd/resolve/resolv.conf") if err != nil { - return "", errors.Wrapf(err, "detected that systemd-resolved is in use, but could not locate real resolv.conf") + if !os.IsNotExist(err) { + return "", errors.Wrapf(err, "detected that systemd-resolved is in use, but could not locate real resolv.conf") + } + } else { + contents = resolvedContents } } @@ -1816,7 +1820,7 @@ func (c *Container) getHosts() string { if c.Hostname() != 
"" { if c.config.NetMode.IsSlirp4netns() { // When using slirp4netns, the interface gets a static IP - slirp4netnsIP, err := GetSlirp4netnsGateway(c.slirp4netnsSubnet) + slirp4netnsIP, err := GetSlirp4netnsIP(c.slirp4netnsSubnet) if err != nil { logrus.Warn("failed to determine slirp4netnsIP: ", err.Error()) } else { @@ -2426,3 +2430,77 @@ func (c *Container) createSecretMountDir() error { return err } + +// Fix ownership and permissions of the specified volume if necessary. +func (c *Container) fixVolumePermissions(v *ContainerNamedVolume) error { + vol, err := c.runtime.state.Volume(v.Name) + if err != nil { + return errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID()) + } + + vol.lock.Lock() + defer vol.lock.Unlock() + + // The volume may need a copy-up. Check the state. + if err := vol.update(); err != nil { + return err + } + + // TODO: For now, I've disabled chowning volumes owned by non-Podman + // drivers. This may be safe, but it's really going to be a case-by-case + // thing, I think - safest to leave disabled now and re-enable later if + // there is a demand. 
+ if vol.state.NeedsChown && !vol.UsesVolumeDriver() { + vol.state.NeedsChown = false + + uid := int(c.config.Spec.Process.User.UID) + gid := int(c.config.Spec.Process.User.GID) + + if c.config.IDMappings.UIDMap != nil { + p := idtools.IDPair{ + UID: uid, + GID: gid, + } + mappings := idtools.NewIDMappingsFromMaps(c.config.IDMappings.UIDMap, c.config.IDMappings.GIDMap) + newPair, err := mappings.ToHost(p) + if err != nil { + return errors.Wrapf(err, "error mapping user %d:%d", uid, gid) + } + uid = newPair.UID + gid = newPair.GID + } + + vol.state.UIDChowned = uid + vol.state.GIDChowned = gid + + if err := vol.save(); err != nil { + return err + } + + mountPoint, err := vol.MountPoint() + if err != nil { + return err + } + + if err := os.Lchown(mountPoint, uid, gid); err != nil { + return err + } + + // Make sure the new volume matches the permissions of the target directory. + // https://github.com/containers/podman/issues/10188 + st, err := os.Lstat(filepath.Join(c.state.Mountpoint, v.Dest)) + if err == nil { + if err := os.Chmod(mountPoint, st.Mode()|0111); err != nil { + return err + } + stat := st.Sys().(*syscall.Stat_t) + atime := time.Unix(int64(stat.Atim.Sec), int64(stat.Atim.Nsec)) + if err := os.Chtimes(mountPoint, atime, st.ModTime()); err != nil { + return err + } + } else if !os.IsNotExist(err) { + return err + } + } + return nil +} diff --git a/libpod/container_internal_unsupported.go b/libpod/container_internal_unsupported.go index f979bcbde..125329ce5 100644 --- a/libpod/container_internal_unsupported.go +++ b/libpod/container_internal_unsupported.go @@ -57,3 +57,8 @@ func (c *Container) reloadNetwork() error { func (c *Container) getUserOverrides() *lookup.Overrides { return nil } + +// Fix ownership and permissions of the specified volume if necessary. 
+func (c *Container) fixVolumePermissions(v *ContainerNamedVolume) error { + return define.ErrNotImplemented +} diff --git a/libpod/container_log.go b/libpod/container_log.go index a30e4f5cc..43b3f7736 100644 --- a/libpod/container_log.go +++ b/libpod/container_log.go @@ -4,10 +4,12 @@ import ( "context" "fmt" "os" + "time" "github.com/containers/podman/v3/libpod/define" "github.com/containers/podman/v3/libpod/events" "github.com/containers/podman/v3/libpod/logs" + "github.com/hpcloud/tail/watch" "github.com/pkg/errors" "github.com/sirupsen/logrus" ) @@ -74,7 +76,7 @@ func (c *Container) readFromLogFile(ctx context.Context, options *logs.LogOption } nll, err := logs.NewLogLine(line.Text) if err != nil { - logrus.Error(err) + logrus.Errorf("Error getting new log line: %v", err) continue } if nll.Partial() { @@ -93,17 +95,20 @@ func (c *Container) readFromLogFile(ctx context.Context, options *logs.LogOption }() // Check if container is still running or paused if options.Follow { + // If the container isn't running or if we encountered an error + // getting its state, instruct the logger to read the file + // until EOF. state, err := c.State() if err != nil || state != define.ContainerStateRunning { - // If the container isn't running or if we encountered - // an error getting its state, instruct the logger to - // read the file until EOF. + // Make sure to wait at least for the poll duration + // before stopping the file logger (see #10675). 
+ time.Sleep(watch.POLL_DURATION) tailError := t.StopAtEOF() if tailError != nil && fmt.Sprintf("%v", tailError) != "tail: stop at eof" { - logrus.Error(tailError) + logrus.Errorf("Error stopping logger: %v", tailError) } - if errors.Cause(err) != define.ErrNoSuchCtr { - logrus.Error(err) + if err != nil && errors.Cause(err) != define.ErrNoSuchCtr { + logrus.Errorf("Error getting container state: %v", err) } return nil } @@ -124,9 +129,12 @@ func (c *Container) readFromLogFile(ctx context.Context, options *logs.LogOption // Now wait for the died event and signal to finish // reading the log until EOF. <-eventChannel + // Make sure to wait at least for the poll duration + // before stopping the file logger (see #10675). + time.Sleep(watch.POLL_DURATION) tailError := t.StopAtEOF() if tailError != nil && fmt.Sprintf("%v", tailError) != "tail: stop at eof" { - logrus.Error(tailError) + logrus.Errorf("Error stopping logger: %v", tailError) } }() } diff --git a/libpod/container_validate.go b/libpod/container_validate.go index aae96ae85..6ff46f1b1 100644 --- a/libpod/container_validate.go +++ b/libpod/container_validate.go @@ -131,6 +131,5 @@ func (c *Container) validate() error { if c.config.User == "" && (c.config.Spec.Process.User.UID != 0 || c.config.Spec.Process.User.GID != 0) { return errors.Wrapf(define.ErrInvalidArg, "please set User explicitly via WithUser() instead of in OCI spec directly") } - return nil } diff --git a/libpod/define/containerstate.go b/libpod/define/containerstate.go index 5d2bc9099..fc272beaa 100644 --- a/libpod/define/containerstate.go +++ b/libpod/define/containerstate.go @@ -1,6 +1,10 @@ package define -import "github.com/pkg/errors" +import ( + "time" + + "github.com/pkg/errors" +) // ContainerStatus represents the current state of a container type ContainerStatus int @@ -120,12 +124,14 @@ func (s ContainerExecStatus) String() string { // ContainerStats contains the statistics information for a running container type ContainerStats struct 
{ + AvgCPU float64 ContainerID string Name string PerCPU []uint64 CPU float64 CPUNano uint64 CPUSystemNano uint64 + DataPoints int64 SystemNano uint64 MemUsage uint64 MemLimit uint64 @@ -135,4 +141,6 @@ type ContainerStats struct { BlockInput uint64 BlockOutput uint64 PIDs uint64 + UpTime time.Duration + Duration uint64 } diff --git a/libpod/define/pod_inspect.go b/libpod/define/pod_inspect.go index 2fa91166f..67f075b3c 100644 --- a/libpod/define/pod_inspect.go +++ b/libpod/define/pod_inspect.go @@ -51,6 +51,12 @@ type InspectPodData struct { // Containers gives a brief summary of all containers in the pod and // their current status. Containers []InspectPodContainerInfo `json:"Containers,omitempty"` + // CPUPeriod contains the CPU period of the pod + CPUPeriod uint64 `json:"cpu_period,omitempty"` + // CPUQuota contains the CPU quota of the pod + CPUQuota int64 `json:"cpu_quota,omitempty"` + // CPUSetCPUs contains linux specific CPU data for the pod + CPUSetCPUs string `json:"cpuset_cpus,omitempty"` } // InspectPodInfraConfig contains the configuration of the pod's infra @@ -91,6 +97,12 @@ type InspectPodInfraConfig struct { Networks []string // NetworkOptions are additional options for each network NetworkOptions map[string][]string + // CPUPeriod contains the CPU period of the pod + CPUPeriod uint64 `json:"cpu_period,omitempty"` + // CPUQuota contains the CPU quota of the pod + CPUQuota int64 `json:"cpu_quota,omitempty"` + // CPUSetCPUs contains linux specific CPU data for the container + CPUSetCPUs string `json:"cpuset_cpus,omitempty"` } // InspectPodContainerInfo contains information on a container in a pod. 
diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go index 5446841f6..1b775a4f3 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -46,6 +46,9 @@ const ( // rootlessCNINSName is the file name for the rootless network namespace bind mount rootlessCNINSName = "rootless-cni-ns" + + // persistentCNIDir is the directory where the CNI files are stored + persistentCNIDir = "/var/lib/cni" ) // Get an OCICNI network config @@ -150,14 +153,31 @@ func (r *RootlessCNI) Do(toRun func() error) error { } } - // cni plugins need access to /var and /run - runDir := filepath.Join(r.dir, "run") - varDir := filepath.Join(r.dir, "var") + // cni plugins need access to /var/lib/cni and /run + varDir := "" + varTarget := persistentCNIDir + // we can only mount to a target dir which exists, check /var/lib/cni recursively + // while we could always use /var there are cases where a user might store the cni + // configs under /var/custom and this would break + for { + if _, err := os.Stat(varTarget); err == nil { + varDir = filepath.Join(r.dir, strings.TrimPrefix(varTarget, "/")) + break + } + varTarget = filepath.Base(varTarget) + if varTarget == "/" { + break + } + } + if varDir == "" { + return errors.New("failed to stat /var directory") + } // make sure to mount var first - err = unix.Mount(varDir, "/var", "none", unix.MS_BIND, "") + err = unix.Mount(varDir, varTarget, "none", unix.MS_BIND, "") if err != nil { - return errors.Wrap(err, "failed to mount /var for rootless cni") + return errors.Wrapf(err, "failed to mount %s for rootless cni", varTarget) } + runDir := filepath.Join(r.dir, "run") // recursive mount to keep the netns mount err = unix.Mount(runDir, "/run", "none", unix.MS_BIND|unix.MS_REC, "") if err != nil { @@ -385,7 +405,7 @@ func (r *Runtime) GetRootlessCNINetNs(new bool) (*RootlessCNI, error) { // create cni directories to store files // they will be bind mounted to the correct location in a extra mount ns - err = 
os.MkdirAll(filepath.Join(cniDir, "var"), 0700) + err = os.MkdirAll(filepath.Join(cniDir, strings.TrimPrefix(persistentCNIDir, "/")), 0700) if err != nil { return nil, errors.Wrap(err, "could not create rootless-cni var directory") } @@ -888,6 +908,10 @@ func (c *Container) getContainerNetworkInfo() (*define.InspectNetworkSettings, e if err != nil { return nil, err } + // see https://github.com/containers/podman/issues/10090 + // the container has to be locked for syncContainer() + netNsCtr.lock.Lock() + defer netNsCtr.lock.Unlock() // Have to sync to ensure that state is populated if err := netNsCtr.syncContainer(); err != nil { return nil, err @@ -1043,7 +1067,7 @@ func resultToBasicNetworkConfig(result *cnitypes.Result) (define.InspectBasicNet // after itself on an unclean reboot. Return what we're pretty sure is the path // to CNI's internal files (it's not really exposed to us). func getCNINetworksDir() (string, error) { - return "/var/lib/cni/networks", nil + return filepath.Join(persistentCNIDir, "networks"), nil } type logrusDebugWriter struct { diff --git a/libpod/oci.go b/libpod/oci.go index 1f2c7dd71..c92d9a077 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -72,13 +72,16 @@ type OCIRuntime interface { // has completed, as one might expect. The attach session will remain // running, in a goroutine that will return via the chan error in the // return signature. - ExecContainer(ctr *Container, sessionID string, options *ExecOptions, streams *define.AttachStreams) (int, chan error, error) + // newSize resizes the tty to this size before the process is started, must be nil if the exec session has no tty + ExecContainer(ctr *Container, sessionID string, options *ExecOptions, streams *define.AttachStreams, newSize *define.TerminalSize) (int, chan error, error) // ExecContainerHTTP executes a command in a running container and // attaches its standard streams to a provided hijacked HTTP session. 
// Maintains the same invariants as ExecContainer (returns on session // start, with a goroutine running in the background to handle attach). // The HTTP attach itself maintains the same invariants as HTTPAttach. - ExecContainerHTTP(ctr *Container, sessionID string, options *ExecOptions, r *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool) (int, chan error, error) + // newSize resizes the tty to this size before the process is started, must be nil if the exec session has no tty + ExecContainerHTTP(ctr *Container, sessionID string, options *ExecOptions, r *http.Request, w http.ResponseWriter, + streams *HTTPAttachStreams, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, newSize *define.TerminalSize) (int, chan error, error) // ExecContainerDetached executes a command in a running container, but // does not attach to it. Returns the PID of the exec session and an // error (if starting the exec session failed) diff --git a/libpod/oci_attach_linux.go b/libpod/oci_attach_linux.go index b5040de3e..de435b58a 100644 --- a/libpod/oci_attach_linux.go +++ b/libpod/oci_attach_linux.go @@ -94,17 +94,18 @@ func (c *Container) attach(streams *define.AttachStreams, keys string, resize <- // this ensures attachToExec gets all of the output of the called process // conmon will then send the exit code of the exec process, or an error in the exec session // startFd must be the input side of the fd. +// newSize resizes the tty to this size before the process is started, must be nil if the exec session has no tty // conmon will wait to start the exec session until the parent process has setup the console socket. // Once attachToExec successfully attaches to the console socket, the child conmon process responsible for calling runtime exec // will read from the output side of start fd, thus learning to start the child process. // Thus, the order goes as follow: // 1. 
conmon parent process sets up its console socket. sends on attachFd -// 2. attachToExec attaches to the console socket after reading on attachFd +// 2. attachToExec attaches to the console socket after reading on attachFd and resizes the tty // 3. child waits on startFd for attachToExec to attach to said console socket // 4. attachToExec sends on startFd, signalling it has attached to the socket and child is ready to go // 5. child receives on startFd, runs the runtime exec command // attachToExec is responsible for closing startFd and attachFd -func (c *Container) attachToExec(streams *define.AttachStreams, keys *string, sessionID string, startFd, attachFd *os.File) error { +func (c *Container) attachToExec(streams *define.AttachStreams, keys *string, sessionID string, startFd, attachFd *os.File, newSize *define.TerminalSize) error { if !streams.AttachOutput && !streams.AttachError && !streams.AttachInput { return errors.Wrapf(define.ErrInvalidArg, "must provide at least one stream to attach to") } @@ -137,6 +138,14 @@ func (c *Container) attachToExec(streams *define.AttachStreams, keys *string, se return err } + // resize before we start the container process + if newSize != nil { + err = c.ociRuntime.ExecAttachResize(c, sessionID, *newSize) + if err != nil { + logrus.Warn("resize failed", err) + } + } + // 2: then attach conn, err := openUnixSocket(sockPath) if err != nil { diff --git a/libpod/oci_conmon_exec_linux.go b/libpod/oci_conmon_exec_linux.go index 76338b86c..09d3d1833 100644 --- a/libpod/oci_conmon_exec_linux.go +++ b/libpod/oci_conmon_exec_linux.go @@ -25,7 +25,7 @@ import ( ) // ExecContainer executes a command in a running container -func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options *ExecOptions, streams *define.AttachStreams) (int, chan error, error) { +func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options *ExecOptions, streams *define.AttachStreams, newSize *define.TerminalSize) (int, chan 
error, error) { if options == nil { return -1, nil, errors.Wrapf(define.ErrInvalidArg, "must provide an ExecOptions struct to ExecContainer") } @@ -68,7 +68,7 @@ func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options attachChan := make(chan error) go func() { // attachToExec is responsible for closing pipes - attachChan <- c.attachToExec(streams, options.DetachKeys, sessionID, pipes.startPipe, pipes.attachPipe) + attachChan <- c.attachToExec(streams, options.DetachKeys, sessionID, pipes.startPipe, pipes.attachPipe, newSize) close(attachChan) }() @@ -83,7 +83,8 @@ func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options // ExecContainerHTTP executes a new command in an existing container and // forwards its standard streams over an attach -func (r *ConmonOCIRuntime) ExecContainerHTTP(ctr *Container, sessionID string, options *ExecOptions, req *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool) (int, chan error, error) { +func (r *ConmonOCIRuntime) ExecContainerHTTP(ctr *Container, sessionID string, options *ExecOptions, req *http.Request, w http.ResponseWriter, + streams *HTTPAttachStreams, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, newSize *define.TerminalSize) (int, chan error, error) { if streams != nil { if !streams.Stdin && !streams.Stdout && !streams.Stderr { return -1, nil, errors.Wrapf(define.ErrInvalidArg, "must provide at least one stream to attach to") @@ -133,7 +134,7 @@ func (r *ConmonOCIRuntime) ExecContainerHTTP(ctr *Container, sessionID string, o conmonPipeDataChan := make(chan conmonPipeData) go func() { // attachToExec is responsible for closing pipes - attachChan <- attachExecHTTP(ctr, sessionID, req, w, streams, pipes, detachKeys, options.Terminal, cancel, hijackDone, holdConnOpen, execCmd, conmonPipeDataChan, ociLog) + attachChan <- attachExecHTTP(ctr, sessionID, req, w, streams, pipes, 
detachKeys, options.Terminal, cancel, hijackDone, holdConnOpen, execCmd, conmonPipeDataChan, ociLog, newSize) close(attachChan) }() @@ -486,7 +487,7 @@ func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *Ex } // Attach to a container over HTTP -func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, pipes *execPipes, detachKeys []byte, isTerminal bool, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, execCmd *exec.Cmd, conmonPipeDataChan chan<- conmonPipeData, ociLog string) (deferredErr error) { +func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, pipes *execPipes, detachKeys []byte, isTerminal bool, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, execCmd *exec.Cmd, conmonPipeDataChan chan<- conmonPipeData, ociLog string, newSize *define.TerminalSize) (deferredErr error) { // NOTE: As you may notice, the attach code is quite complex. // Many things happen concurrently and yet are interdependent. 
// If you ever change this function, make sure to write to the @@ -524,6 +525,14 @@ func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.Resp return err } + // resize before we start the container process + if newSize != nil { + err = c.ociRuntime.ExecAttachResize(c, sessionID, *newSize) + if err != nil { + logrus.Warn("resize failed", err) + } + } + // 2: then attach conn, err := openUnixSocket(sockPath) if err != nil { diff --git a/libpod/oci_missing.go b/libpod/oci_missing.go index 10526f368..fcf2ffca8 100644 --- a/libpod/oci_missing.go +++ b/libpod/oci_missing.go @@ -119,12 +119,13 @@ func (r *MissingRuntime) AttachResize(ctr *Container, newSize define.TerminalSiz } // ExecContainer is not available as the runtime is missing -func (r *MissingRuntime) ExecContainer(ctr *Container, sessionID string, options *ExecOptions, streams *define.AttachStreams) (int, chan error, error) { +func (r *MissingRuntime) ExecContainer(ctr *Container, sessionID string, options *ExecOptions, streams *define.AttachStreams, newSize *define.TerminalSize) (int, chan error, error) { return -1, nil, r.printError() } // ExecContainerHTTP is not available as the runtime is missing -func (r *MissingRuntime) ExecContainerHTTP(ctr *Container, sessionID string, options *ExecOptions, req *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool) (int, chan error, error) { +func (r *MissingRuntime) ExecContainerHTTP(ctr *Container, sessionID string, options *ExecOptions, req *http.Request, w http.ResponseWriter, + streams *HTTPAttachStreams, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, newSize *define.TerminalSize) (int, chan error, error) { return -1, nil, r.printError() } diff --git a/libpod/options.go b/libpod/options.go index d3be46ad8..b12153512 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -7,6 +7,7 @@ import ( "strings" "syscall" + 
"github.com/containers/buildah/pkg/parse" "github.com/containers/common/pkg/config" "github.com/containers/common/pkg/secrets" "github.com/containers/image/v5/manifest" @@ -19,6 +20,7 @@ import ( "github.com/containers/storage" "github.com/containers/storage/pkg/idtools" "github.com/cri-o/ocicni/pkg/ocicni" + "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-tools/generate" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -268,8 +270,11 @@ func WithRegistriesConf(path string) RuntimeOption { return errors.Wrap(err, "error locating specified registries.conf") } if rt.imageContext == nil { - rt.imageContext = &types.SystemContext{} + rt.imageContext = &types.SystemContext{ + BigFilesTemporaryDir: parse.GetTempDir(), + } } + rt.imageContext.SystemRegistriesConfPath = path return nil } @@ -555,7 +560,6 @@ func WithMaxLogSize(limit int64) CtrCreateOption { if ctr.valid { return define.ErrRuntimeFinalized } - ctr.config.LogSize = limit return nil @@ -863,7 +867,6 @@ func WithMountNSFrom(nsCtr *Container) CtrCreateOption { if err := checkDependencyContainer(nsCtr, ctr); err != nil { return err } - ctr.config.MountNsCtr = nsCtr.ID() return nil @@ -2355,3 +2358,42 @@ func WithVolatile() CtrCreateOption { return nil } } + +// WithPodCPUPAQ takes the given cpu period and quota and inserts them in the proper place. 
+func WithPodCPUPAQ(period uint64, quota int64) PodCreateOption { + return func(pod *Pod) error { + if pod.valid { + return define.ErrPodFinalized + } + if pod.CPUPeriod() != 0 && pod.CPUQuota() != 0 { + pod.config.InfraContainer.ResourceLimits.CPU = &specs.LinuxCPU{ + Period: &period, + Quota: &quota, + } + } else { + pod.config.InfraContainer.ResourceLimits = &specs.LinuxResources{} + pod.config.InfraContainer.ResourceLimits.CPU = &specs.LinuxCPU{ + Period: &period, + Quota: &quota, + } + } + return nil + } +} + +// WithPodCPUSetCPUS computes and sets the Cpus linux resource string which determines the amount of cores, from those available, we are allowed to execute on +func WithPodCPUSetCPUs(inp string) PodCreateOption { + return func(pod *Pod) error { + if pod.valid { + return define.ErrPodFinalized + } + if pod.ResourceLim().CPU.Period != nil { + pod.config.InfraContainer.ResourceLimits.CPU.Cpus = inp + } else { + pod.config.InfraContainer.ResourceLimits = &specs.LinuxResources{} + pod.config.InfraContainer.ResourceLimits.CPU = &specs.LinuxCPU{} + pod.config.InfraContainer.ResourceLimits.CPU.Cpus = inp + } + return nil + } +} diff --git a/libpod/pod.go b/libpod/pod.go index dce2a0c1c..d7a9b15d9 100644 --- a/libpod/pod.go +++ b/libpod/pod.go @@ -1,12 +1,14 @@ package libpod import ( + "context" "net" "time" "github.com/containers/podman/v3/libpod/define" "github.com/containers/podman/v3/libpod/lock" "github.com/cri-o/ocicni/pkg/ocicni" + "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" ) @@ -91,25 +93,26 @@ type podState struct { // Generally speaking, aside from those two exceptions, these options will set // the equivalent field in the container's configuration. 
type InfraContainerConfig struct { - ConmonPidFile string `json:"conmonPidFile"` - HasInfraContainer bool `json:"makeInfraContainer"` - NoNetwork bool `json:"noNetwork,omitempty"` - HostNetwork bool `json:"infraHostNetwork,omitempty"` - PortBindings []ocicni.PortMapping `json:"infraPortBindings"` - StaticIP net.IP `json:"staticIP,omitempty"` - StaticMAC net.HardwareAddr `json:"staticMAC,omitempty"` - UseImageResolvConf bool `json:"useImageResolvConf,omitempty"` - DNSServer []string `json:"dnsServer,omitempty"` - DNSSearch []string `json:"dnsSearch,omitempty"` - DNSOption []string `json:"dnsOption,omitempty"` - UseImageHosts bool `json:"useImageHosts,omitempty"` - HostAdd []string `json:"hostsAdd,omitempty"` - Networks []string `json:"networks,omitempty"` - ExitCommand []string `json:"exitCommand,omitempty"` - InfraImage string `json:"infraImage,omitempty"` - InfraCommand []string `json:"infraCommand,omitempty"` - Slirp4netns bool `json:"slirp4netns,omitempty"` - NetworkOptions map[string][]string `json:"network_options,omitempty"` + ConmonPidFile string `json:"conmonPidFile"` + HasInfraContainer bool `json:"makeInfraContainer"` + NoNetwork bool `json:"noNetwork,omitempty"` + HostNetwork bool `json:"infraHostNetwork,omitempty"` + PortBindings []ocicni.PortMapping `json:"infraPortBindings"` + StaticIP net.IP `json:"staticIP,omitempty"` + StaticMAC net.HardwareAddr `json:"staticMAC,omitempty"` + UseImageResolvConf bool `json:"useImageResolvConf,omitempty"` + DNSServer []string `json:"dnsServer,omitempty"` + DNSSearch []string `json:"dnsSearch,omitempty"` + DNSOption []string `json:"dnsOption,omitempty"` + UseImageHosts bool `json:"useImageHosts,omitempty"` + HostAdd []string `json:"hostsAdd,omitempty"` + Networks []string `json:"networks,omitempty"` + ExitCommand []string `json:"exitCommand,omitempty"` + InfraImage string `json:"infraImage,omitempty"` + InfraCommand []string `json:"infraCommand,omitempty"` + Slirp4netns bool `json:"slirp4netns,omitempty"` + 
NetworkOptions map[string][]string `json:"network_options,omitempty"` + ResourceLimits *specs.LinuxResources `json:"resource_limits,omitempty"` } // ID retrieves the pod's ID @@ -128,6 +131,45 @@ func (p *Pod) Namespace() string { return p.config.Namespace } +// ResourceLim returns the cpuset resource limits for the pod +func (p *Pod) ResourceLim() *specs.LinuxResources { + resCopy := &specs.LinuxResources{} + if err := JSONDeepCopy(p.config.InfraContainer.ResourceLimits, resCopy); err != nil { + return nil + } + if resCopy != nil && resCopy.CPU != nil { + return resCopy + } + empty := &specs.LinuxResources{ + CPU: &specs.LinuxCPU{}, + } + return empty +} + +// CPUPeriod returns the pod CPU period +func (p *Pod) CPUPeriod() uint64 { + resCopy := &specs.LinuxResources{} + if err := JSONDeepCopy(p.config.InfraContainer.ResourceLimits, resCopy); err != nil { + return 0 + } + if resCopy != nil && resCopy.CPU != nil && resCopy.CPU.Period != nil { + return *resCopy.CPU.Period + } + return 0 +} + +// CPUQuota returns the pod CPU quota +func (p *Pod) CPUQuota() int64 { + resCopy := &specs.LinuxResources{} + if err := JSONDeepCopy(p.config.InfraContainer.ResourceLimits, resCopy); err != nil { + return 0 + } + if resCopy != nil && resCopy.CPU != nil && resCopy.CPU.Quota != nil { + return *resCopy.CPU.Quota + } + return 0 +} + // Labels returns the pod's labels func (p *Pod) Labels() map[string]string { labels := make(map[string]string) @@ -208,7 +250,31 @@ func (p *Pod) CgroupPath() (string, error) { if err := p.updatePod(); err != nil { return "", err } + if p.state.CgroupPath != "" { + return p.state.CgroupPath, nil + } + if !p.HasInfraContainer() { + return "", errors.Wrap(define.ErrNoSuchCtr, "pod has no infra container") + } + + id := p.state.InfraContainerID + if id != "" { + ctr, err := p.runtime.state.Container(id) + if err != nil { + return "", errors.Wrapf(err, "could not get infra") + } + if ctr != nil { + ctr.Start(context.Background(), false) + cgroupPath, err 
:= ctr.CGroupPath() + if err != nil { + return "", errors.Wrapf(err, "could not get container cgroup") + } + p.state.CgroupPath = cgroupPath + p.save() + return cgroupPath, nil + } + } return p.state.CgroupPath, nil } diff --git a/libpod/pod_api.go b/libpod/pod_api.go index 14fe8276c..d8f5d15f8 100644 --- a/libpod/pod_api.go +++ b/libpod/pod_api.go @@ -538,6 +538,9 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) { infraConfig.StaticMAC = p.config.InfraContainer.StaticMAC.String() infraConfig.NoManageResolvConf = p.config.InfraContainer.UseImageResolvConf infraConfig.NoManageHosts = p.config.InfraContainer.UseImageHosts + infraConfig.CPUPeriod = p.CPUPeriod() + infraConfig.CPUQuota = p.CPUQuota() + infraConfig.CPUSetCPUs = p.ResourceLim().CPU.Cpus if len(p.config.InfraContainer.DNSServer) > 0 { infraConfig.DNSServer = make([]string, 0, len(p.config.InfraContainer.DNSServer)) @@ -581,6 +584,9 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) { SharedNamespaces: sharesNS, NumContainers: uint(len(containers)), Containers: ctrs, + CPUSetCPUs: p.ResourceLim().CPU.Cpus, + CPUPeriod: p.CPUPeriod(), + CPUQuota: p.CPUQuota(), } return &inspectData, nil diff --git a/libpod/runtime.go b/libpod/runtime.go index d775b55e1..f53789e89 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -15,6 +15,7 @@ import ( "syscall" "time" + "github.com/containers/buildah/pkg/parse" "github.com/containers/common/libimage" "github.com/containers/common/pkg/config" "github.com/containers/common/pkg/defaultnet" @@ -381,7 +382,9 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (retErr error) { // Set up containers/image if runtime.imageContext == nil { - runtime.imageContext = &types.SystemContext{} + runtime.imageContext = &types.SystemContext{ + BigFilesTemporaryDir: parse.GetTempDir(), + } } runtime.imageContext.SignaturePolicyPath = runtime.config.Engine.SignaturePolicyPath @@ -465,7 +468,7 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (retErr 
error) { } // Set up the CNI net plugin - netPlugin, err := ocicni.InitCNI(runtime.config.Network.DefaultNetwork, runtime.config.Network.NetworkConfigDir, runtime.config.Network.CNIPluginDirs...) + netPlugin, err := ocicni.InitCNINoInotify(runtime.config.Network.DefaultNetwork, runtime.config.Network.NetworkConfigDir, "", runtime.config.Network.CNIPluginDirs...) if err != nil { return errors.Wrapf(err, "error configuring CNI network plugin") } diff --git a/libpod/runtime_pod_infra_linux.go b/libpod/runtime_pod_infra_linux.go index c20153c8d..6b002f65a 100644 --- a/libpod/runtime_pod_infra_linux.go +++ b/libpod/runtime_pod_infra_linux.go @@ -146,7 +146,6 @@ func (r *Runtime) makeInfraContainer(ctx context.Context, p *Pod, imgName, rawIm options = append(options, WithExitCommand(p.config.InfraContainer.ExitCommand)) } } - g.SetRootReadonly(true) g.SetProcessArgs(infraCtrCommand) @@ -173,7 +172,6 @@ func (r *Runtime) makeInfraContainer(ctx context.Context, p *Pod, imgName, rawIm // Ignore mqueue sysctls if not sharing IPC if !p.config.UsePodIPC && strings.HasPrefix(sysctlKey, "fs.mqueue.") { logrus.Infof("Sysctl %s=%s ignored in containers.conf, since IPC Namespace for pod is unused", sysctlKey, sysctlVal) - continue } @@ -188,7 +186,6 @@ func (r *Runtime) makeInfraContainer(ctx context.Context, p *Pod, imgName, rawIm logrus.Infof("Sysctl %s=%s ignored in containers.conf, since UTS Namespace for pod is unused", sysctlKey, sysctlVal) continue } - g.AddLinuxSysctl(sysctlKey, sysctlVal) } @@ -200,7 +197,11 @@ func (r *Runtime) makeInfraContainer(ctx context.Context, p *Pod, imgName, rawIm if len(p.config.InfraContainer.ConmonPidFile) > 0 { options = append(options, WithConmonPidFile(p.config.InfraContainer.ConmonPidFile)) } + newRes := new(spec.LinuxResources) + newRes.CPU = new(spec.LinuxCPU) + newRes.CPU = p.ResourceLim().CPU + g.Config.Linux.Resources.CPU = newRes.CPU return r.newContainer(ctx, g.Config, options...) 
} @@ -211,7 +212,6 @@ func (r *Runtime) createInfraContainer(ctx context.Context, p *Pod) (*Container, if !r.valid { return nil, define.ErrRuntimeStopped } - imageName := p.config.InfraContainer.InfraImage if imageName == "" { imageName = r.config.Engine.InfraImage diff --git a/libpod/stats.go b/libpod/stats.go index f4732b4fc..6f0360ef1 100644 --- a/libpod/stats.go +++ b/libpod/stats.go @@ -56,7 +56,11 @@ func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*de previousCPU := previousStats.CPUNano now := uint64(time.Now().UnixNano()) + stats.Duration = cgroupStats.CPU.Usage.Total + stats.UpTime = time.Duration(stats.Duration) stats.CPU = calculateCPUPercent(cgroupStats, previousCPU, now, previousStats.SystemNano) + stats.AvgCPU = calculateAvgCPU(stats.CPU, previousStats.AvgCPU, previousStats.DataPoints) + stats.DataPoints = previousStats.DataPoints + 1 stats.MemUsage = cgroupStats.Memory.Usage.Usage stats.MemLimit = getMemLimit(cgroupStats.Memory.Usage.Limit) stats.MemPerc = (float64(stats.MemUsage) / float64(stats.MemLimit)) * 100 @@ -127,3 +131,9 @@ func calculateBlockIO(stats *cgroups.Metrics) (read uint64, write uint64) { } return } + +// calculateAvgCPU calculates the avg CPU percentage given the previous average and the number of data points. +func calculateAvgCPU(statsCPU float64, prevAvg float64, prevData int64) float64 { + avgPer := ((prevAvg * float64(prevData)) + statsCPU) / (float64(prevData) + 1) + return avgPer +} |