diff options
Diffstat (limited to 'libpod')
-rw-r--r-- | libpod/container.go | 10 | ||||
-rw-r--r-- | libpod/container_exec.go | 83 | ||||
-rw-r--r-- | libpod/container_internal.go | 65 | ||||
-rw-r--r-- | libpod/container_internal_linux.go | 89 | ||||
-rw-r--r-- | libpod/container_internal_unsupported.go | 5 | ||||
-rw-r--r-- | libpod/container_log.go | 24 | ||||
-rw-r--r-- | libpod/define/containerstate.go | 10 | ||||
-rw-r--r-- | libpod/events.go | 39 | ||||
-rw-r--r-- | libpod/events/config.go | 2 | ||||
-rw-r--r-- | libpod/events/events.go | 2 | ||||
-rw-r--r-- | libpod/networking_linux.go | 42 | ||||
-rw-r--r-- | libpod/oci.go | 7 | ||||
-rw-r--r-- | libpod/oci_attach_linux.go | 13 | ||||
-rw-r--r-- | libpod/oci_conmon_exec_linux.go | 19 | ||||
-rw-r--r-- | libpod/oci_conmon_linux.go | 6 | ||||
-rw-r--r-- | libpod/oci_missing.go | 5 | ||||
-rw-r--r-- | libpod/options.go | 19 | ||||
-rw-r--r-- | libpod/runtime.go | 7 | ||||
-rw-r--r-- | libpod/stats.go | 10 | ||||
-rw-r--r-- | libpod/volume_internal.go | 19 |
20 files changed, 345 insertions, 131 deletions
diff --git a/libpod/container.go b/libpod/container.go index c6f0cd618..4b9bea5fc 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -957,6 +957,12 @@ func (c *Container) cGroupPath() (string, error) { // is the libpod-specific one we're looking for. // // See #8397 on the need for the longest-path look up. + // + // And another workaround for containers running systemd as the payload. + // containers running systemd moves themselves into a child subgroup of + // the named systemd cgroup hierarchy. Ignore any named cgroups during + // the lookup. + // See #10602 for more details. procPath := fmt.Sprintf("/proc/%d/cgroup", c.state.PID) lines, err := ioutil.ReadFile(procPath) if err != nil { @@ -972,6 +978,10 @@ func (c *Container) cGroupPath() (string, error) { logrus.Debugf("Error parsing cgroup: expected 3 fields but got %d: %s", len(fields), procPath) continue } + // Ignore named cgroups like name=systemd. + if bytes.Contains(fields[1], []byte("=")) { + continue + } path := string(fields[2]) if len(path) > len(cgroupPath) { cgroupPath = path diff --git a/libpod/container_exec.go b/libpod/container_exec.go index c359f1e5d..5d4bcb422 100644 --- a/libpod/container_exec.go +++ b/libpod/container_exec.go @@ -1,6 +1,7 @@ package libpod import ( + "context" "io/ioutil" "net/http" "os" @@ -276,9 +277,10 @@ func (c *Container) ExecStart(sessionID string) error { } // ExecStartAndAttach starts and attaches to an exec session in a container. +// newSize resizes the tty to this size before the process is started, must be nil if the exec session has no tty // TODO: Should we include detach keys in the signature to allow override? // TODO: How do we handle AttachStdin/AttachStdout/AttachStderr? -func (c *Container) ExecStartAndAttach(sessionID string, streams *define.AttachStreams) error { +func (c *Container) ExecStartAndAttach(sessionID string, streams *define.AttachStreams, newSize *define.TerminalSize) error { if !c.batched { c.lock.Lock() defer c.lock.Unlock() @@ -309,7 +311,7 @@ func (c *Container) ExecStartAndAttach(sessionID string, streams *define.AttachS return err } - pid, attachChan, err := c.ociRuntime.ExecContainer(c, session.ID(), opts, streams) + pid, attachChan, err := c.ociRuntime.ExecContainer(c, session.ID(), opts, streams, newSize) if err != nil { return err } @@ -372,7 +374,9 @@ func (c *Container) ExecStartAndAttach(sessionID string, streams *define.AttachS } // ExecHTTPStartAndAttach starts and performs an HTTP attach to an exec session. -func (c *Container) ExecHTTPStartAndAttach(sessionID string, r *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, detachKeys *string, cancel <-chan bool, hijackDone chan<- bool) error { +// newSize resizes the tty to this size before the process is started, must be nil if the exec session has no tty +func (c *Container) ExecHTTPStartAndAttach(sessionID string, r *http.Request, w http.ResponseWriter, + streams *HTTPAttachStreams, detachKeys *string, cancel <-chan bool, hijackDone chan<- bool, newSize *define.TerminalSize) error { // TODO: How do we combine streams with the default streams set in the exec session? // Ensure that we don't leak a goroutine here @@ -430,7 +434,7 @@ func (c *Container) ExecHTTPStartAndAttach(sessionID string, r *http.Request, w close(holdConnOpen) }() - pid, attachChan, err := c.ociRuntime.ExecContainerHTTP(c, session.ID(), execOpts, r, w, streams, cancel, hijackDone, holdConnOpen) + pid, attachChan, err := c.ociRuntime.ExecContainerHTTP(c, session.ID(), execOpts, r, w, streams, cancel, hijackDone, holdConnOpen, newSize) if err != nil { session.State = define.ExecStateStopped session.ExitCode = define.TranslateExecErrorToExitCode(define.ExecErrorCodeGeneric, err) @@ -539,18 +543,7 @@ func (c *Container) ExecStop(sessionID string, timeout *uint) error { var cleanupErr error // Retrieve exit code and update status - exitCode, err := c.readExecExitCode(session.ID()) - if err != nil { - cleanupErr = err - } - session.ExitCode = exitCode - session.PID = 0 - session.State = define.ExecStateStopped - - if err := c.save(); err != nil { - if cleanupErr != nil { - logrus.Errorf("Error stopping container %s exec session %s: %v", c.ID(), session.ID(), cleanupErr) - } + if err := retrieveAndWriteExecExitCode(c, session.ID()); err != nil { cleanupErr = err } @@ -592,15 +585,7 @@ func (c *Container) ExecCleanup(sessionID string) error { return errors.Wrapf(define.ErrExecSessionStateInvalid, "cannot clean up container %s exec session %s as it is running", c.ID(), session.ID()) } - exitCode, err := c.readExecExitCode(session.ID()) - if err != nil { - return err - } - session.ExitCode = exitCode - session.PID = 0 - session.State = define.ExecStateStopped - - if err := c.save(); err != nil { + if err := retrieveAndWriteExecExitCode(c, session.ID()); err != nil { return err } } @@ -637,9 +622,9 @@ func (c *Container) ExecRemove(sessionID string, force bool) error { return err } if !running { - session.State = define.ExecStateStopped - // TODO: should we retrieve exit code here? - // TODO: Might be worth saving state here. + if err := retrieveAndWriteExecExitCode(c, session.ID()); err != nil { + return err + } } } @@ -653,6 +638,10 @@ func (c *Container) ExecRemove(sessionID string, force bool) error { return err } + if err := retrieveAndWriteExecExitCode(c, session.ID()); err != nil { + return err + } + if err := c.cleanupExecBundle(session.ID()); err != nil { return err } @@ -733,7 +722,10 @@ func (c *Container) Exec(config *ExecConfig, streams *define.AttachStreams, resi // API there. // TODO: Refactor so this is closed here, before we remove the exec // session. + var size *define.TerminalSize if resize != nil { + s := <-resize + size = &s go func() { logrus.Debugf("Sending resize events to exec session %s", sessionID) for resizeRequest := range resize { @@ -751,16 +743,31 @@ func (c *Container) Exec(config *ExecConfig, streams *define.AttachStreams, resi }() } - if err := c.ExecStartAndAttach(sessionID, streams); err != nil { + if err := c.ExecStartAndAttach(sessionID, streams, size); err != nil { return -1, err } session, err := c.ExecSession(sessionID) if err != nil { + if errors.Cause(err) == define.ErrNoSuchExecSession { + // TODO: If a proper Context is ever plumbed in here, we + // should use it. + // As things stand, though, it's not worth it - this + // should always terminate quickly since it's not + // streaming. + diedEvent, err := c.runtime.GetExecDiedEvent(context.Background(), c.ID(), sessionID) + if err != nil { + return -1, errors.Wrapf(err, "error retrieving exec session %s exit code", sessionID) + } + return diedEvent.ContainerExitCode, nil + } return -1, err } exitCode := session.ExitCode if err := c.ExecRemove(sessionID, false); err != nil { + if errors.Cause(err) == define.ErrNoSuchExecSession { + return exitCode, nil + } return -1, err } @@ -927,6 +934,8 @@ func (c *Container) getActiveExecSessions() ([]string, error) { session.PID = 0 session.State = define.ExecStateStopped + c.newExecDiedEvent(session.ID(), exitCode) + needSave = true } if err := c.cleanupExecBundle(id); err != nil { @@ -1036,6 +1045,22 @@ func writeExecExitCode(c *Container, sessionID string, exitCode int) error { return errors.Wrapf(err, "error syncing container %s state to remove exec session %s", c.ID(), sessionID) } + return justWriteExecExitCode(c, sessionID, exitCode) +} + +func retrieveAndWriteExecExitCode(c *Container, sessionID string) error { + exitCode, err := c.readExecExitCode(sessionID) + if err != nil { + return err + } + + return justWriteExecExitCode(c, sessionID, exitCode) +} + +func justWriteExecExitCode(c *Container, sessionID string, exitCode int) error { + // Write an event first + c.newExecDiedEvent(sessionID, exitCode) + session, ok := c.state.ExecSessions[sessionID] if !ok { // Exec session already removed. diff --git a/libpod/container_internal.go b/libpod/container_internal.go index f77825efd..545b78976 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -42,6 +42,7 @@ const ( // name of the directory holding the artifacts artifactsDir = "artifacts" execDirPermission = 0755 + preCheckpointDir = "pre-checkpoint" ) // rootFsSize gets the size of the container's root filesystem @@ -141,7 +142,7 @@ func (c *Container) CheckpointPath() string { // PreCheckpointPath returns the path to the directory containing the pre-checkpoint-images func (c *Container) PreCheckPointPath() string { - return filepath.Join(c.bundlePath(), "pre-checkpoint") + return filepath.Join(c.bundlePath(), preCheckpointDir) } // AttachSocketPath retrieves the path of the container's attach socket @@ -427,7 +428,7 @@ func (c *Container) setupStorage(ctx context.Context) error { }, LabelOpts: c.config.LabelOpts, } - if c.restoreFromCheckpoint { + if c.restoreFromCheckpoint && !c.config.Privileged { // If restoring from a checkpoint, the root file-system // needs to be mounted with the same SELinux labels as // it was mounted previously. @@ -1061,7 +1062,7 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error { } for _, v := range c.config.NamedVolumes { - if err := c.chownVolume(v.Name); err != nil { + if err := c.fixVolumePermissions(v); err != nil { return err } } @@ -1680,64 +1681,6 @@ func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string) return vol, nil } -// Chown the specified volume if necessary. -func (c *Container) chownVolume(volumeName string) error { - vol, err := c.runtime.state.Volume(volumeName) - if err != nil { - return errors.Wrapf(err, "error retrieving named volume %s for container %s", volumeName, c.ID()) - } - - vol.lock.Lock() - defer vol.lock.Unlock() - - // The volume may need a copy-up. Check the state. - if err := vol.update(); err != nil { - return err - } - - // TODO: For now, I've disabled chowning volumes owned by non-Podman - // drivers. This may be safe, but it's really going to be a case-by-case - // thing, I think - safest to leave disabled now and re-enable later if - // there is a demand. - if vol.state.NeedsChown && !vol.UsesVolumeDriver() { - vol.state.NeedsChown = false - - uid := int(c.config.Spec.Process.User.UID) - gid := int(c.config.Spec.Process.User.GID) - - if c.config.IDMappings.UIDMap != nil { - p := idtools.IDPair{ - UID: uid, - GID: gid, - } - mappings := idtools.NewIDMappingsFromMaps(c.config.IDMappings.UIDMap, c.config.IDMappings.GIDMap) - newPair, err := mappings.ToHost(p) - if err != nil { - return errors.Wrapf(err, "error mapping user %d:%d", uid, gid) - } - uid = newPair.UID - gid = newPair.GID - } - - vol.state.UIDChowned = uid - vol.state.GIDChowned = gid - - if err := vol.save(); err != nil { - return err - } - - mountPoint, err := vol.MountPoint() - if err != nil { - return err - } - - if err := os.Lchown(mountPoint, uid, gid); err != nil { - return err - } - } - return nil -} - // cleanupStorage unmounts and cleans up the container's root filesystem func (c *Container) cleanupStorage() error { if !c.state.Mounted { diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 94bf7855b..25db3cff0 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -909,14 +909,15 @@ func (c *Container) exportCheckpoint(options ContainerCheckpointOptions) error { includeFiles := []string{ "artifacts", "ctr.log", - metadata.CheckpointDirectory, metadata.ConfigDumpFile, metadata.SpecDumpFile, metadata.NetworkStatusFile, } if options.PreCheckPoint { - includeFiles[0] = "pre-checkpoint" + includeFiles = append(includeFiles, preCheckpointDir) + } else { + includeFiles = append(includeFiles, metadata.CheckpointDirectory) } // Get root file-system changes included in the checkpoint archive var addToTarFiles []string @@ -1660,9 +1661,13 @@ func (c *Container) generateResolvConf() (string, error) { // check if systemd-resolved is used, assume it is used when 127.0.0.53 is the only nameserver if len(ns) == 1 && ns[0] == "127.0.0.53" { // read the actual resolv.conf file for systemd-resolved - contents, err = ioutil.ReadFile("/run/systemd/resolve/resolv.conf") + resolvedContents, err := ioutil.ReadFile("/run/systemd/resolve/resolv.conf") if err != nil { - return "", errors.Wrapf(err, "detected that systemd-resolved is in use, but could not locate real resolv.conf") + if !os.IsNotExist(err) { + return "", errors.Wrapf(err, "detected that systemd-resolved is in use, but could not locate real resolv.conf") + } + } else { + contents = resolvedContents } } @@ -1815,7 +1820,7 @@ func (c *Container) getHosts() string { if c.Hostname() != "" { if c.config.NetMode.IsSlirp4netns() { // When using slirp4netns, the interface gets a static IP - slirp4netnsIP, err := GetSlirp4netnsGateway(c.slirp4netnsSubnet) + slirp4netnsIP, err := GetSlirp4netnsIP(c.slirp4netnsSubnet) if err != nil { logrus.Warn("failed to determine slirp4netnsIP: ", err.Error()) } else { @@ -2425,3 +2430,77 @@ func (c *Container) createSecretMountDir() error { return err } + +// Fix ownership and permissions of the specified volume if necessary. +func (c *Container) fixVolumePermissions(v *ContainerNamedVolume) error { + vol, err := c.runtime.state.Volume(v.Name) + if err != nil { + return errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID()) + } + + vol.lock.Lock() + defer vol.lock.Unlock() + + // The volume may need a copy-up. Check the state. + if err := vol.update(); err != nil { + return err + } + + // TODO: For now, I've disabled chowning volumes owned by non-Podman + // drivers. This may be safe, but it's really going to be a case-by-case + // thing, I think - safest to leave disabled now and re-enable later if + // there is a demand. + if vol.state.NeedsChown && !vol.UsesVolumeDriver() { + vol.state.NeedsChown = false + + uid := int(c.config.Spec.Process.User.UID) + gid := int(c.config.Spec.Process.User.GID) + + if c.config.IDMappings.UIDMap != nil { + p := idtools.IDPair{ + UID: uid, + GID: gid, + } + mappings := idtools.NewIDMappingsFromMaps(c.config.IDMappings.UIDMap, c.config.IDMappings.GIDMap) + newPair, err := mappings.ToHost(p) + if err != nil { + return errors.Wrapf(err, "error mapping user %d:%d", uid, gid) + } + uid = newPair.UID + gid = newPair.GID + } + + vol.state.UIDChowned = uid + vol.state.GIDChowned = gid + + if err := vol.save(); err != nil { + return err + } + + mountPoint, err := vol.MountPoint() + if err != nil { + return err + } + + if err := os.Lchown(mountPoint, uid, gid); err != nil { + return err + } + + // Make sure the new volume matches the permissions of the target directory. + // https://github.com/containers/podman/issues/10188 + st, err := os.Lstat(filepath.Join(c.state.Mountpoint, v.Dest)) + if err == nil { + if err := os.Chmod(mountPoint, st.Mode()|0111); err != nil { + return err + } + stat := st.Sys().(*syscall.Stat_t) + atime := time.Unix(int64(stat.Atim.Sec), int64(stat.Atim.Nsec)) + if err := os.Chtimes(mountPoint, atime, st.ModTime()); err != nil { + return err + } + } else if !os.IsNotExist(err) { + return err + } + } + return nil +} diff --git a/libpod/container_internal_unsupported.go b/libpod/container_internal_unsupported.go index f979bcbde..125329ce5 100644 --- a/libpod/container_internal_unsupported.go +++ b/libpod/container_internal_unsupported.go @@ -57,3 +57,8 @@ func (c *Container) reloadNetwork() error { func (c *Container) getUserOverrides() *lookup.Overrides { return nil } + +// Fix ownership and permissions of the specified volume if necessary. +func (c *Container) fixVolumePermissions(v *ContainerNamedVolume) error { + return define.ErrNotImplemented +} diff --git a/libpod/container_log.go b/libpod/container_log.go index a30e4f5cc..43b3f7736 100644 --- a/libpod/container_log.go +++ b/libpod/container_log.go @@ -4,10 +4,12 @@ import ( "context" "fmt" "os" + "time" "github.com/containers/podman/v3/libpod/define" "github.com/containers/podman/v3/libpod/events" "github.com/containers/podman/v3/libpod/logs" + "github.com/hpcloud/tail/watch" "github.com/pkg/errors" "github.com/sirupsen/logrus" ) @@ -74,7 +76,7 @@ func (c *Container) readFromLogFile(ctx context.Context, options *logs.LogOption } nll, err := logs.NewLogLine(line.Text) if err != nil { - logrus.Error(err) + logrus.Errorf("Error getting new log line: %v", err) continue } if nll.Partial() { @@ -93,17 +95,20 @@ func (c *Container) readFromLogFile(ctx context.Context, options *logs.LogOption }() // Check if container is still running or paused if options.Follow { + // If the container isn't running or if we encountered an error + // getting its state, instruct the logger to read the file + // until EOF. state, err := c.State() if err != nil || state != define.ContainerStateRunning { - // If the container isn't running or if we encountered - // an error getting its state, instruct the logger to - // read the file until EOF. + // Make sure to wait at least for the poll duration + // before stopping the file logger (see #10675). + time.Sleep(watch.POLL_DURATION) tailError := t.StopAtEOF() if tailError != nil && fmt.Sprintf("%v", tailError) != "tail: stop at eof" { - logrus.Error(tailError) + logrus.Errorf("Error stopping logger: %v", tailError) } - if errors.Cause(err) != define.ErrNoSuchCtr { - logrus.Error(err) + if err != nil && errors.Cause(err) != define.ErrNoSuchCtr { + logrus.Errorf("Error getting container state: %v", err) } return nil } @@ -124,9 +129,12 @@ func (c *Container) readFromLogFile(ctx context.Context, options *logs.LogOption // Now wait for the died event and signal to finish // reading the log until EOF. <-eventChannel + // Make sure to wait at least for the poll duration + // before stopping the file logger (see #10675). + time.Sleep(watch.POLL_DURATION) tailError := t.StopAtEOF() if tailError != nil && fmt.Sprintf("%v", tailError) != "tail: stop at eof" { - logrus.Error(tailError) + logrus.Errorf("Error stopping logger: %v", tailError) } }() } diff --git a/libpod/define/containerstate.go b/libpod/define/containerstate.go index 5d2bc9099..fc272beaa 100644 --- a/libpod/define/containerstate.go +++ b/libpod/define/containerstate.go @@ -1,6 +1,10 @@ package define -import "github.com/pkg/errors" +import ( + "time" + + "github.com/pkg/errors" +) // ContainerStatus represents the current state of a container type ContainerStatus int @@ -120,12 +124,14 @@ func (s ContainerExecStatus) String() string { // ContainerStats contains the statistics information for a running container type ContainerStats struct { + AvgCPU float64 ContainerID string Name string PerCPU []uint64 CPU float64 CPUNano uint64 CPUSystemNano uint64 + DataPoints int64 SystemNano uint64 MemUsage uint64 MemLimit uint64 @@ -135,4 +141,6 @@ type ContainerStats struct { BlockInput uint64 BlockOutput uint64 PIDs uint64 + UpTime time.Duration + Duration uint64 } diff --git a/libpod/events.go b/libpod/events.go index 839229674..22c51aeec 100644 --- a/libpod/events.go +++ b/libpod/events.go @@ -46,7 +46,22 @@ func (c *Container) newContainerExitedEvent(exitCode int32) { e.Type = events.Container e.ContainerExitCode = int(exitCode) if err := c.runtime.eventer.Write(e); err != nil { - logrus.Errorf("unable to write pod event: %q", err) + logrus.Errorf("unable to write container exited event: %q", err) + } +} + +// newExecDiedEvent creates a new event for an exec session's death +func (c *Container) newExecDiedEvent(sessionID string, exitCode int) { + e := events.NewEvent(events.ExecDied) + e.ID = c.ID() + e.Name = c.Name() + e.Image = c.config.RootfsImageName + e.Type = events.Container + e.ContainerExitCode = exitCode + e.Attributes = make(map[string]string) + e.Attributes["execID"] = sessionID + if err := c.runtime.eventer.Write(e); err != nil { + logrus.Errorf("unable to write exec died event: %q", err) } } @@ -154,3 +169,25 @@ func (r *Runtime) GetLastContainerEvent(ctx context.Context, nameOrID string, co // return the last element in the slice return containerEvents[len(containerEvents)-1], nil } + +// GetExecDiedEvent takes a container name or ID, exec session ID, and returns +// that exec session's Died event (if it has already occurred). +func (r *Runtime) GetExecDiedEvent(ctx context.Context, nameOrID, execSessionID string) (*events.Event, error) { + filters := []string{ + fmt.Sprintf("container=%s", nameOrID), + "event=exec_died", + "type=container", + fmt.Sprintf("label=execID=%s", execSessionID), + } + + containerEvents, err := r.GetEvents(ctx, filters) + if err != nil { + return nil, err + } + // There *should* only be one event maximum. + // But... just in case... let's not blow up if there's more than one. + if len(containerEvents) < 1 { + return nil, errors.Wrapf(events.ErrEventNotFound, "exec died event for session %s (container %s) not found", execSessionID, nameOrID) + } + return containerEvents[len(containerEvents)-1], nil +} diff --git a/libpod/events/config.go b/libpod/events/config.go index 085fa9d52..d88d7b6e3 100644 --- a/libpod/events/config.go +++ b/libpod/events/config.go @@ -127,6 +127,8 @@ const ( Create Status = "create" // Exec ... Exec Status = "exec" + // ExecDied indicates that an exec session in a container died. + ExecDied Status = "exec_died" // Exited indicates that a container's process died Exited Status = "died" // Export ... diff --git a/libpod/events/events.go b/libpod/events/events.go index 01ea6a386..e03215eff 100644 --- a/libpod/events/events.go +++ b/libpod/events/events.go @@ -149,6 +149,8 @@ func StringToStatus(name string) (Status, error) { return Create, nil case Exec.String(): return Exec, nil + case ExecDied.String(): + return ExecDied, nil case Exited.String(): return Exited, nil case Export.String(): diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go index c928e02a6..1b775a4f3 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -46,6 +46,9 @@ const ( // rootlessCNINSName is the file name for the rootless network namespace bind mount rootlessCNINSName = "rootless-cni-ns" + + // persistentCNIDir is the directory where the CNI files are stored + persistentCNIDir = "/var/lib/cni" ) // Get an OCICNI network config @@ -150,14 +153,31 @@ func (r *RootlessCNI) Do(toRun func() error) error { } } - // cni plugins need access to /var and /run - runDir := filepath.Join(r.dir, "run") - varDir := filepath.Join(r.dir, "var") + // cni plugins need access to /var/lib/cni and /run + varDir := "" + varTarget := persistentCNIDir + // we can only mount to a target dir which exists, check /var/lib/cni recursively + // while we could always use /var there are cases where a user might store the cni + // configs under /var/custom and this would break + for { + if _, err := os.Stat(varTarget); err == nil { + varDir = filepath.Join(r.dir, strings.TrimPrefix(varTarget, "/")) + break + } + varTarget = filepath.Base(varTarget) + if varTarget == "/" { + break + } + } + if varDir == "" { + return errors.New("failed to stat /var directory") + } // make sure to mount var first - err = unix.Mount(varDir, "/var", "none", unix.MS_BIND, "") + err = unix.Mount(varDir, varTarget, "none", unix.MS_BIND, "") if err != nil { - return errors.Wrap(err, "failed to mount /var for rootless cni") + return errors.Wrapf(err, "failed to mount %s for rootless cni", varTarget) } + runDir := filepath.Join(r.dir, "run") // recursive mount to keep the netns mount err = unix.Mount(runDir, "/run", "none", unix.MS_BIND|unix.MS_REC, "") if err != nil { @@ -385,7 +405,7 @@ func (r *Runtime) GetRootlessCNINetNs(new bool) (*RootlessCNI, error) { // create cni directories to store files // they will be bind mounted to the correct location in a extra mount ns - err = os.MkdirAll(filepath.Join(cniDir, "var"), 0700) + err = os.MkdirAll(filepath.Join(cniDir, strings.TrimPrefix(persistentCNIDir, "/")), 0700) if err != nil { return nil, errors.Wrap(err, "could not create rootless-cni var directory") } @@ -888,6 +908,10 @@ func (c *Container) getContainerNetworkInfo() (*define.InspectNetworkSettings, e if err != nil { return nil, err } + // see https://github.com/containers/podman/issues/10090 + // the container has to be locked for syncContainer() + netNsCtr.lock.Lock() + defer netNsCtr.lock.Unlock() // Have to sync to ensure that state is populated if err := netNsCtr.syncContainer(); err != nil { return nil, err @@ -1043,7 +1067,7 @@ func resultToBasicNetworkConfig(result *cnitypes.Result) (define.InspectBasicNet // after itself on an unclean reboot. Return what we're pretty sure is the path // to CNI's internal files (it's not really exposed to us). func getCNINetworksDir() (string, error) { - return "/var/lib/cni/networks", nil + return filepath.Join(persistentCNIDir, "networks"), nil } type logrusDebugWriter struct { @@ -1090,7 +1114,7 @@ func (c *Container) NetworkDisconnect(nameOrID, netName string, force bool) erro } c.newNetworkEvent(events.NetworkDisconnect, netName) - if c.state.State != define.ContainerStateRunning { + if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) { return nil } @@ -1145,7 +1169,7 @@ func (c *Container) NetworkConnect(nameOrID, netName string, aliases []string) e return err } c.newNetworkEvent(events.NetworkConnect, netName) - if c.state.State != define.ContainerStateRunning { + if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) { return nil } if c.state.NetNS == nil { diff --git a/libpod/oci.go b/libpod/oci.go index 1f2c7dd71..c92d9a077 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -72,13 +72,16 @@ type OCIRuntime interface { // has completed, as one might expect. The attach session will remain // running, in a goroutine that will return via the chan error in the // return signature. - ExecContainer(ctr *Container, sessionID string, options *ExecOptions, streams *define.AttachStreams) (int, chan error, error) + // newSize resizes the tty to this size before the process is started, must be nil if the exec session has no tty + ExecContainer(ctr *Container, sessionID string, options *ExecOptions, streams *define.AttachStreams, newSize *define.TerminalSize) (int, chan error, error) // ExecContainerHTTP executes a command in a running container and // attaches its standard streams to a provided hijacked HTTP session. // Maintains the same invariants as ExecContainer (returns on session // start, with a goroutine running in the background to handle attach). // The HTTP attach itself maintains the same invariants as HTTPAttach. - ExecContainerHTTP(ctr *Container, sessionID string, options *ExecOptions, r *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool) (int, chan error, error) + // newSize resizes the tty to this size before the process is started, must be nil if the exec session has no tty + ExecContainerHTTP(ctr *Container, sessionID string, options *ExecOptions, r *http.Request, w http.ResponseWriter, + streams *HTTPAttachStreams, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, newSize *define.TerminalSize) (int, chan error, error) // ExecContainerDetached executes a command in a running container, but // does not attach to it. Returns the PID of the exec session and an // error (if starting the exec session failed) diff --git a/libpod/oci_attach_linux.go b/libpod/oci_attach_linux.go index b5040de3e..de435b58a 100644 --- a/libpod/oci_attach_linux.go +++ b/libpod/oci_attach_linux.go @@ -94,17 +94,18 @@ func (c *Container) attach(streams *define.AttachStreams, keys string, resize <- // this ensures attachToExec gets all of the output of the called process // conmon will then send the exit code of the exec process, or an error in the exec session // startFd must be the input side of the fd. +// newSize resizes the tty to this size before the process is started, must be nil if the exec session has no tty // conmon will wait to start the exec session until the parent process has setup the console socket. // Once attachToExec successfully attaches to the console socket, the child conmon process responsible for calling runtime exec // will read from the output side of start fd, thus learning to start the child process. // Thus, the order goes as follow: // 1. conmon parent process sets up its console socket. sends on attachFd -// 2. attachToExec attaches to the console socket after reading on attachFd +// 2. attachToExec attaches to the console socket after reading on attachFd and resizes the tty // 3. child waits on startFd for attachToExec to attach to said console socket // 4. attachToExec sends on startFd, signalling it has attached to the socket and child is ready to go // 5. child receives on startFd, runs the runtime exec command // attachToExec is responsible for closing startFd and attachFd -func (c *Container) attachToExec(streams *define.AttachStreams, keys *string, sessionID string, startFd, attachFd *os.File) error { +func (c *Container) attachToExec(streams *define.AttachStreams, keys *string, sessionID string, startFd, attachFd *os.File, newSize *define.TerminalSize) error { if !streams.AttachOutput && !streams.AttachError && !streams.AttachInput { return errors.Wrapf(define.ErrInvalidArg, "must provide at least one stream to attach to") } @@ -137,6 +138,14 @@ func (c *Container) attachToExec(streams *define.AttachStreams, keys *string, se return err } + // resize before we start the container process + if newSize != nil { + err = c.ociRuntime.ExecAttachResize(c, sessionID, *newSize) + if err != nil { + logrus.Warn("resize failed", err) + } + } + // 2: then attach conn, err := openUnixSocket(sockPath) if err != nil { diff --git a/libpod/oci_conmon_exec_linux.go b/libpod/oci_conmon_exec_linux.go index 76338b86c..09d3d1833 100644 --- a/libpod/oci_conmon_exec_linux.go +++ b/libpod/oci_conmon_exec_linux.go @@ -25,7 +25,7 @@ import ( ) // ExecContainer executes a command in a running container -func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options *ExecOptions, streams *define.AttachStreams) (int, chan error, error) { +func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options *ExecOptions, streams *define.AttachStreams, newSize *define.TerminalSize) (int, chan error, error) { if options == nil { return -1, nil, errors.Wrapf(define.ErrInvalidArg, "must provide an ExecOptions struct to ExecContainer") } @@ -68,7 +68,7 @@ func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options attachChan := make(chan error) go func() { // attachToExec is responsible for closing pipes - attachChan <- c.attachToExec(streams, options.DetachKeys, sessionID, pipes.startPipe, pipes.attachPipe) + attachChan <- c.attachToExec(streams, options.DetachKeys, sessionID, pipes.startPipe, pipes.attachPipe, newSize) close(attachChan) }() @@ -83,7 +83,8 @@ func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options // ExecContainerHTTP executes a new command in an existing container and // forwards its standard streams over an attach -func (r *ConmonOCIRuntime) ExecContainerHTTP(ctr *Container, sessionID string, options *ExecOptions, req *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool) (int, chan error, error) { +func (r *ConmonOCIRuntime) ExecContainerHTTP(ctr *Container, sessionID string, options *ExecOptions, req *http.Request, w http.ResponseWriter, + streams *HTTPAttachStreams, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, newSize *define.TerminalSize) (int, chan error, error) { if streams != nil { if !streams.Stdin && !streams.Stdout && !streams.Stderr { return -1, nil, errors.Wrapf(define.ErrInvalidArg, "must provide at least one stream to attach to") @@ -133,7 +134,7 @@ func (r *ConmonOCIRuntime) ExecContainerHTTP(ctr *Container, sessionID string, o conmonPipeDataChan := make(chan conmonPipeData) go func() { // attachToExec is responsible for closing pipes - attachChan <- attachExecHTTP(ctr, sessionID, req, w, streams, pipes, detachKeys, options.Terminal, cancel, hijackDone, holdConnOpen, execCmd, conmonPipeDataChan, ociLog) + attachChan <- attachExecHTTP(ctr, sessionID, req, w, streams, pipes, detachKeys, options.Terminal, cancel, hijackDone, holdConnOpen, execCmd, conmonPipeDataChan, ociLog, newSize) close(attachChan) }() @@ -486,7 +487,7 @@ func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *Ex } // Attach to a container over HTTP -func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, pipes *execPipes, detachKeys []byte, isTerminal bool, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, execCmd *exec.Cmd, conmonPipeDataChan chan<- conmonPipeData, ociLog string) (deferredErr error) { +func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, pipes *execPipes, detachKeys []byte, isTerminal bool, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, execCmd *exec.Cmd, conmonPipeDataChan chan<- conmonPipeData, ociLog string, newSize *define.TerminalSize) (deferredErr error) { // NOTE: As you may notice, the attach code is quite complex. // Many things happen concurrently and yet are interdependent. // If you ever change this function, make sure to write to the @@ -524,6 +525,14 @@ func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.Resp return err } + // resize before we start the container process + if newSize != nil { + err = c.ociRuntime.ExecAttachResize(c, sessionID, *newSize) + if err != nil { + logrus.Warn("resize failed", err) + } + } + // 2: then attach conn, err := openUnixSocket(sockPath) if err != nil { diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index 3da49b85f..2914bd1a1 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -787,7 +787,11 @@ func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options Container args = append(args, "--pre-dump") } if !options.PreCheckPoint && options.WithPrevious { - args = append(args, "--parent-path", ctr.PreCheckPointPath()) + args = append( + args, + "--parent-path", + filepath.Join("..", preCheckpointDir), + ) } runtimeDir, err := util.GetRuntimeDir() if err != nil { diff --git a/libpod/oci_missing.go b/libpod/oci_missing.go index 10526f368..fcf2ffca8 100644 --- a/libpod/oci_missing.go +++ b/libpod/oci_missing.go @@ -119,12 +119,13 @@ func (r *MissingRuntime) AttachResize(ctr *Container, newSize define.TerminalSiz } // ExecContainer is not available as the runtime is missing -func (r *MissingRuntime) ExecContainer(ctr *Container, sessionID string, options *ExecOptions, streams *define.AttachStreams) (int, chan error, error) { +func (r *MissingRuntime) ExecContainer(ctr *Container, sessionID string, options *ExecOptions, streams *define.AttachStreams, newSize *define.TerminalSize) (int, chan error, error) { return -1, nil, r.printError() } // ExecContainerHTTP is not available as the runtime is missing -func (r *MissingRuntime) ExecContainerHTTP(ctr *Container, sessionID string, options *ExecOptions, req *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool) (int, chan error, error) { +func (r *MissingRuntime) ExecContainerHTTP(ctr *Container, sessionID string, options *ExecOptions, req *http.Request, w http.ResponseWriter, + streams *HTTPAttachStreams, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, newSize *define.TerminalSize) (int, chan error, error) { return -1, nil, r.printError() } diff --git a/libpod/options.go b/libpod/options.go index f942d264b..f2468e41b 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -7,6 +7,7 @@ import ( "strings" "syscall" + "github.com/containers/buildah/pkg/parse" "github.com/containers/common/pkg/config" "github.com/containers/common/pkg/secrets" "github.com/containers/image/v5/manifest" @@ -268,8 +269,11 @@ func WithRegistriesConf(path string) RuntimeOption { return errors.Wrap(err, "error locating specified registries.conf") } if rt.imageContext == nil { - rt.imageContext = &types.SystemContext{} + rt.imageContext = &types.SystemContext{ + BigFilesTemporaryDir: parse.GetTempDir(), + } } + rt.imageContext.SystemRegistriesConfPath = path return nil } @@ -1641,6 +1645,19 @@ func WithVolumeGID(gid int) VolumeCreateOption { } } +// WithVolumeNoChown prevents the volume from being chowned to the process uid at first use. +func WithVolumeNoChown() VolumeCreateOption { + return func(volume *Volume) error { + if volume.valid { + return define.ErrVolumeFinalized + } + + volume.state.NeedsChown = false + + return nil + } +} + // withSetAnon sets a bool notifying libpod that this volume is anonymous and // should be removed when containers using it are removed and volumes are // specified for removal. diff --git a/libpod/runtime.go b/libpod/runtime.go index d775b55e1..f53789e89 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -15,6 +15,7 @@ import ( "syscall" "time" + "github.com/containers/buildah/pkg/parse" "github.com/containers/common/libimage" "github.com/containers/common/pkg/config" "github.com/containers/common/pkg/defaultnet" @@ -381,7 +382,9 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (retErr error) { // Set up containers/image if runtime.imageContext == nil { - runtime.imageContext = &types.SystemContext{} + runtime.imageContext = &types.SystemContext{ + BigFilesTemporaryDir: parse.GetTempDir(), + } } runtime.imageContext.SignaturePolicyPath = runtime.config.Engine.SignaturePolicyPath @@ -465,7 +468,7 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (retErr error) { } // Set up the CNI net plugin - netPlugin, err := ocicni.InitCNI(runtime.config.Network.DefaultNetwork, runtime.config.Network.NetworkConfigDir, runtime.config.Network.CNIPluginDirs...) + netPlugin, err := ocicni.InitCNINoInotify(runtime.config.Network.DefaultNetwork, runtime.config.Network.NetworkConfigDir, "", runtime.config.Network.CNIPluginDirs...) if err != nil { return errors.Wrapf(err, "error configuring CNI network plugin") } diff --git a/libpod/stats.go b/libpod/stats.go index f4732b4fc..6f0360ef1 100644 --- a/libpod/stats.go +++ b/libpod/stats.go @@ -56,7 +56,11 @@ func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*de previousCPU := previousStats.CPUNano now := uint64(time.Now().UnixNano()) + stats.Duration = cgroupStats.CPU.Usage.Total + stats.UpTime = time.Duration(stats.Duration) stats.CPU = calculateCPUPercent(cgroupStats, previousCPU, now, previousStats.SystemNano) + stats.AvgCPU = calculateAvgCPU(stats.CPU, previousStats.AvgCPU, previousStats.DataPoints) + stats.DataPoints = previousStats.DataPoints + 1 stats.MemUsage = cgroupStats.Memory.Usage.Usage stats.MemLimit = getMemLimit(cgroupStats.Memory.Usage.Limit) stats.MemPerc = (float64(stats.MemUsage) / float64(stats.MemLimit)) * 100 @@ -127,3 +131,9 @@ func calculateBlockIO(stats *cgroups.Metrics) (read uint64, write uint64) { } return } + +// calculateAvgCPU calculates the avg CPU percentage given the previous average and the number of data points. +func calculateAvgCPU(statsCPU float64, prevAvg float64, prevData int64) float64 { + avgPer := ((prevAvg * float64(prevData)) + statsCPU) / (float64(prevData) + 1) + return avgPer +} diff --git a/libpod/volume_internal.go b/libpod/volume_internal.go index 694cdd149..19008a253 100644 --- a/libpod/volume_internal.go +++ b/libpod/volume_internal.go @@ -39,8 +39,23 @@ func (v *Volume) needsMount() bool { return true } - // Local driver with options needs mount - return len(v.config.Options) > 0 + // Commit 28138dafcc added the UID and GID options to this map + // However we should only mount when options other than uid and gid are set. + // see https://github.com/containers/podman/issues/10620 + index := 0 + if _, ok := v.config.Options["UID"]; ok { + index++ + } + if _, ok := v.config.Options["GID"]; ok { + index++ + } + // when uid or gid is set there is also the "o" option + // set so we have to ignore this one as well + if index > 0 { + index++ + } + // Local driver with options other than uid,gid needs mount + return len(v.config.Options) > index } // update() updates the volume state from the DB. |