From 1b88927c2cf1db7b6530748d67528b5209d93b42 Mon Sep 17 00:00:00 2001 From: Doug Rabson Date: Fri, 12 Aug 2022 10:51:48 +0100 Subject: libpod: Add stubs for non-linux builds Note: this makes info.go linux-only since it mixes linux-specific and generic code. This should be addressed in a separate refactoring PR. [NO NEW TESTS NEEDED] Signed-off-by: Doug Rabson --- libpod/boltdb_state_unsupported.go | 19 +++++++ libpod/container_copy_unsupported.go | 17 ++++++ libpod/container_internal_unsupported.go | 95 ++++++++++++++++++++++++++++++++ libpod/container_stat_unsupported.go | 14 +++++ libpod/container_top_unsupported.go | 14 +++++ libpod/container_unsupported.go | 7 +++ libpod/healthcheck_unsupported.go | 25 +++++++++ libpod/info.go | 3 + libpod/info_unsupported.go | 14 +++++ libpod/networking_unsupported.go | 79 ++++++++++++++++++++++++++ libpod/oci_conmon_unsupported.go | 24 ++++++++ libpod/pod_top_unsupported.go | 20 +++++++ libpod/runtime_migrate_unsupported.go | 16 ++++++ libpod/runtime_pod_unsupported.go | 30 ++++++++++ libpod/runtime_volume_unsupported.go | 42 ++++++++++++++ libpod/stats_unsupported.go | 17 ++++++ libpod/util_unsupported.go | 27 +++++++++ libpod/volume_internal_unsupported.go | 32 +++++++++++ 18 files changed, 495 insertions(+) create mode 100644 libpod/boltdb_state_unsupported.go create mode 100644 libpod/container_copy_unsupported.go create mode 100644 libpod/container_internal_unsupported.go create mode 100644 libpod/container_stat_unsupported.go create mode 100644 libpod/container_top_unsupported.go create mode 100644 libpod/container_unsupported.go create mode 100644 libpod/healthcheck_unsupported.go create mode 100644 libpod/info_unsupported.go create mode 100644 libpod/networking_unsupported.go create mode 100644 libpod/oci_conmon_unsupported.go create mode 100644 libpod/pod_top_unsupported.go create mode 100644 libpod/runtime_migrate_unsupported.go create mode 100644 libpod/runtime_pod_unsupported.go create mode 100644 libpod/runtime_volume_unsupported.go create mode 100644 libpod/stats_unsupported.go create mode 100644 libpod/util_unsupported.go create mode 100644 libpod/volume_internal_unsupported.go (limited to 'libpod') diff --git a/libpod/boltdb_state_unsupported.go b/libpod/boltdb_state_unsupported.go new file mode 100644 index 000000000..97d59614e --- /dev/null +++ b/libpod/boltdb_state_unsupported.go @@ -0,0 +1,19 @@ +//go:build !linux +// +build !linux + +package libpod + +import ( + "errors" +) + +// replaceNetNS handle network namespace transitions after updating a +// container's state. +func replaceNetNS(netNSPath string, ctr *Container, newState *ContainerState) error { + return errors.New("replaceNetNS not supported on this platform") +} + +// getNetNSPath retrieves the netns path to be stored in the database +func getNetNSPath(ctr *Container) string { + return "" +} diff --git a/libpod/container_copy_unsupported.go b/libpod/container_copy_unsupported.go new file mode 100644 index 000000000..62937279a --- /dev/null +++ b/libpod/container_copy_unsupported.go @@ -0,0 +1,17 @@ +//go:build !linux +// +build !linux + +package libpod + +import ( + "errors" + "io" +) + +func (c *Container) copyFromArchive(path string, chown, noOverwriteDirNonDir bool, rename map[string]string, reader io.Reader) (func() error, error) { + return nil, errors.New("not implemented (*Container) copyFromArchive") +} + +func (c *Container) copyToArchive(path string, writer io.Writer) (func() error, error) { + return nil, errors.New("not implemented (*Container) copyToArchive") +} diff --git a/libpod/container_internal_unsupported.go b/libpod/container_internal_unsupported.go new file mode 100644 index 000000000..379be9298 --- /dev/null +++ b/libpod/container_internal_unsupported.go @@ -0,0 +1,95 @@ +//go:build !linux +// +build !linux + +package libpod + +import ( + "context" + "errors" + + "github.com/containers/common/libnetwork/etchosts" + "github.com/containers/podman/v4/libpod/define" + "github.com/containers/podman/v4/pkg/lookup" + spec "github.com/opencontainers/runtime-spec/specs-go" +) + +func (c *Container) mountSHM(shmOptions string) error { + return errors.New("not implemented (*Container) mountSHM") +} + +func (c *Container) unmountSHM(mount string) error { + return errors.New("not implemented (*Container) unmountSHM") +} + +func (c *Container) cleanupOverlayMounts() error { + return errors.New("not implemented (*Container) cleanupOverlayMounts") +} + +// prepare mounts the container and sets up other required resources like net +// namespaces +func (c *Container) prepare() error { + return errors.New("not implemented (*Container) prepare") +} + +// resolveWorkDir resolves the container's workdir and, depending on the +// configuration, will create it, or error out if it does not exist. +// Note that the container must be mounted before. +func (c *Container) resolveWorkDir() error { + return errors.New("not implemented (*Container) resolveWorkDir") +} + +// cleanupNetwork unmounts and cleans up the container's network +func (c *Container) cleanupNetwork() error { + return errors.New("not implemented (*Container) cleanupNetwork") +} + +// reloadNetwork reloads the network for the given container, recreating +// firewall rules. +func (c *Container) reloadNetwork() error { + return errors.New("not implemented (*Container) reloadNetwork") +} + +// Generate spec for a container +// Accepts a map of the container's dependencies +func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { + return nil, errors.New("not implemented (*Container) generateSpec") +} + +func (c *Container) getUserOverrides() *lookup.Overrides { + return &lookup.Overrides{} +} + +func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) { + return nil, 0, errors.New("not implemented (*Container) checkpoint") +} + +func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (criuStatistics *define.CRIUCheckpointRestoreStatistics, runtimeRestoreDuration int64, retErr error) { + return nil, 0, errors.New("not implemented (*Container) restore") +} + +// getHostsEntries returns the container ip host entries for the correct netmode +func (c *Container) getHostsEntries() (etchosts.HostEntries, error) { + return nil, errors.New("unspported (*Container) getHostsEntries") +} + +// Fix ownership and permissions of the specified volume if necessary. +func (c *Container) fixVolumePermissions(v *ContainerNamedVolume) error { + return errors.New("unspported (*Container) fixVolumePermissions") +} + +func (c *Container) expectPodCgroup() (bool, error) { + return false, errors.New("unspported (*Container) expectPodCgroup") +} + +// Get cgroup path in a format suitable for the OCI spec +func (c *Container) getOCICgroupPath() (string, error) { + return "", errors.New("unspported (*Container) getOCICgroupPath") +} + +func getLocalhostHostEntry(c *Container) etchosts.HostEntries { + return nil +} + +func isRootlessCgroupSet(cgroup string) bool { + return false +} diff --git a/libpod/container_stat_unsupported.go b/libpod/container_stat_unsupported.go new file mode 100644 index 000000000..2f1acd44d --- /dev/null +++ b/libpod/container_stat_unsupported.go @@ -0,0 +1,14 @@ +//go:build !linux +// +build !linux + +package libpod + +import ( + "errors" + + "github.com/containers/podman/v4/libpod/define" +) + +func (c *Container) stat(containerMountPoint string, containerPath string) (*define.FileInfo, string, string, error) { + return nil, "", "", errors.New("Containers stat not supported on this platform") +} diff --git a/libpod/container_top_unsupported.go b/libpod/container_top_unsupported.go new file mode 100644 index 000000000..a8d9b970b --- /dev/null +++ b/libpod/container_top_unsupported.go @@ -0,0 +1,14 @@ +//go:build !linux +// +build !linux + +package libpod + +import ( + "errors" +) + +// Top gathers statistics about the running processes in a container. It returns a +// []string for output +func (c *Container) Top(descriptors []string) ([]string, error) { + return nil, errors.New("not implemented (*Container) Top") +} diff --git a/libpod/container_unsupported.go b/libpod/container_unsupported.go new file mode 100644 index 000000000..5a46c163c --- /dev/null +++ b/libpod/container_unsupported.go @@ -0,0 +1,7 @@ +//go:build !linux +// +build !linux + +package libpod + +type containerPlatformState struct { +} diff --git a/libpod/healthcheck_unsupported.go b/libpod/healthcheck_unsupported.go new file mode 100644 index 000000000..92cd5d0a3 --- /dev/null +++ b/libpod/healthcheck_unsupported.go @@ -0,0 +1,25 @@ +//go:build !linux +// +build !linux + +package libpod + +import ( + "context" + "errors" +) + +// createTimer systemd timers for healthchecks of a container +func (c *Container) createTimer() error { + return errors.New("not implemented (*Container) createTimer") +} + +// startTimer starts a systemd timer for the healthchecks +func (c *Container) startTimer() error { + return errors.New("not implemented (*Container) startTimer") +} + +// removeTransientFiles removes the systemd timer and unit files +// for the container +func (c *Container) removeTransientFiles(ctx context.Context) error { + return errors.New("not implemented (*Container) removeTransientFiles") +} diff --git a/libpod/info.go b/libpod/info.go index c4193b40d..8db6df8cc 100644 --- a/libpod/info.go +++ b/libpod/info.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + package libpod import ( diff --git a/libpod/info_unsupported.go b/libpod/info_unsupported.go new file mode 100644 index 000000000..53ee4b32f --- /dev/null +++ b/libpod/info_unsupported.go @@ -0,0 +1,14 @@ +//go:build !linux +// +build !linux + +package libpod + +import ( + "errors" + + "github.com/containers/podman/v4/libpod/define" +) + +func (r *Runtime) info() (*define.Info, error) { + return nil, errors.New("not implemented (*Runtime) info") +} diff --git a/libpod/networking_unsupported.go b/libpod/networking_unsupported.go new file mode 100644 index 000000000..227b512cd --- /dev/null +++ b/libpod/networking_unsupported.go @@ -0,0 +1,79 @@ +//go:build !linux +// +build !linux + +package libpod + +import ( + "errors" + "path/filepath" + + "github.com/containers/common/libnetwork/types" + "github.com/containers/podman/v4/libpod/define" + "github.com/containers/storage/pkg/lockfile" +) + +type RootlessNetNS struct { + dir string + Lock lockfile.Locker +} + +// ocicniPortsToNetTypesPorts convert the old port format to the new one +// while deduplicating ports into ranges +func ocicniPortsToNetTypesPorts(ports []types.OCICNIPortMapping) []types.PortMapping { + return []types.PortMapping{} +} + +func (c *Container) getContainerNetworkInfo() (*define.InspectNetworkSettings, error) { + return nil, errors.New("not implemented (*Container) getContainerNetworkInfo") +} + +func (c *Container) setupRootlessNetwork() error { + return errors.New("not implemented (*Container) setupRootlessNetwork") +} + +func (r *Runtime) setupNetNS(ctr *Container) error { + return errors.New("not implemented (*Runtime) setupNetNS") +} + +// normalizeNetworkName takes a network name, a partial or a full network ID and returns the network name. +// If the network is not found a errors is returned. +func (r *Runtime) normalizeNetworkName(nameOrID string) (string, error) { + return "", errors.New("not implemented (*Runtime) normalizeNetworkName") +} + +// DisconnectContainerFromNetwork removes a container from its CNI network +func (r *Runtime) DisconnectContainerFromNetwork(nameOrID, netName string, force bool) error { + return errors.New("not implemented (*Runtime) DisconnectContainerFromNetwork") +} + +// ConnectContainerToNetwork connects a container to a CNI network +func (r *Runtime) ConnectContainerToNetwork(nameOrID, netName string, netOpts types.PerNetworkOptions) error { + return errors.New("not implemented (*Runtime) ConnectContainerToNetwork") +} + +// getPath will join the given path to the rootless netns dir +func (r *RootlessNetNS) getPath(path string) string { + return filepath.Join(r.dir, path) +} + +// Do - run the given function in the rootless netns. +// It does not lock the rootlessCNI lock, the caller +// should only lock when needed, e.g. for cni operations. +func (r *RootlessNetNS) Do(toRun func() error) error { + return errors.New("not implemented (*RootlessNetNS) Do") +} + +// Cleanup the rootless network namespace if needed. +// It checks if we have running containers with the bridge network mode. +// Cleanup() expects that r.Lock is locked +func (r *RootlessNetNS) Cleanup(runtime *Runtime) error { + return errors.New("not implemented (*RootlessNetNS) Cleanup") +} + +// GetRootlessNetNs returns the rootless netns object. If create is set to true +// the rootless network namespace will be created if it does not exists already. +// If called as root it returns always nil. +// On success the returned RootlessCNI lock is locked and must be unlocked by the caller. +func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) { + return nil, errors.New("not implemented (*Runtime) GetRootlessNetNs") +} diff --git a/libpod/oci_conmon_unsupported.go b/libpod/oci_conmon_unsupported.go new file mode 100644 index 000000000..c72dc0f0d --- /dev/null +++ b/libpod/oci_conmon_unsupported.go @@ -0,0 +1,24 @@ +//go:build !linux +// +build !linux + +package libpod + +import ( + "errors" + + "github.com/containers/common/pkg/config" + "github.com/containers/common/pkg/resize" +) + +// Make a new Conmon-based OCI runtime with the given options. +// Conmon will wrap the given OCI runtime, which can be `runc`, `crun`, or +// any runtime with a runc-compatible CLI. +// The first path that points to a valid executable will be used. +// Deliberately private. Someone should not be able to construct this outside of +// libpod. +func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtimeFlags []string, runtimeCfg *config.Config) (OCIRuntime, error) { + return nil, errors.New("newConmonOCIRuntime not supported on this platform") +} + +func registerResizeFunc(r <-chan resize.TerminalSize, bundlePath string) { +} diff --git a/libpod/pod_top_unsupported.go b/libpod/pod_top_unsupported.go new file mode 100644 index 000000000..92323043a --- /dev/null +++ b/libpod/pod_top_unsupported.go @@ -0,0 +1,20 @@ +//go:build !linux +// +build !linux + +package libpod + +import ( + "errors" +) + +// GetPodPidInformation returns process-related data of all processes in +// the pod. The output data can be controlled via the `descriptors` +// argument which expects format descriptors and supports all AIXformat +// descriptors of ps (1) plus some additional ones to for instance inspect the +// set of effective capabilities. Each element in the returned string slice +// is a tab-separated string. +// +// For more details, please refer to github.com/containers/psgo. +func (p *Pod) GetPodPidInformation(descriptors []string) ([]string, error) { + return nil, errors.New("not implemented (*Pod) GetPodPidInformation") +} diff --git a/libpod/runtime_migrate_unsupported.go b/libpod/runtime_migrate_unsupported.go new file mode 100644 index 000000000..77c2737a9 --- /dev/null +++ b/libpod/runtime_migrate_unsupported.go @@ -0,0 +1,16 @@ +//go:build !linux +// +build !linux + +package libpod + +import ( + "errors" +) + +func (r *Runtime) stopPauseProcess() error { + return errors.New("not implemented (*Runtime) stopPauseProcess") +} + +func (r *Runtime) migrate() error { + return errors.New("not implemented (*Runtime) migrate") +} diff --git a/libpod/runtime_pod_unsupported.go b/libpod/runtime_pod_unsupported.go new file mode 100644 index 000000000..0c7ff8655 --- /dev/null +++ b/libpod/runtime_pod_unsupported.go @@ -0,0 +1,30 @@ +//go:build !linux +// +build !linux + +package libpod + +import ( + "context" + "errors" + + "github.com/containers/podman/v4/pkg/specgen" +) + +// NewPod makes a new, empty pod +func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, options ...PodCreateOption) (_ *Pod, deferredErr error) { + return nil, errors.New("not implemented (*Runtime) NewPod") +} + +// AddInfra adds the created infra container to the pod state +func (r *Runtime) AddInfra(ctx context.Context, pod *Pod, infraCtr *Container) (*Pod, error) { + return nil, errors.New("not implemented (*Runtime) AddInfra") +} + +// SavePod is a helper function to save the pod state from outside of libpod +func (r *Runtime) SavePod(pod *Pod) error { + return errors.New("not implemented (*Runtime) SavePod") +} + +func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, timeout *uint) error { + return errors.New("not implemented (*Runtime) removePod") +} diff --git a/libpod/runtime_volume_unsupported.go b/libpod/runtime_volume_unsupported.go new file mode 100644 index 000000000..c2816b817 --- /dev/null +++ b/libpod/runtime_volume_unsupported.go @@ -0,0 +1,42 @@ +//go:build !linux +// +build !linux + +package libpod + +import ( + "context" + "errors" + + "github.com/containers/podman/v4/libpod/define" +) + +// NewVolume creates a new empty volume +func (r *Runtime) NewVolume(ctx context.Context, options ...VolumeCreateOption) (*Volume, error) { + if !r.valid { + return nil, define.ErrRuntimeStopped + } + return r.newVolume(false, options...) +} + +// NewVolume creates a new empty volume +func (r *Runtime) newVolume(noCreatePluginVolume bool, options ...VolumeCreateOption) (*Volume, error) { + return nil, errors.New("not implemented (*Runtime) newVolume") +} + +// UpdateVolumePlugins reads all volumes from all configured volume plugins and +// imports them into the libpod db. It also checks if existing libpod volumes +// are removed in the plugin, in this case we try to remove it from libpod. +// On errors we continue and try to do as much as possible. all errors are +// returned as array in the returned struct. +// This function has many race conditions, it is best effort but cannot guarantee +// a perfect state since plugins can be modified from the outside at any time. +func (r *Runtime) UpdateVolumePlugins(ctx context.Context) *define.VolumeReload { + return nil +} + +// removeVolume removes the specified volume from state as well tears down its mountpoint and storage. +// ignoreVolumePlugin is used to only remove the volume from the db and not the plugin, +// this is required when the volume was already removed from the plugin, i.e. in UpdateVolumePlugins(). +func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool, timeout *uint, ignoreVolumePlugin bool) error { + return errors.New("not implemented (*Runtime) removeVolume") +} diff --git a/libpod/stats_unsupported.go b/libpod/stats_unsupported.go new file mode 100644 index 000000000..b23333c2e --- /dev/null +++ b/libpod/stats_unsupported.go @@ -0,0 +1,17 @@ +//go:build !linux +// +build !linux + +package libpod + +import ( + "errors" + + "github.com/containers/podman/v4/libpod/define" +) + +// GetContainerStats gets the running stats for a given container. +// The previousStats is used to correctly calculate cpu percentages. You +// should pass nil if there is no previous stat for this container. +func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*define.ContainerStats, error) { + return nil, errors.New("not implemented (*Container) GetContainerStats") +} diff --git a/libpod/util_unsupported.go b/libpod/util_unsupported.go new file mode 100644 index 000000000..d2ec3ae7b --- /dev/null +++ b/libpod/util_unsupported.go @@ -0,0 +1,27 @@ +//go:build !linux +// +build !linux + +package libpod + +import ( + "errors" + + spec "github.com/opencontainers/runtime-spec/specs-go" +) + +// systemdSliceFromPath makes a new systemd slice under the given parent with +// the given name. +// The parent must be a slice. The name must NOT include ".slice" +func systemdSliceFromPath(parent, name string, resources *spec.LinuxResources) (string, error) { + return "", errors.New("not implemented systemdSliceFromPath") +} + +// Unmount umounts a target directory +func Unmount(mount string) { +} + +// LabelVolumePath takes a mount path for a volume and gives it an +// selinux label of either shared or not +func LabelVolumePath(path string) error { + return errors.New("not implemented LabelVolumePath") +} diff --git a/libpod/volume_internal_unsupported.go b/libpod/volume_internal_unsupported.go new file mode 100644 index 000000000..50515e692 --- /dev/null +++ b/libpod/volume_internal_unsupported.go @@ -0,0 +1,32 @@ +//go:build !linux +// +build !linux + +package libpod + +import ( + "errors" +) + +// mount mounts the volume if necessary. +// A mount is necessary if a volume has any options set. +// If a mount is necessary, v.state.MountCount will be incremented. +// If it was 0 when the increment occurred, the volume will be mounted on the +// host. Otherwise, we assume it is already mounted. +// Must be done while the volume is locked. +// Is a no-op on volumes that do not require a mount (as defined by +// volumeNeedsMount()). +func (v *Volume) mount() error { + return errors.New("not implemented (*Volume) mount") +} + +// unmount unmounts the volume if necessary. +// Unmounting a volume that is not mounted is a no-op. +// Unmounting a volume that does not require a mount is a no-op. +// The volume must be locked for this to occur. +// The mount counter will be decremented if non-zero. If the counter reaches 0, +// the volume will really be unmounted, as no further containers are using the +// volume. +// If force is set, the volume will be unmounted regardless of mount counter. +func (v *Volume) unmount(force bool) error { + return errors.New("not implemented (*Volume) unmount") +} -- cgit v1.2.3-54-g00ecf From 5d7778411a14b2a10d2e66bb0e4cdf7262cf2689 Mon Sep 17 00:00:00 2001 From: Doug Rabson Date: Tue, 16 Aug 2022 11:14:54 +0100 Subject: libpod: Move rootless network setup details to container_internal_linux.go This removes a use of state.NetNS which is a linux-specific field defined in container_linux.go from the generic container_internal.go, allowing that to build on non-linux platforms. [NO NEW TESTS NEEDED] Signed-off-by: Doug Rabson --- libpod/container_internal.go | 16 ++-------------- libpod/container_internal_linux.go | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 14 deletions(-) (limited to 'libpod') diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 7cef067b0..2d08b56f8 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -293,20 +293,8 @@ func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr err } // set up slirp4netns again because slirp4netns will die when conmon exits - if c.config.NetMode.IsSlirp4netns() { - err := c.runtime.setupSlirp4netns(c, c.state.NetNS) - if err != nil { - return false, err - } - } - - // set up rootlesskit port forwarder again since it dies when conmon exits - // we use rootlesskit port forwarder only as rootless and when bridge network is used - if rootless.IsRootless() && c.config.NetMode.IsBridge() && len(c.config.PortMappings) > 0 { - err := c.runtime.setupRootlessPortMappingViaRLK(c, c.state.NetNS.Path(), c.state.NetworkStatus) - if err != nil { - return false, err - } + if err := c.setupRootlessNetwork(); err != nil { + return false, err } if c.state.State == define.ContainerStateStopped { diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 3c77cb18c..6000c2cdd 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -3228,3 +3228,24 @@ func (c *Container) ChangeHostPathOwnership(src string, recurse bool, uid, gid i } return chown.ChangeHostPathOwnership(src, recurse, uid, gid) } + +// If the container is rootless, set up the slirp4netns network +func (c *Container) setupRootlessNetwork() error { + // set up slirp4netns again because slirp4netns will die when conmon exits + if c.config.NetMode.IsSlirp4netns() { + err := c.runtime.setupSlirp4netns(c, c.state.NetNS) + if err != nil { + return err + } + } + + // set up rootlesskit port forwarder again since it dies when conmon exits + // we use rootlesskit port forwarder only as rootless and when bridge network is used + if rootless.IsRootless() && c.config.NetMode.IsBridge() && len(c.config.PortMappings) > 0 { + err := c.runtime.setupRootlessPortMappingViaRLK(c, c.state.NetNS.Path(), c.state.NetworkStatus) + if err != nil { + return err + } + } + return nil +} -- cgit v1.2.3-54-g00ecf From 1572420c3fbf8a8022faaa93848a7239037a77e4 Mon Sep 17 00:00:00 2001 From: Doug Rabson Date: Tue, 16 Aug 2022 11:15:57 +0100 Subject: libpod: Move uses of unix.O_PATH to container_internal_linux.go The O_PATH flag is a recent addition to the open syscall and is not present in darwin or in FreeBSD releases before 13.1. The constant is not present in the FreeBSD version of x/sys/unix since that package supports FreeBSD 12.3 and later. [NO NEW TESTS NEEDED] Signed-off-by: Doug Rabson --- libpod/container_internal.go | 4 ++-- libpod/container_internal_linux.go | 4 ++++ libpod/container_internal_unsupported.go | 4 ++++ 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'libpod') diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 2d08b56f8..60fb29607 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -1545,7 +1545,7 @@ func (c *Container) mountStorage() (_ string, deferredErr error) { rootUID, rootGID := c.RootUID(), c.RootGID() - dirfd, err := unix.Open(mountPoint, unix.O_RDONLY|unix.O_PATH, 0) + dirfd, err := openDirectory(mountPoint) if err != nil { return "", fmt.Errorf("open mount point: %w", err) } @@ -1568,7 +1568,7 @@ func (c *Container) mountStorage() (_ string, deferredErr error) { return "", fmt.Errorf("resolve /etc in the container: %w", err) } - etcInTheContainerFd, err := unix.Open(etcInTheContainerPath, unix.O_RDONLY|unix.O_PATH, 0) + etcInTheContainerFd, err := openDirectory(etcInTheContainerPath) if err != nil { return "", fmt.Errorf("open /etc in the container: %w", err) } diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 6000c2cdd..5c5fd471b 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -3249,3 +3249,7 @@ func (c *Container) setupRootlessNetwork() error { } return nil } + +func openDirectory(path string) (fd int, err error) { + return unix.Open(path, unix.O_RDONLY|unix.O_PATH, 0) +} diff --git a/libpod/container_internal_unsupported.go b/libpod/container_internal_unsupported.go index 379be9298..de92ff260 100644 --- a/libpod/container_internal_unsupported.go +++ b/libpod/container_internal_unsupported.go @@ -93,3 +93,7 @@ func getLocalhostHostEntry(c *Container) etchosts.HostEntries { func isRootlessCgroupSet(cgroup string) bool { return false } + +func openDirectory(path string) (fd int, err error) { + return -1, errors.New("unsupported openDirectory") +} -- cgit v1.2.3-54-g00ecf From 20ad1227411b2883089cc1d585138f6998c435b6 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Tue, 16 Aug 2022 15:43:13 +0200 Subject: runtime: use FindCommon from c/common it detects conmon-rs. [NO NEW TESTS NEEDED] no functionalities added. Signed-off-by: Giuseppe Scrivano --- libpod/runtime.go | 113 +----------------------------------------------------- 1 file changed, 1 insertion(+), 112 deletions(-) (limited to 'libpod') diff --git a/libpod/runtime.go b/libpod/runtime.go index ea4b34954..684f4abd7 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -2,15 +2,11 @@ package libpod import ( "bufio" - "bytes" "context" "errors" "fmt" "os" - "os/exec" "path/filepath" - "regexp" - "strconv" "strings" "sync" "syscall" @@ -44,17 +40,6 @@ import ( "github.com/sirupsen/logrus" ) -const ( - // conmonMinMajorVersion is the major version required for conmon. - conmonMinMajorVersion = 2 - - // conmonMinMinorVersion is the minor version required for conmon. - conmonMinMinorVersion = 0 - - // conmonMinPatchVersion is the sub-minor version required for conmon. - conmonMinPatchVersion = 24 -) - // A RuntimeOption is a functional option which alters the Runtime created by // NewRuntime type RuntimeOption func(*Runtime) error @@ -308,7 +293,7 @@ func getLockManager(runtime *Runtime) (lock.Manager, error) { // Sets up containers/storage, state store, OCI runtime func makeRuntime(runtime *Runtime) (retErr error) { // Find a working conmon binary - cPath, err := findConmon(runtime.config.Engine.ConmonPath) + cPath, err := runtime.config.FindConmon() if err != nil { return err } @@ -670,102 +655,6 @@ func makeRuntime(runtime *Runtime) (retErr error) { return nil } -// findConmon iterates over conmonPaths and returns the path -// to the first conmon binary with a new enough version. If none is found, -// we try to do a path lookup of "conmon". -func findConmon(conmonPaths []string) (string, error) { - foundOutdatedConmon := false - for _, path := range conmonPaths { - stat, err := os.Stat(path) - if err != nil { - continue - } - if stat.IsDir() { - continue - } - if err := probeConmon(path); err != nil { - logrus.Warnf("Conmon at %s invalid: %v", path, err) - foundOutdatedConmon = true - continue - } - logrus.Debugf("Using conmon: %q", path) - return path, nil - } - - // Search the $PATH as last fallback - if path, err := exec.LookPath("conmon"); err == nil { - if err := probeConmon(path); err != nil { - logrus.Warnf("Conmon at %s is invalid: %v", path, err) - foundOutdatedConmon = true - } else { - logrus.Debugf("Using conmon from $PATH: %q", path) - return path, nil - } - } - - if foundOutdatedConmon { - return "", fmt.Errorf( - "please update to v%d.%d.%d or later: %w", - conmonMinMajorVersion, conmonMinMinorVersion, conmonMinPatchVersion, define.ErrConmonOutdated) - } - - return "", fmt.Errorf( - "could not find a working conmon binary (configured options: %v): %w", - conmonPaths, define.ErrInvalidArg) -} - -// probeConmon calls conmon --version and verifies it is a new enough version for -// the runtime expectations the container engine currently has. -func probeConmon(conmonBinary string) error { - cmd := exec.Command(conmonBinary, "--version") - var out bytes.Buffer - cmd.Stdout = &out - err := cmd.Run() - if err != nil { - return err - } - r := regexp.MustCompile(`^conmon version (?P\d+).(?P\d+).(?P\d+)`) - - matches := r.FindStringSubmatch(out.String()) - if len(matches) != 4 { - return fmt.Errorf("%v: %w", define.ErrConmonVersionFormat, err) - } - major, err := strconv.Atoi(matches[1]) - if err != nil { - return fmt.Errorf("%v: %w", define.ErrConmonVersionFormat, err) - } - if major < conmonMinMajorVersion { - return define.ErrConmonOutdated - } - if major > conmonMinMajorVersion { - return nil - } - - minor, err := strconv.Atoi(matches[2]) - if err != nil { - return fmt.Errorf("%v: %w", define.ErrConmonVersionFormat, err) - } - if minor < conmonMinMinorVersion { - return define.ErrConmonOutdated - } - if minor > conmonMinMinorVersion { - return nil - } - - patch, err := strconv.Atoi(matches[3]) - if err != nil { - return fmt.Errorf("%v: %w", define.ErrConmonVersionFormat, err) - } - if patch < conmonMinPatchVersion { - return define.ErrConmonOutdated - } - if patch > conmonMinPatchVersion { - return nil - } - - return nil -} - // TmpDir gets the current Libpod temporary files directory. func (r *Runtime) TmpDir() (string, error) { if !r.valid { -- cgit v1.2.3-54-g00ecf From bebf55c0f22d6723a27cd39561c0577aa557c5e1 Mon Sep 17 00:00:00 2001 From: Doug Rabson Date: Wed, 17 Aug 2022 09:35:19 +0100 Subject: libpod: Move oci_conmon_linux.go to oci_conmon_common.go [NO NEW TESTS NEEDED] Signed-off-by: Doug Rabson --- libpod/oci_conmon_common.go | 1929 +++++++++++++++++++++++++++++++++++++++++++ libpod/oci_conmon_linux.go | 1929 ------------------------------------------- 2 files changed, 1929 insertions(+), 1929 deletions(-) create mode 100644 libpod/oci_conmon_common.go delete mode 100644 libpod/oci_conmon_linux.go (limited to 'libpod') diff --git a/libpod/oci_conmon_common.go b/libpod/oci_conmon_common.go new file mode 100644 index 000000000..1b654ed33 --- /dev/null +++ b/libpod/oci_conmon_common.go @@ -0,0 +1,1929 @@ +//go:build linux +// +build linux + +package libpod + +import ( + "bufio" + "bytes" + "context" + "errors" + "fmt" + "io" + "io/ioutil" + "net" + "net/http" + "os" + "os/exec" + "path/filepath" + "runtime" + "strconv" + "strings" + "sync" + "syscall" + "text/template" + "time" + + runcconfig "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/devices" + + "github.com/containers/common/pkg/cgroups" + "github.com/containers/common/pkg/config" + "github.com/containers/common/pkg/resize" + cutil "github.com/containers/common/pkg/util" + conmonConfig "github.com/containers/conmon/runner/config" + "github.com/containers/podman/v4/libpod/define" + "github.com/containers/podman/v4/libpod/logs" + "github.com/containers/podman/v4/pkg/checkpoint/crutils" + "github.com/containers/podman/v4/pkg/errorhandling" + "github.com/containers/podman/v4/pkg/rootless" + "github.com/containers/podman/v4/pkg/specgenutil" + "github.com/containers/podman/v4/pkg/util" + "github.com/containers/podman/v4/utils" + "github.com/containers/storage/pkg/homedir" + pmount "github.com/containers/storage/pkg/mount" + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/selinux/go-selinux/label" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +const ( + // This is Conmon's STDIO_BUF_SIZE. I don't believe we have access to it + // directly from the Go code, so const it here + // Important: The conmon attach socket uses an extra byte at the beginning of each + // message to specify the STREAM so we have to increase the buffer size by one + bufferSize = conmonConfig.BufSize + 1 +) + +// ConmonOCIRuntime is an OCI runtime managed by Conmon. +// TODO: Make all calls to OCI runtime have a timeout. +type ConmonOCIRuntime struct { + name string + path string + conmonPath string + conmonEnv []string + tmpDir string + exitsDir string + logSizeMax int64 + noPivot bool + reservePorts bool + runtimeFlags []string + supportsJSON bool + supportsKVM bool + supportsNoCgroups bool + enableKeyring bool +} + +// Make a new Conmon-based OCI runtime with the given options. +// Conmon will wrap the given OCI runtime, which can be `runc`, `crun`, or +// any runtime with a runc-compatible CLI. +// The first path that points to a valid executable will be used. +// Deliberately private. Someone should not be able to construct this outside of +// libpod. +func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtimeFlags []string, runtimeCfg *config.Config) (OCIRuntime, error) { + if name == "" { + return nil, fmt.Errorf("the OCI runtime must be provided a non-empty name: %w", define.ErrInvalidArg) + } + + // Make lookup tables for runtime support + supportsJSON := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsJSON)) + supportsNoCgroups := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsNoCgroups)) + supportsKVM := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsKVM)) + for _, r := range runtimeCfg.Engine.RuntimeSupportsJSON { + supportsJSON[r] = true + } + for _, r := range runtimeCfg.Engine.RuntimeSupportsNoCgroups { + supportsNoCgroups[r] = true + } + for _, r := range runtimeCfg.Engine.RuntimeSupportsKVM { + supportsKVM[r] = true + } + + runtime := new(ConmonOCIRuntime) + runtime.name = name + runtime.conmonPath = conmonPath + runtime.runtimeFlags = runtimeFlags + + runtime.conmonEnv = runtimeCfg.Engine.ConmonEnvVars + runtime.tmpDir = runtimeCfg.Engine.TmpDir + runtime.logSizeMax = runtimeCfg.Containers.LogSizeMax + runtime.noPivot = runtimeCfg.Engine.NoPivotRoot + runtime.reservePorts = runtimeCfg.Engine.EnablePortReservation + runtime.enableKeyring = runtimeCfg.Containers.EnableKeyring + + // TODO: probe OCI runtime for feature and enable automatically if + // available. + + base := filepath.Base(name) + runtime.supportsJSON = supportsJSON[base] + runtime.supportsNoCgroups = supportsNoCgroups[base] + runtime.supportsKVM = supportsKVM[base] + + foundPath := false + for _, path := range paths { + stat, err := os.Stat(path) + if err != nil { + if os.IsNotExist(err) { + continue + } + return nil, fmt.Errorf("cannot stat OCI runtime %s path: %w", name, err) + } + if !stat.Mode().IsRegular() { + continue + } + foundPath = true + logrus.Tracef("found runtime %q", path) + runtime.path = path + break + } + + // Search the $PATH as last fallback + if !foundPath { + if foundRuntime, err := exec.LookPath(name); err == nil { + foundPath = true + runtime.path = foundRuntime + logrus.Debugf("using runtime %q from $PATH: %q", name, foundRuntime) + } + } + + if !foundPath { + return nil, fmt.Errorf("no valid executable found for OCI runtime %s: %w", name, define.ErrInvalidArg) + } + + runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits") + + // Create the exit files and attach sockets directories + if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil { + // The directory is allowed to exist + if !os.IsExist(err) { + return nil, fmt.Errorf("error creating OCI runtime exit files directory: %w", err) + } + } + return runtime, nil +} + +// Name returns the name of the runtime being wrapped by Conmon. +func (r *ConmonOCIRuntime) Name() string { + return r.name +} + +// Path returns the path of the OCI runtime being wrapped by Conmon. +func (r *ConmonOCIRuntime) Path() string { + return r.path +} + +// hasCurrentUserMapped checks whether the current user is mapped inside the container user namespace +func hasCurrentUserMapped(ctr *Container) bool { + if len(ctr.config.IDMappings.UIDMap) == 0 && len(ctr.config.IDMappings.GIDMap) == 0 { + return true + } + uid := os.Geteuid() + for _, m := range ctr.config.IDMappings.UIDMap { + if uid >= m.HostID && uid < m.HostID+m.Size { + return true + } + } + return false +} + +// CreateContainer creates a container. +func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) { + // always make the run dir accessible to the current user so that the PID files can be read without + // being in the rootless user namespace. + if err := makeAccessible(ctr.state.RunDir, 0, 0); err != nil { + return 0, err + } + if !hasCurrentUserMapped(ctr) { + for _, i := range []string{ctr.state.RunDir, ctr.runtime.config.Engine.TmpDir, ctr.config.StaticDir, ctr.state.Mountpoint, ctr.runtime.config.Engine.VolumePath} { + if err := makeAccessible(i, ctr.RootUID(), ctr.RootGID()); err != nil { + return 0, err + } + } + + // if we are running a non privileged container, be sure to umount some kernel paths so they are not + // bind mounted inside the container at all. + if !ctr.config.Privileged && !rootless.IsRootless() { + type result struct { + restoreDuration int64 + err error + } + ch := make(chan result) + go func() { + runtime.LockOSThread() + restoreDuration, err := func() (int64, error) { + fd, err := os.Open(fmt.Sprintf("/proc/%d/task/%d/ns/mnt", os.Getpid(), unix.Gettid())) + if err != nil { + return 0, err + } + defer errorhandling.CloseQuiet(fd) + + // create a new mountns on the current thread + if err = unix.Unshare(unix.CLONE_NEWNS); err != nil { + return 0, err + } + defer func() { + if err := unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS); err != nil { + logrus.Errorf("Unable to clone new namespace: %q", err) + } + }() + + // don't spread our mounts around. We are setting only /sys to be slave + // so that the cleanup process is still able to umount the storage and the + // changes are propagated to the host. + err = unix.Mount("/sys", "/sys", "none", unix.MS_REC|unix.MS_SLAVE, "") + if err != nil { + return 0, fmt.Errorf("cannot make /sys slave: %w", err) + } + + mounts, err := pmount.GetMounts() + if err != nil { + return 0, err + } + for _, m := range mounts { + if !strings.HasPrefix(m.Mountpoint, "/sys/kernel") { + continue + } + err = unix.Unmount(m.Mountpoint, 0) + if err != nil && !os.IsNotExist(err) { + return 0, fmt.Errorf("cannot unmount %s: %w", m.Mountpoint, err) + } + } + return r.createOCIContainer(ctr, restoreOptions) + }() + ch <- result{ + restoreDuration: restoreDuration, + err: err, + } + }() + r := <-ch + return r.restoreDuration, r.err + } + } + return r.createOCIContainer(ctr, restoreOptions) +} + +// UpdateContainerStatus retrieves the current status of the container from the +// runtime. It updates the container's state but does not save it. +// If useRuntime is false, we will not directly hit runc to see the container's +// status, but will instead only check for the existence of the conmon exit file +// and update state to stopped if it exists. +func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container) error { + runtimeDir, err := util.GetRuntimeDir() + if err != nil { + return err + } + + // Store old state so we know if we were already stopped + oldState := ctr.state.State + + state := new(spec.State) + + cmd := exec.Command(r.path, "state", ctr.ID()) + cmd.Env = append(cmd.Env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)) + + outPipe, err := cmd.StdoutPipe() + if err != nil { + return fmt.Errorf("getting stdout pipe: %w", err) + } + errPipe, err := cmd.StderrPipe() + if err != nil { + return fmt.Errorf("getting stderr pipe: %w", err) + } + + if err := cmd.Start(); err != nil { + out, err2 := ioutil.ReadAll(errPipe) + if err2 != nil { + return fmt.Errorf("error getting container %s state: %w", ctr.ID(), err) + } + if strings.Contains(string(out), "does not exist") || strings.Contains(string(out), "No such file") { + if err := ctr.removeConmonFiles(); err != nil { + logrus.Debugf("unable to remove conmon files for container %s", ctr.ID()) + } + ctr.state.ExitCode = -1 + ctr.state.FinishedTime = time.Now() + ctr.state.State = define.ContainerStateExited + return ctr.runtime.state.AddContainerExitCode(ctr.ID(), ctr.state.ExitCode) + } + return fmt.Errorf("error getting container %s state. stderr/out: %s: %w", ctr.ID(), out, err) + } + defer func() { + _ = cmd.Wait() + }() + + if err := errPipe.Close(); err != nil { + return err + } + out, err := ioutil.ReadAll(outPipe) + if err != nil { + return fmt.Errorf("error reading stdout: %s: %w", ctr.ID(), err) + } + if err := json.NewDecoder(bytes.NewBuffer(out)).Decode(state); err != nil { + return fmt.Errorf("error decoding container status for container %s: %w", ctr.ID(), err) + } + ctr.state.PID = state.Pid + + switch state.Status { + case "created": + ctr.state.State = define.ContainerStateCreated + case "paused": + ctr.state.State = define.ContainerStatePaused + case "running": + ctr.state.State = define.ContainerStateRunning + case "stopped": + ctr.state.State = define.ContainerStateStopped + default: + return fmt.Errorf("unrecognized status returned by runtime for container %s: %s: %w", + ctr.ID(), state.Status, define.ErrInternal) + } + + // Only grab exit status if we were not already stopped + // If we were, it should already be in the database + if ctr.state.State == define.ContainerStateStopped && oldState != define.ContainerStateStopped { + if _, err := ctr.Wait(context.Background()); err != nil { + logrus.Errorf("Waiting for container %s to exit: %v", ctr.ID(), err) + } + return nil + } + + // Handle ContainerStateStopping - keep it unless the container + // transitioned to no longer running. + if oldState == define.ContainerStateStopping && (ctr.state.State == define.ContainerStatePaused || ctr.state.State == define.ContainerStateRunning) { + ctr.state.State = define.ContainerStateStopping + } + + return nil +} + +// StartContainer starts the given container. +// Sets time the container was started, but does not save it. +func (r *ConmonOCIRuntime) StartContainer(ctr *Container) error { + // TODO: streams should probably *not* be our STDIN/OUT/ERR - redirect to buffers? + runtimeDir, err := util.GetRuntimeDir() + if err != nil { + return err + } + env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)} + if path, ok := os.LookupEnv("PATH"); ok { + env = append(env, fmt.Sprintf("PATH=%s", path)) + } + if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "start", ctr.ID())...); err != nil { + return err + } + + ctr.state.StartedTime = time.Now() + + return nil +} + +// KillContainer sends the given signal to the given container. +// If all is set, send to all PIDs in the container. +// All is only supported if the container created cgroups. +func (r *ConmonOCIRuntime) KillContainer(ctr *Container, signal uint, all bool) error { + logrus.Debugf("Sending signal %d to container %s", signal, ctr.ID()) + runtimeDir, err := util.GetRuntimeDir() + if err != nil { + return err + } + env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)} + var args []string + args = append(args, r.runtimeFlags...) + if all { + args = append(args, "kill", "--all", ctr.ID(), fmt.Sprintf("%d", signal)) + } else { + args = append(args, "kill", ctr.ID(), fmt.Sprintf("%d", signal)) + } + if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...); err != nil { + // Update container state - there's a chance we failed because + // the container exited in the meantime. + if err2 := r.UpdateContainerStatus(ctr); err2 != nil { + logrus.Infof("Error updating status for container %s: %v", ctr.ID(), err2) + } + if ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited) { + return define.ErrCtrStateInvalid + } + return fmt.Errorf("error sending signal to container %s: %w", ctr.ID(), err) + } + + return nil +} + +// StopContainer stops a container, first using its given stop signal (or +// SIGTERM if no signal was specified), then using SIGKILL. +// Timeout is given in seconds. If timeout is 0, the container will be +// immediately kill with SIGKILL. +// Does not set finished time for container, assumes you will run updateStatus +// after to pull the exit code. +func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool) error { + logrus.Debugf("Stopping container %s (PID %d)", ctr.ID(), ctr.state.PID) + + // Ping the container to see if it's alive + // If it's not, it's already stopped, return + err := unix.Kill(ctr.state.PID, 0) + if err == unix.ESRCH { + return nil + } + + stopSignal := ctr.config.StopSignal + if stopSignal == 0 { + stopSignal = uint(syscall.SIGTERM) + } + + if timeout > 0 { + if err := r.KillContainer(ctr, stopSignal, all); err != nil { + // Is the container gone? + // If so, it probably died between the first check and + // our sending the signal + // The container is stopped, so exit cleanly + err := unix.Kill(ctr.state.PID, 0) + if err == unix.ESRCH { + return nil + } + + return err + } + + if err := waitContainerStop(ctr, time.Duration(timeout)*time.Second); err != nil { + logrus.Debugf("Timed out stopping container %s with %s, resorting to SIGKILL: %v", ctr.ID(), unix.SignalName(syscall.Signal(stopSignal)), err) + logrus.Warnf("StopSignal %s failed to stop container %s in %d seconds, resorting to SIGKILL", unix.SignalName(syscall.Signal(stopSignal)), ctr.Name(), timeout) + } else { + // No error, the container is dead + return nil + } + } + + if err := r.KillContainer(ctr, 9, all); err != nil { + // Again, check if the container is gone. If it is, exit cleanly. + err := unix.Kill(ctr.state.PID, 0) + if err == unix.ESRCH { + return nil + } + + return fmt.Errorf("error sending SIGKILL to container %s: %w", ctr.ID(), err) + } + + // Give runtime a few seconds to make it happen + if err := waitContainerStop(ctr, killContainerTimeout); err != nil { + return err + } + + return nil +} + +// DeleteContainer deletes a container from the OCI runtime. +func (r *ConmonOCIRuntime) DeleteContainer(ctr *Container) error { + runtimeDir, err := util.GetRuntimeDir() + if err != nil { + return err + } + env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)} + return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "delete", "--force", ctr.ID())...) +} + +// PauseContainer pauses the given container. +func (r *ConmonOCIRuntime) PauseContainer(ctr *Container) error { + runtimeDir, err := util.GetRuntimeDir() + if err != nil { + return err + } + env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)} + return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "pause", ctr.ID())...) +} + +// UnpauseContainer unpauses the given container. +func (r *ConmonOCIRuntime) UnpauseContainer(ctr *Container) error { + runtimeDir, err := util.GetRuntimeDir() + if err != nil { + return err + } + env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)} + return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "resume", ctr.ID())...) +} + +// HTTPAttach performs an attach for the HTTP API. +// The caller must handle closing the HTTP connection after this returns. +// The cancel channel is not closed; it is up to the caller to do so after +// this function returns. +// If this is a container with a terminal, we will stream raw. If it is not, we +// will stream with an 8-byte header to multiplex STDOUT and STDERR. +// Returns any errors that occurred, and whether the connection was successfully +// hijacked before that error occurred. +func (r *ConmonOCIRuntime) HTTPAttach(ctr *Container, req *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, detachKeys *string, cancel <-chan bool, hijackDone chan<- bool, streamAttach, streamLogs bool) (deferredErr error) { + isTerminal := false + if ctr.config.Spec.Process != nil { + isTerminal = ctr.config.Spec.Process.Terminal + } + + if streams != nil { + if !streams.Stdin && !streams.Stdout && !streams.Stderr { + return fmt.Errorf("must specify at least one stream to attach to: %w", define.ErrInvalidArg) + } + } + + attachSock, err := r.AttachSocketPath(ctr) + if err != nil { + return err + } + + var conn *net.UnixConn + if streamAttach { + newConn, err := openUnixSocket(attachSock) + if err != nil { + return fmt.Errorf("failed to connect to container's attach socket: %v: %w", attachSock, err) + } + conn = newConn + defer func() { + if err := conn.Close(); err != nil { + logrus.Errorf("Unable to close container %s attach socket: %q", ctr.ID(), err) + } + }() + + logrus.Debugf("Successfully connected to container %s attach socket %s", ctr.ID(), attachSock) + } + + detachString := ctr.runtime.config.Engine.DetachKeys + if detachKeys != nil { + detachString = *detachKeys + } + detach, err := processDetachKeys(detachString) + if err != nil { + return err + } + + attachStdout := true + attachStderr := true + attachStdin := true + if streams != nil { + attachStdout = streams.Stdout + attachStderr = streams.Stderr + attachStdin = streams.Stdin + } + + logrus.Debugf("Going to hijack container %s attach connection", ctr.ID()) + + // Alright, let's hijack. + hijacker, ok := w.(http.Hijacker) + if !ok { + return fmt.Errorf("unable to hijack connection") + } + + httpCon, httpBuf, err := hijacker.Hijack() + if err != nil { + return fmt.Errorf("error hijacking connection: %w", err) + } + + hijackDone <- true + + writeHijackHeader(req, httpBuf) + + // Force a flush after the header is written. + if err := httpBuf.Flush(); err != nil { + return fmt.Errorf("error flushing HTTP hijack header: %w", err) + } + + defer func() { + hijackWriteErrorAndClose(deferredErr, ctr.ID(), isTerminal, httpCon, httpBuf) + }() + + logrus.Debugf("Hijack for container %s attach session done, ready to stream", ctr.ID()) + + // TODO: This is gross. Really, really gross. + // I want to say we should read all the logs into an array before + // calling this, in container_api.go, but that could take a lot of + // memory... + // On the whole, we need to figure out a better way of doing this, + // though. + logSize := 0 + if streamLogs { + logrus.Debugf("Will stream logs for container %s attach session", ctr.ID()) + + // Get all logs for the container + logChan := make(chan *logs.LogLine) + logOpts := new(logs.LogOptions) + logOpts.Tail = -1 + logOpts.WaitGroup = new(sync.WaitGroup) + errChan := make(chan error) + go func() { + var err error + // In non-terminal mode we need to prepend with the + // stream header. + logrus.Debugf("Writing logs for container %s to HTTP attach", ctr.ID()) + for logLine := range logChan { + if !isTerminal { + device := logLine.Device + var header []byte + headerLen := uint32(len(logLine.Msg)) + logSize += len(logLine.Msg) + switch strings.ToLower(device) { + case "stdin": + header = makeHTTPAttachHeader(0, headerLen) + case "stdout": + header = makeHTTPAttachHeader(1, headerLen) + case "stderr": + header = makeHTTPAttachHeader(2, headerLen) + default: + logrus.Errorf("Unknown device for log line: %s", device) + header = makeHTTPAttachHeader(1, headerLen) + } + _, err = httpBuf.Write(header) + if err != nil { + break + } + } + _, err = httpBuf.Write([]byte(logLine.Msg)) + if err != nil { + break + } + if !logLine.Partial() { + _, err = httpBuf.Write([]byte("\n")) + if err != nil { + break + } + } + err = httpBuf.Flush() + if err != nil { + break + } + } + errChan <- err + }() + if err := ctr.ReadLog(context.Background(), logOpts, logChan, 0); err != nil { + return err + } + go func() { + logOpts.WaitGroup.Wait() + close(logChan) + }() + logrus.Debugf("Done reading logs for container %s, %d bytes", ctr.ID(), logSize) + if err := <-errChan; err != nil { + return err + } + } + if !streamAttach { + logrus.Debugf("Done streaming logs for container %s attach, exiting as attach streaming not requested", ctr.ID()) + return nil + } + + logrus.Debugf("Forwarding attach output for container %s", ctr.ID()) + + stdoutChan := make(chan error) + stdinChan := make(chan error) + + // Handle STDOUT/STDERR + go func() { + var err error + if isTerminal { + // Hack: return immediately if attachStdout not set to + // emulate Docker. + // Basically, when terminal is set, STDERR goes nowhere. + // Everything does over STDOUT. + // Therefore, if not attaching STDOUT - we'll never copy + // anything from here. + logrus.Debugf("Performing terminal HTTP attach for container %s", ctr.ID()) + if attachStdout { + err = httpAttachTerminalCopy(conn, httpBuf, ctr.ID()) + } + } else { + logrus.Debugf("Performing non-terminal HTTP attach for container %s", ctr.ID()) + err = httpAttachNonTerminalCopy(conn, httpBuf, ctr.ID(), attachStdin, attachStdout, attachStderr) + } + stdoutChan <- err + logrus.Debugf("STDOUT/ERR copy completed") + }() + // Next, STDIN. Avoid entirely if attachStdin unset. + if attachStdin { + go func() { + _, err := cutil.CopyDetachable(conn, httpBuf, detach) + logrus.Debugf("STDIN copy completed") + stdinChan <- err + }() + } + + for { + select { + case err := <-stdoutChan: + if err != nil { + return err + } + + return nil + case err := <-stdinChan: + if err != nil { + return err + } + // copy stdin is done, close it + if connErr := conn.CloseWrite(); connErr != nil { + logrus.Errorf("Unable to close conn: %v", connErr) + } + case <-cancel: + return nil + } + } +} + +// isRetryable returns whether the error was caused by a blocked syscall or the +// specified operation on a non blocking file descriptor wasn't ready for completion. +func isRetryable(err error) bool { + var errno syscall.Errno + if errors.As(err, &errno) { + return errno == syscall.EINTR || errno == syscall.EAGAIN + } + return false +} + +// openControlFile opens the terminal control file. +func openControlFile(ctr *Container, parentDir string) (*os.File, error) { + controlPath := filepath.Join(parentDir, "ctl") + for i := 0; i < 600; i++ { + controlFile, err := os.OpenFile(controlPath, unix.O_WRONLY|unix.O_NONBLOCK, 0) + if err == nil { + return controlFile, nil + } + if !isRetryable(err) { + return nil, fmt.Errorf("could not open ctl file for terminal resize for container %s: %w", ctr.ID(), err) + } + time.Sleep(time.Second / 10) + } + return nil, fmt.Errorf("timeout waiting for %q", controlPath) +} + +// AttachResize resizes the terminal used by the given container. +func (r *ConmonOCIRuntime) AttachResize(ctr *Container, newSize resize.TerminalSize) error { + controlFile, err := openControlFile(ctr, ctr.bundlePath()) + if err != nil { + return err + } + defer controlFile.Close() + + logrus.Debugf("Received a resize event for container %s: %+v", ctr.ID(), newSize) + if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 1, newSize.Height, newSize.Width); err != nil { + return fmt.Errorf("failed to write to ctl file to resize terminal: %w", err) + } + + return nil +} + +// CheckpointContainer checkpoints the given container. +func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) { + // imagePath is used by CRIU to store the actual checkpoint files + imagePath := ctr.CheckpointPath() + if options.PreCheckPoint { + imagePath = ctr.PreCheckPointPath() + } + // workPath will be used to store dump.log and stats-dump + workPath := ctr.bundlePath() + logrus.Debugf("Writing checkpoint to %s", imagePath) + logrus.Debugf("Writing checkpoint logs to %s", workPath) + logrus.Debugf("Pre-dump the container %t", options.PreCheckPoint) + args := []string{} + args = append(args, r.runtimeFlags...) + args = append(args, "checkpoint") + args = append(args, "--image-path") + args = append(args, imagePath) + args = append(args, "--work-path") + args = append(args, workPath) + if options.KeepRunning { + args = append(args, "--leave-running") + } + if options.TCPEstablished { + args = append(args, "--tcp-established") + } + if options.FileLocks { + args = append(args, "--file-locks") + } + if !options.PreCheckPoint && options.KeepRunning { + args = append(args, "--leave-running") + } + if options.PreCheckPoint { + args = append(args, "--pre-dump") + } + if !options.PreCheckPoint && options.WithPrevious { + args = append( + args, + "--parent-path", + filepath.Join("..", preCheckpointDir), + ) + } + + args = append(args, ctr.ID()) + logrus.Debugf("the args to checkpoint: %s %s", r.path, strings.Join(args, " ")) + + runtimeDir, err := util.GetRuntimeDir() + if err != nil { + return 0, err + } + env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)} + if path, ok := os.LookupEnv("PATH"); ok { + env = append(env, fmt.Sprintf("PATH=%s", path)) + } + + runtime.LockOSThread() + if err := label.SetSocketLabel(ctr.ProcessLabel()); err != nil { + return 0, err + } + + runtimeCheckpointStarted := time.Now() + err = utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...) + // Ignore error returned from SetSocketLabel("") call, + // can't recover. + if labelErr := label.SetSocketLabel(""); labelErr == nil { + // Unlock the thread only if the process label could be restored + // successfully. Otherwise leave the thread locked and the Go runtime + // will terminate it once it returns to the threads pool. + runtime.UnlockOSThread() + } else { + logrus.Errorf("Unable to reset socket label: %q", labelErr) + } + + runtimeCheckpointDuration := func() int64 { + if options.PrintStats { + return time.Since(runtimeCheckpointStarted).Microseconds() + } + return 0 + }() + + return runtimeCheckpointDuration, err +} + +func (r *ConmonOCIRuntime) CheckConmonRunning(ctr *Container) (bool, error) { + if ctr.state.ConmonPID == 0 { + // If the container is running or paused, assume Conmon is + // running. We didn't record Conmon PID on some old versions, so + // that is likely what's going on... + // Unusual enough that we should print a warning message though. + if ctr.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) { + logrus.Warnf("Conmon PID is not set, but container is running!") + return true, nil + } + // Container's not running, so conmon PID being unset is + // expected. Conmon is not running. + return false, nil + } + + // We have a conmon PID. Ping it with signal 0. + if err := unix.Kill(ctr.state.ConmonPID, 0); err != nil { + if err == unix.ESRCH { + return false, nil + } + return false, fmt.Errorf("error pinging container %s conmon with signal 0: %w", ctr.ID(), err) + } + return true, nil +} + +// SupportsCheckpoint checks if the OCI runtime supports checkpointing +// containers. +func (r *ConmonOCIRuntime) SupportsCheckpoint() bool { + return crutils.CRRuntimeSupportsCheckpointRestore(r.path) +} + +// SupportsJSONErrors checks if the OCI runtime supports JSON-formatted error +// messages. +func (r *ConmonOCIRuntime) SupportsJSONErrors() bool { + return r.supportsJSON +} + +// SupportsNoCgroups checks if the OCI runtime supports running containers +// without cgroups (the --cgroup-manager=disabled flag). +func (r *ConmonOCIRuntime) SupportsNoCgroups() bool { + return r.supportsNoCgroups +} + +// SupportsKVM checks if the OCI runtime supports running containers +// without KVM separation +func (r *ConmonOCIRuntime) SupportsKVM() bool { + return r.supportsKVM +} + +// AttachSocketPath is the path to a single container's attach socket. +func (r *ConmonOCIRuntime) AttachSocketPath(ctr *Container) (string, error) { + if ctr == nil { + return "", fmt.Errorf("must provide a valid container to get attach socket path: %w", define.ErrInvalidArg) + } + + return filepath.Join(ctr.bundlePath(), "attach"), nil +} + +// ExitFilePath is the path to a container's exit file. +func (r *ConmonOCIRuntime) ExitFilePath(ctr *Container) (string, error) { + if ctr == nil { + return "", fmt.Errorf("must provide a valid container to get exit file path: %w", define.ErrInvalidArg) + } + return filepath.Join(r.exitsDir, ctr.ID()), nil +} + +// RuntimeInfo provides information on the runtime. +func (r *ConmonOCIRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) { + runtimePackage := packageVersion(r.path) + conmonPackage := packageVersion(r.conmonPath) + runtimeVersion, err := r.getOCIRuntimeVersion() + if err != nil { + return nil, nil, fmt.Errorf("error getting version of OCI runtime %s: %w", r.name, err) + } + conmonVersion, err := r.getConmonVersion() + if err != nil { + return nil, nil, fmt.Errorf("error getting conmon version: %w", err) + } + + conmon := define.ConmonInfo{ + Package: conmonPackage, + Path: r.conmonPath, + Version: conmonVersion, + } + ocirt := define.OCIRuntimeInfo{ + Name: r.name, + Path: r.path, + Package: runtimePackage, + Version: runtimeVersion, + } + return &conmon, &ocirt, nil +} + +// makeAccessible changes the path permission and each parent directory to have --x--x--x +func makeAccessible(path string, uid, gid int) error { + for ; path != "/"; path = filepath.Dir(path) { + st, err := os.Stat(path) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + if int(st.Sys().(*syscall.Stat_t).Uid) == uid && int(st.Sys().(*syscall.Stat_t).Gid) == gid { + continue + } + if st.Mode()&0111 != 0111 { + if err := os.Chmod(path, st.Mode()|0111); err != nil { + return err + } + } + } + return nil +} + +// Wait for a container which has been sent a signal to stop +func waitContainerStop(ctr *Container, timeout time.Duration) error { + return waitPidStop(ctr.state.PID, timeout) +} + +// Wait for a given PID to stop +func waitPidStop(pid int, timeout time.Duration) error { + done := make(chan struct{}) + chControl := make(chan struct{}) + go func() { + for { + select { + case <-chControl: + return + default: + if err := unix.Kill(pid, 0); err != nil { + if err == unix.ESRCH { + close(done) + return + } + logrus.Errorf("Pinging PID %d with signal 0: %v", pid, err) + } + time.Sleep(100 * time.Millisecond) + } + } + }() + select { + case <-done: + return nil + case <-time.After(timeout): + close(chControl) + return fmt.Errorf("given PIDs did not die within timeout") + } +} + +func (r *ConmonOCIRuntime) getLogTag(ctr *Container) (string, error) { + logTag := ctr.LogTag() + if logTag == "" { + return "", nil + } + data, err := ctr.inspectLocked(false) + if err != nil { + // FIXME: this error should probably be returned + return "", nil //nolint: nilerr + } + tmpl, err := template.New("container").Parse(logTag) + if err != nil { + return "", fmt.Errorf("template parsing error %s: %w", logTag, err) + } + var b bytes.Buffer + err = tmpl.Execute(&b, data) + if err != nil { + return "", err + } + return b.String(), nil +} + +// createOCIContainer generates this container's main conmon instance and prepares it for starting +func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) { + var stderrBuf bytes.Buffer + + runtimeDir, err := util.GetRuntimeDir() + if err != nil { + return 0, err + } + + parentSyncPipe, childSyncPipe, err := newPipe() + if err != nil { + return 0, fmt.Errorf("error creating socket pair: %w", err) + } + defer errorhandling.CloseQuiet(parentSyncPipe) + + childStartPipe, parentStartPipe, err := newPipe() + if err != nil { + return 0, fmt.Errorf("error creating socket pair for start pipe: %w", err) + } + + defer errorhandling.CloseQuiet(parentStartPipe) + + var ociLog string + if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON { + ociLog = filepath.Join(ctr.state.RunDir, "oci-log") + } + + logTag, err := r.getLogTag(ctr) + if err != nil { + return 0, err + } + + if ctr.config.CgroupsMode == cgroupSplit { + if err := utils.MoveUnderCgroupSubtree("runtime"); err != nil { + return 0, err + } + } + + pidfile := ctr.config.PidFile + if pidfile == "" { + pidfile = filepath.Join(ctr.state.RunDir, "pidfile") + } + + args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag) + + if ctr.config.SdNotifyMode == define.SdNotifyModeContainer && ctr.config.SdNotifySocket != "" { + args = append(args, fmt.Sprintf("--sdnotify-socket=%s", ctr.config.SdNotifySocket)) + } + + if ctr.config.Spec.Process.Terminal { + args = append(args, "-t") + } else if ctr.config.Stdin { + args = append(args, "-i") + } + + if ctr.config.Timeout > 0 { + args = append(args, fmt.Sprintf("--timeout=%d", ctr.config.Timeout)) + } + + if !r.enableKeyring { + args = append(args, "--no-new-keyring") + } + if ctr.config.ConmonPidFile != "" { + args = append(args, "--conmon-pidfile", ctr.config.ConmonPidFile) + } + + if r.noPivot { + args = append(args, "--no-pivot") + } + + exitCommand, err := specgenutil.CreateExitCommandArgs(ctr.runtime.storageConfig, ctr.runtime.config, logrus.IsLevelEnabled(logrus.DebugLevel), ctr.AutoRemove(), false) + if err != nil { + return 0, err + } + exitCommand = append(exitCommand, ctr.config.ID) + + args = append(args, "--exit-command", exitCommand[0]) + for _, arg := range exitCommand[1:] { + args = append(args, []string{"--exit-command-arg", arg}...) + } + + // Pass down the LISTEN_* environment (see #10443). + preserveFDs := ctr.config.PreserveFDs + if val := os.Getenv("LISTEN_FDS"); val != "" { + if ctr.config.PreserveFDs > 0 { + logrus.Warnf("Ignoring LISTEN_FDS to preserve custom user-specified FDs") + } else { + fds, err := strconv.Atoi(val) + if err != nil { + return 0, fmt.Errorf("converting LISTEN_FDS=%s: %w", val, err) + } + preserveFDs = uint(fds) + } + } + + if preserveFDs > 0 { + args = append(args, formatRuntimeOpts("--preserve-fds", fmt.Sprintf("%d", preserveFDs))...) + } + + if restoreOptions != nil { + args = append(args, "--restore", ctr.CheckpointPath()) + if restoreOptions.TCPEstablished { + args = append(args, "--runtime-opt", "--tcp-established") + } + if restoreOptions.FileLocks { + args = append(args, "--runtime-opt", "--file-locks") + } + if restoreOptions.Pod != "" { + mountLabel := ctr.config.MountLabel + processLabel := ctr.config.ProcessLabel + if mountLabel != "" { + args = append( + args, + "--runtime-opt", + fmt.Sprintf( + "--lsm-mount-context=%s", + mountLabel, + ), + ) + } + if processLabel != "" { + args = append( + args, + "--runtime-opt", + fmt.Sprintf( + "--lsm-profile=selinux:%s", + processLabel, + ), + ) + } + } + } + + logrus.WithFields(logrus.Fields{ + "args": args, + }).Debugf("running conmon: %s", r.conmonPath) + + cmd := exec.Command(r.conmonPath, args...) + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + } + // TODO this is probably a really bad idea for some uses + // Make this configurable + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if ctr.config.Spec.Process.Terminal { + cmd.Stderr = &stderrBuf + } + + // 0, 1 and 2 are stdin, stdout and stderr + conmonEnv := r.configureConmonEnv(runtimeDir) + + var filesToClose []*os.File + if preserveFDs > 0 { + for fd := 3; fd < int(3+preserveFDs); fd++ { + f := os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd)) + filesToClose = append(filesToClose, f) + cmd.ExtraFiles = append(cmd.ExtraFiles, f) + } + } + + cmd.Env = r.conmonEnv + // we don't want to step on users fds they asked to preserve + // Since 0-2 are used for stdio, start the fds we pass in at preserveFDs+3 + cmd.Env = append(cmd.Env, fmt.Sprintf("_OCI_SYNCPIPE=%d", preserveFDs+3), fmt.Sprintf("_OCI_STARTPIPE=%d", preserveFDs+4)) + cmd.Env = append(cmd.Env, conmonEnv...) + cmd.ExtraFiles = append(cmd.ExtraFiles, childSyncPipe, childStartPipe) + + if r.reservePorts && !rootless.IsRootless() && !ctr.config.NetMode.IsSlirp4netns() { + ports, err := bindPorts(ctr.convertPortMappings()) + if err != nil { + return 0, err + } + filesToClose = append(filesToClose, ports...) + + // Leak the port we bound in the conmon process. These fd's won't be used + // by the container and conmon will keep the ports busy so that another + // process cannot use them. + cmd.ExtraFiles = append(cmd.ExtraFiles, ports...) + } + + if ctr.config.NetMode.IsSlirp4netns() || rootless.IsRootless() { + if ctr.config.PostConfigureNetNS { + havePortMapping := len(ctr.config.PortMappings) > 0 + if havePortMapping { + ctr.rootlessPortSyncR, ctr.rootlessPortSyncW, err = os.Pipe() + if err != nil { + return 0, fmt.Errorf("failed to create rootless port sync pipe: %w", err) + } + } + ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe() + if err != nil { + return 0, fmt.Errorf("failed to create rootless network sync pipe: %w", err) + } + } else { + if ctr.rootlessSlirpSyncR != nil { + defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncR) + } + if ctr.rootlessSlirpSyncW != nil { + defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncW) + } + } + // Leak one end in conmon, the other one will be leaked into slirp4netns + cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessSlirpSyncW) + + if ctr.rootlessPortSyncW != nil { + defer errorhandling.CloseQuiet(ctr.rootlessPortSyncW) + // Leak one end in conmon, the other one will be leaked into rootlessport + cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessPortSyncW) + } + } + var runtimeRestoreStarted time.Time + if restoreOptions != nil { + runtimeRestoreStarted = time.Now() + } + err = startCommand(cmd, ctr) + + // regardless of whether we errored or not, we no longer need the children pipes + childSyncPipe.Close() + childStartPipe.Close() + if err != nil { + return 0, err + } + if err := r.moveConmonToCgroupAndSignal(ctr, cmd, parentStartPipe); err != nil { + return 0, err + } + /* Wait for initial setup and fork, and reap child */ + err = cmd.Wait() + if err != nil { + return 0, err + } + + pid, err := readConmonPipeData(r.name, parentSyncPipe, ociLog) + if err != nil { + if err2 := r.DeleteContainer(ctr); err2 != nil { + logrus.Errorf("Removing container %s from runtime after creation failed", ctr.ID()) + } + return 0, err + } + ctr.state.PID = pid + + conmonPID, err := readConmonPidFile(ctr.config.ConmonPidFile) + if err != nil { + logrus.Warnf("Error reading conmon pid file for container %s: %v", ctr.ID(), err) + } else if conmonPID > 0 { + // conmon not having a pid file is a valid state, so don't set it if we don't have it + logrus.Infof("Got Conmon PID as %d", conmonPID) + ctr.state.ConmonPID = conmonPID + } + + runtimeRestoreDuration := func() int64 { + if restoreOptions != nil && restoreOptions.PrintStats { + return time.Since(runtimeRestoreStarted).Microseconds() + } + return 0 + }() + + // These fds were passed down to the runtime. Close them + // and not interfere + for _, f := range filesToClose { + errorhandling.CloseQuiet(f) + } + + return runtimeRestoreDuration, nil +} + +// configureConmonEnv gets the environment values to add to conmon's exec struct +// TODO this may want to be less hardcoded/more configurable in the future +func (r *ConmonOCIRuntime) configureConmonEnv(runtimeDir string) []string { + var env []string + for _, e := range os.Environ() { + if strings.HasPrefix(e, "LC_") { + env = append(env, e) + } + } + conf, ok := os.LookupEnv("CONTAINERS_CONF") + if ok { + env = append(env, fmt.Sprintf("CONTAINERS_CONF=%s", conf)) + } + env = append(env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)) + env = append(env, fmt.Sprintf("_CONTAINERS_USERNS_CONFIGURED=%s", os.Getenv("_CONTAINERS_USERNS_CONFIGURED"))) + env = append(env, fmt.Sprintf("_CONTAINERS_ROOTLESS_UID=%s", os.Getenv("_CONTAINERS_ROOTLESS_UID"))) + home := homedir.Get() + if home != "" { + env = append(env, fmt.Sprintf("HOME=%s", home)) + } + + return env +} + +// sharedConmonArgs takes common arguments for exec and create/restore and formats them for the conmon CLI +func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, ociLogPath, logDriver, logTag string) []string { + // set the conmon API version to be able to use the correct sync struct keys + args := []string{ + "--api-version", "1", + "-c", ctr.ID(), + "-u", cuuid, + "-r", r.path, + "-b", bundlePath, + "-p", pidPath, + "-n", ctr.Name(), + "--exit-dir", exitDir, + "--full-attach", + } + if len(r.runtimeFlags) > 0 { + rFlags := []string{} + for _, arg := range r.runtimeFlags { + rFlags = append(rFlags, "--runtime-arg", arg) + } + args = append(args, rFlags...) + } + + if ctr.CgroupManager() == config.SystemdCgroupsManager && !ctr.config.NoCgroups && ctr.config.CgroupsMode != cgroupSplit { + args = append(args, "-s") + } + + var logDriverArg string + switch logDriver { + case define.JournaldLogging: + logDriverArg = define.JournaldLogging + case define.NoLogging: + logDriverArg = define.NoLogging + case define.PassthroughLogging: + logDriverArg = define.PassthroughLogging + //lint:ignore ST1015 the default case has to be here + default: //nolint:stylecheck,gocritic + // No case here should happen except JSONLogging, but keep this here in case the options are extended + logrus.Errorf("%s logging specified but not supported. Choosing k8s-file logging instead", ctr.LogDriver()) + fallthrough + case "": + // to get here, either a user would specify `--log-driver ""`, or this came from another place in libpod + // since the former case is obscure, and the latter case isn't an error, let's silently fallthrough + fallthrough + case define.JSONLogging: + fallthrough + case define.KubernetesLogging: + logDriverArg = fmt.Sprintf("%s:%s", define.KubernetesLogging, logPath) + } + + args = append(args, "-l", logDriverArg) + logLevel := logrus.GetLevel() + args = append(args, "--log-level", logLevel.String()) + + if logLevel == logrus.DebugLevel { + logrus.Debugf("%s messages will be logged to syslog", r.conmonPath) + args = append(args, "--syslog") + } + + size := r.logSizeMax + if ctr.config.LogSize > 0 { + size = ctr.config.LogSize + } + if size > 0 { + args = append(args, "--log-size-max", fmt.Sprintf("%v", size)) + } + + if ociLogPath != "" { + args = append(args, "--runtime-arg", "--log-format=json", "--runtime-arg", "--log", fmt.Sprintf("--runtime-arg=%s", ociLogPath)) + } + if logTag != "" { + args = append(args, "--log-tag", logTag) + } + if ctr.config.NoCgroups { + logrus.Debugf("Running with no Cgroups") + args = append(args, "--runtime-arg", "--cgroup-manager", "--runtime-arg", "disabled") + } + return args +} + +func startCommand(cmd *exec.Cmd, ctr *Container) error { + // Make sure to unset the NOTIFY_SOCKET and reset it afterwards if needed. + switch ctr.config.SdNotifyMode { + case define.SdNotifyModeContainer, define.SdNotifyModeIgnore: + if prev := os.Getenv("NOTIFY_SOCKET"); prev != "" { + if err := os.Unsetenv("NOTIFY_SOCKET"); err != nil { + logrus.Warnf("Error unsetting NOTIFY_SOCKET %v", err) + } + defer func() { + if err := os.Setenv("NOTIFY_SOCKET", prev); err != nil { + logrus.Errorf("Resetting NOTIFY_SOCKET=%s", prev) + } + }() + } + } + + return cmd.Start() +} + +// moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup +// it then signals for conmon to start by sending nonce data down the start fd +func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File) error { + mustCreateCgroup := true + + if ctr.config.NoCgroups { + mustCreateCgroup = false + } + + // If cgroup creation is disabled - just signal. + switch ctr.config.CgroupsMode { + case "disabled", "no-conmon", cgroupSplit: + mustCreateCgroup = false + } + + // $INVOCATION_ID is set by systemd when running as a service. + if ctr.runtime.RemoteURI() == "" && os.Getenv("INVOCATION_ID") != "" { + mustCreateCgroup = false + } + + if mustCreateCgroup { + // Usually rootless users are not allowed to configure cgroupfs. + // There are cases though, where it is allowed, e.g. if the cgroup + // is manually configured and chowned). Avoid detecting all + // such cases and simply use a lower log level. + logLevel := logrus.WarnLevel + if rootless.IsRootless() { + logLevel = logrus.InfoLevel + } + // TODO: This should be a switch - we are not guaranteed that + // there are only 2 valid cgroup managers + cgroupParent := ctr.CgroupParent() + cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon") + Resource := ctr.Spec().Linux.Resources + cgroupResources, err := GetLimits(Resource) + if err != nil { + logrus.StandardLogger().Log(logLevel, "Could not get ctr resources") + } + if ctr.CgroupManager() == config.SystemdCgroupsManager { + unitName := createUnitName("libpod-conmon", ctr.ID()) + realCgroupParent := cgroupParent + splitParent := strings.Split(cgroupParent, "/") + if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 { + realCgroupParent = splitParent[len(splitParent)-1] + } + + logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName) + if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil { + logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to systemd sandbox cgroup: %v", err) + } + } else { + control, err := cgroups.New(cgroupPath, &cgroupResources) + if err != nil { + logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + } else if err := control.AddPid(cmd.Process.Pid); err != nil { + // we need to remove this defer and delete the cgroup once conmon exits + // maybe need a conmon monitor? + logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + } + } + } + + /* We set the cgroup, now the child can start creating children */ + if err := writeConmonPipeData(startFd); err != nil { + return err + } + return nil +} + +// newPipe creates a unix socket pair for communication. +// Returns two files - first is parent, second is child. +func newPipe() (*os.File, *os.File, error) { + fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0) + if err != nil { + return nil, nil, err + } + return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil +} + +// readConmonPidFile attempts to read conmon's pid from its pid file +func readConmonPidFile(pidFile string) (int, error) { + // Let's try reading the Conmon pid at the same time. + if pidFile != "" { + contents, err := ioutil.ReadFile(pidFile) + if err != nil { + return -1, err + } + // Convert it to an int + conmonPID, err := strconv.Atoi(string(contents)) + if err != nil { + return -1, err + } + return conmonPID, nil + } + return 0, nil +} + +// readConmonPipeData attempts to read a syncInfo struct from the pipe +func readConmonPipeData(runtimeName string, pipe *os.File, ociLog string) (int, error) { + // syncInfo is used to return data from monitor process to daemon + type syncInfo struct { + Data int `json:"data"` + Message string `json:"message,omitempty"` + } + + // Wait to get container pid from conmon + type syncStruct struct { + si *syncInfo + err error + } + ch := make(chan syncStruct) + go func() { + var si *syncInfo + rdr := bufio.NewReader(pipe) + b, err := rdr.ReadBytes('\n') + // ignore EOF here, error is returned even when data was read + // if it is no valid json unmarshal will fail below + if err != nil && !errors.Is(err, io.EOF) { + ch <- syncStruct{err: err} + } + if err := json.Unmarshal(b, &si); err != nil { + ch <- syncStruct{err: fmt.Errorf("conmon bytes %q: %w", string(b), err)} + return + } + ch <- syncStruct{si: si} + }() + + data := -1 //nolint: wastedassign + select { + case ss := <-ch: + if ss.err != nil { + if ociLog != "" { + ociLogData, err := ioutil.ReadFile(ociLog) + if err == nil { + var ociErr ociError + if err := json.Unmarshal(ociLogData, &ociErr); err == nil { + return -1, getOCIRuntimeError(runtimeName, ociErr.Msg) + } + } + } + return -1, fmt.Errorf("container create failed (no logs from conmon): %w", ss.err) + } + logrus.Debugf("Received: %d", ss.si.Data) + if ss.si.Data < 0 { + if ociLog != "" { + ociLogData, err := ioutil.ReadFile(ociLog) + if err == nil { + var ociErr ociError + if err := json.Unmarshal(ociLogData, &ociErr); err == nil { + return ss.si.Data, getOCIRuntimeError(runtimeName, ociErr.Msg) + } + } + } + // If we failed to parse the JSON errors, then print the output as it is + if ss.si.Message != "" { + return ss.si.Data, getOCIRuntimeError(runtimeName, ss.si.Message) + } + return ss.si.Data, fmt.Errorf("container create failed: %w", define.ErrInternal) + } + data = ss.si.Data + case <-time.After(define.ContainerCreateTimeout): + return -1, fmt.Errorf("container creation timeout: %w", define.ErrInternal) + } + return data, nil +} + +// writeConmonPipeData writes nonce data to a pipe +func writeConmonPipeData(pipe *os.File) error { + someData := []byte{0} + _, err := pipe.Write(someData) + return err +} + +// formatRuntimeOpts prepends opts passed to it with --runtime-opt for passing to conmon +func formatRuntimeOpts(opts ...string) []string { + args := make([]string, 0, len(opts)*2) + for _, o := range opts { + args = append(args, "--runtime-opt", o) + } + return args +} + +// getConmonVersion returns a string representation of the conmon version. +func (r *ConmonOCIRuntime) getConmonVersion() (string, error) { + output, err := utils.ExecCmd(r.conmonPath, "--version") + if err != nil { + return "", err + } + return strings.TrimSuffix(strings.Replace(output, "\n", ", ", 1), "\n"), nil +} + +// getOCIRuntimeVersion returns a string representation of the OCI runtime's +// version. +func (r *ConmonOCIRuntime) getOCIRuntimeVersion() (string, error) { + output, err := utils.ExecCmd(r.path, "--version") + if err != nil { + return "", err + } + return strings.TrimSuffix(output, "\n"), nil +} + +// Copy data from container to HTTP connection, for terminal attach. +// Container is the container's attach socket connection, http is a buffer for +// the HTTP connection. cid is the ID of the container the attach session is +// running for (used solely for error messages). +func httpAttachTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter, cid string) error { + buf := make([]byte, bufferSize) + for { + numR, err := container.Read(buf) + logrus.Debugf("Read fd(%d) %d/%d bytes for container %s", int(buf[0]), numR, len(buf), cid) + + if numR > 0 { + switch buf[0] { + case AttachPipeStdout: + // Do nothing + default: + logrus.Errorf("Received unexpected attach type %+d, discarding %d bytes", buf[0], numR) + continue + } + + numW, err2 := http.Write(buf[1:numR]) + if err2 != nil { + if err != nil { + logrus.Errorf("Reading container %s STDOUT: %v", cid, err) + } + return err2 + } else if numW+1 != numR { + return io.ErrShortWrite + } + // We need to force the buffer to write immediately, so + // there isn't a delay on the terminal side. + if err2 := http.Flush(); err2 != nil { + if err != nil { + logrus.Errorf("Reading container %s STDOUT: %v", cid, err) + } + return err2 + } + } + if err != nil { + if err == io.EOF { + return nil + } + return err + } + } +} + +// Copy data from a container to an HTTP connection, for non-terminal attach. +// Appends a header to multiplex input. +func httpAttachNonTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter, cid string, stdin, stdout, stderr bool) error { + buf := make([]byte, bufferSize) + for { + numR, err := container.Read(buf) + if numR > 0 { + var headerBuf []byte + + // Subtract 1 because we strip the first byte (used for + // multiplexing by Conmon). + headerLen := uint32(numR - 1) + // Practically speaking, we could make this buf[0] - 1, + // but we need to validate it anyway. + switch buf[0] { + case AttachPipeStdin: + headerBuf = makeHTTPAttachHeader(0, headerLen) + if !stdin { + continue + } + case AttachPipeStdout: + if !stdout { + continue + } + headerBuf = makeHTTPAttachHeader(1, headerLen) + case AttachPipeStderr: + if !stderr { + continue + } + headerBuf = makeHTTPAttachHeader(2, headerLen) + default: + logrus.Errorf("Received unexpected attach type %+d, discarding %d bytes", buf[0], numR) + continue + } + + numH, err2 := http.Write(headerBuf) + if err2 != nil { + if err != nil { + logrus.Errorf("Reading container %s standard streams: %v", cid, err) + } + + return err2 + } + // Hardcoding header length is pretty gross, but + // fast. Should be safe, as this is a fixed part + // of the protocol. + if numH != 8 { + if err != nil { + logrus.Errorf("Reading container %s standard streams: %v", cid, err) + } + + return io.ErrShortWrite + } + + numW, err2 := http.Write(buf[1:numR]) + if err2 != nil { + if err != nil { + logrus.Errorf("Reading container %s standard streams: %v", cid, err) + } + + return err2 + } else if numW+1 != numR { + if err != nil { + logrus.Errorf("Reading container %s standard streams: %v", cid, err) + } + + return io.ErrShortWrite + } + // We need to force the buffer to write immediately, so + // there isn't a delay on the terminal side. + if err2 := http.Flush(); err2 != nil { + if err != nil { + logrus.Errorf("Reading container %s STDOUT: %v", cid, err) + } + return err2 + } + } + if err != nil { + if err == io.EOF { + return nil + } + + return err + } + } +} + +// GetLimits converts spec resource limits to cgroup consumable limits +func GetLimits(resource *spec.LinuxResources) (runcconfig.Resources, error) { + if resource == nil { + resource = &spec.LinuxResources{} + } + final := &runcconfig.Resources{} + devs := []*devices.Rule{} + + // Devices + for _, entry := range resource.Devices { + if entry.Major == nil || entry.Minor == nil { + continue + } + runeType := 'a' + switch entry.Type { + case "b": + runeType = 'b' + case "c": + runeType = 'c' + } + + devs = append(devs, &devices.Rule{ + Type: devices.Type(runeType), + Major: *entry.Major, + Minor: *entry.Minor, + Permissions: devices.Permissions(entry.Access), + Allow: entry.Allow, + }) + } + final.Devices = devs + + // HugepageLimits + pageLimits := []*runcconfig.HugepageLimit{} + for _, entry := range resource.HugepageLimits { + pageLimits = append(pageLimits, &runcconfig.HugepageLimit{ + Pagesize: entry.Pagesize, + Limit: entry.Limit, + }) + } + final.HugetlbLimit = pageLimits + + // Networking + netPriorities := []*runcconfig.IfPrioMap{} + if resource.Network != nil { + for _, entry := range resource.Network.Priorities { + netPriorities = append(netPriorities, &runcconfig.IfPrioMap{ + Interface: entry.Name, + Priority: int64(entry.Priority), + }) + } + } + final.NetPrioIfpriomap = netPriorities + rdma := make(map[string]runcconfig.LinuxRdma) + for name, entry := range resource.Rdma { + rdma[name] = runcconfig.LinuxRdma{HcaHandles: entry.HcaHandles, HcaObjects: entry.HcaObjects} + } + final.Rdma = rdma + + // Memory + if resource.Memory != nil { + if resource.Memory.Limit != nil { + final.Memory = *resource.Memory.Limit + } + if resource.Memory.Reservation != nil { + final.MemoryReservation = *resource.Memory.Reservation + } + if resource.Memory.Swap != nil { + final.MemorySwap = *resource.Memory.Swap + } + if resource.Memory.Swappiness != nil { + final.MemorySwappiness = resource.Memory.Swappiness + } + } + + // CPU + if resource.CPU != nil { + if resource.CPU.Period != nil { + final.CpuPeriod = *resource.CPU.Period + } + if resource.CPU.Quota != nil { + final.CpuQuota = *resource.CPU.Quota + } + if resource.CPU.RealtimePeriod != nil { + final.CpuRtPeriod = *resource.CPU.RealtimePeriod + } + if resource.CPU.RealtimeRuntime != nil { + final.CpuRtRuntime = *resource.CPU.RealtimeRuntime + } + if resource.CPU.Shares != nil { + final.CpuShares = *resource.CPU.Shares + } + final.CpusetCpus = resource.CPU.Cpus + final.CpusetMems = resource.CPU.Mems + } + + // BlkIO + if resource.BlockIO != nil { + if len(resource.BlockIO.ThrottleReadBpsDevice) > 0 { + for _, entry := range resource.BlockIO.ThrottleReadBpsDevice { + throttle := &runcconfig.ThrottleDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + throttle.BlockIODevice = *dev + throttle.Rate = entry.Rate + final.BlkioThrottleReadBpsDevice = append(final.BlkioThrottleReadBpsDevice, throttle) + } + } + if len(resource.BlockIO.ThrottleWriteBpsDevice) > 0 { + for _, entry := range resource.BlockIO.ThrottleWriteBpsDevice { + throttle := &runcconfig.ThrottleDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + throttle.BlockIODevice = *dev + throttle.Rate = entry.Rate + final.BlkioThrottleWriteBpsDevice = append(final.BlkioThrottleWriteBpsDevice, throttle) + } + } + if len(resource.BlockIO.ThrottleReadIOPSDevice) > 0 { + for _, entry := range resource.BlockIO.ThrottleReadIOPSDevice { + throttle := &runcconfig.ThrottleDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + throttle.BlockIODevice = *dev + throttle.Rate = entry.Rate + final.BlkioThrottleReadIOPSDevice = append(final.BlkioThrottleReadIOPSDevice, throttle) + } + } + if len(resource.BlockIO.ThrottleWriteIOPSDevice) > 0 { + for _, entry := range resource.BlockIO.ThrottleWriteIOPSDevice { + throttle := &runcconfig.ThrottleDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + throttle.BlockIODevice = *dev + throttle.Rate = entry.Rate + final.BlkioThrottleWriteIOPSDevice = append(final.BlkioThrottleWriteIOPSDevice, throttle) + } + } + if resource.BlockIO.LeafWeight != nil { + final.BlkioLeafWeight = *resource.BlockIO.LeafWeight + } + if resource.BlockIO.Weight != nil { + final.BlkioWeight = *resource.BlockIO.Weight + } + if len(resource.BlockIO.WeightDevice) > 0 { + for _, entry := range resource.BlockIO.WeightDevice { + weight := &runcconfig.WeightDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + if entry.Weight != nil { + weight.Weight = *entry.Weight + } + if entry.LeafWeight != nil { + weight.LeafWeight = *entry.LeafWeight + } + weight.BlockIODevice = *dev + final.BlkioWeightDevice = append(final.BlkioWeightDevice, weight) + } + } + } + + // Pids + if resource.Pids != nil { + final.PidsLimit = resource.Pids.Limit + } + + // Networking + if resource.Network != nil { + if resource.Network.ClassID != nil { + final.NetClsClassid = *resource.Network.ClassID + } + } + + // Unified state + final.Unified = resource.Unified + + return *final, nil +} diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go deleted file mode 100644 index 1b654ed33..000000000 --- a/libpod/oci_conmon_linux.go +++ /dev/null @@ -1,1929 +0,0 @@ -//go:build linux -// +build linux - -package libpod - -import ( - "bufio" - "bytes" - "context" - "errors" - "fmt" - "io" - "io/ioutil" - "net" - "net/http" - "os" - "os/exec" - "path/filepath" - "runtime" - "strconv" - "strings" - "sync" - "syscall" - "text/template" - "time" - - runcconfig "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/devices" - - "github.com/containers/common/pkg/cgroups" - "github.com/containers/common/pkg/config" - "github.com/containers/common/pkg/resize" - cutil "github.com/containers/common/pkg/util" - conmonConfig "github.com/containers/conmon/runner/config" - "github.com/containers/podman/v4/libpod/define" - "github.com/containers/podman/v4/libpod/logs" - "github.com/containers/podman/v4/pkg/checkpoint/crutils" - "github.com/containers/podman/v4/pkg/errorhandling" - "github.com/containers/podman/v4/pkg/rootless" - "github.com/containers/podman/v4/pkg/specgenutil" - "github.com/containers/podman/v4/pkg/util" - "github.com/containers/podman/v4/utils" - "github.com/containers/storage/pkg/homedir" - pmount "github.com/containers/storage/pkg/mount" - spec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/opencontainers/selinux/go-selinux/label" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -const ( - // This is Conmon's STDIO_BUF_SIZE. I don't believe we have access to it - // directly from the Go code, so const it here - // Important: The conmon attach socket uses an extra byte at the beginning of each - // message to specify the STREAM so we have to increase the buffer size by one - bufferSize = conmonConfig.BufSize + 1 -) - -// ConmonOCIRuntime is an OCI runtime managed by Conmon. -// TODO: Make all calls to OCI runtime have a timeout. -type ConmonOCIRuntime struct { - name string - path string - conmonPath string - conmonEnv []string - tmpDir string - exitsDir string - logSizeMax int64 - noPivot bool - reservePorts bool - runtimeFlags []string - supportsJSON bool - supportsKVM bool - supportsNoCgroups bool - enableKeyring bool -} - -// Make a new Conmon-based OCI runtime with the given options. -// Conmon will wrap the given OCI runtime, which can be `runc`, `crun`, or -// any runtime with a runc-compatible CLI. -// The first path that points to a valid executable will be used. -// Deliberately private. Someone should not be able to construct this outside of -// libpod. -func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtimeFlags []string, runtimeCfg *config.Config) (OCIRuntime, error) { - if name == "" { - return nil, fmt.Errorf("the OCI runtime must be provided a non-empty name: %w", define.ErrInvalidArg) - } - - // Make lookup tables for runtime support - supportsJSON := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsJSON)) - supportsNoCgroups := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsNoCgroups)) - supportsKVM := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsKVM)) - for _, r := range runtimeCfg.Engine.RuntimeSupportsJSON { - supportsJSON[r] = true - } - for _, r := range runtimeCfg.Engine.RuntimeSupportsNoCgroups { - supportsNoCgroups[r] = true - } - for _, r := range runtimeCfg.Engine.RuntimeSupportsKVM { - supportsKVM[r] = true - } - - runtime := new(ConmonOCIRuntime) - runtime.name = name - runtime.conmonPath = conmonPath - runtime.runtimeFlags = runtimeFlags - - runtime.conmonEnv = runtimeCfg.Engine.ConmonEnvVars - runtime.tmpDir = runtimeCfg.Engine.TmpDir - runtime.logSizeMax = runtimeCfg.Containers.LogSizeMax - runtime.noPivot = runtimeCfg.Engine.NoPivotRoot - runtime.reservePorts = runtimeCfg.Engine.EnablePortReservation - runtime.enableKeyring = runtimeCfg.Containers.EnableKeyring - - // TODO: probe OCI runtime for feature and enable automatically if - // available. - - base := filepath.Base(name) - runtime.supportsJSON = supportsJSON[base] - runtime.supportsNoCgroups = supportsNoCgroups[base] - runtime.supportsKVM = supportsKVM[base] - - foundPath := false - for _, path := range paths { - stat, err := os.Stat(path) - if err != nil { - if os.IsNotExist(err) { - continue - } - return nil, fmt.Errorf("cannot stat OCI runtime %s path: %w", name, err) - } - if !stat.Mode().IsRegular() { - continue - } - foundPath = true - logrus.Tracef("found runtime %q", path) - runtime.path = path - break - } - - // Search the $PATH as last fallback - if !foundPath { - if foundRuntime, err := exec.LookPath(name); err == nil { - foundPath = true - runtime.path = foundRuntime - logrus.Debugf("using runtime %q from $PATH: %q", name, foundRuntime) - } - } - - if !foundPath { - return nil, fmt.Errorf("no valid executable found for OCI runtime %s: %w", name, define.ErrInvalidArg) - } - - runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits") - - // Create the exit files and attach sockets directories - if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil { - // The directory is allowed to exist - if !os.IsExist(err) { - return nil, fmt.Errorf("error creating OCI runtime exit files directory: %w", err) - } - } - return runtime, nil -} - -// Name returns the name of the runtime being wrapped by Conmon. -func (r *ConmonOCIRuntime) Name() string { - return r.name -} - -// Path returns the path of the OCI runtime being wrapped by Conmon. -func (r *ConmonOCIRuntime) Path() string { - return r.path -} - -// hasCurrentUserMapped checks whether the current user is mapped inside the container user namespace -func hasCurrentUserMapped(ctr *Container) bool { - if len(ctr.config.IDMappings.UIDMap) == 0 && len(ctr.config.IDMappings.GIDMap) == 0 { - return true - } - uid := os.Geteuid() - for _, m := range ctr.config.IDMappings.UIDMap { - if uid >= m.HostID && uid < m.HostID+m.Size { - return true - } - } - return false -} - -// CreateContainer creates a container. -func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) { - // always make the run dir accessible to the current user so that the PID files can be read without - // being in the rootless user namespace. - if err := makeAccessible(ctr.state.RunDir, 0, 0); err != nil { - return 0, err - } - if !hasCurrentUserMapped(ctr) { - for _, i := range []string{ctr.state.RunDir, ctr.runtime.config.Engine.TmpDir, ctr.config.StaticDir, ctr.state.Mountpoint, ctr.runtime.config.Engine.VolumePath} { - if err := makeAccessible(i, ctr.RootUID(), ctr.RootGID()); err != nil { - return 0, err - } - } - - // if we are running a non privileged container, be sure to umount some kernel paths so they are not - // bind mounted inside the container at all. - if !ctr.config.Privileged && !rootless.IsRootless() { - type result struct { - restoreDuration int64 - err error - } - ch := make(chan result) - go func() { - runtime.LockOSThread() - restoreDuration, err := func() (int64, error) { - fd, err := os.Open(fmt.Sprintf("/proc/%d/task/%d/ns/mnt", os.Getpid(), unix.Gettid())) - if err != nil { - return 0, err - } - defer errorhandling.CloseQuiet(fd) - - // create a new mountns on the current thread - if err = unix.Unshare(unix.CLONE_NEWNS); err != nil { - return 0, err - } - defer func() { - if err := unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS); err != nil { - logrus.Errorf("Unable to clone new namespace: %q", err) - } - }() - - // don't spread our mounts around. We are setting only /sys to be slave - // so that the cleanup process is still able to umount the storage and the - // changes are propagated to the host. - err = unix.Mount("/sys", "/sys", "none", unix.MS_REC|unix.MS_SLAVE, "") - if err != nil { - return 0, fmt.Errorf("cannot make /sys slave: %w", err) - } - - mounts, err := pmount.GetMounts() - if err != nil { - return 0, err - } - for _, m := range mounts { - if !strings.HasPrefix(m.Mountpoint, "/sys/kernel") { - continue - } - err = unix.Unmount(m.Mountpoint, 0) - if err != nil && !os.IsNotExist(err) { - return 0, fmt.Errorf("cannot unmount %s: %w", m.Mountpoint, err) - } - } - return r.createOCIContainer(ctr, restoreOptions) - }() - ch <- result{ - restoreDuration: restoreDuration, - err: err, - } - }() - r := <-ch - return r.restoreDuration, r.err - } - } - return r.createOCIContainer(ctr, restoreOptions) -} - -// UpdateContainerStatus retrieves the current status of the container from the -// runtime. It updates the container's state but does not save it. -// If useRuntime is false, we will not directly hit runc to see the container's -// status, but will instead only check for the existence of the conmon exit file -// and update state to stopped if it exists. -func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container) error { - runtimeDir, err := util.GetRuntimeDir() - if err != nil { - return err - } - - // Store old state so we know if we were already stopped - oldState := ctr.state.State - - state := new(spec.State) - - cmd := exec.Command(r.path, "state", ctr.ID()) - cmd.Env = append(cmd.Env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)) - - outPipe, err := cmd.StdoutPipe() - if err != nil { - return fmt.Errorf("getting stdout pipe: %w", err) - } - errPipe, err := cmd.StderrPipe() - if err != nil { - return fmt.Errorf("getting stderr pipe: %w", err) - } - - if err := cmd.Start(); err != nil { - out, err2 := ioutil.ReadAll(errPipe) - if err2 != nil { - return fmt.Errorf("error getting container %s state: %w", ctr.ID(), err) - } - if strings.Contains(string(out), "does not exist") || strings.Contains(string(out), "No such file") { - if err := ctr.removeConmonFiles(); err != nil { - logrus.Debugf("unable to remove conmon files for container %s", ctr.ID()) - } - ctr.state.ExitCode = -1 - ctr.state.FinishedTime = time.Now() - ctr.state.State = define.ContainerStateExited - return ctr.runtime.state.AddContainerExitCode(ctr.ID(), ctr.state.ExitCode) - } - return fmt.Errorf("error getting container %s state. stderr/out: %s: %w", ctr.ID(), out, err) - } - defer func() { - _ = cmd.Wait() - }() - - if err := errPipe.Close(); err != nil { - return err - } - out, err := ioutil.ReadAll(outPipe) - if err != nil { - return fmt.Errorf("error reading stdout: %s: %w", ctr.ID(), err) - } - if err := json.NewDecoder(bytes.NewBuffer(out)).Decode(state); err != nil { - return fmt.Errorf("error decoding container status for container %s: %w", ctr.ID(), err) - } - ctr.state.PID = state.Pid - - switch state.Status { - case "created": - ctr.state.State = define.ContainerStateCreated - case "paused": - ctr.state.State = define.ContainerStatePaused - case "running": - ctr.state.State = define.ContainerStateRunning - case "stopped": - ctr.state.State = define.ContainerStateStopped - default: - return fmt.Errorf("unrecognized status returned by runtime for container %s: %s: %w", - ctr.ID(), state.Status, define.ErrInternal) - } - - // Only grab exit status if we were not already stopped - // If we were, it should already be in the database - if ctr.state.State == define.ContainerStateStopped && oldState != define.ContainerStateStopped { - if _, err := ctr.Wait(context.Background()); err != nil { - logrus.Errorf("Waiting for container %s to exit: %v", ctr.ID(), err) - } - return nil - } - - // Handle ContainerStateStopping - keep it unless the container - // transitioned to no longer running. - if oldState == define.ContainerStateStopping && (ctr.state.State == define.ContainerStatePaused || ctr.state.State == define.ContainerStateRunning) { - ctr.state.State = define.ContainerStateStopping - } - - return nil -} - -// StartContainer starts the given container. -// Sets time the container was started, but does not save it. -func (r *ConmonOCIRuntime) StartContainer(ctr *Container) error { - // TODO: streams should probably *not* be our STDIN/OUT/ERR - redirect to buffers? - runtimeDir, err := util.GetRuntimeDir() - if err != nil { - return err - } - env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)} - if path, ok := os.LookupEnv("PATH"); ok { - env = append(env, fmt.Sprintf("PATH=%s", path)) - } - if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "start", ctr.ID())...); err != nil { - return err - } - - ctr.state.StartedTime = time.Now() - - return nil -} - -// KillContainer sends the given signal to the given container. -// If all is set, send to all PIDs in the container. -// All is only supported if the container created cgroups. -func (r *ConmonOCIRuntime) KillContainer(ctr *Container, signal uint, all bool) error { - logrus.Debugf("Sending signal %d to container %s", signal, ctr.ID()) - runtimeDir, err := util.GetRuntimeDir() - if err != nil { - return err - } - env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)} - var args []string - args = append(args, r.runtimeFlags...) - if all { - args = append(args, "kill", "--all", ctr.ID(), fmt.Sprintf("%d", signal)) - } else { - args = append(args, "kill", ctr.ID(), fmt.Sprintf("%d", signal)) - } - if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...); err != nil { - // Update container state - there's a chance we failed because - // the container exited in the meantime. - if err2 := r.UpdateContainerStatus(ctr); err2 != nil { - logrus.Infof("Error updating status for container %s: %v", ctr.ID(), err2) - } - if ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited) { - return define.ErrCtrStateInvalid - } - return fmt.Errorf("error sending signal to container %s: %w", ctr.ID(), err) - } - - return nil -} - -// StopContainer stops a container, first using its given stop signal (or -// SIGTERM if no signal was specified), then using SIGKILL. -// Timeout is given in seconds. If timeout is 0, the container will be -// immediately kill with SIGKILL. -// Does not set finished time for container, assumes you will run updateStatus -// after to pull the exit code. -func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool) error { - logrus.Debugf("Stopping container %s (PID %d)", ctr.ID(), ctr.state.PID) - - // Ping the container to see if it's alive - // If it's not, it's already stopped, return - err := unix.Kill(ctr.state.PID, 0) - if err == unix.ESRCH { - return nil - } - - stopSignal := ctr.config.StopSignal - if stopSignal == 0 { - stopSignal = uint(syscall.SIGTERM) - } - - if timeout > 0 { - if err := r.KillContainer(ctr, stopSignal, all); err != nil { - // Is the container gone? - // If so, it probably died between the first check and - // our sending the signal - // The container is stopped, so exit cleanly - err := unix.Kill(ctr.state.PID, 0) - if err == unix.ESRCH { - return nil - } - - return err - } - - if err := waitContainerStop(ctr, time.Duration(timeout)*time.Second); err != nil { - logrus.Debugf("Timed out stopping container %s with %s, resorting to SIGKILL: %v", ctr.ID(), unix.SignalName(syscall.Signal(stopSignal)), err) - logrus.Warnf("StopSignal %s failed to stop container %s in %d seconds, resorting to SIGKILL", unix.SignalName(syscall.Signal(stopSignal)), ctr.Name(), timeout) - } else { - // No error, the container is dead - return nil - } - } - - if err := r.KillContainer(ctr, 9, all); err != nil { - // Again, check if the container is gone. If it is, exit cleanly. - err := unix.Kill(ctr.state.PID, 0) - if err == unix.ESRCH { - return nil - } - - return fmt.Errorf("error sending SIGKILL to container %s: %w", ctr.ID(), err) - } - - // Give runtime a few seconds to make it happen - if err := waitContainerStop(ctr, killContainerTimeout); err != nil { - return err - } - - return nil -} - -// DeleteContainer deletes a container from the OCI runtime. -func (r *ConmonOCIRuntime) DeleteContainer(ctr *Container) error { - runtimeDir, err := util.GetRuntimeDir() - if err != nil { - return err - } - env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)} - return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "delete", "--force", ctr.ID())...) -} - -// PauseContainer pauses the given container. -func (r *ConmonOCIRuntime) PauseContainer(ctr *Container) error { - runtimeDir, err := util.GetRuntimeDir() - if err != nil { - return err - } - env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)} - return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "pause", ctr.ID())...) -} - -// UnpauseContainer unpauses the given container. -func (r *ConmonOCIRuntime) UnpauseContainer(ctr *Container) error { - runtimeDir, err := util.GetRuntimeDir() - if err != nil { - return err - } - env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)} - return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "resume", ctr.ID())...) -} - -// HTTPAttach performs an attach for the HTTP API. -// The caller must handle closing the HTTP connection after this returns. -// The cancel channel is not closed; it is up to the caller to do so after -// this function returns. -// If this is a container with a terminal, we will stream raw. If it is not, we -// will stream with an 8-byte header to multiplex STDOUT and STDERR. -// Returns any errors that occurred, and whether the connection was successfully -// hijacked before that error occurred. -func (r *ConmonOCIRuntime) HTTPAttach(ctr *Container, req *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, detachKeys *string, cancel <-chan bool, hijackDone chan<- bool, streamAttach, streamLogs bool) (deferredErr error) { - isTerminal := false - if ctr.config.Spec.Process != nil { - isTerminal = ctr.config.Spec.Process.Terminal - } - - if streams != nil { - if !streams.Stdin && !streams.Stdout && !streams.Stderr { - return fmt.Errorf("must specify at least one stream to attach to: %w", define.ErrInvalidArg) - } - } - - attachSock, err := r.AttachSocketPath(ctr) - if err != nil { - return err - } - - var conn *net.UnixConn - if streamAttach { - newConn, err := openUnixSocket(attachSock) - if err != nil { - return fmt.Errorf("failed to connect to container's attach socket: %v: %w", attachSock, err) - } - conn = newConn - defer func() { - if err := conn.Close(); err != nil { - logrus.Errorf("Unable to close container %s attach socket: %q", ctr.ID(), err) - } - }() - - logrus.Debugf("Successfully connected to container %s attach socket %s", ctr.ID(), attachSock) - } - - detachString := ctr.runtime.config.Engine.DetachKeys - if detachKeys != nil { - detachString = *detachKeys - } - detach, err := processDetachKeys(detachString) - if err != nil { - return err - } - - attachStdout := true - attachStderr := true - attachStdin := true - if streams != nil { - attachStdout = streams.Stdout - attachStderr = streams.Stderr - attachStdin = streams.Stdin - } - - logrus.Debugf("Going to hijack container %s attach connection", ctr.ID()) - - // Alright, let's hijack. - hijacker, ok := w.(http.Hijacker) - if !ok { - return fmt.Errorf("unable to hijack connection") - } - - httpCon, httpBuf, err := hijacker.Hijack() - if err != nil { - return fmt.Errorf("error hijacking connection: %w", err) - } - - hijackDone <- true - - writeHijackHeader(req, httpBuf) - - // Force a flush after the header is written. - if err := httpBuf.Flush(); err != nil { - return fmt.Errorf("error flushing HTTP hijack header: %w", err) - } - - defer func() { - hijackWriteErrorAndClose(deferredErr, ctr.ID(), isTerminal, httpCon, httpBuf) - }() - - logrus.Debugf("Hijack for container %s attach session done, ready to stream", ctr.ID()) - - // TODO: This is gross. Really, really gross. - // I want to say we should read all the logs into an array before - // calling this, in container_api.go, but that could take a lot of - // memory... - // On the whole, we need to figure out a better way of doing this, - // though. - logSize := 0 - if streamLogs { - logrus.Debugf("Will stream logs for container %s attach session", ctr.ID()) - - // Get all logs for the container - logChan := make(chan *logs.LogLine) - logOpts := new(logs.LogOptions) - logOpts.Tail = -1 - logOpts.WaitGroup = new(sync.WaitGroup) - errChan := make(chan error) - go func() { - var err error - // In non-terminal mode we need to prepend with the - // stream header. - logrus.Debugf("Writing logs for container %s to HTTP attach", ctr.ID()) - for logLine := range logChan { - if !isTerminal { - device := logLine.Device - var header []byte - headerLen := uint32(len(logLine.Msg)) - logSize += len(logLine.Msg) - switch strings.ToLower(device) { - case "stdin": - header = makeHTTPAttachHeader(0, headerLen) - case "stdout": - header = makeHTTPAttachHeader(1, headerLen) - case "stderr": - header = makeHTTPAttachHeader(2, headerLen) - default: - logrus.Errorf("Unknown device for log line: %s", device) - header = makeHTTPAttachHeader(1, headerLen) - } - _, err = httpBuf.Write(header) - if err != nil { - break - } - } - _, err = httpBuf.Write([]byte(logLine.Msg)) - if err != nil { - break - } - if !logLine.Partial() { - _, err = httpBuf.Write([]byte("\n")) - if err != nil { - break - } - } - err = httpBuf.Flush() - if err != nil { - break - } - } - errChan <- err - }() - if err := ctr.ReadLog(context.Background(), logOpts, logChan, 0); err != nil { - return err - } - go func() { - logOpts.WaitGroup.Wait() - close(logChan) - }() - logrus.Debugf("Done reading logs for container %s, %d bytes", ctr.ID(), logSize) - if err := <-errChan; err != nil { - return err - } - } - if !streamAttach { - logrus.Debugf("Done streaming logs for container %s attach, exiting as attach streaming not requested", ctr.ID()) - return nil - } - - logrus.Debugf("Forwarding attach output for container %s", ctr.ID()) - - stdoutChan := make(chan error) - stdinChan := make(chan error) - - // Handle STDOUT/STDERR - go func() { - var err error - if isTerminal { - // Hack: return immediately if attachStdout not set to - // emulate Docker. - // Basically, when terminal is set, STDERR goes nowhere. - // Everything does over STDOUT. - // Therefore, if not attaching STDOUT - we'll never copy - // anything from here. - logrus.Debugf("Performing terminal HTTP attach for container %s", ctr.ID()) - if attachStdout { - err = httpAttachTerminalCopy(conn, httpBuf, ctr.ID()) - } - } else { - logrus.Debugf("Performing non-terminal HTTP attach for container %s", ctr.ID()) - err = httpAttachNonTerminalCopy(conn, httpBuf, ctr.ID(), attachStdin, attachStdout, attachStderr) - } - stdoutChan <- err - logrus.Debugf("STDOUT/ERR copy completed") - }() - // Next, STDIN. Avoid entirely if attachStdin unset. - if attachStdin { - go func() { - _, err := cutil.CopyDetachable(conn, httpBuf, detach) - logrus.Debugf("STDIN copy completed") - stdinChan <- err - }() - } - - for { - select { - case err := <-stdoutChan: - if err != nil { - return err - } - - return nil - case err := <-stdinChan: - if err != nil { - return err - } - // copy stdin is done, close it - if connErr := conn.CloseWrite(); connErr != nil { - logrus.Errorf("Unable to close conn: %v", connErr) - } - case <-cancel: - return nil - } - } -} - -// isRetryable returns whether the error was caused by a blocked syscall or the -// specified operation on a non blocking file descriptor wasn't ready for completion. -func isRetryable(err error) bool { - var errno syscall.Errno - if errors.As(err, &errno) { - return errno == syscall.EINTR || errno == syscall.EAGAIN - } - return false -} - -// openControlFile opens the terminal control file. -func openControlFile(ctr *Container, parentDir string) (*os.File, error) { - controlPath := filepath.Join(parentDir, "ctl") - for i := 0; i < 600; i++ { - controlFile, err := os.OpenFile(controlPath, unix.O_WRONLY|unix.O_NONBLOCK, 0) - if err == nil { - return controlFile, nil - } - if !isRetryable(err) { - return nil, fmt.Errorf("could not open ctl file for terminal resize for container %s: %w", ctr.ID(), err) - } - time.Sleep(time.Second / 10) - } - return nil, fmt.Errorf("timeout waiting for %q", controlPath) -} - -// AttachResize resizes the terminal used by the given container. -func (r *ConmonOCIRuntime) AttachResize(ctr *Container, newSize resize.TerminalSize) error { - controlFile, err := openControlFile(ctr, ctr.bundlePath()) - if err != nil { - return err - } - defer controlFile.Close() - - logrus.Debugf("Received a resize event for container %s: %+v", ctr.ID(), newSize) - if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 1, newSize.Height, newSize.Width); err != nil { - return fmt.Errorf("failed to write to ctl file to resize terminal: %w", err) - } - - return nil -} - -// CheckpointContainer checkpoints the given container. -func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) { - // imagePath is used by CRIU to store the actual checkpoint files - imagePath := ctr.CheckpointPath() - if options.PreCheckPoint { - imagePath = ctr.PreCheckPointPath() - } - // workPath will be used to store dump.log and stats-dump - workPath := ctr.bundlePath() - logrus.Debugf("Writing checkpoint to %s", imagePath) - logrus.Debugf("Writing checkpoint logs to %s", workPath) - logrus.Debugf("Pre-dump the container %t", options.PreCheckPoint) - args := []string{} - args = append(args, r.runtimeFlags...) - args = append(args, "checkpoint") - args = append(args, "--image-path") - args = append(args, imagePath) - args = append(args, "--work-path") - args = append(args, workPath) - if options.KeepRunning { - args = append(args, "--leave-running") - } - if options.TCPEstablished { - args = append(args, "--tcp-established") - } - if options.FileLocks { - args = append(args, "--file-locks") - } - if !options.PreCheckPoint && options.KeepRunning { - args = append(args, "--leave-running") - } - if options.PreCheckPoint { - args = append(args, "--pre-dump") - } - if !options.PreCheckPoint && options.WithPrevious { - args = append( - args, - "--parent-path", - filepath.Join("..", preCheckpointDir), - ) - } - - args = append(args, ctr.ID()) - logrus.Debugf("the args to checkpoint: %s %s", r.path, strings.Join(args, " ")) - - runtimeDir, err := util.GetRuntimeDir() - if err != nil { - return 0, err - } - env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)} - if path, ok := os.LookupEnv("PATH"); ok { - env = append(env, fmt.Sprintf("PATH=%s", path)) - } - - runtime.LockOSThread() - if err := label.SetSocketLabel(ctr.ProcessLabel()); err != nil { - return 0, err - } - - runtimeCheckpointStarted := time.Now() - err = utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...) - // Ignore error returned from SetSocketLabel("") call, - // can't recover. - if labelErr := label.SetSocketLabel(""); labelErr == nil { - // Unlock the thread only if the process label could be restored - // successfully. Otherwise leave the thread locked and the Go runtime - // will terminate it once it returns to the threads pool. - runtime.UnlockOSThread() - } else { - logrus.Errorf("Unable to reset socket label: %q", labelErr) - } - - runtimeCheckpointDuration := func() int64 { - if options.PrintStats { - return time.Since(runtimeCheckpointStarted).Microseconds() - } - return 0 - }() - - return runtimeCheckpointDuration, err -} - -func (r *ConmonOCIRuntime) CheckConmonRunning(ctr *Container) (bool, error) { - if ctr.state.ConmonPID == 0 { - // If the container is running or paused, assume Conmon is - // running. We didn't record Conmon PID on some old versions, so - // that is likely what's going on... - // Unusual enough that we should print a warning message though. - if ctr.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) { - logrus.Warnf("Conmon PID is not set, but container is running!") - return true, nil - } - // Container's not running, so conmon PID being unset is - // expected. Conmon is not running. - return false, nil - } - - // We have a conmon PID. Ping it with signal 0. - if err := unix.Kill(ctr.state.ConmonPID, 0); err != nil { - if err == unix.ESRCH { - return false, nil - } - return false, fmt.Errorf("error pinging container %s conmon with signal 0: %w", ctr.ID(), err) - } - return true, nil -} - -// SupportsCheckpoint checks if the OCI runtime supports checkpointing -// containers. -func (r *ConmonOCIRuntime) SupportsCheckpoint() bool { - return crutils.CRRuntimeSupportsCheckpointRestore(r.path) -} - -// SupportsJSONErrors checks if the OCI runtime supports JSON-formatted error -// messages. -func (r *ConmonOCIRuntime) SupportsJSONErrors() bool { - return r.supportsJSON -} - -// SupportsNoCgroups checks if the OCI runtime supports running containers -// without cgroups (the --cgroup-manager=disabled flag). -func (r *ConmonOCIRuntime) SupportsNoCgroups() bool { - return r.supportsNoCgroups -} - -// SupportsKVM checks if the OCI runtime supports running containers -// without KVM separation -func (r *ConmonOCIRuntime) SupportsKVM() bool { - return r.supportsKVM -} - -// AttachSocketPath is the path to a single container's attach socket. -func (r *ConmonOCIRuntime) AttachSocketPath(ctr *Container) (string, error) { - if ctr == nil { - return "", fmt.Errorf("must provide a valid container to get attach socket path: %w", define.ErrInvalidArg) - } - - return filepath.Join(ctr.bundlePath(), "attach"), nil -} - -// ExitFilePath is the path to a container's exit file. -func (r *ConmonOCIRuntime) ExitFilePath(ctr *Container) (string, error) { - if ctr == nil { - return "", fmt.Errorf("must provide a valid container to get exit file path: %w", define.ErrInvalidArg) - } - return filepath.Join(r.exitsDir, ctr.ID()), nil -} - -// RuntimeInfo provides information on the runtime. -func (r *ConmonOCIRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) { - runtimePackage := packageVersion(r.path) - conmonPackage := packageVersion(r.conmonPath) - runtimeVersion, err := r.getOCIRuntimeVersion() - if err != nil { - return nil, nil, fmt.Errorf("error getting version of OCI runtime %s: %w", r.name, err) - } - conmonVersion, err := r.getConmonVersion() - if err != nil { - return nil, nil, fmt.Errorf("error getting conmon version: %w", err) - } - - conmon := define.ConmonInfo{ - Package: conmonPackage, - Path: r.conmonPath, - Version: conmonVersion, - } - ocirt := define.OCIRuntimeInfo{ - Name: r.name, - Path: r.path, - Package: runtimePackage, - Version: runtimeVersion, - } - return &conmon, &ocirt, nil -} - -// makeAccessible changes the path permission and each parent directory to have --x--x--x -func makeAccessible(path string, uid, gid int) error { - for ; path != "/"; path = filepath.Dir(path) { - st, err := os.Stat(path) - if err != nil { - if os.IsNotExist(err) { - return nil - } - return err - } - if int(st.Sys().(*syscall.Stat_t).Uid) == uid && int(st.Sys().(*syscall.Stat_t).Gid) == gid { - continue - } - if st.Mode()&0111 != 0111 { - if err := os.Chmod(path, st.Mode()|0111); err != nil { - return err - } - } - } - return nil -} - -// Wait for a container which has been sent a signal to stop -func waitContainerStop(ctr *Container, timeout time.Duration) error { - return waitPidStop(ctr.state.PID, timeout) -} - -// Wait for a given PID to stop -func waitPidStop(pid int, timeout time.Duration) error { - done := make(chan struct{}) - chControl := make(chan struct{}) - go func() { - for { - select { - case <-chControl: - return - default: - if err := unix.Kill(pid, 0); err != nil { - if err == unix.ESRCH { - close(done) - return - } - logrus.Errorf("Pinging PID %d with signal 0: %v", pid, err) - } - time.Sleep(100 * time.Millisecond) - } - } - }() - select { - case <-done: - return nil - case <-time.After(timeout): - close(chControl) - return fmt.Errorf("given PIDs did not die within timeout") - } -} - -func (r *ConmonOCIRuntime) getLogTag(ctr *Container) (string, error) { - logTag := ctr.LogTag() - if logTag == "" { - return "", nil - } - data, err := ctr.inspectLocked(false) - if err != nil { - // FIXME: this error should probably be returned - return "", nil //nolint: nilerr - } - tmpl, err := template.New("container").Parse(logTag) - if err != nil { - return "", fmt.Errorf("template parsing error %s: %w", logTag, err) - } - var b bytes.Buffer - err = tmpl.Execute(&b, data) - if err != nil { - return "", err - } - return b.String(), nil -} - -// createOCIContainer generates this container's main conmon instance and prepares it for starting -func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) { - var stderrBuf bytes.Buffer - - runtimeDir, err := util.GetRuntimeDir() - if err != nil { - return 0, err - } - - parentSyncPipe, childSyncPipe, err := newPipe() - if err != nil { - return 0, fmt.Errorf("error creating socket pair: %w", err) - } - defer errorhandling.CloseQuiet(parentSyncPipe) - - childStartPipe, parentStartPipe, err := newPipe() - if err != nil { - return 0, fmt.Errorf("error creating socket pair for start pipe: %w", err) - } - - defer errorhandling.CloseQuiet(parentStartPipe) - - var ociLog string - if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON { - ociLog = filepath.Join(ctr.state.RunDir, "oci-log") - } - - logTag, err := r.getLogTag(ctr) - if err != nil { - return 0, err - } - - if ctr.config.CgroupsMode == cgroupSplit { - if err := utils.MoveUnderCgroupSubtree("runtime"); err != nil { - return 0, err - } - } - - pidfile := ctr.config.PidFile - if pidfile == "" { - pidfile = filepath.Join(ctr.state.RunDir, "pidfile") - } - - args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag) - - if ctr.config.SdNotifyMode == define.SdNotifyModeContainer && ctr.config.SdNotifySocket != "" { - args = append(args, fmt.Sprintf("--sdnotify-socket=%s", ctr.config.SdNotifySocket)) - } - - if ctr.config.Spec.Process.Terminal { - args = append(args, "-t") - } else if ctr.config.Stdin { - args = append(args, "-i") - } - - if ctr.config.Timeout > 0 { - args = append(args, fmt.Sprintf("--timeout=%d", ctr.config.Timeout)) - } - - if !r.enableKeyring { - args = append(args, "--no-new-keyring") - } - if ctr.config.ConmonPidFile != "" { - args = append(args, "--conmon-pidfile", ctr.config.ConmonPidFile) - } - - if r.noPivot { - args = append(args, "--no-pivot") - } - - exitCommand, err := specgenutil.CreateExitCommandArgs(ctr.runtime.storageConfig, ctr.runtime.config, logrus.IsLevelEnabled(logrus.DebugLevel), ctr.AutoRemove(), false) - if err != nil { - return 0, err - } - exitCommand = append(exitCommand, ctr.config.ID) - - args = append(args, "--exit-command", exitCommand[0]) - for _, arg := range exitCommand[1:] { - args = append(args, []string{"--exit-command-arg", arg}...) - } - - // Pass down the LISTEN_* environment (see #10443). - preserveFDs := ctr.config.PreserveFDs - if val := os.Getenv("LISTEN_FDS"); val != "" { - if ctr.config.PreserveFDs > 0 { - logrus.Warnf("Ignoring LISTEN_FDS to preserve custom user-specified FDs") - } else { - fds, err := strconv.Atoi(val) - if err != nil { - return 0, fmt.Errorf("converting LISTEN_FDS=%s: %w", val, err) - } - preserveFDs = uint(fds) - } - } - - if preserveFDs > 0 { - args = append(args, formatRuntimeOpts("--preserve-fds", fmt.Sprintf("%d", preserveFDs))...) - } - - if restoreOptions != nil { - args = append(args, "--restore", ctr.CheckpointPath()) - if restoreOptions.TCPEstablished { - args = append(args, "--runtime-opt", "--tcp-established") - } - if restoreOptions.FileLocks { - args = append(args, "--runtime-opt", "--file-locks") - } - if restoreOptions.Pod != "" { - mountLabel := ctr.config.MountLabel - processLabel := ctr.config.ProcessLabel - if mountLabel != "" { - args = append( - args, - "--runtime-opt", - fmt.Sprintf( - "--lsm-mount-context=%s", - mountLabel, - ), - ) - } - if processLabel != "" { - args = append( - args, - "--runtime-opt", - fmt.Sprintf( - "--lsm-profile=selinux:%s", - processLabel, - ), - ) - } - } - } - - logrus.WithFields(logrus.Fields{ - "args": args, - }).Debugf("running conmon: %s", r.conmonPath) - - cmd := exec.Command(r.conmonPath, args...) - cmd.SysProcAttr = &syscall.SysProcAttr{ - Setpgid: true, - } - // TODO this is probably a really bad idea for some uses - // Make this configurable - cmd.Stdin = os.Stdin - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - if ctr.config.Spec.Process.Terminal { - cmd.Stderr = &stderrBuf - } - - // 0, 1 and 2 are stdin, stdout and stderr - conmonEnv := r.configureConmonEnv(runtimeDir) - - var filesToClose []*os.File - if preserveFDs > 0 { - for fd := 3; fd < int(3+preserveFDs); fd++ { - f := os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd)) - filesToClose = append(filesToClose, f) - cmd.ExtraFiles = append(cmd.ExtraFiles, f) - } - } - - cmd.Env = r.conmonEnv - // we don't want to step on users fds they asked to preserve - // Since 0-2 are used for stdio, start the fds we pass in at preserveFDs+3 - cmd.Env = append(cmd.Env, fmt.Sprintf("_OCI_SYNCPIPE=%d", preserveFDs+3), fmt.Sprintf("_OCI_STARTPIPE=%d", preserveFDs+4)) - cmd.Env = append(cmd.Env, conmonEnv...) - cmd.ExtraFiles = append(cmd.ExtraFiles, childSyncPipe, childStartPipe) - - if r.reservePorts && !rootless.IsRootless() && !ctr.config.NetMode.IsSlirp4netns() { - ports, err := bindPorts(ctr.convertPortMappings()) - if err != nil { - return 0, err - } - filesToClose = append(filesToClose, ports...) - - // Leak the port we bound in the conmon process. These fd's won't be used - // by the container and conmon will keep the ports busy so that another - // process cannot use them. - cmd.ExtraFiles = append(cmd.ExtraFiles, ports...) - } - - if ctr.config.NetMode.IsSlirp4netns() || rootless.IsRootless() { - if ctr.config.PostConfigureNetNS { - havePortMapping := len(ctr.config.PortMappings) > 0 - if havePortMapping { - ctr.rootlessPortSyncR, ctr.rootlessPortSyncW, err = os.Pipe() - if err != nil { - return 0, fmt.Errorf("failed to create rootless port sync pipe: %w", err) - } - } - ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe() - if err != nil { - return 0, fmt.Errorf("failed to create rootless network sync pipe: %w", err) - } - } else { - if ctr.rootlessSlirpSyncR != nil { - defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncR) - } - if ctr.rootlessSlirpSyncW != nil { - defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncW) - } - } - // Leak one end in conmon, the other one will be leaked into slirp4netns - cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessSlirpSyncW) - - if ctr.rootlessPortSyncW != nil { - defer errorhandling.CloseQuiet(ctr.rootlessPortSyncW) - // Leak one end in conmon, the other one will be leaked into rootlessport - cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessPortSyncW) - } - } - var runtimeRestoreStarted time.Time - if restoreOptions != nil { - runtimeRestoreStarted = time.Now() - } - err = startCommand(cmd, ctr) - - // regardless of whether we errored or not, we no longer need the children pipes - childSyncPipe.Close() - childStartPipe.Close() - if err != nil { - return 0, err - } - if err := r.moveConmonToCgroupAndSignal(ctr, cmd, parentStartPipe); err != nil { - return 0, err - } - /* Wait for initial setup and fork, and reap child */ - err = cmd.Wait() - if err != nil { - return 0, err - } - - pid, err := readConmonPipeData(r.name, parentSyncPipe, ociLog) - if err != nil { - if err2 := r.DeleteContainer(ctr); err2 != nil { - logrus.Errorf("Removing container %s from runtime after creation failed", ctr.ID()) - } - return 0, err - } - ctr.state.PID = pid - - conmonPID, err := readConmonPidFile(ctr.config.ConmonPidFile) - if err != nil { - logrus.Warnf("Error reading conmon pid file for container %s: %v", ctr.ID(), err) - } else if conmonPID > 0 { - // conmon not having a pid file is a valid state, so don't set it if we don't have it - logrus.Infof("Got Conmon PID as %d", conmonPID) - ctr.state.ConmonPID = conmonPID - } - - runtimeRestoreDuration := func() int64 { - if restoreOptions != nil && restoreOptions.PrintStats { - return time.Since(runtimeRestoreStarted).Microseconds() - } - return 0 - }() - - // These fds were passed down to the runtime. Close them - // and not interfere - for _, f := range filesToClose { - errorhandling.CloseQuiet(f) - } - - return runtimeRestoreDuration, nil -} - -// configureConmonEnv gets the environment values to add to conmon's exec struct -// TODO this may want to be less hardcoded/more configurable in the future -func (r *ConmonOCIRuntime) configureConmonEnv(runtimeDir string) []string { - var env []string - for _, e := range os.Environ() { - if strings.HasPrefix(e, "LC_") { - env = append(env, e) - } - } - conf, ok := os.LookupEnv("CONTAINERS_CONF") - if ok { - env = append(env, fmt.Sprintf("CONTAINERS_CONF=%s", conf)) - } - env = append(env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)) - env = append(env, fmt.Sprintf("_CONTAINERS_USERNS_CONFIGURED=%s", os.Getenv("_CONTAINERS_USERNS_CONFIGURED"))) - env = append(env, fmt.Sprintf("_CONTAINERS_ROOTLESS_UID=%s", os.Getenv("_CONTAINERS_ROOTLESS_UID"))) - home := homedir.Get() - if home != "" { - env = append(env, fmt.Sprintf("HOME=%s", home)) - } - - return env -} - -// sharedConmonArgs takes common arguments for exec and create/restore and formats them for the conmon CLI -func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, ociLogPath, logDriver, logTag string) []string { - // set the conmon API version to be able to use the correct sync struct keys - args := []string{ - "--api-version", "1", - "-c", ctr.ID(), - "-u", cuuid, - "-r", r.path, - "-b", bundlePath, - "-p", pidPath, - "-n", ctr.Name(), - "--exit-dir", exitDir, - "--full-attach", - } - if len(r.runtimeFlags) > 0 { - rFlags := []string{} - for _, arg := range r.runtimeFlags { - rFlags = append(rFlags, "--runtime-arg", arg) - } - args = append(args, rFlags...) - } - - if ctr.CgroupManager() == config.SystemdCgroupsManager && !ctr.config.NoCgroups && ctr.config.CgroupsMode != cgroupSplit { - args = append(args, "-s") - } - - var logDriverArg string - switch logDriver { - case define.JournaldLogging: - logDriverArg = define.JournaldLogging - case define.NoLogging: - logDriverArg = define.NoLogging - case define.PassthroughLogging: - logDriverArg = define.PassthroughLogging - //lint:ignore ST1015 the default case has to be here - default: //nolint:stylecheck,gocritic - // No case here should happen except JSONLogging, but keep this here in case the options are extended - logrus.Errorf("%s logging specified but not supported. Choosing k8s-file logging instead", ctr.LogDriver()) - fallthrough - case "": - // to get here, either a user would specify `--log-driver ""`, or this came from another place in libpod - // since the former case is obscure, and the latter case isn't an error, let's silently fallthrough - fallthrough - case define.JSONLogging: - fallthrough - case define.KubernetesLogging: - logDriverArg = fmt.Sprintf("%s:%s", define.KubernetesLogging, logPath) - } - - args = append(args, "-l", logDriverArg) - logLevel := logrus.GetLevel() - args = append(args, "--log-level", logLevel.String()) - - if logLevel == logrus.DebugLevel { - logrus.Debugf("%s messages will be logged to syslog", r.conmonPath) - args = append(args, "--syslog") - } - - size := r.logSizeMax - if ctr.config.LogSize > 0 { - size = ctr.config.LogSize - } - if size > 0 { - args = append(args, "--log-size-max", fmt.Sprintf("%v", size)) - } - - if ociLogPath != "" { - args = append(args, "--runtime-arg", "--log-format=json", "--runtime-arg", "--log", fmt.Sprintf("--runtime-arg=%s", ociLogPath)) - } - if logTag != "" { - args = append(args, "--log-tag", logTag) - } - if ctr.config.NoCgroups { - logrus.Debugf("Running with no Cgroups") - args = append(args, "--runtime-arg", "--cgroup-manager", "--runtime-arg", "disabled") - } - return args -} - -func startCommand(cmd *exec.Cmd, ctr *Container) error { - // Make sure to unset the NOTIFY_SOCKET and reset it afterwards if needed. - switch ctr.config.SdNotifyMode { - case define.SdNotifyModeContainer, define.SdNotifyModeIgnore: - if prev := os.Getenv("NOTIFY_SOCKET"); prev != "" { - if err := os.Unsetenv("NOTIFY_SOCKET"); err != nil { - logrus.Warnf("Error unsetting NOTIFY_SOCKET %v", err) - } - defer func() { - if err := os.Setenv("NOTIFY_SOCKET", prev); err != nil { - logrus.Errorf("Resetting NOTIFY_SOCKET=%s", prev) - } - }() - } - } - - return cmd.Start() -} - -// moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup -// it then signals for conmon to start by sending nonce data down the start fd -func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File) error { - mustCreateCgroup := true - - if ctr.config.NoCgroups { - mustCreateCgroup = false - } - - // If cgroup creation is disabled - just signal. - switch ctr.config.CgroupsMode { - case "disabled", "no-conmon", cgroupSplit: - mustCreateCgroup = false - } - - // $INVOCATION_ID is set by systemd when running as a service. - if ctr.runtime.RemoteURI() == "" && os.Getenv("INVOCATION_ID") != "" { - mustCreateCgroup = false - } - - if mustCreateCgroup { - // Usually rootless users are not allowed to configure cgroupfs. - // There are cases though, where it is allowed, e.g. if the cgroup - // is manually configured and chowned). Avoid detecting all - // such cases and simply use a lower log level. - logLevel := logrus.WarnLevel - if rootless.IsRootless() { - logLevel = logrus.InfoLevel - } - // TODO: This should be a switch - we are not guaranteed that - // there are only 2 valid cgroup managers - cgroupParent := ctr.CgroupParent() - cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon") - Resource := ctr.Spec().Linux.Resources - cgroupResources, err := GetLimits(Resource) - if err != nil { - logrus.StandardLogger().Log(logLevel, "Could not get ctr resources") - } - if ctr.CgroupManager() == config.SystemdCgroupsManager { - unitName := createUnitName("libpod-conmon", ctr.ID()) - realCgroupParent := cgroupParent - splitParent := strings.Split(cgroupParent, "/") - if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 { - realCgroupParent = splitParent[len(splitParent)-1] - } - - logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName) - if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil { - logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to systemd sandbox cgroup: %v", err) - } - } else { - control, err := cgroups.New(cgroupPath, &cgroupResources) - if err != nil { - logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to cgroupfs sandbox cgroup: %v", err) - } else if err := control.AddPid(cmd.Process.Pid); err != nil { - // we need to remove this defer and delete the cgroup once conmon exits - // maybe need a conmon monitor? - logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to cgroupfs sandbox cgroup: %v", err) - } - } - } - - /* We set the cgroup, now the child can start creating children */ - if err := writeConmonPipeData(startFd); err != nil { - return err - } - return nil -} - -// newPipe creates a unix socket pair for communication. -// Returns two files - first is parent, second is child. -func newPipe() (*os.File, *os.File, error) { - fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0) - if err != nil { - return nil, nil, err - } - return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil -} - -// readConmonPidFile attempts to read conmon's pid from its pid file -func readConmonPidFile(pidFile string) (int, error) { - // Let's try reading the Conmon pid at the same time. - if pidFile != "" { - contents, err := ioutil.ReadFile(pidFile) - if err != nil { - return -1, err - } - // Convert it to an int - conmonPID, err := strconv.Atoi(string(contents)) - if err != nil { - return -1, err - } - return conmonPID, nil - } - return 0, nil -} - -// readConmonPipeData attempts to read a syncInfo struct from the pipe -func readConmonPipeData(runtimeName string, pipe *os.File, ociLog string) (int, error) { - // syncInfo is used to return data from monitor process to daemon - type syncInfo struct { - Data int `json:"data"` - Message string `json:"message,omitempty"` - } - - // Wait to get container pid from conmon - type syncStruct struct { - si *syncInfo - err error - } - ch := make(chan syncStruct) - go func() { - var si *syncInfo - rdr := bufio.NewReader(pipe) - b, err := rdr.ReadBytes('\n') - // ignore EOF here, error is returned even when data was read - // if it is no valid json unmarshal will fail below - if err != nil && !errors.Is(err, io.EOF) { - ch <- syncStruct{err: err} - } - if err := json.Unmarshal(b, &si); err != nil { - ch <- syncStruct{err: fmt.Errorf("conmon bytes %q: %w", string(b), err)} - return - } - ch <- syncStruct{si: si} - }() - - data := -1 //nolint: wastedassign - select { - case ss := <-ch: - if ss.err != nil { - if ociLog != "" { - ociLogData, err := ioutil.ReadFile(ociLog) - if err == nil { - var ociErr ociError - if err := json.Unmarshal(ociLogData, &ociErr); err == nil { - return -1, getOCIRuntimeError(runtimeName, ociErr.Msg) - } - } - } - return -1, fmt.Errorf("container create failed (no logs from conmon): %w", ss.err) - } - logrus.Debugf("Received: %d", ss.si.Data) - if ss.si.Data < 0 { - if ociLog != "" { - ociLogData, err := ioutil.ReadFile(ociLog) - if err == nil { - var ociErr ociError - if err := json.Unmarshal(ociLogData, &ociErr); err == nil { - return ss.si.Data, getOCIRuntimeError(runtimeName, ociErr.Msg) - } - } - } - // If we failed to parse the JSON errors, then print the output as it is - if ss.si.Message != "" { - return ss.si.Data, getOCIRuntimeError(runtimeName, ss.si.Message) - } - return ss.si.Data, fmt.Errorf("container create failed: %w", define.ErrInternal) - } - data = ss.si.Data - case <-time.After(define.ContainerCreateTimeout): - return -1, fmt.Errorf("container creation timeout: %w", define.ErrInternal) - } - return data, nil -} - -// writeConmonPipeData writes nonce data to a pipe -func writeConmonPipeData(pipe *os.File) error { - someData := []byte{0} - _, err := pipe.Write(someData) - return err -} - -// formatRuntimeOpts prepends opts passed to it with --runtime-opt for passing to conmon -func formatRuntimeOpts(opts ...string) []string { - args := make([]string, 0, len(opts)*2) - for _, o := range opts { - args = append(args, "--runtime-opt", o) - } - return args -} - -// getConmonVersion returns a string representation of the conmon version. -func (r *ConmonOCIRuntime) getConmonVersion() (string, error) { - output, err := utils.ExecCmd(r.conmonPath, "--version") - if err != nil { - return "", err - } - return strings.TrimSuffix(strings.Replace(output, "\n", ", ", 1), "\n"), nil -} - -// getOCIRuntimeVersion returns a string representation of the OCI runtime's -// version. -func (r *ConmonOCIRuntime) getOCIRuntimeVersion() (string, error) { - output, err := utils.ExecCmd(r.path, "--version") - if err != nil { - return "", err - } - return strings.TrimSuffix(output, "\n"), nil -} - -// Copy data from container to HTTP connection, for terminal attach. -// Container is the container's attach socket connection, http is a buffer for -// the HTTP connection. cid is the ID of the container the attach session is -// running for (used solely for error messages). -func httpAttachTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter, cid string) error { - buf := make([]byte, bufferSize) - for { - numR, err := container.Read(buf) - logrus.Debugf("Read fd(%d) %d/%d bytes for container %s", int(buf[0]), numR, len(buf), cid) - - if numR > 0 { - switch buf[0] { - case AttachPipeStdout: - // Do nothing - default: - logrus.Errorf("Received unexpected attach type %+d, discarding %d bytes", buf[0], numR) - continue - } - - numW, err2 := http.Write(buf[1:numR]) - if err2 != nil { - if err != nil { - logrus.Errorf("Reading container %s STDOUT: %v", cid, err) - } - return err2 - } else if numW+1 != numR { - return io.ErrShortWrite - } - // We need to force the buffer to write immediately, so - // there isn't a delay on the terminal side. - if err2 := http.Flush(); err2 != nil { - if err != nil { - logrus.Errorf("Reading container %s STDOUT: %v", cid, err) - } - return err2 - } - } - if err != nil { - if err == io.EOF { - return nil - } - return err - } - } -} - -// Copy data from a container to an HTTP connection, for non-terminal attach. -// Appends a header to multiplex input. -func httpAttachNonTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter, cid string, stdin, stdout, stderr bool) error { - buf := make([]byte, bufferSize) - for { - numR, err := container.Read(buf) - if numR > 0 { - var headerBuf []byte - - // Subtract 1 because we strip the first byte (used for - // multiplexing by Conmon). - headerLen := uint32(numR - 1) - // Practically speaking, we could make this buf[0] - 1, - // but we need to validate it anyway. - switch buf[0] { - case AttachPipeStdin: - headerBuf = makeHTTPAttachHeader(0, headerLen) - if !stdin { - continue - } - case AttachPipeStdout: - if !stdout { - continue - } - headerBuf = makeHTTPAttachHeader(1, headerLen) - case AttachPipeStderr: - if !stderr { - continue - } - headerBuf = makeHTTPAttachHeader(2, headerLen) - default: - logrus.Errorf("Received unexpected attach type %+d, discarding %d bytes", buf[0], numR) - continue - } - - numH, err2 := http.Write(headerBuf) - if err2 != nil { - if err != nil { - logrus.Errorf("Reading container %s standard streams: %v", cid, err) - } - - return err2 - } - // Hardcoding header length is pretty gross, but - // fast. Should be safe, as this is a fixed part - // of the protocol. - if numH != 8 { - if err != nil { - logrus.Errorf("Reading container %s standard streams: %v", cid, err) - } - - return io.ErrShortWrite - } - - numW, err2 := http.Write(buf[1:numR]) - if err2 != nil { - if err != nil { - logrus.Errorf("Reading container %s standard streams: %v", cid, err) - } - - return err2 - } else if numW+1 != numR { - if err != nil { - logrus.Errorf("Reading container %s standard streams: %v", cid, err) - } - - return io.ErrShortWrite - } - // We need to force the buffer to write immediately, so - // there isn't a delay on the terminal side. - if err2 := http.Flush(); err2 != nil { - if err != nil { - logrus.Errorf("Reading container %s STDOUT: %v", cid, err) - } - return err2 - } - } - if err != nil { - if err == io.EOF { - return nil - } - - return err - } - } -} - -// GetLimits converts spec resource limits to cgroup consumable limits -func GetLimits(resource *spec.LinuxResources) (runcconfig.Resources, error) { - if resource == nil { - resource = &spec.LinuxResources{} - } - final := &runcconfig.Resources{} - devs := []*devices.Rule{} - - // Devices - for _, entry := range resource.Devices { - if entry.Major == nil || entry.Minor == nil { - continue - } - runeType := 'a' - switch entry.Type { - case "b": - runeType = 'b' - case "c": - runeType = 'c' - } - - devs = append(devs, &devices.Rule{ - Type: devices.Type(runeType), - Major: *entry.Major, - Minor: *entry.Minor, - Permissions: devices.Permissions(entry.Access), - Allow: entry.Allow, - }) - } - final.Devices = devs - - // HugepageLimits - pageLimits := []*runcconfig.HugepageLimit{} - for _, entry := range resource.HugepageLimits { - pageLimits = append(pageLimits, &runcconfig.HugepageLimit{ - Pagesize: entry.Pagesize, - Limit: entry.Limit, - }) - } - final.HugetlbLimit = pageLimits - - // Networking - netPriorities := []*runcconfig.IfPrioMap{} - if resource.Network != nil { - for _, entry := range resource.Network.Priorities { - netPriorities = append(netPriorities, &runcconfig.IfPrioMap{ - Interface: entry.Name, - Priority: int64(entry.Priority), - }) - } - } - final.NetPrioIfpriomap = netPriorities - rdma := make(map[string]runcconfig.LinuxRdma) - for name, entry := range resource.Rdma { - rdma[name] = runcconfig.LinuxRdma{HcaHandles: entry.HcaHandles, HcaObjects: entry.HcaObjects} - } - final.Rdma = rdma - - // Memory - if resource.Memory != nil { - if resource.Memory.Limit != nil { - final.Memory = *resource.Memory.Limit - } - if resource.Memory.Reservation != nil { - final.MemoryReservation = *resource.Memory.Reservation - } - if resource.Memory.Swap != nil { - final.MemorySwap = *resource.Memory.Swap - } - if resource.Memory.Swappiness != nil { - final.MemorySwappiness = resource.Memory.Swappiness - } - } - - // CPU - if resource.CPU != nil { - if resource.CPU.Period != nil { - final.CpuPeriod = *resource.CPU.Period - } - if resource.CPU.Quota != nil { - final.CpuQuota = *resource.CPU.Quota - } - if resource.CPU.RealtimePeriod != nil { - final.CpuRtPeriod = *resource.CPU.RealtimePeriod - } - if resource.CPU.RealtimeRuntime != nil { - final.CpuRtRuntime = *resource.CPU.RealtimeRuntime - } - if resource.CPU.Shares != nil { - final.CpuShares = *resource.CPU.Shares - } - final.CpusetCpus = resource.CPU.Cpus - final.CpusetMems = resource.CPU.Mems - } - - // BlkIO - if resource.BlockIO != nil { - if len(resource.BlockIO.ThrottleReadBpsDevice) > 0 { - for _, entry := range resource.BlockIO.ThrottleReadBpsDevice { - throttle := &runcconfig.ThrottleDevice{} - dev := &runcconfig.BlockIODevice{ - Major: entry.Major, - Minor: entry.Minor, - } - throttle.BlockIODevice = *dev - throttle.Rate = entry.Rate - final.BlkioThrottleReadBpsDevice = append(final.BlkioThrottleReadBpsDevice, throttle) - } - } - if len(resource.BlockIO.ThrottleWriteBpsDevice) > 0 { - for _, entry := range resource.BlockIO.ThrottleWriteBpsDevice { - throttle := &runcconfig.ThrottleDevice{} - dev := &runcconfig.BlockIODevice{ - Major: entry.Major, - Minor: entry.Minor, - } - throttle.BlockIODevice = *dev - throttle.Rate = entry.Rate - final.BlkioThrottleWriteBpsDevice = append(final.BlkioThrottleWriteBpsDevice, throttle) - } - } - if len(resource.BlockIO.ThrottleReadIOPSDevice) > 0 { - for _, entry := range resource.BlockIO.ThrottleReadIOPSDevice { - throttle := &runcconfig.ThrottleDevice{} - dev := &runcconfig.BlockIODevice{ - Major: entry.Major, - Minor: entry.Minor, - } - throttle.BlockIODevice = *dev - throttle.Rate = entry.Rate - final.BlkioThrottleReadIOPSDevice = append(final.BlkioThrottleReadIOPSDevice, throttle) - } - } - if len(resource.BlockIO.ThrottleWriteIOPSDevice) > 0 { - for _, entry := range resource.BlockIO.ThrottleWriteIOPSDevice { - throttle := &runcconfig.ThrottleDevice{} - dev := &runcconfig.BlockIODevice{ - Major: entry.Major, - Minor: entry.Minor, - } - throttle.BlockIODevice = *dev - throttle.Rate = entry.Rate - final.BlkioThrottleWriteIOPSDevice = append(final.BlkioThrottleWriteIOPSDevice, throttle) - } - } - if resource.BlockIO.LeafWeight != nil { - final.BlkioLeafWeight = *resource.BlockIO.LeafWeight - } - if resource.BlockIO.Weight != nil { - final.BlkioWeight = *resource.BlockIO.Weight - } - if len(resource.BlockIO.WeightDevice) > 0 { - for _, entry := range resource.BlockIO.WeightDevice { - weight := &runcconfig.WeightDevice{} - dev := &runcconfig.BlockIODevice{ - Major: entry.Major, - Minor: entry.Minor, - } - if entry.Weight != nil { - weight.Weight = *entry.Weight - } - if entry.LeafWeight != nil { - weight.LeafWeight = *entry.LeafWeight - } - weight.BlockIODevice = *dev - final.BlkioWeightDevice = append(final.BlkioWeightDevice, weight) - } - } - } - - // Pids - if resource.Pids != nil { - final.PidsLimit = resource.Pids.Limit - } - - // Networking - if resource.Network != nil { - if resource.Network.ClassID != nil { - final.NetClsClassid = *resource.Network.ClassID - } - } - - // Unified state - final.Unified = resource.Unified - - return *final, nil -} -- cgit v1.2.3-54-g00ecf From 8d229c6cdc9ab7325bf1f246e1bab6af79e75afe Mon Sep 17 00:00:00 2001 From: Doug Rabson Date: Wed, 17 Aug 2022 09:36:46 +0100 Subject: libpod: Move oci_conmon_attach_linux.go to oci_conmon_attach_common.go [NO NEW TESTS NEEDED] Signed-off-by: Doug Rabson --- libpod/oci_conmon_attach_common.go | 314 +++++++++++++++++++++++++++++++++++++ libpod/oci_conmon_attach_linux.go | 314 ------------------------------------- 2 files changed, 314 insertions(+), 314 deletions(-) create mode 100644 libpod/oci_conmon_attach_common.go delete mode 100644 libpod/oci_conmon_attach_linux.go (limited to 'libpod') diff --git a/libpod/oci_conmon_attach_common.go b/libpod/oci_conmon_attach_common.go new file mode 100644 index 000000000..aa55aa6f5 --- /dev/null +++ b/libpod/oci_conmon_attach_common.go @@ -0,0 +1,314 @@ +//go:build linux +// +build linux + +package libpod + +import ( + "errors" + "fmt" + "io" + "net" + "os" + "path/filepath" + "syscall" + + "github.com/containers/common/pkg/config" + "github.com/containers/common/pkg/resize" + "github.com/containers/common/pkg/util" + "github.com/containers/podman/v4/libpod/define" + "github.com/containers/podman/v4/pkg/errorhandling" + "github.com/moby/term" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +/* Sync with stdpipe_t in conmon.c */ +const ( + AttachPipeStdin = 1 + AttachPipeStdout = 2 + AttachPipeStderr = 3 +) + +func openUnixSocket(path string) (*net.UnixConn, error) { + fd, err := unix.Open(path, unix.O_PATH, 0) + if err != nil { + return nil, err + } + defer unix.Close(fd) + return net.DialUnix("unixpacket", nil, &net.UnixAddr{Name: fmt.Sprintf("/proc/self/fd/%d", fd), Net: "unixpacket"}) +} + +// Attach to the given container. +// Does not check if state is appropriate. +// started is only required if startContainer is true. +func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error { + passthrough := c.LogDriver() == define.PassthroughLogging + + if params == nil || params.Streams == nil { + return fmt.Errorf("must provide parameters to Attach: %w", define.ErrInternal) + } + + if !params.Streams.AttachOutput && !params.Streams.AttachError && !params.Streams.AttachInput && !passthrough { + return fmt.Errorf("must provide at least one stream to attach to: %w", define.ErrInvalidArg) + } + if params.Start && params.Started == nil { + return fmt.Errorf("started chan not passed when startContainer set: %w", define.ErrInternal) + } + + keys := config.DefaultDetachKeys + if params.DetachKeys != nil { + keys = *params.DetachKeys + } + + detachKeys, err := processDetachKeys(keys) + if err != nil { + return err + } + + var conn *net.UnixConn + if !passthrough { + logrus.Debugf("Attaching to container %s", c.ID()) + + // If we have a resize, do it. + if params.InitialSize != nil { + if err := r.AttachResize(c, *params.InitialSize); err != nil { + return err + } + } + + attachSock, err := c.AttachSocketPath() + if err != nil { + return err + } + + conn, err = openUnixSocket(attachSock) + if err != nil { + return fmt.Errorf("failed to connect to container's attach socket: %v: %w", attachSock, err) + } + defer func() { + if err := conn.Close(); err != nil { + logrus.Errorf("unable to close socket: %q", err) + } + }() + } + + // If starting was requested, start the container and notify when that's + // done. + if params.Start { + if err := c.start(); err != nil { + return err + } + params.Started <- true + } + + if passthrough { + return nil + } + + receiveStdoutError, stdinDone := setupStdioChannels(params.Streams, conn, detachKeys) + if params.AttachReady != nil { + params.AttachReady <- true + } + return readStdio(conn, params.Streams, receiveStdoutError, stdinDone) +} + +// Attach to the given container's exec session +// attachFd and startFd must be open file descriptors +// attachFd must be the output side of the fd. attachFd is used for two things: +// conmon will first send a nonce value across the pipe indicating it has set up its side of the console socket +// this ensures attachToExec gets all of the output of the called process +// conmon will then send the exit code of the exec process, or an error in the exec session +// startFd must be the input side of the fd. +// newSize resizes the tty to this size before the process is started, must be nil if the exec session has no tty +// conmon will wait to start the exec session until the parent process has set up the console socket. +// Once attachToExec successfully attaches to the console socket, the child conmon process responsible for calling runtime exec +// will read from the output side of start fd, thus learning to start the child process. +// Thus, the order goes as follow: +// 1. conmon parent process sets up its console socket. sends on attachFd +// 2. attachToExec attaches to the console socket after reading on attachFd and resizes the tty +// 3. child waits on startFd for attachToExec to attach to said console socket +// 4. attachToExec sends on startFd, signalling it has attached to the socket and child is ready to go +// 5. child receives on startFd, runs the runtime exec command +// attachToExec is responsible for closing startFd and attachFd +func (c *Container) attachToExec(streams *define.AttachStreams, keys *string, sessionID string, startFd, attachFd *os.File, newSize *resize.TerminalSize) error { + if !streams.AttachOutput && !streams.AttachError && !streams.AttachInput { + return fmt.Errorf("must provide at least one stream to attach to: %w", define.ErrInvalidArg) + } + if startFd == nil || attachFd == nil { + return fmt.Errorf("start sync pipe and attach sync pipe must be defined for exec attach: %w", define.ErrInvalidArg) + } + + defer errorhandling.CloseQuiet(startFd) + defer errorhandling.CloseQuiet(attachFd) + + detachString := config.DefaultDetachKeys + if keys != nil { + detachString = *keys + } + detachKeys, err := processDetachKeys(detachString) + if err != nil { + return err + } + + logrus.Debugf("Attaching to container %s exec session %s", c.ID(), sessionID) + + // set up the socket path, such that it is the correct length and location for exec + sockPath, err := c.execAttachSocketPath(sessionID) + if err != nil { + return err + } + + // 2: read from attachFd that the parent process has set up the console socket + if _, err := readConmonPipeData(c.ociRuntime.Name(), attachFd, ""); err != nil { + return err + } + + // resize before we start the container process + if newSize != nil { + err = c.ociRuntime.ExecAttachResize(c, sessionID, *newSize) + if err != nil { + logrus.Warnf("Resize failed: %v", err) + } + } + + // 2: then attach + conn, err := openUnixSocket(sockPath) + if err != nil { + return fmt.Errorf("failed to connect to container's attach socket: %v: %w", sockPath, err) + } + defer func() { + if err := conn.Close(); err != nil { + logrus.Errorf("Unable to close socket: %q", err) + } + }() + + // start listening on stdio of the process + receiveStdoutError, stdinDone := setupStdioChannels(streams, conn, detachKeys) + + // 4: send start message to child + if err := writeConmonPipeData(startFd); err != nil { + return err + } + + return readStdio(conn, streams, receiveStdoutError, stdinDone) +} + +func processDetachKeys(keys string) ([]byte, error) { + // Check the validity of the provided keys first + if len(keys) == 0 { + return []byte{}, nil + } + detachKeys, err := term.ToBytes(keys) + if err != nil { + return nil, fmt.Errorf("invalid detach keys: %w", err) + } + return detachKeys, nil +} + +func registerResizeFunc(r <-chan resize.TerminalSize, bundlePath string) { + resize.HandleResizing(r, func(size resize.TerminalSize) { + controlPath := filepath.Join(bundlePath, "ctl") + controlFile, err := os.OpenFile(controlPath, unix.O_WRONLY, 0) + if err != nil { + logrus.Debugf("Could not open ctl file: %v", err) + return + } + defer controlFile.Close() + + logrus.Debugf("Received a resize event: %+v", size) + if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 1, size.Height, size.Width); err != nil { + logrus.Warnf("Failed to write to control file to resize terminal: %v", err) + } + }) +} + +func setupStdioChannels(streams *define.AttachStreams, conn *net.UnixConn, detachKeys []byte) (chan error, chan error) { + receiveStdoutError := make(chan error) + go func() { + receiveStdoutError <- redirectResponseToOutputStreams(streams.OutputStream, streams.ErrorStream, streams.AttachOutput, streams.AttachError, conn) + }() + + stdinDone := make(chan error) + go func() { + var err error + if streams.AttachInput { + _, err = util.CopyDetachable(conn, streams.InputStream, detachKeys) + } + stdinDone <- err + }() + + return receiveStdoutError, stdinDone +} + +func redirectResponseToOutputStreams(outputStream, errorStream io.Writer, writeOutput, writeError bool, conn io.Reader) error { + var err error + buf := make([]byte, 8192+1) /* Sync with conmon STDIO_BUF_SIZE */ + for { + nr, er := conn.Read(buf) + if nr > 0 { + var dst io.Writer + var doWrite bool + switch buf[0] { + case AttachPipeStdout: + dst = outputStream + doWrite = writeOutput + case AttachPipeStderr: + dst = errorStream + doWrite = writeError + default: + logrus.Infof("Received unexpected attach type %+d", buf[0]) + } + if dst == nil { + return errors.New("output destination cannot be nil") + } + + if doWrite { + nw, ew := dst.Write(buf[1:nr]) + if ew != nil { + err = ew + break + } + if nr != nw+1 { + err = io.ErrShortWrite + break + } + } + } + if errors.Is(er, io.EOF) || errors.Is(er, syscall.ECONNRESET) { + break + } + if er != nil { + err = er + break + } + } + return err +} + +func readStdio(conn *net.UnixConn, streams *define.AttachStreams, receiveStdoutError, stdinDone chan error) error { + var err error + select { + case err = <-receiveStdoutError: + if err := conn.CloseWrite(); err != nil { + logrus.Errorf("Failed to close stdin: %v", err) + } + return err + case err = <-stdinDone: + if err == define.ErrDetach { + if err := conn.CloseWrite(); err != nil { + logrus.Errorf("Failed to close stdin: %v", err) + } + return err + } + if err == nil { + // copy stdin is done, close it + if connErr := conn.CloseWrite(); connErr != nil { + logrus.Errorf("Unable to close conn: %v", connErr) + } + } + if streams.AttachOutput || streams.AttachError { + return <-receiveStdoutError + } + } + return nil +} diff --git a/libpod/oci_conmon_attach_linux.go b/libpod/oci_conmon_attach_linux.go deleted file mode 100644 index aa55aa6f5..000000000 --- a/libpod/oci_conmon_attach_linux.go +++ /dev/null @@ -1,314 +0,0 @@ -//go:build linux -// +build linux - -package libpod - -import ( - "errors" - "fmt" - "io" - "net" - "os" - "path/filepath" - "syscall" - - "github.com/containers/common/pkg/config" - "github.com/containers/common/pkg/resize" - "github.com/containers/common/pkg/util" - "github.com/containers/podman/v4/libpod/define" - "github.com/containers/podman/v4/pkg/errorhandling" - "github.com/moby/term" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -/* Sync with stdpipe_t in conmon.c */ -const ( - AttachPipeStdin = 1 - AttachPipeStdout = 2 - AttachPipeStderr = 3 -) - -func openUnixSocket(path string) (*net.UnixConn, error) { - fd, err := unix.Open(path, unix.O_PATH, 0) - if err != nil { - return nil, err - } - defer unix.Close(fd) - return net.DialUnix("unixpacket", nil, &net.UnixAddr{Name: fmt.Sprintf("/proc/self/fd/%d", fd), Net: "unixpacket"}) -} - -// Attach to the given container. -// Does not check if state is appropriate. -// started is only required if startContainer is true. -func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error { - passthrough := c.LogDriver() == define.PassthroughLogging - - if params == nil || params.Streams == nil { - return fmt.Errorf("must provide parameters to Attach: %w", define.ErrInternal) - } - - if !params.Streams.AttachOutput && !params.Streams.AttachError && !params.Streams.AttachInput && !passthrough { - return fmt.Errorf("must provide at least one stream to attach to: %w", define.ErrInvalidArg) - } - if params.Start && params.Started == nil { - return fmt.Errorf("started chan not passed when startContainer set: %w", define.ErrInternal) - } - - keys := config.DefaultDetachKeys - if params.DetachKeys != nil { - keys = *params.DetachKeys - } - - detachKeys, err := processDetachKeys(keys) - if err != nil { - return err - } - - var conn *net.UnixConn - if !passthrough { - logrus.Debugf("Attaching to container %s", c.ID()) - - // If we have a resize, do it. - if params.InitialSize != nil { - if err := r.AttachResize(c, *params.InitialSize); err != nil { - return err - } - } - - attachSock, err := c.AttachSocketPath() - if err != nil { - return err - } - - conn, err = openUnixSocket(attachSock) - if err != nil { - return fmt.Errorf("failed to connect to container's attach socket: %v: %w", attachSock, err) - } - defer func() { - if err := conn.Close(); err != nil { - logrus.Errorf("unable to close socket: %q", err) - } - }() - } - - // If starting was requested, start the container and notify when that's - // done. - if params.Start { - if err := c.start(); err != nil { - return err - } - params.Started <- true - } - - if passthrough { - return nil - } - - receiveStdoutError, stdinDone := setupStdioChannels(params.Streams, conn, detachKeys) - if params.AttachReady != nil { - params.AttachReady <- true - } - return readStdio(conn, params.Streams, receiveStdoutError, stdinDone) -} - -// Attach to the given container's exec session -// attachFd and startFd must be open file descriptors -// attachFd must be the output side of the fd. attachFd is used for two things: -// conmon will first send a nonce value across the pipe indicating it has set up its side of the console socket -// this ensures attachToExec gets all of the output of the called process -// conmon will then send the exit code of the exec process, or an error in the exec session -// startFd must be the input side of the fd. -// newSize resizes the tty to this size before the process is started, must be nil if the exec session has no tty -// conmon will wait to start the exec session until the parent process has set up the console socket. -// Once attachToExec successfully attaches to the console socket, the child conmon process responsible for calling runtime exec -// will read from the output side of start fd, thus learning to start the child process. -// Thus, the order goes as follow: -// 1. conmon parent process sets up its console socket. sends on attachFd -// 2. attachToExec attaches to the console socket after reading on attachFd and resizes the tty -// 3. child waits on startFd for attachToExec to attach to said console socket -// 4. attachToExec sends on startFd, signalling it has attached to the socket and child is ready to go -// 5. child receives on startFd, runs the runtime exec command -// attachToExec is responsible for closing startFd and attachFd -func (c *Container) attachToExec(streams *define.AttachStreams, keys *string, sessionID string, startFd, attachFd *os.File, newSize *resize.TerminalSize) error { - if !streams.AttachOutput && !streams.AttachError && !streams.AttachInput { - return fmt.Errorf("must provide at least one stream to attach to: %w", define.ErrInvalidArg) - } - if startFd == nil || attachFd == nil { - return fmt.Errorf("start sync pipe and attach sync pipe must be defined for exec attach: %w", define.ErrInvalidArg) - } - - defer errorhandling.CloseQuiet(startFd) - defer errorhandling.CloseQuiet(attachFd) - - detachString := config.DefaultDetachKeys - if keys != nil { - detachString = *keys - } - detachKeys, err := processDetachKeys(detachString) - if err != nil { - return err - } - - logrus.Debugf("Attaching to container %s exec session %s", c.ID(), sessionID) - - // set up the socket path, such that it is the correct length and location for exec - sockPath, err := c.execAttachSocketPath(sessionID) - if err != nil { - return err - } - - // 2: read from attachFd that the parent process has set up the console socket - if _, err := readConmonPipeData(c.ociRuntime.Name(), attachFd, ""); err != nil { - return err - } - - // resize before we start the container process - if newSize != nil { - err = c.ociRuntime.ExecAttachResize(c, sessionID, *newSize) - if err != nil { - logrus.Warnf("Resize failed: %v", err) - } - } - - // 2: then attach - conn, err := openUnixSocket(sockPath) - if err != nil { - return fmt.Errorf("failed to connect to container's attach socket: %v: %w", sockPath, err) - } - defer func() { - if err := conn.Close(); err != nil { - logrus.Errorf("Unable to close socket: %q", err) - } - }() - - // start listening on stdio of the process - receiveStdoutError, stdinDone := setupStdioChannels(streams, conn, detachKeys) - - // 4: send start message to child - if err := writeConmonPipeData(startFd); err != nil { - return err - } - - return readStdio(conn, streams, receiveStdoutError, stdinDone) -} - -func processDetachKeys(keys string) ([]byte, error) { - // Check the validity of the provided keys first - if len(keys) == 0 { - return []byte{}, nil - } - detachKeys, err := term.ToBytes(keys) - if err != nil { - return nil, fmt.Errorf("invalid detach keys: %w", err) - } - return detachKeys, nil -} - -func registerResizeFunc(r <-chan resize.TerminalSize, bundlePath string) { - resize.HandleResizing(r, func(size resize.TerminalSize) { - controlPath := filepath.Join(bundlePath, "ctl") - controlFile, err := os.OpenFile(controlPath, unix.O_WRONLY, 0) - if err != nil { - logrus.Debugf("Could not open ctl file: %v", err) - return - } - defer controlFile.Close() - - logrus.Debugf("Received a resize event: %+v", size) - if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 1, size.Height, size.Width); err != nil { - logrus.Warnf("Failed to write to control file to resize terminal: %v", err) - } - }) -} - -func setupStdioChannels(streams *define.AttachStreams, conn *net.UnixConn, detachKeys []byte) (chan error, chan error) { - receiveStdoutError := make(chan error) - go func() { - receiveStdoutError <- redirectResponseToOutputStreams(streams.OutputStream, streams.ErrorStream, streams.AttachOutput, streams.AttachError, conn) - }() - - stdinDone := make(chan error) - go func() { - var err error - if streams.AttachInput { - _, err = util.CopyDetachable(conn, streams.InputStream, detachKeys) - } - stdinDone <- err - }() - - return receiveStdoutError, stdinDone -} - -func redirectResponseToOutputStreams(outputStream, errorStream io.Writer, writeOutput, writeError bool, conn io.Reader) error { - var err error - buf := make([]byte, 8192+1) /* Sync with conmon STDIO_BUF_SIZE */ - for { - nr, er := conn.Read(buf) - if nr > 0 { - var dst io.Writer - var doWrite bool - switch buf[0] { - case AttachPipeStdout: - dst = outputStream - doWrite = writeOutput - case AttachPipeStderr: - dst = errorStream - doWrite = writeError - default: - logrus.Infof("Received unexpected attach type %+d", buf[0]) - } - if dst == nil { - return errors.New("output destination cannot be nil") - } - - if doWrite { - nw, ew := dst.Write(buf[1:nr]) - if ew != nil { - err = ew - break - } - if nr != nw+1 { - err = io.ErrShortWrite - break - } - } - } - if errors.Is(er, io.EOF) || errors.Is(er, syscall.ECONNRESET) { - break - } - if er != nil { - err = er - break - } - } - return err -} - -func readStdio(conn *net.UnixConn, streams *define.AttachStreams, receiveStdoutError, stdinDone chan error) error { - var err error - select { - case err = <-receiveStdoutError: - if err := conn.CloseWrite(); err != nil { - logrus.Errorf("Failed to close stdin: %v", err) - } - return err - case err = <-stdinDone: - if err == define.ErrDetach { - if err := conn.CloseWrite(); err != nil { - logrus.Errorf("Failed to close stdin: %v", err) - } - return err - } - if err == nil { - // copy stdin is done, close it - if connErr := conn.CloseWrite(); connErr != nil { - logrus.Errorf("Unable to close conn: %v", connErr) - } - } - if streams.AttachOutput || streams.AttachError { - return <-receiveStdoutError - } - } - return nil -} -- cgit v1.2.3-54-g00ecf From 68b2450d3de0344b2a4cfacdcabed8d1c854cb68 Mon Sep 17 00:00:00 2001 From: Doug Rabson Date: Wed, 17 Aug 2022 09:37:07 +0100 Subject: libpod: Move oci_conmon_exec_linux.go to oci_conmon_exec_common.go [NO NEW TESTS NEEDED] Signed-off-by: Doug Rabson --- libpod/oci_conmon_exec_common.go | 797 +++++++++++++++++++++++++++++++++++++++ libpod/oci_conmon_exec_linux.go | 797 --------------------------------------- 2 files changed, 797 insertions(+), 797 deletions(-) create mode 100644 libpod/oci_conmon_exec_common.go delete mode 100644 libpod/oci_conmon_exec_linux.go (limited to 'libpod') diff --git a/libpod/oci_conmon_exec_common.go b/libpod/oci_conmon_exec_common.go new file mode 100644 index 000000000..16cd7ef9f --- /dev/null +++ b/libpod/oci_conmon_exec_common.go @@ -0,0 +1,797 @@ +package libpod + +import ( + "errors" + "fmt" + "io/ioutil" + "net/http" + "os" + "os/exec" + "path/filepath" + "strings" + "syscall" + "time" + + "github.com/containers/common/pkg/capabilities" + "github.com/containers/common/pkg/config" + "github.com/containers/common/pkg/resize" + cutil "github.com/containers/common/pkg/util" + "github.com/containers/podman/v4/libpod/define" + "github.com/containers/podman/v4/pkg/errorhandling" + "github.com/containers/podman/v4/pkg/lookup" + "github.com/containers/podman/v4/pkg/util" + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +// ExecContainer executes a command in a running container +func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options *ExecOptions, streams *define.AttachStreams, newSize *resize.TerminalSize) (int, chan error, error) { + if options == nil { + return -1, nil, fmt.Errorf("must provide an ExecOptions struct to ExecContainer: %w", define.ErrInvalidArg) + } + if len(options.Cmd) == 0 { + return -1, nil, fmt.Errorf("must provide a command to execute: %w", define.ErrInvalidArg) + } + + if sessionID == "" { + return -1, nil, fmt.Errorf("must provide a session ID for exec: %w", define.ErrEmptyID) + } + + // TODO: Should we default this to false? + // Or maybe make streams mandatory? + attachStdin := true + if streams != nil { + attachStdin = streams.AttachInput + } + + var ociLog string + if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON { + ociLog = c.execOCILog(sessionID) + } + + execCmd, pipes, err := r.startExec(c, sessionID, options, attachStdin, ociLog) + if err != nil { + return -1, nil, err + } + + // Only close sync pipe. Start and attach are consumed in the attach + // goroutine. + defer func() { + if pipes.syncPipe != nil && !pipes.syncClosed { + errorhandling.CloseQuiet(pipes.syncPipe) + pipes.syncClosed = true + } + }() + + // TODO Only create if !detach + // Attach to the container before starting it + attachChan := make(chan error) + go func() { + // attachToExec is responsible for closing pipes + attachChan <- c.attachToExec(streams, options.DetachKeys, sessionID, pipes.startPipe, pipes.attachPipe, newSize) + close(attachChan) + }() + + if err := execCmd.Wait(); err != nil { + return -1, nil, fmt.Errorf("cannot run conmon: %w", err) + } + + pid, err := readConmonPipeData(r.name, pipes.syncPipe, ociLog) + + return pid, attachChan, err +} + +// ExecContainerHTTP executes a new command in an existing container and +// forwards its standard streams over an attach +func (r *ConmonOCIRuntime) ExecContainerHTTP(ctr *Container, sessionID string, options *ExecOptions, req *http.Request, w http.ResponseWriter, + streams *HTTPAttachStreams, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, newSize *resize.TerminalSize) (int, chan error, error) { + if streams != nil { + if !streams.Stdin && !streams.Stdout && !streams.Stderr { + return -1, nil, fmt.Errorf("must provide at least one stream to attach to: %w", define.ErrInvalidArg) + } + } + + if options == nil { + return -1, nil, fmt.Errorf("must provide exec options to ExecContainerHTTP: %w", define.ErrInvalidArg) + } + + detachString := config.DefaultDetachKeys + if options.DetachKeys != nil { + detachString = *options.DetachKeys + } + detachKeys, err := processDetachKeys(detachString) + if err != nil { + return -1, nil, err + } + + // TODO: Should we default this to false? + // Or maybe make streams mandatory? + attachStdin := true + if streams != nil { + attachStdin = streams.Stdin + } + + var ociLog string + if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON { + ociLog = ctr.execOCILog(sessionID) + } + + execCmd, pipes, err := r.startExec(ctr, sessionID, options, attachStdin, ociLog) + if err != nil { + return -1, nil, err + } + + // Only close sync pipe. Start and attach are consumed in the attach + // goroutine. + defer func() { + if pipes.syncPipe != nil && !pipes.syncClosed { + errorhandling.CloseQuiet(pipes.syncPipe) + pipes.syncClosed = true + } + }() + + attachChan := make(chan error) + conmonPipeDataChan := make(chan conmonPipeData) + go func() { + // attachToExec is responsible for closing pipes + attachChan <- attachExecHTTP(ctr, sessionID, req, w, streams, pipes, detachKeys, options.Terminal, cancel, hijackDone, holdConnOpen, execCmd, conmonPipeDataChan, ociLog, newSize, r.name) + close(attachChan) + }() + + // NOTE: the channel is needed to communicate conmon's data. In case + // of an error, the error will be written on the hijacked http + // connection such that remote clients will receive the error. + pipeData := <-conmonPipeDataChan + + return pipeData.pid, attachChan, pipeData.err +} + +// conmonPipeData contains the data when reading from conmon's pipe. +type conmonPipeData struct { + pid int + err error +} + +// ExecContainerDetached executes a command in a running container, but does +// not attach to it. +func (r *ConmonOCIRuntime) ExecContainerDetached(ctr *Container, sessionID string, options *ExecOptions, stdin bool) (int, error) { + if options == nil { + return -1, fmt.Errorf("must provide exec options to ExecContainerHTTP: %w", define.ErrInvalidArg) + } + + var ociLog string + if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON { + ociLog = ctr.execOCILog(sessionID) + } + + execCmd, pipes, err := r.startExec(ctr, sessionID, options, stdin, ociLog) + if err != nil { + return -1, err + } + + defer func() { + pipes.cleanup() + }() + + // Wait for Conmon to tell us we're ready to attach. + // We aren't actually *going* to attach, but this means that we're good + // to proceed. + if _, err := readConmonPipeData(r.name, pipes.attachPipe, ""); err != nil { + return -1, err + } + + // Start the exec session + if err := writeConmonPipeData(pipes.startPipe); err != nil { + return -1, err + } + + // Wait for conmon to succeed, when return. + if err := execCmd.Wait(); err != nil { + return -1, fmt.Errorf("cannot run conmon: %w", err) + } + + pid, err := readConmonPipeData(r.name, pipes.syncPipe, ociLog) + + return pid, err +} + +// ExecAttachResize resizes the TTY of the given exec session. +func (r *ConmonOCIRuntime) ExecAttachResize(ctr *Container, sessionID string, newSize resize.TerminalSize) error { + controlFile, err := openControlFile(ctr, ctr.execBundlePath(sessionID)) + if err != nil { + return err + } + defer controlFile.Close() + + if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 1, newSize.Height, newSize.Width); err != nil { + return fmt.Errorf("failed to write to ctl file to resize terminal: %w", err) + } + + return nil +} + +// ExecStopContainer stops a given exec session in a running container. +func (r *ConmonOCIRuntime) ExecStopContainer(ctr *Container, sessionID string, timeout uint) error { + pid, err := ctr.getExecSessionPID(sessionID) + if err != nil { + return err + } + + logrus.Debugf("Going to stop container %s exec session %s", ctr.ID(), sessionID) + + // Is the session dead? + // Ping the PID with signal 0 to see if it still exists. + if err := unix.Kill(pid, 0); err != nil { + if err == unix.ESRCH { + return nil + } + return fmt.Errorf("error pinging container %s exec session %s PID %d with signal 0: %w", ctr.ID(), sessionID, pid, err) + } + + if timeout > 0 { + // Use SIGTERM by default, then SIGSTOP after timeout. + logrus.Debugf("Killing exec session %s (PID %d) of container %s with SIGTERM", sessionID, pid, ctr.ID()) + if err := unix.Kill(pid, unix.SIGTERM); err != nil { + if err == unix.ESRCH { + return nil + } + return fmt.Errorf("error killing container %s exec session %s PID %d with SIGTERM: %w", ctr.ID(), sessionID, pid, err) + } + + // Wait for the PID to stop + if err := waitPidStop(pid, time.Duration(timeout)*time.Second); err != nil { + logrus.Infof("Timed out waiting for container %s exec session %s to stop, resorting to SIGKILL: %v", ctr.ID(), sessionID, err) + } else { + // No error, container is dead + return nil + } + } + + // SIGTERM did not work. On to SIGKILL. + logrus.Debugf("Killing exec session %s (PID %d) of container %s with SIGKILL", sessionID, pid, ctr.ID()) + if err := unix.Kill(pid, unix.SIGTERM); err != nil { + if err == unix.ESRCH { + return nil + } + return fmt.Errorf("error killing container %s exec session %s PID %d with SIGKILL: %w", ctr.ID(), sessionID, pid, err) + } + + // Wait for the PID to stop + if err := waitPidStop(pid, killContainerTimeout); err != nil { + return fmt.Errorf("timed out waiting for container %s exec session %s PID %d to stop after SIGKILL: %w", ctr.ID(), sessionID, pid, err) + } + + return nil +} + +// ExecUpdateStatus checks if the given exec session is still running. +func (r *ConmonOCIRuntime) ExecUpdateStatus(ctr *Container, sessionID string) (bool, error) { + pid, err := ctr.getExecSessionPID(sessionID) + if err != nil { + return false, err + } + + logrus.Debugf("Checking status of container %s exec session %s", ctr.ID(), sessionID) + + // Is the session dead? + // Ping the PID with signal 0 to see if it still exists. + if err := unix.Kill(pid, 0); err != nil { + if err == unix.ESRCH { + return false, nil + } + return false, fmt.Errorf("error pinging container %s exec session %s PID %d with signal 0: %w", ctr.ID(), sessionID, pid, err) + } + + return true, nil +} + +// ExecAttachSocketPath is the path to a container's exec session attach socket. +func (r *ConmonOCIRuntime) ExecAttachSocketPath(ctr *Container, sessionID string) (string, error) { + // We don't even use container, so don't validity check it + if sessionID == "" { + return "", fmt.Errorf("must provide a valid session ID to get attach socket path: %w", define.ErrInvalidArg) + } + + return filepath.Join(ctr.execBundlePath(sessionID), "attach"), nil +} + +// This contains pipes used by the exec API. +type execPipes struct { + syncPipe *os.File + syncClosed bool + startPipe *os.File + startClosed bool + attachPipe *os.File + attachClosed bool +} + +func (p *execPipes) cleanup() { + if p.syncPipe != nil && !p.syncClosed { + errorhandling.CloseQuiet(p.syncPipe) + p.syncClosed = true + } + if p.startPipe != nil && !p.startClosed { + errorhandling.CloseQuiet(p.startPipe) + p.startClosed = true + } + if p.attachPipe != nil && !p.attachClosed { + errorhandling.CloseQuiet(p.attachPipe) + p.attachClosed = true + } +} + +// Start an exec session's conmon parent from the given options. +func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *ExecOptions, attachStdin bool, ociLog string) (_ *exec.Cmd, _ *execPipes, deferredErr error) { + pipes := new(execPipes) + + if options == nil { + return nil, nil, fmt.Errorf("must provide an ExecOptions struct to ExecContainer: %w", define.ErrInvalidArg) + } + if len(options.Cmd) == 0 { + return nil, nil, fmt.Errorf("must provide a command to execute: %w", define.ErrInvalidArg) + } + + if sessionID == "" { + return nil, nil, fmt.Errorf("must provide a session ID for exec: %w", define.ErrEmptyID) + } + + // create sync pipe to receive the pid + parentSyncPipe, childSyncPipe, err := newPipe() + if err != nil { + return nil, nil, fmt.Errorf("error creating socket pair: %w", err) + } + pipes.syncPipe = parentSyncPipe + + defer func() { + if deferredErr != nil { + pipes.cleanup() + } + }() + + // create start pipe to set the cgroup before running + // attachToExec is responsible for closing parentStartPipe + childStartPipe, parentStartPipe, err := newPipe() + if err != nil { + return nil, nil, fmt.Errorf("error creating socket pair: %w", err) + } + pipes.startPipe = parentStartPipe + + // create the attach pipe to allow attach socket to be created before + // $RUNTIME exec starts running. This is to make sure we can capture all output + // from the process through that socket, rather than half reading the log, half attaching to the socket + // attachToExec is responsible for closing parentAttachPipe + parentAttachPipe, childAttachPipe, err := newPipe() + if err != nil { + return nil, nil, fmt.Errorf("error creating socket pair: %w", err) + } + pipes.attachPipe = parentAttachPipe + + childrenClosed := false + defer func() { + if !childrenClosed { + errorhandling.CloseQuiet(childSyncPipe) + errorhandling.CloseQuiet(childAttachPipe) + errorhandling.CloseQuiet(childStartPipe) + } + }() + + runtimeDir, err := util.GetRuntimeDir() + if err != nil { + return nil, nil, err + } + + finalEnv := make([]string, 0, len(options.Env)) + for k, v := range options.Env { + finalEnv = append(finalEnv, fmt.Sprintf("%s=%s", k, v)) + } + + processFile, err := prepareProcessExec(c, options, finalEnv, sessionID) + if err != nil { + return nil, nil, err + } + defer processFile.Close() + + args := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execExitFileDir(sessionID), ociLog, define.NoLogging, c.config.LogTag) + + if options.PreserveFDs > 0 { + args = append(args, formatRuntimeOpts("--preserve-fds", fmt.Sprintf("%d", options.PreserveFDs))...) + } + + if options.Terminal { + args = append(args, "-t") + } + + if attachStdin { + args = append(args, "-i") + } + + // Append container ID and command + args = append(args, "-e") + // TODO make this optional when we can detach + args = append(args, "--exec-attach") + args = append(args, "--exec-process-spec", processFile.Name()) + + if len(options.ExitCommand) > 0 { + args = append(args, "--exit-command", options.ExitCommand[0]) + for _, arg := range options.ExitCommand[1:] { + args = append(args, []string{"--exit-command-arg", arg}...) + } + if options.ExitCommandDelay > 0 { + args = append(args, []string{"--exit-delay", fmt.Sprintf("%d", options.ExitCommandDelay)}...) + } + } + + logrus.WithFields(logrus.Fields{ + "args": args, + }).Debugf("running conmon: %s", r.conmonPath) + execCmd := exec.Command(r.conmonPath, args...) + + // TODO: This is commented because it doesn't make much sense in HTTP + // attach, and I'm not certain it does for non-HTTP attach as well. + // if streams != nil { + // // Don't add the InputStream to the execCmd. Instead, the data should be passed + // // through CopyDetachable + // if streams.AttachOutput { + // execCmd.Stdout = options.Streams.OutputStream + // } + // if streams.AttachError { + // execCmd.Stderr = options.Streams.ErrorStream + // } + // } + + conmonEnv := r.configureConmonEnv(runtimeDir) + + var filesToClose []*os.File + if options.PreserveFDs > 0 { + for fd := 3; fd < int(3+options.PreserveFDs); fd++ { + f := os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd)) + filesToClose = append(filesToClose, f) + execCmd.ExtraFiles = append(execCmd.ExtraFiles, f) + } + } + + // we don't want to step on users fds they asked to preserve + // Since 0-2 are used for stdio, start the fds we pass in at preserveFDs+3 + execCmd.Env = r.conmonEnv + execCmd.Env = append(execCmd.Env, fmt.Sprintf("_OCI_SYNCPIPE=%d", options.PreserveFDs+3), fmt.Sprintf("_OCI_STARTPIPE=%d", options.PreserveFDs+4), fmt.Sprintf("_OCI_ATTACHPIPE=%d", options.PreserveFDs+5)) + execCmd.Env = append(execCmd.Env, conmonEnv...) + + execCmd.ExtraFiles = append(execCmd.ExtraFiles, childSyncPipe, childStartPipe, childAttachPipe) + execCmd.Dir = c.execBundlePath(sessionID) + execCmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + } + + err = startCommand(execCmd, c) + + // We don't need children pipes on the parent side + errorhandling.CloseQuiet(childSyncPipe) + errorhandling.CloseQuiet(childAttachPipe) + errorhandling.CloseQuiet(childStartPipe) + childrenClosed = true + + if err != nil { + return nil, nil, fmt.Errorf("cannot start container %s: %w", c.ID(), err) + } + if err := r.moveConmonToCgroupAndSignal(c, execCmd, parentStartPipe); err != nil { + return nil, nil, err + } + + // These fds were passed down to the runtime. Close them + // and not interfere + for _, f := range filesToClose { + errorhandling.CloseQuiet(f) + } + + return execCmd, pipes, nil +} + +// Attach to a container over HTTP +func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, pipes *execPipes, detachKeys []byte, isTerminal bool, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, execCmd *exec.Cmd, conmonPipeDataChan chan<- conmonPipeData, ociLog string, newSize *resize.TerminalSize, runtimeName string) (deferredErr error) { + // NOTE: As you may notice, the attach code is quite complex. + // Many things happen concurrently and yet are interdependent. + // If you ever change this function, make sure to write to the + // conmonPipeDataChan in case of an error. + + if pipes == nil || pipes.startPipe == nil || pipes.attachPipe == nil { + err := fmt.Errorf("must provide a start and attach pipe to finish an exec attach: %w", define.ErrInvalidArg) + conmonPipeDataChan <- conmonPipeData{-1, err} + return err + } + + defer func() { + if !pipes.startClosed { + errorhandling.CloseQuiet(pipes.startPipe) + pipes.startClosed = true + } + if !pipes.attachClosed { + errorhandling.CloseQuiet(pipes.attachPipe) + pipes.attachClosed = true + } + }() + + logrus.Debugf("Attaching to container %s exec session %s", c.ID(), sessionID) + + // set up the socket path, such that it is the correct length and location for exec + sockPath, err := c.execAttachSocketPath(sessionID) + if err != nil { + conmonPipeDataChan <- conmonPipeData{-1, err} + return err + } + + // 2: read from attachFd that the parent process has set up the console socket + if _, err := readConmonPipeData(runtimeName, pipes.attachPipe, ""); err != nil { + conmonPipeDataChan <- conmonPipeData{-1, err} + return err + } + + // resize before we start the container process + if newSize != nil { + err = c.ociRuntime.ExecAttachResize(c, sessionID, *newSize) + if err != nil { + logrus.Warnf("Resize failed: %v", err) + } + } + + // 2: then attach + conn, err := openUnixSocket(sockPath) + if err != nil { + conmonPipeDataChan <- conmonPipeData{-1, err} + return fmt.Errorf("failed to connect to container's attach socket: %v: %w", sockPath, err) + } + defer func() { + if err := conn.Close(); err != nil { + logrus.Errorf("Unable to close socket: %q", err) + } + }() + + attachStdout := true + attachStderr := true + attachStdin := true + if streams != nil { + attachStdout = streams.Stdout + attachStderr = streams.Stderr + attachStdin = streams.Stdin + } + + // Perform hijack + hijacker, ok := w.(http.Hijacker) + if !ok { + conmonPipeDataChan <- conmonPipeData{-1, err} + return errors.New("unable to hijack connection") + } + + httpCon, httpBuf, err := hijacker.Hijack() + if err != nil { + conmonPipeDataChan <- conmonPipeData{-1, err} + return fmt.Errorf("error hijacking connection: %w", err) + } + + hijackDone <- true + + // Write a header to let the client know what happened + writeHijackHeader(r, httpBuf) + + // Force a flush after the header is written. + if err := httpBuf.Flush(); err != nil { + conmonPipeDataChan <- conmonPipeData{-1, err} + return fmt.Errorf("error flushing HTTP hijack header: %w", err) + } + + go func() { + // Wait for conmon to succeed, when return. + if err := execCmd.Wait(); err != nil { + conmonPipeDataChan <- conmonPipeData{-1, err} + } else { + pid, err := readConmonPipeData(runtimeName, pipes.syncPipe, ociLog) + if err != nil { + hijackWriteError(err, c.ID(), isTerminal, httpBuf) + conmonPipeDataChan <- conmonPipeData{pid, err} + } else { + conmonPipeDataChan <- conmonPipeData{pid, err} + } + } + // We need to hold the connection open until the complete exec + // function has finished. This channel will be closed in a defer + // in that function, so we can wait for it here. + // Can't be a defer, because this would block the function from + // returning. + <-holdConnOpen + hijackWriteErrorAndClose(deferredErr, c.ID(), isTerminal, httpCon, httpBuf) + }() + + stdoutChan := make(chan error) + stdinChan := make(chan error) + + // Next, STDIN. Avoid entirely if attachStdin unset. + if attachStdin { + go func() { + logrus.Debugf("Beginning STDIN copy") + _, err := cutil.CopyDetachable(conn, httpBuf, detachKeys) + logrus.Debugf("STDIN copy completed") + stdinChan <- err + }() + } + + // 4: send start message to child + if err := writeConmonPipeData(pipes.startPipe); err != nil { + return err + } + + // Handle STDOUT/STDERR *after* start message is sent + go func() { + var err error + if isTerminal { + // Hack: return immediately if attachStdout not set to + // emulate Docker. + // Basically, when terminal is set, STDERR goes nowhere. + // Everything does over STDOUT. + // Therefore, if not attaching STDOUT - we'll never copy + // anything from here. + logrus.Debugf("Performing terminal HTTP attach for container %s", c.ID()) + if attachStdout { + err = httpAttachTerminalCopy(conn, httpBuf, c.ID()) + } + } else { + logrus.Debugf("Performing non-terminal HTTP attach for container %s", c.ID()) + err = httpAttachNonTerminalCopy(conn, httpBuf, c.ID(), attachStdin, attachStdout, attachStderr) + } + stdoutChan <- err + logrus.Debugf("STDOUT/ERR copy completed") + }() + + for { + select { + case err := <-stdoutChan: + if err != nil { + return err + } + + return nil + case err := <-stdinChan: + if err != nil { + return err + } + // copy stdin is done, close it + if connErr := conn.CloseWrite(); connErr != nil { + logrus.Errorf("Unable to close conn: %v", connErr) + } + case <-cancel: + return nil + } + } +} + +// prepareProcessExec returns the path of the process.json used in runc exec -p +// caller is responsible to close the returned *os.File if needed. +func prepareProcessExec(c *Container, options *ExecOptions, env []string, sessionID string) (*os.File, error) { + f, err := ioutil.TempFile(c.execBundlePath(sessionID), "exec-process-") + if err != nil { + return nil, err + } + pspec := new(spec.Process) + if err := JSONDeepCopy(c.config.Spec.Process, pspec); err != nil { + return nil, err + } + pspec.SelinuxLabel = c.config.ProcessLabel + pspec.Args = options.Cmd + + // We need to default this to false else it will inherit terminal as true + // from the container. + pspec.Terminal = false + if options.Terminal { + pspec.Terminal = true + } + if len(env) > 0 { + pspec.Env = append(pspec.Env, env...) + } + + // Add secret envs if they exist + manager, err := c.runtime.SecretsManager() + if err != nil { + return nil, err + } + for name, secr := range c.config.EnvSecrets { + _, data, err := manager.LookupSecretData(secr.Name) + if err != nil { + return nil, err + } + pspec.Env = append(pspec.Env, fmt.Sprintf("%s=%s", name, string(data))) + } + + if options.Cwd != "" { + pspec.Cwd = options.Cwd + } + + var addGroups []string + var sgids []uint32 + + // if the user is empty, we should inherit the user that the container is currently running with + user := options.User + if user == "" { + logrus.Debugf("Set user to %s", c.config.User) + user = c.config.User + addGroups = c.config.Groups + } + + overrides := c.getUserOverrides() + execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, user, overrides) + if err != nil { + return nil, err + } + + if len(addGroups) > 0 { + sgids, err = lookup.GetContainerGroups(addGroups, c.state.Mountpoint, overrides) + if err != nil { + return nil, fmt.Errorf("error looking up supplemental groups for container %s exec session %s: %w", c.ID(), sessionID, err) + } + } + + // If user was set, look it up in the container to get a UID to use on + // the host + if user != "" || len(sgids) > 0 { + if user != "" { + for _, sgid := range execUser.Sgids { + sgids = append(sgids, uint32(sgid)) + } + } + processUser := spec.User{ + UID: uint32(execUser.Uid), + GID: uint32(execUser.Gid), + AdditionalGids: sgids, + } + + pspec.User = processUser + } + + ctrSpec, err := c.specFromState() + if err != nil { + return nil, err + } + + allCaps, err := capabilities.BoundingSet() + if err != nil { + return nil, err + } + if options.Privileged { + pspec.Capabilities.Bounding = allCaps + } else { + pspec.Capabilities.Bounding = ctrSpec.Process.Capabilities.Bounding + } + + // Always unset the inheritable capabilities similarly to what the Linux kernel does + // They are used only when using capabilities with uid != 0. + pspec.Capabilities.Inheritable = []string{} + + if execUser.Uid == 0 { + pspec.Capabilities.Effective = pspec.Capabilities.Bounding + pspec.Capabilities.Permitted = pspec.Capabilities.Bounding + } else if user == c.config.User { + pspec.Capabilities.Effective = ctrSpec.Process.Capabilities.Effective + pspec.Capabilities.Inheritable = ctrSpec.Process.Capabilities.Effective + pspec.Capabilities.Permitted = ctrSpec.Process.Capabilities.Effective + pspec.Capabilities.Ambient = ctrSpec.Process.Capabilities.Effective + } + + hasHomeSet := false + for _, s := range pspec.Env { + if strings.HasPrefix(s, "HOME=") { + hasHomeSet = true + break + } + } + if !hasHomeSet { + pspec.Env = append(pspec.Env, fmt.Sprintf("HOME=%s", execUser.Home)) + } + + processJSON, err := json.Marshal(pspec) + if err != nil { + return nil, err + } + + if err := ioutil.WriteFile(f.Name(), processJSON, 0644); err != nil { + return nil, err + } + return f, nil +} diff --git a/libpod/oci_conmon_exec_linux.go b/libpod/oci_conmon_exec_linux.go deleted file mode 100644 index 16cd7ef9f..000000000 --- a/libpod/oci_conmon_exec_linux.go +++ /dev/null @@ -1,797 +0,0 @@ -package libpod - -import ( - "errors" - "fmt" - "io/ioutil" - "net/http" - "os" - "os/exec" - "path/filepath" - "strings" - "syscall" - "time" - - "github.com/containers/common/pkg/capabilities" - "github.com/containers/common/pkg/config" - "github.com/containers/common/pkg/resize" - cutil "github.com/containers/common/pkg/util" - "github.com/containers/podman/v4/libpod/define" - "github.com/containers/podman/v4/pkg/errorhandling" - "github.com/containers/podman/v4/pkg/lookup" - "github.com/containers/podman/v4/pkg/util" - spec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -// ExecContainer executes a command in a running container -func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options *ExecOptions, streams *define.AttachStreams, newSize *resize.TerminalSize) (int, chan error, error) { - if options == nil { - return -1, nil, fmt.Errorf("must provide an ExecOptions struct to ExecContainer: %w", define.ErrInvalidArg) - } - if len(options.Cmd) == 0 { - return -1, nil, fmt.Errorf("must provide a command to execute: %w", define.ErrInvalidArg) - } - - if sessionID == "" { - return -1, nil, fmt.Errorf("must provide a session ID for exec: %w", define.ErrEmptyID) - } - - // TODO: Should we default this to false? - // Or maybe make streams mandatory? - attachStdin := true - if streams != nil { - attachStdin = streams.AttachInput - } - - var ociLog string - if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON { - ociLog = c.execOCILog(sessionID) - } - - execCmd, pipes, err := r.startExec(c, sessionID, options, attachStdin, ociLog) - if err != nil { - return -1, nil, err - } - - // Only close sync pipe. Start and attach are consumed in the attach - // goroutine. - defer func() { - if pipes.syncPipe != nil && !pipes.syncClosed { - errorhandling.CloseQuiet(pipes.syncPipe) - pipes.syncClosed = true - } - }() - - // TODO Only create if !detach - // Attach to the container before starting it - attachChan := make(chan error) - go func() { - // attachToExec is responsible for closing pipes - attachChan <- c.attachToExec(streams, options.DetachKeys, sessionID, pipes.startPipe, pipes.attachPipe, newSize) - close(attachChan) - }() - - if err := execCmd.Wait(); err != nil { - return -1, nil, fmt.Errorf("cannot run conmon: %w", err) - } - - pid, err := readConmonPipeData(r.name, pipes.syncPipe, ociLog) - - return pid, attachChan, err -} - -// ExecContainerHTTP executes a new command in an existing container and -// forwards its standard streams over an attach -func (r *ConmonOCIRuntime) ExecContainerHTTP(ctr *Container, sessionID string, options *ExecOptions, req *http.Request, w http.ResponseWriter, - streams *HTTPAttachStreams, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, newSize *resize.TerminalSize) (int, chan error, error) { - if streams != nil { - if !streams.Stdin && !streams.Stdout && !streams.Stderr { - return -1, nil, fmt.Errorf("must provide at least one stream to attach to: %w", define.ErrInvalidArg) - } - } - - if options == nil { - return -1, nil, fmt.Errorf("must provide exec options to ExecContainerHTTP: %w", define.ErrInvalidArg) - } - - detachString := config.DefaultDetachKeys - if options.DetachKeys != nil { - detachString = *options.DetachKeys - } - detachKeys, err := processDetachKeys(detachString) - if err != nil { - return -1, nil, err - } - - // TODO: Should we default this to false? - // Or maybe make streams mandatory? - attachStdin := true - if streams != nil { - attachStdin = streams.Stdin - } - - var ociLog string - if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON { - ociLog = ctr.execOCILog(sessionID) - } - - execCmd, pipes, err := r.startExec(ctr, sessionID, options, attachStdin, ociLog) - if err != nil { - return -1, nil, err - } - - // Only close sync pipe. Start and attach are consumed in the attach - // goroutine. - defer func() { - if pipes.syncPipe != nil && !pipes.syncClosed { - errorhandling.CloseQuiet(pipes.syncPipe) - pipes.syncClosed = true - } - }() - - attachChan := make(chan error) - conmonPipeDataChan := make(chan conmonPipeData) - go func() { - // attachToExec is responsible for closing pipes - attachChan <- attachExecHTTP(ctr, sessionID, req, w, streams, pipes, detachKeys, options.Terminal, cancel, hijackDone, holdConnOpen, execCmd, conmonPipeDataChan, ociLog, newSize, r.name) - close(attachChan) - }() - - // NOTE: the channel is needed to communicate conmon's data. In case - // of an error, the error will be written on the hijacked http - // connection such that remote clients will receive the error. - pipeData := <-conmonPipeDataChan - - return pipeData.pid, attachChan, pipeData.err -} - -// conmonPipeData contains the data when reading from conmon's pipe. -type conmonPipeData struct { - pid int - err error -} - -// ExecContainerDetached executes a command in a running container, but does -// not attach to it. -func (r *ConmonOCIRuntime) ExecContainerDetached(ctr *Container, sessionID string, options *ExecOptions, stdin bool) (int, error) { - if options == nil { - return -1, fmt.Errorf("must provide exec options to ExecContainerHTTP: %w", define.ErrInvalidArg) - } - - var ociLog string - if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON { - ociLog = ctr.execOCILog(sessionID) - } - - execCmd, pipes, err := r.startExec(ctr, sessionID, options, stdin, ociLog) - if err != nil { - return -1, err - } - - defer func() { - pipes.cleanup() - }() - - // Wait for Conmon to tell us we're ready to attach. - // We aren't actually *going* to attach, but this means that we're good - // to proceed. - if _, err := readConmonPipeData(r.name, pipes.attachPipe, ""); err != nil { - return -1, err - } - - // Start the exec session - if err := writeConmonPipeData(pipes.startPipe); err != nil { - return -1, err - } - - // Wait for conmon to succeed, when return. - if err := execCmd.Wait(); err != nil { - return -1, fmt.Errorf("cannot run conmon: %w", err) - } - - pid, err := readConmonPipeData(r.name, pipes.syncPipe, ociLog) - - return pid, err -} - -// ExecAttachResize resizes the TTY of the given exec session. -func (r *ConmonOCIRuntime) ExecAttachResize(ctr *Container, sessionID string, newSize resize.TerminalSize) error { - controlFile, err := openControlFile(ctr, ctr.execBundlePath(sessionID)) - if err != nil { - return err - } - defer controlFile.Close() - - if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 1, newSize.Height, newSize.Width); err != nil { - return fmt.Errorf("failed to write to ctl file to resize terminal: %w", err) - } - - return nil -} - -// ExecStopContainer stops a given exec session in a running container. -func (r *ConmonOCIRuntime) ExecStopContainer(ctr *Container, sessionID string, timeout uint) error { - pid, err := ctr.getExecSessionPID(sessionID) - if err != nil { - return err - } - - logrus.Debugf("Going to stop container %s exec session %s", ctr.ID(), sessionID) - - // Is the session dead? - // Ping the PID with signal 0 to see if it still exists. - if err := unix.Kill(pid, 0); err != nil { - if err == unix.ESRCH { - return nil - } - return fmt.Errorf("error pinging container %s exec session %s PID %d with signal 0: %w", ctr.ID(), sessionID, pid, err) - } - - if timeout > 0 { - // Use SIGTERM by default, then SIGSTOP after timeout. - logrus.Debugf("Killing exec session %s (PID %d) of container %s with SIGTERM", sessionID, pid, ctr.ID()) - if err := unix.Kill(pid, unix.SIGTERM); err != nil { - if err == unix.ESRCH { - return nil - } - return fmt.Errorf("error killing container %s exec session %s PID %d with SIGTERM: %w", ctr.ID(), sessionID, pid, err) - } - - // Wait for the PID to stop - if err := waitPidStop(pid, time.Duration(timeout)*time.Second); err != nil { - logrus.Infof("Timed out waiting for container %s exec session %s to stop, resorting to SIGKILL: %v", ctr.ID(), sessionID, err) - } else { - // No error, container is dead - return nil - } - } - - // SIGTERM did not work. On to SIGKILL. - logrus.Debugf("Killing exec session %s (PID %d) of container %s with SIGKILL", sessionID, pid, ctr.ID()) - if err := unix.Kill(pid, unix.SIGTERM); err != nil { - if err == unix.ESRCH { - return nil - } - return fmt.Errorf("error killing container %s exec session %s PID %d with SIGKILL: %w", ctr.ID(), sessionID, pid, err) - } - - // Wait for the PID to stop - if err := waitPidStop(pid, killContainerTimeout); err != nil { - return fmt.Errorf("timed out waiting for container %s exec session %s PID %d to stop after SIGKILL: %w", ctr.ID(), sessionID, pid, err) - } - - return nil -} - -// ExecUpdateStatus checks if the given exec session is still running. -func (r *ConmonOCIRuntime) ExecUpdateStatus(ctr *Container, sessionID string) (bool, error) { - pid, err := ctr.getExecSessionPID(sessionID) - if err != nil { - return false, err - } - - logrus.Debugf("Checking status of container %s exec session %s", ctr.ID(), sessionID) - - // Is the session dead? - // Ping the PID with signal 0 to see if it still exists. - if err := unix.Kill(pid, 0); err != nil { - if err == unix.ESRCH { - return false, nil - } - return false, fmt.Errorf("error pinging container %s exec session %s PID %d with signal 0: %w", ctr.ID(), sessionID, pid, err) - } - - return true, nil -} - -// ExecAttachSocketPath is the path to a container's exec session attach socket. -func (r *ConmonOCIRuntime) ExecAttachSocketPath(ctr *Container, sessionID string) (string, error) { - // We don't even use container, so don't validity check it - if sessionID == "" { - return "", fmt.Errorf("must provide a valid session ID to get attach socket path: %w", define.ErrInvalidArg) - } - - return filepath.Join(ctr.execBundlePath(sessionID), "attach"), nil -} - -// This contains pipes used by the exec API. -type execPipes struct { - syncPipe *os.File - syncClosed bool - startPipe *os.File - startClosed bool - attachPipe *os.File - attachClosed bool -} - -func (p *execPipes) cleanup() { - if p.syncPipe != nil && !p.syncClosed { - errorhandling.CloseQuiet(p.syncPipe) - p.syncClosed = true - } - if p.startPipe != nil && !p.startClosed { - errorhandling.CloseQuiet(p.startPipe) - p.startClosed = true - } - if p.attachPipe != nil && !p.attachClosed { - errorhandling.CloseQuiet(p.attachPipe) - p.attachClosed = true - } -} - -// Start an exec session's conmon parent from the given options. -func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *ExecOptions, attachStdin bool, ociLog string) (_ *exec.Cmd, _ *execPipes, deferredErr error) { - pipes := new(execPipes) - - if options == nil { - return nil, nil, fmt.Errorf("must provide an ExecOptions struct to ExecContainer: %w", define.ErrInvalidArg) - } - if len(options.Cmd) == 0 { - return nil, nil, fmt.Errorf("must provide a command to execute: %w", define.ErrInvalidArg) - } - - if sessionID == "" { - return nil, nil, fmt.Errorf("must provide a session ID for exec: %w", define.ErrEmptyID) - } - - // create sync pipe to receive the pid - parentSyncPipe, childSyncPipe, err := newPipe() - if err != nil { - return nil, nil, fmt.Errorf("error creating socket pair: %w", err) - } - pipes.syncPipe = parentSyncPipe - - defer func() { - if deferredErr != nil { - pipes.cleanup() - } - }() - - // create start pipe to set the cgroup before running - // attachToExec is responsible for closing parentStartPipe - childStartPipe, parentStartPipe, err := newPipe() - if err != nil { - return nil, nil, fmt.Errorf("error creating socket pair: %w", err) - } - pipes.startPipe = parentStartPipe - - // create the attach pipe to allow attach socket to be created before - // $RUNTIME exec starts running. This is to make sure we can capture all output - // from the process through that socket, rather than half reading the log, half attaching to the socket - // attachToExec is responsible for closing parentAttachPipe - parentAttachPipe, childAttachPipe, err := newPipe() - if err != nil { - return nil, nil, fmt.Errorf("error creating socket pair: %w", err) - } - pipes.attachPipe = parentAttachPipe - - childrenClosed := false - defer func() { - if !childrenClosed { - errorhandling.CloseQuiet(childSyncPipe) - errorhandling.CloseQuiet(childAttachPipe) - errorhandling.CloseQuiet(childStartPipe) - } - }() - - runtimeDir, err := util.GetRuntimeDir() - if err != nil { - return nil, nil, err - } - - finalEnv := make([]string, 0, len(options.Env)) - for k, v := range options.Env { - finalEnv = append(finalEnv, fmt.Sprintf("%s=%s", k, v)) - } - - processFile, err := prepareProcessExec(c, options, finalEnv, sessionID) - if err != nil { - return nil, nil, err - } - defer processFile.Close() - - args := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execExitFileDir(sessionID), ociLog, define.NoLogging, c.config.LogTag) - - if options.PreserveFDs > 0 { - args = append(args, formatRuntimeOpts("--preserve-fds", fmt.Sprintf("%d", options.PreserveFDs))...) - } - - if options.Terminal { - args = append(args, "-t") - } - - if attachStdin { - args = append(args, "-i") - } - - // Append container ID and command - args = append(args, "-e") - // TODO make this optional when we can detach - args = append(args, "--exec-attach") - args = append(args, "--exec-process-spec", processFile.Name()) - - if len(options.ExitCommand) > 0 { - args = append(args, "--exit-command", options.ExitCommand[0]) - for _, arg := range options.ExitCommand[1:] { - args = append(args, []string{"--exit-command-arg", arg}...) - } - if options.ExitCommandDelay > 0 { - args = append(args, []string{"--exit-delay", fmt.Sprintf("%d", options.ExitCommandDelay)}...) - } - } - - logrus.WithFields(logrus.Fields{ - "args": args, - }).Debugf("running conmon: %s", r.conmonPath) - execCmd := exec.Command(r.conmonPath, args...) - - // TODO: This is commented because it doesn't make much sense in HTTP - // attach, and I'm not certain it does for non-HTTP attach as well. - // if streams != nil { - // // Don't add the InputStream to the execCmd. Instead, the data should be passed - // // through CopyDetachable - // if streams.AttachOutput { - // execCmd.Stdout = options.Streams.OutputStream - // } - // if streams.AttachError { - // execCmd.Stderr = options.Streams.ErrorStream - // } - // } - - conmonEnv := r.configureConmonEnv(runtimeDir) - - var filesToClose []*os.File - if options.PreserveFDs > 0 { - for fd := 3; fd < int(3+options.PreserveFDs); fd++ { - f := os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd)) - filesToClose = append(filesToClose, f) - execCmd.ExtraFiles = append(execCmd.ExtraFiles, f) - } - } - - // we don't want to step on users fds they asked to preserve - // Since 0-2 are used for stdio, start the fds we pass in at preserveFDs+3 - execCmd.Env = r.conmonEnv - execCmd.Env = append(execCmd.Env, fmt.Sprintf("_OCI_SYNCPIPE=%d", options.PreserveFDs+3), fmt.Sprintf("_OCI_STARTPIPE=%d", options.PreserveFDs+4), fmt.Sprintf("_OCI_ATTACHPIPE=%d", options.PreserveFDs+5)) - execCmd.Env = append(execCmd.Env, conmonEnv...) - - execCmd.ExtraFiles = append(execCmd.ExtraFiles, childSyncPipe, childStartPipe, childAttachPipe) - execCmd.Dir = c.execBundlePath(sessionID) - execCmd.SysProcAttr = &syscall.SysProcAttr{ - Setpgid: true, - } - - err = startCommand(execCmd, c) - - // We don't need children pipes on the parent side - errorhandling.CloseQuiet(childSyncPipe) - errorhandling.CloseQuiet(childAttachPipe) - errorhandling.CloseQuiet(childStartPipe) - childrenClosed = true - - if err != nil { - return nil, nil, fmt.Errorf("cannot start container %s: %w", c.ID(), err) - } - if err := r.moveConmonToCgroupAndSignal(c, execCmd, parentStartPipe); err != nil { - return nil, nil, err - } - - // These fds were passed down to the runtime. Close them - // and not interfere - for _, f := range filesToClose { - errorhandling.CloseQuiet(f) - } - - return execCmd, pipes, nil -} - -// Attach to a container over HTTP -func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, pipes *execPipes, detachKeys []byte, isTerminal bool, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, execCmd *exec.Cmd, conmonPipeDataChan chan<- conmonPipeData, ociLog string, newSize *resize.TerminalSize, runtimeName string) (deferredErr error) { - // NOTE: As you may notice, the attach code is quite complex. - // Many things happen concurrently and yet are interdependent. - // If you ever change this function, make sure to write to the - // conmonPipeDataChan in case of an error. - - if pipes == nil || pipes.startPipe == nil || pipes.attachPipe == nil { - err := fmt.Errorf("must provide a start and attach pipe to finish an exec attach: %w", define.ErrInvalidArg) - conmonPipeDataChan <- conmonPipeData{-1, err} - return err - } - - defer func() { - if !pipes.startClosed { - errorhandling.CloseQuiet(pipes.startPipe) - pipes.startClosed = true - } - if !pipes.attachClosed { - errorhandling.CloseQuiet(pipes.attachPipe) - pipes.attachClosed = true - } - }() - - logrus.Debugf("Attaching to container %s exec session %s", c.ID(), sessionID) - - // set up the socket path, such that it is the correct length and location for exec - sockPath, err := c.execAttachSocketPath(sessionID) - if err != nil { - conmonPipeDataChan <- conmonPipeData{-1, err} - return err - } - - // 2: read from attachFd that the parent process has set up the console socket - if _, err := readConmonPipeData(runtimeName, pipes.attachPipe, ""); err != nil { - conmonPipeDataChan <- conmonPipeData{-1, err} - return err - } - - // resize before we start the container process - if newSize != nil { - err = c.ociRuntime.ExecAttachResize(c, sessionID, *newSize) - if err != nil { - logrus.Warnf("Resize failed: %v", err) - } - } - - // 2: then attach - conn, err := openUnixSocket(sockPath) - if err != nil { - conmonPipeDataChan <- conmonPipeData{-1, err} - return fmt.Errorf("failed to connect to container's attach socket: %v: %w", sockPath, err) - } - defer func() { - if err := conn.Close(); err != nil { - logrus.Errorf("Unable to close socket: %q", err) - } - }() - - attachStdout := true - attachStderr := true - attachStdin := true - if streams != nil { - attachStdout = streams.Stdout - attachStderr = streams.Stderr - attachStdin = streams.Stdin - } - - // Perform hijack - hijacker, ok := w.(http.Hijacker) - if !ok { - conmonPipeDataChan <- conmonPipeData{-1, err} - return errors.New("unable to hijack connection") - } - - httpCon, httpBuf, err := hijacker.Hijack() - if err != nil { - conmonPipeDataChan <- conmonPipeData{-1, err} - return fmt.Errorf("error hijacking connection: %w", err) - } - - hijackDone <- true - - // Write a header to let the client know what happened - writeHijackHeader(r, httpBuf) - - // Force a flush after the header is written. - if err := httpBuf.Flush(); err != nil { - conmonPipeDataChan <- conmonPipeData{-1, err} - return fmt.Errorf("error flushing HTTP hijack header: %w", err) - } - - go func() { - // Wait for conmon to succeed, when return. - if err := execCmd.Wait(); err != nil { - conmonPipeDataChan <- conmonPipeData{-1, err} - } else { - pid, err := readConmonPipeData(runtimeName, pipes.syncPipe, ociLog) - if err != nil { - hijackWriteError(err, c.ID(), isTerminal, httpBuf) - conmonPipeDataChan <- conmonPipeData{pid, err} - } else { - conmonPipeDataChan <- conmonPipeData{pid, err} - } - } - // We need to hold the connection open until the complete exec - // function has finished. This channel will be closed in a defer - // in that function, so we can wait for it here. - // Can't be a defer, because this would block the function from - // returning. - <-holdConnOpen - hijackWriteErrorAndClose(deferredErr, c.ID(), isTerminal, httpCon, httpBuf) - }() - - stdoutChan := make(chan error) - stdinChan := make(chan error) - - // Next, STDIN. Avoid entirely if attachStdin unset. - if attachStdin { - go func() { - logrus.Debugf("Beginning STDIN copy") - _, err := cutil.CopyDetachable(conn, httpBuf, detachKeys) - logrus.Debugf("STDIN copy completed") - stdinChan <- err - }() - } - - // 4: send start message to child - if err := writeConmonPipeData(pipes.startPipe); err != nil { - return err - } - - // Handle STDOUT/STDERR *after* start message is sent - go func() { - var err error - if isTerminal { - // Hack: return immediately if attachStdout not set to - // emulate Docker. - // Basically, when terminal is set, STDERR goes nowhere. - // Everything does over STDOUT. - // Therefore, if not attaching STDOUT - we'll never copy - // anything from here. - logrus.Debugf("Performing terminal HTTP attach for container %s", c.ID()) - if attachStdout { - err = httpAttachTerminalCopy(conn, httpBuf, c.ID()) - } - } else { - logrus.Debugf("Performing non-terminal HTTP attach for container %s", c.ID()) - err = httpAttachNonTerminalCopy(conn, httpBuf, c.ID(), attachStdin, attachStdout, attachStderr) - } - stdoutChan <- err - logrus.Debugf("STDOUT/ERR copy completed") - }() - - for { - select { - case err := <-stdoutChan: - if err != nil { - return err - } - - return nil - case err := <-stdinChan: - if err != nil { - return err - } - // copy stdin is done, close it - if connErr := conn.CloseWrite(); connErr != nil { - logrus.Errorf("Unable to close conn: %v", connErr) - } - case <-cancel: - return nil - } - } -} - -// prepareProcessExec returns the path of the process.json used in runc exec -p -// caller is responsible to close the returned *os.File if needed. -func prepareProcessExec(c *Container, options *ExecOptions, env []string, sessionID string) (*os.File, error) { - f, err := ioutil.TempFile(c.execBundlePath(sessionID), "exec-process-") - if err != nil { - return nil, err - } - pspec := new(spec.Process) - if err := JSONDeepCopy(c.config.Spec.Process, pspec); err != nil { - return nil, err - } - pspec.SelinuxLabel = c.config.ProcessLabel - pspec.Args = options.Cmd - - // We need to default this to false else it will inherit terminal as true - // from the container. - pspec.Terminal = false - if options.Terminal { - pspec.Terminal = true - } - if len(env) > 0 { - pspec.Env = append(pspec.Env, env...) - } - - // Add secret envs if they exist - manager, err := c.runtime.SecretsManager() - if err != nil { - return nil, err - } - for name, secr := range c.config.EnvSecrets { - _, data, err := manager.LookupSecretData(secr.Name) - if err != nil { - return nil, err - } - pspec.Env = append(pspec.Env, fmt.Sprintf("%s=%s", name, string(data))) - } - - if options.Cwd != "" { - pspec.Cwd = options.Cwd - } - - var addGroups []string - var sgids []uint32 - - // if the user is empty, we should inherit the user that the container is currently running with - user := options.User - if user == "" { - logrus.Debugf("Set user to %s", c.config.User) - user = c.config.User - addGroups = c.config.Groups - } - - overrides := c.getUserOverrides() - execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, user, overrides) - if err != nil { - return nil, err - } - - if len(addGroups) > 0 { - sgids, err = lookup.GetContainerGroups(addGroups, c.state.Mountpoint, overrides) - if err != nil { - return nil, fmt.Errorf("error looking up supplemental groups for container %s exec session %s: %w", c.ID(), sessionID, err) - } - } - - // If user was set, look it up in the container to get a UID to use on - // the host - if user != "" || len(sgids) > 0 { - if user != "" { - for _, sgid := range execUser.Sgids { - sgids = append(sgids, uint32(sgid)) - } - } - processUser := spec.User{ - UID: uint32(execUser.Uid), - GID: uint32(execUser.Gid), - AdditionalGids: sgids, - } - - pspec.User = processUser - } - - ctrSpec, err := c.specFromState() - if err != nil { - return nil, err - } - - allCaps, err := capabilities.BoundingSet() - if err != nil { - return nil, err - } - if options.Privileged { - pspec.Capabilities.Bounding = allCaps - } else { - pspec.Capabilities.Bounding = ctrSpec.Process.Capabilities.Bounding - } - - // Always unset the inheritable capabilities similarly to what the Linux kernel does - // They are used only when using capabilities with uid != 0. - pspec.Capabilities.Inheritable = []string{} - - if execUser.Uid == 0 { - pspec.Capabilities.Effective = pspec.Capabilities.Bounding - pspec.Capabilities.Permitted = pspec.Capabilities.Bounding - } else if user == c.config.User { - pspec.Capabilities.Effective = ctrSpec.Process.Capabilities.Effective - pspec.Capabilities.Inheritable = ctrSpec.Process.Capabilities.Effective - pspec.Capabilities.Permitted = ctrSpec.Process.Capabilities.Effective - pspec.Capabilities.Ambient = ctrSpec.Process.Capabilities.Effective - } - - hasHomeSet := false - for _, s := range pspec.Env { - if strings.HasPrefix(s, "HOME=") { - hasHomeSet = true - break - } - } - if !hasHomeSet { - pspec.Env = append(pspec.Env, fmt.Sprintf("HOME=%s", execUser.Home)) - } - - processJSON, err := json.Marshal(pspec) - if err != nil { - return nil, err - } - - if err := ioutil.WriteFile(f.Name(), processJSON, 0644); err != nil { - return nil, err - } - return f, nil -} -- cgit v1.2.3-54-g00ecf From 6791cdbdf153a0b3103810679995cc09ea8db340 Mon Sep 17 00:00:00 2001 From: Doug Rabson Date: Wed, 17 Aug 2022 10:29:40 +0100 Subject: libpod: Move rootless handling from oci_conmon_common.go to oci_conmon_linux.go [NO NEW TESTS NEEDED] Signed-off-by: Doug Rabson --- libpod/oci_conmon_common.go | 56 +----------------------------------- libpod/oci_conmon_linux.go | 70 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 55 deletions(-) create mode 100644 libpod/oci_conmon_linux.go (limited to 'libpod') diff --git a/libpod/oci_conmon_common.go b/libpod/oci_conmon_common.go index 1b654ed33..4ca2d6e34 100644 --- a/libpod/oci_conmon_common.go +++ b/libpod/oci_conmon_common.go @@ -41,7 +41,6 @@ import ( "github.com/containers/podman/v4/pkg/util" "github.com/containers/podman/v4/utils" "github.com/containers/storage/pkg/homedir" - pmount "github.com/containers/storage/pkg/mount" spec "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/selinux/go-selinux/label" "github.com/sirupsen/logrus" @@ -204,60 +203,7 @@ func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *Conta // if we are running a non privileged container, be sure to umount some kernel paths so they are not // bind mounted inside the container at all. if !ctr.config.Privileged && !rootless.IsRootless() { - type result struct { - restoreDuration int64 - err error - } - ch := make(chan result) - go func() { - runtime.LockOSThread() - restoreDuration, err := func() (int64, error) { - fd, err := os.Open(fmt.Sprintf("/proc/%d/task/%d/ns/mnt", os.Getpid(), unix.Gettid())) - if err != nil { - return 0, err - } - defer errorhandling.CloseQuiet(fd) - - // create a new mountns on the current thread - if err = unix.Unshare(unix.CLONE_NEWNS); err != nil { - return 0, err - } - defer func() { - if err := unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS); err != nil { - logrus.Errorf("Unable to clone new namespace: %q", err) - } - }() - - // don't spread our mounts around. We are setting only /sys to be slave - // so that the cleanup process is still able to umount the storage and the - // changes are propagated to the host. - err = unix.Mount("/sys", "/sys", "none", unix.MS_REC|unix.MS_SLAVE, "") - if err != nil { - return 0, fmt.Errorf("cannot make /sys slave: %w", err) - } - - mounts, err := pmount.GetMounts() - if err != nil { - return 0, err - } - for _, m := range mounts { - if !strings.HasPrefix(m.Mountpoint, "/sys/kernel") { - continue - } - err = unix.Unmount(m.Mountpoint, 0) - if err != nil && !os.IsNotExist(err) { - return 0, fmt.Errorf("cannot unmount %s: %w", m.Mountpoint, err) - } - } - return r.createOCIContainer(ctr, restoreOptions) - }() - ch <- result{ - restoreDuration: restoreDuration, - err: err, - } - }() - r := <-ch - return r.restoreDuration, r.err + return r.createRootlessContainer(ctr, restoreOptions) } } return r.createOCIContainer(ctr, restoreOptions) diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go new file mode 100644 index 000000000..4e8bbafd6 --- /dev/null +++ b/libpod/oci_conmon_linux.go @@ -0,0 +1,70 @@ +package libpod + +import ( + "fmt" + "os" + "runtime" + "strings" + + "github.com/containers/podman/v4/pkg/errorhandling" + pmount "github.com/containers/storage/pkg/mount" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) { + type result struct { + restoreDuration int64 + err error + } + ch := make(chan result) + go func() { + runtime.LockOSThread() + restoreDuration, err := func() (int64, error) { + fd, err := os.Open(fmt.Sprintf("/proc/%d/task/%d/ns/mnt", os.Getpid(), unix.Gettid())) + if err != nil { + return 0, err + } + defer errorhandling.CloseQuiet(fd) + + // create a new mountns on the current thread + if err = unix.Unshare(unix.CLONE_NEWNS); err != nil { + return 0, err + } + defer func() { + if err := unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS); err != nil { + logrus.Errorf("Unable to clone new namespace: %q", err) + } + }() + + // don't spread our mounts around. We are setting only /sys to be slave + // so that the cleanup process is still able to umount the storage and the + // changes are propagated to the host. + err = unix.Mount("/sys", "/sys", "none", unix.MS_REC|unix.MS_SLAVE, "") + if err != nil { + return 0, fmt.Errorf("cannot make /sys slave: %w", err) + } + + mounts, err := pmount.GetMounts() + if err != nil { + return 0, err + } + for _, m := range mounts { + if !strings.HasPrefix(m.Mountpoint, "/sys/kernel") { + continue + } + err = unix.Unmount(m.Mountpoint, 0) + if err != nil && !os.IsNotExist(err) { + return 0, fmt.Errorf("cannot unmount %s: %w", m.Mountpoint, err) + } + } + return r.createOCIContainer(ctr, restoreOptions) + }() + ch <- result{ + restoreDuration: restoreDuration, + err: err, + } + }() + res := <-ch + return res.restoreDuration, res.err +} -- cgit v1.2.3-54-g00ecf From 93bad904864aa71c45b6b72d217a752c05eb254b Mon Sep 17 00:00:00 2001 From: Doug Rabson Date: Wed, 17 Aug 2022 10:30:30 +0100 Subject: libpod: Move socket label handling from oci_conmon_common.go to oci_conmon_linux.go [NO NEW TESTS NEEDED] Signed-off-by: Doug Rabson --- libpod/oci_conmon_common.go | 24 +++++------------------- libpod/oci_conmon_linux.go | 21 +++++++++++++++++++++ 2 files changed, 26 insertions(+), 19 deletions(-) (limited to 'libpod') diff --git a/libpod/oci_conmon_common.go b/libpod/oci_conmon_common.go index 4ca2d6e34..aee0c36c8 100644 --- a/libpod/oci_conmon_common.go +++ b/libpod/oci_conmon_common.go @@ -16,7 +16,6 @@ import ( "os" "os/exec" "path/filepath" - "runtime" "strconv" "strings" "sync" @@ -42,7 +41,6 @@ import ( "github.com/containers/podman/v4/utils" "github.com/containers/storage/pkg/homedir" spec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/opencontainers/selinux/go-selinux/label" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) @@ -763,23 +761,11 @@ func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options Container env = append(env, fmt.Sprintf("PATH=%s", path)) } - runtime.LockOSThread() - if err := label.SetSocketLabel(ctr.ProcessLabel()); err != nil { - return 0, err - } - - runtimeCheckpointStarted := time.Now() - err = utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...) - // Ignore error returned from SetSocketLabel("") call, - // can't recover. - if labelErr := label.SetSocketLabel(""); labelErr == nil { - // Unlock the thread only if the process label could be restored - // successfully. Otherwise leave the thread locked and the Go runtime - // will terminate it once it returns to the threads pool. - runtime.UnlockOSThread() - } else { - logrus.Errorf("Unable to reset socket label: %q", labelErr) - } + var runtimeCheckpointStarted time.Time + err = r.withContainerSocketLabel(ctr, func() error { + runtimeCheckpointStarted = time.Now() + return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...) + }) runtimeCheckpointDuration := func() int64 { if options.PrintStats { diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index 4e8bbafd6..ce6eaf32a 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -8,6 +8,7 @@ import ( "github.com/containers/podman/v4/pkg/errorhandling" pmount "github.com/containers/storage/pkg/mount" + "github.com/opencontainers/selinux/go-selinux/label" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) @@ -68,3 +69,23 @@ func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOption res := <-ch return res.restoreDuration, res.err } + +// Run the closure with the container's socket label set +func (r *ConmonOCIRuntime) withContainerSocketLabel(ctr *Container, closure func() error) error { + runtime.LockOSThread() + if err := label.SetSocketLabel(ctr.ProcessLabel()); err != nil { + return err + } + err := closure() + // Ignore error returned from SetSocketLabel("") call, + // can't recover. + if labelErr := label.SetSocketLabel(""); labelErr == nil { + // Unlock the thread only if the process label could be restored + // successfully. Otherwise leave the thread locked and the Go runtime + // will terminate it once it returns to the threads pool. + runtime.UnlockOSThread() + } else { + logrus.Errorf("Unable to reset socket label: %q", labelErr) + } + return err +} -- cgit v1.2.3-54-g00ecf From d43fac20f3025096cdfe45ae32f41886b39e4659 Mon Sep 17 00:00:00 2001 From: Doug Rabson Date: Wed, 17 Aug 2022 11:15:37 +0100 Subject: libpod: Move moveConmonToCgroupAndSignal and GetLimits to oci_conmon_linux.go [NO NEW TESTS NEEDED] Signed-off-by: Doug Rabson --- libpod/oci_conmon_common.go | 261 ------------------------------------------- libpod/oci_conmon_linux.go | 267 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 267 insertions(+), 261 deletions(-) (limited to 'libpod') diff --git a/libpod/oci_conmon_common.go b/libpod/oci_conmon_common.go index aee0c36c8..222fec9ca 100644 --- a/libpod/oci_conmon_common.go +++ b/libpod/oci_conmon_common.go @@ -23,10 +23,6 @@ import ( "text/template" "time" - runcconfig "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/devices" - - "github.com/containers/common/pkg/cgroups" "github.com/containers/common/pkg/config" "github.com/containers/common/pkg/resize" cutil "github.com/containers/common/pkg/util" @@ -1338,75 +1334,6 @@ func startCommand(cmd *exec.Cmd, ctr *Container) error { return cmd.Start() } -// moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup -// it then signals for conmon to start by sending nonce data down the start fd -func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File) error { - mustCreateCgroup := true - - if ctr.config.NoCgroups { - mustCreateCgroup = false - } - - // If cgroup creation is disabled - just signal. - switch ctr.config.CgroupsMode { - case "disabled", "no-conmon", cgroupSplit: - mustCreateCgroup = false - } - - // $INVOCATION_ID is set by systemd when running as a service. - if ctr.runtime.RemoteURI() == "" && os.Getenv("INVOCATION_ID") != "" { - mustCreateCgroup = false - } - - if mustCreateCgroup { - // Usually rootless users are not allowed to configure cgroupfs. - // There are cases though, where it is allowed, e.g. if the cgroup - // is manually configured and chowned). Avoid detecting all - // such cases and simply use a lower log level. - logLevel := logrus.WarnLevel - if rootless.IsRootless() { - logLevel = logrus.InfoLevel - } - // TODO: This should be a switch - we are not guaranteed that - // there are only 2 valid cgroup managers - cgroupParent := ctr.CgroupParent() - cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon") - Resource := ctr.Spec().Linux.Resources - cgroupResources, err := GetLimits(Resource) - if err != nil { - logrus.StandardLogger().Log(logLevel, "Could not get ctr resources") - } - if ctr.CgroupManager() == config.SystemdCgroupsManager { - unitName := createUnitName("libpod-conmon", ctr.ID()) - realCgroupParent := cgroupParent - splitParent := strings.Split(cgroupParent, "/") - if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 { - realCgroupParent = splitParent[len(splitParent)-1] - } - - logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName) - if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil { - logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to systemd sandbox cgroup: %v", err) - } - } else { - control, err := cgroups.New(cgroupPath, &cgroupResources) - if err != nil { - logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to cgroupfs sandbox cgroup: %v", err) - } else if err := control.AddPid(cmd.Process.Pid); err != nil { - // we need to remove this defer and delete the cgroup once conmon exits - // maybe need a conmon monitor? - logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to cgroupfs sandbox cgroup: %v", err) - } - } - } - - /* We set the cgroup, now the child can start creating children */ - if err := writeConmonPipeData(startFd); err != nil { - return err - } - return nil -} - // newPipe creates a unix socket pair for communication. // Returns two files - first is parent, second is child. func newPipe() (*os.File, *os.File, error) { @@ -1671,191 +1598,3 @@ func httpAttachNonTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter, } } } - -// GetLimits converts spec resource limits to cgroup consumable limits -func GetLimits(resource *spec.LinuxResources) (runcconfig.Resources, error) { - if resource == nil { - resource = &spec.LinuxResources{} - } - final := &runcconfig.Resources{} - devs := []*devices.Rule{} - - // Devices - for _, entry := range resource.Devices { - if entry.Major == nil || entry.Minor == nil { - continue - } - runeType := 'a' - switch entry.Type { - case "b": - runeType = 'b' - case "c": - runeType = 'c' - } - - devs = append(devs, &devices.Rule{ - Type: devices.Type(runeType), - Major: *entry.Major, - Minor: *entry.Minor, - Permissions: devices.Permissions(entry.Access), - Allow: entry.Allow, - }) - } - final.Devices = devs - - // HugepageLimits - pageLimits := []*runcconfig.HugepageLimit{} - for _, entry := range resource.HugepageLimits { - pageLimits = append(pageLimits, &runcconfig.HugepageLimit{ - Pagesize: entry.Pagesize, - Limit: entry.Limit, - }) - } - final.HugetlbLimit = pageLimits - - // Networking - netPriorities := []*runcconfig.IfPrioMap{} - if resource.Network != nil { - for _, entry := range resource.Network.Priorities { - netPriorities = append(netPriorities, &runcconfig.IfPrioMap{ - Interface: entry.Name, - Priority: int64(entry.Priority), - }) - } - } - final.NetPrioIfpriomap = netPriorities - rdma := make(map[string]runcconfig.LinuxRdma) - for name, entry := range resource.Rdma { - rdma[name] = runcconfig.LinuxRdma{HcaHandles: entry.HcaHandles, HcaObjects: entry.HcaObjects} - } - final.Rdma = rdma - - // Memory - if resource.Memory != nil { - if resource.Memory.Limit != nil { - final.Memory = *resource.Memory.Limit - } - if resource.Memory.Reservation != nil { - final.MemoryReservation = *resource.Memory.Reservation - } - if resource.Memory.Swap != nil { - final.MemorySwap = *resource.Memory.Swap - } - if resource.Memory.Swappiness != nil { - final.MemorySwappiness = resource.Memory.Swappiness - } - } - - // CPU - if resource.CPU != nil { - if resource.CPU.Period != nil { - final.CpuPeriod = *resource.CPU.Period - } - if resource.CPU.Quota != nil { - final.CpuQuota = *resource.CPU.Quota - } - if resource.CPU.RealtimePeriod != nil { - final.CpuRtPeriod = *resource.CPU.RealtimePeriod - } - if resource.CPU.RealtimeRuntime != nil { - final.CpuRtRuntime = *resource.CPU.RealtimeRuntime - } - if resource.CPU.Shares != nil { - final.CpuShares = *resource.CPU.Shares - } - final.CpusetCpus = resource.CPU.Cpus - final.CpusetMems = resource.CPU.Mems - } - - // BlkIO - if resource.BlockIO != nil { - if len(resource.BlockIO.ThrottleReadBpsDevice) > 0 { - for _, entry := range resource.BlockIO.ThrottleReadBpsDevice { - throttle := &runcconfig.ThrottleDevice{} - dev := &runcconfig.BlockIODevice{ - Major: entry.Major, - Minor: entry.Minor, - } - throttle.BlockIODevice = *dev - throttle.Rate = entry.Rate - final.BlkioThrottleReadBpsDevice = append(final.BlkioThrottleReadBpsDevice, throttle) - } - } - if len(resource.BlockIO.ThrottleWriteBpsDevice) > 0 { - for _, entry := range resource.BlockIO.ThrottleWriteBpsDevice { - throttle := &runcconfig.ThrottleDevice{} - dev := &runcconfig.BlockIODevice{ - Major: entry.Major, - Minor: entry.Minor, - } - throttle.BlockIODevice = *dev - throttle.Rate = entry.Rate - final.BlkioThrottleWriteBpsDevice = append(final.BlkioThrottleWriteBpsDevice, throttle) - } - } - if len(resource.BlockIO.ThrottleReadIOPSDevice) > 0 { - for _, entry := range resource.BlockIO.ThrottleReadIOPSDevice { - throttle := &runcconfig.ThrottleDevice{} - dev := &runcconfig.BlockIODevice{ - Major: entry.Major, - Minor: entry.Minor, - } - throttle.BlockIODevice = *dev - throttle.Rate = entry.Rate - final.BlkioThrottleReadIOPSDevice = append(final.BlkioThrottleReadIOPSDevice, throttle) - } - } - if len(resource.BlockIO.ThrottleWriteIOPSDevice) > 0 { - for _, entry := range resource.BlockIO.ThrottleWriteIOPSDevice { - throttle := &runcconfig.ThrottleDevice{} - dev := &runcconfig.BlockIODevice{ - Major: entry.Major, - Minor: entry.Minor, - } - throttle.BlockIODevice = *dev - throttle.Rate = entry.Rate - final.BlkioThrottleWriteIOPSDevice = append(final.BlkioThrottleWriteIOPSDevice, throttle) - } - } - if resource.BlockIO.LeafWeight != nil { - final.BlkioLeafWeight = *resource.BlockIO.LeafWeight - } - if resource.BlockIO.Weight != nil { - final.BlkioWeight = *resource.BlockIO.Weight - } - if len(resource.BlockIO.WeightDevice) > 0 { - for _, entry := range resource.BlockIO.WeightDevice { - weight := &runcconfig.WeightDevice{} - dev := &runcconfig.BlockIODevice{ - Major: entry.Major, - Minor: entry.Minor, - } - if entry.Weight != nil { - weight.Weight = *entry.Weight - } - if entry.LeafWeight != nil { - weight.LeafWeight = *entry.LeafWeight - } - weight.BlockIODevice = *dev - final.BlkioWeightDevice = append(final.BlkioWeightDevice, weight) - } - } - } - - // Pids - if resource.Pids != nil { - final.PidsLimit = resource.Pids.Limit - } - - // Networking - if resource.Network != nil { - if resource.Network.ClassID != nil { - final.NetClsClassid = *resource.Network.ClassID - } - } - - // Unified state - final.Unified = resource.Unified - - return *final, nil -} diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index ce6eaf32a..0964d4ea3 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -3,11 +3,21 @@ package libpod import ( "fmt" "os" + "os/exec" + "path/filepath" "runtime" "strings" + runcconfig "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/devices" + + "github.com/containers/common/pkg/cgroups" + "github.com/containers/common/pkg/config" "github.com/containers/podman/v4/pkg/errorhandling" + "github.com/containers/podman/v4/pkg/rootless" + "github.com/containers/podman/v4/utils" pmount "github.com/containers/storage/pkg/mount" + spec "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/selinux/go-selinux/label" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" @@ -89,3 +99,260 @@ func (r *ConmonOCIRuntime) withContainerSocketLabel(ctr *Container, closure func } return err } + +// moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup +// it then signals for conmon to start by sending nonce data down the start fd +func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File) error { + mustCreateCgroup := true + + if ctr.config.NoCgroups { + mustCreateCgroup = false + } + + // If cgroup creation is disabled - just signal. + switch ctr.config.CgroupsMode { + case "disabled", "no-conmon", cgroupSplit: + mustCreateCgroup = false + } + + // $INVOCATION_ID is set by systemd when running as a service. + if ctr.runtime.RemoteURI() == "" && os.Getenv("INVOCATION_ID") != "" { + mustCreateCgroup = false + } + + if mustCreateCgroup { + // Usually rootless users are not allowed to configure cgroupfs. + // There are cases though, where it is allowed, e.g. if the cgroup + // is manually configured and chowned). Avoid detecting all + // such cases and simply use a lower log level. + logLevel := logrus.WarnLevel + if rootless.IsRootless() { + logLevel = logrus.InfoLevel + } + // TODO: This should be a switch - we are not guaranteed that + // there are only 2 valid cgroup managers + cgroupParent := ctr.CgroupParent() + cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon") + Resource := ctr.Spec().Linux.Resources + cgroupResources, err := GetLimits(Resource) + if err != nil { + logrus.StandardLogger().Log(logLevel, "Could not get ctr resources") + } + if ctr.CgroupManager() == config.SystemdCgroupsManager { + unitName := createUnitName("libpod-conmon", ctr.ID()) + realCgroupParent := cgroupParent + splitParent := strings.Split(cgroupParent, "/") + if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 { + realCgroupParent = splitParent[len(splitParent)-1] + } + + logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName) + if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil { + logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to systemd sandbox cgroup: %v", err) + } + } else { + control, err := cgroups.New(cgroupPath, &cgroupResources) + if err != nil { + logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + } else if err := control.AddPid(cmd.Process.Pid); err != nil { + // we need to remove this defer and delete the cgroup once conmon exits + // maybe need a conmon monitor? + logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + } + } + } + + /* We set the cgroup, now the child can start creating children */ + if err := writeConmonPipeData(startFd); err != nil { + return err + } + return nil +} + +// GetLimits converts spec resource limits to cgroup consumable limits +func GetLimits(resource *spec.LinuxResources) (runcconfig.Resources, error) { + if resource == nil { + resource = &spec.LinuxResources{} + } + final := &runcconfig.Resources{} + devs := []*devices.Rule{} + + // Devices + for _, entry := range resource.Devices { + if entry.Major == nil || entry.Minor == nil { + continue + } + runeType := 'a' + switch entry.Type { + case "b": + runeType = 'b' + case "c": + runeType = 'c' + } + + devs = append(devs, &devices.Rule{ + Type: devices.Type(runeType), + Major: *entry.Major, + Minor: *entry.Minor, + Permissions: devices.Permissions(entry.Access), + Allow: entry.Allow, + }) + } + final.Devices = devs + + // HugepageLimits + pageLimits := []*runcconfig.HugepageLimit{} + for _, entry := range resource.HugepageLimits { + pageLimits = append(pageLimits, &runcconfig.HugepageLimit{ + Pagesize: entry.Pagesize, + Limit: entry.Limit, + }) + } + final.HugetlbLimit = pageLimits + + // Networking + netPriorities := []*runcconfig.IfPrioMap{} + if resource.Network != nil { + for _, entry := range resource.Network.Priorities { + netPriorities = append(netPriorities, &runcconfig.IfPrioMap{ + Interface: entry.Name, + Priority: int64(entry.Priority), + }) + } + } + final.NetPrioIfpriomap = netPriorities + rdma := make(map[string]runcconfig.LinuxRdma) + for name, entry := range resource.Rdma { + rdma[name] = runcconfig.LinuxRdma{HcaHandles: entry.HcaHandles, HcaObjects: entry.HcaObjects} + } + final.Rdma = rdma + + // Memory + if resource.Memory != nil { + if resource.Memory.Limit != nil { + final.Memory = *resource.Memory.Limit + } + if resource.Memory.Reservation != nil { + final.MemoryReservation = *resource.Memory.Reservation + } + if resource.Memory.Swap != nil { + final.MemorySwap = *resource.Memory.Swap + } + if resource.Memory.Swappiness != nil { + final.MemorySwappiness = resource.Memory.Swappiness + } + } + + // CPU + if resource.CPU != nil { + if resource.CPU.Period != nil { + final.CpuPeriod = *resource.CPU.Period + } + if resource.CPU.Quota != nil { + final.CpuQuota = *resource.CPU.Quota + } + if resource.CPU.RealtimePeriod != nil { + final.CpuRtPeriod = *resource.CPU.RealtimePeriod + } + if resource.CPU.RealtimeRuntime != nil { + final.CpuRtRuntime = *resource.CPU.RealtimeRuntime + } + if resource.CPU.Shares != nil { + final.CpuShares = *resource.CPU.Shares + } + final.CpusetCpus = resource.CPU.Cpus + final.CpusetMems = resource.CPU.Mems + } + + // BlkIO + if resource.BlockIO != nil { + if len(resource.BlockIO.ThrottleReadBpsDevice) > 0 { + for _, entry := range resource.BlockIO.ThrottleReadBpsDevice { + throttle := &runcconfig.ThrottleDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + throttle.BlockIODevice = *dev + throttle.Rate = entry.Rate + final.BlkioThrottleReadBpsDevice = append(final.BlkioThrottleReadBpsDevice, throttle) + } + } + if len(resource.BlockIO.ThrottleWriteBpsDevice) > 0 { + for _, entry := range resource.BlockIO.ThrottleWriteBpsDevice { + throttle := &runcconfig.ThrottleDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + throttle.BlockIODevice = *dev + throttle.Rate = entry.Rate + final.BlkioThrottleWriteBpsDevice = append(final.BlkioThrottleWriteBpsDevice, throttle) + } + } + if len(resource.BlockIO.ThrottleReadIOPSDevice) > 0 { + for _, entry := range resource.BlockIO.ThrottleReadIOPSDevice { + throttle := &runcconfig.ThrottleDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + throttle.BlockIODevice = *dev + throttle.Rate = entry.Rate + final.BlkioThrottleReadIOPSDevice = append(final.BlkioThrottleReadIOPSDevice, throttle) + } + } + if len(resource.BlockIO.ThrottleWriteIOPSDevice) > 0 { + for _, entry := range resource.BlockIO.ThrottleWriteIOPSDevice { + throttle := &runcconfig.ThrottleDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + throttle.BlockIODevice = *dev + throttle.Rate = entry.Rate + final.BlkioThrottleWriteIOPSDevice = append(final.BlkioThrottleWriteIOPSDevice, throttle) + } + } + if resource.BlockIO.LeafWeight != nil { + final.BlkioLeafWeight = *resource.BlockIO.LeafWeight + } + if resource.BlockIO.Weight != nil { + final.BlkioWeight = *resource.BlockIO.Weight + } + if len(resource.BlockIO.WeightDevice) > 0 { + for _, entry := range resource.BlockIO.WeightDevice { + weight := &runcconfig.WeightDevice{} + dev := &runcconfig.BlockIODevice{ + Major: entry.Major, + Minor: entry.Minor, + } + if entry.Weight != nil { + weight.Weight = *entry.Weight + } + if entry.LeafWeight != nil { + weight.LeafWeight = *entry.LeafWeight + } + weight.BlockIODevice = *dev + final.BlkioWeightDevice = append(final.BlkioWeightDevice, weight) + } + } + } + + // Pids + if resource.Pids != nil { + final.PidsLimit = resource.Pids.Limit + } + + // Networking + if resource.Network != nil { + if resource.Network.ClassID != nil { + final.NetClsClassid = *resource.Network.ClassID + } + } + + // Unified state + final.Unified = resource.Unified + + return *final, nil +} -- cgit v1.2.3-54-g00ecf From cb4158889e7a115b4d8bb77c76cc99032d5e8363 Mon Sep 17 00:00:00 2001 From: Doug Rabson Date: Wed, 17 Aug 2022 11:18:15 +0100 Subject: libpod: Move openUnixSocket to oci_conmon_attach_linux.go This function depends on linux-specific functionality in /proc/fd to allow connecting to local domain sockets with pathnames too long for sockaddr_un. [NO NEW TESTS NEEDED] Signed-off-by: Doug Rabson --- libpod/oci_conmon_attach_common.go | 9 --------- libpod/oci_conmon_attach_linux.go | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 9 deletions(-) create mode 100644 libpod/oci_conmon_attach_linux.go (limited to 'libpod') diff --git a/libpod/oci_conmon_attach_common.go b/libpod/oci_conmon_attach_common.go index aa55aa6f5..adc374503 100644 --- a/libpod/oci_conmon_attach_common.go +++ b/libpod/oci_conmon_attach_common.go @@ -29,15 +29,6 @@ const ( AttachPipeStderr = 3 ) -func openUnixSocket(path string) (*net.UnixConn, error) { - fd, err := unix.Open(path, unix.O_PATH, 0) - if err != nil { - return nil, err - } - defer unix.Close(fd) - return net.DialUnix("unixpacket", nil, &net.UnixAddr{Name: fmt.Sprintf("/proc/self/fd/%d", fd), Net: "unixpacket"}) -} - // Attach to the given container. // Does not check if state is appropriate. // started is only required if startContainer is true. diff --git a/libpod/oci_conmon_attach_linux.go b/libpod/oci_conmon_attach_linux.go new file mode 100644 index 000000000..f1aa89d3e --- /dev/null +++ b/libpod/oci_conmon_attach_linux.go @@ -0,0 +1,17 @@ +package libpod + +import ( + "fmt" + "net" + + "golang.org/x/sys/unix" +) + +func openUnixSocket(path string) (*net.UnixConn, error) { + fd, err := unix.Open(path, unix.O_PATH, 0) + if err != nil { + return nil, err + } + defer unix.Close(fd) + return net.DialUnix("unixpacket", nil, &net.UnixAddr{Name: fmt.Sprintf("/proc/self/fd/%d", fd), Net: "unixpacket"}) +} -- cgit v1.2.3-54-g00ecf From 054d64710736250c4d238e159884c1588eb7218a Mon Sep 17 00:00:00 2001 From: Doug Rabson Date: Wed, 17 Aug 2022 09:43:43 +0100 Subject: libpod: Build oci_conmon_common.go and oci_conmon_attach_common on FreeBSD This also adds FreeBSD equivalents to the functions moved to oci_conmon*_linux.go. For openUnixSocket, we create a temporary symlink to shorten the path to something that fits into sockaddr_un. [NO NEW TESTS NEEDED] Signed-off-by: Doug Rabson --- libpod/networking_unsupported.go | 7 +++++++ libpod/oci_conmon_attach_common.go | 4 ++-- libpod/oci_conmon_attach_freebsd.go | 21 +++++++++++++++++++++ libpod/oci_conmon_common.go | 4 ++-- libpod/oci_conmon_freebsd.go | 24 ++++++++++++++++++++++++ libpod/oci_conmon_unsupported.go | 4 ++-- 6 files changed, 58 insertions(+), 6 deletions(-) create mode 100644 libpod/oci_conmon_attach_freebsd.go create mode 100644 libpod/oci_conmon_freebsd.go (limited to 'libpod') diff --git a/libpod/networking_unsupported.go b/libpod/networking_unsupported.go index 227b512cd..76ffabb5e 100644 --- a/libpod/networking_unsupported.go +++ b/libpod/networking_unsupported.go @@ -77,3 +77,10 @@ func (r *RootlessNetNS) Cleanup(runtime *Runtime) error { func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) { return nil, errors.New("not implemented (*Runtime) GetRootlessNetNs") } + +// convertPortMappings will remove the HostIP part from the ports when running inside podman machine. +// This is need because a HostIP of 127.0.0.1 would now allow the gvproxy forwarder to reach to open ports. +// For machine the HostIP must only be used by gvproxy and never in the VM. +func (c *Container) convertPortMappings() []types.PortMapping { + return []types.PortMapping{} +} diff --git a/libpod/oci_conmon_attach_common.go b/libpod/oci_conmon_attach_common.go index adc374503..a9e9b2bb5 100644 --- a/libpod/oci_conmon_attach_common.go +++ b/libpod/oci_conmon_attach_common.go @@ -1,5 +1,5 @@ -//go:build linux -// +build linux +//go:build linux || freebsd +// +build linux freebsd package libpod diff --git a/libpod/oci_conmon_attach_freebsd.go b/libpod/oci_conmon_attach_freebsd.go new file mode 100644 index 000000000..de0054381 --- /dev/null +++ b/libpod/oci_conmon_attach_freebsd.go @@ -0,0 +1,21 @@ +package libpod + +import ( + "net" + "os" + "path/filepath" +) + +func openUnixSocket(path string) (*net.UnixConn, error) { + // socket paths can be too long to fit into a sockaddr_un so we create a shorter symlink. + tmpdir, err := os.MkdirTemp("", "podman") + if err != nil { + return nil, err + } + defer os.RemoveAll(tmpdir) + tmpsockpath := filepath.Join(tmpdir, "sock") + if err := os.Symlink(path, tmpsockpath); err != nil { + return nil, err + } + return net.DialUnix("unixpacket", nil, &net.UnixAddr{Name: tmpsockpath, Net: "unixpacket"}) +} diff --git a/libpod/oci_conmon_common.go b/libpod/oci_conmon_common.go index 222fec9ca..c3725cdb4 100644 --- a/libpod/oci_conmon_common.go +++ b/libpod/oci_conmon_common.go @@ -1,5 +1,5 @@ -//go:build linux -// +build linux +//go:build linux || freebsd +// +build linux freebsd package libpod diff --git a/libpod/oci_conmon_freebsd.go b/libpod/oci_conmon_freebsd.go new file mode 100644 index 000000000..6f7ac7fc6 --- /dev/null +++ b/libpod/oci_conmon_freebsd.go @@ -0,0 +1,24 @@ +package libpod + +import ( + "errors" + "os" + "os/exec" +) + +func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) { + return -1, errors.New("unsupported (*ConmonOCIRuntime) createRootlessContainer") +} + +// Run the closure with the container's socket label set +func (r *ConmonOCIRuntime) withContainerSocketLabel(ctr *Container, closure func() error) error { + // No label support yet + return closure() +} + +// moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup +// it then signals for conmon to start by sending nonce data down the start fd +func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File) error { + // No equivalent on FreeBSD + return nil +} diff --git a/libpod/oci_conmon_unsupported.go b/libpod/oci_conmon_unsupported.go index c72dc0f0d..cc6d68e89 100644 --- a/libpod/oci_conmon_unsupported.go +++ b/libpod/oci_conmon_unsupported.go @@ -1,5 +1,5 @@ -//go:build !linux -// +build !linux +//go:build !linux && !freebsd +// +build !linux,!freebsd package libpod -- cgit v1.2.3-54-g00ecf From 8ffeb626c918b9b9329a8bda5e5579169e323bdf Mon Sep 17 00:00:00 2001 From: Doug Rabson Date: Thu, 11 Aug 2022 11:19:40 +0100 Subject: events: Add freebsd support for libpod/event [NO NEW TESTS NEEDED] Signed-off-by: Doug Rabson --- libpod/events/events_freebsd.go | 23 +++++++++++++++++++++++ libpod/events/events_unsupported.go | 4 ++-- libpod/events/logfile.go | 4 ++-- 3 files changed, 27 insertions(+), 4 deletions(-) create mode 100644 libpod/events/events_freebsd.go (limited to 'libpod') diff --git a/libpod/events/events_freebsd.go b/libpod/events/events_freebsd.go new file mode 100644 index 000000000..17d410089 --- /dev/null +++ b/libpod/events/events_freebsd.go @@ -0,0 +1,23 @@ +package events + +import ( + "fmt" + "strings" + + "github.com/sirupsen/logrus" +) + +// NewEventer creates an eventer based on the eventer type +func NewEventer(options EventerOptions) (Eventer, error) { + logrus.Debugf("Initializing event backend %s", options.EventerType) + switch strings.ToUpper(options.EventerType) { + case strings.ToUpper(LogFile.String()): + return EventLogFile{options}, nil + case strings.ToUpper(Null.String()): + return NewNullEventer(), nil + case strings.ToUpper(Memory.String()): + return NewMemoryEventer(), nil + default: + return nil, fmt.Errorf("unknown event logger type: %s", strings.ToUpper(options.EventerType)) + } +} diff --git a/libpod/events/events_unsupported.go b/libpod/events/events_unsupported.go index d766402a9..01031c225 100644 --- a/libpod/events/events_unsupported.go +++ b/libpod/events/events_unsupported.go @@ -1,5 +1,5 @@ -//go:build !linux -// +build !linux +//go:build !linux && !freebsd +// +build !linux,!freebsd package events diff --git a/libpod/events/logfile.go b/libpod/events/logfile.go index c7dbf4850..519e16629 100644 --- a/libpod/events/logfile.go +++ b/libpod/events/logfile.go @@ -1,5 +1,5 @@ -//go:build linux -// +build linux +//go:build linux || freebsd +// +build linux freebsd package events -- cgit v1.2.3-54-g00ecf From 8f797ab66166ec99d0eff0bb0aed997bb1876c5d Mon Sep 17 00:00:00 2001 From: Doug Rabson Date: Fri, 19 Aug 2022 11:30:39 +0100 Subject: libpod: Add definition of containerPlatformState for FreeBSD For FreeBSD, we need the name of the 'network jail' which is the parent of all containers in a pod. Having a separate jail for the network configuration also simplifies the implementation of CNI plugins so we use this pattern for solitary containers as well as pods. [NO NEW TESTS NEEDED] Signed-off-by: Doug Rabson --- libpod/boltdb_state_freebsd.go | 17 +++++++++++++++++ libpod/boltdb_state_unsupported.go | 4 ++-- libpod/container_freebsd.go | 12 ++++++++++++ libpod/container_unsupported.go | 4 ++-- 4 files changed, 33 insertions(+), 4 deletions(-) create mode 100644 libpod/boltdb_state_freebsd.go create mode 100644 libpod/container_freebsd.go (limited to 'libpod') diff --git a/libpod/boltdb_state_freebsd.go b/libpod/boltdb_state_freebsd.go new file mode 100644 index 000000000..d7f2736fc --- /dev/null +++ b/libpod/boltdb_state_freebsd.go @@ -0,0 +1,17 @@ +//go:build freebsd +// +build freebsd + +package libpod + +// replaceNetNS handle network namespace transitions after updating a +// container's state. +func replaceNetNS(netNSPath string, ctr *Container, newState *ContainerState) error { + // On FreeBSD, we just record the network jail's name in our state. + newState.NetworkJail = netNSPath + return nil +} + +// getNetNSPath retrieves the netns path to be stored in the database +func getNetNSPath(ctr *Container) string { + return ctr.state.NetworkJail +} diff --git a/libpod/boltdb_state_unsupported.go b/libpod/boltdb_state_unsupported.go index 97d59614e..9db1e3c4b 100644 --- a/libpod/boltdb_state_unsupported.go +++ b/libpod/boltdb_state_unsupported.go @@ -1,5 +1,5 @@ -//go:build !linux -// +build !linux +//go:build !linux && !freebsd +// +build !linux,!freebsd package libpod diff --git a/libpod/container_freebsd.go b/libpod/container_freebsd.go new file mode 100644 index 000000000..f9fbc4daa --- /dev/null +++ b/libpod/container_freebsd.go @@ -0,0 +1,12 @@ +//go:build freebsd +// +build freebsd + +package libpod + +type containerPlatformState struct { + // NetworkJail is the name of the container's network VNET + // jail. Will only be set if config.CreateNetNS is true, or + // the container was told to join another container's network + // namespace. + NetworkJail string `json:"-"` +} diff --git a/libpod/container_unsupported.go b/libpod/container_unsupported.go index 5a46c163c..16bf11622 100644 --- a/libpod/container_unsupported.go +++ b/libpod/container_unsupported.go @@ -1,5 +1,5 @@ -//go:build !linux -// +build !linux +//go:build !linux && !freebsd +// +build !linux,!freebsd package libpod -- cgit v1.2.3-54-g00ecf From 0f739355635d5bc4d538cf88009d7af533e7c289 Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Tue, 23 Aug 2022 14:46:43 -0400 Subject: Add support for containers.conf volume timeouts Also, do a general cleanup of all the timeout code. Changes include: - Convert from int to *uint where possible. Timeouts cannot be negative, hence the uint change; and a timeout of 0 is valid, so we need a new way to detect that the user set a timeout (hence, pointer). - Change name in the database to avoid conflicts between new data type and old one. This will cause timeouts set with 4.2.0 to be lost, but considering nobody is using the feature at present (and the lack of validation means we could have invalid, negative timeouts in the DB) this feels safe. - Ensure volume plugin timeouts can only be used with volumes created using a plugin. Timeouts on the local driver are nonsensical. - Remove the existing test, as it did not use a volume plugin. Write a new test that does. The actual plumbing of the containers.conf timeout in is one line in volume_api.go; the remainder are the above-described cleanups. Signed-off-by: Matthew Heon --- go.mod | 2 +- go.sum | 10 ++-- libpod/define/volume_inspect.go | 2 +- libpod/options.go | 14 ++++- libpod/plugin/volume_api.go | 19 +++--- libpod/runtime.go | 2 +- libpod/runtime_volume_linux.go | 2 +- libpod/volume.go | 2 +- libpod/volume_inspect.go | 7 ++- pkg/domain/infra/abi/parse/parse.go | 5 +- test/e2e/config/containers.conf | 2 + test/e2e/volume_create_test.go | 15 ----- test/e2e/volume_plugin_test.go | 34 +++++++++++ test/testvol/util.go | 2 +- .../Microsoft/hcsshim/internal/jobobject/iocp.go | 2 +- .../hcsshim/internal/jobobject/jobobject.go | 55 +++++++++++++++--- .../Microsoft/hcsshim/internal/jobobject/limits.go | 4 +- .../Microsoft/hcsshim/internal/queue/mq.go | 61 +++++++------------- .../Microsoft/hcsshim/internal/winapi/jobobject.go | 2 +- .../Microsoft/hcsshim/internal/winapi/process.go | 2 +- .../Microsoft/hcsshim/internal/winapi/system.go | 3 +- .../hcsshim/internal/winapi/zsyscall_windows.go | 6 +- .../containers/common/libimage/inspect.go | 2 +- .../github.com/containers/common/libimage/load.go | 2 +- .../containers/common/libnetwork/cni/network.go | 14 ++++- .../common/libnetwork/network/interface.go | 1 + .../containers/common/pkg/config/config.go | 67 ++++++++++++++++++++++ .../containers/common/pkg/config/config_darwin.go | 2 + .../containers/common/pkg/config/containers.conf | 17 +++++- .../containers/common/pkg/config/default.go | 6 +- .../containers/common/pkg/config/default_darwin.go | 5 ++ .../common/pkg/config/default_freebsd.go | 5 ++ .../containers/common/pkg/config/default_linux.go | 5 ++ .../common/pkg/config/default_windows.go | 5 ++ .../common/pkg/subscriptions/subscriptions.go | 2 +- vendor/modules.txt | 6 +- 36 files changed, 284 insertions(+), 108 deletions(-) (limited to 'libpod') diff --git a/go.mod b/go.mod index 635c0a17d..c0a23d962 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,7 @@ require ( github.com/containernetworking/cni v1.1.2 github.com/containernetworking/plugins v1.1.1 github.com/containers/buildah v1.27.0 - github.com/containers/common v0.49.2-0.20220817132854-f6679f170eca + github.com/containers/common v0.49.2-0.20220823130605-72a7da3358ac github.com/containers/conmon v2.0.20+incompatible github.com/containers/image/v5 v5.22.0 github.com/containers/ocicrypt v1.1.5 diff --git a/go.sum b/go.sum index 5053589c5..a476729ee 100644 --- a/go.sum +++ b/go.sum @@ -140,8 +140,9 @@ github.com/Microsoft/hcsshim v0.8.21/go.mod h1:+w2gRZ5ReXQhFOrvSQeNfhrYB/dg3oDwT github.com/Microsoft/hcsshim v0.8.22/go.mod h1:91uVCVzvX2QD16sMCenoxxXo6L1wJnLMX2PSufFMtF0= github.com/Microsoft/hcsshim v0.8.23/go.mod h1:4zegtUJth7lAvFyc6cH2gGQ5B3OFQim01nnU2M8jKDg= github.com/Microsoft/hcsshim v0.9.2/go.mod h1:7pLA8lDk46WKDWlVsENo92gC0XFa8rbKfyFRBqxEbCc= -github.com/Microsoft/hcsshim v0.9.3 h1:k371PzBuRrz2b+ebGuI2nVgVhgsVX60jMfSw80NECxo= github.com/Microsoft/hcsshim v0.9.3/go.mod h1:7pLA8lDk46WKDWlVsENo92gC0XFa8rbKfyFRBqxEbCc= +github.com/Microsoft/hcsshim v0.9.4 h1:mnUj0ivWy6UzbB1uLFqKR6F+ZyiDc7j4iGgHTpO+5+I= +github.com/Microsoft/hcsshim v0.9.4/go.mod h1:7pLA8lDk46WKDWlVsENo92gC0XFa8rbKfyFRBqxEbCc= github.com/Microsoft/hcsshim/test v0.0.0-20201218223536-d3e5debf77da/go.mod h1:5hlzMzRKMLyo42nCZ9oml8AdTlq/0cvIaBv6tK1RehU= github.com/Microsoft/hcsshim/test v0.0.0-20210227013316-43a75bb4edd3/go.mod h1:mw7qgWloBUl75W/gVH3cQszUg1+gUITj7D6NY7ywVnY= github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ= @@ -323,8 +324,9 @@ github.com/containerd/containerd v1.5.7/go.mod h1:gyvv6+ugqY25TiXxcZC3L5yOeYgEw0 github.com/containerd/containerd v1.5.8/go.mod h1:YdFSv5bTFLpG2HIYmfqDpSYYTDX+mc5qtSuYx1YUb/s= github.com/containerd/containerd v1.5.9/go.mod h1:fvQqCfadDGga5HZyn3j4+dx56qj2I9YwBrlSdalvJYQ= github.com/containerd/containerd v1.6.1/go.mod h1:1nJz5xCZPusx6jJU8Frfct988y0NpumIq9ODB0kLtoE= -github.com/containerd/containerd v1.6.6 h1:xJNPhbrmz8xAMDNoVjHy9YHtWwEQNS+CDkcIRh7t8Y0= github.com/containerd/containerd v1.6.6/go.mod h1:ZoP1geJldzCVY3Tonoz7b1IXk8rIX0Nltt5QE4OMNk0= +github.com/containerd/containerd v1.6.8 h1:h4dOFDwzHmqFEP754PgfgTeVXFnLiRc6kiqC7tplDJs= +github.com/containerd/containerd v1.6.8/go.mod h1:By6p5KqPK0/7/CgO/A6t/Gz+CUYUu2zf1hUaaymVXB0= github.com/containerd/continuity v0.0.0-20190426062206-aaeac12a7ffc/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y= github.com/containerd/continuity v0.0.0-20190815185530-f2a389ac0a02/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y= github.com/containerd/continuity v0.0.0-20191127005431-f65d91d395eb/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y= @@ -395,8 +397,8 @@ github.com/containernetworking/plugins v1.1.1/go.mod h1:Sr5TH/eBsGLXK/h71HeLfX19 github.com/containers/buildah v1.27.0 h1:LJ1ks7vKxwPzJGr5BWVvigbtVL9w7XeHtNEmiIOPJqI= github.com/containers/buildah v1.27.0/go.mod h1:anH3ExvDXRNP9zLQCrOc1vWb5CrhqLF/aYFim4tslvA= github.com/containers/common v0.49.1/go.mod h1:ueM5hT0itKqCQvVJDs+EtjornAQtrHYxQJzP2gxeGIg= -github.com/containers/common v0.49.2-0.20220817132854-f6679f170eca h1:OjhEBVpFskIJ6Vq9nikYW7M6YXfkTxOBu+EQBoCyhuM= -github.com/containers/common v0.49.2-0.20220817132854-f6679f170eca/go.mod h1:eT2iSsNzjOlF5VFLkyj9OU2SXznURvEYndsioQImuoE= +github.com/containers/common v0.49.2-0.20220823130605-72a7da3358ac h1:rLbTzosxPKrQd+EgMRxfC1WYm3azPiQfig+Lr7mCQ4k= +github.com/containers/common v0.49.2-0.20220823130605-72a7da3358ac/go.mod h1:xC4qkLfW9R+YSDknlT9xU+NDNxIw017U8AyohGtr9Ec= github.com/containers/conmon v2.0.20+incompatible h1:YbCVSFSCqFjjVwHTPINGdMX1F6JXHGTUje2ZYobNrkg= github.com/containers/conmon v2.0.20+incompatible/go.mod h1:hgwZ2mtuDrppv78a/cOBNiCm6O0UMWGx1mu7P00nu5I= github.com/containers/image/v5 v5.22.0 h1:KemxPmD4D2YYOFZN2SgoTk7nBFcnwPiPW0MqjYtknSE= diff --git a/libpod/define/volume_inspect.go b/libpod/define/volume_inspect.go index 9279812da..76120647c 100644 --- a/libpod/define/volume_inspect.go +++ b/libpod/define/volume_inspect.go @@ -57,7 +57,7 @@ type InspectVolumeData struct { // UID/GID. NeedsChown bool `json:"NeedsChown,omitempty"` // Timeout is the specified driver timeout if given - Timeout int `json:"Timeout,omitempty"` + Timeout uint `json:"Timeout,omitempty"` } type VolumeReload struct { diff --git a/libpod/options.go b/libpod/options.go index 43ed1ff78..d31741094 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -1695,14 +1695,22 @@ func withSetAnon() VolumeCreateOption { } } -// WithVolumeDriverTimeout sets the volume creation timeout period -func WithVolumeDriverTimeout(timeout int) VolumeCreateOption { +// WithVolumeDriverTimeout sets the volume creation timeout period. +// Only usable if a non-local volume driver is in use. +func WithVolumeDriverTimeout(timeout uint) VolumeCreateOption { return func(volume *Volume) error { if volume.valid { return define.ErrVolumeFinalized } - volume.config.Timeout = timeout + if volume.config.Driver == "" || volume.config.Driver == define.VolumeDriverLocal { + return fmt.Errorf("Volume driver timeout can only be used with non-local volume drivers: %w", define.ErrInvalidArg) + } + + tm := timeout + + volume.config.Timeout = &tm + return nil } } diff --git a/libpod/plugin/volume_api.go b/libpod/plugin/volume_api.go index 0a5eaae53..b13578388 100644 --- a/libpod/plugin/volume_api.go +++ b/libpod/plugin/volume_api.go @@ -3,6 +3,7 @@ package plugin import ( "bytes" "context" + "errors" "fmt" "io/ioutil" "net" @@ -13,8 +14,7 @@ import ( "sync" "time" - "errors" - + "github.com/containers/common/pkg/config" "github.com/containers/podman/v4/libpod/define" "github.com/docker/go-plugins-helpers/sdk" "github.com/docker/go-plugins-helpers/volume" @@ -40,7 +40,6 @@ var ( ) const ( - defaultTimeout = 5 * time.Second volumePluginType = "VolumeDriver" ) @@ -129,7 +128,7 @@ func validatePlugin(newPlugin *VolumePlugin) error { // GetVolumePlugin gets a single volume plugin, with the given name, at the // given path. -func GetVolumePlugin(name string, path string, timeout int) (*VolumePlugin, error) { +func GetVolumePlugin(name string, path string, timeout *uint, cfg *config.Config) (*VolumePlugin, error) { pluginsLock.Lock() defer pluginsLock.Unlock() @@ -152,13 +151,11 @@ func GetVolumePlugin(name string, path string, timeout int) (*VolumePlugin, erro // Need an HTTP client to force a Unix connection. // And since we can reuse it, might as well cache it. client := new(http.Client) - client.Timeout = defaultTimeout - // if the user specified a non-zero timeout, use their value. Else, keep the default. - if timeout != 0 { - if time.Duration(timeout)*time.Second < defaultTimeout { - logrus.Warnf("the default timeout for volume creation is %d seconds, setting a time less than that may break this feature.", defaultTimeout) - } - client.Timeout = time.Duration(timeout) * time.Second + client.Timeout = 5 * time.Second + if timeout != nil { + client.Timeout = time.Duration(*timeout) * time.Second + } else if cfg != nil { + client.Timeout = time.Duration(cfg.Engine.VolumePluginTimeout) * time.Second } // This bit borrowed from pkg/bindings/connection.go client.Transport = &http.Transport{ diff --git a/libpod/runtime.go b/libpod/runtime.go index 684f4abd7..9b97fd724 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -1097,7 +1097,7 @@ func (r *Runtime) getVolumePlugin(volConfig *VolumeConfig) (*plugin.VolumePlugin return nil, fmt.Errorf("no volume plugin with name %s available: %w", name, define.ErrMissingPlugin) } - return plugin.GetVolumePlugin(name, pluginPath, timeout) + return plugin.GetVolumePlugin(name, pluginPath, timeout, r.config) } // GetSecretsStorageDir returns the directory that the secrets manager should take diff --git a/libpod/runtime_volume_linux.go b/libpod/runtime_volume_linux.go index 1f354e41b..65f2a1005 100644 --- a/libpod/runtime_volume_linux.go +++ b/libpod/runtime_volume_linux.go @@ -184,7 +184,7 @@ func (r *Runtime) UpdateVolumePlugins(ctx context.Context) *define.VolumeReload ) for driverName, socket := range r.config.Engine.VolumePlugins { - driver, err := volplugin.GetVolumePlugin(driverName, socket, 0) + driver, err := volplugin.GetVolumePlugin(driverName, socket, nil, r.config) if err != nil { errs = append(errs, err) continue diff --git a/libpod/volume.go b/libpod/volume.go index 2e8cd77a5..a054e4032 100644 --- a/libpod/volume.go +++ b/libpod/volume.go @@ -56,7 +56,7 @@ type VolumeConfig struct { // quota tracking. DisableQuota bool `json:"disableQuota,omitempty"` // Timeout allows users to override the default driver timeout of 5 seconds - Timeout int + Timeout *uint `json:"timeout,omitempty"` } // VolumeState holds the volume's mutable state. diff --git a/libpod/volume_inspect.go b/libpod/volume_inspect.go index dd2f3fd01..c3872bca7 100644 --- a/libpod/volume_inspect.go +++ b/libpod/volume_inspect.go @@ -64,7 +64,12 @@ func (v *Volume) Inspect() (*define.InspectVolumeData, error) { data.MountCount = v.state.MountCount data.NeedsCopyUp = v.state.NeedsCopyUp data.NeedsChown = v.state.NeedsChown - data.Timeout = v.config.Timeout + + if v.config.Timeout != nil { + data.Timeout = *v.config.Timeout + } else if v.UsesVolumeDriver() { + data.Timeout = v.runtime.config.Engine.VolumePluginTimeout + } return data, nil } diff --git a/pkg/domain/infra/abi/parse/parse.go b/pkg/domain/infra/abi/parse/parse.go index 19699589b..fb2876bb2 100644 --- a/pkg/domain/infra/abi/parse/parse.go +++ b/pkg/domain/infra/abi/parse/parse.go @@ -86,8 +86,11 @@ func VolumeOptions(opts map[string]string) ([]libpod.VolumeCreateOption, error) if err != nil { return nil, fmt.Errorf("cannot convert Timeout %s to an integer: %w", splitO[1], err) } + if intTimeout < 0 { + return nil, fmt.Errorf("volume timeout cannot be negative (got %d)", intTimeout) + } logrus.Debugf("Removing timeout from options and adding WithTimeout for Timeout %d", intTimeout) - libpodOptions = append(libpodOptions, libpod.WithVolumeDriverTimeout(intTimeout)) + libpodOptions = append(libpodOptions, libpod.WithVolumeDriverTimeout(uint(intTimeout))) default: finalVal = append(finalVal, o) } diff --git a/test/e2e/config/containers.conf b/test/e2e/config/containers.conf index c33f32ab4..94bb316b1 100644 --- a/test/e2e/config/containers.conf +++ b/test/e2e/config/containers.conf @@ -61,6 +61,8 @@ no_hosts=true network_cmd_options=["allow_host_loopback=true"] service_timeout=1234 +volume_plugin_timeout = 15 + # We need to ensure each test runs on a separate plugin instance... # For now, let's just make a bunch of plugin paths and have each test use one. [engine.volume_plugins] diff --git a/test/e2e/volume_create_test.go b/test/e2e/volume_create_test.go index 7a975f6a5..499283cab 100644 --- a/test/e2e/volume_create_test.go +++ b/test/e2e/volume_create_test.go @@ -162,19 +162,4 @@ var _ = Describe("Podman volume create", func() { Expect(inspectOpts).Should(Exit(0)) Expect(inspectOpts.OutputToString()).To(Equal(optionStrFormatExpect)) }) - - It("podman create volume with o=timeout", func() { - volName := "testVol" - timeout := 10 - timeoutStr := "10" - session := podmanTest.Podman([]string{"volume", "create", "--opt", fmt.Sprintf("o=timeout=%d", timeout), volName}) - session.WaitWithDefaultTimeout() - Expect(session).Should(Exit(0)) - - inspectTimeout := podmanTest.Podman([]string{"volume", "inspect", "--format", "{{ .Timeout }}", volName}) - inspectTimeout.WaitWithDefaultTimeout() - Expect(inspectTimeout).Should(Exit(0)) - Expect(inspectTimeout.OutputToString()).To(Equal(timeoutStr)) - - }) }) diff --git a/test/e2e/volume_plugin_test.go b/test/e2e/volume_plugin_test.go index b585f8dd8..a44e75a54 100644 --- a/test/e2e/volume_plugin_test.go +++ b/test/e2e/volume_plugin_test.go @@ -256,4 +256,38 @@ Removed: Expect(session.OutputToStringArray()).To(ContainElements(localvol, vol2)) Expect(session.ErrorToString()).To(Equal("")) // make no errors are shown }) + + It("volume driver timeouts test", func() { + podmanTest.AddImageToRWStore(volumeTest) + + pluginStatePath := filepath.Join(podmanTest.TempDir, "volumes") + err := os.Mkdir(pluginStatePath, 0755) + Expect(err).ToNot(HaveOccurred()) + + // Keep this distinct within tests to avoid multiple tests using the same plugin. + pluginName := "testvol6" + plugin := podmanTest.Podman([]string{"run", "--security-opt", "label=disable", "-v", "/run/docker/plugins:/run/docker/plugins", "-v", fmt.Sprintf("%v:%v", pluginStatePath, pluginStatePath), "-d", volumeTest, "--sock-name", pluginName, "--path", pluginStatePath}) + plugin.WaitWithDefaultTimeout() + Expect(plugin).Should(Exit(0)) + + volName := "testVolume1" + create := podmanTest.Podman([]string{"volume", "create", "--driver", pluginName, volName}) + create.WaitWithDefaultTimeout() + Expect(create).Should(Exit(0)) + + volInspect := podmanTest.Podman([]string{"volume", "inspect", "--format", "{{ .Timeout }}", volName}) + volInspect.WaitWithDefaultTimeout() + Expect(volInspect).Should(Exit(0)) + Expect(volInspect.OutputToString()).To(ContainSubstring("15")) + + volName2 := "testVolume2" + create2 := podmanTest.Podman([]string{"volume", "create", "--driver", pluginName, "--opt", "o=timeout=3", volName2}) + create2.WaitWithDefaultTimeout() + Expect(create2).Should(Exit(0)) + + volInspect2 := podmanTest.Podman([]string{"volume", "inspect", "--format", "{{ .Timeout }}", volName2}) + volInspect2.WaitWithDefaultTimeout() + Expect(volInspect2).Should(Exit(0)) + Expect(volInspect2.OutputToString()).To(ContainSubstring("3")) + }) }) diff --git a/test/testvol/util.go b/test/testvol/util.go index b50bb3afb..b4961e097 100644 --- a/test/testvol/util.go +++ b/test/testvol/util.go @@ -25,5 +25,5 @@ func getPluginName(pathOrName string) string { func getPlugin(sockNameOrPath string) (*plugin.VolumePlugin, error) { path := getSocketPath(sockNameOrPath) name := getPluginName(sockNameOrPath) - return plugin.GetVolumePlugin(name, path, 0) + return plugin.GetVolumePlugin(name, path, nil, nil) } diff --git a/vendor/github.com/Microsoft/hcsshim/internal/jobobject/iocp.go b/vendor/github.com/Microsoft/hcsshim/internal/jobobject/iocp.go index 3d640ac7b..5d6acd69e 100644 --- a/vendor/github.com/Microsoft/hcsshim/internal/jobobject/iocp.go +++ b/vendor/github.com/Microsoft/hcsshim/internal/jobobject/iocp.go @@ -57,7 +57,7 @@ func pollIOCP(ctx context.Context, iocpHandle windows.Handle) { }).Warn("failed to parse job object message") continue } - if err := msq.Write(notification); err == queue.ErrQueueClosed { + if err := msq.Enqueue(notification); err == queue.ErrQueueClosed { // Write will only return an error when the queue is closed. // The only time a queue would ever be closed is when we call `Close` on // the job it belongs to which also removes it from the jobMap, so something diff --git a/vendor/github.com/Microsoft/hcsshim/internal/jobobject/jobobject.go b/vendor/github.com/Microsoft/hcsshim/internal/jobobject/jobobject.go index 9c2726416..c9fdd921a 100644 --- a/vendor/github.com/Microsoft/hcsshim/internal/jobobject/jobobject.go +++ b/vendor/github.com/Microsoft/hcsshim/internal/jobobject/jobobject.go @@ -68,6 +68,9 @@ type Options struct { // `UseNTVariant` specifies if we should use the `Nt` variant of Open/CreateJobObject. // Defaults to false. UseNTVariant bool + // `IOTracking` enables tracking I/O statistics on the job object. More specifically this + // calls SetInformationJobObject with the JobObjectIoAttribution class. + EnableIOTracking bool } // Create creates a job object. @@ -134,6 +137,12 @@ func Create(ctx context.Context, options *Options) (_ *JobObject, err error) { job.mq = mq } + if options.EnableIOTracking { + if err := enableIOTracking(jobHandle); err != nil { + return nil, err + } + } + return job, nil } @@ -235,7 +244,7 @@ func (job *JobObject) PollNotification() (interface{}, error) { if job.mq == nil { return nil, ErrNotRegistered } - return job.mq.ReadOrWait() + return job.mq.Dequeue() } // UpdateProcThreadAttribute updates the passed in ProcThreadAttributeList to contain what is necessary to @@ -330,7 +339,7 @@ func (job *JobObject) Pids() ([]uint32, error) { err := winapi.QueryInformationJobObject( job.handle, winapi.JobObjectBasicProcessIdList, - uintptr(unsafe.Pointer(&info)), + unsafe.Pointer(&info), uint32(unsafe.Sizeof(info)), nil, ) @@ -356,7 +365,7 @@ func (job *JobObject) Pids() ([]uint32, error) { if err = winapi.QueryInformationJobObject( job.handle, winapi.JobObjectBasicProcessIdList, - uintptr(unsafe.Pointer(&buf[0])), + unsafe.Pointer(&buf[0]), uint32(len(buf)), nil, ); err != nil { @@ -384,7 +393,7 @@ func (job *JobObject) QueryMemoryStats() (*winapi.JOBOBJECT_MEMORY_USAGE_INFORMA if err := winapi.QueryInformationJobObject( job.handle, winapi.JobObjectMemoryUsageInformation, - uintptr(unsafe.Pointer(&info)), + unsafe.Pointer(&info), uint32(unsafe.Sizeof(info)), nil, ); err != nil { @@ -406,7 +415,7 @@ func (job *JobObject) QueryProcessorStats() (*winapi.JOBOBJECT_BASIC_ACCOUNTING_ if err := winapi.QueryInformationJobObject( job.handle, winapi.JobObjectBasicAccountingInformation, - uintptr(unsafe.Pointer(&info)), + unsafe.Pointer(&info), uint32(unsafe.Sizeof(info)), nil, ); err != nil { @@ -415,7 +424,9 @@ func (job *JobObject) QueryProcessorStats() (*winapi.JOBOBJECT_BASIC_ACCOUNTING_ return &info, nil } -// QueryStorageStats gets the storage (I/O) stats for the job object. +// QueryStorageStats gets the storage (I/O) stats for the job object. This call will error +// if either `EnableIOTracking` wasn't set to true on creation of the job, or SetIOTracking() +// hasn't been called since creation of the job. func (job *JobObject) QueryStorageStats() (*winapi.JOBOBJECT_IO_ATTRIBUTION_INFORMATION, error) { job.handleLock.RLock() defer job.handleLock.RUnlock() @@ -430,7 +441,7 @@ func (job *JobObject) QueryStorageStats() (*winapi.JOBOBJECT_IO_ATTRIBUTION_INFO if err := winapi.QueryInformationJobObject( job.handle, winapi.JobObjectIoAttribution, - uintptr(unsafe.Pointer(&info)), + unsafe.Pointer(&info), uint32(unsafe.Sizeof(info)), nil, ); err != nil { @@ -476,7 +487,7 @@ func (job *JobObject) QueryPrivateWorkingSet() (uint64, error) { status := winapi.NtQueryInformationProcess( h, winapi.ProcessVmCounters, - uintptr(unsafe.Pointer(&vmCounters)), + unsafe.Pointer(&vmCounters), uint32(unsafe.Sizeof(vmCounters)), nil, ) @@ -497,3 +508,31 @@ func (job *JobObject) QueryPrivateWorkingSet() (uint64, error) { return jobWorkingSetSize, nil } + +// SetIOTracking enables IO tracking for processes in the job object. +// This enables use of the QueryStorageStats method. +func (job *JobObject) SetIOTracking() error { + job.handleLock.RLock() + defer job.handleLock.RUnlock() + + if job.handle == 0 { + return ErrAlreadyClosed + } + + return enableIOTracking(job.handle) +} + +func enableIOTracking(job windows.Handle) error { + info := winapi.JOBOBJECT_IO_ATTRIBUTION_INFORMATION{ + ControlFlags: winapi.JOBOBJECT_IO_ATTRIBUTION_CONTROL_ENABLE, + } + if _, err := windows.SetInformationJobObject( + job, + winapi.JobObjectIoAttribution, + uintptr(unsafe.Pointer(&info)), + uint32(unsafe.Sizeof(info)), + ); err != nil { + return fmt.Errorf("failed to enable IO tracking on job object: %w", err) + } + return nil +} diff --git a/vendor/github.com/Microsoft/hcsshim/internal/jobobject/limits.go b/vendor/github.com/Microsoft/hcsshim/internal/jobobject/limits.go index 4be297788..4efde292c 100644 --- a/vendor/github.com/Microsoft/hcsshim/internal/jobobject/limits.go +++ b/vendor/github.com/Microsoft/hcsshim/internal/jobobject/limits.go @@ -202,7 +202,7 @@ func (job *JobObject) getExtendedInformation() (*windows.JOBOBJECT_EXTENDED_LIMI if err := winapi.QueryInformationJobObject( job.handle, windows.JobObjectExtendedLimitInformation, - uintptr(unsafe.Pointer(&info)), + unsafe.Pointer(&info), uint32(unsafe.Sizeof(info)), nil, ); err != nil { @@ -224,7 +224,7 @@ func (job *JobObject) getCPURateControlInformation() (*winapi.JOBOBJECT_CPU_RATE if err := winapi.QueryInformationJobObject( job.handle, windows.JobObjectCpuRateControlInformation, - uintptr(unsafe.Pointer(&info)), + unsafe.Pointer(&info), uint32(unsafe.Sizeof(info)), nil, ); err != nil { diff --git a/vendor/github.com/Microsoft/hcsshim/internal/queue/mq.go b/vendor/github.com/Microsoft/hcsshim/internal/queue/mq.go index e177c9a62..4eb9bb9f1 100644 --- a/vendor/github.com/Microsoft/hcsshim/internal/queue/mq.go +++ b/vendor/github.com/Microsoft/hcsshim/internal/queue/mq.go @@ -5,10 +5,7 @@ import ( "sync" ) -var ( - ErrQueueClosed = errors.New("the queue is closed for reading and writing") - ErrQueueEmpty = errors.New("the queue is empty") -) +var ErrQueueClosed = errors.New("the queue is closed for reading and writing") // MessageQueue represents a threadsafe message queue to be used to retrieve or // write messages to. @@ -29,8 +26,8 @@ func NewMessageQueue() *MessageQueue { } } -// Write writes `msg` to the queue. -func (mq *MessageQueue) Write(msg interface{}) error { +// Enqueue writes `msg` to the queue. +func (mq *MessageQueue) Enqueue(msg interface{}) error { mq.m.Lock() defer mq.m.Unlock() @@ -43,55 +40,37 @@ func (mq *MessageQueue) Write(msg interface{}) error { return nil } -// Read will read a value from the queue if available, otherwise return an error. -func (mq *MessageQueue) Read() (interface{}, error) { +// Dequeue will read a value from the queue and remove it. If the queue +// is empty, this will block until the queue is closed or a value gets enqueued. +func (mq *MessageQueue) Dequeue() (interface{}, error) { mq.m.Lock() defer mq.m.Unlock() - if mq.closed { - return nil, ErrQueueClosed - } - if mq.isEmpty() { - return nil, ErrQueueEmpty + + for !mq.closed && mq.size() == 0 { + mq.c.Wait() } - val := mq.messages[0] - mq.messages[0] = nil - mq.messages = mq.messages[1:] - return val, nil -} -// ReadOrWait will read a value from the queue if available, else it will wait for a -// value to become available. This will block forever if nothing gets written or until -// the queue gets closed. -func (mq *MessageQueue) ReadOrWait() (interface{}, error) { - mq.m.Lock() + // We got woken up, check if it's because the queue got closed. if mq.closed { - mq.m.Unlock() return nil, ErrQueueClosed } - if mq.isEmpty() { - for !mq.closed && mq.isEmpty() { - mq.c.Wait() - } - mq.m.Unlock() - return mq.Read() - } + val := mq.messages[0] mq.messages[0] = nil mq.messages = mq.messages[1:] - mq.m.Unlock() return val, nil } -// IsEmpty returns if the queue is empty -func (mq *MessageQueue) IsEmpty() bool { +// Size returns the size of the queue. +func (mq *MessageQueue) Size() int { mq.m.RLock() defer mq.m.RUnlock() - return len(mq.messages) == 0 + return mq.size() } -// Nonexported empty check that doesn't lock so we can call this in Read and Write. -func (mq *MessageQueue) isEmpty() bool { - return len(mq.messages) == 0 +// Nonexported size check to check if the queue is empty inside already locked functions. +func (mq *MessageQueue) size() int { + return len(mq.messages) } // Close closes the queue for future writes or reads. Any attempts to read or write from the @@ -99,13 +78,15 @@ func (mq *MessageQueue) isEmpty() bool { func (mq *MessageQueue) Close() { mq.m.Lock() defer mq.m.Unlock() - // Already closed + + // Already closed, noop if mq.closed { return } + mq.messages = nil mq.closed = true - // If there's anybody currently waiting on a value from ReadOrWait, we need to + // If there's anybody currently waiting on a value from Dequeue, we need to // broadcast so the read(s) can return ErrQueueClosed. mq.c.Broadcast() } diff --git a/vendor/github.com/Microsoft/hcsshim/internal/winapi/jobobject.go b/vendor/github.com/Microsoft/hcsshim/internal/winapi/jobobject.go index 479649db3..7eb13f8f0 100644 --- a/vendor/github.com/Microsoft/hcsshim/internal/winapi/jobobject.go +++ b/vendor/github.com/Microsoft/hcsshim/internal/winapi/jobobject.go @@ -175,7 +175,7 @@ type JOBOBJECT_ASSOCIATE_COMPLETION_PORT struct { // LPDWORD lpReturnLength // ); // -//sys QueryInformationJobObject(jobHandle windows.Handle, infoClass uint32, jobObjectInfo uintptr, jobObjectInformationLength uint32, lpReturnLength *uint32) (err error) = kernel32.QueryInformationJobObject +//sys QueryInformationJobObject(jobHandle windows.Handle, infoClass uint32, jobObjectInfo unsafe.Pointer, jobObjectInformationLength uint32, lpReturnLength *uint32) (err error) = kernel32.QueryInformationJobObject // HANDLE OpenJobObjectW( // DWORD dwDesiredAccess, diff --git a/vendor/github.com/Microsoft/hcsshim/internal/winapi/process.go b/vendor/github.com/Microsoft/hcsshim/internal/winapi/process.go index 5f9e03fd2..222529f43 100644 --- a/vendor/github.com/Microsoft/hcsshim/internal/winapi/process.go +++ b/vendor/github.com/Microsoft/hcsshim/internal/winapi/process.go @@ -18,7 +18,7 @@ const ProcessVmCounters = 3 // [out, optional] PULONG ReturnLength // ); // -//sys NtQueryInformationProcess(processHandle windows.Handle, processInfoClass uint32, processInfo uintptr, processInfoLength uint32, returnLength *uint32) (status uint32) = ntdll.NtQueryInformationProcess +//sys NtQueryInformationProcess(processHandle windows.Handle, processInfoClass uint32, processInfo unsafe.Pointer, processInfoLength uint32, returnLength *uint32) (status uint32) = ntdll.NtQueryInformationProcess // typedef struct _VM_COUNTERS_EX // { diff --git a/vendor/github.com/Microsoft/hcsshim/internal/winapi/system.go b/vendor/github.com/Microsoft/hcsshim/internal/winapi/system.go index 327f57d7c..78fe01a4b 100644 --- a/vendor/github.com/Microsoft/hcsshim/internal/winapi/system.go +++ b/vendor/github.com/Microsoft/hcsshim/internal/winapi/system.go @@ -12,7 +12,8 @@ const STATUS_INFO_LENGTH_MISMATCH = 0xC0000004 // ULONG SystemInformationLength, // PULONG ReturnLength // ); -//sys NtQuerySystemInformation(systemInfoClass int, systemInformation uintptr, systemInfoLength uint32, returnLength *uint32) (status uint32) = ntdll.NtQuerySystemInformation +// +//sys NtQuerySystemInformation(systemInfoClass int, systemInformation unsafe.Pointer, systemInfoLength uint32, returnLength *uint32) (status uint32) = ntdll.NtQuerySystemInformation type SYSTEM_PROCESS_INFORMATION struct { NextEntryOffset uint32 // ULONG diff --git a/vendor/github.com/Microsoft/hcsshim/internal/winapi/zsyscall_windows.go b/vendor/github.com/Microsoft/hcsshim/internal/winapi/zsyscall_windows.go index 39fb3e1ad..1f16cf0b8 100644 --- a/vendor/github.com/Microsoft/hcsshim/internal/winapi/zsyscall_windows.go +++ b/vendor/github.com/Microsoft/hcsshim/internal/winapi/zsyscall_windows.go @@ -100,7 +100,7 @@ func resizePseudoConsole(hPc windows.Handle, size uint32) (hr error) { return } -func NtQuerySystemInformation(systemInfoClass int, systemInformation uintptr, systemInfoLength uint32, returnLength *uint32) (status uint32) { +func NtQuerySystemInformation(systemInfoClass int, systemInformation unsafe.Pointer, systemInfoLength uint32, returnLength *uint32) (status uint32) { r0, _, _ := syscall.Syscall6(procNtQuerySystemInformation.Addr(), 4, uintptr(systemInfoClass), uintptr(systemInformation), uintptr(systemInfoLength), uintptr(unsafe.Pointer(returnLength)), 0, 0) status = uint32(r0) return @@ -152,7 +152,7 @@ func IsProcessInJob(procHandle windows.Handle, jobHandle windows.Handle, result return } -func QueryInformationJobObject(jobHandle windows.Handle, infoClass uint32, jobObjectInfo uintptr, jobObjectInformationLength uint32, lpReturnLength *uint32) (err error) { +func QueryInformationJobObject(jobHandle windows.Handle, infoClass uint32, jobObjectInfo unsafe.Pointer, jobObjectInformationLength uint32, lpReturnLength *uint32) (err error) { r1, _, e1 := syscall.Syscall6(procQueryInformationJobObject.Addr(), 5, uintptr(jobHandle), uintptr(infoClass), uintptr(jobObjectInfo), uintptr(jobObjectInformationLength), uintptr(unsafe.Pointer(lpReturnLength)), 0) if r1 == 0 { if e1 != 0 { @@ -244,7 +244,7 @@ func LocalFree(ptr uintptr) { return } -func NtQueryInformationProcess(processHandle windows.Handle, processInfoClass uint32, processInfo uintptr, processInfoLength uint32, returnLength *uint32) (status uint32) { +func NtQueryInformationProcess(processHandle windows.Handle, processInfoClass uint32, processInfo unsafe.Pointer, processInfoLength uint32, returnLength *uint32) (status uint32) { r0, _, _ := syscall.Syscall6(procNtQueryInformationProcess.Addr(), 5, uintptr(processHandle), uintptr(processInfoClass), uintptr(processInfo), uintptr(processInfoLength), uintptr(unsafe.Pointer(returnLength)), 0) status = uint32(r0) return diff --git a/vendor/github.com/containers/common/libimage/inspect.go b/vendor/github.com/containers/common/libimage/inspect.go index 5da8df1bf..c6632d9a2 100644 --- a/vendor/github.com/containers/common/libimage/inspect.go +++ b/vendor/github.com/containers/common/libimage/inspect.go @@ -190,7 +190,7 @@ func (i *Image) Inspect(ctx context.Context, options *InspectOptions) (*ImageDat // NOTE: Health checks may be listed in the container config or // the config. data.HealthCheck = dockerManifest.ContainerConfig.Healthcheck - if data.HealthCheck == nil { + if data.HealthCheck == nil && dockerManifest.Config != nil { data.HealthCheck = dockerManifest.Config.Healthcheck } } diff --git a/vendor/github.com/containers/common/libimage/load.go b/vendor/github.com/containers/common/libimage/load.go index 89faa4635..593eef04b 100644 --- a/vendor/github.com/containers/common/libimage/load.go +++ b/vendor/github.com/containers/common/libimage/load.go @@ -99,7 +99,7 @@ func (r *Runtime) Load(ctx context.Context, path string, options *LoadOptions) ( } // loadMultiImageDockerArchive loads the docker archive specified by ref. In -// case the path@reference notation was used, only the specifiec image will be +// case the path@reference notation was used, only the specified image will be // loaded. Otherwise, all images will be loaded. func (r *Runtime) loadMultiImageDockerArchive(ctx context.Context, ref types.ImageReference, options *CopyOptions) ([]string, error) { // If we cannot stat the path, it either does not exist OR the correct diff --git a/vendor/github.com/containers/common/libnetwork/cni/network.go b/vendor/github.com/containers/common/libnetwork/cni/network.go index fce8f0066..11f1bbe14 100644 --- a/vendor/github.com/containers/common/libnetwork/cni/network.go +++ b/vendor/github.com/containers/common/libnetwork/cni/network.go @@ -19,6 +19,7 @@ import ( "github.com/containers/common/pkg/config" "github.com/containers/storage/pkg/lockfile" "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" ) type cniNetwork struct { @@ -62,6 +63,8 @@ type InitConfig struct { CNIConfigDir string // CNIPluginDirs is a list of directories where cni should look for the plugins. CNIPluginDirs []string + // RunDir is a directory where temporary files can be stored. + RunDir string // DefaultNetwork is the name for the default network. DefaultNetwork string @@ -81,7 +84,16 @@ func NewCNINetworkInterface(conf *InitConfig) (types.ContainerNetwork, error) { // TODO: consider using a shared memory lock lock, err := lockfile.GetLockfile(filepath.Join(conf.CNIConfigDir, "cni.lock")) if err != nil { - return nil, err + // If we're on a read-only filesystem, there is no risk of + // contention. Fall back to a local lockfile. + if errors.Is(err, unix.EROFS) { + lock, err = lockfile.GetLockfile(filepath.Join(conf.RunDir, "cni.lock")) + if err != nil { + return nil, err + } + } else { + return nil, err + } } defaultNetworkName := conf.DefaultNetwork diff --git a/vendor/github.com/containers/common/libnetwork/network/interface.go b/vendor/github.com/containers/common/libnetwork/network/interface.go index 639ff4e45..545655fd3 100644 --- a/vendor/github.com/containers/common/libnetwork/network/interface.go +++ b/vendor/github.com/containers/common/libnetwork/network/interface.go @@ -169,6 +169,7 @@ func getCniInterface(conf *config.Config) (types.ContainerNetwork, error) { return cni.NewCNINetworkInterface(&cni.InitConfig{ CNIConfigDir: confDir, CNIPluginDirs: conf.Network.CNIPluginDirs, + RunDir: conf.Engine.TmpDir, DefaultNetwork: conf.Network.DefaultNetwork, DefaultSubnet: conf.Network.DefaultSubnet, DefaultsubnetPools: conf.Network.DefaultSubnetPools, diff --git a/vendor/github.com/containers/common/pkg/config/config.go b/vendor/github.com/containers/common/pkg/config/config.go index de1d91ae3..858f961b6 100644 --- a/vendor/github.com/containers/common/pkg/config/config.go +++ b/vendor/github.com/containers/common/pkg/config/config.go @@ -7,6 +7,7 @@ import ( "os" "os/exec" "path/filepath" + "runtime" "sort" "strings" "sync" @@ -27,6 +28,8 @@ const ( _configPath = "containers/containers.conf" // UserOverrideContainersConfig holds the containers config path overridden by the rootless user UserOverrideContainersConfig = ".config/" + _configPath + // Token prefix for looking for helper binary under $BINDIR + bindirPrefix = "$BINDIR" ) // RuntimeStateStore is a constant indicating which state store implementation @@ -454,6 +457,13 @@ type EngineConfig struct { // may not be by other drivers. VolumePath string `toml:"volume_path,omitempty"` + // VolumePluginTimeout sets the default timeout, in seconds, for + // operations that must contact a volume plugin. Plugins are external + // programs accessed via REST API; this sets a timeout for requests to + // that API. + // A value of 0 is treated as no timeout. + VolumePluginTimeout uint `toml:"volume_plugin_timeout,omitempty,omitzero"` + // VolumePlugins is a set of plugins that can be used as the backend for // Podman named volumes. Each volume is specified as a name (what Podman // will refer to the plugin as) mapped to a path, which must point to a @@ -815,6 +825,18 @@ func (c *Config) Validate() error { return nil } +// URI returns the URI Path to the machine image +func (m *MachineConfig) URI() string { + uri := m.Image + for _, val := range []string{"$ARCH", "$arch"} { + uri = strings.Replace(uri, val, runtime.GOARCH, 1) + } + for _, val := range []string{"$OS", "$os"} { + uri = strings.Replace(uri, val, runtime.GOOS, 1) + } + return uri +} + func (c *EngineConfig) findRuntime() string { // Search for crun first followed by runc, kata, runsc for _, name := range []string{"crun", "runc", "runj", "kata", "runsc"} { @@ -1241,10 +1263,37 @@ func (c *Config) ActiveDestination() (uri, identity string, err error) { return "", "", errors.New("no service destination configured") } +var ( + bindirFailed = false + bindirCached = "" +) + +func findBindir() string { + if bindirCached != "" || bindirFailed { + return bindirCached + } + execPath, err := os.Executable() + if err == nil { + // Resolve symbolic links to find the actual binary file path. + execPath, err = filepath.EvalSymlinks(execPath) + } + if err != nil { + // If failed to find executable (unlikely to happen), warn about it. + // The bindirFailed flag will track this, so we only warn once. + logrus.Warnf("Failed to find $BINDIR: %v", err) + bindirFailed = true + return "" + } + bindirCached = filepath.Dir(execPath) + return bindirCached +} + // FindHelperBinary will search the given binary name in the configured directories. // If searchPATH is set to true it will also search in $PATH. func (c *Config) FindHelperBinary(name string, searchPATH bool) (string, error) { dirList := c.Engine.HelperBinariesDir + bindirPath := "" + bindirSearched := false // If set, search this directory first. This is used in testing. if dir, found := os.LookupEnv("CONTAINERS_HELPER_BINARY_DIR"); found { @@ -1252,6 +1301,24 @@ func (c *Config) FindHelperBinary(name string, searchPATH bool) (string, error) } for _, path := range dirList { + if path == bindirPrefix || strings.HasPrefix(path, bindirPrefix+string(filepath.Separator)) { + // Calculate the path to the executable first time we encounter a $BINDIR prefix. + if !bindirSearched { + bindirSearched = true + bindirPath = findBindir() + } + // If there's an error, don't stop the search for the helper binary. + // findBindir() will have warned once during the first failure. + if bindirPath == "" { + continue + } + // Replace the $BINDIR prefix with the path to the directory of the current binary. + if path == bindirPrefix { + path = bindirPath + } else { + path = filepath.Join(bindirPath, strings.TrimPrefix(path, bindirPrefix+string(filepath.Separator))) + } + } fullpath := filepath.Join(path, name) if fi, err := os.Stat(fullpath); err == nil && fi.Mode().IsRegular() { return fullpath, nil diff --git a/vendor/github.com/containers/common/pkg/config/config_darwin.go b/vendor/github.com/containers/common/pkg/config/config_darwin.go index 0ab9e0294..5283665e1 100644 --- a/vendor/github.com/containers/common/pkg/config/config_darwin.go +++ b/vendor/github.com/containers/common/pkg/config/config_darwin.go @@ -35,4 +35,6 @@ var defaultHelperBinariesDir = []string{ "/usr/local/lib/podman", "/usr/libexec/podman", "/usr/lib/podman", + // Relative to the binary directory + "$BINDIR/../libexec/podman", } diff --git a/vendor/github.com/containers/common/pkg/config/containers.conf b/vendor/github.com/containers/common/pkg/config/containers.conf index d1ac7c0e8..5b5aaa00a 100644 --- a/vendor/github.com/containers/common/pkg/config/containers.conf +++ b/vendor/github.com/containers/common/pkg/config/containers.conf @@ -605,6 +605,12 @@ default_sysctls = [ # #volume_path = "/var/lib/containers/storage/volumes" +# Default timeout (in seconds) for volume plugin operations. +# Plugins are external programs accessed via a REST API; this sets a timeout +# for requests to that API. +# A value of 0 is treated as no timeout. +#volume_plugin_timeout = 5 + # Paths to look for a valid OCI runtime (crun, runc, kata, runsc, krun, etc) [engine.runtimes] #crun = [ @@ -665,9 +671,16 @@ default_sysctls = [ # #disk_size=10 -# The image used when creating a podman-machine VM. +# Default image URI when creating a new VM using `podman machine init`. +# Options: On Linux/Mac, `testing`, `stable`, `next`. On Windows, the major +# version of the OS (e.g `36`) for Fedora 36. For all platforms you can +# alternatively specify a custom download URL to an image. Container engines +# translate URIs $OS and $ARCH to the native OS and ARCH. URI +# "https://example.com/$OS/$ARCH/foobar.ami" becomes +# "https://example.com/linux/amd64/foobar.ami" on a Linux AMD machine. +# The default value is `testing`. # -#image = "testing" +# image = "testing" # Memory in MB a machine is created with. # diff --git a/vendor/github.com/containers/common/pkg/config/default.go b/vendor/github.com/containers/common/pkg/config/default.go index 6bca7312a..b0d62779b 100644 --- a/vendor/github.com/containers/common/pkg/config/default.go +++ b/vendor/github.com/containers/common/pkg/config/default.go @@ -168,6 +168,8 @@ const ( SeccompOverridePath = _etcDir + "/containers/seccomp.json" // SeccompDefaultPath defines the default seccomp path. SeccompDefaultPath = _installPrefix + "/share/containers/seccomp.json" + // DefaultVolumePluginTimeout is the default volume plugin timeout, in seconds + DefaultVolumePluginTimeout = 5 ) // DefaultConfig defines the default values from containers.conf. @@ -264,7 +266,7 @@ func defaultMachineConfig() MachineConfig { Image: getDefaultMachineImage(), Memory: 2048, User: getDefaultMachineUser(), - Volumes: []string{"$HOME:$HOME"}, + Volumes: getDefaultMachineVolumes(), } } @@ -304,6 +306,8 @@ func defaultConfigFromMemory() (*EngineConfig, error) { c.StaticDir = filepath.Join(storeOpts.GraphRoot, "libpod") c.VolumePath = filepath.Join(storeOpts.GraphRoot, "volumes") + c.VolumePluginTimeout = DefaultVolumePluginTimeout + c.HelperBinariesDir = defaultHelperBinariesDir if additionalHelperBinariesDir != "" { c.HelperBinariesDir = append(c.HelperBinariesDir, additionalHelperBinariesDir) diff --git a/vendor/github.com/containers/common/pkg/config/default_darwin.go b/vendor/github.com/containers/common/pkg/config/default_darwin.go index c502ea55e..5d857df4f 100644 --- a/vendor/github.com/containers/common/pkg/config/default_darwin.go +++ b/vendor/github.com/containers/common/pkg/config/default_darwin.go @@ -11,3 +11,8 @@ func getDefaultLockType() string { func getLibpodTmpDir() string { return "/run/libpod" } + +// getDefaultMachineVolumes returns default mounted volumes (possibly with env vars, which will be expanded) +func getDefaultMachineVolumes() []string { + return []string{"$HOME:$HOME"} +} diff --git a/vendor/github.com/containers/common/pkg/config/default_freebsd.go b/vendor/github.com/containers/common/pkg/config/default_freebsd.go index 8b10ac1f7..9c827dbfe 100644 --- a/vendor/github.com/containers/common/pkg/config/default_freebsd.go +++ b/vendor/github.com/containers/common/pkg/config/default_freebsd.go @@ -18,3 +18,8 @@ func getDefaultLockType() string { func getLibpodTmpDir() string { return "/var/run/libpod" } + +// getDefaultMachineVolumes returns default mounted volumes (possibly with env vars, which will be expanded) +func getDefaultMachineVolumes() []string { + return []string{"$HOME:$HOME"} +} diff --git a/vendor/github.com/containers/common/pkg/config/default_linux.go b/vendor/github.com/containers/common/pkg/config/default_linux.go index 86873beb1..15052c10e 100644 --- a/vendor/github.com/containers/common/pkg/config/default_linux.go +++ b/vendor/github.com/containers/common/pkg/config/default_linux.go @@ -70,3 +70,8 @@ func getDefaultLockType() string { func getLibpodTmpDir() string { return "/run/libpod" } + +// getDefaultMachineVolumes returns default mounted volumes (possibly with env vars, which will be expanded) +func getDefaultMachineVolumes() []string { + return []string{"$HOME:$HOME"} +} diff --git a/vendor/github.com/containers/common/pkg/config/default_windows.go b/vendor/github.com/containers/common/pkg/config/default_windows.go index 1ff88fc42..08a0bf223 100644 --- a/vendor/github.com/containers/common/pkg/config/default_windows.go +++ b/vendor/github.com/containers/common/pkg/config/default_windows.go @@ -44,3 +44,8 @@ func getDefaultLockType() string { func getLibpodTmpDir() string { return "/run/libpod" } + +// getDefaultMachineVolumes returns default mounted volumes (possibly with env vars, which will be expanded) +func getDefaultMachineVolumes() []string { + return []string{} +} diff --git a/vendor/github.com/containers/common/pkg/subscriptions/subscriptions.go b/vendor/github.com/containers/common/pkg/subscriptions/subscriptions.go index ff82b5a39..02b6dfb09 100644 --- a/vendor/github.com/containers/common/pkg/subscriptions/subscriptions.go +++ b/vendor/github.com/containers/common/pkg/subscriptions/subscriptions.go @@ -372,7 +372,7 @@ func mountExists(mounts []rspec.Mount, dest string) bool { return false } -// resolveSymbolicLink resolves a possbile symlink path. If the path is a symlink, returns resolved +// resolveSymbolicLink resolves symlink paths. If the path is a symlink, returns resolved // path; if not, returns the original path. func resolveSymbolicLink(path string) (string, error) { info, err := os.Lstat(path) diff --git a/vendor/modules.txt b/vendor/modules.txt index eb9c7a34d..170f8fb98 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -11,7 +11,7 @@ github.com/Microsoft/go-winio/backuptar github.com/Microsoft/go-winio/pkg/guid github.com/Microsoft/go-winio/pkg/security github.com/Microsoft/go-winio/vhd -# github.com/Microsoft/hcsshim v0.9.3 +# github.com/Microsoft/hcsshim v0.9.4 github.com/Microsoft/hcsshim github.com/Microsoft/hcsshim/computestorage github.com/Microsoft/hcsshim/internal/cow @@ -67,7 +67,7 @@ github.com/container-orchestrated-devices/container-device-interface/pkg/cdi github.com/container-orchestrated-devices/container-device-interface/specs-go # github.com/containerd/cgroups v1.0.3 github.com/containerd/cgroups/stats/v1 -# github.com/containerd/containerd v1.6.6 +# github.com/containerd/containerd v1.6.8 github.com/containerd/containerd/errdefs github.com/containerd/containerd/log github.com/containerd/containerd/pkg/userns @@ -114,7 +114,7 @@ github.com/containers/buildah/pkg/rusage github.com/containers/buildah/pkg/sshagent github.com/containers/buildah/pkg/util github.com/containers/buildah/util -# github.com/containers/common v0.49.2-0.20220817132854-f6679f170eca +# github.com/containers/common v0.49.2-0.20220823130605-72a7da3358ac ## explicit github.com/containers/common/libimage github.com/containers/common/libimage/define -- cgit v1.2.3-54-g00ecf From 877e673992845afa798582c970dfbdb26efa9d46 Mon Sep 17 00:00:00 2001 From: Doug Rabson Date: Fri, 19 Aug 2022 07:58:26 +0100 Subject: libpod: Split out platform-specific code from hostInfo [NO NEW TESTS NEEDED] Signed-off-by: Doug Rabson --- libpod/info.go | 107 +++++++++------------------------------------------ libpod/info_linux.go | 88 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 88 deletions(-) create mode 100644 libpod/info_linux.go (limited to 'libpod') diff --git a/libpod/info.go b/libpod/info.go index 8db6df8cc..96fd7242e 100644 --- a/libpod/info.go +++ b/libpod/info.go @@ -11,7 +11,6 @@ import ( "io/ioutil" "math" "os" - "os/exec" "runtime" "strconv" "strings" @@ -19,16 +18,12 @@ import ( "time" "github.com/containers/buildah" - "github.com/containers/common/pkg/apparmor" - "github.com/containers/common/pkg/cgroups" - "github.com/containers/common/pkg/seccomp" "github.com/containers/image/v5/pkg/sysregistriesv2" "github.com/containers/podman/v4/libpod/define" "github.com/containers/podman/v4/libpod/linkmode" "github.com/containers/podman/v4/pkg/rootless" "github.com/containers/storage" "github.com/containers/storage/pkg/system" - "github.com/opencontainers/selinux/go-selinux" "github.com/sirupsen/logrus" ) @@ -104,94 +99,30 @@ func (r *Runtime) hostInfo() (*define.HostInfo, error) { return nil, fmt.Errorf("error getting hostname: %w", err) } - seccompProfilePath, err := DefaultSeccompPath() - if err != nil { - return nil, fmt.Errorf("error getting Seccomp profile path: %w", err) - } - - // Cgroups version - unified, err := cgroups.IsCgroup2UnifiedMode() - if err != nil { - return nil, fmt.Errorf("error reading cgroups mode: %w", err) - } - - // Get Map of all available controllers - availableControllers, err := cgroups.GetAvailableControllers(nil, unified) - if err != nil { - return nil, fmt.Errorf("error getting available cgroup controllers: %w", err) - } cpuUtil, err := getCPUUtilization() if err != nil { return nil, err } info := define.HostInfo{ - Arch: runtime.GOARCH, - BuildahVersion: buildah.Version, - CgroupManager: r.config.Engine.CgroupManager, - CgroupControllers: availableControllers, - Linkmode: linkmode.Linkmode(), - CPUs: runtime.NumCPU(), - CPUUtilization: cpuUtil, - Distribution: hostDistributionInfo, - LogDriver: r.config.Containers.LogDriver, - EventLogger: r.eventer.String(), - Hostname: host, - IDMappings: define.IDMappings{}, - Kernel: kv, - MemFree: mi.MemFree, - MemTotal: mi.MemTotal, - NetworkBackend: r.config.Network.NetworkBackend, - OS: runtime.GOOS, - Security: define.SecurityInfo{ - AppArmorEnabled: apparmor.IsEnabled(), - DefaultCapabilities: strings.Join(r.config.Containers.DefaultCapabilities, ","), - Rootless: rootless.IsRootless(), - SECCOMPEnabled: seccomp.IsEnabled(), - SECCOMPProfilePath: seccompProfilePath, - SELinuxEnabled: selinux.GetEnabled(), - }, - Slirp4NetNS: define.SlirpInfo{}, - SwapFree: mi.SwapFree, - SwapTotal: mi.SwapTotal, - } - - cgroupVersion := "v1" - if unified { - cgroupVersion = "v2" - } - info.CgroupsVersion = cgroupVersion - - slirp4netnsPath := r.config.Engine.NetworkCmdPath - if slirp4netnsPath == "" { - slirp4netnsPath, _ = exec.LookPath("slirp4netns") - } - if slirp4netnsPath != "" { - version, err := programVersion(slirp4netnsPath) - if err != nil { - logrus.Warnf("Failed to retrieve program version for %s: %v", slirp4netnsPath, err) - } - program := define.SlirpInfo{ - Executable: slirp4netnsPath, - Package: packageVersion(slirp4netnsPath), - Version: version, - } - info.Slirp4NetNS = program - } - - if rootless.IsRootless() { - uidmappings, err := rootless.ReadMappingsProc("/proc/self/uid_map") - if err != nil { - return nil, fmt.Errorf("error reading uid mappings: %w", err) - } - gidmappings, err := rootless.ReadMappingsProc("/proc/self/gid_map") - if err != nil { - return nil, fmt.Errorf("error reading gid mappings: %w", err) - } - idmappings := define.IDMappings{ - GIDMap: gidmappings, - UIDMap: uidmappings, - } - info.IDMappings = idmappings + Arch: runtime.GOARCH, + BuildahVersion: buildah.Version, + Linkmode: linkmode.Linkmode(), + CPUs: runtime.NumCPU(), + CPUUtilization: cpuUtil, + Distribution: hostDistributionInfo, + LogDriver: r.config.Containers.LogDriver, + EventLogger: r.eventer.String(), + Hostname: host, + Kernel: kv, + MemFree: mi.MemFree, + MemTotal: mi.MemTotal, + NetworkBackend: r.config.Network.NetworkBackend, + OS: runtime.GOOS, + SwapFree: mi.SwapFree, + SwapTotal: mi.SwapTotal, + } + if err := r.setPlatformHostInfo(&info); err != nil { + return nil, err } conmonInfo, ociruntimeInfo, err := r.defaultOCIRuntime.RuntimeInfo() diff --git a/libpod/info_linux.go b/libpod/info_linux.go new file mode 100644 index 000000000..c906cae86 --- /dev/null +++ b/libpod/info_linux.go @@ -0,0 +1,88 @@ +package libpod + +import ( + "fmt" + "os/exec" + "strings" + + "github.com/containers/common/pkg/apparmor" + "github.com/containers/common/pkg/cgroups" + "github.com/containers/common/pkg/seccomp" + "github.com/containers/podman/v4/libpod/define" + "github.com/containers/podman/v4/pkg/rootless" + "github.com/opencontainers/selinux/go-selinux" + "github.com/sirupsen/logrus" +) + +func (r *Runtime) setPlatformHostInfo(info *define.HostInfo) error { + seccompProfilePath, err := DefaultSeccompPath() + if err != nil { + return fmt.Errorf("error getting Seccomp profile path: %w", err) + } + + // Cgroups version + unified, err := cgroups.IsCgroup2UnifiedMode() + if err != nil { + return fmt.Errorf("error reading cgroups mode: %w", err) + } + + // Get Map of all available controllers + availableControllers, err := cgroups.GetAvailableControllers(nil, unified) + if err != nil { + return fmt.Errorf("error getting available cgroup controllers: %w", err) + } + + info.CgroupManager = r.config.Engine.CgroupManager + info.CgroupControllers = availableControllers + info.IDMappings = define.IDMappings{} + info.Security = define.SecurityInfo{ + AppArmorEnabled: apparmor.IsEnabled(), + DefaultCapabilities: strings.Join(r.config.Containers.DefaultCapabilities, ","), + Rootless: rootless.IsRootless(), + SECCOMPEnabled: seccomp.IsEnabled(), + SECCOMPProfilePath: seccompProfilePath, + SELinuxEnabled: selinux.GetEnabled(), + } + info.Slirp4NetNS = define.SlirpInfo{} + + cgroupVersion := "v1" + if unified { + cgroupVersion = "v2" + } + info.CgroupsVersion = cgroupVersion + + slirp4netnsPath := r.config.Engine.NetworkCmdPath + if slirp4netnsPath == "" { + slirp4netnsPath, _ = exec.LookPath("slirp4netns") + } + if slirp4netnsPath != "" { + version, err := programVersion(slirp4netnsPath) + if err != nil { + logrus.Warnf("Failed to retrieve program version for %s: %v", slirp4netnsPath, err) + } + program := define.SlirpInfo{ + Executable: slirp4netnsPath, + Package: packageVersion(slirp4netnsPath), + Version: version, + } + info.Slirp4NetNS = program + } + + if rootless.IsRootless() { + uidmappings, err := rootless.ReadMappingsProc("/proc/self/uid_map") + if err != nil { + return fmt.Errorf("error reading uid mappings: %w", err) + } + gidmappings, err := rootless.ReadMappingsProc("/proc/self/gid_map") + if err != nil { + return fmt.Errorf("error reading gid mappings: %w", err) + } + idmappings := define.IDMappings{ + GIDMap: gidmappings, + UIDMap: uidmappings, + } + info.IDMappings = idmappings + } + + return nil +} -- cgit v1.2.3-54-g00ecf From 694cbaca3745ca8fab8665d8ba88af6056dc16f8 Mon Sep 17 00:00:00 2001 From: Doug Rabson Date: Wed, 17 Aug 2022 08:14:51 +0100 Subject: libpod: Read kernel version and uptime using buildah/pkg/util [NO NEW TESTS NEEDED] Signed-off-by: Doug Rabson --- libpod/info.go | 31 +++---------------------------- 1 file changed, 3 insertions(+), 28 deletions(-) (limited to 'libpod') diff --git a/libpod/info.go b/libpod/info.go index 96fd7242e..cdf453135 100644 --- a/libpod/info.go +++ b/libpod/info.go @@ -8,7 +8,6 @@ import ( "bytes" "errors" "fmt" - "io/ioutil" "math" "os" "runtime" @@ -18,6 +17,7 @@ import ( "time" "github.com/containers/buildah" + "github.com/containers/buildah/pkg/util" "github.com/containers/image/v5/pkg/sysregistriesv2" "github.com/containers/podman/v4/libpod/define" "github.com/containers/podman/v4/libpod/linkmode" @@ -89,7 +89,7 @@ func (r *Runtime) hostInfo() (*define.HostInfo, error) { hostDistributionInfo := r.GetHostDistributionInfo() - kv, err := readKernelVersion() + kv, err := util.ReadKernelVersion() if err != nil { return nil, fmt.Errorf("error reading kernel version: %w", err) } @@ -133,7 +133,7 @@ func (r *Runtime) hostInfo() (*define.HostInfo, error) { info.OCIRuntime = ociruntimeInfo } - duration, err := procUptime() + duration, err := util.ReadUptime() if err != nil { return nil, fmt.Errorf("error reading up time: %w", err) } @@ -263,31 +263,6 @@ func (r *Runtime) storeInfo() (*define.StoreInfo, error) { return &info, nil } -func readKernelVersion() (string, error) { - buf, err := ioutil.ReadFile("/proc/version") - if err != nil { - return "", err - } - f := bytes.Fields(buf) - if len(f) < 3 { - return string(bytes.TrimSpace(buf)), nil - } - return string(f[2]), nil -} - -func procUptime() (time.Duration, error) { - var zero time.Duration - buf, err := ioutil.ReadFile("/proc/uptime") - if err != nil { - return zero, err - } - f := bytes.Fields(buf) - if len(f) < 1 { - return zero, errors.New("unable to parse uptime from /proc/uptime") - } - return time.ParseDuration(string(f[0]) + "s") -} - // GetHostDistributionInfo returns a map containing the host's distribution and version func (r *Runtime) GetHostDistributionInfo() define.DistributionInfo { // Populate values in case we cannot find the values -- cgit v1.2.3-54-g00ecf From ff20c74e973abd7090b1ef76bc0d4de420513d34 Mon Sep 17 00:00:00 2001 From: Doug Rabson Date: Fri, 19 Aug 2022 11:16:23 +0100 Subject: libpod: Move getCPUUtilization to info_linux.go The Linux implementation uses /proc/stat - the FreeBSD equivalent is quite different where this information is exposed via sysctl. [NO NEW TESTS NEEDED] Signed-off-by: Doug Rabson --- libpod/info.go | 41 ----------------------------------------- libpod/info_linux.go | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 41 deletions(-) (limited to 'libpod') diff --git a/libpod/info.go b/libpod/info.go index cdf453135..20ea007cf 100644 --- a/libpod/info.go +++ b/libpod/info.go @@ -11,7 +11,6 @@ import ( "math" "os" "runtime" - "strconv" "strings" "syscall" "time" @@ -294,43 +293,3 @@ func (r *Runtime) GetHostDistributionInfo() define.DistributionInfo { } return dist } - -// getCPUUtilization Returns a CPUUsage object that summarizes CPU -// usage for userspace, system, and idle time. -func getCPUUtilization() (*define.CPUUsage, error) { - f, err := os.Open("/proc/stat") - if err != nil { - return nil, err - } - defer f.Close() - scanner := bufio.NewScanner(f) - // Read first line of /proc/stat that has entries for system ("cpu" line) - for scanner.Scan() { - break - } - // column 1 is user, column 3 is system, column 4 is idle - stats := strings.Fields(scanner.Text()) - return statToPercent(stats) -} - -func statToPercent(stats []string) (*define.CPUUsage, error) { - userTotal, err := strconv.ParseFloat(stats[1], 64) - if err != nil { - return nil, fmt.Errorf("unable to parse user value %q: %w", stats[1], err) - } - systemTotal, err := strconv.ParseFloat(stats[3], 64) - if err != nil { - return nil, fmt.Errorf("unable to parse system value %q: %w", stats[3], err) - } - idleTotal, err := strconv.ParseFloat(stats[4], 64) - if err != nil { - return nil, fmt.Errorf("unable to parse idle value %q: %w", stats[4], err) - } - total := userTotal + systemTotal + idleTotal - s := define.CPUUsage{ - UserPercent: math.Round((userTotal/total*100)*100) / 100, - SystemPercent: math.Round((systemTotal/total*100)*100) / 100, - IdlePercent: math.Round((idleTotal/total*100)*100) / 100, - } - return &s, nil -} diff --git a/libpod/info_linux.go b/libpod/info_linux.go index c906cae86..801dcdb43 100644 --- a/libpod/info_linux.go +++ b/libpod/info_linux.go @@ -1,8 +1,12 @@ package libpod import ( + "bufio" "fmt" + "math" + "os" "os/exec" + "strconv" "strings" "github.com/containers/common/pkg/apparmor" @@ -86,3 +90,43 @@ func (r *Runtime) setPlatformHostInfo(info *define.HostInfo) error { return nil } + +func statToPercent(stats []string) (*define.CPUUsage, error) { + userTotal, err := strconv.ParseFloat(stats[1], 64) + if err != nil { + return nil, fmt.Errorf("unable to parse user value %q: %w", stats[1], err) + } + systemTotal, err := strconv.ParseFloat(stats[3], 64) + if err != nil { + return nil, fmt.Errorf("unable to parse system value %q: %w", stats[3], err) + } + idleTotal, err := strconv.ParseFloat(stats[4], 64) + if err != nil { + return nil, fmt.Errorf("unable to parse idle value %q: %w", stats[4], err) + } + total := userTotal + systemTotal + idleTotal + s := define.CPUUsage{ + UserPercent: math.Round((userTotal/total*100)*100) / 100, + SystemPercent: math.Round((systemTotal/total*100)*100) / 100, + IdlePercent: math.Round((idleTotal/total*100)*100) / 100, + } + return &s, nil +} + +// getCPUUtilization Returns a CPUUsage object that summarizes CPU +// usage for userspace, system, and idle time. +func getCPUUtilization() (*define.CPUUsage, error) { + f, err := os.Open("/proc/stat") + if err != nil { + return nil, err + } + defer f.Close() + scanner := bufio.NewScanner(f) + // Read first line of /proc/stat that has entries for system ("cpu" line) + for scanner.Scan() { + break + } + // column 1 is user, column 3 is system, column 4 is idle + stats := strings.Fields(scanner.Text()) + return statToPercent(stats) +} -- cgit v1.2.3-54-g00ecf From b0700aa48291d7b1f697e757b13c7f6ae3efed43 Mon Sep 17 00:00:00 2001 From: Doug Rabson Date: Fri, 19 Aug 2022 07:59:59 +0100 Subject: libpod: Enable 'podman info' for FreeBSD [NO NEW TESTS NEEDED] Signed-off-by: Doug Rabson --- libpod/info.go | 3 --- libpod/info_freebsd.go | 40 ++++++++++++++++++++++++++++++++++++++++ libpod/info_unsupported.go | 4 ++-- 3 files changed, 42 insertions(+), 5 deletions(-) create mode 100644 libpod/info_freebsd.go (limited to 'libpod') diff --git a/libpod/info.go b/libpod/info.go index 20ea007cf..1990dc044 100644 --- a/libpod/info.go +++ b/libpod/info.go @@ -1,6 +1,3 @@ -//go:build linux -// +build linux - package libpod import ( diff --git a/libpod/info_freebsd.go b/libpod/info_freebsd.go new file mode 100644 index 000000000..ef7b6817c --- /dev/null +++ b/libpod/info_freebsd.go @@ -0,0 +1,40 @@ +package libpod + +import ( + "fmt" + "unsafe" + + "github.com/containers/podman/v4/libpod/define" + "golang.org/x/sys/unix" +) + +func (r *Runtime) setPlatformHostInfo(info *define.HostInfo) error { + return nil +} + +func timeToPercent(time uint64, total uint64) float64 { + return 100.0 * float64(time) / float64(total) +} + +// getCPUUtilization Returns a CPUUsage object that summarizes CPU +// usage for userspace, system, and idle time. +func getCPUUtilization() (*define.CPUUsage, error) { + buf, err := unix.SysctlRaw("kern.cp_time") + if err != nil { + return nil, fmt.Errorf("error reading sysctl kern.cp_time: %w", err) + } + + var total uint64 = 0 + var times [unix.CPUSTATES]uint64 + + for i := 0; i < unix.CPUSTATES; i++ { + val := *(*uint64)(unsafe.Pointer(&buf[8*i])) + times[i] = val + total += val + } + return &define.CPUUsage{ + UserPercent: timeToPercent(times[unix.CP_USER], total), + SystemPercent: timeToPercent(times[unix.CP_SYS], total), + IdlePercent: timeToPercent(times[unix.CP_IDLE], total), + }, nil +} diff --git a/libpod/info_unsupported.go b/libpod/info_unsupported.go index 53ee4b32f..0aed51247 100644 --- a/libpod/info_unsupported.go +++ b/libpod/info_unsupported.go @@ -1,5 +1,5 @@ -//go:build !linux -// +build !linux +//go:build !linux && !freebsd +// +build !linux,!freebsd package libpod -- cgit v1.2.3-54-g00ecf From 10b460512408a1101d6c766d84ec8586556e1939 Mon Sep 17 00:00:00 2001 From: Urvashi Mohnani Date: Wed, 24 Aug 2022 23:43:56 -0400 Subject: Remove duplicate annotations in generated service yaml Don't add the same annotations as the pod yaml to the service yaml as it is not needed. [NO NEW TESTS NEEDED] Signed-off-by: Urvashi Mohnani --- libpod/kube.go | 2 ++ 1 file changed, 2 insertions(+) (limited to 'libpod') diff --git a/libpod/kube.go b/libpod/kube.go index 8c09a6bb5..a0fb52973 100644 --- a/libpod/kube.go +++ b/libpod/kube.go @@ -267,6 +267,8 @@ func GenerateKubeServiceFromV1Pod(pod *v1.Pod, servicePorts []v1.ServicePort) (Y } service.Spec = serviceSpec service.ObjectMeta = pod.ObjectMeta + // Reset the annotations for the service as the pod annotations are not needed for the service + service.ObjectMeta.Annotations = nil tm := v12.TypeMeta{ Kind: "Service", APIVersion: pod.TypeMeta.APIVersion, -- cgit v1.2.3-54-g00ecf From 9553f3bafad264367a8a642a17239c0d87c18090 Mon Sep 17 00:00:00 2001 From: Daniel J Walsh Date: Thu, 25 Aug 2022 16:31:53 -0400 Subject: Run codespell Signed-off-by: Daniel J Walsh --- Makefile | 2 +- libpod/container_internal_unsupported.go | 8 ++++---- libpod/define/exec_codes.go | 4 ++-- pkg/machine/config.go | 2 +- pkg/systemd/notifyproxy/notifyproxy_test.go | 2 +- test/e2e/restart_test.go | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) (limited to 'libpod') diff --git a/Makefile b/Makefile index 4818ee122..d10c9cf19 100644 --- a/Makefile +++ b/Makefile @@ -267,7 +267,7 @@ test/version/version: version/version.go .PHONY: codespell codespell: - codespell -S bin,vendor,.git,go.sum,.cirrus.yml,"RELEASE_NOTES.md,*.xz,*.gz,*.ps1,*.tar,swagger.yaml,*.tgz,bin2img,*ico,*.png,*.1,*.5,copyimg,*.orig,apidoc.go" -L pullrequest,uint,iff,od,seeked,splitted,marge,erro,hist,ether -w + codespell -S bin,vendor,.git,go.sum,.cirrus.yml,"RELEASE_NOTES.md,*.xz,*.gz,*.ps1,*.tar,swagger.yaml,*.tgz,bin2img,*ico,*.png,*.1,*.5,copyimg,*.orig,apidoc.go" -L clos,ans,pullrequest,uint,iff,od,seeked,splitted,marge,erro,hist,ether -w .PHONY: validate validate: lint .gitvalidation validate.completions man-page-check swagger-check tests-included tests-expect-exit pr-removes-fixed-skips diff --git a/libpod/container_internal_unsupported.go b/libpod/container_internal_unsupported.go index de92ff260..074aeee47 100644 --- a/libpod/container_internal_unsupported.go +++ b/libpod/container_internal_unsupported.go @@ -69,21 +69,21 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti // getHostsEntries returns the container ip host entries for the correct netmode func (c *Container) getHostsEntries() (etchosts.HostEntries, error) { - return nil, errors.New("unspported (*Container) getHostsEntries") + return nil, errors.New("unsupported (*Container) getHostsEntries") } // Fix ownership and permissions of the specified volume if necessary. func (c *Container) fixVolumePermissions(v *ContainerNamedVolume) error { - return errors.New("unspported (*Container) fixVolumePermissions") + return errors.New("unsupported (*Container) fixVolumePermissions") } func (c *Container) expectPodCgroup() (bool, error) { - return false, errors.New("unspported (*Container) expectPodCgroup") + return false, errors.New("unsupported (*Container) expectPodCgroup") } // Get cgroup path in a format suitable for the OCI spec func (c *Container) getOCICgroupPath() (string, error) { - return "", errors.New("unspported (*Container) getOCICgroupPath") + return "", errors.New("unsupported (*Container) getOCICgroupPath") } func getLocalhostHostEntry(c *Container) etchosts.HostEntries { diff --git a/libpod/define/exec_codes.go b/libpod/define/exec_codes.go index 3f2da4910..a84730e72 100644 --- a/libpod/define/exec_codes.go +++ b/libpod/define/exec_codes.go @@ -11,8 +11,8 @@ const ( // ExecErrorCodeGeneric is the default error code to return from an exec session if libpod failed // prior to calling the runtime ExecErrorCodeGeneric = 125 - // ExecErrorCodeCannotInvoke is the error code to return when the runtime fails to invoke a command - // an example of this can be found by trying to execute a directory: + // ExecErrorCodeCannotInvoke is the error code to return when the runtime fails to invoke a command. + // An example of this can be found by trying to execute a directory: // `podman exec -l /etc` ExecErrorCodeCannotInvoke = 126 // ExecErrorCodeNotFound is the error code to return when a command cannot be found diff --git a/pkg/machine/config.go b/pkg/machine/config.go index 5162006db..54aa9e1b7 100644 --- a/pkg/machine/config.go +++ b/pkg/machine/config.go @@ -175,7 +175,7 @@ func (rc RemoteConnectionType) MakeSSHURL(host, path, port, userName string) url return uri } -// GetCacheDir returns the dir where VM images are downladed into when pulled +// GetCacheDir returns the dir where VM images are downloaded into when pulled func GetCacheDir(vmType string) (string, error) { dataDir, err := GetDataDir(vmType) if err != nil { diff --git a/pkg/systemd/notifyproxy/notifyproxy_test.go b/pkg/systemd/notifyproxy/notifyproxy_test.go index edad95659..ce63fc9cd 100644 --- a/pkg/systemd/notifyproxy/notifyproxy_test.go +++ b/pkg/systemd/notifyproxy/notifyproxy_test.go @@ -37,7 +37,7 @@ func TestWaitAndClose(t *testing.T) { time.Sleep(250 * time.Millisecond) select { case err := <-ch: - t.Fatalf("Should stil be waiting but received %v", err) + t.Fatalf("Should still be waiting but received %v", err) default: } diff --git a/test/e2e/restart_test.go b/test/e2e/restart_test.go index dd0070f54..9df884292 100644 --- a/test/e2e/restart_test.go +++ b/test/e2e/restart_test.go @@ -228,7 +228,7 @@ var _ = Describe("Podman restart", func() { Expect(beforeRestart.OutputToString()).To(Equal(afterRestart.OutputToString())) }) - It("podman restart all stoped containers with --all", func() { + It("podman restart all stopped containers with --all", func() { session := podmanTest.RunTopContainer("") session.WaitWithDefaultTimeout() Expect(session).Should(Exit(0)) -- cgit v1.2.3-54-g00ecf From 15aeccb8341d44510b3ec055678f010a2624b34e Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Fri, 26 Aug 2022 10:42:34 +0200 Subject: libpod: UpdateContainerStatus: do not wait for container Commit 30e7cbccc194 accidentally added a deadlock as Podman was waiting for the exit code to show up when the container transitioned to stopped. Code paths that require the exit code to be written (by the cleanup process) should already be using `(*Container).Wait()` in a deadlock free way. [NO NEW TESTS NEEDED] as I did not manage to a reproducer that would work in CI. Ultimately, it's a race condition. Fixes: #15492 Signed-off-by: Valentin Rothberg --- libpod/oci_conmon_common.go | 9 --------- 1 file changed, 9 deletions(-) (limited to 'libpod') diff --git a/libpod/oci_conmon_common.go b/libpod/oci_conmon_common.go index c3725cdb4..b96f92d3a 100644 --- a/libpod/oci_conmon_common.go +++ b/libpod/oci_conmon_common.go @@ -277,15 +277,6 @@ func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container) error { ctr.ID(), state.Status, define.ErrInternal) } - // Only grab exit status if we were not already stopped - // If we were, it should already be in the database - if ctr.state.State == define.ContainerStateStopped && oldState != define.ContainerStateStopped { - if _, err := ctr.Wait(context.Background()); err != nil { - logrus.Errorf("Waiting for container %s to exit: %v", ctr.ID(), err) - } - return nil - } - // Handle ContainerStateStopping - keep it unless the container // transitioned to no longer running. if oldState == define.ContainerStateStopping && (ctr.state.State == define.ContainerStatePaused || ctr.state.State == define.ContainerStateRunning) { -- cgit v1.2.3-54-g00ecf From 78aec2130242b81659bd199c6c8605c76e681ff6 Mon Sep 17 00:00:00 2001 From: Alessandro Rossi Date: Sat, 27 Aug 2022 18:28:24 +0200 Subject: Fix #15499 already connected network Compat: Treat already attached networks as a no-op Applies only to containers in created state. Maintain error in running state. Co-authored-by: Alessandro Rossi Co-authored-by: Brent Baude Co-authored-by: Jason T. Greene Signed-off-by: Alessandro Rossi Signed-off-by: Jason T. Greene --- libpod/boltdb_state.go | 2 +- libpod/define/errors.go | 3 +++ libpod/networking_linux.go | 5 +++++ test/e2e/network_connect_disconnect_test.go | 12 ++++++++++-- 4 files changed, 19 insertions(+), 3 deletions(-) (limited to 'libpod') diff --git a/libpod/boltdb_state.go b/libpod/boltdb_state.go index 81f11410b..e5a7e20fc 100644 --- a/libpod/boltdb_state.go +++ b/libpod/boltdb_state.go @@ -1278,7 +1278,7 @@ func (s *BoltState) NetworkConnect(ctr *Container, network string, opts types.Pe } netConnected := ctrNetworksBkt.Get([]byte(network)) if netConnected != nil { - return fmt.Errorf("container %s is already connected to network %q: %w", ctr.ID(), network, define.ErrNetworkExists) + return fmt.Errorf("container %s is already connected to network %q: %w", ctr.ID(), network, define.ErrNetworkConnected) } // Add the network diff --git a/libpod/define/errors.go b/libpod/define/errors.go index fd27e89de..be471c27e 100644 --- a/libpod/define/errors.go +++ b/libpod/define/errors.go @@ -179,6 +179,9 @@ var ( // ErrNetworkInUse indicates the requested operation failed because the network was in use ErrNetworkInUse = errors.New("network is being used") + // ErrNetworkConnected indicates that the required operation failed because the container is already a network endpoint + ErrNetworkConnected = errors.New("network is already connected") + // ErrStoreNotInitialized indicates that the container storage was never // initialized. ErrStoreNotInitialized = errors.New("the container storage was never initialized") diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go index c05796768..c10c3c0b2 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -1357,6 +1357,11 @@ func (c *Container) NetworkConnect(nameOrID, netName string, netOpts types.PerNe } if err := c.runtime.state.NetworkConnect(c, netName, netOpts); err != nil { + // Docker compat: treat requests to attach already attached networks as a no-op, ignoring opts + if errors.Is(err, define.ErrNetworkConnected) && c.ensureState(define.ContainerStateConfigured) { + return nil + } + return err } c.newNetworkEvent(events.NetworkConnect, netName) diff --git a/test/e2e/network_connect_disconnect_test.go b/test/e2e/network_connect_disconnect_test.go index ece1b519d..30a5c6482 100644 --- a/test/e2e/network_connect_disconnect_test.go +++ b/test/e2e/network_connect_disconnect_test.go @@ -157,7 +157,7 @@ var _ = Describe("Podman network connect and disconnect", func() { Expect(con.ErrorToString()).To(ContainSubstring(`"slirp4netns" is not supported: invalid network mode`)) }) - It("podman connect on a container that already is connected to the network should error", func() { + It("podman connect on a container that already is connected to the network should error after init", func() { netName := "aliasTest" + stringid.GenerateNonCryptoID() session := podmanTest.Podman([]string{"network", "create", netName}) session.WaitWithDefaultTimeout() @@ -177,7 +177,15 @@ var _ = Describe("Podman network connect and disconnect", func() { con := podmanTest.Podman([]string{"network", "connect", netName, "test"}) con.WaitWithDefaultTimeout() - Expect(con).Should(ExitWithError()) + Expect(con).Should(Exit(0)) + + init := podmanTest.Podman([]string{"init", "test"}) + init.WaitWithDefaultTimeout() + Expect(init).Should(Exit(0)) + + con2 := podmanTest.Podman([]string{"network", "connect", netName, "test"}) + con2.WaitWithDefaultTimeout() + Expect(con2).Should(ExitWithError()) }) It("podman network connect", func() { -- cgit v1.2.3-54-g00ecf