aboutsummaryrefslogtreecommitdiff
path: root/libpod
diff options
context:
space:
mode:
Diffstat (limited to 'libpod')
-rw-r--r--libpod/boltdb_state.go2
-rw-r--r--libpod/boltdb_state_freebsd.go17
-rw-r--r--libpod/boltdb_state_unsupported.go19
-rw-r--r--libpod/container_copy_unsupported.go17
-rw-r--r--libpod/container_freebsd.go12
-rw-r--r--libpod/container_internal.go20
-rw-r--r--libpod/container_internal_linux.go25
-rw-r--r--libpod/container_internal_unsupported.go99
-rw-r--r--libpod/container_stat_unsupported.go14
-rw-r--r--libpod/container_top_unsupported.go14
-rw-r--r--libpod/container_unsupported.go7
-rw-r--r--libpod/define/errors.go3
-rw-r--r--libpod/define/exec_codes.go4
-rw-r--r--libpod/define/volume_inspect.go2
-rw-r--r--libpod/events/events_freebsd.go23
-rw-r--r--libpod/events/events_unsupported.go4
-rw-r--r--libpod/events/logfile.go4
-rw-r--r--libpod/healthcheck_unsupported.go25
-rw-r--r--libpod/info.go179
-rw-r--r--libpod/info_freebsd.go40
-rw-r--r--libpod/info_linux.go132
-rw-r--r--libpod/info_unsupported.go14
-rw-r--r--libpod/kube.go2
-rw-r--r--libpod/networking_linux.go5
-rw-r--r--libpod/networking_unsupported.go86
-rw-r--r--libpod/oci_conmon_attach_common.go305
-rw-r--r--libpod/oci_conmon_attach_freebsd.go21
-rw-r--r--libpod/oci_conmon_attach_linux.go297
-rw-r--r--libpod/oci_conmon_common.go1591
-rw-r--r--libpod/oci_conmon_exec_common.go (renamed from libpod/oci_conmon_exec_linux.go)0
-rw-r--r--libpod/oci_conmon_freebsd.go24
-rw-r--r--libpod/oci_conmon_linux.go1657
-rw-r--r--libpod/oci_conmon_unsupported.go24
-rw-r--r--libpod/options.go14
-rw-r--r--libpod/plugin/volume_api.go19
-rw-r--r--libpod/pod_top_unsupported.go20
-rw-r--r--libpod/runtime.go115
-rw-r--r--libpod/runtime_migrate_unsupported.go16
-rw-r--r--libpod/runtime_pod_unsupported.go30
-rw-r--r--libpod/runtime_volume_linux.go2
-rw-r--r--libpod/runtime_volume_unsupported.go42
-rw-r--r--libpod/stats_unsupported.go17
-rw-r--r--libpod/util_unsupported.go27
-rw-r--r--libpod/volume.go2
-rw-r--r--libpod/volume_inspect.go7
-rw-r--r--libpod/volume_internal_unsupported.go32
46 files changed, 2809 insertions, 2222 deletions
diff --git a/libpod/boltdb_state.go b/libpod/boltdb_state.go
index 81f11410b..e5a7e20fc 100644
--- a/libpod/boltdb_state.go
+++ b/libpod/boltdb_state.go
@@ -1278,7 +1278,7 @@ func (s *BoltState) NetworkConnect(ctr *Container, network string, opts types.Pe
}
netConnected := ctrNetworksBkt.Get([]byte(network))
if netConnected != nil {
- return fmt.Errorf("container %s is already connected to network %q: %w", ctr.ID(), network, define.ErrNetworkExists)
+ return fmt.Errorf("container %s is already connected to network %q: %w", ctr.ID(), network, define.ErrNetworkConnected)
}
// Add the network
diff --git a/libpod/boltdb_state_freebsd.go b/libpod/boltdb_state_freebsd.go
new file mode 100644
index 000000000..d7f2736fc
--- /dev/null
+++ b/libpod/boltdb_state_freebsd.go
@@ -0,0 +1,17 @@
+//go:build freebsd
+// +build freebsd
+
+package libpod
+
+// replaceNetNS handles network namespace transitions after updating a
+// container's state. netNSPath is stored verbatim in newState; ctr is not used here.
+func replaceNetNS(netNSPath string, ctr *Container, newState *ContainerState) error {
+ // On FreeBSD, we just record the network jail's name in our state.
+ newState.NetworkJail = netNSPath
+ return nil
+}
+
+// getNetNSPath retrieves the netns path to be stored in the database; on FreeBSD this is the network jail name held in the container state.
+func getNetNSPath(ctr *Container) string {
+ return ctr.state.NetworkJail
+}
diff --git a/libpod/boltdb_state_unsupported.go b/libpod/boltdb_state_unsupported.go
new file mode 100644
index 000000000..9db1e3c4b
--- /dev/null
+++ b/libpod/boltdb_state_unsupported.go
@@ -0,0 +1,19 @@
+//go:build !linux && !freebsd
+// +build !linux,!freebsd
+
+package libpod
+
+import (
+ "errors"
+)
+
+// replaceNetNS handles network namespace transitions after updating a
+// container's state. This variant (neither Linux nor FreeBSD) always returns an error.
+func replaceNetNS(netNSPath string, ctr *Container, newState *ContainerState) error {
+ return errors.New("replaceNetNS not supported on this platform")
+}
+
+// getNetNSPath retrieves the netns path to be stored in the database; this variant (neither Linux nor FreeBSD) always returns the empty string.
+func getNetNSPath(ctr *Container) string {
+ return ""
+}
diff --git a/libpod/container_copy_unsupported.go b/libpod/container_copy_unsupported.go
new file mode 100644
index 000000000..62937279a
--- /dev/null
+++ b/libpod/container_copy_unsupported.go
@@ -0,0 +1,17 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "errors"
+ "io"
+)
+
+func (c *Container) copyFromArchive(path string, chown, noOverwriteDirNonDir bool, rename map[string]string, reader io.Reader) (func() error, error) {
+ return nil, errors.New("not implemented (*Container) copyFromArchive") // stub for non-Linux builds: always fails
+}
+
+func (c *Container) copyToArchive(path string, writer io.Writer) (func() error, error) {
+ return nil, errors.New("not implemented (*Container) copyToArchive") // stub for non-Linux builds: always fails
+}
diff --git a/libpod/container_freebsd.go b/libpod/container_freebsd.go
new file mode 100644
index 000000000..f9fbc4daa
--- /dev/null
+++ b/libpod/container_freebsd.go
@@ -0,0 +1,12 @@
+//go:build freebsd
+// +build freebsd
+
+package libpod
+
+type containerPlatformState struct { // FreeBSD-specific part of the container state
+ // NetworkJail is the name of the container's network VNET
+ // jail. Will only be set if config.CreateNetNS is true, or
+ // the container was told to join another container's network
+ // namespace.
+ NetworkJail string `json:"-"` // the "-" tag keeps this field out of JSON encoding
+}
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index 7cef067b0..60fb29607 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -293,20 +293,8 @@ func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr err
}
// set up slirp4netns again because slirp4netns will die when conmon exits
- if c.config.NetMode.IsSlirp4netns() {
- err := c.runtime.setupSlirp4netns(c, c.state.NetNS)
- if err != nil {
- return false, err
- }
- }
-
- // set up rootlesskit port forwarder again since it dies when conmon exits
- // we use rootlesskit port forwarder only as rootless and when bridge network is used
- if rootless.IsRootless() && c.config.NetMode.IsBridge() && len(c.config.PortMappings) > 0 {
- err := c.runtime.setupRootlessPortMappingViaRLK(c, c.state.NetNS.Path(), c.state.NetworkStatus)
- if err != nil {
- return false, err
- }
+ if err := c.setupRootlessNetwork(); err != nil {
+ return false, err
}
if c.state.State == define.ContainerStateStopped {
@@ -1557,7 +1545,7 @@ func (c *Container) mountStorage() (_ string, deferredErr error) {
rootUID, rootGID := c.RootUID(), c.RootGID()
- dirfd, err := unix.Open(mountPoint, unix.O_RDONLY|unix.O_PATH, 0)
+ dirfd, err := openDirectory(mountPoint)
if err != nil {
return "", fmt.Errorf("open mount point: %w", err)
}
@@ -1580,7 +1568,7 @@ func (c *Container) mountStorage() (_ string, deferredErr error) {
return "", fmt.Errorf("resolve /etc in the container: %w", err)
}
- etcInTheContainerFd, err := unix.Open(etcInTheContainerPath, unix.O_RDONLY|unix.O_PATH, 0)
+ etcInTheContainerFd, err := openDirectory(etcInTheContainerPath)
if err != nil {
return "", fmt.Errorf("open /etc in the container: %w", err)
}
diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go
index 3c77cb18c..5c5fd471b 100644
--- a/libpod/container_internal_linux.go
+++ b/libpod/container_internal_linux.go
@@ -3228,3 +3228,28 @@ func (c *Container) ChangeHostPathOwnership(src string, recurse bool, uid, gid i
}
return chown.ChangeHostPathOwnership(src, recurse, uid, gid)
}
+
+// setupRootlessNetwork sets up slirp4netns when the container uses that net mode, and the rootlesskit port forwarder for rootless bridge containers that have port mappings; the first failure is returned.
+func (c *Container) setupRootlessNetwork() error {
+ // set up slirp4netns again because slirp4netns will die when conmon exits
+ if c.config.NetMode.IsSlirp4netns() {
+ err := c.runtime.setupSlirp4netns(c, c.state.NetNS)
+ if err != nil {
+ return err
+ }
+ }
+
+ // set up rootlesskit port forwarder again since it dies when conmon exits
+ // we use rootlesskit port forwarder only as rootless and when bridge network is used
+ if rootless.IsRootless() && c.config.NetMode.IsBridge() && len(c.config.PortMappings) > 0 {
+ err := c.runtime.setupRootlessPortMappingViaRLK(c, c.state.NetNS.Path(), c.state.NetworkStatus)
+ if err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func openDirectory(path string) (fd int, err error) {
+ return unix.Open(path, unix.O_RDONLY|unix.O_PATH, 0) // thin wrapper so that non-Linux builds can substitute their own variant (see the !linux stub)
+}
diff --git a/libpod/container_internal_unsupported.go b/libpod/container_internal_unsupported.go
new file mode 100644
index 000000000..074aeee47
--- /dev/null
+++ b/libpod/container_internal_unsupported.go
@@ -0,0 +1,99 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "context"
+ "errors"
+
+ "github.com/containers/common/libnetwork/etchosts"
+ "github.com/containers/podman/v4/libpod/define"
+ "github.com/containers/podman/v4/pkg/lookup"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+func (c *Container) mountSHM(shmOptions string) error {
+ return errors.New("not implemented (*Container) mountSHM") // stub for non-Linux builds: always fails
+}
+
+func (c *Container) unmountSHM(mount string) error {
+ return errors.New("not implemented (*Container) unmountSHM") // stub for non-Linux builds: always fails
+}
+
+func (c *Container) cleanupOverlayMounts() error {
+ return errors.New("not implemented (*Container) cleanupOverlayMounts") // stub for non-Linux builds: always fails
+}
+
+// prepare mounts the container and sets up other required resources like net
+// namespaces. This non-Linux stub does none of that and always returns an error.
+func (c *Container) prepare() error {
+ return errors.New("not implemented (*Container) prepare")
+}
+
+// resolveWorkDir resolves the container's workdir and, depending on the
+// configuration, will create it, or error out if it does not exist.
+// Note that the container must be mounted before. This non-Linux stub always returns an error.
+func (c *Container) resolveWorkDir() error {
+ return errors.New("not implemented (*Container) resolveWorkDir")
+}
+
+// cleanupNetwork unmounts and cleans up the container's network. This non-Linux stub always returns an error.
+func (c *Container) cleanupNetwork() error {
+ return errors.New("not implemented (*Container) cleanupNetwork")
+}
+
+// reloadNetwork reloads the network for the given container, recreating
+// firewall rules. This non-Linux stub always returns an error.
+func (c *Container) reloadNetwork() error {
+ return errors.New("not implemented (*Container) reloadNetwork")
+}
+
+// generateSpec generates the OCI runtime spec for a container.
+// This non-Linux stub takes only a context and always returns a nil spec and an error.
+func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
+ return nil, errors.New("not implemented (*Container) generateSpec")
+}
+
+func (c *Container) getUserOverrides() *lookup.Overrides {
+ return &lookup.Overrides{} // non-Linux builds supply no overrides: an empty value is returned
+}
+
+func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) {
+ return nil, 0, errors.New("not implemented (*Container) checkpoint") // stub for non-Linux builds: always fails
+}
+
+func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (criuStatistics *define.CRIUCheckpointRestoreStatistics, runtimeRestoreDuration int64, retErr error) {
+ return nil, 0, errors.New("not implemented (*Container) restore") // stub for non-Linux builds: always fails
+}
+
+// getHostsEntries returns the container ip host entries for the correct netmode. This non-Linux stub always returns an error.
+func (c *Container) getHostsEntries() (etchosts.HostEntries, error) {
+ return nil, errors.New("unsupported (*Container) getHostsEntries")
+}
+
+// fixVolumePermissions fixes ownership and permissions of the specified volume if necessary. This non-Linux stub always returns an error.
+func (c *Container) fixVolumePermissions(v *ContainerNamedVolume) error {
+ return errors.New("unsupported (*Container) fixVolumePermissions")
+}
+
+func (c *Container) expectPodCgroup() (bool, error) {
+ return false, errors.New("unsupported (*Container) expectPodCgroup") // stub for non-Linux builds: always fails
+}
+
+// getOCICgroupPath gets the cgroup path in a format suitable for the OCI spec. This non-Linux stub always returns an error.
+func (c *Container) getOCICgroupPath() (string, error) {
+ return "", errors.New("unsupported (*Container) getOCICgroupPath")
+}
+
+func getLocalhostHostEntry(c *Container) etchosts.HostEntries {
+ return nil // non-Linux builds contribute no localhost entry
+}
+
+func isRootlessCgroupSet(cgroup string) bool {
+ return false // always false on non-Linux builds; the argument is ignored
+}
+
+func openDirectory(path string) (fd int, err error) {
+ return -1, errors.New("unsupported openDirectory") // stub for non-Linux builds: always fails, returning -1 as the descriptor
+}
diff --git a/libpod/container_stat_unsupported.go b/libpod/container_stat_unsupported.go
new file mode 100644
index 000000000..2f1acd44d
--- /dev/null
+++ b/libpod/container_stat_unsupported.go
@@ -0,0 +1,14 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "errors"
+
+ "github.com/containers/podman/v4/libpod/define"
+)
+
+func (c *Container) stat(containerMountPoint string, containerPath string) (*define.FileInfo, string, string, error) {
+ return nil, "", "", errors.New("Containers stat not supported on this platform") // stub for non-Linux builds: always fails
+}
diff --git a/libpod/container_top_unsupported.go b/libpod/container_top_unsupported.go
new file mode 100644
index 000000000..a8d9b970b
--- /dev/null
+++ b/libpod/container_top_unsupported.go
@@ -0,0 +1,14 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "errors"
+)
+
+// Top gathers statistics about the running processes in a container. It returns a
+// []string for output. This non-Linux stub always returns an error.
+func (c *Container) Top(descriptors []string) ([]string, error) {
+ return nil, errors.New("not implemented (*Container) Top")
+}
diff --git a/libpod/container_unsupported.go b/libpod/container_unsupported.go
new file mode 100644
index 000000000..16bf11622
--- /dev/null
+++ b/libpod/container_unsupported.go
@@ -0,0 +1,7 @@
+//go:build !linux && !freebsd
+// +build !linux,!freebsd
+
+package libpod
+
+type containerPlatformState struct { // intentionally empty: no platform-specific state outside Linux and FreeBSD
+}
diff --git a/libpod/define/errors.go b/libpod/define/errors.go
index fd27e89de..be471c27e 100644
--- a/libpod/define/errors.go
+++ b/libpod/define/errors.go
@@ -179,6 +179,9 @@ var (
// ErrNetworkInUse indicates the requested operation failed because the network was in use
ErrNetworkInUse = errors.New("network is being used")
+ // ErrNetworkConnected indicates that the requested operation failed because the container is already a network endpoint
+ ErrNetworkConnected = errors.New("network is already connected")
+
// ErrStoreNotInitialized indicates that the container storage was never
// initialized.
ErrStoreNotInitialized = errors.New("the container storage was never initialized")
diff --git a/libpod/define/exec_codes.go b/libpod/define/exec_codes.go
index 3f2da4910..a84730e72 100644
--- a/libpod/define/exec_codes.go
+++ b/libpod/define/exec_codes.go
@@ -11,8 +11,8 @@ const (
// ExecErrorCodeGeneric is the default error code to return from an exec session if libpod failed
// prior to calling the runtime
ExecErrorCodeGeneric = 125
- // ExecErrorCodeCannotInvoke is the error code to return when the runtime fails to invoke a command
- // an example of this can be found by trying to execute a directory:
+ // ExecErrorCodeCannotInvoke is the error code to return when the runtime fails to invoke a command.
+ // An example of this can be found by trying to execute a directory:
// `podman exec -l /etc`
ExecErrorCodeCannotInvoke = 126
// ExecErrorCodeNotFound is the error code to return when a command cannot be found
diff --git a/libpod/define/volume_inspect.go b/libpod/define/volume_inspect.go
index 9279812da..76120647c 100644
--- a/libpod/define/volume_inspect.go
+++ b/libpod/define/volume_inspect.go
@@ -57,7 +57,7 @@ type InspectVolumeData struct {
// UID/GID.
NeedsChown bool `json:"NeedsChown,omitempty"`
// Timeout is the specified driver timeout if given
- Timeout int `json:"Timeout,omitempty"`
+ Timeout uint `json:"Timeout,omitempty"`
}
type VolumeReload struct {
diff --git a/libpod/events/events_freebsd.go b/libpod/events/events_freebsd.go
new file mode 100644
index 000000000..17d410089
--- /dev/null
+++ b/libpod/events/events_freebsd.go
@@ -0,0 +1,23 @@
+package events
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/sirupsen/logrus"
+)
+
+// NewEventer creates an eventer based on the eventer type; on FreeBSD the LogFile, Null and Memory types are accepted (compared case-insensitively) and any other type is an error.
+func NewEventer(options EventerOptions) (Eventer, error) {
+ logrus.Debugf("Initializing event backend %s", options.EventerType)
+ switch strings.ToUpper(options.EventerType) {
+ case strings.ToUpper(LogFile.String()):
+ return EventLogFile{options}, nil
+ case strings.ToUpper(Null.String()):
+ return NewNullEventer(), nil
+ case strings.ToUpper(Memory.String()):
+ return NewMemoryEventer(), nil
+ default:
+ return nil, fmt.Errorf("unknown event logger type: %s", strings.ToUpper(options.EventerType))
+ }
+}
diff --git a/libpod/events/events_unsupported.go b/libpod/events/events_unsupported.go
index d766402a9..01031c225 100644
--- a/libpod/events/events_unsupported.go
+++ b/libpod/events/events_unsupported.go
@@ -1,5 +1,5 @@
-//go:build !linux
-// +build !linux
+//go:build !linux && !freebsd
+// +build !linux,!freebsd
package events
diff --git a/libpod/events/logfile.go b/libpod/events/logfile.go
index c7dbf4850..519e16629 100644
--- a/libpod/events/logfile.go
+++ b/libpod/events/logfile.go
@@ -1,5 +1,5 @@
-//go:build linux
-// +build linux
+//go:build linux || freebsd
+// +build linux freebsd
package events
diff --git a/libpod/healthcheck_unsupported.go b/libpod/healthcheck_unsupported.go
new file mode 100644
index 000000000..92cd5d0a3
--- /dev/null
+++ b/libpod/healthcheck_unsupported.go
@@ -0,0 +1,25 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "context"
+ "errors"
+)
+
+// createTimer creates systemd timers for healthchecks of a container. This non-Linux stub always returns an error.
+func (c *Container) createTimer() error {
+ return errors.New("not implemented (*Container) createTimer")
+}
+
+// startTimer starts a systemd timer for the healthchecks. This non-Linux stub always returns an error.
+func (c *Container) startTimer() error {
+ return errors.New("not implemented (*Container) startTimer")
+}
+
+// removeTransientFiles removes the systemd timer and unit files
+// for the container. This non-Linux stub always returns an error.
+func (c *Container) removeTransientFiles(ctx context.Context) error {
+ return errors.New("not implemented (*Container) removeTransientFiles")
+}
diff --git a/libpod/info.go b/libpod/info.go
index c4193b40d..1990dc044 100644
--- a/libpod/info.go
+++ b/libpod/info.go
@@ -5,27 +5,21 @@ import (
"bytes"
"errors"
"fmt"
- "io/ioutil"
"math"
"os"
- "os/exec"
"runtime"
- "strconv"
"strings"
"syscall"
"time"
"github.com/containers/buildah"
- "github.com/containers/common/pkg/apparmor"
- "github.com/containers/common/pkg/cgroups"
- "github.com/containers/common/pkg/seccomp"
+ "github.com/containers/buildah/pkg/util"
"github.com/containers/image/v5/pkg/sysregistriesv2"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/libpod/linkmode"
"github.com/containers/podman/v4/pkg/rootless"
"github.com/containers/storage"
"github.com/containers/storage/pkg/system"
- "github.com/opencontainers/selinux/go-selinux"
"github.com/sirupsen/logrus"
)
@@ -91,7 +85,7 @@ func (r *Runtime) hostInfo() (*define.HostInfo, error) {
hostDistributionInfo := r.GetHostDistributionInfo()
- kv, err := readKernelVersion()
+ kv, err := util.ReadKernelVersion()
if err != nil {
return nil, fmt.Errorf("error reading kernel version: %w", err)
}
@@ -101,94 +95,30 @@ func (r *Runtime) hostInfo() (*define.HostInfo, error) {
return nil, fmt.Errorf("error getting hostname: %w", err)
}
- seccompProfilePath, err := DefaultSeccompPath()
- if err != nil {
- return nil, fmt.Errorf("error getting Seccomp profile path: %w", err)
- }
-
- // Cgroups version
- unified, err := cgroups.IsCgroup2UnifiedMode()
- if err != nil {
- return nil, fmt.Errorf("error reading cgroups mode: %w", err)
- }
-
- // Get Map of all available controllers
- availableControllers, err := cgroups.GetAvailableControllers(nil, unified)
- if err != nil {
- return nil, fmt.Errorf("error getting available cgroup controllers: %w", err)
- }
cpuUtil, err := getCPUUtilization()
if err != nil {
return nil, err
}
info := define.HostInfo{
- Arch: runtime.GOARCH,
- BuildahVersion: buildah.Version,
- CgroupManager: r.config.Engine.CgroupManager,
- CgroupControllers: availableControllers,
- Linkmode: linkmode.Linkmode(),
- CPUs: runtime.NumCPU(),
- CPUUtilization: cpuUtil,
- Distribution: hostDistributionInfo,
- LogDriver: r.config.Containers.LogDriver,
- EventLogger: r.eventer.String(),
- Hostname: host,
- IDMappings: define.IDMappings{},
- Kernel: kv,
- MemFree: mi.MemFree,
- MemTotal: mi.MemTotal,
- NetworkBackend: r.config.Network.NetworkBackend,
- OS: runtime.GOOS,
- Security: define.SecurityInfo{
- AppArmorEnabled: apparmor.IsEnabled(),
- DefaultCapabilities: strings.Join(r.config.Containers.DefaultCapabilities, ","),
- Rootless: rootless.IsRootless(),
- SECCOMPEnabled: seccomp.IsEnabled(),
- SECCOMPProfilePath: seccompProfilePath,
- SELinuxEnabled: selinux.GetEnabled(),
- },
- Slirp4NetNS: define.SlirpInfo{},
- SwapFree: mi.SwapFree,
- SwapTotal: mi.SwapTotal,
- }
-
- cgroupVersion := "v1"
- if unified {
- cgroupVersion = "v2"
- }
- info.CgroupsVersion = cgroupVersion
-
- slirp4netnsPath := r.config.Engine.NetworkCmdPath
- if slirp4netnsPath == "" {
- slirp4netnsPath, _ = exec.LookPath("slirp4netns")
- }
- if slirp4netnsPath != "" {
- version, err := programVersion(slirp4netnsPath)
- if err != nil {
- logrus.Warnf("Failed to retrieve program version for %s: %v", slirp4netnsPath, err)
- }
- program := define.SlirpInfo{
- Executable: slirp4netnsPath,
- Package: packageVersion(slirp4netnsPath),
- Version: version,
- }
- info.Slirp4NetNS = program
- }
-
- if rootless.IsRootless() {
- uidmappings, err := rootless.ReadMappingsProc("/proc/self/uid_map")
- if err != nil {
- return nil, fmt.Errorf("error reading uid mappings: %w", err)
- }
- gidmappings, err := rootless.ReadMappingsProc("/proc/self/gid_map")
- if err != nil {
- return nil, fmt.Errorf("error reading gid mappings: %w", err)
- }
- idmappings := define.IDMappings{
- GIDMap: gidmappings,
- UIDMap: uidmappings,
- }
- info.IDMappings = idmappings
+ Arch: runtime.GOARCH,
+ BuildahVersion: buildah.Version,
+ Linkmode: linkmode.Linkmode(),
+ CPUs: runtime.NumCPU(),
+ CPUUtilization: cpuUtil,
+ Distribution: hostDistributionInfo,
+ LogDriver: r.config.Containers.LogDriver,
+ EventLogger: r.eventer.String(),
+ Hostname: host,
+ Kernel: kv,
+ MemFree: mi.MemFree,
+ MemTotal: mi.MemTotal,
+ NetworkBackend: r.config.Network.NetworkBackend,
+ OS: runtime.GOOS,
+ SwapFree: mi.SwapFree,
+ SwapTotal: mi.SwapTotal,
+ }
+ if err := r.setPlatformHostInfo(&info); err != nil {
+ return nil, err
}
conmonInfo, ociruntimeInfo, err := r.defaultOCIRuntime.RuntimeInfo()
@@ -199,7 +129,7 @@ func (r *Runtime) hostInfo() (*define.HostInfo, error) {
info.OCIRuntime = ociruntimeInfo
}
- duration, err := procUptime()
+ duration, err := util.ReadUptime()
if err != nil {
return nil, fmt.Errorf("error reading up time: %w", err)
}
@@ -329,31 +259,6 @@ func (r *Runtime) storeInfo() (*define.StoreInfo, error) {
return &info, nil
}
-func readKernelVersion() (string, error) {
- buf, err := ioutil.ReadFile("/proc/version")
- if err != nil {
- return "", err
- }
- f := bytes.Fields(buf)
- if len(f) < 3 {
- return string(bytes.TrimSpace(buf)), nil
- }
- return string(f[2]), nil
-}
-
-func procUptime() (time.Duration, error) {
- var zero time.Duration
- buf, err := ioutil.ReadFile("/proc/uptime")
- if err != nil {
- return zero, err
- }
- f := bytes.Fields(buf)
- if len(f) < 1 {
- return zero, errors.New("unable to parse uptime from /proc/uptime")
- }
- return time.ParseDuration(string(f[0]) + "s")
-}
-
// GetHostDistributionInfo returns a map containing the host's distribution and version
func (r *Runtime) GetHostDistributionInfo() define.DistributionInfo {
// Populate values in case we cannot find the values
@@ -385,43 +290,3 @@ func (r *Runtime) GetHostDistributionInfo() define.DistributionInfo {
}
return dist
}
-
-// getCPUUtilization Returns a CPUUsage object that summarizes CPU
-// usage for userspace, system, and idle time.
-func getCPUUtilization() (*define.CPUUsage, error) {
- f, err := os.Open("/proc/stat")
- if err != nil {
- return nil, err
- }
- defer f.Close()
- scanner := bufio.NewScanner(f)
- // Read first line of /proc/stat that has entries for system ("cpu" line)
- for scanner.Scan() {
- break
- }
- // column 1 is user, column 3 is system, column 4 is idle
- stats := strings.Fields(scanner.Text())
- return statToPercent(stats)
-}
-
-func statToPercent(stats []string) (*define.CPUUsage, error) {
- userTotal, err := strconv.ParseFloat(stats[1], 64)
- if err != nil {
- return nil, fmt.Errorf("unable to parse user value %q: %w", stats[1], err)
- }
- systemTotal, err := strconv.ParseFloat(stats[3], 64)
- if err != nil {
- return nil, fmt.Errorf("unable to parse system value %q: %w", stats[3], err)
- }
- idleTotal, err := strconv.ParseFloat(stats[4], 64)
- if err != nil {
- return nil, fmt.Errorf("unable to parse idle value %q: %w", stats[4], err)
- }
- total := userTotal + systemTotal + idleTotal
- s := define.CPUUsage{
- UserPercent: math.Round((userTotal/total*100)*100) / 100,
- SystemPercent: math.Round((systemTotal/total*100)*100) / 100,
- IdlePercent: math.Round((idleTotal/total*100)*100) / 100,
- }
- return &s, nil
-}
diff --git a/libpod/info_freebsd.go b/libpod/info_freebsd.go
new file mode 100644
index 000000000..ef7b6817c
--- /dev/null
+++ b/libpod/info_freebsd.go
@@ -0,0 +1,40 @@
+package libpod
+
+import (
+ "fmt"
+ "unsafe"
+
+ "github.com/containers/podman/v4/libpod/define"
+ "golang.org/x/sys/unix"
+)
+
+func (r *Runtime) setPlatformHostInfo(info *define.HostInfo) error {
+ return nil // nothing FreeBSD-specific is added: info is left untouched
+}
+
+// timeToPercent returns time as a percentage of total. A zero total yields
+// 0 instead of the NaN that a 0/0 floating-point division would produce.
+func timeToPercent(time uint64, total uint64) float64 {
+	if total == 0 {
+		return 0
+	}
+	return 100.0 * float64(time) / float64(total)
+}
+
+// getCPUUtilization Returns a CPUUsage object that summarizes CPU
+// usage for userspace, system, and idle time.
+//
+// The raw kern.cp_time sysctl value is interpreted as unix.CPUSTATES
+// consecutive 64-bit counters. An error is returned if the sysctl fails or
+// if it yields fewer bytes than that layout requires.
+func getCPUUtilization() (*define.CPUUsage, error) {
+	buf, err := unix.SysctlRaw("kern.cp_time")
+	if err != nil {
+		return nil, fmt.Errorf("error reading sysctl kern.cp_time: %w", err)
+	}
+
+	// Each counter is read as 8 raw bytes below, so validate the buffer
+	// length up front instead of panicking on an out-of-range index.
+	const counterSize = 8
+	if want := counterSize * unix.CPUSTATES; len(buf) < want {
+		return nil, fmt.Errorf("sysctl kern.cp_time returned %d bytes, expected at least %d", len(buf), want)
+	}
+
+	var total uint64
+	var times [unix.CPUSTATES]uint64
+
+	for i := range times {
+		// Reinterpret the next 8 bytes as one counter, in host byte order.
+		val := *(*uint64)(unsafe.Pointer(&buf[counterSize*i]))
+		times[i] = val
+		total += val
+	}
+	return &define.CPUUsage{
+		UserPercent:   timeToPercent(times[unix.CP_USER], total),
+		SystemPercent: timeToPercent(times[unix.CP_SYS], total),
+		IdlePercent:   timeToPercent(times[unix.CP_IDLE], total),
+	}, nil
+}
diff --git a/libpod/info_linux.go b/libpod/info_linux.go
new file mode 100644
index 000000000..801dcdb43
--- /dev/null
+++ b/libpod/info_linux.go
@@ -0,0 +1,132 @@
+package libpod
+
+import (
+ "bufio"
+ "fmt"
+ "math"
+ "os"
+ "os/exec"
+ "strconv"
+ "strings"
+
+ "github.com/containers/common/pkg/apparmor"
+ "github.com/containers/common/pkg/cgroups"
+ "github.com/containers/common/pkg/seccomp"
+ "github.com/containers/podman/v4/libpod/define"
+ "github.com/containers/podman/v4/pkg/rootless"
+ "github.com/opencontainers/selinux/go-selinux"
+ "github.com/sirupsen/logrus"
+)
+
+func (r *Runtime) setPlatformHostInfo(info *define.HostInfo) error { // fills in the Linux-only HostInfo fields: cgroups, security, slirp4netns and ID mappings
+ seccompProfilePath, err := DefaultSeccompPath()
+ if err != nil {
+ return fmt.Errorf("error getting Seccomp profile path: %w", err)
+ }
+
+ // Cgroups version: unified mode is reported as "v2" below, anything else as "v1"
+ unified, err := cgroups.IsCgroup2UnifiedMode()
+ if err != nil {
+ return fmt.Errorf("error reading cgroups mode: %w", err)
+ }
+
+ // Get Map of all available controllers
+ availableControllers, err := cgroups.GetAvailableControllers(nil, unified)
+ if err != nil {
+ return fmt.Errorf("error getting available cgroup controllers: %w", err)
+ }
+
+ info.CgroupManager = r.config.Engine.CgroupManager
+ info.CgroupControllers = availableControllers
+ info.IDMappings = define.IDMappings{}
+ info.Security = define.SecurityInfo{
+ AppArmorEnabled: apparmor.IsEnabled(),
+ DefaultCapabilities: strings.Join(r.config.Containers.DefaultCapabilities, ","),
+ Rootless: rootless.IsRootless(),
+ SECCOMPEnabled: seccomp.IsEnabled(),
+ SECCOMPProfilePath: seccompProfilePath,
+ SELinuxEnabled: selinux.GetEnabled(),
+ }
+ info.Slirp4NetNS = define.SlirpInfo{}
+
+ cgroupVersion := "v1"
+ if unified {
+ cgroupVersion = "v2"
+ }
+ info.CgroupsVersion = cgroupVersion
+
+ slirp4netnsPath := r.config.Engine.NetworkCmdPath
+ if slirp4netnsPath == "" {
+ slirp4netnsPath, _ = exec.LookPath("slirp4netns") // lookup error deliberately ignored: an empty path just skips the block below
+ }
+ if slirp4netnsPath != "" {
+ version, err := programVersion(slirp4netnsPath)
+ if err != nil { // non-fatal: warn and carry on
+ logrus.Warnf("Failed to retrieve program version for %s: %v", slirp4netnsPath, err)
+ }
+ program := define.SlirpInfo{
+ Executable: slirp4netnsPath,
+ Package: packageVersion(slirp4netnsPath),
+ Version: version,
+ }
+ info.Slirp4NetNS = program
+ }
+
+ if rootless.IsRootless() { // ID mappings are only read from /proc when running rootless
+ uidmappings, err := rootless.ReadMappingsProc("/proc/self/uid_map")
+ if err != nil {
+ return fmt.Errorf("error reading uid mappings: %w", err)
+ }
+ gidmappings, err := rootless.ReadMappingsProc("/proc/self/gid_map")
+ if err != nil {
+ return fmt.Errorf("error reading gid mappings: %w", err)
+ }
+ idmappings := define.IDMappings{
+ GIDMap: gidmappings,
+ UIDMap: uidmappings,
+ }
+ info.IDMappings = idmappings
+ }
+
+ return nil
+}
+
+// statToPercent turns the whitespace-separated fields of a CPU statistics
+// line into user, system and idle percentages, each rounded to two decimal
+// places. Field 1 is read as user time, field 3 as system time and field 4
+// as idle time; field 0 is not used.
+//
+// An error is returned when fewer than five fields are supplied or when one
+// of the three values is not a valid number. If all three values are zero
+// the percentages are reported as zero rather than NaN.
+func statToPercent(stats []string) (*define.CPUUsage, error) {
+	// Guard the fixed indexes used below so a short or empty line yields
+	// an error instead of an index-out-of-range panic.
+	if len(stats) < 5 {
+		return nil, fmt.Errorf("unable to parse CPU stats: expected at least 5 fields, got %d", len(stats))
+	}
+	userTotal, err := strconv.ParseFloat(stats[1], 64)
+	if err != nil {
+		return nil, fmt.Errorf("unable to parse user value %q: %w", stats[1], err)
+	}
+	systemTotal, err := strconv.ParseFloat(stats[3], 64)
+	if err != nil {
+		return nil, fmt.Errorf("unable to parse system value %q: %w", stats[3], err)
+	}
+	idleTotal, err := strconv.ParseFloat(stats[4], 64)
+	if err != nil {
+		return nil, fmt.Errorf("unable to parse idle value %q: %w", stats[4], err)
+	}
+	total := userTotal + systemTotal + idleTotal
+	if total == 0 {
+		// Avoid 0/0, which would put NaN into every field.
+		return &define.CPUUsage{}, nil
+	}
+	s := define.CPUUsage{
+		UserPercent:   math.Round((userTotal/total*100)*100) / 100,
+		SystemPercent: math.Round((systemTotal/total*100)*100) / 100,
+		IdlePercent:   math.Round((idleTotal/total*100)*100) / 100,
+	}
+	return &s, nil
+}
+
+// getCPUUtilization Returns a CPUUsage object that summarizes CPU
+// usage for userspace, system, and idle time.
+func getCPUUtilization() (*define.CPUUsage, error) {
+ f, err := os.Open("/proc/stat")
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+ scanner := bufio.NewScanner(f)
+ // Read first line of /proc/stat that has entries for system ("cpu" line)
+ for scanner.Scan() {
+ break
+ }
+ // column 1 is user, column 3 is system, column 4 is idle
+ stats := strings.Fields(scanner.Text())
+ return statToPercent(stats)
+}
diff --git a/libpod/info_unsupported.go b/libpod/info_unsupported.go
new file mode 100644
index 000000000..0aed51247
--- /dev/null
+++ b/libpod/info_unsupported.go
@@ -0,0 +1,14 @@
+//go:build !linux && !freebsd
+// +build !linux,!freebsd
+
+package libpod
+
+import (
+ "errors"
+
+ "github.com/containers/podman/v4/libpod/define"
+)
+
+func (r *Runtime) info() (*define.Info, error) {
+ return nil, errors.New("not implemented (*Runtime) info") // stub for platforms other than Linux and FreeBSD: always fails
+}
diff --git a/libpod/kube.go b/libpod/kube.go
index 8c09a6bb5..a0fb52973 100644
--- a/libpod/kube.go
+++ b/libpod/kube.go
@@ -267,6 +267,8 @@ func GenerateKubeServiceFromV1Pod(pod *v1.Pod, servicePorts []v1.ServicePort) (Y
}
service.Spec = serviceSpec
service.ObjectMeta = pod.ObjectMeta
+ // Reset the annotations for the service as the pod annotations are not needed for the service
+ service.ObjectMeta.Annotations = nil
tm := v12.TypeMeta{
Kind: "Service",
APIVersion: pod.TypeMeta.APIVersion,
diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go
index c05796768..c10c3c0b2 100644
--- a/libpod/networking_linux.go
+++ b/libpod/networking_linux.go
@@ -1357,6 +1357,11 @@ func (c *Container) NetworkConnect(nameOrID, netName string, netOpts types.PerNe
}
if err := c.runtime.state.NetworkConnect(c, netName, netOpts); err != nil {
+ // Docker compat: treat requests to attach already attached networks as a no-op, ignoring opts
+ if errors.Is(err, define.ErrNetworkConnected) && c.ensureState(define.ContainerStateConfigured) {
+ return nil
+ }
+
return err
}
c.newNetworkEvent(events.NetworkConnect, netName)
diff --git a/libpod/networking_unsupported.go b/libpod/networking_unsupported.go
new file mode 100644
index 000000000..76ffabb5e
--- /dev/null
+++ b/libpod/networking_unsupported.go
@@ -0,0 +1,86 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "errors"
+ "path/filepath"
+
+ "github.com/containers/common/libnetwork/types"
+ "github.com/containers/podman/v4/libpod/define"
+ "github.com/containers/storage/pkg/lockfile"
+)
+
+// RootlessNetNS is the non-Linux placeholder for the rootless network
+// namespace handle. dir is the base directory that getPath joins onto; Lock
+// is the lock that Cleanup expects the caller to hold.
+type RootlessNetNS struct {
+ dir string
+ Lock lockfile.Locker
+}
+
+// ocicniPortsToNetTypesPorts converts the old port format to the new one
+// while deduplicating ports into ranges.
+// This non-Linux stub ignores its input and always returns an empty slice.
+func ocicniPortsToNetTypesPorts(ports []types.OCICNIPortMapping) []types.PortMapping {
+ return []types.PortMapping{}
+}
+
+// getContainerNetworkInfo is not implemented on non-Linux platforms; it
+// always returns a nil result and an error.
+func (c *Container) getContainerNetworkInfo() (*define.InspectNetworkSettings, error) {
+ return nil, errors.New("not implemented (*Container) getContainerNetworkInfo")
+}
+
+// setupRootlessNetwork is not implemented on non-Linux platforms; it always
+// returns an error.
+func (c *Container) setupRootlessNetwork() error {
+ return errors.New("not implemented (*Container) setupRootlessNetwork")
+}
+
+// setupNetNS is not implemented on non-Linux platforms; it ignores ctr and
+// always returns an error.
+func (r *Runtime) setupNetNS(ctr *Container) error {
+ return errors.New("not implemented (*Runtime) setupNetNS")
+}
+
+// normalizeNetworkName takes a network name, a partial or a full network ID and returns the network name.
+// If the network is not found an error is returned.
+// This non-Linux stub performs no lookup and always returns an empty name and an error.
+func (r *Runtime) normalizeNetworkName(nameOrID string) (string, error) {
+ return "", errors.New("not implemented (*Runtime) normalizeNetworkName")
+}
+
+// DisconnectContainerFromNetwork removes a container from its CNI network.
+// On non-Linux platforms this is not implemented and always returns an error.
+func (r *Runtime) DisconnectContainerFromNetwork(nameOrID, netName string, force bool) error {
+ return errors.New("not implemented (*Runtime) DisconnectContainerFromNetwork")
+}
+
+// ConnectContainerToNetwork connects a container to a CNI network.
+// On non-Linux platforms this is not implemented and always returns an error.
+func (r *Runtime) ConnectContainerToNetwork(nameOrID, netName string, netOpts types.PerNetworkOptions) error {
+ return errors.New("not implemented (*Runtime) ConnectContainerToNetwork")
+}
+
+// getPath joins the given path onto the rootless netns directory (r.dir).
+func (r *RootlessNetNS) getPath(path string) string {
+ return filepath.Join(r.dir, path)
+}
+
+// Do - run the given function in the rootless netns.
+// It does not lock the rootlessCNI lock, the caller
+// should only lock when needed, e.g. for cni operations.
+// This non-Linux stub never calls toRun and always returns an error.
+func (r *RootlessNetNS) Do(toRun func() error) error {
+ return errors.New("not implemented (*RootlessNetNS) Do")
+}
+
+// Cleanup the rootless network namespace if needed.
+// It checks if we have running containers with the bridge network mode.
+// Cleanup() expects that r.Lock is locked.
+// This non-Linux stub does nothing of the above and always returns an error.
+func (r *RootlessNetNS) Cleanup(runtime *Runtime) error {
+ return errors.New("not implemented (*RootlessNetNS) Cleanup")
+}
+
+// GetRootlessNetNs returns the rootless netns object. If new is set to true
+// the rootless network namespace will be created if it does not exist already.
+// If called as root it always returns nil.
+// On success the Lock of the returned RootlessNetNS is locked and must be unlocked by the caller.
+// This non-Linux stub ignores new and always returns nil and an error.
+func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) {
+ return nil, errors.New("not implemented (*Runtime) GetRootlessNetNs")
+}
+
+// convertPortMappings will remove the HostIP part from the ports when running inside podman machine.
+// This is needed because a HostIP of 127.0.0.1 would not allow the gvproxy forwarder to reach the open ports.
+// For machine the HostIP must only be used by gvproxy and never in the VM.
+// This non-Linux stub performs no conversion and always returns an empty slice.
+func (c *Container) convertPortMappings() []types.PortMapping {
+ return []types.PortMapping{}
+}
diff --git a/libpod/oci_conmon_attach_common.go b/libpod/oci_conmon_attach_common.go
new file mode 100644
index 000000000..a9e9b2bb5
--- /dev/null
+++ b/libpod/oci_conmon_attach_common.go
@@ -0,0 +1,305 @@
+//go:build linux || freebsd
+// +build linux freebsd
+
+package libpod
+
+import (
+ "errors"
+ "fmt"
+ "io"
+ "net"
+ "os"
+ "path/filepath"
+ "syscall"
+
+ "github.com/containers/common/pkg/config"
+ "github.com/containers/common/pkg/resize"
+ "github.com/containers/common/pkg/util"
+ "github.com/containers/podman/v4/libpod/define"
+ "github.com/containers/podman/v4/pkg/errorhandling"
+ "github.com/moby/term"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
+)
+
+/* Stream identifiers used on the conmon attach socket. Sync with stdpipe_t in conmon.c */
+const (
+ AttachPipeStdin = 1
+ AttachPipeStdout = 2 // first byte of a message whose payload goes to the output stream
+ AttachPipeStderr = 3 // first byte of a message whose payload goes to the error stream
+)
+
+// Attach to the given container.
+// Does not check if state is appropriate.
+// params.Started is only required if params.Start is true.
+func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
+ passthrough := c.LogDriver() == define.PassthroughLogging
+
+ if params == nil || params.Streams == nil {
+ return fmt.Errorf("must provide parameters to Attach: %w", define.ErrInternal)
+ }
+
+ if !params.Streams.AttachOutput && !params.Streams.AttachError && !params.Streams.AttachInput && !passthrough {
+ return fmt.Errorf("must provide at least one stream to attach to: %w", define.ErrInvalidArg)
+ }
+ if params.Start && params.Started == nil {
+ return fmt.Errorf("started chan not passed when startContainer set: %w", define.ErrInternal)
+ }
+
+ keys := config.DefaultDetachKeys
+ if params.DetachKeys != nil {
+ keys = *params.DetachKeys
+ }
+
+ detachKeys, err := processDetachKeys(keys)
+ if err != nil {
+ return err
+ }
+
+ var conn *net.UnixConn
+ if !passthrough {
+ logrus.Debugf("Attaching to container %s", c.ID())
+
+ // If we have a resize, do it.
+ if params.InitialSize != nil {
+ if err := r.AttachResize(c, *params.InitialSize); err != nil {
+ return err
+ }
+ }
+
+ attachSock, err := c.AttachSocketPath()
+ if err != nil {
+ return err
+ }
+
+ conn, err = openUnixSocket(attachSock)
+ if err != nil {
+ return fmt.Errorf("failed to connect to container's attach socket: %v: %w", attachSock, err)
+ }
+ defer func() {
+ if err := conn.Close(); err != nil {
+ logrus.Errorf("unable to close socket: %q", err)
+ }
+ }()
+ }
+
+ // If starting was requested, start the container and notify when that's
+ // done.
+ if params.Start {
+ if err := c.start(); err != nil {
+ return err
+ }
+ params.Started <- true
+ }
+
+ if passthrough {
+ return nil
+ }
+
+ receiveStdoutError, stdinDone := setupStdioChannels(params.Streams, conn, detachKeys)
+ if params.AttachReady != nil {
+ params.AttachReady <- true
+ }
+ return readStdio(conn, params.Streams, receiveStdoutError, stdinDone)
+}
+
+// Attach to the given container's exec session
+// attachFd and startFd must be open file descriptors
+// attachFd must be the output side of the fd. attachFd is used for two things:
+// conmon will first send a nonce value across the pipe indicating it has set up its side of the console socket
+// this ensures attachToExec gets all of the output of the called process
+// conmon will then send the exit code of the exec process, or an error in the exec session
+// startFd must be the input side of the fd.
+// newSize resizes the tty to this size before the process is started, must be nil if the exec session has no tty
+// conmon will wait to start the exec session until the parent process has set up the console socket.
+// Once attachToExec successfully attaches to the console socket, the child conmon process responsible for calling runtime exec
+// will read from the output side of start fd, thus learning to start the child process.
+// Thus, the order goes as follows:
+// 1. conmon parent process sets up its console socket. sends on attachFd
+// 2. attachToExec attaches to the console socket after reading on attachFd and resizes the tty
+// 3. child waits on startFd for attachToExec to attach to said console socket
+// 4. attachToExec sends on startFd, signalling it has attached to the socket and child is ready to go
+// 5. child receives on startFd, runs the runtime exec command
+// attachToExec is responsible for closing startFd and attachFd
+func (c *Container) attachToExec(streams *define.AttachStreams, keys *string, sessionID string, startFd, attachFd *os.File, newSize *resize.TerminalSize) error {
+ if !streams.AttachOutput && !streams.AttachError && !streams.AttachInput {
+ return fmt.Errorf("must provide at least one stream to attach to: %w", define.ErrInvalidArg)
+ }
+ if startFd == nil || attachFd == nil {
+ return fmt.Errorf("start sync pipe and attach sync pipe must be defined for exec attach: %w", define.ErrInvalidArg)
+ }
+
+ defer errorhandling.CloseQuiet(startFd)
+ defer errorhandling.CloseQuiet(attachFd)
+
+ detachString := config.DefaultDetachKeys
+ if keys != nil {
+ detachString = *keys
+ }
+ detachKeys, err := processDetachKeys(detachString)
+ if err != nil {
+ return err
+ }
+
+ logrus.Debugf("Attaching to container %s exec session %s", c.ID(), sessionID)
+
+ // set up the socket path, such that it is the correct length and location for exec
+ sockPath, err := c.execAttachSocketPath(sessionID)
+ if err != nil {
+ return err
+ }
+
+ // 2: read from attachFd that the parent process has set up the console socket
+ if _, err := readConmonPipeData(c.ociRuntime.Name(), attachFd, ""); err != nil {
+ return err
+ }
+
+ // resize before we start the container process
+ if newSize != nil {
+ err = c.ociRuntime.ExecAttachResize(c, sessionID, *newSize)
+ if err != nil {
+ logrus.Warnf("Resize failed: %v", err)
+ }
+ }
+
+ // 2: then attach
+ conn, err := openUnixSocket(sockPath)
+ if err != nil {
+ return fmt.Errorf("failed to connect to container's attach socket: %v: %w", sockPath, err)
+ }
+ defer func() {
+ if err := conn.Close(); err != nil {
+ logrus.Errorf("Unable to close socket: %q", err)
+ }
+ }()
+
+ // start listening on stdio of the process
+ receiveStdoutError, stdinDone := setupStdioChannels(streams, conn, detachKeys)
+
+ // 4: send start message to child
+ if err := writeConmonPipeData(startFd); err != nil {
+ return err
+ }
+
+ return readStdio(conn, streams, receiveStdoutError, stdinDone)
+}
+
+// processDetachKeys converts a detach-key specification string into bytes
+// using term.ToBytes. An empty string yields an empty, non-nil slice and no
+// error; a string that term.ToBytes rejects yields an "invalid detach keys"
+// error wrapping the underlying cause.
+func processDetachKeys(keys string) ([]byte, error) {
+ // Check the validity of the provided keys first
+ if len(keys) == 0 {
+ return []byte{}, nil
+ }
+ detachKeys, err := term.ToBytes(keys)
+ if err != nil {
+ return nil, fmt.Errorf("invalid detach keys: %w", err)
+ }
+ return detachKeys, nil
+}
+
+// registerResizeFunc passes r to resize.HandleResizing together with a
+// callback that forwards a terminal size to the "ctl" file under bundlePath,
+// written as the line "1 <height> <width>".
+// Errors are only logged, never returned: failure to open the file at debug
+// level, failure to write at warning level.
+func registerResizeFunc(r <-chan resize.TerminalSize, bundlePath string) {
+ resize.HandleResizing(r, func(size resize.TerminalSize) {
+ controlPath := filepath.Join(bundlePath, "ctl")
+ controlFile, err := os.OpenFile(controlPath, unix.O_WRONLY, 0)
+ if err != nil {
+ logrus.Debugf("Could not open ctl file: %v", err)
+ return
+ }
+ defer controlFile.Close()
+
+ logrus.Debugf("Received a resize event: %+v", size)
+ if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 1, size.Height, size.Width); err != nil {
+ logrus.Warnf("Failed to write to control file to resize terminal: %v", err)
+ }
+ })
+}
+
+// setupStdioChannels starts the two goroutines that move data over conn and
+// returns one result channel for each.
+//
+// The first channel receives the result of redirectResponseToOutputStreams
+// (container output read from conn). The second receives the result of
+// copying streams.InputStream to conn with detach-key handling, or nil
+// immediately when streams.AttachInput is false. Each channel receives
+// exactly one value.
+func setupStdioChannels(streams *define.AttachStreams, conn *net.UnixConn, detachKeys []byte) (chan error, chan error) {
+ // Buffered (size 1) so the goroutine can always deliver its single result
+ // and exit, even if the caller has already returned without receiving it.
+ receiveStdoutError := make(chan error, 1)
+ go func() {
+ receiveStdoutError <- redirectResponseToOutputStreams(streams.OutputStream, streams.ErrorStream, streams.AttachOutput, streams.AttachError, conn)
+ }()
+
+ // Buffered for the same reason as receiveStdoutError.
+ stdinDone := make(chan error, 1)
+ go func() {
+ var err error
+ if streams.AttachInput {
+ _, err = util.CopyDetachable(conn, streams.InputStream, detachKeys)
+ }
+ stdinDone <- err
+ }()
+
+ return receiveStdoutError, stdinDone
+}
+
+// redirectResponseToOutputStreams reads from conn in a loop and forwards the
+// data to outputStream or errorStream. The first byte of every read selects
+// the stream (AttachPipeStdout or AttachPipeStderr); the remaining bytes are
+// written only if the matching writeOutput/writeError flag is set.
+//
+// The loop ends without error when a read reports io.EOF or ECONNRESET. Any
+// other read error, a write error, or a short write (io.ErrShortWrite) is
+// returned. A read whose selected writer is nil, or whose first byte is
+// neither stdout nor stderr, makes the function return an error immediately.
+func redirectResponseToOutputStreams(outputStream, errorStream io.Writer, writeOutput, writeError bool, conn io.Reader) error {
+ var err error
+ buf := make([]byte, 8192+1) /* Sync with conmon STDIO_BUF_SIZE */
+ for {
+ nr, er := conn.Read(buf)
+ if nr > 0 {
+ var dst io.Writer
+ var doWrite bool
+ switch buf[0] {
+ case AttachPipeStdout:
+ dst = outputStream
+ doWrite = writeOutput
+ case AttachPipeStderr:
+ dst = errorStream
+ doWrite = writeError
+ default:
+ logrus.Infof("Received unexpected attach type %+d", buf[0])
+ }
+ // dst is still nil for an unexpected type, or when the caller passed a nil writer.
+ if dst == nil {
+ return errors.New("output destination cannot be nil")
+ }
+
+ if doWrite {
+ // Skip the leading stream-type byte; only the payload is written.
+ nw, ew := dst.Write(buf[1:nr])
+ if ew != nil {
+ err = ew
+ break
+ }
+ // nw counts payload bytes only, hence the +1 for the stream-type byte.
+ if nr != nw+1 {
+ err = io.ErrShortWrite
+ break
+ }
+ }
+ }
+ // EOF and a reset connection both end the loop without reporting an error.
+ if errors.Is(er, io.EOF) || errors.Is(er, syscall.ECONNRESET) {
+ break
+ }
+ if er != nil {
+ err = er
+ break
+ }
+ }
+ return err
+}
+
+// readStdio waits for the attach session to finish, using the two result
+// channels produced by setupStdioChannels.
+//
+// If the output side finishes first, the write half of conn is closed and the
+// output error (possibly nil) is returned. If the stdin side finishes first:
+//   - with define.ErrDetach, the write half is closed and ErrDetach is returned;
+//   - with nil, the write half is closed to signal end of input;
+//   - for nil or any other stdin error, the function then waits for the output
+//     side when stdout or stderr is attached, and otherwise returns nil (a
+//     non-detach stdin error is not propagated).
+//
+// Failures of CloseWrite are logged and do not change the returned error.
+func readStdio(conn *net.UnixConn, streams *define.AttachStreams, receiveStdoutError, stdinDone chan error) error {
+ var err error
+ select {
+ case err = <-receiveStdoutError:
+ if err := conn.CloseWrite(); err != nil {
+ logrus.Errorf("Failed to close stdin: %v", err)
+ }
+ return err
+ case err = <-stdinDone:
+ // errors.Is instead of ==, so a wrapped ErrDetach is still recognised.
+ if errors.Is(err, define.ErrDetach) {
+ if err := conn.CloseWrite(); err != nil {
+ logrus.Errorf("Failed to close stdin: %v", err)
+ }
+ return err
+ }
+ if err == nil {
+ // copy stdin is done, close it
+ if connErr := conn.CloseWrite(); connErr != nil {
+ logrus.Errorf("Unable to close conn: %v", connErr)
+ }
+ }
+ if streams.AttachOutput || streams.AttachError {
+ return <-receiveStdoutError
+ }
+ }
+ return nil
+}
diff --git a/libpod/oci_conmon_attach_freebsd.go b/libpod/oci_conmon_attach_freebsd.go
new file mode 100644
index 000000000..de0054381
--- /dev/null
+++ b/libpod/oci_conmon_attach_freebsd.go
@@ -0,0 +1,21 @@
+package libpod
+
+import (
+ "net"
+ "os"
+ "path/filepath"
+)
+
+// openUnixSocket dials the "unixpacket" socket at path.
+// The connection is made through a symlink named "sock" inside a freshly
+// created temporary directory; that directory is removed again when the
+// function returns, whether or not the dial succeeded.
+func openUnixSocket(path string) (*net.UnixConn, error) {
+ // socket paths can be too long to fit into a sockaddr_un so we create a shorter symlink.
+ tmpdir, err := os.MkdirTemp("", "podman")
+ if err != nil {
+ return nil, err
+ }
+ defer os.RemoveAll(tmpdir)
+ tmpsockpath := filepath.Join(tmpdir, "sock")
+ if err := os.Symlink(path, tmpsockpath); err != nil {
+ return nil, err
+ }
+ return net.DialUnix("unixpacket", nil, &net.UnixAddr{Name: tmpsockpath, Net: "unixpacket"})
+}
diff --git a/libpod/oci_conmon_attach_linux.go b/libpod/oci_conmon_attach_linux.go
index aa55aa6f5..f1aa89d3e 100644
--- a/libpod/oci_conmon_attach_linux.go
+++ b/libpod/oci_conmon_attach_linux.go
@@ -1,34 +1,12 @@
-//go:build linux
-// +build linux
-
package libpod
import (
- "errors"
"fmt"
- "io"
"net"
- "os"
- "path/filepath"
- "syscall"
- "github.com/containers/common/pkg/config"
- "github.com/containers/common/pkg/resize"
- "github.com/containers/common/pkg/util"
- "github.com/containers/podman/v4/libpod/define"
- "github.com/containers/podman/v4/pkg/errorhandling"
- "github.com/moby/term"
- "github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
-/* Sync with stdpipe_t in conmon.c */
-const (
- AttachPipeStdin = 1
- AttachPipeStdout = 2
- AttachPipeStderr = 3
-)
-
func openUnixSocket(path string) (*net.UnixConn, error) {
fd, err := unix.Open(path, unix.O_PATH, 0)
if err != nil {
@@ -37,278 +15,3 @@ func openUnixSocket(path string) (*net.UnixConn, error) {
defer unix.Close(fd)
return net.DialUnix("unixpacket", nil, &net.UnixAddr{Name: fmt.Sprintf("/proc/self/fd/%d", fd), Net: "unixpacket"})
}
-
-// Attach to the given container.
-// Does not check if state is appropriate.
-// started is only required if startContainer is true.
-func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
- passthrough := c.LogDriver() == define.PassthroughLogging
-
- if params == nil || params.Streams == nil {
- return fmt.Errorf("must provide parameters to Attach: %w", define.ErrInternal)
- }
-
- if !params.Streams.AttachOutput && !params.Streams.AttachError && !params.Streams.AttachInput && !passthrough {
- return fmt.Errorf("must provide at least one stream to attach to: %w", define.ErrInvalidArg)
- }
- if params.Start && params.Started == nil {
- return fmt.Errorf("started chan not passed when startContainer set: %w", define.ErrInternal)
- }
-
- keys := config.DefaultDetachKeys
- if params.DetachKeys != nil {
- keys = *params.DetachKeys
- }
-
- detachKeys, err := processDetachKeys(keys)
- if err != nil {
- return err
- }
-
- var conn *net.UnixConn
- if !passthrough {
- logrus.Debugf("Attaching to container %s", c.ID())
-
- // If we have a resize, do it.
- if params.InitialSize != nil {
- if err := r.AttachResize(c, *params.InitialSize); err != nil {
- return err
- }
- }
-
- attachSock, err := c.AttachSocketPath()
- if err != nil {
- return err
- }
-
- conn, err = openUnixSocket(attachSock)
- if err != nil {
- return fmt.Errorf("failed to connect to container's attach socket: %v: %w", attachSock, err)
- }
- defer func() {
- if err := conn.Close(); err != nil {
- logrus.Errorf("unable to close socket: %q", err)
- }
- }()
- }
-
- // If starting was requested, start the container and notify when that's
- // done.
- if params.Start {
- if err := c.start(); err != nil {
- return err
- }
- params.Started <- true
- }
-
- if passthrough {
- return nil
- }
-
- receiveStdoutError, stdinDone := setupStdioChannels(params.Streams, conn, detachKeys)
- if params.AttachReady != nil {
- params.AttachReady <- true
- }
- return readStdio(conn, params.Streams, receiveStdoutError, stdinDone)
-}
-
-// Attach to the given container's exec session
-// attachFd and startFd must be open file descriptors
-// attachFd must be the output side of the fd. attachFd is used for two things:
-// conmon will first send a nonce value across the pipe indicating it has set up its side of the console socket
-// this ensures attachToExec gets all of the output of the called process
-// conmon will then send the exit code of the exec process, or an error in the exec session
-// startFd must be the input side of the fd.
-// newSize resizes the tty to this size before the process is started, must be nil if the exec session has no tty
-// conmon will wait to start the exec session until the parent process has set up the console socket.
-// Once attachToExec successfully attaches to the console socket, the child conmon process responsible for calling runtime exec
-// will read from the output side of start fd, thus learning to start the child process.
-// Thus, the order goes as follow:
-// 1. conmon parent process sets up its console socket. sends on attachFd
-// 2. attachToExec attaches to the console socket after reading on attachFd and resizes the tty
-// 3. child waits on startFd for attachToExec to attach to said console socket
-// 4. attachToExec sends on startFd, signalling it has attached to the socket and child is ready to go
-// 5. child receives on startFd, runs the runtime exec command
-// attachToExec is responsible for closing startFd and attachFd
-func (c *Container) attachToExec(streams *define.AttachStreams, keys *string, sessionID string, startFd, attachFd *os.File, newSize *resize.TerminalSize) error {
- if !streams.AttachOutput && !streams.AttachError && !streams.AttachInput {
- return fmt.Errorf("must provide at least one stream to attach to: %w", define.ErrInvalidArg)
- }
- if startFd == nil || attachFd == nil {
- return fmt.Errorf("start sync pipe and attach sync pipe must be defined for exec attach: %w", define.ErrInvalidArg)
- }
-
- defer errorhandling.CloseQuiet(startFd)
- defer errorhandling.CloseQuiet(attachFd)
-
- detachString := config.DefaultDetachKeys
- if keys != nil {
- detachString = *keys
- }
- detachKeys, err := processDetachKeys(detachString)
- if err != nil {
- return err
- }
-
- logrus.Debugf("Attaching to container %s exec session %s", c.ID(), sessionID)
-
- // set up the socket path, such that it is the correct length and location for exec
- sockPath, err := c.execAttachSocketPath(sessionID)
- if err != nil {
- return err
- }
-
- // 2: read from attachFd that the parent process has set up the console socket
- if _, err := readConmonPipeData(c.ociRuntime.Name(), attachFd, ""); err != nil {
- return err
- }
-
- // resize before we start the container process
- if newSize != nil {
- err = c.ociRuntime.ExecAttachResize(c, sessionID, *newSize)
- if err != nil {
- logrus.Warnf("Resize failed: %v", err)
- }
- }
-
- // 2: then attach
- conn, err := openUnixSocket(sockPath)
- if err != nil {
- return fmt.Errorf("failed to connect to container's attach socket: %v: %w", sockPath, err)
- }
- defer func() {
- if err := conn.Close(); err != nil {
- logrus.Errorf("Unable to close socket: %q", err)
- }
- }()
-
- // start listening on stdio of the process
- receiveStdoutError, stdinDone := setupStdioChannels(streams, conn, detachKeys)
-
- // 4: send start message to child
- if err := writeConmonPipeData(startFd); err != nil {
- return err
- }
-
- return readStdio(conn, streams, receiveStdoutError, stdinDone)
-}
-
-func processDetachKeys(keys string) ([]byte, error) {
- // Check the validity of the provided keys first
- if len(keys) == 0 {
- return []byte{}, nil
- }
- detachKeys, err := term.ToBytes(keys)
- if err != nil {
- return nil, fmt.Errorf("invalid detach keys: %w", err)
- }
- return detachKeys, nil
-}
-
-func registerResizeFunc(r <-chan resize.TerminalSize, bundlePath string) {
- resize.HandleResizing(r, func(size resize.TerminalSize) {
- controlPath := filepath.Join(bundlePath, "ctl")
- controlFile, err := os.OpenFile(controlPath, unix.O_WRONLY, 0)
- if err != nil {
- logrus.Debugf("Could not open ctl file: %v", err)
- return
- }
- defer controlFile.Close()
-
- logrus.Debugf("Received a resize event: %+v", size)
- if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 1, size.Height, size.Width); err != nil {
- logrus.Warnf("Failed to write to control file to resize terminal: %v", err)
- }
- })
-}
-
-func setupStdioChannels(streams *define.AttachStreams, conn *net.UnixConn, detachKeys []byte) (chan error, chan error) {
- receiveStdoutError := make(chan error)
- go func() {
- receiveStdoutError <- redirectResponseToOutputStreams(streams.OutputStream, streams.ErrorStream, streams.AttachOutput, streams.AttachError, conn)
- }()
-
- stdinDone := make(chan error)
- go func() {
- var err error
- if streams.AttachInput {
- _, err = util.CopyDetachable(conn, streams.InputStream, detachKeys)
- }
- stdinDone <- err
- }()
-
- return receiveStdoutError, stdinDone
-}
-
-func redirectResponseToOutputStreams(outputStream, errorStream io.Writer, writeOutput, writeError bool, conn io.Reader) error {
- var err error
- buf := make([]byte, 8192+1) /* Sync with conmon STDIO_BUF_SIZE */
- for {
- nr, er := conn.Read(buf)
- if nr > 0 {
- var dst io.Writer
- var doWrite bool
- switch buf[0] {
- case AttachPipeStdout:
- dst = outputStream
- doWrite = writeOutput
- case AttachPipeStderr:
- dst = errorStream
- doWrite = writeError
- default:
- logrus.Infof("Received unexpected attach type %+d", buf[0])
- }
- if dst == nil {
- return errors.New("output destination cannot be nil")
- }
-
- if doWrite {
- nw, ew := dst.Write(buf[1:nr])
- if ew != nil {
- err = ew
- break
- }
- if nr != nw+1 {
- err = io.ErrShortWrite
- break
- }
- }
- }
- if errors.Is(er, io.EOF) || errors.Is(er, syscall.ECONNRESET) {
- break
- }
- if er != nil {
- err = er
- break
- }
- }
- return err
-}
-
-func readStdio(conn *net.UnixConn, streams *define.AttachStreams, receiveStdoutError, stdinDone chan error) error {
- var err error
- select {
- case err = <-receiveStdoutError:
- if err := conn.CloseWrite(); err != nil {
- logrus.Errorf("Failed to close stdin: %v", err)
- }
- return err
- case err = <-stdinDone:
- if err == define.ErrDetach {
- if err := conn.CloseWrite(); err != nil {
- logrus.Errorf("Failed to close stdin: %v", err)
- }
- return err
- }
- if err == nil {
- // copy stdin is done, close it
- if connErr := conn.CloseWrite(); connErr != nil {
- logrus.Errorf("Unable to close conn: %v", connErr)
- }
- }
- if streams.AttachOutput || streams.AttachError {
- return <-receiveStdoutError
- }
- }
- return nil
-}
diff --git a/libpod/oci_conmon_common.go b/libpod/oci_conmon_common.go
new file mode 100644
index 000000000..b96f92d3a
--- /dev/null
+++ b/libpod/oci_conmon_common.go
@@ -0,0 +1,1591 @@
+//go:build linux || freebsd
+// +build linux freebsd
+
+package libpod
+
+import (
+ "bufio"
+ "bytes"
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "net"
+ "net/http"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "sync"
+ "syscall"
+ "text/template"
+ "time"
+
+ "github.com/containers/common/pkg/config"
+ "github.com/containers/common/pkg/resize"
+ cutil "github.com/containers/common/pkg/util"
+ conmonConfig "github.com/containers/conmon/runner/config"
+ "github.com/containers/podman/v4/libpod/define"
+ "github.com/containers/podman/v4/libpod/logs"
+ "github.com/containers/podman/v4/pkg/checkpoint/crutils"
+ "github.com/containers/podman/v4/pkg/errorhandling"
+ "github.com/containers/podman/v4/pkg/rootless"
+ "github.com/containers/podman/v4/pkg/specgenutil"
+ "github.com/containers/podman/v4/pkg/util"
+ "github.com/containers/podman/v4/utils"
+ "github.com/containers/storage/pkg/homedir"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
+)
+
+const (
+ // This is Conmon's STDIO_BUF_SIZE. I don't believe we have access to it
+ // directly from the Go code, so const it here
+ // Important: The conmon attach socket uses an extra byte at the beginning of each
+ // message to specify the STREAM so we have to increase the buffer size by one
+ bufferSize = conmonConfig.BufSize + 1
+)
+
+// ConmonOCIRuntime is an OCI runtime managed by Conmon.
+// All fields are populated by newConmonOCIRuntime.
+// TODO: Make all calls to OCI runtime have a timeout.
+type ConmonOCIRuntime struct {
+ name string // runtime name as given to the constructor; returned by Name()
+ path string // runtime executable chosen by the constructor; returned by Path()
+ conmonPath string // conmon path as given to the constructor
+ conmonEnv []string // copied from Engine.ConmonEnvVars
+ tmpDir string // copied from Engine.TmpDir
+ exitsDir string // the "exits" directory under tmpDir
+ logSizeMax int64 // copied from Containers.LogSizeMax
+ noPivot bool // copied from Engine.NoPivotRoot
+ reservePorts bool // copied from Engine.EnablePortReservation
+ runtimeFlags []string // extra runtime flags, e.g. placed before "start" in StartContainer
+ supportsJSON bool // base name of the runtime is listed in Engine.RuntimeSupportsJSON
+ supportsKVM bool // base name of the runtime is listed in Engine.RuntimeSupportsKVM
+ supportsNoCgroups bool // base name of the runtime is listed in Engine.RuntimeSupportsNoCgroups
+ enableKeyring bool // copied from Containers.EnableKeyring
+}
+
+// Make a new Conmon-based OCI runtime with the given options.
+// Conmon will wrap the given OCI runtime, which can be `runc`, `crun`, or
+// any runtime with a runc-compatible CLI.
+// The first path that points to a valid executable will be used.
+// Deliberately private. Someone should not be able to construct this outside of
+// libpod.
+func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtimeFlags []string, runtimeCfg *config.Config) (OCIRuntime, error) {
+ if name == "" {
+ return nil, fmt.Errorf("the OCI runtime must be provided a non-empty name: %w", define.ErrInvalidArg)
+ }
+
+ // Make lookup tables for runtime support
+ supportsJSON := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsJSON))
+ supportsNoCgroups := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsNoCgroups))
+ supportsKVM := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsKVM))
+ for _, r := range runtimeCfg.Engine.RuntimeSupportsJSON {
+ supportsJSON[r] = true
+ }
+ for _, r := range runtimeCfg.Engine.RuntimeSupportsNoCgroups {
+ supportsNoCgroups[r] = true
+ }
+ for _, r := range runtimeCfg.Engine.RuntimeSupportsKVM {
+ supportsKVM[r] = true
+ }
+
+ runtime := new(ConmonOCIRuntime)
+ runtime.name = name
+ runtime.conmonPath = conmonPath
+ runtime.runtimeFlags = runtimeFlags
+
+ runtime.conmonEnv = runtimeCfg.Engine.ConmonEnvVars
+ runtime.tmpDir = runtimeCfg.Engine.TmpDir
+ runtime.logSizeMax = runtimeCfg.Containers.LogSizeMax
+ runtime.noPivot = runtimeCfg.Engine.NoPivotRoot
+ runtime.reservePorts = runtimeCfg.Engine.EnablePortReservation
+ runtime.enableKeyring = runtimeCfg.Containers.EnableKeyring
+
+ // TODO: probe OCI runtime for feature and enable automatically if
+ // available.
+
+ base := filepath.Base(name)
+ runtime.supportsJSON = supportsJSON[base]
+ runtime.supportsNoCgroups = supportsNoCgroups[base]
+ runtime.supportsKVM = supportsKVM[base]
+
+ foundPath := false
+ for _, path := range paths {
+ stat, err := os.Stat(path)
+ if err != nil {
+ if os.IsNotExist(err) {
+ continue
+ }
+ return nil, fmt.Errorf("cannot stat OCI runtime %s path: %w", name, err)
+ }
+ if !stat.Mode().IsRegular() {
+ continue
+ }
+ foundPath = true
+ logrus.Tracef("found runtime %q", path)
+ runtime.path = path
+ break
+ }
+
+ // Search the $PATH as last fallback
+ if !foundPath {
+ if foundRuntime, err := exec.LookPath(name); err == nil {
+ foundPath = true
+ runtime.path = foundRuntime
+ logrus.Debugf("using runtime %q from $PATH: %q", name, foundRuntime)
+ }
+ }
+
+ if !foundPath {
+ return nil, fmt.Errorf("no valid executable found for OCI runtime %s: %w", name, define.ErrInvalidArg)
+ }
+
+ runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits")
+
+ // Create the exit files directory
+ if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil {
+ // The directory is allowed to exist
+ if !os.IsExist(err) {
+ return nil, fmt.Errorf("error creating OCI runtime exit files directory: %w", err)
+ }
+ }
+ return runtime, nil
+}
+
+// Name returns the name of the runtime being wrapped by Conmon.
+func (r *ConmonOCIRuntime) Name() string {
+ return r.name
+}
+
+// Path returns the path of the OCI runtime being wrapped by Conmon.
+func (r *ConmonOCIRuntime) Path() string {
+ return r.path
+}
+
+// hasCurrentUserMapped checks whether the current user is mapped inside the container user namespace.
+// It returns true when the container has neither UID nor GID mappings, or
+// when the effective UID of this process lies inside the host range
+// [HostID, HostID+Size) of one of the UID mappings. GID mappings are only
+// consulted for the "no mappings at all" check.
+func hasCurrentUserMapped(ctr *Container) bool {
+ if len(ctr.config.IDMappings.UIDMap) == 0 && len(ctr.config.IDMappings.GIDMap) == 0 {
+ return true
+ }
+ uid := os.Geteuid()
+ for _, m := range ctr.config.IDMappings.UIDMap {
+ if uid >= m.HostID && uid < m.HostID+m.Size {
+ return true
+ }
+ }
+ return false
+}
+
+// CreateContainer creates a container.
+// The container's run directory is always passed to makeAccessible first.
+// When the current user is not mapped into the container's user namespace,
+// the run, engine tmp, static, mountpoint and volume directories are also
+// passed to makeAccessible with the container's root UID/GID. Creation itself
+// is delegated to createRootlessContainer or createOCIContainer, and their
+// results are returned unchanged.
+func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
+ // always make the run dir accessible to the current user so that the PID files can be read without
+ // being in the rootless user namespace.
+ if err := makeAccessible(ctr.state.RunDir, 0, 0); err != nil {
+ return 0, err
+ }
+ if !hasCurrentUserMapped(ctr) {
+ for _, i := range []string{ctr.state.RunDir, ctr.runtime.config.Engine.TmpDir, ctr.config.StaticDir, ctr.state.Mountpoint, ctr.runtime.config.Engine.VolumePath} {
+ if err := makeAccessible(i, ctr.RootUID(), ctr.RootGID()); err != nil {
+ return 0, err
+ }
+ }
+
+ // if we are running a non privileged container, be sure to umount some kernel paths so they are not
+ // bind mounted inside the container at all.
+ if !ctr.config.Privileged && !rootless.IsRootless() {
+ return r.createRootlessContainer(ctr, restoreOptions)
+ }
+ }
+ return r.createOCIContainer(ctr, restoreOptions)
+}
+
+// UpdateContainerStatus retrieves the current status of the container from the
+// runtime. It updates the container's state but does not save it.
+// NOTE: this method has no useRuntime parameter: it always runs
+// "<runtime> state <container ID>" and derives the state from that output. The
+// earlier wording about only checking the conmon exit file does not apply here.
+func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container) error {
+ runtimeDir, err := util.GetRuntimeDir()
+ if err != nil {
+ return err
+ }
+
+ // Store old state so we know if we were already stopped
+ oldState := ctr.state.State
+
+ state := new(spec.State)
+
+ cmd := exec.Command(r.path, "state", ctr.ID())
+ cmd.Env = append(cmd.Env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir))
+
+ outPipe, err := cmd.StdoutPipe()
+ if err != nil {
+ return fmt.Errorf("getting stdout pipe: %w", err)
+ }
+ errPipe, err := cmd.StderrPipe()
+ if err != nil {
+ return fmt.Errorf("getting stderr pipe: %w", err)
+ }
+
+ if err := cmd.Start(); err != nil {
+ out, err2 := ioutil.ReadAll(errPipe)
+ if err2 != nil {
+ return fmt.Errorf("error getting container %s state: %w", ctr.ID(), err)
+ }
+ if strings.Contains(string(out), "does not exist") || strings.Contains(string(out), "No such file") {
+ if err := ctr.removeConmonFiles(); err != nil {
+ logrus.Debugf("unable to remove conmon files for container %s", ctr.ID())
+ }
+ ctr.state.ExitCode = -1
+ ctr.state.FinishedTime = time.Now()
+ ctr.state.State = define.ContainerStateExited
+ return ctr.runtime.state.AddContainerExitCode(ctr.ID(), ctr.state.ExitCode)
+ }
+ return fmt.Errorf("error getting container %s state. stderr/out: %s: %w", ctr.ID(), out, err)
+ }
+ defer func() {
+ _ = cmd.Wait()
+ }()
+
+ if err := errPipe.Close(); err != nil {
+ return err
+ }
+ out, err := ioutil.ReadAll(outPipe)
+ if err != nil {
+ return fmt.Errorf("error reading stdout: %s: %w", ctr.ID(), err)
+ }
+ if err := json.NewDecoder(bytes.NewBuffer(out)).Decode(state); err != nil {
+ return fmt.Errorf("error decoding container status for container %s: %w", ctr.ID(), err)
+ }
+ ctr.state.PID = state.Pid
+
+ switch state.Status {
+ case "created":
+ ctr.state.State = define.ContainerStateCreated
+ case "paused":
+ ctr.state.State = define.ContainerStatePaused
+ case "running":
+ ctr.state.State = define.ContainerStateRunning
+ case "stopped":
+ ctr.state.State = define.ContainerStateStopped
+ default:
+ return fmt.Errorf("unrecognized status returned by runtime for container %s: %s: %w",
+ ctr.ID(), state.Status, define.ErrInternal)
+ }
+
+ // Handle ContainerStateStopping - keep it unless the container
+ // transitioned to no longer running.
+ if oldState == define.ContainerStateStopping && (ctr.state.State == define.ContainerStatePaused || ctr.state.State == define.ContainerStateRunning) {
+ ctr.state.State = define.ContainerStateStopping
+ }
+
+ return nil
+}
+
+// StartContainer starts the given container by running "<runtime> start <ID>"
+// with the runtime's configured flags.
+// Sets time the container was started, but does not save it.
+func (r *ConmonOCIRuntime) StartContainer(ctr *Container) error {
+	// TODO: streams should probably *not* be our STDIN/OUT/ERR - redirect to buffers?
+	runtimeDir, err := util.GetRuntimeDir()
+	if err != nil {
+		return err
+	}
+	env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+	if path, ok := os.LookupEnv("PATH"); ok {
+		env = append(env, fmt.Sprintf("PATH=%s", path))
+	}
+
+	// Build the argument list in a fresh slice: appending directly to
+	// r.runtimeFlags could write into its shared backing array.
+	args := make([]string, 0, len(r.runtimeFlags)+2)
+	args = append(args, r.runtimeFlags...)
+	args = append(args, "start", ctr.ID())
+
+	if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...); err != nil {
+		return err
+	}
+
+	ctr.state.StartedTime = time.Now()
+
+	return nil
+}
+
+// KillContainer delivers the given signal to the container through the OCI
+// runtime's "kill" command.
+// When all is true, "--all" is passed so every PID in the container is
+// signalled; this is only supported if the container created cgroups.
+// If the runtime command fails, the container status is refreshed and
+// define.ErrCtrStateInvalid is returned when the container turns out to be
+// stopped or exited already.
+func (r *ConmonOCIRuntime) KillContainer(ctr *Container, signal uint, all bool) error {
+	logrus.Debugf("Sending signal %d to container %s", signal, ctr.ID())
+	runtimeDir, err := util.GetRuntimeDir()
+	if err != nil {
+		return err
+	}
+	env := []string{"XDG_RUNTIME_DIR=" + runtimeDir}
+
+	var killArgs []string
+	killArgs = append(killArgs, r.runtimeFlags...)
+	killArgs = append(killArgs, "kill")
+	if all {
+		killArgs = append(killArgs, "--all")
+	}
+	killArgs = append(killArgs, ctr.ID(), strconv.FormatUint(uint64(signal), 10))
+
+	execErr := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, killArgs...)
+	if execErr == nil {
+		return nil
+	}
+
+	// Update container state - there's a chance we failed because
+	// the container exited in the meantime.
+	if updateErr := r.UpdateContainerStatus(ctr); updateErr != nil {
+		logrus.Infof("Error updating status for container %s: %v", ctr.ID(), updateErr)
+	}
+	if ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited) {
+		return define.ErrCtrStateInvalid
+	}
+	return fmt.Errorf("error sending signal to container %s: %w", ctr.ID(), execErr)
+}
+
+// StopContainer stops a container, first using its given stop signal (or
+// SIGTERM if no signal was specified), then using SIGKILL.
+// Timeout is given in seconds. If timeout is 0, the container will be
+// immediately killed with SIGKILL.
+// Does not set finished time for container, assumes you will run updateStatus
+// after to pull the exit code.
+//
+// If sending a signal fails and the container's PID is gone (ESRCH), nil is
+// returned; otherwise the error from sending the signal is returned.
+func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool) error {
+	logrus.Debugf("Stopping container %s (PID %d)", ctr.ID(), ctr.state.PID)
+
+	// Ping the container to see if it's alive
+	// If it's not, it's already stopped, return
+	if err := unix.Kill(ctr.state.PID, 0); err == unix.ESRCH {
+		return nil
+	}
+
+	stopSignal := ctr.config.StopSignal
+	if stopSignal == 0 {
+		stopSignal = uint(syscall.SIGTERM)
+	}
+
+	if timeout > 0 {
+		if err := r.KillContainer(ctr, stopSignal, all); err != nil {
+			// Is the container gone?
+			// If so, it probably died between the first check and
+			// our sending the signal
+			// The container is stopped, so exit cleanly
+			// The ping result gets its own name so that it cannot shadow
+			// (and silently replace) the KillContainer error.
+			if pingErr := unix.Kill(ctr.state.PID, 0); pingErr == unix.ESRCH {
+				return nil
+			}
+
+			return err
+		}
+
+		if err := waitContainerStop(ctr, time.Duration(timeout)*time.Second); err != nil {
+			logrus.Debugf("Timed out stopping container %s with %s, resorting to SIGKILL: %v", ctr.ID(), unix.SignalName(syscall.Signal(stopSignal)), err)
+			logrus.Warnf("StopSignal %s failed to stop container %s in %d seconds, resorting to SIGKILL", unix.SignalName(syscall.Signal(stopSignal)), ctr.Name(), timeout)
+		} else {
+			// No error, the container is dead
+			return nil
+		}
+	}
+
+	if err := r.KillContainer(ctr, uint(syscall.SIGKILL), all); err != nil {
+		// Again, check if the container is gone. If it is, exit cleanly.
+		if pingErr := unix.Kill(ctr.state.PID, 0); pingErr == unix.ESRCH {
+			return nil
+		}
+
+		// Wrap the KillContainer error; the ping result may well be nil.
+		return fmt.Errorf("error sending SIGKILL to container %s: %w", ctr.ID(), err)
+	}
+
+	// Give runtime a few seconds to make it happen
+	if err := waitContainerStop(ctr, killContainerTimeout); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// DeleteContainer deletes a container from the OCI runtime by running
+// "<runtime> delete --force <ID>" with the runtime's configured flags.
+func (r *ConmonOCIRuntime) DeleteContainer(ctr *Container) error {
+	runtimeDir, err := util.GetRuntimeDir()
+	if err != nil {
+		return err
+	}
+	env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+
+	// Use a fresh slice: appending directly to r.runtimeFlags could write
+	// into its shared backing array.
+	args := make([]string, 0, len(r.runtimeFlags)+3)
+	args = append(args, r.runtimeFlags...)
+	args = append(args, "delete", "--force", ctr.ID())
+
+	return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...)
+}
+
+// PauseContainer pauses the given container by running "<runtime> pause <ID>"
+// with the runtime's configured flags.
+func (r *ConmonOCIRuntime) PauseContainer(ctr *Container) error {
+	runtimeDir, err := util.GetRuntimeDir()
+	if err != nil {
+		return err
+	}
+	env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+
+	// Use a fresh slice: appending directly to r.runtimeFlags could write
+	// into its shared backing array.
+	args := make([]string, 0, len(r.runtimeFlags)+2)
+	args = append(args, r.runtimeFlags...)
+	args = append(args, "pause", ctr.ID())
+
+	return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...)
+}
+
+// UnpauseContainer unpauses the given container by running
+// "<runtime> resume <ID>" with the runtime's configured flags.
+func (r *ConmonOCIRuntime) UnpauseContainer(ctr *Container) error {
+	runtimeDir, err := util.GetRuntimeDir()
+	if err != nil {
+		return err
+	}
+	env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+
+	// Use a fresh slice: appending directly to r.runtimeFlags could write
+	// into its shared backing array.
+	args := make([]string, 0, len(r.runtimeFlags)+2)
+	args = append(args, r.runtimeFlags...)
+	args = append(args, "resume", ctr.ID())
+
+	return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...)
+}
+
+// HTTPAttach performs an attach for the HTTP API.
+// The caller must handle closing the HTTP connection after this returns.
+// The cancel channel is not closed; it is up to the caller to do so after
+// this function returns.
+// If this is a container with a terminal, we will stream raw. If it is not, we
+// will stream with an 8-byte header to multiplex STDOUT and STDERR.
+// Returns any errors that occurred, and whether the connection was successfully
+// hijacked before that error occurred.
+//
+// streams selects which of STDIN/STDOUT/STDERR are attached (nil means all),
+// detachKeys overrides the engine's configured detach key sequence when
+// non-nil, hijackDone receives true once the HTTP connection has been hijacked,
+// streamLogs replays the container's existing logs first and streamAttach
+// forwards live data from the container's attach socket afterwards.
+func (r *ConmonOCIRuntime) HTTPAttach(ctr *Container, req *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, detachKeys *string, cancel <-chan bool, hijackDone chan<- bool, streamAttach, streamLogs bool) (deferredErr error) {
+	isTerminal := false
+	if ctr.config.Spec.Process != nil {
+		isTerminal = ctr.config.Spec.Process.Terminal
+	}
+
+	if streams != nil {
+		if !streams.Stdin && !streams.Stdout && !streams.Stderr {
+			return fmt.Errorf("must specify at least one stream to attach to: %w", define.ErrInvalidArg)
+		}
+	}
+
+	attachSock, err := r.AttachSocketPath(ctr)
+	if err != nil {
+		return err
+	}
+
+	// The attach socket is only needed (and only opened) for live streaming.
+	var conn *net.UnixConn
+	if streamAttach {
+		newConn, err := openUnixSocket(attachSock)
+		if err != nil {
+			return fmt.Errorf("failed to connect to container's attach socket: %v: %w", attachSock, err)
+		}
+		conn = newConn
+		defer func() {
+			if err := conn.Close(); err != nil {
+				logrus.Errorf("Unable to close container %s attach socket: %q", ctr.ID(), err)
+			}
+		}()
+
+		logrus.Debugf("Successfully connected to container %s attach socket %s", ctr.ID(), attachSock)
+	}
+
+	detachString := ctr.runtime.config.Engine.DetachKeys
+	if detachKeys != nil {
+		detachString = *detachKeys
+	}
+	detach, err := processDetachKeys(detachString)
+	if err != nil {
+		return err
+	}
+
+	attachStdout := true
+	attachStderr := true
+	attachStdin := true
+	if streams != nil {
+		attachStdout = streams.Stdout
+		attachStderr = streams.Stderr
+		attachStdin = streams.Stdin
+	}
+
+	logrus.Debugf("Going to hijack container %s attach connection", ctr.ID())
+
+	// Alright, let's hijack.
+	hijacker, ok := w.(http.Hijacker)
+	if !ok {
+		return fmt.Errorf("unable to hijack connection")
+	}
+
+	httpCon, httpBuf, err := hijacker.Hijack()
+	if err != nil {
+		return fmt.Errorf("error hijacking connection: %w", err)
+	}
+
+	hijackDone <- true
+
+	writeHijackHeader(req, httpBuf)
+
+	// Force a flush after the header is written.
+	if err := httpBuf.Flush(); err != nil {
+		return fmt.Errorf("error flushing HTTP hijack header: %w", err)
+	}
+
+	// From here on, errors are reported over the hijacked connection, which
+	// is also closed when we return.
+	defer func() {
+		hijackWriteErrorAndClose(deferredErr, ctr.ID(), isTerminal, httpCon, httpBuf)
+	}()
+
+	logrus.Debugf("Hijack for container %s attach session done, ready to stream", ctr.ID())
+
+	// TODO: This is gross. Really, really gross.
+	// I want to say we should read all the logs into an array before
+	// calling this, in container_api.go, but that could take a lot of
+	// memory...
+	// On the whole, we need to figure out a better way of doing this,
+	// though.
+	logSize := 0
+	if streamLogs {
+		logrus.Debugf("Will stream logs for container %s attach session", ctr.ID())
+
+		// Get all logs for the container
+		logChan := make(chan *logs.LogLine)
+		logOpts := new(logs.LogOptions)
+		logOpts.Tail = -1
+		logOpts.WaitGroup = new(sync.WaitGroup)
+		// Buffered so the writer goroutine can always deliver its result and
+		// exit, even if nobody is left to receive it.
+		errChan := make(chan error, 1)
+		go func() {
+			var err error
+			// In non-terminal mode we need to prepend with the
+			// stream header.
+			logrus.Debugf("Writing logs for container %s to HTTP attach", ctr.ID())
+			for logLine := range logChan {
+				if !isTerminal {
+					device := logLine.Device
+					var header []byte
+					headerLen := uint32(len(logLine.Msg))
+					logSize += len(logLine.Msg)
+					switch strings.ToLower(device) {
+					case "stdin":
+						header = makeHTTPAttachHeader(0, headerLen)
+					case "stdout":
+						header = makeHTTPAttachHeader(1, headerLen)
+					case "stderr":
+						header = makeHTTPAttachHeader(2, headerLen)
+					default:
+						logrus.Errorf("Unknown device for log line: %s", device)
+						header = makeHTTPAttachHeader(1, headerLen)
+					}
+					_, err = httpBuf.Write(header)
+					if err != nil {
+						break
+					}
+				}
+				_, err = httpBuf.Write([]byte(logLine.Msg))
+				if err != nil {
+					break
+				}
+				if !logLine.Partial() {
+					_, err = httpBuf.Write([]byte("\n"))
+					if err != nil {
+						break
+					}
+				}
+				err = httpBuf.Flush()
+				if err != nil {
+					break
+				}
+			}
+			errChan <- err
+		}()
+		// NOTE(review): if ReadLog fails, logChan is never closed and the
+		// writer goroutine above stays blocked on it; whether it is safe to
+		// close logChan here depends on ReadLog's internals - confirm.
+		if err := ctr.ReadLog(context.Background(), logOpts, logChan, 0); err != nil {
+			return err
+		}
+		go func() {
+			logOpts.WaitGroup.Wait()
+			close(logChan)
+		}()
+		// Wait for the writer before touching logSize: it is updated by the
+		// goroutine above, so reading it earlier would be a data race and
+		// would report a partial count.
+		logErr := <-errChan
+		logrus.Debugf("Done reading logs for container %s, %d bytes", ctr.ID(), logSize)
+		if logErr != nil {
+			return logErr
+		}
+	}
+	if !streamAttach {
+		logrus.Debugf("Done streaming logs for container %s attach, exiting as attach streaming not requested", ctr.ID())
+		return nil
+	}
+
+	logrus.Debugf("Forwarding attach output for container %s", ctr.ID())
+
+	// Buffered with room for one result each, so that a copy goroutine which
+	// finishes after this function has already returned does not block
+	// forever on its send.
+	stdoutChan := make(chan error, 1)
+	stdinChan := make(chan error, 1)
+
+	// Handle STDOUT/STDERR
+	go func() {
+		var err error
+		if isTerminal {
+			// Hack: return immediately if attachStdout not set to
+			// emulate Docker.
+			// Basically, when terminal is set, STDERR goes nowhere.
+			// Everything goes over STDOUT.
+			// Therefore, if not attaching STDOUT - we'll never copy
+			// anything from here.
+			logrus.Debugf("Performing terminal HTTP attach for container %s", ctr.ID())
+			if attachStdout {
+				err = httpAttachTerminalCopy(conn, httpBuf, ctr.ID())
+			}
+		} else {
+			logrus.Debugf("Performing non-terminal HTTP attach for container %s", ctr.ID())
+			err = httpAttachNonTerminalCopy(conn, httpBuf, ctr.ID(), attachStdin, attachStdout, attachStderr)
+		}
+		stdoutChan <- err
+		logrus.Debugf("STDOUT/ERR copy completed")
+	}()
+	// Next, STDIN. Avoid entirely if attachStdin unset.
+	if attachStdin {
+		go func() {
+			_, err := cutil.CopyDetachable(conn, httpBuf, detach)
+			logrus.Debugf("STDIN copy completed")
+			stdinChan <- err
+		}()
+	}
+
+	for {
+		select {
+		case err := <-stdoutChan:
+			// Output side finished: the attach session is over.
+			if err != nil {
+				return err
+			}
+
+			return nil
+		case err := <-stdinChan:
+			if err != nil {
+				return err
+			}
+			// copy stdin is done, close it
+			if connErr := conn.CloseWrite(); connErr != nil {
+				logrus.Errorf("Unable to close conn: %v", connErr)
+			}
+		case <-cancel:
+			return nil
+		}
+	}
+}
+
+// isRetryable reports whether err wraps a syscall.Errno that signals a
+// transient condition: an interrupted syscall (EINTR) or an operation on a
+// non-blocking file descriptor that was not ready yet (EAGAIN).
+// Any other error, including nil, is not retryable.
+func isRetryable(err error) bool {
+	var code syscall.Errno
+	if !errors.As(err, &code) {
+		return false
+	}
+	switch code {
+	case syscall.EINTR, syscall.EAGAIN:
+		return true
+	default:
+		return false
+	}
+}
+
+// openControlFile opens the terminal control file "ctl" located in parentDir
+// for non-blocking writes.
+// Opening is attempted up to 600 times, pausing a tenth of a second after each
+// retryable failure (see isRetryable); any other failure is returned
+// immediately. An error is returned if every attempt failed.
+func openControlFile(ctr *Container, parentDir string) (*os.File, error) {
+	controlPath := filepath.Join(parentDir, "ctl")
+	for attempt := 0; attempt < 600; attempt++ {
+		ctl, openErr := os.OpenFile(controlPath, unix.O_WRONLY|unix.O_NONBLOCK, 0)
+		switch {
+		case openErr == nil:
+			return ctl, nil
+		case !isRetryable(openErr):
+			return nil, fmt.Errorf("could not open ctl file for terminal resize for container %s: %w", ctr.ID(), openErr)
+		}
+		time.Sleep(100 * time.Millisecond)
+	}
+	return nil, fmt.Errorf("timeout waiting for %q", controlPath)
+}
+
+// AttachResize resizes the terminal used by the given container.
+// It opens the control file in the container's bundle directory and writes a
+// single line of the form "1 <height> <width>" to it.
+func (r *ConmonOCIRuntime) AttachResize(ctr *Container, newSize resize.TerminalSize) error {
+	ctl, err := openControlFile(ctr, ctr.bundlePath())
+	if err != nil {
+		return err
+	}
+	defer ctl.Close()
+
+	logrus.Debugf("Received a resize event for container %s: %+v", ctr.ID(), newSize)
+
+	resizeMsg := fmt.Sprintf("%d %d %d\n", 1, newSize.Height, newSize.Width)
+	if _, err := ctl.WriteString(resizeMsg); err != nil {
+		return fmt.Errorf("failed to write to ctl file to resize terminal: %w", err)
+	}
+
+	return nil
+}
+
+// CheckpointContainer checkpoints the given container by running the OCI
+// runtime's "checkpoint" command.
+// The checkpoint image is written to the container's checkpoint path (or its
+// pre-checkpoint path for a pre-dump) and the runtime's work files go to the
+// container's bundle path.
+// Returns the time spent in the runtime in microseconds when
+// options.PrintStats is set (0 otherwise), together with any error from the
+// runtime.
+func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) {
+	// imagePath is used by CRIU to store the actual checkpoint files
+	imagePath := ctr.CheckpointPath()
+	if options.PreCheckPoint {
+		imagePath = ctr.PreCheckPointPath()
+	}
+	// workPath will be used to store dump.log and stats-dump
+	workPath := ctr.bundlePath()
+	logrus.Debugf("Writing checkpoint to %s", imagePath)
+	logrus.Debugf("Writing checkpoint logs to %s", workPath)
+	logrus.Debugf("Pre-dump the container %t", options.PreCheckPoint)
+
+	args := make([]string, 0, len(r.runtimeFlags)+12)
+	args = append(args, r.runtimeFlags...)
+	args = append(args, "checkpoint", "--image-path", imagePath, "--work-path", workPath)
+	// --leave-running is added exactly once; it used to be appended a second
+	// time for the non-pre-dump case.
+	if options.KeepRunning {
+		args = append(args, "--leave-running")
+	}
+	if options.TCPEstablished {
+		args = append(args, "--tcp-established")
+	}
+	if options.FileLocks {
+		args = append(args, "--file-locks")
+	}
+	if options.PreCheckPoint {
+		args = append(args, "--pre-dump")
+	}
+	if !options.PreCheckPoint && options.WithPrevious {
+		// The parent path is given relative to the image path.
+		args = append(args, "--parent-path", filepath.Join("..", preCheckpointDir))
+	}
+
+	args = append(args, ctr.ID())
+	logrus.Debugf("the args to checkpoint: %s %s", r.path, strings.Join(args, " "))
+
+	runtimeDir, err := util.GetRuntimeDir()
+	if err != nil {
+		return 0, err
+	}
+	env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+	if path, ok := os.LookupEnv("PATH"); ok {
+		env = append(env, fmt.Sprintf("PATH=%s", path))
+	}
+
+	var runtimeCheckpointStarted time.Time
+	err = r.withContainerSocketLabel(ctr, func() error {
+		runtimeCheckpointStarted = time.Now()
+		return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...)
+	})
+
+	// The duration is only measured when statistics were requested.
+	var runtimeCheckpointDuration int64
+	if options.PrintStats {
+		runtimeCheckpointDuration = time.Since(runtimeCheckpointStarted).Microseconds()
+	}
+
+	return runtimeCheckpointDuration, err
+}
+
+// CheckConmonRunning reports whether the container's conmon process appears to
+// be alive.
+// With a recorded conmon PID, the process is probed with signal 0. Without
+// one, conmon is assumed to be running only if the container itself is
+// running or paused.
+func (r *ConmonOCIRuntime) CheckConmonRunning(ctr *Container) (bool, error) {
+	conmonPID := ctr.state.ConmonPID
+	if conmonPID != 0 {
+		// We have a conmon PID. Ping it with signal 0.
+		switch pingErr := unix.Kill(conmonPID, 0); {
+		case pingErr == nil:
+			return true, nil
+		case pingErr == unix.ESRCH:
+			return false, nil
+		default:
+			return false, fmt.Errorf("error pinging container %s conmon with signal 0: %w", ctr.ID(), pingErr)
+		}
+	}
+
+	// No PID on record. Some old versions did not store the conmon PID, so a
+	// running or paused container most likely still has a conmon; that is
+	// unusual enough to warrant a warning.
+	if ctr.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
+		logrus.Warnf("Conmon PID is not set, but container is running!")
+		return true, nil
+	}
+
+	// The container is not running, so an unset conmon PID is expected and
+	// conmon is not running either.
+	return false, nil
+}
+
+// SupportsCheckpoint checks if the OCI runtime supports checkpointing
+// containers.
+// The answer comes from crutils.CRRuntimeSupportsCheckpointRestore, called
+// with the path of the runtime executable.
+func (r *ConmonOCIRuntime) SupportsCheckpoint() bool {
+ return crutils.CRRuntimeSupportsCheckpointRestore(r.path)
+}
+
+// SupportsJSONErrors checks if the OCI runtime supports JSON-formatted error
+// messages.
+// It returns the supportsJSON flag stored on the runtime.
+func (r *ConmonOCIRuntime) SupportsJSONErrors() bool {
+ return r.supportsJSON
+}
+
+// SupportsNoCgroups checks if the OCI runtime supports running containers
+// without cgroups (the --cgroup-manager=disabled flag).
+// It returns the supportsNoCgroups flag stored on the runtime.
+func (r *ConmonOCIRuntime) SupportsNoCgroups() bool {
+ return r.supportsNoCgroups
+}
+
+// SupportsKVM checks if the OCI runtime supports running containers
+// without KVM separation
+// It returns the supportsKVM flag stored on the runtime.
+// NOTE(review): the wording "without KVM separation" looks inverted for a flag
+// named supportsKVM - confirm the intended meaning.
+func (r *ConmonOCIRuntime) SupportsKVM() bool {
+ return r.supportsKVM
+}
+
+// AttachSocketPath returns the path of the given container's attach socket,
+// which is the entry "attach" inside the container's bundle directory.
+// A nil container yields an error wrapping define.ErrInvalidArg.
+func (r *ConmonOCIRuntime) AttachSocketPath(ctr *Container) (string, error) {
+	if ctr != nil {
+		socketPath := filepath.Join(ctr.bundlePath(), "attach")
+		return socketPath, nil
+	}
+
+	return "", fmt.Errorf("must provide a valid container to get attach socket path: %w", define.ErrInvalidArg)
+}
+
+// ExitFilePath returns the path of the given container's exit file: an entry
+// named after the container ID inside the runtime's exits directory.
+// A nil container yields an error wrapping define.ErrInvalidArg.
+func (r *ConmonOCIRuntime) ExitFilePath(ctr *Container) (string, error) {
+	if ctr != nil {
+		exitFile := filepath.Join(r.exitsDir, ctr.ID())
+		return exitFile, nil
+	}
+	return "", fmt.Errorf("must provide a valid container to get exit file path: %w", define.ErrInvalidArg)
+}
+
+// RuntimeInfo collects package, path and version information for conmon and
+// for the OCI runtime.
+// It fails if the version of either one cannot be determined.
+func (r *ConmonOCIRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) {
+	ociPackage := packageVersion(r.path)
+	conmonPackage := packageVersion(r.conmonPath)
+
+	ociVersion, err := r.getOCIRuntimeVersion()
+	if err != nil {
+		return nil, nil, fmt.Errorf("error getting version of OCI runtime %s: %w", r.name, err)
+	}
+
+	conmonVersion, err := r.getConmonVersion()
+	if err != nil {
+		return nil, nil, fmt.Errorf("error getting conmon version: %w", err)
+	}
+
+	conmonInfo := &define.ConmonInfo{
+		Package: conmonPackage,
+		Path:    r.conmonPath,
+		Version: conmonVersion,
+	}
+	runtimeInfo := &define.OCIRuntimeInfo{
+		Name:    r.name,
+		Path:    r.path,
+		Package: ociPackage,
+		Version: ociVersion,
+	}
+	return conmonInfo, runtimeInfo, nil
+}
+
+// makeAccessible adds the execute/search bits (--x--x--x) to path and to each
+// of its parent directories, stopping before "/".
+// Entries that are owned by the given uid and gid are left untouched, and so
+// are entries that already have all three bits set.
+// The walk ends without error as soon as an entry does not exist. For a
+// relative path it ends once no further parent can be derived (at "."),
+// instead of spinning forever.
+func makeAccessible(path string, uid, gid int) error {
+	for path != "/" {
+		st, err := os.Stat(path)
+		if err != nil {
+			if os.IsNotExist(err) {
+				return nil
+			}
+			return err
+		}
+
+		// Ownership can only be checked when the platform exposes a
+		// *syscall.Stat_t; otherwise treat the entry as not owned.
+		owned := false
+		if sys, ok := st.Sys().(*syscall.Stat_t); ok {
+			owned = int(sys.Uid) == uid && int(sys.Gid) == gid
+		}
+		if !owned && st.Mode()&0111 != 0111 {
+			if err := os.Chmod(path, st.Mode()|0111); err != nil {
+				return err
+			}
+		}
+
+		// filepath.Dir is a fixed point for "." (and for volume roots), so
+		// stop there rather than processing the same entry again and again.
+		parent := filepath.Dir(path)
+		if parent == path {
+			break
+		}
+		path = parent
+	}
+	return nil
+}
+
+// waitContainerStop waits, for at most timeout, until the container's recorded
+// PID no longer exists. It is meant for a container that has already been
+// sent a signal to stop.
+func waitContainerStop(ctr *Container, timeout time.Duration) error {
+	containerPID := ctr.state.PID
+	return waitPidStop(containerPID, timeout)
+}
+
+// waitPidStop waits until the given PID no longer exists.
+// A background goroutine probes the PID with signal 0 every 100 milliseconds;
+// nil is returned once the probe reports ESRCH. If that does not happen within
+// timeout, the goroutine is told to stop and an error is returned.
+func waitPidStop(pid int, timeout time.Duration) error {
+	exited := make(chan struct{})
+	abort := make(chan struct{})
+
+	go func() {
+		for {
+			// Non-blocking check whether the waiter has given up.
+			select {
+			case <-abort:
+				return
+			default:
+			}
+
+			switch pingErr := unix.Kill(pid, 0); pingErr {
+			case nil:
+				// Still alive, keep polling.
+			case unix.ESRCH:
+				close(exited)
+				return
+			default:
+				logrus.Errorf("Pinging PID %d with signal 0: %v", pid, pingErr)
+			}
+			time.Sleep(100 * time.Millisecond)
+		}
+	}()
+
+	select {
+	case <-time.After(timeout):
+		close(abort)
+		return fmt.Errorf("given PIDs did not die within timeout")
+	case <-exited:
+		return nil
+	}
+}
+
+// getLogTag renders the container's log tag.
+// The tag is treated as a text/template and executed against the container's
+// inspect data. An empty tag yields an empty string, as does a failure to
+// obtain the inspect data (that error is deliberately not returned).
+func (r *ConmonOCIRuntime) getLogTag(ctr *Container) (string, error) {
+	tagTemplate := ctr.LogTag()
+	if tagTemplate == "" {
+		return "", nil
+	}
+
+	inspectData, err := ctr.inspectLocked(false)
+	if err != nil {
+		// FIXME: this error should probably be returned
+		return "", nil //nolint: nilerr
+	}
+
+	parsed, err := template.New("container").Parse(tagTemplate)
+	if err != nil {
+		return "", fmt.Errorf("template parsing error %s: %w", tagTemplate, err)
+	}
+
+	var rendered bytes.Buffer
+	if err := parsed.Execute(&rendered, inspectData); err != nil {
+		return "", err
+	}
+	return rendered.String(), nil
+}
+
+// createOCIContainer generates this container's main conmon instance and prepares it for starting
+//
+// It assembles the conmon command line and environment, hands conmon the
+// child ends of a sync pipe and a start pipe (plus any preserved, reserved-port
+// and rootless sync file descriptors), starts conmon, moves it into its cgroup
+// and then reads the container PID back over the sync pipe.
+// When restoreOptions is non-nil the container is restored from its checkpoint
+// path instead of being created from scratch.
+// Returns the time spent between starting conmon and finishing setup, in
+// microseconds, when restoreOptions is non-nil and PrintStats is set; 0
+// otherwise.
+func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
+ var stderrBuf bytes.Buffer
+
+ runtimeDir, err := util.GetRuntimeDir()
+ if err != nil {
+ return 0, err
+ }
+
+ // NOTE(review): only the parent ends of the two pipes are closed via defer;
+ // the child ends are closed after startCommand below, so the early returns
+ // in between appear to leave them open - confirm.
+ parentSyncPipe, childSyncPipe, err := newPipe()
+ if err != nil {
+ return 0, fmt.Errorf("error creating socket pair: %w", err)
+ }
+ defer errorhandling.CloseQuiet(parentSyncPipe)
+
+ childStartPipe, parentStartPipe, err := newPipe()
+ if err != nil {
+ return 0, fmt.Errorf("error creating socket pair for start pipe: %w", err)
+ }
+
+ defer errorhandling.CloseQuiet(parentStartPipe)
+
+ // The OCI runtime log file is only used outside of debug logging and when
+ // the runtime supports JSON errors.
+ var ociLog string
+ if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON {
+ ociLog = filepath.Join(ctr.state.RunDir, "oci-log")
+ }
+
+ logTag, err := r.getLogTag(ctr)
+ if err != nil {
+ return 0, err
+ }
+
+ if ctr.config.CgroupsMode == cgroupSplit {
+ if err := utils.MoveUnderCgroupSubtree("runtime"); err != nil {
+ return 0, err
+ }
+ }
+
+ // Fall back to a pidfile in the container's run directory when none is configured.
+ pidfile := ctr.config.PidFile
+ if pidfile == "" {
+ pidfile = filepath.Join(ctr.state.RunDir, "pidfile")
+ }
+
+ args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag)
+
+ if ctr.config.SdNotifyMode == define.SdNotifyModeContainer && ctr.config.SdNotifySocket != "" {
+ args = append(args, fmt.Sprintf("--sdnotify-socket=%s", ctr.config.SdNotifySocket))
+ }
+
+ if ctr.config.Spec.Process.Terminal {
+ args = append(args, "-t")
+ } else if ctr.config.Stdin {
+ args = append(args, "-i")
+ }
+
+ if ctr.config.Timeout > 0 {
+ args = append(args, fmt.Sprintf("--timeout=%d", ctr.config.Timeout))
+ }
+
+ if !r.enableKeyring {
+ args = append(args, "--no-new-keyring")
+ }
+ if ctr.config.ConmonPidFile != "" {
+ args = append(args, "--conmon-pidfile", ctr.config.ConmonPidFile)
+ }
+
+ if r.noPivot {
+ args = append(args, "--no-pivot")
+ }
+
+ // The exit command is passed to conmon as its executable followed by one
+ // --exit-command-arg per argument; the container ID is the last argument.
+ exitCommand, err := specgenutil.CreateExitCommandArgs(ctr.runtime.storageConfig, ctr.runtime.config, logrus.IsLevelEnabled(logrus.DebugLevel), ctr.AutoRemove(), false)
+ if err != nil {
+ return 0, err
+ }
+ exitCommand = append(exitCommand, ctr.config.ID)
+
+ args = append(args, "--exit-command", exitCommand[0])
+ for _, arg := range exitCommand[1:] {
+ args = append(args, []string{"--exit-command-arg", arg}...)
+ }
+
+ // Pass down the LISTEN_* environment (see #10443).
+ preserveFDs := ctr.config.PreserveFDs
+ if val := os.Getenv("LISTEN_FDS"); val != "" {
+ if ctr.config.PreserveFDs > 0 {
+ logrus.Warnf("Ignoring LISTEN_FDS to preserve custom user-specified FDs")
+ } else {
+ fds, err := strconv.Atoi(val)
+ if err != nil {
+ return 0, fmt.Errorf("converting LISTEN_FDS=%s: %w", val, err)
+ }
+ // NOTE(review): a negative LISTEN_FDS would wrap around in this
+ // conversion - confirm whether that needs rejecting.
+ preserveFDs = uint(fds)
+ }
+ }
+
+ if preserveFDs > 0 {
+ args = append(args, formatRuntimeOpts("--preserve-fds", fmt.Sprintf("%d", preserveFDs))...)
+ }
+
+ if restoreOptions != nil {
+ args = append(args, "--restore", ctr.CheckpointPath())
+ if restoreOptions.TCPEstablished {
+ args = append(args, "--runtime-opt", "--tcp-established")
+ }
+ if restoreOptions.FileLocks {
+ args = append(args, "--runtime-opt", "--file-locks")
+ }
+ // When restoring into a pod, pass the container's mount and process
+ // labels to the runtime.
+ if restoreOptions.Pod != "" {
+ mountLabel := ctr.config.MountLabel
+ processLabel := ctr.config.ProcessLabel
+ if mountLabel != "" {
+ args = append(
+ args,
+ "--runtime-opt",
+ fmt.Sprintf(
+ "--lsm-mount-context=%s",
+ mountLabel,
+ ),
+ )
+ }
+ if processLabel != "" {
+ args = append(
+ args,
+ "--runtime-opt",
+ fmt.Sprintf(
+ "--lsm-profile=selinux:%s",
+ processLabel,
+ ),
+ )
+ }
+ }
+ }
+
+ logrus.WithFields(logrus.Fields{
+ "args": args,
+ }).Debugf("running conmon: %s", r.conmonPath)
+
+ cmd := exec.Command(r.conmonPath, args...)
+ cmd.SysProcAttr = &syscall.SysProcAttr{
+ Setpgid: true,
+ }
+ // TODO this is probably a really bad idea for some uses
+ // Make this configurable
+ cmd.Stdin = os.Stdin
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ if ctr.config.Spec.Process.Terminal {
+ cmd.Stderr = &stderrBuf
+ }
+
+ // 0, 1 and 2 are stdin, stdout and stderr
+ conmonEnv := r.configureConmonEnv(runtimeDir)
+
+ // Preserved FDs are passed first so that they keep their numbers (3 and up)
+ // in conmon.
+ // NOTE(review): filesToClose is only closed on the success path at the end
+ // of this function; error returns in between appear to leave these files
+ // (including bound ports) open - confirm.
+ var filesToClose []*os.File
+ if preserveFDs > 0 {
+ for fd := 3; fd < int(3+preserveFDs); fd++ {
+ f := os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd))
+ filesToClose = append(filesToClose, f)
+ cmd.ExtraFiles = append(cmd.ExtraFiles, f)
+ }
+ }
+
+ // NOTE(review): cmd.Env starts out as r.conmonEnv itself, so the appends
+ // below may write into that slice's backing array if it has spare
+ // capacity - confirm.
+ cmd.Env = r.conmonEnv
+ // we don't want to step on users fds they asked to preserve
+ // Since 0-2 are used for stdio, start the fds we pass in at preserveFDs+3
+ cmd.Env = append(cmd.Env, fmt.Sprintf("_OCI_SYNCPIPE=%d", preserveFDs+3), fmt.Sprintf("_OCI_STARTPIPE=%d", preserveFDs+4))
+ cmd.Env = append(cmd.Env, conmonEnv...)
+ cmd.ExtraFiles = append(cmd.ExtraFiles, childSyncPipe, childStartPipe)
+
+ if r.reservePorts && !rootless.IsRootless() && !ctr.config.NetMode.IsSlirp4netns() {
+ ports, err := bindPorts(ctr.convertPortMappings())
+ if err != nil {
+ return 0, err
+ }
+ filesToClose = append(filesToClose, ports...)
+
+ // Leak the port we bound in the conmon process. These fd's won't be used
+ // by the container and conmon will keep the ports busy so that another
+ // process cannot use them.
+ cmd.ExtraFiles = append(cmd.ExtraFiles, ports...)
+ }
+
+ if ctr.config.NetMode.IsSlirp4netns() || rootless.IsRootless() {
+ if ctr.config.PostConfigureNetNS {
+ // The sync pipes are created here; the port sync pipe only when
+ // there are port mappings.
+ havePortMapping := len(ctr.config.PortMappings) > 0
+ if havePortMapping {
+ ctr.rootlessPortSyncR, ctr.rootlessPortSyncW, err = os.Pipe()
+ if err != nil {
+ return 0, fmt.Errorf("failed to create rootless port sync pipe: %w", err)
+ }
+ }
+ ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe()
+ if err != nil {
+ return 0, fmt.Errorf("failed to create rootless network sync pipe: %w", err)
+ }
+ } else {
+ // Otherwise any already existing slirp sync pipe ends are closed
+ // when this function returns.
+ if ctr.rootlessSlirpSyncR != nil {
+ defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncR)
+ }
+ if ctr.rootlessSlirpSyncW != nil {
+ defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncW)
+ }
+ }
+ // Leak one end in conmon, the other one will be leaked into slirp4netns
+ cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessSlirpSyncW)
+
+ if ctr.rootlessPortSyncW != nil {
+ defer errorhandling.CloseQuiet(ctr.rootlessPortSyncW)
+ // Leak one end in conmon, the other one will be leaked into rootlessport
+ cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessPortSyncW)
+ }
+ }
+ // The restore timer starts right before conmon is launched.
+ var runtimeRestoreStarted time.Time
+ if restoreOptions != nil {
+ runtimeRestoreStarted = time.Now()
+ }
+ err = startCommand(cmd, ctr)
+
+ // regardless of whether we errored or not, we no longer need the children pipes
+ childSyncPipe.Close()
+ childStartPipe.Close()
+ if err != nil {
+ return 0, err
+ }
+ if err := r.moveConmonToCgroupAndSignal(ctr, cmd, parentStartPipe); err != nil {
+ return 0, err
+ }
+ /* Wait for initial setup and fork, and reap child */
+ err = cmd.Wait()
+ if err != nil {
+ return 0, err
+ }
+
+ // Read the container PID from the sync pipe; on failure the container is
+ // removed from the runtime again.
+ pid, err := readConmonPipeData(r.name, parentSyncPipe, ociLog)
+ if err != nil {
+ // NOTE(review): err2 is not included in the log message below.
+ if err2 := r.DeleteContainer(ctr); err2 != nil {
+ logrus.Errorf("Removing container %s from runtime after creation failed", ctr.ID())
+ }
+ return 0, err
+ }
+ ctr.state.PID = pid
+
+ conmonPID, err := readConmonPidFile(ctr.config.ConmonPidFile)
+ if err != nil {
+ logrus.Warnf("Error reading conmon pid file for container %s: %v", ctr.ID(), err)
+ } else if conmonPID > 0 {
+ // conmon not having a pid file is a valid state, so don't set it if we don't have it
+ logrus.Infof("Got Conmon PID as %d", conmonPID)
+ ctr.state.ConmonPID = conmonPID
+ }
+
+ // The duration is only reported for a restore with statistics requested.
+ runtimeRestoreDuration := func() int64 {
+ if restoreOptions != nil && restoreOptions.PrintStats {
+ return time.Since(runtimeRestoreStarted).Microseconds()
+ }
+ return 0
+ }()
+
+ // These fds were passed down to the runtime. Close them
+ // and not interfere
+ for _, f := range filesToClose {
+ errorhandling.CloseQuiet(f)
+ }
+
+ return runtimeRestoreDuration, nil
+}
+
+// configureConmonEnv builds the environment entries that are added to conmon's
+// exec struct: every LC_* variable of the current process, CONTAINERS_CONF if
+// it is set, XDG_RUNTIME_DIR (from runtimeDir), the two _CONTAINERS_* rootless
+// variables and, when a home directory is known, HOME.
+// TODO this may want to be less hardcoded/more configurable in the future
+func (r *ConmonOCIRuntime) configureConmonEnv(runtimeDir string) []string {
+	var conmonEnv []string
+	for _, entry := range os.Environ() {
+		if !strings.HasPrefix(entry, "LC_") {
+			continue
+		}
+		conmonEnv = append(conmonEnv, entry)
+	}
+
+	if conf, found := os.LookupEnv("CONTAINERS_CONF"); found {
+		conmonEnv = append(conmonEnv, "CONTAINERS_CONF="+conf)
+	}
+
+	conmonEnv = append(conmonEnv,
+		"XDG_RUNTIME_DIR="+runtimeDir,
+		"_CONTAINERS_USERNS_CONFIGURED="+os.Getenv("_CONTAINERS_USERNS_CONFIGURED"),
+		"_CONTAINERS_ROOTLESS_UID="+os.Getenv("_CONTAINERS_ROOTLESS_UID"),
+	)
+
+	if home := homedir.Get(); home != "" {
+		conmonEnv = append(conmonEnv, "HOME="+home)
+	}
+
+	return conmonEnv
+}
+
+// sharedConmonArgs formats the arguments that exec and create/restore have in
+// common for the conmon CLI: container identity, runtime path and flags,
+// bundle, pidfile and exit directory, log driver and level, the optional log
+// size limit, OCI runtime log file and log tag, and the no-cgroups switch.
+func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, ociLogPath, logDriver, logTag string) []string {
+	// set the conmon API version to be able to use the correct sync struct keys
+	args := []string{
+		"--api-version", "1",
+		"-c", ctr.ID(),
+		"-u", cuuid,
+		"-r", r.path,
+		"-b", bundlePath,
+		"-p", pidPath,
+		"-n", ctr.Name(),
+		"--exit-dir", exitDir,
+		"--full-attach",
+	}
+
+	// Every runtime flag is forwarded as its own --runtime-arg.
+	for _, flag := range r.runtimeFlags {
+		args = append(args, "--runtime-arg", flag)
+	}
+
+	useSystemdCgroups := ctr.CgroupManager() == config.SystemdCgroupsManager &&
+		!ctr.config.NoCgroups &&
+		ctr.config.CgroupsMode != cgroupSplit
+	if useSystemdCgroups {
+		args = append(args, "-s")
+	}
+
+	var logDriverArg string
+	switch logDriver {
+	case define.JournaldLogging, define.NoLogging, define.PassthroughLogging:
+		// These drivers are passed to conmon under their own name.
+		logDriverArg = logDriver
+	case "", define.JSONLogging, define.KubernetesLogging:
+		// An empty driver comes either from `--log-driver ""` or from another
+		// place in libpod; neither is worth an error, so it silently maps to
+		// k8s-file just like the JSON and Kubernetes drivers.
+		logDriverArg = fmt.Sprintf("%s:%s", define.KubernetesLogging, logPath)
+	default:
+		// No case here should happen except JSONLogging, but keep this here in case the options are extended
+		logrus.Errorf("%s logging specified but not supported. Choosing k8s-file logging instead", ctr.LogDriver())
+		logDriverArg = fmt.Sprintf("%s:%s", define.KubernetesLogging, logPath)
+	}
+	args = append(args, "-l", logDriverArg)
+
+	currentLevel := logrus.GetLevel()
+	args = append(args, "--log-level", currentLevel.String())
+	if currentLevel == logrus.DebugLevel {
+		logrus.Debugf("%s messages will be logged to syslog", r.conmonPath)
+		args = append(args, "--syslog")
+	}
+
+	// A per-container log size takes precedence over the runtime-wide one.
+	maxLogSize := r.logSizeMax
+	if ctr.config.LogSize > 0 {
+		maxLogSize = ctr.config.LogSize
+	}
+	if maxLogSize > 0 {
+		args = append(args, "--log-size-max", fmt.Sprintf("%v", maxLogSize))
+	}
+
+	if ociLogPath != "" {
+		args = append(args, "--runtime-arg", "--log-format=json", "--runtime-arg", "--log", fmt.Sprintf("--runtime-arg=%s", ociLogPath))
+	}
+	if logTag != "" {
+		args = append(args, "--log-tag", logTag)
+	}
+	if ctr.config.NoCgroups {
+		logrus.Debugf("Running with no Cgroups")
+		args = append(args, "--runtime-arg", "--cgroup-manager", "--runtime-arg", "disabled")
+	}
+	return args
+}
+
+// startCommand starts cmd and returns the error from cmd.Start.
+//
+// When the container's SdNotifyMode is SdNotifyModeContainer or
+// SdNotifyModeIgnore and NOTIFY_SOCKET is set in this process, the variable
+// is unset before the command is started and restored to its previous value
+// once startCommand returns. Failures to unset or restore the variable are
+// only logged; they do not change the returned error.
+func startCommand(cmd *exec.Cmd, ctr *Container) error {
+	// Make sure to unset the NOTIFY_SOCKET and reset it afterwards if needed.
+	switch ctr.config.SdNotifyMode {
+	case define.SdNotifyModeContainer, define.SdNotifyModeIgnore:
+		if prev := os.Getenv("NOTIFY_SOCKET"); prev != "" {
+			if err := os.Unsetenv("NOTIFY_SOCKET"); err != nil {
+				logrus.Warnf("Error unsetting NOTIFY_SOCKET %v", err)
+			}
+			defer func() {
+				if err := os.Setenv("NOTIFY_SOCKET", prev); err != nil {
+					// Include the cause, otherwise the failure cannot be diagnosed.
+					logrus.Errorf("Resetting NOTIFY_SOCKET=%s: %v", prev, err)
+				}
+			}()
+		}
+	}
+
+	return cmd.Start()
+}
+
+// newPipe creates a connected pair of AF_LOCAL SOCK_SEQPACKET sockets, both
+// opened with SOCK_CLOEXEC, and wraps them in *os.File values.
+// The first returned file is the parent end, the second is the child end.
+func newPipe() (*os.File, *os.File, error) {
+	pair, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0)
+	if err != nil {
+		return nil, nil, err
+	}
+	// Descriptor 1 of the pair is handed out as the parent end, descriptor 0
+	// as the child end.
+	parentEnd := os.NewFile(uintptr(pair[1]), "parent")
+	childEnd := os.NewFile(uintptr(pair[0]), "child")
+	return parentEnd, childEnd, nil
+}
+
+// readConmonPidFile attempts to read conmon's pid from its pid file.
+//
+// An empty pidFile means there is nothing to read and yields (0, nil). When
+// the file cannot be read, or its contents are not a decimal integer, the
+// result is -1 together with the underlying error. Whitespace around the
+// number, such as a trailing newline, is ignored.
+func readConmonPidFile(pidFile string) (int, error) {
+	if pidFile == "" {
+		return 0, nil
+	}
+	contents, err := ioutil.ReadFile(pidFile)
+	if err != nil {
+		return -1, err
+	}
+	// strconv.Atoi rejects any surrounding whitespace, so strip it first.
+	conmonPID, err := strconv.Atoi(strings.TrimSpace(string(contents)))
+	if err != nil {
+		return -1, err
+	}
+	return conmonPID, nil
+}
+
+// readConmonPipeData attempts to read a syncInfo struct from the pipe.
+//
+// It reads one newline-terminated JSON message from pipe in a background
+// goroutine and waits for it for at most define.ContainerCreateTimeout.
+//
+// On success the non-negative "data" value of the message is returned.
+// When reading or decoding fails, -1 is returned. When the message carries a
+// negative "data" value, that value is returned together with an error. In
+// both failure cases the error is taken from the OCI runtime log at ociLog if
+// that file can be read and decoded; otherwise it falls back to the message
+// text sent by conmon or to a generic error. A timeout yields -1 and an error
+// wrapping define.ErrInternal.
+func readConmonPipeData(runtimeName string, pipe *os.File, ociLog string) (int, error) {
+	// syncInfo is used to return data from monitor process to daemon
+	type syncInfo struct {
+		Data    int    `json:"data"`
+		Message string `json:"message,omitempty"`
+	}
+
+	// Wait to get container pid from conmon
+	type syncStruct struct {
+		si  *syncInfo
+		err error
+	}
+	// The channel has room for the single result the goroutine produces, so
+	// the goroutine never blocks on the send, even when nobody is receiving
+	// any more because the timeout below fired first.
+	ch := make(chan syncStruct, 1)
+	go func() {
+		var si *syncInfo
+		rdr := bufio.NewReader(pipe)
+		b, err := rdr.ReadBytes('\n')
+		// ignore EOF here, error is returned even when data was read
+		// if it is no valid json unmarshal will fail below
+		if err != nil && !errors.Is(err, io.EOF) {
+			ch <- syncStruct{err: err}
+			// Exactly one result is sent; do not fall through to the
+			// decoding below.
+			return
+		}
+		if err := json.Unmarshal(b, &si); err != nil {
+			ch <- syncStruct{err: fmt.Errorf("conmon bytes %q: %w", string(b), err)}
+			return
+		}
+		ch <- syncStruct{si: si}
+	}()
+
+	// ociLogMessage returns the message recorded in the OCI runtime log.
+	// ok is false when no log path was given or the log could not be read
+	// or decoded.
+	ociLogMessage := func() (msg string, ok bool) {
+		if ociLog == "" {
+			return "", false
+		}
+		ociLogData, err := ioutil.ReadFile(ociLog)
+		if err != nil {
+			return "", false
+		}
+		var ociErr ociError
+		if err := json.Unmarshal(ociLogData, &ociErr); err != nil {
+			return "", false
+		}
+		return ociErr.Msg, true
+	}
+
+	select {
+	case ss := <-ch:
+		if ss.err != nil {
+			if msg, ok := ociLogMessage(); ok {
+				return -1, getOCIRuntimeError(runtimeName, msg)
+			}
+			return -1, fmt.Errorf("container create failed (no logs from conmon): %w", ss.err)
+		}
+		logrus.Debugf("Received: %d", ss.si.Data)
+		if ss.si.Data < 0 {
+			if msg, ok := ociLogMessage(); ok {
+				return ss.si.Data, getOCIRuntimeError(runtimeName, msg)
+			}
+			// If we failed to parse the JSON errors, then print the output as it is
+			if ss.si.Message != "" {
+				return ss.si.Data, getOCIRuntimeError(runtimeName, ss.si.Message)
+			}
+			return ss.si.Data, fmt.Errorf("container create failed: %w", define.ErrInternal)
+		}
+		return ss.si.Data, nil
+	case <-time.After(define.ContainerCreateTimeout):
+		return -1, fmt.Errorf("container creation timeout: %w", define.ErrInternal)
+	}
+}
+
+// writeConmonPipeData writes nonce data (a single zero byte) to a pipe and
+// returns the error reported by the write, if any.
+func writeConmonPipeData(pipe *os.File) error {
+	if _, err := pipe.Write([]byte{0}); err != nil {
+		return err
+	}
+	return nil
+}
+
+// formatRuntimeOpts prepends opts passed to it with --runtime-opt for passing to conmon.
+// The result holds two entries per option: the literal "--runtime-opt"
+// followed by the option itself, in the order the options were given.
+func formatRuntimeOpts(opts ...string) []string {
+	formatted := make([]string, 2*len(opts))
+	for i, opt := range opts {
+		formatted[2*i] = "--runtime-opt"
+		formatted[2*i+1] = opt
+	}
+	return formatted
+}
+
+// getConmonVersion returns a string representation of the conmon version.
+// It runs conmon with --version, replaces the first newline of the output
+// with ", " and strips one trailing newline.
+func (r *ConmonOCIRuntime) getConmonVersion() (string, error) {
+	raw, err := utils.ExecCmd(r.conmonPath, "--version")
+	if err != nil {
+		return "", err
+	}
+	// Only the first line break is turned into a separator.
+	joined := strings.Replace(raw, "\n", ", ", 1)
+	return strings.TrimSuffix(joined, "\n"), nil
+}
+
+// getOCIRuntimeVersion returns a string representation of the OCI runtime's
+// version: the output of running the runtime with --version, minus one
+// trailing newline.
+func (r *ConmonOCIRuntime) getOCIRuntimeVersion() (string, error) {
+	raw, err := utils.ExecCmd(r.path, "--version")
+	if err != nil {
+		return "", err
+	}
+	version := strings.TrimSuffix(raw, "\n")
+	return version, nil
+}
+
+// Copy data from container to HTTP connection, for terminal attach.
+// Container is the container's attach socket connection, http is a buffer for
+// the HTTP connection. cid is the ID of the container the attach session is
+// running for (used solely for error messages).
+// Every message read from the attach socket starts with one byte naming the
+// stream. Only STDOUT messages are forwarded, without that byte, and each one
+// is flushed immediately. Returns nil once the socket reports EOF.
+func httpAttachTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter, cid string) error {
+	buf := make([]byte, bufferSize)
+	for {
+		numR, readErr := container.Read(buf)
+		logrus.Debugf("Read fd(%d) %d/%d bytes for container %s", int(buf[0]), numR, len(buf), cid)
+
+		if numR > 0 {
+			if buf[0] != AttachPipeStdout {
+				logrus.Errorf("Received unexpected attach type %+d, discarding %d bytes", buf[0], numR)
+				continue
+			}
+
+			numW, outErr := http.Write(buf[1:numR])
+			if outErr == nil {
+				if numW+1 != numR {
+					return io.ErrShortWrite
+				}
+				// We need to force the buffer to write immediately, so
+				// there isn't a delay on the terminal side.
+				outErr = http.Flush()
+			}
+			if outErr != nil {
+				// The output error is what gets returned, so a pending
+				// read error is at least logged before it is dropped.
+				if readErr != nil {
+					logrus.Errorf("Reading container %s STDOUT: %v", cid, readErr)
+				}
+				return outErr
+			}
+		}
+		if readErr == io.EOF {
+			return nil
+		}
+		if readErr != nil {
+			return readErr
+		}
+	}
+}
+
+// Copy data from a container to an HTTP connection, for non-terminal attach.
+// Prepends a header to every message to multiplex the streams.
+// Container is the container's attach socket connection, http is a buffer for
+// the HTTP connection, and cid is the container ID used in log messages.
+// stdin, stdout and stderr select which streams are forwarded; messages for
+// a stream that is not selected, or of an unknown type, are discarded.
+// Every forwarded message is flushed immediately. Returns nil once the
+// socket reports EOF.
+func httpAttachNonTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter, cid string, stdin, stdout, stderr bool) error {
+	// logReadErr reports a read error that is about to be dropped because an
+	// output error is returned instead.
+	logReadErr := func(readErr error) {
+		if readErr != nil {
+			logrus.Errorf("Reading container %s standard streams: %v", cid, readErr)
+		}
+	}
+
+	buf := make([]byte, bufferSize)
+	for {
+		numR, err := container.Read(buf)
+		if numR > 0 {
+			var headerBuf []byte
+
+			// Subtract 1 because we strip the first byte (used for
+			// multiplexing by Conmon).
+			headerLen := uint32(numR - 1)
+			// Practically speaking, we could make this buf[0] - 1,
+			// but we need to validate it anyway.
+			// The header is only built for streams that are forwarded.
+			switch buf[0] {
+			case AttachPipeStdin:
+				if !stdin {
+					continue
+				}
+				headerBuf = makeHTTPAttachHeader(0, headerLen)
+			case AttachPipeStdout:
+				if !stdout {
+					continue
+				}
+				headerBuf = makeHTTPAttachHeader(1, headerLen)
+			case AttachPipeStderr:
+				if !stderr {
+					continue
+				}
+				headerBuf = makeHTTPAttachHeader(2, headerLen)
+			default:
+				logrus.Errorf("Received unexpected attach type %+d, discarding %d bytes", buf[0], numR)
+				continue
+			}
+
+			numH, err2 := http.Write(headerBuf)
+			if err2 != nil {
+				logReadErr(err)
+				return err2
+			}
+			// Hardcoding header length is pretty gross, but
+			// fast. Should be safe, as this is a fixed part
+			// of the protocol.
+			if numH != 8 {
+				logReadErr(err)
+				return io.ErrShortWrite
+			}
+
+			numW, err2 := http.Write(buf[1:numR])
+			if err2 != nil {
+				logReadErr(err)
+				return err2
+			}
+			if numW+1 != numR {
+				logReadErr(err)
+				return io.ErrShortWrite
+			}
+			// We need to force the buffer to write immediately, so
+			// there isn't a delay on the terminal side.
+			if err2 := http.Flush(); err2 != nil {
+				logReadErr(err)
+				return err2
+			}
+		}
+		if err != nil {
+			if err == io.EOF {
+				return nil
+			}
+
+			return err
+		}
+	}
+}
diff --git a/libpod/oci_conmon_exec_linux.go b/libpod/oci_conmon_exec_common.go
index 16cd7ef9f..16cd7ef9f 100644
--- a/libpod/oci_conmon_exec_linux.go
+++ b/libpod/oci_conmon_exec_common.go
diff --git a/libpod/oci_conmon_freebsd.go b/libpod/oci_conmon_freebsd.go
new file mode 100644
index 000000000..6f7ac7fc6
--- /dev/null
+++ b/libpod/oci_conmon_freebsd.go
@@ -0,0 +1,24 @@
+package libpod
+
+import (
+ "errors"
+ "os"
+ "os/exec"
+)
+
+// createRootlessContainer is not supported in this FreeBSD build: it ignores
+// both arguments and always returns -1 together with an "unsupported" error.
+func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
+ return -1, errors.New("unsupported (*ConmonOCIRuntime) createRootlessContainer")
+}
+
+// Run the closure with the container's socket label set.
+// This FreeBSD implementation has no label support yet, so it simply calls
+// closure and returns its result; ctr is not used.
+func (r *ConmonOCIRuntime) withContainerSocketLabel(ctr *Container, closure func() error) error {
+ // No label support yet
+ return closure()
+}
+
+// moveConmonToCgroupAndSignal is a no-op in this FreeBSD build: it does not
+// touch ctr, cmd or startFd, writes nothing to the start fd, and always
+// returns nil.
+func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File) error {
+ // No equivalent on FreeBSD
+ return nil
+}
diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go
index 1b654ed33..0964d4ea3 100644
--- a/libpod/oci_conmon_linux.go
+++ b/libpod/oci_conmon_linux.go
@@ -1,46 +1,21 @@
-//go:build linux
-// +build linux
-
package libpod
import (
- "bufio"
- "bytes"
- "context"
- "errors"
"fmt"
- "io"
- "io/ioutil"
- "net"
- "net/http"
"os"
"os/exec"
"path/filepath"
"runtime"
- "strconv"
"strings"
- "sync"
- "syscall"
- "text/template"
- "time"
runcconfig "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/containers/common/pkg/cgroups"
"github.com/containers/common/pkg/config"
- "github.com/containers/common/pkg/resize"
- cutil "github.com/containers/common/pkg/util"
- conmonConfig "github.com/containers/conmon/runner/config"
- "github.com/containers/podman/v4/libpod/define"
- "github.com/containers/podman/v4/libpod/logs"
- "github.com/containers/podman/v4/pkg/checkpoint/crutils"
"github.com/containers/podman/v4/pkg/errorhandling"
"github.com/containers/podman/v4/pkg/rootless"
- "github.com/containers/podman/v4/pkg/specgenutil"
- "github.com/containers/podman/v4/pkg/util"
"github.com/containers/podman/v4/utils"
- "github.com/containers/storage/pkg/homedir"
pmount "github.com/containers/storage/pkg/mount"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux/label"
@@ -48,782 +23,70 @@ import (
"golang.org/x/sys/unix"
)
-const (
- // This is Conmon's STDIO_BUF_SIZE. I don't believe we have access to it
- // directly from the Go code, so const it here
- // Important: The conmon attach socket uses an extra byte at the beginning of each
- // message to specify the STREAM so we have to increase the buffer size by one
- bufferSize = conmonConfig.BufSize + 1
-)
-
-// ConmonOCIRuntime is an OCI runtime managed by Conmon.
-// TODO: Make all calls to OCI runtime have a timeout.
-type ConmonOCIRuntime struct {
- name string
- path string
- conmonPath string
- conmonEnv []string
- tmpDir string
- exitsDir string
- logSizeMax int64
- noPivot bool
- reservePorts bool
- runtimeFlags []string
- supportsJSON bool
- supportsKVM bool
- supportsNoCgroups bool
- enableKeyring bool
-}
-
-// Make a new Conmon-based OCI runtime with the given options.
-// Conmon will wrap the given OCI runtime, which can be `runc`, `crun`, or
-// any runtime with a runc-compatible CLI.
-// The first path that points to a valid executable will be used.
-// Deliberately private. Someone should not be able to construct this outside of
-// libpod.
-func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtimeFlags []string, runtimeCfg *config.Config) (OCIRuntime, error) {
- if name == "" {
- return nil, fmt.Errorf("the OCI runtime must be provided a non-empty name: %w", define.ErrInvalidArg)
- }
-
- // Make lookup tables for runtime support
- supportsJSON := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsJSON))
- supportsNoCgroups := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsNoCgroups))
- supportsKVM := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsKVM))
- for _, r := range runtimeCfg.Engine.RuntimeSupportsJSON {
- supportsJSON[r] = true
- }
- for _, r := range runtimeCfg.Engine.RuntimeSupportsNoCgroups {
- supportsNoCgroups[r] = true
- }
- for _, r := range runtimeCfg.Engine.RuntimeSupportsKVM {
- supportsKVM[r] = true
- }
-
- runtime := new(ConmonOCIRuntime)
- runtime.name = name
- runtime.conmonPath = conmonPath
- runtime.runtimeFlags = runtimeFlags
-
- runtime.conmonEnv = runtimeCfg.Engine.ConmonEnvVars
- runtime.tmpDir = runtimeCfg.Engine.TmpDir
- runtime.logSizeMax = runtimeCfg.Containers.LogSizeMax
- runtime.noPivot = runtimeCfg.Engine.NoPivotRoot
- runtime.reservePorts = runtimeCfg.Engine.EnablePortReservation
- runtime.enableKeyring = runtimeCfg.Containers.EnableKeyring
-
- // TODO: probe OCI runtime for feature and enable automatically if
- // available.
-
- base := filepath.Base(name)
- runtime.supportsJSON = supportsJSON[base]
- runtime.supportsNoCgroups = supportsNoCgroups[base]
- runtime.supportsKVM = supportsKVM[base]
-
- foundPath := false
- for _, path := range paths {
- stat, err := os.Stat(path)
- if err != nil {
- if os.IsNotExist(err) {
- continue
- }
- return nil, fmt.Errorf("cannot stat OCI runtime %s path: %w", name, err)
- }
- if !stat.Mode().IsRegular() {
- continue
- }
- foundPath = true
- logrus.Tracef("found runtime %q", path)
- runtime.path = path
- break
- }
-
- // Search the $PATH as last fallback
- if !foundPath {
- if foundRuntime, err := exec.LookPath(name); err == nil {
- foundPath = true
- runtime.path = foundRuntime
- logrus.Debugf("using runtime %q from $PATH: %q", name, foundRuntime)
- }
- }
-
- if !foundPath {
- return nil, fmt.Errorf("no valid executable found for OCI runtime %s: %w", name, define.ErrInvalidArg)
- }
-
- runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits")
-
- // Create the exit files and attach sockets directories
- if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil {
- // The directory is allowed to exist
- if !os.IsExist(err) {
- return nil, fmt.Errorf("error creating OCI runtime exit files directory: %w", err)
- }
+func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
+ type result struct {
+ restoreDuration int64
+ err error
}
- return runtime, nil
-}
-
-// Name returns the name of the runtime being wrapped by Conmon.
-func (r *ConmonOCIRuntime) Name() string {
- return r.name
-}
-
-// Path returns the path of the OCI runtime being wrapped by Conmon.
-func (r *ConmonOCIRuntime) Path() string {
- return r.path
-}
-
-// hasCurrentUserMapped checks whether the current user is mapped inside the container user namespace
-func hasCurrentUserMapped(ctr *Container) bool {
- if len(ctr.config.IDMappings.UIDMap) == 0 && len(ctr.config.IDMappings.GIDMap) == 0 {
- return true
- }
- uid := os.Geteuid()
- for _, m := range ctr.config.IDMappings.UIDMap {
- if uid >= m.HostID && uid < m.HostID+m.Size {
- return true
- }
- }
- return false
-}
-
-// CreateContainer creates a container.
-func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
- // always make the run dir accessible to the current user so that the PID files can be read without
- // being in the rootless user namespace.
- if err := makeAccessible(ctr.state.RunDir, 0, 0); err != nil {
- return 0, err
- }
- if !hasCurrentUserMapped(ctr) {
- for _, i := range []string{ctr.state.RunDir, ctr.runtime.config.Engine.TmpDir, ctr.config.StaticDir, ctr.state.Mountpoint, ctr.runtime.config.Engine.VolumePath} {
- if err := makeAccessible(i, ctr.RootUID(), ctr.RootGID()); err != nil {
+ ch := make(chan result)
+ go func() {
+ runtime.LockOSThread()
+ restoreDuration, err := func() (int64, error) {
+ fd, err := os.Open(fmt.Sprintf("/proc/%d/task/%d/ns/mnt", os.Getpid(), unix.Gettid()))
+ if err != nil {
return 0, err
}
- }
+ defer errorhandling.CloseQuiet(fd)
- // if we are running a non privileged container, be sure to umount some kernel paths so they are not
- // bind mounted inside the container at all.
- if !ctr.config.Privileged && !rootless.IsRootless() {
- type result struct {
- restoreDuration int64
- err error
+ // create a new mountns on the current thread
+ if err = unix.Unshare(unix.CLONE_NEWNS); err != nil {
+ return 0, err
}
- ch := make(chan result)
- go func() {
- runtime.LockOSThread()
- restoreDuration, err := func() (int64, error) {
- fd, err := os.Open(fmt.Sprintf("/proc/%d/task/%d/ns/mnt", os.Getpid(), unix.Gettid()))
- if err != nil {
- return 0, err
- }
- defer errorhandling.CloseQuiet(fd)
-
- // create a new mountns on the current thread
- if err = unix.Unshare(unix.CLONE_NEWNS); err != nil {
- return 0, err
- }
- defer func() {
- if err := unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS); err != nil {
- logrus.Errorf("Unable to clone new namespace: %q", err)
- }
- }()
-
- // don't spread our mounts around. We are setting only /sys to be slave
- // so that the cleanup process is still able to umount the storage and the
- // changes are propagated to the host.
- err = unix.Mount("/sys", "/sys", "none", unix.MS_REC|unix.MS_SLAVE, "")
- if err != nil {
- return 0, fmt.Errorf("cannot make /sys slave: %w", err)
- }
-
- mounts, err := pmount.GetMounts()
- if err != nil {
- return 0, err
- }
- for _, m := range mounts {
- if !strings.HasPrefix(m.Mountpoint, "/sys/kernel") {
- continue
- }
- err = unix.Unmount(m.Mountpoint, 0)
- if err != nil && !os.IsNotExist(err) {
- return 0, fmt.Errorf("cannot unmount %s: %w", m.Mountpoint, err)
- }
- }
- return r.createOCIContainer(ctr, restoreOptions)
- }()
- ch <- result{
- restoreDuration: restoreDuration,
- err: err,
+ defer func() {
+ if err := unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS); err != nil {
+ logrus.Errorf("Unable to clone new namespace: %q", err)
}
}()
- r := <-ch
- return r.restoreDuration, r.err
- }
- }
- return r.createOCIContainer(ctr, restoreOptions)
-}
-
-// UpdateContainerStatus retrieves the current status of the container from the
-// runtime. It updates the container's state but does not save it.
-// If useRuntime is false, we will not directly hit runc to see the container's
-// status, but will instead only check for the existence of the conmon exit file
-// and update state to stopped if it exists.
-func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container) error {
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return err
- }
-
- // Store old state so we know if we were already stopped
- oldState := ctr.state.State
- state := new(spec.State)
-
- cmd := exec.Command(r.path, "state", ctr.ID())
- cmd.Env = append(cmd.Env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir))
-
- outPipe, err := cmd.StdoutPipe()
- if err != nil {
- return fmt.Errorf("getting stdout pipe: %w", err)
- }
- errPipe, err := cmd.StderrPipe()
- if err != nil {
- return fmt.Errorf("getting stderr pipe: %w", err)
- }
-
- if err := cmd.Start(); err != nil {
- out, err2 := ioutil.ReadAll(errPipe)
- if err2 != nil {
- return fmt.Errorf("error getting container %s state: %w", ctr.ID(), err)
- }
- if strings.Contains(string(out), "does not exist") || strings.Contains(string(out), "No such file") {
- if err := ctr.removeConmonFiles(); err != nil {
- logrus.Debugf("unable to remove conmon files for container %s", ctr.ID())
- }
- ctr.state.ExitCode = -1
- ctr.state.FinishedTime = time.Now()
- ctr.state.State = define.ContainerStateExited
- return ctr.runtime.state.AddContainerExitCode(ctr.ID(), ctr.state.ExitCode)
- }
- return fmt.Errorf("error getting container %s state. stderr/out: %s: %w", ctr.ID(), out, err)
- }
- defer func() {
- _ = cmd.Wait()
- }()
-
- if err := errPipe.Close(); err != nil {
- return err
- }
- out, err := ioutil.ReadAll(outPipe)
- if err != nil {
- return fmt.Errorf("error reading stdout: %s: %w", ctr.ID(), err)
- }
- if err := json.NewDecoder(bytes.NewBuffer(out)).Decode(state); err != nil {
- return fmt.Errorf("error decoding container status for container %s: %w", ctr.ID(), err)
- }
- ctr.state.PID = state.Pid
-
- switch state.Status {
- case "created":
- ctr.state.State = define.ContainerStateCreated
- case "paused":
- ctr.state.State = define.ContainerStatePaused
- case "running":
- ctr.state.State = define.ContainerStateRunning
- case "stopped":
- ctr.state.State = define.ContainerStateStopped
- default:
- return fmt.Errorf("unrecognized status returned by runtime for container %s: %s: %w",
- ctr.ID(), state.Status, define.ErrInternal)
- }
-
- // Only grab exit status if we were not already stopped
- // If we were, it should already be in the database
- if ctr.state.State == define.ContainerStateStopped && oldState != define.ContainerStateStopped {
- if _, err := ctr.Wait(context.Background()); err != nil {
- logrus.Errorf("Waiting for container %s to exit: %v", ctr.ID(), err)
- }
- return nil
- }
-
- // Handle ContainerStateStopping - keep it unless the container
- // transitioned to no longer running.
- if oldState == define.ContainerStateStopping && (ctr.state.State == define.ContainerStatePaused || ctr.state.State == define.ContainerStateRunning) {
- ctr.state.State = define.ContainerStateStopping
- }
-
- return nil
-}
-
-// StartContainer starts the given container.
-// Sets time the container was started, but does not save it.
-func (r *ConmonOCIRuntime) StartContainer(ctr *Container) error {
- // TODO: streams should probably *not* be our STDIN/OUT/ERR - redirect to buffers?
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return err
- }
- env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
- if path, ok := os.LookupEnv("PATH"); ok {
- env = append(env, fmt.Sprintf("PATH=%s", path))
- }
- if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "start", ctr.ID())...); err != nil {
- return err
- }
-
- ctr.state.StartedTime = time.Now()
-
- return nil
-}
-
-// KillContainer sends the given signal to the given container.
-// If all is set, send to all PIDs in the container.
-// All is only supported if the container created cgroups.
-func (r *ConmonOCIRuntime) KillContainer(ctr *Container, signal uint, all bool) error {
- logrus.Debugf("Sending signal %d to container %s", signal, ctr.ID())
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return err
- }
- env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
- var args []string
- args = append(args, r.runtimeFlags...)
- if all {
- args = append(args, "kill", "--all", ctr.ID(), fmt.Sprintf("%d", signal))
- } else {
- args = append(args, "kill", ctr.ID(), fmt.Sprintf("%d", signal))
- }
- if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...); err != nil {
- // Update container state - there's a chance we failed because
- // the container exited in the meantime.
- if err2 := r.UpdateContainerStatus(ctr); err2 != nil {
- logrus.Infof("Error updating status for container %s: %v", ctr.ID(), err2)
- }
- if ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited) {
- return define.ErrCtrStateInvalid
- }
- return fmt.Errorf("error sending signal to container %s: %w", ctr.ID(), err)
- }
-
- return nil
-}
-
-// StopContainer stops a container, first using its given stop signal (or
-// SIGTERM if no signal was specified), then using SIGKILL.
-// Timeout is given in seconds. If timeout is 0, the container will be
-// immediately kill with SIGKILL.
-// Does not set finished time for container, assumes you will run updateStatus
-// after to pull the exit code.
-func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool) error {
- logrus.Debugf("Stopping container %s (PID %d)", ctr.ID(), ctr.state.PID)
-
- // Ping the container to see if it's alive
- // If it's not, it's already stopped, return
- err := unix.Kill(ctr.state.PID, 0)
- if err == unix.ESRCH {
- return nil
- }
-
- stopSignal := ctr.config.StopSignal
- if stopSignal == 0 {
- stopSignal = uint(syscall.SIGTERM)
- }
-
- if timeout > 0 {
- if err := r.KillContainer(ctr, stopSignal, all); err != nil {
- // Is the container gone?
- // If so, it probably died between the first check and
- // our sending the signal
- // The container is stopped, so exit cleanly
- err := unix.Kill(ctr.state.PID, 0)
- if err == unix.ESRCH {
- return nil
+ // don't spread our mounts around. We are setting only /sys to be slave
+ // so that the cleanup process is still able to umount the storage and the
+ // changes are propagated to the host.
+ err = unix.Mount("/sys", "/sys", "none", unix.MS_REC|unix.MS_SLAVE, "")
+ if err != nil {
+ return 0, fmt.Errorf("cannot make /sys slave: %w", err)
}
- return err
- }
-
- if err := waitContainerStop(ctr, time.Duration(timeout)*time.Second); err != nil {
- logrus.Debugf("Timed out stopping container %s with %s, resorting to SIGKILL: %v", ctr.ID(), unix.SignalName(syscall.Signal(stopSignal)), err)
- logrus.Warnf("StopSignal %s failed to stop container %s in %d seconds, resorting to SIGKILL", unix.SignalName(syscall.Signal(stopSignal)), ctr.Name(), timeout)
- } else {
- // No error, the container is dead
- return nil
- }
- }
-
- if err := r.KillContainer(ctr, 9, all); err != nil {
- // Again, check if the container is gone. If it is, exit cleanly.
- err := unix.Kill(ctr.state.PID, 0)
- if err == unix.ESRCH {
- return nil
- }
-
- return fmt.Errorf("error sending SIGKILL to container %s: %w", ctr.ID(), err)
- }
-
- // Give runtime a few seconds to make it happen
- if err := waitContainerStop(ctr, killContainerTimeout); err != nil {
- return err
- }
-
- return nil
-}
-
-// DeleteContainer deletes a container from the OCI runtime.
-func (r *ConmonOCIRuntime) DeleteContainer(ctr *Container) error {
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return err
- }
- env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
- return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "delete", "--force", ctr.ID())...)
-}
-
-// PauseContainer pauses the given container.
-func (r *ConmonOCIRuntime) PauseContainer(ctr *Container) error {
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return err
- }
- env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
- return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "pause", ctr.ID())...)
-}
-
-// UnpauseContainer unpauses the given container.
-func (r *ConmonOCIRuntime) UnpauseContainer(ctr *Container) error {
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return err
- }
- env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
- return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "resume", ctr.ID())...)
-}
-
-// HTTPAttach performs an attach for the HTTP API.
-// The caller must handle closing the HTTP connection after this returns.
-// The cancel channel is not closed; it is up to the caller to do so after
-// this function returns.
-// If this is a container with a terminal, we will stream raw. If it is not, we
-// will stream with an 8-byte header to multiplex STDOUT and STDERR.
-// Returns any errors that occurred, and whether the connection was successfully
-// hijacked before that error occurred.
-func (r *ConmonOCIRuntime) HTTPAttach(ctr *Container, req *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, detachKeys *string, cancel <-chan bool, hijackDone chan<- bool, streamAttach, streamLogs bool) (deferredErr error) {
- isTerminal := false
- if ctr.config.Spec.Process != nil {
- isTerminal = ctr.config.Spec.Process.Terminal
- }
-
- if streams != nil {
- if !streams.Stdin && !streams.Stdout && !streams.Stderr {
- return fmt.Errorf("must specify at least one stream to attach to: %w", define.ErrInvalidArg)
- }
- }
-
- attachSock, err := r.AttachSocketPath(ctr)
- if err != nil {
- return err
- }
-
- var conn *net.UnixConn
- if streamAttach {
- newConn, err := openUnixSocket(attachSock)
- if err != nil {
- return fmt.Errorf("failed to connect to container's attach socket: %v: %w", attachSock, err)
- }
- conn = newConn
- defer func() {
- if err := conn.Close(); err != nil {
- logrus.Errorf("Unable to close container %s attach socket: %q", ctr.ID(), err)
+ mounts, err := pmount.GetMounts()
+ if err != nil {
+ return 0, err
}
- }()
-
- logrus.Debugf("Successfully connected to container %s attach socket %s", ctr.ID(), attachSock)
- }
-
- detachString := ctr.runtime.config.Engine.DetachKeys
- if detachKeys != nil {
- detachString = *detachKeys
- }
- detach, err := processDetachKeys(detachString)
- if err != nil {
- return err
- }
-
- attachStdout := true
- attachStderr := true
- attachStdin := true
- if streams != nil {
- attachStdout = streams.Stdout
- attachStderr = streams.Stderr
- attachStdin = streams.Stdin
- }
-
- logrus.Debugf("Going to hijack container %s attach connection", ctr.ID())
-
- // Alright, let's hijack.
- hijacker, ok := w.(http.Hijacker)
- if !ok {
- return fmt.Errorf("unable to hijack connection")
- }
-
- httpCon, httpBuf, err := hijacker.Hijack()
- if err != nil {
- return fmt.Errorf("error hijacking connection: %w", err)
- }
-
- hijackDone <- true
-
- writeHijackHeader(req, httpBuf)
-
- // Force a flush after the header is written.
- if err := httpBuf.Flush(); err != nil {
- return fmt.Errorf("error flushing HTTP hijack header: %w", err)
- }
-
- defer func() {
- hijackWriteErrorAndClose(deferredErr, ctr.ID(), isTerminal, httpCon, httpBuf)
- }()
-
- logrus.Debugf("Hijack for container %s attach session done, ready to stream", ctr.ID())
-
- // TODO: This is gross. Really, really gross.
- // I want to say we should read all the logs into an array before
- // calling this, in container_api.go, but that could take a lot of
- // memory...
- // On the whole, we need to figure out a better way of doing this,
- // though.
- logSize := 0
- if streamLogs {
- logrus.Debugf("Will stream logs for container %s attach session", ctr.ID())
-
- // Get all logs for the container
- logChan := make(chan *logs.LogLine)
- logOpts := new(logs.LogOptions)
- logOpts.Tail = -1
- logOpts.WaitGroup = new(sync.WaitGroup)
- errChan := make(chan error)
- go func() {
- var err error
- // In non-terminal mode we need to prepend with the
- // stream header.
- logrus.Debugf("Writing logs for container %s to HTTP attach", ctr.ID())
- for logLine := range logChan {
- if !isTerminal {
- device := logLine.Device
- var header []byte
- headerLen := uint32(len(logLine.Msg))
- logSize += len(logLine.Msg)
- switch strings.ToLower(device) {
- case "stdin":
- header = makeHTTPAttachHeader(0, headerLen)
- case "stdout":
- header = makeHTTPAttachHeader(1, headerLen)
- case "stderr":
- header = makeHTTPAttachHeader(2, headerLen)
- default:
- logrus.Errorf("Unknown device for log line: %s", device)
- header = makeHTTPAttachHeader(1, headerLen)
- }
- _, err = httpBuf.Write(header)
- if err != nil {
- break
- }
- }
- _, err = httpBuf.Write([]byte(logLine.Msg))
- if err != nil {
- break
- }
- if !logLine.Partial() {
- _, err = httpBuf.Write([]byte("\n"))
- if err != nil {
- break
- }
+ for _, m := range mounts {
+ if !strings.HasPrefix(m.Mountpoint, "/sys/kernel") {
+ continue
}
- err = httpBuf.Flush()
- if err != nil {
- break
+ err = unix.Unmount(m.Mountpoint, 0)
+ if err != nil && !os.IsNotExist(err) {
+ return 0, fmt.Errorf("cannot unmount %s: %w", m.Mountpoint, err)
}
}
- errChan <- err
- }()
- if err := ctr.ReadLog(context.Background(), logOpts, logChan, 0); err != nil {
- return err
- }
- go func() {
- logOpts.WaitGroup.Wait()
- close(logChan)
+ return r.createOCIContainer(ctr, restoreOptions)
}()
- logrus.Debugf("Done reading logs for container %s, %d bytes", ctr.ID(), logSize)
- if err := <-errChan; err != nil {
- return err
+ ch <- result{
+ restoreDuration: restoreDuration,
+ err: err,
}
- }
- if !streamAttach {
- logrus.Debugf("Done streaming logs for container %s attach, exiting as attach streaming not requested", ctr.ID())
- return nil
- }
-
- logrus.Debugf("Forwarding attach output for container %s", ctr.ID())
-
- stdoutChan := make(chan error)
- stdinChan := make(chan error)
-
- // Handle STDOUT/STDERR
- go func() {
- var err error
- if isTerminal {
- // Hack: return immediately if attachStdout not set to
- // emulate Docker.
- // Basically, when terminal is set, STDERR goes nowhere.
- // Everything does over STDOUT.
- // Therefore, if not attaching STDOUT - we'll never copy
- // anything from here.
- logrus.Debugf("Performing terminal HTTP attach for container %s", ctr.ID())
- if attachStdout {
- err = httpAttachTerminalCopy(conn, httpBuf, ctr.ID())
- }
- } else {
- logrus.Debugf("Performing non-terminal HTTP attach for container %s", ctr.ID())
- err = httpAttachNonTerminalCopy(conn, httpBuf, ctr.ID(), attachStdin, attachStdout, attachStderr)
- }
- stdoutChan <- err
- logrus.Debugf("STDOUT/ERR copy completed")
}()
- // Next, STDIN. Avoid entirely if attachStdin unset.
- if attachStdin {
- go func() {
- _, err := cutil.CopyDetachable(conn, httpBuf, detach)
- logrus.Debugf("STDIN copy completed")
- stdinChan <- err
- }()
- }
-
- for {
- select {
- case err := <-stdoutChan:
- if err != nil {
- return err
- }
-
- return nil
- case err := <-stdinChan:
- if err != nil {
- return err
- }
- // copy stdin is done, close it
- if connErr := conn.CloseWrite(); connErr != nil {
- logrus.Errorf("Unable to close conn: %v", connErr)
- }
- case <-cancel:
- return nil
- }
- }
-}
-
-// isRetryable returns whether the error was caused by a blocked syscall or the
-// specified operation on a non blocking file descriptor wasn't ready for completion.
-func isRetryable(err error) bool {
- var errno syscall.Errno
- if errors.As(err, &errno) {
- return errno == syscall.EINTR || errno == syscall.EAGAIN
- }
- return false
+ res := <-ch
+ return res.restoreDuration, res.err
}
-// openControlFile opens the terminal control file.
-func openControlFile(ctr *Container, parentDir string) (*os.File, error) {
- controlPath := filepath.Join(parentDir, "ctl")
- for i := 0; i < 600; i++ {
- controlFile, err := os.OpenFile(controlPath, unix.O_WRONLY|unix.O_NONBLOCK, 0)
- if err == nil {
- return controlFile, nil
- }
- if !isRetryable(err) {
- return nil, fmt.Errorf("could not open ctl file for terminal resize for container %s: %w", ctr.ID(), err)
- }
- time.Sleep(time.Second / 10)
- }
- return nil, fmt.Errorf("timeout waiting for %q", controlPath)
-}
-
-// AttachResize resizes the terminal used by the given container.
-func (r *ConmonOCIRuntime) AttachResize(ctr *Container, newSize resize.TerminalSize) error {
- controlFile, err := openControlFile(ctr, ctr.bundlePath())
- if err != nil {
- return err
- }
- defer controlFile.Close()
-
- logrus.Debugf("Received a resize event for container %s: %+v", ctr.ID(), newSize)
- if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 1, newSize.Height, newSize.Width); err != nil {
- return fmt.Errorf("failed to write to ctl file to resize terminal: %w", err)
- }
-
- return nil
-}
-
-// CheckpointContainer checkpoints the given container.
-func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) {
- // imagePath is used by CRIU to store the actual checkpoint files
- imagePath := ctr.CheckpointPath()
- if options.PreCheckPoint {
- imagePath = ctr.PreCheckPointPath()
- }
- // workPath will be used to store dump.log and stats-dump
- workPath := ctr.bundlePath()
- logrus.Debugf("Writing checkpoint to %s", imagePath)
- logrus.Debugf("Writing checkpoint logs to %s", workPath)
- logrus.Debugf("Pre-dump the container %t", options.PreCheckPoint)
- args := []string{}
- args = append(args, r.runtimeFlags...)
- args = append(args, "checkpoint")
- args = append(args, "--image-path")
- args = append(args, imagePath)
- args = append(args, "--work-path")
- args = append(args, workPath)
- if options.KeepRunning {
- args = append(args, "--leave-running")
- }
- if options.TCPEstablished {
- args = append(args, "--tcp-established")
- }
- if options.FileLocks {
- args = append(args, "--file-locks")
- }
- if !options.PreCheckPoint && options.KeepRunning {
- args = append(args, "--leave-running")
- }
- if options.PreCheckPoint {
- args = append(args, "--pre-dump")
- }
- if !options.PreCheckPoint && options.WithPrevious {
- args = append(
- args,
- "--parent-path",
- filepath.Join("..", preCheckpointDir),
- )
- }
-
- args = append(args, ctr.ID())
- logrus.Debugf("the args to checkpoint: %s %s", r.path, strings.Join(args, " "))
-
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return 0, err
- }
- env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
- if path, ok := os.LookupEnv("PATH"); ok {
- env = append(env, fmt.Sprintf("PATH=%s", path))
- }
-
+// Run the closure with the container's socket label set
+func (r *ConmonOCIRuntime) withContainerSocketLabel(ctr *Container, closure func() error) error {
runtime.LockOSThread()
if err := label.SetSocketLabel(ctr.ProcessLabel()); err != nil {
- return 0, err
+ return err
}
-
- runtimeCheckpointStarted := time.Now()
- err = utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...)
+ err := closure()
// Ignore error returned from SetSocketLabel("") call,
// can't recover.
if labelErr := label.SetSocketLabel(""); labelErr == nil {
@@ -834,576 +97,7 @@ func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options Container
} else {
logrus.Errorf("Unable to reset socket label: %q", labelErr)
}
-
- runtimeCheckpointDuration := func() int64 {
- if options.PrintStats {
- return time.Since(runtimeCheckpointStarted).Microseconds()
- }
- return 0
- }()
-
- return runtimeCheckpointDuration, err
-}
-
-func (r *ConmonOCIRuntime) CheckConmonRunning(ctr *Container) (bool, error) {
- if ctr.state.ConmonPID == 0 {
- // If the container is running or paused, assume Conmon is
- // running. We didn't record Conmon PID on some old versions, so
- // that is likely what's going on...
- // Unusual enough that we should print a warning message though.
- if ctr.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
- logrus.Warnf("Conmon PID is not set, but container is running!")
- return true, nil
- }
- // Container's not running, so conmon PID being unset is
- // expected. Conmon is not running.
- return false, nil
- }
-
- // We have a conmon PID. Ping it with signal 0.
- if err := unix.Kill(ctr.state.ConmonPID, 0); err != nil {
- if err == unix.ESRCH {
- return false, nil
- }
- return false, fmt.Errorf("error pinging container %s conmon with signal 0: %w", ctr.ID(), err)
- }
- return true, nil
-}
-
-// SupportsCheckpoint checks if the OCI runtime supports checkpointing
-// containers.
-func (r *ConmonOCIRuntime) SupportsCheckpoint() bool {
- return crutils.CRRuntimeSupportsCheckpointRestore(r.path)
-}
-
-// SupportsJSONErrors checks if the OCI runtime supports JSON-formatted error
-// messages.
-func (r *ConmonOCIRuntime) SupportsJSONErrors() bool {
- return r.supportsJSON
-}
-
-// SupportsNoCgroups checks if the OCI runtime supports running containers
-// without cgroups (the --cgroup-manager=disabled flag).
-func (r *ConmonOCIRuntime) SupportsNoCgroups() bool {
- return r.supportsNoCgroups
-}
-
-// SupportsKVM checks if the OCI runtime supports running containers
-// without KVM separation
-func (r *ConmonOCIRuntime) SupportsKVM() bool {
- return r.supportsKVM
-}
-
-// AttachSocketPath is the path to a single container's attach socket.
-func (r *ConmonOCIRuntime) AttachSocketPath(ctr *Container) (string, error) {
- if ctr == nil {
- return "", fmt.Errorf("must provide a valid container to get attach socket path: %w", define.ErrInvalidArg)
- }
-
- return filepath.Join(ctr.bundlePath(), "attach"), nil
-}
-
-// ExitFilePath is the path to a container's exit file.
-func (r *ConmonOCIRuntime) ExitFilePath(ctr *Container) (string, error) {
- if ctr == nil {
- return "", fmt.Errorf("must provide a valid container to get exit file path: %w", define.ErrInvalidArg)
- }
- return filepath.Join(r.exitsDir, ctr.ID()), nil
-}
-
-// RuntimeInfo provides information on the runtime.
-func (r *ConmonOCIRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) {
- runtimePackage := packageVersion(r.path)
- conmonPackage := packageVersion(r.conmonPath)
- runtimeVersion, err := r.getOCIRuntimeVersion()
- if err != nil {
- return nil, nil, fmt.Errorf("error getting version of OCI runtime %s: %w", r.name, err)
- }
- conmonVersion, err := r.getConmonVersion()
- if err != nil {
- return nil, nil, fmt.Errorf("error getting conmon version: %w", err)
- }
-
- conmon := define.ConmonInfo{
- Package: conmonPackage,
- Path: r.conmonPath,
- Version: conmonVersion,
- }
- ocirt := define.OCIRuntimeInfo{
- Name: r.name,
- Path: r.path,
- Package: runtimePackage,
- Version: runtimeVersion,
- }
- return &conmon, &ocirt, nil
-}
-
-// makeAccessible changes the path permission and each parent directory to have --x--x--x
-func makeAccessible(path string, uid, gid int) error {
- for ; path != "/"; path = filepath.Dir(path) {
- st, err := os.Stat(path)
- if err != nil {
- if os.IsNotExist(err) {
- return nil
- }
- return err
- }
- if int(st.Sys().(*syscall.Stat_t).Uid) == uid && int(st.Sys().(*syscall.Stat_t).Gid) == gid {
- continue
- }
- if st.Mode()&0111 != 0111 {
- if err := os.Chmod(path, st.Mode()|0111); err != nil {
- return err
- }
- }
- }
- return nil
-}
-
-// Wait for a container which has been sent a signal to stop
-func waitContainerStop(ctr *Container, timeout time.Duration) error {
- return waitPidStop(ctr.state.PID, timeout)
-}
-
-// Wait for a given PID to stop
-func waitPidStop(pid int, timeout time.Duration) error {
- done := make(chan struct{})
- chControl := make(chan struct{})
- go func() {
- for {
- select {
- case <-chControl:
- return
- default:
- if err := unix.Kill(pid, 0); err != nil {
- if err == unix.ESRCH {
- close(done)
- return
- }
- logrus.Errorf("Pinging PID %d with signal 0: %v", pid, err)
- }
- time.Sleep(100 * time.Millisecond)
- }
- }
- }()
- select {
- case <-done:
- return nil
- case <-time.After(timeout):
- close(chControl)
- return fmt.Errorf("given PIDs did not die within timeout")
- }
-}
-
-func (r *ConmonOCIRuntime) getLogTag(ctr *Container) (string, error) {
- logTag := ctr.LogTag()
- if logTag == "" {
- return "", nil
- }
- data, err := ctr.inspectLocked(false)
- if err != nil {
- // FIXME: this error should probably be returned
- return "", nil //nolint: nilerr
- }
- tmpl, err := template.New("container").Parse(logTag)
- if err != nil {
- return "", fmt.Errorf("template parsing error %s: %w", logTag, err)
- }
- var b bytes.Buffer
- err = tmpl.Execute(&b, data)
- if err != nil {
- return "", err
- }
- return b.String(), nil
-}
-
-// createOCIContainer generates this container's main conmon instance and prepares it for starting
-func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
- var stderrBuf bytes.Buffer
-
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return 0, err
- }
-
- parentSyncPipe, childSyncPipe, err := newPipe()
- if err != nil {
- return 0, fmt.Errorf("error creating socket pair: %w", err)
- }
- defer errorhandling.CloseQuiet(parentSyncPipe)
-
- childStartPipe, parentStartPipe, err := newPipe()
- if err != nil {
- return 0, fmt.Errorf("error creating socket pair for start pipe: %w", err)
- }
-
- defer errorhandling.CloseQuiet(parentStartPipe)
-
- var ociLog string
- if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON {
- ociLog = filepath.Join(ctr.state.RunDir, "oci-log")
- }
-
- logTag, err := r.getLogTag(ctr)
- if err != nil {
- return 0, err
- }
-
- if ctr.config.CgroupsMode == cgroupSplit {
- if err := utils.MoveUnderCgroupSubtree("runtime"); err != nil {
- return 0, err
- }
- }
-
- pidfile := ctr.config.PidFile
- if pidfile == "" {
- pidfile = filepath.Join(ctr.state.RunDir, "pidfile")
- }
-
- args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag)
-
- if ctr.config.SdNotifyMode == define.SdNotifyModeContainer && ctr.config.SdNotifySocket != "" {
- args = append(args, fmt.Sprintf("--sdnotify-socket=%s", ctr.config.SdNotifySocket))
- }
-
- if ctr.config.Spec.Process.Terminal {
- args = append(args, "-t")
- } else if ctr.config.Stdin {
- args = append(args, "-i")
- }
-
- if ctr.config.Timeout > 0 {
- args = append(args, fmt.Sprintf("--timeout=%d", ctr.config.Timeout))
- }
-
- if !r.enableKeyring {
- args = append(args, "--no-new-keyring")
- }
- if ctr.config.ConmonPidFile != "" {
- args = append(args, "--conmon-pidfile", ctr.config.ConmonPidFile)
- }
-
- if r.noPivot {
- args = append(args, "--no-pivot")
- }
-
- exitCommand, err := specgenutil.CreateExitCommandArgs(ctr.runtime.storageConfig, ctr.runtime.config, logrus.IsLevelEnabled(logrus.DebugLevel), ctr.AutoRemove(), false)
- if err != nil {
- return 0, err
- }
- exitCommand = append(exitCommand, ctr.config.ID)
-
- args = append(args, "--exit-command", exitCommand[0])
- for _, arg := range exitCommand[1:] {
- args = append(args, []string{"--exit-command-arg", arg}...)
- }
-
- // Pass down the LISTEN_* environment (see #10443).
- preserveFDs := ctr.config.PreserveFDs
- if val := os.Getenv("LISTEN_FDS"); val != "" {
- if ctr.config.PreserveFDs > 0 {
- logrus.Warnf("Ignoring LISTEN_FDS to preserve custom user-specified FDs")
- } else {
- fds, err := strconv.Atoi(val)
- if err != nil {
- return 0, fmt.Errorf("converting LISTEN_FDS=%s: %w", val, err)
- }
- preserveFDs = uint(fds)
- }
- }
-
- if preserveFDs > 0 {
- args = append(args, formatRuntimeOpts("--preserve-fds", fmt.Sprintf("%d", preserveFDs))...)
- }
-
- if restoreOptions != nil {
- args = append(args, "--restore", ctr.CheckpointPath())
- if restoreOptions.TCPEstablished {
- args = append(args, "--runtime-opt", "--tcp-established")
- }
- if restoreOptions.FileLocks {
- args = append(args, "--runtime-opt", "--file-locks")
- }
- if restoreOptions.Pod != "" {
- mountLabel := ctr.config.MountLabel
- processLabel := ctr.config.ProcessLabel
- if mountLabel != "" {
- args = append(
- args,
- "--runtime-opt",
- fmt.Sprintf(
- "--lsm-mount-context=%s",
- mountLabel,
- ),
- )
- }
- if processLabel != "" {
- args = append(
- args,
- "--runtime-opt",
- fmt.Sprintf(
- "--lsm-profile=selinux:%s",
- processLabel,
- ),
- )
- }
- }
- }
-
- logrus.WithFields(logrus.Fields{
- "args": args,
- }).Debugf("running conmon: %s", r.conmonPath)
-
- cmd := exec.Command(r.conmonPath, args...)
- cmd.SysProcAttr = &syscall.SysProcAttr{
- Setpgid: true,
- }
- // TODO this is probably a really bad idea for some uses
- // Make this configurable
- cmd.Stdin = os.Stdin
- cmd.Stdout = os.Stdout
- cmd.Stderr = os.Stderr
- if ctr.config.Spec.Process.Terminal {
- cmd.Stderr = &stderrBuf
- }
-
- // 0, 1 and 2 are stdin, stdout and stderr
- conmonEnv := r.configureConmonEnv(runtimeDir)
-
- var filesToClose []*os.File
- if preserveFDs > 0 {
- for fd := 3; fd < int(3+preserveFDs); fd++ {
- f := os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd))
- filesToClose = append(filesToClose, f)
- cmd.ExtraFiles = append(cmd.ExtraFiles, f)
- }
- }
-
- cmd.Env = r.conmonEnv
- // we don't want to step on users fds they asked to preserve
- // Since 0-2 are used for stdio, start the fds we pass in at preserveFDs+3
- cmd.Env = append(cmd.Env, fmt.Sprintf("_OCI_SYNCPIPE=%d", preserveFDs+3), fmt.Sprintf("_OCI_STARTPIPE=%d", preserveFDs+4))
- cmd.Env = append(cmd.Env, conmonEnv...)
- cmd.ExtraFiles = append(cmd.ExtraFiles, childSyncPipe, childStartPipe)
-
- if r.reservePorts && !rootless.IsRootless() && !ctr.config.NetMode.IsSlirp4netns() {
- ports, err := bindPorts(ctr.convertPortMappings())
- if err != nil {
- return 0, err
- }
- filesToClose = append(filesToClose, ports...)
-
- // Leak the port we bound in the conmon process. These fd's won't be used
- // by the container and conmon will keep the ports busy so that another
- // process cannot use them.
- cmd.ExtraFiles = append(cmd.ExtraFiles, ports...)
- }
-
- if ctr.config.NetMode.IsSlirp4netns() || rootless.IsRootless() {
- if ctr.config.PostConfigureNetNS {
- havePortMapping := len(ctr.config.PortMappings) > 0
- if havePortMapping {
- ctr.rootlessPortSyncR, ctr.rootlessPortSyncW, err = os.Pipe()
- if err != nil {
- return 0, fmt.Errorf("failed to create rootless port sync pipe: %w", err)
- }
- }
- ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe()
- if err != nil {
- return 0, fmt.Errorf("failed to create rootless network sync pipe: %w", err)
- }
- } else {
- if ctr.rootlessSlirpSyncR != nil {
- defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncR)
- }
- if ctr.rootlessSlirpSyncW != nil {
- defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncW)
- }
- }
- // Leak one end in conmon, the other one will be leaked into slirp4netns
- cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessSlirpSyncW)
-
- if ctr.rootlessPortSyncW != nil {
- defer errorhandling.CloseQuiet(ctr.rootlessPortSyncW)
- // Leak one end in conmon, the other one will be leaked into rootlessport
- cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessPortSyncW)
- }
- }
- var runtimeRestoreStarted time.Time
- if restoreOptions != nil {
- runtimeRestoreStarted = time.Now()
- }
- err = startCommand(cmd, ctr)
-
- // regardless of whether we errored or not, we no longer need the children pipes
- childSyncPipe.Close()
- childStartPipe.Close()
- if err != nil {
- return 0, err
- }
- if err := r.moveConmonToCgroupAndSignal(ctr, cmd, parentStartPipe); err != nil {
- return 0, err
- }
- /* Wait for initial setup and fork, and reap child */
- err = cmd.Wait()
- if err != nil {
- return 0, err
- }
-
- pid, err := readConmonPipeData(r.name, parentSyncPipe, ociLog)
- if err != nil {
- if err2 := r.DeleteContainer(ctr); err2 != nil {
- logrus.Errorf("Removing container %s from runtime after creation failed", ctr.ID())
- }
- return 0, err
- }
- ctr.state.PID = pid
-
- conmonPID, err := readConmonPidFile(ctr.config.ConmonPidFile)
- if err != nil {
- logrus.Warnf("Error reading conmon pid file for container %s: %v", ctr.ID(), err)
- } else if conmonPID > 0 {
- // conmon not having a pid file is a valid state, so don't set it if we don't have it
- logrus.Infof("Got Conmon PID as %d", conmonPID)
- ctr.state.ConmonPID = conmonPID
- }
-
- runtimeRestoreDuration := func() int64 {
- if restoreOptions != nil && restoreOptions.PrintStats {
- return time.Since(runtimeRestoreStarted).Microseconds()
- }
- return 0
- }()
-
- // These fds were passed down to the runtime. Close them
- // and not interfere
- for _, f := range filesToClose {
- errorhandling.CloseQuiet(f)
- }
-
- return runtimeRestoreDuration, nil
-}
-
-// configureConmonEnv gets the environment values to add to conmon's exec struct
-// TODO this may want to be less hardcoded/more configurable in the future
-func (r *ConmonOCIRuntime) configureConmonEnv(runtimeDir string) []string {
- var env []string
- for _, e := range os.Environ() {
- if strings.HasPrefix(e, "LC_") {
- env = append(env, e)
- }
- }
- conf, ok := os.LookupEnv("CONTAINERS_CONF")
- if ok {
- env = append(env, fmt.Sprintf("CONTAINERS_CONF=%s", conf))
- }
- env = append(env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir))
- env = append(env, fmt.Sprintf("_CONTAINERS_USERNS_CONFIGURED=%s", os.Getenv("_CONTAINERS_USERNS_CONFIGURED")))
- env = append(env, fmt.Sprintf("_CONTAINERS_ROOTLESS_UID=%s", os.Getenv("_CONTAINERS_ROOTLESS_UID")))
- home := homedir.Get()
- if home != "" {
- env = append(env, fmt.Sprintf("HOME=%s", home))
- }
-
- return env
-}
-
-// sharedConmonArgs takes common arguments for exec and create/restore and formats them for the conmon CLI
-func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, ociLogPath, logDriver, logTag string) []string {
- // set the conmon API version to be able to use the correct sync struct keys
- args := []string{
- "--api-version", "1",
- "-c", ctr.ID(),
- "-u", cuuid,
- "-r", r.path,
- "-b", bundlePath,
- "-p", pidPath,
- "-n", ctr.Name(),
- "--exit-dir", exitDir,
- "--full-attach",
- }
- if len(r.runtimeFlags) > 0 {
- rFlags := []string{}
- for _, arg := range r.runtimeFlags {
- rFlags = append(rFlags, "--runtime-arg", arg)
- }
- args = append(args, rFlags...)
- }
-
- if ctr.CgroupManager() == config.SystemdCgroupsManager && !ctr.config.NoCgroups && ctr.config.CgroupsMode != cgroupSplit {
- args = append(args, "-s")
- }
-
- var logDriverArg string
- switch logDriver {
- case define.JournaldLogging:
- logDriverArg = define.JournaldLogging
- case define.NoLogging:
- logDriverArg = define.NoLogging
- case define.PassthroughLogging:
- logDriverArg = define.PassthroughLogging
- //lint:ignore ST1015 the default case has to be here
- default: //nolint:stylecheck,gocritic
- // No case here should happen except JSONLogging, but keep this here in case the options are extended
- logrus.Errorf("%s logging specified but not supported. Choosing k8s-file logging instead", ctr.LogDriver())
- fallthrough
- case "":
- // to get here, either a user would specify `--log-driver ""`, or this came from another place in libpod
- // since the former case is obscure, and the latter case isn't an error, let's silently fallthrough
- fallthrough
- case define.JSONLogging:
- fallthrough
- case define.KubernetesLogging:
- logDriverArg = fmt.Sprintf("%s:%s", define.KubernetesLogging, logPath)
- }
-
- args = append(args, "-l", logDriverArg)
- logLevel := logrus.GetLevel()
- args = append(args, "--log-level", logLevel.String())
-
- if logLevel == logrus.DebugLevel {
- logrus.Debugf("%s messages will be logged to syslog", r.conmonPath)
- args = append(args, "--syslog")
- }
-
- size := r.logSizeMax
- if ctr.config.LogSize > 0 {
- size = ctr.config.LogSize
- }
- if size > 0 {
- args = append(args, "--log-size-max", fmt.Sprintf("%v", size))
- }
-
- if ociLogPath != "" {
- args = append(args, "--runtime-arg", "--log-format=json", "--runtime-arg", "--log", fmt.Sprintf("--runtime-arg=%s", ociLogPath))
- }
- if logTag != "" {
- args = append(args, "--log-tag", logTag)
- }
- if ctr.config.NoCgroups {
- logrus.Debugf("Running with no Cgroups")
- args = append(args, "--runtime-arg", "--cgroup-manager", "--runtime-arg", "disabled")
- }
- return args
-}
-
-func startCommand(cmd *exec.Cmd, ctr *Container) error {
- // Make sure to unset the NOTIFY_SOCKET and reset it afterwards if needed.
- switch ctr.config.SdNotifyMode {
- case define.SdNotifyModeContainer, define.SdNotifyModeIgnore:
- if prev := os.Getenv("NOTIFY_SOCKET"); prev != "" {
- if err := os.Unsetenv("NOTIFY_SOCKET"); err != nil {
- logrus.Warnf("Error unsetting NOTIFY_SOCKET %v", err)
- }
- defer func() {
- if err := os.Setenv("NOTIFY_SOCKET", prev); err != nil {
- logrus.Errorf("Resetting NOTIFY_SOCKET=%s", prev)
- }
- }()
- }
- }
-
- return cmd.Start()
+ return err
}
// moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup
@@ -1475,271 +169,6 @@ func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec
return nil
}
-// newPipe creates a unix socket pair for communication.
-// Returns two files - first is parent, second is child.
-func newPipe() (*os.File, *os.File, error) {
- fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0)
- if err != nil {
- return nil, nil, err
- }
- return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil
-}
-
-// readConmonPidFile attempts to read conmon's pid from its pid file
-func readConmonPidFile(pidFile string) (int, error) {
- // Let's try reading the Conmon pid at the same time.
- if pidFile != "" {
- contents, err := ioutil.ReadFile(pidFile)
- if err != nil {
- return -1, err
- }
- // Convert it to an int
- conmonPID, err := strconv.Atoi(string(contents))
- if err != nil {
- return -1, err
- }
- return conmonPID, nil
- }
- return 0, nil
-}
-
-// readConmonPipeData attempts to read a syncInfo struct from the pipe
-func readConmonPipeData(runtimeName string, pipe *os.File, ociLog string) (int, error) {
- // syncInfo is used to return data from monitor process to daemon
- type syncInfo struct {
- Data int `json:"data"`
- Message string `json:"message,omitempty"`
- }
-
- // Wait to get container pid from conmon
- type syncStruct struct {
- si *syncInfo
- err error
- }
- ch := make(chan syncStruct)
- go func() {
- var si *syncInfo
- rdr := bufio.NewReader(pipe)
- b, err := rdr.ReadBytes('\n')
- // ignore EOF here, error is returned even when data was read
- // if it is no valid json unmarshal will fail below
- if err != nil && !errors.Is(err, io.EOF) {
- ch <- syncStruct{err: err}
- }
- if err := json.Unmarshal(b, &si); err != nil {
- ch <- syncStruct{err: fmt.Errorf("conmon bytes %q: %w", string(b), err)}
- return
- }
- ch <- syncStruct{si: si}
- }()
-
- data := -1 //nolint: wastedassign
- select {
- case ss := <-ch:
- if ss.err != nil {
- if ociLog != "" {
- ociLogData, err := ioutil.ReadFile(ociLog)
- if err == nil {
- var ociErr ociError
- if err := json.Unmarshal(ociLogData, &ociErr); err == nil {
- return -1, getOCIRuntimeError(runtimeName, ociErr.Msg)
- }
- }
- }
- return -1, fmt.Errorf("container create failed (no logs from conmon): %w", ss.err)
- }
- logrus.Debugf("Received: %d", ss.si.Data)
- if ss.si.Data < 0 {
- if ociLog != "" {
- ociLogData, err := ioutil.ReadFile(ociLog)
- if err == nil {
- var ociErr ociError
- if err := json.Unmarshal(ociLogData, &ociErr); err == nil {
- return ss.si.Data, getOCIRuntimeError(runtimeName, ociErr.Msg)
- }
- }
- }
- // If we failed to parse the JSON errors, then print the output as it is
- if ss.si.Message != "" {
- return ss.si.Data, getOCIRuntimeError(runtimeName, ss.si.Message)
- }
- return ss.si.Data, fmt.Errorf("container create failed: %w", define.ErrInternal)
- }
- data = ss.si.Data
- case <-time.After(define.ContainerCreateTimeout):
- return -1, fmt.Errorf("container creation timeout: %w", define.ErrInternal)
- }
- return data, nil
-}
-
-// writeConmonPipeData writes nonce data to a pipe
-func writeConmonPipeData(pipe *os.File) error {
- someData := []byte{0}
- _, err := pipe.Write(someData)
- return err
-}
-
-// formatRuntimeOpts prepends opts passed to it with --runtime-opt for passing to conmon
-func formatRuntimeOpts(opts ...string) []string {
- args := make([]string, 0, len(opts)*2)
- for _, o := range opts {
- args = append(args, "--runtime-opt", o)
- }
- return args
-}
-
-// getConmonVersion returns a string representation of the conmon version.
-func (r *ConmonOCIRuntime) getConmonVersion() (string, error) {
- output, err := utils.ExecCmd(r.conmonPath, "--version")
- if err != nil {
- return "", err
- }
- return strings.TrimSuffix(strings.Replace(output, "\n", ", ", 1), "\n"), nil
-}
-
-// getOCIRuntimeVersion returns a string representation of the OCI runtime's
-// version.
-func (r *ConmonOCIRuntime) getOCIRuntimeVersion() (string, error) {
- output, err := utils.ExecCmd(r.path, "--version")
- if err != nil {
- return "", err
- }
- return strings.TrimSuffix(output, "\n"), nil
-}
-
-// Copy data from container to HTTP connection, for terminal attach.
-// Container is the container's attach socket connection, http is a buffer for
-// the HTTP connection. cid is the ID of the container the attach session is
-// running for (used solely for error messages).
-func httpAttachTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter, cid string) error {
- buf := make([]byte, bufferSize)
- for {
- numR, err := container.Read(buf)
- logrus.Debugf("Read fd(%d) %d/%d bytes for container %s", int(buf[0]), numR, len(buf), cid)
-
- if numR > 0 {
- switch buf[0] {
- case AttachPipeStdout:
- // Do nothing
- default:
- logrus.Errorf("Received unexpected attach type %+d, discarding %d bytes", buf[0], numR)
- continue
- }
-
- numW, err2 := http.Write(buf[1:numR])
- if err2 != nil {
- if err != nil {
- logrus.Errorf("Reading container %s STDOUT: %v", cid, err)
- }
- return err2
- } else if numW+1 != numR {
- return io.ErrShortWrite
- }
- // We need to force the buffer to write immediately, so
- // there isn't a delay on the terminal side.
- if err2 := http.Flush(); err2 != nil {
- if err != nil {
- logrus.Errorf("Reading container %s STDOUT: %v", cid, err)
- }
- return err2
- }
- }
- if err != nil {
- if err == io.EOF {
- return nil
- }
- return err
- }
- }
-}
-
-// Copy data from a container to an HTTP connection, for non-terminal attach.
-// Appends a header to multiplex input.
-func httpAttachNonTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter, cid string, stdin, stdout, stderr bool) error {
- buf := make([]byte, bufferSize)
- for {
- numR, err := container.Read(buf)
- if numR > 0 {
- var headerBuf []byte
-
- // Subtract 1 because we strip the first byte (used for
- // multiplexing by Conmon).
- headerLen := uint32(numR - 1)
- // Practically speaking, we could make this buf[0] - 1,
- // but we need to validate it anyway.
- switch buf[0] {
- case AttachPipeStdin:
- headerBuf = makeHTTPAttachHeader(0, headerLen)
- if !stdin {
- continue
- }
- case AttachPipeStdout:
- if !stdout {
- continue
- }
- headerBuf = makeHTTPAttachHeader(1, headerLen)
- case AttachPipeStderr:
- if !stderr {
- continue
- }
- headerBuf = makeHTTPAttachHeader(2, headerLen)
- default:
- logrus.Errorf("Received unexpected attach type %+d, discarding %d bytes", buf[0], numR)
- continue
- }
-
- numH, err2 := http.Write(headerBuf)
- if err2 != nil {
- if err != nil {
- logrus.Errorf("Reading container %s standard streams: %v", cid, err)
- }
-
- return err2
- }
- // Hardcoding header length is pretty gross, but
- // fast. Should be safe, as this is a fixed part
- // of the protocol.
- if numH != 8 {
- if err != nil {
- logrus.Errorf("Reading container %s standard streams: %v", cid, err)
- }
-
- return io.ErrShortWrite
- }
-
- numW, err2 := http.Write(buf[1:numR])
- if err2 != nil {
- if err != nil {
- logrus.Errorf("Reading container %s standard streams: %v", cid, err)
- }
-
- return err2
- } else if numW+1 != numR {
- if err != nil {
- logrus.Errorf("Reading container %s standard streams: %v", cid, err)
- }
-
- return io.ErrShortWrite
- }
- // We need to force the buffer to write immediately, so
- // there isn't a delay on the terminal side.
- if err2 := http.Flush(); err2 != nil {
- if err != nil {
- logrus.Errorf("Reading container %s STDOUT: %v", cid, err)
- }
- return err2
- }
- }
- if err != nil {
- if err == io.EOF {
- return nil
- }
-
- return err
- }
- }
-}
-
// GetLimits converts spec resource limits to cgroup consumable limits
func GetLimits(resource *spec.LinuxResources) (runcconfig.Resources, error) {
if resource == nil {
diff --git a/libpod/oci_conmon_unsupported.go b/libpod/oci_conmon_unsupported.go
new file mode 100644
index 000000000..cc6d68e89
--- /dev/null
+++ b/libpod/oci_conmon_unsupported.go
@@ -0,0 +1,24 @@
+//go:build !linux && !freebsd
+// +build !linux,!freebsd
+
+package libpod
+
+import (
+ "errors"
+
+ "github.com/containers/common/pkg/config"
+ "github.com/containers/common/pkg/resize"
+)
+
+// newConmonOCIRuntime is the stub compiled on platforms other than Linux and
+// FreeBSD (see this file's build constraints).
+// The function of this name is documented as making a new Conmon-based OCI
+// runtime with the given options, where Conmon wraps the given OCI runtime,
+// which can be `runc`, `crun`, or any runtime with a runc-compatible CLI, and
+// the first path that points to a valid executable is used.
+// This stub ignores every argument and always returns a nil runtime together
+// with a "not supported on this platform" error.
+// Deliberately private. Someone should not be able to construct this outside of
+// libpod.
+func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtimeFlags []string, runtimeCfg *config.Config) (OCIRuntime, error) {
+ return nil, errors.New("newConmonOCIRuntime not supported on this platform")
+}
+
+// registerResizeFunc is a no-op on this platform: neither the resize channel
+// r nor bundlePath is read.
+// NOTE(review): presumably the parameters exist only to match the
+// platform-specific implementations - confirm.
+func registerResizeFunc(r <-chan resize.TerminalSize, bundlePath string) {
+}
diff --git a/libpod/options.go b/libpod/options.go
index 43ed1ff78..d31741094 100644
--- a/libpod/options.go
+++ b/libpod/options.go
@@ -1695,14 +1695,22 @@ func withSetAnon() VolumeCreateOption {
}
}
-// WithVolumeDriverTimeout sets the volume creation timeout period
-func WithVolumeDriverTimeout(timeout int) VolumeCreateOption {
+// WithVolumeDriverTimeout sets the volume creation timeout period.
+// Only usable if a non-local volume driver is in use.
+func WithVolumeDriverTimeout(timeout uint) VolumeCreateOption {
return func(volume *Volume) error {
if volume.valid {
return define.ErrVolumeFinalized
}
- volume.config.Timeout = timeout
+ if volume.config.Driver == "" || volume.config.Driver == define.VolumeDriverLocal {
+ return fmt.Errorf("Volume driver timeout can only be used with non-local volume drivers: %w", define.ErrInvalidArg)
+ }
+
+ tm := timeout
+
+ volume.config.Timeout = &tm
+
return nil
}
}
diff --git a/libpod/plugin/volume_api.go b/libpod/plugin/volume_api.go
index 0a5eaae53..b13578388 100644
--- a/libpod/plugin/volume_api.go
+++ b/libpod/plugin/volume_api.go
@@ -3,6 +3,7 @@ package plugin
import (
"bytes"
"context"
+ "errors"
"fmt"
"io/ioutil"
"net"
@@ -13,8 +14,7 @@ import (
"sync"
"time"
- "errors"
-
+ "github.com/containers/common/pkg/config"
"github.com/containers/podman/v4/libpod/define"
"github.com/docker/go-plugins-helpers/sdk"
"github.com/docker/go-plugins-helpers/volume"
@@ -40,7 +40,6 @@ var (
)
const (
- defaultTimeout = 5 * time.Second
volumePluginType = "VolumeDriver"
)
@@ -129,7 +128,7 @@ func validatePlugin(newPlugin *VolumePlugin) error {
// GetVolumePlugin gets a single volume plugin, with the given name, at the
// given path.
-func GetVolumePlugin(name string, path string, timeout int) (*VolumePlugin, error) {
+func GetVolumePlugin(name string, path string, timeout *uint, cfg *config.Config) (*VolumePlugin, error) {
pluginsLock.Lock()
defer pluginsLock.Unlock()
@@ -152,13 +151,11 @@ func GetVolumePlugin(name string, path string, timeout int) (*VolumePlugin, erro
// Need an HTTP client to force a Unix connection.
// And since we can reuse it, might as well cache it.
client := new(http.Client)
- client.Timeout = defaultTimeout
- // if the user specified a non-zero timeout, use their value. Else, keep the default.
- if timeout != 0 {
- if time.Duration(timeout)*time.Second < defaultTimeout {
- logrus.Warnf("the default timeout for volume creation is %d seconds, setting a time less than that may break this feature.", defaultTimeout)
- }
- client.Timeout = time.Duration(timeout) * time.Second
+ client.Timeout = 5 * time.Second
+ if timeout != nil {
+ client.Timeout = time.Duration(*timeout) * time.Second
+ } else if cfg != nil {
+ client.Timeout = time.Duration(cfg.Engine.VolumePluginTimeout) * time.Second
}
// This bit borrowed from pkg/bindings/connection.go
client.Transport = &http.Transport{
diff --git a/libpod/pod_top_unsupported.go b/libpod/pod_top_unsupported.go
new file mode 100644
index 000000000..92323043a
--- /dev/null
+++ b/libpod/pod_top_unsupported.go
@@ -0,0 +1,20 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "errors"
+)
+
+// GetPodPidInformation is documented as returning process-related data of
+// all processes in the pod. The output data can be controlled via the
+// `descriptors` argument which expects format descriptors and supports all
+// AIX format descriptors of ps(1) plus some additional ones to, for
+// instance, inspect the set of effective capabilities. Each element in the
+// returned string slice is a tab-separated string.
+//
+// This non-Linux stub ignores descriptors and always returns a nil slice and
+// a "not implemented" error.
+//
+// For more details, please refer to github.com/containers/psgo.
+func (p *Pod) GetPodPidInformation(descriptors []string) ([]string, error) {
+ return nil, errors.New("not implemented (*Pod) GetPodPidInformation")
+}
diff --git a/libpod/runtime.go b/libpod/runtime.go
index ea4b34954..9b97fd724 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -2,15 +2,11 @@ package libpod
import (
"bufio"
- "bytes"
"context"
"errors"
"fmt"
"os"
- "os/exec"
"path/filepath"
- "regexp"
- "strconv"
"strings"
"sync"
"syscall"
@@ -44,17 +40,6 @@ import (
"github.com/sirupsen/logrus"
)
-const (
- // conmonMinMajorVersion is the major version required for conmon.
- conmonMinMajorVersion = 2
-
- // conmonMinMinorVersion is the minor version required for conmon.
- conmonMinMinorVersion = 0
-
- // conmonMinPatchVersion is the sub-minor version required for conmon.
- conmonMinPatchVersion = 24
-)
-
// A RuntimeOption is a functional option which alters the Runtime created by
// NewRuntime
type RuntimeOption func(*Runtime) error
@@ -308,7 +293,7 @@ func getLockManager(runtime *Runtime) (lock.Manager, error) {
// Sets up containers/storage, state store, OCI runtime
func makeRuntime(runtime *Runtime) (retErr error) {
// Find a working conmon binary
- cPath, err := findConmon(runtime.config.Engine.ConmonPath)
+ cPath, err := runtime.config.FindConmon()
if err != nil {
return err
}
@@ -670,102 +655,6 @@ func makeRuntime(runtime *Runtime) (retErr error) {
return nil
}
-// findConmon iterates over conmonPaths and returns the path
-// to the first conmon binary with a new enough version. If none is found,
-// we try to do a path lookup of "conmon".
-func findConmon(conmonPaths []string) (string, error) {
- foundOutdatedConmon := false
- for _, path := range conmonPaths {
- stat, err := os.Stat(path)
- if err != nil {
- continue
- }
- if stat.IsDir() {
- continue
- }
- if err := probeConmon(path); err != nil {
- logrus.Warnf("Conmon at %s invalid: %v", path, err)
- foundOutdatedConmon = true
- continue
- }
- logrus.Debugf("Using conmon: %q", path)
- return path, nil
- }
-
- // Search the $PATH as last fallback
- if path, err := exec.LookPath("conmon"); err == nil {
- if err := probeConmon(path); err != nil {
- logrus.Warnf("Conmon at %s is invalid: %v", path, err)
- foundOutdatedConmon = true
- } else {
- logrus.Debugf("Using conmon from $PATH: %q", path)
- return path, nil
- }
- }
-
- if foundOutdatedConmon {
- return "", fmt.Errorf(
- "please update to v%d.%d.%d or later: %w",
- conmonMinMajorVersion, conmonMinMinorVersion, conmonMinPatchVersion, define.ErrConmonOutdated)
- }
-
- return "", fmt.Errorf(
- "could not find a working conmon binary (configured options: %v): %w",
- conmonPaths, define.ErrInvalidArg)
-}
-
-// probeConmon calls conmon --version and verifies it is a new enough version for
-// the runtime expectations the container engine currently has.
-func probeConmon(conmonBinary string) error {
- cmd := exec.Command(conmonBinary, "--version")
- var out bytes.Buffer
- cmd.Stdout = &out
- err := cmd.Run()
- if err != nil {
- return err
- }
- r := regexp.MustCompile(`^conmon version (?P<Major>\d+).(?P<Minor>\d+).(?P<Patch>\d+)`)
-
- matches := r.FindStringSubmatch(out.String())
- if len(matches) != 4 {
- return fmt.Errorf("%v: %w", define.ErrConmonVersionFormat, err)
- }
- major, err := strconv.Atoi(matches[1])
- if err != nil {
- return fmt.Errorf("%v: %w", define.ErrConmonVersionFormat, err)
- }
- if major < conmonMinMajorVersion {
- return define.ErrConmonOutdated
- }
- if major > conmonMinMajorVersion {
- return nil
- }
-
- minor, err := strconv.Atoi(matches[2])
- if err != nil {
- return fmt.Errorf("%v: %w", define.ErrConmonVersionFormat, err)
- }
- if minor < conmonMinMinorVersion {
- return define.ErrConmonOutdated
- }
- if minor > conmonMinMinorVersion {
- return nil
- }
-
- patch, err := strconv.Atoi(matches[3])
- if err != nil {
- return fmt.Errorf("%v: %w", define.ErrConmonVersionFormat, err)
- }
- if patch < conmonMinPatchVersion {
- return define.ErrConmonOutdated
- }
- if patch > conmonMinPatchVersion {
- return nil
- }
-
- return nil
-}
-
// TmpDir gets the current Libpod temporary files directory.
func (r *Runtime) TmpDir() (string, error) {
if !r.valid {
@@ -1208,7 +1097,7 @@ func (r *Runtime) getVolumePlugin(volConfig *VolumeConfig) (*plugin.VolumePlugin
return nil, fmt.Errorf("no volume plugin with name %s available: %w", name, define.ErrMissingPlugin)
}
- return plugin.GetVolumePlugin(name, pluginPath, timeout)
+ return plugin.GetVolumePlugin(name, pluginPath, timeout, r.config)
}
// GetSecretsStorageDir returns the directory that the secrets manager should take
diff --git a/libpod/runtime_migrate_unsupported.go b/libpod/runtime_migrate_unsupported.go
new file mode 100644
index 000000000..77c2737a9
--- /dev/null
+++ b/libpod/runtime_migrate_unsupported.go
@@ -0,0 +1,16 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "errors"
+)
+
+// stopPauseProcess is not implemented on non-Linux platforms; this stub does
+// nothing and always returns a "not implemented" error.
+func (r *Runtime) stopPauseProcess() error {
+ return errors.New("not implemented (*Runtime) stopPauseProcess")
+}
+
+// migrate is not implemented on non-Linux platforms; this stub does nothing
+// and always returns a "not implemented" error.
+func (r *Runtime) migrate() error {
+ return errors.New("not implemented (*Runtime) migrate")
+}
diff --git a/libpod/runtime_pod_unsupported.go b/libpod/runtime_pod_unsupported.go
new file mode 100644
index 000000000..0c7ff8655
--- /dev/null
+++ b/libpod/runtime_pod_unsupported.go
@@ -0,0 +1,30 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "context"
+ "errors"
+
+ "github.com/containers/podman/v4/pkg/specgen"
+)
+
+// NewPod is documented as making a new, empty pod.
+// This non-Linux stub ignores ctx, p and options and always returns a nil pod
+// and a "not implemented" error.
+func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, options ...PodCreateOption) (_ *Pod, deferredErr error) {
+ return nil, errors.New("not implemented (*Runtime) NewPod")
+}
+
+// AddInfra is documented as adding the created infra container to the pod
+// state.
+// This non-Linux stub ignores ctx, pod and infraCtr and always returns a nil
+// pod and a "not implemented" error.
+func (r *Runtime) AddInfra(ctx context.Context, pod *Pod, infraCtr *Container) (*Pod, error) {
+ return nil, errors.New("not implemented (*Runtime) AddInfra")
+}
+
+// SavePod is documented as a helper function to save the pod state from
+// outside of libpod.
+// This non-Linux stub ignores pod and always returns a "not implemented"
+// error.
+func (r *Runtime) SavePod(pod *Pod) error {
+ return errors.New("not implemented (*Runtime) SavePod")
+}
+
+// removePod is not implemented on non-Linux platforms; this stub ignores all
+// of its arguments and always returns a "not implemented" error.
+func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, timeout *uint) error {
+ return errors.New("not implemented (*Runtime) removePod")
+}
diff --git a/libpod/runtime_volume_linux.go b/libpod/runtime_volume_linux.go
index 1f354e41b..65f2a1005 100644
--- a/libpod/runtime_volume_linux.go
+++ b/libpod/runtime_volume_linux.go
@@ -184,7 +184,7 @@ func (r *Runtime) UpdateVolumePlugins(ctx context.Context) *define.VolumeReload
)
for driverName, socket := range r.config.Engine.VolumePlugins {
- driver, err := volplugin.GetVolumePlugin(driverName, socket, 0)
+ driver, err := volplugin.GetVolumePlugin(driverName, socket, nil, r.config)
if err != nil {
errs = append(errs, err)
continue
diff --git a/libpod/runtime_volume_unsupported.go b/libpod/runtime_volume_unsupported.go
new file mode 100644
index 000000000..c2816b817
--- /dev/null
+++ b/libpod/runtime_volume_unsupported.go
@@ -0,0 +1,42 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "context"
+ "errors"
+
+ "github.com/containers/podman/v4/libpod/define"
+)
+
+// NewVolume creates a new empty volume.
+// It returns define.ErrRuntimeStopped when the runtime is no longer valid;
+// otherwise it forwards options to newVolume with noCreatePluginVolume set to
+// false. ctx is not used.
+// In this non-Linux file newVolume always returns an error, so the call
+// cannot succeed here.
+func (r *Runtime) NewVolume(ctx context.Context, options ...VolumeCreateOption) (*Volume, error) {
+ if !r.valid {
+ return nil, define.ErrRuntimeStopped
+ }
+ return r.newVolume(false, options...)
+}
+
+// newVolume is the internal volume-creation helper called by NewVolume.
+// This non-Linux stub ignores noCreatePluginVolume and options and always
+// returns a nil volume and a "not implemented" error.
+func (r *Runtime) newVolume(noCreatePluginVolume bool, options ...VolumeCreateOption) (*Volume, error) {
+ return nil, errors.New("not implemented (*Runtime) newVolume")
+}
+
+// UpdateVolumePlugins is documented as reading all volumes from all
+// configured volume plugins and importing them into the libpod db, checking
+// whether existing libpod volumes were removed in the plugin (and if so
+// trying to remove them from libpod), continuing on errors to do as much as
+// possible, and returning all errors as an array in the returned struct.
+// That operation is described as best effort: it has many race conditions
+// and cannot guarantee a perfect state since plugins can be modified from the
+// outside at any time.
+//
+// This non-Linux stub does none of that: it ignores ctx and returns nil.
+// NOTE(review): no error is reported, so callers must tolerate a nil
+// *define.VolumeReload - confirm.
+func (r *Runtime) UpdateVolumePlugins(ctx context.Context) *define.VolumeReload {
+ return nil
+}
+
+// removeVolume is documented as removing the specified volume from state as
+// well as tearing down its mountpoint and storage.
+// ignoreVolumePlugin is used to only remove the volume from the db and not the plugin,
+// this is required when the volume was already removed from the plugin, i.e. in UpdateVolumePlugins().
+// This non-Linux stub ignores all of its arguments and always returns a
+// "not implemented" error.
+func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool, timeout *uint, ignoreVolumePlugin bool) error {
+ return errors.New("not implemented (*Runtime) removeVolume")
+}
diff --git a/libpod/stats_unsupported.go b/libpod/stats_unsupported.go
new file mode 100644
index 000000000..b23333c2e
--- /dev/null
+++ b/libpod/stats_unsupported.go
@@ -0,0 +1,17 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "errors"
+
+ "github.com/containers/podman/v4/libpod/define"
+)
+
+// GetContainerStats is documented as getting the running stats for a given
+// container, where previousStats is used to correctly calculate cpu
+// percentages and should be nil if there is no previous stat for this
+// container.
+// This non-Linux stub ignores previousStats and always returns nil stats and
+// a "not implemented" error.
+func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*define.ContainerStats, error) {
+ return nil, errors.New("not implemented (*Container) GetContainerStats")
+}
diff --git a/libpod/util_unsupported.go b/libpod/util_unsupported.go
new file mode 100644
index 000000000..d2ec3ae7b
--- /dev/null
+++ b/libpod/util_unsupported.go
@@ -0,0 +1,27 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "errors"
+
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+// systemdSliceFromPath is documented as making a new systemd slice under the
+// given parent with the given name.
+// The parent must be a slice. The name must NOT include ".slice"
+// This non-Linux stub ignores parent, name and resources and always returns
+// an empty string and a "not implemented" error.
+func systemdSliceFromPath(parent, name string, resources *spec.LinuxResources) (string, error) {
+ return "", errors.New("not implemented systemdSliceFromPath")
+}
+
+// Unmount is documented as unmounting a target directory.
+// This non-Linux stub is a no-op: mount is ignored and nothing is reported to
+// the caller.
+func Unmount(mount string) {
+}
+
+// LabelVolumePath is documented as taking a mount path for a volume and
+// giving it an SELinux label of either shared or not.
+// This non-Linux stub ignores path and always returns a "not implemented"
+// error.
+func LabelVolumePath(path string) error {
+ return errors.New("not implemented LabelVolumePath")
+}
diff --git a/libpod/volume.go b/libpod/volume.go
index 2e8cd77a5..a054e4032 100644
--- a/libpod/volume.go
+++ b/libpod/volume.go
@@ -56,7 +56,7 @@ type VolumeConfig struct {
// quota tracking.
DisableQuota bool `json:"disableQuota,omitempty"`
// Timeout allows users to override the default driver timeout of 5 seconds
- Timeout int
+ Timeout *uint `json:"timeout,omitempty"`
}
// VolumeState holds the volume's mutable state.
diff --git a/libpod/volume_inspect.go b/libpod/volume_inspect.go
index dd2f3fd01..c3872bca7 100644
--- a/libpod/volume_inspect.go
+++ b/libpod/volume_inspect.go
@@ -64,7 +64,12 @@ func (v *Volume) Inspect() (*define.InspectVolumeData, error) {
data.MountCount = v.state.MountCount
data.NeedsCopyUp = v.state.NeedsCopyUp
data.NeedsChown = v.state.NeedsChown
- data.Timeout = v.config.Timeout
+
+ if v.config.Timeout != nil {
+ data.Timeout = *v.config.Timeout
+ } else if v.UsesVolumeDriver() {
+ data.Timeout = v.runtime.config.Engine.VolumePluginTimeout
+ }
return data, nil
}
diff --git a/libpod/volume_internal_unsupported.go b/libpod/volume_internal_unsupported.go
new file mode 100644
index 000000000..50515e692
--- /dev/null
+++ b/libpod/volume_internal_unsupported.go
@@ -0,0 +1,32 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "errors"
+)
+
+// mount is documented as mounting the volume if necessary.
+// A mount is necessary if a volume has any options set.
+// If a mount is necessary, v.state.MountCount will be incremented.
+// If it was 0 when the increment occurred, the volume will be mounted on the
+// host. Otherwise, we assume it is already mounted.
+// Must be done while the volume is locked.
+// Is a no-op on volumes that do not require a mount (as defined by
+// volumeNeedsMount()).
+//
+// This non-Linux stub performs none of the above and always returns a
+// "not implemented" error.
+func (v *Volume) mount() error {
+ return errors.New("not implemented (*Volume) mount")
+}
+
+// unmount is documented as unmounting the volume if necessary.
+// Unmounting a volume that is not mounted is a no-op.
+// Unmounting a volume that does not require a mount is a no-op.
+// The volume must be locked for this to occur.
+// The mount counter will be decremented if non-zero. If the counter reaches 0,
+// the volume will really be unmounted, as no further containers are using the
+// volume.
+// If force is set, the volume will be unmounted regardless of mount counter.
+//
+// This non-Linux stub performs none of the above: it ignores force and always
+// returns a "not implemented" error.
+func (v *Volume) unmount(force bool) error {
+ return errors.New("not implemented (*Volume) unmount")
+}