summaryrefslogtreecommitdiff
path: root/libpod
diff options
context:
space:
mode:
Diffstat (limited to 'libpod')
-rw-r--r--libpod/boltdb_state.go2
-rw-r--r--libpod/container.go17
-rw-r--r--libpod/container_api.go10
-rw-r--r--libpod/container_config.go3
-rw-r--r--libpod/container_freebsd.go10
-rw-r--r--libpod/container_inspect.go2
-rw-r--r--libpod/container_internal.go18
-rw-r--r--libpod/container_internal_common.go2699
-rw-r--r--libpod/container_internal_freebsd.go285
-rw-r--r--libpod/container_internal_linux.go3035
-rw-r--r--libpod/container_internal_unsupported.go12
-rw-r--r--libpod/container_linux.go15
-rw-r--r--libpod/container_validate.go4
-rw-r--r--libpod/define/config.go2
-rw-r--r--libpod/define/container_inspect.go2
-rw-r--r--libpod/define/errors.go3
-rw-r--r--libpod/define/exec_codes.go4
-rw-r--r--libpod/define/healthchecks.go74
-rw-r--r--libpod/define/mount.go2
-rw-r--r--libpod/define/mount_freebsd.go8
-rw-r--r--libpod/define/mount_linux.go8
-rw-r--r--libpod/define/mount_unsupported.go8
-rw-r--r--libpod/events.go12
-rw-r--r--libpod/healthcheck.go41
-rw-r--r--libpod/kube.go39
-rw-r--r--libpod/networking_linux.go5
-rw-r--r--libpod/networking_unsupported.go5
-rw-r--r--libpod/oci.go4
-rw-r--r--libpod/oci_conmon_attach_common.go6
-rw-r--r--libpod/oci_conmon_common.go73
-rw-r--r--libpod/oci_conmon_exec_common.go34
-rw-r--r--libpod/oci_conmon_exec_freebsd.go10
-rw-r--r--libpod/oci_conmon_exec_linux.go39
-rw-r--r--libpod/oci_conmon_freebsd.go5
-rw-r--r--libpod/oci_missing.go6
-rw-r--r--libpod/options.go18
-rw-r--r--libpod/runtime.go8
-rw-r--r--libpod/runtime_ctr.go30
-rw-r--r--libpod/runtime_ctr_freebsd.go5
-rw-r--r--libpod/runtime_ctr_linux.go5
-rw-r--r--libpod/runtime_test.go28
41 files changed, 3680 insertions, 2916 deletions
diff --git a/libpod/boltdb_state.go b/libpod/boltdb_state.go
index 81f11410b..e5a7e20fc 100644
--- a/libpod/boltdb_state.go
+++ b/libpod/boltdb_state.go
@@ -1278,7 +1278,7 @@ func (s *BoltState) NetworkConnect(ctr *Container, network string, opts types.Pe
}
netConnected := ctrNetworksBkt.Get([]byte(network))
if netConnected != nil {
- return fmt.Errorf("container %s is already connected to network %q: %w", ctr.ID(), network, define.ErrNetworkExists)
+ return fmt.Errorf("container %s is already connected to network %q: %w", ctr.ID(), network, define.ErrNetworkConnected)
}
// Add the network
diff --git a/libpod/container.go b/libpod/container.go
index 6c05b1084..1891b124f 100644
--- a/libpod/container.go
+++ b/libpod/container.go
@@ -237,6 +237,9 @@ type ContainerNamedVolume struct {
Dest string `json:"dest"`
// Options are fstab style mount options
Options []string `json:"options,omitempty"`
+ // IsAnonymous sets the named volume as anonymous even if it has a name
+ // This is used for emptyDir volumes from a kube yaml
+ IsAnonymous bool `json:"setAnonymous,omitempty"`
}
// ContainerOverlayVolume is a overlay volume that will be mounted into the
@@ -1130,20 +1133,6 @@ func (c *Container) NetworkDisabled() (bool, error) {
return networkDisabled(c)
}
-func networkDisabled(c *Container) (bool, error) {
- if c.config.CreateNetNS {
- return false, nil
- }
- if !c.config.PostConfigureNetNS {
- for _, ns := range c.config.Spec.Linux.Namespaces {
- if ns.Type == spec.NetworkNamespace {
- return ns.Path == "", nil
- }
- }
- }
- return false, nil
-}
-
func (c *Container) HostNetwork() bool {
if c.config.CreateNetNS || c.config.NetNsCtr != "" {
return false
diff --git a/libpod/container_api.go b/libpod/container_api.go
index 2ff4bfe08..f88e38ce1 100644
--- a/libpod/container_api.go
+++ b/libpod/container_api.go
@@ -16,6 +16,7 @@ import (
"github.com/containers/podman/v4/libpod/events"
"github.com/containers/podman/v4/pkg/signal"
"github.com/containers/storage/pkg/archive"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
@@ -98,6 +99,15 @@ func (c *Container) Start(ctx context.Context, recursive bool) error {
return c.start()
}
+// Update applies new resource limits to the container.
+// Only the cgroup configuration can be updated, which is why the sole
+// argument is a Linux resources spec. syncContainer is called first; if it
+// fails, its error is returned and no update is attempted.
+func (c *Container) Update(res *spec.LinuxResources) error {
+	syncErr := c.syncContainer()
+	if syncErr != nil {
+		return syncErr
+	}
+
+	return c.update(res)
+}
+
// StartAndAttach starts a container and attaches to it.
// This acts as a combination of the Start and Attach APIs, ensuring proper
// ordering of the two such that no output from the container is lost (e.g. the
diff --git a/libpod/container_config.go b/libpod/container_config.go
index bd9816651..f3585d22c 100644
--- a/libpod/container_config.go
+++ b/libpod/container_config.go
@@ -7,6 +7,7 @@ import (
"github.com/containers/common/libnetwork/types"
"github.com/containers/common/pkg/secrets"
"github.com/containers/image/v5/manifest"
+ "github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/pkg/namespaces"
"github.com/containers/podman/v4/pkg/specgen"
"github.com/containers/storage"
@@ -392,6 +393,8 @@ type ContainerMiscConfig struct {
Systemd *bool `json:"systemd,omitempty"`
// HealthCheckConfig has the health check command and related timings
HealthCheckConfig *manifest.Schema2HealthConfig `json:"healthcheck"`
+ // HealthCheckOnFailureAction defines an action to take once the container turns unhealthy.
+ HealthCheckOnFailureAction define.HealthCheckOnFailureAction `json:"healthcheck_on_failure_action"`
// PreserveFDs is a number of additional file descriptors (in addition
// to 0, 1, 2) that will be passed to the executed process. The total FDs
// passed will be 3 + PreserveFDs.
diff --git a/libpod/container_freebsd.go b/libpod/container_freebsd.go
index f9fbc4daa..7292ba37a 100644
--- a/libpod/container_freebsd.go
+++ b/libpod/container_freebsd.go
@@ -10,3 +10,13 @@ type containerPlatformState struct {
// namespace.
NetworkJail string `json:"-"`
}
+
+// networkDisabled reports whether the container has networking disabled.
+// It returns true only when the container neither creates its own network
+// namespace (CreateNetNS) nor configures one after creation
+// (PostConfigureNetNS) and no network jail is recorded in its state.
+// The error result is always nil.
+func networkDisabled(c *Container) (bool, error) {
+	if c.config.CreateNetNS || c.config.PostConfigureNetNS {
+		return false, nil
+	}
+
+	noJail := c.state.NetworkJail == ""
+	return noJail, nil
+}
diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go
index 5e2ab2818..ad8bae286 100644
--- a/libpod/container_inspect.go
+++ b/libpod/container_inspect.go
@@ -390,6 +390,8 @@ func (c *Container) generateInspectContainerConfig(spec *spec.Spec) *define.Insp
// leak.
ctrConfig.Healthcheck = c.config.HealthCheckConfig
+ ctrConfig.HealthcheckOnFailureAction = c.config.HealthCheckOnFailureAction.String()
+
ctrConfig.CreateCommand = c.config.CreateCommand
ctrConfig.Timezone = c.config.Timezone
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index 60fb29607..32674235a 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -27,6 +27,7 @@ import (
cutil "github.com/containers/common/pkg/util"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/libpod/events"
+ "github.com/containers/podman/v4/libpod/shutdown"
"github.com/containers/podman/v4/pkg/ctime"
"github.com/containers/podman/v4/pkg/lookup"
"github.com/containers/podman/v4/pkg/rootless"
@@ -1038,6 +1039,13 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error {
}
}
+ // To ensure that we don't lose track of Conmon if hit by a SIGTERM
+ // in the middle of setting up the container, inhibit shutdown signals
+ // until after we save Conmon's PID to the state.
+ // TODO: This can likely be removed once conmon-rs support merges.
+ shutdown.Inhibit()
+ defer shutdown.Uninhibit()
+
// With the spec complete, do an OCI create
if _, err = c.ociRuntime.CreateContainer(c, nil); err != nil {
return err
@@ -1073,6 +1081,7 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error {
if err := c.save(); err != nil {
return err
}
+
if c.config.HealthCheckConfig != nil {
if err := c.createTimer(); err != nil {
logrus.Error(err)
@@ -2343,3 +2352,12 @@ func (c *Container) extractSecretToCtrStorage(secr *ContainerSecret) error {
}
return nil
}
+
+// update asks the OCI runtime to apply the given resource limits to the
+// container's cgroup configuration after the container has been created.
+// A debug message is logged only when the runtime reports success; any
+// runtime error is returned unchanged.
+func (c *Container) update(resources *spec.LinuxResources) error {
+	updateErr := c.ociRuntime.UpdateContainer(c, resources)
+	if updateErr == nil {
+		logrus.Debugf("updated container %s", c.ID())
+	}
+	return updateErr
+}
diff --git a/libpod/container_internal_common.go b/libpod/container_internal_common.go
new file mode 100644
index 000000000..192a86b6a
--- /dev/null
+++ b/libpod/container_internal_common.go
@@ -0,0 +1,2699 @@
+//go:build linux || freebsd
+// +build linux freebsd
+
+package libpod
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "math"
+ "os"
+ "os/user"
+ "path"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "syscall"
+ "time"
+
+ metadata "github.com/checkpoint-restore/checkpointctl/lib"
+ "github.com/checkpoint-restore/go-criu/v5/stats"
+ cdi "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
+ "github.com/containers/buildah"
+ "github.com/containers/buildah/pkg/chrootuser"
+ "github.com/containers/buildah/pkg/overlay"
+ butil "github.com/containers/buildah/util"
+ "github.com/containers/common/libnetwork/etchosts"
+ "github.com/containers/common/libnetwork/resolvconf"
+ "github.com/containers/common/libnetwork/types"
+ "github.com/containers/common/pkg/apparmor"
+ "github.com/containers/common/pkg/chown"
+ "github.com/containers/common/pkg/config"
+ "github.com/containers/common/pkg/subscriptions"
+ "github.com/containers/common/pkg/umask"
+ cutil "github.com/containers/common/pkg/util"
+ is "github.com/containers/image/v5/storage"
+ "github.com/containers/podman/v4/libpod/define"
+ "github.com/containers/podman/v4/libpod/events"
+ "github.com/containers/podman/v4/pkg/annotations"
+ "github.com/containers/podman/v4/pkg/checkpoint/crutils"
+ "github.com/containers/podman/v4/pkg/criu"
+ "github.com/containers/podman/v4/pkg/lookup"
+ "github.com/containers/podman/v4/pkg/rootless"
+ "github.com/containers/podman/v4/pkg/util"
+ "github.com/containers/podman/v4/version"
+ "github.com/containers/storage/pkg/archive"
+ "github.com/containers/storage/pkg/idtools"
+ "github.com/containers/storage/pkg/lockfile"
+ securejoin "github.com/cyphar/filepath-securejoin"
+ runcuser "github.com/opencontainers/runc/libcontainer/user"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/opencontainers/runtime-tools/generate"
+ "github.com/opencontainers/selinux/go-selinux"
+ "github.com/opencontainers/selinux/go-selinux/label"
+ "github.com/sirupsen/logrus"
+)
+
+// getOverlayUpperAndWorkDir scans a list of overlay mount options and returns
+// the values given for "upperdir" and "workdir" (in that order).
+//
+// An option is considered when it starts with the respective keyword; its
+// value is everything after the first "=". Options without "=" are ignored.
+// An explicitly empty value is rejected, and so is specifying only one of the
+// two directories. When neither is present both results are empty and the
+// error is nil. Internal use only.
+func getOverlayUpperAndWorkDir(options []string) (string, string, error) {
+	var upper, work string
+
+	for _, opt := range options {
+		if strings.HasPrefix(opt, "upperdir") {
+			if kv := strings.SplitN(opt, "=", 2); len(kv) == 2 {
+				if kv[1] == "" {
+					return "", "", errors.New("cannot accept empty value for upperdir")
+				}
+				upper = kv[1]
+			}
+		}
+		if strings.HasPrefix(opt, "workdir") {
+			if kv := strings.SplitN(opt, "=", 2); len(kv) == 2 {
+				if kv[1] == "" {
+					return "", "", errors.New("cannot accept empty value for workdir")
+				}
+				work = kv[1]
+			}
+		}
+	}
+
+	// Exactly one of the two being set is an error.
+	if (upper == "") != (work == "") {
+		return "", "", errors.New("must specify both upperdir and workdir")
+	}
+	return upper, work, nil
+}
+
+// generateSpec builds the OCI runtime spec for the container from its stored
+// configuration and current state.
+//
+// Starting from c.config.Spec it, in order: resolves the user the process
+// runs as (falling back to a host lookup for names listed in HostUsers),
+// optionally adds privileged devices, the network namespace and the AppArmor
+// profile, creates the bind mounts and notify socket mount, adds named,
+// overlay and image volumes, processes the z/Z/U mount options, sets
+// HOME/UID/GID/umask and supplemental groups, adds systemd mounts, shared
+// namespaces, annotations, the cgroup path and CDI devices, sorts the mounts,
+// runs the OCI hook setup, injects secret and LISTEN_* environment variables.
+//
+// ctx is passed to image mounting and to the OCI hook setup.
+// It returns the generated spec, or nil and the first error encountered.
+func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
+	overrides := c.getUserOverrides()
+	execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, overrides)
+	if err != nil {
+		// The user may be one that was explicitly shared from the host.
+		if cutil.StringInSlice(c.config.User, c.config.HostUsers) {
+			execUser, err = lookupHostUser(c.config.User)
+		}
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	// NewFromSpec() is deprecated according to its comment
+	// however the recommended replace just causes a nil map panic
+	//nolint:staticcheck
+	g := generate.NewFromSpec(c.config.Spec)
+
+	// If the flag to mount all devices is set for a privileged container, add
+	// all the devices from the host's machine into the container
+	if c.config.MountAllDevices {
+		if err := util.AddPrivilegedDevices(&g); err != nil {
+			return nil, err
+		}
+	}
+
+	// If network namespace was requested, add it now
+	if err := c.addNetworkNamespace(&g); err != nil {
+		return nil, err
+	}
+
+	// Apply AppArmor checks and load the default profile if needed.
+	if len(c.config.Spec.Process.ApparmorProfile) > 0 {
+		updatedProfile, err := apparmor.CheckProfileAndLoadDefault(c.config.Spec.Process.ApparmorProfile)
+		if err != nil {
+			return nil, err
+		}
+		g.SetProcessApparmorProfile(updatedProfile)
+	}
+
+	if err := c.makeBindMounts(); err != nil {
+		return nil, err
+	}
+
+	if err := c.mountNotifySocket(g); err != nil {
+		return nil, err
+	}
+
+	// Get host UID and GID based on the container process UID and GID.
+	hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid))
+	if err != nil {
+		return nil, err
+	}
+
+	// Add named volumes
+	for _, namedVol := range c.config.NamedVolumes {
+		volume, err := c.runtime.GetVolume(namedVol.Name)
+		if err != nil {
+			return nil, fmt.Errorf("error retrieving volume %s to add to container %s: %w", namedVol.Name, c.ID(), err)
+		}
+		mountPoint, err := volume.MountPoint()
+		if err != nil {
+			return nil, err
+		}
+
+		// The "O" option requests that the volume be mounted as an overlay.
+		overlayFlag := false
+		upperDir := ""
+		workDir := ""
+		for _, o := range namedVol.Options {
+			if o == "O" {
+				overlayFlag = true
+				upperDir, workDir, err = getOverlayUpperAndWorkDir(namedVol.Options)
+				if err != nil {
+					return nil, err
+				}
+			}
+		}
+
+		if overlayFlag {
+			var overlayMount spec.Mount
+			var overlayOpts *overlay.Options
+			contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
+			if err != nil {
+				return nil, err
+			}
+
+			overlayOpts = &overlay.Options{RootUID: c.RootUID(),
+				RootGID:                c.RootGID(),
+				UpperDirOptionFragment: upperDir,
+				WorkDirOptionFragment:  workDir,
+				GraphOpts:              c.runtime.store.GraphOptions(),
+			}
+
+			overlayMount, err = overlay.MountWithOptions(contentDir, mountPoint, namedVol.Dest, overlayOpts)
+			if err != nil {
+				return nil, fmt.Errorf("mounting overlay failed %q: %w", mountPoint, err)
+			}
+
+			for _, o := range namedVol.Options {
+				if o == "U" {
+					if err := c.ChangeHostPathOwnership(mountPoint, true, int(hostUID), int(hostGID)); err != nil {
+						return nil, err
+					}
+
+					if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
+						return nil, err
+					}
+				}
+			}
+			g.AddMount(overlayMount)
+		} else {
+			volMount := spec.Mount{
+				Type:        define.TypeBind,
+				Source:      mountPoint,
+				Destination: namedVol.Dest,
+				Options:     namedVol.Options,
+			}
+			g.AddMount(volMount)
+		}
+	}
+
+	// Check if the spec file mounts contain the options z, Z or U.
+	// If they have z or Z, relabel the source directory and then remove the option.
+	// If they have U, chown the source directory and then remove the option.
+	for i := range g.Config.Mounts {
+		m := &g.Config.Mounts[i]
+		var options []string
+		for _, o := range m.Options {
+			switch o {
+			case "U":
+				if m.Type == "tmpfs" {
+					options = append(options, []string{fmt.Sprintf("uid=%d", execUser.Uid), fmt.Sprintf("gid=%d", execUser.Gid)}...)
+				} else {
+					// only chown on initial creation of container
+					if err := c.ChangeHostPathOwnership(m.Source, true, int(hostUID), int(hostGID)); err != nil {
+						return nil, err
+					}
+				}
+			case "z":
+				fallthrough
+			case "Z":
+				if err := c.relabel(m.Source, c.MountLabel(), label.IsShared(o)); err != nil {
+					return nil, err
+				}
+
+			default:
+				options = append(options, o)
+			}
+		}
+		m.Options = options
+	}
+
+	c.setProcessLabel(&g)
+	c.setMountLabel(&g)
+
+	// Add bind mounts to container
+	for dstPath, srcPath := range c.state.BindMounts {
+		newMount := spec.Mount{
+			Type:        define.TypeBind,
+			Source:      srcPath,
+			Destination: dstPath,
+			Options:     bindOptions,
+		}
+		if c.IsReadOnly() && dstPath != "/dev/shm" {
+			newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
+		}
+		if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
+			newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
+		}
+		// A mount already present in the spec for this destination wins.
+		if !MountExists(g.Mounts(), dstPath) {
+			g.AddMount(newMount)
+		} else {
+			logrus.Infof("User mount overriding libpod mount at %q", dstPath)
+		}
+	}
+
+	// Add overlay volumes
+	for _, overlayVol := range c.config.OverlayVolumes {
+		upperDir, workDir, err := getOverlayUpperAndWorkDir(overlayVol.Options)
+		if err != nil {
+			return nil, err
+		}
+		contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
+		if err != nil {
+			return nil, err
+		}
+		overlayOpts := &overlay.Options{RootUID: c.RootUID(),
+			RootGID:                c.RootGID(),
+			UpperDirOptionFragment: upperDir,
+			WorkDirOptionFragment:  workDir,
+			GraphOpts:              c.runtime.store.GraphOptions(),
+		}
+
+		overlayMount, err := overlay.MountWithOptions(contentDir, overlayVol.Source, overlayVol.Dest, overlayOpts)
+		if err != nil {
+			return nil, fmt.Errorf("mounting overlay failed %q: %w", overlayVol.Source, err)
+		}
+
+		// Check overlay volume options
+		for _, o := range overlayVol.Options {
+			if o == "U" {
+				if err := c.ChangeHostPathOwnership(overlayVol.Source, true, int(hostUID), int(hostGID)); err != nil {
+					return nil, err
+				}
+
+				if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
+					return nil, err
+				}
+			}
+		}
+
+		g.AddMount(overlayMount)
+	}
+
+	// Add image volumes as overlay mounts
+	for _, volume := range c.config.ImageVolumes {
+		// Mount the specified image.
+		img, _, err := c.runtime.LibimageRuntime().LookupImage(volume.Source, nil)
+		if err != nil {
+			return nil, fmt.Errorf("error creating image volume %q:%q: %w", volume.Source, volume.Dest, err)
+		}
+		mountPoint, err := img.Mount(ctx, nil, "")
+		if err != nil {
+			return nil, fmt.Errorf("error mounting image volume %q:%q: %w", volume.Source, volume.Dest, err)
+		}
+
+		contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
+		if err != nil {
+			return nil, fmt.Errorf("failed to create TempDir in the %s directory: %w", c.config.StaticDir, err)
+		}
+
+		var overlayMount spec.Mount
+		if volume.ReadWrite {
+			overlayMount, err = overlay.Mount(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
+		} else {
+			overlayMount, err = overlay.MountReadOnly(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
+		}
+		if err != nil {
+			return nil, fmt.Errorf("creating overlay mount for image %q failed: %w", volume.Source, err)
+		}
+		g.AddMount(overlayMount)
+	}
+
+	// Only provide HOME when the configured environment does not set it.
+	hasHomeSet := false
+	for _, s := range c.config.Spec.Process.Env {
+		if strings.HasPrefix(s, "HOME=") {
+			hasHomeSet = true
+			break
+		}
+	}
+	if !hasHomeSet && execUser.Home != "" {
+		c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", execUser.Home))
+	}
+
+	if c.config.User != "" {
+		// User and Group must go together
+		g.SetProcessUID(uint32(execUser.Uid))
+		g.SetProcessGID(uint32(execUser.Gid))
+		g.AddProcessAdditionalGid(uint32(execUser.Gid))
+	}
+
+	if c.config.Umask != "" {
+		// The umask is configured as an octal string.
+		decVal, err := strconv.ParseUint(c.config.Umask, 8, 32)
+		if err != nil {
+			return nil, fmt.Errorf("invalid Umask Value: %w", err)
+		}
+		umask := uint32(decVal)
+		g.Config.Process.User.Umask = &umask
+	}
+
+	// Add additional groups if c.config.Groups is not empty
+	if len(c.config.Groups) > 0 {
+		gids, err := lookup.GetContainerGroups(c.config.Groups, c.state.Mountpoint, overrides)
+		if err != nil {
+			return nil, fmt.Errorf("error looking up supplemental groups for container %s: %w", c.ID(), err)
+		}
+		for _, gid := range gids {
+			g.AddProcessAdditionalGid(gid)
+		}
+	}
+
+	if err := c.addSystemdMounts(&g); err != nil {
+		return nil, err
+	}
+
+	// Look up and add groups the user belongs to, if a group wasn't directly specified
+	if !strings.Contains(c.config.User, ":") {
+		// the gidMappings that are present inside the container user namespace
+		var gidMappings []idtools.IDMap
+
+		switch {
+		case len(c.config.IDMappings.GIDMap) > 0:
+			gidMappings = c.config.IDMappings.GIDMap
+		case rootless.IsRootless():
+			// Check whether the current user namespace has enough gids available.
+			availableGids, err := rootless.GetAvailableGids()
+			if err != nil {
+				return nil, fmt.Errorf("cannot read number of available GIDs: %w", err)
+			}
+			gidMappings = []idtools.IDMap{{
+				ContainerID: 0,
+				HostID:      0,
+				Size:        int(availableGids),
+			}}
+		default:
+			gidMappings = []idtools.IDMap{{
+				ContainerID: 0,
+				HostID:      0,
+				Size:        math.MaxInt32,
+			}}
+		}
+		for _, gid := range execUser.Sgids {
+			isGIDAvailable := false
+			for _, m := range gidMappings {
+				if gid >= m.ContainerID && gid < m.ContainerID+m.Size {
+					isGIDAvailable = true
+					break
+				}
+			}
+			if isGIDAvailable {
+				g.AddProcessAdditionalGid(uint32(gid))
+			} else {
+				logrus.Warnf("Additional gid=%d is not present in the user namespace, skip setting it", gid)
+			}
+		}
+	}
+
+	// Add shared namespaces from other containers
+	if err := c.addSharedNamespaces(&g); err != nil {
+		return nil, err
+	}
+
+	g.SetRootPath(c.state.Mountpoint)
+	g.AddAnnotation(annotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano))
+	g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal))
+
+	if _, exists := g.Config.Annotations[annotations.ContainerManager]; !exists {
+		g.AddAnnotation(annotations.ContainerManager, annotations.ContainerManagerLibpod)
+	}
+
+	if err := c.setCgroupsPath(&g); err != nil {
+		return nil, err
+	}
+
+	// Warning: CDI may alter g.Config in place.
+	if len(c.config.CDIDevices) > 0 {
+		registry := cdi.GetRegistry(
+			cdi.WithAutoRefresh(false),
+		)
+		// A refresh failure is only logged; device injection is still attempted.
+		if err := registry.Refresh(); err != nil {
+			logrus.Debugf("The following error was triggered when refreshing the CDI registry: %v", err)
+		}
+		_, err := registry.InjectDevices(g.Config, c.config.CDIDevices...)
+		if err != nil {
+			return nil, fmt.Errorf("error setting up CDI devices: %w", err)
+		}
+	}
+
+	// Mounts need to be sorted so paths will not cover other paths
+	mounts := sortMounts(g.Mounts())
+	g.ClearMounts()
+
+	for _, m := range mounts {
+		// We need to remove all symlinks from tmpfs mounts.
+		// Runc and other runtimes may choke on them.
+		// Easy solution: use securejoin to do a scoped evaluation of
+		// the links, then trim off the mount prefix.
+		if m.Type == "tmpfs" {
+			finalPath, err := securejoin.SecureJoin(c.state.Mountpoint, m.Destination)
+			if err != nil {
+				return nil, fmt.Errorf("error resolving symlinks for mount destination %s: %w", m.Destination, err)
+			}
+			trimmedPath := strings.TrimPrefix(finalPath, strings.TrimSuffix(c.state.Mountpoint, "/"))
+			m.Destination = trimmedPath
+		}
+		g.AddMount(m)
+	}
+
+	if err := c.addRootPropagation(&g, mounts); err != nil {
+		return nil, err
+	}
+
+	// Warning: precreate hooks may alter g.Config in place.
+	if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil {
+		return nil, fmt.Errorf("error setting up OCI Hooks: %w", err)
+	}
+	if len(c.config.EnvSecrets) > 0 {
+		manager, err := c.runtime.SecretsManager()
+		if err != nil {
+			return nil, err
+		}
+		// Expose each configured secret as an environment variable.
+		for name, secr := range c.config.EnvSecrets {
+			_, data, err := manager.LookupSecretData(secr.Name)
+			if err != nil {
+				return nil, err
+			}
+			g.AddProcessEnv(name, string(data))
+		}
+	}
+
+	// Pass down the LISTEN_* environment (see #10443).
+	for _, key := range []string{"LISTEN_PID", "LISTEN_FDS", "LISTEN_FDNAMES"} {
+		if val, ok := os.LookupEnv(key); ok {
+			// Force the PID to `1` since we cannot rely on (all
+			// versions of) all runtimes to do it for us.
+			if key == "LISTEN_PID" {
+				val = "1"
+			}
+			g.AddProcessEnv(key, val)
+		}
+	}
+
+	return g.Config, nil
+}
+
+// isWorkDirSymlink reports whether resolvedPath is a symlink (or a chain of
+// symlinks) whose target is usable as a working directory: either the target
+// lies on a volume or bind mount, or the target, once resolved against the
+// container mountpoint, can be stat'ed successfully.
+// It returns false when the path is not a symlink, when resolution fails, or
+// when no existing target is found before the link limit is exceeded.
+// This function is meant for internal use only and can change at any time.
+func (c *Container) isWorkDirSymlink(resolvedPath string) bool {
+ // We cannot create workdir since explicit --workdir is
+ // set in config but workdir could also be a symlink.
+ // If it's a symlink, check if the resolved target is present in the container.
+ // If so, that's a valid use case: return true.
+
+ maxSymLinks := 0
+ for {
+ // Linux only supports a chain of 40 links.
+ // Reference: https://github.com/torvalds/linux/blob/master/include/linux/namei.h#L13
+ if maxSymLinks > 40 {
+ break
+ }
+ resolvedSymlink, err := os.Readlink(resolvedPath)
+ if err != nil {
+ // Not a symlink (or unreadable): end sym-link resolution loop.
+ break
+ }
+ if resolvedSymlink != "" {
+ _, resolvedSymlinkWorkdir, err := c.resolvePath(c.state.Mountpoint, resolvedSymlink)
+ // NOTE: the volume/bind-mount check deliberately runs before err is
+ // inspected, so it sees whatever path resolvePath returned.
+ if isPathOnVolume(c, resolvedSymlinkWorkdir) || isPathOnBindMount(c, resolvedSymlinkWorkdir) {
+ // Resolved symlink exists on external volume or mount
+ return true
+ }
+ if err != nil {
+ // Could not resolve path so end sym-link resolution loop.
+ break
+ }
+ if resolvedSymlinkWorkdir != "" {
+ // Continue the next iteration from the newly resolved path.
+ resolvedPath = resolvedSymlinkWorkdir
+ _, err := os.Stat(resolvedSymlinkWorkdir)
+ if err == nil {
+ // Symlink resolved successfully and resolved path exists on container,
+ // this is a valid use-case so return true.
+ logrus.Debugf("Workdir is a symlink with target to %q and resolved symlink exists on container", resolvedSymlink)
+ return true
+ }
+ }
+ }
+ maxSymLinks++
+ }
+ return false
+}
+
+// resolveWorkDir makes sure the container's working directory is usable.
+//
+// Nothing is done when the workdir lives on a volume or bind mount. Otherwise
+// the workdir is resolved to a host path under the container mountpoint:
+//   - if it exists it must be a directory;
+//   - if it is missing and CreateWorkingDir is unset, it is only accepted
+//     when isWorkDirSymlink approves it, otherwise an error is returned;
+//   - if it is missing and CreateWorkingDir is set, it is created and chowned
+//     to the container user.
+//
+// Note that the container must be mounted before calling this.
+func (c *Container) resolveWorkDir() error {
+	workdir := c.WorkingDir()
+
+	// Paths below a volume or mount are taken care of by the runtime.
+	if isPathOnVolume(c, workdir) || isPathOnBindMount(c, workdir) {
+		logrus.Debugf("Workdir %q resolved to a volume or mount", workdir)
+		return nil
+	}
+
+	_, hostPath, err := c.resolvePath(c.state.Mountpoint, workdir)
+	if err != nil {
+		return err
+	}
+	logrus.Debugf("Workdir %q resolved to host path %q", workdir, hostPath)
+
+	info, statErr := os.Stat(hostPath)
+	switch {
+	case statErr == nil && !info.IsDir():
+		return fmt.Errorf("workdir %q exists on container %s, but is not a directory", workdir, c.ID())
+	case statErr == nil:
+		return nil
+	case !c.config.CreateWorkingDir:
+		// No need to create it (e.g., `--workdir=/foo`), so the path
+		// has to exist on the container already.
+		if !os.IsNotExist(statErr) {
+			// This might be a serious error (e.g., permission), so
+			// the full error is returned.
+			return fmt.Errorf("error detecting workdir %q on container %s: %w", workdir, c.ID(), statErr)
+		}
+		// A workdir that is a valid symlink is an accepted use-case.
+		if c.isWorkDirSymlink(hostPath) {
+			return nil
+		}
+		return fmt.Errorf("workdir %q does not exist on container %s", workdir, c.ID())
+	}
+
+	if mkErr := os.MkdirAll(hostPath, 0755); mkErr != nil {
+		if os.IsExist(mkErr) {
+			return nil
+		}
+		return fmt.Errorf("error creating container %s workdir: %w", c.ID(), mkErr)
+	}
+
+	// Hand the freshly created directory over to the container user.
+	uid, gid, _, err := chrootuser.GetUser(c.state.Mountpoint, c.User())
+	if err != nil {
+		return fmt.Errorf("error looking up %s inside of the container %s: %w", c.User(), c.ID(), err)
+	}
+	if chownErr := os.Chown(hostPath, int(uid), int(gid)); chownErr != nil {
+		return fmt.Errorf("error chowning container %s workdir to container root: %w", c.ID(), chownErr)
+	}
+
+	return nil
+}
+
+// getUserOverrides works out which host files should be consulted instead of
+// the container's own /etc/passwd and /etc/group during user lookups.
+//
+// Mounts in the configured spec are scanned in order: a mount on /etc/passwd
+// or /etc/group sets the respective path directly, and a mount on /etc
+// supplies "passwd"/"group" beneath its source for whichever file has not
+// been seen as a dedicated mount so far. Finally, an /etc/passwd entry in the
+// container's bind mounts takes precedence for the passwd path.
+func (c *Container) getUserOverrides() *lookup.Overrides {
+	overrides := new(lookup.Overrides)
+	passwdMounted, groupMounted := false, false
+
+	for _, mnt := range c.config.Spec.Mounts {
+		switch mnt.Destination {
+		case "/etc/passwd":
+			overrides.ContainerEtcPasswdPath = mnt.Source
+			passwdMounted = true
+		case "/etc/group":
+			overrides.ContainerEtcGroupPath = mnt.Source
+			groupMounted = true
+		case "/etc":
+			if !passwdMounted {
+				overrides.ContainerEtcPasswdPath = filepath.Join(mnt.Source, "passwd")
+			}
+			if !groupMounted {
+				overrides.ContainerEtcGroupPath = filepath.Join(mnt.Source, "group")
+			}
+		}
+	}
+
+	if passwdPath, found := c.state.BindMounts["/etc/passwd"]; found {
+		overrides.ContainerEtcPasswdPath = passwdPath
+	}
+	return overrides
+}
+
+// lookupHostUser looks up the named user on the host via util.LookupUser and
+// converts the result into a runc ExecUser with Uid, Gid and Home populated.
+//
+// On failure the returned error is non-nil and the returned ExecUser is left
+// at its zero value; callers must not use it in that case.
+func lookupHostUser(name string) (*runcuser.ExecUser, error) {
+	var execUser runcuser.ExecUser
+	// Look up User on host
+	u, err := util.LookupUser(name)
+	if err != nil {
+		return &execUser, err
+	}
+
+	// User and group IDs are decimal strings. Parsing them in base 8 rejects
+	// any ID containing the digits 8 or 9 and silently maps the others to
+	// the wrong number (e.g. "1000" would become 512).
+	uid, err := strconv.ParseUint(u.Uid, 10, 32)
+	if err != nil {
+		return &execUser, err
+	}
+
+	gid, err := strconv.ParseUint(u.Gid, 10, 32)
+	if err != nil {
+		return &execUser, err
+	}
+
+	execUser.Uid = int(uid)
+	execUser.Gid = int(gid)
+	execUser.Home = u.HomeDir
+	return &execUser, nil
+}
+
+// mountNotifySocket prepares the sd-notify socket directory for the container.
+//
+// It is a no-op unless an SdNotifySocket is configured and the sdnotify mode
+// is "container". Otherwise it creates (and relabels) a "notify" directory
+// inside the bundle path, registers it as the /run/notify bind mount unless
+// one is already present, and points NOTIFY_SOCKET in the generated process
+// environment at /run/notify/notify.sock.
+func (c *Container) mountNotifySocket(g generate.Generator) error {
+	if c.config.SdNotifySocket == "" || c.config.SdNotifyMode != define.SdNotifyModeContainer {
+		return nil
+	}
+
+	notifyDir := filepath.Join(c.bundlePath(), "notify")
+	logrus.Debugf("Checking notify %q dir", notifyDir)
+	if mkErr := os.MkdirAll(notifyDir, 0755); mkErr != nil && !os.IsExist(mkErr) {
+		return fmt.Errorf("unable to create notify %q dir: %w", notifyDir, mkErr)
+	}
+	if relabelErr := label.Relabel(notifyDir, c.MountLabel(), true); relabelErr != nil {
+		return fmt.Errorf("relabel failed %q: %w", notifyDir, relabelErr)
+	}
+
+	logrus.Debugf("Add bindmount notify %q dir", notifyDir)
+	if _, mounted := c.state.BindMounts["/run/notify"]; !mounted {
+		c.state.BindMounts["/run/notify"] = notifyDir
+	}
+
+	// Set the container's notify socket to the proxy socket created by conmon
+	g.AddProcessEnv("NOTIFY_SOCKET", "/run/notify/notify.sock")
+
+	return nil
+}
+
+// addCheckpointImageMetadata annotates the checkpoint image being built by
+// importBuilder with details about the container and the host it was
+// checkpointed on (image names, podman/CRIU/runtime/conmon versions, host
+// architecture, kernel, cgroup version and distribution).
+// This information is useful to check compatibility before restoring the
+// checkpoint.
+//
+// It returns an error, wrapping the underlying cause, when the host info or
+// the CRIU version cannot be determined.
+func (c *Container) addCheckpointImageMetadata(importBuilder *buildah.Builder) error {
+	// Get information about host environment
+	hostInfo, err := c.Runtime().hostInfo()
+	if err != nil {
+		// Wrap with %w so callers can still inspect the cause.
+		return fmt.Errorf("getting host info: %w", err)
+	}
+
+	criuVersion, err := criu.GetCriuVersion()
+	if err != nil {
+		return fmt.Errorf("getting criu version: %w", err)
+	}
+
+	rootfsImageID, rootfsImageName := c.Image()
+
+	// Add image annotations with information about the container and the host.
+	// This information is useful to check compatibility before restoring the checkpoint
+
+	checkpointImageAnnotations := map[string]string{
+		define.CheckpointAnnotationName:                c.config.Name,
+		define.CheckpointAnnotationRawImageName:        c.config.RawImageName,
+		define.CheckpointAnnotationRootfsImageID:       rootfsImageID,
+		define.CheckpointAnnotationRootfsImageName:     rootfsImageName,
+		define.CheckpointAnnotationPodmanVersion:       version.Version.String(),
+		define.CheckpointAnnotationCriuVersion:         strconv.Itoa(criuVersion),
+		define.CheckpointAnnotationRuntimeName:         hostInfo.OCIRuntime.Name,
+		define.CheckpointAnnotationRuntimeVersion:      hostInfo.OCIRuntime.Version,
+		define.CheckpointAnnotationConmonVersion:       hostInfo.Conmon.Version,
+		define.CheckpointAnnotationHostArch:            hostInfo.Arch,
+		define.CheckpointAnnotationHostKernel:          hostInfo.Kernel,
+		define.CheckpointAnnotationCgroupVersion:       hostInfo.CgroupsVersion,
+		define.CheckpointAnnotationDistributionVersion: hostInfo.Distribution.Version,
+		define.CheckpointAnnotationDistributionName:    hostInfo.Distribution.Distribution,
+	}
+
+	for key, value := range checkpointImageAnnotations {
+		importBuilder.SetAnnotation(key, value)
+	}
+
+	return nil
+}
+
+// resolveCheckpointImageName replaces options.CreateImage with the name
+// returned by the libimage runtime's ResolveName. When no checkpoint image
+// was requested (CreateImage is empty) the options are left untouched.
+func (c *Container) resolveCheckpointImageName(options *ContainerCheckpointOptions) error {
+	requested := options.CreateImage
+	if requested != "" {
+		// Resolve image name
+		resolved, err := c.runtime.LibimageRuntime().ResolveName(requested)
+		if err != nil {
+			return err
+		}
+		options.CreateImage = resolved
+	}
+	return nil
+}
+
+// createCheckpointImage exports the container's checkpoint into a temporary
+// tar archive and commits it, together with compatibility annotations, as a
+// squashed image named options.CreateImage in the runtime's store.
+//
+// It is a no-op when options.CreateImage is empty. options is received by
+// value, so the TargetFile set here is not visible to the caller.
+func (c *Container) createCheckpointImage(ctx context.Context, options ContainerCheckpointOptions) error {
+	if options.CreateImage == "" {
+		return nil
+	}
+	logrus.Debugf("Create checkpoint image %s", options.CreateImage)
+
+	// Create storage reference
+	imageRef, err := is.Transport.ParseStoreReference(c.runtime.store, options.CreateImage)
+	if err != nil {
+		// Keep the underlying cause so the user can see why parsing failed.
+		return fmt.Errorf("failed to parse image name: %w", err)
+	}
+
+	// Build an image scratch
+	builderOptions := buildah.BuilderOptions{
+		FromImage: "scratch",
+	}
+	importBuilder, err := buildah.NewBuilder(ctx, c.runtime.store, builderOptions)
+	if err != nil {
+		return err
+	}
+	// Clean up buildah working container
+	defer func() {
+		if err := importBuilder.Delete(); err != nil {
+			logrus.Errorf("Image builder delete failed: %v", err)
+		}
+	}()
+
+	if err := c.prepareCheckpointExport(); err != nil {
+		return err
+	}
+
+	// Export checkpoint into temporary tar file
+	tmpDir, err := ioutil.TempDir("", "checkpoint_image_")
+	if err != nil {
+		return err
+	}
+	defer os.RemoveAll(tmpDir)
+
+	options.TargetFile = path.Join(tmpDir, "checkpoint.tar")
+
+	if err := c.exportCheckpoint(options); err != nil {
+		return err
+	}
+
+	// Copy checkpoint from temporary tar file in the image
+	addAndCopyOptions := buildah.AddAndCopyOptions{}
+	if err := importBuilder.Add("", true, addAndCopyOptions, options.TargetFile); err != nil {
+		return err
+	}
+
+	if err := c.addCheckpointImageMetadata(importBuilder); err != nil {
+		return err
+	}
+
+	commitOptions := buildah.CommitOptions{
+		Squash:        true,
+		SystemContext: c.runtime.imageContext,
+	}
+
+	// Create checkpoint image
+	id, _, _, err := importBuilder.Commit(ctx, imageRef, commitOptions)
+	if err != nil {
+		return err
+	}
+	logrus.Debugf("Created checkpoint image: %s", id)
+	return nil
+}
+
+// exportCheckpoint writes the container's checkpoint as a tar archive to
+// options.TargetFile.
+//
+// The archive is built from the container's bundle directory and contains the
+// checkpoint metadata files, the (pre-)checkpoint directory, the container log
+// for file-based log drivers, and, unless disabled via options.IgnoreRootfs /
+// options.IgnoreVolumes, the root file-system diff and one tar per named
+// volume. Containers with dependencies can only be exported when their single
+// dependency is the infra container of their pod.
+//
+// Temporary files created in the bundle directory for the rootfs diff and the
+// volumes are removed once the archive has been written successfully.
+func (c *Container) exportCheckpoint(options ContainerCheckpointOptions) error {
+	if len(c.Dependencies()) == 1 {
+		// Check if the dependency is an infra container. If it is we can checkpoint
+		// the container out of the Pod.
+		if c.config.Pod == "" {
+			return errors.New("cannot export checkpoints of containers with dependencies")
+		}
+
+		pod, err := c.runtime.state.Pod(c.config.Pod)
+		if err != nil {
+			return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), c.config.Pod, err)
+		}
+		infraID, err := pod.InfraContainerID()
+		if err != nil {
+			return fmt.Errorf("cannot retrieve infra container ID for pod %s: %w", c.config.Pod, err)
+		}
+		if c.Dependencies()[0] != infraID {
+			return errors.New("cannot export checkpoints of containers with dependencies")
+		}
+	}
+	if len(c.Dependencies()) > 1 {
+		return errors.New("cannot export checkpoints of containers with dependencies")
+	}
+	logrus.Debugf("Exporting checkpoint image of container %q to %q", c.ID(), options.TargetFile)
+
+	includeFiles := []string{
+		"artifacts",
+		metadata.DevShmCheckpointTar,
+		metadata.ConfigDumpFile,
+		metadata.SpecDumpFile,
+		metadata.NetworkStatusFile,
+		stats.StatsDump,
+	}
+
+	if c.LogDriver() == define.KubernetesLogging ||
+		c.LogDriver() == define.JSONLogging {
+		includeFiles = append(includeFiles, "ctr.log")
+	}
+	if options.PreCheckPoint {
+		includeFiles = append(includeFiles, preCheckpointDir)
+	} else {
+		includeFiles = append(includeFiles, metadata.CheckpointDirectory)
+	}
+	// Get root file-system changes included in the checkpoint archive
+	var addToTarFiles []string
+	if !options.IgnoreRootfs {
+		// To correctly track deleted files, let's go through the output of 'podman diff'
+		rootFsChanges, err := c.runtime.GetDiff("", c.ID(), define.DiffContainer)
+		if err != nil {
+			return fmt.Errorf("error exporting root file-system diff for %q: %w", c.ID(), err)
+		}
+
+		// Plain assignment: ':=' here would declare a new, block-local
+		// addToTarFiles and leave the outer one nil, turning the cleanup
+		// loop at the end of this function into a no-op.
+		addToTarFiles, err = crutils.CRCreateRootFsDiffTar(&rootFsChanges, c.state.Mountpoint, c.bundlePath())
+		if err != nil {
+			return err
+		}
+
+		includeFiles = append(includeFiles, addToTarFiles...)
+	}
+
+	// Folder containing archived volumes that will be included in the export
+	expVolDir := filepath.Join(c.bundlePath(), metadata.CheckpointVolumesDirectory)
+
+	// Create an archive for each volume associated with the container
+	if !options.IgnoreVolumes {
+		if err := os.MkdirAll(expVolDir, 0700); err != nil {
+			return fmt.Errorf("error creating volumes export directory %q: %w", expVolDir, err)
+		}
+
+		for _, v := range c.config.NamedVolumes {
+			volumeTarFilePath := filepath.Join(metadata.CheckpointVolumesDirectory, v.Name+".tar")
+			volumeTarFileFullPath := filepath.Join(c.bundlePath(), volumeTarFilePath)
+
+			// Each volume is archived inside its own function so that the
+			// archive file is closed on every path, including error returns,
+			// and before the next iteration starts.
+			err := func() error {
+				volumeTarFile, err := os.Create(volumeTarFileFullPath)
+				if err != nil {
+					return fmt.Errorf("error creating %q: %w", volumeTarFileFullPath, err)
+				}
+				defer volumeTarFile.Close()
+
+				volume, err := c.runtime.GetVolume(v.Name)
+				if err != nil {
+					return err
+				}
+
+				mp, err := volume.MountPoint()
+				if err != nil {
+					return err
+				}
+				if mp == "" {
+					return fmt.Errorf("volume %s is not mounted, cannot export: %w", volume.Name(), define.ErrInternal)
+				}
+
+				input, err := archive.TarWithOptions(mp, &archive.TarOptions{
+					Compression:      archive.Uncompressed,
+					IncludeSourceDir: true,
+				})
+				if err != nil {
+					return fmt.Errorf("error reading volume directory %q: %w", v.Dest, err)
+				}
+
+				_, err = io.Copy(volumeTarFile, input)
+				return err
+			}()
+			if err != nil {
+				return err
+			}
+
+			includeFiles = append(includeFiles, volumeTarFilePath)
+		}
+	}
+
+	input, err := archive.TarWithOptions(c.bundlePath(), &archive.TarOptions{
+		Compression:      options.Compression,
+		IncludeSourceDir: true,
+		IncludeFiles:     includeFiles,
+	})
+
+	if err != nil {
+		return fmt.Errorf("error reading checkpoint directory %q: %w", c.ID(), err)
+	}
+
+	outFile, err := os.Create(options.TargetFile)
+	if err != nil {
+		return fmt.Errorf("error creating checkpoint export file %q: %w", options.TargetFile, err)
+	}
+	defer outFile.Close()
+
+	// The archive may contain sensitive process memory; restrict access.
+	if err := os.Chmod(options.TargetFile, 0600); err != nil {
+		return err
+	}
+
+	_, err = io.Copy(outFile, input)
+	if err != nil {
+		return err
+	}
+
+	// Best-effort removal of the temporary rootfs diff files from the bundle.
+	for _, file := range addToTarFiles {
+		os.Remove(filepath.Join(c.bundlePath(), file))
+	}
+
+	if !options.IgnoreVolumes {
+		os.RemoveAll(expVolDir)
+	}
+
+	return nil
+}
+
+// checkpointRestoreSupported reports, as an error, why checkpoint/restore is
+// unavailable: either CRIU does not satisfy the requested minimum version or
+// the container's OCI runtime lacks checkpoint support. A nil return means
+// both requirements are met.
+func (c *Container) checkpointRestoreSupported(version int) error {
+	switch {
+	case !criu.CheckForCriu(version):
+		return fmt.Errorf("checkpoint/restore requires at least CRIU %d", version)
+	case !c.ociRuntime.SupportsCheckpoint():
+		return errors.New("configured runtime does not support checkpoint/restore")
+	default:
+		return nil
+	}
+}
+
+// checkpoint checkpoints a running container through the OCI runtime.
+//
+// After the runtime has dumped the container, the content of /dev/shm and the
+// network status are stored in the bundle directory, and the checkpoint is
+// either exported to options.TargetFile or, if requested, committed as a
+// checkpoint image. Unless options.KeepRunning or options.PreCheckPoint is set
+// the container is marked stopped and cleaned up.
+//
+// It returns the CRIU dump statistics (nil unless options.PrintStats is set),
+// the duration reported by the runtime for the checkpoint, and any error.
+func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) {
+	if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil {
+		return nil, 0, err
+	}
+
+	if c.state.State != define.ContainerStateRunning {
+		return nil, 0, fmt.Errorf("%q is not running, cannot checkpoint: %w", c.state.State, define.ErrCtrStateInvalid)
+	}
+
+	if c.AutoRemove() && options.TargetFile == "" {
+		return nil, 0, errors.New("cannot checkpoint containers that have been started with '--rm' unless '--export' is used")
+	}
+
+	if err := c.resolveCheckpointImageName(&options); err != nil {
+		return nil, 0, err
+	}
+
+	if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "dump.log", c.MountLabel()); err != nil {
+		return nil, 0, err
+	}
+
+	// Setting CheckpointLog early in case there is a failure.
+	c.state.CheckpointLog = path.Join(c.bundlePath(), "dump.log")
+	c.state.CheckpointPath = c.CheckpointPath()
+
+	runtimeCheckpointDuration, err := c.ociRuntime.CheckpointContainer(c, options)
+	if err != nil {
+		return nil, 0, err
+	}
+
+	// Keep the content of /dev/shm directory
+	if c.config.ShmDir != "" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
+		shmDirTarFileFullPath := filepath.Join(c.bundlePath(), metadata.DevShmCheckpointTar)
+
+		shmDirTarFile, err := os.Create(shmDirTarFileFullPath)
+		if err != nil {
+			return nil, 0, err
+		}
+		defer shmDirTarFile.Close()
+
+		input, err := archive.TarWithOptions(c.config.ShmDir, &archive.TarOptions{
+			Compression:      archive.Uncompressed,
+			IncludeSourceDir: true,
+		})
+		if err != nil {
+			return nil, 0, err
+		}
+
+		if _, err = io.Copy(shmDirTarFile, input); err != nil {
+			return nil, 0, err
+		}
+	}
+
+	// Save network.status. This is needed to restore the container with
+	// the same IP. Currently limited to one IP address in a container
+	// with one interface.
+	// FIXME: will this break something?
+	if _, err := metadata.WriteJSONFile(c.getNetworkStatus(), c.bundlePath(), metadata.NetworkStatusFile); err != nil {
+		return nil, 0, err
+	}
+
+	defer c.newContainerEvent(events.Checkpoint)
+
+	// There is a bug from criu: https://github.com/checkpoint-restore/criu/issues/116
+	// We have to change the symbolic link from absolute path to relative path
+	if options.WithPrevious {
+		os.Remove(path.Join(c.CheckpointPath(), "parent"))
+		if err := os.Symlink("../pre-checkpoint", path.Join(c.CheckpointPath(), "parent")); err != nil {
+			return nil, 0, err
+		}
+	}
+
+	if options.TargetFile != "" {
+		if err := c.exportCheckpoint(options); err != nil {
+			return nil, 0, err
+		}
+	} else {
+		if err := c.createCheckpointImage(ctx, options); err != nil {
+			return nil, 0, err
+		}
+	}
+
+	logrus.Debugf("Checkpointed container %s", c.ID())
+
+	if !options.KeepRunning && !options.PreCheckPoint {
+		c.state.State = define.ContainerStateStopped
+		c.state.Checkpointed = true
+		c.state.CheckpointedTime = time.Now()
+		c.state.Restored = false
+		c.state.RestoredTime = time.Time{}
+
+		// Clean up Storage and Network
+		if err := c.cleanup(ctx); err != nil {
+			return nil, 0, err
+		}
+	}
+
+	criuStatistics, err := func() (*define.CRIUCheckpointRestoreStatistics, error) {
+		if !options.PrintStats {
+			return nil, nil
+		}
+		statsDirectory, err := os.Open(c.bundlePath())
+		if err != nil {
+			return nil, fmt.Errorf("not able to open %q: %w", c.bundlePath(), err)
+		}
+		// Release the directory handle once the statistics have been read.
+		defer statsDirectory.Close()
+
+		dumpStatistics, err := stats.CriuGetDumpStats(statsDirectory)
+		if err != nil {
+			return nil, fmt.Errorf("displaying checkpointing statistics not possible: %w", err)
+		}
+
+		return &define.CRIUCheckpointRestoreStatistics{
+			FreezingTime: dumpStatistics.GetFreezingTime(),
+			FrozenTime:   dumpStatistics.GetFrozenTime(),
+			MemdumpTime:  dumpStatistics.GetMemdumpTime(),
+			MemwriteTime: dumpStatistics.GetMemwriteTime(),
+			PagesScanned: dumpStatistics.GetPagesScanned(),
+			PagesWritten: dumpStatistics.GetPagesWritten(),
+		}, nil
+	}()
+	if err != nil {
+		return nil, 0, err
+	}
+
+	if !options.Keep && !options.PreCheckPoint {
+		cleanup := []string{
+			"dump.log",
+			stats.StatsDump,
+			metadata.ConfigDumpFile,
+			metadata.SpecDumpFile,
+		}
+		for _, del := range cleanup {
+			file := filepath.Join(c.bundlePath(), del)
+			if err := os.Remove(file); err != nil {
+				logrus.Debugf("Unable to remove file %s", file)
+			}
+		}
+		// The file has been deleted. Do not mention it.
+		c.state.CheckpointLog = ""
+	}
+
+	c.state.FinishedTime = time.Now()
+	return criuStatistics, runtimeCheckpointDuration, c.save()
+}
+
+// generateContainerSpec writes the OCI spec held in the container's config to
+// disk via saveSpec, so that a config.json exists for an imported container.
+func (c *Container) generateContainerSpec() error {
+	// NewFromSpec() is deprecated according to its comment
+	// however the recommended replace just causes a nil map panic
+	//nolint:staticcheck
+	specGen := generate.NewFromSpec(c.config.Spec)
+
+	saveErr := c.saveSpec(specGen.Config)
+	if saveErr == nil {
+		return nil
+	}
+	return fmt.Errorf("saving imported container specification for restore failed: %w", saveErr)
+}
+
+// importCheckpointImage copies the checkpoint files stored in the image
+// identified by imageID into the container's bundle directory and then
+// regenerates the container's OCI spec on disk.
+//
+// The image is looked up through the libimage runtime and mounted; each entry
+// of the checkpoint file list is copied from the mount point. A failed copy of
+// an individual entry is only logged at debug level and does not abort the
+// import.
+func (c *Container) importCheckpointImage(ctx context.Context, imageID string) error {
+ img, _, err := c.Runtime().LibimageRuntime().LookupImage(imageID, nil)
+ if err != nil {
+ return err
+ }
+
+ mountPoint, err := img.Mount(ctx, nil, "")
+ // NOTE(review): this defer is registered before the Mount error is checked,
+ // so it also runs when mounting failed. It calls c.unmount (the container),
+ // not an unmount on img — confirm that this is the intended cleanup.
+ defer func() {
+ if err := c.unmount(true); err != nil {
+ logrus.Errorf("Failed to unmount container: %v", err)
+ }
+ }()
+ if err != nil {
+ return err
+ }
+
+ // Import all checkpoint files except ConfigDumpFile and SpecDumpFile. We
+ // generate new container config files to enable to specifying a new
+ // container name.
+ checkpoint := []string{
+ "artifacts",
+ metadata.CheckpointDirectory,
+ metadata.CheckpointVolumesDirectory,
+ metadata.DevShmCheckpointTar,
+ metadata.RootFsDiffTar,
+ metadata.DeletedFilesFile,
+ metadata.PodOptionsFile,
+ metadata.PodDumpFile,
+ }
+
+ for _, name := range checkpoint {
+ src := filepath.Join(mountPoint, name)
+ dst := filepath.Join(c.bundlePath(), name)
+ // Copy failures are logged and skipped rather than returned.
+ if err := archive.NewDefaultArchiver().CopyWithTar(src, dst); err != nil {
+ logrus.Debugf("Can't import '%s' from checkpoint image", name)
+ }
+ }
+
+ return c.generateContainerSpec()
+}
+
+// importCheckpointTar imports the checkpoint archive at input into the
+// container's bundle directory using CRImportCheckpointWithoutConfig and, on
+// success, regenerates the container's OCI spec on disk.
+func (c *Container) importCheckpointTar(input string) error {
+	err := crutils.CRImportCheckpointWithoutConfig(c.bundlePath(), input)
+	if err == nil {
+		err = c.generateContainerSpec()
+	}
+	return err
+}
+
+// importPreCheckpoint unpacks the pre-checkpoint archive found at input into
+// the container's bundle directory.
+func (c *Container) importPreCheckpoint(input string) error {
+	tarball, openErr := os.Open(input)
+	if openErr != nil {
+		return fmt.Errorf("failed to open pre-checkpoint archive for import: %w", openErr)
+	}
+	defer tarball.Close()
+
+	if untarErr := archive.Untar(tarball, c.bundlePath(), nil); untarErr != nil {
+		return fmt.Errorf("unpacking of pre-checkpoint archive %s failed: %w", input, untarErr)
+	}
+	return nil
+}
+
+// restore restores a container from a checkpoint.
+//
+// The checkpoint may already be present in the bundle directory or is first
+// imported from options.TargetFile (tar archive) or options.CheckpointImageID
+// (checkpoint image); options.ImportPrevious additionally imports a
+// pre-checkpoint archive. The function then rebuilds the OCI spec (network
+// namespace, pod namespaces when options.Pod is set, bind mounts), restores
+// /dev/shm, named volumes and the root file-system diff, and finally asks the
+// OCI runtime to create the container from the checkpoint.
+//
+// If the restore fails after the network options have been prepared, the
+// container is cleaned up again. Unless options.Keep is set, all checkpoint
+// related files are removed after a successful restore.
+//
+// It returns the CRIU restore statistics (nil unless options.PrintStats is
+// set), the duration reported by the runtime for the restore, and any error.
+func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (criuStatistics *define.CRIUCheckpointRestoreStatistics, runtimeRestoreDuration int64, retErr error) {
+	minCriuVersion := func() int {
+		if options.Pod == "" {
+			return criu.MinCriuVersion
+		}
+		return criu.PodCriuVersion
+	}()
+	if err := c.checkpointRestoreSupported(minCriuVersion); err != nil {
+		return nil, 0, err
+	}
+
+	if options.Pod != "" && !crutils.CRRuntimeSupportsPodCheckpointRestore(c.ociRuntime.Path()) {
+		return nil, 0, fmt.Errorf("runtime %s does not support pod restore", c.ociRuntime.Path())
+	}
+
+	if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
+		return nil, 0, fmt.Errorf("container %s is running or paused, cannot restore: %w", c.ID(), define.ErrCtrStateInvalid)
+	}
+
+	if options.ImportPrevious != "" {
+		if err := c.importPreCheckpoint(options.ImportPrevious); err != nil {
+			return nil, 0, err
+		}
+	}
+
+	if options.TargetFile != "" {
+		if err := c.importCheckpointTar(options.TargetFile); err != nil {
+			return nil, 0, err
+		}
+	} else if options.CheckpointImageID != "" {
+		if err := c.importCheckpointImage(ctx, options.CheckpointImageID); err != nil {
+			return nil, 0, err
+		}
+	}
+
+	// Let's try to stat() CRIU's inventory file. If it does not exist, it makes
+	// no sense to try a restore. This is a minimal check if a checkpoint exist.
+	if _, err := os.Stat(filepath.Join(c.CheckpointPath(), "inventory.img")); os.IsNotExist(err) {
+		return nil, 0, fmt.Errorf("a complete checkpoint for this container cannot be found, cannot restore: %w", err)
+	}
+
+	if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "restore.log", c.MountLabel()); err != nil {
+		return nil, 0, err
+	}
+
+	// Setting RestoreLog early in case there is a failure.
+	c.state.RestoreLog = path.Join(c.bundlePath(), "restore.log")
+	c.state.CheckpointPath = c.CheckpointPath()
+
+	// Read network configuration from checkpoint
+	var netStatus map[string]types.StatusBlock
+	_, err := metadata.ReadJSONFile(&netStatus, c.bundlePath(), metadata.NetworkStatusFile)
+	if err != nil {
+		logrus.Infof("Failed to unmarshal network status, cannot restore the same ip/mac: %v", err)
+	}
+	// If the restored container should get a new name, the IP address of
+	// the container will not be restored. This assumes that if a new name is
+	// specified, the container is restored multiple times.
+	// TODO: This implicit restoring with or without IP depending on an
+	// unrelated restore parameter (--name) does not seem like the
+	// best solution.
+	if err == nil && options.Name == "" && (!options.IgnoreStaticIP || !options.IgnoreStaticMAC) {
+		// The file with the network.status does exist. Let's restore the
+		// container with the same networks settings as during checkpointing.
+		networkOpts, err := c.networks()
+		if err != nil {
+			return nil, 0, err
+		}
+
+		netOpts := make(map[string]types.PerNetworkOptions, len(netStatus))
+		for network, perNetOpts := range networkOpts {
+			// unset mac and ips before we start adding the ones from the status
+			perNetOpts.StaticMAC = nil
+			perNetOpts.StaticIPs = nil
+			for name, netInt := range netStatus[network].Interfaces {
+				perNetOpts.InterfaceName = name
+				// The MAC address is governed by its own option, independent
+				// of whether the static IPs are restored.
+				if !options.IgnoreStaticMAC {
+					perNetOpts.StaticMAC = netInt.MacAddress
+				}
+				if !options.IgnoreStaticIP {
+					for _, netAddress := range netInt.Subnets {
+						perNetOpts.StaticIPs = append(perNetOpts.StaticIPs, netAddress.IPNet.IP)
+					}
+				}
+				// Normally interfaces have a length of 1, only for some special cni configs we could get more.
+				// For now just use the first interface to get the ips this should be good enough for most cases.
+				break
+			}
+			netOpts[network] = perNetOpts
+		}
+		c.perNetworkOpts = netOpts
+	}
+
+	defer func() {
+		if retErr != nil {
+			if err := c.cleanup(ctx); err != nil {
+				logrus.Errorf("Cleaning up container %s: %v", c.ID(), err)
+			}
+		}
+	}()
+
+	if err := c.prepare(); err != nil {
+		return nil, 0, err
+	}
+
+	// Read config
+	jsonPath := filepath.Join(c.bundlePath(), "config.json")
+	logrus.Debugf("generate.NewFromFile at %v", jsonPath)
+	g, err := generate.NewFromFile(jsonPath)
+	if err != nil {
+		logrus.Debugf("generate.NewFromFile failed with %v", err)
+		return nil, 0, err
+	}
+
+	// Restoring from an import means that we are doing migration
+	if options.TargetFile != "" || options.CheckpointImageID != "" {
+		g.SetRootPath(c.state.Mountpoint)
+	}
+
+	// We want to have the same network namespace as before.
+	if err := c.addNetworkNamespace(&g); err != nil {
+		return nil, 0, err
+	}
+
+	if options.Pod != "" {
+		// Running in a Pod means that we have to change all namespace settings to
+		// the ones from the infrastructure container.
+		pod, err := c.runtime.LookupPod(options.Pod)
+		if err != nil {
+			return nil, 0, fmt.Errorf("pod %q cannot be retrieved: %w", options.Pod, err)
+		}
+
+		infraContainer, err := pod.InfraContainer()
+		if err != nil {
+			return nil, 0, fmt.Errorf("cannot retrieved infra container from pod %q: %w", options.Pod, err)
+		}
+
+		infraContainer.lock.Lock()
+		if err := infraContainer.syncContainer(); err != nil {
+			infraContainer.lock.Unlock()
+			return nil, 0, fmt.Errorf("error syncing infrastructure container %s status: %w", infraContainer.ID(), err)
+		}
+		if infraContainer.state.State != define.ContainerStateRunning {
+			if err := infraContainer.initAndStart(ctx); err != nil {
+				infraContainer.lock.Unlock()
+				return nil, 0, fmt.Errorf("error starting infrastructure container %s status: %w", infraContainer.ID(), err)
+			}
+		}
+		infraContainer.lock.Unlock()
+
+		if c.config.IPCNsCtr != "" {
+			nsPath, err := infraContainer.namespacePath(IPCNS)
+			if err != nil {
+				return nil, 0, fmt.Errorf("cannot retrieve IPC namespace path for Pod %q: %w", options.Pod, err)
+			}
+			if err := g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), nsPath); err != nil {
+				return nil, 0, err
+			}
+		}
+
+		if c.config.NetNsCtr != "" {
+			nsPath, err := infraContainer.namespacePath(NetNS)
+			if err != nil {
+				return nil, 0, fmt.Errorf("cannot retrieve network namespace path for Pod %q: %w", options.Pod, err)
+			}
+			if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), nsPath); err != nil {
+				return nil, 0, err
+			}
+		}
+
+		if c.config.PIDNsCtr != "" {
+			nsPath, err := infraContainer.namespacePath(PIDNS)
+			if err != nil {
+				return nil, 0, fmt.Errorf("cannot retrieve PID namespace path for Pod %q: %w", options.Pod, err)
+			}
+			if err := g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), nsPath); err != nil {
+				return nil, 0, err
+			}
+		}
+
+		if c.config.UTSNsCtr != "" {
+			nsPath, err := infraContainer.namespacePath(UTSNS)
+			if err != nil {
+				return nil, 0, fmt.Errorf("cannot retrieve UTS namespace path for Pod %q: %w", options.Pod, err)
+			}
+			if err := g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), nsPath); err != nil {
+				return nil, 0, err
+			}
+		}
+
+		if c.config.CgroupNsCtr != "" {
+			nsPath, err := infraContainer.namespacePath(CgroupNS)
+			if err != nil {
+				return nil, 0, fmt.Errorf("cannot retrieve Cgroup namespace path for Pod %q: %w", options.Pod, err)
+			}
+			if err := g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), nsPath); err != nil {
+				return nil, 0, err
+			}
+		}
+	}
+
+	if err := c.makeBindMounts(); err != nil {
+		return nil, 0, err
+	}
+
+	if options.TargetFile != "" || options.CheckpointImageID != "" {
+		for dstPath, srcPath := range c.state.BindMounts {
+			newMount := spec.Mount{
+				Type:        "bind",
+				Source:      srcPath,
+				Destination: dstPath,
+				Options:     []string{"bind", "private"},
+			}
+			if c.IsReadOnly() && dstPath != "/dev/shm" {
+				newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
+			}
+			if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
+				newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
+			}
+			if !MountExists(g.Mounts(), dstPath) {
+				g.AddMount(newMount)
+			}
+		}
+	}
+
+	// Restore /dev/shm content
+	if c.config.ShmDir != "" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
+		shmDirTarFileFullPath := filepath.Join(c.bundlePath(), metadata.DevShmCheckpointTar)
+		if _, err := os.Stat(shmDirTarFileFullPath); err != nil {
+			logrus.Debug("Container checkpoint doesn't contain dev/shm: ", err.Error())
+		} else {
+			shmDirTarFile, err := os.Open(shmDirTarFileFullPath)
+			if err != nil {
+				return nil, 0, err
+			}
+			defer shmDirTarFile.Close()
+
+			if err := archive.UntarUncompressed(shmDirTarFile, c.config.ShmDir, nil); err != nil {
+				return nil, 0, err
+			}
+		}
+	}
+
+	// Cleanup for a working restore.
+	if err := c.removeConmonFiles(); err != nil {
+		return nil, 0, err
+	}
+
+	// Save the OCI spec to disk
+	if err := c.saveSpec(g.Config); err != nil {
+		return nil, 0, err
+	}
+
+	// When restoring from an imported archive, allow restoring the content of volumes.
+	// Volumes are created in setupContainer()
+	if !options.IgnoreVolumes && (options.TargetFile != "" || options.CheckpointImageID != "") {
+		for _, v := range c.config.NamedVolumes {
+			volumeFilePath := filepath.Join(c.bundlePath(), metadata.CheckpointVolumesDirectory, v.Name+".tar")
+
+			volumeFile, err := os.Open(volumeFilePath)
+			if err != nil {
+				return nil, 0, fmt.Errorf("failed to open volume file %s: %w", volumeFilePath, err)
+			}
+			defer volumeFile.Close()
+
+			volume, err := c.runtime.GetVolume(v.Name)
+			if err != nil {
+				return nil, 0, fmt.Errorf("failed to retrieve volume %s: %w", v.Name, err)
+			}
+
+			mountPoint, err := volume.MountPoint()
+			if err != nil {
+				return nil, 0, err
+			}
+			if mountPoint == "" {
+				// err is nil at this point; wrap a real sentinel instead.
+				return nil, 0, fmt.Errorf("unable to import volume %s as it is not mounted: %w", volume.Name(), define.ErrInternal)
+			}
+			if err := archive.UntarUncompressed(volumeFile, mountPoint, nil); err != nil {
+				return nil, 0, fmt.Errorf("failed to extract volume %s to %s: %w", volumeFilePath, mountPoint, err)
+			}
+		}
+	}
+
+	// Before actually restarting the container, apply the root file-system changes
+	if !options.IgnoreRootfs {
+		if err := crutils.CRApplyRootFsDiffTar(c.bundlePath(), c.state.Mountpoint); err != nil {
+			return nil, 0, err
+		}
+
+		if err := crutils.CRRemoveDeletedFiles(c.ID(), c.bundlePath(), c.state.Mountpoint); err != nil {
+			return nil, 0, err
+		}
+	}
+
+	runtimeRestoreDuration, err = c.ociRuntime.CreateContainer(c, &options)
+	if err != nil {
+		return nil, 0, err
+	}
+
+	criuStatistics, err = func() (*define.CRIUCheckpointRestoreStatistics, error) {
+		if !options.PrintStats {
+			return nil, nil
+		}
+		statsDirectory, err := os.Open(c.bundlePath())
+		if err != nil {
+			return nil, fmt.Errorf("not able to open %q: %w", c.bundlePath(), err)
+		}
+		// Release the directory handle once the statistics have been read.
+		defer statsDirectory.Close()
+
+		restoreStatistics, err := stats.CriuGetRestoreStats(statsDirectory)
+		if err != nil {
+			return nil, fmt.Errorf("displaying restore statistics not possible: %w", err)
+		}
+
+		return &define.CRIUCheckpointRestoreStatistics{
+			PagesCompared:   restoreStatistics.GetPagesCompared(),
+			PagesSkippedCow: restoreStatistics.GetPagesSkippedCow(),
+			ForkingTime:     restoreStatistics.GetForkingTime(),
+			RestoreTime:     restoreStatistics.GetRestoreTime(),
+			PagesRestored:   restoreStatistics.GetPagesRestored(),
+		}, nil
+	}()
+	if err != nil {
+		return nil, 0, err
+	}
+
+	logrus.Debugf("Restored container %s", c.ID())
+
+	c.state.State = define.ContainerStateRunning
+	c.state.Checkpointed = false
+	c.state.Restored = true
+	c.state.CheckpointedTime = time.Time{}
+	c.state.RestoredTime = time.Now()
+
+	if !options.Keep {
+		// Delete all checkpoint related files. At this point, in theory, all files
+		// should exist. Still ignoring errors for now as the container should be
+		// restored and running. Not erroring out just because some cleanup operation
+		// failed. Starting with the checkpoint directory
+		err = os.RemoveAll(c.CheckpointPath())
+		if err != nil {
+			logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", c.CheckpointPath(), err)
+		}
+		c.state.CheckpointPath = ""
+		err = os.RemoveAll(c.PreCheckPointPath())
+		if err != nil {
+			logrus.Debugf("Non-fatal: removal of pre-checkpoint directory (%s) failed: %v", c.PreCheckPointPath(), err)
+		}
+		err = os.RemoveAll(c.CheckpointVolumesPath())
+		if err != nil {
+			logrus.Debugf("Non-fatal: removal of checkpoint volumes directory (%s) failed: %v", c.CheckpointVolumesPath(), err)
+		}
+		cleanup := [...]string{
+			"restore.log",
+			"dump.log",
+			stats.StatsDump,
+			stats.StatsRestore,
+			metadata.DevShmCheckpointTar,
+			metadata.NetworkStatusFile,
+			metadata.RootFsDiffTar,
+			metadata.DeletedFilesFile,
+		}
+		for _, del := range cleanup {
+			file := filepath.Join(c.bundlePath(), del)
+			err = os.Remove(file)
+			if err != nil {
+				logrus.Debugf("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err)
+			}
+		}
+		c.state.CheckpointLog = ""
+		c.state.RestoreLog = ""
+	}
+
+	return criuStatistics, runtimeRestoreDuration, c.save()
+}
+
+// getRootNetNsDepCtr retrieves a container's "root" net namespace container
+// dependency.
+//
+// Starting at c.config.NetNsCtr it follows the NetNsCtr reference of each
+// container until it reaches one that does not share another container's
+// network namespace, and returns that container. An error is returned if the
+// chain loops, if a container in the chain cannot be fetched from the runtime
+// state, or if no dependency container was found at all.
+func (c *Container) getRootNetNsDepCtr() (depCtr *Container, err error) {
+	containersVisited := map[string]struct{}{c.config.ID: {}}
+	nextCtr := c.config.NetNsCtr
+	for nextCtr != "" {
+		// Make sure we aren't in a loop
+		if _, visited := containersVisited[nextCtr]; visited {
+			return nil, errors.New("loop encountered while determining net namespace container")
+		}
+		containersVisited[nextCtr] = struct{}{}
+
+		depCtr, err = c.runtime.state.Container(nextCtr)
+		if err != nil {
+			// Report the container that actually failed to load, which is
+			// not necessarily the direct dependency of c.
+			return nil, fmt.Errorf("error fetching dependency %s of container %s: %w", nextCtr, c.ID(), err)
+		}
+		// This should never happen without an error
+		if depCtr == nil {
+			break
+		}
+		nextCtr = depCtr.config.NetNsCtr
+	}
+
+	if depCtr == nil {
+		return nil, errors.New("unexpected error depCtr is nil without reported error from runtime state")
+	}
+	return depCtr, nil
+}
+
+// mountIntoRootDirs records mountPath as the bind mount source for mountName
+// in the container's root directory and in each of its configured chroot
+// directories. It always returns nil.
+func (c *Container) mountIntoRootDirs(mountName string, mountPath string) error {
+	mounts := c.state.BindMounts
+	mounts[mountName] = mountPath
+
+	chroots := c.config.ChrootDirs
+	for i := range chroots {
+		mounts[filepath.Join(chroots[i], mountName)] = mountPath
+	}
+
+	return nil
+}
+
+// Make standard bind mounts to include in the container
+func (c *Container) makeBindMounts() error {
+ if err := os.Chown(c.state.RunDir, c.RootUID(), c.RootGID()); err != nil {
+ return fmt.Errorf("cannot chown run directory: %w", err)
+ }
+
+ if c.state.BindMounts == nil {
+ c.state.BindMounts = make(map[string]string)
+ }
+ netDisabled, err := c.NetworkDisabled()
+ if err != nil {
+ return err
+ }
+
+ if !netDisabled {
+ // If /etc/resolv.conf and /etc/hosts exist, delete them so we
+ // will recreate. Only do this if we aren't sharing them with
+ // another container.
+ if c.config.NetNsCtr == "" {
+ if resolvePath, ok := c.state.BindMounts["/etc/resolv.conf"]; ok {
+ if err := os.Remove(resolvePath); err != nil && !os.IsNotExist(err) {
+ return fmt.Errorf("container %s: %w", c.ID(), err)
+ }
+ delete(c.state.BindMounts, "/etc/resolv.conf")
+ }
+ if hostsPath, ok := c.state.BindMounts["/etc/hosts"]; ok {
+ if err := os.Remove(hostsPath); err != nil && !os.IsNotExist(err) {
+ return fmt.Errorf("container %s: %w", c.ID(), err)
+ }
+ delete(c.state.BindMounts, "/etc/hosts")
+ }
+ }
+
+ if c.config.NetNsCtr != "" && (!c.config.UseImageResolvConf || !c.config.UseImageHosts) {
+ // We share a net namespace.
+ // We want /etc/resolv.conf and /etc/hosts from the
+ // other container. Unless we're not creating both of
+ // them.
+ depCtr, err := c.getRootNetNsDepCtr()
+ if err != nil {
+ return fmt.Errorf("error fetching network namespace dependency container for container %s: %w", c.ID(), err)
+ }
+
+ // We need that container's bind mounts
+ bindMounts, err := depCtr.BindMounts()
+ if err != nil {
+ return fmt.Errorf("error fetching bind mounts from dependency %s of container %s: %w", depCtr.ID(), c.ID(), err)
+ }
+
+ // The other container may not have a resolv.conf or /etc/hosts
+ // If it doesn't, don't copy them
+ resolvPath, exists := bindMounts["/etc/resolv.conf"]
+ if !c.config.UseImageResolvConf && exists {
+ err := c.mountIntoRootDirs("/etc/resolv.conf", resolvPath)
+
+ if err != nil {
+ return fmt.Errorf("error assigning mounts to container %s: %w", c.ID(), err)
+ }
+ }
+
+ // check if dependency container has an /etc/hosts file.
+ // It may not have one, so only use it if it does.
+ hostsPath, exists := bindMounts[config.DefaultHostsFile]
+ if !c.config.UseImageHosts && exists {
+ // we cannot use the dependency container lock due ABBA deadlocks in cleanup()
+ lock, err := lockfile.GetLockfile(hostsPath)
+ if err != nil {
+ return fmt.Errorf("failed to lock hosts file: %w", err)
+ }
+ lock.Lock()
+
+ // add the newly added container to the hosts file
+ // we always use 127.0.0.1 as ip since they have the same netns
+ err = etchosts.Add(hostsPath, getLocalhostHostEntry(c))
+ lock.Unlock()
+ if err != nil {
+ return fmt.Errorf("error creating hosts file for container %s which depends on container %s: %w", c.ID(), depCtr.ID(), err)
+ }
+
+ // finally, save it in the new container
+ err = c.mountIntoRootDirs(config.DefaultHostsFile, hostsPath)
+ if err != nil {
+ return fmt.Errorf("error assigning mounts to container %s: %w", c.ID(), err)
+ }
+ }
+
+ if !hasCurrentUserMapped(c) {
+ if err := makeAccessible(resolvPath, c.RootUID(), c.RootGID()); err != nil {
+ return err
+ }
+ if err := makeAccessible(hostsPath, c.RootUID(), c.RootGID()); err != nil {
+ return err
+ }
+ }
+ } else {
+ if !c.config.UseImageResolvConf {
+ if err := c.generateResolvConf(); err != nil {
+ return fmt.Errorf("error creating resolv.conf for container %s: %w", c.ID(), err)
+ }
+ }
+
+ if !c.config.UseImageHosts {
+ if err := c.createHosts(); err != nil {
+ return fmt.Errorf("error creating hosts file for container %s: %w", c.ID(), err)
+ }
+ }
+ }
+
+ if c.state.BindMounts["/etc/hosts"] != "" {
+ if err := c.relabel(c.state.BindMounts["/etc/hosts"], c.config.MountLabel, true); err != nil {
+ return err
+ }
+ }
+
+ if c.state.BindMounts["/etc/resolv.conf"] != "" {
+ if err := c.relabel(c.state.BindMounts["/etc/resolv.conf"], c.config.MountLabel, true); err != nil {
+ return err
+ }
+ }
+ } else if !c.config.UseImageHosts && c.state.BindMounts["/etc/hosts"] == "" {
+ if err := c.createHosts(); err != nil {
+ return fmt.Errorf("error creating hosts file for container %s: %w", c.ID(), err)
+ }
+ }
+
+ if c.config.ShmDir != "" {
+ // If ShmDir has a value SHM is always added when we mount the container
+ c.state.BindMounts["/dev/shm"] = c.config.ShmDir
+ }
+
+ if c.config.Passwd == nil || *c.config.Passwd {
+ newPasswd, newGroup, err := c.generatePasswdAndGroup()
+ if err != nil {
+ return fmt.Errorf("error creating temporary passwd file for container %s: %w", c.ID(), err)
+ }
+ if newPasswd != "" {
+ // Make /etc/passwd
+ // If it already exists, delete so we can recreate
+ delete(c.state.BindMounts, "/etc/passwd")
+ c.state.BindMounts["/etc/passwd"] = newPasswd
+ }
+ if newGroup != "" {
+ // Make /etc/group
+ // If it already exists, delete so we can recreate
+ delete(c.state.BindMounts, "/etc/group")
+ c.state.BindMounts["/etc/group"] = newGroup
+ }
+ }
+
+ // Make /etc/hostname
+ // This should never change, so no need to recreate if it exists
+ if _, ok := c.state.BindMounts["/etc/hostname"]; !ok {
+ hostnamePath, err := c.writeStringToRundir("hostname", c.Hostname())
+ if err != nil {
+ return fmt.Errorf("error creating hostname file for container %s: %w", c.ID(), err)
+ }
+ c.state.BindMounts["/etc/hostname"] = hostnamePath
+ }
+
+ // Make /etc/localtime
+ ctrTimezone := c.Timezone()
+ if ctrTimezone != "" {
+ // validate the format of the timezone specified if it's not "local"
+ if ctrTimezone != "local" {
+ _, err = time.LoadLocation(ctrTimezone)
+ if err != nil {
+ return fmt.Errorf("error finding timezone for container %s: %w", c.ID(), err)
+ }
+ }
+ if _, ok := c.state.BindMounts["/etc/localtime"]; !ok {
+ var zonePath string
+ if ctrTimezone == "local" {
+ zonePath, err = filepath.EvalSymlinks("/etc/localtime")
+ if err != nil {
+ return fmt.Errorf("error finding local timezone for container %s: %w", c.ID(), err)
+ }
+ } else {
+ zone := filepath.Join("/usr/share/zoneinfo", ctrTimezone)
+ zonePath, err = filepath.EvalSymlinks(zone)
+ if err != nil {
+ return fmt.Errorf("error setting timezone for container %s: %w", c.ID(), err)
+ }
+ }
+ localtimePath, err := c.copyTimezoneFile(zonePath)
+ if err != nil {
+ return fmt.Errorf("error setting timezone for container %s: %w", c.ID(), err)
+ }
+ c.state.BindMounts["/etc/localtime"] = localtimePath
+ }
+ }
+
+ _, hasRunContainerenv := c.state.BindMounts["/run/.containerenv"]
+ if !hasRunContainerenv {
+ // check in the spec mounts
+ for _, m := range c.config.Spec.Mounts {
+ if m.Destination == "/run/.containerenv" || m.Destination == "/run" {
+ hasRunContainerenv = true
+ break
+ }
+ }
+ }
+
+ // Make .containerenv if it does not exist
+ if !hasRunContainerenv {
+ containerenv := c.runtime.graphRootMountedFlag(c.config.Spec.Mounts)
+ isRootless := 0
+ if rootless.IsRootless() {
+ isRootless = 1
+ }
+ imageID, imageName := c.Image()
+
+ if c.Privileged() {
+ // Populate the .containerenv with container information
+ containerenv = fmt.Sprintf(`engine="podman-%s"
+name=%q
+id=%q
+image=%q
+imageid=%q
+rootless=%d
+%s`, version.Version.String(), c.Name(), c.ID(), imageName, imageID, isRootless, containerenv)
+ }
+ containerenvPath, err := c.writeStringToRundir(".containerenv", containerenv)
+ if err != nil {
+ return fmt.Errorf("error creating containerenv file for container %s: %w", c.ID(), err)
+ }
+ c.state.BindMounts["/run/.containerenv"] = containerenvPath
+ }
+
+ // Add Subscription Mounts
+ subscriptionMounts := subscriptions.MountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.Containers.DefaultMountsFile, c.state.Mountpoint, c.RootUID(), c.RootGID(), rootless.IsRootless(), false)
+ for _, mount := range subscriptionMounts {
+ if _, ok := c.state.BindMounts[mount.Destination]; !ok {
+ c.state.BindMounts[mount.Destination] = mount.Source
+ }
+ }
+
+ // Secrets are mounted by getting the secret data from the secrets manager,
+ // copying the data into the container's static dir,
+ // then mounting the copied dir into /run/secrets.
+ // The secrets mounting must come after subscription mounts, since subscription mounts
+ // creates the /run/secrets dir in the container where we mount as well.
+ if len(c.Secrets()) > 0 {
+ // create /run/secrets if subscriptions did not create
+ if err := c.createSecretMountDir(); err != nil {
+ return fmt.Errorf("error creating secrets mount: %w", err)
+ }
+ for _, secret := range c.Secrets() {
+ secretFileName := secret.Name
+ base := "/run/secrets"
+ if secret.Target != "" {
+ secretFileName = secret.Target
+ // If absolute path for target given remove base.
+ if filepath.IsAbs(secretFileName) {
+ base = ""
+ }
+ }
+ src := filepath.Join(c.config.SecretsPath, secret.Name)
+ dest := filepath.Join(base, secretFileName)
+ c.state.BindMounts[dest] = src
+ }
+ }
+
+ return nil
+}
+
+// generateResolvConf writes the container's resolv.conf into its run
+// directory and registers it as the bind mount for
+// resolvconf.DefaultResolvConf.
+//
+// Nameservers from containers.conf and from the container config take
+// precedence. Only when neither is set are the DNS servers and search domains
+// reported by the network status used (extended by addSlirp4netnsDNS), and
+// KeepHostServers is enabled. Search domains configured in containers.conf or
+// on the container always replace the network-provided ones.
+//
+// Returns any error from the IPv6 check, from building the file, or from
+// bind-mounting it.
+func (c *Container) generateResolvConf() error {
+	var (
+		networkNameServers   []string
+		networkSearchDomains []string
+	)
+
+	netStatus := c.getNetworkStatus()
+	for _, status := range netStatus {
+		if status.DNSServerIPs != nil {
+			for _, nsIP := range status.DNSServerIPs {
+				networkNameServers = append(networkNameServers, nsIP.String())
+			}
+			logrus.Debugf("Adding nameserver(s) from network status of '%q'", status.DNSServerIPs)
+		}
+		if status.DNSSearchDomains != nil {
+			networkSearchDomains = append(networkSearchDomains, status.DNSSearchDomains...)
+			logrus.Debugf("Adding search domain(s) from network status of '%q'", status.DNSSearchDomains)
+		}
+	}
+
+	ipv6, err := c.checkForIPv6(netStatus)
+	if err != nil {
+		return err
+	}
+
+	nameservers := make([]string, 0, len(c.runtime.config.Containers.DNSServers)+len(c.config.DNSServer))
+	nameservers = append(nameservers, c.runtime.config.Containers.DNSServers...)
+	for _, ip := range c.config.DNSServer {
+		nameservers = append(nameservers, ip.String())
+	}
+	// If the user provided dns, it trumps all; then dns masq; then resolv.conf
+	var search []string
+	keepHostServers := false
+	if len(nameservers) == 0 {
+		keepHostServers = true
+		// first add the nameservers from the networks status
+		nameservers = networkNameServers
+		// when we add network dns server we also have to add the search domains
+		search = networkSearchDomains
+		// slirp4netns has a built in DNS forwarder.
+		nameservers = c.addSlirp4netnsDNS(nameservers)
+	}
+
+	if len(c.config.DNSSearch) > 0 || len(c.runtime.config.Containers.DNSSearches) > 0 {
+		customSearch := make([]string, 0, len(c.config.DNSSearch)+len(c.runtime.config.Containers.DNSSearches))
+		customSearch = append(customSearch, c.runtime.config.Containers.DNSSearches...)
+		customSearch = append(customSearch, c.config.DNSSearch...)
+		search = customSearch
+	}
+
+	options := make([]string, 0, len(c.config.DNSOption)+len(c.runtime.config.Containers.DNSOptions))
+	options = append(options, c.runtime.config.Containers.DNSOptions...)
+	options = append(options, c.config.DNSOption...)
+
+	destPath := filepath.Join(c.state.RunDir, "resolv.conf")
+
+	params := &resolvconf.Params{
+		IPv6Enabled:     ipv6,
+		KeepHostServers: keepHostServers,
+		Nameservers:     nameservers,
+		Options:         options,
+		Path:            destPath,
+		Searches:        search,
+	}
+	// The "linux" section of the OCI spec is optional and this code is shared
+	// with non-Linux platforms, so only read the namespaces when it is set;
+	// dereferencing a nil Spec.Linux would panic.
+	if c.config.Spec.Linux != nil {
+		params.Namespaces = c.config.Spec.Linux.Namespaces
+	}
+
+	if err := resolvconf.New(params); err != nil {
+		return fmt.Errorf("error building resolv.conf for container %s: %w", c.ID(), err)
+	}
+
+	return c.bindMountRootFile(destPath, resolvconf.DefaultResolvConf)
+}
+
+// checkForIPv6 reports whether any subnet in the given network status carries
+// an IPv6 address. When none does, the decision is delegated to
+// isSlirp4netnsIPv6.
+func (c *Container) checkForIPv6(netStatus map[string]types.StatusBlock) (bool, error) {
+	for _, block := range netStatus {
+		for _, iface := range block.Interfaces {
+			for _, subnet := range iface.Subnets {
+				ip := subnet.IPNet.IP
+				// To16() alone is not sufficient: it also returns a valid
+				// address for IPv4, so rule IPv4 out first.
+				if ip.To4() != nil {
+					continue
+				}
+				if ip.To16() != nil {
+					return true, nil
+				}
+			}
+		}
+	}
+
+	return c.isSlirp4netnsIPv6()
+}
+
+// addNameserver adds the given nameservers to the container's resolv.conf
+// through resolvconf.Add, which is expected to place them ahead of the
+// existing entries.
+// It does nothing unless the container is in the running or created state and
+// already has a resolv.conf bind mount.
+func (c *Container) addNameserver(ips []string) error {
+	// Only containers that are running or created are touched.
+	if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
+		return nil
+	}
+
+	// Without a managed resolv.conf there is nothing to edit.
+	resolvPath, hasResolv := c.state.BindMounts[resolvconf.DefaultResolvConf]
+	if !hasResolv {
+		return nil
+	}
+
+	err := resolvconf.Add(resolvPath, ips)
+	if err == nil {
+		return nil
+	}
+	return fmt.Errorf("adding new nameserver to container %s resolv.conf: %w", c.ID(), err)
+}
+
+// removeNameserver removes the given nameservers from the container's
+// resolv.conf through resolvconf.Remove.
+// It does nothing unless the container is in the running or created state and
+// already has a resolv.conf bind mount.
+func (c *Container) removeNameserver(ips []string) error {
+	// Only containers that are running or created are touched.
+	if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
+		return nil
+	}
+
+	// Without a managed resolv.conf there is nothing to edit.
+	resolvPath, hasResolv := c.state.BindMounts[resolvconf.DefaultResolvConf]
+	if !hasResolv {
+		return nil
+	}
+
+	err := resolvconf.Remove(resolvPath, ips)
+	if err == nil {
+		return nil
+	}
+	return fmt.Errorf("removing nameservers from container %s resolv.conf: %w", c.ID(), err)
+}
+
+// getLocalhostHostEntry returns a single hosts entry that maps the
+// container's hostname and its name to 127.0.0.1.
+func getLocalhostHostEntry(c *Container) etchosts.HostEntries {
+	names := []string{c.Hostname(), c.config.Name}
+	return etchosts.HostEntries{{IP: "127.0.0.1", Names: names}}
+}
+
+// getHostsEntries returns the container ip host entries for the correct netmode
+func (c *Container) getHostsEntries() (etchosts.HostEntries, error) {
+ var entries etchosts.HostEntries
+ names := []string{c.Hostname(), c.config.Name}
+ switch {
+ case c.config.NetMode.IsBridge():
+ entries = etchosts.GetNetworkHostEntries(c.state.NetworkStatus, names...)
+ case c.config.NetMode.IsSlirp4netns():
+ ip, err := GetSlirp4netnsIP(c.slirp4netnsSubnet)
+ if err != nil {
+ return nil, err
+ }
+ entries = etchosts.HostEntries{{IP: ip.String(), Names: names}}
+ default:
+ if c.hasNetNone() {
+ entries = etchosts.HostEntries{{IP: "127.0.0.1", Names: names}}
+ }
+ }
+ return entries, nil
+}
+
+// createHosts generates the container's hosts file in its run directory and
+// registers it as the bind mount for config.DefaultHostsFile.
+func (c *Container) createHosts() error {
+	// When the netns is configured after container creation the real IPs are
+	// not known yet, so no container entries are written here; they are added
+	// later in c.completeNetworkSetup().
+	var ipEntries etchosts.HostEntries
+	if !c.config.PostConfigureNetNS {
+		entries, err := c.getHostsEntries()
+		if err != nil {
+			return fmt.Errorf("failed to get container ip host entries: %w", err)
+		}
+		ipEntries = entries
+	}
+
+	baseFile, err := etchosts.GetBaseHostFile(c.runtime.config.Containers.BaseHostsFile, c.state.Mountpoint)
+	if err != nil {
+		return err
+	}
+
+	hostsFile := filepath.Join(c.state.RunDir, "hosts")
+	params := &etchosts.Params{
+		BaseFile:                 baseFile,
+		ExtraHosts:               c.config.HostAdd,
+		ContainerIPs:             ipEntries,
+		HostContainersInternalIP: etchosts.GetHostContainersInternalIP(c.runtime.config, c.state.NetworkStatus, c.runtime.network),
+		TargetFile:               hostsFile,
+	}
+	if err := etchosts.New(params); err != nil {
+		return err
+	}
+
+	return c.bindMountRootFile(hostsFile, config.DefaultHostsFile)
+}
+
+// bindMountRootFile makes a host file usable inside the container and records
+// it in the container's bind mounts.
+// source is the path on the host and dest the path inside the container. The
+// file is chowned to the container's root UID/GID and relabeled with the
+// container's mount label before being handed to mountIntoRootDirs.
+func (c *Container) bindMountRootFile(source, dest string) error {
+	err := os.Chown(source, c.RootUID(), c.RootGID())
+	if err == nil {
+		err = label.Relabel(source, c.MountLabel(), false)
+	}
+	if err != nil {
+		return err
+	}
+
+	return c.mountIntoRootDirs(dest, source)
+}
+
+// generateGroupEntry generates an entry or entries into /etc/group as
+// required by container configuration.
+// Generally speaking, we will make an entry under two circumstances:
+// 1. The container is started as a specific user:group, and that group is both
+// numeric, and does not already exist in /etc/group.
+// 2. It is requested that Libpod add the group that launched Podman to
+// /etc/group via AddCurrentUserPasswdEntry (though this does not trigger if
+// the group in question already exists in /etc/passwd).
+//
+// Returns group entry (as a string that can be appended to /etc/group) and any
+// error that occurred.
+func (c *Container) generateGroupEntry() (string, error) {
+ groupString := ""
+
+ // Things we *can't* handle: adding the user we added in
+ // generatePasswdEntry to any *existing* groups.
+ addedGID := 0
+ if c.config.AddCurrentUserPasswdEntry {
+ entry, gid, err := c.generateCurrentUserGroupEntry()
+ if err != nil {
+ return "", err
+ }
+ groupString += entry
+ addedGID = gid
+ }
+ if c.config.User != "" {
+ entry, err := c.generateUserGroupEntry(addedGID)
+ if err != nil {
+ return "", err
+ }
+ groupString += entry
+ }
+
+ return groupString, nil
+}
+
+// generateCurrentUserGroupEntry builds an /etc/group line for the group of
+// the user running podman. Nothing is produced when
+// rootless.GetRootlessGID reports 0.
+// Returns the line, the GID it was made for, and any error. The line is empty
+// if the group name or GID is already found in the image.
+func (c *Container) generateCurrentUserGroupEntry() (string, int, error) {
+	rootlessGID := rootless.GetRootlessGID()
+	if rootlessGID == 0 {
+		return "", 0, nil
+	}
+
+	hostGroup, err := user.LookupGroupId(strconv.Itoa(rootlessGID))
+	if err != nil {
+		return "", 0, fmt.Errorf("failed to get current group: %w", err)
+	}
+
+	// Check the image for the group, first by name and then by GID. Any
+	// result other than "no such entry" (a hit or a real error) stops here.
+	for _, key := range []string{hostGroup.Name, hostGroup.Gid} {
+		if _, err := lookup.GetGroup(c.state.Mountpoint, key); err != runcuser.ErrNoGroupEntries {
+			return "", 0, err
+		}
+	}
+
+	// The rootless user's name is listed as the member of the new group.
+	member := ""
+	if rootlessUID := rootless.GetRootlessUID(); rootlessUID != 0 {
+		hostUser, err := user.LookupId(strconv.Itoa(rootlessUID))
+		if err != nil {
+			return "", 0, fmt.Errorf("failed to get current user to make group entry: %w", err)
+		}
+		member = hostUser.Username
+	}
+
+	line := hostGroup.Name + ":x:" + hostGroup.Gid + ":" + member + "\n"
+	return line, rootlessGID, nil
+}
+
+// generateUserGroupEntry builds an /etc/group line for the group the
+// container was configured to run as.
+// The group is the part after ":" in the configured user, or the user part
+// itself when no group was given. A line is only produced for a numeric group
+// that differs from addedGID (when that is non-zero) and is not found in the
+// image.
+func (c *Container) generateUserGroupEntry(addedGID int) (string, error) {
+	if c.config.User == "" {
+		return "", nil
+	}
+
+	parts := strings.SplitN(c.config.User, ":", 2)
+	// Last element: the explicit group if present, otherwise the user part.
+	groupSpec := parts[len(parts)-1]
+
+	numericGID, err := strconv.ParseUint(groupSpec, 10, 32)
+	if err != nil {
+		// A non-numeric group is not an error; there is just nothing to add.
+		return "", nil //nolint: nilerr
+	}
+
+	if addedGID != 0 && int(numericGID) == addedGID {
+		return "", nil
+	}
+
+	// Anything but "no such entry" means the group exists or the lookup failed.
+	if _, err := lookup.GetGroup(c.state.Mountpoint, groupSpec); err != runcuser.ErrNoGroupEntries {
+		return "", err
+	}
+
+	return fmt.Sprintf("%d:x:%d:%s\n", numericGID, numericGID, parts[0]), nil
+}
+
+// generatePasswdEntry generates an entry or entries into /etc/passwd as
+// required by container configuration.
+// Generally speaking, we will make an entry under two circumstances:
+// 1. The container is started as a specific user who is not in /etc/passwd.
+// This only triggers if the user is given as a *numeric* ID.
+// 2. It is requested that Libpod add the user that launched Podman to
+// /etc/passwd via AddCurrentUserPasswdEntry (though this does not trigger if
+// the user in question already exists in /etc/passwd) or the UID to be added
+// is 0).
+// 3. The user specified additional host user accounts to add the the /etc/passwd file
+//
+// Returns password entry (as a string that can be appended to /etc/passwd) and
+// any error that occurred.
+func (c *Container) generatePasswdEntry() (string, error) {
+ passwdString := ""
+
+ addedUID := 0
+ for _, userid := range c.config.HostUsers {
+ // Look up User on host
+ u, err := util.LookupUser(userid)
+ if err != nil {
+ return "", err
+ }
+ entry, err := c.userPasswdEntry(u)
+ if err != nil {
+ return "", err
+ }
+ passwdString += entry
+ }
+ if c.config.AddCurrentUserPasswdEntry {
+ entry, uid, _, err := c.generateCurrentUserPasswdEntry()
+ if err != nil {
+ return "", err
+ }
+ passwdString += entry
+ addedUID = uid
+ }
+ if c.config.User != "" {
+ entry, err := c.generateUserPasswdEntry(addedUID)
+ if err != nil {
+ return "", err
+ }
+ passwdString += entry
+ }
+
+ return passwdString, nil
+}
+
+// generateCurrentUserPasswdEntry builds an /etc/passwd line for the user
+// running the container engine.
+// Returns the line together with the UID and GID it was made for. Nothing is
+// produced when rootless.GetRootlessUID reports 0.
+func (c *Container) generateCurrentUserPasswdEntry() (string, int, int, error) {
+	rootlessUID := rootless.GetRootlessUID()
+	if rootlessUID == 0 {
+		return "", 0, 0, nil
+	}
+
+	hostUser, err := user.LookupId(strconv.Itoa(rootlessUID))
+	if err != nil {
+		return "", 0, 0, fmt.Errorf("failed to get current user: %w", err)
+	}
+
+	line, err := c.userPasswdEntry(hostUser)
+	if err != nil {
+		return "", 0, 0, err
+	}
+
+	return line, rootlessUID, rootless.GetRootlessGID(), nil
+}
+
+// userPasswdEntry renders an /etc/passwd line for the given host user.
+//
+// An empty line is returned when the user's name or UID is already found in
+// the image. The home directory defaults to the container's working
+// directory; the user's real home directory is used instead when it, or one
+// of its parent directories, is a mount in the spec, or when it is one of the
+// container's user volumes.
+//
+// Side effect: if the spec's process environment has no HOME variable, one is
+// appended with the chosen home directory.
+//
+// The line follows c.config.PasswdEntry when that template is set, otherwise
+// a default format with /bin/sh as shell.
+func (c *Container) userPasswdEntry(u *user.User) (string, error) {
+	// Check the image for the user, first by name and then by UID. Any result
+	// other than "no such entry" (a hit or a real error) stops here.
+	for _, key := range []string{u.Username, u.Uid} {
+		if _, err := lookup.GetUser(c.state.Mountpoint, key); err != runcuser.ErrNoPasswdEntries {
+			return "", err
+		}
+	}
+
+	// If the user's actual home directory exists, or was mounted in - use
+	// that. Walk from the home directory up towards "/".
+	homeDir := c.WorkingDir()
+	for dir := u.HomeDir; dir != "/"; {
+		if MountExists(c.config.Spec.Mounts, dir) {
+			homeDir = u.HomeDir
+			break
+		}
+		parent := filepath.Dir(dir)
+		if parent == dir {
+			// filepath.Dir reached a fixed point other than "/" (e.g. "."
+			// for an empty or relative home directory); stop instead of
+			// looping forever.
+			break
+		}
+		dir = parent
+	}
+	if homeDir != u.HomeDir {
+		for _, volume := range c.UserVolumes() {
+			if volume == u.HomeDir {
+				homeDir = u.HomeDir
+				break
+			}
+		}
+	}
+
+	// Set HOME environment if not already set
+	hasHomeSet := false
+	for _, env := range c.config.Spec.Process.Env {
+		if strings.HasPrefix(env, "HOME=") {
+			hasHomeSet = true
+			break
+		}
+	}
+	if !hasHomeSet {
+		c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", homeDir))
+	}
+
+	if c.config.PasswdEntry != "" {
+		return c.passwdEntry(u.Username, u.Uid, u.Gid, u.Name, homeDir), nil
+	}
+
+	return fmt.Sprintf("%s:*:%s:%s:%s:%s:/bin/sh\n", u.Username, u.Uid, u.Gid, u.Name, homeDir), nil
+}
+
+// generateUserPasswdEntry generates an /etc/passwd entry for the container user
+// to run in the container.
+// The UID and GID of the added entry will also be returned.
+// Accepts one argument, that being any UID that has already been added to the
+// passwd file by other functions; if it matches the UID we were given, we don't
+// need to do anything.
+func (c *Container) generateUserPasswdEntry(addedUID int) (string, error) {
+ var (
+ groupspec string
+ gid int
+ )
+ if c.config.User == "" {
+ return "", nil
+ }
+ splitSpec := strings.SplitN(c.config.User, ":", 2)
+ userspec := splitSpec[0]
+ if len(splitSpec) > 1 {
+ groupspec = splitSpec[1]
+ }
+ // If a non numeric User, then don't generate passwd
+ uid, err := strconv.ParseUint(userspec, 10, 32)
+ if err != nil {
+ return "", nil //nolint: nilerr
+ }
+
+ if addedUID != 0 && int(uid) == addedUID {
+ return "", nil
+ }
+
+ // Look up the user to see if it exists in the container image
+ _, err = lookup.GetUser(c.state.Mountpoint, userspec)
+ if err != runcuser.ErrNoPasswdEntries {
+ return "", err
+ }
+
+ if groupspec != "" {
+ ugid, err := strconv.ParseUint(groupspec, 10, 32)
+ if err == nil {
+ gid = int(ugid)
+ } else {
+ group, err := lookup.GetGroup(c.state.Mountpoint, groupspec)
+ if err != nil {
+ return "", fmt.Errorf("unable to get gid %s from group file: %w", groupspec, err)
+ }
+ gid = group.Gid
+ }
+ }
+
+ if c.config.PasswdEntry != "" {
+ entry := c.passwdEntry(fmt.Sprintf("%d", uid), fmt.Sprintf("%d", uid), fmt.Sprintf("%d", gid), "container user", c.WorkingDir())
+ return entry, nil
+ }
+
+ return fmt.Sprintf("%d:*:%d:%d:container user:%s:/bin/sh\n", uid, uid, gid, c.WorkingDir()), nil
+}
+
+// passwdEntry expands the container's PasswdEntry template into a passwd
+// line terminated by a newline.
+// The placeholders $USERNAME, $UID, $GID, $NAME and $HOME are substituted one
+// after another in that order, so a value inserted by an earlier step is
+// still subject to the later substitutions.
+func (c *Container) passwdEntry(username string, uid, gid, name, homeDir string) string {
+	substitutions := [...][2]string{
+		{"$USERNAME", username},
+		{"$UID", uid},
+		{"$GID", gid},
+		{"$NAME", name},
+		{"$HOME", homeDir},
+	}
+
+	entry := c.config.PasswdEntry
+	for _, sub := range substitutions {
+		entry = strings.ReplaceAll(entry, sub[0], sub[1])
+	}
+	return entry + "\n"
+}
+
+// generatePasswdAndGroup generates container-specific passwd and group files
+// if c.config.User is set, or we are configured to make a passwd entry for
+// the current user, or the user specified HostUsers.
+// Returns path to file to mount at /etc/passwd, path to file to mount at
+// /etc/group, and any error that occurred. If no passwd/group file were
+// required, the empty string will be returned for those path (this may occur
+// even if no error happened).
+// This may modify the mounted container's /etc/passwd and /etc/group instead of
+// making copies to bind-mount in, so we don't break useradd (it wants to make a
+// copy of /etc/passwd and rename the copy to /etc/passwd, which is impossible
+// with a bind mount). This is done in cases where the container is *not*
+// read-only. In this case, the function will return nothing ("", "", nil).
+func (c *Container) generatePasswdAndGroup() (string, string, error) {
+	if !c.config.AddCurrentUserPasswdEntry && c.config.User == "" &&
+		len(c.config.HostUsers) == 0 {
+		return "", "", nil
+	}
+
+	// First, check if there's a mount at /etc/passwd or group, we don't
+	// want to interfere with user mounts.
+	needPasswd := !MountExists(c.config.Spec.Mounts, "/etc/passwd")
+	needGroup := !MountExists(c.config.Spec.Mounts, "/etc/group")
+
+	// Next, check if we already made the files. If we did, we don't need to
+	// do anything more for them.
+	if needPasswd {
+		if _, err := os.Stat(filepath.Join(c.config.StaticDir, "passwd")); err == nil {
+			needPasswd = false
+		}
+	}
+	if needGroup {
+		if _, err := os.Stat(filepath.Join(c.config.StaticDir, "group")); err == nil {
+			needGroup = false
+		}
+	}
+
+	// If we don't need a /etc/passwd or /etc/group at this point we can
+	// just return.
+	if !needPasswd && !needGroup {
+		return "", "", nil
+	}
+
+	passwdPath := ""
+	groupPath := ""
+
+	ro := c.IsReadOnly()
+
+	if needPasswd {
+		passwdEntry, err := c.generatePasswdEntry()
+		if err != nil {
+			return "", "", err
+		}
+		passwdPath, err = c.writeEtcFileEntry("passwd", passwdEntry, ro)
+		if err != nil {
+			return "", "", err
+		}
+	}
+	if needGroup {
+		groupEntry, err := c.generateGroupEntry()
+		if err != nil {
+			return "", "", err
+		}
+		groupPath, err = c.writeEtcFileEntry("group", groupEntry, ro)
+		if err != nil {
+			return "", "", err
+		}
+	}
+
+	return passwdPath, groupPath, nil
+}
+
+// writeEtcFileEntry makes entry part of the container's /etc/<name> file,
+// where name is "passwd" or "group".
+// With an empty entry nothing is written. For a read-only container a copy of
+// the image's file with entry appended is written to the static dir and its
+// path is returned so it can be bind-mounted. Otherwise entry is appended to
+// the file inside the container's mountpoint and "" is returned.
+func (c *Container) writeEtcFileEntry(name, entry string, readOnly bool) (string, error) {
+	etcPath := "/etc/" + name
+
+	if entry == "" {
+		logrus.Debugf("Not modifying container %s %s", c.ID(), etcPath)
+		return "", nil
+	}
+
+	if readOnly {
+		logrus.Debugf("Making %s for container %s", etcPath, c.ID())
+		originFile, err := securejoin.SecureJoin(c.state.Mountpoint, etcPath)
+		if err != nil {
+			return "", fmt.Errorf("error creating path to container %s %s: %w", c.ID(), etcPath, err)
+		}
+		// A missing file in the image is fine: the copy then holds only entry.
+		orig, err := ioutil.ReadFile(originFile)
+		if err != nil && !os.IsNotExist(err) {
+			return "", err
+		}
+		copyPath, err := c.writeStringToStaticDir(name, string(orig)+entry)
+		if err != nil {
+			return "", fmt.Errorf("failed to create temporary %s file: %w", name, err)
+		}
+		if err := os.Chmod(copyPath, 0644); err != nil {
+			return "", err
+		}
+		return copyPath, nil
+	}
+
+	logrus.Debugf("Modifying container %s %s", c.ID(), etcPath)
+	containerFile, err := securejoin.SecureJoin(c.state.Mountpoint, etcPath)
+	if err != nil {
+		return "", fmt.Errorf("error looking up location of container %s %s: %w", c.ID(), etcPath, err)
+	}
+
+	f, err := os.OpenFile(containerFile, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600)
+	if err != nil {
+		return "", fmt.Errorf("container %s: %w", c.ID(), err)
+	}
+	// Closed as soon as this helper returns, so the descriptor is not held
+	// while the other file is being processed.
+	defer f.Close()
+
+	if _, err := f.WriteString(entry); err != nil {
+		return "", fmt.Errorf("unable to append to container %s %s: %w", c.ID(), etcPath, err)
+	}
+	return "", nil
+}
+
+// copyTimezoneFile copies the zone file at zonePath to "localtime" in the
+// container's run directory, relabels the copy with the container's mount
+// label and chowns it to the container's root UID/GID.
+// Returns the path of the copy. A zonePath that is a directory is rejected.
+func (c *Container) copyTimezoneFile(zonePath string) (string, error) {
+	info, err := os.Stat(zonePath)
+	if err != nil {
+		return "", err
+	}
+	if info.IsDir() {
+		return "", errors.New("invalid timezone: is a directory")
+	}
+
+	in, err := os.Open(zonePath)
+	if err != nil {
+		return "", err
+	}
+	defer in.Close()
+
+	target := filepath.Join(c.state.RunDir, "localtime")
+	out, err := os.Create(target)
+	if err != nil {
+		return "", err
+	}
+	defer out.Close()
+
+	if _, err := io.Copy(out, in); err != nil {
+		return "", err
+	}
+	if err := c.relabel(target, c.config.MountLabel, false); err != nil {
+		return "", err
+	}
+	if err := out.Chown(c.RootUID(), c.RootGID()); err != nil {
+		return "", err
+	}
+	return target, nil
+}
+
+// cleanupOverlayMounts hands the container's static directory to
+// overlay.CleanupContent and returns its result.
+func (c *Container) cleanupOverlayMounts() error {
+	staticDir := c.config.StaticDir
+	return overlay.CleanupContent(staticDir)
+}
+
+// createSecretMountDir creates an empty directory under the container's run
+// directory and registers it as the bind mount for /run/secrets, unless that
+// directory already exists.
+// The directory is created with mode 0755 under a cleared umask, relabeled
+// with the container's mount label and chowned to the container's root
+// UID/GID.
+func (c *Container) createSecretMountDir() error {
+	secretsDir := filepath.Join(c.state.RunDir, "/run/secrets")
+
+	// Already present (nil error) or not inspectable (any other error):
+	// either way there is nothing to create here.
+	if _, err := os.Stat(secretsDir); !os.IsNotExist(err) {
+		return err
+	}
+
+	previousUmask := umask.Set(0)
+	defer umask.Set(previousUmask)
+
+	if err := os.MkdirAll(secretsDir, 0755); err != nil {
+		return err
+	}
+	if err := label.Relabel(secretsDir, c.config.MountLabel, false); err != nil {
+		return err
+	}
+	if err := os.Chown(secretsDir, c.RootUID(), c.RootGID()); err != nil {
+		return err
+	}
+
+	c.state.BindMounts["/run/secrets"] = secretsDir
+	return nil
+}
+
+// fixVolumePermissions fixes ownership and permissions of the specified
+// named volume if necessary.
+// The volume is looked up by name in the runtime state and locked for the
+// duration of the call. Work is only done when the volume's state has
+// NeedsChown set and the volume does not use a volume driver: the mount point
+// is chowned to the container's process user (translated to host IDs when ID
+// mappings are configured), and, if the volume's destination exists in the
+// container's mountpoint, the mount point additionally takes over that
+// path's owner, mode and timestamps (via setVolumeAtime).
+func (c *Container) fixVolumePermissions(v *ContainerNamedVolume) error {
+ vol, err := c.runtime.state.Volume(v.Name)
+ if err != nil {
+ return fmt.Errorf("error retrieving named volume %s for container %s: %w", v.Name, c.ID(), err)
+ }
+
+ vol.lock.Lock()
+ defer vol.lock.Unlock()
+
+ // The volume may need a copy-up. Check the state.
+ if err := vol.update(); err != nil {
+ return err
+ }
+
+ // Volumes owned by a volume driver are not chowned - we don't want to
+ // mess with a mount not managed by us.
+ if vol.state.NeedsChown && !vol.UsesVolumeDriver() {
+ // The flag is cleared (and saved below) before the chown itself runs.
+ vol.state.NeedsChown = false
+
+ // Start from the UID/GID the container process runs as.
+ uid := int(c.config.Spec.Process.User.UID)
+ gid := int(c.config.Spec.Process.User.GID)
+
+ // With ID mappings configured, translate the container IDs to host IDs.
+ if c.config.IDMappings.UIDMap != nil {
+ p := idtools.IDPair{
+ UID: uid,
+ GID: gid,
+ }
+ mappings := idtools.NewIDMappingsFromMaps(c.config.IDMappings.UIDMap, c.config.IDMappings.GIDMap)
+ newPair, err := mappings.ToHost(p)
+ if err != nil {
+ return fmt.Errorf("error mapping user %d:%d: %w", uid, gid, err)
+ }
+ uid = newPair.UID
+ gid = newPair.GID
+ }
+
+ // Record the IDs the volume is chowned to and persist the volume state.
+ vol.state.UIDChowned = uid
+ vol.state.GIDChowned = gid
+
+ if err := vol.save(); err != nil {
+ return err
+ }
+
+ mountPoint, err := vol.MountPoint()
+ if err != nil {
+ return err
+ }
+
+ if err := os.Lchown(mountPoint, uid, gid); err != nil {
+ return err
+ }
+
+ // Make sure the new volume matches the permissions of the target directory.
+ // https://github.com/containers/podman/issues/10188
+ st, err := os.Lstat(filepath.Join(c.state.Mountpoint, v.Dest))
+ if err == nil {
+ // When the destination exists, its owner overrides the chown above.
+ if stat, ok := st.Sys().(*syscall.Stat_t); ok {
+ if err := os.Lchown(mountPoint, int(stat.Uid), int(stat.Gid)); err != nil {
+ return err
+ }
+ }
+ if err := os.Chmod(mountPoint, st.Mode()); err != nil {
+ return err
+ }
+ if err := setVolumeAtime(mountPoint, st); err != nil {
+ return err
+ }
+ } else if !os.IsNotExist(err) {
+ // A destination that does not exist is fine; other errors are returned.
+ return err
+ }
+ }
+ return nil
+}
+
+// relabel applies mountLabel to src (recursively when recurse is set).
+// It is a no-op when SELinux is disabled or mountLabel is empty. Once the
+// container has left the configured/unknown states, the relabel is skipped if
+// src already carries mountLabel.
+func (c *Container) relabel(src, mountLabel string, recurse bool) error {
+	if !selinux.GetEnabled() || mountLabel == "" {
+		return nil
+	}
+
+	// only relabel on initial creation of container
+	initialCreation := c.ensureState(define.ContainerStateConfigured, define.ContainerStateUnknown)
+	if !initialCreation {
+		currentLabel, err := label.FileLabel(src)
+		if err != nil {
+			return err
+		}
+		// Only relabel when the label differs; the file might be on a tmpfs.
+		if currentLabel == mountLabel {
+			return nil
+		}
+	}
+
+	return label.Relabel(src, mountLabel, recurse)
+}
+
+// ChangeHostPathOwnership changes the ownership of the host path src to
+// uid:gid, recursively when recurse is set, by calling
+// chown.ChangeHostPathOwnership.
+// Once the container has left the configured/unknown states, the chown is
+// skipped when src is already owned by uid:gid. An error from stat-ing src in
+// that case is returned as is.
+func (c *Container) ChangeHostPathOwnership(src string, recurse bool, uid, gid int) error {
+	// only chown on initial creation of container
+	if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateUnknown) {
+		st, err := os.Stat(src)
+		if err != nil {
+			return err
+		}
+
+		// Nothing to do if the owner already matches. The type assertion is
+		// checked so an unexpected Sys() value falls through to the chown
+		// below instead of panicking.
+		if stat, ok := st.Sys().(*syscall.Stat_t); ok && int(stat.Uid) == uid && int(stat.Gid) == gid {
+			return nil
+		}
+	}
+	return chown.ChangeHostPathOwnership(src, recurse, uid, gid)
+}
diff --git a/libpod/container_internal_freebsd.go b/libpod/container_internal_freebsd.go
new file mode 100644
index 000000000..40c6c5ebf
--- /dev/null
+++ b/libpod/container_internal_freebsd.go
@@ -0,0 +1,285 @@
+//go:build freebsd
+// +build freebsd
+
+package libpod
+
+import (
+ "errors"
+ "fmt"
+ "os"
+ "strings"
+ "sync"
+ "syscall"
+ "time"
+
+ "github.com/containers/common/libnetwork/types"
+ "github.com/containers/podman/v4/pkg/rootless"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/opencontainers/runtime-tools/generate"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
+)
+
+ // bindOptions is an empty option list in this FreeBSD build.
+ // NOTE(review): presumably the default options applied to bind mounts by
+ // shared code — confirm against the users of bindOptions.
+ var (
+ bindOptions = []string{}
+ )
+
+ // Network stubs to decouple container_internal_freebsd.go from
+ // networking_freebsd.go so they can be reviewed separately.
+
+ // createNetNS is a placeholder: it ignores ctr and always returns an empty
+ // jail name, a nil status map and a "not implemented" error.
+ func (r *Runtime) createNetNS(ctr *Container) (netJail string, q map[string]types.StatusBlock, retErr error) {
+ return "", nil, errors.New("not implemented (*Runtime) createNetNS")
+ }
+
+ // teardownNetNS is a placeholder: it ignores ctr and always returns a
+ // "not implemented" error.
+ func (r *Runtime) teardownNetNS(ctr *Container) error {
+ return errors.New("not implemented (*Runtime) teardownNetNS")
+ }
+
+ // reloadContainerNetwork is a placeholder: it ignores ctr and always returns
+ // a nil status map and a "not implemented" error.
+ func (r *Runtime) reloadContainerNetwork(ctr *Container) (map[string]types.StatusBlock, error) {
+ return nil, errors.New("not implemented (*Runtime) reloadContainerNetwork")
+ }
+
+ // mountSHM is a no-op in this FreeBSD build: shmOptions is ignored and nil
+ // is always returned.
+ func (c *Container) mountSHM(shmOptions string) error {
+ return nil
+ }
+
+ // unmountSHM is a no-op in this FreeBSD build: path is ignored and nil is
+ // always returned.
+ func (c *Container) unmountSHM(path string) error {
+ return nil
+ }
+
+ // prepare mounts the container and sets up other required resources like net
+ // namespaces.
+ //
+ // Network setup and the storage mount run concurrently; each goroutine
+ // records its results in c.state while holding tmpStateLock. After both have
+ // finished, a failure of either one is returned. If both failed, the network
+ // error is logged and the storage error is returned. On success the
+ // container state is saved and the result of c.save() is returned.
+ func (c *Container) prepare() error {
+ 	var (
+ 		wg                              sync.WaitGroup
+ 		jailName                        string
+ 		networkStatus                   map[string]types.StatusBlock
+ 		createNetNSErr, mountStorageErr error
+ 		mountPoint                      string
+ 		tmpStateLock                    sync.Mutex
+ 	)
+
+ 	wg.Add(2)
+
+ 	go func() {
+ 		defer wg.Done()
+ 		// Set up network namespace if not already set up
+ 		noNetNS := c.state.NetworkJail == ""
+ 		if c.config.CreateNetNS && noNetNS && !c.config.PostConfigureNetNS {
+ 			jailName, networkStatus, createNetNSErr = c.runtime.createNetNS(c)
+ 			if createNetNSErr != nil {
+ 				return
+ 			}
+
+ 			tmpStateLock.Lock()
+ 			defer tmpStateLock.Unlock()
+
+ 			// Assign NetNS attributes to container
+ 			c.state.NetworkJail = jailName
+ 			c.state.NetworkStatus = networkStatus
+ 		}
+ 	}()
+ 	// Mount storage if not mounted
+ 	go func() {
+ 		defer wg.Done()
+ 		mountPoint, mountStorageErr = c.mountStorage()
+
+ 		if mountStorageErr != nil {
+ 			return
+ 		}
+
+ 		tmpStateLock.Lock()
+ 		defer tmpStateLock.Unlock()
+
+ 		// Finish up mountStorage
+ 		c.state.Mounted = true
+ 		c.state.Mountpoint = mountPoint
+
+ 		logrus.Debugf("Created root filesystem for container %s at %s", c.ID(), c.state.Mountpoint)
+ 	}()
+
+ 	wg.Wait()
+
+ 	// Collect the errors of both goroutines. The network error must be
+ 	// picked up here, otherwise a failed createNetNS goes unnoticed.
+ 	var createErr error
+ 	if createNetNSErr != nil {
+ 		createErr = createNetNSErr
+ 	}
+ 	if mountStorageErr != nil {
+ 		// Only one error can be returned: log the network error before it
+ 		// is replaced by the storage error.
+ 		if createErr != nil {
+ 			logrus.Errorf("Preparing container %s: %v", c.ID(), createErr)
+ 		}
+ 		createErr = mountStorageErr
+ 	}
+
+ 	if createErr != nil {
+ 		return createErr
+ 	}
+
+ 	// Save changes to container state
+ 	return c.save()
+ }
+
+ // cleanupNetwork tears down the container's network through the runtime and
+ // then saves the container state if the container is still valid. Nothing is
+ // done when the container uses another container's network (NetNsCtr set) or
+ // when networking is disabled. A teardown failure is only logged.
+ func (c *Container) cleanupNetwork() error {
+ 	if c.config.NetNsCtr != "" {
+ 		return nil
+ 	}
+
+ 	disabled, err := c.NetworkDisabled()
+ 	switch {
+ 	case err != nil:
+ 		return err
+ 	case disabled:
+ 		return nil
+ 	}
+
+ 	// Stop the container's network namespace (if it has one)
+ 	if teardownErr := c.runtime.teardownNetNS(c); teardownErr != nil {
+ 		logrus.Errorf("Unable to cleanup network for container %s: %q", c.ID(), teardownErr)
+ 	}
+
+ 	if !c.valid {
+ 		return nil
+ 	}
+ 	return c.save()
+ }
+
+ // reloadNetwork asks the runtime to reload this container's network, stores
+ // the returned status in c.state.NetworkStatus and saves the container
+ // state. A reload error is returned without touching the state.
+ func (c *Container) reloadNetwork() error {
+ 	status, err := c.runtime.reloadContainerNetwork(c)
+ 	if err != nil {
+ 		return err
+ 	}
+ 	c.state.NetworkStatus = status
+ 	return c.save()
+ }
+
+ // addNetworkContainer makes this container use the network jail of the
+ // existing container ctr by setting the org.freebsd.parentJail annotation on
+ // g to that container's NetworkJail.
+ //
+ // An error is returned if ctr cannot be retrieved from the state or if its
+ // state cannot be refreshed.
+ func (c *Container) addNetworkContainer(g *generate.Generator, ctr string) error {
+ 	nsCtr, err := c.runtime.state.Container(ctr)
+ 	if err != nil {
+ 		return fmt.Errorf("error retrieving dependency %s of container %s from state: %w", ctr, c.ID(), err)
+ 	}
+ 	// Refresh the dependency only after the lookup succeeded, so a nil
+ 	// container is never passed on, and do not annotate from a state that
+ 	// could not be refreshed.
+ 	if err := c.runtime.state.UpdateContainer(nsCtr); err != nil {
+ 		return fmt.Errorf("error updating dependency %s of container %s from state: %w", ctr, c.ID(), err)
+ 	}
+ 	g.AddAnnotation("org.freebsd.parentJail", nsCtr.state.NetworkJail)
+ 	return nil
+ }
+
+ // isRootlessCgroupSet always reports false in this FreeBSD build; cgroup is
+ // ignored.
+ func isRootlessCgroupSet(cgroup string) bool {
+ return false
+ }
+
+ // expectPodCgroup always returns (false, nil) in this FreeBSD build.
+ func (c *Container) expectPodCgroup() (bool, error) {
+ return false, nil
+ }
+
+ // getOCICgroupPath always returns an empty path and a nil error in this
+ // FreeBSD build.
+ func (c *Container) getOCICgroupPath() (string, error) {
+ return "", nil
+ }
+
+ // openDirectory opens path with O_RDONLY combined with the O_PATH flag and
+ // returns the file descriptor and error reported by unix.Open.
+ func openDirectory(path string) (fd int, err error) {
+ 	// NOTE(review): O_PATH is spelled out numerically here, presumably
+ 	// because the unix package in use does not export it for FreeBSD —
+ 	// confirm.
+ 	const oPath = 0x00400000
+ 	flags := unix.O_RDONLY | oPath
+ 	return unix.Open(path, flags, 0)
+ }
+
+ // addNetworkNamespace sets the org.freebsd.parentJail annotation on g to the
+ // container's own NetworkJail when the container is configured with
+ // CreateNetNS. It always returns nil.
+ func (c *Container) addNetworkNamespace(g *generate.Generator) error {
+ 	if !c.config.CreateNetNS {
+ 		return nil
+ 	}
+ 	g.AddAnnotation("org.freebsd.parentJail", c.state.NetworkJail)
+ 	return nil
+ }
+
+ // addSystemdMounts is a no-op in this FreeBSD build: g is left untouched and
+ // nil is always returned.
+ func (c *Container) addSystemdMounts(g *generate.Generator) error {
+ return nil
+ }
+
+ // addSharedNamespaces applies shared-namespace settings to the spec
+ // generator g:
+ //   - when NetNsCtr is set, the network jail of that container is added;
+ //   - when ID mappings are available, the spec's UID/GID mappings are passed
+ //     through rootless.MaybeSplitMappings; a "not exist" error is only
+ //     logged, any other error is returned;
+ //   - the container's hostname is set in the spec, and HOSTNAME is added to
+ //     the process environment unless a HOSTNAME entry is already present.
+ func (c *Container) addSharedNamespaces(g *generate.Generator) error {
+ 	if c.config.NetNsCtr != "" {
+ 		if err := c.addNetworkContainer(g, c.config.NetNsCtr); err != nil {
+ 			return err
+ 		}
+ 	}
+
+ 	uidMaps, gidMaps, err := rootless.GetAvailableIDMaps()
+ 	switch {
+ 	case err == nil:
+ 		g.Config.Linux.UIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.UIDMappings, uidMaps)
+ 		g.Config.Linux.GIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.GIDMappings, gidMaps)
+ 	case os.IsNotExist(err):
+ 		// The kernel-provided files only exist if user namespaces are supported
+ 		logrus.Debugf("User or group ID mappings not available: %s", err)
+ 	default:
+ 		return err
+ 	}
+
+ 	// Hostname handling:
+ 	// Set Hostname in the OCI spec and export it as HOSTNAME unless the
+ 	// user already put a HOSTNAME entry into the process environment.
+ 	hostname := c.Hostname()
+
+ 	// TODO: make this optional, needs progress on adding FreeBSD section to the spec
+ 	foundUTS := true
+ 	g.SetHostname(hostname)
+
+ 	// Unreachable until the TODO above is resolved: without a UTS
+ 	// namespace the host's hostname would be used instead.
+ 	if !foundUTS {
+ 		hostHostname, err := os.Hostname()
+ 		if err != nil {
+ 			return err
+ 		}
+ 		hostname = hostHostname
+ 	}
+
+ 	envHasHostname := false
+ 	for _, entry := range g.Config.Process.Env {
+ 		if key := strings.SplitN(entry, "=", 2)[0]; key == "HOSTNAME" {
+ 			envHasHostname = true
+ 			break
+ 		}
+ 	}
+ 	if !envHasHostname {
+ 		g.AddProcessEnv("HOSTNAME", hostname)
+ 	}
+ 	return nil
+ }
+
+ // addRootPropagation is a no-op in this FreeBSD build: g and mounts are
+ // ignored and nil is always returned.
+ func (c *Container) addRootPropagation(g *generate.Generator, mounts []spec.Mount) error {
+ return nil
+ }
+
+ // setProcessLabel is a no-op in this FreeBSD build; g is left untouched.
+ func (c *Container) setProcessLabel(g *generate.Generator) {
+ }
+
+ // setMountLabel is a no-op in this FreeBSD build; g is left untouched.
+ func (c *Container) setMountLabel(g *generate.Generator) {
+ }
+
+ // setCgroupsPath is a no-op in this FreeBSD build: g is left untouched and
+ // nil is always returned.
+ func (c *Container) setCgroupsPath(g *generate.Generator) error {
+ return nil
+ }
+
+ // addSlirp4netnsDNS returns nameservers unchanged in this FreeBSD build.
+ func (c *Container) addSlirp4netnsDNS(nameservers []string) []string {
+ return nameservers
+ }
+
+ // isSlirp4netnsIPv6 always returns (false, nil) in this FreeBSD build.
+ func (c *Container) isSlirp4netnsIPv6() (bool, error) {
+ return false, nil
+ }
+
+ // check for net=none
+ // hasNetNone reports whether the container state records no network jail,
+ // i.e. c.state.NetworkJail is empty.
+ func (c *Container) hasNetNone() bool {
+ return c.state.NetworkJail == ""
+ }
+
+ // setVolumeAtime sets the access time of mountPoint to the atime recorded in
+ // st (read from its *syscall.Stat_t) and the modification time to
+ // st.ModTime(). The error from os.Chtimes is returned.
+ func setVolumeAtime(mountPoint string, st os.FileInfo) error {
+ 	sys := st.Sys().(*syscall.Stat_t)
+ 	accessed := time.Unix(int64(sys.Atimespec.Sec), int64(sys.Atimespec.Nsec)) //nolint: unconvert
+ 	return os.Chtimes(mountPoint, accessed, st.ModTime())
+ }
diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go
index 5c5fd471b..9b05a2d61 100644
--- a/libpod/container_internal_linux.go
+++ b/libpod/container_internal_linux.go
@@ -4,64 +4,34 @@
package libpod
import (
- "context"
"errors"
"fmt"
- "io"
- "io/ioutil"
- "math"
"os"
- "os/user"
"path"
"path/filepath"
- "strconv"
"strings"
"sync"
"syscall"
"time"
- metadata "github.com/checkpoint-restore/checkpointctl/lib"
- "github.com/checkpoint-restore/go-criu/v5/stats"
- cdi "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/containernetworking/plugins/pkg/ns"
- "github.com/containers/buildah"
- "github.com/containers/buildah/pkg/chrootuser"
- "github.com/containers/buildah/pkg/overlay"
- butil "github.com/containers/buildah/util"
- "github.com/containers/common/libnetwork/etchosts"
- "github.com/containers/common/libnetwork/resolvconf"
"github.com/containers/common/libnetwork/types"
- "github.com/containers/common/pkg/apparmor"
"github.com/containers/common/pkg/cgroups"
- "github.com/containers/common/pkg/chown"
"github.com/containers/common/pkg/config"
- "github.com/containers/common/pkg/subscriptions"
- "github.com/containers/common/pkg/umask"
- cutil "github.com/containers/common/pkg/util"
- is "github.com/containers/image/v5/storage"
"github.com/containers/podman/v4/libpod/define"
- "github.com/containers/podman/v4/libpod/events"
- "github.com/containers/podman/v4/pkg/annotations"
- "github.com/containers/podman/v4/pkg/checkpoint/crutils"
- "github.com/containers/podman/v4/pkg/criu"
- "github.com/containers/podman/v4/pkg/lookup"
"github.com/containers/podman/v4/pkg/rootless"
- "github.com/containers/podman/v4/pkg/util"
"github.com/containers/podman/v4/utils"
- "github.com/containers/podman/v4/version"
- "github.com/containers/storage/pkg/archive"
- "github.com/containers/storage/pkg/idtools"
- "github.com/containers/storage/pkg/lockfile"
- securejoin "github.com/cyphar/filepath-securejoin"
- runcuser "github.com/opencontainers/runc/libcontainer/user"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-tools/generate"
- "github.com/opencontainers/selinux/go-selinux"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
+// bindOptions lists the "bind" and "rprivate" mount options.
+// NOTE(review): presumably replaces the literal []string{"bind", "rprivate"}
+// that the removed generateSpec attached to the c.state.BindMounts entries —
+// confirm against the users of bindOptions.
+var (
+ bindOptions = []string{"bind", "rprivate"}
+)
+
func (c *Container) mountSHM(shmOptions string) error {
if err := unix.Mount("shm", c.config.ShmDir, "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV,
label.FormatMountLabel(shmOptions, c.config.MountLabel)); err != nil {
@@ -177,118 +147,6 @@ func (c *Container) prepare() error {
return nil
}
-// isWorkDirSymlink returns true if resolved workdir is symlink or a chain of symlinks,
-// and final resolved target is present either on volume, mount or inside of container
-// otherwise it returns false. Following function is meant for internal use only and
-// can change at any point of time.
-func (c *Container) isWorkDirSymlink(resolvedPath string) bool {
- // We cannot create workdir since explicit --workdir is
- // set in config but workdir could also be a symlink.
- // If it's a symlink, check if the resolved target is present in the container.
- // If so, that's a valid use case: return nil.
-
- maxSymLinks := 0
- for {
- // Linux only supports a chain of 40 links.
- // Reference: https://github.com/torvalds/linux/blob/master/include/linux/namei.h#L13
- if maxSymLinks > 40 {
- break
- }
- resolvedSymlink, err := os.Readlink(resolvedPath)
- if err != nil {
- // End sym-link resolution loop.
- break
- }
- if resolvedSymlink != "" {
- _, resolvedSymlinkWorkdir, err := c.resolvePath(c.state.Mountpoint, resolvedSymlink)
- if isPathOnVolume(c, resolvedSymlinkWorkdir) || isPathOnBindMount(c, resolvedSymlinkWorkdir) {
- // Resolved symlink exists on external volume or mount
- return true
- }
- if err != nil {
- // Could not resolve path so end sym-link resolution loop.
- break
- }
- if resolvedSymlinkWorkdir != "" {
- resolvedPath = resolvedSymlinkWorkdir
- _, err := os.Stat(resolvedSymlinkWorkdir)
- if err == nil {
- // Symlink resolved successfully and resolved path exists on container,
- // this is a valid use-case so return nil.
- logrus.Debugf("Workdir is a symlink with target to %q and resolved symlink exists on container", resolvedSymlink)
- return true
- }
- }
- }
- maxSymLinks++
- }
- return false
-}
-
-// resolveWorkDir resolves the container's workdir and, depending on the
-// configuration, will create it, or error out if it does not exist.
-// Note that the container must be mounted before.
-func (c *Container) resolveWorkDir() error {
- workdir := c.WorkingDir()
-
- // If the specified workdir is a subdir of a volume or mount,
- // we don't need to do anything. The runtime is taking care of
- // that.
- if isPathOnVolume(c, workdir) || isPathOnBindMount(c, workdir) {
- logrus.Debugf("Workdir %q resolved to a volume or mount", workdir)
- return nil
- }
-
- _, resolvedWorkdir, err := c.resolvePath(c.state.Mountpoint, workdir)
- if err != nil {
- return err
- }
- logrus.Debugf("Workdir %q resolved to host path %q", workdir, resolvedWorkdir)
-
- st, err := os.Stat(resolvedWorkdir)
- if err == nil {
- if !st.IsDir() {
- return fmt.Errorf("workdir %q exists on container %s, but is not a directory", workdir, c.ID())
- }
- return nil
- }
- if !c.config.CreateWorkingDir {
- // No need to create it (e.g., `--workdir=/foo`), so let's make sure
- // the path exists on the container.
- if err != nil {
- if os.IsNotExist(err) {
- // If resolved Workdir path gets marked as a valid symlink,
- // return nil cause this is valid use-case.
- if c.isWorkDirSymlink(resolvedWorkdir) {
- return nil
- }
- return fmt.Errorf("workdir %q does not exist on container %s", workdir, c.ID())
- }
- // This might be a serious error (e.g., permission), so
- // we need to return the full error.
- return fmt.Errorf("error detecting workdir %q on container %s: %w", workdir, c.ID(), err)
- }
- return nil
- }
- if err := os.MkdirAll(resolvedWorkdir, 0755); err != nil {
- if os.IsExist(err) {
- return nil
- }
- return fmt.Errorf("error creating container %s workdir: %w", c.ID(), err)
- }
-
- // Ensure container entrypoint is created (if required).
- uid, gid, _, err := chrootuser.GetUser(c.state.Mountpoint, c.User())
- if err != nil {
- return fmt.Errorf("error looking up %s inside of the container %s: %w", c.User(), c.ID(), err)
- }
- if err := os.Chown(resolvedWorkdir, int(uid), int(gid)); err != nil {
- return fmt.Errorf("error chowning container %s workdir to container root: %w", c.ID(), err)
- }
-
- return nil
-}
-
// cleanupNetwork unmounts and cleans up the container's network
func (c *Container) cleanupNetwork() error {
if c.config.NetNsCtr != "" {
@@ -335,670 +193,6 @@ func (c *Container) reloadNetwork() error {
return c.save()
}
-func (c *Container) getUserOverrides() *lookup.Overrides {
- var hasPasswdFile, hasGroupFile bool
- overrides := lookup.Overrides{}
- for _, m := range c.config.Spec.Mounts {
- if m.Destination == "/etc/passwd" {
- overrides.ContainerEtcPasswdPath = m.Source
- hasPasswdFile = true
- }
- if m.Destination == "/etc/group" {
- overrides.ContainerEtcGroupPath = m.Source
- hasGroupFile = true
- }
- if m.Destination == "/etc" {
- if !hasPasswdFile {
- overrides.ContainerEtcPasswdPath = filepath.Join(m.Source, "passwd")
- }
- if !hasGroupFile {
- overrides.ContainerEtcGroupPath = filepath.Join(m.Source, "group")
- }
- }
- }
- if path, ok := c.state.BindMounts["/etc/passwd"]; ok {
- overrides.ContainerEtcPasswdPath = path
- }
- return &overrides
-}
-
-func lookupHostUser(name string) (*runcuser.ExecUser, error) {
- var execUser runcuser.ExecUser
- // Look up User on host
- u, err := util.LookupUser(name)
- if err != nil {
- return &execUser, err
- }
- uid, err := strconv.ParseUint(u.Uid, 8, 32)
- if err != nil {
- return &execUser, err
- }
-
- gid, err := strconv.ParseUint(u.Gid, 8, 32)
- if err != nil {
- return &execUser, err
- }
- execUser.Uid = int(uid)
- execUser.Gid = int(gid)
- execUser.Home = u.HomeDir
- return &execUser, nil
-}
-
-// Internal only function which returns upper and work dir from
-// overlay options.
-func getOverlayUpperAndWorkDir(options []string) (string, string, error) {
- upperDir := ""
- workDir := ""
- for _, o := range options {
- if strings.HasPrefix(o, "upperdir") {
- splitOpt := strings.SplitN(o, "=", 2)
- if len(splitOpt) > 1 {
- upperDir = splitOpt[1]
- if upperDir == "" {
- return "", "", errors.New("cannot accept empty value for upperdir")
- }
- }
- }
- if strings.HasPrefix(o, "workdir") {
- splitOpt := strings.SplitN(o, "=", 2)
- if len(splitOpt) > 1 {
- workDir = splitOpt[1]
- if workDir == "" {
- return "", "", errors.New("cannot accept empty value for workdir")
- }
- }
- }
- }
- if (upperDir != "" && workDir == "") || (upperDir == "" && workDir != "") {
- return "", "", errors.New("must specify both upperdir and workdir")
- }
- return upperDir, workDir, nil
-}
-
-// Generate spec for a container
-// Accepts a map of the container's dependencies
-func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
- overrides := c.getUserOverrides()
- execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, overrides)
- if err != nil {
- if cutil.StringInSlice(c.config.User, c.config.HostUsers) {
- execUser, err = lookupHostUser(c.config.User)
- }
- if err != nil {
- return nil, err
- }
- }
-
- // NewFromSpec() is deprecated according to its comment
- // however the recommended replace just causes a nil map panic
- //nolint:staticcheck
- g := generate.NewFromSpec(c.config.Spec)
-
- // If the flag to mount all devices is set for a privileged container, add
- // all the devices from the host's machine into the container
- if c.config.MountAllDevices {
- if err := util.AddPrivilegedDevices(&g); err != nil {
- return nil, err
- }
- }
-
- // If network namespace was requested, add it now
- if c.config.CreateNetNS {
- if c.config.PostConfigureNetNS {
- if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), ""); err != nil {
- return nil, err
- }
- } else {
- if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), c.state.NetNS.Path()); err != nil {
- return nil, err
- }
- }
- }
-
- // Apply AppArmor checks and load the default profile if needed.
- if len(c.config.Spec.Process.ApparmorProfile) > 0 {
- updatedProfile, err := apparmor.CheckProfileAndLoadDefault(c.config.Spec.Process.ApparmorProfile)
- if err != nil {
- return nil, err
- }
- g.SetProcessApparmorProfile(updatedProfile)
- }
-
- if err := c.makeBindMounts(); err != nil {
- return nil, err
- }
-
- if err := c.mountNotifySocket(g); err != nil {
- return nil, err
- }
-
- // Get host UID and GID based on the container process UID and GID.
- hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid))
- if err != nil {
- return nil, err
- }
-
- // Add named volumes
- for _, namedVol := range c.config.NamedVolumes {
- volume, err := c.runtime.GetVolume(namedVol.Name)
- if err != nil {
- return nil, fmt.Errorf("error retrieving volume %s to add to container %s: %w", namedVol.Name, c.ID(), err)
- }
- mountPoint, err := volume.MountPoint()
- if err != nil {
- return nil, err
- }
-
- overlayFlag := false
- upperDir := ""
- workDir := ""
- for _, o := range namedVol.Options {
- if o == "O" {
- overlayFlag = true
- upperDir, workDir, err = getOverlayUpperAndWorkDir(namedVol.Options)
- if err != nil {
- return nil, err
- }
- }
- }
-
- if overlayFlag {
- var overlayMount spec.Mount
- var overlayOpts *overlay.Options
- contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
- if err != nil {
- return nil, err
- }
-
- overlayOpts = &overlay.Options{RootUID: c.RootUID(),
- RootGID: c.RootGID(),
- UpperDirOptionFragment: upperDir,
- WorkDirOptionFragment: workDir,
- GraphOpts: c.runtime.store.GraphOptions(),
- }
-
- overlayMount, err = overlay.MountWithOptions(contentDir, mountPoint, namedVol.Dest, overlayOpts)
- if err != nil {
- return nil, fmt.Errorf("mounting overlay failed %q: %w", mountPoint, err)
- }
-
- for _, o := range namedVol.Options {
- if o == "U" {
- if err := c.ChangeHostPathOwnership(mountPoint, true, int(hostUID), int(hostGID)); err != nil {
- return nil, err
- }
-
- if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
- return nil, err
- }
- }
- }
- g.AddMount(overlayMount)
- } else {
- volMount := spec.Mount{
- Type: "bind",
- Source: mountPoint,
- Destination: namedVol.Dest,
- Options: namedVol.Options,
- }
- g.AddMount(volMount)
- }
- }
-
- // Check if the spec file mounts contain the options z, Z or U.
- // If they have z or Z, relabel the source directory and then remove the option.
- // If they have U, chown the source directory and them remove the option.
- for i := range g.Config.Mounts {
- m := &g.Config.Mounts[i]
- var options []string
- for _, o := range m.Options {
- switch o {
- case "U":
- if m.Type == "tmpfs" {
- options = append(options, []string{fmt.Sprintf("uid=%d", execUser.Uid), fmt.Sprintf("gid=%d", execUser.Gid)}...)
- } else {
- // only chown on initial creation of container
- if err := c.ChangeHostPathOwnership(m.Source, true, int(hostUID), int(hostGID)); err != nil {
- return nil, err
- }
- }
- case "z":
- fallthrough
- case "Z":
- if err := c.relabel(m.Source, c.MountLabel(), label.IsShared(o)); err != nil {
- return nil, err
- }
-
- default:
- options = append(options, o)
- }
- }
- m.Options = options
- }
-
- g.SetProcessSelinuxLabel(c.ProcessLabel())
- g.SetLinuxMountLabel(c.MountLabel())
-
- // Add bind mounts to container
- for dstPath, srcPath := range c.state.BindMounts {
- newMount := spec.Mount{
- Type: "bind",
- Source: srcPath,
- Destination: dstPath,
- Options: []string{"bind", "rprivate"},
- }
- if c.IsReadOnly() && dstPath != "/dev/shm" {
- newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
- }
- if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
- newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
- }
- if !MountExists(g.Mounts(), dstPath) {
- g.AddMount(newMount)
- } else {
- logrus.Infof("User mount overriding libpod mount at %q", dstPath)
- }
- }
-
- // Add overlay volumes
- for _, overlayVol := range c.config.OverlayVolumes {
- upperDir, workDir, err := getOverlayUpperAndWorkDir(overlayVol.Options)
- if err != nil {
- return nil, err
- }
- contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
- if err != nil {
- return nil, err
- }
- overlayOpts := &overlay.Options{RootUID: c.RootUID(),
- RootGID: c.RootGID(),
- UpperDirOptionFragment: upperDir,
- WorkDirOptionFragment: workDir,
- GraphOpts: c.runtime.store.GraphOptions(),
- }
-
- overlayMount, err := overlay.MountWithOptions(contentDir, overlayVol.Source, overlayVol.Dest, overlayOpts)
- if err != nil {
- return nil, fmt.Errorf("mounting overlay failed %q: %w", overlayVol.Source, err)
- }
-
- // Check overlay volume options
- for _, o := range overlayVol.Options {
- if o == "U" {
- if err := c.ChangeHostPathOwnership(overlayVol.Source, true, int(hostUID), int(hostGID)); err != nil {
- return nil, err
- }
-
- if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
- return nil, err
- }
- }
- }
-
- g.AddMount(overlayMount)
- }
-
- // Add image volumes as overlay mounts
- for _, volume := range c.config.ImageVolumes {
- // Mount the specified image.
- img, _, err := c.runtime.LibimageRuntime().LookupImage(volume.Source, nil)
- if err != nil {
- return nil, fmt.Errorf("error creating image volume %q:%q: %w", volume.Source, volume.Dest, err)
- }
- mountPoint, err := img.Mount(ctx, nil, "")
- if err != nil {
- return nil, fmt.Errorf("error mounting image volume %q:%q: %w", volume.Source, volume.Dest, err)
- }
-
- contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
- if err != nil {
- return nil, fmt.Errorf("failed to create TempDir in the %s directory: %w", c.config.StaticDir, err)
- }
-
- var overlayMount spec.Mount
- if volume.ReadWrite {
- overlayMount, err = overlay.Mount(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
- } else {
- overlayMount, err = overlay.MountReadOnly(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
- }
- if err != nil {
- return nil, fmt.Errorf("creating overlay mount for image %q failed: %w", volume.Source, err)
- }
- g.AddMount(overlayMount)
- }
-
- hasHomeSet := false
- for _, s := range c.config.Spec.Process.Env {
- if strings.HasPrefix(s, "HOME=") {
- hasHomeSet = true
- break
- }
- }
- if !hasHomeSet && execUser.Home != "" {
- c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", execUser.Home))
- }
-
- if c.config.User != "" {
- // User and Group must go together
- g.SetProcessUID(uint32(execUser.Uid))
- g.SetProcessGID(uint32(execUser.Gid))
- }
-
- if c.config.Umask != "" {
- decVal, err := strconv.ParseUint(c.config.Umask, 8, 32)
- if err != nil {
- return nil, fmt.Errorf("invalid Umask Value: %w", err)
- }
- umask := uint32(decVal)
- g.Config.Process.User.Umask = &umask
- }
-
- // Add addition groups if c.config.GroupAdd is not empty
- if len(c.config.Groups) > 0 {
- gids, err := lookup.GetContainerGroups(c.config.Groups, c.state.Mountpoint, overrides)
- if err != nil {
- return nil, fmt.Errorf("error looking up supplemental groups for container %s: %w", c.ID(), err)
- }
- for _, gid := range gids {
- g.AddProcessAdditionalGid(gid)
- }
- }
-
- if c.Systemd() {
- if err := c.setupSystemd(g.Mounts(), g); err != nil {
- return nil, fmt.Errorf("error adding systemd-specific mounts: %w", err)
- }
- }
-
- // Look up and add groups the user belongs to, if a group wasn't directly specified
- if !strings.Contains(c.config.User, ":") {
- // the gidMappings that are present inside the container user namespace
- var gidMappings []idtools.IDMap
-
- switch {
- case len(c.config.IDMappings.GIDMap) > 0:
- gidMappings = c.config.IDMappings.GIDMap
- case rootless.IsRootless():
- // Check whether the current user namespace has enough gids available.
- availableGids, err := rootless.GetAvailableGids()
- if err != nil {
- return nil, fmt.Errorf("cannot read number of available GIDs: %w", err)
- }
- gidMappings = []idtools.IDMap{{
- ContainerID: 0,
- HostID: 0,
- Size: int(availableGids),
- }}
- default:
- gidMappings = []idtools.IDMap{{
- ContainerID: 0,
- HostID: 0,
- Size: math.MaxInt32,
- }}
- }
- for _, gid := range execUser.Sgids {
- isGIDAvailable := false
- for _, m := range gidMappings {
- if gid >= m.ContainerID && gid < m.ContainerID+m.Size {
- isGIDAvailable = true
- break
- }
- }
- if isGIDAvailable {
- g.AddProcessAdditionalGid(uint32(gid))
- } else {
- logrus.Warnf("Additional gid=%d is not present in the user namespace, skip setting it", gid)
- }
- }
- }
-
- // Add shared namespaces from other containers
- if c.config.IPCNsCtr != "" {
- if err := c.addNamespaceContainer(&g, IPCNS, c.config.IPCNsCtr, spec.IPCNamespace); err != nil {
- return nil, err
- }
- }
- if c.config.MountNsCtr != "" {
- if err := c.addNamespaceContainer(&g, MountNS, c.config.MountNsCtr, spec.MountNamespace); err != nil {
- return nil, err
- }
- }
- if c.config.NetNsCtr != "" {
- if err := c.addNamespaceContainer(&g, NetNS, c.config.NetNsCtr, spec.NetworkNamespace); err != nil {
- return nil, err
- }
- }
- if c.config.PIDNsCtr != "" {
- if err := c.addNamespaceContainer(&g, PIDNS, c.config.PIDNsCtr, spec.PIDNamespace); err != nil {
- return nil, err
- }
- }
- if c.config.UserNsCtr != "" {
- if err := c.addNamespaceContainer(&g, UserNS, c.config.UserNsCtr, spec.UserNamespace); err != nil {
- return nil, err
- }
- if len(g.Config.Linux.UIDMappings) == 0 {
- // runc complains if no mapping is specified, even if we join another ns. So provide a dummy mapping
- g.AddLinuxUIDMapping(uint32(0), uint32(0), uint32(1))
- g.AddLinuxGIDMapping(uint32(0), uint32(0), uint32(1))
- }
- }
-
- availableUIDs, availableGIDs, err := rootless.GetAvailableIDMaps()
- if err != nil {
- if os.IsNotExist(err) {
- // The kernel-provided files only exist if user namespaces are supported
- logrus.Debugf("User or group ID mappings not available: %s", err)
- } else {
- return nil, err
- }
- } else {
- g.Config.Linux.UIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.UIDMappings, availableUIDs)
- g.Config.Linux.GIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.GIDMappings, availableGIDs)
- }
-
- // Hostname handling:
- // If we have a UTS namespace, set Hostname in the OCI spec.
- // Set the HOSTNAME environment variable unless explicitly overridden by
- // the user (already present in OCI spec). If we don't have a UTS ns,
- // set it to the host's hostname instead.
- hostname := c.Hostname()
- foundUTS := false
-
- for _, i := range c.config.Spec.Linux.Namespaces {
- if i.Type == spec.UTSNamespace && i.Path == "" {
- foundUTS = true
- g.SetHostname(hostname)
- break
- }
- }
- if !foundUTS {
- tmpHostname, err := os.Hostname()
- if err != nil {
- return nil, err
- }
- hostname = tmpHostname
- }
- needEnv := true
- for _, checkEnv := range g.Config.Process.Env {
- if strings.SplitN(checkEnv, "=", 2)[0] == "HOSTNAME" {
- needEnv = false
- break
- }
- }
- if needEnv {
- g.AddProcessEnv("HOSTNAME", hostname)
- }
-
- if c.config.UTSNsCtr != "" {
- if err := c.addNamespaceContainer(&g, UTSNS, c.config.UTSNsCtr, spec.UTSNamespace); err != nil {
- return nil, err
- }
- }
- if c.config.CgroupNsCtr != "" {
- if err := c.addNamespaceContainer(&g, CgroupNS, c.config.CgroupNsCtr, spec.CgroupNamespace); err != nil {
- return nil, err
- }
- }
-
- if c.config.UserNsCtr == "" && c.config.IDMappings.AutoUserNs {
- if err := g.AddOrReplaceLinuxNamespace(string(spec.UserNamespace), ""); err != nil {
- return nil, err
- }
- g.ClearLinuxUIDMappings()
- for _, uidmap := range c.config.IDMappings.UIDMap {
- g.AddLinuxUIDMapping(uint32(uidmap.HostID), uint32(uidmap.ContainerID), uint32(uidmap.Size))
- }
- g.ClearLinuxGIDMappings()
- for _, gidmap := range c.config.IDMappings.GIDMap {
- g.AddLinuxGIDMapping(uint32(gidmap.HostID), uint32(gidmap.ContainerID), uint32(gidmap.Size))
- }
- }
-
- g.SetRootPath(c.state.Mountpoint)
- g.AddAnnotation(annotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano))
- g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal))
-
- if _, exists := g.Config.Annotations[annotations.ContainerManager]; !exists {
- g.AddAnnotation(annotations.ContainerManager, annotations.ContainerManagerLibpod)
- }
-
- cgroupPath, err := c.getOCICgroupPath()
- if err != nil {
- return nil, err
- }
-
- g.SetLinuxCgroupsPath(cgroupPath)
-
- // Warning: CDI may alter g.Config in place.
- if len(c.config.CDIDevices) > 0 {
- registry := cdi.GetRegistry(
- cdi.WithAutoRefresh(false),
- )
- if err := registry.Refresh(); err != nil {
- logrus.Debugf("The following error was triggered when refreshing the CDI registry: %v", err)
- }
- _, err := registry.InjectDevices(g.Config, c.config.CDIDevices...)
- if err != nil {
- return nil, fmt.Errorf("error setting up CDI devices: %w", err)
- }
- }
-
- // Mounts need to be sorted so paths will not cover other paths
- mounts := sortMounts(g.Mounts())
- g.ClearMounts()
-
- // Determine property of RootPropagation based on volume properties. If
- // a volume is shared, then keep root propagation shared. This should
- // work for slave and private volumes too.
- //
- // For slave volumes, it can be either [r]shared/[r]slave.
- //
- // For private volumes any root propagation value should work.
- rootPropagation := ""
- for _, m := range mounts {
- // We need to remove all symlinks from tmpfs mounts.
- // Runc and other runtimes may choke on them.
- // Easy solution: use securejoin to do a scoped evaluation of
- // the links, then trim off the mount prefix.
- if m.Type == "tmpfs" {
- finalPath, err := securejoin.SecureJoin(c.state.Mountpoint, m.Destination)
- if err != nil {
- return nil, fmt.Errorf("error resolving symlinks for mount destination %s: %w", m.Destination, err)
- }
- trimmedPath := strings.TrimPrefix(finalPath, strings.TrimSuffix(c.state.Mountpoint, "/"))
- m.Destination = trimmedPath
- }
- g.AddMount(m)
- for _, opt := range m.Options {
- switch opt {
- case MountShared, MountRShared:
- if rootPropagation != MountShared && rootPropagation != MountRShared {
- rootPropagation = MountShared
- }
- case MountSlave, MountRSlave:
- if rootPropagation != MountShared && rootPropagation != MountRShared && rootPropagation != MountSlave && rootPropagation != MountRSlave {
- rootPropagation = MountRSlave
- }
- }
- }
- }
-
- if rootPropagation != "" {
- logrus.Debugf("Set root propagation to %q", rootPropagation)
- if err := g.SetLinuxRootPropagation(rootPropagation); err != nil {
- return nil, err
- }
- }
-
- // Warning: precreate hooks may alter g.Config in place.
- if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil {
- return nil, fmt.Errorf("error setting up OCI Hooks: %w", err)
- }
- if len(c.config.EnvSecrets) > 0 {
- manager, err := c.runtime.SecretsManager()
- if err != nil {
- return nil, err
- }
- if err != nil {
- return nil, err
- }
- for name, secr := range c.config.EnvSecrets {
- _, data, err := manager.LookupSecretData(secr.Name)
- if err != nil {
- return nil, err
- }
- g.AddProcessEnv(name, string(data))
- }
- }
-
- // Pass down the LISTEN_* environment (see #10443).
- for _, key := range []string{"LISTEN_PID", "LISTEN_FDS", "LISTEN_FDNAMES"} {
- if val, ok := os.LookupEnv(key); ok {
- // Force the PID to `1` since we cannot rely on (all
- // versions of) all runtimes to do it for us.
- if key == "LISTEN_PID" {
- val = "1"
- }
- g.AddProcessEnv(key, val)
- }
- }
-
- return g.Config, nil
-}
-
-// mountNotifySocket mounts the NOTIFY_SOCKET into the container if it's set
-// and if the sdnotify mode is set to container. It also sets c.notifySocket
-// to avoid redundantly looking up the env variable.
-func (c *Container) mountNotifySocket(g generate.Generator) error {
- if c.config.SdNotifySocket == "" {
- return nil
- }
- if c.config.SdNotifyMode != define.SdNotifyModeContainer {
- return nil
- }
-
- notifyDir := filepath.Join(c.bundlePath(), "notify")
- logrus.Debugf("Checking notify %q dir", notifyDir)
- if err := os.MkdirAll(notifyDir, 0755); err != nil {
- if !os.IsExist(err) {
- return fmt.Errorf("unable to create notify %q dir: %w", notifyDir, err)
- }
- }
- if err := label.Relabel(notifyDir, c.MountLabel(), true); err != nil {
- return fmt.Errorf("relabel failed %q: %w", notifyDir, err)
- }
- logrus.Debugf("Add bindmount notify %q dir", notifyDir)
- if _, ok := c.state.BindMounts["/run/notify"]; !ok {
- c.state.BindMounts["/run/notify"] = notifyDir
- }
-
- // Set the container's notify socket to the proxy socket created by conmon
- g.AddProcessEnv("NOTIFY_SOCKET", "/run/notify/notify.sock")
-
- return nil
-}
-
// systemd expects to have /run, /run/lock and /tmp on tmpfs
// It also expects to be able to write to /sys/fs/cgroup/systemd and /var/log/journal
func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) error {
@@ -1073,9 +267,15 @@ func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) erro
g.AddMount(systemdMnt)
} else {
mountOptions := []string{"bind", "rprivate"}
+ skipMount := false
var statfs unix.Statfs_t
if err := unix.Statfs("/sys/fs/cgroup/systemd", &statfs); err != nil {
+ if errors.Is(err, os.ErrNotExist) {
+ // If the mount is missing on the host, we cannot bind mount it so
+ // just skip it.
+ skipMount = true
+ }
mountOptions = append(mountOptions, "nodev", "noexec", "nosuid")
} else {
if statfs.Flags&unix.MS_NODEV == unix.MS_NODEV {
@@ -1091,15 +291,16 @@ func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) erro
mountOptions = append(mountOptions, "ro")
}
}
-
- systemdMnt := spec.Mount{
- Destination: "/sys/fs/cgroup/systemd",
- Type: "bind",
- Source: "/sys/fs/cgroup/systemd",
- Options: mountOptions,
+ if !skipMount {
+ systemdMnt := spec.Mount{
+ Destination: "/sys/fs/cgroup/systemd",
+ Type: "bind",
+ Source: "/sys/fs/cgroup/systemd",
+ Options: mountOptions,
+ }
+ g.AddMount(systemdMnt)
+ g.AddLinuxMaskedPaths("/sys/fs/cgroup/systemd/release_agent")
}
- g.AddMount(systemdMnt)
- g.AddLinuxMaskedPaths("/sys/fs/cgroup/systemd/release_agent")
}
return nil
@@ -1131,1867 +332,6 @@ func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr
return nil
}
-// addCheckpointImageMetadata stores annotations describing the container and
-// the host on importBuilder. The annotations cover the container name, the
-// raw and rootfs image names, and the podman, CRIU, OCI runtime and conmon
-// versions, plus host architecture, kernel, cgroup version and distribution.
-// They are meant to allow a compatibility check before a checkpoint image is
-// restored.
-//
-// Failures to obtain the host info or the CRIU version are returned wrapped
-// (%w) so callers can still inspect the underlying error.
-func (c *Container) addCheckpointImageMetadata(importBuilder *buildah.Builder) error {
-	// Get information about host environment
-	hostInfo, err := c.Runtime().hostInfo()
-	if err != nil {
-		return fmt.Errorf("getting host info: %w", err)
-	}
-
-	criuVersion, err := criu.GetCriuVersion()
-	if err != nil {
-		return fmt.Errorf("getting criu version: %w", err)
-	}
-
-	rootfsImageID, rootfsImageName := c.Image()
-
-	// Add image annotations with information about the container and the host.
-	// This information is useful to check compatibility before restoring the checkpoint
-	checkpointImageAnnotations := map[string]string{
-		define.CheckpointAnnotationName:                c.config.Name,
-		define.CheckpointAnnotationRawImageName:        c.config.RawImageName,
-		define.CheckpointAnnotationRootfsImageID:       rootfsImageID,
-		define.CheckpointAnnotationRootfsImageName:     rootfsImageName,
-		define.CheckpointAnnotationPodmanVersion:       version.Version.String(),
-		define.CheckpointAnnotationCriuVersion:         strconv.Itoa(criuVersion),
-		define.CheckpointAnnotationRuntimeName:         hostInfo.OCIRuntime.Name,
-		define.CheckpointAnnotationRuntimeVersion:      hostInfo.OCIRuntime.Version,
-		define.CheckpointAnnotationConmonVersion:       hostInfo.Conmon.Version,
-		define.CheckpointAnnotationHostArch:            hostInfo.Arch,
-		define.CheckpointAnnotationHostKernel:          hostInfo.Kernel,
-		define.CheckpointAnnotationCgroupVersion:       hostInfo.CgroupsVersion,
-		define.CheckpointAnnotationDistributionVersion: hostInfo.Distribution.Version,
-		define.CheckpointAnnotationDistributionName:    hostInfo.Distribution.Distribution,
-	}
-
-	for key, value := range checkpointImageAnnotations {
-		importBuilder.SetAnnotation(key, value)
-	}
-
-	return nil
-}
-
-// resolveCheckpointImageName rewrites options.CreateImage in place with the
-// name returned by the libimage runtime's ResolveName. An empty CreateImage
-// is left untouched, and on a resolution error the option is not modified.
-func (c *Container) resolveCheckpointImageName(options *ContainerCheckpointOptions) error {
-	requested := options.CreateImage
-	if requested == "" {
-		return nil
-	}
-
-	// Resolve image name
-	resolved, err := c.runtime.LibimageRuntime().ResolveName(requested)
-	if err == nil {
-		options.CreateImage = resolved
-	}
-	return err
-}
-
-// createCheckpointImage packs the container's checkpoint into an image named
-// options.CreateImage. It is a no-op when CreateImage is empty.
-//
-// The checkpoint is first exported into a tar file inside a temporary
-// directory, then added to a buildah working container created from
-// "scratch", annotated via addCheckpointImageMetadata and committed (squashed)
-// to the storage reference parsed from options.CreateImage. The temporary
-// directory and the working container are removed again on return.
-func (c *Container) createCheckpointImage(ctx context.Context, options ContainerCheckpointOptions) error {
-	if options.CreateImage == "" {
-		return nil
-	}
-	logrus.Debugf("Create checkpoint image %s", options.CreateImage)
-
-	// Create storage reference
-	imageRef, err := is.Transport.ParseStoreReference(c.runtime.store, options.CreateImage)
-	if err != nil {
-		// Keep the cause so the user can see why the name was rejected.
-		return fmt.Errorf("failed to parse image name: %w", err)
-	}
-
-	// Build an image scratch
-	builderOptions := buildah.BuilderOptions{
-		FromImage: "scratch",
-	}
-	importBuilder, err := buildah.NewBuilder(ctx, c.runtime.store, builderOptions)
-	if err != nil {
-		return err
-	}
-	// Clean up buildah working container
-	defer func() {
-		if err := importBuilder.Delete(); err != nil {
-			logrus.Errorf("Image builder delete failed: %v", err)
-		}
-	}()
-
-	if err := c.prepareCheckpointExport(); err != nil {
-		return err
-	}
-
-	// Export checkpoint into temporary tar file
-	tmpDir, err := ioutil.TempDir("", "checkpoint_image_")
-	if err != nil {
-		return err
-	}
-	defer os.RemoveAll(tmpDir)
-
-	// options is a by-value copy, so redirecting TargetFile here does not
-	// leak back to the caller.
-	options.TargetFile = path.Join(tmpDir, "checkpoint.tar")
-
-	if err := c.exportCheckpoint(options); err != nil {
-		return err
-	}
-
-	// Copy checkpoint from temporary tar file in the image
-	addAndCopyOptions := buildah.AddAndCopyOptions{}
-	if err := importBuilder.Add("", true, addAndCopyOptions, options.TargetFile); err != nil {
-		return err
-	}
-
-	if err := c.addCheckpointImageMetadata(importBuilder); err != nil {
-		return err
-	}
-
-	commitOptions := buildah.CommitOptions{
-		Squash:        true,
-		SystemContext: c.runtime.imageContext,
-	}
-
-	// Create checkpoint image
-	id, _, _, err := importBuilder.Commit(ctx, imageRef, commitOptions)
-	if err != nil {
-		return err
-	}
-	logrus.Debugf("Created checkpoint image: %s", id)
-	return nil
-}
-
-// exportCheckpoint writes the container's checkpoint as a tar archive to
-// options.TargetFile (created with mode 0600, compressed according to
-// options.Compression).
-//
-// Containers with dependencies are rejected, except for a container whose
-// single dependency is the infra container of its pod. Besides the fixed set
-// of checkpoint files, the archive contains the root file-system diff unless
-// options.IgnoreRootfs is set, and one tar per named volume unless
-// options.IgnoreVolumes is set. The temporary rootfs-diff files and the
-// volume export directory are removed from the bundle after a successful
-// export.
-func (c *Container) exportCheckpoint(options ContainerCheckpointOptions) error {
-	if len(c.Dependencies()) == 1 {
-		// Check if the dependency is an infra container. If it is we can checkpoint
-		// the container out of the Pod.
-		if c.config.Pod == "" {
-			return errors.New("cannot export checkpoints of containers with dependencies")
-		}
-
-		pod, err := c.runtime.state.Pod(c.config.Pod)
-		if err != nil {
-			return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), c.config.Pod, err)
-		}
-		infraID, err := pod.InfraContainerID()
-		if err != nil {
-			return fmt.Errorf("cannot retrieve infra container ID for pod %s: %w", c.config.Pod, err)
-		}
-		if c.Dependencies()[0] != infraID {
-			return errors.New("cannot export checkpoints of containers with dependencies")
-		}
-	}
-	if len(c.Dependencies()) > 1 {
-		return errors.New("cannot export checkpoints of containers with dependencies")
-	}
-	logrus.Debugf("Exporting checkpoint image of container %q to %q", c.ID(), options.TargetFile)
-
-	includeFiles := []string{
-		"artifacts",
-		metadata.DevShmCheckpointTar,
-		metadata.ConfigDumpFile,
-		metadata.SpecDumpFile,
-		metadata.NetworkStatusFile,
-		stats.StatsDump,
-	}
-
-	if c.LogDriver() == define.KubernetesLogging ||
-		c.LogDriver() == define.JSONLogging {
-		includeFiles = append(includeFiles, "ctr.log")
-	}
-	if options.PreCheckPoint {
-		includeFiles = append(includeFiles, preCheckpointDir)
-	} else {
-		includeFiles = append(includeFiles, metadata.CheckpointDirectory)
-	}
-	// Get root file-system changes included in the checkpoint archive
-	var addToTarFiles []string
-	if !options.IgnoreRootfs {
-		// To correctly track deleted files, let's go through the output of 'podman diff'
-		rootFsChanges, err := c.runtime.GetDiff("", c.ID(), define.DiffContainer)
-		if err != nil {
-			return fmt.Errorf("error exporting root file-system diff for %q: %w", c.ID(), err)
-		}
-
-		// Plain assignment on purpose: ":=" would declare a new, block-local
-		// addToTarFiles and leave the outer slice empty, so the cleanup loop
-		// at the end of this function would never remove anything.
-		addToTarFiles, err = crutils.CRCreateRootFsDiffTar(&rootFsChanges, c.state.Mountpoint, c.bundlePath())
-		if err != nil {
-			return err
-		}
-
-		includeFiles = append(includeFiles, addToTarFiles...)
-	}
-
-	// Folder containing archived volumes that will be included in the export
-	expVolDir := filepath.Join(c.bundlePath(), metadata.CheckpointVolumesDirectory)
-
-	// Create an archive for each volume associated with the container
-	if !options.IgnoreVolumes {
-		if err := os.MkdirAll(expVolDir, 0700); err != nil {
-			return fmt.Errorf("error creating volumes export directory %q: %w", expVolDir, err)
-		}
-
-		for _, v := range c.config.NamedVolumes {
-			volumeTarFilePath := filepath.Join(metadata.CheckpointVolumesDirectory, v.Name+".tar")
-			volumeTarFileFullPath := filepath.Join(c.bundlePath(), volumeTarFilePath)
-
-			// Each volume is archived inside its own function so that the
-			// tar file is closed on every path, including the error returns.
-			archiveVolume := func() error {
-				volumeTarFile, err := os.Create(volumeTarFileFullPath)
-				if err != nil {
-					return fmt.Errorf("error creating %q: %w", volumeTarFileFullPath, err)
-				}
-				defer volumeTarFile.Close()
-
-				volume, err := c.runtime.GetVolume(v.Name)
-				if err != nil {
-					return err
-				}
-
-				mp, err := volume.MountPoint()
-				if err != nil {
-					return err
-				}
-				if mp == "" {
-					return fmt.Errorf("volume %s is not mounted, cannot export: %w", volume.Name(), define.ErrInternal)
-				}
-
-				input, err := archive.TarWithOptions(mp, &archive.TarOptions{
-					Compression:      archive.Uncompressed,
-					IncludeSourceDir: true,
-				})
-				if err != nil {
-					return fmt.Errorf("error reading volume directory %q: %w", v.Dest, err)
-				}
-
-				_, err = io.Copy(volumeTarFile, input)
-				return err
-			}
-			if err := archiveVolume(); err != nil {
-				return err
-			}
-
-			includeFiles = append(includeFiles, volumeTarFilePath)
-		}
-	}
-
-	input, err := archive.TarWithOptions(c.bundlePath(), &archive.TarOptions{
-		Compression:      options.Compression,
-		IncludeSourceDir: true,
-		IncludeFiles:     includeFiles,
-	})
-
-	if err != nil {
-		return fmt.Errorf("error reading checkpoint directory %q: %w", c.ID(), err)
-	}
-
-	outFile, err := os.Create(options.TargetFile)
-	if err != nil {
-		return fmt.Errorf("error creating checkpoint export file %q: %w", options.TargetFile, err)
-	}
-	defer outFile.Close()
-
-	if err := os.Chmod(options.TargetFile, 0600); err != nil {
-		return err
-	}
-
-	_, err = io.Copy(outFile, input)
-	if err != nil {
-		return err
-	}
-
-	// Best-effort removal of the temporary rootfs-diff files; they are already
-	// part of the archive at this point.
-	for _, file := range addToTarFiles {
-		os.Remove(filepath.Join(c.bundlePath(), file))
-	}
-
-	if !options.IgnoreVolumes {
-		os.RemoveAll(expVolDir)
-	}
-
-	return nil
-}
-
-// checkpointRestoreSupported reports, as an error, why checkpoint/restore
-// cannot be used: either criu.CheckForCriu rejects the requested version or
-// the container's OCI runtime reports no checkpoint support. It returns nil
-// when both checks pass.
-func (c *Container) checkpointRestoreSupported(version int) error {
-	switch {
-	case !criu.CheckForCriu(version):
-		return fmt.Errorf("checkpoint/restore requires at least CRIU %d", version)
-	case !c.ociRuntime.SupportsCheckpoint():
-		return errors.New("configured runtime does not support checkpoint/restore")
-	default:
-		return nil
-	}
-}
-
-// checkpoint checkpoints a running container through the OCI runtime.
-//
-// It refuses containers that are not running, and containers started with
-// '--rm' unless an export target is given. After the runtime has written the
-// checkpoint it archives the content of /dev/shm (when the container uses its
-// own ShmDir), stores the network status, and then either exports the
-// checkpoint to options.TargetFile or creates a checkpoint image. Unless
-// KeepRunning or PreCheckPoint is set, the container is marked stopped and
-// cleaned up. Unless Keep or PreCheckPoint is set, the dump log, statistics
-// and config/spec dump files are removed from the bundle.
-//
-// It returns the CRIU dump statistics (nil unless options.PrintStats is set),
-// the duration reported by the runtime's CheckpointContainer, and an error.
-func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) {
-	if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil {
-		return nil, 0, err
-	}
-
-	if c.state.State != define.ContainerStateRunning {
-		return nil, 0, fmt.Errorf("%q is not running, cannot checkpoint: %w", c.state.State, define.ErrCtrStateInvalid)
-	}
-
-	if c.AutoRemove() && options.TargetFile == "" {
-		return nil, 0, errors.New("cannot checkpoint containers that have been started with '--rm' unless '--export' is used")
-	}
-
-	if err := c.resolveCheckpointImageName(&options); err != nil {
-		return nil, 0, err
-	}
-
-	if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "dump.log", c.MountLabel()); err != nil {
-		return nil, 0, err
-	}
-
-	// Setting CheckpointLog early in case there is a failure.
-	c.state.CheckpointLog = path.Join(c.bundlePath(), "dump.log")
-	c.state.CheckpointPath = c.CheckpointPath()
-
-	runtimeCheckpointDuration, err := c.ociRuntime.CheckpointContainer(c, options)
-	if err != nil {
-		return nil, 0, err
-	}
-
-	// Keep the content of /dev/shm directory
-	if c.config.ShmDir != "" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
-		shmDirTarFileFullPath := filepath.Join(c.bundlePath(), metadata.DevShmCheckpointTar)
-
-		shmDirTarFile, err := os.Create(shmDirTarFileFullPath)
-		if err != nil {
-			return nil, 0, err
-		}
-		defer shmDirTarFile.Close()
-
-		input, err := archive.TarWithOptions(c.config.ShmDir, &archive.TarOptions{
-			Compression:      archive.Uncompressed,
-			IncludeSourceDir: true,
-		})
-		if err != nil {
-			return nil, 0, err
-		}
-
-		if _, err = io.Copy(shmDirTarFile, input); err != nil {
-			return nil, 0, err
-		}
-	}
-
-	// Save network.status. This is needed to restore the container with
-	// the same IP. Currently limited to one IP address in a container
-	// with one interface.
-	// FIXME: will this break something?
-	if _, err := metadata.WriteJSONFile(c.getNetworkStatus(), c.bundlePath(), metadata.NetworkStatusFile); err != nil {
-		return nil, 0, err
-	}
-
-	defer c.newContainerEvent(events.Checkpoint)
-
-	// There is a bug from criu: https://github.com/checkpoint-restore/criu/issues/116
-	// We have to change the symbolic link from absolute path to relative path
-	if options.WithPrevious {
-		os.Remove(path.Join(c.CheckpointPath(), "parent"))
-		if err := os.Symlink("../pre-checkpoint", path.Join(c.CheckpointPath(), "parent")); err != nil {
-			return nil, 0, err
-		}
-	}
-
-	if options.TargetFile != "" {
-		if err := c.exportCheckpoint(options); err != nil {
-			return nil, 0, err
-		}
-	} else {
-		if err := c.createCheckpointImage(ctx, options); err != nil {
-			return nil, 0, err
-		}
-	}
-
-	logrus.Debugf("Checkpointed container %s", c.ID())
-
-	if !options.KeepRunning && !options.PreCheckPoint {
-		c.state.State = define.ContainerStateStopped
-		c.state.Checkpointed = true
-		c.state.CheckpointedTime = time.Now()
-		c.state.Restored = false
-		c.state.RestoredTime = time.Time{}
-
-		// Clean up Storage and Network
-		if err := c.cleanup(ctx); err != nil {
-			return nil, 0, err
-		}
-	}
-
-	criuStatistics, err := func() (*define.CRIUCheckpointRestoreStatistics, error) {
-		if !options.PrintStats {
-			return nil, nil
-		}
-		statsDirectory, err := os.Open(c.bundlePath())
-		if err != nil {
-			return nil, fmt.Errorf("not able to open %q: %w", c.bundlePath(), err)
-		}
-		// Release the directory handle once the statistics have been read.
-		defer statsDirectory.Close()
-
-		dumpStatistics, err := stats.CriuGetDumpStats(statsDirectory)
-		if err != nil {
-			return nil, fmt.Errorf("displaying checkpointing statistics not possible: %w", err)
-		}
-
-		return &define.CRIUCheckpointRestoreStatistics{
-			FreezingTime: dumpStatistics.GetFreezingTime(),
-			FrozenTime:   dumpStatistics.GetFrozenTime(),
-			MemdumpTime:  dumpStatistics.GetMemdumpTime(),
-			MemwriteTime: dumpStatistics.GetMemwriteTime(),
-			PagesScanned: dumpStatistics.GetPagesScanned(),
-			PagesWritten: dumpStatistics.GetPagesWritten(),
-		}, nil
-	}()
-	if err != nil {
-		return nil, 0, err
-	}
-
-	if !options.Keep && !options.PreCheckPoint {
-		cleanup := []string{
-			"dump.log",
-			stats.StatsDump,
-			metadata.ConfigDumpFile,
-			metadata.SpecDumpFile,
-		}
-		for _, del := range cleanup {
-			file := filepath.Join(c.bundlePath(), del)
-			// Removal is best effort; a leftover file is only logged.
-			if err := os.Remove(file); err != nil {
-				logrus.Debugf("Unable to remove file %s", file)
-			}
-		}
-		// The file has been deleted. Do not mention it.
-		c.state.CheckpointLog = ""
-	}
-
-	c.state.FinishedTime = time.Now()
-	return criuStatistics, runtimeCheckpointDuration, c.save()
-}
-
-// generateContainerSpec builds a generator from c.config.Spec and persists
-// its configuration through saveSpec, so that the newly created config.json
-// exists on disk.
-func (c *Container) generateContainerSpec() error {
-	// NewFromSpec() is marked deprecated in its own comment, but the
-	// recommended replacement just causes a nil map panic.
-	//nolint:staticcheck
-	specGen := generate.NewFromSpec(c.config.Spec)
-
-	saveErr := c.saveSpec(specGen.Config)
-	if saveErr == nil {
-		return nil
-	}
-	return fmt.Errorf("saving imported container specification for restore failed: %w", saveErr)
-}
-
-// importCheckpointImage copies the checkpoint files stored in the image
-// imageID into the container's bundle directory and then regenerates the
-// container spec. Entries that cannot be copied are only logged at debug
-// level, not treated as errors.
-func (c *Container) importCheckpointImage(ctx context.Context, imageID string) error {
-	image, _, lookupErr := c.Runtime().LibimageRuntime().LookupImage(imageID, nil)
-	if lookupErr != nil {
-		return lookupErr
-	}
-
-	imageRoot, mountErr := image.Mount(ctx, nil, "")
-	// The unmount is registered before the mount error is inspected, so it
-	// also runs when mounting failed.
-	defer func() {
-		if err := c.unmount(true); err != nil {
-			logrus.Errorf("Failed to unmount container: %v", err)
-		}
-	}()
-	if mountErr != nil {
-		return mountErr
-	}
-
-	// Everything except ConfigDumpFile and SpecDumpFile is imported. New
-	// container config files are generated instead, which makes it possible
-	// to specify a new container name.
-	entries := [...]string{
-		"artifacts",
-		metadata.CheckpointDirectory,
-		metadata.CheckpointVolumesDirectory,
-		metadata.DevShmCheckpointTar,
-		metadata.RootFsDiffTar,
-		metadata.DeletedFilesFile,
-		metadata.PodOptionsFile,
-		metadata.PodDumpFile,
-	}
-
-	for _, entry := range entries {
-		from := filepath.Join(imageRoot, entry)
-		to := filepath.Join(c.bundlePath(), entry)
-		if err := archive.NewDefaultArchiver().CopyWithTar(from, to); err != nil {
-			logrus.Debugf("Can't import '%s' from checkpoint image", entry)
-		}
-	}
-
-	return c.generateContainerSpec()
-}
-
-// importCheckpointTar unpacks the checkpoint archive input into the bundle
-// directory via crutils.CRImportCheckpointWithoutConfig and, on success,
-// regenerates the container spec.
-func (c *Container) importCheckpointTar(input string) error {
-	importErr := crutils.CRImportCheckpointWithoutConfig(c.bundlePath(), input)
-	if importErr == nil {
-		return c.generateContainerSpec()
-	}
-	return importErr
-}
-
-// importPreCheckpoint opens the pre-checkpoint archive at input and untars it
-// into the container's bundle directory.
-func (c *Container) importPreCheckpoint(input string) error {
-	src, openErr := os.Open(input)
-	if openErr != nil {
-		return fmt.Errorf("failed to open pre-checkpoint archive for import: %w", openErr)
-	}
-	defer src.Close()
-
-	if untarErr := archive.Untar(src, c.bundlePath(), nil); untarErr != nil {
-		return fmt.Errorf("unpacking of pre-checkpoint archive %s failed: %w", input, untarErr)
-	}
-	return nil
-}
-
-// restore restores the container from a checkpoint.
-//
-// The checkpoint can already be present in the bundle or be imported from an
-// archive (options.TargetFile) or a checkpoint image
-// (options.CheckpointImageID). Optionally a pre-checkpoint archive
-// (options.ImportPrevious) is unpacked first. The function then:
-//   - re-applies the saved network status as static MAC/IPs, unless a new name
-//     was requested or both IgnoreStaticIP and IgnoreStaticMAC are set;
-//   - prepares the container and rewrites the OCI spec (root path, network
-//     namespace, pod namespaces, bind mounts);
-//   - restores the /dev/shm content, named volumes and root file-system changes;
-//   - asks the OCI runtime to create the container from the checkpoint;
-//   - unless options.Keep is set, removes the checkpoint related files.
-//
-// If an error is returned after the network options have been set up, the
-// container is cleaned up again. It returns the CRIU restore statistics (nil
-// unless options.PrintStats is set), the duration reported by the runtime's
-// CreateContainer, and an error.
-func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (criuStatistics *define.CRIUCheckpointRestoreStatistics, runtimeRestoreDuration int64, retErr error) {
-	minCriuVersion := func() int {
-		if options.Pod == "" {
-			return criu.MinCriuVersion
-		}
-		return criu.PodCriuVersion
-	}()
-	if err := c.checkpointRestoreSupported(minCriuVersion); err != nil {
-		return nil, 0, err
-	}
-
-	if options.Pod != "" && !crutils.CRRuntimeSupportsPodCheckpointRestore(c.ociRuntime.Path()) {
-		return nil, 0, fmt.Errorf("runtime %s does not support pod restore", c.ociRuntime.Path())
-	}
-
-	if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
-		return nil, 0, fmt.Errorf("container %s is running or paused, cannot restore: %w", c.ID(), define.ErrCtrStateInvalid)
-	}
-
-	if options.ImportPrevious != "" {
-		if err := c.importPreCheckpoint(options.ImportPrevious); err != nil {
-			return nil, 0, err
-		}
-	}
-
-	if options.TargetFile != "" {
-		if err := c.importCheckpointTar(options.TargetFile); err != nil {
-			return nil, 0, err
-		}
-	} else if options.CheckpointImageID != "" {
-		if err := c.importCheckpointImage(ctx, options.CheckpointImageID); err != nil {
-			return nil, 0, err
-		}
-	}
-
-	// Let's try to stat() CRIU's inventory file. If it does not exist, it makes
-	// no sense to try a restore. This is a minimal check if a checkpoint exist.
-	if _, err := os.Stat(filepath.Join(c.CheckpointPath(), "inventory.img")); os.IsNotExist(err) {
-		return nil, 0, fmt.Errorf("a complete checkpoint for this container cannot be found, cannot restore: %w", err)
-	}
-
-	if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "restore.log", c.MountLabel()); err != nil {
-		return nil, 0, err
-	}
-
-	// Setting RestoreLog early in case there is a failure.
-	c.state.RestoreLog = path.Join(c.bundlePath(), "restore.log")
-	c.state.CheckpointPath = c.CheckpointPath()
-
-	// Read network configuration from checkpoint
-	var netStatus map[string]types.StatusBlock
-	_, err := metadata.ReadJSONFile(&netStatus, c.bundlePath(), metadata.NetworkStatusFile)
-	if err != nil {
-		logrus.Infof("Failed to unmarshal network status, cannot restore the same ip/mac: %v", err)
-	}
-	// If the restored container should get a new name, the IP address of
-	// the container will not be restored. This assumes that if a new name is
-	// specified, the container is restored multiple times.
-	// TODO: This implicit restoring with or without IP depending on an
-	// unrelated restore parameter (--name) does not seem like the
-	// best solution.
-	if err == nil && options.Name == "" && (!options.IgnoreStaticIP || !options.IgnoreStaticMAC) {
-		// The file with the network.status does exist. Let's restore the
-		// container with the same networks settings as during checkpointing.
-		networkOpts, err := c.networks()
-		if err != nil {
-			return nil, 0, err
-		}
-
-		netOpts := make(map[string]types.PerNetworkOptions, len(netStatus))
-		for network, perNetOpts := range networkOpts {
-			// unset mac and ips before we start adding the ones from the status
-			perNetOpts.StaticMAC = nil
-			perNetOpts.StaticIPs = nil
-			for name, netInt := range netStatus[network].Interfaces {
-				perNetOpts.InterfaceName = name
-				// The MAC address is governed by IgnoreStaticMAC, the IP
-				// addresses by IgnoreStaticIP; the two can be set independently.
-				if !options.IgnoreStaticMAC {
-					perNetOpts.StaticMAC = netInt.MacAddress
-				}
-				if !options.IgnoreStaticIP {
-					for _, netAddress := range netInt.Subnets {
-						perNetOpts.StaticIPs = append(perNetOpts.StaticIPs, netAddress.IPNet.IP)
-					}
-				}
-				// Normally interfaces have a length of 1, only for some special cni configs we could get more.
-				// For now just use the first interface to get the ips this should be good enough for most cases.
-				break
-			}
-			netOpts[network] = perNetOpts
-		}
-		c.perNetworkOpts = netOpts
-	}
-
-	// From here on a failed restore cleans the container up again.
-	defer func() {
-		if retErr != nil {
-			if err := c.cleanup(ctx); err != nil {
-				logrus.Errorf("Cleaning up container %s: %v", c.ID(), err)
-			}
-		}
-	}()
-
-	if err := c.prepare(); err != nil {
-		return nil, 0, err
-	}
-
-	// Read config
-	jsonPath := filepath.Join(c.bundlePath(), "config.json")
-	logrus.Debugf("generate.NewFromFile at %v", jsonPath)
-	g, err := generate.NewFromFile(jsonPath)
-	if err != nil {
-		logrus.Debugf("generate.NewFromFile failed with %v", err)
-		return nil, 0, err
-	}
-
-	// Restoring from an import means that we are doing migration
-	if options.TargetFile != "" || options.CheckpointImageID != "" {
-		g.SetRootPath(c.state.Mountpoint)
-	}
-
-	// We want to have the same network namespace as before.
-	if c.config.CreateNetNS {
-		netNSPath := ""
-		if !c.config.PostConfigureNetNS {
-			netNSPath = c.state.NetNS.Path()
-		}
-
-		if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), netNSPath); err != nil {
-			return nil, 0, err
-		}
-	}
-
-	if options.Pod != "" {
-		// Running in a Pod means that we have to change all namespace settings to
-		// the ones from the infrastructure container.
-		pod, err := c.runtime.LookupPod(options.Pod)
-		if err != nil {
-			return nil, 0, fmt.Errorf("pod %q cannot be retrieved: %w", options.Pod, err)
-		}
-
-		infraContainer, err := pod.InfraContainer()
-		if err != nil {
-			return nil, 0, fmt.Errorf("cannot retrieved infra container from pod %q: %w", options.Pod, err)
-		}
-
-		infraContainer.lock.Lock()
-		if err := infraContainer.syncContainer(); err != nil {
-			infraContainer.lock.Unlock()
-			return nil, 0, fmt.Errorf("error syncing infrastructure container %s status: %w", infraContainer.ID(), err)
-		}
-		if infraContainer.state.State != define.ContainerStateRunning {
-			if err := infraContainer.initAndStart(ctx); err != nil {
-				infraContainer.lock.Unlock()
-				return nil, 0, fmt.Errorf("error starting infrastructure container %s status: %w", infraContainer.ID(), err)
-			}
-		}
-		infraContainer.lock.Unlock()
-
-		if c.config.IPCNsCtr != "" {
-			nsPath, err := infraContainer.namespacePath(IPCNS)
-			if err != nil {
-				return nil, 0, fmt.Errorf("cannot retrieve IPC namespace path for Pod %q: %w", options.Pod, err)
-			}
-			if err := g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), nsPath); err != nil {
-				return nil, 0, err
-			}
-		}
-
-		if c.config.NetNsCtr != "" {
-			nsPath, err := infraContainer.namespacePath(NetNS)
-			if err != nil {
-				return nil, 0, fmt.Errorf("cannot retrieve network namespace path for Pod %q: %w", options.Pod, err)
-			}
-			if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), nsPath); err != nil {
-				return nil, 0, err
-			}
-		}
-
-		if c.config.PIDNsCtr != "" {
-			nsPath, err := infraContainer.namespacePath(PIDNS)
-			if err != nil {
-				return nil, 0, fmt.Errorf("cannot retrieve PID namespace path for Pod %q: %w", options.Pod, err)
-			}
-			if err := g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), nsPath); err != nil {
-				return nil, 0, err
-			}
-		}
-
-		if c.config.UTSNsCtr != "" {
-			nsPath, err := infraContainer.namespacePath(UTSNS)
-			if err != nil {
-				return nil, 0, fmt.Errorf("cannot retrieve UTS namespace path for Pod %q: %w", options.Pod, err)
-			}
-			if err := g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), nsPath); err != nil {
-				return nil, 0, err
-			}
-		}
-
-		if c.config.CgroupNsCtr != "" {
-			nsPath, err := infraContainer.namespacePath(CgroupNS)
-			if err != nil {
-				return nil, 0, fmt.Errorf("cannot retrieve Cgroup namespace path for Pod %q: %w", options.Pod, err)
-			}
-			if err := g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), nsPath); err != nil {
-				return nil, 0, err
-			}
-		}
-	}
-
-	if err := c.makeBindMounts(); err != nil {
-		return nil, 0, err
-	}
-
-	if options.TargetFile != "" || options.CheckpointImageID != "" {
-		for dstPath, srcPath := range c.state.BindMounts {
-			newMount := spec.Mount{
-				Type:        "bind",
-				Source:      srcPath,
-				Destination: dstPath,
-				Options:     []string{"bind", "private"},
-			}
-			if c.IsReadOnly() && dstPath != "/dev/shm" {
-				newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
-			}
-			if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
-				newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
-			}
-			if !MountExists(g.Mounts(), dstPath) {
-				g.AddMount(newMount)
-			}
-		}
-	}
-
-	// Restore /dev/shm content
-	if c.config.ShmDir != "" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
-		shmDirTarFileFullPath := filepath.Join(c.bundlePath(), metadata.DevShmCheckpointTar)
-		if _, err := os.Stat(shmDirTarFileFullPath); err != nil {
-			logrus.Debug("Container checkpoint doesn't contain dev/shm: ", err.Error())
-		} else {
-			shmDirTarFile, err := os.Open(shmDirTarFileFullPath)
-			if err != nil {
-				return nil, 0, err
-			}
-			defer shmDirTarFile.Close()
-
-			if err := archive.UntarUncompressed(shmDirTarFile, c.config.ShmDir, nil); err != nil {
-				return nil, 0, err
-			}
-		}
-	}
-
-	// Cleanup for a working restore.
-	if err := c.removeConmonFiles(); err != nil {
-		return nil, 0, err
-	}
-
-	// Save the OCI spec to disk
-	if err := c.saveSpec(g.Config); err != nil {
-		return nil, 0, err
-	}
-
-	// When restoring from an imported archive, allow restoring the content of volumes.
-	// Volumes are created in setupContainer()
-	if !options.IgnoreVolumes && (options.TargetFile != "" || options.CheckpointImageID != "") {
-		for _, v := range c.config.NamedVolumes {
-			volumeFilePath := filepath.Join(c.bundlePath(), metadata.CheckpointVolumesDirectory, v.Name+".tar")
-
-			volumeFile, err := os.Open(volumeFilePath)
-			if err != nil {
-				return nil, 0, fmt.Errorf("failed to open volume file %s: %w", volumeFilePath, err)
-			}
-			defer volumeFile.Close()
-
-			volume, err := c.runtime.GetVolume(v.Name)
-			if err != nil {
-				return nil, 0, fmt.Errorf("failed to retrieve volume %s: %w", v.Name, err)
-			}
-
-			mountPoint, err := volume.MountPoint()
-			if err != nil {
-				return nil, 0, err
-			}
-			if mountPoint == "" {
-				// err is nil on this path, so wrap the internal-error sentinel
-				// instead, as exportCheckpoint does for the same condition.
-				return nil, 0, fmt.Errorf("unable to import volume %s as it is not mounted: %w", volume.Name(), define.ErrInternal)
-			}
-			if err := archive.UntarUncompressed(volumeFile, mountPoint, nil); err != nil {
-				return nil, 0, fmt.Errorf("failed to extract volume %s to %s: %w", volumeFilePath, mountPoint, err)
-			}
-		}
-	}
-
-	// Before actually restarting the container, apply the root file-system changes
-	if !options.IgnoreRootfs {
-		if err := crutils.CRApplyRootFsDiffTar(c.bundlePath(), c.state.Mountpoint); err != nil {
-			return nil, 0, err
-		}
-
-		if err := crutils.CRRemoveDeletedFiles(c.ID(), c.bundlePath(), c.state.Mountpoint); err != nil {
-			return nil, 0, err
-		}
-	}
-
-	runtimeRestoreDuration, err = c.ociRuntime.CreateContainer(c, &options)
-	if err != nil {
-		return nil, 0, err
-	}
-
-	criuStatistics, err = func() (*define.CRIUCheckpointRestoreStatistics, error) {
-		if !options.PrintStats {
-			return nil, nil
-		}
-		statsDirectory, err := os.Open(c.bundlePath())
-		if err != nil {
-			return nil, fmt.Errorf("not able to open %q: %w", c.bundlePath(), err)
-		}
-		// Release the directory handle once the statistics have been read.
-		defer statsDirectory.Close()
-
-		restoreStatistics, err := stats.CriuGetRestoreStats(statsDirectory)
-		if err != nil {
-			return nil, fmt.Errorf("displaying restore statistics not possible: %w", err)
-		}
-
-		return &define.CRIUCheckpointRestoreStatistics{
-			PagesCompared:   restoreStatistics.GetPagesCompared(),
-			PagesSkippedCow: restoreStatistics.GetPagesSkippedCow(),
-			ForkingTime:     restoreStatistics.GetForkingTime(),
-			RestoreTime:     restoreStatistics.GetRestoreTime(),
-			PagesRestored:   restoreStatistics.GetPagesRestored(),
-		}, nil
-	}()
-	if err != nil {
-		return nil, 0, err
-	}
-
-	logrus.Debugf("Restored container %s", c.ID())
-
-	c.state.State = define.ContainerStateRunning
-	c.state.Checkpointed = false
-	c.state.Restored = true
-	c.state.CheckpointedTime = time.Time{}
-	c.state.RestoredTime = time.Now()
-
-	if !options.Keep {
-		// Delete all checkpoint related files. At this point, in theory, all files
-		// should exist. Still ignoring errors for now as the container should be
-		// restored and running. Not erroring out just because some cleanup operation
-		// failed. Starting with the checkpoint directory
-		err = os.RemoveAll(c.CheckpointPath())
-		if err != nil {
-			logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", c.CheckpointPath(), err)
-		}
-		c.state.CheckpointPath = ""
-		err = os.RemoveAll(c.PreCheckPointPath())
-		if err != nil {
-			logrus.Debugf("Non-fatal: removal of pre-checkpoint directory (%s) failed: %v", c.PreCheckPointPath(), err)
-		}
-		err = os.RemoveAll(c.CheckpointVolumesPath())
-		if err != nil {
-			logrus.Debugf("Non-fatal: removal of checkpoint volumes directory (%s) failed: %v", c.CheckpointVolumesPath(), err)
-		}
-		cleanup := [...]string{
-			"restore.log",
-			"dump.log",
-			stats.StatsDump,
-			stats.StatsRestore,
-			metadata.DevShmCheckpointTar,
-			metadata.NetworkStatusFile,
-			metadata.RootFsDiffTar,
-			metadata.DeletedFilesFile,
-		}
-		for _, del := range cleanup {
-			file := filepath.Join(c.bundlePath(), del)
-			err = os.Remove(file)
-			if err != nil {
-				logrus.Debugf("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err)
-			}
-		}
-		c.state.CheckpointLog = ""
-		c.state.RestoreLog = ""
-	}
-
-	return criuStatistics, runtimeRestoreDuration, c.save()
-}
-
-// getRootNetNsDepCtr follows the chain of NetNsCtr references starting at c
-// and returns the last container in that chain, i.e. the "root" network
-// namespace dependency. A container ID that shows up twice while walking the
-// chain is reported as a loop.
-func (c *Container) getRootNetNsDepCtr() (depCtr *Container, err error) {
-	// IDs already walked; seeded with c itself to catch self references.
-	seen := map[string]int{c.config.ID: 1}
-
-	for next := c.config.NetNsCtr; next != ""; next = depCtr.config.NetNsCtr {
-		// Make sure we aren't in a loop
-		if _, dup := seen[next]; dup {
-			return nil, errors.New("loop encountered while determining net namespace container")
-		}
-		seen[next] = 1
-
-		if depCtr, err = c.runtime.state.Container(next); err != nil {
-			return nil, fmt.Errorf("error fetching dependency %s of container %s: %w", c.config.NetNsCtr, c.ID(), err)
-		}
-		// This should never happen without an error
-		if depCtr == nil {
-			break
-		}
-	}
-
-	if depCtr != nil {
-		return depCtr, nil
-	}
-	return nil, errors.New("unexpected error depCtr is nil without reported error from runtime state")
-}
-
-// mountIntoRootDirs records mountPath as the bind-mount source for mountName
-// in the container's root directory and under every configured chroot
-// directory. It always returns nil.
-func (c *Container) mountIntoRootDirs(mountName string, mountPath string) error {
-	targets := c.state.BindMounts
-	targets[mountName] = mountPath
-
-	for i := range c.config.ChrootDirs {
-		targets[filepath.Join(c.config.ChrootDirs[i], mountName)] = mountPath
-	}
-
-	return nil
-}
-
-// Make standard bind mounts to include in the container
-func (c *Container) makeBindMounts() error {
- if err := os.Chown(c.state.RunDir, c.RootUID(), c.RootGID()); err != nil {
- return fmt.Errorf("cannot chown run directory: %w", err)
- }
-
- if c.state.BindMounts == nil {
- c.state.BindMounts = make(map[string]string)
- }
- netDisabled, err := c.NetworkDisabled()
- if err != nil {
- return err
- }
-
- if !netDisabled {
- // If /etc/resolv.conf and /etc/hosts exist, delete them so we
- // will recreate. Only do this if we aren't sharing them with
- // another container.
- if c.config.NetNsCtr == "" {
- if resolvePath, ok := c.state.BindMounts["/etc/resolv.conf"]; ok {
- if err := os.Remove(resolvePath); err != nil && !os.IsNotExist(err) {
- return fmt.Errorf("container %s: %w", c.ID(), err)
- }
- delete(c.state.BindMounts, "/etc/resolv.conf")
- }
- if hostsPath, ok := c.state.BindMounts["/etc/hosts"]; ok {
- if err := os.Remove(hostsPath); err != nil && !os.IsNotExist(err) {
- return fmt.Errorf("container %s: %w", c.ID(), err)
- }
- delete(c.state.BindMounts, "/etc/hosts")
- }
- }
-
- if c.config.NetNsCtr != "" && (!c.config.UseImageResolvConf || !c.config.UseImageHosts) {
- // We share a net namespace.
- // We want /etc/resolv.conf and /etc/hosts from the
- // other container. Unless we're not creating both of
- // them.
- depCtr, err := c.getRootNetNsDepCtr()
- if err != nil {
- return fmt.Errorf("error fetching network namespace dependency container for container %s: %w", c.ID(), err)
- }
-
- // We need that container's bind mounts
- bindMounts, err := depCtr.BindMounts()
- if err != nil {
- return fmt.Errorf("error fetching bind mounts from dependency %s of container %s: %w", depCtr.ID(), c.ID(), err)
- }
-
- // The other container may not have a resolv.conf or /etc/hosts
- // If it doesn't, don't copy them
- resolvPath, exists := bindMounts["/etc/resolv.conf"]
- if !c.config.UseImageResolvConf && exists {
- err := c.mountIntoRootDirs("/etc/resolv.conf", resolvPath)
-
- if err != nil {
- return fmt.Errorf("error assigning mounts to container %s: %w", c.ID(), err)
- }
- }
-
- // check if dependency container has an /etc/hosts file.
- // It may not have one, so only use it if it does.
- hostsPath, exists := bindMounts[config.DefaultHostsFile]
- if !c.config.UseImageHosts && exists {
- // we cannot use the dependency container lock due ABBA deadlocks in cleanup()
- lock, err := lockfile.GetLockfile(hostsPath)
- if err != nil {
- return fmt.Errorf("failed to lock hosts file: %w", err)
- }
- lock.Lock()
-
- // add the newly added container to the hosts file
- // we always use 127.0.0.1 as ip since they have the same netns
- err = etchosts.Add(hostsPath, getLocalhostHostEntry(c))
- lock.Unlock()
- if err != nil {
- return fmt.Errorf("error creating hosts file for container %s which depends on container %s: %w", c.ID(), depCtr.ID(), err)
- }
-
- // finally, save it in the new container
- err = c.mountIntoRootDirs(config.DefaultHostsFile, hostsPath)
- if err != nil {
- return fmt.Errorf("error assigning mounts to container %s: %w", c.ID(), err)
- }
- }
-
- if !hasCurrentUserMapped(c) {
- if err := makeAccessible(resolvPath, c.RootUID(), c.RootGID()); err != nil {
- return err
- }
- if err := makeAccessible(hostsPath, c.RootUID(), c.RootGID()); err != nil {
- return err
- }
- }
- } else {
- if !c.config.UseImageResolvConf {
- if err := c.generateResolvConf(); err != nil {
- return fmt.Errorf("error creating resolv.conf for container %s: %w", c.ID(), err)
- }
- }
-
- if !c.config.UseImageHosts {
- if err := c.createHosts(); err != nil {
- return fmt.Errorf("error creating hosts file for container %s: %w", c.ID(), err)
- }
- }
- }
-
- if c.state.BindMounts["/etc/hosts"] != "" {
- if err := c.relabel(c.state.BindMounts["/etc/hosts"], c.config.MountLabel, true); err != nil {
- return err
- }
- }
-
- if c.state.BindMounts["/etc/resolv.conf"] != "" {
- if err := c.relabel(c.state.BindMounts["/etc/resolv.conf"], c.config.MountLabel, true); err != nil {
- return err
- }
- }
- } else if !c.config.UseImageHosts && c.state.BindMounts["/etc/hosts"] == "" {
- if err := c.createHosts(); err != nil {
- return fmt.Errorf("error creating hosts file for container %s: %w", c.ID(), err)
- }
- }
-
- if c.config.ShmDir != "" {
- // If ShmDir has a value SHM is always added when we mount the container
- c.state.BindMounts["/dev/shm"] = c.config.ShmDir
- }
-
- if c.config.Passwd == nil || *c.config.Passwd {
- newPasswd, newGroup, err := c.generatePasswdAndGroup()
- if err != nil {
- return fmt.Errorf("error creating temporary passwd file for container %s: %w", c.ID(), err)
- }
- if newPasswd != "" {
- // Make /etc/passwd
- // If it already exists, delete so we can recreate
- delete(c.state.BindMounts, "/etc/passwd")
- c.state.BindMounts["/etc/passwd"] = newPasswd
- }
- if newGroup != "" {
- // Make /etc/group
- // If it already exists, delete so we can recreate
- delete(c.state.BindMounts, "/etc/group")
- c.state.BindMounts["/etc/group"] = newGroup
- }
- }
-
- // Make /etc/hostname
- // This should never change, so no need to recreate if it exists
- if _, ok := c.state.BindMounts["/etc/hostname"]; !ok {
- hostnamePath, err := c.writeStringToRundir("hostname", c.Hostname())
- if err != nil {
- return fmt.Errorf("error creating hostname file for container %s: %w", c.ID(), err)
- }
- c.state.BindMounts["/etc/hostname"] = hostnamePath
- }
-
- // Make /etc/localtime
- ctrTimezone := c.Timezone()
- if ctrTimezone != "" {
- // validate the format of the timezone specified if it's not "local"
- if ctrTimezone != "local" {
- _, err = time.LoadLocation(ctrTimezone)
- if err != nil {
- return fmt.Errorf("error finding timezone for container %s: %w", c.ID(), err)
- }
- }
- if _, ok := c.state.BindMounts["/etc/localtime"]; !ok {
- var zonePath string
- if ctrTimezone == "local" {
- zonePath, err = filepath.EvalSymlinks("/etc/localtime")
- if err != nil {
- return fmt.Errorf("error finding local timezone for container %s: %w", c.ID(), err)
- }
- } else {
- zone := filepath.Join("/usr/share/zoneinfo", ctrTimezone)
- zonePath, err = filepath.EvalSymlinks(zone)
- if err != nil {
- return fmt.Errorf("error setting timezone for container %s: %w", c.ID(), err)
- }
- }
- localtimePath, err := c.copyTimezoneFile(zonePath)
- if err != nil {
- return fmt.Errorf("error setting timezone for container %s: %w", c.ID(), err)
- }
- c.state.BindMounts["/etc/localtime"] = localtimePath
- }
- }
-
- _, hasRunContainerenv := c.state.BindMounts["/run/.containerenv"]
- if !hasRunContainerenv {
- // check in the spec mounts
- for _, m := range c.config.Spec.Mounts {
- if m.Destination == "/run/.containerenv" || m.Destination == "/run" {
- hasRunContainerenv = true
- break
- }
- }
- }
-
- // Make .containerenv if it does not exist
- if !hasRunContainerenv {
- containerenv := c.runtime.graphRootMountedFlag(c.config.Spec.Mounts)
- isRootless := 0
- if rootless.IsRootless() {
- isRootless = 1
- }
- imageID, imageName := c.Image()
-
- if c.Privileged() {
- // Populate the .containerenv with container information
- containerenv = fmt.Sprintf(`engine="podman-%s"
-name=%q
-id=%q
-image=%q
-imageid=%q
-rootless=%d
-%s`, version.Version.String(), c.Name(), c.ID(), imageName, imageID, isRootless, containerenv)
- }
- containerenvPath, err := c.writeStringToRundir(".containerenv", containerenv)
- if err != nil {
- return fmt.Errorf("error creating containerenv file for container %s: %w", c.ID(), err)
- }
- c.state.BindMounts["/run/.containerenv"] = containerenvPath
- }
-
- // Add Subscription Mounts
- subscriptionMounts := subscriptions.MountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.Containers.DefaultMountsFile, c.state.Mountpoint, c.RootUID(), c.RootGID(), rootless.IsRootless(), false)
- for _, mount := range subscriptionMounts {
- if _, ok := c.state.BindMounts[mount.Destination]; !ok {
- c.state.BindMounts[mount.Destination] = mount.Source
- }
- }
-
- // Secrets are mounted by getting the secret data from the secrets manager,
- // copying the data into the container's static dir,
- // then mounting the copied dir into /run/secrets.
- // The secrets mounting must come after subscription mounts, since subscription mounts
- // creates the /run/secrets dir in the container where we mount as well.
- if len(c.Secrets()) > 0 {
- // create /run/secrets if subscriptions did not create
- if err := c.createSecretMountDir(); err != nil {
- return fmt.Errorf("error creating secrets mount: %w", err)
- }
- for _, secret := range c.Secrets() {
- secretFileName := secret.Name
- base := "/run/secrets"
- if secret.Target != "" {
- secretFileName = secret.Target
- // If absolute path for target given remove base.
- if filepath.IsAbs(secretFileName) {
- base = ""
- }
- }
- src := filepath.Join(c.config.SecretsPath, secret.Name)
- dest := filepath.Join(base, secretFileName)
- c.state.BindMounts[dest] = src
- }
- }
-
- return nil
-}
-
-// generateResolvConf generates a containers resolv.conf
-func (c *Container) generateResolvConf() error {
- var (
- networkNameServers []string
- networkSearchDomains []string
- )
-
- netStatus := c.getNetworkStatus()
- for _, status := range netStatus {
- if status.DNSServerIPs != nil {
- for _, nsIP := range status.DNSServerIPs {
- networkNameServers = append(networkNameServers, nsIP.String())
- }
- logrus.Debugf("Adding nameserver(s) from network status of '%q'", status.DNSServerIPs)
- }
- if status.DNSSearchDomains != nil {
- networkSearchDomains = append(networkSearchDomains, status.DNSSearchDomains...)
- logrus.Debugf("Adding search domain(s) from network status of '%q'", status.DNSSearchDomains)
- }
- }
-
- ipv6, err := c.checkForIPv6(netStatus)
- if err != nil {
- return err
- }
-
- nameservers := make([]string, 0, len(c.runtime.config.Containers.DNSServers)+len(c.config.DNSServer))
- nameservers = append(nameservers, c.runtime.config.Containers.DNSServers...)
- for _, ip := range c.config.DNSServer {
- nameservers = append(nameservers, ip.String())
- }
- // If the user provided dns, it trumps all; then dns masq; then resolv.conf
- var search []string
- keepHostServers := false
- if len(nameservers) == 0 {
- keepHostServers = true
- // first add the nameservers from the networks status
- nameservers = networkNameServers
- // when we add network dns server we also have to add the search domains
- search = networkSearchDomains
- // slirp4netns has a built in DNS forwarder.
- if c.config.NetMode.IsSlirp4netns() {
- slirp4netnsDNS, err := GetSlirp4netnsDNS(c.slirp4netnsSubnet)
- if err != nil {
- logrus.Warn("Failed to determine Slirp4netns DNS: ", err.Error())
- } else {
- nameservers = append(nameservers, slirp4netnsDNS.String())
- }
- }
- }
-
- if len(c.config.DNSSearch) > 0 || len(c.runtime.config.Containers.DNSSearches) > 0 {
- customSearch := make([]string, 0, len(c.config.DNSSearch)+len(c.runtime.config.Containers.DNSSearches))
- customSearch = append(customSearch, c.runtime.config.Containers.DNSSearches...)
- customSearch = append(customSearch, c.config.DNSSearch...)
- search = customSearch
- }
-
- options := make([]string, 0, len(c.config.DNSOption)+len(c.runtime.config.Containers.DNSOptions))
- options = append(options, c.runtime.config.Containers.DNSOptions...)
- options = append(options, c.config.DNSOption...)
-
- destPath := filepath.Join(c.state.RunDir, "resolv.conf")
-
- if err := resolvconf.New(&resolvconf.Params{
- IPv6Enabled: ipv6,
- KeepHostServers: keepHostServers,
- Nameservers: nameservers,
- Namespaces: c.config.Spec.Linux.Namespaces,
- Options: options,
- Path: destPath,
- Searches: search,
- }); err != nil {
- return fmt.Errorf("error building resolv.conf for container %s: %w", c.ID(), err)
- }
-
- return c.bindMountRootFile(destPath, resolvconf.DefaultResolvConf)
-}
-
-// Check if a container uses IPv6.
-func (c *Container) checkForIPv6(netStatus map[string]types.StatusBlock) (bool, error) {
- for _, status := range netStatus {
- for _, netInt := range status.Interfaces {
- for _, netAddress := range netInt.Subnets {
- // Note: only using To16() does not work since it also returns a valid ip for ipv4
- if netAddress.IPNet.IP.To4() == nil && netAddress.IPNet.IP.To16() != nil {
- return true, nil
- }
- }
- }
- }
-
- if c.config.NetMode.IsSlirp4netns() {
- ctrNetworkSlipOpts := []string{}
- if c.config.NetworkOptions != nil {
- ctrNetworkSlipOpts = append(ctrNetworkSlipOpts, c.config.NetworkOptions["slirp4netns"]...)
- }
- slirpOpts, err := parseSlirp4netnsNetworkOptions(c.runtime, ctrNetworkSlipOpts)
- if err != nil {
- return false, err
- }
- return slirpOpts.enableIPv6, nil
- }
-
- return false, nil
-}
-
-// Add a new nameserver to the container's resolv.conf, ensuring that it is the
-// first nameserver present.
-// Usable only with running containers.
-func (c *Container) addNameserver(ips []string) error {
- // Take no action if container is not running.
- if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
- return nil
- }
-
- // Do we have a resolv.conf at all?
- path, ok := c.state.BindMounts[resolvconf.DefaultResolvConf]
- if !ok {
- return nil
- }
-
- if err := resolvconf.Add(path, ips); err != nil {
- return fmt.Errorf("adding new nameserver to container %s resolv.conf: %w", c.ID(), err)
- }
-
- return nil
-}
-
-// Remove an entry from the existing resolv.conf of the container.
-// Usable only with running containers.
-func (c *Container) removeNameserver(ips []string) error {
- // Take no action if container is not running.
- if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
- return nil
- }
-
- // Do we have a resolv.conf at all?
- path, ok := c.state.BindMounts[resolvconf.DefaultResolvConf]
- if !ok {
- return nil
- }
-
- if err := resolvconf.Remove(path, ips); err != nil {
- return fmt.Errorf("removing nameservers from container %s resolv.conf: %w", c.ID(), err)
- }
-
- return nil
-}
-
-func getLocalhostHostEntry(c *Container) etchosts.HostEntries {
- return etchosts.HostEntries{{IP: "127.0.0.1", Names: []string{c.Hostname(), c.config.Name}}}
-}
-
-// getHostsEntries returns the container ip host entries for the correct netmode
-func (c *Container) getHostsEntries() (etchosts.HostEntries, error) {
- var entries etchosts.HostEntries
- names := []string{c.Hostname(), c.config.Name}
- switch {
- case c.config.NetMode.IsBridge():
- entries = etchosts.GetNetworkHostEntries(c.state.NetworkStatus, names...)
- case c.config.NetMode.IsSlirp4netns():
- ip, err := GetSlirp4netnsIP(c.slirp4netnsSubnet)
- if err != nil {
- return nil, err
- }
- entries = etchosts.HostEntries{{IP: ip.String(), Names: names}}
- default:
- // check for net=none
- if !c.config.CreateNetNS {
- for _, ns := range c.config.Spec.Linux.Namespaces {
- if ns.Type == spec.NetworkNamespace {
- if ns.Path == "" {
- entries = etchosts.HostEntries{{IP: "127.0.0.1", Names: names}}
- }
- break
- }
- }
- }
- }
- return entries, nil
-}
-
-func (c *Container) createHosts() error {
- var containerIPsEntries etchosts.HostEntries
- var err error
- // if we configure the netns after the container create we should not add
- // the hosts here since we have no information about the actual ips
- // instead we will add them in c.completeNetworkSetup()
- if !c.config.PostConfigureNetNS {
- containerIPsEntries, err = c.getHostsEntries()
- if err != nil {
- return fmt.Errorf("failed to get container ip host entries: %w", err)
- }
- }
- baseHostFile, err := etchosts.GetBaseHostFile(c.runtime.config.Containers.BaseHostsFile, c.state.Mountpoint)
- if err != nil {
- return err
- }
-
- targetFile := filepath.Join(c.state.RunDir, "hosts")
- err = etchosts.New(&etchosts.Params{
- BaseFile: baseHostFile,
- ExtraHosts: c.config.HostAdd,
- ContainerIPs: containerIPsEntries,
- HostContainersInternalIP: etchosts.GetHostContainersInternalIP(c.runtime.config, c.state.NetworkStatus, c.runtime.network),
- TargetFile: targetFile,
- })
- if err != nil {
- return err
- }
-
- return c.bindMountRootFile(targetFile, config.DefaultHostsFile)
-}
-
-// bindMountRootFile will chown and relabel the source file to make it usable in the container.
-// It will also add the path to the container bind mount map.
-// source is the path on the host, dest is the path in the container.
-func (c *Container) bindMountRootFile(source, dest string) error {
- if err := os.Chown(source, c.RootUID(), c.RootGID()); err != nil {
- return err
- }
- if err := label.Relabel(source, c.MountLabel(), false); err != nil {
- return err
- }
-
- return c.mountIntoRootDirs(dest, source)
-}
-
-// generateGroupEntry generates an entry or entries into /etc/group as
-// required by container configuration.
-// Generally speaking, we will make an entry under two circumstances:
-// 1. The container is started as a specific user:group, and that group is both
-// numeric, and does not already exist in /etc/group.
-// 2. It is requested that Libpod add the group that launched Podman to
-// /etc/group via AddCurrentUserPasswdEntry (though this does not trigger if
-// the group in question already exists in /etc/passwd).
-// Returns group entry (as a string that can be appended to /etc/group) and any
-// error that occurred.
-func (c *Container) generateGroupEntry() (string, error) {
- groupString := ""
-
- // Things we *can't* handle: adding the user we added in
- // generatePasswdEntry to any *existing* groups.
- addedGID := 0
- if c.config.AddCurrentUserPasswdEntry {
- entry, gid, err := c.generateCurrentUserGroupEntry()
- if err != nil {
- return "", err
- }
- groupString += entry
- addedGID = gid
- }
- if c.config.User != "" {
- entry, err := c.generateUserGroupEntry(addedGID)
- if err != nil {
- return "", err
- }
- groupString += entry
- }
-
- return groupString, nil
-}
-
-// Make an entry in /etc/group for the group of the user running podman iff we
-// are rootless.
-func (c *Container) generateCurrentUserGroupEntry() (string, int, error) {
- gid := rootless.GetRootlessGID()
- if gid == 0 {
- return "", 0, nil
- }
-
- g, err := user.LookupGroupId(strconv.Itoa(gid))
- if err != nil {
- return "", 0, fmt.Errorf("failed to get current group: %w", err)
- }
-
- // Look up group name to see if it exists in the image.
- _, err = lookup.GetGroup(c.state.Mountpoint, g.Name)
- if err != runcuser.ErrNoGroupEntries {
- return "", 0, err
- }
-
- // Look up GID to see if it exists in the image.
- _, err = lookup.GetGroup(c.state.Mountpoint, g.Gid)
- if err != runcuser.ErrNoGroupEntries {
- return "", 0, err
- }
-
- // We need to get the username of the rootless user so we can add it to
- // the group.
- username := ""
- uid := rootless.GetRootlessUID()
- if uid != 0 {
- u, err := user.LookupId(strconv.Itoa(uid))
- if err != nil {
- return "", 0, fmt.Errorf("failed to get current user to make group entry: %w", err)
- }
- username = u.Username
- }
-
- // Make the entry.
- return fmt.Sprintf("%s:x:%s:%s\n", g.Name, g.Gid, username), gid, nil
-}
-
-// Make an entry in /etc/group for the group the container was specified to run
-// as.
-func (c *Container) generateUserGroupEntry(addedGID int) (string, error) {
- if c.config.User == "" {
- return "", nil
- }
-
- splitUser := strings.SplitN(c.config.User, ":", 2)
- group := splitUser[0]
- if len(splitUser) > 1 {
- group = splitUser[1]
- }
-
- gid, err := strconv.ParseUint(group, 10, 32)
- if err != nil {
- return "", nil //nolint: nilerr
- }
-
- if addedGID != 0 && addedGID == int(gid) {
- return "", nil
- }
-
- // Check if the group already exists
- _, err = lookup.GetGroup(c.state.Mountpoint, group)
- if err != runcuser.ErrNoGroupEntries {
- return "", err
- }
-
- return fmt.Sprintf("%d:x:%d:%s\n", gid, gid, splitUser[0]), nil
-}
-
-// generatePasswdEntry generates an entry or entries into /etc/passwd as
-// required by container configuration.
-// Generally speaking, we will make an entry under two circumstances:
-// 1. The container is started as a specific user who is not in /etc/passwd.
-// This only triggers if the user is given as a *numeric* ID.
-// 2. It is requested that Libpod add the user that launched Podman to
-// /etc/passwd via AddCurrentUserPasswdEntry (though this does not trigger if
-// the user in question already exists in /etc/passwd) or the UID to be added
-// is 0).
-// 3. The user specified additional host user accounts to add the the /etc/passwd file
-// Returns password entry (as a string that can be appended to /etc/passwd) and
-// any error that occurred.
-func (c *Container) generatePasswdEntry() (string, error) {
- passwdString := ""
-
- addedUID := 0
- for _, userid := range c.config.HostUsers {
- // Look up User on host
- u, err := util.LookupUser(userid)
- if err != nil {
- return "", err
- }
- entry, err := c.userPasswdEntry(u)
- if err != nil {
- return "", err
- }
- passwdString += entry
- }
- if c.config.AddCurrentUserPasswdEntry {
- entry, uid, _, err := c.generateCurrentUserPasswdEntry()
- if err != nil {
- return "", err
- }
- passwdString += entry
- addedUID = uid
- }
- if c.config.User != "" {
- entry, err := c.generateUserPasswdEntry(addedUID)
- if err != nil {
- return "", err
- }
- passwdString += entry
- }
-
- return passwdString, nil
-}
-
-// generateCurrentUserPasswdEntry generates an /etc/passwd entry for the user
-// running the container engine.
-// Returns a passwd entry for the user, and the UID and GID of the added entry.
-func (c *Container) generateCurrentUserPasswdEntry() (string, int, int, error) {
- uid := rootless.GetRootlessUID()
- if uid == 0 {
- return "", 0, 0, nil
- }
-
- u, err := user.LookupId(strconv.Itoa(uid))
- if err != nil {
- return "", 0, 0, fmt.Errorf("failed to get current user: %w", err)
- }
- pwd, err := c.userPasswdEntry(u)
- if err != nil {
- return "", 0, 0, err
- }
-
- return pwd, uid, rootless.GetRootlessGID(), nil
-}
-
-func (c *Container) userPasswdEntry(u *user.User) (string, error) {
- // Look up the user to see if it exists in the container image.
- _, err := lookup.GetUser(c.state.Mountpoint, u.Username)
- if err != runcuser.ErrNoPasswdEntries {
- return "", err
- }
-
- // Look up the UID to see if it exists in the container image.
- _, err = lookup.GetUser(c.state.Mountpoint, u.Uid)
- if err != runcuser.ErrNoPasswdEntries {
- return "", err
- }
-
- // If the user's actual home directory exists, or was mounted in - use
- // that.
- homeDir := c.WorkingDir()
- hDir := u.HomeDir
- for hDir != "/" {
- if MountExists(c.config.Spec.Mounts, hDir) {
- homeDir = u.HomeDir
- break
- }
- hDir = filepath.Dir(hDir)
- }
- if homeDir != u.HomeDir {
- for _, hDir := range c.UserVolumes() {
- if hDir == u.HomeDir {
- homeDir = u.HomeDir
- break
- }
- }
- }
- // Set HOME environment if not already set
- hasHomeSet := false
- for _, s := range c.config.Spec.Process.Env {
- if strings.HasPrefix(s, "HOME=") {
- hasHomeSet = true
- break
- }
- }
- if !hasHomeSet {
- c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", homeDir))
- }
- if c.config.PasswdEntry != "" {
- return c.passwdEntry(u.Username, u.Uid, u.Gid, u.Name, homeDir), nil
- }
-
- return fmt.Sprintf("%s:*:%s:%s:%s:%s:/bin/sh\n", u.Username, u.Uid, u.Gid, u.Name, homeDir), nil
-}
-
-// generateUserPasswdEntry generates an /etc/passwd entry for the container user
-// to run in the container.
-// The UID and GID of the added entry will also be returned.
-// Accepts one argument, that being any UID that has already been added to the
-// passwd file by other functions; if it matches the UID we were given, we don't
-// need to do anything.
-func (c *Container) generateUserPasswdEntry(addedUID int) (string, error) {
- var (
- groupspec string
- gid int
- )
- if c.config.User == "" {
- return "", nil
- }
- splitSpec := strings.SplitN(c.config.User, ":", 2)
- userspec := splitSpec[0]
- if len(splitSpec) > 1 {
- groupspec = splitSpec[1]
- }
- // If a non numeric User, then don't generate passwd
- uid, err := strconv.ParseUint(userspec, 10, 32)
- if err != nil {
- return "", nil //nolint: nilerr
- }
-
- if addedUID != 0 && int(uid) == addedUID {
- return "", nil
- }
-
- // Look up the user to see if it exists in the container image
- _, err = lookup.GetUser(c.state.Mountpoint, userspec)
- if err != runcuser.ErrNoPasswdEntries {
- return "", err
- }
-
- if groupspec != "" {
- ugid, err := strconv.ParseUint(groupspec, 10, 32)
- if err == nil {
- gid = int(ugid)
- } else {
- group, err := lookup.GetGroup(c.state.Mountpoint, groupspec)
- if err != nil {
- return "", fmt.Errorf("unable to get gid %s from group file: %w", groupspec, err)
- }
- gid = group.Gid
- }
- }
-
- if c.config.PasswdEntry != "" {
- entry := c.passwdEntry(fmt.Sprintf("%d", uid), fmt.Sprintf("%d", uid), fmt.Sprintf("%d", gid), "container user", c.WorkingDir())
- return entry, nil
- }
-
- return fmt.Sprintf("%d:*:%d:%d:container user:%s:/bin/sh\n", uid, uid, gid, c.WorkingDir()), nil
-}
-
-func (c *Container) passwdEntry(username string, uid, gid, name, homeDir string) string {
- s := c.config.PasswdEntry
- s = strings.ReplaceAll(s, "$USERNAME", username)
- s = strings.ReplaceAll(s, "$UID", uid)
- s = strings.ReplaceAll(s, "$GID", gid)
- s = strings.ReplaceAll(s, "$NAME", name)
- s = strings.ReplaceAll(s, "$HOME", homeDir)
- return s + "\n"
-}
-
-// generatePasswdAndGroup generates container-specific passwd and group files
-// iff g.config.User is a number or we are configured to make a passwd entry for
-// the current user or the user specified HostsUsers
-// Returns path to file to mount at /etc/passwd, path to file to mount at
-// /etc/group, and any error that occurred. If no passwd/group file were
-// required, the empty string will be returned for those path (this may occur
-// even if no error happened).
-// This may modify the mounted container's /etc/passwd and /etc/group instead of
-// making copies to bind-mount in, so we don't break useradd (it wants to make a
-// copy of /etc/passwd and rename the copy to /etc/passwd, which is impossible
-// with a bind mount). This is done in cases where the container is *not*
-// read-only. In this case, the function will return nothing ("", "", nil).
-func (c *Container) generatePasswdAndGroup() (string, string, error) {
- if !c.config.AddCurrentUserPasswdEntry && c.config.User == "" &&
- len(c.config.HostUsers) == 0 {
- return "", "", nil
- }
-
- needPasswd := true
- needGroup := true
-
- // First, check if there's a mount at /etc/passwd or group, we don't
- // want to interfere with user mounts.
- if MountExists(c.config.Spec.Mounts, "/etc/passwd") {
- needPasswd = false
- }
- if MountExists(c.config.Spec.Mounts, "/etc/group") {
- needGroup = false
- }
-
- // Next, check if we already made the files. If we didn't, don't need to
- // do anything more.
- if needPasswd {
- passwdPath := filepath.Join(c.config.StaticDir, "passwd")
- if _, err := os.Stat(passwdPath); err == nil {
- needPasswd = false
- }
- }
- if needGroup {
- groupPath := filepath.Join(c.config.StaticDir, "group")
- if _, err := os.Stat(groupPath); err == nil {
- needGroup = false
- }
- }
-
- // If we don't need a /etc/passwd or /etc/group at this point we can
- // just return.
- if !needPasswd && !needGroup {
- return "", "", nil
- }
-
- passwdPath := ""
- groupPath := ""
-
- ro := c.IsReadOnly()
-
- if needPasswd {
- passwdEntry, err := c.generatePasswdEntry()
- if err != nil {
- return "", "", err
- }
-
- needsWrite := passwdEntry != ""
- switch {
- case ro && needsWrite:
- logrus.Debugf("Making /etc/passwd for container %s", c.ID())
- originPasswdFile, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/passwd")
- if err != nil {
- return "", "", fmt.Errorf("error creating path to container %s /etc/passwd: %w", c.ID(), err)
- }
- orig, err := ioutil.ReadFile(originPasswdFile)
- if err != nil && !os.IsNotExist(err) {
- return "", "", err
- }
- passwdFile, err := c.writeStringToStaticDir("passwd", string(orig)+passwdEntry)
- if err != nil {
- return "", "", fmt.Errorf("failed to create temporary passwd file: %w", err)
- }
- if err := os.Chmod(passwdFile, 0644); err != nil {
- return "", "", err
- }
- passwdPath = passwdFile
- case !ro && needsWrite:
- logrus.Debugf("Modifying container %s /etc/passwd", c.ID())
- containerPasswd, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/passwd")
- if err != nil {
- return "", "", fmt.Errorf("error looking up location of container %s /etc/passwd: %w", c.ID(), err)
- }
-
- f, err := os.OpenFile(containerPasswd, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600)
- if err != nil {
- return "", "", fmt.Errorf("container %s: %w", c.ID(), err)
- }
- defer f.Close()
-
- if _, err := f.WriteString(passwdEntry); err != nil {
- return "", "", fmt.Errorf("unable to append to container %s /etc/passwd: %w", c.ID(), err)
- }
- default:
- logrus.Debugf("Not modifying container %s /etc/passwd", c.ID())
- }
- }
- if needGroup {
- groupEntry, err := c.generateGroupEntry()
- if err != nil {
- return "", "", err
- }
-
- needsWrite := groupEntry != ""
- switch {
- case ro && needsWrite:
- logrus.Debugf("Making /etc/group for container %s", c.ID())
- originGroupFile, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/group")
- if err != nil {
- return "", "", fmt.Errorf("error creating path to container %s /etc/group: %w", c.ID(), err)
- }
- orig, err := ioutil.ReadFile(originGroupFile)
- if err != nil && !os.IsNotExist(err) {
- return "", "", err
- }
- groupFile, err := c.writeStringToStaticDir("group", string(orig)+groupEntry)
- if err != nil {
- return "", "", fmt.Errorf("failed to create temporary group file: %w", err)
- }
- if err := os.Chmod(groupFile, 0644); err != nil {
- return "", "", err
- }
- groupPath = groupFile
- case !ro && needsWrite:
- logrus.Debugf("Modifying container %s /etc/group", c.ID())
- containerGroup, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/group")
- if err != nil {
- return "", "", fmt.Errorf("error looking up location of container %s /etc/group: %w", c.ID(), err)
- }
-
- f, err := os.OpenFile(containerGroup, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600)
- if err != nil {
- return "", "", fmt.Errorf("container %s: %w", c.ID(), err)
- }
- defer f.Close()
-
- if _, err := f.WriteString(groupEntry); err != nil {
- return "", "", fmt.Errorf("unable to append to container %s /etc/group: %w", c.ID(), err)
- }
- default:
- logrus.Debugf("Not modifying container %s /etc/group", c.ID())
- }
- }
-
- return passwdPath, groupPath, nil
-}
-
func isRootlessCgroupSet(cgroup string) bool {
// old versions of podman were setting the CgroupParent to CgroupfsDefaultCgroupParent
// by default. Avoid breaking these versions and check whether the cgroup parent is
@@ -3058,198 +398,257 @@ func (c *Container) getOCICgroupPath() (string, error) {
}
}
-func (c *Container) copyTimezoneFile(zonePath string) (string, error) {
- localtimeCopy := filepath.Join(c.state.RunDir, "localtime")
- file, err := os.Stat(zonePath)
- if err != nil {
- return "", err
- }
- if file.IsDir() {
- return "", errors.New("invalid timezone: is a directory")
- }
- src, err := os.Open(zonePath)
- if err != nil {
- return "", err
- }
- defer src.Close()
- dest, err := os.Create(localtimeCopy)
- if err != nil {
- return "", err
- }
- defer dest.Close()
- _, err = io.Copy(dest, src)
- if err != nil {
- return "", err
- }
- if err := c.relabel(localtimeCopy, c.config.MountLabel, false); err != nil {
- return "", err
+// If the container is rootless, set up the slirp4netns network
+func (c *Container) setupRootlessNetwork() error {
+ // set up slirp4netns again because slirp4netns will die when conmon exits
+ if c.config.NetMode.IsSlirp4netns() {
+ err := c.runtime.setupSlirp4netns(c, c.state.NetNS)
+ if err != nil {
+ return err
+ }
}
- if err := dest.Chown(c.RootUID(), c.RootGID()); err != nil {
- return "", err
+
+ // set up rootlesskit port forwarder again since it dies when conmon exits
+ // we use rootlesskit port forwarder only as rootless and when bridge network is used
+ if rootless.IsRootless() && c.config.NetMode.IsBridge() && len(c.config.PortMappings) > 0 {
+ err := c.runtime.setupRootlessPortMappingViaRLK(c, c.state.NetNS.Path(), c.state.NetworkStatus)
+ if err != nil {
+ return err
+ }
}
- return localtimeCopy, err
+ return nil
}
-func (c *Container) cleanupOverlayMounts() error {
- return overlay.CleanupContent(c.config.StaticDir)
+func openDirectory(path string) (fd int, err error) {
+ return unix.Open(path, unix.O_RDONLY|unix.O_PATH, 0)
}
-// Creates and mounts an empty dir to mount secrets into, if it does not already exist
-func (c *Container) createSecretMountDir() error {
- src := filepath.Join(c.state.RunDir, "/run/secrets")
- _, err := os.Stat(src)
- if os.IsNotExist(err) {
- oldUmask := umask.Set(0)
- defer umask.Set(oldUmask)
+func (c *Container) addNetworkNamespace(g *generate.Generator) error {
+ if c.config.CreateNetNS {
+ if c.config.PostConfigureNetNS {
+ if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), ""); err != nil {
+ return err
+ }
+ } else {
+ if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), c.state.NetNS.Path()); err != nil {
+ return err
+ }
+ }
+ }
+ return nil
+}
+
+func (c *Container) addSystemdMounts(g *generate.Generator) error {
+ if c.Systemd() {
+ if err := c.setupSystemd(g.Mounts(), *g); err != nil {
+ return fmt.Errorf("error adding systemd-specific mounts: %w", err)
+ }
+ }
+ return nil
+}
- if err := os.MkdirAll(src, 0755); err != nil {
+func (c *Container) addSharedNamespaces(g *generate.Generator) error {
+ if c.config.IPCNsCtr != "" {
+ if err := c.addNamespaceContainer(g, IPCNS, c.config.IPCNsCtr, spec.IPCNamespace); err != nil {
return err
}
- if err := label.Relabel(src, c.config.MountLabel, false); err != nil {
+ }
+ if c.config.MountNsCtr != "" {
+ if err := c.addNamespaceContainer(g, MountNS, c.config.MountNsCtr, spec.MountNamespace); err != nil {
return err
}
- if err := os.Chown(src, c.RootUID(), c.RootGID()); err != nil {
+ }
+ if c.config.NetNsCtr != "" {
+ if err := c.addNamespaceContainer(g, NetNS, c.config.NetNsCtr, spec.NetworkNamespace); err != nil {
return err
}
- c.state.BindMounts["/run/secrets"] = src
- return nil
+ }
+ if c.config.PIDNsCtr != "" {
+ if err := c.addNamespaceContainer(g, PIDNS, c.config.PIDNsCtr, spec.PIDNamespace); err != nil {
+ return err
+ }
+ }
+ if c.config.UserNsCtr != "" {
+ if err := c.addNamespaceContainer(g, UserNS, c.config.UserNsCtr, spec.UserNamespace); err != nil {
+ return err
+ }
+ if len(g.Config.Linux.UIDMappings) == 0 {
+ // runc complains if no mapping is specified, even if we join another ns. So provide a dummy mapping
+ g.AddLinuxUIDMapping(uint32(0), uint32(0), uint32(1))
+ g.AddLinuxGIDMapping(uint32(0), uint32(0), uint32(1))
+ }
}
- return err
-}
-
-// Fix ownership and permissions of the specified volume if necessary.
-func (c *Container) fixVolumePermissions(v *ContainerNamedVolume) error {
- vol, err := c.runtime.state.Volume(v.Name)
+ availableUIDs, availableGIDs, err := rootless.GetAvailableIDMaps()
if err != nil {
- return fmt.Errorf("error retrieving named volume %s for container %s: %w", v.Name, c.ID(), err)
+ if os.IsNotExist(err) {
+ // The kernel-provided files only exist if user namespaces are supported
+ logrus.Debugf("User or group ID mappings not available: %s", err)
+ } else {
+ return err
+ }
+ } else {
+ g.Config.Linux.UIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.UIDMappings, availableUIDs)
+ g.Config.Linux.GIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.GIDMappings, availableGIDs)
}
- vol.lock.Lock()
- defer vol.lock.Unlock()
+ // Hostname handling:
+ // If we have a UTS namespace, set Hostname in the OCI spec.
+ // Set the HOSTNAME environment variable unless explicitly overridden by
+ // the user (already present in OCI spec). If we don't have a UTS ns,
+ // set it to the host's hostname instead.
+ hostname := c.Hostname()
+ foundUTS := false
- // The volume may need a copy-up. Check the state.
- if err := vol.update(); err != nil {
- return err
+ for _, i := range c.config.Spec.Linux.Namespaces {
+ if i.Type == spec.UTSNamespace && i.Path == "" {
+ foundUTS = true
+ g.SetHostname(hostname)
+ break
+ }
}
-
- // Volumes owned by a volume driver are not chowned - we don't want to
- // mess with a mount not managed by us.
- if vol.state.NeedsChown && !vol.UsesVolumeDriver() {
- vol.state.NeedsChown = false
-
- uid := int(c.config.Spec.Process.User.UID)
- gid := int(c.config.Spec.Process.User.GID)
-
- if c.config.IDMappings.UIDMap != nil {
- p := idtools.IDPair{
- UID: uid,
- GID: gid,
- }
- mappings := idtools.NewIDMappingsFromMaps(c.config.IDMappings.UIDMap, c.config.IDMappings.GIDMap)
- newPair, err := mappings.ToHost(p)
- if err != nil {
- return fmt.Errorf("error mapping user %d:%d: %w", uid, gid, err)
- }
- uid = newPair.UID
- gid = newPair.GID
+ if !foundUTS {
+ tmpHostname, err := os.Hostname()
+ if err != nil {
+ return err
}
+ hostname = tmpHostname
+ }
+ needEnv := true
+ for _, checkEnv := range g.Config.Process.Env {
+ if strings.SplitN(checkEnv, "=", 2)[0] == "HOSTNAME" {
+ needEnv = false
+ break
+ }
+ }
+ if needEnv {
+ g.AddProcessEnv("HOSTNAME", hostname)
+ }
- vol.state.UIDChowned = uid
- vol.state.GIDChowned = gid
-
- if err := vol.save(); err != nil {
+ if c.config.UTSNsCtr != "" {
+ if err := c.addNamespaceContainer(g, UTSNS, c.config.UTSNsCtr, spec.UTSNamespace); err != nil {
return err
}
-
- mountPoint, err := vol.MountPoint()
- if err != nil {
+ }
+ if c.config.CgroupNsCtr != "" {
+ if err := c.addNamespaceContainer(g, CgroupNS, c.config.CgroupNsCtr, spec.CgroupNamespace); err != nil {
return err
}
+ }
- if err := os.Lchown(mountPoint, uid, gid); err != nil {
+ if c.config.UserNsCtr == "" && c.config.IDMappings.AutoUserNs {
+ if err := g.AddOrReplaceLinuxNamespace(string(spec.UserNamespace), ""); err != nil {
return err
}
+ g.ClearLinuxUIDMappings()
+ for _, uidmap := range c.config.IDMappings.UIDMap {
+ g.AddLinuxUIDMapping(uint32(uidmap.HostID), uint32(uidmap.ContainerID), uint32(uidmap.Size))
+ }
+ g.ClearLinuxGIDMappings()
+ for _, gidmap := range c.config.IDMappings.GIDMap {
+ g.AddLinuxGIDMapping(uint32(gidmap.HostID), uint32(gidmap.ContainerID), uint32(gidmap.Size))
+ }
+ }
+ return nil
+}
- // Make sure the new volume matches the permissions of the target directory.
- // https://github.com/containers/podman/issues/10188
- st, err := os.Lstat(filepath.Join(c.state.Mountpoint, v.Dest))
- if err == nil {
- if stat, ok := st.Sys().(*syscall.Stat_t); ok {
- if err := os.Lchown(mountPoint, int(stat.Uid), int(stat.Gid)); err != nil {
- return err
+func (c *Container) addRootPropagation(g *generate.Generator, mounts []spec.Mount) error {
+ // Determine property of RootPropagation based on volume properties. If
+ // a volume is shared, then keep root propagation shared. This should
+ // work for slave and private volumes too.
+ //
+ // For slave volumes, it can be either [r]shared/[r]slave.
+ //
+ // For private volumes any root propagation value should work.
+ rootPropagation := ""
+ for _, m := range mounts {
+ for _, opt := range m.Options {
+ switch opt {
+ case MountShared, MountRShared:
+ if rootPropagation != MountShared && rootPropagation != MountRShared {
+ rootPropagation = MountShared
+ }
+ case MountSlave, MountRSlave:
+ if rootPropagation != MountShared && rootPropagation != MountRShared && rootPropagation != MountSlave && rootPropagation != MountRSlave {
+ rootPropagation = MountRSlave
}
}
- if err := os.Chmod(mountPoint, st.Mode()); err != nil {
- return err
- }
- stat := st.Sys().(*syscall.Stat_t)
- atime := time.Unix(int64(stat.Atim.Sec), int64(stat.Atim.Nsec)) //nolint: unconvert
- if err := os.Chtimes(mountPoint, atime, st.ModTime()); err != nil {
- return err
- }
- } else if !os.IsNotExist(err) {
+ }
+ }
+ if rootPropagation != "" {
+ logrus.Debugf("Set root propagation to %q", rootPropagation)
+ if err := g.SetLinuxRootPropagation(rootPropagation); err != nil {
return err
}
}
return nil
}
-func (c *Container) relabel(src, mountLabel string, recurse bool) error {
- if !selinux.GetEnabled() || mountLabel == "" {
- return nil
- }
- // only relabel on initial creation of container
- if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateUnknown) {
- label, err := label.FileLabel(src)
- if err != nil {
- return err
- }
- // If labels are different, might be on a tmpfs
- if label == mountLabel {
- return nil
- }
+func (c *Container) setProcessLabel(g *generate.Generator) {
+ g.SetProcessSelinuxLabel(c.ProcessLabel())
+}
+
+func (c *Container) setMountLabel(g *generate.Generator) {
+ g.SetLinuxMountLabel(c.MountLabel())
+}
+
+func (c *Container) setCgroupsPath(g *generate.Generator) error {
+ cgroupPath, err := c.getOCICgroupPath()
+ if err != nil {
+ return err
}
- return label.Relabel(src, mountLabel, recurse)
+ g.SetLinuxCgroupsPath(cgroupPath)
+ return nil
}
-func (c *Container) ChangeHostPathOwnership(src string, recurse bool, uid, gid int) error {
- // only chown on initial creation of container
- if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateUnknown) {
- st, err := os.Stat(src)
+func (c *Container) addSlirp4netnsDNS(nameservers []string) []string {
+ // slirp4netns has a built in DNS forwarder.
+ if c.config.NetMode.IsSlirp4netns() {
+ slirp4netnsDNS, err := GetSlirp4netnsDNS(c.slirp4netnsSubnet)
if err != nil {
- return err
- }
-
- // If labels are different, might be on a tmpfs
- if int(st.Sys().(*syscall.Stat_t).Uid) == uid && int(st.Sys().(*syscall.Stat_t).Gid) == gid {
- return nil
+ logrus.Warn("Failed to determine Slirp4netns DNS: ", err.Error())
+ } else {
+ nameservers = append(nameservers, slirp4netnsDNS.String())
}
}
- return chown.ChangeHostPathOwnership(src, recurse, uid, gid)
+ return nameservers
}
-// If the container is rootless, set up the slirp4netns network
-func (c *Container) setupRootlessNetwork() error {
- // set up slirp4netns again because slirp4netns will die when conmon exits
+func (c *Container) isSlirp4netnsIPv6() (bool, error) {
if c.config.NetMode.IsSlirp4netns() {
- err := c.runtime.setupSlirp4netns(c, c.state.NetNS)
+ ctrNetworkSlipOpts := []string{}
+ if c.config.NetworkOptions != nil {
+ ctrNetworkSlipOpts = append(ctrNetworkSlipOpts, c.config.NetworkOptions["slirp4netns"]...)
+ }
+ slirpOpts, err := parseSlirp4netnsNetworkOptions(c.runtime, ctrNetworkSlipOpts)
if err != nil {
- return err
+ return false, err
}
+ return slirpOpts.enableIPv6, nil
}
- // set up rootlesskit port forwarder again since it dies when conmon exits
- // we use rootlesskit port forwarder only as rootless and when bridge network is used
- if rootless.IsRootless() && c.config.NetMode.IsBridge() && len(c.config.PortMappings) > 0 {
- err := c.runtime.setupRootlessPortMappingViaRLK(c, c.state.NetNS.Path(), c.state.NetworkStatus)
- if err != nil {
- return err
+ return false, nil
+}
+
+// check for net=none
+func (c *Container) hasNetNone() bool {
+ if !c.config.CreateNetNS {
+ for _, ns := range c.config.Spec.Linux.Namespaces {
+ if ns.Type == spec.NetworkNamespace {
+ if ns.Path == "" {
+ return true
+ }
+ }
}
}
- return nil
+ return false
}
-func openDirectory(path string) (fd int, err error) {
- return unix.Open(path, unix.O_RDONLY|unix.O_PATH, 0)
+func setVolumeAtime(mountPoint string, st os.FileInfo) error {
+ stat := st.Sys().(*syscall.Stat_t)
+ atime := time.Unix(int64(stat.Atim.Sec), int64(stat.Atim.Nsec)) //nolint: unconvert
+ if err := os.Chtimes(mountPoint, atime, st.ModTime()); err != nil {
+ return err
+ }
+ return nil
}
diff --git a/libpod/container_internal_unsupported.go b/libpod/container_internal_unsupported.go
index de92ff260..1967c577b 100644
--- a/libpod/container_internal_unsupported.go
+++ b/libpod/container_internal_unsupported.go
@@ -1,5 +1,5 @@
-//go:build !linux
-// +build !linux
+//go:build !linux && !freebsd
+// +build !linux,!freebsd
package libpod
@@ -69,21 +69,21 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
// getHostsEntries returns the container ip host entries for the correct netmode
func (c *Container) getHostsEntries() (etchosts.HostEntries, error) {
- return nil, errors.New("unspported (*Container) getHostsEntries")
+ return nil, errors.New("unsupported (*Container) getHostsEntries")
}
// Fix ownership and permissions of the specified volume if necessary.
func (c *Container) fixVolumePermissions(v *ContainerNamedVolume) error {
- return errors.New("unspported (*Container) fixVolumePermissions")
+ return errors.New("unsupported (*Container) fixVolumePermissions")
}
func (c *Container) expectPodCgroup() (bool, error) {
- return false, errors.New("unspported (*Container) expectPodCgroup")
+ return false, errors.New("unsupported (*Container) expectPodCgroup")
}
// Get cgroup path in a format suitable for the OCI spec
func (c *Container) getOCICgroupPath() (string, error) {
- return "", errors.New("unspported (*Container) getOCICgroupPath")
+ return "", errors.New("unsupported (*Container) getOCICgroupPath")
}
func getLocalhostHostEntry(c *Container) etchosts.HostEntries {
diff --git a/libpod/container_linux.go b/libpod/container_linux.go
index 8b517e69f..9c17a1966 100644
--- a/libpod/container_linux.go
+++ b/libpod/container_linux.go
@@ -5,6 +5,7 @@ package libpod
import (
"github.com/containernetworking/plugins/pkg/ns"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
)
type containerPlatformState struct {
@@ -13,3 +14,17 @@ type containerPlatformState struct {
// told to join another container's network namespace
NetNS ns.NetNS `json:"-"`
}
+
+func networkDisabled(c *Container) (bool, error) {
+ if c.config.CreateNetNS {
+ return false, nil
+ }
+ if !c.config.PostConfigureNetNS {
+ for _, ns := range c.config.Spec.Linux.Namespaces {
+ if ns.Type == spec.NetworkNamespace {
+ return ns.Path == "", nil
+ }
+ }
+ }
+ return false, nil
+}
diff --git a/libpod/container_validate.go b/libpod/container_validate.go
index da33f6db7..f4611ecce 100644
--- a/libpod/container_validate.go
+++ b/libpod/container_validate.go
@@ -137,5 +137,9 @@ func (c *Container) validate() error {
if c.config.SdNotifyMode == define.SdNotifyModeIgnore && len(c.config.SdNotifySocket) > 0 {
return fmt.Errorf("cannot set sd-notify socket %q with sd-notify mode %q", c.config.SdNotifySocket, c.config.SdNotifyMode)
}
+
+ if c.config.HealthCheckOnFailureAction != define.HealthCheckOnFailureActionNone && c.config.HealthCheckConfig == nil {
+ return fmt.Errorf("cannot set on-failure action to %s without a health check", c.config.HealthCheckOnFailureAction.String())
+ }
return nil
}
diff --git a/libpod/define/config.go b/libpod/define/config.go
index 34c1a675d..1fad5cc9a 100644
--- a/libpod/define/config.go
+++ b/libpod/define/config.go
@@ -85,4 +85,4 @@ const PassthroughLogging = "passthrough"
const RLimitDefaultValue = uint64(1048576)
// BindMountPrefix distinguishes its annotations from others
-const BindMountPrefix = "bind-mount-options:"
+const BindMountPrefix = "bind-mount-options"
diff --git a/libpod/define/container_inspect.go b/libpod/define/container_inspect.go
index 5982d684c..da5c58f27 100644
--- a/libpod/define/container_inspect.go
+++ b/libpod/define/container_inspect.go
@@ -55,6 +55,8 @@ type InspectContainerConfig struct {
StopSignal uint `json:"StopSignal"`
// Configured healthcheck for the container
Healthcheck *manifest.Schema2HealthConfig `json:"Healthcheck,omitempty"`
+ // HealthcheckOnFailureAction defines an action to take once the container turns unhealthy.
+ HealthcheckOnFailureAction string `json:"HealthcheckOnFailureAction,omitempty"`
// CreateCommand is the full command plus arguments of the process the
// container has been created with.
CreateCommand []string `json:"CreateCommand,omitempty"`
diff --git a/libpod/define/errors.go b/libpod/define/errors.go
index fd27e89de..be471c27e 100644
--- a/libpod/define/errors.go
+++ b/libpod/define/errors.go
@@ -179,6 +179,9 @@ var (
// ErrNetworkInUse indicates the requested operation failed because the network was in use
ErrNetworkInUse = errors.New("network is being used")
+ // ErrNetworkConnected indicates that the requested operation failed because the container is already a network endpoint
+ ErrNetworkConnected = errors.New("network is already connected")
+
// ErrStoreNotInitialized indicates that the container storage was never
// initialized.
ErrStoreNotInitialized = errors.New("the container storage was never initialized")
diff --git a/libpod/define/exec_codes.go b/libpod/define/exec_codes.go
index 3f2da4910..a84730e72 100644
--- a/libpod/define/exec_codes.go
+++ b/libpod/define/exec_codes.go
@@ -11,8 +11,8 @@ const (
// ExecErrorCodeGeneric is the default error code to return from an exec session if libpod failed
// prior to calling the runtime
ExecErrorCodeGeneric = 125
- // ExecErrorCodeCannotInvoke is the error code to return when the runtime fails to invoke a command
- // an example of this can be found by trying to execute a directory:
+ // ExecErrorCodeCannotInvoke is the error code to return when the runtime fails to invoke a command.
+ // An example of this can be found by trying to execute a directory:
// `podman exec -l /etc`
ExecErrorCodeCannotInvoke = 126
// ExecErrorCodeNotFound is the error code to return when a command cannot be found
diff --git a/libpod/define/healthchecks.go b/libpod/define/healthchecks.go
index f71274350..274e02561 100644
--- a/libpod/define/healthchecks.go
+++ b/libpod/define/healthchecks.go
@@ -1,5 +1,10 @@
package define
+import (
+ "fmt"
+ "strings"
+)
+
const (
// HealthCheckHealthy describes a healthy container
HealthCheckHealthy string = "healthy"
@@ -57,3 +62,72 @@ const (
// HealthConfigTestCmdShell runs commands with the system's default shell
HealthConfigTestCmdShell = "CMD-SHELL"
)
+
+// HealthCheckOnFailureAction defines how Podman reacts when a container's health
+// status turns unhealthy.
+type HealthCheckOnFailureAction int
+
+// Healthcheck on-failure actions.
+const (
+ // HealthCheckOnFailureActionNone instructs Podman to not react on an unhealthy status.
+ HealthCheckOnFailureActionNone = iota // Must be first iota for backwards compatibility
+ // HealthCheckOnFailureActionInvalid denotes an invalid on-failure policy.
+ HealthCheckOnFailureActionInvalid = iota
+ // HealthCheckOnFailureActionKill instructs Podman to kill the container on an unhealthy status.
+ HealthCheckOnFailureActionKill = iota
+ // HealthCheckOnFailureActionRestart instructs Podman to restart the container on an unhealthy status.
+ HealthCheckOnFailureActionRestart = iota
+ // HealthCheckOnFailureActionStop instructs Podman to stop the container on an unhealthy status.
+ HealthCheckOnFailureActionStop = iota
+)
+
+// String representations for on-failure actions.
+const (
+ strHealthCheckOnFailureActionNone = "none"
+ strHealthCheckOnFailureActionInvalid = "invalid"
+ strHealthCheckOnFailureActionKill = "kill"
+ strHealthCheckOnFailureActionRestart = "restart"
+ strHealthCheckOnFailureActionStop = "stop"
+)
+
+// SupportedHealthCheckOnFailureActions lists all supported health check on-failure actions.
+var SupportedHealthCheckOnFailureActions = []string{
+ strHealthCheckOnFailureActionNone,
+ strHealthCheckOnFailureActionKill,
+ strHealthCheckOnFailureActionRestart,
+ strHealthCheckOnFailureActionStop,
+}
+
+// String returns the string representation of the HealthCheckOnFailureAction.
+func (h HealthCheckOnFailureAction) String() string {
+ switch h {
+ case HealthCheckOnFailureActionNone:
+ return strHealthCheckOnFailureActionNone
+ case HealthCheckOnFailureActionKill:
+ return strHealthCheckOnFailureActionKill
+ case HealthCheckOnFailureActionRestart:
+ return strHealthCheckOnFailureActionRestart
+ case HealthCheckOnFailureActionStop:
+ return strHealthCheckOnFailureActionStop
+ default:
+ return strHealthCheckOnFailureActionInvalid
+ }
+}
+
+// ParseHealthCheckOnFailureAction parses the specified string into a HealthCheckOnFailureAction.
+// An error is returned for an invalid input.
+func ParseHealthCheckOnFailureAction(s string) (HealthCheckOnFailureAction, error) {
+ switch s {
+ case "", strHealthCheckOnFailureActionNone:
+ return HealthCheckOnFailureActionNone, nil
+ case strHealthCheckOnFailureActionKill:
+ return HealthCheckOnFailureActionKill, nil
+ case strHealthCheckOnFailureActionRestart:
+ return HealthCheckOnFailureActionRestart, nil
+ case strHealthCheckOnFailureActionStop:
+ return HealthCheckOnFailureActionStop, nil
+ default:
+ err := fmt.Errorf("invalid on-failure action %q for health check: supported actions are %s", s, strings.Join(SupportedHealthCheckOnFailureActions, ","))
+ return HealthCheckOnFailureActionInvalid, err
+ }
+}
diff --git a/libpod/define/mount.go b/libpod/define/mount.go
index 1b0d019c8..db444fd83 100644
--- a/libpod/define/mount.go
+++ b/libpod/define/mount.go
@@ -1,8 +1,6 @@
package define
const (
- // TypeBind is the type for mounting host dir
- TypeBind = "bind"
// TypeVolume is the type for named volumes
TypeVolume = "volume"
// TypeTmpfs is the type for mounting tmpfs
diff --git a/libpod/define/mount_freebsd.go b/libpod/define/mount_freebsd.go
new file mode 100644
index 000000000..e080c9ec6
--- /dev/null
+++ b/libpod/define/mount_freebsd.go
@@ -0,0 +1,8 @@
+//go:build freebsd
+
+package define
+
+const (
+ // TypeBind is the type for mounting host dir
+ TypeBind = "nullfs"
+)
diff --git a/libpod/define/mount_linux.go b/libpod/define/mount_linux.go
new file mode 100644
index 000000000..5ef848905
--- /dev/null
+++ b/libpod/define/mount_linux.go
@@ -0,0 +1,8 @@
+//go:build linux
+
+package define
+
+const (
+ // TypeBind is the type for mounting host dir
+ TypeBind = "bind"
+)
diff --git a/libpod/define/mount_unsupported.go b/libpod/define/mount_unsupported.go
new file mode 100644
index 000000000..cb8642fe2
--- /dev/null
+++ b/libpod/define/mount_unsupported.go
@@ -0,0 +1,8 @@
+//go:build !linux && !freebsd
+
+package define
+
+const (
+ // TypeBind is the type for mounting host dir
+ TypeBind = "bind"
+)
diff --git a/libpod/events.go b/libpod/events.go
index c9e4c9d26..60142cb60 100644
--- a/libpod/events.go
+++ b/libpod/events.go
@@ -55,6 +55,12 @@ func (c *Container) newContainerExitedEvent(exitCode int32) {
e.Image = c.config.RootfsImageName
e.Type = events.Container
e.ContainerExitCode = int(exitCode)
+
+ e.Details = events.Details{
+ ID: e.ID,
+ Attributes: c.Labels(),
+ }
+
if err := c.runtime.eventer.Write(e); err != nil {
logrus.Errorf("Unable to write container exited event: %q", err)
}
@@ -70,6 +76,12 @@ func (c *Container) newExecDiedEvent(sessionID string, exitCode int) {
e.ContainerExitCode = exitCode
e.Attributes = make(map[string]string)
e.Attributes["execID"] = sessionID
+
+ e.Details = events.Details{
+ ID: e.ID,
+ Attributes: c.Labels(),
+ }
+
if err := c.runtime.eventer.Write(e); err != nil {
logrus.Errorf("Unable to write exec died event: %q", err)
}
diff --git a/libpod/healthcheck.go b/libpod/healthcheck.go
index 9b9d12b17..e835af9f0 100644
--- a/libpod/healthcheck.go
+++ b/libpod/healthcheck.go
@@ -2,6 +2,7 @@ package libpod
import (
"bufio"
+ "context"
"errors"
"fmt"
"io/ioutil"
@@ -12,6 +13,7 @@ import (
"github.com/containers/podman/v4/libpod/define"
"github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
)
const (
@@ -29,9 +31,14 @@ func (r *Runtime) HealthCheck(name string) (define.HealthCheckStatus, error) {
if err != nil {
return define.HealthCheckContainerNotFound, fmt.Errorf("unable to look up %s to perform a health check: %w", name, err)
}
+
hcStatus, err := checkHealthCheckCanBeRun(container)
if err == nil {
- return container.runHealthCheck()
+ hcStatus, err := container.runHealthCheck()
+ if err := container.processHealthCheckStatus(hcStatus); err != nil {
+ return hcStatus, err
+ }
+ return hcStatus, err
}
return hcStatus, err
}
@@ -127,13 +134,45 @@ func (c *Container) runHealthCheck() (define.HealthCheckStatus, error) {
hcResult = define.HealthCheckFailure
hcErr = fmt.Errorf("healthcheck command exceeded timeout of %s", c.HealthCheckConfig().Timeout.String())
}
+
hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog)
if err := c.updateHealthCheckLog(hcl, inStartPeriod); err != nil {
return hcResult, fmt.Errorf("unable to update health check log %s for %s: %w", c.healthCheckLogPath(), c.ID(), err)
}
+
return hcResult, hcErr
}
+func (c *Container) processHealthCheckStatus(status define.HealthCheckStatus) error {
+ if status == define.HealthCheckSuccess {
+ return nil
+ }
+
+ switch c.config.HealthCheckOnFailureAction {
+ case define.HealthCheckOnFailureActionNone: // Nothing to do
+
+ case define.HealthCheckOnFailureActionKill:
+ if err := c.Kill(uint(unix.SIGKILL)); err != nil {
+ return fmt.Errorf("killing container health-check turned unhealthy: %w", err)
+ }
+
+ case define.HealthCheckOnFailureActionRestart:
+ if err := c.RestartWithTimeout(context.Background(), c.config.StopTimeout); err != nil {
+ return fmt.Errorf("restarting container after health-check turned unhealthy: %w", err)
+ }
+
+ case define.HealthCheckOnFailureActionStop:
+ if err := c.Stop(); err != nil {
+ return fmt.Errorf("stopping container after health-check turned unhealthy: %w", err)
+ }
+
+ default: // Should not happen but better be safe than sorry
+ return fmt.Errorf("unsupported on-failure action %d", c.config.HealthCheckOnFailureAction)
+ }
+
+ return nil
+}
+
func checkHealthCheckCanBeRun(c *Container) (define.HealthCheckStatus, error) {
cstate, err := c.State()
if err != nil {
diff --git a/libpod/kube.go b/libpod/kube.go
index 8c09a6bb5..1f4831006 100644
--- a/libpod/kube.go
+++ b/libpod/kube.go
@@ -62,6 +62,7 @@ func (p *Pod) GenerateForKube(ctx context.Context) (*v1.Pod, []v1.ServicePort, e
extraHost := make([]v1.HostAlias, 0)
hostNetwork := false
+ hostUsers := true
if p.HasInfraContainer() {
infraContainer, err := p.getInfraContainer()
if err != nil {
@@ -87,8 +88,9 @@ func (p *Pod) GenerateForKube(ctx context.Context) (*v1.Pod, []v1.ServicePort, e
return nil, servicePorts, err
}
hostNetwork = infraContainer.NetworkMode() == string(namespaces.NetworkMode(specgen.Host))
+ hostUsers = infraContainer.IDMappings().HostUIDMapping && infraContainer.IDMappings().HostGIDMapping
}
- pod, err := p.podWithContainers(ctx, allContainers, ports, hostNetwork)
+ pod, err := p.podWithContainers(ctx, allContainers, ports, hostNetwork, hostUsers)
if err != nil {
return nil, servicePorts, err
}
@@ -267,6 +269,8 @@ func GenerateKubeServiceFromV1Pod(pod *v1.Pod, servicePorts []v1.ServicePort) (Y
}
service.Spec = serviceSpec
service.ObjectMeta = pod.ObjectMeta
+ // Reset the annotations for the service as the pod annotations are not needed for the service
+ service.ObjectMeta.Annotations = nil
tm := v12.TypeMeta{
Kind: "Service",
APIVersion: pod.TypeMeta.APIVersion,
@@ -346,7 +350,7 @@ func containersToServicePorts(containers []v1.Container) ([]v1.ServicePort, erro
return sps, nil
}
-func (p *Pod) podWithContainers(ctx context.Context, containers []*Container, ports []v1.ContainerPort, hostNetwork bool) (*v1.Pod, error) {
+func (p *Pod) podWithContainers(ctx context.Context, containers []*Container, ports []v1.ContainerPort, hostNetwork, hostUsers bool) (*v1.Pod, error) {
deDupPodVolumes := make(map[string]*v1.Volume)
first := true
podContainers := make([]v1.Container, 0, len(containers))
@@ -383,7 +387,7 @@ func (p *Pod) podWithContainers(ctx context.Context, containers []*Container, po
return nil, err
}
for k, v := range annotations {
- podAnnotations[define.BindMountPrefix+k] = TruncateKubeAnnotation(v)
+ podAnnotations[define.BindMountPrefix] = TruncateKubeAnnotation(k + ":" + v)
}
// Since port bindings for the pod are handled by the
// infra container, wipe them here only if we are sharing the net namespace
@@ -444,10 +448,11 @@ func (p *Pod) podWithContainers(ctx context.Context, containers []*Container, po
podVolumes,
&dnsInfo,
hostNetwork,
+ hostUsers,
hostname), nil
}
-func newPodObject(podName string, annotations map[string]string, initCtrs, containers []v1.Container, volumes []v1.Volume, dnsOptions *v1.PodDNSConfig, hostNetwork bool, hostname string) *v1.Pod {
+func newPodObject(podName string, annotations map[string]string, initCtrs, containers []v1.Container, volumes []v1.Volume, dnsOptions *v1.PodDNSConfig, hostNetwork, hostUsers bool, hostname string) *v1.Pod {
tm := v12.TypeMeta{
Kind: "Pod",
APIVersion: "v1",
@@ -466,12 +471,21 @@ func newPodObject(podName string, annotations map[string]string, initCtrs, conta
CreationTimestamp: v12.Now(),
Annotations: annotations,
}
+ // Set enableServiceLinks to false as podman doesn't use the service port environment variables
+ enableServiceLinks := false
+ // Set automountServiceAccountToken to false as podman doesn't use service account tokens
+ automountServiceAccountToken := false
ps := v1.PodSpec{
- Containers: containers,
- Hostname: hostname,
- HostNetwork: hostNetwork,
- InitContainers: initCtrs,
- Volumes: volumes,
+ Containers: containers,
+ Hostname: hostname,
+ HostNetwork: hostNetwork,
+ InitContainers: initCtrs,
+ Volumes: volumes,
+ EnableServiceLinks: &enableServiceLinks,
+ AutomountServiceAccountToken: &automountServiceAccountToken,
+ }
+ if !hostUsers {
+ ps.HostUsers = &hostUsers
}
if dnsOptions != nil && (len(dnsOptions.Nameservers)+len(dnsOptions.Searches)+len(dnsOptions.Options) > 0) {
ps.DNSConfig = dnsOptions
@@ -490,6 +504,7 @@ func simplePodWithV1Containers(ctx context.Context, ctrs []*Container) (*v1.Pod,
kubeCtrs := make([]v1.Container, 0, len(ctrs))
kubeInitCtrs := []v1.Container{}
kubeVolumes := make([]v1.Volume, 0)
+ hostUsers := true
hostNetwork := true
podDNS := v1.PodDNSConfig{}
kubeAnnotations := make(map[string]string)
@@ -519,12 +534,15 @@ func simplePodWithV1Containers(ctx context.Context, ctrs []*Container) (*v1.Pod,
if !ctr.HostNetwork() {
hostNetwork = false
}
+ if !(ctr.IDMappings().HostUIDMapping && ctr.IDMappings().HostGIDMapping) {
+ hostUsers = false
+ }
kubeCtr, kubeVols, ctrDNS, annotations, err := containerToV1Container(ctx, ctr)
if err != nil {
return nil, err
}
for k, v := range annotations {
- kubeAnnotations[define.BindMountPrefix+k] = TruncateKubeAnnotation(v)
+ kubeAnnotations[define.BindMountPrefix] = TruncateKubeAnnotation(k + ":" + v)
}
if isInit {
kubeInitCtrs = append(kubeInitCtrs, kubeCtr)
@@ -580,6 +598,7 @@ func simplePodWithV1Containers(ctx context.Context, ctrs []*Container) (*v1.Pod,
kubeVolumes,
&podDNS,
hostNetwork,
+ hostUsers,
hostname), nil
}
diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go
index c05796768..c10c3c0b2 100644
--- a/libpod/networking_linux.go
+++ b/libpod/networking_linux.go
@@ -1357,6 +1357,11 @@ func (c *Container) NetworkConnect(nameOrID, netName string, netOpts types.PerNe
}
if err := c.runtime.state.NetworkConnect(c, netName, netOpts); err != nil {
+ // Docker compat: treat requests to attach already attached networks as a no-op, ignoring opts
+ if errors.Is(err, define.ErrNetworkConnected) && c.ensureState(define.ContainerStateConfigured) {
+ return nil
+ }
+
return err
}
c.newNetworkEvent(events.NetworkConnect, netName)
diff --git a/libpod/networking_unsupported.go b/libpod/networking_unsupported.go
index 76ffabb5e..9429287f9 100644
--- a/libpod/networking_unsupported.go
+++ b/libpod/networking_unsupported.go
@@ -5,6 +5,7 @@ package libpod
import (
"errors"
+ "net"
"path/filepath"
"github.com/containers/common/libnetwork/types"
@@ -84,3 +85,7 @@ func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) {
func (c *Container) convertPortMappings() []types.PortMapping {
return []types.PortMapping{}
}
+
+// GetSlirp4netnsIP is the stub used by networking_unsupported.go. It ignores
+// subnet and always returns a nil IP together with a "not implemented" error.
+func GetSlirp4netnsIP(subnet *net.IPNet) (*net.IP, error) {
+	errNotImplemented := errors.New("not implemented GetSlirp4netnsIP")
+	return nil, errNotImplemented
+}
diff --git a/libpod/oci.go b/libpod/oci.go
index 70053db1b..e5b9a0dcd 100644
--- a/libpod/oci.go
+++ b/libpod/oci.go
@@ -5,6 +5,7 @@ import (
"github.com/containers/common/pkg/resize"
"github.com/containers/podman/v4/libpod/define"
+ "github.com/opencontainers/runtime-spec/specs-go"
)
// OCIRuntime is an implementation of an OCI runtime.
@@ -148,6 +149,9 @@ type OCIRuntime interface {
// RuntimeInfo returns verbose information about the runtime.
RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error)
+
+ // UpdateContainer updates the given container's cgroup configuration.
+ UpdateContainer(ctr *Container, res *specs.LinuxResources) error
}
// AttachOptions are options used when attached to a container or an exec
diff --git a/libpod/oci_conmon_attach_common.go b/libpod/oci_conmon_attach_common.go
index a9e9b2bb5..dec749837 100644
--- a/libpod/oci_conmon_attach_common.go
+++ b/libpod/oci_conmon_attach_common.go
@@ -280,20 +280,20 @@ func readStdio(conn *net.UnixConn, streams *define.AttachStreams, receiveStdoutE
var err error
select {
case err = <-receiveStdoutError:
- if err := conn.CloseWrite(); err != nil {
+ if err := socketCloseWrite(conn); err != nil {
logrus.Errorf("Failed to close stdin: %v", err)
}
return err
case err = <-stdinDone:
if err == define.ErrDetach {
- if err := conn.CloseWrite(); err != nil {
+ if err := socketCloseWrite(conn); err != nil {
logrus.Errorf("Failed to close stdin: %v", err)
}
return err
}
if err == nil {
// copy stdin is done, close it
- if connErr := conn.CloseWrite(); connErr != nil {
+ if connErr := socketCloseWrite(conn); connErr != nil {
logrus.Errorf("Unable to close conn: %v", connErr)
}
}
diff --git a/libpod/oci_conmon_common.go b/libpod/oci_conmon_common.go
index 2c7c39726..8ef8ae721 100644
--- a/libpod/oci_conmon_common.go
+++ b/libpod/oci_conmon_common.go
@@ -277,15 +277,6 @@ func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container) error {
ctr.ID(), state.Status, define.ErrInternal)
}
- // Only grab exit status if we were not already stopped
- // If we were, it should already be in the database
- if ctr.state.State == define.ContainerStateStopped && oldState != define.ContainerStateStopped {
- if _, err := ctr.Wait(context.Background()); err != nil {
- logrus.Errorf("Waiting for container %s to exit: %v", ctr.ID(), err)
- }
- return nil
- }
-
// Handle ContainerStateStopping - keep it unless the container
// transitioned to no longer running.
if oldState == define.ContainerStateStopping && (ctr.state.State == define.ContainerStatePaused || ctr.state.State == define.ContainerStateRunning) {
@@ -316,6 +307,52 @@ func (r *ConmonOCIRuntime) StartContainer(ctr *Container) error {
return nil
}
+// UpdateContainer updates the given container's cgroup configuration by
+// invoking the OCI runtime's "update" command. The resources are handed to the
+// runtime through the file produced by generateResourceFile, which is removed
+// again before this method returns.
+func (r *ConmonOCIRuntime) UpdateContainer(ctr *Container, resources *spec.LinuxResources) error {
+	runtimeDir, err := util.GetRuntimeDir()
+	if err != nil {
+		return err
+	}
+	env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+	if path, ok := os.LookupEnv("PATH"); ok {
+		env = append(env, fmt.Sprintf("PATH=%s", path))
+	}
+
+	tempFile, additionalArgs, err := generateResourceFile(resources)
+	if err != nil {
+		return err
+	}
+	// generateResourceFile returns an empty path when there is nothing to
+	// write; only schedule a removal when a file actually exists.
+	if tempFile != "" {
+		defer os.Remove(tempFile)
+	}
+
+	// Assemble the command line in a fresh slice. Appending directly to
+	// r.runtimeFlags could write into its spare capacity and clobber the
+	// arguments of another call that shares the same backing array.
+	args := make([]string, 0, len(r.runtimeFlags)+len(additionalArgs)+2)
+	args = append(args, r.runtimeFlags...)
+	args = append(args, "update")
+	args = append(args, additionalArgs...)
+	args = append(args, ctr.ID())
+	return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...)
+}
+
+// generateResourceFile serializes res as JSON into a temporary file and
+// returns the file's path together with the "--resources=<path>" flag that
+// points at it. A nil res yields an empty path and no flags. The caller is
+// responsible for removing the returned file.
+func generateResourceFile(res *spec.LinuxResources) (string, []string, error) {
+	if res == nil {
+		return "", []string{}, nil
+	}
+
+	// Marshal before creating the file so a failure leaves nothing behind.
+	j, err := json.Marshal(res)
+	if err != nil {
+		return "", nil, err
+	}
+
+	f, err := ioutil.TempFile("", "podman")
+	if err != nil {
+		return "", nil, err
+	}
+
+	_, err = f.Write(j)
+	// Always release the descriptor; previously it was never closed. A write
+	// error takes precedence over a close error.
+	if closeErr := f.Close(); err == nil {
+		err = closeErr
+	}
+	if err != nil {
+		// Best effort: do not leave a partially written file around.
+		os.Remove(f.Name())
+		return "", nil, err
+	}
+
+	return f.Name(), []string{"--resources=" + f.Name()}, nil
+}
+
// KillContainer sends the given signal to the given container.
// If all is set, send to all PIDs in the container.
// All is only supported if the container created cgroups.
@@ -392,13 +429,11 @@ func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool)
}
}
- if err := r.KillContainer(ctr, 9, all); err != nil {
+ if err := r.KillContainer(ctr, uint(unix.SIGKILL), all); err != nil {
// Again, check if the container is gone. If it is, exit cleanly.
- err := unix.Kill(ctr.state.PID, 0)
- if err == unix.ESRCH {
+ if aliveErr := unix.Kill(ctr.state.PID, 0); errors.Is(aliveErr, unix.ESRCH) {
return nil
}
-
return fmt.Errorf("error sending SIGKILL to container %s: %w", ctr.ID(), err)
}
@@ -440,6 +475,16 @@ func (r *ConmonOCIRuntime) UnpauseContainer(ctr *Container) error {
return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "resume", ctr.ID())...)
}
+// socketCloseWrite shuts down the write side of conn. An ENOTCONN error is
+// reported as success, because it can happen on FreeBSD if the other side of
+// the connection is already closed. Any other error is returned unchanged.
+func socketCloseWrite(conn *net.UnixConn) error {
+	if closeErr := conn.CloseWrite(); !errors.Is(closeErr, syscall.ENOTCONN) {
+		return closeErr
+	}
+	return nil
+}
+
// HTTPAttach performs an attach for the HTTP API.
// The caller must handle closing the HTTP connection after this returns.
// The cancel channel is not closed; it is up to the caller to do so after
@@ -652,7 +697,7 @@ func (r *ConmonOCIRuntime) HTTPAttach(ctr *Container, req *http.Request, w http.
return err
}
// copy stdin is done, close it
- if connErr := conn.CloseWrite(); connErr != nil {
+ if connErr := socketCloseWrite(conn); connErr != nil {
logrus.Errorf("Unable to close conn: %v", connErr)
}
case <-cancel:
diff --git a/libpod/oci_conmon_exec_common.go b/libpod/oci_conmon_exec_common.go
index 16cd7ef9f..735dbb9c4 100644
--- a/libpod/oci_conmon_exec_common.go
+++ b/libpod/oci_conmon_exec_common.go
@@ -12,7 +12,6 @@ import (
"syscall"
"time"
- "github.com/containers/common/pkg/capabilities"
"github.com/containers/common/pkg/config"
"github.com/containers/common/pkg/resize"
cutil "github.com/containers/common/pkg/util"
@@ -386,7 +385,7 @@ func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *Ex
finalEnv = append(finalEnv, fmt.Sprintf("%s=%s", k, v))
}
- processFile, err := prepareProcessExec(c, options, finalEnv, sessionID)
+ processFile, err := c.prepareProcessExec(options, finalEnv, sessionID)
if err != nil {
return nil, nil, err
}
@@ -654,7 +653,7 @@ func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.Resp
return err
}
// copy stdin is done, close it
- if connErr := conn.CloseWrite(); connErr != nil {
+ if connErr := socketCloseWrite(conn); connErr != nil {
logrus.Errorf("Unable to close conn: %v", connErr)
}
case <-cancel:
@@ -665,7 +664,7 @@ func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.Resp
// prepareProcessExec returns the path of the process.json used in runc exec -p
// caller is responsible to close the returned *os.File if needed.
-func prepareProcessExec(c *Container, options *ExecOptions, env []string, sessionID string) (*os.File, error) {
+func (c *Container) prepareProcessExec(options *ExecOptions, env []string, sessionID string) (*os.File, error) {
f, err := ioutil.TempFile(c.execBundlePath(sessionID), "exec-process-")
if err != nil {
return nil, err
@@ -745,34 +744,9 @@ func prepareProcessExec(c *Container, options *ExecOptions, env []string, sessio
pspec.User = processUser
}
- ctrSpec, err := c.specFromState()
- if err != nil {
- return nil, err
- }
-
- allCaps, err := capabilities.BoundingSet()
- if err != nil {
+ if err := c.setProcessCapabilitiesExec(options, user, execUser, pspec); err != nil {
return nil, err
}
- if options.Privileged {
- pspec.Capabilities.Bounding = allCaps
- } else {
- pspec.Capabilities.Bounding = ctrSpec.Process.Capabilities.Bounding
- }
-
- // Always unset the inheritable capabilities similarly to what the Linux kernel does
- // They are used only when using capabilities with uid != 0.
- pspec.Capabilities.Inheritable = []string{}
-
- if execUser.Uid == 0 {
- pspec.Capabilities.Effective = pspec.Capabilities.Bounding
- pspec.Capabilities.Permitted = pspec.Capabilities.Bounding
- } else if user == c.config.User {
- pspec.Capabilities.Effective = ctrSpec.Process.Capabilities.Effective
- pspec.Capabilities.Inheritable = ctrSpec.Process.Capabilities.Effective
- pspec.Capabilities.Permitted = ctrSpec.Process.Capabilities.Effective
- pspec.Capabilities.Ambient = ctrSpec.Process.Capabilities.Effective
- }
hasHomeSet := false
for _, s := range pspec.Env {
diff --git a/libpod/oci_conmon_exec_freebsd.go b/libpod/oci_conmon_exec_freebsd.go
new file mode 100644
index 000000000..bf30404a1
--- /dev/null
+++ b/libpod/oci_conmon_exec_freebsd.go
@@ -0,0 +1,10 @@
+package libpod
+
+import (
+ "github.com/opencontainers/runc/libcontainer/user"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+// setProcessCapabilitiesExec is the variant built from
+// oci_conmon_exec_freebsd.go. It does not touch pspec or any of its other
+// arguments and always returns nil.
+func (c *Container) setProcessCapabilitiesExec(options *ExecOptions, user string, execUser *user.ExecUser, pspec *spec.Process) error {
+ return nil
+}
diff --git a/libpod/oci_conmon_exec_linux.go b/libpod/oci_conmon_exec_linux.go
new file mode 100644
index 000000000..617e8d601
--- /dev/null
+++ b/libpod/oci_conmon_exec_linux.go
@@ -0,0 +1,39 @@
+package libpod
+
+import (
+ "github.com/containers/common/pkg/capabilities"
+ "github.com/opencontainers/runc/libcontainer/user"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+// setProcessCapabilitiesExec fills in pspec.Capabilities for an exec session.
+//
+// The bounding set is the result of capabilities.BoundingSet() for privileged
+// sessions and the container spec's bounding set otherwise. Inheritable is
+// reset to empty. When execUser is UID 0 the effective and permitted sets
+// mirror the bounding set; when user equals the container's configured user,
+// the effective, inheritable, permitted and ambient sets are all taken from
+// the container spec's effective set. It returns an error if the container
+// spec or the bounding set cannot be obtained.
+func (c *Container) setProcessCapabilitiesExec(options *ExecOptions, user string, execUser *user.ExecUser, pspec *spec.Process) error {
+	ctrSpec, err := c.specFromState()
+	if err != nil {
+		return err
+	}
+	fullSet, err := capabilities.BoundingSet()
+	if err != nil {
+		return err
+	}
+
+	if !options.Privileged {
+		pspec.Capabilities.Bounding = ctrSpec.Process.Capabilities.Bounding
+	} else {
+		pspec.Capabilities.Bounding = fullSet
+	}
+
+	// Always unset the inheritable capabilities similarly to what the Linux kernel does
+	// They are used only when using capabilities with uid != 0.
+	pspec.Capabilities.Inheritable = []string{}
+
+	switch {
+	case execUser.Uid == 0:
+		pspec.Capabilities.Effective = pspec.Capabilities.Bounding
+		pspec.Capabilities.Permitted = pspec.Capabilities.Bounding
+	case user == c.config.User:
+		ctrEffective := ctrSpec.Process.Capabilities.Effective
+		pspec.Capabilities.Effective = ctrEffective
+		pspec.Capabilities.Inheritable = ctrEffective
+		pspec.Capabilities.Permitted = ctrEffective
+		pspec.Capabilities.Ambient = ctrEffective
+	}
+	return nil
+}
diff --git a/libpod/oci_conmon_freebsd.go b/libpod/oci_conmon_freebsd.go
index 6f7ac7fc6..d74f2af01 100644
--- a/libpod/oci_conmon_freebsd.go
+++ b/libpod/oci_conmon_freebsd.go
@@ -19,6 +19,9 @@ func (r *ConmonOCIRuntime) withContainerSocketLabel(ctr *Container, closure func
// moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup
// it then signals for conmon to start by sending nonce data down the start fd
func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File) error {
- // No equivalent on FreeBSD
+ // No equivalent to cgroup on FreeBSD, just signal conmon to start
+ if err := writeConmonPipeData(startFd); err != nil {
+ return err
+ }
return nil
}
diff --git a/libpod/oci_missing.go b/libpod/oci_missing.go
index 2ab2b4577..bbf2957ff 100644
--- a/libpod/oci_missing.go
+++ b/libpod/oci_missing.go
@@ -8,6 +8,7 @@ import (
"github.com/containers/common/pkg/resize"
"github.com/containers/podman/v4/libpod/define"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
@@ -80,6 +81,11 @@ func (r *MissingRuntime) StartContainer(ctr *Container) error {
return r.printError()
}
+// UpdateContainer is not available as the runtime is missing. Both arguments
+// are ignored; the method only returns whatever r.printError() reports.
+func (r *MissingRuntime) UpdateContainer(ctr *Container, resources *spec.LinuxResources) error {
+ return r.printError()
+}
+
// KillContainer is not available as the runtime is missing
// TODO: We could attempt to unix.Kill() the PID as recorded in the state if we
// really want to smooth things out? Won't be perfect, but if the container has
diff --git a/libpod/options.go b/libpod/options.go
index d31741094..71ad3d11e 100644
--- a/libpod/options.go
+++ b/libpod/options.go
@@ -1413,9 +1413,10 @@ func WithNamedVolumes(volumes []*ContainerNamedVolume) CtrCreateOption {
}
ctr.config.NamedVolumes = append(ctr.config.NamedVolumes, &ContainerNamedVolume{
- Name: vol.Name,
- Dest: vol.Dest,
- Options: mountOpts,
+ Name: vol.Name,
+ Dest: vol.Dest,
+ Options: mountOpts,
+ IsAnonymous: vol.IsAnonymous,
})
}
@@ -1472,6 +1473,17 @@ func WithHealthCheck(healthCheck *manifest.Schema2HealthConfig) CtrCreateOption
}
}
+// WithHealthCheckOnFailureAction returns a create option that stores action as
+// the on-failure action in the container's health-check config. The option
+// fails with define.ErrCtrFinalized when the container is already marked
+// valid.
+func WithHealthCheckOnFailureAction(action define.HealthCheckOnFailureAction) CtrCreateOption {
+	setAction := func(ctr *Container) error {
+		if !ctr.valid {
+			ctr.config.HealthCheckOnFailureAction = action
+			return nil
+		}
+		return define.ErrCtrFinalized
+	}
+	return setAction
+}
+
// WithPreserveFDs forwards from the process running Libpod into the container
// the given number of extra FDs (starting after the standard streams) to the created container
func WithPreserveFDs(fd uint) CtrCreateOption {
diff --git a/libpod/runtime.go b/libpod/runtime.go
index 9b97fd724..1503b2344 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -5,6 +5,7 @@ import (
"context"
"errors"
"fmt"
+ "math/rand"
"os"
"path/filepath"
"strings"
@@ -112,6 +113,13 @@ type Runtime struct {
secretsManager *secrets.SecretsManager
}
+func init() {
+ // generateName calls namesgenerator.GetRandomName, which uses the
+ // global RNG from math/rand. Seed it here to make sure we
+ // don't get the same name every time.
+ rand.Seed(time.Now().UnixNano())
+}
+
// SetXdgDirs ensures the XDG_RUNTIME_DIR env and XDG_CONFIG_HOME variables are set.
// containers/image uses XDG_RUNTIME_DIR to locate the auth file, XDG_CONFIG_HOME is
// use for the containers.conf configuration file.
diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index 703ae5cbe..fb4f80aa6 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -171,12 +171,17 @@ func (r *Runtime) initContainerVariables(rSpec *spec.Spec, config *ContainerConf
if config == nil {
ctr.config.ID = stringid.GenerateNonCryptoID()
size, err := units.FromHumanSize(r.config.Containers.ShmSize)
- if err != nil {
- return nil, fmt.Errorf("converting containers.conf ShmSize %s to an int: %w", r.config.Containers.ShmSize, err)
+ if useDevShm {
+ if err != nil {
+ return nil, fmt.Errorf("converting containers.conf ShmSize %s to an int: %w", r.config.Containers.ShmSize, err)
+ }
+ ctr.config.ShmSize = size
+ ctr.config.NoShm = false
+ ctr.config.NoShmShare = false
+ } else {
+ ctr.config.NoShm = true
+ ctr.config.NoShmShare = true
}
- ctr.config.ShmSize = size
- ctr.config.NoShm = false
- ctr.config.NoShmShare = false
ctr.config.StopSignal = 15
ctr.config.StopTimeout = r.config.Engine.StopTimeout
@@ -474,6 +479,11 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
return nil, fmt.Errorf("error retrieving named volume %s for new container: %w", vol.Name, err)
}
}
+ if vol.IsAnonymous {
+ // If IsAnonymous is set, make this an anonymous volume;
+ // this is needed for emptyDir volumes from kube YAML files.
+ isAnonymous = true
+ }
logrus.Debugf("Creating new volume %s for container", vol.Name)
@@ -523,7 +533,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
}
}
- if !MountExists(ctr.config.Spec.Mounts, "/dev/shm") && ctr.config.ShmDir == "" && !ctr.config.NoShm {
+ if useDevShm && !MountExists(ctr.config.Spec.Mounts, "/dev/shm") && ctr.config.ShmDir == "" && !ctr.config.NoShm {
ctr.config.ShmDir = filepath.Join(ctr.bundlePath(), "shm")
if err := os.MkdirAll(ctr.config.ShmDir, 0700); err != nil {
if !os.IsExist(err) {
@@ -788,7 +798,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
// Deallocate the container's lock
if err := c.lock.Free(); err != nil {
- if cleanupErr == nil {
+ if cleanupErr == nil && !os.IsNotExist(err) {
cleanupErr = fmt.Errorf("error freeing lock for container %s: %w", c.ID(), err)
} else {
logrus.Errorf("Free container lock: %v", err)
@@ -814,11 +824,11 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
// Ignore error, since podman will report original error
volumesFrom, _ := c.volumesFrom()
if len(volumesFrom) > 0 {
- logrus.Debugf("Cleaning up volume not possible since volume is in use (%s)", v)
+ logrus.Debugf("Cleaning up volume not possible since volume is in use (%s)", v.Name)
continue
}
}
- logrus.Errorf("Cleaning up volume (%s): %v", v, err)
+ logrus.Errorf("Cleaning up volume (%s): %v", v.Name, err)
}
}
}
@@ -968,7 +978,7 @@ func (r *Runtime) evictContainer(ctx context.Context, idOrName string, removeVol
continue
}
if err := r.removeVolume(ctx, volume, false, timeout, false); err != nil && err != define.ErrNoSuchVolume && err != define.ErrVolumeBeingUsed {
- logrus.Errorf("Cleaning up volume (%s): %v", v, err)
+ logrus.Errorf("Cleaning up volume (%s): %v", v.Name, err)
}
}
}
diff --git a/libpod/runtime_ctr_freebsd.go b/libpod/runtime_ctr_freebsd.go
new file mode 100644
index 000000000..a8870a38c
--- /dev/null
+++ b/libpod/runtime_ctr_freebsd.go
@@ -0,0 +1,5 @@
+package libpod
+
+// useDevShm is false in runtime_ctr_freebsd.go. With it false, the code in
+// runtime_ctr.go defaults new containers to NoShm and NoShmShare and does not
+// create a ShmDir for them.
+const useDevShm = false
diff --git a/libpod/runtime_ctr_linux.go b/libpod/runtime_ctr_linux.go
new file mode 100644
index 000000000..7812d8238
--- /dev/null
+++ b/libpod/runtime_ctr_linux.go
@@ -0,0 +1,5 @@
+package libpod
+
+// useDevShm is true in runtime_ctr_linux.go. With it true, the code in
+// runtime_ctr.go applies the containers.conf ShmSize to new containers and
+// may create a ShmDir for them.
+const useDevShm = true
diff --git a/libpod/runtime_test.go b/libpod/runtime_test.go
new file mode 100644
index 000000000..2e16c7fcd
--- /dev/null
+++ b/libpod/runtime_test.go
@@ -0,0 +1,28 @@
+package libpod
+
+import (
+ "math/rand"
+ "os"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+// Test_generateName checks that two consecutive (*Runtime).generateName calls
+// return different names even when the global RNG is reset to the default
+// seed between them.
+func Test_generateName(t *testing.T) {
+	state, path, _, err := getEmptyBoltState()
+	if !assert.NoError(t, err) {
+		// Without a state there is nothing to exercise or clean up.
+		return
+	}
+	defer os.RemoveAll(path)
+	defer state.Close()
+
+	r := &Runtime{
+		state: state,
+	}
+
+	// Check both errors: if generateName failed twice, the names would be
+	// compared as two empty strings and the real cause would be hidden.
+	n1, err := r.generateName()
+	assert.NoError(t, err)
+	rand.Seed(1)
+	n2, err := r.generateName()
+	assert.NoError(t, err)
+	assert.NotEqual(t, n1, n2)
+}