25 files changed, 3397 insertions, 2880 deletions
diff --git a/libpod/container.go b/libpod/container.go
index 44a8669fd..1891b124f 100644
--- a/libpod/container.go
+++ b/libpod/container.go
@@ -1133,20 +1133,6 @@ func (c *Container) NetworkDisabled() (bool, error) {
 	return networkDisabled(c)
 }
 
-func networkDisabled(c *Container) (bool, error) {
-	if c.config.CreateNetNS {
-		return false, nil
-	}
-	if !c.config.PostConfigureNetNS {
-		for _, ns := range c.config.Spec.Linux.Namespaces {
-			if ns.Type == spec.NetworkNamespace {
-				return ns.Path == "", nil
-			}
-		}
-	}
-	return false, nil
-}
-
 func (c *Container) HostNetwork() bool {
 	if c.config.CreateNetNS || c.config.NetNsCtr != "" {
 		return false
diff --git a/libpod/container_freebsd.go b/libpod/container_freebsd.go
index f9fbc4daa..7292ba37a 100644
--- a/libpod/container_freebsd.go
+++ b/libpod/container_freebsd.go
@@ -10,3 +10,13 @@ type containerPlatformState struct {
 	// namespace.
 	NetworkJail string `json:"-"`
 }
+
+// networkDisabled reports whether the container has no network jail set.
+func networkDisabled(c *Container) (bool, error) {
+	// A container that creates its own network namespace, or whose network
+	// namespace is configured after creation, is never reported as disabled.
+	if c.config.CreateNetNS || c.config.PostConfigureNetNS {
+		return false, nil
+	}
+	return c.state.NetworkJail == "", nil
+}
diff --git a/libpod/container_internal_common.go b/libpod/container_internal_common.go
new file mode 100644
index 000000000..192a86b6a
--- /dev/null
+++ b/libpod/container_internal_common.go
@@ -0,0 +1,2699 @@
+//go:build linux || freebsd
+// +build linux freebsd
+
+package libpod
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"math"
+	"os"
+	"os/user"
+	"path"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"syscall"
+	"time"
+
+	metadata "github.com/checkpoint-restore/checkpointctl/lib"
+	"github.com/checkpoint-restore/go-criu/v5/stats"
+	cdi "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
+	"github.com/containers/buildah"
+	"github.com/containers/buildah/pkg/chrootuser"
+	"github.com/containers/buildah/pkg/overlay"
+	butil "github.com/containers/buildah/util"
+	"github.com/containers/common/libnetwork/etchosts"
+	"github.com/containers/common/libnetwork/resolvconf"
+	"github.com/containers/common/libnetwork/types"
+	"github.com/containers/common/pkg/apparmor"
+	"github.com/containers/common/pkg/chown"
+	"github.com/containers/common/pkg/config"
+	"github.com/containers/common/pkg/subscriptions"
+	"github.com/containers/common/pkg/umask"
+	cutil "github.com/containers/common/pkg/util"
+	is "github.com/containers/image/v5/storage"
+	"github.com/containers/podman/v4/libpod/define"
+	"github.com/containers/podman/v4/libpod/events"
+	"github.com/containers/podman/v4/pkg/annotations"
+	"github.com/containers/podman/v4/pkg/checkpoint/crutils"
+	"github.com/containers/podman/v4/pkg/criu"
+	"github.com/containers/podman/v4/pkg/lookup"
+	"github.com/containers/podman/v4/pkg/rootless"
+	"github.com/containers/podman/v4/pkg/util"
+	"github.com/containers/podman/v4/version"
+	"github.com/containers/storage/pkg/archive"
+	"github.com/containers/storage/pkg/idtools"
+	"github.com/containers/storage/pkg/lockfile"
+	securejoin "github.com/cyphar/filepath-securejoin"
+	runcuser "github.com/opencontainers/runc/libcontainer/user"
+	spec "github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/opencontainers/runtime-tools/generate"
+	"github.com/opencontainers/selinux/go-selinux"
+	"github.com/opencontainers/selinux/go-selinux/label"
+	"github.com/sirupsen/logrus"
+)
+
+// getOverlayUpperAndWorkDir extracts the values of the upperdir and workdir
+// overlay options. Both must be given together and neither may be empty.
+func getOverlayUpperAndWorkDir(options []string) (string, string, error) {
+	var upper, work string
+	for _, opt := range options {
+		eq := strings.Index(opt, "=")
+		if eq < 0 {
+			// An option without a value never sets either directory.
+			continue
+		}
+		value := opt[eq+1:]
+		switch {
+		case strings.HasPrefix(opt, "upperdir"):
+			if value == "" {
+				return "", "", errors.New("cannot accept empty value for upperdir")
+			}
+			upper = value
+		case strings.HasPrefix(opt, "workdir"):
+			if value == "" {
+				return "", "", errors.New("cannot accept empty value for workdir")
+			}
+			work = value
+		}
+	}
+	// Having exactly one of the two directories is a configuration error.
+	if (upper == "") != (work == "") {
+		return "", "", errors.New("must specify both upperdir and workdir")
+	}
+	return upper, work, nil
+}
+
+// generateSpec builds the OCI runtime spec for the container.
+// Starting from the configured spec it adds the network namespace, volumes
+// and mounts, user and group settings, annotations, CDI devices, OCI hooks,
+// environment secrets and the LISTEN_* environment variables.
+// It returns the spec held by the generator, or the first error encountered.
+func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
+	overrides := c.getUserOverrides()
+	execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, overrides)
+	if err != nil {
+		if cutil.StringInSlice(c.config.User, c.config.HostUsers) {
+			execUser, err = lookupHostUser(c.config.User)
+		}
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	// NewFromSpec() is deprecated according to its comment
+	// however the recommended replacement just causes a nil map panic
+	//nolint:staticcheck
+	g := generate.NewFromSpec(c.config.Spec)
+
+	// If the flag to mount all devices is set for a privileged container, add
+	// all the devices from the host's machine into the container
+	if c.config.MountAllDevices {
+		if err := util.AddPrivilegedDevices(&g); err != nil {
+			return nil, err
+		}
+	}
+
+	// If network namespace was requested, add it now
+	if err := c.addNetworkNamespace(&g); err != nil {
+		return nil, err
+	}
+
+	// Apply AppArmor checks and load the default profile if needed.
+	if len(c.config.Spec.Process.ApparmorProfile) > 0 {
+		updatedProfile, err := apparmor.CheckProfileAndLoadDefault(c.config.Spec.Process.ApparmorProfile)
+		if err != nil {
+			return nil, err
+		}
+		g.SetProcessApparmorProfile(updatedProfile)
+	}
+
+	if err := c.makeBindMounts(); err != nil {
+		return nil, err
+	}
+
+	if err := c.mountNotifySocket(g); err != nil {
+		return nil, err
+	}
+
+	// Get host UID and GID based on the container process UID and GID.
+	hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid))
+	if err != nil {
+		return nil, err
+	}
+
+	// Add named volumes
+	for _, namedVol := range c.config.NamedVolumes {
+		volume, err := c.runtime.GetVolume(namedVol.Name)
+		if err != nil {
+			return nil, fmt.Errorf("error retrieving volume %s to add to container %s: %w", namedVol.Name, c.ID(), err)
+		}
+		mountPoint, err := volume.MountPoint()
+		if err != nil {
+			return nil, err
+		}
+
+		overlayFlag := false
+		upperDir := ""
+		workDir := ""
+		for _, o := range namedVol.Options {
+			if o == "O" {
+				overlayFlag = true
+				upperDir, workDir, err = getOverlayUpperAndWorkDir(namedVol.Options)
+				if err != nil {
+					return nil, err
+				}
+			}
+		}
+
+		if overlayFlag {
+			var overlayMount spec.Mount
+			var overlayOpts *overlay.Options
+			contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
+			if err != nil {
+				return nil, err
+			}
+
+			overlayOpts = &overlay.Options{RootUID: c.RootUID(),
+				RootGID:                c.RootGID(),
+				UpperDirOptionFragment: upperDir,
+				WorkDirOptionFragment:  workDir,
+				GraphOpts:              c.runtime.store.GraphOptions(),
+			}
+
+			overlayMount, err = overlay.MountWithOptions(contentDir, mountPoint, namedVol.Dest, overlayOpts)
+			if err != nil {
+				return nil, fmt.Errorf("mounting overlay failed %q: %w", mountPoint, err)
+			}
+
+			for _, o := range namedVol.Options {
+				if o == "U" {
+					if err := c.ChangeHostPathOwnership(mountPoint, true, int(hostUID), int(hostGID)); err != nil {
+						return nil, err
+					}
+
+					if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
+						return nil, err
+					}
+				}
+			}
+			g.AddMount(overlayMount)
+		} else {
+			volMount := spec.Mount{
+				Type:        define.TypeBind,
+				Source:      mountPoint,
+				Destination: namedVol.Dest,
+				Options:     namedVol.Options,
+			}
+			g.AddMount(volMount)
+		}
+	}
+
+	// Check if the spec file mounts contain the options z, Z or U.
+	// If they have z or Z, relabel the source directory and then remove the option.
+	// If they have U, chown the source directory and then remove the option.
+	for i := range g.Config.Mounts {
+		m := &g.Config.Mounts[i]
+		var options []string
+		for _, o := range m.Options {
+			switch o {
+			case "U":
+				if m.Type == "tmpfs" {
+					options = append(options, []string{fmt.Sprintf("uid=%d", execUser.Uid), fmt.Sprintf("gid=%d", execUser.Gid)}...)
+				} else {
+					// only chown on initial creation of container
+					if err := c.ChangeHostPathOwnership(m.Source, true, int(hostUID), int(hostGID)); err != nil {
+						return nil, err
+					}
+				}
+			case "z":
+				fallthrough
+			case "Z":
+				if err := c.relabel(m.Source, c.MountLabel(), label.IsShared(o)); err != nil {
+					return nil, err
+				}
+
+			default:
+				options = append(options, o)
+			}
+		}
+		m.Options = options
+	}
+
+	c.setProcessLabel(&g)
+	c.setMountLabel(&g)
+
+	// Add bind mounts to container
+	for dstPath, srcPath := range c.state.BindMounts {
+		newMount := spec.Mount{
+			Type:        define.TypeBind,
+			Source:      srcPath,
+			Destination: dstPath,
+			Options:     bindOptions,
+		}
+		if c.IsReadOnly() && dstPath != "/dev/shm" {
+			newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
+		}
+		if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
+			newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
+		}
+		if !MountExists(g.Mounts(), dstPath) {
+			g.AddMount(newMount)
+		} else {
+			logrus.Infof("User mount overriding libpod mount at %q", dstPath)
+		}
+	}
+
+	// Add overlay volumes
+	for _, overlayVol := range c.config.OverlayVolumes {
+		upperDir, workDir, err := getOverlayUpperAndWorkDir(overlayVol.Options)
+		if err != nil {
+			return nil, err
+		}
+		contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
+		if err != nil {
+			return nil, err
+		}
+		overlayOpts := &overlay.Options{RootUID: c.RootUID(),
+			RootGID:                c.RootGID(),
+			UpperDirOptionFragment: upperDir,
+			WorkDirOptionFragment:  workDir,
+			GraphOpts:              c.runtime.store.GraphOptions(),
+		}
+
+		overlayMount, err := overlay.MountWithOptions(contentDir, overlayVol.Source, overlayVol.Dest, overlayOpts)
+		if err != nil {
+			return nil, fmt.Errorf("mounting overlay failed %q: %w", overlayVol.Source, err)
+		}
+
+		// Check overlay volume options
+		for _, o := range overlayVol.Options {
+			if o == "U" {
+				if err := c.ChangeHostPathOwnership(overlayVol.Source, true, int(hostUID), int(hostGID)); err != nil {
+					return nil, err
+				}
+
+				if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
+					return nil, err
+				}
+			}
+		}
+
+		g.AddMount(overlayMount)
+	}
+
+	// Add image volumes as overlay mounts
+	for _, volume := range c.config.ImageVolumes {
+		// Mount the specified image.
+		img, _, err := c.runtime.LibimageRuntime().LookupImage(volume.Source, nil)
+		if err != nil {
+			return nil, fmt.Errorf("error creating image volume %q:%q: %w", volume.Source, volume.Dest, err)
+		}
+		mountPoint, err := img.Mount(ctx, nil, "")
+		if err != nil {
+			return nil, fmt.Errorf("error mounting image volume %q:%q: %w", volume.Source, volume.Dest, err)
+		}
+
+		contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
+		if err != nil {
+			return nil, fmt.Errorf("failed to create TempDir in the %s directory: %w", c.config.StaticDir, err)
+		}
+
+		var overlayMount spec.Mount
+		if volume.ReadWrite {
+			overlayMount, err = overlay.Mount(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
+		} else {
+			overlayMount, err = overlay.MountReadOnly(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
+		}
+		if err != nil {
+			return nil, fmt.Errorf("creating overlay mount for image %q failed: %w", volume.Source, err)
+		}
+		g.AddMount(overlayMount)
+	}
+
+	hasHomeSet := false
+	for _, s := range c.config.Spec.Process.Env {
+		if strings.HasPrefix(s, "HOME=") {
+			hasHomeSet = true
+			break
+		}
+	}
+	if !hasHomeSet && execUser.Home != "" {
+		c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", execUser.Home))
+	}
+
+	if c.config.User != "" {
+		// User and Group must go together
+		g.SetProcessUID(uint32(execUser.Uid))
+		g.SetProcessGID(uint32(execUser.Gid))
+		g.AddProcessAdditionalGid(uint32(execUser.Gid))
+	}
+
+	if c.config.Umask != "" {
+		decVal, err := strconv.ParseUint(c.config.Umask, 8, 32)
+		if err != nil {
+			return nil, fmt.Errorf("invalid Umask Value: %w", err)
+		}
+		umask := uint32(decVal)
+		g.Config.Process.User.Umask = &umask
+	}
+
+	// Add additional groups if c.config.Groups is not empty
+	if len(c.config.Groups) > 0 {
+		gids, err := lookup.GetContainerGroups(c.config.Groups, c.state.Mountpoint, overrides)
+		if err != nil {
+			return nil, fmt.Errorf("error looking up supplemental groups for container %s: %w", c.ID(), err)
+		}
+		for _, gid := range gids {
+			g.AddProcessAdditionalGid(gid)
+		}
+	}
+
+	if err := c.addSystemdMounts(&g); err != nil {
+		return nil, err
+	}
+
+	// Look up and add groups the user belongs to, if a group wasn't directly specified
+	if !strings.Contains(c.config.User, ":") {
+		// the gidMappings that are present inside the container user namespace
+		var gidMappings []idtools.IDMap
+
+		switch {
+		case len(c.config.IDMappings.GIDMap) > 0:
+			gidMappings = c.config.IDMappings.GIDMap
+		case rootless.IsRootless():
+			// Check whether the current user namespace has enough gids available.
+			availableGids, err := rootless.GetAvailableGids()
+			if err != nil {
+				return nil, fmt.Errorf("cannot read number of available GIDs: %w", err)
+			}
+			gidMappings = []idtools.IDMap{{
+				ContainerID: 0,
+				HostID:      0,
+				Size:        int(availableGids),
+			}}
+		default:
+			gidMappings = []idtools.IDMap{{
+				ContainerID: 0,
+				HostID:      0,
+				Size:        math.MaxInt32,
+			}}
+		}
+		for _, gid := range execUser.Sgids {
+			isGIDAvailable := false
+			for _, m := range gidMappings {
+				if gid >= m.ContainerID && gid < m.ContainerID+m.Size {
+					isGIDAvailable = true
+					break
+				}
+			}
+			if isGIDAvailable {
+				g.AddProcessAdditionalGid(uint32(gid))
+			} else {
+				logrus.Warnf("Additional gid=%d is not present in the user namespace, skip setting it", gid)
+			}
+		}
+	}
+
+	// Add shared namespaces from other containers
+	if err := c.addSharedNamespaces(&g); err != nil {
+		return nil, err
+	}
+
+	g.SetRootPath(c.state.Mountpoint)
+	g.AddAnnotation(annotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano))
+	g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal))
+
+	if _, exists := g.Config.Annotations[annotations.ContainerManager]; !exists {
+		g.AddAnnotation(annotations.ContainerManager, annotations.ContainerManagerLibpod)
+	}
+
+	if err := c.setCgroupsPath(&g); err != nil {
+		return nil, err
+	}
+
+	// Warning: CDI may alter g.Config in place.
+	if len(c.config.CDIDevices) > 0 {
+		registry := cdi.GetRegistry(
+			cdi.WithAutoRefresh(false),
+		)
+		if err := registry.Refresh(); err != nil {
+			logrus.Debugf("The following error was triggered when refreshing the CDI registry: %v", err)
+		}
+		_, err := registry.InjectDevices(g.Config, c.config.CDIDevices...)
+		if err != nil {
+			return nil, fmt.Errorf("error setting up CDI devices: %w", err)
+		}
+	}
+
+	// Mounts need to be sorted so paths will not cover other paths
+	mounts := sortMounts(g.Mounts())
+	g.ClearMounts()
+
+	for _, m := range mounts {
+		// We need to remove all symlinks from tmpfs mounts.
+		// Runc and other runtimes may choke on them.
+		// Easy solution: use securejoin to do a scoped evaluation of
+		// the links, then trim off the mount prefix.
+		if m.Type == "tmpfs" {
+			finalPath, err := securejoin.SecureJoin(c.state.Mountpoint, m.Destination)
+			if err != nil {
+				return nil, fmt.Errorf("error resolving symlinks for mount destination %s: %w", m.Destination, err)
+			}
+			trimmedPath := strings.TrimPrefix(finalPath, strings.TrimSuffix(c.state.Mountpoint, "/"))
+			m.Destination = trimmedPath
+		}
+		g.AddMount(m)
+	}
+
+	if err := c.addRootPropagation(&g, mounts); err != nil {
+		return nil, err
+	}
+
+	// Warning: precreate hooks may alter g.Config in place.
+	if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil {
+		return nil, fmt.Errorf("error setting up OCI Hooks: %w", err)
+	}
+	if len(c.config.EnvSecrets) > 0 {
+		manager, err := c.runtime.SecretsManager()
+		if err != nil {
+			return nil, err
+		}
+		for name, secr := range c.config.EnvSecrets {
+			_, data, err := manager.LookupSecretData(secr.Name)
+			if err != nil {
+				return nil, err
+			}
+			g.AddProcessEnv(name, string(data))
+		}
+	}
+
+	// Pass down the LISTEN_* environment (see #10443).
+	for _, key := range []string{"LISTEN_PID", "LISTEN_FDS", "LISTEN_FDNAMES"} {
+		if val, ok := os.LookupEnv(key); ok {
+			// Force the PID to `1` since we cannot rely on (all
+			// versions of) all runtimes to do it for us.
+			if key == "LISTEN_PID" {
+				val = "1"
+			}
+			g.AddProcessEnv(key, val)
+		}
+	}
+
+	return g.Config, nil
+}
+
+// isWorkDirSymlink reports whether resolvedPath is a symlink (or a chain of
+// symlinks) whose target lives on a volume, on a bind mount, or exists inside
+// of the container. It returns false in every other case.
+// This helper is meant for internal use only and can change at any time.
+func (c *Container) isWorkDirSymlink(resolvedPath string) bool {
+	// We cannot create workdir since explicit --workdir is set in config,
+	// but workdir could also be a symlink. A symlink whose resolved target
+	// is present is a valid use case and is reported as such.
+	//
+	// Linux only supports a chain of 40 links.
+	// Reference: https://github.com/torvalds/linux/blob/master/include/linux/namei.h#L13
+	for hops := 0; hops <= 40; hops++ {
+		target, err := os.Readlink(resolvedPath)
+		if err != nil {
+			// End sym-link resolution loop.
+			return false
+		}
+		if target == "" {
+			// Nothing to follow in this round.
+			continue
+		}
+
+		_, hostTarget, err := c.resolvePath(c.state.Mountpoint, target)
+		// Volumes and mounts are consulted before the resolve error is
+		// inspected.
+		if isPathOnVolume(c, hostTarget) || isPathOnBindMount(c, hostTarget) {
+			// Resolved symlink exists on external volume or mount
+			return true
+		}
+		if err != nil {
+			// Could not resolve path so end sym-link resolution loop.
+			return false
+		}
+		if hostTarget == "" {
+			continue
+		}
+
+		// Follow the link: the next round reads the resolved target.
+		resolvedPath = hostTarget
+		if _, statErr := os.Stat(hostTarget); statErr == nil {
+			// Symlink resolved successfully and the resolved path exists.
+			logrus.Debugf("Workdir is a symlink with target to %q and resolved symlink exists on container", target)
+			return true
+		}
+	}
+	return false
+}
+
+// resolveWorkDir resolves the container's workdir and, depending on the
+// configuration, will create it, or error out if it does not exist.
+// Note that the container must be mounted before.
+func (c *Container) resolveWorkDir() error {
+	workdir := c.WorkingDir()
+
+	// If the specified workdir is a subdir of a volume or mount,
+	// we don't need to do anything.  The runtime is taking care of
+	// that.
+	if isPathOnVolume(c, workdir) || isPathOnBindMount(c, workdir) {
+		logrus.Debugf("Workdir %q resolved to a volume or mount", workdir)
+		return nil
+	}
+
+	_, resolvedWorkdir, err := c.resolvePath(c.state.Mountpoint, workdir)
+	if err != nil {
+		return err
+	}
+	logrus.Debugf("Workdir %q resolved to host path %q", workdir, resolvedWorkdir)
+
+	st, err := os.Stat(resolvedWorkdir)
+	if err == nil {
+		if !st.IsDir() {
+			return fmt.Errorf("workdir %q exists on container %s, but is not a directory", workdir, c.ID())
+		}
+		return nil
+	}
+
+	// The stat failed, so err is known to be non-nil from here on.
+	if !c.config.CreateWorkingDir {
+		// No need to create it (e.g., `--workdir=/foo`), so let's make sure
+		// the path exists on the container.
+		if !os.IsNotExist(err) {
+			// This might be a serious error (e.g., permission), so
+			// we need to return the full error.
+			return fmt.Errorf("error detecting workdir %q on container %s: %w", workdir, c.ID(), err)
+		}
+		// If resolved Workdir path gets marked as a valid symlink,
+		// return nil cause this is valid use-case.
+		if c.isWorkDirSymlink(resolvedWorkdir) {
+			return nil
+		}
+		return fmt.Errorf("workdir %q does not exist on container %s", workdir, c.ID())
+	}
+	if err := os.MkdirAll(resolvedWorkdir, 0755); err != nil {
+		if os.IsExist(err) {
+			return nil
+		}
+		return fmt.Errorf("error creating container %s workdir: %w", c.ID(), err)
+	}
+
+	// Hand the freshly created workdir over to the container's configured
+	// user, looked up relative to the container's mountpoint.
+	uid, gid, _, err := chrootuser.GetUser(c.state.Mountpoint, c.User())
+	if err != nil {
+		return fmt.Errorf("error looking up %s inside of the container %s: %w", c.User(), c.ID(), err)
+	}
+	if err := os.Chown(resolvedWorkdir, int(uid), int(gid)); err != nil {
+		return fmt.Errorf("error chowning container %s workdir to container root: %w", c.ID(), err)
+	}
+
+	return nil
+}
+
+// getUserOverrides collects passwd/group override paths from spec and bind mounts.
+func (c *Container) getUserOverrides() *lookup.Overrides {
+	result := new(lookup.Overrides)
+	var passwdMounted, groupMounted bool
+	for _, mnt := range c.config.Spec.Mounts {
+		switch mnt.Destination {
+		case "/etc/passwd":
+			result.ContainerEtcPasswdPath = mnt.Source
+			passwdMounted = true
+		case "/etc/group":
+			result.ContainerEtcGroupPath = mnt.Source
+			groupMounted = true
+		case "/etc": // fills only what no earlier mount supplied individually
+			if !passwdMounted {
+				result.ContainerEtcPasswdPath = filepath.Join(mnt.Source, "passwd")
+			}
+			if !groupMounted {
+				result.ContainerEtcGroupPath = filepath.Join(mnt.Source, "group")
+			}
+		}
+	}
+	if passwdPath, found := c.state.BindMounts["/etc/passwd"]; found {
+		result.ContainerEtcPasswdPath = passwdPath
+	}
+	return result
+}
+
+// lookupHostUser looks up name on the host and returns its UID, GID and home.
+func lookupHostUser(name string) (*runcuser.ExecUser, error) {
+	var execUser runcuser.ExecUser
+	u, err := util.LookupUser(name)
+	if err != nil {
+		return &execUser, err
+	}
+	// u.Uid/u.Gid are expected to be decimal strings (as in os/user.User); base 8 misreads them.
+	uid, err := strconv.ParseUint(u.Uid, 10, 32)
+	if err != nil {
+		return &execUser, err
+	}
+	gid, err := strconv.ParseUint(u.Gid, 10, 32)
+	if err != nil {
+		return &execUser, err
+	}
+	execUser.Uid = int(uid)
+	execUser.Gid = int(gid)
+	execUser.Home = u.HomeDir
+	return &execUser, nil
+}
+
+// mountNotifySocket sets up the notify directory inside the container's
+// bundle and records it as the /run/notify bind mount. It is a no-op unless a
+// notify socket is configured and the sdnotify mode is set to container.
+// NOTIFY_SOCKET is added to the process environment of g.
+func (c *Container) mountNotifySocket(g generate.Generator) error {
+	wanted := c.config.SdNotifySocket != "" && c.config.SdNotifyMode == define.SdNotifyModeContainer
+	if !wanted {
+		return nil
+	}
+
+	dir := filepath.Join(c.bundlePath(), "notify")
+	logrus.Debugf("Checking notify %q dir", dir)
+	// An "already exists" error from MkdirAll is not treated as a failure.
+	if err := os.MkdirAll(dir, 0755); err != nil && !os.IsExist(err) {
+		return fmt.Errorf("unable to create notify %q dir: %w", dir, err)
+	}
+	// Relabel the directory with the container's mount label.
+	if err := label.Relabel(dir, c.MountLabel(), true); err != nil {
+		return fmt.Errorf("relabel failed %q: %w", dir, err)
+	}
+	logrus.Debugf("Add bindmount notify %q dir", dir)
+	// A /run/notify bind mount that is already recorded is left untouched.
+	if _, registered := c.state.BindMounts["/run/notify"]; !registered {
+		c.state.BindMounts["/run/notify"] = dir
+	}
+
+	// Set the container's notify socket to the proxy socket created by conmon
+	g.AddProcessEnv("NOTIFY_SOCKET", "/run/notify/notify.sock")
+
+	return nil
+}
+
+// addCheckpointImageMetadata sets annotations on importBuilder that describe
+// the container and the current host (runtime, CRIU, kernel, distribution).
+func (c *Container) addCheckpointImageMetadata(importBuilder *buildah.Builder) error {
+	// Get information about host environment
+	hostInfo, err := c.Runtime().hostInfo()
+	if err != nil {
+		return fmt.Errorf("getting host info: %w", err)
+	}
+
+	criuVersion, err := criu.GetCriuVersion()
+	if err != nil {
+		return fmt.Errorf("getting criu version: %w", err)
+	}
+
+	rootfsImageID, rootfsImageName := c.Image()
+	// Add image annotations with information about the container and the host.
+	// This information is useful to check compatibility before restoring the checkpoint
+	checkpointImageAnnotations := map[string]string{
+		define.CheckpointAnnotationName:                c.config.Name,
+		define.CheckpointAnnotationRawImageName:        c.config.RawImageName,
+		define.CheckpointAnnotationRootfsImageID:       rootfsImageID,
+		define.CheckpointAnnotationRootfsImageName:     rootfsImageName,
+		define.CheckpointAnnotationPodmanVersion:       version.Version.String(),
+		define.CheckpointAnnotationCriuVersion:         strconv.Itoa(criuVersion),
+		define.CheckpointAnnotationRuntimeName:         hostInfo.OCIRuntime.Name,
+		define.CheckpointAnnotationRuntimeVersion:      hostInfo.OCIRuntime.Version,
+		define.CheckpointAnnotationConmonVersion:       hostInfo.Conmon.Version,
+		define.CheckpointAnnotationHostArch:            hostInfo.Arch,
+		define.CheckpointAnnotationHostKernel:          hostInfo.Kernel,
+		define.CheckpointAnnotationCgroupVersion:       hostInfo.CgroupsVersion,
+		define.CheckpointAnnotationDistributionVersion: hostInfo.Distribution.Version,
+		define.CheckpointAnnotationDistributionName:    hostInfo.Distribution.Distribution,
+	}
+
+	for key, value := range checkpointImageAnnotations {
+		importBuilder.SetAnnotation(key, value)
+	}
+
+	return nil
+}
+
+// resolveCheckpointImageName replaces options.CreateImage with the name
+// returned by the image runtime's ResolveName. When CreateImage is empty
+// nothing is resolved and options is left untouched.
+func (c *Container) resolveCheckpointImageName(options *ContainerCheckpointOptions) error {
+	if requested := options.CreateImage; requested != "" {
+		// Resolve image name
+		resolved, err := c.runtime.LibimageRuntime().ResolveName(requested)
+		if err != nil {
+			return err
+		}
+		options.CreateImage = resolved
+	}
+	return nil
+}
+
+// createCheckpointImage exports the container's checkpoint into a temporary
+// tar file and commits it as the image named by options.CreateImage, built
+// from scratch with buildah. It is a no-op when options.CreateImage is empty.
+func (c *Container) createCheckpointImage(ctx context.Context, options ContainerCheckpointOptions) error {
+	if options.CreateImage == "" {
+		return nil
+	}
+	logrus.Debugf("Create checkpoint image %s", options.CreateImage)
+
+	// Create storage reference
+	imageRef, err := is.Transport.ParseStoreReference(c.runtime.store, options.CreateImage)
+	if err != nil {
+		return fmt.Errorf("failed to parse image name: %w", err)
+	}
+
+	// Build an image scratch
+	builderOptions := buildah.BuilderOptions{
+		FromImage: "scratch",
+	}
+	importBuilder, err := buildah.NewBuilder(ctx, c.runtime.store, builderOptions)
+	if err != nil {
+		return err
+	}
+	// Clean up buildah working container
+	defer func() {
+		if err := importBuilder.Delete(); err != nil {
+			logrus.Errorf("Image builder delete failed: %v", err)
+		}
+	}()
+
+	if err := c.prepareCheckpointExport(); err != nil {
+		return err
+	}
+
+	// Export checkpoint into temporary tar file
+	tmpDir, err := ioutil.TempDir("", "checkpoint_image_")
+	if err != nil {
+		return err
+	}
+	defer os.RemoveAll(tmpDir)
+	options.TargetFile = filepath.Join(tmpDir, "checkpoint.tar")
+	if err := c.exportCheckpoint(options); err != nil {
+		return err
+	}
+
+	// Copy checkpoint from temporary tar file in the image
+	addAndCopyOptions := buildah.AddAndCopyOptions{}
+	if err := importBuilder.Add("", true, addAndCopyOptions, options.TargetFile); err != nil {
+		return err
+	}
+	if err := c.addCheckpointImageMetadata(importBuilder); err != nil {
+		return err
+	}
+
+	commitOptions := buildah.CommitOptions{
+		Squash:        true,
+		SystemContext: c.runtime.imageContext,
+	}
+
+	// Create checkpoint image
+	id, _, _, err := importBuilder.Commit(ctx, imageRef, commitOptions)
+	if err != nil {
+		return err
+	}
+	logrus.Debugf("Created checkpoint image: %s", id)
+	return nil
+}
+
+// exportCheckpoint archives the checkpoint files found in the container's
+// bundle directory into options.TargetFile. Root file-system changes and
+// named volumes are added unless options.IgnoreRootfs or
+// options.IgnoreVolumes is set. A container may only depend on the infra
+// container of its own pod; any other dependency is rejected.
+func (c *Container) exportCheckpoint(options ContainerCheckpointOptions) error {
+	if len(c.Dependencies()) == 1 {
+		// Check if the dependency is an infra container. If it is we can checkpoint
+		// the container out of the Pod.
+		if c.config.Pod == "" {
+			return errors.New("cannot export checkpoints of containers with dependencies")
+		}
+
+		pod, err := c.runtime.state.Pod(c.config.Pod)
+		if err != nil {
+			return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), c.config.Pod, err)
+		}
+		infraID, err := pod.InfraContainerID()
+		if err != nil {
+			return fmt.Errorf("cannot retrieve infra container ID for pod %s: %w", c.config.Pod, err)
+		}
+		if c.Dependencies()[0] != infraID {
+			return errors.New("cannot export checkpoints of containers with dependencies")
+		}
+	}
+	if len(c.Dependencies()) > 1 {
+		return errors.New("cannot export checkpoints of containers with dependencies")
+	}
+	logrus.Debugf("Exporting checkpoint image of container %q to %q", c.ID(), options.TargetFile)
+
+	includeFiles := []string{
+		"artifacts",
+		metadata.DevShmCheckpointTar,
+		metadata.ConfigDumpFile,
+		metadata.SpecDumpFile,
+		metadata.NetworkStatusFile,
+		stats.StatsDump,
+	}
+	if c.LogDriver() == define.KubernetesLogging ||
+		c.LogDriver() == define.JSONLogging {
+		includeFiles = append(includeFiles, "ctr.log")
+	}
+	if options.PreCheckPoint {
+		includeFiles = append(includeFiles, preCheckpointDir)
+	} else {
+		includeFiles = append(includeFiles, metadata.CheckpointDirectory)
+	}
+	// Get root file-system changes included in the checkpoint archive
+	var addToTarFiles []string
+	if !options.IgnoreRootfs {
+		// To correctly track deleted files, let's go through the output of 'podman diff'
+		rootFsChanges, err := c.runtime.GetDiff("", c.ID(), define.DiffContainer)
+		if err != nil {
+			return fmt.Errorf("error exporting root file-system diff for %q: %w", c.ID(), err)
+		}
+		// Plain assignment: ":=" would shadow addToTarFiles and skip the cleanup below.
+		addToTarFiles, err = crutils.CRCreateRootFsDiffTar(&rootFsChanges, c.state.Mountpoint, c.bundlePath())
+		if err != nil {
+			return err
+		}
+		includeFiles = append(includeFiles, addToTarFiles...)
+	}
+
+	// Folder containing archived volumes that will be included in the export
+	expVolDir := filepath.Join(c.bundlePath(), metadata.CheckpointVolumesDirectory)
+	// Create an archive for each volume associated with the container
+	if !options.IgnoreVolumes {
+		if err := os.MkdirAll(expVolDir, 0700); err != nil {
+			return fmt.Errorf("error creating volumes export directory %q: %w", expVolDir, err)
+		}
+		for _, v := range c.config.NamedVolumes {
+			volumeTarFilePath := filepath.Join(metadata.CheckpointVolumesDirectory, v.Name+".tar")
+			volumeTarFileFullPath := filepath.Join(c.bundlePath(), volumeTarFilePath)
+			if err := c.archiveNamedVolume(v.Name, v.Dest, volumeTarFileFullPath); err != nil {
+				return err
+			}
+			includeFiles = append(includeFiles, volumeTarFilePath)
+		}
+	}
+
+	input, err := archive.TarWithOptions(c.bundlePath(), &archive.TarOptions{
+		Compression:      options.Compression,
+		IncludeSourceDir: true,
+		IncludeFiles:     includeFiles,
+	})
+	if err != nil {
+		return fmt.Errorf("error reading checkpoint directory %q: %w", c.ID(), err)
+	}
+	defer input.Close()
+
+	outFile, err := os.Create(options.TargetFile)
+	if err != nil {
+		return fmt.Errorf("error creating checkpoint export file %q: %w", options.TargetFile, err)
+	}
+	defer outFile.Close()
+	if err := os.Chmod(options.TargetFile, 0600); err != nil {
+		return err
+	}
+	if _, err := io.Copy(outFile, input); err != nil {
+		return err
+	}
+	// Best-effort cleanup of the temporary files created for the export.
+	for _, file := range addToTarFiles {
+		os.Remove(filepath.Join(c.bundlePath(), file))
+	}
+	if !options.IgnoreVolumes {
+		os.RemoveAll(expVolDir)
+	}
+
+	return nil
+}
+
+// archiveNamedVolume writes the content of the named volume to tarPath as an
+// uncompressed tar archive; dest is only used in the error message.
+func (c *Container) archiveNamedVolume(name, dest, tarPath string) error {
+	volumeTarFile, err := os.Create(tarPath)
+	if err != nil {
+		return fmt.Errorf("error creating %q: %w", tarPath, err)
+	}
+	defer volumeTarFile.Close()
+	volume, err := c.runtime.GetVolume(name)
+	if err != nil {
+		return err
+	}
+	mp, err := volume.MountPoint()
+	if err != nil {
+		return err
+	}
+	if mp == "" {
+		return fmt.Errorf("volume %s is not mounted, cannot export: %w", volume.Name(), define.ErrInternal)
+	}
+	input, err := archive.TarWithOptions(mp, &archive.TarOptions{
+		Compression:      archive.Uncompressed,
+		IncludeSourceDir: true,
+	})
+	if err != nil {
+		return fmt.Errorf("error reading volume directory %q: %w", dest, err)
+	}
+	defer input.Close()
+	if _, err := io.Copy(volumeTarFile, input); err != nil {
+		return err
+	}
+	return volumeTarFile.Close()
+}
+
+// checkpointRestoreSupported reports whether checkpoint/restore can be used
+// for this container. It returns an error when CRIU is not available in at
+// least the requested version, or when the container's OCI runtime does not
+// support checkpointing; otherwise it returns nil.
+func (c *Container) checkpointRestoreSupported(version int) error {
+	switch {
+	case !criu.CheckForCriu(version):
+		return fmt.Errorf("checkpoint/restore requires at least CRIU %d", version)
+	case !c.ociRuntime.SupportsCheckpoint():
+		return errors.New("configured runtime does not support checkpoint/restore")
+	default:
+		return nil
+	}
+}
+
+// checkpoint checkpoints the running container through the OCI runtime.
+//
+// The container must be running, and a container started with '--rm' can only
+// be checkpointed when options.TargetFile is set. After the runtime has
+// written the checkpoint, the content of /dev/shm (when it is backed by the
+// container's ShmDir) and the current network status are stored in the bundle
+// directory. The checkpoint is then either exported to options.TargetFile or
+// handed to createCheckpointImage.
+//
+// Unless options.KeepRunning or options.PreCheckPoint is set, the container is
+// marked as stopped and checkpointed and its storage and network are cleaned
+// up. Unless options.Keep or options.PreCheckPoint is set, the dump log,
+// statistics and config/spec dump files are removed from the bundle directory.
+//
+// It returns the CRIU dump statistics (nil unless options.PrintStats is set),
+// the checkpoint duration reported by the OCI runtime, and any error.
+func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) {
+	if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil {
+		return nil, 0, err
+	}
+
+	if c.state.State != define.ContainerStateRunning {
+		return nil, 0, fmt.Errorf("%q is not running, cannot checkpoint: %w", c.state.State, define.ErrCtrStateInvalid)
+	}
+
+	if c.AutoRemove() && options.TargetFile == "" {
+		return nil, 0, errors.New("cannot checkpoint containers that have been started with '--rm' unless '--export' is used")
+	}
+
+	if err := c.resolveCheckpointImageName(&options); err != nil {
+		return nil, 0, err
+	}
+
+	if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "dump.log", c.MountLabel()); err != nil {
+		return nil, 0, err
+	}
+
+	// Setting CheckpointLog early in case there is a failure.
+	c.state.CheckpointLog = path.Join(c.bundlePath(), "dump.log")
+	c.state.CheckpointPath = c.CheckpointPath()
+
+	runtimeCheckpointDuration, err := c.ociRuntime.CheckpointContainer(c, options)
+	if err != nil {
+		return nil, 0, err
+	}
+
+	// Keep the content of /dev/shm directory
+	if c.config.ShmDir != "" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
+		shmDirTarFileFullPath := filepath.Join(c.bundlePath(), metadata.DevShmCheckpointTar)
+
+		shmDirTarFile, err := os.Create(shmDirTarFileFullPath)
+		if err != nil {
+			return nil, 0, err
+		}
+		defer shmDirTarFile.Close()
+
+		input, err := archive.TarWithOptions(c.config.ShmDir, &archive.TarOptions{
+			Compression:      archive.Uncompressed,
+			IncludeSourceDir: true,
+		})
+		if err != nil {
+			return nil, 0, err
+		}
+
+		if _, err = io.Copy(shmDirTarFile, input); err != nil {
+			return nil, 0, err
+		}
+	}
+
+	// Save network.status. This is needed to restore the container with
+	// the same IP. Currently limited to one IP address in a container
+	// with one interface.
+	// FIXME: will this break something?
+	if _, err := metadata.WriteJSONFile(c.getNetworkStatus(), c.bundlePath(), metadata.NetworkStatusFile); err != nil {
+		return nil, 0, err
+	}
+
+	// From here on every return path, including failures, emits the
+	// checkpoint event.
+	defer c.newContainerEvent(events.Checkpoint)
+
+	// There is a bug from criu: https://github.com/checkpoint-restore/criu/issues/116
+	// We have to change the symbolic link from absolute path to relative path
+	if options.WithPrevious {
+		// Best effort: a failed removal surfaces through the Symlink error below.
+		os.Remove(path.Join(c.CheckpointPath(), "parent"))
+		if err := os.Symlink("../pre-checkpoint", path.Join(c.CheckpointPath(), "parent")); err != nil {
+			return nil, 0, err
+		}
+	}
+
+	if options.TargetFile != "" {
+		if err := c.exportCheckpoint(options); err != nil {
+			return nil, 0, err
+		}
+	} else {
+		if err := c.createCheckpointImage(ctx, options); err != nil {
+			return nil, 0, err
+		}
+	}
+
+	logrus.Debugf("Checkpointed container %s", c.ID())
+
+	if !options.KeepRunning && !options.PreCheckPoint {
+		c.state.State = define.ContainerStateStopped
+		c.state.Checkpointed = true
+		c.state.CheckpointedTime = time.Now()
+		c.state.Restored = false
+		c.state.RestoredTime = time.Time{}
+
+		// Clean up Storage and Network
+		if err := c.cleanup(ctx); err != nil {
+			return nil, 0, err
+		}
+	}
+
+	// Statistics are only collected on request; otherwise nil is returned.
+	criuStatistics, err := func() (*define.CRIUCheckpointRestoreStatistics, error) {
+		if !options.PrintStats {
+			return nil, nil
+		}
+		statsDirectory, err := os.Open(c.bundlePath())
+		if err != nil {
+			return nil, fmt.Errorf("not able to open %q: %w", c.bundlePath(), err)
+		}
+		// Release the directory handle once the statistics have been read.
+		defer statsDirectory.Close()
+
+		dumpStatistics, err := stats.CriuGetDumpStats(statsDirectory)
+		if err != nil {
+			return nil, fmt.Errorf("displaying checkpointing statistics not possible: %w", err)
+		}
+
+		return &define.CRIUCheckpointRestoreStatistics{
+			FreezingTime: dumpStatistics.GetFreezingTime(),
+			FrozenTime:   dumpStatistics.GetFrozenTime(),
+			MemdumpTime:  dumpStatistics.GetMemdumpTime(),
+			MemwriteTime: dumpStatistics.GetMemwriteTime(),
+			PagesScanned: dumpStatistics.GetPagesScanned(),
+			PagesWritten: dumpStatistics.GetPagesWritten(),
+		}, nil
+	}()
+	if err != nil {
+		return nil, 0, err
+	}
+
+	if !options.Keep && !options.PreCheckPoint {
+		cleanup := []string{
+			"dump.log",
+			stats.StatsDump,
+			metadata.ConfigDumpFile,
+			metadata.SpecDumpFile,
+		}
+		for _, del := range cleanup {
+			file := filepath.Join(c.bundlePath(), del)
+			if err := os.Remove(file); err != nil {
+				logrus.Debugf("Unable to remove file %s", file)
+			}
+		}
+		// The file has been deleted. Do not mention it.
+		c.state.CheckpointLog = ""
+	}
+
+	c.state.FinishedTime = time.Now()
+	return criuStatistics, runtimeCheckpointDuration, c.save()
+}
+
+// generateContainerSpec writes the container's configured OCI spec to disk
+// through saveSpec, so that the newly created config.json exists for an
+// imported container before it is restored.
+func (c *Container) generateContainerSpec() error {
+	// NewFromSpec() is marked deprecated, but the suggested replacement
+	// results in a nil map panic, so it is still used here.
+	//nolint:staticcheck
+	specGenerator := generate.NewFromSpec(c.config.Spec)
+
+	saveErr := c.saveSpec(specGenerator.Config)
+	if saveErr == nil {
+		return nil
+	}
+	return fmt.Errorf("saving imported container specification for restore failed: %w", saveErr)
+}
+
+// importCheckpointImage copies the checkpoint files stored in the image
+// identified by imageID into the container's bundle directory and then writes
+// a fresh container spec via generateContainerSpec. Entries that cannot be
+// copied are only reported at debug level.
+func (c *Container) importCheckpointImage(ctx context.Context, imageID string) error {
+	checkpointImage, _, err := c.Runtime().LibimageRuntime().LookupImage(imageID, nil)
+	if err != nil {
+		return err
+	}
+
+	imageRoot, mountErr := checkpointImage.Mount(ctx, nil, "")
+	// The deferred unmount is registered before the mount error is inspected,
+	// so it runs on every return path from here on.
+	defer func() {
+		if unmountErr := c.unmount(true); unmountErr != nil {
+			logrus.Errorf("Failed to unmount container: %v", unmountErr)
+		}
+	}()
+	if mountErr != nil {
+		return mountErr
+	}
+
+	// ConfigDumpFile and SpecDumpFile are deliberately absent from this list:
+	// new container config files are generated instead, which makes it
+	// possible to specify a new container name.
+	importNames := []string{
+		"artifacts",
+		metadata.CheckpointDirectory,
+		metadata.CheckpointVolumesDirectory,
+		metadata.DevShmCheckpointTar,
+		metadata.RootFsDiffTar,
+		metadata.DeletedFilesFile,
+		metadata.PodOptionsFile,
+		metadata.PodDumpFile,
+	}
+
+	for _, entry := range importNames {
+		from := filepath.Join(imageRoot, entry)
+		to := filepath.Join(c.bundlePath(), entry)
+		copyErr := archive.NewDefaultArchiver().CopyWithTar(from, to)
+		if copyErr != nil {
+			logrus.Debugf("Can't import '%s' from checkpoint image", entry)
+		}
+	}
+
+	return c.generateContainerSpec()
+}
+
+// importCheckpointTar imports the checkpoint archive at input into the
+// container's bundle directory (without its config files) and then writes a
+// fresh container spec via generateContainerSpec.
+func (c *Container) importCheckpointTar(input string) error {
+	importErr := crutils.CRImportCheckpointWithoutConfig(c.bundlePath(), input)
+	if importErr != nil {
+		return importErr
+	}
+	return c.generateContainerSpec()
+}
+
+// importPreCheckpoint unpacks the pre-checkpoint archive located at input
+// into the container's bundle directory.
+func (c *Container) importPreCheckpoint(input string) error {
+	preCheckpoint, openErr := os.Open(input)
+	if openErr != nil {
+		return fmt.Errorf("failed to open pre-checkpoint archive for import: %w", openErr)
+	}
+	defer preCheckpoint.Close()
+
+	if untarErr := archive.Untar(preCheckpoint, c.bundlePath(), nil); untarErr != nil {
+		return fmt.Errorf("unpacking of pre-checkpoint archive %s failed: %w", input, untarErr)
+	}
+	return nil
+}
+
+// restore restores the container from a checkpoint through the OCI runtime.
+//
+// The container must be in the configured or exited state. Depending on the
+// options, a pre-checkpoint archive (options.ImportPrevious), a checkpoint
+// archive (options.TargetFile) or a checkpoint image
+// (options.CheckpointImageID) is imported into the bundle directory first.
+// The restore is refused when CRIU's inventory.img is missing from the
+// checkpoint directory.
+//
+// When the checkpoint contains a network status file and no new name was
+// requested, the previous interface name, static IPs and static MAC are
+// reapplied unless options.IgnoreStaticIP / options.IgnoreStaticMAC
+// suppress them. For a restore into a pod (options.Pod), the namespaces
+// shared with the infra container are pointed at that container, which is
+// started if necessary.
+//
+// The named results are required by the deferred handler that cleans up the
+// container when retErr is non-nil. It returns the CRIU restore statistics
+// (nil unless options.PrintStats is set), the restore duration reported by
+// the OCI runtime, and any error.
+func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (criuStatistics *define.CRIUCheckpointRestoreStatistics, runtimeRestoreDuration int64, retErr error) {
+	// Restoring into a pod requires a different minimum CRIU version.
+	minCriuVersion := func() int {
+		if options.Pod == "" {
+			return criu.MinCriuVersion
+		}
+		return criu.PodCriuVersion
+	}()
+	if err := c.checkpointRestoreSupported(minCriuVersion); err != nil {
+		return nil, 0, err
+	}
+
+	if options.Pod != "" && !crutils.CRRuntimeSupportsPodCheckpointRestore(c.ociRuntime.Path()) {
+		return nil, 0, fmt.Errorf("runtime %s does not support pod restore", c.ociRuntime.Path())
+	}
+
+	if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
+		return nil, 0, fmt.Errorf("container %s is running or paused, cannot restore: %w", c.ID(), define.ErrCtrStateInvalid)
+	}
+
+	if options.ImportPrevious != "" {
+		if err := c.importPreCheckpoint(options.ImportPrevious); err != nil {
+			return nil, 0, err
+		}
+	}
+
+	if options.TargetFile != "" {
+		if err := c.importCheckpointTar(options.TargetFile); err != nil {
+			return nil, 0, err
+		}
+	} else if options.CheckpointImageID != "" {
+		if err := c.importCheckpointImage(ctx, options.CheckpointImageID); err != nil {
+			return nil, 0, err
+		}
+	}
+
+	// Let's try to stat() CRIU's inventory file. If it does not exist, it makes
+	// no sense to try a restore. This is a minimal check if a checkpoint exist.
+	if _, err := os.Stat(filepath.Join(c.CheckpointPath(), "inventory.img")); os.IsNotExist(err) {
+		return nil, 0, fmt.Errorf("a complete checkpoint for this container cannot be found, cannot restore: %w", err)
+	}
+
+	if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "restore.log", c.MountLabel()); err != nil {
+		return nil, 0, err
+	}
+
+	// Setting RestoreLog early in case there is a failure.
+	c.state.RestoreLog = path.Join(c.bundlePath(), "restore.log")
+	c.state.CheckpointPath = c.CheckpointPath()
+
+	// Read network configuration from checkpoint
+	var netStatus map[string]types.StatusBlock
+	_, err := metadata.ReadJSONFile(&netStatus, c.bundlePath(), metadata.NetworkStatusFile)
+	if err != nil {
+		logrus.Infof("Failed to unmarshal network status, cannot restore the same ip/mac: %v", err)
+	}
+	// If the restored container should get a new name, the IP address of
+	// the container will not be restored. This assumes that if a new name is
+	// specified, the container is restored multiple times.
+	// TODO: This implicit restoring with or without IP depending on an
+	//       unrelated restore parameter (--name) does not seem like the
+	//       best solution.
+	if err == nil && options.Name == "" && (!options.IgnoreStaticIP || !options.IgnoreStaticMAC) {
+		// The file with the network.status does exist. Let's restore the
+		// container with the same networks settings as during checkpointing.
+		networkOpts, err := c.networks()
+		if err != nil {
+			return nil, 0, err
+		}
+
+		netOpts := make(map[string]types.PerNetworkOptions, len(netStatus))
+		for network, perNetOpts := range networkOpts {
+			// unset mac and ips before we start adding the ones from the status
+			perNetOpts.StaticMAC = nil
+			perNetOpts.StaticIPs = nil
+			for name, netInt := range netStatus[network].Interfaces {
+				perNetOpts.InterfaceName = name
+				// The MAC address is controlled by IgnoreStaticMAC,
+				// independently of the IP handling below.
+				if !options.IgnoreStaticMAC {
+					perNetOpts.StaticMAC = netInt.MacAddress
+				}
+				if !options.IgnoreStaticIP {
+					for _, netAddress := range netInt.Subnets {
+						perNetOpts.StaticIPs = append(perNetOpts.StaticIPs, netAddress.IPNet.IP)
+					}
+				}
+				// Normally interfaces have a length of 1, only for some special cni configs we could get more.
+				// For now just use the first interface to get the ips this should be good enough for most cases.
+				break
+			}
+			netOpts[network] = perNetOpts
+		}
+		c.perNetworkOpts = netOpts
+	}
+
+	// Any failure after this point cleans up the container again.
+	defer func() {
+		if retErr != nil {
+			if err := c.cleanup(ctx); err != nil {
+				logrus.Errorf("Cleaning up container %s: %v", c.ID(), err)
+			}
+		}
+	}()
+
+	if err := c.prepare(); err != nil {
+		return nil, 0, err
+	}
+
+	// Read config
+	jsonPath := filepath.Join(c.bundlePath(), "config.json")
+	logrus.Debugf("generate.NewFromFile at %v", jsonPath)
+	g, err := generate.NewFromFile(jsonPath)
+	if err != nil {
+		logrus.Debugf("generate.NewFromFile failed with %v", err)
+		return nil, 0, err
+	}
+
+	// Restoring from an import means that we are doing migration
+	if options.TargetFile != "" || options.CheckpointImageID != "" {
+		g.SetRootPath(c.state.Mountpoint)
+	}
+
+	// We want to have the same network namespace as before.
+	if err := c.addNetworkNamespace(&g); err != nil {
+		return nil, 0, err
+	}
+
+	if options.Pod != "" {
+		// Running in a Pod means that we have to change all namespace settings to
+		// the ones from the infrastructure container.
+		pod, err := c.runtime.LookupPod(options.Pod)
+		if err != nil {
+			return nil, 0, fmt.Errorf("pod %q cannot be retrieved: %w", options.Pod, err)
+		}
+
+		infraContainer, err := pod.InfraContainer()
+		if err != nil {
+			return nil, 0, fmt.Errorf("cannot retrieved infra container from pod %q: %w", options.Pod, err)
+		}
+
+		// The infra container is synced and, if needed, started while its
+		// lock is held; the lock is released on every path below.
+		infraContainer.lock.Lock()
+		if err := infraContainer.syncContainer(); err != nil {
+			infraContainer.lock.Unlock()
+			return nil, 0, fmt.Errorf("error syncing infrastructure container %s status: %w", infraContainer.ID(), err)
+		}
+		if infraContainer.state.State != define.ContainerStateRunning {
+			if err := infraContainer.initAndStart(ctx); err != nil {
+				infraContainer.lock.Unlock()
+				return nil, 0, fmt.Errorf("error starting infrastructure container %s status: %w", infraContainer.ID(), err)
+			}
+		}
+		infraContainer.lock.Unlock()
+
+		if c.config.IPCNsCtr != "" {
+			nsPath, err := infraContainer.namespacePath(IPCNS)
+			if err != nil {
+				return nil, 0, fmt.Errorf("cannot retrieve IPC namespace path for Pod %q: %w", options.Pod, err)
+			}
+			if err := g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), nsPath); err != nil {
+				return nil, 0, err
+			}
+		}
+
+		if c.config.NetNsCtr != "" {
+			nsPath, err := infraContainer.namespacePath(NetNS)
+			if err != nil {
+				return nil, 0, fmt.Errorf("cannot retrieve network namespace path for Pod %q: %w", options.Pod, err)
+			}
+			if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), nsPath); err != nil {
+				return nil, 0, err
+			}
+		}
+
+		if c.config.PIDNsCtr != "" {
+			nsPath, err := infraContainer.namespacePath(PIDNS)
+			if err != nil {
+				return nil, 0, fmt.Errorf("cannot retrieve PID namespace path for Pod %q: %w", options.Pod, err)
+			}
+			if err := g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), nsPath); err != nil {
+				return nil, 0, err
+			}
+		}
+
+		if c.config.UTSNsCtr != "" {
+			nsPath, err := infraContainer.namespacePath(UTSNS)
+			if err != nil {
+				return nil, 0, fmt.Errorf("cannot retrieve UTS namespace path for Pod %q: %w", options.Pod, err)
+			}
+			if err := g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), nsPath); err != nil {
+				return nil, 0, err
+			}
+		}
+
+		if c.config.CgroupNsCtr != "" {
+			nsPath, err := infraContainer.namespacePath(CgroupNS)
+			if err != nil {
+				return nil, 0, fmt.Errorf("cannot retrieve Cgroup namespace path for Pod %q: %w", options.Pod, err)
+			}
+			if err := g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), nsPath); err != nil {
+				return nil, 0, err
+			}
+		}
+	}
+
+	if err := c.makeBindMounts(); err != nil {
+		return nil, 0, err
+	}
+
+	// For an imported checkpoint the bind mounts are added to the spec,
+	// unless the spec already has a mount for the same destination.
+	if options.TargetFile != "" || options.CheckpointImageID != "" {
+		for dstPath, srcPath := range c.state.BindMounts {
+			newMount := spec.Mount{
+				Type:        "bind",
+				Source:      srcPath,
+				Destination: dstPath,
+				Options:     []string{"bind", "private"},
+			}
+			if c.IsReadOnly() && dstPath != "/dev/shm" {
+				newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
+			}
+			if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
+				newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
+			}
+			if !MountExists(g.Mounts(), dstPath) {
+				g.AddMount(newMount)
+			}
+		}
+	}
+
+	// Restore /dev/shm content
+	if c.config.ShmDir != "" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
+		shmDirTarFileFullPath := filepath.Join(c.bundlePath(), metadata.DevShmCheckpointTar)
+		if _, err := os.Stat(shmDirTarFileFullPath); err != nil {
+			logrus.Debug("Container checkpoint doesn't contain dev/shm: ", err.Error())
+		} else {
+			shmDirTarFile, err := os.Open(shmDirTarFileFullPath)
+			if err != nil {
+				return nil, 0, err
+			}
+			defer shmDirTarFile.Close()
+
+			if err := archive.UntarUncompressed(shmDirTarFile, c.config.ShmDir, nil); err != nil {
+				return nil, 0, err
+			}
+		}
+	}
+
+	// Cleanup for a working restore.
+	if err := c.removeConmonFiles(); err != nil {
+		return nil, 0, err
+	}
+
+	// Save the OCI spec to disk
+	if err := c.saveSpec(g.Config); err != nil {
+		return nil, 0, err
+	}
+
+	// When restoring from an imported archive, allow restoring the content of volumes.
+	// Volumes are created in setupContainer()
+	if !options.IgnoreVolumes && (options.TargetFile != "" || options.CheckpointImageID != "") {
+		for _, v := range c.config.NamedVolumes {
+			volumeFilePath := filepath.Join(c.bundlePath(), metadata.CheckpointVolumesDirectory, v.Name+".tar")
+
+			volumeFile, err := os.Open(volumeFilePath)
+			if err != nil {
+				return nil, 0, fmt.Errorf("failed to open volume file %s: %w", volumeFilePath, err)
+			}
+			defer volumeFile.Close()
+
+			volume, err := c.runtime.GetVolume(v.Name)
+			if err != nil {
+				return nil, 0, fmt.Errorf("failed to retrieve volume %s: %w", v.Name, err)
+			}
+
+			mountPoint, err := volume.MountPoint()
+			if err != nil {
+				return nil, 0, err
+			}
+			if mountPoint == "" {
+				// err is nil at this point, so wrap a real sentinel error
+				// instead of formatting a nil error into the message.
+				return nil, 0, fmt.Errorf("unable to import volume %s as it is not mounted: %w", volume.Name(), define.ErrInternal)
+			}
+			if err := archive.UntarUncompressed(volumeFile, mountPoint, nil); err != nil {
+				return nil, 0, fmt.Errorf("failed to extract volume %s to %s: %w", volumeFilePath, mountPoint, err)
+			}
+		}
+	}
+
+	// Before actually restarting the container, apply the root file-system changes
+	if !options.IgnoreRootfs {
+		if err := crutils.CRApplyRootFsDiffTar(c.bundlePath(), c.state.Mountpoint); err != nil {
+			return nil, 0, err
+		}
+
+		if err := crutils.CRRemoveDeletedFiles(c.ID(), c.bundlePath(), c.state.Mountpoint); err != nil {
+			return nil, 0, err
+		}
+	}
+
+	runtimeRestoreDuration, err = c.ociRuntime.CreateContainer(c, &options)
+	if err != nil {
+		return nil, 0, err
+	}
+
+	// Statistics are only collected on request; otherwise nil is returned.
+	criuStatistics, err = func() (*define.CRIUCheckpointRestoreStatistics, error) {
+		if !options.PrintStats {
+			return nil, nil
+		}
+		statsDirectory, err := os.Open(c.bundlePath())
+		if err != nil {
+			return nil, fmt.Errorf("not able to open %q: %w", c.bundlePath(), err)
+		}
+		// Release the directory handle once the statistics have been read.
+		defer statsDirectory.Close()
+
+		restoreStatistics, err := stats.CriuGetRestoreStats(statsDirectory)
+		if err != nil {
+			return nil, fmt.Errorf("displaying restore statistics not possible: %w", err)
+		}
+
+		return &define.CRIUCheckpointRestoreStatistics{
+			PagesCompared:   restoreStatistics.GetPagesCompared(),
+			PagesSkippedCow: restoreStatistics.GetPagesSkippedCow(),
+			ForkingTime:     restoreStatistics.GetForkingTime(),
+			RestoreTime:     restoreStatistics.GetRestoreTime(),
+			PagesRestored:   restoreStatistics.GetPagesRestored(),
+		}, nil
+	}()
+	if err != nil {
+		return nil, 0, err
+	}
+
+	logrus.Debugf("Restored container %s", c.ID())
+
+	c.state.State = define.ContainerStateRunning
+	c.state.Checkpointed = false
+	c.state.Restored = true
+	c.state.CheckpointedTime = time.Time{}
+	c.state.RestoredTime = time.Now()
+
+	if !options.Keep {
+		// Delete all checkpoint related files. At this point, in theory, all files
+		// should exist. Still ignoring errors for now as the container should be
+		// restored and running. Not erroring out just because some cleanup operation
+		// failed. Starting with the checkpoint directory
+		err = os.RemoveAll(c.CheckpointPath())
+		if err != nil {
+			logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", c.CheckpointPath(), err)
+		}
+		c.state.CheckpointPath = ""
+		err = os.RemoveAll(c.PreCheckPointPath())
+		if err != nil {
+			logrus.Debugf("Non-fatal: removal of pre-checkpoint directory (%s) failed: %v", c.PreCheckPointPath(), err)
+		}
+		err = os.RemoveAll(c.CheckpointVolumesPath())
+		if err != nil {
+			logrus.Debugf("Non-fatal: removal of checkpoint volumes directory (%s) failed: %v", c.CheckpointVolumesPath(), err)
+		}
+		cleanup := [...]string{
+			"restore.log",
+			"dump.log",
+			stats.StatsDump,
+			stats.StatsRestore,
+			metadata.DevShmCheckpointTar,
+			metadata.NetworkStatusFile,
+			metadata.RootFsDiffTar,
+			metadata.DeletedFilesFile,
+		}
+		for _, del := range cleanup {
+			file := filepath.Join(c.bundlePath(), del)
+			err = os.Remove(file)
+			if err != nil {
+				logrus.Debugf("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err)
+			}
+		}
+		c.state.CheckpointLog = ""
+		c.state.RestoreLog = ""
+	}
+
+	return criuStatistics, runtimeRestoreDuration, c.save()
+}
+
+// getRootNetNsDepCtr retrieves a container's "root" net namespace container
+// dependency. It follows the NetNsCtr references, starting at this container,
+// until it reaches a container that does not itself reference another
+// container's network namespace.
+//
+// It returns an error if the chain of references loops back on itself, if a
+// dependency cannot be loaded from the runtime state, or if no dependency
+// container was found at all.
+func (c *Container) getRootNetNsDepCtr() (depCtr *Container, err error) {
+	containersVisited := map[string]int{c.config.ID: 1}
+	nextCtr := c.config.NetNsCtr
+	for nextCtr != "" {
+		// Make sure we aren't in a loop
+		if _, visited := containersVisited[nextCtr]; visited {
+			return nil, errors.New("loop encountered while determining net namespace container")
+		}
+		containersVisited[nextCtr] = 1
+
+		depCtr, err = c.runtime.state.Container(nextCtr)
+		if err != nil {
+			// Name the dependency whose lookup actually failed; further
+			// down the chain this is no longer c's direct NetNsCtr.
+			return nil, fmt.Errorf("error fetching dependency %s of container %s: %w", nextCtr, c.ID(), err)
+		}
+		// This should never happen without an error
+		if depCtr == nil {
+			break
+		}
+		nextCtr = depCtr.config.NetNsCtr
+	}
+
+	if depCtr == nil {
+		return nil, errors.New("unexpected error depCtr is nil without reported error from runtime state")
+	}
+	return depCtr, nil
+}
+
+// mountIntoRootDirs records mountPath as the bind-mount source for mountName
+// in the container's root directory and in each of its configured chroot
+// directories. It always returns nil.
+func (c *Container) mountIntoRootDirs(mountName string, mountPath string) error {
+	bindMounts := c.state.BindMounts
+
+	// The container's own root always receives the mount.
+	bindMounts[mountName] = mountPath
+
+	// Every chroot directory gets the same source below its own prefix.
+	for i := range c.config.ChrootDirs {
+		target := filepath.Join(c.config.ChrootDirs[i], mountName)
+		bindMounts[target] = mountPath
+	}
+
+	return nil
+}
+
+// makeBindMounts populates c.state.BindMounts with the standard files that
+// are bind-mounted into the container: /etc/resolv.conf and /etc/hosts
+// (depending on the network setup and the UseImageResolvConf/UseImageHosts
+// settings), /dev/shm, /etc/passwd and /etc/group, /etc/hostname,
+// /etc/localtime, /run/.containerenv, subscription mounts and secrets.
+// The container's run directory is chowned to the container's root UID/GID
+// before anything else is done.
+func (c *Container) makeBindMounts() error {
+	if err := os.Chown(c.state.RunDir, c.RootUID(), c.RootGID()); err != nil {
+		return fmt.Errorf("cannot chown run directory: %w", err)
+	}
+
+	if c.state.BindMounts == nil {
+		c.state.BindMounts = make(map[string]string)
+	}
+	netDisabled, err := c.NetworkDisabled()
+	if err != nil {
+		return err
+	}
+
+	if !netDisabled {
+		// If /etc/resolv.conf and /etc/hosts exist, delete them so we
+		// will recreate. Only do this if we aren't sharing them with
+		// another container.
+		if c.config.NetNsCtr == "" {
+			if resolvePath, ok := c.state.BindMounts["/etc/resolv.conf"]; ok {
+				if err := os.Remove(resolvePath); err != nil && !os.IsNotExist(err) {
+					return fmt.Errorf("container %s: %w", c.ID(), err)
+				}
+				delete(c.state.BindMounts, "/etc/resolv.conf")
+			}
+			if hostsPath, ok := c.state.BindMounts["/etc/hosts"]; ok {
+				if err := os.Remove(hostsPath); err != nil && !os.IsNotExist(err) {
+					return fmt.Errorf("container %s: %w", c.ID(), err)
+				}
+				delete(c.state.BindMounts, "/etc/hosts")
+			}
+		}
+
+		if c.config.NetNsCtr != "" && (!c.config.UseImageResolvConf || !c.config.UseImageHosts) {
+			// We share a net namespace.
+			// We want /etc/resolv.conf and /etc/hosts from the
+			// other container. Unless we're not creating both of
+			// them.
+			depCtr, err := c.getRootNetNsDepCtr()
+			if err != nil {
+				return fmt.Errorf("error fetching network namespace dependency container for container %s: %w", c.ID(), err)
+			}
+
+			// We need that container's bind mounts
+			bindMounts, err := depCtr.BindMounts()
+			if err != nil {
+				return fmt.Errorf("error fetching bind mounts from dependency %s of container %s: %w", depCtr.ID(), c.ID(), err)
+			}
+
+			// The other container may not have a resolv.conf or /etc/hosts
+			// If it doesn't, don't copy them
+			resolvPath, exists := bindMounts["/etc/resolv.conf"]
+			if !c.config.UseImageResolvConf && exists {
+				err := c.mountIntoRootDirs("/etc/resolv.conf", resolvPath)
+
+				if err != nil {
+					return fmt.Errorf("error assigning mounts to container %s: %w", c.ID(), err)
+				}
+			}
+
+			// check if dependency container has an /etc/hosts file.
+			// It may not have one, so only use it if it does.
+			hostsPath, exists := bindMounts[config.DefaultHostsFile]
+			if !c.config.UseImageHosts && exists {
+				// we cannot use the dependency container lock due ABBA deadlocks in cleanup()
+				lock, err := lockfile.GetLockfile(hostsPath)
+				if err != nil {
+					return fmt.Errorf("failed to lock hosts file: %w", err)
+				}
+				lock.Lock()
+
+				// add the newly added container to the hosts file
+				// we always use 127.0.0.1 as ip since they have the same netns
+				err = etchosts.Add(hostsPath, getLocalhostHostEntry(c))
+				// The lock is released before the error is checked so it is
+				// never held across the return.
+				lock.Unlock()
+				if err != nil {
+					return fmt.Errorf("error creating hosts file for container %s which depends on container %s: %w", c.ID(), depCtr.ID(), err)
+				}
+
+				// finally, save it in the new container
+				err = c.mountIntoRootDirs(config.DefaultHostsFile, hostsPath)
+				if err != nil {
+					return fmt.Errorf("error assigning mounts to container %s: %w", c.ID(), err)
+				}
+			}
+
+			// NOTE(review): resolvPath/hostsPath are empty strings when the
+			// dependency has no such bind mount; makeAccessible is still
+			// called with them — confirm it tolerates an empty path.
+			if !hasCurrentUserMapped(c) {
+				if err := makeAccessible(resolvPath, c.RootUID(), c.RootGID()); err != nil {
+					return err
+				}
+				if err := makeAccessible(hostsPath, c.RootUID(), c.RootGID()); err != nil {
+					return err
+				}
+			}
+		} else {
+			if !c.config.UseImageResolvConf {
+				if err := c.generateResolvConf(); err != nil {
+					return fmt.Errorf("error creating resolv.conf for container %s: %w", c.ID(), err)
+				}
+			}
+
+			if !c.config.UseImageHosts {
+				if err := c.createHosts(); err != nil {
+					return fmt.Errorf("error creating hosts file for container %s: %w", c.ID(), err)
+				}
+			}
+		}
+
+		// Relabel whichever of the two files ended up in the bind mounts
+		// with the container's mount label.
+		if c.state.BindMounts["/etc/hosts"] != "" {
+			if err := c.relabel(c.state.BindMounts["/etc/hosts"], c.config.MountLabel, true); err != nil {
+				return err
+			}
+		}
+
+		if c.state.BindMounts["/etc/resolv.conf"] != "" {
+			if err := c.relabel(c.state.BindMounts["/etc/resolv.conf"], c.config.MountLabel, true); err != nil {
+				return err
+			}
+		}
+	} else if !c.config.UseImageHosts && c.state.BindMounts["/etc/hosts"] == "" {
+		// Networking is disabled, but a hosts file is still created when
+		// none is recorded in the bind mounts yet.
+		if err := c.createHosts(); err != nil {
+			return fmt.Errorf("error creating hosts file for container %s: %w", c.ID(), err)
+		}
+	}
+
+	if c.config.ShmDir != "" {
+		// If ShmDir has a value SHM is always added when we mount the container
+		c.state.BindMounts["/dev/shm"] = c.config.ShmDir
+	}
+
+	// A nil Passwd setting is treated the same as true.
+	if c.config.Passwd == nil || *c.config.Passwd {
+		newPasswd, newGroup, err := c.generatePasswdAndGroup()
+		if err != nil {
+			return fmt.Errorf("error creating temporary passwd file for container %s: %w", c.ID(), err)
+		}
+		if newPasswd != "" {
+			// Make /etc/passwd
+			// If it already exists, delete so we can recreate
+			delete(c.state.BindMounts, "/etc/passwd")
+			c.state.BindMounts["/etc/passwd"] = newPasswd
+		}
+		if newGroup != "" {
+			// Make /etc/group
+			// If it already exists, delete so we can recreate
+			delete(c.state.BindMounts, "/etc/group")
+			c.state.BindMounts["/etc/group"] = newGroup
+		}
+	}
+
+	// Make /etc/hostname
+	// This should never change, so no need to recreate if it exists
+	if _, ok := c.state.BindMounts["/etc/hostname"]; !ok {
+		hostnamePath, err := c.writeStringToRundir("hostname", c.Hostname())
+		if err != nil {
+			return fmt.Errorf("error creating hostname file for container %s: %w", c.ID(), err)
+		}
+		c.state.BindMounts["/etc/hostname"] = hostnamePath
+	}
+
+	// Make /etc/localtime
+	ctrTimezone := c.Timezone()
+	if ctrTimezone != "" {
+		// validate the format of the timezone specified if it's not "local"
+		if ctrTimezone != "local" {
+			_, err = time.LoadLocation(ctrTimezone)
+			if err != nil {
+				return fmt.Errorf("error finding timezone for container %s: %w", c.ID(), err)
+			}
+		}
+		if _, ok := c.state.BindMounts["/etc/localtime"]; !ok {
+			var zonePath string
+			if ctrTimezone == "local" {
+				// "local" resolves the host's /etc/localtime.
+				zonePath, err = filepath.EvalSymlinks("/etc/localtime")
+				if err != nil {
+					return fmt.Errorf("error finding local timezone for container %s: %w", c.ID(), err)
+				}
+			} else {
+				zone := filepath.Join("/usr/share/zoneinfo", ctrTimezone)
+				zonePath, err = filepath.EvalSymlinks(zone)
+				if err != nil {
+					return fmt.Errorf("error setting timezone for container %s: %w", c.ID(), err)
+				}
+			}
+			localtimePath, err := c.copyTimezoneFile(zonePath)
+			if err != nil {
+				return fmt.Errorf("error setting timezone for container %s: %w", c.ID(), err)
+			}
+			c.state.BindMounts["/etc/localtime"] = localtimePath
+		}
+	}
+
+	_, hasRunContainerenv := c.state.BindMounts["/run/.containerenv"]
+	if !hasRunContainerenv {
+		// check in the spec mounts
+		// A mount on /run itself also suppresses the generated file.
+		for _, m := range c.config.Spec.Mounts {
+			if m.Destination == "/run/.containerenv" || m.Destination == "/run" {
+				hasRunContainerenv = true
+				break
+			}
+		}
+	}
+
+	// Make .containerenv if it does not exist
+	if !hasRunContainerenv {
+		containerenv := c.runtime.graphRootMountedFlag(c.config.Spec.Mounts)
+		isRootless := 0
+		if rootless.IsRootless() {
+			isRootless = 1
+		}
+		imageID, imageName := c.Image()
+
+		// Only privileged containers get the detailed container information;
+		// otherwise the file holds just the graphRootMountedFlag output.
+		if c.Privileged() {
+			// Populate the .containerenv with container information
+			containerenv = fmt.Sprintf(`engine="podman-%s"
+name=%q
+id=%q
+image=%q
+imageid=%q
+rootless=%d
+%s`, version.Version.String(), c.Name(), c.ID(), imageName, imageID, isRootless, containerenv)
+		}
+		containerenvPath, err := c.writeStringToRundir(".containerenv", containerenv)
+		if err != nil {
+			return fmt.Errorf("error creating containerenv file for container %s: %w", c.ID(), err)
+		}
+		c.state.BindMounts["/run/.containerenv"] = containerenvPath
+	}
+
+	// Add Subscription Mounts
+	// Destinations that already have a bind mount are left untouched.
+	subscriptionMounts := subscriptions.MountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.Containers.DefaultMountsFile, c.state.Mountpoint, c.RootUID(), c.RootGID(), rootless.IsRootless(), false)
+	for _, mount := range subscriptionMounts {
+		if _, ok := c.state.BindMounts[mount.Destination]; !ok {
+			c.state.BindMounts[mount.Destination] = mount.Source
+		}
+	}
+
+	// Secrets are mounted by getting the secret data from the secrets manager,
+	// copying the data into the container's static dir,
+	// then mounting the copied dir into /run/secrets.
+	// The secrets mounting must come after subscription mounts, since subscription mounts
+	// creates the /run/secrets dir in the container where we mount as well.
+	if len(c.Secrets()) > 0 {
+		// create /run/secrets if subscriptions did not create
+		if err := c.createSecretMountDir(); err != nil {
+			return fmt.Errorf("error creating secrets mount: %w", err)
+		}
+		for _, secret := range c.Secrets() {
+			// The secret is mounted under its own name unless a target
+			// is configured.
+			secretFileName := secret.Name
+			base := "/run/secrets"
+			if secret.Target != "" {
+				secretFileName = secret.Target
+				// If absolute path for target given remove base.
+				if filepath.IsAbs(secretFileName) {
+					base = ""
+				}
+			}
+			src := filepath.Join(c.config.SecretsPath, secret.Name)
+			dest := filepath.Join(base, secretFileName)
+			c.state.BindMounts[dest] = src
+		}
+	}
+
+	return nil
+}
+
+// generateResolvConf writes the container's resolv.conf into its run
+// directory and hands it to bindMountRootFile for the default resolv.conf
+// path.
+//
+// Nameservers from containers.conf and from the container config win. Only
+// when none are configured are the servers and search domains reported by the
+// container's networks used, extended by the platform hook
+// addSlirp4netnsDNS, with KeepHostServers set. Search domains from
+// containers.conf or the container config replace network-provided ones.
+func (c *Container) generateResolvConf() error {
+	var (
+		networkNameServers   []string
+		networkSearchDomains []string
+	)
+
+	netStatus := c.getNetworkStatus()
+	for _, status := range netStatus {
+		if status.DNSServerIPs != nil {
+			for _, nsIP := range status.DNSServerIPs {
+				networkNameServers = append(networkNameServers, nsIP.String())
+			}
+			logrus.Debugf("Adding nameserver(s) from network status of '%q'", status.DNSServerIPs)
+		}
+		if status.DNSSearchDomains != nil {
+			networkSearchDomains = append(networkSearchDomains, status.DNSSearchDomains...)
+			logrus.Debugf("Adding search domain(s) from network status of '%q'", status.DNSSearchDomains)
+		}
+	}
+
+	ipv6, err := c.checkForIPv6(netStatus)
+	if err != nil {
+		return err
+	}
+
+	nameservers := make([]string, 0, len(c.runtime.config.Containers.DNSServers)+len(c.config.DNSServer))
+	nameservers = append(nameservers, c.runtime.config.Containers.DNSServers...)
+	for _, ip := range c.config.DNSServer {
+		nameservers = append(nameservers, ip.String())
+	}
+	// If the user provided dns, it trumps all; then dns masq; then resolv.conf
+	var search []string
+	keepHostServers := false
+	if len(nameservers) == 0 {
+		keepHostServers = true
+		// first add the nameservers from the networks status
+		nameservers = networkNameServers
+		// when we add network dns server we also have to add the search domains
+		search = networkSearchDomains
+		// slirp4netns has a built in DNS forwarder.
+		nameservers = c.addSlirp4netnsDNS(nameservers)
+	}
+
+	if len(c.config.DNSSearch) > 0 || len(c.runtime.config.Containers.DNSSearches) > 0 {
+		customSearch := make([]string, 0, len(c.config.DNSSearch)+len(c.runtime.config.Containers.DNSSearches))
+		customSearch = append(customSearch, c.runtime.config.Containers.DNSSearches...)
+		customSearch = append(customSearch, c.config.DNSSearch...)
+		search = customSearch
+	}
+
+	options := make([]string, 0, len(c.config.DNSOption)+len(c.runtime.config.Containers.DNSOptions))
+	options = append(options, c.runtime.config.Containers.DNSOptions...)
+	options = append(options, c.config.DNSOption...)
+
+	destPath := filepath.Join(c.state.RunDir, "resolv.conf")
+
+	params := &resolvconf.Params{
+		IPv6Enabled:     ipv6,
+		KeepHostServers: keepHostServers,
+		Nameservers:     nameservers,
+		Options:         options,
+		Path:            destPath,
+		Searches:        search,
+	}
+	// Spec.Linux is a pointer and this file is also built for FreeBSD, where
+	// the Linux section may be absent; only read the namespaces when it is
+	// set instead of panicking on a nil dereference.
+	if c.config.Spec.Linux != nil {
+		params.Namespaces = c.config.Spec.Linux.Namespaces
+	}
+
+	if err := resolvconf.New(params); err != nil {
+		return fmt.Errorf("error building resolv.conf for container %s: %w", c.ID(), err)
+	}
+
+	return c.bindMountRootFile(destPath, resolvconf.DefaultResolvConf)
+}
+
+// checkForIPv6 reports whether the container uses IPv6. It returns true as
+// soon as any subnet in netStatus carries a non-IPv4 address; otherwise the
+// answer is delegated to isSlirp4netnsIPv6.
+func (c *Container) checkForIPv6(netStatus map[string]types.StatusBlock) (bool, error) {
+	for _, status := range netStatus {
+		for _, netInt := range status.Interfaces {
+			for _, subnet := range netInt.Subnets {
+				ip := subnet.IPNet.IP
+				// To16 alone cannot be used: it also yields a valid
+				// result for IPv4 addresses.
+				if ip.To4() != nil || ip.To16() == nil {
+					continue
+				}
+				return true, nil
+			}
+		}
+	}
+
+	return c.isSlirp4netnsIPv6()
+}
+
+// addNameserver passes ips to resolvconf.Add for the container's resolv.conf.
+// It is a no-op unless the container is in the running or created state and
+// has a resolv.conf registered in its bind mounts.
+func (c *Container) addNameserver(ips []string) error {
+	if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
+		// Not in a state where the file should be edited.
+		return nil
+	}
+
+	resolvPath, found := c.state.BindMounts[resolvconf.DefaultResolvConf]
+	if !found {
+		// No resolv.conf is managed for this container.
+		return nil
+	}
+
+	err := resolvconf.Add(resolvPath, ips)
+	if err != nil {
+		return fmt.Errorf("adding new nameserver to container %s resolv.conf: %w", c.ID(), err)
+	}
+	return nil
+}
+
+// removeNameserver passes ips to resolvconf.Remove for the container's
+// resolv.conf.
+// It is a no-op unless the container is in the running or created state and
+// has a resolv.conf registered in its bind mounts.
+func (c *Container) removeNameserver(ips []string) error {
+	if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
+		// Not in a state where the file should be edited.
+		return nil
+	}
+
+	resolvPath, found := c.state.BindMounts[resolvconf.DefaultResolvConf]
+	if !found {
+		// No resolv.conf is managed for this container.
+		return nil
+	}
+
+	err := resolvconf.Remove(resolvPath, ips)
+	if err != nil {
+		return fmt.Errorf("removing nameservers from container %s resolv.conf: %w", c.ID(), err)
+	}
+	return nil
+}
+
+// getLocalhostHostEntry returns a single hosts entry that maps 127.0.0.1 to
+// the container's hostname and its configured name.
+func getLocalhostHostEntry(c *Container) etchosts.HostEntries {
+	names := []string{c.Hostname(), c.config.Name}
+	return etchosts.HostEntries{
+		{IP: "127.0.0.1", Names: names},
+	}
+}
+
+// getHostsEntries returns the container ip host entries for the correct netmode
+func (c *Container) getHostsEntries() (etchosts.HostEntries, error) {
+	var entries etchosts.HostEntries
+	names := []string{c.Hostname(), c.config.Name}
+	switch {
+	case c.config.NetMode.IsBridge():
+		entries = etchosts.GetNetworkHostEntries(c.state.NetworkStatus, names...)
+	case c.config.NetMode.IsSlirp4netns():
+		ip, err := GetSlirp4netnsIP(c.slirp4netnsSubnet)
+		if err != nil {
+			return nil, err
+		}
+		entries = etchosts.HostEntries{{IP: ip.String(), Names: names}}
+	default:
+		if c.hasNetNone() {
+			entries = etchosts.HostEntries{{IP: "127.0.0.1", Names: names}}
+		}
+	}
+	return entries, nil
+}
+
+// createHosts generates the container's hosts file in its run directory and
+// hands it to bindMountRootFile for the default hosts path.
+func (c *Container) createHosts() error {
+	// When the netns is configured after container creation the real IPs are
+	// not known yet, so no container entries are written here; they are
+	// added in c.completeNetworkSetup() instead.
+	var ipEntries etchosts.HostEntries
+	if !c.config.PostConfigureNetNS {
+		entries, err := c.getHostsEntries()
+		if err != nil {
+			return fmt.Errorf("failed to get container ip host entries: %w", err)
+		}
+		ipEntries = entries
+	}
+
+	baseHostFile, err := etchosts.GetBaseHostFile(c.runtime.config.Containers.BaseHostsFile, c.state.Mountpoint)
+	if err != nil {
+		return err
+	}
+
+	hostsPath := filepath.Join(c.state.RunDir, "hosts")
+	params := &etchosts.Params{
+		BaseFile:                 baseHostFile,
+		ExtraHosts:               c.config.HostAdd,
+		ContainerIPs:             ipEntries,
+		HostContainersInternalIP: etchosts.GetHostContainersInternalIP(c.runtime.config, c.state.NetworkStatus, c.runtime.network),
+		TargetFile:               hostsPath,
+	}
+	if err := etchosts.New(params); err != nil {
+		return err
+	}
+
+	return c.bindMountRootFile(hostsPath, config.DefaultHostsFile)
+}
+
+// bindMountRootFile prepares a host file for use inside the container and
+// registers it through mountIntoRootDirs.
+// The file at source (host path) is chowned to the container's root UID/GID
+// and relabeled with the container's mount label; dest is the path inside the
+// container.
+func (c *Container) bindMountRootFile(source, dest string) error {
+	rootUID, rootGID := c.RootUID(), c.RootGID()
+	if err := os.Chown(source, rootUID, rootGID); err != nil {
+		return err
+	}
+
+	mountLabel := c.MountLabel()
+	if err := label.Relabel(source, mountLabel, false); err != nil {
+		return err
+	}
+
+	return c.mountIntoRootDirs(dest, source)
+}
+
+// generateGroupEntry generates an entry or entries into /etc/group as
+// required by container configuration.
+// Generally speaking, we will make an entry under two circumstances:
+//  1. The container is started as a specific user:group, and that group is both
+//     numeric, and does not already exist in /etc/group.
+//  2. It is requested that Libpod add the group that launched Podman to
+//     /etc/group via AddCurrentUserPasswdEntry (though this does not trigger if
+//     the group in question already exists in /etc/passwd).
+//
+// Returns group entry (as a string that can be appended to /etc/group) and any
+// error that occurred.
+func (c *Container) generateGroupEntry() (string, error) {
+	groupString := ""
+
+	// Things we *can't* handle: adding the user we added in
+	// generatePasswdEntry to any *existing* groups.
+	addedGID := 0
+	if c.config.AddCurrentUserPasswdEntry {
+		entry, gid, err := c.generateCurrentUserGroupEntry()
+		if err != nil {
+			return "", err
+		}
+		groupString += entry
+		addedGID = gid
+	}
+	if c.config.User != "" {
+		entry, err := c.generateUserGroupEntry(addedGID)
+		if err != nil {
+			return "", err
+		}
+		groupString += entry
+	}
+
+	return groupString, nil
+}
+
+// generateCurrentUserGroupEntry builds an /etc/group line for the group of the
+// rootless user running podman.
+// It returns an empty entry when the rootless GID is 0, or when looking up the
+// group name or GID in the image yields anything other than
+// ErrNoGroupEntries (that result, nil or an error, is returned as the error).
+// On success the entry and the added GID are returned.
+func (c *Container) generateCurrentUserGroupEntry() (string, int, error) {
+	gid := rootless.GetRootlessGID()
+	if gid == 0 {
+		return "", 0, nil
+	}
+
+	hostGroup, err := user.LookupGroupId(strconv.Itoa(gid))
+	if err != nil {
+		return "", 0, fmt.Errorf("failed to get current group: %w", err)
+	}
+
+	// Skip the entry if the image already knows the group, by name or by GID.
+	for _, key := range []string{hostGroup.Name, hostGroup.Gid} {
+		if _, err := lookup.GetGroup(c.state.Mountpoint, key); err != runcuser.ErrNoGroupEntries {
+			return "", 0, err
+		}
+	}
+
+	// The rootless user's name goes into the member list of the group.
+	member := ""
+	if uid := rootless.GetRootlessUID(); uid != 0 {
+		hostUser, err := user.LookupId(strconv.Itoa(uid))
+		if err != nil {
+			return "", 0, fmt.Errorf("failed to get current user to make group entry: %w", err)
+		}
+		member = hostUser.Username
+	}
+
+	entry := fmt.Sprintf("%s:x:%s:%s\n", hostGroup.Name, hostGroup.Gid, member)
+	return entry, gid, nil
+}
+
+// generateUserGroupEntry builds an /etc/group line for the group the container
+// was configured to run as.
+// addedGID is a GID already added by another function; if it matches, nothing
+// is generated. Nothing is generated either when no user is configured or when
+// the group is not numeric. If the group lookup in the image returns anything
+// other than ErrNoGroupEntries, an empty entry and that result are returned.
+func (c *Container) generateUserGroupEntry(addedGID int) (string, error) {
+	if c.config.User == "" {
+		return "", nil
+	}
+
+	// "user[:group]"; without a group part the user part doubles as group.
+	parts := strings.SplitN(c.config.User, ":", 2)
+	userPart := parts[0]
+	groupPart := userPart
+	if len(parts) == 2 {
+		groupPart = parts[1]
+	}
+
+	gid, err := strconv.ParseUint(groupPart, 10, 32)
+	if err != nil {
+		return "", nil //nolint: nilerr
+	}
+
+	if addedGID != 0 && int(gid) == addedGID {
+		return "", nil
+	}
+
+	// Check if the group already exists
+	if _, err := lookup.GetGroup(c.state.Mountpoint, groupPart); err != runcuser.ErrNoGroupEntries {
+		return "", err
+	}
+
+	return fmt.Sprintf("%d:x:%d:%s\n", gid, gid, userPart), nil
+}
+
+// generatePasswdEntry generates an entry or entries into /etc/passwd as
+// required by container configuration.
+// Generally speaking, we will make an entry under two circumstances:
+//  1. The container is started as a specific user who is not in /etc/passwd.
+//     This only triggers if the user is given as a *numeric* ID.
+//  2. It is requested that Libpod add the user that launched Podman to
+//     /etc/passwd via AddCurrentUserPasswdEntry (though this does not trigger if
+//     the user in question already exists in /etc/passwd) or the UID to be added
+//     is 0).
+//  3. The user specified additional host user accounts to add the the /etc/passwd file
+//
+// Returns password entry (as a string that can be appended to /etc/passwd) and
+// any error that occurred.
+func (c *Container) generatePasswdEntry() (string, error) {
+	passwdString := ""
+
+	addedUID := 0
+	for _, userid := range c.config.HostUsers {
+		// Look up User on host
+		u, err := util.LookupUser(userid)
+		if err != nil {
+			return "", err
+		}
+		entry, err := c.userPasswdEntry(u)
+		if err != nil {
+			return "", err
+		}
+		passwdString += entry
+	}
+	if c.config.AddCurrentUserPasswdEntry {
+		entry, uid, _, err := c.generateCurrentUserPasswdEntry()
+		if err != nil {
+			return "", err
+		}
+		passwdString += entry
+		addedUID = uid
+	}
+	if c.config.User != "" {
+		entry, err := c.generateUserPasswdEntry(addedUID)
+		if err != nil {
+			return "", err
+		}
+		passwdString += entry
+	}
+
+	return passwdString, nil
+}
+
+// generateCurrentUserPasswdEntry builds an /etc/passwd line for the rootless
+// user running the container engine.
+// It returns the entry together with the rootless UID and GID. When the
+// rootless UID is 0 nothing is generated and all results are zero values.
+func (c *Container) generateCurrentUserPasswdEntry() (string, int, int, error) {
+	uid := rootless.GetRootlessUID()
+	if uid == 0 {
+		return "", 0, 0, nil
+	}
+
+	hostUser, err := user.LookupId(strconv.Itoa(uid))
+	if err != nil {
+		return "", 0, 0, fmt.Errorf("failed to get current user: %w", err)
+	}
+
+	entry, err := c.userPasswdEntry(hostUser)
+	if err != nil {
+		return "", 0, 0, err
+	}
+
+	gid := rootless.GetRootlessGID()
+	return entry, uid, gid, nil
+}
+
+// userPasswdEntry builds an /etc/passwd line for the host user u.
+//
+// If looking up u's name or UID in the image yields anything other than
+// ErrNoPasswdEntries, an empty entry is returned together with that result
+// (nil when the user already exists).
+// The home directory in the entry is u.HomeDir when that directory, or one of
+// its parents, is a mount of the container or one of its user volumes;
+// otherwise it is the container's working directory.
+// As a side effect a HOME variable is appended to the spec's process
+// environment when none is set yet.
+func (c *Container) userPasswdEntry(u *user.User) (string, error) {
+	// Look up the user to see if it exists in the container image.
+	_, err := lookup.GetUser(c.state.Mountpoint, u.Username)
+	if err != runcuser.ErrNoPasswdEntries {
+		return "", err
+	}
+
+	// Look up the UID to see if it exists in the container image.
+	_, err = lookup.GetUser(c.state.Mountpoint, u.Uid)
+	if err != runcuser.ErrNoPasswdEntries {
+		return "", err
+	}
+
+	// If the user's actual home directory exists, or was mounted in - use
+	// that.
+	homeDir := c.WorkingDir()
+	// Walk from the home directory up through its parents. filepath.Dir
+	// converges on "/" for absolute paths but on "." for empty or relative
+	// ones, so both fixpoints must end the walk; testing for "/" alone would
+	// loop forever on a host account without an absolute home directory.
+	for hDir := u.HomeDir; hDir != "" && hDir != "." && hDir != "/"; hDir = filepath.Dir(hDir) {
+		if MountExists(c.config.Spec.Mounts, hDir) {
+			homeDir = u.HomeDir
+			break
+		}
+	}
+	if homeDir != u.HomeDir {
+		for _, volume := range c.UserVolumes() {
+			if volume == u.HomeDir {
+				homeDir = u.HomeDir
+				break
+			}
+		}
+	}
+	// Set HOME environment if not already set
+	hasHomeSet := false
+	for _, s := range c.config.Spec.Process.Env {
+		if strings.HasPrefix(s, "HOME=") {
+			hasHomeSet = true
+			break
+		}
+	}
+	if !hasHomeSet {
+		c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", homeDir))
+	}
+	// A user-supplied template takes precedence over the default format.
+	if c.config.PasswdEntry != "" {
+		return c.passwdEntry(u.Username, u.Uid, u.Gid, u.Name, homeDir), nil
+	}
+
+	return fmt.Sprintf("%s:*:%s:%s:%s:%s:/bin/sh\n", u.Username, u.Uid, u.Gid, u.Name, homeDir), nil
+}
+
+// generateUserPasswdEntry generates an /etc/passwd entry for the container user
+// to run in the container.
+// The UID and GID of the added entry will also be returned.
+// Accepts one argument, that being any UID that has already been added to the
+// passwd file by other functions; if it matches the UID we were given, we don't
+// need to do anything.
+func (c *Container) generateUserPasswdEntry(addedUID int) (string, error) {
+	var (
+		groupspec string
+		gid       int
+	)
+	if c.config.User == "" {
+		return "", nil
+	}
+	splitSpec := strings.SplitN(c.config.User, ":", 2)
+	userspec := splitSpec[0]
+	if len(splitSpec) > 1 {
+		groupspec = splitSpec[1]
+	}
+	// If a non numeric User, then don't generate passwd
+	uid, err := strconv.ParseUint(userspec, 10, 32)
+	if err != nil {
+		return "", nil //nolint: nilerr
+	}
+
+	if addedUID != 0 && int(uid) == addedUID {
+		return "", nil
+	}
+
+	// Look up the user to see if it exists in the container image
+	_, err = lookup.GetUser(c.state.Mountpoint, userspec)
+	if err != runcuser.ErrNoPasswdEntries {
+		return "", err
+	}
+
+	if groupspec != "" {
+		ugid, err := strconv.ParseUint(groupspec, 10, 32)
+		if err == nil {
+			gid = int(ugid)
+		} else {
+			group, err := lookup.GetGroup(c.state.Mountpoint, groupspec)
+			if err != nil {
+				return "", fmt.Errorf("unable to get gid %s from group file: %w", groupspec, err)
+			}
+			gid = group.Gid
+		}
+	}
+
+	if c.config.PasswdEntry != "" {
+		entry := c.passwdEntry(fmt.Sprintf("%d", uid), fmt.Sprintf("%d", uid), fmt.Sprintf("%d", gid), "container user", c.WorkingDir())
+		return entry, nil
+	}
+
+	return fmt.Sprintf("%d:*:%d:%d:container user:%s:/bin/sh\n", uid, uid, gid, c.WorkingDir()), nil
+}
+
+// passwdEntry expands the configured PasswdEntry template and terminates it
+// with a newline. The placeholders $USERNAME, $UID, $GID, $NAME and $HOME are
+// substituted one after another, in that order.
+func (c *Container) passwdEntry(username string, uid, gid, name, homeDir string) string {
+	substitutions := [...][2]string{
+		{"$USERNAME", username},
+		{"$UID", uid},
+		{"$GID", gid},
+		{"$NAME", name},
+		{"$HOME", homeDir},
+	}
+
+	entry := c.config.PasswdEntry
+	for _, sub := range substitutions {
+		entry = strings.ReplaceAll(entry, sub[0], sub[1])
+	}
+	return entry + "\n"
+}
+
+// generatePasswdAndGroup generates container-specific passwd and group files
+// iff g.config.User is a number or we are configured to make a passwd entry for
+// the current user or the user specified HostsUsers
+// Returns path to file to mount at /etc/passwd, path to file to mount at
+// /etc/group, and any error that occurred. If no passwd/group file were
+// required, the empty string will be returned for those path (this may occur
+// even if no error happened).
+// This may modify the mounted container's /etc/passwd and /etc/group instead of
+// making copies to bind-mount in, so we don't break useradd (it wants to make a
+// copy of /etc/passwd and rename the copy to /etc/passwd, which is impossible
+// with a bind mount). This is done in cases where the container is *not*
+// read-only. In this case, the function will return nothing ("", "", nil).
+func (c *Container) generatePasswdAndGroup() (string, string, error) {
+	if !c.config.AddCurrentUserPasswdEntry && c.config.User == "" &&
+		len(c.config.HostUsers) == 0 {
+		return "", "", nil
+	}
+
+	needPasswd := true
+	needGroup := true
+
+	// First, check if there's a mount at /etc/passwd or group, we don't
+	// want to interfere with user mounts.
+	if MountExists(c.config.Spec.Mounts, "/etc/passwd") {
+		needPasswd = false
+	}
+	if MountExists(c.config.Spec.Mounts, "/etc/group") {
+		needGroup = false
+	}
+
+	// Next, check if we already made the files. If we didn't, don't need to
+	// do anything more.
+	if needPasswd {
+		passwdPath := filepath.Join(c.config.StaticDir, "passwd")
+		if _, err := os.Stat(passwdPath); err == nil {
+			needPasswd = false
+		}
+	}
+	if needGroup {
+		groupPath := filepath.Join(c.config.StaticDir, "group")
+		if _, err := os.Stat(groupPath); err == nil {
+			needGroup = false
+		}
+	}
+
+	// If we don't need a /etc/passwd or /etc/group at this point we can
+	// just return.
+	if !needPasswd && !needGroup {
+		return "", "", nil
+	}
+
+	passwdPath := ""
+	groupPath := ""
+
+	ro := c.IsReadOnly()
+
+	if needPasswd {
+		passwdEntry, err := c.generatePasswdEntry()
+		if err != nil {
+			return "", "", err
+		}
+
+		needsWrite := passwdEntry != ""
+		switch {
+		case ro && needsWrite:
+			logrus.Debugf("Making /etc/passwd for container %s", c.ID())
+			originPasswdFile, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/passwd")
+			if err != nil {
+				return "", "", fmt.Errorf("error creating path to container %s /etc/passwd: %w", c.ID(), err)
+			}
+			orig, err := ioutil.ReadFile(originPasswdFile)
+			if err != nil && !os.IsNotExist(err) {
+				return "", "", err
+			}
+			passwdFile, err := c.writeStringToStaticDir("passwd", string(orig)+passwdEntry)
+			if err != nil {
+				return "", "", fmt.Errorf("failed to create temporary passwd file: %w", err)
+			}
+			if err := os.Chmod(passwdFile, 0644); err != nil {
+				return "", "", err
+			}
+			passwdPath = passwdFile
+		case !ro && needsWrite:
+			logrus.Debugf("Modifying container %s /etc/passwd", c.ID())
+			containerPasswd, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/passwd")
+			if err != nil {
+				return "", "", fmt.Errorf("error looking up location of container %s /etc/passwd: %w", c.ID(), err)
+			}
+
+			f, err := os.OpenFile(containerPasswd, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600)
+			if err != nil {
+				return "", "", fmt.Errorf("container %s: %w", c.ID(), err)
+			}
+			defer f.Close()
+
+			if _, err := f.WriteString(passwdEntry); err != nil {
+				return "", "", fmt.Errorf("unable to append to container %s /etc/passwd: %w", c.ID(), err)
+			}
+		default:
+			logrus.Debugf("Not modifying container %s /etc/passwd", c.ID())
+		}
+	}
+	if needGroup {
+		groupEntry, err := c.generateGroupEntry()
+		if err != nil {
+			return "", "", err
+		}
+
+		needsWrite := groupEntry != ""
+		switch {
+		case ro && needsWrite:
+			logrus.Debugf("Making /etc/group for container %s", c.ID())
+			originGroupFile, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/group")
+			if err != nil {
+				return "", "", fmt.Errorf("error creating path to container %s /etc/group: %w", c.ID(), err)
+			}
+			orig, err := ioutil.ReadFile(originGroupFile)
+			if err != nil && !os.IsNotExist(err) {
+				return "", "", err
+			}
+			groupFile, err := c.writeStringToStaticDir("group", string(orig)+groupEntry)
+			if err != nil {
+				return "", "", fmt.Errorf("failed to create temporary group file: %w", err)
+			}
+			if err := os.Chmod(groupFile, 0644); err != nil {
+				return "", "", err
+			}
+			groupPath = groupFile
+		case !ro && needsWrite:
+			logrus.Debugf("Modifying container %s /etc/group", c.ID())
+			containerGroup, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/group")
+			if err != nil {
+				return "", "", fmt.Errorf("error looking up location of container %s /etc/group: %w", c.ID(), err)
+			}
+
+			f, err := os.OpenFile(containerGroup, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600)
+			if err != nil {
+				return "", "", fmt.Errorf("container %s: %w", c.ID(), err)
+			}
+			defer f.Close()
+
+			if _, err := f.WriteString(groupEntry); err != nil {
+				return "", "", fmt.Errorf("unable to append to container %s /etc/group: %w", c.ID(), err)
+			}
+		default:
+			logrus.Debugf("Not modifying container %s /etc/group", c.ID())
+		}
+	}
+
+	return passwdPath, groupPath, nil
+}
+
+// copyTimezoneFile copies the timezone file at zonePath to "localtime" in the
+// container's run directory, relabels the copy with the container's mount
+// label and chowns it to the container's root UID/GID.
+// It returns the path of the copy. A zonePath that is a directory is
+// rejected.
+func (c *Container) copyTimezoneFile(zonePath string) (string, error) {
+	info, err := os.Stat(zonePath)
+	if err != nil {
+		return "", err
+	}
+	if info.IsDir() {
+		return "", errors.New("invalid timezone: is a directory")
+	}
+
+	src, err := os.Open(zonePath)
+	if err != nil {
+		return "", err
+	}
+	defer src.Close()
+
+	localtimeCopy := filepath.Join(c.state.RunDir, "localtime")
+	dest, err := os.Create(localtimeCopy)
+	if err != nil {
+		return "", err
+	}
+	defer dest.Close()
+
+	if _, err := io.Copy(dest, src); err != nil {
+		return "", err
+	}
+	if err := c.relabel(localtimeCopy, c.config.MountLabel, false); err != nil {
+		return "", err
+	}
+	if err := dest.Chown(c.RootUID(), c.RootGID()); err != nil {
+		return "", err
+	}
+	return localtimeCopy, nil
+}
+
+// cleanupOverlayMounts delegates to overlay.CleanupContent for the
+// container's static directory and returns its error.
+func (c *Container) cleanupOverlayMounts() error {
+	return overlay.CleanupContent(c.config.StaticDir)
+}
+
+// createSecretMountDir creates the directory backing /run/secrets inside the
+// container's run directory and registers it as a bind mount, but only if
+// that directory does not exist yet. If it already exists nothing is changed
+// and nil is returned; any other Stat failure is returned as is.
+func (c *Container) createSecretMountDir() error {
+	src := filepath.Join(c.state.RunDir, "/run/secrets")
+
+	_, err := os.Stat(src)
+	if !os.IsNotExist(err) {
+		// Either the directory is already present (err == nil) or Stat
+		// failed for another reason.
+		return err
+	}
+
+	// Clear the umask so MkdirAll applies 0755 unmodified; restored on return.
+	oldUmask := umask.Set(0)
+	defer umask.Set(oldUmask)
+
+	if err := os.MkdirAll(src, 0755); err != nil {
+		return err
+	}
+	if err := label.Relabel(src, c.config.MountLabel, false); err != nil {
+		return err
+	}
+	if err := os.Chown(src, c.RootUID(), c.RootGID()); err != nil {
+		return err
+	}
+
+	c.state.BindMounts["/run/secrets"] = src
+	return nil
+}
+
+// fixVolumePermissions fixes ownership and permissions of the named volume v
+// while the volume's NeedsChown flag is set; otherwise it does nothing.
+//
+// The volume's mount point is chowned to the container's process user,
+// translated to host IDs when the container has ID mappings. If the mount
+// destination exists in the container's root filesystem, its owner, mode and
+// access time are then copied onto the mount point as well.
+// Volumes that use a volume driver are never touched.
+func (c *Container) fixVolumePermissions(v *ContainerNamedVolume) error {
+	vol, err := c.runtime.state.Volume(v.Name)
+	if err != nil {
+		return fmt.Errorf("error retrieving named volume %s for container %s: %w", v.Name, c.ID(), err)
+	}
+
+	// Hold the volume lock for the whole update of its state.
+	vol.lock.Lock()
+	defer vol.lock.Unlock()
+
+	// The volume may need a copy-up. Check the state.
+	if err := vol.update(); err != nil {
+		return err
+	}
+
+	// Volumes owned by a volume driver are not chowned - we don't want to
+	// mess with a mount not managed by us.
+	if vol.state.NeedsChown && !vol.UsesVolumeDriver() {
+		// Cleared up front and saved below, before the chown itself runs.
+		vol.state.NeedsChown = false
+
+		uid := int(c.config.Spec.Process.User.UID)
+		gid := int(c.config.Spec.Process.User.GID)
+
+		// With ID mappings configured, translate the container IDs to the
+		// corresponding host IDs.
+		if c.config.IDMappings.UIDMap != nil {
+			p := idtools.IDPair{
+				UID: uid,
+				GID: gid,
+			}
+			mappings := idtools.NewIDMappingsFromMaps(c.config.IDMappings.UIDMap, c.config.IDMappings.GIDMap)
+			newPair, err := mappings.ToHost(p)
+			if err != nil {
+				return fmt.Errorf("error mapping user %d:%d: %w", uid, gid, err)
+			}
+			uid = newPair.UID
+			gid = newPair.GID
+		}
+
+		// Record the IDs the volume is chowned to in its state.
+		vol.state.UIDChowned = uid
+		vol.state.GIDChowned = gid
+
+		if err := vol.save(); err != nil {
+			return err
+		}
+
+		mountPoint, err := vol.MountPoint()
+		if err != nil {
+			return err
+		}
+
+		if err := os.Lchown(mountPoint, uid, gid); err != nil {
+			return err
+		}
+
+		// Make sure the new volume matches the permissions of the target directory.
+		// https://github.com/containers/podman/issues/10188
+		st, err := os.Lstat(filepath.Join(c.state.Mountpoint, v.Dest))
+		if err == nil {
+			// The destination exists in the image: its owner overrides the
+			// chown performed above.
+			if stat, ok := st.Sys().(*syscall.Stat_t); ok {
+				if err := os.Lchown(mountPoint, int(stat.Uid), int(stat.Gid)); err != nil {
+					return err
+				}
+			}
+			if err := os.Chmod(mountPoint, st.Mode()); err != nil {
+				return err
+			}
+			if err := setVolumeAtime(mountPoint, st); err != nil {
+				return err
+			}
+		} else if !os.IsNotExist(err) {
+			// A destination missing from the image is fine; anything else is
+			// an error.
+			return err
+		}
+	}
+	return nil
+}
+
+// relabel applies mountLabel to src (recursively when recurse is set).
+// It does nothing when SELinux is disabled or mountLabel is empty. Unless the
+// container is in the Configured or Unknown state, the relabel is skipped
+// when src already carries mountLabel.
+func (c *Container) relabel(src, mountLabel string, recurse bool) error {
+	if !selinux.GetEnabled() || mountLabel == "" {
+		return nil
+	}
+
+	// only relabel on initial creation of container
+	initialCreation := c.ensureState(define.ContainerStateConfigured, define.ContainerStateUnknown)
+	if !initialCreation {
+		current, err := label.FileLabel(src)
+		if err != nil {
+			return err
+		}
+		// If labels are different, might be on a tmpfs
+		if current == mountLabel {
+			return nil
+		}
+	}
+
+	return label.Relabel(src, mountLabel, recurse)
+}
+
+// ChangeHostPathOwnership changes the ownership of the host path src to
+// uid:gid by calling chown.ChangeHostPathOwnership; recurse is passed through
+// unchanged.
+//
+// Unless the container is in the Configured or Unknown state, the chown is
+// skipped when src is already owned by uid:gid.
+func (c *Container) ChangeHostPathOwnership(src string, recurse bool, uid, gid int) error {
+	// only chown on initial creation of container
+	if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateUnknown) {
+		st, err := os.Stat(src)
+		if err != nil {
+			return err
+		}
+
+		// Nothing to do if the owner already matches. When the platform
+		// stat data is not a *syscall.Stat_t the owner is unknown, so fall
+		// through to the chown instead of panicking on the assertion.
+		if stat, ok := st.Sys().(*syscall.Stat_t); ok && int(stat.Uid) == uid && int(stat.Gid) == gid {
+			return nil
+		}
+	}
+	return chown.ChangeHostPathOwnership(src, recurse, uid, gid)
+}
diff --git a/libpod/container_internal_freebsd.go b/libpod/container_internal_freebsd.go
new file mode 100644
index 000000000..40c6c5ebf
--- /dev/null
+++ b/libpod/container_internal_freebsd.go
@@ -0,0 +1,285 @@
+//go:build freebsd
+// +build freebsd
+
+package libpod
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+
+	"github.com/containers/common/libnetwork/types"
+	"github.com/containers/podman/v4/pkg/rootless"
+	spec "github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/opencontainers/runtime-tools/generate"
+	"github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"
+)
+
+var (
+	// bindOptions is empty in this FreeBSD build.
+	// NOTE(review): presumably the default options for bind mounts - confirm
+	// against the callers in the common code.
+	bindOptions = []string{}
+)
+
+// Network stubs to decouple container_internal_freebsd.go from
+// networking_freebsd.go so they can be reviewed separately.
+
+// createNetNS is a stub: it always returns a "not implemented" error together
+// with an empty jail name and a nil status map.
+func (r *Runtime) createNetNS(ctr *Container) (netJail string, q map[string]types.StatusBlock, retErr error) {
+	return "", nil, errors.New("not implemented (*Runtime) createNetNS")
+}
+
+// teardownNetNS is a stub: it always returns a "not implemented" error.
+func (r *Runtime) teardownNetNS(ctr *Container) error {
+	return errors.New("not implemented (*Runtime) teardownNetNS")
+}
+
+// reloadContainerNetwork is a stub: it always returns a nil status map and a
+// "not implemented" error.
+func (r *Runtime) reloadContainerNetwork(ctr *Container) (map[string]types.StatusBlock, error) {
+	return nil, errors.New("not implemented (*Runtime) reloadContainerNetwork")
+}
+
+// mountSHM is a no-op in this FreeBSD build; it ignores shmOptions and always
+// returns nil.
+func (c *Container) mountSHM(shmOptions string) error {
+	return nil
+}
+
+// unmountSHM is a no-op in this FreeBSD build; it ignores path and always
+// returns nil.
+func (c *Container) unmountSHM(path string) error {
+	return nil
+}
+
+// prepare mounts the container's storage and, if the container is configured
+// to get its own network before start (CreateNetNS without
+// PostConfigureNetNS) and has no network jail yet, sets that network up.
+// Both steps run concurrently. The updated container state is saved when
+// storage was mounted successfully.
+//
+// A storage mount failure is returned. A network setup failure is logged but
+// does not fail prepare.
+func (c *Container) prepare() error {
+	var (
+		wg                              sync.WaitGroup
+		createNetNSErr, mountStorageErr error
+		// tmpStateLock serializes the two goroutines' writes to c.state.
+		tmpStateLock sync.Mutex
+	)
+
+	wg.Add(2)
+
+	go func() {
+		defer wg.Done()
+		// Set up network namespace if not already set up
+		noNetNS := c.state.NetworkJail == ""
+		if c.config.CreateNetNS && noNetNS && !c.config.PostConfigureNetNS {
+			var (
+				jailName      string
+				networkStatus map[string]types.StatusBlock
+			)
+			jailName, networkStatus, createNetNSErr = c.runtime.createNetNS(c)
+			if createNetNSErr != nil {
+				return
+			}
+
+			tmpStateLock.Lock()
+			defer tmpStateLock.Unlock()
+
+			// Assign NetNS attributes to container
+			c.state.NetworkJail = jailName
+			c.state.NetworkStatus = networkStatus
+		}
+	}()
+	// Mount storage if not mounted
+	go func() {
+		defer wg.Done()
+		var mountPoint string
+		mountPoint, mountStorageErr = c.mountStorage()
+
+		if mountStorageErr != nil {
+			return
+		}
+
+		tmpStateLock.Lock()
+		defer tmpStateLock.Unlock()
+
+		// Finish up mountStorage
+		c.state.Mounted = true
+		c.state.Mountpoint = mountPoint
+
+		logrus.Debugf("Created root filesystem for container %s at %s", c.ID(), c.state.Mountpoint)
+	}()
+
+	wg.Wait()
+
+	// Do not drop a network setup failure silently: the container would
+	// otherwise come up without its network and without any trace of why.
+	// NOTE(review): this keeps the previous non-fatal behavior; consider
+	// returning the error (with storage cleanup) once createNetNS is
+	// implemented.
+	if createNetNSErr != nil {
+		logrus.Errorf("Preparing container %s network: %v", c.ID(), createNetNSErr)
+	}
+
+	if mountStorageErr != nil {
+		return mountStorageErr
+	}
+
+	// Save changes to container state
+	return c.save()
+}
+
+// cleanupNetwork tears down the container's network through the runtime and
+// saves the container state if the container is still valid.
+// Nothing is done for containers that share another container's network
+// (NetNsCtr set) or whose network is disabled. A teardown failure is only
+// logged.
+func (c *Container) cleanupNetwork() error {
+	if c.config.NetNsCtr != "" {
+		return nil
+	}
+
+	disabled, err := c.NetworkDisabled()
+	switch {
+	case err != nil:
+		return err
+	case disabled:
+		return nil
+	}
+
+	// Stop the container's network namespace (if it has one)
+	if err := c.runtime.teardownNetNS(c); err != nil {
+		logrus.Errorf("Unable to cleanup network for container %s: %q", c.ID(), err)
+	}
+
+	if !c.valid {
+		return nil
+	}
+	return c.save()
+}
+
+// reloadNetwork asks the runtime to reload the container's network, stores
+// the returned status in the container state and saves the state.
+func (c *Container) reloadNetwork() error {
+	status, err := c.runtime.reloadContainerNetwork(c)
+	if err != nil {
+		return err
+	}
+
+	c.state.NetworkStatus = status
+	return c.save()
+}
+
+// addNetworkContainer makes the generated spec reuse the network jail of the
+// container ctr: that container's NetworkJail is recorded in the
+// "org.freebsd.parentJail" annotation.
+// It fails if ctr cannot be retrieved from the state or its state cannot be
+// refreshed.
+func (c *Container) addNetworkContainer(g *generate.Generator, ctr string) error {
+	nsCtr, err := c.runtime.state.Container(ctr)
+	if err != nil {
+		return fmt.Errorf("error retrieving dependency %s of container %s from state: %w", ctr, c.ID(), err)
+	}
+	// Refresh the dependency only after the lookup succeeded (nsCtr is not
+	// usable otherwise), and do not annotate from a state that failed to
+	// update.
+	if err := c.runtime.state.UpdateContainer(nsCtr); err != nil {
+		return fmt.Errorf("error updating state of dependency %s of container %s: %w", ctr, c.ID(), err)
+	}
+	g.AddAnnotation("org.freebsd.parentJail", nsCtr.state.NetworkJail)
+	return nil
+}
+
+// isRootlessCgroupSet always returns false in this FreeBSD build; the cgroup
+// argument is ignored.
+func isRootlessCgroupSet(cgroup string) bool {
+	return false
+}
+
+// expectPodCgroup always returns (false, nil) in this FreeBSD build.
+func (c *Container) expectPodCgroup() (bool, error) {
+	return false, nil
+}
+
+// getOCICgroupPath always returns an empty path and a nil error in this
+// FreeBSD build.
+func (c *Container) getOCICgroupPath() (string, error) {
+	return "", nil
+}
+
+// openDirectory opens path read-only with the O_PATH flag and returns the
+// resulting file descriptor.
+func openDirectory(path string) (fd int, err error) {
+	// Numeric value of the O_PATH flag, spelled out locally.
+	// NOTE(review): presumably because the unix package does not export it
+	// for this platform - confirm.
+	const oPath = 0x00400000
+
+	flags := unix.O_RDONLY | oPath
+	return unix.Open(path, flags, 0)
+}
+
+// addNetworkNamespace records the container's own network jail in the
+// "org.freebsd.parentJail" annotation when the container is configured with
+// CreateNetNS. It always returns nil.
+func (c *Container) addNetworkNamespace(g *generate.Generator) error {
+	if !c.config.CreateNetNS {
+		return nil
+	}
+
+	g.AddAnnotation("org.freebsd.parentJail", c.state.NetworkJail)
+	return nil
+}
+
+// addSystemdMounts is a no-op in this FreeBSD build; it always returns nil.
+func (c *Container) addSystemdMounts(g *generate.Generator) error {
+	return nil
+}
+
+// addSharedNamespaces applies shared-namespace settings to the generated
+// spec:
+//   - when NetNsCtr is set, the network jail of that container is added;
+//   - when ID maps are available, the spec's UID/GID mappings are passed
+//     through rootless.MaybeSplitMappings;
+//   - the hostname is set in the spec, and a HOSTNAME environment variable is
+//     added unless one is already present.
+func (c *Container) addSharedNamespaces(g *generate.Generator) error {
+	if netCtr := c.config.NetNsCtr; netCtr != "" {
+		if err := c.addNetworkContainer(g, netCtr); err != nil {
+			return err
+		}
+	}
+
+	availableUIDs, availableGIDs, err := rootless.GetAvailableIDMaps()
+	switch {
+	case err == nil:
+		g.Config.Linux.UIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.UIDMappings, availableUIDs)
+		g.Config.Linux.GIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.GIDMappings, availableGIDs)
+	case os.IsNotExist(err):
+		// The kernel-provided files only exist if user namespaces are supported
+		logrus.Debugf("User or group ID mappings not available: %s", err)
+	default:
+		return err
+	}
+
+	// Hostname handling:
+	// If we have a UTS namespace, set Hostname in the OCI spec.
+	// Set the HOSTNAME environment variable unless explicitly overridden by
+	// the user (already present in OCI spec). If we don't have a UTS ns,
+	// set it to the host's hostname instead.
+	hostname := c.Hostname()
+
+	// TODO: make this optional, needs progress on adding FreeBSD section to the spec
+	foundUTS := true
+	g.SetHostname(hostname)
+
+	if !foundUTS {
+		hostHostname, err := os.Hostname()
+		if err != nil {
+			return err
+		}
+		hostname = hostHostname
+	}
+
+	for _, kv := range g.Config.Process.Env {
+		// The variable name is everything before the first '=', or the
+		// whole string when there is none.
+		name := kv
+		if i := strings.IndexByte(kv, '='); i >= 0 {
+			name = kv[:i]
+		}
+		if name == "HOSTNAME" {
+			// Already set by the user; leave it alone.
+			return nil
+		}
+	}
+
+	g.AddProcessEnv("HOSTNAME", hostname)
+	return nil
+}
+
+// addRootPropagation is a no-op in this FreeBSD build; both arguments are
+// ignored and nil is returned.
+func (c *Container) addRootPropagation(g *generate.Generator, mounts []spec.Mount) error {
+	return nil
+}
+
+// setProcessLabel is a no-op in this FreeBSD build.
+func (c *Container) setProcessLabel(g *generate.Generator) {
+}
+
+// setMountLabel is a no-op in this FreeBSD build.
+func (c *Container) setMountLabel(g *generate.Generator) {
+}
+
+// setCgroupsPath is a no-op in this FreeBSD build; it always returns nil.
+func (c *Container) setCgroupsPath(g *generate.Generator) error {
+	return nil
+}
+
+// addSlirp4netnsDNS returns nameservers unchanged in this FreeBSD build.
+func (c *Container) addSlirp4netnsDNS(nameservers []string) []string {
+	return nameservers
+}
+
+// isSlirp4netnsIPv6 always returns (false, nil) in this FreeBSD build.
+func (c *Container) isSlirp4netnsIPv6() (bool, error) {
+	return false, nil
+}
+
+// hasNetNone reports whether the container has no network jail recorded in
+// its state; this build treats that as net=none.
+func (c *Container) hasNetNone() bool {
+	return c.state.NetworkJail == ""
+}
+
+// setVolumeAtime sets the access time of mountPoint to the access time
+// recorded in st, and its modification time to st.ModTime().
+// st.Sys() must hold a *syscall.Stat_t.
+func setVolumeAtime(mountPoint string, st os.FileInfo) error {
+	sys := st.Sys().(*syscall.Stat_t)
+	sec, nsec := int64(sys.Atimespec.Sec), int64(sys.Atimespec.Nsec) //nolint: unconvert
+	return os.Chtimes(mountPoint, time.Unix(sec, nsec), st.ModTime())
+}
diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go
index 5c5fd471b..9b05a2d61 100644
--- a/libpod/container_internal_linux.go
+++ b/libpod/container_internal_linux.go
@@ -4,64 +4,34 @@
 package libpod
 
 import (
-	"context"
 	"errors"
 	"fmt"
-	"io"
-	"io/ioutil"
-	"math"
 	"os"
-	"os/user"
 	"path"
 	"path/filepath"
-	"strconv"
 	"strings"
 	"sync"
 	"syscall"
 	"time"
 
-	metadata "github.com/checkpoint-restore/checkpointctl/lib"
-	"github.com/checkpoint-restore/go-criu/v5/stats"
-	cdi "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
 	"github.com/containernetworking/plugins/pkg/ns"
-	"github.com/containers/buildah"
-	"github.com/containers/buildah/pkg/chrootuser"
-	"github.com/containers/buildah/pkg/overlay"
-	butil "github.com/containers/buildah/util"
-	"github.com/containers/common/libnetwork/etchosts"
-	"github.com/containers/common/libnetwork/resolvconf"
 	"github.com/containers/common/libnetwork/types"
-	"github.com/containers/common/pkg/apparmor"
 	"github.com/containers/common/pkg/cgroups"
-	"github.com/containers/common/pkg/chown"
 	"github.com/containers/common/pkg/config"
-	"github.com/containers/common/pkg/subscriptions"
-	"github.com/containers/common/pkg/umask"
-	cutil "github.com/containers/common/pkg/util"
-	is "github.com/containers/image/v5/storage"
 	"github.com/containers/podman/v4/libpod/define"
-	"github.com/containers/podman/v4/libpod/events"
-	"github.com/containers/podman/v4/pkg/annotations"
-	"github.com/containers/podman/v4/pkg/checkpoint/crutils"
-	"github.com/containers/podman/v4/pkg/criu"
-	"github.com/containers/podman/v4/pkg/lookup"
 	"github.com/containers/podman/v4/pkg/rootless"
-	"github.com/containers/podman/v4/pkg/util"
 	"github.com/containers/podman/v4/utils"
-	"github.com/containers/podman/v4/version"
-	"github.com/containers/storage/pkg/archive"
-	"github.com/containers/storage/pkg/idtools"
-	"github.com/containers/storage/pkg/lockfile"
-	securejoin "github.com/cyphar/filepath-securejoin"
-	runcuser "github.com/opencontainers/runc/libcontainer/user"
 	spec "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/opencontainers/runtime-tools/generate"
-	"github.com/opencontainers/selinux/go-selinux"
 	"github.com/opencontainers/selinux/go-selinux/label"
 	"github.com/sirupsen/logrus"
 	"golang.org/x/sys/unix"
 )
 
+var (
+	bindOptions = []string{"bind", "rprivate"} // option list for bind mounts on Linux; NOTE(review): presumably read by the shared spec-generation code — confirm
+)
+
 func (c *Container) mountSHM(shmOptions string) error {
 	if err := unix.Mount("shm", c.config.ShmDir, "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV,
 		label.FormatMountLabel(shmOptions, c.config.MountLabel)); err != nil {
@@ -177,118 +147,6 @@ func (c *Container) prepare() error {
 	return nil
 }
 
-// isWorkDirSymlink returns true if resolved workdir is symlink or a chain of symlinks,
-// and final resolved target is present either on  volume, mount or inside of container
-// otherwise it returns false. Following function is meant for internal use only and
-// can change at any point of time.
-func (c *Container) isWorkDirSymlink(resolvedPath string) bool {
-	// We cannot create workdir since explicit --workdir is
-	// set in config but workdir could also be a symlink.
-	// If it's a symlink, check if the resolved target is present in the container.
-	// If so, that's a valid use case: return nil.
-
-	maxSymLinks := 0
-	for {
-		// Linux only supports a chain of 40 links.
-		// Reference: https://github.com/torvalds/linux/blob/master/include/linux/namei.h#L13
-		if maxSymLinks > 40 {
-			break
-		}
-		resolvedSymlink, err := os.Readlink(resolvedPath)
-		if err != nil {
-			// End sym-link resolution loop.
-			break
-		}
-		if resolvedSymlink != "" {
-			_, resolvedSymlinkWorkdir, err := c.resolvePath(c.state.Mountpoint, resolvedSymlink)
-			if isPathOnVolume(c, resolvedSymlinkWorkdir) || isPathOnBindMount(c, resolvedSymlinkWorkdir) {
-				// Resolved symlink exists on external volume or mount
-				return true
-			}
-			if err != nil {
-				// Could not resolve path so end sym-link resolution loop.
-				break
-			}
-			if resolvedSymlinkWorkdir != "" {
-				resolvedPath = resolvedSymlinkWorkdir
-				_, err := os.Stat(resolvedSymlinkWorkdir)
-				if err == nil {
-					// Symlink resolved successfully and resolved path exists on container,
-					// this is a valid use-case so return nil.
-					logrus.Debugf("Workdir is a symlink with target to %q and resolved symlink exists on container", resolvedSymlink)
-					return true
-				}
-			}
-		}
-		maxSymLinks++
-	}
-	return false
-}
-
-// resolveWorkDir resolves the container's workdir and, depending on the
-// configuration, will create it, or error out if it does not exist.
-// Note that the container must be mounted before.
-func (c *Container) resolveWorkDir() error {
-	workdir := c.WorkingDir()
-
-	// If the specified workdir is a subdir of a volume or mount,
-	// we don't need to do anything.  The runtime is taking care of
-	// that.
-	if isPathOnVolume(c, workdir) || isPathOnBindMount(c, workdir) {
-		logrus.Debugf("Workdir %q resolved to a volume or mount", workdir)
-		return nil
-	}
-
-	_, resolvedWorkdir, err := c.resolvePath(c.state.Mountpoint, workdir)
-	if err != nil {
-		return err
-	}
-	logrus.Debugf("Workdir %q resolved to host path %q", workdir, resolvedWorkdir)
-
-	st, err := os.Stat(resolvedWorkdir)
-	if err == nil {
-		if !st.IsDir() {
-			return fmt.Errorf("workdir %q exists on container %s, but is not a directory", workdir, c.ID())
-		}
-		return nil
-	}
-	if !c.config.CreateWorkingDir {
-		// No need to create it (e.g., `--workdir=/foo`), so let's make sure
-		// the path exists on the container.
-		if err != nil {
-			if os.IsNotExist(err) {
-				// If resolved Workdir path gets marked as a valid symlink,
-				// return nil cause this is valid use-case.
-				if c.isWorkDirSymlink(resolvedWorkdir) {
-					return nil
-				}
-				return fmt.Errorf("workdir %q does not exist on container %s", workdir, c.ID())
-			}
-			// This might be a serious error (e.g., permission), so
-			// we need to return the full error.
-			return fmt.Errorf("error detecting workdir %q on container %s: %w", workdir, c.ID(), err)
-		}
-		return nil
-	}
-	if err := os.MkdirAll(resolvedWorkdir, 0755); err != nil {
-		if os.IsExist(err) {
-			return nil
-		}
-		return fmt.Errorf("error creating container %s workdir: %w", c.ID(), err)
-	}
-
-	// Ensure container entrypoint is created (if required).
-	uid, gid, _, err := chrootuser.GetUser(c.state.Mountpoint, c.User())
-	if err != nil {
-		return fmt.Errorf("error looking up %s inside of the container %s: %w", c.User(), c.ID(), err)
-	}
-	if err := os.Chown(resolvedWorkdir, int(uid), int(gid)); err != nil {
-		return fmt.Errorf("error chowning container %s workdir to container root: %w", c.ID(), err)
-	}
-
-	return nil
-}
-
 // cleanupNetwork unmounts and cleans up the container's network
 func (c *Container) cleanupNetwork() error {
 	if c.config.NetNsCtr != "" {
@@ -335,670 +193,6 @@ func (c *Container) reloadNetwork() error {
 	return c.save()
 }
 
-func (c *Container) getUserOverrides() *lookup.Overrides {
-	var hasPasswdFile, hasGroupFile bool
-	overrides := lookup.Overrides{}
-	for _, m := range c.config.Spec.Mounts {
-		if m.Destination == "/etc/passwd" {
-			overrides.ContainerEtcPasswdPath = m.Source
-			hasPasswdFile = true
-		}
-		if m.Destination == "/etc/group" {
-			overrides.ContainerEtcGroupPath = m.Source
-			hasGroupFile = true
-		}
-		if m.Destination == "/etc" {
-			if !hasPasswdFile {
-				overrides.ContainerEtcPasswdPath = filepath.Join(m.Source, "passwd")
-			}
-			if !hasGroupFile {
-				overrides.ContainerEtcGroupPath = filepath.Join(m.Source, "group")
-			}
-		}
-	}
-	if path, ok := c.state.BindMounts["/etc/passwd"]; ok {
-		overrides.ContainerEtcPasswdPath = path
-	}
-	return &overrides
-}
-
-func lookupHostUser(name string) (*runcuser.ExecUser, error) {
-	var execUser runcuser.ExecUser
-	// Look up User on host
-	u, err := util.LookupUser(name)
-	if err != nil {
-		return &execUser, err
-	}
-	uid, err := strconv.ParseUint(u.Uid, 8, 32)
-	if err != nil {
-		return &execUser, err
-	}
-
-	gid, err := strconv.ParseUint(u.Gid, 8, 32)
-	if err != nil {
-		return &execUser, err
-	}
-	execUser.Uid = int(uid)
-	execUser.Gid = int(gid)
-	execUser.Home = u.HomeDir
-	return &execUser, nil
-}
-
-// Internal only function which returns upper and work dir from
-// overlay options.
-func getOverlayUpperAndWorkDir(options []string) (string, string, error) {
-	upperDir := ""
-	workDir := ""
-	for _, o := range options {
-		if strings.HasPrefix(o, "upperdir") {
-			splitOpt := strings.SplitN(o, "=", 2)
-			if len(splitOpt) > 1 {
-				upperDir = splitOpt[1]
-				if upperDir == "" {
-					return "", "", errors.New("cannot accept empty value for upperdir")
-				}
-			}
-		}
-		if strings.HasPrefix(o, "workdir") {
-			splitOpt := strings.SplitN(o, "=", 2)
-			if len(splitOpt) > 1 {
-				workDir = splitOpt[1]
-				if workDir == "" {
-					return "", "", errors.New("cannot accept empty value for workdir")
-				}
-			}
-		}
-	}
-	if (upperDir != "" && workDir == "") || (upperDir == "" && workDir != "") {
-		return "", "", errors.New("must specify both upperdir and workdir")
-	}
-	return upperDir, workDir, nil
-}
-
-// Generate spec for a container
-// Accepts a map of the container's dependencies
-func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
-	overrides := c.getUserOverrides()
-	execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, overrides)
-	if err != nil {
-		if cutil.StringInSlice(c.config.User, c.config.HostUsers) {
-			execUser, err = lookupHostUser(c.config.User)
-		}
-		if err != nil {
-			return nil, err
-		}
-	}
-
-	// NewFromSpec() is deprecated according to its comment
-	// however the recommended replace just causes a nil map panic
-	//nolint:staticcheck
-	g := generate.NewFromSpec(c.config.Spec)
-
-	// If the flag to mount all devices is set for a privileged container, add
-	// all the devices from the host's machine into the container
-	if c.config.MountAllDevices {
-		if err := util.AddPrivilegedDevices(&g); err != nil {
-			return nil, err
-		}
-	}
-
-	// If network namespace was requested, add it now
-	if c.config.CreateNetNS {
-		if c.config.PostConfigureNetNS {
-			if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), ""); err != nil {
-				return nil, err
-			}
-		} else {
-			if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), c.state.NetNS.Path()); err != nil {
-				return nil, err
-			}
-		}
-	}
-
-	// Apply AppArmor checks and load the default profile if needed.
-	if len(c.config.Spec.Process.ApparmorProfile) > 0 {
-		updatedProfile, err := apparmor.CheckProfileAndLoadDefault(c.config.Spec.Process.ApparmorProfile)
-		if err != nil {
-			return nil, err
-		}
-		g.SetProcessApparmorProfile(updatedProfile)
-	}
-
-	if err := c.makeBindMounts(); err != nil {
-		return nil, err
-	}
-
-	if err := c.mountNotifySocket(g); err != nil {
-		return nil, err
-	}
-
-	// Get host UID and GID based on the container process UID and GID.
-	hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid))
-	if err != nil {
-		return nil, err
-	}
-
-	// Add named volumes
-	for _, namedVol := range c.config.NamedVolumes {
-		volume, err := c.runtime.GetVolume(namedVol.Name)
-		if err != nil {
-			return nil, fmt.Errorf("error retrieving volume %s to add to container %s: %w", namedVol.Name, c.ID(), err)
-		}
-		mountPoint, err := volume.MountPoint()
-		if err != nil {
-			return nil, err
-		}
-
-		overlayFlag := false
-		upperDir := ""
-		workDir := ""
-		for _, o := range namedVol.Options {
-			if o == "O" {
-				overlayFlag = true
-				upperDir, workDir, err = getOverlayUpperAndWorkDir(namedVol.Options)
-				if err != nil {
-					return nil, err
-				}
-			}
-		}
-
-		if overlayFlag {
-			var overlayMount spec.Mount
-			var overlayOpts *overlay.Options
-			contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
-			if err != nil {
-				return nil, err
-			}
-
-			overlayOpts = &overlay.Options{RootUID: c.RootUID(),
-				RootGID:                c.RootGID(),
-				UpperDirOptionFragment: upperDir,
-				WorkDirOptionFragment:  workDir,
-				GraphOpts:              c.runtime.store.GraphOptions(),
-			}
-
-			overlayMount, err = overlay.MountWithOptions(contentDir, mountPoint, namedVol.Dest, overlayOpts)
-			if err != nil {
-				return nil, fmt.Errorf("mounting overlay failed %q: %w", mountPoint, err)
-			}
-
-			for _, o := range namedVol.Options {
-				if o == "U" {
-					if err := c.ChangeHostPathOwnership(mountPoint, true, int(hostUID), int(hostGID)); err != nil {
-						return nil, err
-					}
-
-					if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
-						return nil, err
-					}
-				}
-			}
-			g.AddMount(overlayMount)
-		} else {
-			volMount := spec.Mount{
-				Type:        "bind",
-				Source:      mountPoint,
-				Destination: namedVol.Dest,
-				Options:     namedVol.Options,
-			}
-			g.AddMount(volMount)
-		}
-	}
-
-	// Check if the spec file mounts contain the options z, Z or U.
-	// If they have z or Z, relabel the source directory and then remove the option.
-	// If they have U, chown the source directory and them remove the option.
-	for i := range g.Config.Mounts {
-		m := &g.Config.Mounts[i]
-		var options []string
-		for _, o := range m.Options {
-			switch o {
-			case "U":
-				if m.Type == "tmpfs" {
-					options = append(options, []string{fmt.Sprintf("uid=%d", execUser.Uid), fmt.Sprintf("gid=%d", execUser.Gid)}...)
-				} else {
-					// only chown on initial creation of container
-					if err := c.ChangeHostPathOwnership(m.Source, true, int(hostUID), int(hostGID)); err != nil {
-						return nil, err
-					}
-				}
-			case "z":
-				fallthrough
-			case "Z":
-				if err := c.relabel(m.Source, c.MountLabel(), label.IsShared(o)); err != nil {
-					return nil, err
-				}
-
-			default:
-				options = append(options, o)
-			}
-		}
-		m.Options = options
-	}
-
-	g.SetProcessSelinuxLabel(c.ProcessLabel())
-	g.SetLinuxMountLabel(c.MountLabel())
-
-	// Add bind mounts to container
-	for dstPath, srcPath := range c.state.BindMounts {
-		newMount := spec.Mount{
-			Type:        "bind",
-			Source:      srcPath,
-			Destination: dstPath,
-			Options:     []string{"bind", "rprivate"},
-		}
-		if c.IsReadOnly() && dstPath != "/dev/shm" {
-			newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
-		}
-		if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
-			newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
-		}
-		if !MountExists(g.Mounts(), dstPath) {
-			g.AddMount(newMount)
-		} else {
-			logrus.Infof("User mount overriding libpod mount at %q", dstPath)
-		}
-	}
-
-	// Add overlay volumes
-	for _, overlayVol := range c.config.OverlayVolumes {
-		upperDir, workDir, err := getOverlayUpperAndWorkDir(overlayVol.Options)
-		if err != nil {
-			return nil, err
-		}
-		contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
-		if err != nil {
-			return nil, err
-		}
-		overlayOpts := &overlay.Options{RootUID: c.RootUID(),
-			RootGID:                c.RootGID(),
-			UpperDirOptionFragment: upperDir,
-			WorkDirOptionFragment:  workDir,
-			GraphOpts:              c.runtime.store.GraphOptions(),
-		}
-
-		overlayMount, err := overlay.MountWithOptions(contentDir, overlayVol.Source, overlayVol.Dest, overlayOpts)
-		if err != nil {
-			return nil, fmt.Errorf("mounting overlay failed %q: %w", overlayVol.Source, err)
-		}
-
-		// Check overlay volume options
-		for _, o := range overlayVol.Options {
-			if o == "U" {
-				if err := c.ChangeHostPathOwnership(overlayVol.Source, true, int(hostUID), int(hostGID)); err != nil {
-					return nil, err
-				}
-
-				if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
-					return nil, err
-				}
-			}
-		}
-
-		g.AddMount(overlayMount)
-	}
-
-	// Add image volumes as overlay mounts
-	for _, volume := range c.config.ImageVolumes {
-		// Mount the specified image.
-		img, _, err := c.runtime.LibimageRuntime().LookupImage(volume.Source, nil)
-		if err != nil {
-			return nil, fmt.Errorf("error creating image volume %q:%q: %w", volume.Source, volume.Dest, err)
-		}
-		mountPoint, err := img.Mount(ctx, nil, "")
-		if err != nil {
-			return nil, fmt.Errorf("error mounting image volume %q:%q: %w", volume.Source, volume.Dest, err)
-		}
-
-		contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
-		if err != nil {
-			return nil, fmt.Errorf("failed to create TempDir in the %s directory: %w", c.config.StaticDir, err)
-		}
-
-		var overlayMount spec.Mount
-		if volume.ReadWrite {
-			overlayMount, err = overlay.Mount(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
-		} else {
-			overlayMount, err = overlay.MountReadOnly(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
-		}
-		if err != nil {
-			return nil, fmt.Errorf("creating overlay mount for image %q failed: %w", volume.Source, err)
-		}
-		g.AddMount(overlayMount)
-	}
-
-	hasHomeSet := false
-	for _, s := range c.config.Spec.Process.Env {
-		if strings.HasPrefix(s, "HOME=") {
-			hasHomeSet = true
-			break
-		}
-	}
-	if !hasHomeSet && execUser.Home != "" {
-		c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", execUser.Home))
-	}
-
-	if c.config.User != "" {
-		// User and Group must go together
-		g.SetProcessUID(uint32(execUser.Uid))
-		g.SetProcessGID(uint32(execUser.Gid))
-	}
-
-	if c.config.Umask != "" {
-		decVal, err := strconv.ParseUint(c.config.Umask, 8, 32)
-		if err != nil {
-			return nil, fmt.Errorf("invalid Umask Value: %w", err)
-		}
-		umask := uint32(decVal)
-		g.Config.Process.User.Umask = &umask
-	}
-
-	// Add addition groups if c.config.GroupAdd is not empty
-	if len(c.config.Groups) > 0 {
-		gids, err := lookup.GetContainerGroups(c.config.Groups, c.state.Mountpoint, overrides)
-		if err != nil {
-			return nil, fmt.Errorf("error looking up supplemental groups for container %s: %w", c.ID(), err)
-		}
-		for _, gid := range gids {
-			g.AddProcessAdditionalGid(gid)
-		}
-	}
-
-	if c.Systemd() {
-		if err := c.setupSystemd(g.Mounts(), g); err != nil {
-			return nil, fmt.Errorf("error adding systemd-specific mounts: %w", err)
-		}
-	}
-
-	// Look up and add groups the user belongs to, if a group wasn't directly specified
-	if !strings.Contains(c.config.User, ":") {
-		// the gidMappings that are present inside the container user namespace
-		var gidMappings []idtools.IDMap
-
-		switch {
-		case len(c.config.IDMappings.GIDMap) > 0:
-			gidMappings = c.config.IDMappings.GIDMap
-		case rootless.IsRootless():
-			// Check whether the current user namespace has enough gids available.
-			availableGids, err := rootless.GetAvailableGids()
-			if err != nil {
-				return nil, fmt.Errorf("cannot read number of available GIDs: %w", err)
-			}
-			gidMappings = []idtools.IDMap{{
-				ContainerID: 0,
-				HostID:      0,
-				Size:        int(availableGids),
-			}}
-		default:
-			gidMappings = []idtools.IDMap{{
-				ContainerID: 0,
-				HostID:      0,
-				Size:        math.MaxInt32,
-			}}
-		}
-		for _, gid := range execUser.Sgids {
-			isGIDAvailable := false
-			for _, m := range gidMappings {
-				if gid >= m.ContainerID && gid < m.ContainerID+m.Size {
-					isGIDAvailable = true
-					break
-				}
-			}
-			if isGIDAvailable {
-				g.AddProcessAdditionalGid(uint32(gid))
-			} else {
-				logrus.Warnf("Additional gid=%d is not present in the user namespace, skip setting it", gid)
-			}
-		}
-	}
-
-	// Add shared namespaces from other containers
-	if c.config.IPCNsCtr != "" {
-		if err := c.addNamespaceContainer(&g, IPCNS, c.config.IPCNsCtr, spec.IPCNamespace); err != nil {
-			return nil, err
-		}
-	}
-	if c.config.MountNsCtr != "" {
-		if err := c.addNamespaceContainer(&g, MountNS, c.config.MountNsCtr, spec.MountNamespace); err != nil {
-			return nil, err
-		}
-	}
-	if c.config.NetNsCtr != "" {
-		if err := c.addNamespaceContainer(&g, NetNS, c.config.NetNsCtr, spec.NetworkNamespace); err != nil {
-			return nil, err
-		}
-	}
-	if c.config.PIDNsCtr != "" {
-		if err := c.addNamespaceContainer(&g, PIDNS, c.config.PIDNsCtr, spec.PIDNamespace); err != nil {
-			return nil, err
-		}
-	}
-	if c.config.UserNsCtr != "" {
-		if err := c.addNamespaceContainer(&g, UserNS, c.config.UserNsCtr, spec.UserNamespace); err != nil {
-			return nil, err
-		}
-		if len(g.Config.Linux.UIDMappings) == 0 {
-			// runc complains if no mapping is specified, even if we join another ns.  So provide a dummy mapping
-			g.AddLinuxUIDMapping(uint32(0), uint32(0), uint32(1))
-			g.AddLinuxGIDMapping(uint32(0), uint32(0), uint32(1))
-		}
-	}
-
-	availableUIDs, availableGIDs, err := rootless.GetAvailableIDMaps()
-	if err != nil {
-		if os.IsNotExist(err) {
-			// The kernel-provided files only exist if user namespaces are supported
-			logrus.Debugf("User or group ID mappings not available: %s", err)
-		} else {
-			return nil, err
-		}
-	} else {
-		g.Config.Linux.UIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.UIDMappings, availableUIDs)
-		g.Config.Linux.GIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.GIDMappings, availableGIDs)
-	}
-
-	// Hostname handling:
-	// If we have a UTS namespace, set Hostname in the OCI spec.
-	// Set the HOSTNAME environment variable unless explicitly overridden by
-	// the user (already present in OCI spec). If we don't have a UTS ns,
-	// set it to the host's hostname instead.
-	hostname := c.Hostname()
-	foundUTS := false
-
-	for _, i := range c.config.Spec.Linux.Namespaces {
-		if i.Type == spec.UTSNamespace && i.Path == "" {
-			foundUTS = true
-			g.SetHostname(hostname)
-			break
-		}
-	}
-	if !foundUTS {
-		tmpHostname, err := os.Hostname()
-		if err != nil {
-			return nil, err
-		}
-		hostname = tmpHostname
-	}
-	needEnv := true
-	for _, checkEnv := range g.Config.Process.Env {
-		if strings.SplitN(checkEnv, "=", 2)[0] == "HOSTNAME" {
-			needEnv = false
-			break
-		}
-	}
-	if needEnv {
-		g.AddProcessEnv("HOSTNAME", hostname)
-	}
-
-	if c.config.UTSNsCtr != "" {
-		if err := c.addNamespaceContainer(&g, UTSNS, c.config.UTSNsCtr, spec.UTSNamespace); err != nil {
-			return nil, err
-		}
-	}
-	if c.config.CgroupNsCtr != "" {
-		if err := c.addNamespaceContainer(&g, CgroupNS, c.config.CgroupNsCtr, spec.CgroupNamespace); err != nil {
-			return nil, err
-		}
-	}
-
-	if c.config.UserNsCtr == "" && c.config.IDMappings.AutoUserNs {
-		if err := g.AddOrReplaceLinuxNamespace(string(spec.UserNamespace), ""); err != nil {
-			return nil, err
-		}
-		g.ClearLinuxUIDMappings()
-		for _, uidmap := range c.config.IDMappings.UIDMap {
-			g.AddLinuxUIDMapping(uint32(uidmap.HostID), uint32(uidmap.ContainerID), uint32(uidmap.Size))
-		}
-		g.ClearLinuxGIDMappings()
-		for _, gidmap := range c.config.IDMappings.GIDMap {
-			g.AddLinuxGIDMapping(uint32(gidmap.HostID), uint32(gidmap.ContainerID), uint32(gidmap.Size))
-		}
-	}
-
-	g.SetRootPath(c.state.Mountpoint)
-	g.AddAnnotation(annotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano))
-	g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal))
-
-	if _, exists := g.Config.Annotations[annotations.ContainerManager]; !exists {
-		g.AddAnnotation(annotations.ContainerManager, annotations.ContainerManagerLibpod)
-	}
-
-	cgroupPath, err := c.getOCICgroupPath()
-	if err != nil {
-		return nil, err
-	}
-
-	g.SetLinuxCgroupsPath(cgroupPath)
-
-	// Warning: CDI may alter g.Config in place.
-	if len(c.config.CDIDevices) > 0 {
-		registry := cdi.GetRegistry(
-			cdi.WithAutoRefresh(false),
-		)
-		if err := registry.Refresh(); err != nil {
-			logrus.Debugf("The following error was triggered when refreshing the CDI registry: %v", err)
-		}
-		_, err := registry.InjectDevices(g.Config, c.config.CDIDevices...)
-		if err != nil {
-			return nil, fmt.Errorf("error setting up CDI devices: %w", err)
-		}
-	}
-
-	// Mounts need to be sorted so paths will not cover other paths
-	mounts := sortMounts(g.Mounts())
-	g.ClearMounts()
-
-	// Determine property of RootPropagation based on volume properties. If
-	// a volume is shared, then keep root propagation shared. This should
-	// work for slave and private volumes too.
-	//
-	// For slave volumes, it can be either [r]shared/[r]slave.
-	//
-	// For private volumes any root propagation value should work.
-	rootPropagation := ""
-	for _, m := range mounts {
-		// We need to remove all symlinks from tmpfs mounts.
-		// Runc and other runtimes may choke on them.
-		// Easy solution: use securejoin to do a scoped evaluation of
-		// the links, then trim off the mount prefix.
-		if m.Type == "tmpfs" {
-			finalPath, err := securejoin.SecureJoin(c.state.Mountpoint, m.Destination)
-			if err != nil {
-				return nil, fmt.Errorf("error resolving symlinks for mount destination %s: %w", m.Destination, err)
-			}
-			trimmedPath := strings.TrimPrefix(finalPath, strings.TrimSuffix(c.state.Mountpoint, "/"))
-			m.Destination = trimmedPath
-		}
-		g.AddMount(m)
-		for _, opt := range m.Options {
-			switch opt {
-			case MountShared, MountRShared:
-				if rootPropagation != MountShared && rootPropagation != MountRShared {
-					rootPropagation = MountShared
-				}
-			case MountSlave, MountRSlave:
-				if rootPropagation != MountShared && rootPropagation != MountRShared && rootPropagation != MountSlave && rootPropagation != MountRSlave {
-					rootPropagation = MountRSlave
-				}
-			}
-		}
-	}
-
-	if rootPropagation != "" {
-		logrus.Debugf("Set root propagation to %q", rootPropagation)
-		if err := g.SetLinuxRootPropagation(rootPropagation); err != nil {
-			return nil, err
-		}
-	}
-
-	// Warning: precreate hooks may alter g.Config in place.
-	if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil {
-		return nil, fmt.Errorf("error setting up OCI Hooks: %w", err)
-	}
-	if len(c.config.EnvSecrets) > 0 {
-		manager, err := c.runtime.SecretsManager()
-		if err != nil {
-			return nil, err
-		}
-		if err != nil {
-			return nil, err
-		}
-		for name, secr := range c.config.EnvSecrets {
-			_, data, err := manager.LookupSecretData(secr.Name)
-			if err != nil {
-				return nil, err
-			}
-			g.AddProcessEnv(name, string(data))
-		}
-	}
-
-	// Pass down the LISTEN_* environment (see #10443).
-	for _, key := range []string{"LISTEN_PID", "LISTEN_FDS", "LISTEN_FDNAMES"} {
-		if val, ok := os.LookupEnv(key); ok {
-			// Force the PID to `1` since we cannot rely on (all
-			// versions of) all runtimes to do it for us.
-			if key == "LISTEN_PID" {
-				val = "1"
-			}
-			g.AddProcessEnv(key, val)
-		}
-	}
-
-	return g.Config, nil
-}
-
-// mountNotifySocket mounts the NOTIFY_SOCKET into the container if it's set
-// and if the sdnotify mode is set to container.  It also sets c.notifySocket
-// to avoid redundantly looking up the env variable.
-func (c *Container) mountNotifySocket(g generate.Generator) error {
-	if c.config.SdNotifySocket == "" {
-		return nil
-	}
-	if c.config.SdNotifyMode != define.SdNotifyModeContainer {
-		return nil
-	}
-
-	notifyDir := filepath.Join(c.bundlePath(), "notify")
-	logrus.Debugf("Checking notify %q dir", notifyDir)
-	if err := os.MkdirAll(notifyDir, 0755); err != nil {
-		if !os.IsExist(err) {
-			return fmt.Errorf("unable to create notify %q dir: %w", notifyDir, err)
-		}
-	}
-	if err := label.Relabel(notifyDir, c.MountLabel(), true); err != nil {
-		return fmt.Errorf("relabel failed %q: %w", notifyDir, err)
-	}
-	logrus.Debugf("Add bindmount notify %q dir", notifyDir)
-	if _, ok := c.state.BindMounts["/run/notify"]; !ok {
-		c.state.BindMounts["/run/notify"] = notifyDir
-	}
-
-	// Set the container's notify socket to the proxy socket created by conmon
-	g.AddProcessEnv("NOTIFY_SOCKET", "/run/notify/notify.sock")
-
-	return nil
-}
-
 // systemd expects to have /run, /run/lock and /tmp on tmpfs
 // It also expects to be able to write to /sys/fs/cgroup/systemd and /var/log/journal
 func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) error {
@@ -1073,9 +267,15 @@ func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) erro
 		g.AddMount(systemdMnt)
 	} else {
 		mountOptions := []string{"bind", "rprivate"}
+		skipMount := false
 
 		var statfs unix.Statfs_t
 		if err := unix.Statfs("/sys/fs/cgroup/systemd", &statfs); err != nil {
+			if errors.Is(err, os.ErrNotExist) {
+				// If the mount is missing on the host, we cannot bind mount it so
+				// just skip it.
+				skipMount = true
+			}
 			mountOptions = append(mountOptions, "nodev", "noexec", "nosuid")
 		} else {
 			if statfs.Flags&unix.MS_NODEV == unix.MS_NODEV {
@@ -1091,15 +291,16 @@ func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) erro
 				mountOptions = append(mountOptions, "ro")
 			}
 		}
-
-		systemdMnt := spec.Mount{
-			Destination: "/sys/fs/cgroup/systemd",
-			Type:        "bind",
-			Source:      "/sys/fs/cgroup/systemd",
-			Options:     mountOptions,
+		if !skipMount {
+			systemdMnt := spec.Mount{
+				Destination: "/sys/fs/cgroup/systemd",
+				Type:        "bind",
+				Source:      "/sys/fs/cgroup/systemd",
+				Options:     mountOptions,
+			}
+			g.AddMount(systemdMnt)
+			g.AddLinuxMaskedPaths("/sys/fs/cgroup/systemd/release_agent")
 		}
-		g.AddMount(systemdMnt)
-		g.AddLinuxMaskedPaths("/sys/fs/cgroup/systemd/release_agent")
 	}
 
 	return nil
@@ -1131,1867 +332,6 @@ func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr
 	return nil
 }
 
-func (c *Container) addCheckpointImageMetadata(importBuilder *buildah.Builder) error {
-	// Get information about host environment
-	hostInfo, err := c.Runtime().hostInfo()
-	if err != nil {
-		return fmt.Errorf("getting host info: %v", err)
-	}
-
-	criuVersion, err := criu.GetCriuVersion()
-	if err != nil {
-		return fmt.Errorf("getting criu version: %v", err)
-	}
-
-	rootfsImageID, rootfsImageName := c.Image()
-
-	// Add image annotations with information about the container and the host.
-	// This information is useful to check compatibility before restoring the checkpoint
-
-	checkpointImageAnnotations := map[string]string{
-		define.CheckpointAnnotationName:                c.config.Name,
-		define.CheckpointAnnotationRawImageName:        c.config.RawImageName,
-		define.CheckpointAnnotationRootfsImageID:       rootfsImageID,
-		define.CheckpointAnnotationRootfsImageName:     rootfsImageName,
-		define.CheckpointAnnotationPodmanVersion:       version.Version.String(),
-		define.CheckpointAnnotationCriuVersion:         strconv.Itoa(criuVersion),
-		define.CheckpointAnnotationRuntimeName:         hostInfo.OCIRuntime.Name,
-		define.CheckpointAnnotationRuntimeVersion:      hostInfo.OCIRuntime.Version,
-		define.CheckpointAnnotationConmonVersion:       hostInfo.Conmon.Version,
-		define.CheckpointAnnotationHostArch:            hostInfo.Arch,
-		define.CheckpointAnnotationHostKernel:          hostInfo.Kernel,
-		define.CheckpointAnnotationCgroupVersion:       hostInfo.CgroupsVersion,
-		define.CheckpointAnnotationDistributionVersion: hostInfo.Distribution.Version,
-		define.CheckpointAnnotationDistributionName:    hostInfo.Distribution.Distribution,
-	}
-
-	for key, value := range checkpointImageAnnotations {
-		importBuilder.SetAnnotation(key, value)
-	}
-
-	return nil
-}
-
-func (c *Container) resolveCheckpointImageName(options *ContainerCheckpointOptions) error {
-	if options.CreateImage == "" {
-		return nil
-	}
-
-	// Resolve image name
-	resolvedImageName, err := c.runtime.LibimageRuntime().ResolveName(options.CreateImage)
-	if err != nil {
-		return err
-	}
-
-	options.CreateImage = resolvedImageName
-	return nil
-}
-
-func (c *Container) createCheckpointImage(ctx context.Context, options ContainerCheckpointOptions) error {
-	if options.CreateImage == "" {
-		return nil
-	}
-	logrus.Debugf("Create checkpoint image %s", options.CreateImage)
-
-	// Create storage reference
-	imageRef, err := is.Transport.ParseStoreReference(c.runtime.store, options.CreateImage)
-	if err != nil {
-		return errors.New("failed to parse image name")
-	}
-
-	// Build an image scratch
-	builderOptions := buildah.BuilderOptions{
-		FromImage: "scratch",
-	}
-	importBuilder, err := buildah.NewBuilder(ctx, c.runtime.store, builderOptions)
-	if err != nil {
-		return err
-	}
-	// Clean up buildah working container
-	defer func() {
-		if err := importBuilder.Delete(); err != nil {
-			logrus.Errorf("Image builder delete failed: %v", err)
-		}
-	}()
-
-	if err := c.prepareCheckpointExport(); err != nil {
-		return err
-	}
-
-	// Export checkpoint into temporary tar file
-	tmpDir, err := ioutil.TempDir("", "checkpoint_image_")
-	if err != nil {
-		return err
-	}
-	defer os.RemoveAll(tmpDir)
-
-	options.TargetFile = path.Join(tmpDir, "checkpoint.tar")
-
-	if err := c.exportCheckpoint(options); err != nil {
-		return err
-	}
-
-	// Copy checkpoint from temporary tar file in the image
-	addAndCopyOptions := buildah.AddAndCopyOptions{}
-	if err := importBuilder.Add("", true, addAndCopyOptions, options.TargetFile); err != nil {
-		return err
-	}
-
-	if err := c.addCheckpointImageMetadata(importBuilder); err != nil {
-		return err
-	}
-
-	commitOptions := buildah.CommitOptions{
-		Squash:        true,
-		SystemContext: c.runtime.imageContext,
-	}
-
-	// Create checkpoint image
-	id, _, _, err := importBuilder.Commit(ctx, imageRef, commitOptions)
-	if err != nil {
-		return err
-	}
-	logrus.Debugf("Created checkpoint image: %s", id)
-	return nil
-}
-
-func (c *Container) exportCheckpoint(options ContainerCheckpointOptions) error {
-	if len(c.Dependencies()) == 1 {
-		// Check if the dependency is an infra container. If it is we can checkpoint
-		// the container out of the Pod.
-		if c.config.Pod == "" {
-			return errors.New("cannot export checkpoints of containers with dependencies")
-		}
-
-		pod, err := c.runtime.state.Pod(c.config.Pod)
-		if err != nil {
-			return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), c.config.Pod, err)
-		}
-		infraID, err := pod.InfraContainerID()
-		if err != nil {
-			return fmt.Errorf("cannot retrieve infra container ID for pod %s: %w", c.config.Pod, err)
-		}
-		if c.Dependencies()[0] != infraID {
-			return errors.New("cannot export checkpoints of containers with dependencies")
-		}
-	}
-	if len(c.Dependencies()) > 1 {
-		return errors.New("cannot export checkpoints of containers with dependencies")
-	}
-	logrus.Debugf("Exporting checkpoint image of container %q to %q", c.ID(), options.TargetFile)
-
-	includeFiles := []string{
-		"artifacts",
-		metadata.DevShmCheckpointTar,
-		metadata.ConfigDumpFile,
-		metadata.SpecDumpFile,
-		metadata.NetworkStatusFile,
-		stats.StatsDump,
-	}
-
-	if c.LogDriver() == define.KubernetesLogging ||
-		c.LogDriver() == define.JSONLogging {
-		includeFiles = append(includeFiles, "ctr.log")
-	}
-	if options.PreCheckPoint {
-		includeFiles = append(includeFiles, preCheckpointDir)
-	} else {
-		includeFiles = append(includeFiles, metadata.CheckpointDirectory)
-	}
-	// Get root file-system changes included in the checkpoint archive
-	var addToTarFiles []string
-	if !options.IgnoreRootfs {
-		// To correctly track deleted files, let's go through the output of 'podman diff'
-		rootFsChanges, err := c.runtime.GetDiff("", c.ID(), define.DiffContainer)
-		if err != nil {
-			return fmt.Errorf("error exporting root file-system diff for %q: %w", c.ID(), err)
-		}
-
-		addToTarFiles, err := crutils.CRCreateRootFsDiffTar(&rootFsChanges, c.state.Mountpoint, c.bundlePath())
-		if err != nil {
-			return err
-		}
-
-		includeFiles = append(includeFiles, addToTarFiles...)
-	}
-
-	// Folder containing archived volumes that will be included in the export
-	expVolDir := filepath.Join(c.bundlePath(), metadata.CheckpointVolumesDirectory)
-
-	// Create an archive for each volume associated with the container
-	if !options.IgnoreVolumes {
-		if err := os.MkdirAll(expVolDir, 0700); err != nil {
-			return fmt.Errorf("error creating volumes export directory %q: %w", expVolDir, err)
-		}
-
-		for _, v := range c.config.NamedVolumes {
-			volumeTarFilePath := filepath.Join(metadata.CheckpointVolumesDirectory, v.Name+".tar")
-			volumeTarFileFullPath := filepath.Join(c.bundlePath(), volumeTarFilePath)
-
-			volumeTarFile, err := os.Create(volumeTarFileFullPath)
-			if err != nil {
-				return fmt.Errorf("error creating %q: %w", volumeTarFileFullPath, err)
-			}
-
-			volume, err := c.runtime.GetVolume(v.Name)
-			if err != nil {
-				return err
-			}
-
-			mp, err := volume.MountPoint()
-			if err != nil {
-				return err
-			}
-			if mp == "" {
-				return fmt.Errorf("volume %s is not mounted, cannot export: %w", volume.Name(), define.ErrInternal)
-			}
-
-			input, err := archive.TarWithOptions(mp, &archive.TarOptions{
-				Compression:      archive.Uncompressed,
-				IncludeSourceDir: true,
-			})
-			if err != nil {
-				return fmt.Errorf("error reading volume directory %q: %w", v.Dest, err)
-			}
-
-			_, err = io.Copy(volumeTarFile, input)
-			if err != nil {
-				return err
-			}
-			volumeTarFile.Close()
-
-			includeFiles = append(includeFiles, volumeTarFilePath)
-		}
-	}
-
-	input, err := archive.TarWithOptions(c.bundlePath(), &archive.TarOptions{
-		Compression:      options.Compression,
-		IncludeSourceDir: true,
-		IncludeFiles:     includeFiles,
-	})
-
-	if err != nil {
-		return fmt.Errorf("error reading checkpoint directory %q: %w", c.ID(), err)
-	}
-
-	outFile, err := os.Create(options.TargetFile)
-	if err != nil {
-		return fmt.Errorf("error creating checkpoint export file %q: %w", options.TargetFile, err)
-	}
-	defer outFile.Close()
-
-	if err := os.Chmod(options.TargetFile, 0600); err != nil {
-		return err
-	}
-
-	_, err = io.Copy(outFile, input)
-	if err != nil {
-		return err
-	}
-
-	for _, file := range addToTarFiles {
-		os.Remove(filepath.Join(c.bundlePath(), file))
-	}
-
-	if !options.IgnoreVolumes {
-		os.RemoveAll(expVolDir)
-	}
-
-	return nil
-}
-
-func (c *Container) checkpointRestoreSupported(version int) error {
-	if !criu.CheckForCriu(version) {
-		return fmt.Errorf("checkpoint/restore requires at least CRIU %d", version)
-	}
-	if !c.ociRuntime.SupportsCheckpoint() {
-		return errors.New("configured runtime does not support checkpoint/restore")
-	}
-	return nil
-}
-
-func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) {
-	if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil {
-		return nil, 0, err
-	}
-
-	if c.state.State != define.ContainerStateRunning {
-		return nil, 0, fmt.Errorf("%q is not running, cannot checkpoint: %w", c.state.State, define.ErrCtrStateInvalid)
-	}
-
-	if c.AutoRemove() && options.TargetFile == "" {
-		return nil, 0, errors.New("cannot checkpoint containers that have been started with '--rm' unless '--export' is used")
-	}
-
-	if err := c.resolveCheckpointImageName(&options); err != nil {
-		return nil, 0, err
-	}
-
-	if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "dump.log", c.MountLabel()); err != nil {
-		return nil, 0, err
-	}
-
-	// Setting CheckpointLog early in case there is a failure.
-	c.state.CheckpointLog = path.Join(c.bundlePath(), "dump.log")
-	c.state.CheckpointPath = c.CheckpointPath()
-
-	runtimeCheckpointDuration, err := c.ociRuntime.CheckpointContainer(c, options)
-	if err != nil {
-		return nil, 0, err
-	}
-
-	// Keep the content of /dev/shm directory
-	if c.config.ShmDir != "" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
-		shmDirTarFileFullPath := filepath.Join(c.bundlePath(), metadata.DevShmCheckpointTar)
-
-		shmDirTarFile, err := os.Create(shmDirTarFileFullPath)
-		if err != nil {
-			return nil, 0, err
-		}
-		defer shmDirTarFile.Close()
-
-		input, err := archive.TarWithOptions(c.config.ShmDir, &archive.TarOptions{
-			Compression:      archive.Uncompressed,
-			IncludeSourceDir: true,
-		})
-		if err != nil {
-			return nil, 0, err
-		}
-
-		if _, err = io.Copy(shmDirTarFile, input); err != nil {
-			return nil, 0, err
-		}
-	}
-
-	// Save network.status. This is needed to restore the container with
-	// the same IP. Currently limited to one IP address in a container
-	// with one interface.
-	// FIXME: will this break something?
-	if _, err := metadata.WriteJSONFile(c.getNetworkStatus(), c.bundlePath(), metadata.NetworkStatusFile); err != nil {
-		return nil, 0, err
-	}
-
-	defer c.newContainerEvent(events.Checkpoint)
-
-	// There is a bug from criu: https://github.com/checkpoint-restore/criu/issues/116
-	// We have to change the symbolic link from absolute path to relative path
-	if options.WithPrevious {
-		os.Remove(path.Join(c.CheckpointPath(), "parent"))
-		if err := os.Symlink("../pre-checkpoint", path.Join(c.CheckpointPath(), "parent")); err != nil {
-			return nil, 0, err
-		}
-	}
-
-	if options.TargetFile != "" {
-		if err := c.exportCheckpoint(options); err != nil {
-			return nil, 0, err
-		}
-	} else {
-		if err := c.createCheckpointImage(ctx, options); err != nil {
-			return nil, 0, err
-		}
-	}
-
-	logrus.Debugf("Checkpointed container %s", c.ID())
-
-	if !options.KeepRunning && !options.PreCheckPoint {
-		c.state.State = define.ContainerStateStopped
-		c.state.Checkpointed = true
-		c.state.CheckpointedTime = time.Now()
-		c.state.Restored = false
-		c.state.RestoredTime = time.Time{}
-
-		// Clean up Storage and Network
-		if err := c.cleanup(ctx); err != nil {
-			return nil, 0, err
-		}
-	}
-
-	criuStatistics, err := func() (*define.CRIUCheckpointRestoreStatistics, error) {
-		if !options.PrintStats {
-			return nil, nil
-		}
-		statsDirectory, err := os.Open(c.bundlePath())
-		if err != nil {
-			return nil, fmt.Errorf("not able to open %q: %w", c.bundlePath(), err)
-		}
-
-		dumpStatistics, err := stats.CriuGetDumpStats(statsDirectory)
-		if err != nil {
-			return nil, fmt.Errorf("displaying checkpointing statistics not possible: %w", err)
-		}
-
-		return &define.CRIUCheckpointRestoreStatistics{
-			FreezingTime: dumpStatistics.GetFreezingTime(),
-			FrozenTime:   dumpStatistics.GetFrozenTime(),
-			MemdumpTime:  dumpStatistics.GetMemdumpTime(),
-			MemwriteTime: dumpStatistics.GetMemwriteTime(),
-			PagesScanned: dumpStatistics.GetPagesScanned(),
-			PagesWritten: dumpStatistics.GetPagesWritten(),
-		}, nil
-	}()
-	if err != nil {
-		return nil, 0, err
-	}
-
-	if !options.Keep && !options.PreCheckPoint {
-		cleanup := []string{
-			"dump.log",
-			stats.StatsDump,
-			metadata.ConfigDumpFile,
-			metadata.SpecDumpFile,
-		}
-		for _, del := range cleanup {
-			file := filepath.Join(c.bundlePath(), del)
-			if err := os.Remove(file); err != nil {
-				logrus.Debugf("Unable to remove file %s", file)
-			}
-		}
-		// The file has been deleted. Do not mention it.
-		c.state.CheckpointLog = ""
-	}
-
-	c.state.FinishedTime = time.Now()
-	return criuStatistics, runtimeCheckpointDuration, c.save()
-}
-
-func (c *Container) generateContainerSpec() error {
-	// Make sure the newly created config.json exists on disk
-
-	// NewFromSpec() is deprecated according to its comment
-	// however the recommended replace just causes a nil map panic
-	//nolint:staticcheck
-	g := generate.NewFromSpec(c.config.Spec)
-
-	if err := c.saveSpec(g.Config); err != nil {
-		return fmt.Errorf("saving imported container specification for restore failed: %w", err)
-	}
-
-	return nil
-}
-
-func (c *Container) importCheckpointImage(ctx context.Context, imageID string) error {
-	img, _, err := c.Runtime().LibimageRuntime().LookupImage(imageID, nil)
-	if err != nil {
-		return err
-	}
-
-	mountPoint, err := img.Mount(ctx, nil, "")
-	defer func() {
-		if err := c.unmount(true); err != nil {
-			logrus.Errorf("Failed to unmount container: %v", err)
-		}
-	}()
-	if err != nil {
-		return err
-	}
-
-	// Import all checkpoint files except ConfigDumpFile and SpecDumpFile. We
-	// generate new container config files to enable to specifying a new
-	// container name.
-	checkpoint := []string{
-		"artifacts",
-		metadata.CheckpointDirectory,
-		metadata.CheckpointVolumesDirectory,
-		metadata.DevShmCheckpointTar,
-		metadata.RootFsDiffTar,
-		metadata.DeletedFilesFile,
-		metadata.PodOptionsFile,
-		metadata.PodDumpFile,
-	}
-
-	for _, name := range checkpoint {
-		src := filepath.Join(mountPoint, name)
-		dst := filepath.Join(c.bundlePath(), name)
-		if err := archive.NewDefaultArchiver().CopyWithTar(src, dst); err != nil {
-			logrus.Debugf("Can't import '%s' from checkpoint image", name)
-		}
-	}
-
-	return c.generateContainerSpec()
-}
-
-func (c *Container) importCheckpointTar(input string) error {
-	if err := crutils.CRImportCheckpointWithoutConfig(c.bundlePath(), input); err != nil {
-		return err
-	}
-
-	return c.generateContainerSpec()
-}
-
-func (c *Container) importPreCheckpoint(input string) error {
-	archiveFile, err := os.Open(input)
-	if err != nil {
-		return fmt.Errorf("failed to open pre-checkpoint archive for import: %w", err)
-	}
-
-	defer archiveFile.Close()
-
-	err = archive.Untar(archiveFile, c.bundlePath(), nil)
-	if err != nil {
-		return fmt.Errorf("unpacking of pre-checkpoint archive %s failed: %w", input, err)
-	}
-	return nil
-}
-
-func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (criuStatistics *define.CRIUCheckpointRestoreStatistics, runtimeRestoreDuration int64, retErr error) {
-	minCriuVersion := func() int {
-		if options.Pod == "" {
-			return criu.MinCriuVersion
-		}
-		return criu.PodCriuVersion
-	}()
-	if err := c.checkpointRestoreSupported(minCriuVersion); err != nil {
-		return nil, 0, err
-	}
-
-	if options.Pod != "" && !crutils.CRRuntimeSupportsPodCheckpointRestore(c.ociRuntime.Path()) {
-		return nil, 0, fmt.Errorf("runtime %s does not support pod restore", c.ociRuntime.Path())
-	}
-
-	if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
-		return nil, 0, fmt.Errorf("container %s is running or paused, cannot restore: %w", c.ID(), define.ErrCtrStateInvalid)
-	}
-
-	if options.ImportPrevious != "" {
-		if err := c.importPreCheckpoint(options.ImportPrevious); err != nil {
-			return nil, 0, err
-		}
-	}
-
-	if options.TargetFile != "" {
-		if err := c.importCheckpointTar(options.TargetFile); err != nil {
-			return nil, 0, err
-		}
-	} else if options.CheckpointImageID != "" {
-		if err := c.importCheckpointImage(ctx, options.CheckpointImageID); err != nil {
-			return nil, 0, err
-		}
-	}
-
-	// Let's try to stat() CRIU's inventory file. If it does not exist, it makes
-	// no sense to try a restore. This is a minimal check if a checkpoint exist.
-	if _, err := os.Stat(filepath.Join(c.CheckpointPath(), "inventory.img")); os.IsNotExist(err) {
-		return nil, 0, fmt.Errorf("a complete checkpoint for this container cannot be found, cannot restore: %w", err)
-	}
-
-	if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "restore.log", c.MountLabel()); err != nil {
-		return nil, 0, err
-	}
-
-	// Setting RestoreLog early in case there is a failure.
-	c.state.RestoreLog = path.Join(c.bundlePath(), "restore.log")
-	c.state.CheckpointPath = c.CheckpointPath()
-
-	// Read network configuration from checkpoint
-	var netStatus map[string]types.StatusBlock
-	_, err := metadata.ReadJSONFile(&netStatus, c.bundlePath(), metadata.NetworkStatusFile)
-	if err != nil {
-		logrus.Infof("Failed to unmarshal network status, cannot restore the same ip/mac: %v", err)
-	}
-	// If the restored container should get a new name, the IP address of
-	// the container will not be restored. This assumes that if a new name is
-	// specified, the container is restored multiple times.
-	// TODO: This implicit restoring with or without IP depending on an
-	//       unrelated restore parameter (--name) does not seem like the
-	//       best solution.
-	if err == nil && options.Name == "" && (!options.IgnoreStaticIP || !options.IgnoreStaticMAC) {
-		// The file with the network.status does exist. Let's restore the
-		// container with the same networks settings as during checkpointing.
-		networkOpts, err := c.networks()
-		if err != nil {
-			return nil, 0, err
-		}
-
-		netOpts := make(map[string]types.PerNetworkOptions, len(netStatus))
-		for network, perNetOpts := range networkOpts {
-			// unset mac and ips before we start adding the ones from the status
-			perNetOpts.StaticMAC = nil
-			perNetOpts.StaticIPs = nil
-			for name, netInt := range netStatus[network].Interfaces {
-				perNetOpts.InterfaceName = name
-				if !options.IgnoreStaticIP {
-					perNetOpts.StaticMAC = netInt.MacAddress
-				}
-				if !options.IgnoreStaticIP {
-					for _, netAddress := range netInt.Subnets {
-						perNetOpts.StaticIPs = append(perNetOpts.StaticIPs, netAddress.IPNet.IP)
-					}
-				}
-				// Normally interfaces have a length of 1, only for some special cni configs we could get more.
-				// For now just use the first interface to get the ips this should be good enough for most cases.
-				break
-			}
-			netOpts[network] = perNetOpts
-		}
-		c.perNetworkOpts = netOpts
-	}
-
-	defer func() {
-		if retErr != nil {
-			if err := c.cleanup(ctx); err != nil {
-				logrus.Errorf("Cleaning up container %s: %v", c.ID(), err)
-			}
-		}
-	}()
-
-	if err := c.prepare(); err != nil {
-		return nil, 0, err
-	}
-
-	// Read config
-	jsonPath := filepath.Join(c.bundlePath(), "config.json")
-	logrus.Debugf("generate.NewFromFile at %v", jsonPath)
-	g, err := generate.NewFromFile(jsonPath)
-	if err != nil {
-		logrus.Debugf("generate.NewFromFile failed with %v", err)
-		return nil, 0, err
-	}
-
-	// Restoring from an import means that we are doing migration
-	if options.TargetFile != "" || options.CheckpointImageID != "" {
-		g.SetRootPath(c.state.Mountpoint)
-	}
-
-	// We want to have the same network namespace as before.
-	if c.config.CreateNetNS {
-		netNSPath := ""
-		if !c.config.PostConfigureNetNS {
-			netNSPath = c.state.NetNS.Path()
-		}
-
-		if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), netNSPath); err != nil {
-			return nil, 0, err
-		}
-	}
-
-	if options.Pod != "" {
-		// Running in a Pod means that we have to change all namespace settings to
-		// the ones from the infrastructure container.
-		pod, err := c.runtime.LookupPod(options.Pod)
-		if err != nil {
-			return nil, 0, fmt.Errorf("pod %q cannot be retrieved: %w", options.Pod, err)
-		}
-
-		infraContainer, err := pod.InfraContainer()
-		if err != nil {
-			return nil, 0, fmt.Errorf("cannot retrieved infra container from pod %q: %w", options.Pod, err)
-		}
-
-		infraContainer.lock.Lock()
-		if err := infraContainer.syncContainer(); err != nil {
-			infraContainer.lock.Unlock()
-			return nil, 0, fmt.Errorf("error syncing infrastructure container %s status: %w", infraContainer.ID(), err)
-		}
-		if infraContainer.state.State != define.ContainerStateRunning {
-			if err := infraContainer.initAndStart(ctx); err != nil {
-				infraContainer.lock.Unlock()
-				return nil, 0, fmt.Errorf("error starting infrastructure container %s status: %w", infraContainer.ID(), err)
-			}
-		}
-		infraContainer.lock.Unlock()
-
-		if c.config.IPCNsCtr != "" {
-			nsPath, err := infraContainer.namespacePath(IPCNS)
-			if err != nil {
-				return nil, 0, fmt.Errorf("cannot retrieve IPC namespace path for Pod %q: %w", options.Pod, err)
-			}
-			if err := g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), nsPath); err != nil {
-				return nil, 0, err
-			}
-		}
-
-		if c.config.NetNsCtr != "" {
-			nsPath, err := infraContainer.namespacePath(NetNS)
-			if err != nil {
-				return nil, 0, fmt.Errorf("cannot retrieve network namespace path for Pod %q: %w", options.Pod, err)
-			}
-			if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), nsPath); err != nil {
-				return nil, 0, err
-			}
-		}
-
-		if c.config.PIDNsCtr != "" {
-			nsPath, err := infraContainer.namespacePath(PIDNS)
-			if err != nil {
-				return nil, 0, fmt.Errorf("cannot retrieve PID namespace path for Pod %q: %w", options.Pod, err)
-			}
-			if err := g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), nsPath); err != nil {
-				return nil, 0, err
-			}
-		}
-
-		if c.config.UTSNsCtr != "" {
-			nsPath, err := infraContainer.namespacePath(UTSNS)
-			if err != nil {
-				return nil, 0, fmt.Errorf("cannot retrieve UTS namespace path for Pod %q: %w", options.Pod, err)
-			}
-			if err := g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), nsPath); err != nil {
-				return nil, 0, err
-			}
-		}
-
-		if c.config.CgroupNsCtr != "" {
-			nsPath, err := infraContainer.namespacePath(CgroupNS)
-			if err != nil {
-				return nil, 0, fmt.Errorf("cannot retrieve Cgroup namespace path for Pod %q: %w", options.Pod, err)
-			}
-			if err := g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), nsPath); err != nil {
-				return nil, 0, err
-			}
-		}
-	}
-
-	if err := c.makeBindMounts(); err != nil {
-		return nil, 0, err
-	}
-
-	if options.TargetFile != "" || options.CheckpointImageID != "" {
-		for dstPath, srcPath := range c.state.BindMounts {
-			newMount := spec.Mount{
-				Type:        "bind",
-				Source:      srcPath,
-				Destination: dstPath,
-				Options:     []string{"bind", "private"},
-			}
-			if c.IsReadOnly() && dstPath != "/dev/shm" {
-				newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
-			}
-			if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
-				newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
-			}
-			if !MountExists(g.Mounts(), dstPath) {
-				g.AddMount(newMount)
-			}
-		}
-	}
-
-	// Restore /dev/shm content
-	if c.config.ShmDir != "" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
-		shmDirTarFileFullPath := filepath.Join(c.bundlePath(), metadata.DevShmCheckpointTar)
-		if _, err := os.Stat(shmDirTarFileFullPath); err != nil {
-			logrus.Debug("Container checkpoint doesn't contain dev/shm: ", err.Error())
-		} else {
-			shmDirTarFile, err := os.Open(shmDirTarFileFullPath)
-			if err != nil {
-				return nil, 0, err
-			}
-			defer shmDirTarFile.Close()
-
-			if err := archive.UntarUncompressed(shmDirTarFile, c.config.ShmDir, nil); err != nil {
-				return nil, 0, err
-			}
-		}
-	}
-
-	// Cleanup for a working restore.
-	if err := c.removeConmonFiles(); err != nil {
-		return nil, 0, err
-	}
-
-	// Save the OCI spec to disk
-	if err := c.saveSpec(g.Config); err != nil {
-		return nil, 0, err
-	}
-
-	// When restoring from an imported archive, allow restoring the content of volumes.
-	// Volumes are created in setupContainer()
-	if !options.IgnoreVolumes && (options.TargetFile != "" || options.CheckpointImageID != "") {
-		for _, v := range c.config.NamedVolumes {
-			volumeFilePath := filepath.Join(c.bundlePath(), metadata.CheckpointVolumesDirectory, v.Name+".tar")
-
-			volumeFile, err := os.Open(volumeFilePath)
-			if err != nil {
-				return nil, 0, fmt.Errorf("failed to open volume file %s: %w", volumeFilePath, err)
-			}
-			defer volumeFile.Close()
-
-			volume, err := c.runtime.GetVolume(v.Name)
-			if err != nil {
-				return nil, 0, fmt.Errorf("failed to retrieve volume %s: %w", v.Name, err)
-			}
-
-			mountPoint, err := volume.MountPoint()
-			if err != nil {
-				return nil, 0, err
-			}
-			if mountPoint == "" {
-				return nil, 0, fmt.Errorf("unable to import volume %s as it is not mounted: %w", volume.Name(), err)
-			}
-			if err := archive.UntarUncompressed(volumeFile, mountPoint, nil); err != nil {
-				return nil, 0, fmt.Errorf("failed to extract volume %s to %s: %w", volumeFilePath, mountPoint, err)
-			}
-		}
-	}
-
-	// Before actually restarting the container, apply the root file-system changes
-	if !options.IgnoreRootfs {
-		if err := crutils.CRApplyRootFsDiffTar(c.bundlePath(), c.state.Mountpoint); err != nil {
-			return nil, 0, err
-		}
-
-		if err := crutils.CRRemoveDeletedFiles(c.ID(), c.bundlePath(), c.state.Mountpoint); err != nil {
-			return nil, 0, err
-		}
-	}
-
-	runtimeRestoreDuration, err = c.ociRuntime.CreateContainer(c, &options)
-	if err != nil {
-		return nil, 0, err
-	}
-
-	criuStatistics, err = func() (*define.CRIUCheckpointRestoreStatistics, error) {
-		if !options.PrintStats {
-			return nil, nil
-		}
-		statsDirectory, err := os.Open(c.bundlePath())
-		if err != nil {
-			return nil, fmt.Errorf("not able to open %q: %w", c.bundlePath(), err)
-		}
-
-		restoreStatistics, err := stats.CriuGetRestoreStats(statsDirectory)
-		if err != nil {
-			return nil, fmt.Errorf("displaying restore statistics not possible: %w", err)
-		}
-
-		return &define.CRIUCheckpointRestoreStatistics{
-			PagesCompared:   restoreStatistics.GetPagesCompared(),
-			PagesSkippedCow: restoreStatistics.GetPagesSkippedCow(),
-			ForkingTime:     restoreStatistics.GetForkingTime(),
-			RestoreTime:     restoreStatistics.GetRestoreTime(),
-			PagesRestored:   restoreStatistics.GetPagesRestored(),
-		}, nil
-	}()
-	if err != nil {
-		return nil, 0, err
-	}
-
-	logrus.Debugf("Restored container %s", c.ID())
-
-	c.state.State = define.ContainerStateRunning
-	c.state.Checkpointed = false
-	c.state.Restored = true
-	c.state.CheckpointedTime = time.Time{}
-	c.state.RestoredTime = time.Now()
-
-	if !options.Keep {
-		// Delete all checkpoint related files. At this point, in theory, all files
-		// should exist. Still ignoring errors for now as the container should be
-		// restored and running. Not erroring out just because some cleanup operation
-		// failed. Starting with the checkpoint directory
-		err = os.RemoveAll(c.CheckpointPath())
-		if err != nil {
-			logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", c.CheckpointPath(), err)
-		}
-		c.state.CheckpointPath = ""
-		err = os.RemoveAll(c.PreCheckPointPath())
-		if err != nil {
-			logrus.Debugf("Non-fatal: removal of pre-checkpoint directory (%s) failed: %v", c.PreCheckPointPath(), err)
-		}
-		err = os.RemoveAll(c.CheckpointVolumesPath())
-		if err != nil {
-			logrus.Debugf("Non-fatal: removal of checkpoint volumes directory (%s) failed: %v", c.CheckpointVolumesPath(), err)
-		}
-		cleanup := [...]string{
-			"restore.log",
-			"dump.log",
-			stats.StatsDump,
-			stats.StatsRestore,
-			metadata.DevShmCheckpointTar,
-			metadata.NetworkStatusFile,
-			metadata.RootFsDiffTar,
-			metadata.DeletedFilesFile,
-		}
-		for _, del := range cleanup {
-			file := filepath.Join(c.bundlePath(), del)
-			err = os.Remove(file)
-			if err != nil {
-				logrus.Debugf("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err)
-			}
-		}
-		c.state.CheckpointLog = ""
-		c.state.RestoreLog = ""
-	}
-
-	return criuStatistics, runtimeRestoreDuration, c.save()
-}
-
-// Retrieves a container's "root" net namespace container dependency.
-func (c *Container) getRootNetNsDepCtr() (depCtr *Container, err error) {
-	containersVisited := map[string]int{c.config.ID: 1}
-	nextCtr := c.config.NetNsCtr
-	for nextCtr != "" {
-		// Make sure we aren't in a loop
-		if _, visited := containersVisited[nextCtr]; visited {
-			return nil, errors.New("loop encountered while determining net namespace container")
-		}
-		containersVisited[nextCtr] = 1
-
-		depCtr, err = c.runtime.state.Container(nextCtr)
-		if err != nil {
-			return nil, fmt.Errorf("error fetching dependency %s of container %s: %w", c.config.NetNsCtr, c.ID(), err)
-		}
-		// This should never happen without an error
-		if depCtr == nil {
-			break
-		}
-		nextCtr = depCtr.config.NetNsCtr
-	}
-
-	if depCtr == nil {
-		return nil, errors.New("unexpected error depCtr is nil without reported error from runtime state")
-	}
-	return depCtr, nil
-}
-
-// Ensure standard bind mounts are mounted into all root directories (including chroot directories)
-func (c *Container) mountIntoRootDirs(mountName string, mountPath string) error {
-	c.state.BindMounts[mountName] = mountPath
-
-	for _, chrootDir := range c.config.ChrootDirs {
-		c.state.BindMounts[filepath.Join(chrootDir, mountName)] = mountPath
-	}
-
-	return nil
-}
-
-// Make standard bind mounts to include in the container
-func (c *Container) makeBindMounts() error {
-	if err := os.Chown(c.state.RunDir, c.RootUID(), c.RootGID()); err != nil {
-		return fmt.Errorf("cannot chown run directory: %w", err)
-	}
-
-	if c.state.BindMounts == nil {
-		c.state.BindMounts = make(map[string]string)
-	}
-	netDisabled, err := c.NetworkDisabled()
-	if err != nil {
-		return err
-	}
-
-	if !netDisabled {
-		// If /etc/resolv.conf and /etc/hosts exist, delete them so we
-		// will recreate. Only do this if we aren't sharing them with
-		// another container.
-		if c.config.NetNsCtr == "" {
-			if resolvePath, ok := c.state.BindMounts["/etc/resolv.conf"]; ok {
-				if err := os.Remove(resolvePath); err != nil && !os.IsNotExist(err) {
-					return fmt.Errorf("container %s: %w", c.ID(), err)
-				}
-				delete(c.state.BindMounts, "/etc/resolv.conf")
-			}
-			if hostsPath, ok := c.state.BindMounts["/etc/hosts"]; ok {
-				if err := os.Remove(hostsPath); err != nil && !os.IsNotExist(err) {
-					return fmt.Errorf("container %s: %w", c.ID(), err)
-				}
-				delete(c.state.BindMounts, "/etc/hosts")
-			}
-		}
-
-		if c.config.NetNsCtr != "" && (!c.config.UseImageResolvConf || !c.config.UseImageHosts) {
-			// We share a net namespace.
-			// We want /etc/resolv.conf and /etc/hosts from the
-			// other container. Unless we're not creating both of
-			// them.
-			depCtr, err := c.getRootNetNsDepCtr()
-			if err != nil {
-				return fmt.Errorf("error fetching network namespace dependency container for container %s: %w", c.ID(), err)
-			}
-
-			// We need that container's bind mounts
-			bindMounts, err := depCtr.BindMounts()
-			if err != nil {
-				return fmt.Errorf("error fetching bind mounts from dependency %s of container %s: %w", depCtr.ID(), c.ID(), err)
-			}
-
-			// The other container may not have a resolv.conf or /etc/hosts
-			// If it doesn't, don't copy them
-			resolvPath, exists := bindMounts["/etc/resolv.conf"]
-			if !c.config.UseImageResolvConf && exists {
-				err := c.mountIntoRootDirs("/etc/resolv.conf", resolvPath)
-
-				if err != nil {
-					return fmt.Errorf("error assigning mounts to container %s: %w", c.ID(), err)
-				}
-			}
-
-			// check if dependency container has an /etc/hosts file.
-			// It may not have one, so only use it if it does.
-			hostsPath, exists := bindMounts[config.DefaultHostsFile]
-			if !c.config.UseImageHosts && exists {
-				// we cannot use the dependency container lock due ABBA deadlocks in cleanup()
-				lock, err := lockfile.GetLockfile(hostsPath)
-				if err != nil {
-					return fmt.Errorf("failed to lock hosts file: %w", err)
-				}
-				lock.Lock()
-
-				// add the newly added container to the hosts file
-				// we always use 127.0.0.1 as ip since they have the same netns
-				err = etchosts.Add(hostsPath, getLocalhostHostEntry(c))
-				lock.Unlock()
-				if err != nil {
-					return fmt.Errorf("error creating hosts file for container %s which depends on container %s: %w", c.ID(), depCtr.ID(), err)
-				}
-
-				// finally, save it in the new container
-				err = c.mountIntoRootDirs(config.DefaultHostsFile, hostsPath)
-				if err != nil {
-					return fmt.Errorf("error assigning mounts to container %s: %w", c.ID(), err)
-				}
-			}
-
-			if !hasCurrentUserMapped(c) {
-				if err := makeAccessible(resolvPath, c.RootUID(), c.RootGID()); err != nil {
-					return err
-				}
-				if err := makeAccessible(hostsPath, c.RootUID(), c.RootGID()); err != nil {
-					return err
-				}
-			}
-		} else {
-			if !c.config.UseImageResolvConf {
-				if err := c.generateResolvConf(); err != nil {
-					return fmt.Errorf("error creating resolv.conf for container %s: %w", c.ID(), err)
-				}
-			}
-
-			if !c.config.UseImageHosts {
-				if err := c.createHosts(); err != nil {
-					return fmt.Errorf("error creating hosts file for container %s: %w", c.ID(), err)
-				}
-			}
-		}
-
-		if c.state.BindMounts["/etc/hosts"] != "" {
-			if err := c.relabel(c.state.BindMounts["/etc/hosts"], c.config.MountLabel, true); err != nil {
-				return err
-			}
-		}
-
-		if c.state.BindMounts["/etc/resolv.conf"] != "" {
-			if err := c.relabel(c.state.BindMounts["/etc/resolv.conf"], c.config.MountLabel, true); err != nil {
-				return err
-			}
-		}
-	} else if !c.config.UseImageHosts && c.state.BindMounts["/etc/hosts"] == "" {
-		if err := c.createHosts(); err != nil {
-			return fmt.Errorf("error creating hosts file for container %s: %w", c.ID(), err)
-		}
-	}
-
-	if c.config.ShmDir != "" {
-		// If ShmDir has a value SHM is always added when we mount the container
-		c.state.BindMounts["/dev/shm"] = c.config.ShmDir
-	}
-
-	if c.config.Passwd == nil || *c.config.Passwd {
-		newPasswd, newGroup, err := c.generatePasswdAndGroup()
-		if err != nil {
-			return fmt.Errorf("error creating temporary passwd file for container %s: %w", c.ID(), err)
-		}
-		if newPasswd != "" {
-			// Make /etc/passwd
-			// If it already exists, delete so we can recreate
-			delete(c.state.BindMounts, "/etc/passwd")
-			c.state.BindMounts["/etc/passwd"] = newPasswd
-		}
-		if newGroup != "" {
-			// Make /etc/group
-			// If it already exists, delete so we can recreate
-			delete(c.state.BindMounts, "/etc/group")
-			c.state.BindMounts["/etc/group"] = newGroup
-		}
-	}
-
-	// Make /etc/hostname
-	// This should never change, so no need to recreate if it exists
-	if _, ok := c.state.BindMounts["/etc/hostname"]; !ok {
-		hostnamePath, err := c.writeStringToRundir("hostname", c.Hostname())
-		if err != nil {
-			return fmt.Errorf("error creating hostname file for container %s: %w", c.ID(), err)
-		}
-		c.state.BindMounts["/etc/hostname"] = hostnamePath
-	}
-
-	// Make /etc/localtime
-	ctrTimezone := c.Timezone()
-	if ctrTimezone != "" {
-		// validate the format of the timezone specified if it's not "local"
-		if ctrTimezone != "local" {
-			_, err = time.LoadLocation(ctrTimezone)
-			if err != nil {
-				return fmt.Errorf("error finding timezone for container %s: %w", c.ID(), err)
-			}
-		}
-		if _, ok := c.state.BindMounts["/etc/localtime"]; !ok {
-			var zonePath string
-			if ctrTimezone == "local" {
-				zonePath, err = filepath.EvalSymlinks("/etc/localtime")
-				if err != nil {
-					return fmt.Errorf("error finding local timezone for container %s: %w", c.ID(), err)
-				}
-			} else {
-				zone := filepath.Join("/usr/share/zoneinfo", ctrTimezone)
-				zonePath, err = filepath.EvalSymlinks(zone)
-				if err != nil {
-					return fmt.Errorf("error setting timezone for container %s: %w", c.ID(), err)
-				}
-			}
-			localtimePath, err := c.copyTimezoneFile(zonePath)
-			if err != nil {
-				return fmt.Errorf("error setting timezone for container %s: %w", c.ID(), err)
-			}
-			c.state.BindMounts["/etc/localtime"] = localtimePath
-		}
-	}
-
-	_, hasRunContainerenv := c.state.BindMounts["/run/.containerenv"]
-	if !hasRunContainerenv {
-		// check in the spec mounts
-		for _, m := range c.config.Spec.Mounts {
-			if m.Destination == "/run/.containerenv" || m.Destination == "/run" {
-				hasRunContainerenv = true
-				break
-			}
-		}
-	}
-
-	// Make .containerenv if it does not exist
-	if !hasRunContainerenv {
-		containerenv := c.runtime.graphRootMountedFlag(c.config.Spec.Mounts)
-		isRootless := 0
-		if rootless.IsRootless() {
-			isRootless = 1
-		}
-		imageID, imageName := c.Image()
-
-		if c.Privileged() {
-			// Populate the .containerenv with container information
-			containerenv = fmt.Sprintf(`engine="podman-%s"
-name=%q
-id=%q
-image=%q
-imageid=%q
-rootless=%d
-%s`, version.Version.String(), c.Name(), c.ID(), imageName, imageID, isRootless, containerenv)
-		}
-		containerenvPath, err := c.writeStringToRundir(".containerenv", containerenv)
-		if err != nil {
-			return fmt.Errorf("error creating containerenv file for container %s: %w", c.ID(), err)
-		}
-		c.state.BindMounts["/run/.containerenv"] = containerenvPath
-	}
-
-	// Add Subscription Mounts
-	subscriptionMounts := subscriptions.MountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.Containers.DefaultMountsFile, c.state.Mountpoint, c.RootUID(), c.RootGID(), rootless.IsRootless(), false)
-	for _, mount := range subscriptionMounts {
-		if _, ok := c.state.BindMounts[mount.Destination]; !ok {
-			c.state.BindMounts[mount.Destination] = mount.Source
-		}
-	}
-
-	// Secrets are mounted by getting the secret data from the secrets manager,
-	// copying the data into the container's static dir,
-	// then mounting the copied dir into /run/secrets.
-	// The secrets mounting must come after subscription mounts, since subscription mounts
-	// creates the /run/secrets dir in the container where we mount as well.
-	if len(c.Secrets()) > 0 {
-		// create /run/secrets if subscriptions did not create
-		if err := c.createSecretMountDir(); err != nil {
-			return fmt.Errorf("error creating secrets mount: %w", err)
-		}
-		for _, secret := range c.Secrets() {
-			secretFileName := secret.Name
-			base := "/run/secrets"
-			if secret.Target != "" {
-				secretFileName = secret.Target
-				// If absolute path for target given remove base.
-				if filepath.IsAbs(secretFileName) {
-					base = ""
-				}
-			}
-			src := filepath.Join(c.config.SecretsPath, secret.Name)
-			dest := filepath.Join(base, secretFileName)
-			c.state.BindMounts[dest] = src
-		}
-	}
-
-	return nil
-}
-
-// generateResolvConf generates a containers resolv.conf
-func (c *Container) generateResolvConf() error {
-	var (
-		networkNameServers   []string
-		networkSearchDomains []string
-	)
-
-	netStatus := c.getNetworkStatus()
-	for _, status := range netStatus {
-		if status.DNSServerIPs != nil {
-			for _, nsIP := range status.DNSServerIPs {
-				networkNameServers = append(networkNameServers, nsIP.String())
-			}
-			logrus.Debugf("Adding nameserver(s) from network status of '%q'", status.DNSServerIPs)
-		}
-		if status.DNSSearchDomains != nil {
-			networkSearchDomains = append(networkSearchDomains, status.DNSSearchDomains...)
-			logrus.Debugf("Adding search domain(s) from network status of '%q'", status.DNSSearchDomains)
-		}
-	}
-
-	ipv6, err := c.checkForIPv6(netStatus)
-	if err != nil {
-		return err
-	}
-
-	nameservers := make([]string, 0, len(c.runtime.config.Containers.DNSServers)+len(c.config.DNSServer))
-	nameservers = append(nameservers, c.runtime.config.Containers.DNSServers...)
-	for _, ip := range c.config.DNSServer {
-		nameservers = append(nameservers, ip.String())
-	}
-	// If the user provided dns, it trumps all; then dns masq; then resolv.conf
-	var search []string
-	keepHostServers := false
-	if len(nameservers) == 0 {
-		keepHostServers = true
-		// first add the nameservers from the networks status
-		nameservers = networkNameServers
-		// when we add network dns server we also have to add the search domains
-		search = networkSearchDomains
-		// slirp4netns has a built in DNS forwarder.
-		if c.config.NetMode.IsSlirp4netns() {
-			slirp4netnsDNS, err := GetSlirp4netnsDNS(c.slirp4netnsSubnet)
-			if err != nil {
-				logrus.Warn("Failed to determine Slirp4netns DNS: ", err.Error())
-			} else {
-				nameservers = append(nameservers, slirp4netnsDNS.String())
-			}
-		}
-	}
-
-	if len(c.config.DNSSearch) > 0 || len(c.runtime.config.Containers.DNSSearches) > 0 {
-		customSearch := make([]string, 0, len(c.config.DNSSearch)+len(c.runtime.config.Containers.DNSSearches))
-		customSearch = append(customSearch, c.runtime.config.Containers.DNSSearches...)
-		customSearch = append(customSearch, c.config.DNSSearch...)
-		search = customSearch
-	}
-
-	options := make([]string, 0, len(c.config.DNSOption)+len(c.runtime.config.Containers.DNSOptions))
-	options = append(options, c.runtime.config.Containers.DNSOptions...)
-	options = append(options, c.config.DNSOption...)
-
-	destPath := filepath.Join(c.state.RunDir, "resolv.conf")
-
-	if err := resolvconf.New(&resolvconf.Params{
-		IPv6Enabled:     ipv6,
-		KeepHostServers: keepHostServers,
-		Nameservers:     nameservers,
-		Namespaces:      c.config.Spec.Linux.Namespaces,
-		Options:         options,
-		Path:            destPath,
-		Searches:        search,
-	}); err != nil {
-		return fmt.Errorf("error building resolv.conf for container %s: %w", c.ID(), err)
-	}
-
-	return c.bindMountRootFile(destPath, resolvconf.DefaultResolvConf)
-}
-
-// Check if a container uses IPv6.
-func (c *Container) checkForIPv6(netStatus map[string]types.StatusBlock) (bool, error) {
-	for _, status := range netStatus {
-		for _, netInt := range status.Interfaces {
-			for _, netAddress := range netInt.Subnets {
-				// Note: only using To16() does not work since it also returns a valid ip for ipv4
-				if netAddress.IPNet.IP.To4() == nil && netAddress.IPNet.IP.To16() != nil {
-					return true, nil
-				}
-			}
-		}
-	}
-
-	if c.config.NetMode.IsSlirp4netns() {
-		ctrNetworkSlipOpts := []string{}
-		if c.config.NetworkOptions != nil {
-			ctrNetworkSlipOpts = append(ctrNetworkSlipOpts, c.config.NetworkOptions["slirp4netns"]...)
-		}
-		slirpOpts, err := parseSlirp4netnsNetworkOptions(c.runtime, ctrNetworkSlipOpts)
-		if err != nil {
-			return false, err
-		}
-		return slirpOpts.enableIPv6, nil
-	}
-
-	return false, nil
-}
-
-// Add a new nameserver to the container's resolv.conf, ensuring that it is the
-// first nameserver present.
-// Usable only with running containers.
-func (c *Container) addNameserver(ips []string) error {
-	// Take no action if container is not running.
-	if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
-		return nil
-	}
-
-	// Do we have a resolv.conf at all?
-	path, ok := c.state.BindMounts[resolvconf.DefaultResolvConf]
-	if !ok {
-		return nil
-	}
-
-	if err := resolvconf.Add(path, ips); err != nil {
-		return fmt.Errorf("adding new nameserver to container %s resolv.conf: %w", c.ID(), err)
-	}
-
-	return nil
-}
-
-// Remove an entry from the existing resolv.conf of the container.
-// Usable only with running containers.
-func (c *Container) removeNameserver(ips []string) error {
-	// Take no action if container is not running.
-	if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
-		return nil
-	}
-
-	// Do we have a resolv.conf at all?
-	path, ok := c.state.BindMounts[resolvconf.DefaultResolvConf]
-	if !ok {
-		return nil
-	}
-
-	if err := resolvconf.Remove(path, ips); err != nil {
-		return fmt.Errorf("removing nameservers from container %s resolv.conf: %w", c.ID(), err)
-	}
-
-	return nil
-}
-
-func getLocalhostHostEntry(c *Container) etchosts.HostEntries {
-	return etchosts.HostEntries{{IP: "127.0.0.1", Names: []string{c.Hostname(), c.config.Name}}}
-}
-
-// getHostsEntries returns the container ip host entries for the correct netmode
-func (c *Container) getHostsEntries() (etchosts.HostEntries, error) {
-	var entries etchosts.HostEntries
-	names := []string{c.Hostname(), c.config.Name}
-	switch {
-	case c.config.NetMode.IsBridge():
-		entries = etchosts.GetNetworkHostEntries(c.state.NetworkStatus, names...)
-	case c.config.NetMode.IsSlirp4netns():
-		ip, err := GetSlirp4netnsIP(c.slirp4netnsSubnet)
-		if err != nil {
-			return nil, err
-		}
-		entries = etchosts.HostEntries{{IP: ip.String(), Names: names}}
-	default:
-		// check for net=none
-		if !c.config.CreateNetNS {
-			for _, ns := range c.config.Spec.Linux.Namespaces {
-				if ns.Type == spec.NetworkNamespace {
-					if ns.Path == "" {
-						entries = etchosts.HostEntries{{IP: "127.0.0.1", Names: names}}
-					}
-					break
-				}
-			}
-		}
-	}
-	return entries, nil
-}
-
-func (c *Container) createHosts() error {
-	var containerIPsEntries etchosts.HostEntries
-	var err error
-	// if we configure the netns after the container create we should not add
-	// the hosts here since we have no information about the actual ips
-	// instead we will add them in c.completeNetworkSetup()
-	if !c.config.PostConfigureNetNS {
-		containerIPsEntries, err = c.getHostsEntries()
-		if err != nil {
-			return fmt.Errorf("failed to get container ip host entries: %w", err)
-		}
-	}
-	baseHostFile, err := etchosts.GetBaseHostFile(c.runtime.config.Containers.BaseHostsFile, c.state.Mountpoint)
-	if err != nil {
-		return err
-	}
-
-	targetFile := filepath.Join(c.state.RunDir, "hosts")
-	err = etchosts.New(&etchosts.Params{
-		BaseFile:                 baseHostFile,
-		ExtraHosts:               c.config.HostAdd,
-		ContainerIPs:             containerIPsEntries,
-		HostContainersInternalIP: etchosts.GetHostContainersInternalIP(c.runtime.config, c.state.NetworkStatus, c.runtime.network),
-		TargetFile:               targetFile,
-	})
-	if err != nil {
-		return err
-	}
-
-	return c.bindMountRootFile(targetFile, config.DefaultHostsFile)
-}
-
-// bindMountRootFile will chown and relabel the source file to make it usable in the container.
-// It will also add the path to the container bind mount map.
-// source is the path on the host, dest is the path in the container.
-func (c *Container) bindMountRootFile(source, dest string) error {
-	if err := os.Chown(source, c.RootUID(), c.RootGID()); err != nil {
-		return err
-	}
-	if err := label.Relabel(source, c.MountLabel(), false); err != nil {
-		return err
-	}
-
-	return c.mountIntoRootDirs(dest, source)
-}
-
-// generateGroupEntry generates an entry or entries into /etc/group as
-// required by container configuration.
-// Generally speaking, we will make an entry under two circumstances:
-// 1. The container is started as a specific user:group, and that group is both
-//    numeric, and does not already exist in /etc/group.
-// 2. It is requested that Libpod add the group that launched Podman to
-//    /etc/group via AddCurrentUserPasswdEntry (though this does not trigger if
-//    the group in question already exists in /etc/passwd).
-// Returns group entry (as a string that can be appended to /etc/group) and any
-// error that occurred.
-func (c *Container) generateGroupEntry() (string, error) {
-	groupString := ""
-
-	// Things we *can't* handle: adding the user we added in
-	// generatePasswdEntry to any *existing* groups.
-	addedGID := 0
-	if c.config.AddCurrentUserPasswdEntry {
-		entry, gid, err := c.generateCurrentUserGroupEntry()
-		if err != nil {
-			return "", err
-		}
-		groupString += entry
-		addedGID = gid
-	}
-	if c.config.User != "" {
-		entry, err := c.generateUserGroupEntry(addedGID)
-		if err != nil {
-			return "", err
-		}
-		groupString += entry
-	}
-
-	return groupString, nil
-}
-
-// Make an entry in /etc/group for the group of the user running podman iff we
-// are rootless.
-func (c *Container) generateCurrentUserGroupEntry() (string, int, error) {
-	gid := rootless.GetRootlessGID()
-	if gid == 0 {
-		return "", 0, nil
-	}
-
-	g, err := user.LookupGroupId(strconv.Itoa(gid))
-	if err != nil {
-		return "", 0, fmt.Errorf("failed to get current group: %w", err)
-	}
-
-	// Look up group name to see if it exists in the image.
-	_, err = lookup.GetGroup(c.state.Mountpoint, g.Name)
-	if err != runcuser.ErrNoGroupEntries {
-		return "", 0, err
-	}
-
-	// Look up GID to see if it exists in the image.
-	_, err = lookup.GetGroup(c.state.Mountpoint, g.Gid)
-	if err != runcuser.ErrNoGroupEntries {
-		return "", 0, err
-	}
-
-	// We need to get the username of the rootless user so we can add it to
-	// the group.
-	username := ""
-	uid := rootless.GetRootlessUID()
-	if uid != 0 {
-		u, err := user.LookupId(strconv.Itoa(uid))
-		if err != nil {
-			return "", 0, fmt.Errorf("failed to get current user to make group entry: %w", err)
-		}
-		username = u.Username
-	}
-
-	// Make the entry.
-	return fmt.Sprintf("%s:x:%s:%s\n", g.Name, g.Gid, username), gid, nil
-}
-
-// Make an entry in /etc/group for the group the container was specified to run
-// as.
-func (c *Container) generateUserGroupEntry(addedGID int) (string, error) {
-	if c.config.User == "" {
-		return "", nil
-	}
-
-	splitUser := strings.SplitN(c.config.User, ":", 2)
-	group := splitUser[0]
-	if len(splitUser) > 1 {
-		group = splitUser[1]
-	}
-
-	gid, err := strconv.ParseUint(group, 10, 32)
-	if err != nil {
-		return "", nil //nolint: nilerr
-	}
-
-	if addedGID != 0 && addedGID == int(gid) {
-		return "", nil
-	}
-
-	// Check if the group already exists
-	_, err = lookup.GetGroup(c.state.Mountpoint, group)
-	if err != runcuser.ErrNoGroupEntries {
-		return "", err
-	}
-
-	return fmt.Sprintf("%d:x:%d:%s\n", gid, gid, splitUser[0]), nil
-}
-
-// generatePasswdEntry generates an entry or entries into /etc/passwd as
-// required by container configuration.
-// Generally speaking, we will make an entry under two circumstances:
-// 1. The container is started as a specific user who is not in /etc/passwd.
-//    This only triggers if the user is given as a *numeric* ID.
-// 2. It is requested that Libpod add the user that launched Podman to
-//    /etc/passwd via AddCurrentUserPasswdEntry (though this does not trigger if
-//    the user in question already exists in /etc/passwd) or the UID to be added
-//    is 0).
-// 3. The user specified additional host user accounts to add the the /etc/passwd file
-// Returns password entry (as a string that can be appended to /etc/passwd) and
-// any error that occurred.
-func (c *Container) generatePasswdEntry() (string, error) {
-	passwdString := ""
-
-	addedUID := 0
-	for _, userid := range c.config.HostUsers {
-		// Look up User on host
-		u, err := util.LookupUser(userid)
-		if err != nil {
-			return "", err
-		}
-		entry, err := c.userPasswdEntry(u)
-		if err != nil {
-			return "", err
-		}
-		passwdString += entry
-	}
-	if c.config.AddCurrentUserPasswdEntry {
-		entry, uid, _, err := c.generateCurrentUserPasswdEntry()
-		if err != nil {
-			return "", err
-		}
-		passwdString += entry
-		addedUID = uid
-	}
-	if c.config.User != "" {
-		entry, err := c.generateUserPasswdEntry(addedUID)
-		if err != nil {
-			return "", err
-		}
-		passwdString += entry
-	}
-
-	return passwdString, nil
-}
-
-// generateCurrentUserPasswdEntry generates an /etc/passwd entry for the user
-// running the container engine.
-// Returns a passwd entry for the user, and the UID and GID of the added entry.
-func (c *Container) generateCurrentUserPasswdEntry() (string, int, int, error) {
-	uid := rootless.GetRootlessUID()
-	if uid == 0 {
-		return "", 0, 0, nil
-	}
-
-	u, err := user.LookupId(strconv.Itoa(uid))
-	if err != nil {
-		return "", 0, 0, fmt.Errorf("failed to get current user: %w", err)
-	}
-	pwd, err := c.userPasswdEntry(u)
-	if err != nil {
-		return "", 0, 0, err
-	}
-
-	return pwd, uid, rootless.GetRootlessGID(), nil
-}
-
-func (c *Container) userPasswdEntry(u *user.User) (string, error) {
-	// Look up the user to see if it exists in the container image.
-	_, err := lookup.GetUser(c.state.Mountpoint, u.Username)
-	if err != runcuser.ErrNoPasswdEntries {
-		return "", err
-	}
-
-	// Look up the UID to see if it exists in the container image.
-	_, err = lookup.GetUser(c.state.Mountpoint, u.Uid)
-	if err != runcuser.ErrNoPasswdEntries {
-		return "", err
-	}
-
-	// If the user's actual home directory exists, or was mounted in - use
-	// that.
-	homeDir := c.WorkingDir()
-	hDir := u.HomeDir
-	for hDir != "/" {
-		if MountExists(c.config.Spec.Mounts, hDir) {
-			homeDir = u.HomeDir
-			break
-		}
-		hDir = filepath.Dir(hDir)
-	}
-	if homeDir != u.HomeDir {
-		for _, hDir := range c.UserVolumes() {
-			if hDir == u.HomeDir {
-				homeDir = u.HomeDir
-				break
-			}
-		}
-	}
-	// Set HOME environment if not already set
-	hasHomeSet := false
-	for _, s := range c.config.Spec.Process.Env {
-		if strings.HasPrefix(s, "HOME=") {
-			hasHomeSet = true
-			break
-		}
-	}
-	if !hasHomeSet {
-		c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", homeDir))
-	}
-	if c.config.PasswdEntry != "" {
-		return c.passwdEntry(u.Username, u.Uid, u.Gid, u.Name, homeDir), nil
-	}
-
-	return fmt.Sprintf("%s:*:%s:%s:%s:%s:/bin/sh\n", u.Username, u.Uid, u.Gid, u.Name, homeDir), nil
-}
-
-// generateUserPasswdEntry generates an /etc/passwd entry for the container user
-// to run in the container.
-// The UID and GID of the added entry will also be returned.
-// Accepts one argument, that being any UID that has already been added to the
-// passwd file by other functions; if it matches the UID we were given, we don't
-// need to do anything.
-func (c *Container) generateUserPasswdEntry(addedUID int) (string, error) {
-	var (
-		groupspec string
-		gid       int
-	)
-	if c.config.User == "" {
-		return "", nil
-	}
-	splitSpec := strings.SplitN(c.config.User, ":", 2)
-	userspec := splitSpec[0]
-	if len(splitSpec) > 1 {
-		groupspec = splitSpec[1]
-	}
-	// If a non numeric User, then don't generate passwd
-	uid, err := strconv.ParseUint(userspec, 10, 32)
-	if err != nil {
-		return "", nil //nolint: nilerr
-	}
-
-	if addedUID != 0 && int(uid) == addedUID {
-		return "", nil
-	}
-
-	// Look up the user to see if it exists in the container image
-	_, err = lookup.GetUser(c.state.Mountpoint, userspec)
-	if err != runcuser.ErrNoPasswdEntries {
-		return "", err
-	}
-
-	if groupspec != "" {
-		ugid, err := strconv.ParseUint(groupspec, 10, 32)
-		if err == nil {
-			gid = int(ugid)
-		} else {
-			group, err := lookup.GetGroup(c.state.Mountpoint, groupspec)
-			if err != nil {
-				return "", fmt.Errorf("unable to get gid %s from group file: %w", groupspec, err)
-			}
-			gid = group.Gid
-		}
-	}
-
-	if c.config.PasswdEntry != "" {
-		entry := c.passwdEntry(fmt.Sprintf("%d", uid), fmt.Sprintf("%d", uid), fmt.Sprintf("%d", gid), "container user", c.WorkingDir())
-		return entry, nil
-	}
-
-	return fmt.Sprintf("%d:*:%d:%d:container user:%s:/bin/sh\n", uid, uid, gid, c.WorkingDir()), nil
-}
-
-func (c *Container) passwdEntry(username string, uid, gid, name, homeDir string) string {
-	s := c.config.PasswdEntry
-	s = strings.ReplaceAll(s, "$USERNAME", username)
-	s = strings.ReplaceAll(s, "$UID", uid)
-	s = strings.ReplaceAll(s, "$GID", gid)
-	s = strings.ReplaceAll(s, "$NAME", name)
-	s = strings.ReplaceAll(s, "$HOME", homeDir)
-	return s + "\n"
-}
-
-// generatePasswdAndGroup generates container-specific passwd and group files
-// iff g.config.User is a number or we are configured to make a passwd entry for
-// the current user or the user specified HostsUsers
-// Returns path to file to mount at /etc/passwd, path to file to mount at
-// /etc/group, and any error that occurred. If no passwd/group file were
-// required, the empty string will be returned for those path (this may occur
-// even if no error happened).
-// This may modify the mounted container's /etc/passwd and /etc/group instead of
-// making copies to bind-mount in, so we don't break useradd (it wants to make a
-// copy of /etc/passwd and rename the copy to /etc/passwd, which is impossible
-// with a bind mount). This is done in cases where the container is *not*
-// read-only. In this case, the function will return nothing ("", "", nil).
-func (c *Container) generatePasswdAndGroup() (string, string, error) {
-	if !c.config.AddCurrentUserPasswdEntry && c.config.User == "" &&
-		len(c.config.HostUsers) == 0 {
-		return "", "", nil
-	}
-
-	needPasswd := true
-	needGroup := true
-
-	// First, check if there's a mount at /etc/passwd or group, we don't
-	// want to interfere with user mounts.
-	if MountExists(c.config.Spec.Mounts, "/etc/passwd") {
-		needPasswd = false
-	}
-	if MountExists(c.config.Spec.Mounts, "/etc/group") {
-		needGroup = false
-	}
-
-	// Next, check if we already made the files. If we didn't, don't need to
-	// do anything more.
-	if needPasswd {
-		passwdPath := filepath.Join(c.config.StaticDir, "passwd")
-		if _, err := os.Stat(passwdPath); err == nil {
-			needPasswd = false
-		}
-	}
-	if needGroup {
-		groupPath := filepath.Join(c.config.StaticDir, "group")
-		if _, err := os.Stat(groupPath); err == nil {
-			needGroup = false
-		}
-	}
-
-	// If we don't need a /etc/passwd or /etc/group at this point we can
-	// just return.
-	if !needPasswd && !needGroup {
-		return "", "", nil
-	}
-
-	passwdPath := ""
-	groupPath := ""
-
-	ro := c.IsReadOnly()
-
-	if needPasswd {
-		passwdEntry, err := c.generatePasswdEntry()
-		if err != nil {
-			return "", "", err
-		}
-
-		needsWrite := passwdEntry != ""
-		switch {
-		case ro && needsWrite:
-			logrus.Debugf("Making /etc/passwd for container %s", c.ID())
-			originPasswdFile, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/passwd")
-			if err != nil {
-				return "", "", fmt.Errorf("error creating path to container %s /etc/passwd: %w", c.ID(), err)
-			}
-			orig, err := ioutil.ReadFile(originPasswdFile)
-			if err != nil && !os.IsNotExist(err) {
-				return "", "", err
-			}
-			passwdFile, err := c.writeStringToStaticDir("passwd", string(orig)+passwdEntry)
-			if err != nil {
-				return "", "", fmt.Errorf("failed to create temporary passwd file: %w", err)
-			}
-			if err := os.Chmod(passwdFile, 0644); err != nil {
-				return "", "", err
-			}
-			passwdPath = passwdFile
-		case !ro && needsWrite:
-			logrus.Debugf("Modifying container %s /etc/passwd", c.ID())
-			containerPasswd, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/passwd")
-			if err != nil {
-				return "", "", fmt.Errorf("error looking up location of container %s /etc/passwd: %w", c.ID(), err)
-			}
-
-			f, err := os.OpenFile(containerPasswd, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600)
-			if err != nil {
-				return "", "", fmt.Errorf("container %s: %w", c.ID(), err)
-			}
-			defer f.Close()
-
-			if _, err := f.WriteString(passwdEntry); err != nil {
-				return "", "", fmt.Errorf("unable to append to container %s /etc/passwd: %w", c.ID(), err)
-			}
-		default:
-			logrus.Debugf("Not modifying container %s /etc/passwd", c.ID())
-		}
-	}
-	if needGroup {
-		groupEntry, err := c.generateGroupEntry()
-		if err != nil {
-			return "", "", err
-		}
-
-		needsWrite := groupEntry != ""
-		switch {
-		case ro && needsWrite:
-			logrus.Debugf("Making /etc/group for container %s", c.ID())
-			originGroupFile, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/group")
-			if err != nil {
-				return "", "", fmt.Errorf("error creating path to container %s /etc/group: %w", c.ID(), err)
-			}
-			orig, err := ioutil.ReadFile(originGroupFile)
-			if err != nil && !os.IsNotExist(err) {
-				return "", "", err
-			}
-			groupFile, err := c.writeStringToStaticDir("group", string(orig)+groupEntry)
-			if err != nil {
-				return "", "", fmt.Errorf("failed to create temporary group file: %w", err)
-			}
-			if err := os.Chmod(groupFile, 0644); err != nil {
-				return "", "", err
-			}
-			groupPath = groupFile
-		case !ro && needsWrite:
-			logrus.Debugf("Modifying container %s /etc/group", c.ID())
-			containerGroup, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/group")
-			if err != nil {
-				return "", "", fmt.Errorf("error looking up location of container %s /etc/group: %w", c.ID(), err)
-			}
-
-			f, err := os.OpenFile(containerGroup, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600)
-			if err != nil {
-				return "", "", fmt.Errorf("container %s: %w", c.ID(), err)
-			}
-			defer f.Close()
-
-			if _, err := f.WriteString(groupEntry); err != nil {
-				return "", "", fmt.Errorf("unable to append to container %s /etc/group: %w", c.ID(), err)
-			}
-		default:
-			logrus.Debugf("Not modifying container %s /etc/group", c.ID())
-		}
-	}
-
-	return passwdPath, groupPath, nil
-}
-
 func isRootlessCgroupSet(cgroup string) bool {
 	// old versions of podman were setting the CgroupParent to CgroupfsDefaultCgroupParent
 	// by default.  Avoid breaking these versions and check whether the cgroup parent is
@@ -3058,198 +398,257 @@ func (c *Container) getOCICgroupPath() (string, error) {
 	}
 }
 
-func (c *Container) copyTimezoneFile(zonePath string) (string, error) {
-	localtimeCopy := filepath.Join(c.state.RunDir, "localtime")
-	file, err := os.Stat(zonePath)
-	if err != nil {
-		return "", err
-	}
-	if file.IsDir() {
-		return "", errors.New("invalid timezone: is a directory")
-	}
-	src, err := os.Open(zonePath)
-	if err != nil {
-		return "", err
-	}
-	defer src.Close()
-	dest, err := os.Create(localtimeCopy)
-	if err != nil {
-		return "", err
-	}
-	defer dest.Close()
-	_, err = io.Copy(dest, src)
-	if err != nil {
-		return "", err
-	}
-	if err := c.relabel(localtimeCopy, c.config.MountLabel, false); err != nil {
-		return "", err
+// setupRootlessNetwork sets up slirp4netns and the rootlesskit port forwarder again, where the container uses them.
+func (c *Container) setupRootlessNetwork() error {
+	// set up slirp4netns again because slirp4netns will die when conmon exits
+	if c.config.NetMode.IsSlirp4netns() {
+		err := c.runtime.setupSlirp4netns(c, c.state.NetNS)
+		if err != nil {
+			return err
+		}
 	}
-	if err := dest.Chown(c.RootUID(), c.RootGID()); err != nil {
-		return "", err
+
+	// set up rootlesskit port forwarder again since it dies when conmon exits
+	// the forwarder is only used when rootless, on a bridge network, with port mappings
+	if rootless.IsRootless() && c.config.NetMode.IsBridge() && len(c.config.PortMappings) > 0 {
+		err := c.runtime.setupRootlessPortMappingViaRLK(c, c.state.NetNS.Path(), c.state.NetworkStatus)
+		if err != nil {
+			return err
+		}
 	}
-	return localtimeCopy, err
+	return nil
 }
 
-func (c *Container) cleanupOverlayMounts() error {
-	return overlay.CleanupContent(c.config.StaticDir)
+func openDirectory(path string) (fd int, err error) {
+	return unix.Open(path, unix.O_RDONLY|unix.O_PATH, 0) // returns the raw descriptor, not wrapped in an *os.File
 }
 
-// Creates and mounts an empty dir to mount secrets into, if it does not already exist
-func (c *Container) createSecretMountDir() error {
-	src := filepath.Join(c.state.RunDir, "/run/secrets")
-	_, err := os.Stat(src)
-	if os.IsNotExist(err) {
-		oldUmask := umask.Set(0)
-		defer umask.Set(oldUmask)
+// addNetworkNamespace adds the network namespace to the spec when the container
+// creates its own (CreateNetNS); it does nothing otherwise.
+func (c *Container) addNetworkNamespace(g *generate.Generator) error {
+	if !c.config.CreateNetNS {
+		return nil
+	}
+	// With PostConfigureNetNS the namespace entry carries no path; otherwise it
+	// points at the namespace already held in c.state.NetNS.
+	nsPath := ""
+	if !c.config.PostConfigureNetNS {
+		nsPath = c.state.NetNS.Path()
+	}
+	return g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), nsPath)
+}
+
+func (c *Container) addSystemdMounts(g *generate.Generator) error {
+	if c.Systemd() { // nothing is added unless c.Systemd() reports true
+		if err := c.setupSystemd(g.Mounts(), *g); err != nil { // passes the current mounts and a copy of *g
+			return fmt.Errorf("error adding systemd-specific mounts: %w", err)
+		}
+	}
+	return nil
+}
 
-		if err := os.MkdirAll(src, 0755); err != nil {
+func (c *Container) addSharedNamespaces(g *generate.Generator) error { // joins other containers' namespaces, then sets hostname, HOSTNAME and auto-userns ID mappings
+	if c.config.IPCNsCtr != "" {
+		if err := c.addNamespaceContainer(g, IPCNS, c.config.IPCNsCtr, spec.IPCNamespace); err != nil {
 			return err
 		}
-		if err := label.Relabel(src, c.config.MountLabel, false); err != nil {
+	}
+	if c.config.MountNsCtr != "" {
+		if err := c.addNamespaceContainer(g, MountNS, c.config.MountNsCtr, spec.MountNamespace); err != nil {
 			return err
 		}
-		if err := os.Chown(src, c.RootUID(), c.RootGID()); err != nil {
+	}
+	if c.config.NetNsCtr != "" {
+		if err := c.addNamespaceContainer(g, NetNS, c.config.NetNsCtr, spec.NetworkNamespace); err != nil {
 			return err
 		}
-		c.state.BindMounts["/run/secrets"] = src
-		return nil
+	}
+	if c.config.PIDNsCtr != "" {
+		if err := c.addNamespaceContainer(g, PIDNS, c.config.PIDNsCtr, spec.PIDNamespace); err != nil {
+			return err
+		}
+	}
+	if c.config.UserNsCtr != "" {
+		if err := c.addNamespaceContainer(g, UserNS, c.config.UserNsCtr, spec.UserNamespace); err != nil {
+			return err
+		}
+		if len(g.Config.Linux.UIDMappings) == 0 {
+			// runc complains if no mapping is specified, even if we join another ns, so provide a dummy mapping (0 0 1)
+			g.AddLinuxUIDMapping(uint32(0), uint32(0), uint32(1))
+			g.AddLinuxGIDMapping(uint32(0), uint32(0), uint32(1))
+		}
 	}
 
-	return err
-}
-
-// Fix ownership and permissions of the specified volume if necessary.
-func (c *Container) fixVolumePermissions(v *ContainerNamedVolume) error {
-	vol, err := c.runtime.state.Volume(v.Name)
+	availableUIDs, availableGIDs, err := rootless.GetAvailableIDMaps()
 	if err != nil {
-		return fmt.Errorf("error retrieving named volume %s for container %s: %w", v.Name, c.ID(), err)
+		if os.IsNotExist(err) {
+			// The kernel-provided files only exist if user namespaces are supported
+			logrus.Debugf("User or group ID mappings not available: %s", err)
+		} else {
+			return err
+		}
+	} else {
+		g.Config.Linux.UIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.UIDMappings, availableUIDs)
+		g.Config.Linux.GIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.GIDMappings, availableGIDs)
 	}
 
-	vol.lock.Lock()
-	defer vol.lock.Unlock()
+	// Hostname handling:
+	// If we have a UTS namespace, set Hostname in the OCI spec.
+	// Set the HOSTNAME environment variable unless explicitly overridden by
+	// the user (already present in OCI spec). If we don't have a UTS ns,
+	// set it to the host's hostname instead.
+	hostname := c.Hostname()
+	foundUTS := false
 
-	// The volume may need a copy-up. Check the state.
-	if err := vol.update(); err != nil {
-		return err
+	for _, i := range c.config.Spec.Linux.Namespaces {
+		if i.Type == spec.UTSNamespace && i.Path == "" {
+			foundUTS = true
+			g.SetHostname(hostname)
+			break
+		}
 	}
-
-	// Volumes owned by a volume driver are not chowned - we don't want to
-	// mess with a mount not managed by us.
-	if vol.state.NeedsChown && !vol.UsesVolumeDriver() {
-		vol.state.NeedsChown = false
-
-		uid := int(c.config.Spec.Process.User.UID)
-		gid := int(c.config.Spec.Process.User.GID)
-
-		if c.config.IDMappings.UIDMap != nil {
-			p := idtools.IDPair{
-				UID: uid,
-				GID: gid,
-			}
-			mappings := idtools.NewIDMappingsFromMaps(c.config.IDMappings.UIDMap, c.config.IDMappings.GIDMap)
-			newPair, err := mappings.ToHost(p)
-			if err != nil {
-				return fmt.Errorf("error mapping user %d:%d: %w", uid, gid, err)
-			}
-			uid = newPair.UID
-			gid = newPair.GID
+	if !foundUTS {
+		tmpHostname, err := os.Hostname()
+		if err != nil {
+			return err
 		}
+		hostname = tmpHostname
+	}
+	needEnv := true
+	for _, checkEnv := range g.Config.Process.Env {
+		if strings.SplitN(checkEnv, "=", 2)[0] == "HOSTNAME" {
+			needEnv = false
+			break
+		}
+	}
+	if needEnv {
+		g.AddProcessEnv("HOSTNAME", hostname)
+	}
 
-		vol.state.UIDChowned = uid
-		vol.state.GIDChowned = gid
-
-		if err := vol.save(); err != nil {
+	if c.config.UTSNsCtr != "" {
+		if err := c.addNamespaceContainer(g, UTSNS, c.config.UTSNsCtr, spec.UTSNamespace); err != nil {
 			return err
 		}
-
-		mountPoint, err := vol.MountPoint()
-		if err != nil {
+	}
+	if c.config.CgroupNsCtr != "" {
+		if err := c.addNamespaceContainer(g, CgroupNS, c.config.CgroupNsCtr, spec.CgroupNamespace); err != nil {
 			return err
 		}
+	}
 
-		if err := os.Lchown(mountPoint, uid, gid); err != nil {
+	if c.config.UserNsCtr == "" && c.config.IDMappings.AutoUserNs {
+		if err := g.AddOrReplaceLinuxNamespace(string(spec.UserNamespace), ""); err != nil {
 			return err
 		}
+		g.ClearLinuxUIDMappings()
+		for _, uidmap := range c.config.IDMappings.UIDMap {
+			g.AddLinuxUIDMapping(uint32(uidmap.HostID), uint32(uidmap.ContainerID), uint32(uidmap.Size))
+		}
+		g.ClearLinuxGIDMappings()
+		for _, gidmap := range c.config.IDMappings.GIDMap {
+			g.AddLinuxGIDMapping(uint32(gidmap.HostID), uint32(gidmap.ContainerID), uint32(gidmap.Size))
+		}
+	}
+	return nil
+}
 
-		// Make sure the new volume matches the permissions of the target directory.
-		// https://github.com/containers/podman/issues/10188
-		st, err := os.Lstat(filepath.Join(c.state.Mountpoint, v.Dest))
-		if err == nil {
-			if stat, ok := st.Sys().(*syscall.Stat_t); ok {
-				if err := os.Lchown(mountPoint, int(stat.Uid), int(stat.Gid)); err != nil {
-					return err
+func (c *Container) addRootPropagation(g *generate.Generator, mounts []spec.Mount) error {
+	// Determine the root propagation from the mounts' options. If any
+	// mount is [r]shared, root propagation becomes shared, which should
+	// work for slave and private volumes too.
+	//
+	// Otherwise, if any mount is [r]slave, root propagation becomes rslave.
+	//
+	// For private volumes any root propagation value should work.
+	rootPropagation := ""
+	for _, m := range mounts {
+		for _, opt := range m.Options {
+			switch opt {
+			case MountShared, MountRShared:
+				if rootPropagation != MountShared && rootPropagation != MountRShared {
+					rootPropagation = MountShared
+				}
+			case MountSlave, MountRSlave:
+				if rootPropagation != MountShared && rootPropagation != MountRShared && rootPropagation != MountSlave && rootPropagation != MountRSlave {
+					rootPropagation = MountRSlave
 				}
 			}
-			if err := os.Chmod(mountPoint, st.Mode()); err != nil {
-				return err
-			}
-			stat := st.Sys().(*syscall.Stat_t)
-			atime := time.Unix(int64(stat.Atim.Sec), int64(stat.Atim.Nsec)) //nolint: unconvert
-			if err := os.Chtimes(mountPoint, atime, st.ModTime()); err != nil {
-				return err
-			}
-		} else if !os.IsNotExist(err) {
+		}
+	}
+	if rootPropagation != "" {
+		logrus.Debugf("Set root propagation to %q", rootPropagation)
+		if err := g.SetLinuxRootPropagation(rootPropagation); err != nil {
 			return err
 		}
 	}
 	return nil
 }
 
-func (c *Container) relabel(src, mountLabel string, recurse bool) error {
-	if !selinux.GetEnabled() || mountLabel == "" {
-		return nil
-	}
-	// only relabel on initial creation of container
-	if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateUnknown) {
-		label, err := label.FileLabel(src)
-		if err != nil {
-			return err
-		}
-		// If labels are different, might be on a tmpfs
-		if label == mountLabel {
-			return nil
-		}
+func (c *Container) setProcessLabel(g *generate.Generator) {
+	g.SetProcessSelinuxLabel(c.ProcessLabel()) // the container's process label becomes the spec's SELinux process label
+}
+
+func (c *Container) setMountLabel(g *generate.Generator) {
+	g.SetLinuxMountLabel(c.MountLabel()) // the container's mount label becomes the spec's Linux mount label
+}
+
+func (c *Container) setCgroupsPath(g *generate.Generator) error {
+	cgroupPath, err := c.getOCICgroupPath() // on error the spec's cgroups path is left untouched
+	if err != nil {
+		return err
 	}
-	return label.Relabel(src, mountLabel, recurse)
+	g.SetLinuxCgroupsPath(cgroupPath)
+	return nil
 }
 
-func (c *Container) ChangeHostPathOwnership(src string, recurse bool, uid, gid int) error {
-	// only chown on initial creation of container
-	if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateUnknown) {
-		st, err := os.Stat(src)
+func (c *Container) addSlirp4netnsDNS(nameservers []string) []string {
+	// slirp4netns has a built-in DNS forwarder; append its address. A lookup failure is only logged.
+	if c.config.NetMode.IsSlirp4netns() {
+		slirp4netnsDNS, err := GetSlirp4netnsDNS(c.slirp4netnsSubnet)
 		if err != nil {
-			return err
-		}
-
-		// If labels are different, might be on a tmpfs
-		if int(st.Sys().(*syscall.Stat_t).Uid) == uid && int(st.Sys().(*syscall.Stat_t).Gid) == gid {
-			return nil
+			logrus.Warn("Failed to determine Slirp4netns DNS: ", err.Error())
+		} else {
+			nameservers = append(nameservers, slirp4netnsDNS.String())
 		}
 	}
-	return chown.ChangeHostPathOwnership(src, recurse, uid, gid)
+	return nameservers
 }
 
-// If the container is rootless, set up the slirp4netns network
-func (c *Container) setupRootlessNetwork() error {
-	// set up slirp4netns again because slirp4netns will die when conmon exits
+func (c *Container) isSlirp4netnsIPv6() (bool, error) { // reports the parsed enableIPv6 option; false when not in slirp4netns mode
 	if c.config.NetMode.IsSlirp4netns() {
-		err := c.runtime.setupSlirp4netns(c, c.state.NetNS)
+		ctrNetworkSlipOpts := []string{}
+		if c.config.NetworkOptions != nil {
+			ctrNetworkSlipOpts = append(ctrNetworkSlipOpts, c.config.NetworkOptions["slirp4netns"]...)
+		}
+		slirpOpts, err := parseSlirp4netnsNetworkOptions(c.runtime, ctrNetworkSlipOpts)
 		if err != nil {
-			return err
+			return false, err
 		}
+		return slirpOpts.enableIPv6, nil
 	}
 
-	// set up rootlesskit port forwarder again since it dies when conmon exits
-	// we use rootlesskit port forwarder only as rootless and when bridge network is used
-	if rootless.IsRootless() && c.config.NetMode.IsBridge() && len(c.config.PortMappings) > 0 {
-		err := c.runtime.setupRootlessPortMappingViaRLK(c, c.state.NetNS.Path(), c.state.NetworkStatus)
-		if err != nil {
-			return err
+	return false, nil
+}
+
+// hasNetNone checks for net=none: no netns is created and the spec has a network namespace with an empty path.
+func (c *Container) hasNetNone() bool {
+	if !c.config.CreateNetNS {
+		for _, ns := range c.config.Spec.Linux.Namespaces {
+			if ns.Type == spec.NetworkNamespace {
+				if ns.Path == "" {
+					return true
+				}
+			}
 		}
 	}
-	return nil
+	return false
 }
 
-func openDirectory(path string) (fd int, err error) {
-	return unix.Open(path, unix.O_RDONLY|unix.O_PATH, 0)
+// setVolumeAtime copies st's access and modification times onto mountPoint.
+func setVolumeAtime(mountPoint string, st os.FileInfo) error {
+	// NOTE(review): unchecked assertion; it panics if Sys() is not a *syscall.Stat_t.
+	rawStat := st.Sys().(*syscall.Stat_t)
+	accessed := time.Unix(int64(rawStat.Atim.Sec), int64(rawStat.Atim.Nsec)) //nolint: unconvert
+	// Chtimes' result is passed through unchanged (nil on success).
+	return os.Chtimes(mountPoint, accessed, st.ModTime())
 }
diff --git a/libpod/container_internal_unsupported.go b/libpod/container_internal_unsupported.go
index 074aeee47..1967c577b 100644
--- a/libpod/container_internal_unsupported.go
+++ b/libpod/container_internal_unsupported.go
@@ -1,5 +1,5 @@
-//go:build !linux
-// +build !linux
+//go:build !linux && !freebsd
+// +build !linux,!freebsd
 
 package libpod
 
diff --git a/libpod/container_linux.go b/libpod/container_linux.go
index 8b517e69f..9c17a1966 100644
--- a/libpod/container_linux.go
+++ b/libpod/container_linux.go
@@ -5,6 +5,7 @@ package libpod
 
 import (
 	"github.com/containernetworking/plugins/pkg/ns"
+	spec "github.com/opencontainers/runtime-spec/specs-go"
 )
 
 type containerPlatformState struct {
@@ -13,3 +14,17 @@ type containerPlatformState struct {
 	// told to join another container's network namespace
 	NetNS ns.NetNS `json:"-"`
 }
+
+// networkDisabled reports whether c has no network: it neither creates a netns
+// nor configures one later, and its spec's network namespace has an empty path.
+func networkDisabled(c *Container) (bool, error) {
+	if c.config.CreateNetNS || c.config.PostConfigureNetNS {
+		return false, nil
+	}
+	for _, namespace := range c.config.Spec.Linux.Namespaces {
+		if namespace.Type == spec.NetworkNamespace {
+			return namespace.Path == "", nil
+		}
+	}
+	return false, nil
+}
diff --git a/libpod/define/config.go b/libpod/define/config.go
index 34c1a675d..1fad5cc9a 100644
--- a/libpod/define/config.go
+++ b/libpod/define/config.go
@@ -85,4 +85,4 @@ const PassthroughLogging = "passthrough"
 const RLimitDefaultValue = uint64(1048576)
 
 // BindMountPrefix distinguishes its annotations from others
-const BindMountPrefix = "bind-mount-options:"
+const BindMountPrefix = "bind-mount-options"
diff --git a/libpod/define/mount.go b/libpod/define/mount.go
index 1b0d019c8..db444fd83 100644
--- a/libpod/define/mount.go
+++ b/libpod/define/mount.go
@@ -1,8 +1,6 @@
 package define
 
 const (
-	// TypeBind is the type for mounting host dir
-	TypeBind = "bind"
 	// TypeVolume is the type for named volumes
 	TypeVolume = "volume"
 	// TypeTmpfs is the type for mounting tmpfs
diff --git a/libpod/define/mount_freebsd.go b/libpod/define/mount_freebsd.go
new file mode 100644
index 000000000..e080c9ec6
--- /dev/null
+++ b/libpod/define/mount_freebsd.go
@@ -0,0 +1,8 @@
+//go:build freebsd
+
+package define
+
+const (
+	// TypeBind is the mount type for mounting a host directory (nullfs on FreeBSD).
+	TypeBind = "nullfs"
+)
diff --git a/libpod/define/mount_linux.go b/libpod/define/mount_linux.go
new file mode 100644
index 000000000..5ef848905
--- /dev/null
+++ b/libpod/define/mount_linux.go
@@ -0,0 +1,8 @@
+//go:build linux
+
+package define
+
+const (
+	// TypeBind is the mount type for mounting a host directory (bind on Linux).
+	TypeBind = "bind"
+)
diff --git a/libpod/define/mount_unsupported.go b/libpod/define/mount_unsupported.go
new file mode 100644
index 000000000..cb8642fe2
--- /dev/null
+++ b/libpod/define/mount_unsupported.go
@@ -0,0 +1,8 @@
+//go:build !linux && !freebsd
+
+package define
+
+const (
+	// TypeBind is the mount type for mounting a host directory on platforms other than Linux and FreeBSD.
+	TypeBind = "bind"
+)
diff --git a/libpod/events.go b/libpod/events.go
index c9e4c9d26..60142cb60 100644
--- a/libpod/events.go
+++ b/libpod/events.go
@@ -55,6 +55,12 @@ func (c *Container) newContainerExitedEvent(exitCode int32) {
 	e.Image = c.config.RootfsImageName
 	e.Type = events.Container
 	e.ContainerExitCode = int(exitCode)
+
+	e.Details = events.Details{
+		ID:         e.ID,
+		Attributes: c.Labels(),
+	}
+
 	if err := c.runtime.eventer.Write(e); err != nil {
 		logrus.Errorf("Unable to write container exited event: %q", err)
 	}
@@ -70,6 +76,12 @@ func (c *Container) newExecDiedEvent(sessionID string, exitCode int) {
 	e.ContainerExitCode = exitCode
 	e.Attributes = make(map[string]string)
 	e.Attributes["execID"] = sessionID
+
+	e.Details = events.Details{
+		ID:         e.ID,
+		Attributes: c.Labels(),
+	}
+
 	if err := c.runtime.eventer.Write(e); err != nil {
 		logrus.Errorf("Unable to write exec died event: %q", err)
 	}
diff --git a/libpod/kube.go b/libpod/kube.go
index a3bc7efff..c7aa4b57d 100644
--- a/libpod/kube.go
+++ b/libpod/kube.go
@@ -385,7 +385,7 @@ func (p *Pod) podWithContainers(ctx context.Context, containers []*Container, po
 				return nil, err
 			}
 			for k, v := range annotations {
-				podAnnotations[define.BindMountPrefix+k] = TruncateKubeAnnotation(v)
+				podAnnotations[define.BindMountPrefix] = TruncateKubeAnnotation(k + ":" + v)
 			}
 			// Since port bindings for the pod are handled by the
 			// infra container, wipe them here only if we are sharing the net namespace
@@ -532,7 +532,7 @@ func simplePodWithV1Containers(ctx context.Context, ctrs []*Container) (*v1.Pod,
 			return nil, err
 		}
 		for k, v := range annotations {
-			kubeAnnotations[define.BindMountPrefix+k] = TruncateKubeAnnotation(v)
+			kubeAnnotations[define.BindMountPrefix] = TruncateKubeAnnotation(k + ":" + v)
 		}
 		if isInit {
 			kubeInitCtrs = append(kubeInitCtrs, kubeCtr)
diff --git a/libpod/networking_unsupported.go b/libpod/networking_unsupported.go
index 76ffabb5e..9429287f9 100644
--- a/libpod/networking_unsupported.go
+++ b/libpod/networking_unsupported.go
@@ -5,6 +5,7 @@ package libpod
 
 import (
 	"errors"
+	"net"
 	"path/filepath"
 
 	"github.com/containers/common/libnetwork/types"
@@ -84,3 +85,7 @@ func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) {
 func (c *Container) convertPortMappings() []types.PortMapping {
 	return []types.PortMapping{}
 }
+
+func GetSlirp4netnsIP(subnet *net.IPNet) (*net.IP, error) { // stub: subnet is ignored and an error is always returned
+	return nil, errors.New("not implemented GetSlirp4netnsIP")
+}
diff --git a/libpod/oci_conmon_attach_common.go b/libpod/oci_conmon_attach_common.go
index a9e9b2bb5..dec749837 100644
--- a/libpod/oci_conmon_attach_common.go
+++ b/libpod/oci_conmon_attach_common.go
@@ -280,20 +280,20 @@ func readStdio(conn *net.UnixConn, streams *define.AttachStreams, receiveStdoutE
 	var err error
 	select {
 	case err = <-receiveStdoutError:
-		if err := conn.CloseWrite(); err != nil {
+		if err := socketCloseWrite(conn); err != nil {
 			logrus.Errorf("Failed to close stdin: %v", err)
 		}
 		return err
 	case err = <-stdinDone:
 		if err == define.ErrDetach {
-			if err := conn.CloseWrite(); err != nil {
+			if err := socketCloseWrite(conn); err != nil {
 				logrus.Errorf("Failed to close stdin: %v", err)
 			}
 			return err
 		}
 		if err == nil {
 			// copy stdin is done, close it
-			if connErr := conn.CloseWrite(); connErr != nil {
+			if connErr := socketCloseWrite(conn); connErr != nil {
 				logrus.Errorf("Unable to close conn: %v", connErr)
 			}
 		}
diff --git a/libpod/oci_conmon_common.go b/libpod/oci_conmon_common.go
index cc65e1261..87f0aa4ad 100644
--- a/libpod/oci_conmon_common.go
+++ b/libpod/oci_conmon_common.go
@@ -477,6 +477,16 @@ func (r *ConmonOCIRuntime) UnpauseContainer(ctr *Container) error {
 	return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "resume", ctr.ID())...)
 }
 
+// socketCloseWrite closes the write side of conn. ENOTCONN is not reported:
+// it can happen on FreeBSD if the other side of the connection is already
+// closed.
+func socketCloseWrite(conn *net.UnixConn) error {
+	if err := conn.CloseWrite(); !errors.Is(err, syscall.ENOTCONN) {
+		return err
+	}
+	return nil
+}
+
 // HTTPAttach performs an attach for the HTTP API.
 // The caller must handle closing the HTTP connection after this returns.
 // The cancel channel is not closed; it is up to the caller to do so after
@@ -689,7 +699,7 @@ func (r *ConmonOCIRuntime) HTTPAttach(ctr *Container, req *http.Request, w http.
 				return err
 			}
 			// copy stdin is done, close it
-			if connErr := conn.CloseWrite(); connErr != nil {
+			if connErr := socketCloseWrite(conn); connErr != nil {
 				logrus.Errorf("Unable to close conn: %v", connErr)
 			}
 		case <-cancel:
diff --git a/libpod/oci_conmon_exec_common.go b/libpod/oci_conmon_exec_common.go
index 16cd7ef9f..735dbb9c4 100644
--- a/libpod/oci_conmon_exec_common.go
+++ b/libpod/oci_conmon_exec_common.go
@@ -12,7 +12,6 @@ import (
 	"syscall"
 	"time"
 
-	"github.com/containers/common/pkg/capabilities"
 	"github.com/containers/common/pkg/config"
 	"github.com/containers/common/pkg/resize"
 	cutil "github.com/containers/common/pkg/util"
@@ -386,7 +385,7 @@ func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *Ex
 		finalEnv = append(finalEnv, fmt.Sprintf("%s=%s", k, v))
 	}
 
-	processFile, err := prepareProcessExec(c, options, finalEnv, sessionID)
+	processFile, err := c.prepareProcessExec(options, finalEnv, sessionID)
 	if err != nil {
 		return nil, nil, err
 	}
@@ -654,7 +653,7 @@ func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.Resp
 				return err
 			}
 			// copy stdin is done, close it
-			if connErr := conn.CloseWrite(); connErr != nil {
+			if connErr := socketCloseWrite(conn); connErr != nil {
 				logrus.Errorf("Unable to close conn: %v", connErr)
 			}
 		case <-cancel:
@@ -665,7 +664,7 @@ func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.Resp
 
 // prepareProcessExec returns the path of the process.json used in runc exec -p
 // caller is responsible to close the returned *os.File if needed.
-func prepareProcessExec(c *Container, options *ExecOptions, env []string, sessionID string) (*os.File, error) {
+func (c *Container) prepareProcessExec(options *ExecOptions, env []string, sessionID string) (*os.File, error) {
 	f, err := ioutil.TempFile(c.execBundlePath(sessionID), "exec-process-")
 	if err != nil {
 		return nil, err
@@ -745,34 +744,9 @@ func prepareProcessExec(c *Container, options *ExecOptions, env []string, sessio
 		pspec.User = processUser
 	}
 
-	ctrSpec, err := c.specFromState()
-	if err != nil {
-		return nil, err
-	}
-
-	allCaps, err := capabilities.BoundingSet()
-	if err != nil {
+	if err := c.setProcessCapabilitiesExec(options, user, execUser, pspec); err != nil {
 		return nil, err
 	}
-	if options.Privileged {
-		pspec.Capabilities.Bounding = allCaps
-	} else {
-		pspec.Capabilities.Bounding = ctrSpec.Process.Capabilities.Bounding
-	}
-
-	// Always unset the inheritable capabilities similarly to what the Linux kernel does
-	// They are used only when using capabilities with uid != 0.
-	pspec.Capabilities.Inheritable = []string{}
-
-	if execUser.Uid == 0 {
-		pspec.Capabilities.Effective = pspec.Capabilities.Bounding
-		pspec.Capabilities.Permitted = pspec.Capabilities.Bounding
-	} else if user == c.config.User {
-		pspec.Capabilities.Effective = ctrSpec.Process.Capabilities.Effective
-		pspec.Capabilities.Inheritable = ctrSpec.Process.Capabilities.Effective
-		pspec.Capabilities.Permitted = ctrSpec.Process.Capabilities.Effective
-		pspec.Capabilities.Ambient = ctrSpec.Process.Capabilities.Effective
-	}
 
 	hasHomeSet := false
 	for _, s := range pspec.Env {
diff --git a/libpod/oci_conmon_exec_freebsd.go b/libpod/oci_conmon_exec_freebsd.go
new file mode 100644
index 000000000..bf30404a1
--- /dev/null
+++ b/libpod/oci_conmon_exec_freebsd.go
@@ -0,0 +1,10 @@
+package libpod
+
+import (
+	"github.com/opencontainers/runc/libcontainer/user"
+	spec "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+func (c *Container) setProcessCapabilitiesExec(options *ExecOptions, user string, execUser *user.ExecUser, pspec *spec.Process) error {
+	return nil // no-op on FreeBSD: pspec.Capabilities is left untouched
+}
diff --git a/libpod/oci_conmon_exec_linux.go b/libpod/oci_conmon_exec_linux.go
new file mode 100644
index 000000000..617e8d601
--- /dev/null
+++ b/libpod/oci_conmon_exec_linux.go
@@ -0,0 +1,39 @@
+package libpod
+
+import (
+	"github.com/containers/common/pkg/capabilities"
+	"github.com/opencontainers/runc/libcontainer/user"
+	spec "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+func (c *Container) setProcessCapabilitiesExec(options *ExecOptions, user string, execUser *user.ExecUser, pspec *spec.Process) error {
+	ctrSpec, err := c.specFromState()
+	if err != nil {
+		return err
+	}
+
+	allCaps, err := capabilities.BoundingSet()
+	if err != nil {
+		return err
+	}
+	if options.Privileged { // privileged exec sessions get the full bounding set
+		pspec.Capabilities.Bounding = allCaps
+	} else {
+		pspec.Capabilities.Bounding = ctrSpec.Process.Capabilities.Bounding
+	}
+
+	// Always unset the inheritable capabilities similarly to what the Linux kernel does.
+	// They are used only when using capabilities with uid != 0.
+	pspec.Capabilities.Inheritable = []string{}
+
+	if execUser.Uid == 0 {
+		pspec.Capabilities.Effective = pspec.Capabilities.Bounding
+		pspec.Capabilities.Permitted = pspec.Capabilities.Bounding
+	} else if user == c.config.User { // same user as the container: reuse its effective set for the four sets below
+		pspec.Capabilities.Effective = ctrSpec.Process.Capabilities.Effective
+		pspec.Capabilities.Inheritable = ctrSpec.Process.Capabilities.Effective // NOTE(review): re-populates Inheritable after the reset above — confirm intended
+		pspec.Capabilities.Permitted = ctrSpec.Process.Capabilities.Effective
+		pspec.Capabilities.Ambient = ctrSpec.Process.Capabilities.Effective
+	}
+	return nil
+}
diff --git a/libpod/runtime.go b/libpod/runtime.go
index 9b97fd724..1503b2344 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -5,6 +5,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"math/rand"
 	"os"
 	"path/filepath"
 	"strings"
@@ -112,6 +113,13 @@ type Runtime struct {
 	secretsManager *secrets.SecretsManager
 }
 
+func init() {
+	// generateName calls namesgenerator.GetRandomName, which uses the
+	// global RNG from math/rand. Seed it here to make sure we
+	// don't get the same name every time.
+	rand.Seed(time.Now().UnixNano())
+}
+
 // SetXdgDirs ensures the XDG_RUNTIME_DIR env and XDG_CONFIG_HOME variables are set.
 // containers/image uses XDG_RUNTIME_DIR to locate the auth file, XDG_CONFIG_HOME is
 // use for the containers.conf configuration file.
diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index b43114fab..fb4f80aa6 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -171,12 +171,17 @@ func (r *Runtime) initContainerVariables(rSpec *spec.Spec, config *ContainerConf
 	if config == nil {
 		ctr.config.ID = stringid.GenerateNonCryptoID()
 		size, err := units.FromHumanSize(r.config.Containers.ShmSize)
-		if err != nil {
-			return nil, fmt.Errorf("converting containers.conf ShmSize %s to an int: %w", r.config.Containers.ShmSize, err)
+		if useDevShm {
+			if err != nil {
+				return nil, fmt.Errorf("converting containers.conf ShmSize %s to an int: %w", r.config.Containers.ShmSize, err)
+			}
+			ctr.config.ShmSize = size
+			ctr.config.NoShm = false
+			ctr.config.NoShmShare = false
+		} else {
+			ctr.config.NoShm = true
+			ctr.config.NoShmShare = true
 		}
-		ctr.config.ShmSize = size
-		ctr.config.NoShm = false
-		ctr.config.NoShmShare = false
 		ctr.config.StopSignal = 15
 
 		ctr.config.StopTimeout = r.config.Engine.StopTimeout
@@ -528,7 +533,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
 		}
 	}
 
-	if !MountExists(ctr.config.Spec.Mounts, "/dev/shm") && ctr.config.ShmDir == "" && !ctr.config.NoShm {
+	if useDevShm && !MountExists(ctr.config.Spec.Mounts, "/dev/shm") && ctr.config.ShmDir == "" && !ctr.config.NoShm {
 		ctr.config.ShmDir = filepath.Join(ctr.bundlePath(), "shm")
 		if err := os.MkdirAll(ctr.config.ShmDir, 0700); err != nil {
 			if !os.IsExist(err) {
@@ -793,7 +798,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
 
 	// Deallocate the container's lock
 	if err := c.lock.Free(); err != nil {
-		if cleanupErr == nil {
+		if cleanupErr == nil && !os.IsNotExist(err) {
 			cleanupErr = fmt.Errorf("error freeing lock for container %s: %w", c.ID(), err)
 		} else {
 			logrus.Errorf("Free container lock: %v", err)
diff --git a/libpod/runtime_ctr_freebsd.go b/libpod/runtime_ctr_freebsd.go
new file mode 100644
index 000000000..a8870a38c
--- /dev/null
+++ b/libpod/runtime_ctr_freebsd.go
@@ -0,0 +1,5 @@
+package libpod
+
+const (
+	useDevShm = false // no ShmDir is created and new default configs get NoShm/NoShmShare
+)
diff --git a/libpod/runtime_ctr_linux.go b/libpod/runtime_ctr_linux.go
new file mode 100644
index 000000000..7812d8238
--- /dev/null
+++ b/libpod/runtime_ctr_linux.go
@@ -0,0 +1,5 @@
+package libpod
+
+const (
+	useDevShm = true // containers get a ShmDir unless /dev/shm is already mounted, ShmDir is set, or NoShm is set
+)
diff --git a/libpod/runtime_test.go b/libpod/runtime_test.go
new file mode 100644
index 000000000..2e16c7fcd
--- /dev/null
+++ b/libpod/runtime_test.go
@@ -0,0 +1,28 @@
+package libpod
+
+import (
+	"math/rand"
+	"os"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func Test_generateName(t *testing.T) {
+	state, path, _, err := getEmptyBoltState()
+	if !assert.NoError(t, err) {
+		t.FailNow() // do not go on (and defer Close) with a state that failed to initialise
+	}
+	defer os.RemoveAll(path)
+	defer state.Close()
+	r := &Runtime{state: state}
+
+	// The package init seeds the global RNG, so the first name must differ from
+	// the one generated right after resetting the RNG to the default seed (1).
+	n1, err := r.generateName()
+	assert.NoError(t, err)
+	rand.Seed(1)
+	n2, err := r.generateName()
+	assert.NoError(t, err)
+	assert.NotEqual(t, n1, n2)
+}