summaryrefslogtreecommitdiff
path: root/libpod/container_internal_linux.go
diff options
context:
space:
mode:
Diffstat (limited to 'libpod/container_internal_linux.go')
-rw-r--r--libpod/container_internal_linux.go435
1 files changed, 349 insertions, 86 deletions
diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go
index 0a1784ba7..b540bbeb8 100644
--- a/libpod/container_internal_linux.go
+++ b/libpod/container_internal_linux.go
@@ -11,6 +11,7 @@ import (
"os"
"path"
"path/filepath"
+ "strconv"
"strings"
"sync"
"syscall"
@@ -19,11 +20,12 @@ import (
cnitypes "github.com/containernetworking/cni/pkg/types/current"
"github.com/containernetworking/plugins/pkg/ns"
crioAnnotations "github.com/containers/libpod/pkg/annotations"
- "github.com/containers/libpod/pkg/chrootuser"
"github.com/containers/libpod/pkg/criu"
+ "github.com/containers/libpod/pkg/lookup"
+ "github.com/containers/libpod/pkg/resolvconf"
"github.com/containers/libpod/pkg/rootless"
+ "github.com/containers/libpod/pkg/secrets"
"github.com/containers/storage/pkg/idtools"
- "github.com/cyphar/filepath-securejoin"
"github.com/opencontainers/runc/libcontainer/user"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-tools/generate"
@@ -59,7 +61,7 @@ func (c *Container) prepare() (err error) {
networkStatus []*cnitypes.Result
createNetNSErr, mountStorageErr error
mountPoint string
- saveNetworkStatus bool
+ tmpStateLock sync.Mutex
)
wg.Add(2)
@@ -68,17 +70,55 @@ func (c *Container) prepare() (err error) {
defer wg.Done()
// Set up network namespace if not already set up
if c.config.CreateNetNS && c.state.NetNS == nil && !c.config.PostConfigureNetNS {
- saveNetworkStatus = true
netNS, networkStatus, createNetNSErr = c.runtime.createNetNS(c)
+
+ tmpStateLock.Lock()
+ defer tmpStateLock.Unlock()
+
+ // Assign NetNS attributes to container
+ if createNetNSErr == nil {
+ c.state.NetNS = netNS
+ c.state.NetworkStatus = networkStatus
+ }
}
}()
// Mount storage if not mounted
go func() {
defer wg.Done()
mountPoint, mountStorageErr = c.mountStorage()
+
+ if mountStorageErr != nil {
+ return
+ }
+
+ tmpStateLock.Lock()
+ defer tmpStateLock.Unlock()
+
+ // Finish up mountStorage
+ c.state.Mounted = true
+ c.state.Mountpoint = mountPoint
+ if c.state.UserNSRoot == "" {
+ c.state.RealMountpoint = c.state.Mountpoint
+ } else {
+ c.state.RealMountpoint = filepath.Join(c.state.UserNSRoot, "mountpoint")
+ }
+
+ logrus.Debugf("Created root filesystem for container %s at %s", c.ID(), c.state.Mountpoint)
+ }()
+
+ defer func() {
+ if err != nil {
+ if err2 := c.cleanupNetwork(); err2 != nil {
+ logrus.Errorf("Error cleaning up container %s network: %v", c.ID(), err2)
+ }
+ if err2 := c.cleanupStorage(); err2 != nil {
+ logrus.Errorf("Error cleaning up container %s storage: %v", c.ID(), err2)
+ }
+ }
}()
wg.Wait()
+
if createNetNSErr != nil {
if mountStorageErr != nil {
logrus.Error(createNetNSErr)
@@ -90,28 +130,15 @@ func (c *Container) prepare() (err error) {
return mountStorageErr
}
- // Assign NetNS attributes to container
- if saveNetworkStatus {
- c.state.NetNS = netNS
- c.state.NetworkStatus = networkStatus
- }
-
- // Finish up mountStorage
- c.state.Mounted = true
- c.state.Mountpoint = mountPoint
- if c.state.UserNSRoot == "" {
- c.state.RealMountpoint = c.state.Mountpoint
- } else {
- c.state.RealMountpoint = filepath.Join(c.state.UserNSRoot, "mountpoint")
- }
-
- logrus.Debugf("Created root filesystem for container %s at %s", c.ID(), c.state.Mountpoint)
// Save the container
return c.save()
}
// cleanupNetwork unmounts and cleans up the container's network
func (c *Container) cleanupNetwork() error {
+ if c.NetworkDisabled() {
+ return nil
+ }
if c.state.NetNS == nil {
logrus.Debugf("Network is already cleaned up, skipping...")
return nil
@@ -135,6 +162,10 @@ func (c *Container) cleanupNetwork() error {
// Generate spec for a container
// Accepts a map of the container's dependencies
func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
+ execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, nil)
+ if err != nil {
+ return nil, err
+ }
g := generate.NewFromSpec(c.config.Spec)
// If network namespace was requested, add it now
@@ -145,6 +176,11 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
g.AddOrReplaceLinuxNamespace(spec.NetworkNamespace, c.state.NetNS.Path())
}
}
+
+ if err := c.makeBindMounts(); err != nil {
+ return nil, err
+ }
+
// Check if the spec file mounts contain the label Relabel flags z or Z.
// If they do, relabel the source directory and then remove the option.
for _, m := range g.Mounts() {
@@ -188,11 +224,8 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
}
}
- var err error
- if !rootless.IsRootless() {
- if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil {
- return nil, errors.Wrapf(err, "error setting up OCI Hooks")
- }
+ if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil {
+ return nil, errors.Wrapf(err, "error setting up OCI Hooks")
}
// Bind builtin image volumes
@@ -203,28 +236,15 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
}
if c.config.User != "" {
- if !c.state.Mounted {
- return nil, errors.Wrapf(ErrCtrStateInvalid, "container %s must be mounted in order to translate User field", c.ID())
- }
- uid, gid, err := chrootuser.GetUser(c.state.Mountpoint, c.config.User)
- if err != nil {
- return nil, err
- }
// User and Group must go together
- g.SetProcessUID(uid)
- g.SetProcessGID(gid)
+ g.SetProcessUID(uint32(execUser.Uid))
+ g.SetProcessGID(uint32(execUser.Gid))
}
// Add addition groups if c.config.GroupAdd is not empty
if len(c.config.Groups) > 0 {
- if !c.state.Mounted {
- return nil, errors.Wrapf(ErrCtrStateInvalid, "container %s must be mounted in order to add additional groups", c.ID())
- }
- for _, group := range c.config.Groups {
- gid, err := chrootuser.GetGroup(c.state.Mountpoint, group)
- if err != nil {
- return nil, err
- }
+ gids, _ := lookup.GetContainerGroups(c.config.Groups, c.state.Mountpoint, nil)
+ for _, gid := range gids {
g.AddProcessAdditionalGid(gid)
}
}
@@ -237,26 +257,6 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
// Look up and add groups the user belongs to, if a group wasn't directly specified
if !rootless.IsRootless() && !strings.Contains(c.config.User, ":") {
- var groupDest, passwdDest string
- defaultExecUser := user.ExecUser{
- Uid: 0,
- Gid: 0,
- Home: "/",
- }
-
- // Make sure the /etc/group and /etc/passwd destinations are not a symlink to something naughty
- if groupDest, err = securejoin.SecureJoin(c.state.Mountpoint, "/etc/group"); err != nil {
- logrus.Debug(err)
- return nil, err
- }
- if passwdDest, err = securejoin.SecureJoin(c.state.Mountpoint, "/etc/passwd"); err != nil {
- logrus.Debug(err)
- return nil, err
- }
- execUser, err := user.GetExecUserPath(c.config.User, &defaultExecUser, passwdDest, groupDest)
- if err != nil {
- return nil, err
- }
for _, gid := range execUser.Sgids {
g.AddProcessAdditionalGid(uint32(gid))
}
@@ -351,8 +351,34 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
// Mounts need to be sorted so paths will not cover other paths
mounts := sortMounts(g.Mounts())
g.ClearMounts()
+
+ // Determine property of RootPropagation based on volume properties. If
+ // a volume is shared, then keep root propagation shared. This should
+ // work for slave and private volumes too.
+ //
+ // For slave volumes, it can be either [r]shared/[r]slave.
+ //
+ // For private volumes any root propagation value should work.
+ rootPropagation := ""
for _, m := range mounts {
g.AddMount(m)
+ for _, opt := range m.Options {
+ switch opt {
+ case MountShared, MountRShared:
+ if rootPropagation != MountShared && rootPropagation != MountRShared {
+ rootPropagation = MountShared
+ }
+ case MountSlave, MountRSlave:
+ if rootPropagation != MountShared && rootPropagation != MountRShared && rootPropagation != MountSlave && rootPropagation != MountRSlave {
+ rootPropagation = MountRSlave
+ }
+ }
+ }
+ }
+
+ if rootPropagation != "" {
+ logrus.Debugf("set root propagation to %q", rootPropagation)
+ g.SetLinuxRootPropagation(rootPropagation)
}
return g.Config, nil
}
@@ -386,19 +412,31 @@ func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) erro
g.AddMount(tmpfsMnt)
}
- cgroupPath, err := c.CGroupPath()
- if err != nil {
- return err
- }
- sourcePath := filepath.Join("/sys/fs/cgroup/systemd", cgroupPath)
+ // rootless containers have no write access to /sys/fs/cgroup, so don't
+ // add any mount into the container.
+ if !rootless.IsRootless() {
+ cgroupPath, err := c.CGroupPath()
+ if err != nil {
+ return err
+ }
+ sourcePath := filepath.Join("/sys/fs/cgroup/systemd", cgroupPath)
- systemdMnt := spec.Mount{
- Destination: "/sys/fs/cgroup/systemd",
- Type: "bind",
- Source: sourcePath,
- Options: []string{"bind", "private"},
+ systemdMnt := spec.Mount{
+ Destination: "/sys/fs/cgroup/systemd",
+ Type: "bind",
+ Source: sourcePath,
+ Options: []string{"bind", "private"},
+ }
+ g.AddMount(systemdMnt)
+ } else {
+ systemdMnt := spec.Mount{
+ Destination: "/sys/fs/cgroup/systemd",
+ Type: "bind",
+ Source: "/sys/fs/cgroup/systemd",
+ Options: []string{"bind", "nodev", "noexec", "nosuid"},
+ }
+ g.AddMount(systemdMnt)
}
- g.AddMount(systemdMnt)
return nil
}
@@ -423,7 +461,7 @@ func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr
return nil
}
-func (c *Container) checkpoint(ctx context.Context, keep bool) (err error) {
+func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (err error) {
if !criu.CheckForCriu() {
return errors.Errorf("checkpointing a container requires at least CRIU %d", criu.MinCriuVersion)
@@ -432,7 +470,7 @@ func (c *Container) checkpoint(ctx context.Context, keep bool) (err error) {
if c.state.State != ContainerStateRunning {
return errors.Wrapf(ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State)
}
- if err := c.runtime.ociRuntime.checkpointContainer(c); err != nil {
+ if err := c.runtime.ociRuntime.checkpointContainer(c, options); err != nil {
return err
}
@@ -449,14 +487,16 @@ func (c *Container) checkpoint(ctx context.Context, keep bool) (err error) {
logrus.Debugf("Checkpointed container %s", c.ID())
- c.state.State = ContainerStateStopped
+ if !options.KeepRunning {
+ c.state.State = ContainerStateStopped
- // Cleanup Storage and Network
- if err := c.cleanup(ctx); err != nil {
- return err
+ // Cleanup Storage and Network
+ if err := c.cleanup(ctx); err != nil {
+ return err
+ }
}
- if !keep {
+ if !options.Keep {
// Remove log file
os.Remove(filepath.Join(c.bundlePath(), "dump.log"))
// Remove statistic file
@@ -466,7 +506,7 @@ func (c *Container) checkpoint(ctx context.Context, keep bool) (err error) {
return c.save()
}
-func (c *Container) restore(ctx context.Context, keep bool) (err error) {
+func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (err error) {
if !criu.CheckForCriu() {
return errors.Errorf("restoring a container requires at least CRIU %d", criu.MinCriuVersion)
@@ -510,9 +550,6 @@ func (c *Container) restore(ctx context.Context, keep bool) (err error) {
}
}
- if err := c.prepare(); err != nil {
- return err
- }
defer func() {
if err != nil {
if err2 := c.cleanup(ctx); err2 != nil {
@@ -521,6 +558,10 @@ func (c *Container) restore(ctx context.Context, keep bool) (err error) {
}
}()
+ if err := c.prepare(); err != nil {
+ return err
+ }
+
// TODO: use existing way to request static IPs, once it is merged in ocicni
// https://github.com/cri-o/ocicni/pull/23/
@@ -553,7 +594,7 @@ func (c *Container) restore(ctx context.Context, keep bool) (err error) {
// Cleanup for a working restore.
c.removeConmonFiles()
- if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, true); err != nil {
+ if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, &options); err != nil {
return err
}
@@ -561,7 +602,7 @@ func (c *Container) restore(ctx context.Context, keep bool) (err error) {
c.state.State = ContainerStateRunning
- if !keep {
+ if !options.Keep {
// Delete all checkpoint related files. At this point, in theory, all files
// should exist. Still ignoring errors for now as the container should be
// restored and running. Not erroring out just because some cleanup operation
@@ -582,3 +623,225 @@ func (c *Container) restore(ctx context.Context, keep bool) (err error) {
return c.save()
}
+
+// Make standard bind mounts to include in the container
+func (c *Container) makeBindMounts() error {
+ if err := os.Chown(c.state.RunDir, c.RootUID(), c.RootGID()); err != nil {
+ return errors.Wrapf(err, "cannot chown run directory %s", c.state.RunDir)
+ }
+
+ if c.state.BindMounts == nil {
+ c.state.BindMounts = make(map[string]string)
+ }
+
+ if !c.NetworkDisabled() {
+ // Make /etc/resolv.conf
+ if _, ok := c.state.BindMounts["/etc/resolv.conf"]; ok {
+ // If it already exists, delete so we can recreate
+ delete(c.state.BindMounts, "/etc/resolv.conf")
+ }
+ newResolv, err := c.generateResolvConf()
+ if err != nil {
+ return errors.Wrapf(err, "error creating resolv.conf for container %s", c.ID())
+ }
+ c.state.BindMounts["/etc/resolv.conf"] = newResolv
+
+ // Make /etc/hosts
+ if _, ok := c.state.BindMounts["/etc/hosts"]; ok {
+ // If it already exists, delete so we can recreate
+ delete(c.state.BindMounts, "/etc/hosts")
+ }
+ newHosts, err := c.generateHosts()
+ if err != nil {
+ return errors.Wrapf(err, "error creating hosts file for container %s", c.ID())
+ }
+ c.state.BindMounts["/etc/hosts"] = newHosts
+
+ }
+
+ // SHM is always added when we mount the container
+ c.state.BindMounts["/dev/shm"] = c.config.ShmDir
+
+ newPasswd, err := c.generatePasswd()
+ if err != nil {
+ return errors.Wrapf(err, "error creating temporary passwd file for container %s", c.ID())
+ }
+ if newPasswd != "" {
+ // Make /etc/passwd
+ if _, ok := c.state.BindMounts["/etc/passwd"]; ok {
+ // If it already exists, delete so we can recreate
+ delete(c.state.BindMounts, "/etc/passwd")
+ }
+ logrus.Debugf("adding entry to /etc/passwd for non existent default user")
+ c.state.BindMounts["/etc/passwd"] = newPasswd
+ }
+
+ // Make /etc/hostname
+ // This should never change, so no need to recreate if it exists
+ if _, ok := c.state.BindMounts["/etc/hostname"]; !ok {
+ hostnamePath, err := c.writeStringToRundir("hostname", c.Hostname())
+ if err != nil {
+ return errors.Wrapf(err, "error creating hostname file for container %s", c.ID())
+ }
+ c.state.BindMounts["/etc/hostname"] = hostnamePath
+ }
+
+ // Make .containerenv
+ // Empty file, so no need to recreate if it exists
+ if _, ok := c.state.BindMounts["/run/.containerenv"]; !ok {
+ // Empty string for now, but we may consider populating this later
+ containerenvPath, err := c.writeStringToRundir(".containerenv", "")
+ if err != nil {
+ return errors.Wrapf(err, "error creating containerenv file for container %s", c.ID())
+ }
+ c.state.BindMounts["/run/.containerenv"] = containerenvPath
+ }
+
+ // Add Secret Mounts
+ secretMounts := secrets.SecretMountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.DefaultMountsFile, c.state.DestinationRunDir, c.RootUID(), c.RootGID())
+ for _, mount := range secretMounts {
+ if _, ok := c.state.BindMounts[mount.Destination]; !ok {
+ c.state.BindMounts[mount.Destination] = mount.Source
+ }
+ }
+
+ return nil
+}
+
+// generateResolvConf generates a containers resolv.conf
+func (c *Container) generateResolvConf() (string, error) {
+ // Determine the endpoint for resolv.conf in case it is a symlink
+ resolvPath, err := filepath.EvalSymlinks("/etc/resolv.conf")
+ if err != nil {
+ return "", err
+ }
+
+ contents, err := ioutil.ReadFile(resolvPath)
+ if err != nil {
+ return "", errors.Wrapf(err, "unable to read %s", resolvPath)
+ }
+
+ // Ensure that the container's /etc/resolv.conf is compatible with its
+ // network configuration.
+ // TODO: set ipv6 enable bool more sanely
+ resolv, err := resolvconf.FilterResolvDNS(contents, true, c.config.CreateNetNS)
+ if err != nil {
+ return "", errors.Wrapf(err, "error parsing host resolv.conf")
+ }
+
+ // Make a new resolv.conf
+ nameservers := resolvconf.GetNameservers(resolv.Content)
+ if len(c.config.DNSServer) > 0 {
+ // We store DNS servers as net.IP, so need to convert to string
+ nameservers = []string{}
+ for _, server := range c.config.DNSServer {
+ nameservers = append(nameservers, server.String())
+ }
+ }
+
+ search := resolvconf.GetSearchDomains(resolv.Content)
+ if len(c.config.DNSSearch) > 0 {
+ search = c.config.DNSSearch
+ }
+
+ options := resolvconf.GetOptions(resolv.Content)
+ if len(c.config.DNSOption) > 0 {
+ options = c.config.DNSOption
+ }
+
+ destPath := filepath.Join(c.state.RunDir, "resolv.conf")
+
+ if err := os.Remove(destPath); err != nil && !os.IsNotExist(err) {
+ return "", errors.Wrapf(err, "error removing resolv.conf for container %s", c.ID())
+ }
+
+ // Build resolv.conf
+ if _, err = resolvconf.Build(destPath, nameservers, search, options); err != nil {
+ return "", errors.Wrapf(err, "error building resolv.conf for container %s", c.ID())
+ }
+
+ // Relabel resolv.conf for the container
+ if err := label.Relabel(destPath, c.config.MountLabel, false); err != nil {
+ return "", err
+ }
+
+ return filepath.Join(c.state.DestinationRunDir, "resolv.conf"), nil
+}
+
+// generateHosts creates a containers hosts file
+func (c *Container) generateHosts() (string, error) {
+ orig, err := ioutil.ReadFile("/etc/hosts")
+ if err != nil {
+ return "", errors.Wrapf(err, "unable to read /etc/hosts")
+ }
+ hosts := string(orig)
+ if len(c.config.HostAdd) > 0 {
+ for _, host := range c.config.HostAdd {
+ // the host format has already been verified at this point
+ fields := strings.SplitN(host, ":", 2)
+ hosts += fmt.Sprintf("%s %s\n", fields[1], fields[0])
+ }
+ }
+ if len(c.state.NetworkStatus) > 0 && len(c.state.NetworkStatus[0].IPs) > 0 {
+ ipAddress := strings.Split(c.state.NetworkStatus[0].IPs[0].Address.String(), "/")[0]
+ hosts += fmt.Sprintf("%s\t%s\n", ipAddress, c.Hostname())
+ }
+ return c.writeStringToRundir("hosts", hosts)
+}
+
+// generatePasswd generates a container specific passwd file,
+// iff g.config.User is a number
+func (c *Container) generatePasswd() (string, error) {
+ var (
+ groupspec string
+ gid int
+ )
+ if c.config.User == "" {
+ return "", nil
+ }
+ spec := strings.SplitN(c.config.User, ":", 2)
+ userspec := spec[0]
+ if len(spec) > 1 {
+ groupspec = spec[1]
+ }
+ // If a non numeric User, then don't generate passwd
+ uid, err := strconv.ParseUint(userspec, 10, 32)
+ if err != nil {
+ return "", nil
+ }
+ // Lookup the user to see if it exists in the container image
+ _, err = lookup.GetUser(c.state.Mountpoint, userspec)
+ if err != nil && err != user.ErrNoPasswdEntries {
+ return "", err
+ }
+ if err == nil {
+ return "", nil
+ }
+ if groupspec != "" {
+ ugid, err := strconv.ParseUint(groupspec, 10, 32)
+ if err == nil {
+ gid = int(ugid)
+ } else {
+ group, err := lookup.GetGroup(c.state.Mountpoint, groupspec)
+ if err != nil {
+ return "", errors.Wrapf(err, "unable to get gid %s from group file", groupspec)
+ }
+ gid = group.Gid
+ }
+ }
+ originPasswdFile := filepath.Join(c.state.Mountpoint, "/etc/passwd")
+ orig, err := ioutil.ReadFile(originPasswdFile)
+ if err != nil && !os.IsNotExist(err) {
+ return "", errors.Wrapf(err, "unable to read passwd file %s", originPasswdFile)
+ }
+
+ pwd := fmt.Sprintf("%s%d:x:%d:%d:container user:%s:/bin/sh\n", orig, uid, uid, gid, c.WorkingDir())
+ passwdFile, err := c.writeStringToRundir("passwd", pwd)
+ if err != nil {
+ return "", errors.Wrapf(err, "failed to create temporary passwd file")
+ }
+ if os.Chmod(passwdFile, 0644); err != nil {
+ return "", err
+ }
+ return passwdFile, nil
+}