diff options
Diffstat (limited to 'libpod/container_internal_linux.go')
-rw-r--r-- | libpod/container_internal_linux.go | 435 |
1 files changed, 349 insertions, 86 deletions
diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 0a1784ba7..b540bbeb8 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -11,6 +11,7 @@ import ( "os" "path" "path/filepath" + "strconv" "strings" "sync" "syscall" @@ -19,11 +20,12 @@ import ( cnitypes "github.com/containernetworking/cni/pkg/types/current" "github.com/containernetworking/plugins/pkg/ns" crioAnnotations "github.com/containers/libpod/pkg/annotations" - "github.com/containers/libpod/pkg/chrootuser" "github.com/containers/libpod/pkg/criu" + "github.com/containers/libpod/pkg/lookup" + "github.com/containers/libpod/pkg/resolvconf" "github.com/containers/libpod/pkg/rootless" + "github.com/containers/libpod/pkg/secrets" "github.com/containers/storage/pkg/idtools" - "github.com/cyphar/filepath-securejoin" "github.com/opencontainers/runc/libcontainer/user" spec "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-tools/generate" @@ -59,7 +61,7 @@ func (c *Container) prepare() (err error) { networkStatus []*cnitypes.Result createNetNSErr, mountStorageErr error mountPoint string - saveNetworkStatus bool + tmpStateLock sync.Mutex ) wg.Add(2) @@ -68,17 +70,55 @@ func (c *Container) prepare() (err error) { defer wg.Done() // Set up network namespace if not already set up if c.config.CreateNetNS && c.state.NetNS == nil && !c.config.PostConfigureNetNS { - saveNetworkStatus = true netNS, networkStatus, createNetNSErr = c.runtime.createNetNS(c) + + tmpStateLock.Lock() + defer tmpStateLock.Unlock() + + // Assign NetNS attributes to container + if createNetNSErr == nil { + c.state.NetNS = netNS + c.state.NetworkStatus = networkStatus + } } }() // Mount storage if not mounted go func() { defer wg.Done() mountPoint, mountStorageErr = c.mountStorage() + + if mountStorageErr != nil { + return + } + + tmpStateLock.Lock() + defer tmpStateLock.Unlock() + + // Finish up mountStorage + c.state.Mounted = true + c.state.Mountpoint = mountPoint + if c.state.UserNSRoot == "" { + c.state.RealMountpoint = c.state.Mountpoint + } else { + c.state.RealMountpoint = filepath.Join(c.state.UserNSRoot, "mountpoint") + } + + logrus.Debugf("Created root filesystem for container %s at %s", c.ID(), c.state.Mountpoint) + }() + + defer func() { + if err != nil { + if err2 := c.cleanupNetwork(); err2 != nil { + logrus.Errorf("Error cleaning up container %s network: %v", c.ID(), err2) + } + if err2 := c.cleanupStorage(); err2 != nil { + logrus.Errorf("Error cleaning up container %s storage: %v", c.ID(), err2) + } + } }() wg.Wait() + if createNetNSErr != nil { if mountStorageErr != nil { logrus.Error(createNetNSErr) @@ -90,28 +130,15 @@ func (c *Container) prepare() (err error) { return mountStorageErr } - // Assign NetNS attributes to container - if saveNetworkStatus { - c.state.NetNS = netNS - c.state.NetworkStatus = networkStatus - } - - // Finish up mountStorage - c.state.Mounted = true - c.state.Mountpoint = mountPoint - if c.state.UserNSRoot == "" { - c.state.RealMountpoint = c.state.Mountpoint - } else { - c.state.RealMountpoint = filepath.Join(c.state.UserNSRoot, "mountpoint") - } - - logrus.Debugf("Created root filesystem for container %s at %s", c.ID(), c.state.Mountpoint) // Save the container return c.save() } // cleanupNetwork unmounts and cleans up the container's network func (c *Container) cleanupNetwork() error { + if c.NetworkDisabled() { + return nil + } if c.state.NetNS == nil { logrus.Debugf("Network is already cleaned up, skipping...") return nil @@ -135,6 +162,10 @@ func (c *Container) cleanupNetwork() error { // Generate spec for a container // Accepts a map of the container's dependencies func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { + execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, nil) + if err != nil { + return nil, err + } g := generate.NewFromSpec(c.config.Spec) // If network namespace was requested, add it now @@ -145,6 +176,11 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { g.AddOrReplaceLinuxNamespace(spec.NetworkNamespace, c.state.NetNS.Path()) } } + + if err := c.makeBindMounts(); err != nil { + return nil, err + } + // Check if the spec file mounts contain the label Relabel flags z or Z. // If they do, relabel the source directory and then remove the option. for _, m := range g.Mounts() { @@ -188,11 +224,8 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { } } - var err error - if !rootless.IsRootless() { - if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil { - return nil, errors.Wrapf(err, "error setting up OCI Hooks") - } + if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil { + return nil, errors.Wrapf(err, "error setting up OCI Hooks") } // Bind builtin image volumes @@ -203,28 +236,15 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { } if c.config.User != "" { - if !c.state.Mounted { - return nil, errors.Wrapf(ErrCtrStateInvalid, "container %s must be mounted in order to translate User field", c.ID()) - } - uid, gid, err := chrootuser.GetUser(c.state.Mountpoint, c.config.User) - if err != nil { - return nil, err - } // User and Group must go together - g.SetProcessUID(uid) - g.SetProcessGID(gid) + g.SetProcessUID(uint32(execUser.Uid)) + g.SetProcessGID(uint32(execUser.Gid)) } // Add addition groups if c.config.GroupAdd is not empty if len(c.config.Groups) > 0 { - if !c.state.Mounted { - return nil, errors.Wrapf(ErrCtrStateInvalid, "container %s must be mounted in order to add additional groups", c.ID()) - } - for _, group := range c.config.Groups { - gid, err := chrootuser.GetGroup(c.state.Mountpoint, group) - if err != nil { - return nil, err - } + gids, _ := lookup.GetContainerGroups(c.config.Groups, c.state.Mountpoint, nil) + for _, gid := range gids { g.AddProcessAdditionalGid(gid) } } @@ -237,26 +257,6 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { // Look up and add groups the user belongs to, if a group wasn't directly specified if !rootless.IsRootless() && !strings.Contains(c.config.User, ":") { - var groupDest, passwdDest string - defaultExecUser := user.ExecUser{ - Uid: 0, - Gid: 0, - Home: "/", - } - - // Make sure the /etc/group and /etc/passwd destinations are not a symlink to something naughty - if groupDest, err = securejoin.SecureJoin(c.state.Mountpoint, "/etc/group"); err != nil { - logrus.Debug(err) - return nil, err - } - if passwdDest, err = securejoin.SecureJoin(c.state.Mountpoint, "/etc/passwd"); err != nil { - logrus.Debug(err) - return nil, err - } - execUser, err := user.GetExecUserPath(c.config.User, &defaultExecUser, passwdDest, groupDest) - if err != nil { - return nil, err - } for _, gid := range execUser.Sgids { g.AddProcessAdditionalGid(uint32(gid)) } @@ -351,8 +351,34 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { // Mounts need to be sorted so paths will not cover other paths mounts := sortMounts(g.Mounts()) g.ClearMounts() + + // Determine property of RootPropagation based on volume properties. If + // a volume is shared, then keep root propagation shared. This should + // work for slave and private volumes too. + // + // For slave volumes, it can be either [r]shared/[r]slave. + // + // For private volumes any root propagation value should work. + rootPropagation := "" for _, m := range mounts { g.AddMount(m) + for _, opt := range m.Options { + switch opt { + case MountShared, MountRShared: + if rootPropagation != MountShared && rootPropagation != MountRShared { + rootPropagation = MountShared + } + case MountSlave, MountRSlave: + if rootPropagation != MountShared && rootPropagation != MountRShared && rootPropagation != MountSlave && rootPropagation != MountRSlave { + rootPropagation = MountRSlave + } + } + } + } + + if rootPropagation != "" { + logrus.Debugf("set root propagation to %q", rootPropagation) + g.SetLinuxRootPropagation(rootPropagation) } return g.Config, nil } @@ -386,19 +412,31 @@ func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) erro g.AddMount(tmpfsMnt) } - cgroupPath, err := c.CGroupPath() - if err != nil { - return err - } - sourcePath := filepath.Join("/sys/fs/cgroup/systemd", cgroupPath) + // rootless containers have no write access to /sys/fs/cgroup, so don't + // add any mount into the container. + if !rootless.IsRootless() { + cgroupPath, err := c.CGroupPath() + if err != nil { + return err + } + sourcePath := filepath.Join("/sys/fs/cgroup/systemd", cgroupPath) - systemdMnt := spec.Mount{ - Destination: "/sys/fs/cgroup/systemd", - Type: "bind", - Source: sourcePath, - Options: []string{"bind", "private"}, + systemdMnt := spec.Mount{ + Destination: "/sys/fs/cgroup/systemd", + Type: "bind", + Source: sourcePath, + Options: []string{"bind", "private"}, + } + g.AddMount(systemdMnt) + } else { + systemdMnt := spec.Mount{ + Destination: "/sys/fs/cgroup/systemd", + Type: "bind", + Source: "/sys/fs/cgroup/systemd", + Options: []string{"bind", "nodev", "noexec", "nosuid"}, + } + g.AddMount(systemdMnt) } - g.AddMount(systemdMnt) return nil } @@ -423,7 +461,7 @@ func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr return nil } -func (c *Container) checkpoint(ctx context.Context, keep bool) (err error) { +func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (err error) { if !criu.CheckForCriu() { return errors.Errorf("checkpointing a container requires at least CRIU %d", criu.MinCriuVersion) @@ -432,7 +470,7 @@ func (c *Container) checkpoint(ctx context.Context, keep bool) (err error) { if c.state.State != ContainerStateRunning { return errors.Wrapf(ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State) } - if err := c.runtime.ociRuntime.checkpointContainer(c); err != nil { + if err := c.runtime.ociRuntime.checkpointContainer(c, options); err != nil { return err } @@ -449,14 +487,16 @@ func (c *Container) checkpoint(ctx context.Context, keep bool) (err error) { logrus.Debugf("Checkpointed container %s", c.ID()) - c.state.State = ContainerStateStopped + if !options.KeepRunning { + c.state.State = ContainerStateStopped - // Cleanup Storage and Network - if err := c.cleanup(ctx); err != nil { - return err + // Cleanup Storage and Network + if err := c.cleanup(ctx); err != nil { + return err + } } - if !keep { + if !options.Keep { // Remove log file os.Remove(filepath.Join(c.bundlePath(), "dump.log")) // Remove statistic file @@ -466,7 +506,7 @@ func (c *Container) checkpoint(ctx context.Context, keep bool) (err error) { return c.save() } -func (c *Container) restore(ctx context.Context, keep bool) (err error) { +func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (err error) { if !criu.CheckForCriu() { return errors.Errorf("restoring a container requires at least CRIU %d", criu.MinCriuVersion) @@ -510,9 +550,6 @@ func (c *Container) restore(ctx context.Context, keep bool) (err error) { } } - if err := c.prepare(); err != nil { - return err - } defer func() { if err != nil { if err2 := c.cleanup(ctx); err2 != nil { @@ -521,6 +558,10 @@ func (c *Container) restore(ctx context.Context, keep bool) (err error) { } }() + if err := c.prepare(); err != nil { + return err + } + // TODO: use existing way to request static IPs, once it is merged in ocicni // https://github.com/cri-o/ocicni/pull/23/ @@ -553,7 +594,7 @@ func (c *Container) restore(ctx context.Context, keep bool) (err error) { // Cleanup for a working restore. c.removeConmonFiles() - if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, true); err != nil { + if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, &options); err != nil { return err } @@ -561,7 +602,7 @@ func (c *Container) restore(ctx context.Context, keep bool) (err error) { c.state.State = ContainerStateRunning - if !keep { + if !options.Keep { // Delete all checkpoint related files. At this point, in theory, all files // should exist. Still ignoring errors for now as the container should be // restored and running. Not erroring out just because some cleanup operation @@ -582,3 +623,225 @@ func (c *Container) restore(ctx context.Context, keep bool) (err error) { return c.save() } + +// Make standard bind mounts to include in the container +func (c *Container) makeBindMounts() error { + if err := os.Chown(c.state.RunDir, c.RootUID(), c.RootGID()); err != nil { + return errors.Wrapf(err, "cannot chown run directory %s", c.state.RunDir) + } + + if c.state.BindMounts == nil { + c.state.BindMounts = make(map[string]string) + } + + if !c.NetworkDisabled() { + // Make /etc/resolv.conf + if _, ok := c.state.BindMounts["/etc/resolv.conf"]; ok { + // If it already exists, delete so we can recreate + delete(c.state.BindMounts, "/etc/resolv.conf") + } + newResolv, err := c.generateResolvConf() + if err != nil { + return errors.Wrapf(err, "error creating resolv.conf for container %s", c.ID()) + } + c.state.BindMounts["/etc/resolv.conf"] = newResolv + + // Make /etc/hosts + if _, ok := c.state.BindMounts["/etc/hosts"]; ok { + // If it already exists, delete so we can recreate + delete(c.state.BindMounts, "/etc/hosts") + } + newHosts, err := c.generateHosts() + if err != nil { + return errors.Wrapf(err, "error creating hosts file for container %s", c.ID()) + } + c.state.BindMounts["/etc/hosts"] = newHosts + + } + + // SHM is always added when we mount the container + c.state.BindMounts["/dev/shm"] = c.config.ShmDir + + newPasswd, err := c.generatePasswd() + if err != nil { + return errors.Wrapf(err, "error creating temporary passwd file for container %s", c.ID()) + } + if newPasswd != "" { + // Make /etc/passwd + if _, ok := c.state.BindMounts["/etc/passwd"]; ok { + // If it already exists, delete so we can recreate + delete(c.state.BindMounts, "/etc/passwd") + } + logrus.Debugf("adding entry to /etc/passwd for non existent default user") + c.state.BindMounts["/etc/passwd"] = newPasswd + } + + // Make /etc/hostname + // This should never change, so no need to recreate if it exists + if _, ok := c.state.BindMounts["/etc/hostname"]; !ok { + hostnamePath, err := c.writeStringToRundir("hostname", c.Hostname()) + if err != nil { + return errors.Wrapf(err, "error creating hostname file for container %s", c.ID()) + } + c.state.BindMounts["/etc/hostname"] = hostnamePath + } + + // Make .containerenv + // Empty file, so no need to recreate if it exists + if _, ok := c.state.BindMounts["/run/.containerenv"]; !ok { + // Empty string for now, but we may consider populating this later + containerenvPath, err := c.writeStringToRundir(".containerenv", "") + if err != nil { + return errors.Wrapf(err, "error creating containerenv file for container %s", c.ID()) + } + c.state.BindMounts["/run/.containerenv"] = containerenvPath + } + + // Add Secret Mounts + secretMounts := secrets.SecretMountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.DefaultMountsFile, c.state.DestinationRunDir, c.RootUID(), c.RootGID()) + for _, mount := range secretMounts { + if _, ok := c.state.BindMounts[mount.Destination]; !ok { + c.state.BindMounts[mount.Destination] = mount.Source + } + } + + return nil +} + +// generateResolvConf generates a containers resolv.conf +func (c *Container) generateResolvConf() (string, error) { + // Determine the endpoint for resolv.conf in case it is a symlink + resolvPath, err := filepath.EvalSymlinks("/etc/resolv.conf") + if err != nil { + return "", err + } + + contents, err := ioutil.ReadFile(resolvPath) + if err != nil { + return "", errors.Wrapf(err, "unable to read %s", resolvPath) + } + + // Ensure that the container's /etc/resolv.conf is compatible with its + // network configuration. + // TODO: set ipv6 enable bool more sanely + resolv, err := resolvconf.FilterResolvDNS(contents, true, c.config.CreateNetNS) + if err != nil { + return "", errors.Wrapf(err, "error parsing host resolv.conf") + } + + // Make a new resolv.conf + nameservers := resolvconf.GetNameservers(resolv.Content) + if len(c.config.DNSServer) > 0 { + // We store DNS servers as net.IP, so need to convert to string + nameservers = []string{} + for _, server := range c.config.DNSServer { + nameservers = append(nameservers, server.String()) + } + } + + search := resolvconf.GetSearchDomains(resolv.Content) + if len(c.config.DNSSearch) > 0 { + search = c.config.DNSSearch + } + + options := resolvconf.GetOptions(resolv.Content) + if len(c.config.DNSOption) > 0 { + options = c.config.DNSOption + } + + destPath := filepath.Join(c.state.RunDir, "resolv.conf") + + if err := os.Remove(destPath); err != nil && !os.IsNotExist(err) { + return "", errors.Wrapf(err, "error removing resolv.conf for container %s", c.ID()) + } + + // Build resolv.conf + if _, err = resolvconf.Build(destPath, nameservers, search, options); err != nil { + return "", errors.Wrapf(err, "error building resolv.conf for container %s", c.ID()) + } + + // Relabel resolv.conf for the container + if err := label.Relabel(destPath, c.config.MountLabel, false); err != nil { + return "", err + } + + return filepath.Join(c.state.DestinationRunDir, "resolv.conf"), nil +} + +// generateHosts creates a containers hosts file +func (c *Container) generateHosts() (string, error) { + orig, err := ioutil.ReadFile("/etc/hosts") + if err != nil { + return "", errors.Wrapf(err, "unable to read /etc/hosts") + } + hosts := string(orig) + if len(c.config.HostAdd) > 0 { + for _, host := range c.config.HostAdd { + // the host format has already been verified at this point + fields := strings.SplitN(host, ":", 2) + hosts += fmt.Sprintf("%s %s\n", fields[1], fields[0]) + } + } + if len(c.state.NetworkStatus) > 0 && len(c.state.NetworkStatus[0].IPs) > 0 { + ipAddress := strings.Split(c.state.NetworkStatus[0].IPs[0].Address.String(), "/")[0] + hosts += fmt.Sprintf("%s\t%s\n", ipAddress, c.Hostname()) + } + return c.writeStringToRundir("hosts", hosts) +} + +// generatePasswd generates a container specific passwd file, +// iff g.config.User is a number +func (c *Container) generatePasswd() (string, error) { + var ( + groupspec string + gid int + ) + if c.config.User == "" { + return "", nil + } + spec := strings.SplitN(c.config.User, ":", 2) + userspec := spec[0] + if len(spec) > 1 { + groupspec = spec[1] + } + // If a non numeric User, then don't generate passwd + uid, err := strconv.ParseUint(userspec, 10, 32) + if err != nil { + return "", nil + } + // Lookup the user to see if it exists in the container image + _, err = lookup.GetUser(c.state.Mountpoint, userspec) + if err != nil && err != user.ErrNoPasswdEntries { + return "", err + } + if err == nil { + return "", nil + } + if groupspec != "" { + ugid, err := strconv.ParseUint(groupspec, 10, 32) + if err == nil { + gid = int(ugid) + } else { + group, err := lookup.GetGroup(c.state.Mountpoint, groupspec) + if err != nil { + return "", errors.Wrapf(err, "unable to get gid %s from group file", groupspec) + } + gid = group.Gid + } + } + originPasswdFile := filepath.Join(c.state.Mountpoint, "/etc/passwd") + orig, err := ioutil.ReadFile(originPasswdFile) + if err != nil && !os.IsNotExist(err) { + return "", errors.Wrapf(err, "unable to read passwd file %s", originPasswdFile) + } + + pwd := fmt.Sprintf("%s%d:x:%d:%d:container user:%s:/bin/sh\n", orig, uid, uid, gid, c.WorkingDir()) + passwdFile, err := c.writeStringToRundir("passwd", pwd) + if err != nil { + return "", errors.Wrapf(err, "failed to create temporary passwd file") + } + if os.Chmod(passwdFile, 0644); err != nil { + return "", err + } + return passwdFile, nil +} |