diff options
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/namespaces/namespaces.go | 14 | ||||
-rw-r--r-- | pkg/rootless/rootless_linux.go | 115 | ||||
-rw-r--r-- | pkg/rootless/rootless_unsupported.go | 6 | ||||
-rw-r--r-- | pkg/spec/config_linux.go | 3 | ||||
-rw-r--r-- | pkg/spec/createconfig.go | 112 | ||||
-rw-r--r-- | pkg/spec/spec.go | 59 | ||||
-rw-r--r-- | pkg/varlinkapi/containers_create.go | 1 | ||||
-rw-r--r-- | pkg/varlinkapi/images.go | 4 | ||||
-rw-r--r-- | pkg/varlinkapi/system.go | 15 |
9 files changed, 238 insertions, 91 deletions
diff --git a/pkg/namespaces/namespaces.go b/pkg/namespaces/namespaces.go index 1bdb2b00d..bee833fa9 100644 --- a/pkg/namespaces/namespaces.go +++ b/pkg/namespaces/namespaces.go @@ -28,6 +28,16 @@ func (n UsernsMode) Valid() bool { return true } +// IsContainer indicates whether container uses a container userns. +func (n UsernsMode) IsContainer() bool { + return false +} + +// Container is the id of the container which network this container is connected to. +func (n UsernsMode) Container() string { + return "" +} + // UTSMode represents the UTS namespace of the container. type UTSMode string @@ -191,8 +201,8 @@ func (n NetworkMode) IsContainer() bool { return len(parts) > 1 && parts[0] == "container" } -// ConnectedContainer is the id of the container which network this container is connected to. -func (n NetworkMode) ConnectedContainer() string { +// Container is the id of the container which network this container is connected to. +func (n NetworkMode) Container() string { parts := strings.SplitN(string(n), ":", 2) if len(parts) > 1 { return parts[1] diff --git a/pkg/rootless/rootless_linux.go b/pkg/rootless/rootless_linux.go index 92020cf1c..5c45f2694 100644 --- a/pkg/rootless/rootless_linux.go +++ b/pkg/rootless/rootless_linux.go @@ -9,15 +9,16 @@ import ( "os/exec" gosignal "os/signal" "os/user" - "path/filepath" "runtime" "strconv" + "strings" "syscall" "unsafe" "github.com/containers/storage/pkg/idtools" "github.com/docker/docker/pkg/signal" "github.com/pkg/errors" + "github.com/sirupsen/logrus" ) /* @@ -111,6 +112,48 @@ func JoinNS(pid uint) (bool, int, error) { return true, int(ret), nil } +// JoinNSPath re-exec podman in a new userNS and join the owner user namespace of the +// specified path. +func JoinNSPath(path string) (bool, int, error) { + if os.Geteuid() == 0 || os.Getenv("_LIBPOD_USERNS_CONFIGURED") != "" { + return false, -1, nil + } + + userNS, err := getUserNSForPath(path) + if err != nil { + return false, -1, err + } + defer userNS.Close() + + pidC := C.reexec_userns_join(C.int(userNS.Fd())) + if int(pidC) < 0 { + return false, -1, errors.Errorf("cannot re-exec process") + } + + ret := C.reexec_in_user_namespace_wait(pidC) + if ret < 0 { + return false, -1, errors.New("error waiting for the re-exec process") + } + + return true, int(ret), nil +} + +const defaultMinimumMappings = 65536 + +func getMinimumIDs(p string) int { + content, err := ioutil.ReadFile(p) + if err != nil { + logrus.Debugf("error reading data from %q, use a default value of %d", p, defaultMinimumMappings) + return defaultMinimumMappings + } + ret, err := strconv.Atoi(strings.TrimSuffix(string(content), "\n")) + if err != nil { + logrus.Debugf("error reading data from %q, use a default value of %d", p, defaultMinimumMappings) + return defaultMinimumMappings + } + return ret + 1 +} + // BecomeRootInUserNS re-exec podman in a new userNS. It returns whether podman was re-executed // into a new user namespace and the return code from the re-executed podman process. // If podman was re-executed the caller needs to propagate the error code returned by the child @@ -151,8 +194,28 @@ func BecomeRootInUserNS() (bool, int, error) { } } mappings, err := idtools.NewIDMappings(username, username) - if err != nil && os.Getenv("PODMAN_ALLOW_SINGLE_ID_MAPPING_IN_USERNS") == "" { - return false, -1, err + if os.Getenv("PODMAN_ALLOW_SINGLE_ID_MAPPING_IN_USERNS") == "" { + if err != nil { + return false, -1, err + } + + availableGIDs, availableUIDs := 0, 0 + for _, i := range mappings.UIDs() { + availableUIDs += i.Size + } + + minUIDs := getMinimumIDs("/proc/sys/kernel/overflowuid") + if availableUIDs < minUIDs { + return false, 0, fmt.Errorf("not enough UIDs available for the user, at least %d are needed", minUIDs) + } + + for _, i := range mappings.GIDs() { + availableGIDs += i.Size + } + minGIDs := getMinimumIDs("/proc/sys/kernel/overflowgid") + if availableGIDs < minGIDs { + return false, 0, fmt.Errorf("not enough GIDs available for the user, at least %d are needed", minGIDs) + } } if err == nil { uids = mappings.UIDs() @@ -226,7 +289,16 @@ func readUserNs(path string) (string, error) { } func readUserNsFd(fd uintptr) (string, error) { - return readUserNs(filepath.Join("/proc/self/fd", fmt.Sprintf("%d", fd))) + return readUserNs(fmt.Sprintf("/proc/self/fd/%d", fd)) +} + +func getOwner(fd uintptr) (uintptr, error) { + const nsGetUserns = 0xb701 + ret, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(nsGetUserns), 0) + if errno != 0 { + return 0, errno + } + return (uintptr)(unsafe.Pointer(ret)), nil } func getParentUserNs(fd uintptr) (uintptr, error) { @@ -238,7 +310,31 @@ func getParentUserNs(fd uintptr) (uintptr, error) { return (uintptr)(unsafe.Pointer(ret)), nil } -// getUserNSForPid returns an open FD for the first direct child user namespace that created the process +func getUserNSForPath(path string) (*os.File, error) { + u, err := os.Open(path) + if err != nil { + return nil, errors.Wrapf(err, "cannot open %s", path) + } + defer u.Close() + fd, err := getOwner(u.Fd()) + if err != nil { + return nil, err + } + + return getUserNSFirstChild(fd) +} + +func getUserNSForPid(pid uint) (*os.File, error) { + path := fmt.Sprintf("/proc/%d/ns/user", pid) + u, err := os.Open(path) + if err != nil { + return nil, errors.Wrapf(err, "cannot open %s", path) + } + + return getUserNSFirstChild(u.Fd()) +} + +// getUserNSFirstChild returns an open FD for the first direct child user namespace that created the process // Each container creates a new user namespace where the runtime runs. The current process in the container // might have created new user namespaces that are child of the initial namespace we created. // This function finds the initial namespace created for the container that is a child of the current namespace. @@ -250,19 +346,12 @@ func getParentUserNs(fd uintptr) (uintptr, error) { // b // / // NS READ USING THE PID -> c -func getUserNSForPid(pid uint) (*os.File, error) { +func getUserNSFirstChild(fd uintptr) (*os.File, error) { currentNS, err := readUserNs("/proc/self/ns/user") if err != nil { return nil, err } - path := filepath.Join("/proc", fmt.Sprintf("%d", pid), "ns/user") - u, err := os.Open(path) - if err != nil { - return nil, errors.Wrapf(err, "cannot open %s", path) - } - - fd := u.Fd() ns, err := readUserNsFd(fd) if err != nil { return nil, errors.Wrapf(err, "cannot read user namespace") diff --git a/pkg/rootless/rootless_unsupported.go b/pkg/rootless/rootless_unsupported.go index 31728e5c2..d72402c9f 100644 --- a/pkg/rootless/rootless_unsupported.go +++ b/pkg/rootless/rootless_unsupported.go @@ -36,3 +36,9 @@ func SkipStorageSetup() bool { func JoinNS(pid uint) (bool, int, error) { return false, -1, errors.New("this function is not supported on this os") } + +// JoinNSPath re-exec podman in a new userNS and join the owner user namespace of the +// specified path. +func JoinNSPath(path string) (bool, int, error) { + return false, -1, errors.New("this function is not supported on this os") +} diff --git a/pkg/spec/config_linux.go b/pkg/spec/config_linux.go index ea04b95bd..6c0a99419 100644 --- a/pkg/spec/config_linux.go +++ b/pkg/spec/config_linux.go @@ -60,6 +60,9 @@ func (c *CreateConfig) addPrivilegedDevices(g *generate.Generator) error { for _, d := range hostDevices { g.AddDevice(Device(d)) } + + // Add resources device - need to clear the existing one first. + g.Spec().Linux.Resources.Devices = nil g.AddLinuxResourcesDevice(true, "", nil, nil, "rwm") return nil } diff --git a/pkg/spec/createconfig.go b/pkg/spec/createconfig.go index a441b4019..887ef8e95 100644 --- a/pkg/spec/createconfig.go +++ b/pkg/spec/createconfig.go @@ -15,7 +15,6 @@ import ( "github.com/docker/go-connections/nat" spec "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-tools/generate" - "github.com/opencontainers/selinux/go-selinux/label" "github.com/pkg/errors" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" @@ -113,8 +112,7 @@ type CreateConfig struct { Quiet bool //quiet ReadOnlyRootfs bool //read-only Resources CreateResourceConfig - Rm bool //rm - ShmDir string + Rm bool //rm StopSignal syscall.Signal // stop-signal StopTimeout uint // stop-timeout Sysctl map[string]string //sysctl @@ -124,14 +122,14 @@ type CreateConfig struct { UsernsMode namespaces.UsernsMode //userns User string //user UtsMode namespaces.UTSMode //uts + Mounts []spec.Mount //mounts Volumes []string //volume VolumesFrom []string - WorkDir string //workdir - MountLabel string //SecurityOpts - ProcessLabel string //SecurityOpts - NoNewPrivs bool //SecurityOpts - ApparmorProfile string //SecurityOpts - SeccompProfilePath string //SecurityOpts + WorkDir string //workdir + LabelOpts []string //SecurityOpts + NoNewPrivs bool //SecurityOpts + ApparmorProfile string //SecurityOpts + SeccompProfilePath string //SecurityOpts SecurityOpts []string Rootfs string LocalVolumes []string //Keeps track of the built-in volumes of container used in the --volumes-from flag @@ -145,58 +143,59 @@ func (c *CreateConfig) CreateBlockIO() (*spec.LinuxBlockIO, error) { return c.createBlockIO() } +func processOptions(options []string) []string { + var ( + foundrw, foundro bool + rootProp string + ) + options = append(options, "rbind") + for _, opt := range options { + switch opt { + case "rw": + foundrw = true + case "ro": + foundro = true + case "private", "rprivate", "slave", "rslave", "shared", "rshared": + rootProp = opt + } + } + if !foundrw && !foundro { + options = append(options, "rw") + } + if rootProp == "" { + options = append(options, "rprivate") + } + return options +} + +func (c *CreateConfig) initFSMounts() []spec.Mount { + var mounts []spec.Mount + for _, m := range c.Mounts { + m.Options = processOptions(m.Options) + if m.Type == "tmpfs" { + m.Options = append(m.Options, "tmpcopyup") + } else { + mounts = append(mounts, m) + } + } + return mounts +} + //GetVolumeMounts takes user provided input for bind mounts and creates Mount structs func (c *CreateConfig) GetVolumeMounts(specMounts []spec.Mount) ([]spec.Mount, error) { var m []spec.Mount for _, i := range c.Volumes { - var ( - options []string - foundrw, foundro, foundz, foundZ bool - rootProp string - ) - - // We need to handle SELinux options better here, specifically :Z + var options []string spliti := strings.Split(i, ":") if len(spliti) > 2 { options = strings.Split(spliti[2], ",") } - options = append(options, "rbind") - for _, opt := range options { - switch opt { - case "rw": - foundrw = true - case "ro": - foundro = true - case "z": - foundz = true - case "Z": - foundZ = true - case "private", "rprivate", "slave", "rslave", "shared", "rshared": - rootProp = opt - } - } - if !foundrw && !foundro { - options = append(options, "rw") - } - if foundz { - if err := label.Relabel(spliti[0], c.MountLabel, true); err != nil { - return nil, errors.Wrapf(err, "relabel failed %q", spliti[0]) - } - } - if foundZ { - if err := label.Relabel(spliti[0], c.MountLabel, false); err != nil { - return nil, errors.Wrapf(err, "relabel failed %q", spliti[0]) - } - } - if rootProp == "" { - options = append(options, "rprivate") - } m = append(m, spec.Mount{ Destination: spliti[1], Type: string(TypeBind), Source: spliti[0], - Options: options, + Options: processOptions(options), }) logrus.Debugf("User mount %s:%s options %v", spliti[0], spliti[1], options) @@ -380,9 +379,9 @@ func (c *CreateConfig) GetContainerCreateOptions(runtime *libpod.Runtime) ([]lib if IsNS(string(c.NetMode)) { // pass } else if c.NetMode.IsContainer() { - connectedCtr, err := c.Runtime.LookupContainer(c.NetMode.ConnectedContainer()) + connectedCtr, err := c.Runtime.LookupContainer(c.NetMode.Container()) if err != nil { - return nil, errors.Wrapf(err, "container %q not found", c.NetMode.ConnectedContainer()) + return nil, errors.Wrapf(err, "container %q not found", c.NetMode.Container()) } options = append(options, libpod.WithNetNSFrom(connectedCtr)) } else if !c.NetMode.IsHost() && !c.NetMode.IsNone() { @@ -449,11 +448,20 @@ func (c *CreateConfig) GetContainerCreateOptions(runtime *libpod.Runtime) ([]lib useImageVolumes := c.ImageVolumeType == "bind" // Gather up the options for NewContainer which consist of With... funcs options = append(options, libpod.WithRootFSFromImage(c.ImageID, c.Image, useImageVolumes)) - options = append(options, libpod.WithSELinuxLabels(c.ProcessLabel, c.MountLabel)) + options = append(options, libpod.WithSecLabels(c.LabelOpts)) options = append(options, libpod.WithConmonPidFile(c.ConmonPidFile)) options = append(options, libpod.WithLabels(c.Labels)) options = append(options, libpod.WithUser(c.User)) - options = append(options, libpod.WithShmDir(c.ShmDir)) + if c.IpcMode.IsHost() { + options = append(options, libpod.WithShmDir("/dev/shm")) + + } else if c.IpcMode.IsContainer() { + ctr, err := runtime.LookupContainer(c.IpcMode.Container()) + if err != nil { + return nil, errors.Wrapf(err, "container %q not found", c.IpcMode.Container()) + } + options = append(options, libpod.WithShmDir(ctr.ShmDir())) + } options = append(options, libpod.WithShmSize(c.Resources.ShmSize)) options = append(options, libpod.WithGroups(c.GroupAdd)) options = append(options, libpod.WithIDMappings(*c.IDMappings)) diff --git a/pkg/spec/spec.go b/pkg/spec/spec.go index cc3501e1e..ad14ea65d 100644 --- a/pkg/spec/spec.go +++ b/pkg/spec/spec.go @@ -18,6 +18,34 @@ import ( const cpuPeriod = 100000 +func supercedeUserMounts(mounts []spec.Mount, configMount []spec.Mount) []spec.Mount { + if len(mounts) > 0 { + // If we have overlappings mounts, remove them from the spec in favor of + // the user-added volume mounts + destinations := make(map[string]bool) + for _, mount := range mounts { + destinations[path.Clean(mount.Destination)] = true + } + // Copy all mounts from spec to defaultMounts, except for + // - mounts overridden by a user supplied mount; + // - all mounts under /dev if a user supplied /dev is present; + mountDev := destinations["/dev"] + for _, mount := range configMount { + if _, ok := destinations[path.Clean(mount.Destination)]; !ok { + if mountDev && strings.HasPrefix(mount.Destination, "/dev/") { + // filter out everything under /dev if /dev is user-mounted + continue + } + + logrus.Debugf("Adding mount %s", mount.Destination) + mounts = append(mounts, mount) + } + } + return mounts + } + return configMount +} + // CreateConfigToOCISpec parses information needed to create a container into an OCI runtime spec func CreateConfigToOCISpec(config *CreateConfig) (*spec.Spec, error) { //nolint cgroupPerm := "ro" @@ -211,8 +239,6 @@ func CreateConfigToOCISpec(config *CreateConfig) (*spec.Spec, error) { //nolint // SECURITY OPTS g.SetProcessNoNewPrivileges(config.NoNewPrivs) g.SetProcessApparmorProfile(config.ApparmorProfile) - g.SetProcessSelinuxLabel(config.ProcessLabel) - g.SetLinuxMountLabel(config.MountLabel) if canAddResources { blockAccessToKernelFilesystems(config, &g) @@ -248,6 +274,12 @@ func CreateConfigToOCISpec(config *CreateConfig) (*spec.Spec, error) { //nolint g.AddMount(tmpfsMnt) } + for _, m := range config.Mounts { + if m.Type == "tmpfs" { + g.AddMount(m) + } + } + for name, val := range config.Env { g.AddProcessEnv(name, val) } @@ -307,29 +339,14 @@ func CreateConfigToOCISpec(config *CreateConfig) (*spec.Spec, error) { //nolint return nil, errors.Wrap(err, "error getting volume mounts from --volumes-from flag") } - mounts, err := config.GetVolumeMounts(configSpec.Mounts) + volumeMounts, err := config.GetVolumeMounts(configSpec.Mounts) if err != nil { return nil, errors.Wrapf(err, "error getting volume mounts") } - if len(mounts) > 0 { - // If we have overlappings mounts, remove them from the spec in favor of - // the user-added volume mounts - destinations := make(map[string]bool) - for _, mount := range mounts { - destinations[path.Clean(mount.Destination)] = true - } - for _, mount := range configSpec.Mounts { - if _, ok := destinations[path.Clean(mount.Destination)]; !ok { - logrus.Debugf("Adding mount %s", mount.Destination) - mounts = append(mounts, mount) - } - } - configSpec.Mounts = mounts - } - if err := g.SetLinuxRootPropagation("shared"); err != nil { - return nil, errors.Wrapf(err, "failed to set propagation to rslave") - } + configSpec.Mounts = supercedeUserMounts(volumeMounts, configSpec.Mounts) + //--mount + configSpec.Mounts = supercedeUserMounts(config.initFSMounts(), configSpec.Mounts) if canAddResources { // BLOCK IO blkio, err := config.CreateBlockIO() diff --git a/pkg/varlinkapi/containers_create.go b/pkg/varlinkapi/containers_create.go index 843d7a5ba..ca1a57048 100644 --- a/pkg/varlinkapi/containers_create.go +++ b/pkg/varlinkapi/containers_create.go @@ -202,7 +202,6 @@ func varlinkCreateToCreateConfig(ctx context.Context, create iopodman.Create, ru Ulimit: create.Resources.Ulimit, }, Rm: create.Rm, - ShmDir: create.Shm_dir, StopSignal: stopSignal, StopTimeout: uint(create.Stop_timeout), Sysctl: create.Sys_ctl, diff --git a/pkg/varlinkapi/images.go b/pkg/varlinkapi/images.go index 3cd2b879a..d14c61c39 100644 --- a/pkg/varlinkapi/images.go +++ b/pkg/varlinkapi/images.go @@ -9,6 +9,8 @@ import ( "strings" "time" + "github.com/containers/buildah" + "github.com/containers/buildah/imagebuildah" "github.com/containers/image/docker" "github.com/containers/image/manifest" "github.com/containers/image/types" @@ -21,8 +23,6 @@ import ( "github.com/opencontainers/image-spec/specs-go/v1" "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" - "github.com/projectatomic/buildah" - "github.com/projectatomic/buildah/imagebuildah" ) // ListImages lists all the images in the store diff --git a/pkg/varlinkapi/system.go b/pkg/varlinkapi/system.go index a90b72a6d..54bce3d35 100644 --- a/pkg/varlinkapi/system.go +++ b/pkg/varlinkapi/system.go @@ -34,6 +34,9 @@ func (i *LibpodAPI) Ping(call iopodman.VarlinkCall) error { // GetInfo returns details about the podman host and its stores func (i *LibpodAPI) GetInfo(call iopodman.VarlinkCall) error { + var ( + registries, insecureRegistries []string + ) podmanInfo := iopodman.PodmanInfo{} info, err := i.Runtime.Info() if err != nil { @@ -76,7 +79,19 @@ func (i *LibpodAPI) GetInfo(call iopodman.VarlinkCall) error { Graph_status: graphStatus, } + registriesInterface := info[2].Data["registries"] + insecureRegistriesInterface := info[3].Data["registries"] + if registriesInterface != nil { + registries = registriesInterface.([]string) + } + if insecureRegistriesInterface != nil { + insecureRegistries = insecureRegistriesInterface.([]string) + } + podmanInfo.Store = infoStore podmanInfo.Podman = pmaninfo + podmanInfo.Registries = registries + podmanInfo.Insecure_registries = insecureRegistries + return call.ReplyGetInfo(podmanInfo) } |