From d65ff6b3ec18aad6a64329c54a83d5ba5d51b62f Mon Sep 17 00:00:00 2001 From: Brent Baude Date: Sun, 2 Feb 2020 09:39:12 -0600 Subject: apiv2 container create using specgen this uses the specgen structure to create containers rather than the outdated createconfig. right now, only the apiv2 create is wired up. eventually the cli will also have to be done. Signed-off-by: Brent Baude --- pkg/specgen/config_linux_cgo.go | 62 +++++ pkg/specgen/config_linux_nocgo.go | 11 + pkg/specgen/config_unsupported.go | 12 + pkg/specgen/create.go | 187 +++++++++++++++ pkg/specgen/namespaces.go | 467 ++++++++++++++++++++++++++++++++++++++ pkg/specgen/oci.go | 260 +++++++++++++++++++++ pkg/specgen/specgen.go | 99 ++++---- pkg/specgen/validate.go | 159 +++++++++++++ 8 files changed, 1214 insertions(+), 43 deletions(-) create mode 100644 pkg/specgen/config_linux_cgo.go create mode 100644 pkg/specgen/config_linux_nocgo.go create mode 100644 pkg/specgen/config_unsupported.go create mode 100644 pkg/specgen/create.go create mode 100644 pkg/specgen/namespaces.go create mode 100644 pkg/specgen/oci.go create mode 100644 pkg/specgen/validate.go (limited to 'pkg/specgen') diff --git a/pkg/specgen/config_linux_cgo.go b/pkg/specgen/config_linux_cgo.go new file mode 100644 index 000000000..6f547a40d --- /dev/null +++ b/pkg/specgen/config_linux_cgo.go @@ -0,0 +1,62 @@ +// +build linux,cgo + +package specgen + +import ( + "context" + "io/ioutil" + + "github.com/containers/libpod/libpod/image" + "github.com/containers/libpod/pkg/seccomp" + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + goSeccomp "github.com/seccomp/containers-golang" + "github.com/sirupsen/logrus" +) + +func (s *SpecGenerator) getSeccompConfig(configSpec *spec.Spec, img *image.Image) (*spec.LinuxSeccomp, error) { + var seccompConfig *spec.LinuxSeccomp + var err error + + scp, err := seccomp.LookupPolicy(s.SeccompPolicy) + if err != nil { + return nil, err + } + + if scp == seccomp.PolicyImage { + labels, err := img.Labels(context.Background()) + if err != nil { + return nil, err + } + imagePolicy := labels[seccomp.ContainerImageLabel] + if len(imagePolicy) < 1 { + return nil, errors.New("no seccomp policy defined by image") + } + logrus.Debug("Loading seccomp profile from the security config") + seccompConfig, err = goSeccomp.LoadProfile(imagePolicy, configSpec) + if err != nil { + return nil, errors.Wrap(err, "loading seccomp profile failed") + } + return seccompConfig, nil + } + + if s.SeccompProfilePath != "" { + logrus.Debugf("Loading seccomp profile from %q", s.SeccompProfilePath) + seccompProfile, err := ioutil.ReadFile(s.SeccompProfilePath) + if err != nil { + return nil, errors.Wrapf(err, "opening seccomp profile (%s) failed", s.SeccompProfilePath) + } + seccompConfig, err = goSeccomp.LoadProfile(string(seccompProfile), configSpec) + if err != nil { + return nil, errors.Wrapf(err, "loading seccomp profile (%s) failed", s.SeccompProfilePath) + } + } else { + logrus.Debug("Loading default seccomp profile") + seccompConfig, err = goSeccomp.GetDefaultProfile(configSpec) + if err != nil { + return nil, errors.Wrapf(err, "loading seccomp profile (%s) failed", s.SeccompProfilePath) + } + } + + return seccompConfig, nil +} diff --git a/pkg/specgen/config_linux_nocgo.go b/pkg/specgen/config_linux_nocgo.go new file mode 100644 index 000000000..fc0c58c37 --- /dev/null +++ b/pkg/specgen/config_linux_nocgo.go @@ -0,0 +1,11 @@ +// +build linux,!cgo + +package specgen + +import ( + spec "github.com/opencontainers/runtime-spec/specs-go" +) + +func (s *SpecGenerator) getSeccompConfig(configSpec *spec.Spec) (*spec.LinuxSeccomp, error) { + return nil, nil +} diff --git a/pkg/specgen/config_unsupported.go b/pkg/specgen/config_unsupported.go new file mode 100644 index 000000000..5d24ac39c --- /dev/null +++ b/pkg/specgen/config_unsupported.go @@ -0,0 +1,12 @@ +// +build !linux + +package specgen + +import ( + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" +) + +func (s *SpecGenerator) getSeccompConfig(configSpec *spec.Spec) (*spec.LinuxSeccomp, error) { + return nil, errors.New("function not supported on non-linux OS's") +} diff --git a/pkg/specgen/create.go b/pkg/specgen/create.go new file mode 100644 index 000000000..c8fee5f05 --- /dev/null +++ b/pkg/specgen/create.go @@ -0,0 +1,187 @@ +package specgen + +import ( + "context" + "github.com/containers/libpod/libpod" + "github.com/containers/libpod/libpod/config" + "github.com/containers/libpod/libpod/define" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "os" +) + +// MakeContainer creates a container based on the SpecGenerator +func (s *SpecGenerator) MakeContainer(rt *libpod.Runtime) (*libpod.Container, error) { + var pod *libpod.Pod + if err := s.validate(rt); err != nil { + return nil, errors.Wrap(err, "invalid config provided") + } + rtc, err := rt.GetConfig() + if err != nil { + return nil, err + } + + options, err := s.createContainerOptions(rt, pod) + if err != nil { + return nil, err + } + + podmanPath, err := os.Executable() + if err != nil { + return nil, err + } + options = append(options, s.createExitCommandOption(rtc, podmanPath)) + newImage, err := rt.ImageRuntime().NewFromLocal(s.Image) + if err != nil { + return nil, err + } + + // TODO mheon wants to talk with Dan about this + useImageVolumes := s.ImageVolumeMode == "bind" + options = append(options, libpod.WithRootFSFromImage(newImage.ID(), s.Image, useImageVolumes)) + + runtimeSpec, err := s.toOCISpec(rt, newImage) + if err != nil { + return nil, err + } + return rt.NewContainer(context.Background(), runtimeSpec, options...) +} + +func (s *SpecGenerator) createContainerOptions(rt *libpod.Runtime, pod *libpod.Pod) ([]libpod.CtrCreateOption, error) { + var options []libpod.CtrCreateOption + var err error + + if s.Stdin { + options = append(options, libpod.WithStdin()) + } + if len(s.Systemd) > 0 { + options = append(options, libpod.WithSystemd()) + } + if len(s.Name) > 0 { + logrus.Debugf("setting container name %s", s.Name) + options = append(options, libpod.WithName(s.Name)) + } + if s.Pod != "" { + logrus.Debugf("adding container to pod %s", s.Pod) + options = append(options, rt.WithPod(pod)) + } + destinations := []string{} + // // Take all mount and named volume destinations. + for _, mount := range s.Mounts { + destinations = append(destinations, mount.Destination) + } + for _, volume := range s.Volumes { + destinations = append(destinations, volume.Dest) + } + options = append(options, libpod.WithUserVolumes(destinations)) + + if len(s.Volumes) != 0 { + options = append(options, libpod.WithNamedVolumes(s.Volumes)) + } + + if len(s.Command) != 0 { + options = append(options, libpod.WithCommand(s.Command)) + } + + options = append(options, libpod.WithEntrypoint(s.Entrypoint)) + if s.StopSignal != nil { + options = append(options, libpod.WithStopSignal(*s.StopSignal)) + } + if s.StopTimeout != nil { + options = append(options, libpod.WithStopTimeout(*s.StopTimeout)) + } + if s.LogConfiguration != nil { + if len(s.LogConfiguration.Path) > 0 { + options = append(options, libpod.WithLogPath(s.LogConfiguration.Path)) + } + if len(s.LogConfiguration.Options) > 0 && s.LogConfiguration.Options["tag"] != "" { + // Note: I'm really guessing here. + options = append(options, libpod.WithLogTag(s.LogConfiguration.Options["tag"])) + } + + if len(s.LogConfiguration.Driver) > 0 { + options = append(options, libpod.WithLogDriver(s.LogConfiguration.Driver)) + } + } + + // Security options + if len(s.SelinuxOpts) > 0 { + options = append(options, libpod.WithSecLabels(s.SelinuxOpts)) + } + options = append(options, libpod.WithPrivileged(s.Privileged)) + + // Get namespace related options + namespaceOptions, err := s.generateNamespaceContainerOpts(rt) + if err != nil { + return nil, err + } + options = append(options, namespaceOptions...) + + // TODO NetworkNS still needs to be done! + if len(s.ConmonPidFile) > 0 { + options = append(options, libpod.WithConmonPidFile(s.ConmonPidFile)) + } + options = append(options, libpod.WithLabels(s.Labels)) + if s.ShmSize != nil { + options = append(options, libpod.WithShmSize(*s.ShmSize)) + } + if s.Rootfs != "" { + options = append(options, libpod.WithRootFS(s.Rootfs)) + } + // Default used if not overridden on command line + + if s.RestartPolicy != "" { + if s.RestartPolicy == "unless-stopped" { + return nil, errors.Wrapf(define.ErrInvalidArg, "the unless-stopped restart policy is not supported") + } + if s.RestartRetries != nil { + options = append(options, libpod.WithRestartRetries(*s.RestartRetries)) + } + options = append(options, libpod.WithRestartPolicy(s.RestartPolicy)) + } + + if s.ContainerHealthCheckConfig.HealthConfig != nil { + options = append(options, libpod.WithHealthCheck(s.ContainerHealthCheckConfig.HealthConfig)) + logrus.Debugf("New container has a health check") + } + return options, nil +} + +func (s *SpecGenerator) createExitCommandOption(config *config.Config, podmanPath string) libpod.CtrCreateOption { + // We need a cleanup process for containers in the current model. + // But we can't assume that the caller is Podman - it could be another + // user of the API. + // As such, provide a way to specify a path to Podman, so we can + // still invoke a cleanup process. + + command := []string{podmanPath, + "--root", config.StorageConfig.GraphRoot, + "--runroot", config.StorageConfig.RunRoot, + "--log-level", logrus.GetLevel().String(), + "--cgroup-manager", config.CgroupManager, + "--tmpdir", config.TmpDir, + } + if config.OCIRuntime != "" { + command = append(command, []string{"--runtime", config.OCIRuntime}...) + } + if config.StorageConfig.GraphDriverName != "" { + command = append(command, []string{"--storage-driver", config.StorageConfig.GraphDriverName}...) + } + for _, opt := range config.StorageConfig.GraphDriverOptions { + command = append(command, []string{"--storage-opt", opt}...) + } + if config.EventsLogger != "" { + command = append(command, []string{"--events-backend", config.EventsLogger}...) + } + + // TODO Mheon wants to leave this for now + //if s.sys { + // command = append(command, "--syslog", "true") + //} + command = append(command, []string{"container", "cleanup"}...) + + if s.Remove { + command = append(command, "--rm") + } + return libpod.WithExitCommand(command) +} diff --git a/pkg/specgen/namespaces.go b/pkg/specgen/namespaces.go new file mode 100644 index 000000000..025cb31e0 --- /dev/null +++ b/pkg/specgen/namespaces.go @@ -0,0 +1,467 @@ +package specgen + +import ( + "os" + + "github.com/containers/libpod/libpod" + "github.com/containers/libpod/libpod/image" + "github.com/containers/libpod/pkg/capabilities" + "github.com/cri-o/ocicni/pkg/ocicni" + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +type NamespaceMode string + +const ( + // Host means the the namespace is derived from + // the host + Host NamespaceMode = "host" + // Path is the path to a namespace + Path NamespaceMode = "path" + // FromContainer means namespace is derived from a + // different container + FromContainer NamespaceMode = "container" + // FromPod indicates the namespace is derived from a pod + FromPod NamespaceMode = "pod" + // Private indicates the namespace is private + Private NamespaceMode = "private" + // NoNetwork indicates no network namespace should + // be joined. loopback should still exists + NoNetwork NamespaceMode = "none" + // Bridge indicates that a CNI network stack + // should be used + Bridge NamespaceMode = "bridge" + // Slirp indicates that a slirp4ns network stack should + // be used + Slirp NamespaceMode = "slirp4ns" +) + +// Namespace describes the namespace +type Namespace struct { + NSMode NamespaceMode `json:"nsmode,omitempty"` + Value string `json:"string,omitempty"` +} + +// IsHost returns a bool if the namespace is host based +func (n *Namespace) IsHost() bool { + return n.NSMode == Host +} + +// IsPath indicates via bool if the namespace is based on a path +func (n *Namespace) IsPath() bool { + return n.NSMode == Path +} + +// IsContainer indicates via bool if the namespace is based on a container +func (n *Namespace) IsContainer() bool { + return n.NSMode == FromContainer +} + +// IsPod indicates via bool if the namespace is based on a pod +func (n *Namespace) IsPod() bool { + return n.NSMode == FromPod +} + +// IsPrivate indicates the namespace is private +func (n *Namespace) IsPrivate() bool { + return n.NSMode == Private +} + +// validate perform simple validation on the namespace to make sure it is not +// invalid from the get-go +func (n *Namespace) validate() error { + if n == nil { + return nil + } + switch n.NSMode { + case Host, Path, FromContainer, FromPod, Private, NoNetwork, Bridge, Slirp: + break + default: + return errors.Errorf("invalid network %q", n.NSMode) + } + // Path and From Container MUST have a string value set + if n.NSMode == Path || n.NSMode == FromContainer { + if len(n.Value) < 1 { + return errors.Errorf("namespace mode %s requires a value", n.NSMode) + } + } else { + // All others must NOT set a string value + if len(n.Value) > 0 { + return errors.Errorf("namespace value %s cannot be provided with namespace mode %s", n.Value, n.NSMode) + } + } + return nil +} + +func (s *SpecGenerator) generateNamespaceContainerOpts(rt *libpod.Runtime) ([]libpod.CtrCreateOption, error) { + var portBindings []ocicni.PortMapping + options := make([]libpod.CtrCreateOption, 0) + + // Cgroups + switch { + case s.CgroupNS.IsPrivate(): + ns := s.CgroupNS.Value + if _, err := os.Stat(ns); err != nil { + return nil, err + } + case s.CgroupNS.IsContainer(): + connectedCtr, err := rt.LookupContainer(s.CgroupNS.Value) + if err != nil { + return nil, errors.Wrapf(err, "container %q not found", s.CgroupNS.Value) + } + options = append(options, libpod.WithCgroupNSFrom(connectedCtr)) + // TODO + //default: + // return nil, errors.New("cgroup name only supports private and container") + } + + if s.CgroupParent != "" { + options = append(options, libpod.WithCgroupParent(s.CgroupParent)) + } + + if s.CgroupsMode != "" { + options = append(options, libpod.WithCgroupsMode(s.CgroupsMode)) + } + + // ipc + switch { + case s.IpcNS.IsHost(): + options = append(options, libpod.WithShmDir("/dev/shm")) + case s.IpcNS.IsContainer(): + connectedCtr, err := rt.LookupContainer(s.IpcNS.Value) + if err != nil { + return nil, errors.Wrapf(err, "container %q not found", s.IpcNS.Value) + } + options = append(options, libpod.WithIPCNSFrom(connectedCtr)) + options = append(options, libpod.WithShmDir(connectedCtr.ShmDir())) + } + + // pid + if s.PidNS.IsContainer() { + connectedCtr, err := rt.LookupContainer(s.PidNS.Value) + if err != nil { + return nil, errors.Wrapf(err, "container %q not found", s.PidNS.Value) + } + options = append(options, libpod.WithPIDNSFrom(connectedCtr)) + } + + // uts + switch { + case s.UtsNS.IsPod(): + connectedPod, err := rt.LookupPod(s.UtsNS.Value) + if err != nil { + return nil, errors.Wrapf(err, "pod %q not found", s.UtsNS.Value) + } + options = append(options, libpod.WithUTSNSFromPod(connectedPod)) + case s.UtsNS.IsContainer(): + connectedCtr, err := rt.LookupContainer(s.UtsNS.Value) + if err != nil { + return nil, errors.Wrapf(err, "container %q not found", s.UtsNS.Value) + } + + options = append(options, libpod.WithUTSNSFrom(connectedCtr)) + } + + if s.UseImageHosts { + options = append(options, libpod.WithUseImageHosts()) + } else if len(s.HostAdd) > 0 { + options = append(options, libpod.WithHosts(s.HostAdd)) + } + + // User + + switch { + case s.UserNS.IsPath(): + ns := s.UserNS.Value + if ns == "" { + return nil, errors.Errorf("invalid empty user-defined user namespace") + } + _, err := os.Stat(ns) + if err != nil { + return nil, err + } + if s.IDMappings != nil { + options = append(options, libpod.WithIDMappings(*s.IDMappings)) + } + case s.UserNS.IsContainer(): + connectedCtr, err := rt.LookupContainer(s.UserNS.Value) + if err != nil { + return nil, errors.Wrapf(err, "container %q not found", s.UserNS.Value) + } + options = append(options, libpod.WithUserNSFrom(connectedCtr)) + default: + if s.IDMappings != nil { + options = append(options, libpod.WithIDMappings(*s.IDMappings)) + } + } + + options = append(options, libpod.WithUser(s.User)) + options = append(options, libpod.WithGroups(s.Groups)) + + if len(s.PortMappings) > 0 { + portBindings = s.PortMappings + } + + switch { + case s.NetNS.IsPath(): + ns := s.NetNS.Value + if ns == "" { + return nil, errors.Errorf("invalid empty user-defined network namespace") + } + _, err := os.Stat(ns) + if err != nil { + return nil, err + } + case s.NetNS.IsContainer(): + connectedCtr, err := rt.LookupContainer(s.NetNS.Value) + if err != nil { + return nil, errors.Wrapf(err, "container %q not found", s.NetNS.Value) + } + options = append(options, libpod.WithNetNSFrom(connectedCtr)) + case !s.NetNS.IsHost() && s.NetNS.NSMode != NoNetwork: + postConfigureNetNS := !s.UserNS.IsHost() + options = append(options, libpod.WithNetNS(portBindings, postConfigureNetNS, string(s.NetNS.NSMode), s.CNINetworks)) + } + + if len(s.DNSSearch) > 0 { + options = append(options, libpod.WithDNSSearch(s.DNSSearch)) + } + if len(s.DNSServer) > 0 { + // TODO I'm not sure how we are going to handle this given the input + if len(s.DNSServer) == 1 { //&& strings.ToLower(s.DNSServer[0].) == "none" { + options = append(options, libpod.WithUseImageResolvConf()) + } else { + var dnsServers []string + for _, d := range s.DNSServer { + dnsServers = append(dnsServers, d.String()) + } + options = append(options, libpod.WithDNS(dnsServers)) + } + } + if len(s.DNSOption) > 0 { + options = append(options, libpod.WithDNSOption(s.DNSOption)) + } + if s.StaticIP != nil { + options = append(options, libpod.WithStaticIP(*s.StaticIP)) + } + + if s.StaticMAC != nil { + options = append(options, libpod.WithStaticMAC(*s.StaticMAC)) + } + return options, nil +} + +func (s *SpecGenerator) pidConfigureGenerator(g *generate.Generator) error { + if s.PidNS.IsPath() { + return g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), s.PidNS.Value) + } + if s.PidNS.IsHost() { + return g.RemoveLinuxNamespace(string(spec.PIDNamespace)) + } + if s.PidNS.IsContainer() { + logrus.Debugf("using container %s pidmode", s.PidNS.Value) + } + if s.PidNS.IsPod() { + logrus.Debug("using pod pidmode") + } + return nil +} + +func (s *SpecGenerator) utsConfigureGenerator(g *generate.Generator, runtime *libpod.Runtime) error { + hostname := s.Hostname + var err error + if hostname == "" { + switch { + case s.UtsNS.IsContainer(): + utsCtr, err := runtime.GetContainer(s.UtsNS.Value) + if err != nil { + return errors.Wrapf(err, "unable to retrieve hostname from dependency container %s", s.UtsNS.Value) + } + hostname = utsCtr.Hostname() + case s.NetNS.IsHost() || s.UtsNS.IsHost(): + hostname, err = os.Hostname() + if err != nil { + return errors.Wrap(err, "unable to retrieve hostname of the host") + } + default: + logrus.Debug("No hostname set; container's hostname will default to runtime default") + } + } + g.RemoveHostname() + if s.Hostname != "" || !s.UtsNS.IsHost() { + // Set the hostname in the OCI configuration only + // if specified by the user or if we are creating + // a new UTS namespace. + g.SetHostname(hostname) + } + g.AddProcessEnv("HOSTNAME", hostname) + + if s.UtsNS.IsPath() { + return g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), s.UtsNS.Value) + } + if s.UtsNS.IsHost() { + return g.RemoveLinuxNamespace(string(spec.UTSNamespace)) + } + if s.UtsNS.IsContainer() { + logrus.Debugf("using container %s utsmode", s.UtsNS.Value) + } + return nil +} + +func (s *SpecGenerator) ipcConfigureGenerator(g *generate.Generator) error { + if s.IpcNS.IsPath() { + return g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), s.IpcNS.Value) + } + if s.IpcNS.IsHost() { + return g.RemoveLinuxNamespace(s.IpcNS.Value) + } + if s.IpcNS.IsContainer() { + logrus.Debugf("Using container %s ipcmode", s.IpcNS.Value) + } + return nil +} + +func (s *SpecGenerator) cgroupConfigureGenerator(g *generate.Generator) error { + if s.CgroupNS.IsPath() { + return g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), s.CgroupNS.Value) + } + if s.CgroupNS.IsHost() { + return g.RemoveLinuxNamespace(s.CgroupNS.Value) + } + if s.CgroupNS.IsPrivate() { + return g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), "") + } + if s.CgroupNS.IsContainer() { + logrus.Debugf("Using container %s cgroup mode", s.CgroupNS.Value) + } + return nil +} + +func (s *SpecGenerator) networkConfigureGenerator(g *generate.Generator) error { + switch { + case s.NetNS.IsHost(): + logrus.Debug("Using host netmode") + if err := g.RemoveLinuxNamespace(string(spec.NetworkNamespace)); err != nil { + return err + } + + case s.NetNS.NSMode == NoNetwork: + logrus.Debug("Using none netmode") + case s.NetNS.NSMode == Bridge: + logrus.Debug("Using bridge netmode") + case s.NetNS.IsContainer(): + logrus.Debugf("using container %s netmode", s.NetNS.Value) + case s.NetNS.IsPath(): + logrus.Debug("Using ns netmode") + if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), s.NetNS.Value); err != nil { + return err + } + case s.NetNS.IsPod(): + logrus.Debug("Using pod netmode, unless pod is not sharing") + case s.NetNS.NSMode == Slirp: + logrus.Debug("Using slirp4netns netmode") + default: + return errors.Errorf("unknown network mode") + } + + if g.Config.Annotations == nil { + g.Config.Annotations = make(map[string]string) + } + + if s.PublishImagePorts { + g.Config.Annotations[libpod.InspectAnnotationPublishAll] = libpod.InspectResponseTrue + } else { + g.Config.Annotations[libpod.InspectAnnotationPublishAll] = libpod.InspectResponseFalse + } + + return nil +} + +func (s *SpecGenerator) userConfigureGenerator(g *generate.Generator) error { + if s.UserNS.IsPath() { + if err := g.AddOrReplaceLinuxNamespace(string(spec.UserNamespace), s.UserNS.Value); err != nil { + return err + } + // runc complains if no mapping is specified, even if we join another ns. So provide a dummy mapping + g.AddLinuxUIDMapping(uint32(0), uint32(0), uint32(1)) + g.AddLinuxGIDMapping(uint32(0), uint32(0), uint32(1)) + } + + if s.IDMappings != nil { + if (len(s.IDMappings.UIDMap) > 0 || len(s.IDMappings.GIDMap) > 0) && !s.UserNS.IsHost() { + if err := g.AddOrReplaceLinuxNamespace(string(spec.UserNamespace), ""); err != nil { + return err + } + } + for _, uidmap := range s.IDMappings.UIDMap { + g.AddLinuxUIDMapping(uint32(uidmap.HostID), uint32(uidmap.ContainerID), uint32(uidmap.Size)) + } + for _, gidmap := range s.IDMappings.GIDMap { + g.AddLinuxGIDMapping(uint32(gidmap.HostID), uint32(gidmap.ContainerID), uint32(gidmap.Size)) + } + } + return nil +} + +func (s *SpecGenerator) securityConfigureGenerator(g *generate.Generator, newImage *image.Image) error { + // HANDLE CAPABILITIES + // NOTE: Must happen before SECCOMP + if s.Privileged { + g.SetupPrivileged(true) + } + + useNotRoot := func(user string) bool { + if user == "" || user == "root" || user == "0" { + return false + } + return true + } + configSpec := g.Config + var err error + var caplist []string + bounding := configSpec.Process.Capabilities.Bounding + if useNotRoot(s.User) { + configSpec.Process.Capabilities.Bounding = caplist + } + caplist, err = capabilities.MergeCapabilities(configSpec.Process.Capabilities.Bounding, s.CapAdd, s.CapDrop) + if err != nil { + return err + } + + configSpec.Process.Capabilities.Bounding = caplist + configSpec.Process.Capabilities.Permitted = caplist + configSpec.Process.Capabilities.Inheritable = caplist + configSpec.Process.Capabilities.Effective = caplist + configSpec.Process.Capabilities.Ambient = caplist + if useNotRoot(s.User) { + caplist, err = capabilities.MergeCapabilities(bounding, s.CapAdd, s.CapDrop) + if err != nil { + return err + } + } + configSpec.Process.Capabilities.Bounding = caplist + + // HANDLE SECCOMP + if s.SeccompProfilePath != "unconfined" { + seccompConfig, err := s.getSeccompConfig(configSpec, newImage) + if err != nil { + return err + } + configSpec.Linux.Seccomp = seccompConfig + } + + // Clear default Seccomp profile from Generator for privileged containers + if s.SeccompProfilePath == "unconfined" || s.Privileged { + configSpec.Linux.Seccomp = nil + } + + g.SetRootReadonly(s.ReadOnlyFilesystem) + for sysctlKey, sysctlVal := range s.Sysctl { + g.AddLinuxSysctl(sysctlKey, sysctlVal) + } + + return nil +} diff --git a/pkg/specgen/oci.go b/pkg/specgen/oci.go new file mode 100644 index 000000000..2523f21b3 --- /dev/null +++ b/pkg/specgen/oci.go @@ -0,0 +1,260 @@ +package specgen + +import ( + "strings" + + "github.com/containers/libpod/libpod" + "github.com/containers/libpod/libpod/image" + "github.com/containers/libpod/pkg/rootless" + createconfig "github.com/containers/libpod/pkg/spec" + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" +) + +func (s *SpecGenerator) toOCISpec(rt *libpod.Runtime, newImage *image.Image) (*spec.Spec, error) { + var ( + inUserNS bool + ) + cgroupPerm := "ro" + g, err := generate.New("linux") + if err != nil { + return nil, err + } + // Remove the default /dev/shm mount to ensure we overwrite it + g.RemoveMount("/dev/shm") + g.HostSpecific = true + addCgroup := true + canMountSys := true + + isRootless := rootless.IsRootless() + if isRootless { + inUserNS = true + } + if !s.UserNS.IsHost() { + if s.UserNS.IsContainer() || s.UserNS.IsPath() { + inUserNS = true + } + if s.UserNS.IsPrivate() { + inUserNS = true + } + } + if inUserNS && s.NetNS.IsHost() { + canMountSys = false + } + + if s.Privileged && canMountSys { + cgroupPerm = "rw" + g.RemoveMount("/sys") + sysMnt := spec.Mount{ + Destination: "/sys", + Type: "sysfs", + Source: "sysfs", + Options: []string{"rprivate", "nosuid", "noexec", "nodev", "rw"}, + } + g.AddMount(sysMnt) + } else if !canMountSys { + addCgroup = false + g.RemoveMount("/sys") + r := "ro" + if s.Privileged { + r = "rw" + } + sysMnt := spec.Mount{ + Destination: "/sys", + Type: "bind", // should we use a constant for this, like createconfig? + Source: "/sys", + Options: []string{"rprivate", "nosuid", "noexec", "nodev", r, "rbind"}, + } + g.AddMount(sysMnt) + if !s.Privileged && isRootless { + g.AddLinuxMaskedPaths("/sys/kernel") + } + } + gid5Available := true + if isRootless { + nGids, err := createconfig.GetAvailableGids() + if err != nil { + return nil, err + } + gid5Available = nGids >= 5 + } + // When using a different user namespace, check that the GID 5 is mapped inside + // the container. + if gid5Available && (s.IDMappings != nil && len(s.IDMappings.GIDMap) > 0) { + mappingFound := false + for _, r := range s.IDMappings.GIDMap { + if r.ContainerID <= 5 && 5 < r.ContainerID+r.Size { + mappingFound = true + break + } + } + if !mappingFound { + gid5Available = false + } + + } + if !gid5Available { + // If we have no GID mappings, the gid=5 default option would fail, so drop it. + g.RemoveMount("/dev/pts") + devPts := spec.Mount{ + Destination: "/dev/pts", + Type: "devpts", + Source: "devpts", + Options: []string{"rprivate", "nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620"}, + } + g.AddMount(devPts) + } + + if inUserNS && s.IpcNS.IsHost() { + g.RemoveMount("/dev/mqueue") + devMqueue := spec.Mount{ + Destination: "/dev/mqueue", + Type: "bind", // constant ? + Source: "/dev/mqueue", + Options: []string{"bind", "nosuid", "noexec", "nodev"}, + } + g.AddMount(devMqueue) + } + if inUserNS && s.PidNS.IsHost() { + g.RemoveMount("/proc") + procMount := spec.Mount{ + Destination: "/proc", + Type: createconfig.TypeBind, + Source: "/proc", + Options: []string{"rbind", "nosuid", "noexec", "nodev"}, + } + g.AddMount(procMount) + } + + if addCgroup { + cgroupMnt := spec.Mount{ + Destination: "/sys/fs/cgroup", + Type: "cgroup", + Source: "cgroup", + Options: []string{"rprivate", "nosuid", "noexec", "nodev", "relatime", cgroupPerm}, + } + g.AddMount(cgroupMnt) + } + g.SetProcessCwd(s.WorkDir) + g.SetProcessArgs(s.Command) + g.SetProcessTerminal(s.Terminal) + + for key, val := range s.Annotations { + g.AddAnnotation(key, val) + } + g.AddProcessEnv("container", "podman") + + g.Config.Linux.Resources = s.ResourceLimits + + // Devices + if s.Privileged { + // If privileged, we need to add all the host devices to the + // spec. We do not add the user provided ones because we are + // already adding them all. + if err := createconfig.AddPrivilegedDevices(&g); err != nil { + return nil, err + } + } else { + for _, device := range s.Devices { + if err := createconfig.DevicesFromPath(&g, device.Path); err != nil { + return nil, err + } + } + } + + // SECURITY OPTS + g.SetProcessNoNewPrivileges(s.NoNewPrivileges) + + if !s.Privileged { + g.SetProcessApparmorProfile(s.ApparmorProfile) + } + + createconfig.BlockAccessToKernelFilesystems(s.Privileged, s.PidNS.IsHost(), &g) + + for name, val := range s.Env { + g.AddProcessEnv(name, val) + } + + // TODO rlimits and ulimits needs further refinement by someone more + // familiar with the code. + //if err := addRlimits(config, &g); err != nil { + // return nil, err + //} + + // NAMESPACES + + if err := s.pidConfigureGenerator(&g); err != nil { + return nil, err + } + + if err := s.userConfigureGenerator(&g); err != nil { + return nil, err + } + + if err := s.networkConfigureGenerator(&g); err != nil { + return nil, err + } + + if err := s.utsConfigureGenerator(&g, rt); err != nil { + return nil, err + } + + if err := s.ipcConfigureGenerator(&g); err != nil { + return nil, err + } + + if err := s.cgroupConfigureGenerator(&g); err != nil { + return nil, err + } + configSpec := g.Config + + if err := s.securityConfigureGenerator(&g, newImage); err != nil { + return nil, err + } + + // BIND MOUNTS + configSpec.Mounts = createconfig.SupercedeUserMounts(s.Mounts, configSpec.Mounts) + // Process mounts to ensure correct options + finalMounts, err := createconfig.InitFSMounts(configSpec.Mounts) + if err != nil { + return nil, err + } + configSpec.Mounts = finalMounts + + // Add annotations + if configSpec.Annotations == nil { + configSpec.Annotations = make(map[string]string) + } + + // TODO cidfile is not in specgen; when wiring up cli, we will need to move this out of here + // leaving as a reminder + //if config.CidFile != "" { + // configSpec.Annotations[libpod.InspectAnnotationCIDFile] = config.CidFile + //} + + if s.Remove { + configSpec.Annotations[libpod.InspectAnnotationAutoremove] = libpod.InspectResponseTrue + } else { + configSpec.Annotations[libpod.InspectAnnotationAutoremove] = libpod.InspectResponseFalse + } + + if len(s.VolumesFrom) > 0 { + configSpec.Annotations[libpod.InspectAnnotationVolumesFrom] = strings.Join(s.VolumesFrom, ",") + } + + if s.Privileged { + configSpec.Annotations[libpod.InspectAnnotationPrivileged] = libpod.InspectResponseTrue + } else { + configSpec.Annotations[libpod.InspectAnnotationPrivileged] = libpod.InspectResponseFalse + } + + // TODO Init might not make it into the specgen and therefore is not available here. We should deal + // with this when we wire up the CLI; leaving as a reminder + //if s.Init { + // configSpec.Annotations[libpod.InspectAnnotationInit] = libpod.InspectResponseTrue + //} else { + // configSpec.Annotations[libpod.InspectAnnotationInit] = libpod.InspectResponseFalse + //} + + return configSpec, nil +} diff --git a/pkg/specgen/specgen.go b/pkg/specgen/specgen.go index e22ee598f..e1dfe4dc5 100644 --- a/pkg/specgen/specgen.go +++ b/pkg/specgen/specgen.go @@ -2,24 +2,28 @@ package specgen import ( "net" + "syscall" "github.com/containers/image/v5/manifest" "github.com/containers/libpod/libpod" - "github.com/containers/libpod/libpod/define" + "github.com/containers/libpod/pkg/rootless" "github.com/containers/storage" "github.com/cri-o/ocicni/pkg/ocicni" spec "github.com/opencontainers/runtime-spec/specs-go" ) -// TODO -// mheon provided this an off the cuff suggestion. Adding it here to retain -// for history as we implement it. When this struct is implemented, we need -// to remove the nolints. -type Namespace struct { - isHost bool //nolint - isPath string //nolint - isContainer string //nolint - isPod bool //nolint +// LogConfig describes the logging characteristics for a container +type LogConfig struct { + // LogDriver is the container's log driver. + // Optional. + Driver string `json:"driver,omitempty"` + // LogPath is the path the container's logs will be stored at. + // Only available if LogDriver is set to "json-file" or "k8s-file". + // Optional. + Path string `json:"path,omitempty"` + // A set of options to accompany the log driver. + // Optional. + Options map[string]string `json:"options,omitempty"` } // ContainerBasicConfig contains the basic parts of a container. @@ -62,7 +66,7 @@ type ContainerBasicConfig struct { // If not provided, the default, SIGTERM, will be used. // Will conflict with Systemd if Systemd is set to "true" or "always". // Optional. - StopSignal *uint `json:"stop_signal,omitempty"` + StopSignal *syscall.Signal `json:"stop_signal,omitempty"` // StopTimeout is a timeout between the container's stop signal being // sent and SIGKILL being sent. // If not provided, the default will be used. @@ -70,13 +74,10 @@ type ContainerBasicConfig struct { // instead. // Optional. StopTimeout *uint `json:"stop_timeout,omitempty"` - // LogDriver is the container's log driver. - // Optional. - LogDriver string `json:"log_driver,omitempty"` - // LogPath is the path the container's logs will be stored at. - // Only available if LogDriver is set to "json-file" or "k8s-file". - // Optional. - LogPath string `json:"log_path,omitempty"` + // LogConfiguration describes the logging for a container including + // driver, path, and options. + // Optional + LogConfiguration *LogConfig `json:"log_configuration,omitempty"` // ConmonPidFile is a path at which a PID file for Conmon will be // placed. // If not given, a default location will be used. @@ -111,12 +112,10 @@ type ContainerBasicConfig struct { // Namespace is the libpod namespace the container will be placed in. // Optional. Namespace string `json:"namespace,omitempty"` - // PidNS is the container's PID namespace. // It defaults to private. // Mandatory. PidNS Namespace `json:"pidns,omitempty"` - // UtsNS is the container's UTS namespace. // It defaults to private. // Must be set to Private to set Hostname. @@ -128,6 +127,11 @@ type ContainerBasicConfig struct { // Conflicts with UtsNS if UtsNS is not set to private. // Optional. Hostname string `json:"hostname,omitempty"` + // Sysctl sets kernel parameters for the container + Sysctl map[string]string `json:"sysctl,omitempty"` + // Remove indicates if the container should be removed once it has been started + // and exits + Remove bool `json:"remove"` } // ContainerStorageConfig contains information on the storage configuration of a @@ -175,7 +179,7 @@ type ContainerStorageConfig struct { // Mandatory. IpcNS Namespace `json:"ipcns,omitempty"` // ShmSize is the size of the tmpfs to mount in at /dev/shm, in bytes. - // Conflicts with ShmSize if ShmSize is not private. + // Conflicts with ShmSize if IpcNS is not private. // Optional. ShmSize *int64 `json:"shm_size,omitempty"` // WorkDir is the container's working directory. @@ -234,6 +238,9 @@ type ContainerSecurityConfig struct { // will use. // Optional. ApparmorProfile string `json:"apparmor_profile,omitempty"` + // SeccompPolicy determines which seccomp profile gets applied + // the container. valid values: empty,default,image + SeccompPolicy string `json:"seccomp_policy,omitempty"` // SeccompProfilePath is the path to a JSON file containing the // container's Seccomp profile. // If not specified, no Seccomp profile will be used. @@ -252,7 +259,10 @@ type ContainerSecurityConfig struct { // IDMappings are UID and GID mappings that will be used by user // namespaces. // Required if UserNS is private. - IDMappings storage.IDMappingOptions `json:"idmappings,omitempty"` + IDMappings *storage.IDMappingOptions `json:"idmappings,omitempty"` + // ReadOnlyFilesystem indicates that everything will be mounted + // as read-only + ReadOnlyFilesystem bool `json:"read_only_filesystem,omittempty"` } // ContainerCgroupConfig contains configuration information about a container's @@ -260,16 +270,13 @@ type ContainerSecurityConfig struct { type ContainerCgroupConfig struct { // CgroupNS is the container's cgroup namespace. // It defaults to private. - // Conflicts with NoCgroups if not set to host. // Mandatory. CgroupNS Namespace `json:"cgroupns,omitempty"` - // NoCgroups indicates that the container should not create CGroups. - // Conflicts with CgroupParent and CgroupNS if CgroupNS is not set to - // host. - NoCgroups bool `json:"no_cgroups,omitempty"` + // CgroupsMode sets a policy for how cgroups will be created in the + // container, including the ability to disable creation entirely. + CgroupsMode string `json:"cgroups_mode,omitempty"` // CgroupParent is the container's CGroup parent. // If not set, the default for the current cgroup driver will be used. - // Conflicts with NoCgroups. // Optional. CgroupParent string `json:"cgroup_parent,omitempty"` } @@ -348,7 +355,7 @@ type ContainerNetworkConfig struct { // ContainerResourceConfig contains information on container resource limits. type ContainerResourceConfig struct { - // ResourceLimits are resource limits to apply to the container. + // ResourceLimits are resource limits to apply to the container., // Can only be set as root on cgroups v1 systems, but can be set as // rootless as well for cgroups v2. // Optional. @@ -365,11 +372,12 @@ type ContainerResourceConfig struct { // ContainerHealthCheckConfig describes a container healthcheck with attributes // like command, retries, interval, start period, and timeout. type ContainerHealthCheckConfig struct { - HealthConfig manifest.Schema2HealthConfig `json:"healthconfig,omitempty"` + HealthConfig *manifest.Schema2HealthConfig `json:"healthconfig,omitempty"` } // SpecGenerator creates an OCI spec and Libpod configuration options to create // a container based on the given configuration. +// swagger:model SpecGenerator type SpecGenerator struct { ContainerBasicConfig ContainerStorageConfig @@ -381,19 +389,24 @@ type SpecGenerator struct { } // NewSpecGenerator returns a SpecGenerator struct given one of two mandatory inputs -func NewSpecGenerator(image, rootfs *string) (*SpecGenerator, error) { - _ = image - _ = rootfs - return &SpecGenerator{}, define.ErrNotImplemented -} - -// Validate verifies that the given SpecGenerator is valid and satisfies required -// input for creating a container. -func (s *SpecGenerator) Validate() error { - return define.ErrNotImplemented +func NewSpecGenerator(image string) *SpecGenerator { + net := ContainerNetworkConfig{ + NetNS: Namespace{ + NSMode: Bridge, + }, + } + csc := ContainerStorageConfig{Image: image} + if rootless.IsRootless() { + net.NetNS.NSMode = Slirp + } + return &SpecGenerator{ + ContainerStorageConfig: csc, + ContainerNetworkConfig: net, + } } -// MakeContainer creates a container based on the SpecGenerator -func (s *SpecGenerator) MakeContainer() (*libpod.Container, error) { - return nil, define.ErrNotImplemented +// NewSpecGenerator returns a SpecGenerator struct given one of two mandatory inputs +func NewSpecGeneratorWithRootfs(rootfs string) *SpecGenerator { + csc := ContainerStorageConfig{Rootfs: rootfs} + return &SpecGenerator{ContainerStorageConfig: csc} } diff --git a/pkg/specgen/validate.go b/pkg/specgen/validate.go new file mode 100644 index 000000000..78e4d8ad5 --- /dev/null +++ b/pkg/specgen/validate.go @@ -0,0 +1,159 @@ +package specgen + +import ( + "strings" + + "github.com/containers/libpod/libpod" + "github.com/containers/libpod/pkg/util" + "github.com/pkg/errors" +) + +var ( + // ErrInvalidSpecConfig describes an error that the given SpecGenerator is invalid + ErrInvalidSpecConfig error = errors.New("invalid configuration") + // SystemDValues describes the only values that SystemD can be + SystemDValues = []string{"true", "false", "always"} + // ImageVolumeModeValues describes the only values that ImageVolumeMode can be + ImageVolumeModeValues = []string{"ignore", "tmpfs", "anonymous"} +) + +func exclusiveOptions(opt1, opt2 string) error { + return errors.Errorf("%s and %s are mutually exclusive options", opt1, opt2) +} + +// Validate verifies that the given SpecGenerator is valid and satisfies required +// input for creating a container. +func (s *SpecGenerator) validate(rt *libpod.Runtime) error { + + // + // ContainerBasicConfig + // + // Rootfs and Image cannot both populated + if len(s.ContainerStorageConfig.Image) > 0 && len(s.ContainerStorageConfig.Rootfs) > 0 { + return errors.Wrap(ErrInvalidSpecConfig, "both image and rootfs cannot be simultaneously") + } + // Cannot set hostname and utsns + if len(s.ContainerBasicConfig.Hostname) > 0 && !s.ContainerBasicConfig.UtsNS.IsPrivate() { + return errors.Wrap(ErrInvalidSpecConfig, "cannot set hostname when creating an UTS namespace") + } + // systemd values must be true, false, or always + if len(s.ContainerBasicConfig.Systemd) > 0 && !util.StringInSlice(strings.ToLower(s.ContainerBasicConfig.Systemd), SystemDValues) { + return errors.Wrapf(ErrInvalidSpecConfig, "SystemD values must be one of %s", strings.Join(SystemDValues, ",")) + } + + // + // ContainerStorageConfig + // + // rootfs and image cannot both be set + if len(s.ContainerStorageConfig.Image) > 0 && len(s.ContainerStorageConfig.Rootfs) > 0 { + return exclusiveOptions("rootfs", "image") + } + // imagevolumemode must be one of ignore, tmpfs, or anonymous if given + if len(s.ContainerStorageConfig.ImageVolumeMode) > 0 && !util.StringInSlice(strings.ToLower(s.ContainerStorageConfig.ImageVolumeMode), ImageVolumeModeValues) { + return errors.Errorf("ImageVolumeMode values must be one of %s", strings.Join(ImageVolumeModeValues, ",")) + } + // shmsize conflicts with IPC namespace + if s.ContainerStorageConfig.ShmSize != nil && !s.ContainerStorageConfig.IpcNS.IsPrivate() { + return errors.New("cannot set shmsize when creating an IPC namespace") + } + + // + // ContainerSecurityConfig + // + // groups and privileged are exclusive + if len(s.Groups) > 0 && s.Privileged { + return exclusiveOptions("Groups", "privileged") + } + // capadd and privileged are exclusive + if len(s.CapAdd) > 0 && s.Privileged { + return exclusiveOptions("CapAdd", "privileged") + } + // selinuxprocesslabel and privileged are exclusive + if len(s.SelinuxProcessLabel) > 0 && s.Privileged { + return exclusiveOptions("SelinuxProcessLabel", "privileged") + } + // selinuxmounmtlabel and privileged are exclusive + if len(s.SelinuxMountLabel) > 0 && s.Privileged { + return exclusiveOptions("SelinuxMountLabel", "privileged") + } + // selinuxopts and privileged are exclusive + if len(s.SelinuxOpts) > 0 && s.Privileged { + return exclusiveOptions("SelinuxOpts", "privileged") + } + // apparmor and privileged are exclusive + if len(s.ApparmorProfile) > 0 && s.Privileged { + return exclusiveOptions("AppArmorProfile", "privileged") + } + // userns and idmappings conflict + if s.UserNS.IsPrivate() && s.IDMappings == nil { + return errors.Wrap(ErrInvalidSpecConfig, "IDMappings are required when not creating a User namespace") + } + + // + // ContainerCgroupConfig + // + // + // None for now + + // + // ContainerNetworkConfig + // + if !s.NetNS.IsPrivate() && s.ConfigureNetNS { + return errors.New("can only configure network namespace when creating a network a network namespace") + } + // useimageresolveconf conflicts with dnsserver, dnssearch, dnsoption + if s.UseImageResolvConf { + if len(s.DNSServer) > 0 { + return exclusiveOptions("UseImageResolvConf", "DNSServer") + } + if len(s.DNSSearch) > 0 { + return exclusiveOptions("UseImageResolvConf", "DNSSearch") + } + if len(s.DNSOption) > 0 { + return exclusiveOptions("UseImageResolvConf", "DNSOption") + } + } + // UseImageHosts and HostAdd are exclusive + if s.UseImageHosts && len(s.HostAdd) > 0 { + return exclusiveOptions("UseImageHosts", "HostAdd") + } + + // TODO the specgen does not appear to handle this? Should it + //switch config.Cgroup.Cgroups { + //case "disabled": + // if addedResources { + // return errors.New("cannot specify resource limits when cgroups are disabled is specified") + // } + // configSpec.Linux.Resources = &spec.LinuxResources{} + //case "enabled", "no-conmon", "": + // // Do nothing + //default: + // return errors.New("unrecognized option for cgroups; supported are 'default', 'disabled', 'no-conmon'") + //} + + // Namespaces + if err := s.UtsNS.validate(); err != nil { + return err + } + if err := s.IpcNS.validate(); err != nil { + return err + } + if err := s.NetNS.validate(); err != nil { + return err + } + if err := s.PidNS.validate(); err != nil { + return err + } + if err := s.CgroupNS.validate(); err != nil { + return err + } + if err := s.UserNS.validate(); err != nil { + return err + } + + // The following are defaults as needed by container creation + if len(s.WorkDir) < 1 { + s.WorkDir = "/" + } + return nil +} -- cgit v1.2.3-54-g00ecf