diff options
Diffstat (limited to 'pkg/spec')
-rw-r--r-- | pkg/spec/createconfig.go | 508 | ||||
-rw-r--r-- | pkg/spec/parse.go | 128 | ||||
-rw-r--r-- | pkg/spec/ports.go | 107 | ||||
-rw-r--r-- | pkg/spec/spec.go | 422 | ||||
-rw-r--r-- | pkg/spec/spec_test.go | 45 |
5 files changed, 1210 insertions, 0 deletions
diff --git a/pkg/spec/createconfig.go b/pkg/spec/createconfig.go new file mode 100644 index 000000000..de6e0f593 --- /dev/null +++ b/pkg/spec/createconfig.go @@ -0,0 +1,508 @@ +package createconfig + +import ( + "os" + "strconv" + "strings" + "syscall" + + "github.com/containers/storage" + "github.com/cri-o/ocicni/pkg/ocicni" + "github.com/docker/docker/api/types/container" + "github.com/docker/go-connections/nat" + "github.com/opencontainers/runc/libcontainer/devices" + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" + "github.com/opencontainers/selinux/go-selinux/label" + "github.com/pkg/errors" + "github.com/projectatomic/libpod/libpod" + ann "github.com/projectatomic/libpod/pkg/annotations" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +type mountType string + +// Type constants +const ( + bps = iota + iops + // TypeBind is the type for mounting host dir + TypeBind mountType = "bind" + // TypeVolume is the type for remote storage volumes + // TypeVolume mountType = "volume" // re-enable upon use + // TypeTmpfs is the type for mounting tmpfs + TypeTmpfs mountType = "tmpfs" +) + +// CreateResourceConfig represents resource elements in CreateConfig +// structures +type CreateResourceConfig struct { + BlkioWeight uint16 // blkio-weight + BlkioWeightDevice []string // blkio-weight-device + CPUPeriod uint64 // cpu-period + CPUQuota int64 // cpu-quota + CPURtPeriod uint64 // cpu-rt-period + CPURtRuntime int64 // cpu-rt-runtime + CPUShares uint64 // cpu-shares + CPUs float64 // cpus + CPUsetCPUs string + CPUsetMems string // cpuset-mems + DeviceReadBps []string // device-read-bps + DeviceReadIOps []string // device-read-iops + DeviceWriteBps []string // device-write-bps + DeviceWriteIOps []string // device-write-iops + DisableOomKiller bool // oom-kill-disable + KernelMemory int64 // kernel-memory + Memory int64 //memory + MemoryReservation int64 // memory-reservation + MemorySwap int64 //memory-swap + MemorySwappiness int // memory-swappiness + OomScoreAdj int //oom-score-adj + PidsLimit int64 // pids-limit + ShmSize int64 + Ulimit []string //ulimit +} + +// CreateConfig is a pre OCI spec structure. It represents user input from varlink or the CLI +type CreateConfig struct { + Runtime *libpod.Runtime + Args []string + CapAdd []string // cap-add + CapDrop []string // cap-drop + CidFile string + ConmonPidFile string + CgroupParent string // cgroup-parent + Command []string + Detach bool // detach + Devices []string // device + DNSOpt []string //dns-opt + DNSSearch []string //dns-search + DNSServers []string //dns + Entrypoint []string //entrypoint + Env map[string]string //env + ExposedPorts map[nat.Port]struct{} + GroupAdd []string // group-add + HostAdd []string //add-host + Hostname string //hostname + Image string + ImageID string + BuiltinImgVolumes map[string]struct{} // volumes defined in the image config + IDMappings *storage.IDMappingOptions + ImageVolumeType string // how to handle the image volume, either bind, tmpfs, or ignore + Interactive bool //interactive + IpcMode container.IpcMode //ipc + IP6Address string //ipv6 + IPAddress string //ip + Labels map[string]string //label + LinkLocalIP []string // link-local-ip + LogDriver string // log-driver + LogDriverOpt []string // log-opt + MacAddress string //mac-address + Name string //name + NetMode container.NetworkMode //net + Network string //network + NetworkAlias []string //network-alias + PidMode container.PidMode //pid + Pod string //pod + PortBindings nat.PortMap + Privileged bool //privileged + Publish []string //publish + PublishAll bool //publish-all + Quiet bool //quiet + ReadOnlyRootfs bool //read-only + Resources CreateResourceConfig + Rm bool //rm + ShmDir string + StopSignal syscall.Signal // stop-signal + StopTimeout uint // stop-timeout + Sysctl map[string]string //sysctl + Tmpfs []string // tmpfs + Tty bool //tty + UsernsMode container.UsernsMode //userns + User string //user + UtsMode container.UTSMode //uts + Volumes []string //volume + WorkDir string //workdir + MountLabel string //SecurityOpts + ProcessLabel string //SecurityOpts + NoNewPrivs bool //SecurityOpts + ApparmorProfile string //SecurityOpts + SeccompProfilePath string //SecurityOpts + SecurityOpts []string +} + +func u32Ptr(i int64) *uint32 { u := uint32(i); return &u } +func fmPtr(i int64) *os.FileMode { fm := os.FileMode(i); return &fm } + +// CreateBlockIO returns a LinuxBlockIO struct from a CreateConfig +func (c *CreateConfig) CreateBlockIO() (*spec.LinuxBlockIO, error) { + bio := &spec.LinuxBlockIO{} + bio.Weight = &c.Resources.BlkioWeight + if len(c.Resources.BlkioWeightDevice) > 0 { + var lwds []spec.LinuxWeightDevice + for _, i := range c.Resources.BlkioWeightDevice { + wd, err := validateweightDevice(i) + if err != nil { + return bio, errors.Wrapf(err, "invalid values for blkio-weight-device") + } + wdStat, err := getStatFromPath(wd.path) + if err != nil { + return bio, errors.Wrapf(err, "error getting stat from path %q", wd.path) + } + lwd := spec.LinuxWeightDevice{ + Weight: &wd.weight, + } + lwd.Major = int64(unix.Major(wdStat.Rdev)) + lwd.Minor = int64(unix.Minor(wdStat.Rdev)) + lwds = append(lwds, lwd) + } + bio.WeightDevice = lwds + } + if len(c.Resources.DeviceReadBps) > 0 { + readBps, err := makeThrottleArray(c.Resources.DeviceReadBps, bps) + if err != nil { + return bio, err + } + bio.ThrottleReadBpsDevice = readBps + } + if len(c.Resources.DeviceWriteBps) > 0 { + writeBpds, err := makeThrottleArray(c.Resources.DeviceWriteBps, bps) + if err != nil { + return bio, err + } + bio.ThrottleWriteBpsDevice = writeBpds + } + if len(c.Resources.DeviceReadIOps) > 0 { + readIOps, err := makeThrottleArray(c.Resources.DeviceReadIOps, iops) + if err != nil { + return bio, err + } + bio.ThrottleReadIOPSDevice = readIOps + } + if len(c.Resources.DeviceWriteIOps) > 0 { + writeIOps, err := makeThrottleArray(c.Resources.DeviceWriteIOps, iops) + if err != nil { + return bio, err + } + bio.ThrottleWriteIOPSDevice = writeIOps + } + return bio, nil +} + +func makeThrottleArray(throttleInput []string, rateType int) ([]spec.LinuxThrottleDevice, error) { + var ( + ltds []spec.LinuxThrottleDevice + t *throttleDevice + err error + ) + for _, i := range throttleInput { + if rateType == bps { + t, err = validateBpsDevice(i) + } else { + t, err = validateIOpsDevice(i) + } + if err != nil { + return []spec.LinuxThrottleDevice{}, err + } + ltdStat, err := getStatFromPath(t.path) + if err != nil { + return ltds, errors.Wrapf(err, "error getting stat from path %q", t.path) + } + ltd := spec.LinuxThrottleDevice{ + Rate: t.rate, + } + ltd.Major = int64(unix.Major(ltdStat.Rdev)) + ltd.Minor = int64(unix.Minor(ltdStat.Rdev)) + ltds = append(ltds, ltd) + } + return ltds, nil +} + +// GetAnnotations returns the all the annotations for the container +func (c *CreateConfig) GetAnnotations() map[string]string { + a := getDefaultAnnotations() + // TODO - Which annotations do we want added by default + // TODO - This should be added to the DB long term + if c.Tty { + a["io.kubernetes.cri-o.TTY"] = "true" + } + return a +} + +func getDefaultAnnotations() map[string]string { + var annotations map[string]string + annotations = make(map[string]string) + annotations[ann.Annotations] = "" + annotations[ann.ContainerID] = "" + annotations[ann.ContainerName] = "" + annotations[ann.ContainerType] = "sandbox" + annotations[ann.Created] = "" + annotations[ann.HostName] = "" + annotations[ann.IP] = "" + annotations[ann.Image] = "" + annotations[ann.ImageName] = "" + annotations[ann.ImageRef] = "" + annotations[ann.KubeName] = "" + annotations[ann.Labels] = "" + annotations[ann.LogPath] = "" + annotations[ann.Metadata] = "" + annotations[ann.Name] = "" + annotations[ann.PrivilegedRuntime] = "" + annotations[ann.ResolvPath] = "" + annotations[ann.HostnamePath] = "" + annotations[ann.SandboxID] = "" + annotations[ann.SandboxName] = "" + annotations[ann.ShmPath] = "" + annotations[ann.MountPoint] = "" + annotations[ann.TrustedSandbox] = "" + annotations[ann.TTY] = "false" + annotations[ann.Stdin] = "" + annotations[ann.StdinOnce] = "" + annotations[ann.Volumes] = "" + + return annotations +} + +//GetVolumeMounts takes user provided input for bind mounts and creates Mount structs +func (c *CreateConfig) GetVolumeMounts(specMounts []spec.Mount) ([]spec.Mount, error) { + var m []spec.Mount + var options []string + for _, i := range c.Volumes { + // We need to handle SELinux options better here, specifically :Z + spliti := strings.Split(i, ":") + if len(spliti) > 2 { + options = strings.Split(spliti[2], ",") + } + if libpod.MountExists(specMounts, spliti[1]) { + continue + } + options = append(options, "rbind") + var foundrw, foundro, foundz, foundZ bool + var rootProp string + for _, opt := range options { + switch opt { + case "rw": + foundrw = true + case "ro": + foundro = true + case "z": + foundz = true + case "Z": + foundZ = true + case "private", "rprivate", "slave", "rslave", "shared", "rshared": + rootProp = opt + } + } + if !foundrw && !foundro { + options = append(options, "rw") + } + if foundz { + if err := label.Relabel(spliti[0], c.MountLabel, true); err != nil { + return nil, errors.Wrapf(err, "relabel failed %q", spliti[0]) + } + } + if foundZ { + if err := label.Relabel(spliti[0], c.MountLabel, false); err != nil { + return nil, errors.Wrapf(err, "relabel failed %q", spliti[0]) + } + } + if rootProp == "" { + options = append(options, "private") + } + + m = append(m, spec.Mount{ + Destination: spliti[1], + Type: string(TypeBind), + Source: spliti[0], + Options: options, + }) + } + + // volumes from image config + if c.ImageVolumeType != "tmpfs" { + return m, nil + } + for vol := range c.BuiltinImgVolumes { + if libpod.MountExists(specMounts, vol) { + continue + } + mount := spec.Mount{ + Destination: vol, + Type: string(TypeTmpfs), + Source: string(TypeTmpfs), + Options: []string{"rw", "noexec", "nosuid", "nodev", "tmpcopyup"}, + } + m = append(m, mount) + } + return m, nil +} + +//GetTmpfsMounts takes user provided input for Tmpfs mounts and creates Mount structs +func (c *CreateConfig) GetTmpfsMounts() []spec.Mount { + var m []spec.Mount + for _, i := range c.Tmpfs { + // Default options if nothing passed + options := []string{"rw", "noexec", "nosuid", "nodev", "size=65536k"} + spliti := strings.Split(i, ":") + destPath := spliti[0] + if len(spliti) > 1 { + options = strings.Split(spliti[1], ",") + } + m = append(m, spec.Mount{ + Destination: destPath, + Type: string(TypeTmpfs), + Options: options, + Source: string(TypeTmpfs), + }) + } + return m +} + +// GetContainerCreateOptions takes a CreateConfig and returns a slice of CtrCreateOptions +func (c *CreateConfig) GetContainerCreateOptions() ([]libpod.CtrCreateOption, error) { + var options []libpod.CtrCreateOption + var portBindings []ocicni.PortMapping + var err error + + // Uncomment after talking to mheon about unimplemented funcs + // options = append(options, libpod.WithLabels(c.labels)) + + if c.Interactive { + options = append(options, libpod.WithStdin()) + } + if c.Name != "" { + logrus.Debugf("appending name %s", c.Name) + options = append(options, libpod.WithName(c.Name)) + } + + if len(c.PortBindings) > 0 { + portBindings, err = c.CreatePortBindings() + if err != nil { + return nil, errors.Wrapf(err, "unable to create port bindings") + } + } + + if len(c.Volumes) != 0 { + // Volumes consist of multiple, comma-delineated fields + // The image spec only includes one part of that, so drop the + // others, if they are included + volumes := make([]string, 0, len(c.Volumes)) + for _, vol := range c.Volumes { + volumes = append(volumes, strings.SplitN(vol, ":", 2)[0]) + } + + options = append(options, libpod.WithUserVolumes(volumes)) + } + + if len(c.Command) != 0 { + options = append(options, libpod.WithCommand(c.Command)) + } + + // Add entrypoint unconditionally + // If it's empty it's because it was explicitly set to "" or the image + // does not have one + options = append(options, libpod.WithEntrypoint(c.Entrypoint)) + + if c.NetMode.IsContainer() { + connectedCtr, err := c.Runtime.LookupContainer(c.NetMode.ConnectedContainer()) + if err != nil { + return nil, errors.Wrapf(err, "container %q not found", c.NetMode.ConnectedContainer()) + } + options = append(options, libpod.WithNetNSFrom(connectedCtr)) + } else if !c.NetMode.IsHost() && !c.NetMode.IsNone() { + postConfigureNetNS := (len(c.IDMappings.UIDMap) > 0 || len(c.IDMappings.GIDMap) > 0) && !c.UsernsMode.IsHost() + options = append(options, libpod.WithNetNS([]ocicni.PortMapping{}, postConfigureNetNS)) + options = append(options, libpod.WithNetNS(portBindings, postConfigureNetNS)) + } + + if c.PidMode.IsContainer() { + connectedCtr, err := c.Runtime.LookupContainer(c.PidMode.Container()) + if err != nil { + return nil, errors.Wrapf(err, "container %q not found", c.PidMode.Container()) + } + + options = append(options, libpod.WithPIDNSFrom(connectedCtr)) + } + if c.IpcMode.IsContainer() { + connectedCtr, err := c.Runtime.LookupContainer(c.IpcMode.Container()) + if err != nil { + return nil, errors.Wrapf(err, "container %q not found", c.IpcMode.Container()) + } + + options = append(options, libpod.WithIPCNSFrom(connectedCtr)) + } + + options = append(options, libpod.WithStopSignal(c.StopSignal)) + options = append(options, libpod.WithStopTimeout(c.StopTimeout)) + if len(c.DNSSearch) > 0 { + options = append(options, libpod.WithDNSSearch(c.DNSSearch)) + } + if len(c.DNSServers) > 0 { + options = append(options, libpod.WithDNS(c.DNSServers)) + } + if len(c.DNSOpt) > 0 { + options = append(options, libpod.WithDNSOption(c.DNSOpt)) + } + if len(c.HostAdd) > 0 { + options = append(options, libpod.WithHosts(c.HostAdd)) + } + logPath := getLoggingPath(c.LogDriverOpt) + if logPath != "" { + options = append(options, libpod.WithLogPath(logPath)) + } + + options = append(options, libpod.WithPrivileged(c.Privileged)) + return options, nil +} + +// CreatePortBindings iterates ports mappings and exposed ports into a format CNI understands +func (c *CreateConfig) CreatePortBindings() ([]ocicni.PortMapping, error) { + var portBindings []ocicni.PortMapping + for containerPb, hostPb := range c.PortBindings { + var pm ocicni.PortMapping + pm.ContainerPort = int32(containerPb.Int()) + for _, i := range hostPb { + var hostPort int + var err error + pm.HostIP = i.HostIP + if i.HostPort == "" { + hostPort = containerPb.Int() + } else { + hostPort, err = strconv.Atoi(i.HostPort) + if err != nil { + return nil, errors.Wrapf(err, "unable to convert host port to integer") + } + } + + pm.HostPort = int32(hostPort) + // CNI requires us to make both udp and tcp structs + pm.Protocol = "udp" + portBindings = append(portBindings, pm) + pm.Protocol = "tcp" + portBindings = append(portBindings, pm) + } + } + return portBindings, nil +} + +// AddPrivilegedDevices iterates through host devices and adds all +// host devices to the spec +func (c *CreateConfig) AddPrivilegedDevices(g *generate.Generator) error { + hostDevices, err := devices.HostDevices() + if err != nil { + return err + } + g.ClearLinuxDevices() + for _, d := range hostDevices { + g.AddDevice(Device(d)) + } + g.AddLinuxResourcesDevice(true, "", nil, nil, "rwm") + return nil +} + +func getStatFromPath(path string) (unix.Stat_t, error) { + s := unix.Stat_t{} + err := unix.Stat(path, &s) + return s, err +} diff --git a/pkg/spec/parse.go b/pkg/spec/parse.go new file mode 100644 index 000000000..920674b10 --- /dev/null +++ b/pkg/spec/parse.go @@ -0,0 +1,128 @@ +package createconfig + +import ( + "fmt" + "strconv" + "strings" + + "github.com/docker/go-units" + "github.com/opencontainers/runc/libcontainer/configs" + spec "github.com/opencontainers/runtime-spec/specs-go" +) + +// weightDevice is a structure that holds device:weight pair +type weightDevice struct { + path string + weight uint16 +} + +func (w *weightDevice) String() string { + return fmt.Sprintf("%s:%d", w.path, w.weight) +} + +// validateweightDevice validates that the specified string has a valid device-weight format +// for blkio-weight-device flag +func validateweightDevice(val string) (*weightDevice, error) { + split := strings.SplitN(val, ":", 2) + if len(split) != 2 { + return nil, fmt.Errorf("bad format: %s", val) + } + if !strings.HasPrefix(split[0], "/dev/") { + return nil, fmt.Errorf("bad format for device path: %s", val) + } + weight, err := strconv.ParseUint(split[1], 10, 0) + if err != nil { + return nil, fmt.Errorf("invalid weight for device: %s", val) + } + if weight > 0 && (weight < 10 || weight > 1000) { + return nil, fmt.Errorf("invalid weight for device: %s", val) + } + + return &weightDevice{ + path: split[0], + weight: uint16(weight), + }, nil +} + +// throttleDevice is a structure that holds device:rate_per_second pair +type throttleDevice struct { + path string + rate uint64 +} + +func (t *throttleDevice) String() string { + return fmt.Sprintf("%s:%d", t.path, t.rate) +} + +// validateBpsDevice validates that the specified string has a valid device-rate format +// for device-read-bps and device-write-bps flags +func validateBpsDevice(val string) (*throttleDevice, error) { + split := strings.SplitN(val, ":", 2) + if len(split) != 2 { + return nil, fmt.Errorf("bad format: %s", val) + } + if !strings.HasPrefix(split[0], "/dev/") { + return nil, fmt.Errorf("bad format for device path: %s", val) + } + rate, err := units.RAMInBytes(split[1]) + if err != nil { + return nil, fmt.Errorf("invalid rate for device: %s. The correct format is <device-path>:<number>[<unit>]. Number must be a positive integer. Unit is optional and can be kb, mb, or gb", val) + } + if rate < 0 { + return nil, fmt.Errorf("invalid rate for device: %s. The correct format is <device-path>:<number>[<unit>]. Number must be a positive integer. Unit is optional and can be kb, mb, or gb", val) + } + + return &throttleDevice{ + path: split[0], + rate: uint64(rate), + }, nil +} + +// validateIOpsDevice validates that the specified string has a valid device-rate format +// for device-write-iops and device-read-iops flags +func validateIOpsDevice(val string) (*throttleDevice, error) { //nolint + split := strings.SplitN(val, ":", 2) + if len(split) != 2 { + return nil, fmt.Errorf("bad format: %s", val) + } + if !strings.HasPrefix(split[0], "/dev/") { + return nil, fmt.Errorf("bad format for device path: %s", val) + } + rate, err := strconv.ParseUint(split[1], 10, 64) + if err != nil { + return nil, fmt.Errorf("invalid rate for device: %s. The correct format is <device-path>:<number>. Number must be a positive integer", val) + } + if rate < 0 { + return nil, fmt.Errorf("invalid rate for device: %s. The correct format is <device-path>:<number>. Number must be a positive integer", val) + } + + return &throttleDevice{ + path: split[0], + rate: uint64(rate), + }, nil +} + +func getLoggingPath(opts []string) string { + for _, opt := range opts { + arr := strings.SplitN(opt, "=", 2) + if len(arr) == 2 { + if strings.TrimSpace(arr[0]) == "path" { + return strings.TrimSpace(arr[1]) + } + } + } + return "" +} + +// Device transforms a libcontainer configs.Device to a specs.LinuxDevice object. +func Device(d *configs.Device) spec.LinuxDevice { + return spec.LinuxDevice{ + Type: string(d.Type), + Path: d.Path, + Major: d.Major, + Minor: d.Minor, + FileMode: fmPtr(int64(d.FileMode)), + UID: u32Ptr(int64(d.Uid)), + GID: u32Ptr(int64(d.Gid)), + } +} diff --git a/pkg/spec/ports.go b/pkg/spec/ports.go new file mode 100644 index 000000000..4d9a625bf --- /dev/null +++ b/pkg/spec/ports.go @@ -0,0 +1,107 @@ +package createconfig + +import ( + "fmt" + "net" + "strconv" + + "github.com/docker/go-connections/nat" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +// ExposedPorts parses user and image ports and returns binding information +func ExposedPorts(expose, publish []string, publishAll bool, imageExposedPorts map[string]struct{}) (map[nat.Port][]nat.PortBinding, error) { + containerPorts := make(map[string]string) + + // add expose ports from the image itself + for expose := range imageExposedPorts { + _, port := nat.SplitProtoPort(expose) + containerPorts[port] = "" + } + + // add the expose ports from the user (--expose) + // can be single or a range + for _, expose := range expose { + //support two formats for expose, original format <portnum>/[<proto>] or <startport-endport>/[<proto>] + _, port := nat.SplitProtoPort(expose) + //parse the start and end port and create a sequence of ports to expose + //if expose a port, the start and end port are the same + start, end, err := nat.ParsePortRange(port) + if err != nil { + return nil, fmt.Errorf("invalid range format for --expose: %s, error: %s", expose, err) + } + for i := start; i <= end; i++ { + containerPorts[strconv.Itoa(int(i))] = "" + } + } + + // parse user input'd port bindings + pbPorts, portBindings, err := nat.ParsePortSpecs(publish) + if err != nil { + return nil, err + } + + // delete exposed container ports if being used by -p + for i := range pbPorts { + delete(containerPorts, i.Port()) + } + + // iterate container ports and make port bindings from them + if publishAll { + for e := range containerPorts { + //support two formats for expose, original format <portnum>/[<proto>] or <startport-endport>/[<proto>] + //proto, port := nat.SplitProtoPort(e) + p, err := nat.NewPort("tcp", e) + if err != nil { + return nil, err + } + rp, err := getRandomPort() + if err != nil { + return nil, err + } + logrus.Debug(fmt.Sprintf("Using random host port %d with container port %d", rp, p.Int())) + portBindings[p] = CreatePortBinding(rp, "") + } + } + + // We need to see if any host ports are not populated and if so, we need to assign a + // random port to them. + for k, pb := range portBindings { + if pb[0].HostPort == "" { + hostPort, err := getRandomPort() + if err != nil { + return nil, err + } + logrus.Debug(fmt.Sprintf("Using random host port %d with container port %s", hostPort, k.Port())) + pb[0].HostPort = strconv.Itoa(hostPort) + } + } + return portBindings, nil +} + +func getRandomPort() (int, error) { + l, err := net.Listen("tcp", ":0") + if err != nil { + return 0, errors.Wrapf(err, "unable to get free port") + } + defer l.Close() + _, randomPort, err := net.SplitHostPort(l.Addr().String()) + if err != nil { + return 0, errors.Wrapf(err, "unable to determine free port") + } + rp, err := strconv.Atoi(randomPort) + if err != nil { + return 0, errors.Wrapf(err, "unable to convert random port to int") + } + return rp, nil +} + +//CreatePortBinding takes port (int) and IP (string) and creates an array of portbinding structs +func CreatePortBinding(hostPort int, hostIP string) []nat.PortBinding { + pb := nat.PortBinding{ + HostPort: strconv.Itoa(hostPort), + } + pb.HostIP = hostIP + return []nat.PortBinding{pb} +} diff --git a/pkg/spec/spec.go b/pkg/spec/spec.go new file mode 100644 index 000000000..959a24213 --- /dev/null +++ b/pkg/spec/spec.go @@ -0,0 +1,422 @@ +package createconfig + +import ( + "strings" + + "github.com/docker/docker/daemon/caps" + "github.com/docker/docker/pkg/mount" + "github.com/docker/docker/profiles/seccomp" + "github.com/docker/go-units" + "github.com/opencontainers/runc/libcontainer/devices" + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "io/ioutil" +) + +const cpuPeriod = 100000 + +// CreateConfigToOCISpec parses information needed to create a container into an OCI runtime spec +func CreateConfigToOCISpec(config *CreateConfig) (*spec.Spec, error) { //nolint + cgroupPerm := "ro" + g := generate.New() + g.HostSpecific = true + addCgroup := true + if config.Privileged { + cgroupPerm = "rw" + g.RemoveMount("/sys") + sysMnt := spec.Mount{ + Destination: "/sys", + Type: "sysfs", + Source: "sysfs", + Options: []string{"nosuid", "noexec", "nodev", "rw"}, + } + g.AddMount(sysMnt) + } else if !config.UsernsMode.IsHost() && config.NetMode.IsHost() { + addCgroup = false + g.RemoveMount("/sys") + sysMnt := spec.Mount{ + Destination: "/sys", + Type: "bind", + Source: "/sys", + Options: []string{"nosuid", "noexec", "nodev", "ro", "rbind"}, + } + g.AddMount(sysMnt) + } + + if addCgroup { + cgroupMnt := spec.Mount{ + Destination: "/sys/fs/cgroup", + Type: "cgroup", + Source: "cgroup", + Options: []string{"nosuid", "noexec", "nodev", "relatime", cgroupPerm}, + } + g.AddMount(cgroupMnt) + } + g.SetProcessCwd(config.WorkDir) + g.SetProcessArgs(config.Command) + g.SetProcessTerminal(config.Tty) + + for key, val := range config.GetAnnotations() { + g.AddAnnotation(key, val) + } + g.SetRootReadonly(config.ReadOnlyRootfs) + g.SetHostname(config.Hostname) + if config.Hostname != "" { + g.AddProcessEnv("HOSTNAME", config.Hostname) + } + for sysctlKey, sysctlVal := range config.Sysctl { + g.AddLinuxSysctl(sysctlKey, sysctlVal) + } + g.AddProcessEnv("container", "podman") + + // RESOURCES - MEMORY + if config.Resources.Memory != 0 { + g.SetLinuxResourcesMemoryLimit(config.Resources.Memory) + } + if config.Resources.MemoryReservation != 0 { + g.SetLinuxResourcesMemoryReservation(config.Resources.MemoryReservation) + } + if config.Resources.MemorySwap != 0 { + g.SetLinuxResourcesMemorySwap(config.Resources.MemorySwap) + } + if config.Resources.KernelMemory != 0 { + g.SetLinuxResourcesMemoryKernel(config.Resources.KernelMemory) + } + if config.Resources.MemorySwappiness != -1 { + g.SetLinuxResourcesMemorySwappiness(uint64(config.Resources.MemorySwappiness)) + } + g.SetLinuxResourcesMemoryDisableOOMKiller(config.Resources.DisableOomKiller) + g.SetProcessOOMScoreAdj(config.Resources.OomScoreAdj) + + // RESOURCES - CPU + if config.Resources.CPUShares != 0 { + g.SetLinuxResourcesCPUShares(config.Resources.CPUShares) + } + if config.Resources.CPUQuota != 0 { + g.SetLinuxResourcesCPUQuota(config.Resources.CPUQuota) + } + if config.Resources.CPUPeriod != 0 { + g.SetLinuxResourcesCPUPeriod(config.Resources.CPUPeriod) + } + if config.Resources.CPUs != 0 { + g.SetLinuxResourcesCPUPeriod(cpuPeriod) + g.SetLinuxResourcesCPUQuota(int64(config.Resources.CPUs * cpuPeriod)) + } + if config.Resources.CPURtRuntime != 0 { + g.SetLinuxResourcesCPURealtimeRuntime(config.Resources.CPURtRuntime) + } + if config.Resources.CPURtPeriod != 0 { + g.SetLinuxResourcesCPURealtimePeriod(config.Resources.CPURtPeriod) + } + if config.Resources.CPUsetCPUs != "" { + g.SetLinuxResourcesCPUCpus(config.Resources.CPUsetCPUs) + } + if config.Resources.CPUsetMems != "" { + g.SetLinuxResourcesCPUMems(config.Resources.CPUsetMems) + } + + // Devices + if config.Privileged { + // If privileged, we need to add all the host devices to the + // spec. We do not add the user provided ones because we are + // already adding them all. + if err := config.AddPrivilegedDevices(&g); err != nil { + return nil, err + } + } else { + for _, device := range config.Devices { + if err := addDevice(&g, device); err != nil { + return nil, err + } + } + } + + for _, uidmap := range config.IDMappings.UIDMap { + g.AddLinuxUIDMapping(uint32(uidmap.HostID), uint32(uidmap.ContainerID), uint32(uidmap.Size)) + } + for _, gidmap := range config.IDMappings.GIDMap { + g.AddLinuxGIDMapping(uint32(gidmap.HostID), uint32(gidmap.ContainerID), uint32(gidmap.Size)) + } + // SECURITY OPTS + g.SetProcessNoNewPrivileges(config.NoNewPrivs) + g.SetProcessApparmorProfile(config.ApparmorProfile) + g.SetProcessSelinuxLabel(config.ProcessLabel) + g.SetLinuxMountLabel(config.MountLabel) + blockAccessToKernelFilesystems(config, &g) + + // RESOURCES - PIDS + if config.Resources.PidsLimit != 0 { + g.SetLinuxResourcesPidsLimit(config.Resources.PidsLimit) + } + + for _, i := range config.Tmpfs { + // Default options if nothing passed + options := []string{"rw", "noexec", "nosuid", "nodev", "size=65536k"} + spliti := strings.SplitN(i, ":", 2) + if len(spliti) > 1 { + if _, _, err := mount.ParseTmpfsOptions(spliti[1]); err != nil { + return nil, err + } + options = strings.Split(spliti[1], ",") + } + tmpfsMnt := spec.Mount{ + Destination: spliti[0], + Type: "tmpfs", + Source: "tmpfs", + Options: append(options, "tmpcopyup"), + } + g.AddMount(tmpfsMnt) + } + + for name, val := range config.Env { + g.AddProcessEnv(name, val) + } + + if err := addRlimits(config, &g); err != nil { + return nil, err + } + + if err := addPidNS(config, &g); err != nil { + return nil, err + } + + if err := addUserNS(config, &g); err != nil { + return nil, err + } + + if err := addNetNS(config, &g); err != nil { + return nil, err + } + + if err := addUTSNS(config, &g); err != nil { + return nil, err + } + + if err := addIpcNS(config, &g); err != nil { + return nil, err + } + configSpec := g.Spec() + + // HANDLE CAPABILITIES + // NOTE: Must happen before SECCOMP + if !config.Privileged { + if err := setupCapabilities(config, configSpec); err != nil { + return nil, err + } + } else { + g.SetupPrivileged(true) + } + + // HANDLE SECCOMP + if config.SeccompProfilePath != "unconfined" { + if config.SeccompProfilePath != "" { + seccompProfile, err := ioutil.ReadFile(config.SeccompProfilePath) + if err != nil { + return nil, errors.Wrapf(err, "opening seccomp profile (%s) failed", config.SeccompProfilePath) + } + seccompConfig, err := seccomp.LoadProfile(string(seccompProfile), configSpec) + if err != nil { + return nil, errors.Wrapf(err, "loading seccomp profile (%s) failed", config.SeccompProfilePath) + } + configSpec.Linux.Seccomp = seccompConfig + } else { + seccompConfig, err := seccomp.GetDefaultProfile(configSpec) + if err != nil { + return nil, errors.Wrapf(err, "loading seccomp profile (%s) failed", config.SeccompProfilePath) + } + configSpec.Linux.Seccomp = seccompConfig + } + } + + // Clear default Seccomp profile from Generator for privileged containers + if config.SeccompProfilePath == "unconfined" || config.Privileged { + configSpec.Linux.Seccomp = nil + } + + // BIND MOUNTS + mounts, err := config.GetVolumeMounts(configSpec.Mounts) + if err != nil { + return nil, errors.Wrapf(err, "error getting volume mounts") + } + configSpec.Mounts = append(configSpec.Mounts, mounts...) + for _, mount := range configSpec.Mounts { + for _, opt := range mount.Options { + switch opt { + case "private", "rprivate", "slave", "rslave", "shared", "rshared": + if err := g.SetLinuxRootPropagation(opt); err != nil { + return nil, errors.Wrapf(err, "error setting root propagation for %q", mount.Destination) + } + } + } + } + + // BLOCK IO + blkio, err := config.CreateBlockIO() + if err != nil { + return nil, errors.Wrapf(err, "error creating block io") + } + if blkio != nil { + configSpec.Linux.Resources.BlockIO = blkio + } + + /* + //Annotations + Resources: &configSpec.LinuxResources{ + BlockIO: &blkio, + //HugepageLimits: + Network: &configSpec.LinuxNetwork{ + // ClassID *uint32 + // Priorites []LinuxInterfacePriority + }, + }, + //CgroupsPath: + //Namespaces: []LinuxNamespace + // DefaultAction: + // Architectures + // Syscalls: + }, + // RootfsPropagation + // MaskedPaths + // ReadonlyPaths: + // IntelRdt + }, + } + */ + return configSpec, nil +} + +func blockAccessToKernelFilesystems(config *CreateConfig, g *generate.Generator) { + if !config.Privileged { + for _, mp := range []string{ + "/proc/kcore", + "/proc/latency_stats", + "/proc/timer_list", + "/proc/timer_stats", + "/proc/sched_debug", + "/proc/scsi", + "/sys/firmware", + } { + g.AddLinuxMaskedPaths(mp) + } + + for _, rp := range []string{ + "/proc/asound", + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sys", + "/proc/sysrq-trigger", + } { + g.AddLinuxReadonlyPaths(rp) + } + } +} + +func addPidNS(config *CreateConfig, g *generate.Generator) error { + pidMode := config.PidMode + if pidMode.IsHost() { + return g.RemoveLinuxNamespace(string(spec.PIDNamespace)) + } + if pidMode.IsContainer() { + logrus.Debug("using container pidmode") + } + return nil +} + +func addUserNS(config *CreateConfig, g *generate.Generator) error { + if (len(config.IDMappings.UIDMap) > 0 || len(config.IDMappings.GIDMap) > 0) && !config.UsernsMode.IsHost() { + g.AddOrReplaceLinuxNamespace(spec.UserNamespace, "") + } + return nil +} + +func addNetNS(config *CreateConfig, g *generate.Generator) error { + netMode := config.NetMode + if netMode.IsHost() { + logrus.Debug("Using host netmode") + return g.RemoveLinuxNamespace(spec.NetworkNamespace) + } else if netMode.IsNone() { + logrus.Debug("Using none netmode") + return nil + } else if netMode.IsBridge() { + logrus.Debug("Using bridge netmode") + return nil + } else if netMode.IsContainer() { + logrus.Debug("Using container netmode") + } else { + return errors.Errorf("unknown network mode") + } + return nil +} + +func addUTSNS(config *CreateConfig, g *generate.Generator) error { + utsMode := config.UtsMode + if utsMode.IsHost() { + return g.RemoveLinuxNamespace(spec.UTSNamespace) + } + return nil +} + +func addIpcNS(config *CreateConfig, g *generate.Generator) error { + ipcMode := config.IpcMode + if ipcMode.IsHost() { + return g.RemoveLinuxNamespace(spec.IPCNamespace) + } + if ipcMode.IsContainer() { + logrus.Debug("Using container ipcmode") + } + + return nil +} + +func addRlimits(config *CreateConfig, g *generate.Generator) error { + var ( + ul *units.Ulimit + err error + ) + + for _, u := range config.Resources.Ulimit { + if ul, err = units.ParseUlimit(u); err != nil { + return errors.Wrapf(err, "ulimit option %q requires name=SOFT:HARD, failed to be parsed", u) + } + + g.AddProcessRlimits("RLIMIT_"+strings.ToUpper(ul.Name), uint64(ul.Hard), uint64(ul.Soft)) + } + return nil +} + +func setupCapabilities(config *CreateConfig, configSpec *spec.Spec) error { + var err error + var caplist []string + caplist, err = caps.TweakCapabilities(configSpec.Process.Capabilities.Bounding, config.CapAdd, config.CapDrop) + if err != nil { + return err + } + + configSpec.Process.Capabilities.Bounding = caplist + configSpec.Process.Capabilities.Permitted = caplist + configSpec.Process.Capabilities.Inheritable = caplist + configSpec.Process.Capabilities.Effective = caplist + return nil +} + +func addDevice(g *generate.Generator, device string) error { + dev, err := devices.DeviceFromPath(device, "rwm") + if err != nil { + return errors.Wrapf(err, "%s is not a valid device", device) + } + linuxdev := spec.LinuxDevice{ + Path: dev.Path, + Type: string(dev.Type), + Major: dev.Major, + Minor: dev.Minor, + FileMode: &dev.FileMode, + UID: &dev.Uid, + GID: &dev.Gid, + } + g.AddDevice(linuxdev) + g.AddLinuxResourcesDevice(true, string(dev.Type), &dev.Major, &dev.Minor, dev.Permissions) + return nil +} diff --git a/pkg/spec/spec_test.go b/pkg/spec/spec_test.go new file mode 100644 index 000000000..2a040a9a0 --- /dev/null +++ b/pkg/spec/spec_test.go @@ -0,0 +1,45 @@ +package createconfig + +import ( + "reflect" + "testing" + + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/stretchr/testify/assert" +) + +func TestCreateConfig_GetVolumeMounts(t *testing.T) { + data := spec.Mount{ + Destination: "/foobar", + Type: "bind", + Source: "foobar", + Options: []string{"ro", "rbind", "private"}, + } + config := CreateConfig{ + Volumes: []string{"foobar:/foobar:ro"}, + } + specMount, err := config.GetVolumeMounts([]spec.Mount{}) + assert.NoError(t, err) + assert.True(t, reflect.DeepEqual(data, specMount[0])) +} + +func TestCreateConfig_GetAnnotations(t *testing.T) { + config := CreateConfig{} + annotations := config.GetAnnotations() + assert.True(t, reflect.DeepEqual("sandbox", annotations["io.kubernetes.cri-o.ContainerType"])) +} + +func TestCreateConfig_GetTmpfsMounts(t *testing.T) { + data := spec.Mount{ + Destination: "/homer", + Type: "tmpfs", + Source: "tmpfs", + Options: []string{"rw", "size=787448k", "mode=1777"}, + } + config := CreateConfig{ + Tmpfs: []string{"/homer:rw,size=787448k,mode=1777"}, + } + tmpfsMount := config.GetTmpfsMounts() + assert.True(t, reflect.DeepEqual(data, tmpfsMount[0])) + +} |