summaryrefslogtreecommitdiff
path: root/pkg/spec
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/spec')
-rw-r--r--pkg/spec/createconfig.go508
-rw-r--r--pkg/spec/parse.go128
-rw-r--r--pkg/spec/ports.go107
-rw-r--r--pkg/spec/spec.go422
-rw-r--r--pkg/spec/spec_test.go45
5 files changed, 1210 insertions, 0 deletions
diff --git a/pkg/spec/createconfig.go b/pkg/spec/createconfig.go
new file mode 100644
index 000000000..de6e0f593
--- /dev/null
+++ b/pkg/spec/createconfig.go
@@ -0,0 +1,508 @@
+package createconfig
+
+import (
+ "os"
+ "strconv"
+ "strings"
+ "syscall"
+
+ "github.com/containers/storage"
+ "github.com/cri-o/ocicni/pkg/ocicni"
+ "github.com/docker/docker/api/types/container"
+ "github.com/docker/go-connections/nat"
+ "github.com/opencontainers/runc/libcontainer/devices"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/opencontainers/runtime-tools/generate"
+ "github.com/opencontainers/selinux/go-selinux/label"
+ "github.com/pkg/errors"
+ "github.com/projectatomic/libpod/libpod"
+ ann "github.com/projectatomic/libpod/pkg/annotations"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
+)
+
+type mountType string // mountType is the string written into the Type (and, for tmpfs, Source) field of generated mounts
+
+// Throttle rate selectors (bps, iops) followed by the supported mount type constants.
+const (
+ bps = iota // rate selector: makeThrottleArray validates entries with validateBpsDevice
+ iops // rate selector: makeThrottleArray validates entries with validateIOpsDevice
+ // TypeBind is the type for mounting host dir
+ TypeBind mountType = "bind"
+ // TypeVolume is the type for remote storage volumes
+ // TypeVolume mountType = "volume" // re-enable upon use
+ // TypeTmpfs is the type for mounting tmpfs
+ TypeTmpfs mountType = "tmpfs"
+)
+
+// CreateResourceConfig represents resource elements in CreateConfig
+// structures. Trailing comments name the CLI flag each field comes from.
+type CreateResourceConfig struct {
+ BlkioWeight uint16 // blkio-weight
+ BlkioWeightDevice []string // blkio-weight-device
+ CPUPeriod uint64 // cpu-period
+ CPUQuota int64 // cpu-quota
+ CPURtPeriod uint64 // cpu-rt-period
+ CPURtRuntime int64 // cpu-rt-runtime
+ CPUShares uint64 // cpu-shares
+ CPUs float64 // cpus
+ CPUsetCPUs string // presumably cpuset-cpus - TODO confirm flag name
+ CPUsetMems string // cpuset-mems
+ DeviceReadBps []string // device-read-bps
+ DeviceReadIOps []string // device-read-iops
+ DeviceWriteBps []string // device-write-bps
+ DeviceWriteIOps []string // device-write-iops
+ DisableOomKiller bool // oom-kill-disable
+ KernelMemory int64 // kernel-memory
+ Memory int64 // memory
+ MemoryReservation int64 // memory-reservation
+ MemorySwap int64 // memory-swap
+ MemorySwappiness int // memory-swappiness
+ OomScoreAdj int // oom-score-adj
+ PidsLimit int64 // pids-limit
+ ShmSize int64 // presumably shm-size - TODO confirm flag name
+ Ulimit []string // ulimit
+}
+
+// CreateConfig is a pre OCI spec structure. It represents user input from varlink or the CLI
+type CreateConfig struct {
+ Runtime *libpod.Runtime // used to look up containers whose net/pid/ipc namespaces are shared
+ Args []string
+ CapAdd []string // cap-add
+ CapDrop []string // cap-drop
+ CidFile string
+ ConmonPidFile string
+ CgroupParent string // cgroup-parent
+ Command []string // process args for the spec; also handed to libpod.WithCommand
+ Detach bool // detach
+ Devices []string // device
+ DNSOpt []string // dns-opt
+ DNSSearch []string // dns-search
+ DNSServers []string // dns
+ Entrypoint []string // entrypoint
+ Env map[string]string // env
+ ExposedPorts map[nat.Port]struct{}
+ GroupAdd []string // group-add
+ HostAdd []string // add-host
+ Hostname string // hostname
+ Image string
+ ImageID string
+ BuiltinImgVolumes map[string]struct{} // volumes defined in the image config
+ IDMappings *storage.IDMappingOptions // UID/GID maps copied into the spec; dereferenced without a nil check
+ ImageVolumeType string // how to handle the image volume, either bind, tmpfs, or ignore
+ Interactive bool // interactive
+ IpcMode container.IpcMode // ipc
+ IP6Address string // ipv6
+ IPAddress string // ip
+ Labels map[string]string // label
+ LinkLocalIP []string // link-local-ip
+ LogDriver string // log-driver
+ LogDriverOpt []string // log-opt
+ MacAddress string // mac-address
+ Name string // name
+ NetMode container.NetworkMode // net
+ Network string // network
+ NetworkAlias []string // network-alias
+ PidMode container.PidMode // pid
+ Pod string // pod
+ PortBindings nat.PortMap // container port to host bindings, converted by CreatePortBindings
+ Privileged bool // privileged
+ Publish []string // publish
+ PublishAll bool // publish-all
+ Quiet bool // quiet
+ ReadOnlyRootfs bool // read-only
+ Resources CreateResourceConfig // resource limits, see CreateResourceConfig
+ Rm bool // rm
+ ShmDir string
+ StopSignal syscall.Signal // stop-signal
+ StopTimeout uint // stop-timeout
+ Sysctl map[string]string // sysctl
+ Tmpfs []string // tmpfs
+ Tty bool // tty
+ UsernsMode container.UsernsMode // userns
+ User string // user
+ UtsMode container.UTSMode // uts
+ Volumes []string // volume
+ WorkDir string // workdir
+ MountLabel string // SecurityOpts
+ ProcessLabel string // SecurityOpts
+ NoNewPrivs bool // SecurityOpts
+ ApparmorProfile string // SecurityOpts
+ SeccompProfilePath string // SecurityOpts
+ SecurityOpts []string
+}
+
+// u32Ptr converts i to uint32 and returns a pointer to the converted copy.
+func u32Ptr(i int64) *uint32 {
+	converted := uint32(i)
+	return &converted
+}
+// fmPtr converts i to an os.FileMode and returns a pointer to the converted copy.
+func fmPtr(i int64) *os.FileMode {
+	mode := os.FileMode(i)
+	return &mode
+}
+
+// CreateBlockIO builds the block IO section of the spec from the blkio
+// fields of c.Resources. The weight always points at c.Resources.BlkioWeight;
+// per-device weights and throttles are only filled in when the matching
+// input list is non-empty. On error the partially filled struct is returned
+// together with the error.
+func (c *CreateConfig) CreateBlockIO() (*spec.LinuxBlockIO, error) {
+	res := &c.Resources
+	bio := &spec.LinuxBlockIO{}
+	bio.Weight = &res.BlkioWeight
+
+	if len(res.BlkioWeightDevice) > 0 {
+		var weightDevs []spec.LinuxWeightDevice
+		for _, raw := range res.BlkioWeightDevice {
+			wd, err := validateweightDevice(raw)
+			if err != nil {
+				return bio, errors.Wrapf(err, "invalid values for blkio-weight-device")
+			}
+			st, err := getStatFromPath(wd.path)
+			if err != nil {
+				return bio, errors.Wrapf(err, "error getting stat from path %q", wd.path)
+			}
+			// The device is identified by the major/minor numbers of its node.
+			entry := spec.LinuxWeightDevice{Weight: &wd.weight}
+			entry.Major = int64(unix.Major(st.Rdev))
+			entry.Minor = int64(unix.Minor(st.Rdev))
+			weightDevs = append(weightDevs, entry)
+		}
+		bio.WeightDevice = weightDevs
+	}
+
+	// Each throttle list goes through the same pipeline; only the rate
+	// selector and the destination field differ.
+	throttles := []struct {
+		input    []string
+		rateType int
+		dest     *[]spec.LinuxThrottleDevice
+	}{
+		{res.DeviceReadBps, bps, &bio.ThrottleReadBpsDevice},
+		{res.DeviceWriteBps, bps, &bio.ThrottleWriteBpsDevice},
+		{res.DeviceReadIOps, iops, &bio.ThrottleReadIOPSDevice},
+		{res.DeviceWriteIOps, iops, &bio.ThrottleWriteIOPSDevice},
+	}
+	for _, th := range throttles {
+		if len(th.input) == 0 {
+			continue
+		}
+		devs, err := makeThrottleArray(th.input, th.rateType)
+		if err != nil {
+			return bio, err
+		}
+		*th.dest = devs
+	}
+	return bio, nil
+}
+
+// makeThrottleArray converts "<device-path>:<rate>" strings into throttle
+// device entries. rateType selects the validator: bps uses validateBpsDevice,
+// anything else uses validateIOpsDevice. Each device path is stat'ed to
+// obtain its major/minor numbers. On any error the result is nil, so callers
+// never see a partially built list.
+func makeThrottleArray(throttleInput []string, rateType int) ([]spec.LinuxThrottleDevice, error) {
+	validate := validateIOpsDevice
+	if rateType == bps {
+		validate = validateBpsDevice
+	}
+
+	var ltds []spec.LinuxThrottleDevice
+	for _, i := range throttleInput {
+		t, err := validate(i)
+		if err != nil {
+			return nil, err
+		}
+		ltdStat, err := getStatFromPath(t.path)
+		if err != nil {
+			return nil, errors.Wrapf(err, "error getting stat from path %q", t.path)
+		}
+		ltd := spec.LinuxThrottleDevice{
+			Rate: t.rate,
+		}
+		ltd.Major = int64(unix.Major(ltdStat.Rdev))
+		ltd.Minor = int64(unix.Minor(ltdStat.Rdev))
+		ltds = append(ltds, ltd)
+	}
+	return ltds, nil
+}
+
+// GetAnnotations returns all the annotations for the container: the defaults
+// from getDefaultAnnotations, with the TTY annotation set to "true" when
+// c.Tty is enabled.
+func (c *CreateConfig) GetAnnotations() map[string]string {
+	a := getDefaultAnnotations()
+	// TODO - Which annotations do we want added by default
+	// TODO - This should be added to the DB long term
+	if c.Tty {
+		// Use the same key constant that getDefaultAnnotations seeds with
+		// "false" so the default entry is overwritten, not duplicated.
+		a[ann.TTY] = "true"
+	}
+	return a
+}
+
+func getDefaultAnnotations() map[string]string {
+ var annotations map[string]string
+ annotations = make(map[string]string)
+ annotations[ann.Annotations] = ""
+ annotations[ann.ContainerID] = ""
+ annotations[ann.ContainerName] = ""
+ annotations[ann.ContainerType] = "sandbox"
+ annotations[ann.Created] = ""
+ annotations[ann.HostName] = ""
+ annotations[ann.IP] = ""
+ annotations[ann.Image] = ""
+ annotations[ann.ImageName] = ""
+ annotations[ann.ImageRef] = ""
+ annotations[ann.KubeName] = ""
+ annotations[ann.Labels] = ""
+ annotations[ann.LogPath] = ""
+ annotations[ann.Metadata] = ""
+ annotations[ann.Name] = ""
+ annotations[ann.PrivilegedRuntime] = ""
+ annotations[ann.ResolvPath] = ""
+ annotations[ann.HostnamePath] = ""
+ annotations[ann.SandboxID] = ""
+ annotations[ann.SandboxName] = ""
+ annotations[ann.ShmPath] = ""
+ annotations[ann.MountPoint] = ""
+ annotations[ann.TrustedSandbox] = ""
+ annotations[ann.TTY] = "false"
+ annotations[ann.Stdin] = ""
+ annotations[ann.StdinOnce] = ""
+ annotations[ann.Volumes] = ""
+
+ return annotations
+}
+
+// GetVolumeMounts takes user provided input for bind mounts and creates Mount structs.
+//
+// Each entry of c.Volumes has the form source:destination[:opt,opt...].
+// Entries whose destination already exists in specMounts are skipped. Every
+// bind mount gets "rbind", plus "rw" when neither rw nor ro was given and
+// "private" when no propagation option was given. The z and Z options
+// relabel the source with c.MountLabel (shared and private respectively).
+// When c.ImageVolumeType is "tmpfs", the image's built-in volumes are added
+// as tmpfs mounts unless already present in specMounts.
+//
+// An entry without a destination yields an error.
+func (c *CreateConfig) GetVolumeMounts(specMounts []spec.Mount) ([]spec.Mount, error) {
+	var m []spec.Mount
+	for _, i := range c.Volumes {
+		// We need to handle SELinux options better here, specifically :Z
+		spliti := strings.Split(i, ":")
+		if len(spliti) < 2 {
+			return nil, errors.Errorf("invalid volume %q: expected source:destination[:options]", i)
+		}
+		source, dest := spliti[0], spliti[1]
+		if libpod.MountExists(specMounts, dest) {
+			continue
+		}
+
+		// Options are built fresh for every volume so that one volume's
+		// options can never leak into (or share a backing array with) the
+		// next one.
+		var options []string
+		if len(spliti) > 2 {
+			options = strings.Split(spliti[2], ",")
+		}
+		options = append(options, "rbind")
+
+		var foundrw, foundro, foundz, foundZ bool
+		var rootProp string
+		for _, opt := range options {
+			switch opt {
+			case "rw":
+				foundrw = true
+			case "ro":
+				foundro = true
+			case "z":
+				foundz = true
+			case "Z":
+				foundZ = true
+			case "private", "rprivate", "slave", "rslave", "shared", "rshared":
+				rootProp = opt
+			}
+		}
+		if !foundrw && !foundro {
+			options = append(options, "rw")
+		}
+		if foundz {
+			if err := label.Relabel(source, c.MountLabel, true); err != nil {
+				return nil, errors.Wrapf(err, "relabel failed %q", source)
+			}
+		}
+		if foundZ {
+			if err := label.Relabel(source, c.MountLabel, false); err != nil {
+				return nil, errors.Wrapf(err, "relabel failed %q", source)
+			}
+		}
+		if rootProp == "" {
+			options = append(options, "private")
+		}
+
+		m = append(m, spec.Mount{
+			Destination: dest,
+			Type:        string(TypeBind),
+			Source:      source,
+			Options:     options,
+		})
+	}
+
+	// volumes from image config
+	if c.ImageVolumeType != "tmpfs" {
+		return m, nil
+	}
+	for vol := range c.BuiltinImgVolumes {
+		if libpod.MountExists(specMounts, vol) {
+			continue
+		}
+		mount := spec.Mount{
+			Destination: vol,
+			Type:        string(TypeTmpfs),
+			Source:      string(TypeTmpfs),
+			Options:     []string{"rw", "noexec", "nosuid", "nodev", "tmpcopyup"},
+		}
+		m = append(m, mount)
+	}
+	return m, nil
+}
+
+// GetTmpfsMounts turns each c.Tmpfs entry ("destination[:opt,opt...]") into a
+// tmpfs Mount. Entries without an options part get a default option set.
+func (c *CreateConfig) GetTmpfsMounts() []spec.Mount {
+	var mounts []spec.Mount
+	for _, entry := range c.Tmpfs {
+		fields := strings.Split(entry, ":")
+
+		var opts []string
+		if len(fields) > 1 {
+			opts = strings.Split(fields[1], ",")
+		} else {
+			// Default options if nothing passed
+			opts = []string{"rw", "noexec", "nosuid", "nodev", "size=65536k"}
+		}
+
+		tmpfs := spec.Mount{
+			Destination: fields[0],
+			Type:        string(TypeTmpfs),
+			Source:      string(TypeTmpfs),
+			Options:     opts,
+		}
+		mounts = append(mounts, tmpfs)
+	}
+	return mounts
+}
+
+// GetContainerCreateOptions takes a CreateConfig and returns a slice of CtrCreateOptions.
+//
+// Options are emitted for stdin, name, user volumes, command, entrypoint,
+// network/PID/IPC namespaces, stop signal and timeout, DNS settings, extra
+// hosts, log path and the privileged flag. An error is returned when port
+// bindings cannot be converted or when a container referenced by a
+// namespace mode cannot be looked up through c.Runtime.
+func (c *CreateConfig) GetContainerCreateOptions() ([]libpod.CtrCreateOption, error) {
+	var options []libpod.CtrCreateOption
+	var portBindings []ocicni.PortMapping
+	var err error
+
+	// Uncomment after talking to mheon about unimplemented funcs
+	// options = append(options, libpod.WithLabels(c.labels))
+
+	if c.Interactive {
+		options = append(options, libpod.WithStdin())
+	}
+	if c.Name != "" {
+		logrus.Debugf("appending name %s", c.Name)
+		options = append(options, libpod.WithName(c.Name))
+	}
+
+	if len(c.PortBindings) > 0 {
+		portBindings, err = c.CreatePortBindings()
+		if err != nil {
+			return nil, errors.Wrapf(err, "unable to create port bindings")
+		}
+	}
+
+	if len(c.Volumes) != 0 {
+		// Volumes consist of multiple, colon-delimited fields
+		// The image spec only includes one part of that, so drop the
+		// others, if they are included
+		volumes := make([]string, 0, len(c.Volumes))
+		for _, vol := range c.Volumes {
+			volumes = append(volumes, strings.SplitN(vol, ":", 2)[0])
+		}
+
+		options = append(options, libpod.WithUserVolumes(volumes))
+	}
+
+	if len(c.Command) != 0 {
+		options = append(options, libpod.WithCommand(c.Command))
+	}
+
+	// Add entrypoint unconditionally
+	// If it's empty it's because it was explicitly set to "" or the image
+	// does not have one
+	options = append(options, libpod.WithEntrypoint(c.Entrypoint))
+
+	if c.NetMode.IsContainer() {
+		connectedCtr, err := c.Runtime.LookupContainer(c.NetMode.ConnectedContainer())
+		if err != nil {
+			return nil, errors.Wrapf(err, "container %q not found", c.NetMode.ConnectedContainer())
+		}
+		options = append(options, libpod.WithNetNSFrom(connectedCtr))
+	} else if !c.NetMode.IsHost() && !c.NetMode.IsNone() {
+		// The network namespace is configured after creation when ID
+		// mappings are in use and the user namespace is not the host's.
+		postConfigureNetNS := (len(c.IDMappings.UIDMap) > 0 || len(c.IDMappings.GIDMap) > 0) && !c.UsernsMode.IsHost()
+		// A single WithNetNS carrying the real port bindings is enough; a
+		// preceding call with an empty mapping would only be overwritten.
+		options = append(options, libpod.WithNetNS(portBindings, postConfigureNetNS))
+	}
+
+	if c.PidMode.IsContainer() {
+		connectedCtr, err := c.Runtime.LookupContainer(c.PidMode.Container())
+		if err != nil {
+			return nil, errors.Wrapf(err, "container %q not found", c.PidMode.Container())
+		}
+
+		options = append(options, libpod.WithPIDNSFrom(connectedCtr))
+	}
+	if c.IpcMode.IsContainer() {
+		connectedCtr, err := c.Runtime.LookupContainer(c.IpcMode.Container())
+		if err != nil {
+			return nil, errors.Wrapf(err, "container %q not found", c.IpcMode.Container())
+		}
+
+		options = append(options, libpod.WithIPCNSFrom(connectedCtr))
+	}
+
+	options = append(options, libpod.WithStopSignal(c.StopSignal))
+	options = append(options, libpod.WithStopTimeout(c.StopTimeout))
+	if len(c.DNSSearch) > 0 {
+		options = append(options, libpod.WithDNSSearch(c.DNSSearch))
+	}
+	if len(c.DNSServers) > 0 {
+		options = append(options, libpod.WithDNS(c.DNSServers))
+	}
+	if len(c.DNSOpt) > 0 {
+		options = append(options, libpod.WithDNSOption(c.DNSOpt))
+	}
+	if len(c.HostAdd) > 0 {
+		options = append(options, libpod.WithHosts(c.HostAdd))
+	}
+	// Only the "path" key of the log options is consumed here.
+	logPath := getLoggingPath(c.LogDriverOpt)
+	if logPath != "" {
+		options = append(options, libpod.WithLogPath(logPath))
+	}
+
+	options = append(options, libpod.WithPrivileged(c.Privileged))
+	return options, nil
+}
+
+// CreatePortBindings converts c.PortBindings into CNI port mappings. Every
+// host binding produces two mappings, udp first and then tcp. A binding with
+// an empty host port reuses the container port number.
+func (c *CreateConfig) CreatePortBindings() ([]ocicni.PortMapping, error) {
+	var mappings []ocicni.PortMapping
+	for ctrPort, bindings := range c.PortBindings {
+		containerPort := ctrPort.Int()
+		for _, binding := range bindings {
+			hostPort := containerPort
+			if binding.HostPort != "" {
+				parsed, err := strconv.Atoi(binding.HostPort)
+				if err != nil {
+					return nil, errors.Wrapf(err, "unable to convert host port to integer")
+				}
+				hostPort = parsed
+			}
+
+			// CNI requires us to make both udp and tcp structs
+			for _, proto := range []string{"udp", "tcp"} {
+				mappings = append(mappings, ocicni.PortMapping{
+					HostPort:      int32(hostPort),
+					ContainerPort: int32(containerPort),
+					Protocol:      proto,
+					HostIP:        binding.HostIP,
+				})
+			}
+		}
+	}
+	return mappings, nil
+}
+
+// AddPrivilegedDevices replaces the generator's device list with every device
+// reported by devices.HostDevices and then adds an allow rule with "rwm"
+// access and no type, major or minor restriction.
+func (c *CreateConfig) AddPrivilegedDevices(g *generate.Generator) error {
+	all, err := devices.HostDevices()
+	if err != nil {
+		return err
+	}
+
+	g.ClearLinuxDevices()
+	for idx := range all {
+		g.AddDevice(Device(all[idx]))
+	}
+	g.AddLinuxResourcesDevice(true, "", nil, nil, "rwm")
+
+	return nil
+}
+
+func getStatFromPath(path string) (unix.Stat_t, error) {
+ s := unix.Stat_t{}
+ err := unix.Stat(path, &s)
+ return s, err
+}
diff --git a/pkg/spec/parse.go b/pkg/spec/parse.go
new file mode 100644
index 000000000..920674b10
--- /dev/null
+++ b/pkg/spec/parse.go
@@ -0,0 +1,128 @@
+package createconfig
+
+import (
+ "fmt"
+ "strconv"
+ "strings"
+
+ "github.com/docker/go-units"
+ "github.com/opencontainers/runc/libcontainer/configs"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+// weightDevice pairs a device path with its block IO weight.
+type weightDevice struct {
+	path   string
+	weight uint16
+}
+
+// String renders the pair as "<path>:<weight>".
+func (wd *weightDevice) String() string {
+	const layout = "%s:%d"
+	return fmt.Sprintf(layout, wd.path, wd.weight)
+}
+
+// validateweightDevice parses a blkio-weight-device value of the form
+// "<device-path>:<weight>". The path must start with "/dev/" and the weight
+// must be 0 or lie between 10 and 1000 inclusive.
+func validateweightDevice(val string) (*weightDevice, error) {
+	parts := strings.SplitN(val, ":", 2)
+	switch {
+	case len(parts) != 2:
+		return nil, fmt.Errorf("bad format: %s", val)
+	case !strings.HasPrefix(parts[0], "/dev/"):
+		return nil, fmt.Errorf("bad format for device path: %s", val)
+	}
+
+	parsed, err := strconv.ParseUint(parts[1], 10, 0)
+	// Zero is accepted as-is; any other value must fall inside [10, 1000].
+	outOfRange := parsed > 0 && (parsed < 10 || parsed > 1000)
+	if err != nil || outOfRange {
+		return nil, fmt.Errorf("invalid weight for device: %s", val)
+	}
+
+	result := &weightDevice{path: parts[0], weight: uint16(parsed)}
+	return result, nil
+}
+
+// throttleDevice pairs a device path with a per-second rate limit.
+type throttleDevice struct {
+	path string
+	rate uint64
+}
+
+// String renders the pair as "<path>:<rate>".
+func (td *throttleDevice) String() string {
+	const layout = "%s:%d"
+	return fmt.Sprintf(layout, td.path, td.rate)
+}
+
+// validateBpsDevice parses a device-read-bps / device-write-bps value of the
+// form "<device-path>:<number>[<unit>]". The path must start with "/dev/";
+// the rate is parsed with units.RAMInBytes and must not be negative.
+func validateBpsDevice(val string) (*throttleDevice, error) {
+	parts := strings.SplitN(val, ":", 2)
+	switch {
+	case len(parts) != 2:
+		return nil, fmt.Errorf("bad format: %s", val)
+	case !strings.HasPrefix(parts[0], "/dev/"):
+		return nil, fmt.Errorf("bad format for device path: %s", val)
+	}
+
+	bytesPerSec, err := units.RAMInBytes(parts[1])
+	// Parse failures and negative rates are reported with the same message.
+	if err != nil || bytesPerSec < 0 {
+		return nil, fmt.Errorf("invalid rate for device: %s. The correct format is <device-path>:<number>[<unit>]. Number must be a positive integer. Unit is optional and can be kb, mb, or gb", val)
+	}
+
+	dev := &throttleDevice{path: parts[0], rate: uint64(bytesPerSec)}
+	return dev, nil
+}
+
+// validateIOpsDevice validates that the specified string has a valid device-rate format
+// for device-write-iops and device-read-iops flags.
+//
+// The expected form is "<device-path>:<number>" where the path starts with
+// "/dev/" and the number is an unsigned base-10 integer. Negative or
+// non-numeric rates are rejected by the unsigned parse itself, so no
+// separate sign check is needed.
+func validateIOpsDevice(val string) (*throttleDevice, error) { //nolint
+	split := strings.SplitN(val, ":", 2)
+	if len(split) != 2 {
+		return nil, fmt.Errorf("bad format: %s", val)
+	}
+	if !strings.HasPrefix(split[0], "/dev/") {
+		return nil, fmt.Errorf("bad format for device path: %s", val)
+	}
+	rate, err := strconv.ParseUint(split[1], 10, 64)
+	if err != nil {
+		return nil, fmt.Errorf("invalid rate for device: %s. The correct format is <device-path>:<number>. Number must be a positive integer", val)
+	}
+
+	return &throttleDevice{
+		path: split[0],
+		rate: rate,
+	}, nil
+}
+
+// getLoggingPath scans "key=value" log options and returns the trimmed value
+// of the first one whose trimmed key is "path". It returns "" when no such
+// option exists.
+func getLoggingPath(opts []string) string {
+	for _, entry := range opts {
+		kv := strings.SplitN(entry, "=", 2)
+		if len(kv) != 2 {
+			continue
+		}
+		if key := strings.TrimSpace(kv[0]); key != "path" {
+			continue
+		}
+		return strings.TrimSpace(kv[1])
+	}
+	return ""
+}
+
+// Device transforms a libcontainer configs.Device to a specs.LinuxDevice object.
+func Device(d *configs.Device) spec.LinuxDevice {
+ return spec.LinuxDevice{
+ Type: string(d.Type),
+ Path: d.Path,
+ Major: d.Major,
+ Minor: d.Minor,
+ FileMode: fmPtr(int64(d.FileMode)),
+ UID: u32Ptr(int64(d.Uid)),
+ GID: u32Ptr(int64(d.Gid)),
+ }
+}
diff --git a/pkg/spec/ports.go b/pkg/spec/ports.go
new file mode 100644
index 000000000..4d9a625bf
--- /dev/null
+++ b/pkg/spec/ports.go
@@ -0,0 +1,107 @@
+package createconfig
+
+import (
+ "fmt"
+ "net"
+ "strconv"
+
+ "github.com/docker/go-connections/nat"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+)
+
+// ExposedPorts merges image-exposed ports, user --expose ports (single ports
+// or ranges) and user publish specs into a port binding map.
+//
+// Ports that are already published are dropped from the exposed set. With
+// publishAll, each remaining exposed port is bound as tcp to a random host
+// port. Finally, any binding whose first entry has no host port gets a random
+// one assigned.
+func ExposedPorts(expose, publish []string, publishAll bool, imageExposedPorts map[string]struct{}) (map[nat.Port][]nat.PortBinding, error) {
+	containerPorts := make(map[string]string)
+
+	// ports exposed by the image itself
+	for imgPort := range imageExposedPorts {
+		_, portNum := nat.SplitProtoPort(imgPort)
+		containerPorts[portNum] = ""
+	}
+
+	// ports exposed by the user (--expose): <portnum>/[<proto>] or
+	// <startport-endport>/[<proto>]; a single port is a range of one
+	for _, userPort := range expose {
+		_, portRange := nat.SplitProtoPort(userPort)
+		first, last, err := nat.ParsePortRange(portRange)
+		if err != nil {
+			return nil, fmt.Errorf("invalid range format for --expose: %s, error: %s", userPort, err)
+		}
+		for n := first; n <= last; n++ {
+			containerPorts[strconv.Itoa(int(n))] = ""
+		}
+	}
+
+	// user supplied port bindings (-p)
+	published, portBindings, err := nat.ParsePortSpecs(publish)
+	if err != nil {
+		return nil, err
+	}
+
+	// a published port no longer needs to be treated as merely exposed
+	for pubPort := range published {
+		delete(containerPorts, pubPort.Port())
+	}
+
+	if publishAll {
+		for exposed := range containerPorts {
+			ctrPort, err := nat.NewPort("tcp", exposed)
+			if err != nil {
+				return nil, err
+			}
+			randomPort, err := getRandomPort()
+			if err != nil {
+				return nil, err
+			}
+			logrus.Debugf("Using random host port %d with container port %d", randomPort, ctrPort.Int())
+			portBindings[ctrPort] = CreatePortBinding(randomPort, "")
+		}
+	}
+
+	// Bindings without a host port get a random one.
+	for ctrPort, bindings := range portBindings {
+		if bindings[0].HostPort != "" {
+			continue
+		}
+		randomPort, err := getRandomPort()
+		if err != nil {
+			return nil, err
+		}
+		logrus.Debugf("Using random host port %d with container port %s", randomPort, ctrPort.Port())
+		bindings[0].HostPort = strconv.Itoa(randomPort)
+	}
+	return portBindings, nil
+}
+
+// getRandomPort asks the OS for a free TCP port by listening on ":0", reads
+// the port number back from the listener address and closes the listener
+// before returning.
+func getRandomPort() (int, error) {
+	listener, err := net.Listen("tcp", ":0")
+	if err != nil {
+		return 0, errors.Wrapf(err, "unable to get free port")
+	}
+	defer listener.Close()
+
+	addr := listener.Addr().String()
+	_, portStr, err := net.SplitHostPort(addr)
+	if err != nil {
+		return 0, errors.Wrapf(err, "unable to determine free port")
+	}
+
+	port, err := strconv.Atoi(portStr)
+	if err != nil {
+		return 0, errors.Wrapf(err, "unable to convert random port to int")
+	}
+	return port, nil
+}
+
+// CreatePortBinding returns a one-element slice holding a PortBinding for the
+// given host port (formatted as a decimal string) and host IP.
+func CreatePortBinding(hostPort int, hostIP string) []nat.PortBinding {
+	binding := nat.PortBinding{
+		HostIP:   hostIP,
+		HostPort: strconv.Itoa(hostPort),
+	}
+	return []nat.PortBinding{binding}
+}
diff --git a/pkg/spec/spec.go b/pkg/spec/spec.go
new file mode 100644
index 000000000..959a24213
--- /dev/null
+++ b/pkg/spec/spec.go
@@ -0,0 +1,422 @@
+package createconfig
+
+import (
+ "strings"
+
+ "github.com/docker/docker/daemon/caps"
+ "github.com/docker/docker/pkg/mount"
+ "github.com/docker/docker/profiles/seccomp"
+ "github.com/docker/go-units"
+ "github.com/opencontainers/runc/libcontainer/devices"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/opencontainers/runtime-tools/generate"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "io/ioutil"
+)
+
+const cpuPeriod = 100000 // CPU period applied when config.Resources.CPUs is set; the quota becomes CPUs * cpuPeriod
+
+// CreateConfigToOCISpec builds an OCI runtime spec from config, covering mounts, process settings, resource limits, devices, ID mappings, security options, namespaces, capabilities, seccomp, volume mounts and block IO. It returns nil and an error as soon as any step fails.
+func CreateConfigToOCISpec(config *CreateConfig) (*spec.Spec, error) { //nolint
+ cgroupPerm := "ro"
+ g := generate.New()
+ g.HostSpecific = true
+ addCgroup := true
+ if config.Privileged {
+ cgroupPerm = "rw"
+ g.RemoveMount("/sys")
+ sysMnt := spec.Mount{
+ Destination: "/sys",
+ Type: "sysfs",
+ Source: "sysfs",
+ Options: []string{"nosuid", "noexec", "nodev", "rw"},
+ }
+ g.AddMount(sysMnt)
+ } else if !config.UsernsMode.IsHost() && config.NetMode.IsHost() {
+ addCgroup = false // no separate cgroup mount in this case; /sys is bind-mounted read-only from the host instead
+ g.RemoveMount("/sys")
+ sysMnt := spec.Mount{
+ Destination: "/sys",
+ Type: "bind",
+ Source: "/sys",
+ Options: []string{"nosuid", "noexec", "nodev", "ro", "rbind"},
+ }
+ g.AddMount(sysMnt)
+ }
+
+ if addCgroup {
+ cgroupMnt := spec.Mount{
+ Destination: "/sys/fs/cgroup",
+ Type: "cgroup",
+ Source: "cgroup",
+ Options: []string{"nosuid", "noexec", "nodev", "relatime", cgroupPerm},
+ }
+ g.AddMount(cgroupMnt)
+ }
+ g.SetProcessCwd(config.WorkDir)
+ g.SetProcessArgs(config.Command)
+ g.SetProcessTerminal(config.Tty)
+
+ for key, val := range config.GetAnnotations() {
+ g.AddAnnotation(key, val)
+ }
+ g.SetRootReadonly(config.ReadOnlyRootfs)
+ g.SetHostname(config.Hostname)
+ if config.Hostname != "" {
+ g.AddProcessEnv("HOSTNAME", config.Hostname)
+ }
+ for sysctlKey, sysctlVal := range config.Sysctl {
+ g.AddLinuxSysctl(sysctlKey, sysctlVal)
+ }
+ g.AddProcessEnv("container", "podman")
+
+ // RESOURCES - MEMORY (zero-valued limits are left unset)
+ if config.Resources.Memory != 0 {
+ g.SetLinuxResourcesMemoryLimit(config.Resources.Memory)
+ }
+ if config.Resources.MemoryReservation != 0 {
+ g.SetLinuxResourcesMemoryReservation(config.Resources.MemoryReservation)
+ }
+ if config.Resources.MemorySwap != 0 {
+ g.SetLinuxResourcesMemorySwap(config.Resources.MemorySwap)
+ }
+ if config.Resources.KernelMemory != 0 {
+ g.SetLinuxResourcesMemoryKernel(config.Resources.KernelMemory)
+ }
+ if config.Resources.MemorySwappiness != -1 { // -1 is the "not set" sentinel; note the zero value of the field is applied as swappiness 0
+ g.SetLinuxResourcesMemorySwappiness(uint64(config.Resources.MemorySwappiness))
+ }
+ g.SetLinuxResourcesMemoryDisableOOMKiller(config.Resources.DisableOomKiller)
+ g.SetProcessOOMScoreAdj(config.Resources.OomScoreAdj)
+
+ // RESOURCES - CPU (zero or empty values are left unset)
+ if config.Resources.CPUShares != 0 {
+ g.SetLinuxResourcesCPUShares(config.Resources.CPUShares)
+ }
+ if config.Resources.CPUQuota != 0 {
+ g.SetLinuxResourcesCPUQuota(config.Resources.CPUQuota)
+ }
+ if config.Resources.CPUPeriod != 0 {
+ g.SetLinuxResourcesCPUPeriod(config.Resources.CPUPeriod)
+ }
+ if config.Resources.CPUs != 0 { // runs last, so it replaces any period/quota set just above
+ g.SetLinuxResourcesCPUPeriod(cpuPeriod)
+ g.SetLinuxResourcesCPUQuota(int64(config.Resources.CPUs * cpuPeriod))
+ }
+ if config.Resources.CPURtRuntime != 0 {
+ g.SetLinuxResourcesCPURealtimeRuntime(config.Resources.CPURtRuntime)
+ }
+ if config.Resources.CPURtPeriod != 0 {
+ g.SetLinuxResourcesCPURealtimePeriod(config.Resources.CPURtPeriod)
+ }
+ if config.Resources.CPUsetCPUs != "" {
+ g.SetLinuxResourcesCPUCpus(config.Resources.CPUsetCPUs)
+ }
+ if config.Resources.CPUsetMems != "" {
+ g.SetLinuxResourcesCPUMems(config.Resources.CPUsetMems)
+ }
+
+ // Devices
+ if config.Privileged {
+ // If privileged, we need to add all the host devices to the
+ // spec. We do not add the user provided ones because we are
+ // already adding them all.
+ if err := config.AddPrivilegedDevices(&g); err != nil {
+ return nil, err
+ }
+ } else {
+ for _, device := range config.Devices {
+ if err := addDevice(&g, device); err != nil {
+ return nil, err
+ }
+ }
+ }
+
+ for _, uidmap := range config.IDMappings.UIDMap { // NOTE(review): config.IDMappings is dereferenced without a nil check
+ g.AddLinuxUIDMapping(uint32(uidmap.HostID), uint32(uidmap.ContainerID), uint32(uidmap.Size))
+ }
+ for _, gidmap := range config.IDMappings.GIDMap {
+ g.AddLinuxGIDMapping(uint32(gidmap.HostID), uint32(gidmap.ContainerID), uint32(gidmap.Size))
+ }
+ // SECURITY OPTS
+ g.SetProcessNoNewPrivileges(config.NoNewPrivs)
+ g.SetProcessApparmorProfile(config.ApparmorProfile)
+ g.SetProcessSelinuxLabel(config.ProcessLabel)
+ g.SetLinuxMountLabel(config.MountLabel)
+ blockAccessToKernelFilesystems(config, &g)
+
+ // RESOURCES - PIDS
+ if config.Resources.PidsLimit != 0 {
+ g.SetLinuxResourcesPidsLimit(config.Resources.PidsLimit)
+ }
+
+ for _, i := range config.Tmpfs {
+ // Default options if nothing passed
+ options := []string{"rw", "noexec", "nosuid", "nodev", "size=65536k"}
+ spliti := strings.SplitN(i, ":", 2)
+ if len(spliti) > 1 {
+ if _, _, err := mount.ParseTmpfsOptions(spliti[1]); err != nil { // validation only; the raw option string is what gets split and stored
+ return nil, err
+ }
+ options = strings.Split(spliti[1], ",")
+ }
+ tmpfsMnt := spec.Mount{
+ Destination: spliti[0],
+ Type: "tmpfs",
+ Source: "tmpfs",
+ Options: append(options, "tmpcopyup"),
+ }
+ g.AddMount(tmpfsMnt)
+ }
+
+ for name, val := range config.Env {
+ g.AddProcessEnv(name, val)
+ }
+
+ if err := addRlimits(config, &g); err != nil {
+ return nil, err
+ }
+
+ if err := addPidNS(config, &g); err != nil {
+ return nil, err
+ }
+
+ if err := addUserNS(config, &g); err != nil {
+ return nil, err
+ }
+
+ if err := addNetNS(config, &g); err != nil {
+ return nil, err
+ }
+
+ if err := addUTSNS(config, &g); err != nil {
+ return nil, err
+ }
+
+ if err := addIpcNS(config, &g); err != nil {
+ return nil, err
+ }
+ configSpec := g.Spec() // presumably shares state with g, since later g.* calls are expected to show up in the returned spec - TODO confirm
+
+ // HANDLE CAPABILITIES
+ // NOTE: Must happen before SECCOMP
+ if !config.Privileged {
+ if err := setupCapabilities(config, configSpec); err != nil {
+ return nil, err
+ }
+ } else {
+ g.SetupPrivileged(true)
+ }
+
+ // HANDLE SECCOMP: "unconfined" skips loading, a non-empty path is read from disk, an empty path selects the default profile
+ if config.SeccompProfilePath != "unconfined" {
+ if config.SeccompProfilePath != "" {
+ seccompProfile, err := ioutil.ReadFile(config.SeccompProfilePath)
+ if err != nil {
+ return nil, errors.Wrapf(err, "opening seccomp profile (%s) failed", config.SeccompProfilePath)
+ }
+ seccompConfig, err := seccomp.LoadProfile(string(seccompProfile), configSpec)
+ if err != nil {
+ return nil, errors.Wrapf(err, "loading seccomp profile (%s) failed", config.SeccompProfilePath)
+ }
+ configSpec.Linux.Seccomp = seccompConfig
+ } else {
+ seccompConfig, err := seccomp.GetDefaultProfile(configSpec)
+ if err != nil {
+ return nil, errors.Wrapf(err, "loading seccomp profile (%s) failed", config.SeccompProfilePath) // NOTE(review): the path is empty in this branch, so the message prints "()"
+ }
+ configSpec.Linux.Seccomp = seccompConfig
+ }
+ }
+
+ // Clear any Seccomp profile for unconfined or privileged containers (including one loaded just above)
+ if config.SeccompProfilePath == "unconfined" || config.Privileged {
+ configSpec.Linux.Seccomp = nil
+ }
+
+ // BIND MOUNTS: user volumes are appended, then any propagation option found on any mount is applied as the root propagation
+ mounts, err := config.GetVolumeMounts(configSpec.Mounts)
+ if err != nil {
+ return nil, errors.Wrapf(err, "error getting volume mounts")
+ }
+ configSpec.Mounts = append(configSpec.Mounts, mounts...)
+ for _, mount := range configSpec.Mounts { // NOTE(review): this loop variable shadows the imported mount package
+ for _, opt := range mount.Options {
+ switch opt {
+ case "private", "rprivate", "slave", "rslave", "shared", "rshared":
+ if err := g.SetLinuxRootPropagation(opt); err != nil {
+ return nil, errors.Wrapf(err, "error setting root propagation for %q", mount.Destination)
+ }
+ }
+ }
+ }
+
+ // BLOCK IO
+ blkio, err := config.CreateBlockIO()
+ if err != nil {
+ return nil, errors.Wrapf(err, "error creating block io")
+ }
+ if blkio != nil { // CreateBlockIO always returns a non-nil pointer, so this branch is always taken on success
+ configSpec.Linux.Resources.BlockIO = blkio
+ }
+
+ /*
+ //Annotations
+ Resources: &configSpec.LinuxResources{
+ BlockIO: &blkio,
+ //HugepageLimits:
+ Network: &configSpec.LinuxNetwork{
+ // ClassID *uint32
+ // Priorites []LinuxInterfacePriority
+ },
+ },
+ //CgroupsPath:
+ //Namespaces: []LinuxNamespace
+ // DefaultAction:
+ // Architectures
+ // Syscalls:
+ },
+ // RootfsPropagation
+ // MaskedPaths
+ // ReadonlyPaths:
+ // IntelRdt
+ },
+ }
+ */
+ return configSpec, nil
+}
+
+// blockAccessToKernelFilesystems registers a fixed set of masked paths and
+// read-only paths on the generator. Privileged containers are left untouched.
+func blockAccessToKernelFilesystems(config *CreateConfig, g *generate.Generator) {
+	if config.Privileged {
+		return
+	}
+
+	maskedPaths := []string{
+		"/proc/kcore",
+		"/proc/latency_stats",
+		"/proc/timer_list",
+		"/proc/timer_stats",
+		"/proc/sched_debug",
+		"/proc/scsi",
+		"/sys/firmware",
+	}
+	for i := range maskedPaths {
+		g.AddLinuxMaskedPaths(maskedPaths[i])
+	}
+
+	readonlyPaths := []string{
+		"/proc/asound",
+		"/proc/bus",
+		"/proc/fs",
+		"/proc/irq",
+		"/proc/sys",
+		"/proc/sysrq-trigger",
+	}
+	for i := range readonlyPaths {
+		g.AddLinuxReadonlyPaths(readonlyPaths[i])
+	}
+}
+
+// addPidNS removes the PID namespace from the spec when the container uses
+// the host PID mode. Container PID mode is only logged here.
+func addPidNS(config *CreateConfig, g *generate.Generator) error {
+	switch mode := config.PidMode; {
+	case mode.IsHost():
+		return g.RemoveLinuxNamespace(string(spec.PIDNamespace))
+	case mode.IsContainer():
+		logrus.Debug("using container pidmode")
+	}
+	return nil
+}
+
+// addUserNS adds (or replaces) a user namespace with an empty path when any
+// UID or GID mapping is configured and the userns mode is not host. It always
+// returns nil.
+func addUserNS(config *CreateConfig, g *generate.Generator) error {
+	hasMappings := len(config.IDMappings.UIDMap) > 0 || len(config.IDMappings.GIDMap) > 0
+	if !hasMappings || config.UsernsMode.IsHost() {
+		return nil
+	}
+	g.AddOrReplaceLinuxNamespace(spec.UserNamespace, "")
+	return nil
+}
+
+// addNetNS adjusts the spec for the configured network mode: host mode
+// removes the network namespace, none/bridge/container modes are only logged,
+// and any other mode is rejected with an error.
+func addNetNS(config *CreateConfig, g *generate.Generator) error {
+	switch mode := config.NetMode; {
+	case mode.IsHost():
+		logrus.Debug("Using host netmode")
+		return g.RemoveLinuxNamespace(spec.NetworkNamespace)
+	case mode.IsNone():
+		logrus.Debug("Using none netmode")
+	case mode.IsBridge():
+		logrus.Debug("Using bridge netmode")
+	case mode.IsContainer():
+		logrus.Debug("Using container netmode")
+	default:
+		return errors.Errorf("unknown network mode")
+	}
+	return nil
+}
+
+// addUTSNS removes the UTS namespace from the spec when the container uses
+// the host UTS mode; otherwise it does nothing.
+func addUTSNS(config *CreateConfig, g *generate.Generator) error {
+	if !config.UtsMode.IsHost() {
+		return nil
+	}
+	return g.RemoveLinuxNamespace(spec.UTSNamespace)
+}
+
+// addIpcNS removes the IPC namespace from the spec when the container uses
+// the host IPC mode. Container IPC mode is only logged here.
+func addIpcNS(config *CreateConfig, g *generate.Generator) error {
+	switch mode := config.IpcMode; {
+	case mode.IsHost():
+		return g.RemoveLinuxNamespace(spec.IPCNamespace)
+	case mode.IsContainer():
+		logrus.Debug("Using container ipcmode")
+	}
+	return nil
+}
+
+func addRlimits(config *CreateConfig, g *generate.Generator) error {
+ var (
+ ul *units.Ulimit
+ err error
+ )
+
+ for _, u := range config.Resources.Ulimit {
+ if ul, err = units.ParseUlimit(u); err != nil {
+ return errors.Wrapf(err, "ulimit option %q requires name=SOFT:HARD, failed to be parsed", u)
+ }
+
+ g.AddProcessRlimits("RLIMIT_"+strings.ToUpper(ul.Name), uint64(ul.Hard), uint64(ul.Soft))
+ }
+ return nil
+}
+
+func setupCapabilities(config *CreateConfig, configSpec *spec.Spec) error {
+ var err error
+ var caplist []string
+ caplist, err = caps.TweakCapabilities(configSpec.Process.Capabilities.Bounding, config.CapAdd, config.CapDrop)
+ if err != nil {
+ return err
+ }
+
+ configSpec.Process.Capabilities.Bounding = caplist
+ configSpec.Process.Capabilities.Permitted = caplist
+ configSpec.Process.Capabilities.Inheritable = caplist
+ configSpec.Process.Capabilities.Effective = caplist
+ return nil
+}
+
+// addDevice resolves the device node at the given path with "rwm"
+// permissions, adds it to the generator's device list and adds a matching
+// allow rule for its type, major and minor numbers.
+func addDevice(g *generate.Generator, device string) error {
+	resolved, err := devices.DeviceFromPath(device, "rwm")
+	if err != nil {
+		return errors.Wrapf(err, "%s is not a valid device", device)
+	}
+
+	devType := string(resolved.Type)
+	g.AddDevice(spec.LinuxDevice{
+		Path:     resolved.Path,
+		Type:     devType,
+		Major:    resolved.Major,
+		Minor:    resolved.Minor,
+		FileMode: &resolved.FileMode,
+		UID:      &resolved.Uid,
+		GID:      &resolved.Gid,
+	})
+	g.AddLinuxResourcesDevice(true, devType, &resolved.Major, &resolved.Minor, resolved.Permissions)
+	return nil
+}
diff --git a/pkg/spec/spec_test.go b/pkg/spec/spec_test.go
new file mode 100644
index 000000000..2a040a9a0
--- /dev/null
+++ b/pkg/spec/spec_test.go
@@ -0,0 +1,45 @@
+package createconfig
+
+import (
+ "reflect"
+ "testing"
+
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/stretchr/testify/assert"
+)
+
+// TestCreateConfig_GetVolumeMounts checks that a read-only "src:dest:ro"
+// volume becomes a bind mount with the ro, rbind and private options.
+func TestCreateConfig_GetVolumeMounts(t *testing.T) {
+	config := CreateConfig{
+		Volumes: []string{"foobar:/foobar:ro"},
+	}
+	want := spec.Mount{
+		Destination: "/foobar",
+		Type:        "bind",
+		Source:      "foobar",
+		Options:     []string{"ro", "rbind", "private"},
+	}
+
+	got, err := config.GetVolumeMounts([]spec.Mount{})
+	assert.NoError(t, err)
+	assert.True(t, reflect.DeepEqual(want, got[0]))
+}
+
+func TestCreateConfig_GetAnnotations(t *testing.T) {
+ config := CreateConfig{}
+ annotations := config.GetAnnotations()
+ assert.True(t, reflect.DeepEqual("sandbox", annotations["io.kubernetes.cri-o.ContainerType"]))
+}
+
+// TestCreateConfig_GetTmpfsMounts checks that explicit tmpfs options replace
+// the defaults and are split on commas.
+func TestCreateConfig_GetTmpfsMounts(t *testing.T) {
+	config := CreateConfig{
+		Tmpfs: []string{"/homer:rw,size=787448k,mode=1777"},
+	}
+	want := spec.Mount{
+		Destination: "/homer",
+		Type:        "tmpfs",
+		Source:      "tmpfs",
+		Options:     []string{"rw", "size=787448k", "mode=1777"},
+	}
+
+	got := config.GetTmpfsMounts()
+	assert.True(t, reflect.DeepEqual(want, got[0]))
+}