summaryrefslogtreecommitdiff
path: root/vendor/github.com/opencontainers/runc/libcontainer/configs
diff options
context:
space:
mode:
authorMatthew Heon <matthew.heon@gmail.com>2017-11-01 11:24:59 -0400
committerMatthew Heon <matthew.heon@gmail.com>2017-11-01 11:24:59 -0400
commita031b83a09a8628435317a03f199cdc18b78262f (patch)
treebc017a96769ce6de33745b8b0b1304ccf38e9df0 /vendor/github.com/opencontainers/runc/libcontainer/configs
parent2b74391cd5281f6fdf391ff8ad50fd1490f6bf89 (diff)
downloadpodman-a031b83a09a8628435317a03f199cdc18b78262f.tar.gz
podman-a031b83a09a8628435317a03f199cdc18b78262f.tar.bz2
podman-a031b83a09a8628435317a03f199cdc18b78262f.zip
Initial checkin from CRI-O repo
Signed-off-by: Matthew Heon <matthew.heon@gmail.com>
Diffstat (limited to 'vendor/github.com/opencontainers/runc/libcontainer/configs')
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go61
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go122
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go6
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_windows.go6
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/config.go344
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go61
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/device.go57
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go111
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go9
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go14
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go39
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go5
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go122
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go31
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go13
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go8
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/network.go72
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go117
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go195
19 files changed, 1393 insertions, 0 deletions
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go
new file mode 100644
index 000000000..e0f3ca165
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go
@@ -0,0 +1,61 @@
+package configs
+
+import "fmt"
+
+// blockIODevice holds major:minor format supported in blkio cgroup
+type blockIODevice struct {
+ // Major is the device's major number
+ Major int64 `json:"major"`
+ // Minor is the device's minor number
+ Minor int64 `json:"minor"`
+}
+
+// WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair
+type WeightDevice struct {
+ blockIODevice
+ // Weight is the bandwidth rate for the device, range is from 10 to 1000
+ Weight uint16 `json:"weight"`
+ // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
+ LeafWeight uint16 `json:"leafWeight"`
+}
+
+// NewWeightDevice returns a configured WeightDevice pointer
+func NewWeightDevice(major, minor int64, weight, leafWeight uint16) *WeightDevice {
+ wd := &WeightDevice{}
+ wd.Major = major
+ wd.Minor = minor
+ wd.Weight = weight
+ wd.LeafWeight = leafWeight
+ return wd
+}
+
+// WeightString formats the struct to be writable to the cgroup specific file
+func (wd *WeightDevice) WeightString() string {
+ return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight)
+}
+
+// LeafWeightString formats the struct to be writable to the cgroup specific file
+func (wd *WeightDevice) LeafWeightString() string {
+ return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight)
+}
+
+// ThrottleDevice struct holds a `major:minor rate_per_second` pair
+type ThrottleDevice struct {
+ blockIODevice
+ // Rate is the IO rate limit per cgroup per device
+ Rate uint64 `json:"rate"`
+}
+
+// NewThrottleDevice returns a configured ThrottleDevice pointer
+func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice {
+ td := &ThrottleDevice{}
+ td.Major = major
+ td.Minor = minor
+ td.Rate = rate
+ return td
+}
+
+// String formats the struct to be writable to the cgroup specific file
+func (td *ThrottleDevice) String() string {
+ return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate)
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
new file mode 100644
index 000000000..e15a662f5
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
@@ -0,0 +1,122 @@
+package configs
+
+type FreezerState string
+
+const (
+ Undefined FreezerState = ""
+ Frozen FreezerState = "FROZEN"
+ Thawed FreezerState = "THAWED"
+)
+
+type Cgroup struct {
+ // Deprecated, use Path instead
+ Name string `json:"name,omitempty"`
+
+ // name of parent of cgroup or slice
+ // Deprecated, use Path instead
+ Parent string `json:"parent,omitempty"`
+
+ // Path specifies the path to cgroups that are created and/or joined by the container.
+ // The path is assumed to be relative to the host system cgroup mountpoint.
+ Path string `json:"path"`
+
+ // ScopePrefix describes prefix for the scope name
+ ScopePrefix string `json:"scope_prefix"`
+
+ // Paths represent the absolute cgroups paths to join.
+ // This takes precedence over Path.
+ Paths map[string]string
+
+ // Resources contains various cgroups settings to apply
+ *Resources
+}
+
+type Resources struct {
+ // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list.
+ // Deprecated
+ AllowAllDevices *bool `json:"allow_all_devices,omitempty"`
+ // Deprecated
+ AllowedDevices []*Device `json:"allowed_devices,omitempty"`
+ // Deprecated
+ DeniedDevices []*Device `json:"denied_devices,omitempty"`
+
+ Devices []*Device `json:"devices"`
+
+ // Memory limit (in bytes)
+ Memory int64 `json:"memory"`
+
+ // Memory reservation or soft_limit (in bytes)
+ MemoryReservation int64 `json:"memory_reservation"`
+
+ // Total memory usage (memory + swap); set `-1` to enable unlimited swap
+ MemorySwap int64 `json:"memory_swap"`
+
+ // Kernel memory limit (in bytes)
+ KernelMemory int64 `json:"kernel_memory"`
+
+ // Kernel memory limit for TCP use (in bytes)
+ KernelMemoryTCP int64 `json:"kernel_memory_tcp"`
+
+ // CPU shares (relative weight vs. other containers)
+ CpuShares uint64 `json:"cpu_shares"`
+
+ // CPU hardcap limit (in usecs). Allowed cpu time in a given period.
+ CpuQuota int64 `json:"cpu_quota"`
+
+ // CPU period to be used for hardcapping (in usecs). 0 to use system default.
+ CpuPeriod uint64 `json:"cpu_period"`
+
+ // How many time CPU will use in realtime scheduling (in usecs).
+ CpuRtRuntime int64 `json:"cpu_rt_quota"`
+
+ // CPU period to be used for realtime scheduling (in usecs).
+ CpuRtPeriod uint64 `json:"cpu_rt_period"`
+
+ // CPU to use
+ CpusetCpus string `json:"cpuset_cpus"`
+
+ // MEM to use
+ CpusetMems string `json:"cpuset_mems"`
+
+ // Process limit; set <= `0' to disable limit.
+ PidsLimit int64 `json:"pids_limit"`
+
+ // Specifies per cgroup weight, range is from 10 to 1000.
+ BlkioWeight uint16 `json:"blkio_weight"`
+
+ // Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
+ BlkioLeafWeight uint16 `json:"blkio_leaf_weight"`
+
+ // Weight per cgroup per device, can override BlkioWeight.
+ BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"`
+
+ // IO read rate limit per cgroup per device, bytes per second.
+ BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"`
+
+ // IO write rate limit per cgroup per device, bytes per second.
+ BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"`
+
+ // IO read rate limit per cgroup per device, IO per second.
+ BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"`
+
+ // IO write rate limit per cgroup per device, IO per second.
+ BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"`
+
+ // set the freeze value for the process
+ Freezer FreezerState `json:"freezer"`
+
+ // Hugetlb limit (in bytes)
+ HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"`
+
+ // Whether to disable OOM Killer
+ OomKillDisable bool `json:"oom_kill_disable"`
+
+ // Tuning swappiness behaviour per cgroup
+ MemorySwappiness *uint64 `json:"memory_swappiness"`
+
+ // Set priority of network traffic for container
+ NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`
+
+ // Set class identifier for container's network packets
+ NetClsClassid uint32 `json:"net_cls_classid_u"`
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go
new file mode 100644
index 000000000..95e2830a4
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go
@@ -0,0 +1,6 @@
+// +build !windows,!linux,!freebsd
+
+package configs
+
+type Cgroup struct {
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_windows.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_windows.go
new file mode 100644
index 000000000..d74847b0d
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_windows.go
@@ -0,0 +1,6 @@
+package configs
+
+// TODO Windows: This can ultimately be entirely factored out on Windows as
+// cgroups are a Unix-specific construct.
+type Cgroup struct {
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
new file mode 100644
index 000000000..269fffff3
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
@@ -0,0 +1,344 @@
+package configs
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "os/exec"
+ "time"
+
+ "github.com/opencontainers/runtime-spec/specs-go"
+
+ "github.com/sirupsen/logrus"
+)
+
+type Rlimit struct {
+ Type int `json:"type"`
+ Hard uint64 `json:"hard"`
+ Soft uint64 `json:"soft"`
+}
+
+// IDMap represents UID/GID Mappings for User Namespaces.
+type IDMap struct {
+ ContainerID int `json:"container_id"`
+ HostID int `json:"host_id"`
+ Size int `json:"size"`
+}
+
+// Seccomp represents syscall restrictions
+// By default, only the native architecture of the kernel is allowed to be used
+// for syscalls. Additional architectures can be added by specifying them in
+// Architectures.
+type Seccomp struct {
+ DefaultAction Action `json:"default_action"`
+ Architectures []string `json:"architectures"`
+ Syscalls []*Syscall `json:"syscalls"`
+}
+
+// Action is taken upon rule match in Seccomp
+type Action int
+
+const (
+ Kill Action = iota + 1
+ Errno
+ Trap
+ Allow
+ Trace
+)
+
+// Operator is a comparison operator to be used when matching syscall arguments in Seccomp
+type Operator int
+
+const (
+ EqualTo Operator = iota + 1
+ NotEqualTo
+ GreaterThan
+ GreaterThanOrEqualTo
+ LessThan
+ LessThanOrEqualTo
+ MaskEqualTo
+)
+
+// Arg is a rule to match a specific syscall argument in Seccomp
+type Arg struct {
+ Index uint `json:"index"`
+ Value uint64 `json:"value"`
+ ValueTwo uint64 `json:"value_two"`
+ Op Operator `json:"op"`
+}
+
+// Syscall is a rule to match a syscall in Seccomp
+type Syscall struct {
+ Name string `json:"name"`
+ Action Action `json:"action"`
+ Args []*Arg `json:"args"`
+}
+
+// TODO Windows. Many of these fields should be factored out into those parts
+// which are common across platforms, and those which are platform specific.
+
+// Config defines configuration options for executing a process inside a contained environment.
+type Config struct {
+ // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs
+ // This is a common option when the container is running in ramdisk
+ NoPivotRoot bool `json:"no_pivot_root"`
+
+ // ParentDeathSignal specifies the signal that is sent to the container's process in the case
+ // that the parent process dies.
+ ParentDeathSignal int `json:"parent_death_signal"`
+
+ // Path to a directory containing the container's root filesystem.
+ Rootfs string `json:"rootfs"`
+
+ // Readonlyfs will remount the container's rootfs as readonly where only externally mounted
+ // bind mounts are writtable.
+ Readonlyfs bool `json:"readonlyfs"`
+
+ // Specifies the mount propagation flags to be applied to /.
+ RootPropagation int `json:"rootPropagation"`
+
+ // Mounts specify additional source and destination paths that will be mounted inside the container's
+ // rootfs and mount namespace if specified
+ Mounts []*Mount `json:"mounts"`
+
+ // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well!
+ Devices []*Device `json:"devices"`
+
+ MountLabel string `json:"mount_label"`
+
+ // Hostname optionally sets the container's hostname if provided
+ Hostname string `json:"hostname"`
+
+ // Namespaces specifies the container's namespaces that it should setup when cloning the init process
+ // If a namespace is not provided that namespace is shared from the container's parent process
+ Namespaces Namespaces `json:"namespaces"`
+
+ // Capabilities specify the capabilities to keep when executing the process inside the container
+ // All capabilities not specified will be dropped from the processes capability mask
+ Capabilities *Capabilities `json:"capabilities"`
+
+ // Networks specifies the container's network setup to be created
+ Networks []*Network `json:"networks"`
+
+ // Routes can be specified to create entries in the route table as the container is started
+ Routes []*Route `json:"routes"`
+
+ // Cgroups specifies specific cgroup settings for the various subsystems that the container is
+ // placed into to limit the resources the container has available
+ Cgroups *Cgroup `json:"cgroups"`
+
+ // AppArmorProfile specifies the profile to apply to the process running in the container and is
+ // change at the time the process is execed
+ AppArmorProfile string `json:"apparmor_profile,omitempty"`
+
+ // ProcessLabel specifies the label to apply to the process running in the container. It is
+ // commonly used by selinux
+ ProcessLabel string `json:"process_label,omitempty"`
+
+ // Rlimits specifies the resource limits, such as max open files, to set in the container
+ // If Rlimits are not set, the container will inherit rlimits from the parent process
+ Rlimits []Rlimit `json:"rlimits,omitempty"`
+
+ // OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores
+ // for a process. Valid values are between the range [-1000, '1000'], where processes with
+ // higher scores are preferred for being killed.
+ // More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
+ OomScoreAdj int `json:"oom_score_adj"`
+
+ // UidMappings is an array of User ID mappings for User Namespaces
+ UidMappings []IDMap `json:"uid_mappings"`
+
+ // GidMappings is an array of Group ID mappings for User Namespaces
+ GidMappings []IDMap `json:"gid_mappings"`
+
+ // MaskPaths specifies paths within the container's rootfs to mask over with a bind
+ // mount pointing to /dev/null as to prevent reads of the file.
+ MaskPaths []string `json:"mask_paths"`
+
+ // ReadonlyPaths specifies paths within the container's rootfs to remount as read-only
+ // so that these files prevent any writes.
+ ReadonlyPaths []string `json:"readonly_paths"`
+
+ // Sysctl is a map of properties and their values. It is the equivalent of using
+ // sysctl -w my.property.name value in Linux.
+ Sysctl map[string]string `json:"sysctl"`
+
+ // Seccomp allows actions to be taken whenever a syscall is made within the container.
+ // A number of rules are given, each having an action to be taken if a syscall matches it.
+ // A default action to be taken if no rules match is also given.
+ Seccomp *Seccomp `json:"seccomp"`
+
+ // NoNewPrivileges controls whether processes in the container can gain additional privileges.
+ NoNewPrivileges bool `json:"no_new_privileges,omitempty"`
+
+ // Hooks are a collection of actions to perform at various container lifecycle events.
+ // CommandHooks are serialized to JSON, but other hooks are not.
+ Hooks *Hooks
+
+ // Version is the version of opencontainer specification that is supported.
+ Version string `json:"version"`
+
+ // Labels are user defined metadata that is stored in the config and populated on the state
+ Labels []string `json:"labels"`
+
+ // NoNewKeyring will not allocated a new session keyring for the container. It will use the
+ // callers keyring in this case.
+ NoNewKeyring bool `json:"no_new_keyring"`
+
+ // Rootless specifies whether the container is a rootless container.
+ Rootless bool `json:"rootless"`
+}
+
+type Hooks struct {
+ // Prestart commands are executed after the container namespaces are created,
+ // but before the user supplied command is executed from init.
+ Prestart []Hook
+
+ // Poststart commands are executed after the container init process starts.
+ Poststart []Hook
+
+ // Poststop commands are executed after the container init process exits.
+ Poststop []Hook
+}
+
+type Capabilities struct {
+ // Bounding is the set of capabilities checked by the kernel.
+ Bounding []string
+ // Effective is the set of capabilities checked by the kernel.
+ Effective []string
+ // Inheritable is the capabilities preserved across execve.
+ Inheritable []string
+ // Permitted is the limiting superset for effective capabilities.
+ Permitted []string
+ // Ambient is the ambient set of capabilities that are kept.
+ Ambient []string
+}
+
+func (hooks *Hooks) UnmarshalJSON(b []byte) error {
+ var state struct {
+ Prestart []CommandHook
+ Poststart []CommandHook
+ Poststop []CommandHook
+ }
+
+ if err := json.Unmarshal(b, &state); err != nil {
+ return err
+ }
+
+ deserialize := func(shooks []CommandHook) (hooks []Hook) {
+ for _, shook := range shooks {
+ hooks = append(hooks, shook)
+ }
+
+ return hooks
+ }
+
+ hooks.Prestart = deserialize(state.Prestart)
+ hooks.Poststart = deserialize(state.Poststart)
+ hooks.Poststop = deserialize(state.Poststop)
+ return nil
+}
+
+func (hooks Hooks) MarshalJSON() ([]byte, error) {
+ serialize := func(hooks []Hook) (serializableHooks []CommandHook) {
+ for _, hook := range hooks {
+ switch chook := hook.(type) {
+ case CommandHook:
+ serializableHooks = append(serializableHooks, chook)
+ default:
+ logrus.Warnf("cannot serialize hook of type %T, skipping", hook)
+ }
+ }
+
+ return serializableHooks
+ }
+
+ return json.Marshal(map[string]interface{}{
+ "prestart": serialize(hooks.Prestart),
+ "poststart": serialize(hooks.Poststart),
+ "poststop": serialize(hooks.Poststop),
+ })
+}
+
+// HookState is the payload provided to a hook on execution.
+type HookState specs.State
+
+type Hook interface {
+ // Run executes the hook with the provided state.
+ Run(HookState) error
+}
+
+// NewFunctionHook will call the provided function when the hook is run.
+func NewFunctionHook(f func(HookState) error) FuncHook {
+ return FuncHook{
+ run: f,
+ }
+}
+
+type FuncHook struct {
+ run func(HookState) error
+}
+
+func (f FuncHook) Run(s HookState) error {
+ return f.run(s)
+}
+
+type Command struct {
+ Path string `json:"path"`
+ Args []string `json:"args"`
+ Env []string `json:"env"`
+ Dir string `json:"dir"`
+ Timeout *time.Duration `json:"timeout"`
+}
+
+// NewCommandHook will execute the provided command when the hook is run.
+func NewCommandHook(cmd Command) CommandHook {
+ return CommandHook{
+ Command: cmd,
+ }
+}
+
+type CommandHook struct {
+ Command
+}
+
+func (c Command) Run(s HookState) error {
+ b, err := json.Marshal(s)
+ if err != nil {
+ return err
+ }
+ var stdout, stderr bytes.Buffer
+ cmd := exec.Cmd{
+ Path: c.Path,
+ Args: c.Args,
+ Env: c.Env,
+ Stdin: bytes.NewReader(b),
+ Stdout: &stdout,
+ Stderr: &stderr,
+ }
+ if err := cmd.Start(); err != nil {
+ return err
+ }
+ errC := make(chan error, 1)
+ go func() {
+ err := cmd.Wait()
+ if err != nil {
+ err = fmt.Errorf("error running hook: %v, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
+ }
+ errC <- err
+ }()
+ var timerCh <-chan time.Time
+ if c.Timeout != nil {
+ timer := time.NewTimer(*c.Timeout)
+ defer timer.Stop()
+ timerCh = timer.C
+ }
+ select {
+ case err := <-errC:
+ return err
+ case <-timerCh:
+ cmd.Process.Kill()
+ cmd.Wait()
+ return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
+ }
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
new file mode 100644
index 000000000..07da10804
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
@@ -0,0 +1,61 @@
+package configs
+
+import "fmt"
+
+// HostUID gets the translated uid for the process on host which could be
+// different when user namespaces are enabled.
+func (c Config) HostUID(containerId int) (int, error) {
+ if c.Namespaces.Contains(NEWUSER) {
+ if c.UidMappings == nil {
+ return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.")
+ }
+ id, found := c.hostIDFromMapping(containerId, c.UidMappings)
+ if !found {
+ return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.")
+ }
+ return id, nil
+ }
+ // Return unchanged id.
+ return containerId, nil
+}
+
+// HostRootUID gets the root uid for the process on host which could be non-zero
+// when user namespaces are enabled.
+func (c Config) HostRootUID() (int, error) {
+ return c.HostUID(0)
+}
+
+// HostGID gets the translated gid for the process on host which could be
+// different when user namespaces are enabled.
+func (c Config) HostGID(containerId int) (int, error) {
+ if c.Namespaces.Contains(NEWUSER) {
+ if c.GidMappings == nil {
+ return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
+ }
+ id, found := c.hostIDFromMapping(containerId, c.GidMappings)
+ if !found {
+ return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.")
+ }
+ return id, nil
+ }
+ // Return unchanged id.
+ return containerId, nil
+}
+
+// HostRootGID gets the root gid for the process on host which could be non-zero
+// when user namespaces are enabled.
+func (c Config) HostRootGID() (int, error) {
+ return c.HostGID(0)
+}
+
+// Utility function that gets a host ID for a container ID from user namespace map
+// if that ID is present in the map.
+func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
+ for _, m := range uMap {
+ if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
+ hostID := m.HostID + (containerID - m.ContainerID)
+ return hostID, true
+ }
+ }
+ return -1, false
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/device.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/device.go
new file mode 100644
index 000000000..8701bb212
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/device.go
@@ -0,0 +1,57 @@
+package configs
+
+import (
+ "fmt"
+ "os"
+)
+
+const (
+ Wildcard = -1
+)
+
+// TODO Windows: This can be factored out in the future
+
+type Device struct {
+ // Device type, block, char, etc.
+ Type rune `json:"type"`
+
+ // Path to the device.
+ Path string `json:"path"`
+
+ // Major is the device's major number.
+ Major int64 `json:"major"`
+
+ // Minor is the device's minor number.
+ Minor int64 `json:"minor"`
+
+ // Cgroup permissions format, rwm.
+ Permissions string `json:"permissions"`
+
+ // FileMode permission bits for the device.
+ FileMode os.FileMode `json:"file_mode"`
+
+ // Uid of the device.
+ Uid uint32 `json:"uid"`
+
+ // Gid of the device.
+ Gid uint32 `json:"gid"`
+
+ // Write the file to the allowed list
+ Allow bool `json:"allow"`
+}
+
+func (d *Device) CgroupString() string {
+ return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions)
+}
+
+func (d *Device) Mkdev() int {
+ return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12))
+}
+
+// deviceNumberString converts the device number to a string return result.
+func deviceNumberString(number int64) string {
+ if number == Wildcard {
+ return "*"
+ }
+ return fmt.Sprint(number)
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go
new file mode 100644
index 000000000..4d348d217
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go
@@ -0,0 +1,111 @@
+// +build linux freebsd
+
+package configs
+
+var (
+ // DefaultSimpleDevices are devices that are to be both allowed and created.
+ DefaultSimpleDevices = []*Device{
+ // /dev/null and zero
+ {
+ Path: "/dev/null",
+ Type: 'c',
+ Major: 1,
+ Minor: 3,
+ Permissions: "rwm",
+ FileMode: 0666,
+ },
+ {
+ Path: "/dev/zero",
+ Type: 'c',
+ Major: 1,
+ Minor: 5,
+ Permissions: "rwm",
+ FileMode: 0666,
+ },
+
+ {
+ Path: "/dev/full",
+ Type: 'c',
+ Major: 1,
+ Minor: 7,
+ Permissions: "rwm",
+ FileMode: 0666,
+ },
+
+ // consoles and ttys
+ {
+ Path: "/dev/tty",
+ Type: 'c',
+ Major: 5,
+ Minor: 0,
+ Permissions: "rwm",
+ FileMode: 0666,
+ },
+
+ // /dev/urandom,/dev/random
+ {
+ Path: "/dev/urandom",
+ Type: 'c',
+ Major: 1,
+ Minor: 9,
+ Permissions: "rwm",
+ FileMode: 0666,
+ },
+ {
+ Path: "/dev/random",
+ Type: 'c',
+ Major: 1,
+ Minor: 8,
+ Permissions: "rwm",
+ FileMode: 0666,
+ },
+ }
+ DefaultAllowedDevices = append([]*Device{
+ // allow mknod for any device
+ {
+ Type: 'c',
+ Major: Wildcard,
+ Minor: Wildcard,
+ Permissions: "m",
+ },
+ {
+ Type: 'b',
+ Major: Wildcard,
+ Minor: Wildcard,
+ Permissions: "m",
+ },
+
+ {
+ Path: "/dev/console",
+ Type: 'c',
+ Major: 5,
+ Minor: 1,
+ Permissions: "rwm",
+ },
+ // /dev/pts/ - pts namespaces are "coming soon"
+ {
+ Path: "",
+ Type: 'c',
+ Major: 136,
+ Minor: Wildcard,
+ Permissions: "rwm",
+ },
+ {
+ Path: "",
+ Type: 'c',
+ Major: 5,
+ Minor: 2,
+ Permissions: "rwm",
+ },
+
+ // tuntap
+ {
+ Path: "",
+ Type: 'c',
+ Major: 10,
+ Minor: 200,
+ Permissions: "rwm",
+ },
+ }, DefaultSimpleDevices...)
+ DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...)
+)
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go
new file mode 100644
index 000000000..d30216380
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go
@@ -0,0 +1,9 @@
+package configs
+
+type HugepageLimit struct {
+ // which type of hugepage to limit.
+ Pagesize string `json:"page_size"`
+
+ // usage limit for hugepage.
+ Limit uint64 `json:"limit"`
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go
new file mode 100644
index 000000000..9a0395eaf
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go
@@ -0,0 +1,14 @@
+package configs
+
+import (
+ "fmt"
+)
+
+type IfPrioMap struct {
+ Interface string `json:"interface"`
+ Priority int64 `json:"priority"`
+}
+
+func (i *IfPrioMap) CgroupString() string {
+ return fmt.Sprintf("%s %d", i.Interface, i.Priority)
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
new file mode 100644
index 000000000..670757ddb
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
@@ -0,0 +1,39 @@
+package configs
+
+const (
+ // EXT_COPYUP is a directive to copy up the contents of a directory when
+ // a tmpfs is mounted over it.
+ EXT_COPYUP = 1 << iota
+)
+
+type Mount struct {
+ // Source path for the mount.
+ Source string `json:"source"`
+
+ // Destination path for the mount inside the container.
+ Destination string `json:"destination"`
+
+ // Device the mount is for.
+ Device string `json:"device"`
+
+ // Mount flags.
+ Flags int `json:"flags"`
+
+ // Propagation Flags
+ PropagationFlags []int `json:"propagation_flags"`
+
+ // Mount data applied to the mount.
+ Data string `json:"data"`
+
+ // Relabel source if set, "z" indicates shared, "Z" indicates unshared.
+ Relabel string `json:"relabel"`
+
+ // Extensions are additional flags that are specific to runc.
+ Extensions int `json:"extensions"`
+
+ // Optional Command to be run before Source is mounted.
+ PremountCmds []Command `json:"premount_cmds"`
+
+ // Optional Command to be run after Source is mounted.
+ PostmountCmds []Command `json:"postmount_cmds"`
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go
new file mode 100644
index 000000000..a3329a31a
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go
@@ -0,0 +1,5 @@
+package configs
+
+type NamespaceType string
+
+type Namespaces []Namespace
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
new file mode 100644
index 000000000..5fc171a57
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
@@ -0,0 +1,122 @@
+package configs
+
+import (
+ "fmt"
+ "os"
+ "sync"
+)
+
+const (
+ NEWNET NamespaceType = "NEWNET"
+ NEWPID NamespaceType = "NEWPID"
+ NEWNS NamespaceType = "NEWNS"
+ NEWUTS NamespaceType = "NEWUTS"
+ NEWIPC NamespaceType = "NEWIPC"
+ NEWUSER NamespaceType = "NEWUSER"
+)
+
+var (
+ nsLock sync.Mutex
+ supportedNamespaces = make(map[NamespaceType]bool)
+)
+
+// NsName converts the namespace type to its filename
+func NsName(ns NamespaceType) string {
+ switch ns {
+ case NEWNET:
+ return "net"
+ case NEWNS:
+ return "mnt"
+ case NEWPID:
+ return "pid"
+ case NEWIPC:
+ return "ipc"
+ case NEWUSER:
+ return "user"
+ case NEWUTS:
+ return "uts"
+ }
+ return ""
+}
+
+// IsNamespaceSupported returns whether a namespace is available or
+// not
+func IsNamespaceSupported(ns NamespaceType) bool {
+ nsLock.Lock()
+ defer nsLock.Unlock()
+ supported, ok := supportedNamespaces[ns]
+ if ok {
+ return supported
+ }
+ nsFile := NsName(ns)
+ // if the namespace type is unknown, just return false
+ if nsFile == "" {
+ return false
+ }
+ _, err := os.Stat(fmt.Sprintf("/proc/self/ns/%s", nsFile))
+ // a namespace is supported if it exists and we have permissions to read it
+ supported = err == nil
+ supportedNamespaces[ns] = supported
+ return supported
+}
+
+func NamespaceTypes() []NamespaceType {
+ return []NamespaceType{
+ NEWUSER, // Keep user NS always first, don't move it.
+ NEWIPC,
+ NEWUTS,
+ NEWNET,
+ NEWPID,
+ NEWNS,
+ }
+}
+
+// Namespace defines configuration for each namespace. It specifies an
+// alternate path that is able to be joined via setns.
+type Namespace struct {
+ Type NamespaceType `json:"type"`
+ Path string `json:"path"`
+}
+
+func (n *Namespace) GetPath(pid int) string {
+ return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type))
+}
+
+func (n *Namespaces) Remove(t NamespaceType) bool {
+ i := n.index(t)
+ if i == -1 {
+ return false
+ }
+ *n = append((*n)[:i], (*n)[i+1:]...)
+ return true
+}
+
+func (n *Namespaces) Add(t NamespaceType, path string) {
+ i := n.index(t)
+ if i == -1 {
+ *n = append(*n, Namespace{Type: t, Path: path})
+ return
+ }
+ (*n)[i].Path = path
+}
+
+func (n *Namespaces) index(t NamespaceType) int {
+ for i, ns := range *n {
+ if ns.Type == t {
+ return i
+ }
+ }
+ return -1
+}
+
+func (n *Namespaces) Contains(t NamespaceType) bool {
+ return n.index(t) != -1
+}
+
+func (n *Namespaces) PathOf(t NamespaceType) string {
+ i := n.index(t)
+ if i == -1 {
+ return ""
+ }
+ return (*n)[i].Path
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
new file mode 100644
index 000000000..4ce6813d2
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
@@ -0,0 +1,31 @@
+// +build linux
+
+package configs
+
+import "golang.org/x/sys/unix"
+
+func (n *Namespace) Syscall() int {
+ return namespaceInfo[n.Type]
+}
+
+var namespaceInfo = map[NamespaceType]int{
+ NEWNET: unix.CLONE_NEWNET,
+ NEWNS: unix.CLONE_NEWNS,
+ NEWUSER: unix.CLONE_NEWUSER,
+ NEWIPC: unix.CLONE_NEWIPC,
+ NEWUTS: unix.CLONE_NEWUTS,
+ NEWPID: unix.CLONE_NEWPID,
+}
+
+// CloneFlags parses the container's Namespaces options to set the correct
+// flags on clone, unshare. This function returns flags only for new namespaces.
+func (n *Namespaces) CloneFlags() uintptr {
+ var flag int
+ for _, v := range *n {
+ if v.Path != "" {
+ continue
+ }
+ flag |= namespaceInfo[v.Type]
+ }
+ return uintptr(flag)
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go
new file mode 100644
index 000000000..5d9a5c81f
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go
@@ -0,0 +1,13 @@
+// +build !linux,!windows
+
+package configs
+
+func (n *Namespace) Syscall() int {
+ panic("No namespace syscall support")
+}
+
+// CloneFlags parses the container's Namespaces options to set the correct
+// flags on clone, unshare. This function returns flags only for new namespaces.
+func (n *Namespaces) CloneFlags() uintptr {
+ panic("No namespace syscall support")
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go
new file mode 100644
index 000000000..19bf713de
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go
@@ -0,0 +1,8 @@
+// +build !linux
+
+package configs
+
+// Namespace defines configuration for each namespace. It specifies an
+// alternate path that is able to be joined via setns.
+type Namespace struct {
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/network.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/network.go
new file mode 100644
index 000000000..ccdb228e1
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/network.go
@@ -0,0 +1,72 @@
+package configs
+
+// Network defines configuration for a container's networking stack
+//
+// The network configuration can be omitted from a container causing the
+// container to be setup with the host's networking stack
+type Network struct {
+ // Type sets the networks type, commonly veth and loopback
+ Type string `json:"type"`
+
+ // Name of the network interface
+ Name string `json:"name"`
+
+ // The bridge to use.
+ Bridge string `json:"bridge"`
+
+ // MacAddress contains the MAC address to set on the network interface
+ MacAddress string `json:"mac_address"`
+
+ // Address contains the IPv4 and mask to set on the network interface
+ Address string `json:"address"`
+
+ // Gateway sets the gateway address that is used as the default for the interface
+ Gateway string `json:"gateway"`
+
+ // IPv6Address contains the IPv6 and mask to set on the network interface
+ IPv6Address string `json:"ipv6_address"`
+
+ // IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface
+ IPv6Gateway string `json:"ipv6_gateway"`
+
+ // Mtu sets the mtu value for the interface and will be mirrored on both the host and
+ // container's interfaces if a pair is created, specifically in the case of type veth
+ // Note: This does not apply to loopback interfaces.
+ Mtu int `json:"mtu"`
+
+ // TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and
+ // container's interfaces if a pair is created, specifically in the case of type veth
+ // Note: This does not apply to loopback interfaces.
+ TxQueueLen int `json:"txqueuelen"`
+
+ // HostInterfaceName is a unique name of a veth pair that resides on in the host interface of the
+ // container.
+ HostInterfaceName string `json:"host_interface_name"`
+
+ // HairpinMode specifies if hairpin NAT should be enabled on the virtual interface
+ // bridge port in the case of type veth
+ // Note: This is unsupported on some systems.
+ // Note: This does not apply to loopback interfaces.
+ HairpinMode bool `json:"hairpin_mode"`
+}
+
+// Routes can be specified to create entries in the route table as the container is started
+//
+// All of destination, source, and gateway should be either IPv4 or IPv6.
+// One of the three options must be present, and omitted entries will use their
+// IP family default for the route table. For IPv4 for example, setting the
+// gateway to 1.2.3.4 and the interface to eth0 will set up a standard
+// destination of 0.0.0.0(or *) when viewed in the route table.
+type Route struct {
+ // Sets the destination and mask, should be a CIDR. Accepts IPv4 and IPv6
+ Destination string `json:"destination"`
+
+ // Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6
+ Source string `json:"source"`
+
+ // Sets the gateway. Accepts IPv4 and IPv6
+ Gateway string `json:"gateway"`
+
+ // The device to set this route up for, for example: eth0
+ InterfaceName string `json:"interface_name"`
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go
new file mode 100644
index 000000000..0cebfaf80
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go
@@ -0,0 +1,117 @@
+package validate
+
+import (
+ "fmt"
+ "os"
+ "reflect"
+ "strings"
+
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+var (
+ geteuid = os.Geteuid
+ getegid = os.Getegid
+)
+
+func (v *ConfigValidator) rootless(config *configs.Config) error {
+ if err := rootlessMappings(config); err != nil {
+ return err
+ }
+ if err := rootlessMount(config); err != nil {
+ return err
+ }
+ // Currently, cgroups cannot effectively be used in rootless containers.
+ // The new cgroup namespace doesn't really help us either because it doesn't
+ // have nice interactions with the user namespace (we're working with upstream
+ // to fix this).
+ if err := rootlessCgroup(config); err != nil {
+ return err
+ }
+
+ // XXX: We currently can't verify the user config at all, because
+ // configs.Config doesn't store the user-related configs. So this
+ // has to be verified by setupUser() in init_linux.go.
+
+ return nil
+}
+
+func rootlessMappings(config *configs.Config) error {
+ rootuid, err := config.HostRootUID()
+ if err != nil {
+ return fmt.Errorf("failed to get root uid from uidMappings: %v", err)
+ }
+ if euid := geteuid(); euid != 0 {
+ if !config.Namespaces.Contains(configs.NEWUSER) {
+ return fmt.Errorf("rootless containers require user namespaces")
+ }
+ if rootuid != euid {
+ return fmt.Errorf("rootless containers cannot map container root to a different host user")
+ }
+ }
+
+ rootgid, err := config.HostRootGID()
+ if err != nil {
+ return fmt.Errorf("failed to get root gid from gidMappings: %v", err)
+ }
+
+ // Similar to the above test, we need to make sure that we aren't trying to
+ // map to a group ID that we don't have the right to be.
+ if rootgid != getegid() {
+ return fmt.Errorf("rootless containers cannot map container root to a different host group")
+ }
+
+ // We can only map one user and group inside a container (our own).
+ if len(config.UidMappings) != 1 || config.UidMappings[0].Size != 1 {
+ return fmt.Errorf("rootless containers cannot map more than one user")
+ }
+ if len(config.GidMappings) != 1 || config.GidMappings[0].Size != 1 {
+ return fmt.Errorf("rootless containers cannot map more than one group")
+ }
+
+ return nil
+}
+
+// cgroup verifies that the user isn't trying to set any cgroup limits or paths.
+func rootlessCgroup(config *configs.Config) error {
+ // Nothing set at all.
+ if config.Cgroups == nil || config.Cgroups.Resources == nil {
+ return nil
+ }
+
+ // Used for comparing to the zero value.
+ left := reflect.ValueOf(*config.Cgroups.Resources)
+ right := reflect.Zero(left.Type())
+
+ // This is all we need to do, since specconv won't add cgroup options in
+ // rootless mode.
+ if !reflect.DeepEqual(left.Interface(), right.Interface()) {
+ return fmt.Errorf("cannot specify resource limits in rootless container")
+ }
+
+ return nil
+}
+
+// mount verifies that the user isn't trying to set up any mounts they don't have
+// the rights to do. In addition, it makes sure that no mount has a `uid=` or
+// `gid=` option that doesn't resolve to root.
+func rootlessMount(config *configs.Config) error {
+ // XXX: We could whitelist allowed devices at this point, but I'm not
+ // convinced that's a good idea. The kernel is the best arbiter of
+ // access control.
+
+ for _, mount := range config.Mounts {
+ // Check that the options list doesn't contain any uid= or gid= entries
+ // that don't resolve to root.
+ for _, opt := range strings.Split(mount.Data, ",") {
+ if strings.HasPrefix(opt, "uid=") && opt != "uid=0" {
+ return fmt.Errorf("cannot specify uid= mount options in rootless containers where argument isn't 0")
+ }
+ if strings.HasPrefix(opt, "gid=") && opt != "gid=0" {
+ return fmt.Errorf("cannot specify gid= mount options in rootless containers where argument isn't 0")
+ }
+ }
+ }
+
+ return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
new file mode 100644
index 000000000..828434544
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
@@ -0,0 +1,195 @@
+package validate
+
+import (
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+
+ "github.com/opencontainers/runc/libcontainer/configs"
+ selinux "github.com/opencontainers/selinux/go-selinux"
+)
+
+type Validator interface {
+ Validate(*configs.Config) error
+}
+
+func New() Validator {
+ return &ConfigValidator{}
+}
+
+type ConfigValidator struct {
+}
+
+func (v *ConfigValidator) Validate(config *configs.Config) error {
+ if err := v.rootfs(config); err != nil {
+ return err
+ }
+ if err := v.network(config); err != nil {
+ return err
+ }
+ if err := v.hostname(config); err != nil {
+ return err
+ }
+ if err := v.security(config); err != nil {
+ return err
+ }
+ if err := v.usernamespace(config); err != nil {
+ return err
+ }
+ if err := v.sysctl(config); err != nil {
+ return err
+ }
+ if config.Rootless {
+ if err := v.rootless(config); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// rootfs validates if the rootfs is an absolute path and is not a symlink
+// to the container's root filesystem.
+func (v *ConfigValidator) rootfs(config *configs.Config) error {
+ if _, err := os.Stat(config.Rootfs); err != nil {
+ if os.IsNotExist(err) {
+ return fmt.Errorf("rootfs (%s) does not exist", config.Rootfs)
+ }
+ return err
+ }
+ cleaned, err := filepath.Abs(config.Rootfs)
+ if err != nil {
+ return err
+ }
+ if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil {
+ return err
+ }
+ if filepath.Clean(config.Rootfs) != cleaned {
+ return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs)
+ }
+ return nil
+}
+
+func (v *ConfigValidator) network(config *configs.Config) error {
+ if !config.Namespaces.Contains(configs.NEWNET) {
+ if len(config.Networks) > 0 || len(config.Routes) > 0 {
+ return fmt.Errorf("unable to apply network settings without a private NET namespace")
+ }
+ }
+ return nil
+}
+
+func (v *ConfigValidator) hostname(config *configs.Config) error {
+ if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) {
+ return fmt.Errorf("unable to set hostname without a private UTS namespace")
+ }
+ return nil
+}
+
+func (v *ConfigValidator) security(config *configs.Config) error {
+ // restrict sys without mount namespace
+ if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) &&
+ !config.Namespaces.Contains(configs.NEWNS) {
+ return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
+ }
+ if config.ProcessLabel != "" && !selinux.GetEnabled() {
+ return fmt.Errorf("selinux label is specified in config, but selinux is disabled or not supported")
+ }
+
+ return nil
+}
+
+func (v *ConfigValidator) usernamespace(config *configs.Config) error {
+ if config.Namespaces.Contains(configs.NEWUSER) {
+ if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
+ return fmt.Errorf("USER namespaces aren't enabled in the kernel")
+ }
+ } else {
+ if config.UidMappings != nil || config.GidMappings != nil {
+ return fmt.Errorf("User namespace mappings specified, but USER namespace isn't enabled in the config")
+ }
+ }
+ return nil
+}
+
+// sysctl validates that the specified sysctl keys are valid or not.
+// /proc/sys isn't completely namespaced and depending on which namespaces
+// are specified, a subset of sysctls are permitted.
+func (v *ConfigValidator) sysctl(config *configs.Config) error {
+ validSysctlMap := map[string]bool{
+ "kernel.msgmax": true,
+ "kernel.msgmnb": true,
+ "kernel.msgmni": true,
+ "kernel.sem": true,
+ "kernel.shmall": true,
+ "kernel.shmmax": true,
+ "kernel.shmmni": true,
+ "kernel.shm_rmid_forced": true,
+ }
+
+ for s := range config.Sysctl {
+ if validSysctlMap[s] || strings.HasPrefix(s, "fs.mqueue.") {
+ if config.Namespaces.Contains(configs.NEWIPC) {
+ continue
+ } else {
+ return fmt.Errorf("sysctl %q is not allowed in the hosts ipc namespace", s)
+ }
+ }
+ if strings.HasPrefix(s, "net.") {
+ if config.Namespaces.Contains(configs.NEWNET) {
+ if path := config.Namespaces.PathOf(configs.NEWNET); path != "" {
+ if err := checkHostNs(s, path); err != nil {
+ return err
+ }
+ }
+ continue
+ } else {
+ return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", s)
+ }
+ }
+ return fmt.Errorf("sysctl %q is not in a separate kernel namespace", s)
+ }
+
+ return nil
+}
+
+func isSymbolicLink(path string) (bool, error) {
+ fi, err := os.Lstat(path)
+ if err != nil {
+ return false, err
+ }
+
+ return fi.Mode()&os.ModeSymlink == os.ModeSymlink, nil
+}
+
+// checkHostNs checks whether network sysctl is used in host namespace.
+func checkHostNs(sysctlConfig string, path string) error {
+ var currentProcessNetns = "/proc/self/ns/net"
+ // readlink on the current processes network namespace
+ destOfCurrentProcess, err := os.Readlink(currentProcessNetns)
+ if err != nil {
+ return fmt.Errorf("read soft link %q error", currentProcessNetns)
+ }
+
+ // First check if the provided path is a symbolic link
+ symLink, err := isSymbolicLink(path)
+ if err != nil {
+ return fmt.Errorf("could not check that %q is a symlink: %v", path, err)
+ }
+
+ if symLink == false {
+ // The provided namespace is not a symbolic link,
+ // it is not the host namespace.
+ return nil
+ }
+
+ // readlink on the path provided in the struct
+ destOfContainer, err := os.Readlink(path)
+ if err != nil {
+ return fmt.Errorf("read soft link %q error", path)
+ }
+ if destOfContainer == destOfCurrentProcess {
+ return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", sysctlConfig)
+ }
+ return nil
+}