diff options
-rw-r--r-- | Makefile | 4 | ||||
-rw-r--r-- | cmd/podman/common.go | 4 | ||||
-rw-r--r-- | cmd/podman/inspect.go | 42 | ||||
-rw-r--r-- | cmd/podman/shared/container_inspect.go | 211 | ||||
-rw-r--r-- | cmd/podman/shared/create.go | 27 | ||||
-rw-r--r-- | cmd/podman/shared/intermediate.go | 1 | ||||
-rw-r--r-- | docs/podman-create.1.md | 8 | ||||
-rw-r--r-- | docs/podman-run.1.md | 8 | ||||
-rw-r--r-- | go.mod | 2 | ||||
-rw-r--r-- | libpod/container_api.go | 9 | ||||
-rw-r--r-- | libpod/container_inspect.go | 1041 | ||||
-rw-r--r-- | libpod/oci_linux.go | 18 | ||||
-rw-r--r-- | pkg/adapter/pods.go | 2 | ||||
-rw-r--r-- | pkg/apparmor/apparmor_linux.go | 13 | ||||
-rw-r--r-- | pkg/apparmor/apparmor_linux_test.go | 17 | ||||
-rw-r--r-- | pkg/apparmor/apparmor_unsupported.go | 5 | ||||
-rw-r--r-- | pkg/namespaces/namespaces.go | 57 | ||||
-rw-r--r-- | pkg/spec/createconfig.go | 19 | ||||
-rw-r--r-- | pkg/spec/spec.go | 77 | ||||
-rw-r--r-- | pkg/util/utils_linux.go | 43 | ||||
-rw-r--r-- | pkg/util/utils_unsupported.go | 12 | ||||
-rw-r--r-- | pkg/varlinkapi/containers.go | 24 | ||||
-rw-r--r-- | test/e2e/common_test.go | 7 | ||||
-rw-r--r-- | test/e2e/play_kube_test.go | 123 | ||||
-rw-r--r-- | test/e2e/run_ns_test.go | 9 | ||||
-rw-r--r-- | vendor/modules.txt | 2 |
26 files changed, 1441 insertions, 344 deletions
@@ -127,8 +127,8 @@ help: .gopathok: ifeq ("$(wildcard $(GOPKGDIR))","") mkdir -p "$(GOPKGBASEDIR)" - ln -sf "$(CURDIR)" "$(GOPKGBASEDIR)" - ln -sf "$(CURDIR)/vendor/github.com/varlink" "$(FIRST_GOPATH)/src/github.com/varlink" + ln -sfnT "$(CURDIR)" "$(GOPKGDIR)" + ln -sfnT "$(CURDIR)/vendor/github.com/varlink" "$(FIRST_GOPATH)/src/github.com/varlink" endif touch $@ diff --git a/cmd/podman/common.go b/cmd/podman/common.go index 15f753d55..1e9092bd6 100644 --- a/cmd/podman/common.go +++ b/cmd/podman/common.go @@ -130,6 +130,10 @@ func getCreateFlags(c *cliconfig.PodmanCommand) { "Drop capabilities from the container", ) createFlags.String( + "cgroupns", "host", + "cgroup namespace to use", + ) + createFlags.String( "cgroup-parent", "", "Optional parent cgroup for the container", ) diff --git a/cmd/podman/inspect.go b/cmd/podman/inspect.go index df597c868..12d89764c 100644 --- a/cmd/podman/inspect.go +++ b/cmd/podman/inspect.go @@ -6,9 +6,7 @@ import ( "github.com/containers/buildah/pkg/formats" "github.com/containers/libpod/cmd/podman/cliconfig" - "github.com/containers/libpod/cmd/podman/shared" "github.com/containers/libpod/pkg/adapter" - cc "github.com/containers/libpod/pkg/spec" "github.com/containers/libpod/pkg/util" "github.com/pkg/errors" "github.com/spf13/cobra" @@ -148,19 +146,9 @@ func iterateInput(ctx context.Context, size bool, args []string, runtime *adapte inspectError = errors.Wrapf(err, "error looking up container %q", input) break } - libpodInspectData, err := ctr.Inspect(size) + data, err = ctr.Inspect(size) if err != nil { - inspectError = errors.Wrapf(err, "error getting libpod container inspect data %s", ctr.ID()) - break - } - artifact, err := getArtifact(ctr) - if inspectError != nil { - inspectError = err - break - } - data, err = shared.GetCtrInspectInfo(ctr.Config(), libpodInspectData, artifact) - if err != nil { - inspectError = errors.Wrapf(err, "error parsing container data %q", ctr.ID()) + inspectError = errors.Wrapf(err, "error inspecting container %s", ctr.ID()) break } case inspectTypeImage: @@ -188,19 +176,9 @@ func iterateInput(ctx context.Context, size bool, args []string, runtime *adapte break } } else { - libpodInspectData, err := ctr.Inspect(size) - if err != nil { - inspectError = errors.Wrapf(err, "error getting libpod container inspect data %s", ctr.ID()) - break - } - artifact, err := getArtifact(ctr) + data, err = ctr.Inspect(size) if err != nil { - inspectError = err - break - } - data, err = shared.GetCtrInspectInfo(ctr.Config(), libpodInspectData, artifact) - if err != nil { - inspectError = errors.Wrapf(err, "error parsing container data %s", ctr.ID()) + inspectError = errors.Wrapf(err, "error inspecting container %s", ctr.ID()) break } } @@ -211,15 +189,3 @@ func iterateInput(ctx context.Context, size bool, args []string, runtime *adapte } return inspectedItems, inspectError } - -func getArtifact(ctr *adapter.Container) (*cc.CreateConfig, error) { - var createArtifact cc.CreateConfig - artifact, err := ctr.GetArtifact("create-config") - if err != nil { - return nil, err - } - if err := json.Unmarshal(artifact, &createArtifact); err != nil { - return nil, err - } - return &createArtifact, nil -} diff --git a/cmd/podman/shared/container_inspect.go b/cmd/podman/shared/container_inspect.go deleted file mode 100644 index a8094466e..000000000 --- a/cmd/podman/shared/container_inspect.go +++ /dev/null @@ -1,211 +0,0 @@ -package shared - -import ( - "github.com/containers/libpod/libpod" - cc "github.com/containers/libpod/pkg/spec" - "github.com/docker/go-connections/nat" - "github.com/opencontainers/runtime-spec/specs-go" -) - -// InspectContainer holds all inspect data for a container. -// The format of individual components is fixed so the overall structure, when -// JSON encoded, matches the output of `docker inspect`. -// It combines Libpod-source inspect data with Podman-specific inspect data. -type InspectContainer struct { - *libpod.InspectContainerData - HostConfig *InspectContainerHostConfig `json:"HostConfig"` -} - -// InspectContainerHostConfig holds Container configuration that is not specific -// to Libpod. This information is (mostly) stored by Podman as an artifact. -// This struct is matched to the output of `docker inspect`. -type InspectContainerHostConfig struct { - ContainerIDFile string `json:"ContainerIDFile"` - LogConfig *InspectLogConfig `json:"LogConfig"` //TODO - NetworkMode string `json:"NetworkMode"` - PortBindings nat.PortMap `json:"PortBindings"` //TODO - AutoRemove bool `json:"AutoRemove"` - CapAdd []string `json:"CapAdd"` - CapDrop []string `json:"CapDrop"` - DNS []string `json:"DNS"` - DNSOptions []string `json:"DNSOptions"` - DNSSearch []string `json:"DNSSearch"` - ExtraHosts []string `json:"ExtraHosts"` - GroupAdd []uint32 `json:"GroupAdd"` - IpcMode string `json:"IpcMode"` - Cgroup string `json:"Cgroup"` - OomScoreAdj *int `json:"OomScoreAdj"` - PidMode string `json:"PidMode"` - Privileged bool `json:"Privileged"` - PublishAllPorts bool `json:"PublishAllPorts"` //TODO - ReadOnlyRootfs bool `json:"ReadonlyRootfs"` - ReadOnlyTmpfs bool `json:"ReadonlyTmpfs"` - SecurityOpt []string `json:"SecurityOpt"` - UTSMode string `json:"UTSMode"` - UsernsMode string `json:"UsernsMode"` - ShmSize int64 `json:"ShmSize"` - Runtime string `json:"Runtime"` - ConsoleSize *specs.Box `json:"ConsoleSize"` - CPUShares *uint64 `json:"CpuShares"` - Memory int64 `json:"Memory"` - NanoCPUs int `json:"NanoCpus"` - CgroupParent string `json:"CgroupParent"` - BlkioWeight *uint16 `json:"BlkioWeight"` - BlkioWeightDevice []specs.LinuxWeightDevice `json:"BlkioWeightDevice"` - BlkioDeviceReadBps []specs.LinuxThrottleDevice `json:"BlkioDeviceReadBps"` - BlkioDeviceWriteBps []specs.LinuxThrottleDevice `json:"BlkioDeviceWriteBps"` - BlkioDeviceReadIOps []specs.LinuxThrottleDevice `json:"BlkioDeviceReadIOps"` - BlkioDeviceWriteIOps []specs.LinuxThrottleDevice `json:"BlkioDeviceWriteIOps"` - CPUPeriod *uint64 `json:"CpuPeriod"` - CPUQuota *int64 `json:"CpuQuota"` - CPURealtimePeriod *uint64 `json:"CpuRealtimePeriod"` - CPURealtimeRuntime *int64 `json:"CpuRealtimeRuntime"` - CPUSetCPUs string `json:"CpuSetCpus"` - CPUSetMems string `json:"CpuSetMems"` - Devices []specs.LinuxDevice `json:"Devices"` - DiskQuota int `json:"DiskQuota"` //check type, TODO - KernelMemory *int64 `json:"KernelMemory"` - MemoryReservation *int64 `json:"MemoryReservation"` - MemorySwap *int64 `json:"MemorySwap"` - MemorySwappiness *uint64 `json:"MemorySwappiness"` - OomKillDisable *bool `json:"OomKillDisable"` - PidsLimit *int64 `json:"PidsLimit"` - Ulimits []string `json:"Ulimits"` - CPUCount int `json:"CpuCount"` - CPUPercent int `json:"CpuPercent"` - IOMaximumIOps int `json:"IOMaximumIOps"` //check type, TODO - IOMaximumBandwidth int `json:"IOMaximumBandwidth"` //check type, TODO - Tmpfs []string `json:"Tmpfs"` -} - -// InspectLogConfig holds information about a container's configured log driver -// and is presently unused. It is retained for Docker compatibility. -type InspectLogConfig struct { - Type string `json:"Type"` - Config map[string]string `json:"Config"` //idk type, TODO -} - -// GetCtrInspectInfo inspects a container, combining Libpod inspect information -// with other information not stored in Libpod and returning a struct that, when -// formatted for JSON output, is compatible with `docker inspect`. -func GetCtrInspectInfo(config *libpod.ContainerConfig, ctrInspectData *libpod.InspectContainerData, createArtifact *cc.CreateConfig) (*InspectContainer, error) { - spec := config.Spec - - cpus, mems, period, quota, realtimePeriod, realtimeRuntime, shares := getCPUInfo(spec) - blkioWeight, blkioWeightDevice, blkioReadBps, blkioWriteBps, blkioReadIOPS, blkioeWriteIOPS := getBLKIOInfo(spec) - memKernel, memReservation, memSwap, memSwappiness, memDisableOOMKiller := getMemoryInfo(spec) - pidsLimit := getPidsInfo(spec) - cgroup := getCgroup(spec) - logConfig := InspectLogConfig{ - config.LogDriver, - make(map[string]string), - } - - data := &InspectContainer{ - ctrInspectData, - &InspectContainerHostConfig{ - ConsoleSize: spec.Process.ConsoleSize, - OomScoreAdj: spec.Process.OOMScoreAdj, - CPUShares: shares, - BlkioWeight: blkioWeight, - BlkioWeightDevice: blkioWeightDevice, - BlkioDeviceReadBps: blkioReadBps, - BlkioDeviceWriteBps: blkioWriteBps, - BlkioDeviceReadIOps: blkioReadIOPS, - BlkioDeviceWriteIOps: blkioeWriteIOPS, - CPUPeriod: period, - CPUQuota: quota, - CPURealtimePeriod: realtimePeriod, - CPURealtimeRuntime: realtimeRuntime, - CPUSetCPUs: cpus, - CPUSetMems: mems, - Devices: spec.Linux.Devices, - KernelMemory: memKernel, - LogConfig: &logConfig, - MemoryReservation: memReservation, - MemorySwap: memSwap, - MemorySwappiness: memSwappiness, - OomKillDisable: memDisableOOMKiller, - PidsLimit: pidsLimit, - Privileged: config.Privileged, - ReadOnlyRootfs: spec.Root.Readonly, - ReadOnlyTmpfs: createArtifact.ReadOnlyTmpfs, - Runtime: config.OCIRuntime, - NetworkMode: string(createArtifact.NetMode), - IpcMode: string(createArtifact.IpcMode), - Cgroup: cgroup, - UTSMode: string(createArtifact.UtsMode), - UsernsMode: string(createArtifact.UsernsMode), - GroupAdd: spec.Process.User.AdditionalGids, - ContainerIDFile: createArtifact.CidFile, - AutoRemove: createArtifact.Rm, - CapAdd: createArtifact.CapAdd, - CapDrop: createArtifact.CapDrop, - DNS: createArtifact.DNSServers, - DNSOptions: createArtifact.DNSOpt, - DNSSearch: createArtifact.DNSSearch, - PidMode: string(createArtifact.PidMode), - CgroupParent: createArtifact.CgroupParent, - ShmSize: createArtifact.Resources.ShmSize, - Memory: createArtifact.Resources.Memory, - Ulimits: createArtifact.Resources.Ulimit, - SecurityOpt: createArtifact.SecurityOpts, - Tmpfs: createArtifact.Tmpfs, - }, - } - return data, nil -} - -func getCPUInfo(spec *specs.Spec) (string, string, *uint64, *int64, *uint64, *int64, *uint64) { - if spec.Linux.Resources == nil { - return "", "", nil, nil, nil, nil, nil - } - cpu := spec.Linux.Resources.CPU - if cpu == nil { - return "", "", nil, nil, nil, nil, nil - } - return cpu.Cpus, cpu.Mems, cpu.Period, cpu.Quota, cpu.RealtimePeriod, cpu.RealtimeRuntime, cpu.Shares -} - -func getBLKIOInfo(spec *specs.Spec) (*uint16, []specs.LinuxWeightDevice, []specs.LinuxThrottleDevice, []specs.LinuxThrottleDevice, []specs.LinuxThrottleDevice, []specs.LinuxThrottleDevice) { - if spec.Linux.Resources == nil { - return nil, nil, nil, nil, nil, nil - } - blkio := spec.Linux.Resources.BlockIO - if blkio == nil { - return nil, nil, nil, nil, nil, nil - } - return blkio.Weight, blkio.WeightDevice, blkio.ThrottleReadBpsDevice, blkio.ThrottleWriteBpsDevice, blkio.ThrottleReadIOPSDevice, blkio.ThrottleWriteIOPSDevice -} - -func getMemoryInfo(spec *specs.Spec) (*int64, *int64, *int64, *uint64, *bool) { - if spec.Linux.Resources == nil { - return nil, nil, nil, nil, nil - } - memory := spec.Linux.Resources.Memory - if memory == nil { - return nil, nil, nil, nil, nil - } - return memory.Kernel, memory.Reservation, memory.Swap, memory.Swappiness, memory.DisableOOMKiller -} - -func getPidsInfo(spec *specs.Spec) *int64 { - if spec.Linux.Resources == nil { - return nil - } - pids := spec.Linux.Resources.Pids - if pids == nil { - return nil - } - return &pids.Limit -} - -func getCgroup(spec *specs.Spec) string { - cgroup := "host" - for _, ns := range spec.Linux.Namespaces { - if ns.Type == specs.CgroupNamespace && ns.Path != "" { - cgroup = "container" - } - } - return cgroup -} diff --git a/cmd/podman/shared/create.go b/cmd/podman/shared/create.go index fd319e215..b14ce431d 100644 --- a/cmd/podman/shared/create.go +++ b/cmd/podman/shared/create.go @@ -400,11 +400,12 @@ func ParseCreateOpts(ctx context.Context, c *GenericCLIResults, runtime *libpod. namespaceNet = c.String("net") } namespaces = map[string]string{ - "pid": c.String("pid"), - "net": namespaceNet, - "ipc": c.String("ipc"), - "user": c.String("userns"), - "uts": c.String("uts"), + "cgroup": c.String("cgroupns"), + "pid": c.String("pid"), + "net": namespaceNet, + "ipc": c.String("ipc"), + "user": c.String("userns"), + "uts": c.String("uts"), } originalPodName := c.String("pod") @@ -462,6 +463,11 @@ func ParseCreateOpts(ctx context.Context, c *GenericCLIResults, runtime *libpod. return nil, errors.Errorf("--uts %q is not valid", namespaces["uts"]) } + cgroupMode := ns.CgroupMode(namespaces["cgroup"]) + if !cgroupMode.Valid() { + return nil, errors.Errorf("--cgroup %q is not valid", namespaces["cgroup"]) + } + ipcMode := ns.IpcMode(namespaces["ipc"]) if !cc.Valid(string(ipcMode), ipcMode) { return nil, errors.Errorf("--ipc %q is not valid", ipcMode) @@ -651,6 +657,8 @@ func ParseCreateOpts(ctx context.Context, c *GenericCLIResults, runtime *libpod. ImageVolumeType: c.String("image-volume"), CapAdd: c.StringSlice("cap-add"), CapDrop: c.StringSlice("cap-drop"), + CidFile: c.String("cidfile"), + Cgroupns: c.String("cgroupns"), CgroupParent: c.String("cgroup-parent"), Command: command, Detach: c.Bool("detach"), @@ -686,6 +694,7 @@ func ParseCreateOpts(ctx context.Context, c *GenericCLIResults, runtime *libpod. NetMode: netMode, UtsMode: utsMode, PidMode: pidMode, + CgroupMode: cgroupMode, Pod: podName, Privileged: c.Bool("privileged"), Publish: c.StringSlice("publish"), @@ -766,14 +775,6 @@ func CreateContainerFromCreateConfig(r *libpod.Runtime, createConfig *cc.CreateC if err != nil { return nil, err } - - createConfigJSON, err := json.Marshal(createConfig) - if err != nil { - return nil, err - } - if err := ctr.AddArtifact("create-config", createConfigJSON); err != nil { - return nil, err - } return ctr, nil } diff --git a/cmd/podman/shared/intermediate.go b/cmd/podman/shared/intermediate.go index 8337dc647..4062ac48a 100644 --- a/cmd/podman/shared/intermediate.go +++ b/cmd/podman/shared/intermediate.go @@ -370,6 +370,7 @@ func NewIntermediateLayer(c *cliconfig.PodmanCommand, remote bool) GenericCLIRes m["blkio-weight-device"] = newCRStringSlice(c, "blkio-weight-device") m["cap-add"] = newCRStringSlice(c, "cap-add") m["cap-drop"] = newCRStringSlice(c, "cap-drop") + m["cgroupns"] = newCRString(c, "cgroupns") m["cgroup-parent"] = newCRString(c, "cgroup-parent") m["cidfile"] = newCRString(c, "cidfile") m["conmon-pidfile"] = newCRString(c, "conmon-pidfile") diff --git a/docs/podman-create.1.md b/docs/podman-create.1.md index 89f146670..24a78ab93 100644 --- a/docs/podman-create.1.md +++ b/docs/podman-create.1.md @@ -63,6 +63,14 @@ Add Linux capabilities Drop Linux capabilities +**--cgroupns**=*mode* + +Set the cgroup namespace mode for the container, by default **host** is used. + **host**: use the host's cgroup namespace inside the container. + **container:<NAME|ID>**: join the namespace of the specified container. + **private**: create a new cgroup namespace. + **ns:<PATH>**: join the namespace at the specified path. + **--cgroup-parent**=*path* Path to cgroups under which the cgroup for the container will be created. If the path is not absolute, the path is considered to be relative to the cgroups path of the init process. Cgroups will be created if they do not already exist. diff --git a/docs/podman-run.1.md b/docs/podman-run.1.md index ebf774b24..6315afea6 100644 --- a/docs/podman-run.1.md +++ b/docs/podman-run.1.md @@ -77,6 +77,14 @@ Add Linux capabilities Drop Linux capabilities +**--cgroupns**=*mode* + +Set the cgroup namespace mode for the container, by default **host** is used. + **host**: use the host's cgroup namespace inside the container. + **container:<NAME|ID>**: join the namespace of the specified container. + **private**: create a new cgroup namespace. + **ns:<PATH>**: join the namespace at the specified path. + **--cgroup-parent**=*cgroup* Path to cgroups under which the cgroup for the container will be created. If the path is not absolute, the path is considered to be relative to the cgroups path of the init process. Cgroups will be created if they do not already exist. @@ -101,7 +101,7 @@ require ( github.com/spf13/pflag v1.0.3 github.com/spf13/viper v1.4.0 // indirect github.com/stretchr/testify v1.3.0 - github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2 // indirect + github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2 github.com/tchap/go-patricia v2.3.0+incompatible // indirect github.com/uber/jaeger-client-go v2.16.0+incompatible github.com/uber/jaeger-lib v0.0.0-20190122222657-d036253de8f5 // indirect diff --git a/libpod/container_api.go b/libpod/container_api.go index 3577b8e8c..ae181887e 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -305,6 +305,11 @@ func (c *Container) Exec(tty, privileged bool, env, cmd []string, user, workDir if err != nil { if exited { // If the runtime exited, propagate the error we got from the process. + // We need to remove PID files to ensure no memory leaks + if err2 := os.Remove(pidFile); err2 != nil { + logrus.Errorf("Error removing exit file for container %s exec session %s: %v", c.ID(), sessionID, err2) + } + return err } return errors.Wrapf(err, "timed out waiting for runtime to create pidfile for exec session in container %s", c.ID()) @@ -312,6 +317,10 @@ func (c *Container) Exec(tty, privileged bool, env, cmd []string, user, workDir // Pidfile exists, read it contents, err := ioutil.ReadFile(pidFile) + // We need to remove PID files to ensure no memory leaks + if err2 := os.Remove(pidFile); err2 != nil { + logrus.Errorf("Error removing exit file for container %s exec session %s: %v", c.ID(), sessionID, err2) + } if err != nil { // We don't know the PID of the exec session // However, it may still be alive diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go index de0027414..c4d2af66e 100644 --- a/libpod/container_inspect.go +++ b/libpod/container_inspect.go @@ -1,16 +1,88 @@ package libpod import ( + "fmt" "strings" "time" "github.com/containers/image/manifest" "github.com/containers/libpod/libpod/define" "github.com/containers/libpod/libpod/driver" + "github.com/containers/libpod/pkg/util" "github.com/cri-o/ocicni/pkg/ocicni" spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" + "github.com/opencontainers/runtime-tools/validate" "github.com/pkg/errors" "github.com/sirupsen/logrus" + "github.com/syndtr/gocapability/capability" +) + +const ( + // InspectAnnotationCIDFile is used by Inspect to determine if a + // container ID file was created for the container. + // If an annotation with this key is found in the OCI spec, it will be + // used in the output of Inspect(). + InspectAnnotationCIDFile = "io.podman.annotations.cid-file" + // InspectAnnotationAutoremove is used by Inspect to determine if a + // container will be automatically removed on exit. + // If an annotation with this key is found in the OCI spec and is one of + // the two supported boolean values (InspectResponseTrue and + // InspectResponseFalse) it will be used in the output of Inspect(). + InspectAnnotationAutoremove = "io.podman.annotations.autoremove" + // InspectAnnotationVolumesFrom is used by Inspect to identify + // containers whose volumes are are being used by this container. + // It is expected to be a comma-separated list of container names and/or + // IDs. + // If an annotation with this key is found in the OCI spec, it will be + // used in the output of Inspect(). + InspectAnnotationVolumesFrom = "io.podman.annotations.volumes-from" + // InspectAnnotationPrivileged is used by Inspect to identify containers + // which are privileged (IE, running with elevated privileges). + // It is expected to be a boolean, populated by one of + // InspectResponseTrue or InspectResponseFalse. + // If an annotation with this key is found in the OCI spec, it will be + // used in the output of Inspect(). + InspectAnnotationPrivileged = "io.podman.annotations.privileged" + // InspectAnnotationPublishAll is used by Inspect to identify containers + // which have all the ports from their image published. + // It is expected to be a boolean, populated by one of + // InspectResponseTrue or InspectResponseFalse. + // If an annotation with this key is found in the OCI spec, it will be + // used in the output of Inspect(). + InspectAnnotationPublishAll = "io.podman.annotations.publish-all" + // InspectAnnotationInit is used by Inspect to identify containers that + // mount an init binary in. + // It is expected to be a boolean, populated by one of + // InspectResponseTrue or InspectResponseFalse. + // If an annotation with this key is found in the OCI spec, it will be + // used in the output of Inspect(). + InspectAnnotationInit = "io.podman.annotations.init" + // InspectAnnotationLabel is used by Inspect to identify containers with + // special SELinux-related settings. It is used to populate the output + // of the SecurityOpt setting. + // If an annotation with this key is found in the OCI spec, it will be + // used in the output of Inspect(). + InspectAnnotationLabel = "io.podman.annotations.label" + // InspectAnnotationSeccomp is used by Inspect to identify containers + // with special Seccomp-related settings. It is used to populate the + // output of the SecurityOpt setting in Inspect. + // If an annotation with this key is found in the OCI spec, it will be + // used in the output of Inspect(). + InspectAnnotationSeccomp = "io.podman.annotations.seccomp" + // InspectAnnotationApparmor is used by Inspect to identify containers + // with special Apparmor-related settings. It is used to populate the + // output of the SecurityOpt setting. + // If an annotation with this key is found in the OCI spec, it will be + // used in the output of Inspect(). + InspectAnnotationApparmor = "io.podman.annotations.apparmor" + + // InspectResponseTrue is a boolean True response for an inspect + // annotation. + InspectResponseTrue = "TRUE" + // InspectResponseFalse is a boolean False response for an inspect + // annotation. + InspectResponseFalse = "FALSE" ) // InspectContainerData provides a detailed record of a container's configuration @@ -19,41 +91,42 @@ import ( // compatible with `docker inspect` JSON, but additional fields have been added // as required to share information not in the original output. type InspectContainerData struct { - ID string `json:"Id"` - Created time.Time `json:"Created"` - Path string `json:"Path"` - Args []string `json:"Args"` - State *InspectContainerState `json:"State"` - ImageID string `json:"Image"` - ImageName string `json:"ImageName"` - Rootfs string `json:"Rootfs"` - ResolvConfPath string `json:"ResolvConfPath"` - HostnamePath string `json:"HostnamePath"` - HostsPath string `json:"HostsPath"` - StaticDir string `json:"StaticDir"` - OCIConfigPath string `json:"OCIConfigPath,omitempty"` - OCIRuntime string `json:"OCIRuntime,omitempty"` - LogPath string `json:"LogPath"` - ConmonPidFile string `json:"ConmonPidFile"` - Name string `json:"Name"` - RestartCount int32 `json:"RestartCount"` - Driver string `json:"Driver"` - MountLabel string `json:"MountLabel"` - ProcessLabel string `json:"ProcessLabel"` - AppArmorProfile string `json:"AppArmorProfile"` - EffectiveCaps []string `json:"EffectiveCaps"` - BoundingCaps []string `json:"BoundingCaps"` - ExecIDs []string `json:"ExecIDs"` - GraphDriver *driver.Data `json:"GraphDriver"` - SizeRw int64 `json:"SizeRw,omitempty"` - SizeRootFs int64 `json:"SizeRootFs,omitempty"` - Mounts []InspectMount `json:"Mounts"` - Dependencies []string `json:"Dependencies"` - NetworkSettings *InspectNetworkSettings `json:"NetworkSettings"` //TODO - ExitCommand []string `json:"ExitCommand"` - Namespace string `json:"Namespace"` - IsInfra bool `json:"IsInfra"` - Config *InspectContainerConfig `json:"Config"` + ID string `json:"Id"` + Created time.Time `json:"Created"` + Path string `json:"Path"` + Args []string `json:"Args"` + State *InspectContainerState `json:"State"` + ImageID string `json:"Image"` + ImageName string `json:"ImageName"` + Rootfs string `json:"Rootfs"` + ResolvConfPath string `json:"ResolvConfPath"` + HostnamePath string `json:"HostnamePath"` + HostsPath string `json:"HostsPath"` + StaticDir string `json:"StaticDir"` + OCIConfigPath string `json:"OCIConfigPath,omitempty"` + OCIRuntime string `json:"OCIRuntime,omitempty"` + LogPath string `json:"LogPath"` + ConmonPidFile string `json:"ConmonPidFile"` + Name string `json:"Name"` + RestartCount int32 `json:"RestartCount"` + Driver string `json:"Driver"` + MountLabel string `json:"MountLabel"` + ProcessLabel string `json:"ProcessLabel"` + AppArmorProfile string `json:"AppArmorProfile"` + EffectiveCaps []string `json:"EffectiveCaps"` + BoundingCaps []string `json:"BoundingCaps"` + ExecIDs []string `json:"ExecIDs"` + GraphDriver *driver.Data `json:"GraphDriver"` + SizeRw int64 `json:"SizeRw,omitempty"` + SizeRootFs int64 `json:"SizeRootFs,omitempty"` + Mounts []InspectMount `json:"Mounts"` + Dependencies []string `json:"Dependencies"` + NetworkSettings *InspectNetworkSettings `json:"NetworkSettings"` //TODO + ExitCommand []string `json:"ExitCommand"` + Namespace string `json:"Namespace"` + IsInfra bool `json:"IsInfra"` + Config *InspectContainerConfig `json:"Config"` + HostConfig *InspectContainerHostConfig `json:"HostConfig"` } // InspectContainerConfig holds further data about how a container was initially @@ -102,6 +175,353 @@ type InspectContainerConfig struct { Healthcheck *manifest.Schema2HealthConfig `json:"Healthcheck,omitempty"` } +// InspectContainerHostConfig holds information used when the container was +// created. +// It's very much a Docker-specific struct, retained (mostly) as-is for +// compatibility. We fill individual fields as best as we can, inferring as much +// as possible from the spec and container config. +// Some things cannot be inferred. These will be populated by spec annotations +// (if available). +// Field names are fixed for compatibility and cannot be changed. +// As such, silence lint warnings about them. +//nolint +type InspectContainerHostConfig struct { + // Binds contains an array of user-added mounts. + // Both volume mounts and named volumes are included. + // Tmpfs mounts are NOT included. + // In 'docker inspect' this is separated into 'Binds' and 'Mounts' based + // on how a mount was added. We do not make this distinction and do not + // include a Mounts field in inspect. + // Format: <src>:<destination>[:<comma-separated options>] + Binds []string `json:"Binds"` + // ContainerIDFile is a file created during container creation to hold + // the ID of the created container. + // This is not handled within libpod and is stored in an annotation. + ContainerIDFile string `json:"ContainerIDFile"` + // LogConfig contains information on the container's logging backend + LogConfig *InspectLogConfig `json:"LogConfig"` + // NetworkMode is the configuration of the container's network + // namespace. + // Populated as follows: + // default - A network namespace is being created and configured via CNI + // none - A network namespace is being created, not configured via CNI + // host - No network namespace created + // container:<id> - Using another container's network namespace + // ns:<path> - A path to a network namespace has been specified + NetworkMode string `json:"NetworkMode"` + // PortBindings contains the container's port bindings. + // It is formatted as map[string][]InspectHostPort. + // The string key here is formatted as <integer port number>/<protocol> + // and represents the container port. A single container port may be + // bound to multiple host ports (on different IPs). + PortBindings map[string][]InspectHostPort `json:"PortBindings"` + // RestartPolicy contains the container's restart policy. + RestartPolicy *InspectRestartPolicy `json:"RestartPolicy"` + // AutoRemove is whether the container will be automatically removed on + // exiting. + // It is not handled directly within libpod and is stored in an + // annotation. + AutoRemove bool `json:"AutoRemove"` + // VolumeDriver is presently unused and is retained for Docker + // compatibility. + VolumeDriver string `json:"VolumeDriver"` + // VolumesFrom is a list of containers which this container uses volumes + // from. This is not handled directly within libpod and is stored in an + // annotation. + // It is formatted as an array of container names and IDs. + VolumesFrom []string `json:"VolumesFrom"` + // CapAdd is a list of capabilities added to the container. + // It is not directly stored by Libpod, and instead computed from the + // capabilities listed in the container's spec, compared against a set + // of default capabilities. + CapAdd []string `json:"CapAdd"` + // CapDrop is a list of capabilities removed from the container. + // It is not directly stored by libpod, and instead computed from the + // capabilities listed in the container's spec, compared against a set + // of default capabilities. + CapDrop []string `json:"CapDrop"` + // Dns is a list of DNS nameservers that will be added to the + // container's resolv.conf + Dns []string `json:"Dns"` + // DnsOptions is a list of DNS options that will be set in the + // container's resolv.conf + DnsOptions []string `json:"DnsOptions"` + // DnsSearch is a list of DNS search domains that will be set in the + // container's resolv.conf + DnsSearch []string `json:"DnsSearch"` + // ExtraHosts contains hosts that will be aded to the container's + // /etc/hosts. + ExtraHosts []string `json:"ExtraHosts"` + // GroupAdd contains groups that the user inside the container will be + // added to. + GroupAdd []string `json:"GroupAdd"` + // IpcMode represents the configuration of the container's IPC + // namespace. + // Populated as follows: + // "" (empty string) - Default, an IPC namespace will be created + // host - No IPC namespace created + // container:<id> - Using another container's IPC namespace + // ns:<path> - A path to an IPC namespace has been specified + IpcMode string `json:"IpcMode"` + // Cgroup contains the container's cgroup. It is presently not + // populated. + // TODO. + Cgroup string `json:"Cgroup"` + // Links is unused, and provided purely for Docker compatibility. + Links []string `json:"Links"` + // OOMScoreAdj is an adjustment that will be made to the container's OOM + // score. + OomScoreAdj int `json:"OomScoreAdj"` + // PidMode represents the configuration of the container's PID + // namespace. + // Populated as follows: + // "" (empty string) - Default, a PID namespace will be created + // host - No PID namespace created + // container:<id> - Using another container's PID namespace + // ns:<path> - A path to a PID namespace has been specified + PidMode string `json:"PidMode"` + // Privileged indicates whether the container is running with elevated + // privileges. + // This has a very specific meaning in the Docker sense, so it's very + // difficult to decode from the spec and config, and so is stored as an + // annotation. + Privileged bool `json:"Privileged"` + // PublishAllPorts indicates whether image ports are being published. + // This is not directly stored in libpod and is saved as an annotation. + PublishAllPorts bool `json:"PublishAllPorts"` + // ReadonlyRootfs is whether the container will be mounted read-only. + ReadonlyRootfs bool `json:"ReadonlyRootfs"` + // SecurityOpt is a list of security-related options that are set in the + // container. + SecurityOpt []string `json:"SecurityOpt"` + // Tmpfs is a list of tmpfs filesystems that will be mounted into the + // container. + // It is a map of destination path to options for the mount. + Tmpfs map[string]string `json:"Tmpfs"` + // UTSMode represents the configuration of the container's UID + // namespace. + // Populated as follows: + // "" (empty string) - Default, a UTS namespace will be created + // host - no UTS namespace created + // container:<id> - Using another container's UTS namespace + // ns:<path> - A path to a UTS namespace has been specified + UTSMode string `json:"UTSMode"` + // UsernsMode represents the configuration of the container's user + // namespace. + // When running rootless, a user namespace is created outside of libpod + // to allow some privileged operations. This will not be reflected here. + // Populated as follows: + // "" (empty string) - No user namespace will be created + // private - The container will be run in a user namespace + // container:<id> - Using another container's user namespace + // ns:<path> - A path to a user namespace has been specified + // TODO Rootless has an additional 'keep-id' option, presently not + // reflected here. + UsernsMode string `json:"UsernsMode"` + // ShmSize is the size of the container's SHM device. + ShmSize int64 `json:"ShmSize"` + // Runtime is provided purely for Docker compatibility. + // It is set unconditionally to "oci" as Podman does not presently + // support non-OCI runtimes. + Runtime string `json:"Runtime"` + // ConsoleSize is an array of 2 integers showing the size of the + // container's console. + // It is only set if the container is creating a terminal. + // TODO. + ConsoleSize []uint `json:"ConsoleSize"` + // Isolation is presently unused and provided solely for Docker + // compatibility. + Isolation string `json:"Isolation"` + // CpuShares indicates the CPU resources allocated to the container. + // It is a relative weight in the scheduler for assigning CPU time + // versus other CGroups. + CpuShares uint64 `json:"CpuShares"` + // Memory indicates the memory resources allocated to the container. + // This is the limit (in bytes) of RAM the container may use. + Memory int64 `json:"Memory"` + // NanoCpus indicates number of CPUs allocated to the container. + // It is an integer where one full CPU is indicated by 1000000000 (one + // billion). + // Thus, 2.5 CPUs (fractional portions of CPUs are allowed) would be + // 2500000000 (2.5 billion). + // In 'docker inspect' this is set exclusively of two further options in + // the output (CpuPeriod and CpuQuota) which are both used to implement + // this functionality. + // We can't distinguish here, so if CpuQuota is set to the default of + // 100000, we will set both CpuQuota, CpuPeriod, and NanoCpus. If + // CpuQuota is not the default, we will not set NanoCpus. + NanoCpus int64 `json:"NanoCpus"` + // CgroupParent is the CGroup parent of the container. + // Only set if not default. + CgroupParent string `json:"CgroupParent"` + // BlkioWeight indicates the I/O resources allocated to the container. + // It is a relative weight in the scheduler for assigning I/O time + // versus other CGroups. + BlkioWeight uint16 `json:"BlkioWeight"` + // BlkioWeightDevice is an array of I/O resource priorities for + // individual device nodes. + // Unfortunately, the spec only stores the device's Major/Minor numbers + // and not the path, which is used here. + // Fortunately, the kernel provides an interface for retrieving the path + // of a given node by major:minor at /sys/dev/. However, the exact path + // in use may not be what was used in the original CLI invocation - + // though it is guaranteed that the device node will be the same, and + // using the given path will be functionally identical. + BlkioWeightDevice []InspectBlkioWeightDevice `json:"BlkioWeightDevice"` + // BlkioDeviceReadBps is an array of I/O throttle parameters for + // individual device nodes. + // This specifically sets read rate cap in bytes per second for device + // nodes. + // As with BlkioWeightDevice, we pull the path from /sys/dev, and we + // don't guarantee the path will be identical to the original (though + // the node will be). + BlkioDeviceReadBps []InspectBlkioThrottleDevice `json:"BlkioDeviceReadBps"` + // BlkioDeviceWriteBps is an array of I/O throttle parameters for + // individual device nodes. + // this specifically sets write rate cap in bytes per second for device + // nodes. + // as with BlkioWeightDevice, we pull the path from /sys/dev, and we + // don't guarantee the path will be identical to the original (though + // the node will be). + BlkioDeviceWriteBps []InspectBlkioThrottleDevice `json:"BlkioDeviceWriteBps"` + // BlkioDeviceReadIOps is an array of I/O throttle parameters for + // individual device nodes. + // This specifically sets the read rate cap in iops per second for + // device nodes. + // As with BlkioWeightDevice, we pull the path from /sys/dev, and we + // don't guarantee the path will be identical to the original (though + // the node will be). + BlkioDeviceReadIOps []InspectBlkioThrottleDevice `json:"BlkioDeviceReadIOps"` + // BlkioDeviceWriteIOps is an array of I/O throttle parameters for + // individual device nodes. + // This specifically sets the write rate cap in iops per second for + // device nodes. + // As with BlkioWeightDevice, we pull the path from /sys/dev, and we + // don't guarantee the path will be identical to the original (though + // the node will be). + BlkioDeviceWriteIOps []InspectBlkioThrottleDevice `json:"BlkioDeviceWriteIOps"` + // CpuPeriod is the length of a CPU period in microseconds. + // It relates directly to CpuQuota. + CpuPeriod uint64 `json:"CpuPeriod"` + // CpuPeriod is the amount of time (in microseconds) that a container + // can use the CPU in every CpuPeriod. + CpuQuota int64 `json:"CpuQuota"` + // CpuRealtimePeriod is the length of time (in microseconds) of the CPU + // realtime period. If set to 0, no time will be allocated to realtime + // tasks. + CpuRealtimePeriod uint64 `json:"CpuRealtimePeriod"` + // CpuRealtimeRuntime is the length of time (in microseconds) allocated + // for realtime tasks within every CpuRealtimePeriod. + CpuRealtimeRuntime int64 `json:"CpuRealtimeRuntime"` + // CpusetCpus is the is the set of CPUs that the container will execute + // on. Formatted as `0-3` or `0,2`. Default (if unset) is all CPUs. + CpusetCpus string `json:"CpusetCpus"` + // CpusetMems is the set of memory nodes the container will use. + // Formatted as `0-3` or `0,2`. Default (if unset) is all memory nodes. + CpusetMems string `json:"CpusetMems"` + // Devices is a list of device nodes that will be added to the + // container. + // These are stored in the OCI spec only as type, major, minor while we + // display the host path. We convert this with /sys/dev, but we cannot + // guarantee that the host path will be identical - only that the actual + // device will be. + Devices []InspectDevice `json:"Devices"` + // DiskQuota is the maximum amount of disk space the container may use + // (in bytes). + // Presently not populated. + // TODO. + DiskQuota uint64 `json:"DiskQuota"` + // KernelMemory is the maximum amount of memory the kernel will devote + // to the container. + KernelMemory int64 `json:"KernelMemory"` + // MemoryReservation is the reservation (soft limit) of memory available + // to the container. Soft limits are warnings only and can be exceeded. + MemoryReservation int64 `json:"MemoryReservation"` + // MemorySwap is the total limit for all memory available to the + // container, including swap. 0 indicates that there is no limit to the + // amount of memory available. + MemorySwap int64 `json:"MemorySwap"` + // MemorySwappiness is the willingness of the kernel to page container + // memory to swap. It is an integer from 0 to 100, with low numbers + // being more likely to be put into swap. + // -1, the default, will not set swappiness and use the system defaults. + MemorySwappiness int64 `json:"MemorySwappiness"` + // OomKillDisable indicates whether the kernel OOM killer is disabled + // for the container. + OomKillDisable bool `json:"OomKillDisable"` + // Init indicates whether the container has an init mounted into it. + Init bool `json:"Init,omitempty"` + // PidsLimit is the maximum number of PIDs what may be created within + // the container. 0, the default, indicates no limit. + PidsLimit int64 `json:"PidsLimit"` + // Ulimits is a set of ulimits that will be set within the container. + Ulimits []InspectUlimit `json:"Ulimits"` + // CpuCount is Windows-only and not presently implemented. + CpuCount uint64 `json:"CpuCount"` + // CpuPercent is Windows-only and not presently implemented. + CpuPercent uint64 `json:"CpuPercent"` + // IOMaximumIOps is Windows-only and not presently implemented. + IOMaximumIOps uint64 `json:"IOMaximumIOps"` + // IOMaximumBandwidth is Windows-only and not presently implemented. + IOMaximumBandwidth uint64 `json:"IOMaximumBandwidth"` +} + +// InspectLogConfig holds information about a container's configured log driver +// and is presently unused. It is retained for Docker compatibility. +type InspectLogConfig struct { + Type string `json:"Type"` + Config map[string]string `json:"Config"` //idk type, TODO +} + +// InspectRestartPolicy holds information about the container's restart policy. +type InspectRestartPolicy struct { + // Name contains the container's restart policy. + // Allowable values are "no" or "" (take no action), + // "on-failure" (restart on non-zero exit code, with an optional max + // retry count), and "always" (always restart on container stop, unless + // explicitly requested by API). + // Note that this is NOT actually a name of any sort - the poor naming + // is for Docker compatibility. + Name string `json:"Name"` + // MaximumRetryCount is the maximum number of retries allowed if the + // "on-failure" restart policy is in use. Not used if "on-failure" is + // not set. + MaximumRetryCount uint `json:"MaximumRetryCount"` +} + +// InspectBlkioWeightDevice holds information about the relative weight +// of an individual device node. Weights are used in the I/O scheduler to give +// relative priority to some accesses. +type InspectBlkioWeightDevice struct { + // Path is the path to the device this applies to. + Path string `json:"Path"` + // Weight is the relative weight the scheduler will use when scheduling + // I/O. + Weight uint16 `json:"Weight"` +} + +// InspectBlkioThrottleDevice holds information about a speed cap for a device +// node. This cap applies to a specific operation (read, write, etc) on the given +// node. +type InspectBlkioThrottleDevice struct { + // Path is the path to the device this applies to. + Path string `json:"Path"` + // Rate is the maximum rate. It is in either bytes per second or iops + // per second, determined by where it is used - documentation will + // indicate which is appropriate. + Rate uint64 `json:"Rate"` +} + +// InspectUlimit is a ulimit that will be applied to the container. +type InspectUlimit struct { + // Name is the name (type) of the ulimit. + Name string `json:"Name"` + // Soft is the soft limit that will be applied. + Soft uint64 `json:"Soft"` + // Hard is the hard limit that will be applied. + Hard uint64 `json:"Hard"` +} + // InspectMount provides a record of a single mount in a container. It contains // fields for both named and normal volumes. Only user-specified volumes will be // included, and tmpfs volumes are not included even if the user specified them. @@ -131,6 +551,29 @@ type InspectMount struct { Propagation string `json:"Propagation"` } +// InspectDevice is a single device that will be mounted into the container. +type InspectDevice struct { + // PathOnHost is the path of the device on the host. + PathOnHost string `json:"PathOnHost"` + // PathInContainer is the path of the device within the container. + PathInContainer string `json:"PathInContainer"` + // CgroupPermissions is the permissions of the mounted device. + // Presently not populated. + // TODO. + CgroupPermissions string `json:"CgroupPermissions"` +} + +// InspectHostPort provides information on a port on the host that a container's +// port is bound to. +type InspectHostPort struct { + // IP on the host we are bound to. "" if not specified (binding to all + // IPs). + HostIP string `json:"HostIp"` + // Port on the host we are bound to. No special formatting - just an + // integer stuffed into a string. + HostPort string `json:"HostPort"` +} + // InspectContainerState provides a detailed record of a container's current // state. It is returned as part of InspectContainerData. // As with InspectContainerData, many portions of this struct are matched to @@ -206,7 +649,7 @@ func (c *Container) Inspect(size bool) (*InspectContainerData, error) { func (c *Container) getContainerInspectData(size bool, driverData *driver.Data) (*InspectContainerData, error) { config := c.config runtimeInfo := c.state - stateSpec, err := c.specFromState() + ctrSpec, err := c.specFromState() if err != nil { return nil, err } @@ -244,7 +687,8 @@ func (c *Container) getContainerInspectData(size bool, driverData *driver.Data) } } - mounts, err := c.getInspectMounts(stateSpec) + namedVolumes, mounts := c.sortUserVolumes(ctrSpec) + inspectMounts, err := c.getInspectMounts(ctrSpec, namedVolumes, mounts) if err != nil { return nil, err } @@ -255,7 +699,7 @@ func (c *Container) getContainerInspectData(size bool, driverData *driver.Data) Path: path, Args: args, State: &InspectContainerState{ - OciVersion: stateSpec.Version, + OciVersion: ctrSpec.Version, Status: runtimeInfo.State.String(), Running: runtimeInfo.State == define.ContainerStateRunning, Paused: runtimeInfo.State == define.ContainerStatePaused, @@ -285,12 +729,12 @@ func (c *Container) getContainerInspectData(size bool, driverData *driver.Data) Driver: driverData.Name, MountLabel: config.MountLabel, ProcessLabel: config.ProcessLabel, - EffectiveCaps: stateSpec.Process.Capabilities.Effective, - BoundingCaps: stateSpec.Process.Capabilities.Bounding, - AppArmorProfile: stateSpec.Process.ApparmorProfile, + EffectiveCaps: ctrSpec.Process.Capabilities.Effective, + BoundingCaps: ctrSpec.Process.Capabilities.Bounding, + AppArmorProfile: ctrSpec.Process.ApparmorProfile, ExecIDs: execIDs, GraphDriver: driverData, - Mounts: mounts, + Mounts: inspectMounts, Dependencies: c.Dependencies(), NetworkSettings: &InspectNetworkSettings{ Bridge: "", // TODO @@ -338,12 +782,18 @@ func (c *Container) getContainerInspectData(size bool, driverData *driver.Data) // Get information on the container's network namespace (if present) data = c.getContainerNetworkInfo(data) - inspectConfig, err := c.generateInspectContainerConfig(stateSpec) + inspectConfig, err := c.generateInspectContainerConfig(ctrSpec) if err != nil { return nil, err } data.Config = inspectConfig + hostConfig, err := c.generateInspectContainerHostConfig(ctrSpec, namedVolumes, mounts) + if err != nil { + return nil, err + } + data.HostConfig = hostConfig + if size { rootFsSize, err := c.rootFsSize() if err != nil { @@ -362,7 +812,7 @@ func (c *Container) getContainerInspectData(size bool, driverData *driver.Data) // Get inspect-formatted mounts list. // Only includes user-specified mounts. Only includes bind mounts and named // volumes, not tmpfs volumes. -func (c *Container) getInspectMounts(ctrSpec *spec.Spec) ([]InspectMount, error) { +func (c *Container) getInspectMounts(ctrSpec *spec.Spec, namedVolumes []*ContainerNamedVolume, mounts []spec.Mount) ([]InspectMount, error) { inspectMounts := []InspectMount{} // No mounts, return early @@ -370,7 +820,6 @@ func (c *Container) getInspectMounts(ctrSpec *spec.Spec) ([]InspectMount, error) return inspectMounts, nil } - namedVolumes, mounts := c.sortUserVolumes(ctrSpec) for _, volume := range namedVolumes { mountStruct := InspectMount{} mountStruct.Type = "volume" @@ -493,3 +942,505 @@ func (c *Container) generateInspectContainerConfig(spec *spec.Spec) (*InspectCon return ctrConfig, nil } + +// Generate the InspectContainerHostConfig struct for the HostConfig field of +// Inspect. +func (c *Container) generateInspectContainerHostConfig(ctrSpec *spec.Spec, namedVolumes []*ContainerNamedVolume, mounts []spec.Mount) (*InspectContainerHostConfig, error) { + hostConfig := new(InspectContainerHostConfig) + + logConfig := new(InspectLogConfig) + logConfig.Type = c.config.LogDriver + hostConfig.LogConfig = logConfig + + restartPolicy := new(InspectRestartPolicy) + restartPolicy.Name = c.config.RestartPolicy + restartPolicy.MaximumRetryCount = c.config.RestartRetries + hostConfig.RestartPolicy = restartPolicy + + hostConfig.Dns = make([]string, 0, len(c.config.DNSServer)) + for _, dns := range c.config.DNSServer { + hostConfig.Dns = append(hostConfig.Dns, dns.String()) + } + + hostConfig.DnsOptions = make([]string, 0, len(c.config.DNSOption)) + for _, opt := range c.config.DNSOption { + hostConfig.DnsOptions = append(hostConfig.DnsOptions, opt) + } + + hostConfig.DnsSearch = make([]string, 0, len(c.config.DNSSearch)) + for _, search := range c.config.DNSSearch { + hostConfig.DnsSearch = append(hostConfig.DnsSearch, search) + } + + hostConfig.ExtraHosts = make([]string, 0, len(c.config.HostAdd)) + for _, host := range c.config.HostAdd { + hostConfig.ExtraHosts = append(hostConfig.ExtraHosts, host) + } + + hostConfig.GroupAdd = make([]string, 0, len(c.config.Groups)) + for _, group := range c.config.Groups { + hostConfig.GroupAdd = append(hostConfig.GroupAdd, group) + } + + hostConfig.SecurityOpt = []string{} + if ctrSpec.Process != nil { + if ctrSpec.Process.OOMScoreAdj != nil { + hostConfig.OomScoreAdj = *ctrSpec.Process.OOMScoreAdj + } + if ctrSpec.Process.NoNewPrivileges { + hostConfig.SecurityOpt = append(hostConfig.SecurityOpt, "no-new-privileges") + } + } + + hostConfig.ReadonlyRootfs = ctrSpec.Root.Readonly + hostConfig.ShmSize = c.config.ShmSize + hostConfig.Runtime = "oci" + + // This is very expensive to initialize. + // So we don't want to initialize it unless we absolutely have to - IE, + // there are things that require a major:minor to path translation. + var deviceNodes map[string]string + + // Annotations + if ctrSpec.Annotations != nil { + hostConfig.ContainerIDFile = ctrSpec.Annotations[InspectAnnotationCIDFile] + if ctrSpec.Annotations[InspectAnnotationAutoremove] == InspectResponseTrue { + hostConfig.AutoRemove = true + } + if ctrs, ok := ctrSpec.Annotations[InspectAnnotationVolumesFrom]; ok { + hostConfig.VolumesFrom = strings.Split(ctrs, ",") + } + if ctrSpec.Annotations[InspectAnnotationPrivileged] == InspectResponseTrue { + hostConfig.Privileged = true + } + if ctrSpec.Annotations[InspectAnnotationInit] == InspectResponseTrue { + hostConfig.Init = true + } + if label, ok := ctrSpec.Annotations[InspectAnnotationLabel]; ok { + hostConfig.SecurityOpt = append(hostConfig.SecurityOpt, fmt.Sprintf("label=%s", label)) + } + if seccomp, ok := ctrSpec.Annotations[InspectAnnotationSeccomp]; ok { + hostConfig.SecurityOpt = append(hostConfig.SecurityOpt, fmt.Sprintf("seccomp=%s", seccomp)) + } + if apparmor, ok := ctrSpec.Annotations[InspectAnnotationApparmor]; ok { + hostConfig.SecurityOpt = append(hostConfig.SecurityOpt, fmt.Sprintf("apparmor=%s", apparmor)) + } + } + + // Resource limits + if ctrSpec.Linux != nil { + if ctrSpec.Linux.Resources != nil { + if ctrSpec.Linux.Resources.CPU != nil { + if ctrSpec.Linux.Resources.CPU.Shares != nil { + hostConfig.CpuShares = *ctrSpec.Linux.Resources.CPU.Shares + } + if ctrSpec.Linux.Resources.CPU.Period != nil { + hostConfig.CpuPeriod = *ctrSpec.Linux.Resources.CPU.Period + } + if ctrSpec.Linux.Resources.CPU.Quota != nil { + hostConfig.CpuQuota = *ctrSpec.Linux.Resources.CPU.Quota + } + if ctrSpec.Linux.Resources.CPU.RealtimePeriod != nil { + hostConfig.CpuRealtimePeriod = *ctrSpec.Linux.Resources.CPU.RealtimePeriod + } + if ctrSpec.Linux.Resources.CPU.RealtimeRuntime != nil { + hostConfig.CpuRealtimeRuntime = *ctrSpec.Linux.Resources.CPU.RealtimeRuntime + } + hostConfig.CpusetCpus = ctrSpec.Linux.Resources.CPU.Cpus + hostConfig.CpusetMems = ctrSpec.Linux.Resources.CPU.Mems + } + if ctrSpec.Linux.Resources.Memory != nil { + if ctrSpec.Linux.Resources.Memory.Limit != nil { + hostConfig.Memory = *ctrSpec.Linux.Resources.Memory.Limit + } + if ctrSpec.Linux.Resources.Memory.Kernel != nil { + hostConfig.KernelMemory = *ctrSpec.Linux.Resources.Memory.Kernel + } + if ctrSpec.Linux.Resources.Memory.Reservation != nil { + hostConfig.MemoryReservation = *ctrSpec.Linux.Resources.Memory.Reservation + } + if ctrSpec.Linux.Resources.Memory.Swap != nil { + hostConfig.MemorySwap = *ctrSpec.Linux.Resources.Memory.Swap + } + if ctrSpec.Linux.Resources.Memory.Swappiness != nil { + hostConfig.MemorySwappiness = int64(*ctrSpec.Linux.Resources.Memory.Swappiness) + } else { + // Swappiness has a default of -1 + hostConfig.MemorySwappiness = -1 + } + if ctrSpec.Linux.Resources.Memory.DisableOOMKiller != nil { + hostConfig.OomKillDisable = *ctrSpec.Linux.Resources.Memory.DisableOOMKiller + } + } + if ctrSpec.Linux.Resources.Pids != nil { + hostConfig.PidsLimit = ctrSpec.Linux.Resources.Pids.Limit + } + if ctrSpec.Linux.Resources.BlockIO != nil { + if ctrSpec.Linux.Resources.BlockIO.Weight != nil { + hostConfig.BlkioWeight = *ctrSpec.Linux.Resources.BlockIO.Weight + } + hostConfig.BlkioWeightDevice = []InspectBlkioWeightDevice{} + for _, dev := range ctrSpec.Linux.Resources.BlockIO.WeightDevice { + key := fmt.Sprintf("%d:%d", dev.Major, dev.Minor) + // TODO: how do we handle LeafWeight vs + // Weight? For now, ignore anything + // without Weight set. + if dev.Weight == nil { + logrus.Warnf("Ignoring weight device %s as it lacks a weight", key) + continue + } + if deviceNodes == nil { + nodes, err := util.FindDeviceNodes() + if err != nil { + return nil, err + } + deviceNodes = nodes + } + path, ok := deviceNodes[key] + if !ok { + logrus.Warnf("Could not locate weight device %s in system devices", key) + continue + } + weightDev := InspectBlkioWeightDevice{} + weightDev.Path = path + weightDev.Weight = *dev.Weight + hostConfig.BlkioWeightDevice = append(hostConfig.BlkioWeightDevice, weightDev) + } + + handleThrottleDevice := func(devs []spec.LinuxThrottleDevice) ([]InspectBlkioThrottleDevice, error) { + out := []InspectBlkioThrottleDevice{} + for _, dev := range devs { + key := fmt.Sprintf("%d:%d", dev.Major, dev.Minor) + if deviceNodes == nil { + nodes, err := util.FindDeviceNodes() + if err != nil { + return nil, err + } + deviceNodes = nodes + } + path, ok := deviceNodes[key] + if !ok { + logrus.Warnf("Could not locate throttle device %s in system devices", key) + continue + } + throttleDev := InspectBlkioThrottleDevice{} + throttleDev.Path = path + throttleDev.Rate = dev.Rate + out = append(out, throttleDev) + } + return out, nil + } + + readBps, err := handleThrottleDevice(ctrSpec.Linux.Resources.BlockIO.ThrottleReadBpsDevice) + if err != nil { + return nil, err + } + hostConfig.BlkioDeviceReadBps = readBps + + writeBps, err := handleThrottleDevice(ctrSpec.Linux.Resources.BlockIO.ThrottleWriteBpsDevice) + if err != nil { + return nil, err + } + hostConfig.BlkioDeviceWriteBps = writeBps + + readIops, err := handleThrottleDevice(ctrSpec.Linux.Resources.BlockIO.ThrottleReadIOPSDevice) + if err != nil { + return nil, err + } + hostConfig.BlkioDeviceReadIOps = readIops + + writeIops, err := handleThrottleDevice(ctrSpec.Linux.Resources.BlockIO.ThrottleWriteIOPSDevice) + if err != nil { + return nil, err + } + hostConfig.BlkioDeviceWriteIOps = writeIops + } + } + } + + // NanoCPUs. + // This is only calculated if CpuPeriod == 100000. + // It is given in nanoseconds, versus the microseconds used elsewhere - + // so multiply by 10000 (not sure why, but 1000 is off by 10). + if hostConfig.CpuPeriod == 100000 { + hostConfig.NanoCpus = 10000 * hostConfig.CpuQuota + } + + // Bind mounts, formatted as src:dst. + // We'll be appending some options that aren't necessarily in the + // original command line... but no helping that from inside libpod. + binds := []string{} + tmpfs := make(map[string]string) + for _, namedVol := range namedVolumes { + if len(namedVol.Options) > 0 { + binds = append(binds, fmt.Sprintf("%s:%s:%s", namedVol.Name, namedVol.Dest, strings.Join(namedVol.Options, ","))) + } else { + binds = append(binds, fmt.Sprintf("%s:%s", namedVol.Name, namedVol.Dest)) + } + } + for _, mount := range mounts { + if mount.Type == "tmpfs" { + tmpfs[mount.Destination] = strings.Join(mount.Options, ",") + } else { + // TODO - maybe we should parse for empty source/destination + // here. Would be confusing if we print just a bare colon. + if len(mount.Options) > 0 { + binds = append(binds, fmt.Sprintf("%s:%s:%s", mount.Source, mount.Destination, strings.Join(mount.Options, ","))) + } else { + binds = append(binds, fmt.Sprintf("%s:%s", mount.Source, mount.Destination)) + } + } + } + hostConfig.Binds = binds + hostConfig.Tmpfs = tmpfs + + // Network mode parsing. + networkMode := "" + if c.config.CreateNetNS { + networkMode = "default" + } else if c.config.NetNsCtr != "" { + networkMode = fmt.Sprintf("container:%s", c.config.NetNsCtr) + } else { + // Find the spec's network namespace. + // If there is none, it's host networking. + // If there is one and it has a path, it's "ns:". + foundNetNS := false + for _, ns := range ctrSpec.Linux.Namespaces { + if ns.Type == spec.NetworkNamespace { + foundNetNS = true + if ns.Path != "" { + networkMode = fmt.Sprintf("ns:%s", ns.Path) + } else { + networkMode = "none" + } + break + } + } + if !foundNetNS { + networkMode = "host" + } + } + hostConfig.NetworkMode = networkMode + + // Port bindings. + // Only populate if we're using CNI to configure the network. + portBindings := make(map[string][]InspectHostPort) + if c.config.CreateNetNS { + for _, port := range c.config.PortMappings { + key := fmt.Sprintf("%d/%s", port.ContainerPort, port.Protocol) + hostPorts := portBindings[key] + if hostPorts == nil { + hostPorts = []InspectHostPort{} + } + hostPorts = append(hostPorts, InspectHostPort{ + HostIP: port.HostIP, + HostPort: fmt.Sprintf("%d", port.HostPort), + }) + portBindings[key] = hostPorts + } + } + hostConfig.PortBindings = portBindings + + // Cap add and cap drop. + // We need a default set of capabilities to compare against. + // The OCI generate package has one, and is commonly used, so we'll + // use it. + // Problem: there are 5 sets of capabilities. + // Use the bounding set for this computation, it's the most encompassing + // (but still not perfect). + capAdd := []string{} + capDrop := []string{} + // No point in continuing if we got a spec without a Process block... + if ctrSpec.Process != nil { + // Max an O(1) lookup table for default bounding caps. + boundingCaps := make(map[string]bool) + g, err := generate.New("linux") + if err != nil { + return nil, err + } + if !hostConfig.Privileged { + for _, cap := range g.Config.Process.Capabilities.Bounding { + boundingCaps[cap] = true + } + } else { + // If we are privileged, use all caps. + for _, cap := range capability.List() { + if g.HostSpecific && cap > validate.LastCap() { + continue + } + boundingCaps[fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String()))] = true + } + } + // Iterate through spec caps. + // If it's not in default bounding caps, it was added. + // If it is, delete from the default set. Whatever remains after + // we finish are the dropped caps. + for _, cap := range ctrSpec.Process.Capabilities.Bounding { + if _, ok := boundingCaps[cap]; ok { + delete(boundingCaps, cap) + } else { + capAdd = append(capAdd, cap) + } + } + for cap := range boundingCaps { + capDrop = append(capDrop, cap) + } + } + hostConfig.CapAdd = capAdd + hostConfig.CapDrop = capDrop + + // IPC Namespace mode + ipcMode := "" + if c.config.IPCNsCtr != "" { + ipcMode = fmt.Sprintf("container:%s", c.config.IPCNsCtr) + } else { + // Locate the spec's IPC namespace. + // If there is none, it's ipc=host. + // If there is one and it has a path, it's "ns:". + // If no path, it's default - the empty string. + foundIPCNS := false + for _, ns := range ctrSpec.Linux.Namespaces { + if ns.Type == spec.IPCNamespace { + foundIPCNS = true + if ns.Path != "" { + ipcMode = fmt.Sprintf("ns:%s", ns.Path) + } + break + } + } + if !foundIPCNS { + ipcMode = "host" + } + } + hostConfig.IpcMode = ipcMode + + // CGroup parent + // Need to check if it's the default, and not print if so. + defaultCgroupParent := "" + switch c.runtime.config.CgroupManager { + case CgroupfsCgroupsManager: + defaultCgroupParent = CgroupfsDefaultCgroupParent + case SystemdCgroupsManager: + defaultCgroupParent = SystemdDefaultCgroupParent + } + if c.config.CgroupParent != defaultCgroupParent { + hostConfig.CgroupParent = c.config.CgroupParent + } + + // PID namespace mode + pidMode := "" + if c.config.PIDNsCtr != "" { + pidMode = fmt.Sprintf("container:%s", c.config.PIDNsCtr) + } else { + // Locate the spec's PID namespace. + // If there is none, it's pid=host. + // If there is one and it has a path, it's "ns:". + // If there is no path, it's default - the empty string. + foundPIDNS := false + for _, ns := range ctrSpec.Linux.Namespaces { + if ns.Type == spec.PIDNamespace { + foundPIDNS = true + if ns.Path != "" { + pidMode = fmt.Sprintf("ns:%s", ns.Path) + } + break + } + } + if !foundPIDNS { + pidMode = "host" + } + } + hostConfig.PidMode = pidMode + + // UTS namespace mode + utsMode := "" + if c.config.UTSNsCtr != "" { + utsMode = fmt.Sprintf("container:%s", c.config.UTSNsCtr) + } else { + // Locate the spec's UTS namespace. + // If there is none, it's uts=host. + // If there is one and it has a path, it's "ns:". + // If there is no path, it's default - the empty string. + foundUTSNS := false + for _, ns := range ctrSpec.Linux.Namespaces { + if ns.Type == spec.UTSNamespace { + foundUTSNS = true + if ns.Path != "" { + utsMode = fmt.Sprintf("ns:%s", ns.Path) + } + break + } + } + if !foundUTSNS { + utsMode = "host" + } + } + hostConfig.UTSMode = utsMode + + // User namespace mode + usernsMode := "" + if c.config.UserNsCtr != "" { + usernsMode = fmt.Sprintf("container:%s", c.config.UserNsCtr) + } else { + // Locate the spec's user namespace. + // If there is none, it's default - the empty string. + // If there is one, it's "private" if no path, or "ns:" if + // there's a path. + for _, ns := range ctrSpec.Linux.Namespaces { + if ns.Type == spec.UserNamespace { + if ns.Path != "" { + usernsMode = fmt.Sprintf("ns:%s", ns.Path) + } else { + usernsMode = "private" + } + } + } + } + hostConfig.UsernsMode = usernsMode + + // Devices + // Do not include if privileged - assumed that all devices will be + // included. + hostConfig.Devices = []InspectDevice{} + if ctrSpec.Linux != nil && !hostConfig.Privileged { + for _, dev := range ctrSpec.Linux.Devices { + key := fmt.Sprintf("%d:%d", dev.Major, dev.Minor) + if deviceNodes == nil { + nodes, err := util.FindDeviceNodes() + if err != nil { + return nil, err + } + deviceNodes = nodes + } + path, ok := deviceNodes[key] + if !ok { + logrus.Warnf("Could not locate device %s on host", key) + continue + } + newDev := InspectDevice{} + newDev.PathOnHost = path + newDev.PathInContainer = dev.Path + hostConfig.Devices = append(hostConfig.Devices, newDev) + } + } + + // Ulimits + hostConfig.Ulimits = []InspectUlimit{} + if ctrSpec.Process != nil { + for _, limit := range ctrSpec.Process.Rlimits { + newLimit := InspectUlimit{} + newLimit.Name = limit.Type + newLimit.Soft = limit.Soft + newLimit.Hard = limit.Hard + hostConfig.Ulimits = append(hostConfig.Ulimits, newLimit) + } + } + + // Terminal size + // We can't actually get this for now... + // So default to something sane. + // TODO: Populate this. + hostConfig.ConsoleSize = []uint{0, 0} + + return hostConfig, nil +} diff --git a/libpod/oci_linux.go b/libpod/oci_linux.go index 044373ec5..1182457f4 100644 --- a/libpod/oci_linux.go +++ b/libpod/oci_linux.go @@ -11,6 +11,7 @@ import ( "os/exec" "path/filepath" "runtime" + "strconv" "strings" "syscall" "time" @@ -461,8 +462,21 @@ func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string, res return errors.Wrapf(define.ErrInternal, "container create failed") } ctr.state.PID = ss.si.Pid - if cmd.Process != nil { - ctr.state.ConmonPID = cmd.Process.Pid + // Let's try reading the Conmon pid at the same time. + if ctr.config.ConmonPidFile != "" { + contents, err := ioutil.ReadFile(ctr.config.ConmonPidFile) + if err != nil { + logrus.Warnf("Error reading Conmon pidfile for container %s: %v", ctr.ID(), err) + } else { + // Convert it to an int + conmonPID, err := strconv.Atoi(string(contents)) + if err != nil { + logrus.Warnf("Error decoding Conmon PID %q for container %s: %v", string(contents), ctr.ID(), err) + } else { + ctr.state.ConmonPID = conmonPID + logrus.Infof("Got Conmon PID as %d", conmonPID) + } + } } case <-time.After(ContainerCreateTimeout): return errors.Wrapf(define.ErrInternal, "container creation timeout") diff --git a/pkg/adapter/pods.go b/pkg/adapter/pods.go index b45b02d09..2ca4f228f 100644 --- a/pkg/adapter/pods.go +++ b/pkg/adapter/pods.go @@ -676,7 +676,7 @@ func kubeContainerToCreateConfig(ctx context.Context, containerYAML v1.Container if imageData != nil && imageData.Config != nil { containerConfig.Command = append(containerConfig.Command, imageData.Config.Entrypoint...) } - if len(containerConfig.Command) != 0 { + if len(containerYAML.Command) != 0 { containerConfig.Command = append(containerConfig.Command, containerYAML.Command...) } else if imageData != nil && imageData.Config != nil { containerConfig.Command = append(containerConfig.Command, imageData.Config.Cmd...) diff --git a/pkg/apparmor/apparmor_linux.go b/pkg/apparmor/apparmor_linux.go index 0d01f41e9..479600408 100644 --- a/pkg/apparmor/apparmor_linux.go +++ b/pkg/apparmor/apparmor_linux.go @@ -4,6 +4,7 @@ package apparmor import ( "bufio" + "bytes" "fmt" "io" "os" @@ -104,6 +105,18 @@ func InstallDefault(name string) error { return cmd.Wait() } +// DefaultContent returns the default profile content as byte slice. The +// profile is named as the provided `name`. The function errors if the profile +// generation fails. +func DefaultContent(name string) ([]byte, error) { + p := profileData{Name: name} + var bytes bytes.Buffer + if err := p.generateDefault(&bytes); err != nil { + return nil, err + } + return bytes.Bytes(), nil +} + // IsLoaded checks if a profile with the given name has been loaded into the // kernel. func IsLoaded(name string) (bool, error) { diff --git a/pkg/apparmor/apparmor_linux_test.go b/pkg/apparmor/apparmor_linux_test.go index ac3260723..e94293d87 100644 --- a/pkg/apparmor/apparmor_linux_test.go +++ b/pkg/apparmor/apparmor_linux_test.go @@ -78,10 +78,12 @@ Copyright 2009-2012 Canonical Ltd. } } -func TestInstallDefault(t *testing.T) { - profile := "libpod-default-testing" - aapath := "/sys/kernel/security/apparmor/" +const ( + aapath = "/sys/kernel/security/apparmor/" + profile = "libpod-default-testing" +) +func TestInstallDefault(t *testing.T) { if _, err := os.Stat(aapath); err != nil { t.Skip("AppArmor isn't available in this environment") } @@ -127,3 +129,12 @@ func TestInstallDefault(t *testing.T) { } checkLoaded(false) } + +func TestDefaultContent(t *testing.T) { + if _, err := os.Stat(aapath); err != nil { + t.Skip("AppArmor isn't available in this environment") + } + if err := DefaultContent(profile); err != nil { + t.Fatalf("Couldn't retrieve default AppArmor profile content '%s': %v", profile, err) + } +} diff --git a/pkg/apparmor/apparmor_unsupported.go b/pkg/apparmor/apparmor_unsupported.go index b2b4de5f5..13469f1b6 100644 --- a/pkg/apparmor/apparmor_unsupported.go +++ b/pkg/apparmor/apparmor_unsupported.go @@ -24,3 +24,8 @@ func CheckProfileAndLoadDefault(name string) (string, error) { } return "", ErrApparmorUnsupported } + +// DefaultContent dummy. +func DefaultContent(name string) ([]byte, error) { + return nil, nil +} diff --git a/pkg/namespaces/namespaces.go b/pkg/namespaces/namespaces.go index ec9276344..7ed95bd0f 100644 --- a/pkg/namespaces/namespaces.go +++ b/pkg/namespaces/namespaces.go @@ -4,6 +4,63 @@ import ( "strings" ) +// CgroupMode represents cgroup mode in the container. +type CgroupMode string + +// IsHost indicates whether the container uses the host's cgroup. +func (n CgroupMode) IsHost() bool { + return n == "host" +} + +// IsNS indicates a cgroup namespace passed in by path (ns:<path>) +func (n CgroupMode) IsNS() bool { + return strings.HasPrefix(string(n), "ns:") +} + +// NS gets the path associated with a ns:<path> cgroup ns +func (n CgroupMode) NS() string { + parts := strings.SplitN(string(n), ":", 2) + if len(parts) > 1 { + return parts[1] + } + return "" +} + +// IsContainer indicates whether the container uses a new cgroup namespace. +func (n CgroupMode) IsContainer() bool { + parts := strings.SplitN(string(n), ":", 2) + return len(parts) > 1 && parts[0] == "container" +} + +// Container returns the name of the container whose cgroup namespace is going to be used. +func (n CgroupMode) Container() string { + parts := strings.SplitN(string(n), ":", 2) + if len(parts) > 1 { + return parts[1] + } + return "" +} + +// IsPrivate indicates whether the container uses the a private cgroup. +func (n CgroupMode) IsPrivate() bool { + return n == "private" +} + +// Valid indicates whether the Cgroup namespace is valid. +func (n CgroupMode) Valid() bool { + parts := strings.Split(string(n), ":") + switch mode := parts[0]; mode { + case "", "host", "private", "ns": + case "container": + if len(parts) != 2 || parts[1] == "" { + return false + } + default: + return false + } + return true +} + // UsernsMode represents userns mode in the container. type UsernsMode string diff --git a/pkg/spec/createconfig.go b/pkg/spec/createconfig.go index 0042ed401..1fb1f829b 100644 --- a/pkg/spec/createconfig.go +++ b/pkg/spec/createconfig.go @@ -63,6 +63,7 @@ type CreateConfig struct { CapDrop []string // cap-drop CidFile string ConmonPidFile string + Cgroupns string CgroupParent string // cgroup-parent Command []string Detach bool // detach @@ -101,6 +102,7 @@ type CreateConfig struct { NetworkAlias []string //network-alias PidMode namespaces.PidMode //pid Pod string //pod + CgroupMode namespaces.CgroupMode //cgroup PortBindings nat.PortMap Privileged bool //privileged Publish []string //publish @@ -268,6 +270,23 @@ func (c *CreateConfig) getContainerCreateOptions(runtime *libpod.Runtime, pod *l options = append(options, libpod.WithNetNS(portBindings, postConfigureNetNS, string(c.NetMode), networks)) } + if c.CgroupMode.IsNS() { + ns := c.CgroupMode.NS() + if ns == "" { + return nil, errors.Errorf("invalid empty user-defined network namespace") + } + _, err := os.Stat(ns) + if err != nil { + return nil, err + } + } else if c.CgroupMode.IsContainer() { + connectedCtr, err := runtime.LookupContainer(c.CgroupMode.Container()) + if err != nil { + return nil, errors.Wrapf(err, "container %q not found", c.CgroupMode.Container()) + } + options = append(options, libpod.WithCgroupNSFrom(connectedCtr)) + } + if c.PidMode.IsContainer() { connectedCtr, err := runtime.LookupContainer(c.PidMode.Container()) if err != nil { diff --git a/pkg/spec/spec.go b/pkg/spec/spec.go index 6d8d399f4..a8ab4911a 100644 --- a/pkg/spec/spec.go +++ b/pkg/spec/spec.go @@ -325,6 +325,10 @@ func (config *CreateConfig) createConfigToOCISpec(runtime *libpod.Runtime, userM if err := addIpcNS(config, &g); err != nil { return nil, err } + + if err := addCgroupNS(config, &g); err != nil { + return nil, err + } configSpec := g.Config // HANDLE CAPABILITIES @@ -418,6 +422,62 @@ func (config *CreateConfig) createConfigToOCISpec(runtime *libpod.Runtime, userM } } + // Add annotations + if configSpec.Annotations == nil { + configSpec.Annotations = make(map[string]string) + } + + if config.CidFile != "" { + configSpec.Annotations[libpod.InspectAnnotationCIDFile] = config.CidFile + } + + if config.Rm { + configSpec.Annotations[libpod.InspectAnnotationAutoremove] = libpod.InspectResponseTrue + } else { + configSpec.Annotations[libpod.InspectAnnotationAutoremove] = libpod.InspectResponseFalse + } + + if len(config.VolumesFrom) > 0 { + configSpec.Annotations[libpod.InspectAnnotationVolumesFrom] = strings.Join(config.VolumesFrom, ",") + } + + if config.Privileged { + configSpec.Annotations[libpod.InspectAnnotationPrivileged] = libpod.InspectResponseTrue + } else { + configSpec.Annotations[libpod.InspectAnnotationPrivileged] = libpod.InspectResponseFalse + } + + if config.PublishAll { + configSpec.Annotations[libpod.InspectAnnotationPublishAll] = libpod.InspectResponseTrue + } else { + configSpec.Annotations[libpod.InspectAnnotationPublishAll] = libpod.InspectResponseFalse + } + + if config.Init { + configSpec.Annotations[libpod.InspectAnnotationInit] = libpod.InspectResponseTrue + } else { + configSpec.Annotations[libpod.InspectAnnotationInit] = libpod.InspectResponseFalse + } + + for _, opt := range config.SecurityOpts { + // Split on both : and = + splitOpt := strings.Split(opt, "=") + if len(splitOpt) == 1 { + splitOpt = strings.Split(opt, ":") + } + if len(splitOpt) < 2 { + continue + } + switch splitOpt[0] { + case "label": + configSpec.Annotations[libpod.InspectAnnotationLabel] = splitOpt[1] + case "seccomp": + configSpec.Annotations[libpod.InspectAnnotationSeccomp] = splitOpt[1] + case "apparmor": + configSpec.Annotations[libpod.InspectAnnotationApparmor] = splitOpt[1] + } + } + return configSpec, nil } @@ -566,6 +626,23 @@ func addIpcNS(config *CreateConfig, g *generate.Generator) error { return nil } +func addCgroupNS(config *CreateConfig, g *generate.Generator) error { + cgroupMode := config.CgroupMode + if cgroupMode.IsNS() { + return g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), NS(string(cgroupMode))) + } + if cgroupMode.IsHost() { + return g.RemoveLinuxNamespace(spec.CgroupNamespace) + } + if cgroupMode.IsPrivate() { + return g.AddOrReplaceLinuxNamespace(spec.CgroupNamespace, "") + } + if cgroupMode.IsContainer() { + logrus.Debug("Using container cgroup mode") + } + return nil +} + func addRlimits(config *CreateConfig, g *generate.Generator) error { var ( kernelMax uint64 = 1048576 diff --git a/pkg/util/utils_linux.go b/pkg/util/utils_linux.go index 47fa1031f..318bd2b1b 100644 --- a/pkg/util/utils_linux.go +++ b/pkg/util/utils_linux.go @@ -1,7 +1,14 @@ package util import ( + "fmt" + "os" + "path/filepath" + "syscall" + "github.com/containers/psgo" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" ) // GetContainerPidInformationDescriptors returns a string slice of all supported @@ -9,3 +16,39 @@ import ( func GetContainerPidInformationDescriptors() ([]string, error) { return psgo.ListDescriptors(), nil } + +// FindDeviceNodes parses /dev/ into a set of major:minor -> path, where +// [major:minor] is the device's major and minor numbers formatted as, for +// example, 2:0 and path is the path to the device node. +// Symlinks to nodes are ignored. +func FindDeviceNodes() (map[string]string, error) { + nodes := make(map[string]string) + err := filepath.Walk("/dev", func(path string, info os.FileInfo, err error) error { + if err != nil { + logrus.Warnf("Error descending into path %s: %v", path, err) + return filepath.SkipDir + } + + // If we aren't a device node, do nothing. + if info.Mode()&(os.ModeDevice|os.ModeCharDevice) == 0 { + return nil + } + + // We are a device node. Get major/minor. + sysstat, ok := info.Sys().(*syscall.Stat_t) + if !ok { + return errors.Errorf("Could not convert stat output for use") + } + major := uint64(sysstat.Rdev / 256) + minor := uint64(sysstat.Rdev % 256) + + nodes[fmt.Sprintf("%d:%d", major, minor)] = path + + return nil + }) + if err != nil { + return nil, err + } + + return nodes, nil +} diff --git a/pkg/util/utils_unsupported.go b/pkg/util/utils_unsupported.go new file mode 100644 index 000000000..62805d7c8 --- /dev/null +++ b/pkg/util/utils_unsupported.go @@ -0,0 +1,12 @@ +// +build darwin windows + +package util + +import ( + "github.com/pkg/errors" +) + +// FindDeviceNodes is not implemented anywhere except Linux. +func FindDeviceNodes() (map[string]string, error) { + return nil, errors.Errorf("not supported on non-Linux OSes") +} diff --git a/pkg/varlinkapi/containers.go b/pkg/varlinkapi/containers.go index 83743351c..6f6909fac 100644 --- a/pkg/varlinkapi/containers.go +++ b/pkg/varlinkapi/containers.go @@ -19,7 +19,6 @@ import ( "github.com/containers/libpod/libpod/define" "github.com/containers/libpod/libpod/logs" "github.com/containers/libpod/pkg/adapter/shortcuts" - cc "github.com/containers/libpod/pkg/spec" "github.com/containers/storage/pkg/archive" "github.com/pkg/errors" ) @@ -172,16 +171,7 @@ func (i *LibpodAPI) InspectContainer(call iopodman.VarlinkCall, name string) err if err != nil { return call.ReplyContainerNotFound(name, err.Error()) } - inspectInfo, err := ctr.Inspect(true) - if err != nil { - return call.ReplyErrorOccurred(err.Error()) - } - artifact, err := getArtifact(ctr) - if err != nil { - return call.ReplyErrorOccurred(err.Error()) - } - - data, err := shared.GetCtrInspectInfo(ctr.Config(), inspectInfo, artifact) + data, err := ctr.Inspect(true) if err != nil { return call.ReplyErrorOccurred(err.Error()) } @@ -589,18 +579,6 @@ func (i *LibpodAPI) ContainerRestore(call iopodman.VarlinkCall, name string, kee return call.ReplyContainerRestore(ctr.ID()) } -func getArtifact(ctr *libpod.Container) (*cc.CreateConfig, error) { - var createArtifact cc.CreateConfig - artifact, err := ctr.GetArtifact("create-config") - if err != nil { - return nil, err - } - if err := json.Unmarshal(artifact, &createArtifact); err != nil { - return nil, err - } - return &createArtifact, nil -} - // ContainerConfig returns just the container.config struct func (i *LibpodAPI) ContainerConfig(call iopodman.VarlinkCall, name string) error { ctr, err := i.Runtime.LookupContainer(name) diff --git a/test/e2e/common_test.go b/test/e2e/common_test.go index abaf2cccf..ef1c85518 100644 --- a/test/e2e/common_test.go +++ b/test/e2e/common_test.go @@ -12,7 +12,6 @@ import ( "testing" "time" - "github.com/containers/libpod/cmd/podman/shared" "github.com/containers/libpod/libpod" "github.com/containers/libpod/pkg/inspect" "github.com/containers/libpod/pkg/rootless" @@ -322,7 +321,7 @@ func (s *PodmanSessionIntegration) InspectImageJSON() []inspect.ImageData { } // InspectContainer returns a container's inspect data in JSON format -func (p *PodmanTestIntegration) InspectContainer(name string) []shared.InspectContainer { +func (p *PodmanTestIntegration) InspectContainer(name string) []libpod.InspectContainerData { cmd := []string{"inspect", name} session := p.Podman(cmd) session.WaitWithDefaultTimeout() @@ -481,8 +480,8 @@ func (p *PodmanTestIntegration) PullImage(image string) error { // InspectContainerToJSON takes the session output of an inspect // container and returns json -func (s *PodmanSessionIntegration) InspectContainerToJSON() []shared.InspectContainer { - var i []shared.InspectContainer +func (s *PodmanSessionIntegration) InspectContainerToJSON() []libpod.InspectContainerData { + var i []libpod.InspectContainerData err := json.Unmarshal(s.Out.Contents(), &i) Expect(err).To(BeNil()) return i diff --git a/test/e2e/play_kube_test.go b/test/e2e/play_kube_test.go new file mode 100644 index 000000000..a6f59a3da --- /dev/null +++ b/test/e2e/play_kube_test.go @@ -0,0 +1,123 @@ +// +build !remoteclient + +package integration + +import ( + "os" + "path/filepath" + "text/template" + + . "github.com/containers/libpod/test/utils" + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" +) + +var yamlTemplate = ` +apiVersion: v1 +kind: Pod +metadata: + creationTimestamp: "2019-07-17T14:44:08Z" + labels: + app: {{ .Name }} + name: {{ .Name }} +spec: + containers: +{{ with .Containers }} + {{ range . }} + - command: + - {{ .Cmd }} + env: + - name: PATH + value: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + - name: TERM + value: xterm + - name: HOSTNAME + - name: container + value: podman + image: {{ .Image }} + name: {{ .Name }} + resources: {} + securityContext: + allowPrivilegeEscalation: true + capabilities: {} + privileged: false + readOnlyRootFilesystem: false + workingDir: / + {{ end }} +{{ end }} +status: {} +` + +type Pod struct { + Name string + Containers []Container +} + +type Container struct { + Cmd string + Image string + Name string +} + +func generateKubeYaml(ctrs []Container, fileName string) error { + f, err := os.Create(fileName) + if err != nil { + return err + } + defer f.Close() + testPod := Pod{"test", ctrs} + + t, err := template.New("pod").Parse(yamlTemplate) + if err != nil { + return err + } + + if err := t.Execute(f, testPod); err != nil { + return err + } + + return nil +} + +var _ = Describe("Podman generate kube", func() { + var ( + tempdir string + err error + podmanTest *PodmanTestIntegration + ) + + BeforeEach(func() { + tempdir, err = CreateTempDirInTempDir() + if err != nil { + os.Exit(1) + } + podmanTest = PodmanTestCreate(tempdir) + podmanTest.Setup() + podmanTest.SeedImages() + }) + + AfterEach(func() { + podmanTest.Cleanup() + f := CurrentGinkgoTestDescription() + processTestResult(f) + }) + + It("podman play kube test correct command", func() { + ctrName := "testCtr" + ctrCmd := "top" + testContainer := Container{ctrCmd, ALPINE, ctrName} + tempFile := filepath.Join(podmanTest.TempDir, "kube.yaml") + + err := generateKubeYaml([]Container{testContainer}, tempFile) + Expect(err).To(BeNil()) + + kube := podmanTest.Podman([]string{"play", "kube", tempFile}) + kube.WaitWithDefaultTimeout() + Expect(kube.ExitCode()).To(Equal(0)) + + inspect := podmanTest.Podman([]string{"inspect", ctrName}) + inspect.WaitWithDefaultTimeout() + Expect(inspect.ExitCode()).To(Equal(0)) + Expect(inspect.OutputToString()).To(ContainSubstring(ctrCmd)) + }) +}) diff --git a/test/e2e/run_ns_test.go b/test/e2e/run_ns_test.go index 6ba0d1aba..e3e86fc66 100644 --- a/test/e2e/run_ns_test.go +++ b/test/e2e/run_ns_test.go @@ -51,6 +51,15 @@ var _ = Describe("Podman run ns", func() { Expect(session.ExitCode()).To(Not(Equal(0))) }) + It("podman run --cgroup private test", func() { + session := podmanTest.Podman([]string{"run", "--cgroupns=private", fedoraMinimal, "cat", "/proc/self/cgroup"}) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To(Equal(0)) + + output := session.OutputToString() + Expect(output).ToNot(ContainSubstring("slice")) + }) + It("podman run ipcns test", func() { setup := SystemExec("ls", []string{"--inode", "-d", "/dev/shm"}) Expect(setup.ExitCode()).To(Equal(0)) diff --git a/vendor/modules.txt b/vendor/modules.txt index ad2f69976..462ba1408 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -355,8 +355,8 @@ github.com/opencontainers/runc/libcontainer/system github.com/opencontainers/runtime-spec/specs-go # github.com/opencontainers/runtime-tools v0.9.0 github.com/opencontainers/runtime-tools/generate -github.com/opencontainers/runtime-tools/generate/seccomp github.com/opencontainers/runtime-tools/validate +github.com/opencontainers/runtime-tools/generate/seccomp github.com/opencontainers/runtime-tools/filepath github.com/opencontainers/runtime-tools/specerror github.com/opencontainers/runtime-tools/error |