diff options
46 files changed, 552 insertions, 247 deletions
@@ -1732,7 +1732,7 @@ uptime [string](https://godoc.org/builtin#string) eventlogger [string](https://godoc.org/builtin#string) ### <a name="InfoPodmanBinary"></a>type InfoPodmanBinary -InfoPodman provides details on the podman binary +InfoPodman provides details on the Podman binary compiler [string](https://godoc.org/builtin#string) @@ -136,8 +136,8 @@ help: .gopathok: ifeq ("$(wildcard $(GOPKGDIR))","") mkdir -p "$(GOPKGBASEDIR)" - ln -sfnT "$(CURDIR)" "$(GOPKGDIR)" - ln -sfnT "$(CURDIR)/vendor/github.com/varlink" "$(FIRST_GOPATH)/src/github.com/varlink" + ln -sfn "$(CURDIR)" "$(GOPKGDIR)" + ln -sfn "$(CURDIR)/vendor/github.com/varlink" "$(FIRST_GOPATH)/src/github.com/varlink" endif touch $@ diff --git a/cmd/podman/README.md b/cmd/podman/README.md index 0fee7eafa..937eef510 100644 --- a/cmd/podman/README.md +++ b/cmd/podman/README.md @@ -1,5 +1,5 @@ -# podman - Simple debugging tool for pods and images -podman is a daemonless container runtime for managing containers, pods, and container images. +# Podman - Simple debugging tool for pods and images +Podman is a daemonless container runtime for managing containers, pods, and container images. It is intended as a counterpart to CRI-O, to provide low-level debugging not available through the CRI interface used by Kubernetes. It can also act as a container runtime independent of CRI-O, creating and managing its own set of containers. 
diff --git a/cmd/podman/common.go b/cmd/podman/common.go index 9724d18c6..0115e6ef1 100644 --- a/cmd/podman/common.go +++ b/cmd/podman/common.go @@ -135,6 +135,10 @@ func getCreateFlags(c *cliconfig.PodmanCommand) { "cgroup namespace to use", ) createFlags.String( + "cgroups", "enabled", + "control container cgroup configuration", + ) + createFlags.String( "cgroup-parent", "", "Optional parent cgroup for the container", ) diff --git a/cmd/podman/shared/create.go b/cmd/podman/shared/create.go index acbd53dba..fc8197721 100644 --- a/cmd/podman/shared/create.go +++ b/cmd/podman/shared/create.go @@ -695,6 +695,7 @@ func ParseCreateOpts(ctx context.Context, c *GenericCLIResults, runtime *libpod. CapDrop: c.StringSlice("cap-drop"), CidFile: c.String("cidfile"), Cgroupns: c.String("cgroupns"), + Cgroups: c.String("cgroups"), CgroupParent: c.String("cgroup-parent"), Command: command, UserCommand: userCommand, diff --git a/cmd/podman/shared/intermediate.go b/cmd/podman/shared/intermediate.go index 5aaac8687..cccdd1bea 100644 --- a/cmd/podman/shared/intermediate.go +++ b/cmd/podman/shared/intermediate.go @@ -370,6 +370,8 @@ func NewIntermediateLayer(c *cliconfig.PodmanCommand, remote bool) GenericCLIRes m["blkio-weight-device"] = newCRStringSlice(c, "blkio-weight-device") m["cap-add"] = newCRStringSlice(c, "cap-add") m["cap-drop"] = newCRStringSlice(c, "cap-drop") + m["cgroupns"] = newCRString(c, "cgroupns") + m["cgroups"] = newCRString(c, "cgroups") m["cgroup-parent"] = newCRString(c, "cgroup-parent") m["cidfile"] = newCRString(c, "cidfile") m["conmon-pidfile"] = newCRString(c, "conmon-pidfile") diff --git a/cmd/podman/varlink/io.podman.varlink b/cmd/podman/varlink/io.podman.varlink index 2e46b31ce..4692525e3 100644 --- a/cmd/podman/varlink/io.podman.varlink +++ b/cmd/podman/varlink/io.podman.varlink @@ -249,7 +249,7 @@ type InfoStore ( run_root: string ) -# InfoPodman provides details on the podman binary +# InfoPodman provides details on the Podman binary type 
InfoPodmanBinary ( compiler: string, go_version: string, diff --git a/contrib/cirrus/README.md b/contrib/cirrus/README.md index ada362d95..7aa8881d6 100644 --- a/contrib/cirrus/README.md +++ b/contrib/cirrus/README.md @@ -72,7 +72,7 @@ and `darwin` targets. ### ``special_testing_cgroupv2`` Task Use the latest Fedora release with the required kernel options pre-set for -exercising cgroups v2 with podman integration tests. Also depends on +exercising cgroups v2 with Podman integration tests. Also depends on having `SPECIALMODE` set to 'cgroupv2` @@ -272,7 +272,7 @@ values follows: * `rootless`: Causes a random, ordinary user account to be created and utilized for testing. * `in_podman`: Causes testing to occur within a container executed by - podman on the host. + Podman on the host. * `cgroupv2`: The kernel on this VM was prepared with options to enable v2 cgroups * `windows`: See **darwin** * `darwin`: Signals the ``special_testing_cross`` task to cross-compile the remote client. diff --git a/contrib/cirrus/container_test.sh b/contrib/cirrus/container_test.sh index 27baf0ad7..9d3f09f60 100644 --- a/contrib/cirrus/container_test.sh +++ b/contrib/cirrus/container_test.sh @@ -126,6 +126,7 @@ if [ $install -eq 1 ]; then make TAGS="${TAGS}" install.bin PREFIX=/usr ETCDIR=/etc make TAGS="${TAGS}" install.man PREFIX=/usr ETCDIR=/etc make TAGS="${TAGS}" install.cni PREFIX=/usr ETCDIR=/etc + make TAGS="${TAGS}" install.config PREFIX=/usr ETCDIR=/etc make TAGS="${TAGS}" install.systemd PREFIX=/usr ETCDIR=/etc fi diff --git a/contrib/cirrus/integration_test.sh b/contrib/cirrus/integration_test.sh index 552f2ba73..00c3b0ec3 100755 --- a/contrib/cirrus/integration_test.sh +++ b/contrib/cirrus/integration_test.sh @@ -45,6 +45,7 @@ case "$SPECIALMODE" in export OCI_RUNTIME=/usr/bin/crun make make install PREFIX=/usr ETCDIR=/etc + make install.config PREFIX=/usr make test-binaries make local${TESTSUITE} ;; @@ -57,6 +58,7 @@ case "$SPECIALMODE" in none) make make install 
PREFIX=/usr ETCDIR=/etc + make install.config PREFIX=/usr make test-binaries if [[ "$TEST_REMOTE_CLIENT" == "true" ]] then diff --git a/contrib/podmanimage/README.md b/contrib/podmanimage/README.md index 3dc07ad63..ab55f3189 100644 --- a/contrib/podmanimage/README.md +++ b/contrib/podmanimage/README.md @@ -5,7 +5,7 @@ ## Overview This directory contains the Dockerfiles necessary to create the three podmanimage container -images that are housed on quay.io under the podman account. All three repositories where +images that are housed on quay.io under the Podman account. All three repositories where the images live are public and can be pulled without credentials. These container images are secured and the resulting containers can run safely with privileges within the container. The container images are built using the latest Fedora and then Podman is installed into them: diff --git a/contrib/podmanimage/stable/Dockerfile b/contrib/podmanimage/stable/Dockerfile index 056f62624..6b4eb2220 100644 --- a/contrib/podmanimage/stable/Dockerfile +++ b/contrib/podmanimage/stable/Dockerfile @@ -18,7 +18,7 @@ RUN sed -i -e 's|^#mount_program|mount_program|g' -e '/additionalimage.*/a "/var RUN mkdir -p /var/lib/shared/overlay-images /var/lib/shared/overlay-layers; touch /var/lib/shared/overlay-images/images.lock; touch /var/lib/shared/overlay-layers/layers.lock # Adjust libpod.conf to write logging to a file -RUN sed -i 's/events_logger = "journald"/events_logger = "file"/g' /usr/share/containers/libpod.conf +RUN sed -i 's/# events_logger = "journald"/events_logger = "file"/g' /usr/share/containers/libpod.conf # Set up environment variables to note that this is # not starting with usernamespace and default to diff --git a/contrib/podmanimage/testing/Dockerfile b/contrib/podmanimage/testing/Dockerfile index 50d8ed7f2..4dffc8911 100644 --- a/contrib/podmanimage/testing/Dockerfile +++ b/contrib/podmanimage/testing/Dockerfile @@ -20,7 +20,7 @@ RUN sed -i -e 
's|^#mount_program|mount_program|g' -e '/additionalimage.*/a "/var RUN mkdir -p /var/lib/shared/overlay-images /var/lib/shared/overlay-layers; touch /var/lib/shared/overlay-images/images.lock; touch /var/lib/shared/overlay-layers/layers.lock # Adjust libpod.conf to write logging to a file -RUN sed -i 's/events_logger = "journald"/events_logger = "file"/g' /usr/share/containers/libpod.conf +RUN sed -i 's/# events_logger = "journald"/events_logger = "file"/g' /usr/share/containers/libpod.conf # Set up environment variables to note that this is # not starting with usernamespace and default to diff --git a/contrib/podmanimage/upstream/Dockerfile b/contrib/podmanimage/upstream/Dockerfile index 3583e1c54..82b88b50b 100644 --- a/contrib/podmanimage/upstream/Dockerfile +++ b/contrib/podmanimage/upstream/Dockerfile @@ -62,7 +62,7 @@ RUN dnf -y install --exclude container-selinux \ mkdir -p /usr/share/containers; \ cp $GOPATH/src/github.com/containers/libpod/libpod.conf /usr/share/containers; \ # Adjust libpod.conf to write logging to a file - sed -i 's/events_logger = "journald"/events_logger = "file"/g' /usr/share/containers/libpod.conf; \ + sed -i 's/# events_logger = "journald"/events_logger = "file"/g' /usr/share/containers/libpod.conf; \ rm -rf /root/podman/*; \ dnf -y remove git golang go-md2man make; \ dnf clean all; diff --git a/dependencies/analyses/README.md b/dependencies/analyses/README.md index a440a0ebd..67dab6f75 100644 --- a/dependencies/analyses/README.md +++ b/dependencies/analyses/README.md @@ -13,7 +13,7 @@ The analysis script will then read and parse the build data and print a sorted t Running such an analysis on libpod may look as follows: ``` -# 1) Build the podman binary with `-work -a`. +# 1) Build the Podman binary with `-work -a`. [libpod]$ BUILDFLAGS="-work -a" make podman [...] 
WORK=/tmp/go-build794287815 diff --git a/docs/podman-build.1.md b/docs/podman-build.1.md index 20f4d6aab..1a04f8224 100644 --- a/docs/podman-build.1.md +++ b/docs/podman-build.1.md @@ -521,8 +521,8 @@ process. **--volume**, **-v**[=*[HOST-DIR:CONTAINER-DIR[:OPTIONS]]*] - Create a bind mount. If you specify, ` -v /HOST-DIR:/CONTAINER-DIR`, podman - bind mounts `/HOST-DIR` in the host to `/CONTAINER-DIR` in the podman + Create a bind mount. If you specify, ` -v /HOST-DIR:/CONTAINER-DIR`, Podman + bind mounts `/HOST-DIR` in the host to `/CONTAINER-DIR` in the Podman container. The `OPTIONS` are a comma delimited list and can be: * [rw|ro] @@ -547,14 +547,14 @@ See examples. Labeling systems like SELinux require that proper labels are placed on volume content mounted into a container. Without a label, the security system might prevent the processes running inside the container from using the content. By -default, podman does not change the labels set by the OS. +default, Podman does not change the labels set by the OS. To change a label in the container context, you can add either of two suffixes -`:z` or `:Z` to the volume mount. These suffixes tell podman to relabel file -objects on the shared volumes. The `z` option tells podman that two containers -share the volume content. As a result, podman labels the content with a shared +`:z` or `:Z` to the volume mount. These suffixes tell Podman to relabel file +objects on the shared volumes. The `z` option tells Podman that two containers +share the volume content. As a result, Podman labels the content with a shared content label. Shared volume labels allow all containers to read/write content. -The `Z` option tells podman to label the content with a private unshared label. +The `Z` option tells Podman to label the content with a private unshared label. Only the current container can use a private volume. 
`Overlay Volume Mounts` diff --git a/docs/podman-commit.1.md b/docs/podman-commit.1.md index 5b0ba48aa..07a885ae2 100644 --- a/docs/podman-commit.1.md +++ b/docs/podman-commit.1.md @@ -15,7 +15,7 @@ configured with the `--change` flag and a commit message can be set using the `--message` flag. The container and its processes are paused while the image is committed. This minimizes the likelihood of data corruption when creating the new image. If this is not desired, the `--pause` flag can be set to false. When the commit -is complete, podman will print out the ID of the new image. +is complete, Podman will print out the ID of the new image. If *image* does not begin with a registry name component, `localhost` will be added to the name. diff --git a/docs/podman-create.1.md b/docs/podman-create.1.md index 9924e7dff..996ef3863 100644 --- a/docs/podman-create.1.md +++ b/docs/podman-create.1.md @@ -73,6 +73,12 @@ Set the cgroup namespace mode for the container, by default **host** is used. **private**: create a new cgroup namespace. **ns:<PATH>**: join the namespace at the specified path. +**--cgroups**=*mode* + +Determines whether the container will create CGroups. +Valid values are *enabled* and *disabled*, with the default being *enabled*. +The *disabled* option will force the container to not create CGroups, and thus conflicts with CGroup options (**--cgroupns** and **--cgroup-parent**). + **--cgroup-parent**=*path* Path to cgroups under which the cgroup for the container will be created. If the path is not absolute, the path is considered to be relative to the cgroups path of the init process. Cgroups will be created if they do not already exist. @@ -322,7 +328,7 @@ Print usage statement **--http-proxy**=*true|false* By default proxy environment variables are passed into the container if set -for the podman process. This can be disabled by setting the `--http-proxy` +for the Podman process. This can be disabled by setting the `--http-proxy` option to `false`. 
The environment variables passed in include `http_proxy`, `https_proxy`, `ftp_proxy`, `no_proxy`, and also the upper case versions of those. This option is only needed when the host system must use a proxy but @@ -341,7 +347,7 @@ Defaults to `true` **--image-volume**, **builtin-volume**=*bind|tmpfs|ignore* -Tells podman how to handle the builtin image volumes. The options are: 'bind', 'tmpfs', or 'ignore' (default 'bind'). +Tells Podman how to handle the builtin image volumes. The options are: 'bind', 'tmpfs', or 'ignore' (default 'bind'). bind: A directory is created inside the container state directory and bind mounted into the container for the volumes. tmpfs: The volume is mounted onto the container as a tmpfs, which allows the users to create @@ -505,7 +511,7 @@ Set the Network mode for the container. Invalid if using **--dns**, **--dns-opti 'bridge': create a network stack on the default bridge 'none': no networking 'container:<name|id>': reuse another container's network stack - 'host': use the podman host network stack. Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure. + 'host': use the Podman host network stack. Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure. '<network-name>|<network-id>': connect to a user-defined network 'ns:<path>': path to a network namespace to join 'slirp4netns': use slirp4netns to create a user network stack. This is the default for rootless containers @@ -543,19 +549,19 @@ Tune the container's pids limit. Set `-1` to have unlimited pids for the contain **--pod**=*name* -Run container in an existing pod. If you want podman to make the pod for you, preference the pod name with `new:`. +Run container in an existing pod. If you want Podman to make the pod for you, preference the pod name with `new:`. 
To make a pod with more granular options, use the `podman pod create` command before creating a container. **--privileged**=*true|false* Give extended privileges to this container. The default is *false*. -By default, podman containers are +By default, Podman containers are “unprivileged” (=false) and cannot, for example, modify parts of the kernel. This is because by default a container is not allowed to access any devices. A “privileged” container is given access to all devices. -When the operator executes a privileged container, podman enables access +When the operator executes a privileged container, Podman enables access to all devices on the host, turns off graphdriver mount options, as well as turning off most of the security measures protecting the host from the container. @@ -577,9 +583,9 @@ Use `podman port` to see the actual mapping: `podman port CONTAINER $CONTAINERPO Publish all exposed ports to random ports on the host interfaces. The default is *false*. When set to true publish all exposed ports to the host interfaces. The -default is false. If the operator uses -P (or -p) then podman will make the +default is false. If the operator uses -P (or -p) then Podman will make the exposed port accessible on the host and the ports will be available to any -client that can reach the host. When using -P, podman will bind any exposed +client that can reach the host. When using -P, Podman will bind any exposed port to a random port on the host within an *ephemeral port range* defined by `/proc/sys/net/ipv4/ip_local_port_range`. To find the mapping between the host ports and the exposed ports, use `podman port`. @@ -733,11 +739,11 @@ any options, the systems uses the following options: Allocate a pseudo-TTY. The default is *false*. -When set to true podman will allocate a pseudo-tty and attach to the standard +When set to true Podman will allocate a pseudo-tty and attach to the standard input of the container. 
This can be used, for example, to run a throwaway interactive shell. The default is false. -Note: The **-t** option is incompatible with a redirection of the podman client +Note: The **-t** option is incompatible with a redirection of the Podman client standard input. **--uidmap**=*container_uid:host_uid:amount* @@ -793,8 +799,8 @@ container. The `OPTIONS` are a comma delimited list and can be: * [`[r]shared`|`[r]slave`|`[r]private`] The `CONTAINER-DIR` must be an absolute path such as `/src/docs`. The `HOST-DIR` -must be an absolute path as well. podman bind-mounts the `HOST-DIR` to the -path you specify. For example, if you supply the `/foo` value, podman creates a bind-mount. +must be an absolute path as well. Podman bind-mounts the `HOST-DIR` to the +path you specify. For example, if you supply the `/foo` value, Podman creates a bind-mount. You can specify multiple **-v** options to mount one or more mounts to a container. @@ -806,14 +812,14 @@ See examples. Labeling systems like SELinux require that proper labels are placed on volume content mounted into a container. Without a label, the security system might prevent the processes running inside the container from using the content. By -default, podman does not change the labels set by the OS. +default, Podman does not change the labels set by the OS. To change a label in the container context, you can add either of two suffixes -`:z` or `:Z` to the volume mount. These suffixes tell podman to relabel file -objects on the shared volumes. The `z` option tells podman that two containers -share the volume content. As a result, podman labels the content with a shared +`:z` or `:Z` to the volume mount. These suffixes tell Podman to relabel file +objects on the shared volumes. The `z` option tells Podman that two containers +share the volume content. As a result, Podman labels the content with a shared content label. Shared volume labels allow all containers to read/write content. 
-The `Z` option tells podman to label the content with a private unshared label. +The `Z` option tells Podman to label the content with a private unshared label. Only the current container can use a private volume. By default bind mounted volumes are `private`. That means any mounts done @@ -861,7 +867,7 @@ To share a volume, use the --volumes-from option when running the target container. You can share volumes even if the source container is not running. -By default, podman mounts the volumes in the same mode (read-write or +By default, Podman mounts the volumes in the same mode (read-write or read-only) as it is mounted in the source container. Optionally, you can change this by suffixing the container-id with either the `ro` or `rw` keyword. @@ -869,11 +875,11 @@ can change this by suffixing the container-id with either the `ro` or Labeling systems like SELinux require that proper labels are placed on volume content mounted into a container. Without a label, the security system might prevent the processes running inside the container from using the content. By -default, podman does not change the labels set by the OS. +default, Podman does not change the labels set by the OS. To change a label in the container context, you can add `z` to the volume mount. -This suffix tells podman to relabel file objects on the shared volumes. The `z` -option tells podman that two containers share the volume content. As a result, +This suffix tells Podman to relabel file objects on the shared volumes. The `z` +option tells Podman that two containers share the volume content. As a result, podman labels the content with a shared content label. Shared volume labels allow all containers to read/write content. @@ -927,7 +933,7 @@ Note: RHEL7 and Centos 7 will not have this feature until RHEL7.7 is released. In order for users to run rootless, there must be an entry for their username in /etc/subuid and /etc/subgid which lists the UIDs for their user namespace. 
-Rootless podman works better if the fuse-overlayfs and slirp4netns packages are installed. +Rootless Podman works better if the fuse-overlayfs and slirp4netns packages are installed. The fuse-overlay package provides a userspace overlay storage driver, otherwise users need to use the vfs storage driver, which is diskspace expensive and does not perform well. slirp4netns is required for VPN, without it containers need to be run with the --net=host flag. @@ -937,7 +943,7 @@ required for VPN, without it containers need to be run with the --net=host flag. Environment variables within containers can be set using multiple different options: This section describes the precedence. Precedence Order: - **--env-host** : Host environment of the process executing podman is added. + **--env-host** : Host environment of the process executing Podman is added. Container image : Any environment variables specified in the container image. @@ -967,7 +973,7 @@ b subgid(5), subuid(5), libpod.conf(5), systemd.unit(5), setsebool(8), slirp4netns(1), fuse-overlayfs(1) ## HISTORY -October 2017, converted from Docker documentation to podman by Dan Walsh for podman <dwalsh@redhat.com> +October 2017, converted from Docker documentation to Podman by Dan Walsh for Podman <dwalsh@redhat.com> November 2014, updated by Sven Dowideit <SvenDowideit@home.org.au> diff --git a/docs/podman-events.1.md b/docs/podman-events.1.md index ed3faedfd..a5a715098 100644 --- a/docs/podman-events.1.md +++ b/docs/podman-events.1.md @@ -98,7 +98,7 @@ The *since* and *until* values can be RFC3339Nano time stamps or a Go duration s ## EXAMPLES -Showing podman events +Showing Podman events ``` $ podman events 2019-03-02 10:33:42.312377447 -0600 CST container create 34503c192940 (image=docker.io/library/alpine:latest, name=friendly_allen) @@ -108,7 +108,7 @@ $ podman events 2019-03-02 10:33:51.047104966 -0600 CST container cleanup 34503c192940 (image=docker.io/library/alpine:latest, name=friendly_allen) ``` -Show only 
podman create events +Show only Podman create events ``` $ podman events --filter event=create 2019-03-02 10:36:01.375685062 -0600 CST container create 20dc581f6fbf (image=docker.io/library/alpine:latest, name=sharp_morse) @@ -117,7 +117,7 @@ $ podman events --filter event=create 2019-03-02 10:36:29.978806894 -0600 CST container create d81e30f1310f (image=docker.io/library/busybox:latest, name=musing_newton) ``` -Show only podman pod create events +Show only Podman pod create events ``` $ podman events --filter event=create --filter type=pod 2019-03-02 10:44:29.601746633 -0600 CST pod create 1df5ebca7b44 (image=, name=confident_hawking) @@ -125,7 +125,7 @@ $ podman events --filter event=create --filter type=pod 2019-03-02 10:44:47.486759133 -0600 CST pod create 71e807fc3a8e (image=, name=reverent_swanson) ``` -Show only podman events created in the last five minutes: +Show only Podman events created in the last five minutes: ``` $ sudo podman events --since 5m 2019-03-02 10:44:29.598835409 -0600 CST container create b629d10d3831 (image=k8s.gcr.io/pause:3.1, name=1df5ebca7b44-infra) @@ -134,7 +134,7 @@ $ sudo podman events --since 5m 2019-03-02 10:44:42.374637304 -0600 CST pod create ca731231718e (image=, name=webapp) ``` -Show podman events in JSON Lines format +Show Podman events in JSON Lines format ``` events --format json {"ID":"683b0909d556a9c02fa8cd2b61c3531a965db42158627622d1a67b391964d519","Image":"localhost/myshdemo:latest","Name":"agitated_diffie","Status":"cleanup","Time":"2019-04-27T22:47:00.849932843-04:00","Type":"container"} diff --git a/docs/podman-exec.1.md b/docs/podman-exec.1.md index f71b21126..4c17c056a 100644 --- a/docs/podman-exec.1.md +++ b/docs/podman-exec.1.md @@ -64,7 +64,7 @@ when creating the container. The exit code from `podman exec` gives information about why the command within the container failed to run or why it exited. 
When `podman exec` exits with a non-zero code, the exit codes follow the `chroot` standard, see below: -**_125_** if the error is with podman **_itself_** +**_125_** if the error is with Podman **_itself_** $ podman exec --foo ctrID /bin/sh; echo $? Error: unknown flag: --foo diff --git a/docs/podman-generate-kube.1.md b/docs/podman-generate-kube.1.md index 8f15e14ba..f4b4cd482 100644 --- a/docs/podman-generate-kube.1.md +++ b/docs/podman-generate-kube.1.md @@ -6,7 +6,7 @@ podman-generate-kube - Generate Kubernetes YAML based on a pod or container **podman generate kube** [*options*] *container* | *pod* ## DESCRIPTION -**podman generate kube** will generate Kubernetes Pod YAML (v1 specification) from a podman container or pod. Whether +**podman generate kube** will generate Kubernetes Pod YAML (v1 specification) from a Podman container or pod. Whether the input is for a container or pod, Podman will always generate the specification as a Pod. The input may be in the form of a pod or container name or ID. diff --git a/docs/podman-image-sign.1.md b/docs/podman-image-sign.1.md index ca438b438..62845e715 100644 --- a/docs/podman-image-sign.1.md +++ b/docs/podman-image-sign.1.md @@ -39,7 +39,7 @@ Sign the busybox image with the identify of foo@bar.com with a user's keyring an The write (and read) location for signatures is defined in YAML-based configuration files in /etc/containers/registries.d/. When you sign -an image, podman will use those configuration files to determine +an image, Podman will use those configuration files to determine where to write the signature based on the the name of the originating registry or a default storage value unless overriden with the --directory option. For example, consider the following configuration file. 
diff --git a/docs/podman-image-tree.1.md b/docs/podman-image-tree.1.md index 5ffd995f6..c4624e05c 100644 --- a/docs/podman-image-tree.1.md +++ b/docs/podman-image-tree.1.md @@ -9,7 +9,7 @@ podman\-image\-tree - Prints layer hierarchy of an image in a tree format ## DESCRIPTION Prints layer hierarchy of an image in a tree format. -If you do not provide a *tag*, podman will default to `latest` for the *image*. +If you do not provide a *tag*, Podman will default to `latest` for the *image*. Layers are indicated with image tags as `Top Layer of`, when the tag is known locally. ## OPTIONS diff --git a/docs/podman-run.1.md b/docs/podman-run.1.md index c642b50b6..0dbd4ea6f 100644 --- a/docs/podman-run.1.md +++ b/docs/podman-run.1.md @@ -87,6 +87,12 @@ Set the cgroup namespace mode for the container, by default **host** is used. **private**: create a new cgroup namespace. **ns:<PATH>**: join the namespace at the specified path. +**--cgroups**=*mode* + +Determines whether the container will create CGroups. +Valid values are *enabled* and *disabled*, with the default being *enabled*. +The *disabled* option will force the container to not create CGroups, and thus conflicts with CGroup options (**--cgroupns** and **--cgroup-parent**). + **--cgroup-parent**=*cgroup* Path to cgroups under which the cgroup for the container will be created. If the path is not absolute, the path is considered to be relative to the cgroups path of the init process. Cgroups will be created if they do not already exist. @@ -282,7 +288,7 @@ on the host system. **--gidmap**=*container_gid:host_gid:amount* Run the container in a new user namespace using the supplied mapping. This option conflicts with the --userns and --subgidname flags. -This option can be passed several times to map different ranges. If calling podman run as an unprivileged user, the user needs to have the right to use the mapping. See `subuid(5)`. +This option can be passed several times to map different ranges. 
If calling Podman run as an unprivileged user, the user needs to have the right to use the mapping. See `subuid(5)`. The example maps gids 0-2000 in the container to the gids 30000-31999 on the host. `--gidmap=0:30000:2000` **--group-add**=*group* @@ -329,7 +335,7 @@ Sets the container host name that is available inside the container. **--http-proxy**=*true|false* By default proxy environment variables are passed into the container if set -for the podman process. This can be disabled by setting the `--http-proxy` +for the Podman process. This can be disabled by setting the `--http-proxy` option to `false`. The environment variables passed in include `http_proxy`, `https_proxy`, `ftp_proxy`, `no_proxy`, and also the upper case versions of those. This option is only needed when the host system must use a proxy but @@ -348,7 +354,7 @@ Defaults to `true` **--image-volume**, **builtin-volume**=*bind|tmpfs|ignore* -Tells podman how to handle the builtin image volumes. +Tells Podman how to handle the builtin image volumes. The options are: `bind`, `tmpfs`, or `ignore` (default `bind`) @@ -475,6 +481,8 @@ Current supported mount TYPES are bind, and tmpfs. type=bind,source=/path/on/host,destination=/path/in/container + type=bind,source=volume-name,destination=/path/in/container + type=tmpfs,tmpfs-size=512M,destination=/path/in/container Common Options: @@ -516,7 +524,7 @@ Set the Network mode for the container. Invalid if using **--dns**, **--dns-opti - `bridge`: create a network stack on the default bridge - `none`: no networking - `container:<name|id>`: reuse another container's network stack -- `host`: use the podman host network stack. Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure. +- `host`: use the Podman host network stack. Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure. 
- `<network-name>|<network-id>`: connect to a user-defined network - `ns:<path>`: path to a network namespace to join - `slirp4netns`: use slirp4netns to create a user network stack. This is the default for rootless containers @@ -556,7 +564,7 @@ Tune the container's pids limit. Set `-1` to have unlimited pids for the contain **--pod**=*name* -Run container in an existing pod. If you want podman to make the pod for you, preference the pod name with `new:`. +Run container in an existing pod. If you want Podman to make the pod for you, preference the pod name with `new:`. To make a pod with more granular options, use the `podman pod create` command before creating a container. If a container is run with a pod, and the pod has an infra-container, the infra-container will be started before the container is. @@ -564,12 +572,12 @@ If a container is run with a pod, and the pod has an infra-container, the infra- Give extended privileges to this container. The default is *false*. -By default, podman containers are “unprivileged” (=false) and cannot, +By default, Podman containers are “unprivileged” (=false) and cannot, for example, modify parts of the kernel. This is because by default a container is not allowed to access any devices. A “privileged” container is given access to all devices. -When the operator executes **podman run --privileged**, podman enables access +When the operator executes **podman run --privileged**, Podman enables access to all devices on the host, turns off graphdriver mount options, as well as turning off most of the security measures protecting the host from the container. @@ -595,11 +603,11 @@ Use `podman port` to see the actual mapping: `podman port CONTAINER $CONTAINERPO Publish all exposed ports to random ports on the host interfaces. The default is *false*. When set to true publish all exposed ports to the host interfaces. The -default is false. If the operator uses -P (or -p) then podman will make the +default is false. 
If the operator uses -P (or -p) then Podman will make the exposed port accessible on the host and the ports will be available to any client that can reach the host. -When using -P, podman will bind any exposed port to a random port on the host +When using -P, Podman will bind any exposed port to a random port on the host within an *ephemeral port range* defined by `/proc/sys/net/ipv4/ip_local_port_range`. To find the mapping between the host ports and the exposed ports, use `podman port`. @@ -702,13 +710,13 @@ Timeout (in seconds) to stop a container. Default is 10. **--subgidname**=*name* Run the container in a new user namespace using the map with 'name' in the `/etc/subgid` file. -If calling podman run as an unprivileged user, the user needs to have the right to use the mapping. See `subgid(5)`. +If calling Podman run as an unprivileged user, the user needs to have the right to use the mapping. See `subgid(5)`. This flag conflicts with `--userns` and `--gidmap`. **--subuidname**=*name* Run the container in a new user namespace using the map with 'name' in the `/etc/subuid` file. -If calling podman run as an unprivileged user, the user needs to have the right to use the mapping. See `subuid(5)`. +If calling Podman run as an unprivileged user, the user needs to have the right to use the mapping. See `subuid(5)`. This flag conflicts with `--userns` and `--uidmap`. **--sysctl**=SYSCTL @@ -738,7 +746,7 @@ Note: if you use the `--network=host` option these sysctls will not be allowed. Run container in systemd mode. The default is *true*. -If the command you are running inside of the container is systemd or init, podman +If the command you are running inside of the container is systemd or init, Podman will setup tmpfs mount points in the following directories: /run, /run/lock, /tmp, /sys/fs/cgroup/systemd, /var/lib/journal @@ -770,17 +778,17 @@ any options, the systems uses the following options: Allocate a pseudo-TTY. The default is *false*. 
-When set to true podman will allocate a pseudo-tty and attach to the standard +When set to true Podman will allocate a pseudo-tty and attach to the standard input of the container. This can be used, for example, to run a throwaway interactive shell. The default is false. -**NOTE**: The **-t** option is incompatible with a redirection of the podman client +**NOTE**: The **-t** option is incompatible with a redirection of the Podman client standard input. **--uidmap**=*container_uid:host_uid:amount* Run the container in a new user namespace using the supplied mapping. This option conflicts with the --userns and --subuidname flags. -This option can be passed several times to map different ranges. If calling podman run as an unprivileged user, the user needs to have the right to use the mapping. See `subuid(5)`. +This option can be passed several times to map different ranges. If calling Podman run as an unprivileged user, the user needs to have the right to use the mapping. See `subuid(5)`. The example maps uids 0-2000 in the container to the uids 30000-31999 on the host. `--uidmap=0:30000:2000` **--ulimit**=*option* @@ -821,19 +829,23 @@ Set the UTS mode for the container **NOTE**: the host mode gives the container access to changing the host's hostname and is therefore considered insecure. -**--volume**, **-v**[=*[HOST-DIR:CONTAINER-DIR[:OPTIONS]]*] +**--volume**, **-v**[=*[HOST-DIR-OR-VOLUME-NAME:CONTAINER-DIR[:OPTIONS]]*] -Create a bind mount. If you specify, ` -v /HOST-DIR:/CONTAINER-DIR`, podman -bind mounts `/HOST-DIR` in the host to `/CONTAINER-DIR` in the podman -container. The `OPTIONS` are a comma delimited list and can be: +Create a bind mount. If you specify, ` -v /HOST-DIR:/CONTAINER-DIR`, Podman +bind mounts `/HOST-DIR` in the host to `/CONTAINER-DIR` in the Podman +container. Similarly, `-v VOLUME-NAME:/CONTAINER-DIR` will mount the volume +in the host to the container. If no such named volume exists, Podman will +create one.
+ + The `OPTIONS` are a comma delimited list and can be: * [`rw`|`ro`] * [`z`|`Z`] * [`[r]shared`|`[r]slave`|`[r]private`] -The `CONTAINER-DIR` must be an absolute path such as `/src/docs`. The `HOST-DIR` -must be an absolute path as well. podman bind-mounts the `HOST-DIR` to the -path you specify. For example, if you supply the `/foo` value, podman creates a bind-mount. +The `/CONTAINER-DIR` must be an absolute path such as `/src/docs`. The `/HOST-DIR` +must be an absolute path as well. Podman bind-mounts the `HOST-DIR` to the +path you specify. For example, if you supply the `/foo` value, Podman creates a bind-mount. You can specify multiple **-v** options to mount one or more mounts to a container. @@ -845,14 +857,14 @@ See examples. Labeling systems like SELinux require that proper labels are placed on volume content mounted into a container. Without a label, the security system might prevent the processes running inside the container from using the content. By -default, podman does not change the labels set by the OS. +default, Podman does not change the labels set by the OS. To change a label in the container context, you can add either of two suffixes -`:z` or `:Z` to the volume mount. These suffixes tell podman to relabel file -objects on the shared volumes. The `z` option tells podman that two containers -share the volume content. As a result, podman labels the content with a shared +`:z` or `:Z` to the volume mount. These suffixes tell Podman to relabel file +objects on the shared volumes. The `z` option tells Podman that two containers +share the volume content. As a result, Podman labels the content with a shared content label. Shared volume labels allow all containers to read/write content. -The `Z` option tells podman to label the content with a private unshared label. +The `Z` option tells Podman to label the content with a private unshared label. Only the current container can use a private volume. By default bind mounted volumes are `private`. 
That means any mounts done @@ -900,7 +912,7 @@ To share a volume, use the --volumes-from option when running the target container. You can share volumes even if the source container is not running. -By default, podman mounts the volumes in the same mode (read-write or +By default, Podman mounts the volumes in the same mode (read-write or read-only) as it is mounted in the source container. Optionally, you can change this by suffixing the container-id with either the `ro` or `rw` keyword. @@ -908,11 +920,11 @@ can change this by suffixing the container-id with either the `ro` or Labeling systems like SELinux require that proper labels are placed on volume content mounted into a container. Without a label, the security system might prevent the processes running inside the container from using the content. By -default, podman does not change the labels set by the OS. +default, Podman does not change the labels set by the OS. To change a label in the container context, you can add `z` to the volume mount. -This suffix tells podman to relabel file objects on the shared volumes. The `z` -option tells podman that two containers share the volume content. As a result, +This suffix tells Podman to relabel file objects on the shared volumes. The `z` +option tells Podman that two containers share the volume content. As a result, podman labels the content with a shared content label. Shared volume labels allow all containers to read/write content. @@ -934,7 +946,7 @@ The exit code from `podman run` gives information about why the container failed to run or why it exited. When `podman run` exits with a non-zero code, the exit codes follow the `chroot` standard, see below: -**_125_** if the error is with podman **_itself_** +**_125_** if the error is with Podman **_itself_** $ podman run --foo busybox; echo $? Error: unknown flag: --foo @@ -1005,7 +1017,7 @@ This should list the message sent to logger. 
### Attaching to one or more from STDIN, STDOUT, STDERR -If you do not specify -a then podman will attach everything (stdin,stdout,stderr). +If you do not specify -a then Podman will attach everything (stdin,stdout,stderr). You can specify to which of the three standard streams (stdin, stdout, stderr) you'd like to connect instead, as in: @@ -1092,18 +1104,26 @@ $ podman run -p 8080:80 -d -i -t fedora/httpd To mount a host directory as a container volume, specify the absolute path to the directory and the absolute path for the container directory separated by a -colon: +colon. If the source is a named volume maintained by Podman, it's recommended to +use its name rather than the path to the volume. Otherwise the volume will be +considered as an orphan and wiped if you execute `podman volume prune`: ``` $ podman run -v /var/db:/data1 -i -t fedora bash + +$ podman run -v data:/data2 -i -t fedora bash ``` Using --mount flags, To mount a host directory as a container folder, specify -the absolute path to the directory and the absolute path for the container -directory: +the absolute path to the directory or the volume name, and the absolute path +within the container directory: +```` $ podman run --mount type=bind,src=/var/db,target=/data1 busybox sh +$ podman run --mount type=bind,src=volume-name,target=/data1 busybox sh +```` + When using SELinux, be aware that the host has no knowledge of container SELinux policy. Therefore, in the above example, if SELinux policy is enforced, the `/var/db` directory is not writable to the container. A "Permission Denied" @@ -1178,7 +1198,7 @@ $ podman run --sysctl net.ipv4.ip_forward=1 someimage Note: -Not all sysctls are namespaced. podman does not support changing sysctls +Not all sysctls are namespaced. Podman does not support changing sysctls inside of a container that also modify the host system. As the kernel evolves we expect to see more sysctls become namespaced.
@@ -1212,7 +1232,7 @@ Note: RHEL7 and Centos 7 will not have this feature until RHEL7.7 is released. In order for users to run rootless, there must be an entry for their username in /etc/subuid and /etc/subgid which lists the UIDs for their user namespace. -Rootless podman works better if the fuse-overlayfs and slirp4netns packages are installed. +Rootless Podman works better if the fuse-overlayfs and slirp4netns packages are installed. The fuse-overlay package provides a userspace overlay storage driver, otherwise users need to use the vfs storage driver, which is diskspace expensive and does not perform well. slirp4netns is required for VPN, without it containers need to be run with the --net=host flag. @@ -1223,7 +1243,7 @@ Environment variables within containers can be set using multiple different opti Precedence Order: - **--env-host** : Host environment of the process executing podman is added. + **--env-host** : Host environment of the process executing Podman is added. Container image : Any environment variables specified in the container image. @@ -1253,7 +1273,7 @@ subgid(5), subuid(5), libpod.conf(5), systemd.unit(5), setsebool(8), slirp4netns ## HISTORY September 2018, updated by Kunal Kushwaha <kushwaha_kunal_v7@lab.ntt.co.jp> -October 2017, converted from Docker documentation to podman by Dan Walsh for podman <dwalsh@redhat.com> +October 2017, converted from Docker documentation to Podman by Dan Walsh for Podman <dwalsh@redhat.com> November 2015, updated by Sally O'Malley <somalley@redhat.com> diff --git a/docs/tutorials/rootless_tutorial.md b/docs/tutorials/rootless_tutorial.md index 91962fead..c98e74c96 100644 --- a/docs/tutorials/rootless_tutorial.md +++ b/docs/tutorials/rootless_tutorial.md @@ -22,6 +22,8 @@ The [slirp4netns](https://github.com/rootless-containers/slirp4netns) package pr When using Podman in a rootless environment, it is recommended to use fuse-overlayfs rather than the VFS file system. 
Installing the fuse3-devel package gives Podman the dependencies it needs to install, build and use fuse-overlayfs in a rootless environment for you. The fuse-overlayfs project is also available from [GitHub](https://github.com/containers/fuse-overlayfs). This especially needs to be checked on Ubuntu distributions as fuse-overlayfs is not generally installed by default. +If Podman is installed before fuse-overlayfs, it may be necessary to change the `driver` option under `[storage]` in `storage.conf` to `"overlay"`. + ### Enable user namespaces (on RHEL7 machines) The number of user namespaces that are allowed on the system is specified in the file `/proc/sys/user/max_user_namespaces`. On most Linux platforms this is preset by default and no adjustment is necessary. However on RHEL7 machines a user with root privileges may need to set that to a reasonable value by using this command: `sysctl user.max_user_namespaces=15000`. diff --git a/libpod.conf b/libpod.conf index 81fece5d2..47f66ecc1 100644 --- a/libpod.conf +++ b/libpod.conf @@ -122,6 +122,10 @@ runtime = "runc" # libpod will use it for reporting nicer errors. runtime_supports_json = ["crun", "runc"] +# List of all the OCI runtimes that support --cgroup-manager=disabled to disable +# creation of CGroups for containers. +runtime_supports_nocgroups = ["crun"] + # Paths to look for a valid OCI runtime (runc, runv, etc) # If the paths are empty or no valid path was found, then the `$PATH` # environment variable will be used as the fallback. diff --git a/libpod/container.go b/libpod/container.go index 9c01d2adf..3d8e58375 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -356,6 +356,9 @@ type ContainerConfig struct { StopTimeout uint `json:"stopTimeout,omitempty"` // Time container was created CreatedTime time.Time `json:"createdTime"` + // NoCgroups indicates that the container will not create CGroups. It is + // incompatible with CgroupParent.
+ NoCgroups bool `json:"noCgroups,omitempty"` // Cgroup parent of the container CgroupParent string `json:"cgroupParent"` // LogPath log location diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go index 1b6dd829c..3c32a2f45 100644 --- a/libpod/container_inspect.go +++ b/libpod/container_inspect.go @@ -268,6 +268,11 @@ type InspectContainerHostConfig struct { // populated. // TODO. Cgroup string `json:"Cgroup"` + // Cgroups contains the container's CGroup mode. + // Allowed values are "default" (container is creating CGroups) and + // "disabled" (container is not creating CGroups). + // This is Libpod-specific and not included in `docker inspect`. + Cgroups string `json:"Cgroups"` // Links is unused, and provided purely for Docker compatibility. Links []string `json:"Links"` // OOMScoreAdj is an adjustment that will be made to the container's OOM @@ -958,6 +963,11 @@ func (c *Container) generateInspectContainerHostConfig(ctrSpec *spec.Spec, named restartPolicy.Name = c.config.RestartPolicy restartPolicy.MaximumRetryCount = c.config.RestartRetries hostConfig.RestartPolicy = restartPolicy + if c.config.NoCgroups { + hostConfig.Cgroups = "disabled" + } else { + hostConfig.Cgroups = "default" + } hostConfig.Dns = make([]string, 0, len(c.config.DNSServer)) for _, dns := range c.config.DNSServer { diff --git a/libpod/container_internal.go b/libpod/container_internal.go index ffc6c11ee..ac565fdad 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -21,6 +21,7 @@ import ( "github.com/containers/storage" "github.com/containers/storage/pkg/archive" "github.com/containers/storage/pkg/mount" + "github.com/cyphar/filepath-securejoin" spec "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-tools/generate" "github.com/opencontainers/selinux/go-selinux/label" @@ -1119,6 +1120,10 @@ func (c *Container) stop(timeout uint) error { // Internal, non-locking function to pause a container func (c *Container) 
pause() error { + if c.config.NoCgroups { + return errors.Wrapf(define.ErrNoCgroups, "cannot pause without using CGroups") + } + if err := c.ociRuntime.pauseContainer(c); err != nil { return err } @@ -1132,6 +1137,10 @@ func (c *Container) pause() error { // Internal, non-locking function to unpause a container func (c *Container) unpause() error { + if c.config.NoCgroups { + return errors.Wrapf(define.ErrNoCgroups, "cannot unpause without using CGroups") + } + if err := c.ociRuntime.unpauseContainer(c); err != nil { return err } @@ -1234,43 +1243,82 @@ func (c *Container) mountStorage() (_ string, Err error) { }() } + // We need to mount the container before volumes - to ensure the copyup + // works properly. + mountPoint := c.config.Rootfs + if mountPoint == "" { + mountPoint, err = c.mount() + if err != nil { + return "", err + } + defer func() { + if Err != nil { + if err := c.unmount(false); err != nil { + logrus.Errorf("Error unmounting container %s after mount error: %v", c.ID(), err) + } + } + }() + } + // Request a mount of all named volumes for _, v := range c.config.NamedVolumes { - vol, err := c.runtime.state.Volume(v.Name) + vol, err := c.mountNamedVolume(v, mountPoint) if err != nil { - return "", errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID()) + return "", err } - - if vol.needsMount() { + defer func() { + if Err == nil { + return + } vol.lock.Lock() - if err := vol.mount(); err != nil { - vol.lock.Unlock() - return "", errors.Wrapf(err, "error mounting volume %s for container %s", vol.Name(), c.ID()) + if err := vol.unmount(false); err != nil { + logrus.Errorf("Error unmounting volume %s after error mounting container %s: %v", vol.Name(), c.ID(), err) } vol.lock.Unlock() - defer func() { - if Err == nil { - return - } - vol.lock.Lock() - if err := vol.unmount(false); err != nil { - logrus.Errorf("Error unmounting volume %s after error mounting container %s: %v", vol.Name(), c.ID(), err) - } - vol.lock.Unlock() 
- }() - } + }() } - // TODO: generalize this mount code so it will mount every mount in ctr.config.Mounts - mountPoint := c.config.Rootfs - if mountPoint == "" { - mountPoint, err = c.mount() - if err != nil { - return "", err + return mountPoint, nil +} + +// Mount a single named volume into the container. +// If necessary, copy up image contents into the volume. +// Does not verify that the named volume given is actually present in container +// config. +// Returns the volume that was mounted. +func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string) (*Volume, error) { + vol, err := c.runtime.state.Volume(v.Name) + if err != nil { + return nil, errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID()) + } + + vol.lock.Lock() + defer vol.lock.Unlock() + if vol.needsMount() { + if err := vol.mount(); err != nil { + return nil, errors.Wrapf(err, "error mounting volume %s for container %s", vol.Name(), c.ID()) } } + // The volume may need a copy-up. Check the state. + if err := vol.update(); err != nil { + return nil, err + } + if vol.state.NeedsCopyUp { + logrus.Debugf("Copying up contents from container %s to volume %s", c.ID(), vol.Name()) + srcDir, err := securejoin.SecureJoin(mountpoint, v.Dest) + if err != nil { + return nil, errors.Wrapf(err, "error calculating destination path to copy up container %s volume %s", c.ID(), vol.Name()) + } + if err := c.copyWithTarFromImage(srcDir, vol.MountPoint()); err != nil && !os.IsNotExist(err) { + return nil, errors.Wrapf(err, "error copying content from container %s into volume %s", c.ID(), vol.Name()) + } - return mountPoint, nil + vol.state.NeedsCopyUp = false + if err := vol.save(); err != nil { + return nil, err + } + } + return vol, nil } // cleanupStorage unmounts and cleans up the container's root filesystem @@ -1614,15 +1662,11 @@ func (c *Container) unmount(force bool) error { } // this should be from chrootarchive.
-func (c *Container) copyWithTarFromImage(src, dest string) error { - mountpoint, err := c.mount() - if err != nil { - return err - } +// Container MUST be mounted before calling. +func (c *Container) copyWithTarFromImage(source, dest string) error { a := archive.NewDefaultArchiver() - source := filepath.Join(mountpoint, src) - if err = c.copyOwnerAndPerms(source, dest); err != nil { + if err := c.copyOwnerAndPerms(source, dest); err != nil { return err } return a.CopyWithTar(source, dest) diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index d1e63a6eb..9f16389e6 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -115,7 +115,9 @@ func (c *Container) prepare() (Err error) { createErr = createNetNSErr } if mountStorageErr != nil { - logrus.Errorf("Error preparing container %s: %v", c.ID(), createErr) + if createErr != nil { + logrus.Errorf("Error preparing container %s: %v", c.ID(), createErr) + } createErr = mountStorageErr } @@ -379,7 +381,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { if err != nil { return nil, err } - if rootless.IsRootless() && !unified { + if (rootless.IsRootless() && !unified) || c.config.NoCgroups { g.SetLinuxCgroupsPath("") } else if c.runtime.config.CgroupManager == SystemdCgroupsManager { // When runc is set to use Systemd as a cgroup manager, it diff --git a/libpod/container_top_linux.go b/libpod/container_top_linux.go index ce471838d..5f4f28130 100644 --- a/libpod/container_top_linux.go +++ b/libpod/container_top_linux.go @@ -15,6 +15,10 @@ import ( // Top gathers statistics about the running processes in a container. 
It returns a // []string for output func (c *Container) Top(descriptors []string) ([]string, error) { + if c.config.NoCgroups { + return nil, errors.Wrapf(define.ErrNoCgroups, "cannot run top on container %s as it did not create a cgroup", c.ID()) + } + conStat, err := c.State() if err != nil { return nil, errors.Wrapf(err, "unable to look up state for %s", c.ID()) diff --git a/libpod/define/errors.go b/libpod/define/errors.go index 9d532263c..004acd58f 100644 --- a/libpod/define/errors.go +++ b/libpod/define/errors.go @@ -61,6 +61,10 @@ var ( // the user. ErrDetach = utils.ErrDetach + // ErrNoCgroups indicates that the container does not have its own + // CGroup. + ErrNoCgroups = errors.New("this container does not have a cgroup") + // ErrRuntimeStopped indicates that the runtime has already been shut // down and no further operations can be performed on it ErrRuntimeStopped = errors.New("runtime has already been stopped") diff --git a/libpod/oci.go b/libpod/oci.go index 8a873ca5b..9879fa90e 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -48,19 +48,20 @@ const ( // OCIRuntime represents an OCI-compatible runtime that libpod can call into // to perform container operations type OCIRuntime struct { - name string - path string - conmonPath string - conmonEnv []string - cgroupManager string - tmpDir string - exitsDir string - socketsDir string - logSizeMax int64 - noPivot bool - reservePorts bool - supportsJSON bool - sdNotify bool + name string + path string + conmonPath string + conmonEnv []string + cgroupManager string + tmpDir string + exitsDir string + socketsDir string + logSizeMax int64 + noPivot bool + reservePorts bool + supportsJSON bool + supportsNoCgroups bool + sdNotify bool } // ociError is used to parse the OCI runtime JSON log. It is not part of the @@ -73,7 +74,7 @@ type ociError struct { // Make a new OCI runtime with provided options. // The first path that points to a valid executable will be used. 
-func newOCIRuntime(name string, paths []string, conmonPath string, runtimeCfg *RuntimeConfig, supportsJSON bool) (*OCIRuntime, error) { +func newOCIRuntime(name string, paths []string, conmonPath string, runtimeCfg *RuntimeConfig, supportsJSON, supportsNoCgroups bool) (*OCIRuntime, error) { if name == "" { return nil, errors.Wrapf(define.ErrInvalidArg, "the OCI runtime must be provided a non-empty name") } @@ -93,6 +94,7 @@ func newOCIRuntime(name string, paths []string, conmonPath string, runtimeCfg *R // TODO: probe OCI runtime for feature and enable automatically if // available. runtime.supportsJSON = supportsJSON + runtime.supportsNoCgroups = supportsNoCgroups foundPath := false for _, path := range paths { diff --git a/libpod/oci_internal_linux.go b/libpod/oci_internal_linux.go index 48b7370e0..f9e935d86 100644 --- a/libpod/oci_internal_linux.go +++ b/libpod/oci_internal_linux.go @@ -263,7 +263,7 @@ func (r *OCIRuntime) configureConmonEnv(runtimeDir string) ([]string, []*os.File func (r *OCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, ociLogPath string) []string { // set the conmon API version to be able to use the correct sync struct keys args := []string{"--api-version", "1"} - if r.cgroupManager == SystemdCgroupsManager { + if r.cgroupManager == SystemdCgroupsManager && !ctr.config.NoCgroups { args = append(args, "-s") } args = append(args, "-c", ctr.ID()) @@ -307,6 +307,10 @@ func (r *OCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath if ociLogPath != "" { args = append(args, "--runtime-arg", "--log-format=json", "--runtime-arg", "--log", fmt.Sprintf("--runtime-arg=%s", ociLogPath)) } + if ctr.config.NoCgroups { + logrus.Debugf("Running with no CGroups") + args = append(args, "--runtime-arg", "--cgroup-manager", "--runtime-arg", "disabled") + } return args } @@ -355,6 +359,11 @@ func startCommandGivenSelinux(cmd *exec.Cmd) error { // moveConmonToCgroupAndSignal gets a container's 
cgroupParent and moves the conmon process to that cgroup // it then signals for conmon to start by sending nonse data down the start fd func (r *OCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File, uuid string) error { + // If cgroup creation is disabled - just signal. + if ctr.config.NoCgroups { + return writeConmonPipeData(startFd) + } + cgroupParent := ctr.CgroupParent() if r.cgroupManager == SystemdCgroupsManager { unitName := createUnitName("libpod-conmon", ctr.ID()) diff --git a/libpod/oci_linux.go b/libpod/oci_linux.go index 6dba1260c..091b6d155 100644 --- a/libpod/oci_linux.go +++ b/libpod/oci_linux.go @@ -402,10 +402,12 @@ func (r *OCIRuntime) stopContainer(ctr *Container, timeout uint) error { } var args []string - if rootless.IsRootless() { + if rootless.IsRootless() || ctr.config.NoCgroups { // we don't use --all for rootless containers as the OCI runtime might use // the cgroups to determine the PIDs, but for rootless containers there is // not any. + // Same logic for NoCgroups - we can't use cgroups as the user + // explicitly requested none be created. args = []string{"kill", ctr.ID(), "KILL"} } else { args = []string{"kill", "--all", ctr.ID(), "KILL"} diff --git a/libpod/options.go b/libpod/options.go index 6df1ca5be..d28cb3d8c 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -847,6 +847,10 @@ func WithPIDNSFrom(nsCtr *Container) CtrCreateOption { return errors.Wrapf(define.ErrInvalidArg, "container has joined pod %s and dependency container %s is not a member of the pod", ctr.config.Pod, nsCtr.ID()) } + if ctr.config.NoCgroups { + return errors.Wrapf(define.ErrInvalidArg, "container has disabled creation of CGroups, which is incompatible with sharing a PID namespace") + } + ctr.config.PIDNsCtr = nsCtr.ID() return nil @@ -1056,6 +1060,27 @@ func WithLogPath(path string) CtrCreateOption { } } +// WithNoCgroups disables the creation of CGroups for the new container. 
+func WithNoCgroups() CtrCreateOption { + return func(ctr *Container) error { + if ctr.valid { + return define.ErrCtrFinalized + } + + if ctr.config.CgroupParent != "" { + return errors.Wrapf(define.ErrInvalidArg, "NoCgroups conflicts with CgroupParent") + } + + if ctr.config.PIDNsCtr != "" { + return errors.Wrapf(define.ErrInvalidArg, "NoCgroups requires a private PID namespace and cannot be used when PID namespace is shared with another container") + } + + ctr.config.NoCgroups = true + + return nil + } +} + // WithCgroupParent sets the Cgroup Parent of the new container. func WithCgroupParent(parent string) CtrCreateOption { return func(ctr *Container) error { @@ -1067,6 +1092,10 @@ func WithCgroupParent(parent string) CtrCreateOption { return errors.Wrapf(define.ErrInvalidArg, "cgroup parent cannot be empty") } + if ctr.config.NoCgroups { + return errors.Wrapf(define.ErrInvalidArg, "CgroupParent conflicts with NoCgroups") + } + ctr.config.CgroupParent = parent return nil diff --git a/libpod/runtime.go b/libpod/runtime.go index 323a46266..80b58654e 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -157,8 +157,12 @@ type RuntimeConfig struct { OCIRuntime string `toml:"runtime"` // OCIRuntimes are the set of configured OCI runtimes (default is runc) OCIRuntimes map[string][]string `toml:"runtimes"` - // RuntimeSupportsJSON is the list of the OCI runtimes that support --format=json + // RuntimeSupportsJSON is the list of the OCI runtimes that support + // --format=json. RuntimeSupportsJSON []string `toml:"runtime_supports_json"` + // RuntimeSupportsNoCgroups is a list of OCI runtimes that support + // running containers without CGroups. + RuntimeSupportsNoCgroups []string `toml:"runtime_supports_nocgroups"` // RuntimePath is the path to OCI runtime binary for launching // containers. 
// The first path pointing to a valid file will be used @@ -259,21 +263,22 @@ type RuntimeConfig struct { // If they were not, we may override them with information from the database, // if it exists and differs from what is present in the system already. type runtimeConfiguredFrom struct { - storageGraphDriverSet bool - storageGraphRootSet bool - storageRunRootSet bool - libpodStaticDirSet bool - libpodTmpDirSet bool - volPathSet bool - conmonPath bool - conmonEnvVars bool - initPath bool - ociRuntimes bool - runtimePath bool - cniPluginDir bool - noPivotRoot bool - runtimeSupportsJSON bool - ociRuntime bool + storageGraphDriverSet bool + storageGraphRootSet bool + storageRunRootSet bool + libpodStaticDirSet bool + libpodTmpDirSet bool + volPathSet bool + conmonPath bool + conmonEnvVars bool + initPath bool + ociRuntimes bool + runtimePath bool + cniPluginDir bool + noPivotRoot bool + runtimeSupportsJSON bool + runtimeSupportsNoCgroups bool + ociRuntime bool } func defaultRuntimeConfig() (RuntimeConfig, error) { @@ -603,6 +608,9 @@ func newRuntimeFromConfig(ctx context.Context, userConfigPath string, options .. if tmpConfig.RuntimeSupportsJSON != nil { runtime.configuredFrom.runtimeSupportsJSON = true } + if tmpConfig.RuntimeSupportsNoCgroups != nil { + runtime.configuredFrom.runtimeSupportsNoCgroups = true + } if tmpConfig.OCIRuntime != "" { runtime.configuredFrom.ociRuntime = true } @@ -649,6 +657,9 @@ func newRuntimeFromConfig(ctx context.Context, userConfigPath string, options .. 
if !runtime.configuredFrom.runtimeSupportsJSON { runtime.config.RuntimeSupportsJSON = tmpConfig.RuntimeSupportsJSON } + if !runtime.configuredFrom.runtimeSupportsNoCgroups { + runtime.config.RuntimeSupportsNoCgroups = tmpConfig.RuntimeSupportsNoCgroups + } if !runtime.configuredFrom.ociRuntime { runtime.config.OCIRuntime = tmpConfig.OCIRuntime } @@ -1009,6 +1020,16 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) { } } + // Make lookup tables for runtime support + supportsJSON := make(map[string]bool) + supportsNoCgroups := make(map[string]bool) + for _, r := range runtime.config.RuntimeSupportsJSON { + supportsJSON[r] = true + } + for _, r := range runtime.config.RuntimeSupportsNoCgroups { + supportsNoCgroups[r] = true + } + // Get us at least one working OCI runtime. runtime.ociRuntimes = make(map[string]*OCIRuntime) @@ -1026,15 +1047,10 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) { name := filepath.Base(runtime.config.RuntimePath[0]) - supportsJSON := false - for _, r := range runtime.config.RuntimeSupportsJSON { - if r == name { - supportsJSON = true - break - } - } + json := supportsJSON[name] + nocgroups := supportsNoCgroups[name] - ociRuntime, err := newOCIRuntime(name, runtime.config.RuntimePath, runtime.conmonPath, runtime.config, supportsJSON) + ociRuntime, err := newOCIRuntime(name, runtime.config.RuntimePath, runtime.conmonPath, runtime.config, json, nocgroups) if err != nil { return err } @@ -1045,15 +1061,10 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) { // Initialize remaining OCI runtimes for name, paths := range runtime.config.OCIRuntimes { - supportsJSON := false - for _, r := range runtime.config.RuntimeSupportsJSON { - if r == name { - supportsJSON = true - break - } - } + json := supportsJSON[name] + nocgroups := supportsNoCgroups[name] - ociRuntime, err := newOCIRuntime(name, paths, runtime.conmonPath, runtime.config, supportsJSON) + ociRuntime, err := 
newOCIRuntime(name, paths, runtime.conmonPath, runtime.config, json, nocgroups) if err != nil { // Don't fatally error. // This will allow us to ship configs including optional @@ -1073,15 +1084,10 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) { if strings.HasPrefix(runtime.config.OCIRuntime, "/") { name := filepath.Base(runtime.config.OCIRuntime) - supportsJSON := false - for _, r := range runtime.config.RuntimeSupportsJSON { - if r == name { - supportsJSON = true - break - } - } + json := supportsJSON[name] + nocgroups := supportsNoCgroups[name] - ociRuntime, err := newOCIRuntime(name, []string{runtime.config.OCIRuntime}, runtime.conmonPath, runtime.config, supportsJSON) + ociRuntime, err := newOCIRuntime(name, []string{runtime.config.OCIRuntime}, runtime.conmonPath, runtime.config, json, nocgroups) if err != nil { return err } diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index acd317d20..bffce7bca 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -8,7 +8,7 @@ import ( "strings" "time" - config2 "github.com/containers/libpod/libpod/define" + "github.com/containers/libpod/libpod/define" "github.com/containers/libpod/libpod/events" "github.com/containers/libpod/pkg/rootless" "github.com/containers/storage/pkg/stringid" @@ -35,7 +35,7 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options .. r.lock.Lock() defer r.lock.Unlock() if !r.valid { - return nil, config2.ErrRuntimeStopped + return nil, define.ErrRuntimeStopped } return r.newContainer(ctx, rSpec, options...) 
} @@ -45,7 +45,7 @@ func (r *Runtime) RestoreContainer(ctx context.Context, rSpec *spec.Spec, config r.lock.Lock() defer r.lock.Unlock() if !r.valid { - return nil, config2.ErrRuntimeStopped + return nil, define.ErrRuntimeStopped } ctr, err := r.initContainerVariables(rSpec, config) @@ -67,7 +67,7 @@ func (r *Runtime) RestoreContainer(ctx context.Context, rSpec *spec.Spec, config func (r *Runtime) initContainerVariables(rSpec *spec.Spec, config *ContainerConfig) (c *Container, err error) { if rSpec == nil { - return nil, errors.Wrapf(config2.ErrInvalidArg, "must provide a valid runtime spec to create container") + return nil, errors.Wrapf(define.ErrInvalidArg, "must provide a valid runtime spec to create container") } ctr := new(Container) ctr.config = new(ContainerConfig) @@ -100,7 +100,7 @@ func (r *Runtime) initContainerVariables(rSpec *spec.Spec, config *ContainerConf ctr.state.BindMounts = make(map[string]string) - ctr.config.StopTimeout = config2.CtrRemoveTimeout + ctr.config.StopTimeout = define.CtrRemoveTimeout ctr.config.OCIRuntime = r.defaultOCIRuntime.name @@ -152,7 +152,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (c *Contai }() ctr.valid = true - ctr.state.State = config2.ContainerStateConfigured + ctr.state.State = define.ContainerStateConfigured ctr.runtime = r if ctr.config.OCIRuntime == "" { @@ -160,11 +160,18 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (c *Contai } else { ociRuntime, ok := r.ociRuntimes[ctr.config.OCIRuntime] if !ok { - return nil, errors.Wrapf(config2.ErrInvalidArg, "requested OCI runtime %s is not available", ctr.config.OCIRuntime) + return nil, errors.Wrapf(define.ErrInvalidArg, "requested OCI runtime %s is not available", ctr.config.OCIRuntime) } ctr.ociRuntime = ociRuntime } + // Check NoCgroups support + if ctr.config.NoCgroups { + if !ctr.ociRuntime.supportsNoCgroups { + return nil, errors.Wrapf(define.ErrInvalidArg, "requested OCI runtime %s is not compatible with 
NoCgroups", ctr.ociRuntime.name) + } + } + var pod *Pod if ctr.config.Pod != "" { // Get the pod from state @@ -183,43 +190,67 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (c *Contai ctr.config.Name = name } - // Check CGroup parent sanity, and set it if it was not set - switch r.config.CgroupManager { - case CgroupfsCgroupsManager: - if ctr.config.CgroupParent == "" { - if pod != nil && pod.config.UsePodCgroup { - podCgroup, err := pod.CgroupPath() - if err != nil { - return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID()) + // If CGroups are disabled, we MUST create a PID namespace. + // Otherwise, the OCI runtime won't be able to stop our container. + if ctr.config.NoCgroups { + if ctr.config.Spec.Linux == nil { + return nil, errors.Wrapf(define.ErrInvalidArg, "must provide Linux namespace configuration in OCI spec when using NoCgroups") + } + foundPid := false + for _, ns := range ctr.config.Spec.Linux.Namespaces { + if ns.Type == spec.PIDNamespace { + foundPid = true + if ns.Path != "" { + return nil, errors.Wrapf(define.ErrInvalidArg, "containers not creating CGroups must create a private PID namespace - cannot use another") } - if podCgroup == "" { - return nil, errors.Wrapf(config2.ErrInternal, "pod %s cgroup is not set", pod.ID()) + break + } + } + if !foundPid { + return nil, errors.Wrapf(define.ErrInvalidArg, "containers not creating CGroups must create a private PID namespace") + } + } + + // Check CGroup parent sanity, and set it if it was not set. + // Only if we're actually configuring CGroups. 
+ if !ctr.config.NoCgroups { + switch r.config.CgroupManager { + case CgroupfsCgroupsManager: + if ctr.config.CgroupParent == "" { + if pod != nil && pod.config.UsePodCgroup { + podCgroup, err := pod.CgroupPath() + if err != nil { + return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID()) + } + if podCgroup == "" { + return nil, errors.Wrapf(define.ErrInternal, "pod %s cgroup is not set", pod.ID()) + } + ctr.config.CgroupParent = podCgroup + } else { + ctr.config.CgroupParent = CgroupfsDefaultCgroupParent } - ctr.config.CgroupParent = podCgroup - } else { - ctr.config.CgroupParent = CgroupfsDefaultCgroupParent + } else if strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") { + return nil, errors.Wrapf(define.ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs") } - } else if strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") { - return nil, errors.Wrapf(config2.ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs") - } - case SystemdCgroupsManager: - if ctr.config.CgroupParent == "" { - if pod != nil && pod.config.UsePodCgroup { - podCgroup, err := pod.CgroupPath() - if err != nil { - return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID()) + case SystemdCgroupsManager: + if ctr.config.CgroupParent == "" { + if pod != nil && pod.config.UsePodCgroup { + podCgroup, err := pod.CgroupPath() + if err != nil { + return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID()) + } + ctr.config.CgroupParent = podCgroup + } else if rootless.IsRootless() { + ctr.config.CgroupParent = SystemdDefaultRootlessCgroupParent + } else { + ctr.config.CgroupParent = SystemdDefaultCgroupParent } - ctr.config.CgroupParent = podCgroup - } else if rootless.IsRootless() { - ctr.config.CgroupParent = SystemdDefaultRootlessCgroupParent - } else { - ctr.config.CgroupParent = SystemdDefaultCgroupParent + } else if len(ctr.config.CgroupParent) < 6 || 
!strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") { + return nil, errors.Wrapf(define.ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups") } - } else if len(ctr.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") { - return nil, errors.Wrapf(config2.ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups") + default: + return nil, errors.Wrapf(define.ErrInvalidArg, "unsupported CGroup manager: %s - cannot validate cgroup parent", r.config.CgroupManager) } - default: - return nil, errors.Wrapf(config2.ErrInvalidArg, "unsupported CGroup manager: %s - cannot validate cgroup parent", r.config.CgroupManager) } if ctr.restoreFromCheckpoint { @@ -262,7 +293,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (c *Contai ctrNamedVolumes = append(ctrNamedVolumes, dbVol) // The volume exists, we're good continue - } else if errors.Cause(err) != config2.ErrNoSuchVolume { + } else if errors.Cause(err) != define.ErrNoSuchVolume { return nil, errors.Wrapf(err, "error retrieving named volume %s for new container", vol.Name) } @@ -275,10 +306,6 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (c *Contai return nil, errors.Wrapf(err, "error creating named volume %q", vol.Name) } - if err := ctr.copyWithTarFromImage(vol.Dest, newVol.MountPoint()); err != nil && !os.IsNotExist(err) { - return nil, errors.Wrapf(err, "Failed to copy content into new volume mount %q", vol.Name) - } - ctrNamedVolumes = append(ctrNamedVolumes, newVol) } @@ -386,7 +413,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, } if !r.valid { - return config2.ErrRuntimeStopped + return define.ErrRuntimeStopped } // Update the container to get current state @@ -402,7 +429,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, } } - if c.state.State == 
config2.ContainerStatePaused { + if c.state.State == define.ContainerStatePaused { if err := c.ociRuntime.killContainer(c, 9); err != nil { return err } @@ -416,7 +443,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, } // Check that the container's in a good state to be removed - if c.state.State == config2.ContainerStateRunning { + if c.state.State == define.ContainerStateRunning { if err := c.stop(c.StopTimeout()); err != nil { return errors.Wrapf(err, "cannot remove container %s as it could not be stopped", c.ID()) } @@ -439,7 +466,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, } if len(deps) != 0 { depsStr := strings.Join(deps, ", ") - return errors.Wrapf(config2.ErrCtrExists, "container %s has dependent containers which must be removed before it: %s", c.ID(), depsStr) + return errors.Wrapf(define.ErrCtrExists, "container %s has dependent containers which must be removed before it: %s", c.ID(), depsStr) } } @@ -483,8 +510,8 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, // Delete the container. 
// Not needed in Configured and Exited states, where the container // doesn't exist in the runtime - if c.state.State != config2.ContainerStateConfigured && - c.state.State != config2.ContainerStateExited { + if c.state.State != define.ContainerStateConfigured && + c.state.State != define.ContainerStateExited { if err := c.delete(ctx); err != nil { if cleanupErr == nil { cleanupErr = err @@ -514,7 +541,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, if !volume.IsCtrSpecific() { continue } - if err := runtime.removeVolume(ctx, volume, false); err != nil && err != config2.ErrNoSuchVolume && err != config2.ErrVolumeBeingUsed { + if err := runtime.removeVolume(ctx, volume, false); err != nil && err != define.ErrNoSuchVolume && err != define.ErrVolumeBeingUsed { logrus.Errorf("cleanup volume (%s): %v", v, err) } } @@ -529,7 +556,7 @@ func (r *Runtime) GetContainer(id string) (*Container, error) { defer r.lock.RUnlock() if !r.valid { - return nil, config2.ErrRuntimeStopped + return nil, define.ErrRuntimeStopped } return r.state.Container(id) @@ -541,7 +568,7 @@ func (r *Runtime) HasContainer(id string) (bool, error) { defer r.lock.RUnlock() if !r.valid { - return false, config2.ErrRuntimeStopped + return false, define.ErrRuntimeStopped } return r.state.HasContainer(id) @@ -554,7 +581,7 @@ func (r *Runtime) LookupContainer(idOrName string) (*Container, error) { defer r.lock.RUnlock() if !r.valid { - return nil, config2.ErrRuntimeStopped + return nil, define.ErrRuntimeStopped } return r.state.LookupContainer(idOrName) } @@ -568,7 +595,7 @@ func (r *Runtime) GetContainers(filters ...ContainerFilter) ([]*Container, error defer r.lock.RUnlock() if !r.valid { - return nil, config2.ErrRuntimeStopped + return nil, define.ErrRuntimeStopped } ctrs, err := r.state.AllContainers() @@ -601,7 +628,7 @@ func (r *Runtime) GetAllContainers() ([]*Container, error) { func (r *Runtime) GetRunningContainers() ([]*Container, error) { running := func(c 
*Container) bool { state, _ := c.State() - return state == config2.ContainerStateRunning + return state == define.ContainerStateRunning } return r.GetContainers(running) } @@ -629,7 +656,7 @@ func (r *Runtime) GetLatestContainer() (*Container, error) { return nil, errors.Wrapf(err, "unable to find latest container") } if len(ctrs) == 0 { - return nil, config2.ErrNoSuchCtr + return nil, define.ErrNoSuchCtr } for containerIndex, ctr := range ctrs { createdTime := ctr.config.CreatedTime diff --git a/libpod/stats.go b/libpod/stats.go index 776870bd2..5513abce5 100644 --- a/libpod/stats.go +++ b/libpod/stats.go @@ -19,6 +19,10 @@ func (c *Container) GetContainerStats(previousStats *ContainerStats) (*Container stats.ContainerID = c.ID() stats.Name = c.Name() + if c.config.NoCgroups { + return nil, errors.Wrapf(define.ErrNoCgroups, "cannot run top on container %s as it did not create a cgroup", c.ID()) + } + if !c.batched { c.lock.Lock() defer c.lock.Unlock() diff --git a/libpod/volume.go b/libpod/volume.go index b4de3aedc..c4771bbb8 100644 --- a/libpod/volume.go +++ b/libpod/volume.go @@ -57,6 +57,13 @@ type VolumeState struct { // On incrementing from 0, the volume will be mounted on the host. // On decrementing to 0, the volume will be unmounted on the host. MountCount uint `json:"mountCount"` + // NeedsCopyUp indicates that the next time the volume is mounted into + // a container, the container will "copy up" the contents of the + // mountpoint into the volume. + // This should only be done once. As such, this is set at container + // create time, then cleared after the copy up is done and never set + // again. 
+ NeedsCopyUp bool `json:"notYetMounted,omitempty"` } // Name retrieves the volume's name diff --git a/libpod/volume_internal.go b/libpod/volume_internal.go index 2e886e1b0..42b935e7c 100644 --- a/libpod/volume_internal.go +++ b/libpod/volume_internal.go @@ -11,9 +11,11 @@ import ( func newVolume(runtime *Runtime) (*Volume, error) { volume := new(Volume) volume.config = new(VolumeConfig) + volume.state = new(VolumeState) volume.runtime = runtime volume.config.Labels = make(map[string]string) volume.config.Options = make(map[string]string) + volume.state.NeedsCopyUp = true return volume, nil } diff --git a/pkg/spec/createconfig.go b/pkg/spec/createconfig.go index 3f70e5935..c17172016 100644 --- a/pkg/spec/createconfig.go +++ b/pkg/spec/createconfig.go @@ -64,6 +64,7 @@ type CreateConfig struct { CidFile string ConmonPidFile string Cgroupns string + Cgroups string CgroupParent string // cgroup-parent Command []string // Full command that will be used UserCommand []string // User-entered command (or image CMD) @@ -206,6 +207,9 @@ func (c *CreateConfig) getContainerCreateOptions(runtime *libpod.Runtime, pod *l logrus.Debugf("adding container to pod %s", c.Pod) options = append(options, runtime.WithPod(pod)) } + if c.Cgroups == "disabled" { + options = append(options, libpod.WithNoCgroups()) + } if len(c.PortBindings) > 0 { portBindings, err = c.CreatePortBindings() if err != nil { diff --git a/pkg/spec/spec.go b/pkg/spec/spec.go index 44bbda885..38f9c7306 100644 --- a/pkg/spec/spec.go +++ b/pkg/spec/spec.go @@ -396,6 +396,18 @@ func (config *CreateConfig) createConfigToOCISpec(runtime *libpod.Runtime, userM } } + switch config.Cgroups { + case "disabled": + if addedResources { + return nil, errors.New("cannot specify resource limits when cgroups are disabled is specified") + } + configSpec.Linux.Resources = &spec.LinuxResources{} + case "enabled", "": + // Do nothing + default: + return nil, errors.New("unrecognized option for cgroups; supported are 'default' and 
'disabled'") + } + // Add annotations if configSpec.Annotations == nil { configSpec.Annotations = make(map[string]string) diff --git a/test/e2e/run_test.go b/test/e2e/run_test.go index 6e102cfa5..4d2cee8e3 100644 --- a/test/e2e/run_test.go +++ b/test/e2e/run_test.go @@ -903,4 +903,75 @@ USER mail` } Expect(found).To(BeTrue()) }) + + It("podman run with cgroups=disabled runs without cgroups", func() { + SkipIfRemote() + SkipIfRootless() + // Only works on crun + if !strings.Contains(podmanTest.OCIRuntime, "crun") { + Skip("Test only works on crun") + } + + curCgroupsBytes, err := ioutil.ReadFile("/proc/self/cgroup") + Expect(err).To(BeNil()) + var curCgroups string = string(curCgroupsBytes) + fmt.Printf("Output:\n%s\n", curCgroups) + Expect(curCgroups).To(Not(Equal(""))) + + ctrName := "testctr" + container := podmanTest.Podman([]string{"run", "--name", ctrName, "-d", "--cgroups=disabled", ALPINE, "top"}) + container.WaitWithDefaultTimeout() + Expect(container.ExitCode()).To(Equal(0)) + + // Get PID and get cgroups of that PID + inspectOut := podmanTest.InspectContainer(ctrName) + Expect(len(inspectOut)).To(Equal(1)) + pid := inspectOut[0].State.Pid + Expect(pid).To(Not(Equal(0))) + Expect(inspectOut[0].HostConfig.CgroupParent).To(Equal("")) + + ctrCgroupsBytes, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid)) + Expect(err).To(BeNil()) + var ctrCgroups string = string(ctrCgroupsBytes) + fmt.Printf("Output\n:%s\n", ctrCgroups) + Expect(curCgroups).To(Equal(ctrCgroups)) + }) + + It("podman run with cgroups=enabled makes cgroups", func() { + SkipIfRemote() + SkipIfRootless() + // Only works on crun + if !strings.Contains(podmanTest.OCIRuntime, "crun") { + Skip("Test only works on crun") + } + + curCgroupsBytes, err := ioutil.ReadFile("/proc/self/cgroup") + Expect(err).To(BeNil()) + var curCgroups string = string(curCgroupsBytes) + fmt.Printf("Output:\n%s\n", curCgroups) + Expect(curCgroups).To(Not(Equal(""))) + + ctrName := "testctr" + container := 
podmanTest.Podman([]string{"run", "--name", ctrName, "-d", "--cgroups=enabled", ALPINE, "top"}) + container.WaitWithDefaultTimeout() + Expect(container.ExitCode()).To(Equal(0)) + + // Get PID and get cgroups of that PID + inspectOut := podmanTest.InspectContainer(ctrName) + Expect(len(inspectOut)).To(Equal(1)) + pid := inspectOut[0].State.Pid + Expect(pid).To(Not(Equal(0))) + + ctrCgroupsBytes, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid)) + Expect(err).To(BeNil()) + var ctrCgroups string = string(ctrCgroupsBytes) + fmt.Printf("Output\n:%s\n", ctrCgroups) + Expect(curCgroups).To(Not(Equal(ctrCgroups))) + }) + + It("podman run with cgroups=garbage errors", func() { + session := podmanTest.Podman([]string{"run", "-d", "--cgroups=garbage", ALPINE, "top"}) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To(Not(Equal(0))) + }) }) diff --git a/test/e2e/run_volume_test.go b/test/e2e/run_volume_test.go index 551e86b93..fc1998ab2 100644 --- a/test/e2e/run_volume_test.go +++ b/test/e2e/run_volume_test.go @@ -249,4 +249,25 @@ var _ = Describe("Podman run with volumes", func() { fmt.Printf("Output: %s", mountOut3) Expect(strings.Contains(mountOut3, volName)).To(BeFalse()) }) + + It("podman named volume copyup", func() { + baselineSession := podmanTest.Podman([]string{"run", "--rm", "-t", "-i", ALPINE, "ls", "/etc/apk/"}) + baselineSession.WaitWithDefaultTimeout() + Expect(baselineSession.ExitCode()).To(Equal(0)) + baselineOutput := baselineSession.OutputToString() + + inlineVolumeSession := podmanTest.Podman([]string{"run", "--rm", "-t", "-i", "-v", "testvol1:/etc/apk", ALPINE, "ls", "/etc/apk/"}) + inlineVolumeSession.WaitWithDefaultTimeout() + Expect(inlineVolumeSession.ExitCode()).To(Equal(0)) + Expect(inlineVolumeSession.OutputToString()).To(Equal(baselineOutput)) + + makeVolumeSession := podmanTest.Podman([]string{"volume", "create", "testvol2"}) + makeVolumeSession.WaitWithDefaultTimeout() + Expect(makeVolumeSession.ExitCode()).To(Equal(0)) 
+ + separateVolumeSession := podmanTest.Podman([]string{"run", "--rm", "-t", "-i", "-v", "testvol2:/etc/apk", ALPINE, "ls", "/etc/apk/"}) + separateVolumeSession.WaitWithDefaultTimeout() + Expect(separateVolumeSession.ExitCode()).To(Equal(0)) + Expect(separateVolumeSession.OutputToString()).To(Equal(baselineOutput)) + }) }) diff --git a/troubleshooting.md b/troubleshooting.md index 9a5b38e01..89c850356 100644 --- a/troubleshooting.md +++ b/troubleshooting.md @@ -146,11 +146,11 @@ If the entry in the Dockerfile looked like: RUN useradd -u 99999000 -g users new ### 7) Permission denied when running Podman commands -When rootless podman attempts to execute a container on a non exec home directory a permission error will be raised. +When rootless Podman attempts to execute a container on a non exec home directory a permission error will be raised. #### Symptom -If you are running podman or buildah on a home directory that is mounted noexec, +If you are running Podman or buildah on a home directory that is mounted noexec, then they will fail. With a message like: ``` @@ -194,11 +194,11 @@ processes to write to the cgroup file system. Turn on this boolean, on SELinux s ### 9) Newuidmap missing when running rootless Podman commands -Rootless podman requires the newuidmap and newgidmap programs to be installed. +Rootless Podman requires the newuidmap and newgidmap programs to be installed. #### Symptom -If you are running podman or buildah as a not root user, you get an error complaining about +If you are running Podman or buildah as a not root user, you get an error complaining about a missing newuidmap executable. ``` @@ -212,7 +212,7 @@ Install a version of shadow-utils that includes these executables. Note RHEL7 a ### 10) rootless setup user: invalid argument -Rootless podman requires the user running it to have a range of UIDs listed in /etc/subuid and /etc/subgid. 
+Rootless Podman requires the user running it to have a range of UIDs listed in /etc/subuid and /etc/subgid. #### Symptom @@ -262,7 +262,7 @@ grep johndoe /etc/subuid /etc/subgid ### 11) Changing the location of the Graphroot leads to permission denied When I change the graphroot storage location in storage.conf, the next time I -run podman I get an error like: +run Podman I get an error like: ``` # podman run -p 5000:5000 -it centos bash @@ -360,7 +360,7 @@ Choose one of the following: * Setup containers/storage in a different directory, not on an NFS share. * Create a directory on a local file system. * Edit `~/.config/containers/libpod.conf` and point the `volume_path` option to that local directory. - * Otherwise just run podman as root, via `sudo podman` + * Otherwise just run Podman as root, via `sudo podman` ### 15) Rootless 'podman build' fails when using OverlayFS: |