diff options
47 files changed, 852 insertions, 222 deletions
diff --git a/.cirrus.yml b/.cirrus.yml index d41828d5d..39275f6db 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -104,14 +104,12 @@ gating_task: # N/B: entrypoint.sh resets $GOSRC (same as make clean) - '/usr/local/bin/entrypoint.sh install.tools |& ${TIMESTAMP}' - '/usr/local/bin/entrypoint.sh validate |& ${TIMESTAMP}' - - '/usr/local/bin/entrypoint.sh golangci-lint |& ${TIMESTAMP}' # This task builds Podman with different buildtags to ensure the build does # not break. It also verifies all sub-commands have man pages. build_script: - '/usr/local/bin/entrypoint.sh podman |& ${TIMESTAMP}' - 'cd $GOSRC && ./hack/podman-commands.sh |& ${TIMESTAMP}' - - 'cd $GOSRC && ./hack/man-page-checker |& ${TIMESTAMP}' # N/B: need 'clean' so some commited files are re-generated. - '/usr/local/bin/entrypoint.sh clean podman-remote |& ${TIMESTAMP}' - '/usr/local/bin/entrypoint.sh clean podman BUILDTAGS="exclude_graphdriver_devicemapper selinux seccomp" |& ${TIMESTAMP}' @@ -83,7 +83,7 @@ LIBSECCOMP_COMMIT := release-2.3 GINKGOTIMEOUT ?= -timeout=90m RELEASE_VERSION ?= $(shell hack/get_release_info.sh VERSION) -RELEASE_NUMBER ?= $(shell hack/get_release_info.sh NUMBER) +RELEASE_NUMBER ?= $(shell hack/get_release_info.sh NUMBER|sed -e 's/^v\(.*\)/\1/') RELEASE_DIST ?= $(shell hack/get_release_info.sh DIST) RELEASE_DIST_VER ?= $(shell hack/get_release_info.sh DIST_VER) RELEASE_ARCH ?= $(shell hack/get_release_info.sh ARCH) @@ -164,6 +164,10 @@ podman: .gopathok $(PODMAN_VARLINK_DEPENDENCIES) ## Build with podman podman-remote: .gopathok $(PODMAN_VARLINK_DEPENDENCIES) ## Build with podman on remote environment $(GO_BUILD) $(BUILDFLAGS) -gcflags '$(GCFLAGS)' -asmflags '$(ASMFLAGS)' -ldflags '$(LDFLAGS_PODMAN)' -tags "$(BUILDTAGS) remoteclient" -o bin/$@ $(PROJECT)/cmd/podman +.PHONY: podman.msi +podman.msi: podman-remote-windows ## Will always rebuild exe as there is no podman-remote-windows.exe target to verify timestamp + wixl -D VERSION=$(RELEASE_NUMBER) -o 
bin/podman-v$(RELEASE_NUMBER).msi contrib/msi/podman.wxs + podman-remote-%: .gopathok $(PODMAN_VARLINK_DEPENDENCIES) ## Build podman for a specific GOOS $(eval BINSFX := $(shell test "$*" != "windows" || echo ".exe")) CGO_ENABLED=0 GOOS=$* $(GO_BUILD) -gcflags '$(GCFLAGS)' -asmflags '$(ASMFLAGS)' -ldflags '$(LDFLAGS_PODMAN)' -tags "remoteclient containers_image_openpgp exclude_graphdriver_devicemapper" -o bin/$@$(BINSFX) $(PROJECT)/cmd/podman @@ -315,6 +319,9 @@ docs: $(MANPAGES) ## Generate documentation install-podman-remote-docs: docs @(cd docs; ./podman-remote.sh ./remote) +man-page-check: + ./hack/man-page-checker + # When publishing releases include critical build-time details .PHONY: release.txt release.txt: @@ -325,9 +332,9 @@ release.txt: echo -n " $$field"; done >> "$@" echo "" >> "$@" -podman-$(RELEASE_NUMBER).tar.gz: binaries docs release.txt +podman-v$(RELEASE_NUMBER).tar.gz: binaries docs release.txt $(eval TMPDIR := $(shell mktemp -d -p '' podman_XXXX)) - $(eval SUBDIR := podman-$(RELEASE_NUMBER)) + $(eval SUBDIR := podman-v$(RELEASE_NUMBER)) mkdir -p "$(TMPDIR)/$(SUBDIR)" $(MAKE) install.bin install.man install.cni install.systemd "DESTDIR=$(TMPDIR)/$(SUBDIR)" "PREFIX=/usr" # release.txt location and content depended upon by automated tooling @@ -336,7 +343,7 @@ podman-$(RELEASE_NUMBER).tar.gz: binaries docs release.txt -rm -rf "$(TMPDIR)" # Must call make in-line: Dependency-spec. w/ wild-card also consumes variable value. 
-podman-remote-$(RELEASE_NUMBER)-%.zip: +podman-remote-v$(RELEASE_NUMBER)-%.zip: $(MAKE) podman-remote-$* install-podman-remote-docs release.txt \ RELEASE_BASENAME=$(shell hack/get_release_info.sh REMOTENAME) \ RELEASE_DIST=$* RELEASE_DIST_VER="-" @@ -361,12 +368,12 @@ podman-remote-$(RELEASE_NUMBER)-%.zip: .PHONY: podman-release podman-release: rm -f release.txt - $(MAKE) podman-$(RELEASE_NUMBER).tar.gz + $(MAKE) podman-v$(RELEASE_NUMBER).tar.gz .PHONY: podman-remote-%-release podman-remote-%-release: rm -f release.txt - $(MAKE) podman-remote-$(RELEASE_NUMBER)-$*.zip + $(MAKE) podman-remote-v$(RELEASE_NUMBER)-$*.zip docker-docs: docs (cd docs; ./dckrman.sh *.1) @@ -505,7 +512,7 @@ validate.completions: completions/bash/podman . completions/bash/podman if [ -x /bin/zsh ]; then /bin/zsh completions/zsh/_podman; fi -validate: gofmt .gitvalidation validate.completions +validate: gofmt .gitvalidation validate.completions golangci-lint man-page-check build-all-new-commits: # Validate that all the commits build on top of $(GIT_BASE_BRANCH) diff --git a/cmd/podman/cliconfig/config.go b/cmd/podman/cliconfig/config.go index 812cc1f51..bf88e853b 100644 --- a/cmd/podman/cliconfig/config.go +++ b/cmd/podman/cliconfig/config.go @@ -518,6 +518,10 @@ type SearchValues struct { TlsVerify bool } +type TrustValues struct { + PodmanCommand +} + type SignValues struct { PodmanCommand Directory string diff --git a/cmd/podman/commands.go b/cmd/podman/commands.go index 77c76d1b7..31f1b3ba4 100644 --- a/cmd/podman/commands.go +++ b/cmd/podman/commands.go @@ -33,6 +33,7 @@ func getMainCommands() []*cobra.Command { func getImageSubCommands() []*cobra.Command { return []*cobra.Command{ _signCommand, + _trustCommand, } } diff --git a/cmd/podman/common.go b/cmd/podman/common.go index 9724d18c6..0115e6ef1 100644 --- a/cmd/podman/common.go +++ b/cmd/podman/common.go @@ -135,6 +135,10 @@ func getCreateFlags(c *cliconfig.PodmanCommand) { "cgroup namespace to use", ) createFlags.String( + 
"cgroups", "enabled", + "control container cgroup configuration", + ) + createFlags.String( "cgroup-parent", "", "Optional parent cgroup for the container", ) diff --git a/cmd/podman/main_local.go b/cmd/podman/main_local.go index 0feba609b..cad256615 100644 --- a/cmd/podman/main_local.go +++ b/cmd/podman/main_local.go @@ -5,9 +5,12 @@ package main import ( "context" + "fmt" + "io/ioutil" "log/syslog" "os" "runtime/pprof" + "strconv" "strings" "syscall" @@ -18,6 +21,7 @@ import ( "github.com/containers/libpod/pkg/rootless" "github.com/containers/libpod/pkg/tracing" "github.com/containers/libpod/pkg/util" + "github.com/containers/libpod/utils" "github.com/opentracing/opentracing-go" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -119,7 +123,29 @@ func profileOff(cmd *cobra.Command) error { return nil } +func movePauseProcessToScope() error { + pausePidPath, err := util.GetRootlessPauseProcessPidPath() + if err != nil { + return errors.Wrapf(err, "could not get pause process pid file path") + } + + data, err := ioutil.ReadFile(pausePidPath) + if err != nil { + return errors.Wrapf(err, "cannot read pause pid file") + } + pid, err := strconv.ParseUint(string(data), 10, 0) + if err != nil { + return errors.Wrapf(err, "cannot parse pid file %s", pausePidPath) + } + + return utils.RunUnderSystemdScope(int(pid), "user.slice", "podman-pause.scope") +} + func setupRootless(cmd *cobra.Command, args []string) error { + if !rootless.IsRootless() { + return nil + } + matches, err := rootless.ConfigurationMatches() if err != nil { return err @@ -128,9 +154,6 @@ func setupRootless(cmd *cobra.Command, args []string) error { logrus.Warningf("the current user namespace doesn't match the configuration in /etc/subuid or /etc/subgid") logrus.Warningf("you can use `%s system migrate` to recreate the user namespace and restart the containers", os.Args[0]) } - if os.Geteuid() == 0 || cmd == _searchCommand || cmd == _versionCommand || cmd == _mountCommand || cmd == _migrateCommand 
|| strings.HasPrefix(cmd.Use, "help") { - return nil - } podmanCmd := cliconfig.PodmanCommand{ Command: cmd, @@ -139,6 +162,39 @@ func setupRootless(cmd *cobra.Command, args []string) error { Remote: remoteclient, } + runtime, err := libpodruntime.GetRuntime(getContext(), &podmanCmd) + if err != nil { + return errors.Wrapf(err, "could not get runtime") + } + defer runtime.DeferredShutdown(false) + + // do it only after podman has already re-execed and running with uid==0. + if os.Geteuid() == 0 { + ownsCgroup, err := cgroups.UserOwnsCurrentSystemdCgroup() + if err != nil { + return err + } + + if !ownsCgroup { + unitName := fmt.Sprintf("podman-%d.scope", os.Getpid()) + if err := utils.RunUnderSystemdScope(os.Getpid(), "user.slice", unitName); err != nil { + conf, err := runtime.GetConfig() + if err != nil { + return err + } + if conf.CgroupManager == libpod.SystemdCgroupsManager { + logrus.Warnf("Failed to add podman to systemd sandbox cgroup: %v", err) + } else { + logrus.Debugf("Failed to add podman to systemd sandbox cgroup: %v", err) + } + } + } + } + + if os.Geteuid() == 0 || cmd == _searchCommand || cmd == _versionCommand || cmd == _mountCommand || cmd == _migrateCommand || strings.HasPrefix(cmd.Use, "help") { + return nil + } + pausePidPath, err := util.GetRootlessPauseProcessPidPath() if err != nil { return errors.Wrapf(err, "could not get pause process pid file path") @@ -158,13 +214,6 @@ func setupRootless(cmd *cobra.Command, args []string) error { } // if there is no pid file, try to join existing containers, and create a pause process. 
- - runtime, err := libpodruntime.GetRuntime(getContext(), &podmanCmd) - if err != nil { - return errors.Wrapf(err, "could not get runtime") - } - defer runtime.DeferredShutdown(false) - ctrs, err := runtime.GetRunningContainers() if err != nil { logrus.Errorf(err.Error()) @@ -177,6 +226,17 @@ func setupRootless(cmd *cobra.Command, args []string) error { } became, ret, err := rootless.TryJoinFromFilePaths(pausePidPath, true, paths) + if err := movePauseProcessToScope(); err != nil { + conf, err := runtime.GetConfig() + if err != nil { + return err + } + if conf.CgroupManager == libpod.SystemdCgroupsManager { + logrus.Warnf("Failed to add pause process to systemd sandbox cgroup: %v", err) + } else { + logrus.Debugf("Failed to add pause process to systemd sandbox cgroup: %v", err) + } + } if err != nil { logrus.Errorf(err.Error()) os.Exit(1) diff --git a/cmd/podman/shared/create.go b/cmd/podman/shared/create.go index acbd53dba..fc8197721 100644 --- a/cmd/podman/shared/create.go +++ b/cmd/podman/shared/create.go @@ -695,6 +695,7 @@ func ParseCreateOpts(ctx context.Context, c *GenericCLIResults, runtime *libpod. 
CapDrop: c.StringSlice("cap-drop"), CidFile: c.String("cidfile"), Cgroupns: c.String("cgroupns"), + Cgroups: c.String("cgroups"), CgroupParent: c.String("cgroup-parent"), Command: command, UserCommand: userCommand, diff --git a/cmd/podman/shared/intermediate.go b/cmd/podman/shared/intermediate.go index 5aaac8687..cccdd1bea 100644 --- a/cmd/podman/shared/intermediate.go +++ b/cmd/podman/shared/intermediate.go @@ -370,6 +370,8 @@ func NewIntermediateLayer(c *cliconfig.PodmanCommand, remote bool) GenericCLIRes m["blkio-weight-device"] = newCRStringSlice(c, "blkio-weight-device") m["cap-add"] = newCRStringSlice(c, "cap-add") m["cap-drop"] = newCRStringSlice(c, "cap-drop") + m["cgroupns"] = newCRString(c, "cgroupns") + m["cgroups"] = newCRString(c, "cgroups") m["cgroup-parent"] = newCRString(c, "cgroup-parent") m["cidfile"] = newCRString(c, "cidfile") m["conmon-pidfile"] = newCRString(c, "conmon-pidfile") diff --git a/cmd/podman/trust.go b/cmd/podman/trust.go index 0a79e1570..f13af96bc 100644 --- a/cmd/podman/trust.go +++ b/cmd/podman/trust.go @@ -6,22 +6,20 @@ import ( ) var ( + trustCommand cliconfig.TrustValues trustDescription = `Manages which registries you trust as a source of container images based on its location. - The location is determined by the transport and the registry host of the image. Using this container image docker://docker.io/library/busybox as an example, docker is the transport and docker.io is the registry host.` - trustCommand = cliconfig.PodmanCommand{ - Command: &cobra.Command{ - Use: "trust", - Short: "Manage container image trust policy", - Long: trustDescription, - RunE: commandRunE(), - }, + _trustCommand = &cobra.Command{ + Use: "trust", + Short: "Manage container image trust policy", + Long: trustDescription, + RunE: commandRunE(), } ) func init() { + trustCommand.Command = _trustCommand trustCommand.SetHelpTemplate(HelpTemplate()) trustCommand.SetUsageTemplate(UsageTemplate()) trustCommand.AddCommand(getTrustSubCommands()...) 
- imageCommand.AddCommand(trustCommand.Command) } diff --git a/contrib/cirrus/container_test.sh b/contrib/cirrus/container_test.sh index 27baf0ad7..9d3f09f60 100644 --- a/contrib/cirrus/container_test.sh +++ b/contrib/cirrus/container_test.sh @@ -126,6 +126,7 @@ if [ $install -eq 1 ]; then make TAGS="${TAGS}" install.bin PREFIX=/usr ETCDIR=/etc make TAGS="${TAGS}" install.man PREFIX=/usr ETCDIR=/etc make TAGS="${TAGS}" install.cni PREFIX=/usr ETCDIR=/etc + make TAGS="${TAGS}" install.config PREFIX=/usr ETCDIR=/etc make TAGS="${TAGS}" install.systemd PREFIX=/usr ETCDIR=/etc fi diff --git a/contrib/cirrus/integration_test.sh b/contrib/cirrus/integration_test.sh index 552f2ba73..00c3b0ec3 100755 --- a/contrib/cirrus/integration_test.sh +++ b/contrib/cirrus/integration_test.sh @@ -45,6 +45,7 @@ case "$SPECIALMODE" in export OCI_RUNTIME=/usr/bin/crun make make install PREFIX=/usr ETCDIR=/etc + make install.config PREFIX=/usr make test-binaries make local${TESTSUITE} ;; @@ -57,6 +58,7 @@ case "$SPECIALMODE" in none) make make install PREFIX=/usr ETCDIR=/etc + make install.config PREFIX=/usr make test-binaries if [[ "$TEST_REMOTE_CLIENT" == "true" ]] then diff --git a/contrib/cirrus/setup_environment.sh b/contrib/cirrus/setup_environment.sh index 7c7659169..c1a01d6ab 100755 --- a/contrib/cirrus/setup_environment.sh +++ b/contrib/cirrus/setup_environment.sh @@ -50,9 +50,7 @@ case "${OS_REL_VER}" in bash "$SCRIPT_BASE/add_second_partition.sh"; fi ;; centos-7) # Current VM is an image-builder-image no local podman/testing - echo "No further setup required for VM image building" - # All SELinux distros need this for systemd-in-a-container - setsebool container_manage_cgroup true + echo "No further setup required for VM image building" exit 0 ;; *) bad_os_id_ver ;; diff --git a/contrib/msi/podman-logo.ico b/contrib/msi/podman-logo.ico Binary files differnew file mode 100644 index 000000000..cb1dab6a7 --- /dev/null +++ b/contrib/msi/podman-logo.ico diff --git 
a/contrib/msi/podman.bat b/contrib/msi/podman.bat new file mode 100644 index 000000000..091c1c4c4 --- /dev/null +++ b/contrib/msi/podman.bat @@ -0,0 +1,43 @@ +@echo off +setlocal enableextensions + +title Podman + +:: If remote-host is given on command line -- use it +setlocal enabledelayedexpansion +for %%a in (%*) do ( + echo "%%a" |find "--remote-host" >NUL + if !errorlevel! == 0 ( + goto run_podman + ) +) + +:: If PODMAN_VARLINK_BRIDGE is set -- use it +if defined PODMAN_VARLINK_BRIDGE ( + goto run_podman +) + +:: If the configuration file exists -- use it +set config_home=%USERPROFILE%\AppData\podman +set config_file=%config_home%\podman-remote.conf +if exist "%config_file%" ( + goto run_podman +) + +:: Get connection information from user and build configuration file +md "%config_home%" +set /p host="Please enter the remote hosts name or IP address: " +set /p user="Please enter the remote user name: " +( + echo [connections] + echo [connections."%host%"] + echo destination = "%host%" + echo username = "%user%" + echo default = true +) >"%config_file%" + +:run_podman +endlocal +podman-remote-windows.exe %* + +:End diff --git a/contrib/msi/podman.wxs b/contrib/msi/podman.wxs new file mode 100644 index 000000000..77c6e2815 --- /dev/null +++ b/contrib/msi/podman.wxs @@ -0,0 +1,48 @@ +<?xml version="1.0" encoding="utf-8"?> +<Wix xmlns="http://schemas.microsoft.com/wix/2006/wi"> + + <?ifndef var.VERSION?> + <?error VERSION must be defined via command line argument?> + <?endif?> + + <Product Name="Podman $(var.VERSION)" Id="*" UpgradeCode="696BAB5D-CA1F-4B05-B123-320F245B8D6D" Version="$(var.VERSION)" Language="1033" Manufacturer="Red Hat Inc."> + + <Package Id="*" Keywords="Installer" Description="Red Hat's Podman $(var.VERSION) Installer" Comments="Apache 2.0 License" Manufacturer="Red Hat Inc." 
InstallScope="perMachine" InstallerVersion="100" Compressed="yes"/> + <Media Id="1" Cabinet="Podman.cab" EmbedCab="yes"/> + <Property Id="DiskPrompt" Value="Red Hat's Podman $(var.VERSION) Installation"/> + + <Directory Id="TARGETDIR" Name="SourceDir"> + + <Directory Id="ProgramFilesFolder" Name="PFiles"> + <Directory Id="RedHatPFiles" Name="RedHat"> + <Directory Id="INSTALLDIR" Name="Podman"> + <Component Id="INSTALLDIR_Component" Guid="14B310C4-9B5D-4DA5-ADF9-B9D008E4CD82"> + <CreateFolder/> + </Component> + <Component Id="MainExecutable" Guid="73752F94-6589-4C7B-ABED-39D655A19714"> + <File Id="520C6E17-77A2-4F41-9611-30FA763A0702" Name="podman-remote-windows.exe" Source="bin/podman-remote-windows.exe"/> + <File Id="A14218A0-4180-44AC-B109-7C63B3099DCA" Name="podman.bat" Source="podman.bat" KeyPath="yes"/> + </Component> + </Directory> + </Directory> + </Directory> + </Directory> + + <Property Id="setx" Value="setx.exe"/> + <CustomAction Id="ChangePath" ExeCommand='PATH "%PATH%;[INSTALLDIR]"' Property="setx" Execute="deferred" Impersonate="yes" Return="check"/> + + <Feature Id="Complete" Level="1"> + <ComponentRef Id="INSTALLDIR_Component"/> + <ComponentRef Id="MainExecutable"/> + </Feature> + + <Icon Id="podman.ico" SourceFile="contrib/msi/podman-logo.ico"/> + <Property Id="ARPPRODUCTICON" Value="podman.ico"/> + + <InstallExecuteSequence> + <RemoveExistingProducts Before="InstallInitialize"/> + <Custom Action="ChangePath" After="InstallServices">NOT Installed</Custom> + </InstallExecuteSequence> + + </Product> +</Wix> diff --git a/docs/podman-create.1.md b/docs/podman-create.1.md index 8a0334765..996ef3863 100644 --- a/docs/podman-create.1.md +++ b/docs/podman-create.1.md @@ -73,6 +73,12 @@ Set the cgroup namespace mode for the container, by default **host** is used. **private**: create a new cgroup namespace. **ns:<PATH>**: join the namespace at the specified path. +**--cgroups**=*mode* + +Determines whether the container will create CGroups. 
+Valid values are *enabled* and *disabled*, with the default being *enabled*. +The *disabled* option will force the container to not create CGroups, and thus conflicts with CGroup options (**--cgroupns** and **--cgroup-parent**). + **--cgroup-parent**=*path* Path to cgroups under which the cgroup for the container will be created. If the path is not absolute, the path is considered to be relative to the cgroups path of the init process. Cgroups will be created if they do not already exist. diff --git a/docs/podman-image-trust.1.md b/docs/podman-image-trust.1.md index b35e883d7..3fe4f7f52 100644 --- a/docs/podman-image-trust.1.md +++ b/docs/podman-image-trust.1.md @@ -8,7 +8,9 @@ podman\-image\-trust - Manage container registry image trust policy **podman image trust** set|show [*options*] *registry[/repository]* ## DESCRIPTION -Manages which registries you trust as a source of container images based on its location. The location is determined +Manages which registries you trust as a source of container images based on its location. (Not available for remote commands) + +The location is determined by the transport and the registry host of the image. Using this container image `docker://docker.io/library/busybox` as an example, `docker` is the transport and `docker.io` is the registry host. diff --git a/docs/podman-run.1.md b/docs/podman-run.1.md index 8f46e8f22..0dbd4ea6f 100644 --- a/docs/podman-run.1.md +++ b/docs/podman-run.1.md @@ -87,6 +87,12 @@ Set the cgroup namespace mode for the container, by default **host** is used. **private**: create a new cgroup namespace. **ns:<PATH>**: join the namespace at the specified path. +**--cgroups**=*mode* + +Determines whether the container will create CGroups. +Valid values are *enabled* and *disabled*, with the default being *enabled*. +The *disabled* option will force the container to not create CGroups, and thus conflicts with CGroup options (**--cgroupns** and **--cgroup-parent**). 
+ **--cgroup-parent**=*cgroup* Path to cgroups under which the cgroup for the container will be created. If the path is not absolute, the path is considered to be relative to the cgroups path of the init process. Cgroups will be created if they do not already exist. diff --git a/docs/tutorials/rootless_tutorial.md b/docs/tutorials/rootless_tutorial.md index c98e74c96..ed700485a 100644 --- a/docs/tutorials/rootless_tutorial.md +++ b/docs/tutorials/rootless_tutorial.md @@ -4,6 +4,17 @@ Prior to allowing users without root privileges to run Podman, the administrator must install or build Podman and complete the following configurations. +## cgroup V2 support + +The cgroup V2 Linux kernel feature allows the user to limit the amount of resources a rootless container can use. If the Linux distribution that you are running Podman on is enabled with cgroup V2 then you might need to change the default OCI Runtime. The default runtime `runc` does not currently work with cgroup V2 enabled systems, so you have to switch to the alternative OCI runtime `crun`. + +The alternative OCI runtime support for cgroup V2 can be turned on at the command line by using the `--runtime` option: + +``` +sudo podman --runtime /usr/bin/crun +``` +or by changing the value for the "Default OCI runtime" in the libpod.conf file either at the system level or at the [user level](#user-configuration-files) from `runtime = "runc"` to `runtime = "crun"`. + ## Administrator Actions ### Installing Podman @@ -22,7 +33,7 @@ The [slirp4netns](https://github.com/rootless-containers/slirp4netns) package pr When using Podman in a rootless environment, it is recommended to use fuse-overlayfs rather than the VFS file system. Installing the fuse3-devel package gives Podman the dependencies it needs to install, build and use fuse-overlayfs in a rootless environment for you. The fuse-overlayfs project is also available from [GitHub](https://github.com/containers/fuse-overlayfs). 
This especially needs to be checked on Ubuntu distributions as fuse-overlayfs is not generally installed by default. -If podman is installed before fuse-overlayfs, it may be necessary to change the `driver` option under `[storage]` to `"overlay"`. +If Podman is installed before fuse-overlayfs, it may be necessary to change the `driver` option under `[storage]` to `"overlay"`. ### Enable user namespaces (on RHEL7 machines) @@ -48,7 +59,7 @@ The format of this file is USERNAME:UID:RANGE This means the user johndoe is allocated UIDS 100000-165535 as well as their standard UID in the /etc/passwd file. NOTE: this is not currently supported with network installs. These files must be available locally to the host machine. It is not possible to configure this with LDAP or Active Directory. -If you update either the /etc/subuid or the /etc/subgid file, you need to stop all the running containers owned by the user and kill the pause process that is running on the system for that user. This can be done automatically by using the [`podman system migrate`](https://github.com/containers/libpod/blob/master/docs/podman-system-migrate.1.md) command which will stop all the containers for the user and will kill the pause process. +If you update either the /etc/subuid or the /etc/subgid file, you need to stop all the running containers owned by the user and kill the pause process that is running on the system for that user. This can be done automatically by using the [`podman system migrate`](https://github.com/containers/libpod/blob/master/docs/podman-system-migrate.1.md) command which will stop all the containers for the user and will kill the pause process. Rather than updating the files directly, the usermod program can be used to assign UIDs and GIDs to a user. @@ -78,7 +89,7 @@ Once the Administrator has completed the setup on the machine and then the confi ### User Configuration Files. 
-The Podman configuration files for root reside in /usr/share/containers with overrides in /etc/containers. In the rootless environment they reside in ${XDG\_CONFIG\_HOME}/containers and are owned by each individual user. The user can modify these files as they wish. +The Podman configuration files for root reside in /usr/share/containers with overrides in /etc/containers. In the rootless environment they reside in ${XDG\_CONFIG\_HOME}/containers and are owned by each individual user. The main files are libpod.conf and storage.conf and the user can modify these files as they wish. The default authorization file used by the `podman login` and `podman logout` commands reside in ${XDG\_RUNTIME\_DIR}/containers/auth.json. @@ -89,7 +100,6 @@ The default authorization file used by the `podman login` and `podman logout` co Description=nginx Requires=user@1001.service After=user@1001.service - [Service] Type=simple KillMode=none @@ -101,7 +111,6 @@ ExecStop=/usr/bin/podman stop nginx Restart=always User=nginx Group=nginx - [Install] WantedBy=multi-user.target ``` diff --git a/libpod.conf b/libpod.conf index 81fece5d2..47f66ecc1 100644 --- a/libpod.conf +++ b/libpod.conf @@ -122,6 +122,10 @@ runtime = "runc" # libpod will use it for reporting nicer errors. runtime_supports_json = ["crun", "runc"] +# List of all the OCI runtimes that support --cgroups=disabled to disable +# creation of CGroups for containers. +runtime_supports_nocgroups = ["crun"] + # Paths to look for a valid OCI runtime (runc, runv, etc) # If the paths are empty or no valid path was found, then the `$PATH` # environment variable will be used as the fallback. 
diff --git a/libpod/boltdb_state.go b/libpod/boltdb_state.go index 4e7f78f13..a6fd9a7d8 100644 --- a/libpod/boltdb_state.go +++ b/libpod/boltdb_state.go @@ -1749,6 +1749,7 @@ func (s *BoltState) LookupVolume(name string) (*Volume, error) { volume := new(Volume) volume.config = new(VolumeConfig) + volume.state = new(VolumeState) db, err := s.getDBCon() if err != nil { diff --git a/libpod/container.go b/libpod/container.go index 9c01d2adf..3d8e58375 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -356,6 +356,9 @@ type ContainerConfig struct { StopTimeout uint `json:"stopTimeout,omitempty"` // Time container was created CreatedTime time.Time `json:"createdTime"` + // NoCgroups indicates that the container will not create CGroups. It is + // incompatible with CgroupParent. + NoCgroups bool `json:"noCgroups,omitempty"` // Cgroup parent of the container CgroupParent string `json:"cgroupParent"` // LogPath log location diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go index 1b6dd829c..3c32a2f45 100644 --- a/libpod/container_inspect.go +++ b/libpod/container_inspect.go @@ -268,6 +268,11 @@ type InspectContainerHostConfig struct { // populated. // TODO. Cgroup string `json:"Cgroup"` + // Cgroups contains the container's CGroup mode. + // Allowed values are "default" (container is creating CGroups) and + // "disabled" (container is not creating CGroups). + // This is Libpod-specific and not included in `docker inspect`. + Cgroups string `json:"Cgroups"` // Links is unused, and provided purely for Docker compatibility. 
Links []string `json:"Links"` // OOMScoreAdj is an adjustment that will be made to the container's OOM @@ -958,6 +963,11 @@ func (c *Container) generateInspectContainerHostConfig(ctrSpec *spec.Spec, named restartPolicy.Name = c.config.RestartPolicy restartPolicy.MaximumRetryCount = c.config.RestartRetries hostConfig.RestartPolicy = restartPolicy + if c.config.NoCgroups { + hostConfig.Cgroups = "disabled" + } else { + hostConfig.Cgroups = "default" + } hostConfig.Dns = make([]string, 0, len(c.config.DNSServer)) for _, dns := range c.config.DNSServer { diff --git a/libpod/container_internal.go b/libpod/container_internal.go index ffc6c11ee..ac565fdad 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -21,6 +21,7 @@ import ( "github.com/containers/storage" "github.com/containers/storage/pkg/archive" "github.com/containers/storage/pkg/mount" + "github.com/cyphar/filepath-securejoin" spec "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-tools/generate" "github.com/opencontainers/selinux/go-selinux/label" @@ -1119,6 +1120,10 @@ func (c *Container) stop(timeout uint) error { // Internal, non-locking function to pause a container func (c *Container) pause() error { + if c.config.NoCgroups { + return errors.Wrapf(define.ErrNoCgroups, "cannot pause without using CGroups") + } + if err := c.ociRuntime.pauseContainer(c); err != nil { return err } @@ -1132,6 +1137,10 @@ func (c *Container) pause() error { // Internal, non-locking function to unpause a container func (c *Container) unpause() error { + if c.config.NoCgroups { + return errors.Wrapf(define.ErrNoCgroups, "cannot unpause without using CGroups") + } + if err := c.ociRuntime.unpauseContainer(c); err != nil { return err } @@ -1234,43 +1243,82 @@ func (c *Container) mountStorage() (_ string, Err error) { }() } + // We need to mount the container before volumes - to ensure the copyup + // works properly. 
+ mountPoint := c.config.Rootfs + if mountPoint == "" { + mountPoint, err = c.mount() + if err != nil { + return "", err + } + defer func() { + if Err != nil { + if err := c.unmount(false); err != nil { + logrus.Errorf("Error unmounting container %s after mount error: %v", c.ID(), err) + } + } + }() + } + // Request a mount of all named volumes for _, v := range c.config.NamedVolumes { - vol, err := c.runtime.state.Volume(v.Name) + vol, err := c.mountNamedVolume(v, mountPoint) if err != nil { - return "", errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID()) + return "", err } - - if vol.needsMount() { + defer func() { + if Err == nil { + return + } vol.lock.Lock() - if err := vol.mount(); err != nil { - vol.lock.Unlock() - return "", errors.Wrapf(err, "error mounting volume %s for container %s", vol.Name(), c.ID()) + if err := vol.unmount(false); err != nil { + logrus.Errorf("Error unmounting volume %s after error mounting container %s: %v", vol.Name(), c.ID(), err) } vol.lock.Unlock() - defer func() { - if Err == nil { - return - } - vol.lock.Lock() - if err := vol.unmount(false); err != nil { - logrus.Errorf("Error unmounting volume %s after error mounting container %s: %v", vol.Name(), c.ID(), err) - } - vol.lock.Unlock() - }() - } + }() } - // TODO: generalize this mount code so it will mount every mount in ctr.config.Mounts - mountPoint := c.config.Rootfs - if mountPoint == "" { - mountPoint, err = c.mount() - if err != nil { - return "", err + return mountPoint, nil +} + +// Mount a single named volume into the container. +// If necessary, copy up image contents into the volume. +// Does not verify that the name volume given is actually present in container +// config. +// Returns the volume that was mounted. 
+func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string) (*Volume, error) { + vol, err := c.runtime.state.Volume(v.Name) + if err != nil { + return nil, errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID()) + } + + vol.lock.Lock() + defer vol.lock.Unlock() + if vol.needsMount() { + if err := vol.mount(); err != nil { + return nil, errors.Wrapf(err, "error mounting volume %s for container %s", vol.Name(), c.ID()) } } + // The volume may need a copy-up. Check the state. + if err := vol.update(); err != nil { + return nil, err + } + if vol.state.NeedsCopyUp { + logrus.Debugf("Copying up contents from container %s to volume %s", c.ID(), vol.Name()) + srcDir, err := securejoin.SecureJoin(mountpoint, v.Dest) + if err != nil { + return nil, errors.Wrapf(err, "error calculating destination path to copy up container %s volume %s", c.ID(), vol.Name()) + } + if err := c.copyWithTarFromImage(srcDir, vol.MountPoint()); err != nil && !os.IsNotExist(err) { + return nil, errors.Wrapf(err, "error copying content from container %s into volume %s", c.ID(), vol.Name()) + } - return mountPoint, nil + vol.state.NeedsCopyUp = false + if err := vol.save(); err != nil { + return nil, err + } + } + return vol, nil } // cleanupStorage unmounts and cleans up the container's root filesystem @@ -1614,15 +1662,11 @@ func (c *Container) unmount(force bool) error { } // this should be from chrootarchive. -func (c *Container) copyWithTarFromImage(src, dest string) error { - mountpoint, err := c.mount() - if err != nil { - return err - } +// Container MUST be mounted before calling. 
+func (c *Container) copyWithTarFromImage(source, dest string) error { a := archive.NewDefaultArchiver() - source := filepath.Join(mountpoint, src) - if err = c.copyOwnerAndPerms(source, dest); err != nil { + if err := c.copyOwnerAndPerms(source, dest); err != nil { return err } return a.CopyWithTar(source, dest) diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index e96af8536..4bbbef5db 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -21,7 +21,7 @@ import ( "github.com/containernetworking/plugins/pkg/ns" "github.com/containers/buildah/pkg/secrets" "github.com/containers/libpod/libpod/define" - crioAnnotations "github.com/containers/libpod/pkg/annotations" + "github.com/containers/libpod/pkg/annotations" "github.com/containers/libpod/pkg/apparmor" "github.com/containers/libpod/pkg/cgroups" "github.com/containers/libpod/pkg/criu" @@ -115,7 +115,9 @@ func (c *Container) prepare() (Err error) { createErr = createNetNSErr } if mountStorageErr != nil { - logrus.Errorf("Error preparing container %s: %v", c.ID(), createErr) + if createErr != nil { + logrus.Errorf("Error preparing container %s: %v", c.ID(), createErr) + } createErr = mountStorageErr } @@ -347,9 +349,13 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { } g.SetRootPath(c.state.Mountpoint) - g.AddAnnotation(crioAnnotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano)) + g.AddAnnotation(annotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano)) g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal)) + if _, exists := g.Config.Annotations[annotations.ContainerManager]; !exists { + g.AddAnnotation(annotations.ContainerManager, annotations.ContainerManagerLibpod) + } + for _, i := range c.config.Spec.Linux.Namespaces { if i.Type == spec.UTSNamespace { hostname := c.Hostname() @@ -375,7 +381,7 @@ func (c *Container) generateSpec(ctx context.Context) 
(*spec.Spec, error) { if err != nil { return nil, err } - if rootless.IsRootless() && !unified { + if (rootless.IsRootless() && !unified) || c.config.NoCgroups { g.SetLinuxCgroupsPath("") } else if c.runtime.config.CgroupManager == SystemdCgroupsManager { // When runc is set to use Systemd as a cgroup manager, it @@ -485,12 +491,29 @@ func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) erro if unified { g.RemoveMount("/sys/fs/cgroup") - sourcePath := filepath.Join("/sys/fs/cgroup") - systemdMnt := spec.Mount{ - Destination: "/sys/fs/cgroup", - Type: "bind", - Source: sourcePath, - Options: []string{"bind", "private", "rw"}, + hasCgroupNs := false + for _, ns := range c.config.Spec.Linux.Namespaces { + if ns.Type == spec.CgroupNamespace { + hasCgroupNs = true + break + } + } + + var systemdMnt spec.Mount + if hasCgroupNs { + systemdMnt = spec.Mount{ + Destination: "/sys/fs/cgroup", + Type: "cgroup", + Source: "cgroup", + Options: []string{"private", "rw"}, + } + } else { + systemdMnt = spec.Mount{ + Destination: "/sys/fs/cgroup", + Type: "bind", + Source: "/sys/fs/cgroup", + Options: []string{"bind", "private", "rw"}, + } } g.AddMount(systemdMnt) } else { diff --git a/libpod/container_top_linux.go b/libpod/container_top_linux.go index ce471838d..5f4f28130 100644 --- a/libpod/container_top_linux.go +++ b/libpod/container_top_linux.go @@ -15,6 +15,10 @@ import ( // Top gathers statistics about the running processes in a container. 
It returns a // []string for output func (c *Container) Top(descriptors []string) ([]string, error) { + if c.config.NoCgroups { + return nil, errors.Wrapf(define.ErrNoCgroups, "cannot run top on container %s as it did not create a cgroup", c.ID()) + } + conStat, err := c.State() if err != nil { return nil, errors.Wrapf(err, "unable to look up state for %s", c.ID()) diff --git a/libpod/define/errors.go b/libpod/define/errors.go index 9d532263c..004acd58f 100644 --- a/libpod/define/errors.go +++ b/libpod/define/errors.go @@ -61,6 +61,10 @@ var ( // the user. ErrDetach = utils.ErrDetach + // ErrNoCgroups indicates that the container does not have its own + // CGroup. + ErrNoCgroups = errors.New("this container does not have a cgroup") + // ErrRuntimeStopped indicates that the runtime has already been shut // down and no further operations can be performed on it ErrRuntimeStopped = errors.New("runtime has already been stopped") diff --git a/libpod/image/prune.go b/libpod/image/prune.go index 6ef5d321f..006cbdf22 100644 --- a/libpod/image/prune.go +++ b/libpod/image/prune.go @@ -4,7 +4,9 @@ import ( "context" "github.com/containers/libpod/libpod/events" + "github.com/containers/storage" "github.com/pkg/errors" + "github.com/sirupsen/logrus" ) // GetPruneImages returns a slice of images that have no names/unused @@ -44,6 +46,10 @@ func (ir *Runtime) PruneImages(ctx context.Context, all bool) ([]string, error) } for _, p := range pruneImages { if err := p.Remove(ctx, true); err != nil { + if errors.Cause(err) == storage.ErrImageUsedByContainer { + logrus.Warnf("Failed to prune image %s as it is in use: %v", p.ID(), err) + continue + } return nil, errors.Wrap(err, "failed to prune image") } defer p.newImageEvent(events.Prune) diff --git a/libpod/oci.go b/libpod/oci.go index 8a873ca5b..9879fa90e 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -48,19 +48,20 @@ const ( // OCIRuntime represents an OCI-compatible runtime that libpod can call into // to perform container 
operations type OCIRuntime struct { - name string - path string - conmonPath string - conmonEnv []string - cgroupManager string - tmpDir string - exitsDir string - socketsDir string - logSizeMax int64 - noPivot bool - reservePorts bool - supportsJSON bool - sdNotify bool + name string + path string + conmonPath string + conmonEnv []string + cgroupManager string + tmpDir string + exitsDir string + socketsDir string + logSizeMax int64 + noPivot bool + reservePorts bool + supportsJSON bool + supportsNoCgroups bool + sdNotify bool } // ociError is used to parse the OCI runtime JSON log. It is not part of the @@ -73,7 +74,7 @@ type ociError struct { // Make a new OCI runtime with provided options. // The first path that points to a valid executable will be used. -func newOCIRuntime(name string, paths []string, conmonPath string, runtimeCfg *RuntimeConfig, supportsJSON bool) (*OCIRuntime, error) { +func newOCIRuntime(name string, paths []string, conmonPath string, runtimeCfg *RuntimeConfig, supportsJSON, supportsNoCgroups bool) (*OCIRuntime, error) { if name == "" { return nil, errors.Wrapf(define.ErrInvalidArg, "the OCI runtime must be provided a non-empty name") } @@ -93,6 +94,7 @@ func newOCIRuntime(name string, paths []string, conmonPath string, runtimeCfg *R // TODO: probe OCI runtime for feature and enable automatically if // available. 
runtime.supportsJSON = supportsJSON + runtime.supportsNoCgroups = supportsNoCgroups foundPath := false for _, path := range paths { diff --git a/libpod/oci_internal_linux.go b/libpod/oci_internal_linux.go index 48b7370e0..4df1e4010 100644 --- a/libpod/oci_internal_linux.go +++ b/libpod/oci_internal_linux.go @@ -21,6 +21,7 @@ import ( "github.com/containers/libpod/pkg/cgroups" "github.com/containers/libpod/pkg/errorhandling" "github.com/containers/libpod/pkg/lookup" + "github.com/containers/libpod/pkg/rootless" "github.com/containers/libpod/pkg/util" "github.com/containers/libpod/utils" "github.com/coreos/go-systemd/activation" @@ -263,7 +264,7 @@ func (r *OCIRuntime) configureConmonEnv(runtimeDir string) ([]string, []*os.File func (r *OCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, ociLogPath string) []string { // set the conmon API version to be able to use the correct sync struct keys args := []string{"--api-version", "1"} - if r.cgroupManager == SystemdCgroupsManager { + if r.cgroupManager == SystemdCgroupsManager && !ctr.config.NoCgroups { args = append(args, "-s") } args = append(args, "-c", ctr.ID()) @@ -307,6 +308,10 @@ func (r *OCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath if ociLogPath != "" { args = append(args, "--runtime-arg", "--log-format=json", "--runtime-arg", "--log", fmt.Sprintf("--runtime-arg=%s", ociLogPath)) } + if ctr.config.NoCgroups { + logrus.Debugf("Running with no CGroups") + args = append(args, "--runtime-arg", "--cgroup-manager", "--runtime-arg", "disabled") + } return args } @@ -355,30 +360,46 @@ func startCommandGivenSelinux(cmd *exec.Cmd) error { // moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup // it then signals for conmon to start by sending nonse data down the start fd func (r *OCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File, uuid string) error { - cgroupParent := 
ctr.CgroupParent() - if r.cgroupManager == SystemdCgroupsManager { - unitName := createUnitName("libpod-conmon", ctr.ID()) - - realCgroupParent := cgroupParent - splitParent := strings.Split(cgroupParent, "/") - if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 { - realCgroupParent = splitParent[len(splitParent)-1] - } + mustCreateCgroup := true + // If cgroup creation is disabled - just signal. + if ctr.config.NoCgroups { + mustCreateCgroup = false + } - logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName) - if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil { - logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err) - } - } else { - cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon") - control, err := cgroups.New(cgroupPath, &spec.LinuxResources{}) + if rootless.IsRootless() { + ownsCgroup, err := cgroups.UserOwnsCurrentSystemdCgroup() if err != nil { - logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + return err + } + mustCreateCgroup = !ownsCgroup + } + + if mustCreateCgroup { + cgroupParent := ctr.CgroupParent() + if r.cgroupManager == SystemdCgroupsManager { + unitName := createUnitName("libpod-conmon", ctr.ID()) + + realCgroupParent := cgroupParent + splitParent := strings.Split(cgroupParent, "/") + if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 { + realCgroupParent = splitParent[len(splitParent)-1] + } + + logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName) + if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil { + logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err) + } } else { - // we need to remove this defer and delete the cgroup once conmon exits - // maybe need a conmon monitor? 
- if err := control.AddPid(cmd.Process.Pid); err != nil { + cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon") + control, err := cgroups.New(cgroupPath, &spec.LinuxResources{}) + if err != nil { logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + } else { + // we need to remove this defer and delete the cgroup once conmon exits + // maybe need a conmon monitor? + if err := control.AddPid(cmd.Process.Pid); err != nil { + logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + } } } } diff --git a/libpod/oci_linux.go b/libpod/oci_linux.go index 6dba1260c..091b6d155 100644 --- a/libpod/oci_linux.go +++ b/libpod/oci_linux.go @@ -402,10 +402,12 @@ func (r *OCIRuntime) stopContainer(ctr *Container, timeout uint) error { } var args []string - if rootless.IsRootless() { + if rootless.IsRootless() || ctr.config.NoCgroups { // we don't use --all for rootless containers as the OCI runtime might use // the cgroups to determine the PIDs, but for rootless containers there is // not any. + // Same logic for NoCgroups - we can't use cgroups as the user + // explicitly requested none be created. args = []string{"kill", ctr.ID(), "KILL"} } else { args = []string{"kill", "--all", ctr.ID(), "KILL"} diff --git a/libpod/options.go b/libpod/options.go index 6df1ca5be..d28cb3d8c 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -847,6 +847,10 @@ func WithPIDNSFrom(nsCtr *Container) CtrCreateOption { return errors.Wrapf(define.ErrInvalidArg, "container has joined pod %s and dependency container %s is not a member of the pod", ctr.config.Pod, nsCtr.ID()) } + if ctr.config.NoCgroups { + return errors.Wrapf(define.ErrInvalidArg, "container has disabled creation of CGroups, which is incompatible with sharing a PID namespace") + } + ctr.config.PIDNsCtr = nsCtr.ID() return nil @@ -1056,6 +1060,27 @@ func WithLogPath(path string) CtrCreateOption { } } +// WithNoCgroups disables the creation of CGroups for the new container. 
+func WithNoCgroups() CtrCreateOption { + return func(ctr *Container) error { + if ctr.valid { + return define.ErrCtrFinalized + } + + if ctr.config.CgroupParent != "" { + return errors.Wrapf(define.ErrInvalidArg, "NoCgroups conflicts with CgroupParent") + } + + if ctr.config.PIDNsCtr != "" { + return errors.Wrapf(define.ErrInvalidArg, "NoCgroups requires a private PID namespace and cannot be used when PID namespace is shared with another container") + } + + ctr.config.NoCgroups = true + + return nil + } +} + // WithCgroupParent sets the Cgroup Parent of the new container. func WithCgroupParent(parent string) CtrCreateOption { return func(ctr *Container) error { @@ -1067,6 +1092,10 @@ func WithCgroupParent(parent string) CtrCreateOption { return errors.Wrapf(define.ErrInvalidArg, "cgroup parent cannot be empty") } + if ctr.config.NoCgroups { + return errors.Wrapf(define.ErrInvalidArg, "CgroupParent conflicts with NoCgroups") + } + ctr.config.CgroupParent = parent return nil diff --git a/libpod/runtime.go b/libpod/runtime.go index 323a46266..80b58654e 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -157,8 +157,12 @@ type RuntimeConfig struct { OCIRuntime string `toml:"runtime"` // OCIRuntimes are the set of configured OCI runtimes (default is runc) OCIRuntimes map[string][]string `toml:"runtimes"` - // RuntimeSupportsJSON is the list of the OCI runtimes that support --format=json + // RuntimeSupportsJSON is the list of the OCI runtimes that support + // --format=json. RuntimeSupportsJSON []string `toml:"runtime_supports_json"` + // RuntimeSupportsNoCgroups is a list of OCI runtimes that support + // running containers without CGroups. + RuntimeSupportsNoCgroups []string `toml:"runtime_supports_nocgroups"` // RuntimePath is the path to OCI runtime binary for launching // containers. 
// The first path pointing to a valid file will be used @@ -259,21 +263,22 @@ type RuntimeConfig struct { // If they were not, we may override them with information from the database, // if it exists and differs from what is present in the system already. type runtimeConfiguredFrom struct { - storageGraphDriverSet bool - storageGraphRootSet bool - storageRunRootSet bool - libpodStaticDirSet bool - libpodTmpDirSet bool - volPathSet bool - conmonPath bool - conmonEnvVars bool - initPath bool - ociRuntimes bool - runtimePath bool - cniPluginDir bool - noPivotRoot bool - runtimeSupportsJSON bool - ociRuntime bool + storageGraphDriverSet bool + storageGraphRootSet bool + storageRunRootSet bool + libpodStaticDirSet bool + libpodTmpDirSet bool + volPathSet bool + conmonPath bool + conmonEnvVars bool + initPath bool + ociRuntimes bool + runtimePath bool + cniPluginDir bool + noPivotRoot bool + runtimeSupportsJSON bool + runtimeSupportsNoCgroups bool + ociRuntime bool } func defaultRuntimeConfig() (RuntimeConfig, error) { @@ -603,6 +608,9 @@ func newRuntimeFromConfig(ctx context.Context, userConfigPath string, options .. if tmpConfig.RuntimeSupportsJSON != nil { runtime.configuredFrom.runtimeSupportsJSON = true } + if tmpConfig.RuntimeSupportsNoCgroups != nil { + runtime.configuredFrom.runtimeSupportsNoCgroups = true + } if tmpConfig.OCIRuntime != "" { runtime.configuredFrom.ociRuntime = true } @@ -649,6 +657,9 @@ func newRuntimeFromConfig(ctx context.Context, userConfigPath string, options .. 
if !runtime.configuredFrom.runtimeSupportsJSON { runtime.config.RuntimeSupportsJSON = tmpConfig.RuntimeSupportsJSON } + if !runtime.configuredFrom.runtimeSupportsNoCgroups { + runtime.config.RuntimeSupportsNoCgroups = tmpConfig.RuntimeSupportsNoCgroups + } if !runtime.configuredFrom.ociRuntime { runtime.config.OCIRuntime = tmpConfig.OCIRuntime } @@ -1009,6 +1020,16 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) { } } + // Make lookup tables for runtime support + supportsJSON := make(map[string]bool) + supportsNoCgroups := make(map[string]bool) + for _, r := range runtime.config.RuntimeSupportsJSON { + supportsJSON[r] = true + } + for _, r := range runtime.config.RuntimeSupportsNoCgroups { + supportsNoCgroups[r] = true + } + // Get us at least one working OCI runtime. runtime.ociRuntimes = make(map[string]*OCIRuntime) @@ -1026,15 +1047,10 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) { name := filepath.Base(runtime.config.RuntimePath[0]) - supportsJSON := false - for _, r := range runtime.config.RuntimeSupportsJSON { - if r == name { - supportsJSON = true - break - } - } + json := supportsJSON[name] + nocgroups := supportsNoCgroups[name] - ociRuntime, err := newOCIRuntime(name, runtime.config.RuntimePath, runtime.conmonPath, runtime.config, supportsJSON) + ociRuntime, err := newOCIRuntime(name, runtime.config.RuntimePath, runtime.conmonPath, runtime.config, json, nocgroups) if err != nil { return err } @@ -1045,15 +1061,10 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) { // Initialize remaining OCI runtimes for name, paths := range runtime.config.OCIRuntimes { - supportsJSON := false - for _, r := range runtime.config.RuntimeSupportsJSON { - if r == name { - supportsJSON = true - break - } - } + json := supportsJSON[name] + nocgroups := supportsNoCgroups[name] - ociRuntime, err := newOCIRuntime(name, paths, runtime.conmonPath, runtime.config, supportsJSON) + ociRuntime, err := 
newOCIRuntime(name, paths, runtime.conmonPath, runtime.config, json, nocgroups) if err != nil { // Don't fatally error. // This will allow us to ship configs including optional @@ -1073,15 +1084,10 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) { if strings.HasPrefix(runtime.config.OCIRuntime, "/") { name := filepath.Base(runtime.config.OCIRuntime) - supportsJSON := false - for _, r := range runtime.config.RuntimeSupportsJSON { - if r == name { - supportsJSON = true - break - } - } + json := supportsJSON[name] + nocgroups := supportsNoCgroups[name] - ociRuntime, err := newOCIRuntime(name, []string{runtime.config.OCIRuntime}, runtime.conmonPath, runtime.config, supportsJSON) + ociRuntime, err := newOCIRuntime(name, []string{runtime.config.OCIRuntime}, runtime.conmonPath, runtime.config, json, nocgroups) if err != nil { return err } diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index acd317d20..bffce7bca 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -8,7 +8,7 @@ import ( "strings" "time" - config2 "github.com/containers/libpod/libpod/define" + "github.com/containers/libpod/libpod/define" "github.com/containers/libpod/libpod/events" "github.com/containers/libpod/pkg/rootless" "github.com/containers/storage/pkg/stringid" @@ -35,7 +35,7 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options .. r.lock.Lock() defer r.lock.Unlock() if !r.valid { - return nil, config2.ErrRuntimeStopped + return nil, define.ErrRuntimeStopped } return r.newContainer(ctx, rSpec, options...) 
} @@ -45,7 +45,7 @@ func (r *Runtime) RestoreContainer(ctx context.Context, rSpec *spec.Spec, config r.lock.Lock() defer r.lock.Unlock() if !r.valid { - return nil, config2.ErrRuntimeStopped + return nil, define.ErrRuntimeStopped } ctr, err := r.initContainerVariables(rSpec, config) @@ -67,7 +67,7 @@ func (r *Runtime) RestoreContainer(ctx context.Context, rSpec *spec.Spec, config func (r *Runtime) initContainerVariables(rSpec *spec.Spec, config *ContainerConfig) (c *Container, err error) { if rSpec == nil { - return nil, errors.Wrapf(config2.ErrInvalidArg, "must provide a valid runtime spec to create container") + return nil, errors.Wrapf(define.ErrInvalidArg, "must provide a valid runtime spec to create container") } ctr := new(Container) ctr.config = new(ContainerConfig) @@ -100,7 +100,7 @@ func (r *Runtime) initContainerVariables(rSpec *spec.Spec, config *ContainerConf ctr.state.BindMounts = make(map[string]string) - ctr.config.StopTimeout = config2.CtrRemoveTimeout + ctr.config.StopTimeout = define.CtrRemoveTimeout ctr.config.OCIRuntime = r.defaultOCIRuntime.name @@ -152,7 +152,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (c *Contai }() ctr.valid = true - ctr.state.State = config2.ContainerStateConfigured + ctr.state.State = define.ContainerStateConfigured ctr.runtime = r if ctr.config.OCIRuntime == "" { @@ -160,11 +160,18 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (c *Contai } else { ociRuntime, ok := r.ociRuntimes[ctr.config.OCIRuntime] if !ok { - return nil, errors.Wrapf(config2.ErrInvalidArg, "requested OCI runtime %s is not available", ctr.config.OCIRuntime) + return nil, errors.Wrapf(define.ErrInvalidArg, "requested OCI runtime %s is not available", ctr.config.OCIRuntime) } ctr.ociRuntime = ociRuntime } + // Check NoCgroups support + if ctr.config.NoCgroups { + if !ctr.ociRuntime.supportsNoCgroups { + return nil, errors.Wrapf(define.ErrInvalidArg, "requested OCI runtime %s is not compatible with 
NoCgroups", ctr.ociRuntime.name) + } + } + var pod *Pod if ctr.config.Pod != "" { // Get the pod from state @@ -183,43 +190,67 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (c *Contai ctr.config.Name = name } - // Check CGroup parent sanity, and set it if it was not set - switch r.config.CgroupManager { - case CgroupfsCgroupsManager: - if ctr.config.CgroupParent == "" { - if pod != nil && pod.config.UsePodCgroup { - podCgroup, err := pod.CgroupPath() - if err != nil { - return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID()) + // If CGroups are disabled, we MUST create a PID namespace. + // Otherwise, the OCI runtime won't be able to stop our container. + if ctr.config.NoCgroups { + if ctr.config.Spec.Linux == nil { + return nil, errors.Wrapf(define.ErrInvalidArg, "must provide Linux namespace configuration in OCI spec when using NoCgroups") + } + foundPid := false + for _, ns := range ctr.config.Spec.Linux.Namespaces { + if ns.Type == spec.PIDNamespace { + foundPid = true + if ns.Path != "" { + return nil, errors.Wrapf(define.ErrInvalidArg, "containers not creating CGroups must create a private PID namespace - cannot use another") } - if podCgroup == "" { - return nil, errors.Wrapf(config2.ErrInternal, "pod %s cgroup is not set", pod.ID()) + break + } + } + if !foundPid { + return nil, errors.Wrapf(define.ErrInvalidArg, "containers not creating CGroups must create a private PID namespace") + } + } + + // Check CGroup parent sanity, and set it if it was not set. + // Only if we're actually configuring CGroups. 
+ if !ctr.config.NoCgroups { + switch r.config.CgroupManager { + case CgroupfsCgroupsManager: + if ctr.config.CgroupParent == "" { + if pod != nil && pod.config.UsePodCgroup { + podCgroup, err := pod.CgroupPath() + if err != nil { + return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID()) + } + if podCgroup == "" { + return nil, errors.Wrapf(define.ErrInternal, "pod %s cgroup is not set", pod.ID()) + } + ctr.config.CgroupParent = podCgroup + } else { + ctr.config.CgroupParent = CgroupfsDefaultCgroupParent } - ctr.config.CgroupParent = podCgroup - } else { - ctr.config.CgroupParent = CgroupfsDefaultCgroupParent + } else if strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") { + return nil, errors.Wrapf(define.ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs") } - } else if strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") { - return nil, errors.Wrapf(config2.ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs") - } - case SystemdCgroupsManager: - if ctr.config.CgroupParent == "" { - if pod != nil && pod.config.UsePodCgroup { - podCgroup, err := pod.CgroupPath() - if err != nil { - return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID()) + case SystemdCgroupsManager: + if ctr.config.CgroupParent == "" { + if pod != nil && pod.config.UsePodCgroup { + podCgroup, err := pod.CgroupPath() + if err != nil { + return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID()) + } + ctr.config.CgroupParent = podCgroup + } else if rootless.IsRootless() { + ctr.config.CgroupParent = SystemdDefaultRootlessCgroupParent + } else { + ctr.config.CgroupParent = SystemdDefaultCgroupParent } - ctr.config.CgroupParent = podCgroup - } else if rootless.IsRootless() { - ctr.config.CgroupParent = SystemdDefaultRootlessCgroupParent - } else { - ctr.config.CgroupParent = SystemdDefaultCgroupParent + } else if len(ctr.config.CgroupParent) < 6 || 
!strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") { + return nil, errors.Wrapf(define.ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups") } - } else if len(ctr.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") { - return nil, errors.Wrapf(config2.ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups") + default: + return nil, errors.Wrapf(define.ErrInvalidArg, "unsupported CGroup manager: %s - cannot validate cgroup parent", r.config.CgroupManager) } - default: - return nil, errors.Wrapf(config2.ErrInvalidArg, "unsupported CGroup manager: %s - cannot validate cgroup parent", r.config.CgroupManager) } if ctr.restoreFromCheckpoint { @@ -262,7 +293,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (c *Contai ctrNamedVolumes = append(ctrNamedVolumes, dbVol) // The volume exists, we're good continue - } else if errors.Cause(err) != config2.ErrNoSuchVolume { + } else if errors.Cause(err) != define.ErrNoSuchVolume { return nil, errors.Wrapf(err, "error retrieving named volume %s for new container", vol.Name) } @@ -275,10 +306,6 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (c *Contai return nil, errors.Wrapf(err, "error creating named volume %q", vol.Name) } - if err := ctr.copyWithTarFromImage(vol.Dest, newVol.MountPoint()); err != nil && !os.IsNotExist(err) { - return nil, errors.Wrapf(err, "Failed to copy content into new volume mount %q", vol.Name) - } - ctrNamedVolumes = append(ctrNamedVolumes, newVol) } @@ -386,7 +413,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, } if !r.valid { - return config2.ErrRuntimeStopped + return define.ErrRuntimeStopped } // Update the container to get current state @@ -402,7 +429,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, } } - if c.state.State == 
config2.ContainerStatePaused { + if c.state.State == define.ContainerStatePaused { if err := c.ociRuntime.killContainer(c, 9); err != nil { return err } @@ -416,7 +443,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, } // Check that the container's in a good state to be removed - if c.state.State == config2.ContainerStateRunning { + if c.state.State == define.ContainerStateRunning { if err := c.stop(c.StopTimeout()); err != nil { return errors.Wrapf(err, "cannot remove container %s as it could not be stopped", c.ID()) } @@ -439,7 +466,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, } if len(deps) != 0 { depsStr := strings.Join(deps, ", ") - return errors.Wrapf(config2.ErrCtrExists, "container %s has dependent containers which must be removed before it: %s", c.ID(), depsStr) + return errors.Wrapf(define.ErrCtrExists, "container %s has dependent containers which must be removed before it: %s", c.ID(), depsStr) } } @@ -483,8 +510,8 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, // Delete the container. 
// Not needed in Configured and Exited states, where the container // doesn't exist in the runtime - if c.state.State != config2.ContainerStateConfigured && - c.state.State != config2.ContainerStateExited { + if c.state.State != define.ContainerStateConfigured && + c.state.State != define.ContainerStateExited { if err := c.delete(ctx); err != nil { if cleanupErr == nil { cleanupErr = err @@ -514,7 +541,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, if !volume.IsCtrSpecific() { continue } - if err := runtime.removeVolume(ctx, volume, false); err != nil && err != config2.ErrNoSuchVolume && err != config2.ErrVolumeBeingUsed { + if err := runtime.removeVolume(ctx, volume, false); err != nil && err != define.ErrNoSuchVolume && err != define.ErrVolumeBeingUsed { logrus.Errorf("cleanup volume (%s): %v", v, err) } } @@ -529,7 +556,7 @@ func (r *Runtime) GetContainer(id string) (*Container, error) { defer r.lock.RUnlock() if !r.valid { - return nil, config2.ErrRuntimeStopped + return nil, define.ErrRuntimeStopped } return r.state.Container(id) @@ -541,7 +568,7 @@ func (r *Runtime) HasContainer(id string) (bool, error) { defer r.lock.RUnlock() if !r.valid { - return false, config2.ErrRuntimeStopped + return false, define.ErrRuntimeStopped } return r.state.HasContainer(id) @@ -554,7 +581,7 @@ func (r *Runtime) LookupContainer(idOrName string) (*Container, error) { defer r.lock.RUnlock() if !r.valid { - return nil, config2.ErrRuntimeStopped + return nil, define.ErrRuntimeStopped } return r.state.LookupContainer(idOrName) } @@ -568,7 +595,7 @@ func (r *Runtime) GetContainers(filters ...ContainerFilter) ([]*Container, error defer r.lock.RUnlock() if !r.valid { - return nil, config2.ErrRuntimeStopped + return nil, define.ErrRuntimeStopped } ctrs, err := r.state.AllContainers() @@ -601,7 +628,7 @@ func (r *Runtime) GetAllContainers() ([]*Container, error) { func (r *Runtime) GetRunningContainers() ([]*Container, error) { running := func(c 
*Container) bool { state, _ := c.State() - return state == config2.ContainerStateRunning + return state == define.ContainerStateRunning } return r.GetContainers(running) } @@ -629,7 +656,7 @@ func (r *Runtime) GetLatestContainer() (*Container, error) { return nil, errors.Wrapf(err, "unable to find latest container") } if len(ctrs) == 0 { - return nil, config2.ErrNoSuchCtr + return nil, define.ErrNoSuchCtr } for containerIndex, ctr := range ctrs { createdTime := ctr.config.CreatedTime diff --git a/libpod/stats.go b/libpod/stats.go index 776870bd2..5513abce5 100644 --- a/libpod/stats.go +++ b/libpod/stats.go @@ -19,6 +19,10 @@ func (c *Container) GetContainerStats(previousStats *ContainerStats) (*Container stats.ContainerID = c.ID() stats.Name = c.Name() + if c.config.NoCgroups { + return nil, errors.Wrapf(define.ErrNoCgroups, "cannot run top on container %s as it did not create a cgroup", c.ID()) + } + if !c.batched { c.lock.Lock() defer c.lock.Unlock() diff --git a/libpod/volume.go b/libpod/volume.go index b4de3aedc..c4771bbb8 100644 --- a/libpod/volume.go +++ b/libpod/volume.go @@ -57,6 +57,13 @@ type VolumeState struct { // On incrementing from 0, the volume will be mounted on the host. // On decrementing to 0, the volume will be unmounted on the host. MountCount uint `json:"mountCount"` + // NeedsCopyUp indicates that the next time the volume is mounted into + // a container, the container will "copy up" the contents of the + // mountpoint into the volume. + // This should only be done once. As such, this is set at container + // create time, then cleared after the copy up is done and never set + // again. 
+ NeedsCopyUp bool `json:"notYetMounted,omitempty"` } // Name retrieves the volume's name diff --git a/libpod/volume_internal.go b/libpod/volume_internal.go index 2e886e1b0..42b935e7c 100644 --- a/libpod/volume_internal.go +++ b/libpod/volume_internal.go @@ -11,9 +11,11 @@ import ( func newVolume(runtime *Runtime) (*Volume, error) { volume := new(Volume) volume.config = new(VolumeConfig) + volume.state = new(VolumeState) volume.runtime = runtime volume.config.Labels = make(map[string]string) volume.config.Options = make(map[string]string) + volume.state.NeedsCopyUp = true return volume, nil } diff --git a/pkg/annotations/annotations.go b/pkg/annotations/annotations.go index fe2591a0c..19b1029d1 100644 --- a/pkg/annotations/annotations.go +++ b/pkg/annotations/annotations.go @@ -102,6 +102,10 @@ const ( // CNIResult is the JSON string representation of the Result from CNI CNIResult = "io.kubernetes.cri-o.CNIResult" + + // ContainerManager is the annotation key for indicating the creator and + // manager of the container + ContainerManager = "io.container.manager" ) // ContainerType values @@ -112,3 +116,7 @@ const ( // ContainerTypeContainer represents a container running within a pod ContainerTypeContainer = "container" ) + +// ContainerManagerLibpod indicates that libpod created and manages the +// container +const ContainerManagerLibpod = "libpod" diff --git a/pkg/cgroups/cgroups_supported.go b/pkg/cgroups/cgroups_supported.go index fcd44dfc8..2a36777d4 100644 --- a/pkg/cgroups/cgroups_supported.go +++ b/pkg/cgroups/cgroups_supported.go @@ -3,8 +3,15 @@ package cgroups import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strings" "sync" "syscall" + + "github.com/pkg/errors" ) var ( @@ -25,3 +32,58 @@ func IsCgroup2UnifiedMode() (bool, error) { }) return isUnified, isUnifiedErr } + +// UserOwnsCurrentSystemdCgroup checks whether the current EUID owns the +// current cgroup. 
+func UserOwnsCurrentSystemdCgroup() (bool, error) { + uid := os.Geteuid() + + cgroup2, err := IsCgroup2UnifiedMode() + if err != nil { + return false, err + } + + f, err := os.Open("/proc/self/cgroup") + if err != nil { + return false, errors.Wrapf(err, "open file /proc/self/cgroup") + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := scanner.Text() + parts := strings.SplitN(line, ":", 3) + + if len(parts) < 3 { + continue + } + + var cgroupPath string + + if cgroup2 { + cgroupPath = filepath.Join(cgroupRoot, parts[2]) + } else { + if parts[1] != "name=systemd" { + continue + } + cgroupPath = filepath.Join(cgroupRoot, "systemd", parts[2]) + } + + st, err := os.Stat(cgroupPath) + if err != nil { + return false, err + } + s := st.Sys() + if s == nil { + return false, fmt.Errorf("error stat cgroup path %s", cgroupPath) + } + + if int(s.(*syscall.Stat_t).Uid) != uid { + return false, nil + } + } + if err := scanner.Err(); err != nil { + return false, errors.Wrapf(err, "parsing file /proc/self/cgroup") + } + return true, nil +} diff --git a/pkg/cgroups/cgroups_unsupported.go b/pkg/cgroups/cgroups_unsupported.go index 9dc196e42..cd140fbf3 100644 --- a/pkg/cgroups/cgroups_unsupported.go +++ b/pkg/cgroups/cgroups_unsupported.go @@ -6,3 +6,9 @@ package cgroups func IsCgroup2UnifiedMode() (bool, error) { return false, nil } + +// UserOwnsCurrentSystemdCgroup checks whether the current EUID owns the +// current cgroup. 
+func UserOwnsCurrentSystemdCgroup() (bool, error) { + return false, nil +} diff --git a/pkg/spec/createconfig.go b/pkg/spec/createconfig.go index 3f70e5935..c17172016 100644 --- a/pkg/spec/createconfig.go +++ b/pkg/spec/createconfig.go @@ -64,6 +64,7 @@ type CreateConfig struct { CidFile string ConmonPidFile string Cgroupns string + Cgroups string CgroupParent string // cgroup-parent Command []string // Full command that will be used UserCommand []string // User-entered command (or image CMD) @@ -206,6 +207,9 @@ func (c *CreateConfig) getContainerCreateOptions(runtime *libpod.Runtime, pod *l logrus.Debugf("adding container to pod %s", c.Pod) options = append(options, runtime.WithPod(pod)) } + if c.Cgroups == "disabled" { + options = append(options, libpod.WithNoCgroups()) + } if len(c.PortBindings) > 0 { portBindings, err = c.CreatePortBindings() if err != nil { diff --git a/pkg/spec/spec.go b/pkg/spec/spec.go index 44bbda885..38f9c7306 100644 --- a/pkg/spec/spec.go +++ b/pkg/spec/spec.go @@ -396,6 +396,18 @@ func (config *CreateConfig) createConfigToOCISpec(runtime *libpod.Runtime, userM } } + switch config.Cgroups { + case "disabled": + if addedResources { + return nil, errors.New("cannot specify resource limits when cgroups are disabled is specified") + } + configSpec.Linux.Resources = &spec.LinuxResources{} + case "enabled", "": + // Do nothing + default: + return nil, errors.New("unrecognized option for cgroups; supported are 'default' and 'disabled'") + } + // Add annotations if configSpec.Annotations == nil { configSpec.Annotations = make(map[string]string) diff --git a/rootless.md b/rootless.md index c5033881b..960430d54 100644 --- a/rootless.md +++ b/rootless.md @@ -6,14 +6,14 @@ Contributors are more than welcomed to help with this work. If you decide to ca * Podman can not create containers that bind to ports < 1024. * The kernel does not allow processes without CAP_NET_BIND_SERVICE to bind to low ports. 
-* Lacking “How To” documentation or documentation in general -* If /etc/subuid and /etc/subgid not setup for a user, then podman commands +* “How To” documentation is patchy at best. +* If /etc/subuid and /etc/subgid are not setup for a user, then podman commands can easily fail * This can be a big issue on machines using Network Based Password information (FreeIPA, Active Directory, LDAP) * We are working to get support for NSSWITCH on the /etc/subuid and /etc/subgid files. -* No cgroup Support (hopefully fixed when cgroups V2 happens). - * Cgroups V1 does not safely support cgroup delegation. - * Cgroups V2 development for container support is ongoing. +* No cgroup V1 Support + * cgroup V1 does not safely support cgroup delegation. + * However, cgroup V2 provides cgroup delegation and is available on Fedora starting with version 29 and other Linux distributions. * Can not share container images with CRI-O or other users * Difficult to use additional stores for sharing content * Does not work on NFS or parallel filesystem homedirs (e.g. [GPFS](https://www.ibm.com/support/knowledgecenter/en/SSFKCN/gpfs_welcome.html)) @@ -28,13 +28,13 @@ can easily fail * No CNI Support * CNI wants to modify IPTables, plus other network manipulation that requires CAP_SYS_ADMIN. * There is potential we could probably do some sort of blacklisting of the relevant plugins, and add a new plugin for rootless networking - slirp4netns as one example and there may be others -* Cannot use ping +* Cannot use ping out of the box. * [(Can be fixed by setting sysctl on host)](https://github.com/containers/libpod/blob/master/troubleshooting.md#5-rootless-containers-cannot-ping-hosts) -* Requires new shadow-utils (not found in older (RHEL7/Centos7 distros) Should be fixed in RHEL7.7 release +* Requires new shadow-utils (not found in older (RHEL7/Centos7 distros) Should be fixed in RHEL7.7 release) * A few commands do not work. 
* mount/unmount (on fuse-overlay) * Only works if you enter the mount namespace with a tool like buildah unshare - * podman stats (Lack of Cgroup support) + * podman stats (Without cgroup V2 support) * Checkpoint and Restore (CRIU requires root) * Pause and Unpause (no freezer cgroup) * Issues with higher UIDs can cause builds to fail diff --git a/test/e2e/container_inspect_test.go b/test/e2e/container_inspect_test.go new file mode 100644 index 000000000..91c025197 --- /dev/null +++ b/test/e2e/container_inspect_test.go @@ -0,0 +1,45 @@ +package integration + +import ( + "os" + + "github.com/containers/libpod/pkg/annotations" + . "github.com/containers/libpod/test/utils" + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" +) + +var _ = Describe("Podman container inspect", func() { + var ( + tempdir string + err error + podmanTest *PodmanTestIntegration + ) + + BeforeEach(func() { + tempdir, err = CreateTempDirInTempDir() + if err != nil { + os.Exit(1) + } + podmanTest = PodmanTestCreate(tempdir) + podmanTest.Setup() + podmanTest.SeedImages() + }) + + AfterEach(func() { + podmanTest.CleanupPod() + f := CurrentGinkgoTestDescription() + processTestResult(f) + }) + + It("podman inspect a container for the container manager annotation", func() { + const testContainer = "container-inspect-test-1" + setup := podmanTest.RunTopContainer(testContainer) + setup.WaitWithDefaultTimeout() + Expect(setup.ExitCode()).To(Equal(0)) + + data := podmanTest.InspectContainer(testContainer) + Expect(data[0].Config.Annotations[annotations.ContainerManager]). 
+ To(Equal(annotations.ContainerManagerLibpod)) + }) +}) diff --git a/test/e2e/run_test.go b/test/e2e/run_test.go index 6e102cfa5..4d2cee8e3 100644 --- a/test/e2e/run_test.go +++ b/test/e2e/run_test.go @@ -903,4 +903,75 @@ USER mail` } Expect(found).To(BeTrue()) }) + + It("podman run with cgroups=disabled runs without cgroups", func() { + SkipIfRemote() + SkipIfRootless() + // Only works on crun + if !strings.Contains(podmanTest.OCIRuntime, "crun") { + Skip("Test only works on crun") + } + + curCgroupsBytes, err := ioutil.ReadFile("/proc/self/cgroup") + Expect(err).To(BeNil()) + var curCgroups string = string(curCgroupsBytes) + fmt.Printf("Output:\n%s\n", curCgroups) + Expect(curCgroups).To(Not(Equal(""))) + + ctrName := "testctr" + container := podmanTest.Podman([]string{"run", "--name", ctrName, "-d", "--cgroups=disabled", ALPINE, "top"}) + container.WaitWithDefaultTimeout() + Expect(container.ExitCode()).To(Equal(0)) + + // Get PID and get cgroups of that PID + inspectOut := podmanTest.InspectContainer(ctrName) + Expect(len(inspectOut)).To(Equal(1)) + pid := inspectOut[0].State.Pid + Expect(pid).To(Not(Equal(0))) + Expect(inspectOut[0].HostConfig.CgroupParent).To(Equal("")) + + ctrCgroupsBytes, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid)) + Expect(err).To(BeNil()) + var ctrCgroups string = string(ctrCgroupsBytes) + fmt.Printf("Output\n:%s\n", ctrCgroups) + Expect(curCgroups).To(Equal(ctrCgroups)) + }) + + It("podman run with cgroups=enabled makes cgroups", func() { + SkipIfRemote() + SkipIfRootless() + // Only works on crun + if !strings.Contains(podmanTest.OCIRuntime, "crun") { + Skip("Test only works on crun") + } + + curCgroupsBytes, err := ioutil.ReadFile("/proc/self/cgroup") + Expect(err).To(BeNil()) + var curCgroups string = string(curCgroupsBytes) + fmt.Printf("Output:\n%s\n", curCgroups) + Expect(curCgroups).To(Not(Equal(""))) + + ctrName := "testctr" + container := podmanTest.Podman([]string{"run", "--name", ctrName, "-d", 
"--cgroups=enabled", ALPINE, "top"}) + container.WaitWithDefaultTimeout() + Expect(container.ExitCode()).To(Equal(0)) + + // Get PID and get cgroups of that PID + inspectOut := podmanTest.InspectContainer(ctrName) + Expect(len(inspectOut)).To(Equal(1)) + pid := inspectOut[0].State.Pid + Expect(pid).To(Not(Equal(0))) + + ctrCgroupsBytes, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid)) + Expect(err).To(BeNil()) + var ctrCgroups string = string(ctrCgroupsBytes) + fmt.Printf("Output\n:%s\n", ctrCgroups) + Expect(curCgroups).To(Not(Equal(ctrCgroups))) + }) + + It("podman run with cgroups=garbage errors", func() { + session := podmanTest.Podman([]string{"run", "-d", "--cgroups=garbage", ALPINE, "top"}) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To(Not(Equal(0))) + }) }) diff --git a/test/e2e/run_volume_test.go b/test/e2e/run_volume_test.go index 551e86b93..fc1998ab2 100644 --- a/test/e2e/run_volume_test.go +++ b/test/e2e/run_volume_test.go @@ -249,4 +249,25 @@ var _ = Describe("Podman run with volumes", func() { fmt.Printf("Output: %s", mountOut3) Expect(strings.Contains(mountOut3, volName)).To(BeFalse()) }) + + It("podman named volume copyup", func() { + baselineSession := podmanTest.Podman([]string{"run", "--rm", "-t", "-i", ALPINE, "ls", "/etc/apk/"}) + baselineSession.WaitWithDefaultTimeout() + Expect(baselineSession.ExitCode()).To(Equal(0)) + baselineOutput := baselineSession.OutputToString() + + inlineVolumeSession := podmanTest.Podman([]string{"run", "--rm", "-t", "-i", "-v", "testvol1:/etc/apk", ALPINE, "ls", "/etc/apk/"}) + inlineVolumeSession.WaitWithDefaultTimeout() + Expect(inlineVolumeSession.ExitCode()).To(Equal(0)) + Expect(inlineVolumeSession.OutputToString()).To(Equal(baselineOutput)) + + makeVolumeSession := podmanTest.Podman([]string{"volume", "create", "testvol2"}) + makeVolumeSession.WaitWithDefaultTimeout() + Expect(makeVolumeSession.ExitCode()).To(Equal(0)) + + separateVolumeSession := 
podmanTest.Podman([]string{"run", "--rm", "-t", "-i", "-v", "testvol2:/etc/apk", ALPINE, "ls", "/etc/apk/"}) + separateVolumeSession.WaitWithDefaultTimeout() + Expect(separateVolumeSession.ExitCode()).To(Equal(0)) + Expect(separateVolumeSession.OutputToString()).To(Equal(baselineOutput)) + }) }) diff --git a/utils/utils_supported.go b/utils/utils_supported.go index 8b0ba4438..8bc232179 100644 --- a/utils/utils_supported.go +++ b/utils/utils_supported.go @@ -3,6 +3,8 @@ package utils import ( + "github.com/containers/libpod/pkg/cgroups" + "github.com/containers/libpod/pkg/rootless" systemdDbus "github.com/coreos/go-systemd/dbus" "github.com/godbus/dbus" ) @@ -10,9 +12,19 @@ import ( // RunUnderSystemdScope adds the specified pid to a systemd scope func RunUnderSystemdScope(pid int, slice string, unitName string) error { var properties []systemdDbus.Property - conn, err := systemdDbus.New() - if err != nil { - return err + var conn *systemdDbus.Conn + var err error + + if rootless.IsRootless() { + conn, err = cgroups.GetUserConnection(rootless.GetRootlessUID()) + if err != nil { + return err + } + } else { + conn, err = systemdDbus.New() + if err != nil { + return err + } } properties = append(properties, systemdDbus.PropSlice(slice)) properties = append(properties, newProp("PIDs", []uint32{uint32(pid)})) |