aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CONTRIBUTING.md7
-rw-r--r--RELEASE_NOTES.md2
-rw-r--r--cmd/podman/containers/stats.go5
-rw-r--r--cmd/podman/images/save.go4
-rw-r--r--cmd/podman/kube/generate.go29
-rw-r--r--cmd/podman/manifest/create.go2
-rw-r--r--cmd/podman/registry/config.go2
-rw-r--r--docs/source/markdown/.gitignore9
-rw-r--r--docs/source/markdown/options/arch.md1
-rw-r--r--docs/source/markdown/options/authfile.md6
-rw-r--r--docs/source/markdown/options/cert-dir.md5
-rw-r--r--docs/source/markdown/options/cpu-period.md4
-rw-r--r--docs/source/markdown/options/cpu-quota.md4
-rw-r--r--docs/source/markdown/options/cpu-shares.md4
-rw-r--r--docs/source/markdown/options/cpuset-cpus.md4
-rw-r--r--docs/source/markdown/options/cpuset-mems.md4
-rw-r--r--docs/source/markdown/options/platform.md1
-rw-r--r--docs/source/markdown/podman-auto-update.1.md.in (renamed from docs/source/markdown/podman-auto-update.1.md)6
-rw-r--r--docs/source/markdown/podman-build.1.md.in33
-rw-r--r--docs/source/markdown/podman-container-runlabel.1.md.in (renamed from docs/source/markdown/podman-container-runlabel.1.md)5
-rw-r--r--docs/source/markdown/podman-create.1.md.in8
-rw-r--r--docs/source/markdown/podman-image-sign.1.md.in (renamed from docs/source/markdown/podman-image-sign.1.md)7
-rw-r--r--docs/source/markdown/podman-kube-play.1.md.in13
-rw-r--r--docs/source/markdown/podman-login.1.md.in (renamed from docs/source/markdown/podman-login.1.md)7
-rw-r--r--docs/source/markdown/podman-logout.1.md.in (renamed from docs/source/markdown/podman-logout.1.md)7
-rw-r--r--docs/source/markdown/podman-manifest-add.1.md.in (renamed from docs/source/markdown/podman-manifest-add.1.md)8
-rw-r--r--docs/source/markdown/podman-manifest-create.1.md2
-rw-r--r--docs/source/markdown/podman-manifest-push.1.md.in (renamed from docs/source/markdown/podman-manifest-push.1.md)8
-rw-r--r--docs/source/markdown/podman-pod-create.1.md.in8
-rw-r--r--docs/source/markdown/podman-pull.1.md.in14
-rw-r--r--docs/source/markdown/podman-push.1.md.in (renamed from docs/source/markdown/podman-push.1.md)8
-rw-r--r--docs/source/markdown/podman-run.1.md.in8
-rw-r--r--docs/source/markdown/podman-search.1.md.in (renamed from docs/source/markdown/podman-search.1.md)7
-rw-r--r--docs/source/markdown/podman-stats.1.md10
-rw-r--r--go.mod2
-rw-r--r--go.sum4
-rwxr-xr-xhack/markdown-preprocess-review56
-rw-r--r--libpod/events/events_freebsd.go23
-rw-r--r--libpod/events/events_unsupported.go4
-rw-r--r--libpod/events/logfile.go4
-rw-r--r--libpod/networking_unsupported.go7
-rw-r--r--libpod/oci_conmon_attach_common.go305
-rw-r--r--libpod/oci_conmon_attach_freebsd.go21
-rw-r--r--libpod/oci_conmon_attach_linux.go297
-rw-r--r--libpod/oci_conmon_common.go1600
-rw-r--r--libpod/oci_conmon_exec_common.go (renamed from libpod/oci_conmon_exec_linux.go)0
-rw-r--r--libpod/oci_conmon_freebsd.go24
-rw-r--r--libpod/oci_conmon_linux.go1657
-rw-r--r--libpod/oci_conmon_unsupported.go4
-rw-r--r--libpod/runtime.go113
-rw-r--r--pkg/api/handlers/compat/containers.go1
-rw-r--r--test/apiv2/10-images.at2
-rw-r--r--test/e2e/manifest_test.go28
-rw-r--r--test/e2e/stats_test.go3
-rw-r--r--test/system/710-kube.bats171
-rw-r--r--test/system/helpers.bash14
-rw-r--r--troubleshooting.md33
-rw-r--r--vendor/github.com/containers/common/pkg/config/config.go23
-rw-r--r--vendor/github.com/containers/common/pkg/config/default.go60
-rw-r--r--vendor/modules.txt2
-rw-r--r--version/version.go2
61 files changed, 2477 insertions, 2235 deletions
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a5ee0c1df..d0f4ceb02 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -97,6 +97,13 @@ You need install some dependencies before building a binary.
$ export PKG_CONFIG_PATH="/usr/lib/pkgconfig"
```
+#### Debian / Ubuntu
+
+ ```shell
+ $ sudo apt-get install -y libsystemd-dev libgpgme-dev libseccomp-dev
+ $ export PKG_CONFIG_PATH="/usr/lib/pkgconfig"
+ ```
+
### Building binaries and test your changes
To test your changes do `make binaries` to generate your binaries.
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index b762bbbe3..8a9672507 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -11,7 +11,7 @@
- The `podman play kube` command now supports volumes with the `BlockDevice` and `CharDevice` types ([#13951](https://github.com/containers/podman/issues/13951)).
- The `podman play kube` command now features a new flag, `--userns`, to set the user namespace of created pods. Two values are allowed at present: `host` and `auto` ([#7504](https://github.com/containers/podman/issues/7504)).
- The `podman play kube` command now supports setting the type of created init containers via the `io.podman.annotations.init.container.type` annotation.
-- Pods now have include an exit policy (configurable via the `--exit-policy` option to `podman pod create`), which determines what will happen to the pod's infra container when the entire pod stops. The default, `continue`, acts as Podman currently does, while a new option, `stop`, stops the infra container after the last container in the pod stops, and is used by default for pods from `podman play kube` ([#13464](https://github.com/containers/podman/issues/13464)).
+- The `podman pod create` command now supports an exit policy (configurable via the `--exit-policy` option), which determines what will happen to the pod's infra container when the entire pod stops. The default, `continue`, acts as Podman currently does, while a new option, `stop`, stops the infra container after the last container in the pod stops. The latter is used for pods created via `podman play kube` ([#13464](https://github.com/containers/podman/issues/13464)).
- The `podman pod create` command now allows the pod's name to be specified as an argument, instead of using the `--name` option - for example, `podman pod create mypod` instead of the prior `podman pod create --name mypod`. Please note that the `--name` option is not deprecated and will continue to work.
- The `podman pod create` command's `--share` option now supports adding namespaces to the set by prefacing them with `+` (as opposed to specifying all namespaces that should be shared) ([#13422](https://github.com/containers/podman/issues/13422)).
- The `podman pod create` command has a new option, `--shm-size`, to specify the size of the `/dev/shm` mount that will be shared if the pod shares its UTS namespace ([#14609](https://github.com/containers/podman/issues/14609)).
diff --git a/cmd/podman/containers/stats.go b/cmd/podman/containers/stats.go
index 0dd8ce80a..f29bbf34c 100644
--- a/cmd/podman/containers/stats.go
+++ b/cmd/podman/containers/stats.go
@@ -58,6 +58,7 @@ type statsOptionsCLI struct {
var (
statsOptions statsOptionsCLI
+ notrunc bool
)
func statFlags(cmd *cobra.Command) {
@@ -69,6 +70,7 @@ func statFlags(cmd *cobra.Command) {
flags.StringVar(&statsOptions.Format, formatFlagName, "", "Pretty-print container statistics to JSON or using a Go template")
_ = cmd.RegisterFlagCompletionFunc(formatFlagName, common.AutocompleteFormat(&containerStats{}))
+ flags.BoolVar(&notrunc, "no-trunc", false, "Do not truncate output")
flags.BoolVar(&statsOptions.NoReset, "no-reset", false, "Disable resetting the screen between intervals")
flags.BoolVar(&statsOptions.NoStream, "no-stream", false, "Disable streaming stats and only pull the first result, default setting is false")
intervalFlagName := "interval"
@@ -186,6 +188,9 @@ type containerStats struct {
}
func (s *containerStats) ID() string {
+ if notrunc {
+ return s.ContainerID
+ }
return s.ContainerID[0:12]
}
diff --git a/cmd/podman/images/save.go b/cmd/podman/images/save.go
index 43366e1b3..ecff0f841 100644
--- a/cmd/podman/images/save.go
+++ b/cmd/podman/images/save.go
@@ -103,8 +103,8 @@ func save(cmd *cobra.Command, args []string) (finalErr error) {
tags []string
succeeded = false
)
- if cmd.Flag("compress").Changed && (saveOpts.Format != define.OCIManifestDir && saveOpts.Format != define.V2s2ManifestDir) {
- return errors.New("--compress can only be set when --format is either 'oci-dir' or 'docker-dir'")
+ if cmd.Flag("compress").Changed && saveOpts.Format != define.V2s2ManifestDir {
+ return errors.New("--compress can only be set when --format is 'docker-dir'")
}
if len(saveOpts.Output) == 0 {
saveOpts.Quiet = true
diff --git a/cmd/podman/kube/generate.go b/cmd/podman/kube/generate.go
index 6df4b55fc..ee2ea51ae 100644
--- a/cmd/podman/kube/generate.go
+++ b/cmd/podman/kube/generate.go
@@ -22,7 +22,7 @@ var (
Whether the input is for a container or pod, Podman will always generate the specification as a pod.`
- generateKubeCmd = &cobra.Command{
+ kubeGenerateCmd = &cobra.Command{
Use: "generate [options] {CONTAINER...|POD...|VOLUME...}",
Short: "Generate Kubernetes YAML from containers, pods or volumes.",
Long: generateDescription,
@@ -35,33 +35,28 @@ var (
podman kube generate volumeName
podman kube generate ctrID podID volumeName --service`,
}
- kubeGenerateDescription = generateDescription
- kubeGenerateCmd = &cobra.Command{
+ generateKubeCmd = &cobra.Command{
Use: "kube [options] {CONTAINER...|POD...|VOLUME...}",
- Short: "Generate Kubernetes YAML from containers, pods or volumes.",
- Long: kubeGenerateDescription,
- RunE: kubeGenerate,
- Args: cobra.MinimumNArgs(1),
- ValidArgsFunction: common.AutocompleteForGenerate,
- Example: `podman kube generate ctrID
- podman kube generate podID
- podman kube generate --service podID
- podman kube generate volumeName
- podman kube generate ctrID podID volumeName --service`,
+ Short: kubeGenerateCmd.Short,
+ Long: kubeGenerateCmd.Long,
+ RunE: kubeGenerateCmd.RunE,
+ Args: kubeGenerateCmd.Args,
+ ValidArgsFunction: kubeGenerateCmd.ValidArgsFunction,
+ Example: kubeGenerateCmd.Example,
}
)
func init() {
registry.Commands = append(registry.Commands, registry.CliCommand{
Command: generateKubeCmd,
- Parent: kubeCmd,
+ Parent: generate.GenerateCmd,
})
generateFlags(generateKubeCmd)
registry.Commands = append(registry.Commands, registry.CliCommand{
Command: kubeGenerateCmd,
- Parent: generate.GenerateCmd,
+ Parent: kubeCmd,
})
generateFlags(kubeGenerateCmd)
}
@@ -103,7 +98,3 @@ func generateKube(cmd *cobra.Command, args []string) error {
fmt.Println(string(content))
return nil
}
-
-func kubeGenerate(cmd *cobra.Command, args []string) error {
- return generateKube(cmd, args)
-}
diff --git a/cmd/podman/manifest/create.go b/cmd/podman/manifest/create.go
index 0a0ea1d88..2ea40d832 100644
--- a/cmd/podman/manifest/create.go
+++ b/cmd/podman/manifest/create.go
@@ -42,7 +42,7 @@ func init() {
})
flags := createCmd.Flags()
flags.BoolVar(&manifestCreateOpts.All, "all", false, "add all of the lists' images if the images to add are lists")
- flags.BoolVar(&manifestCreateOpts.Amend, "amend", false, "modify an existing list if one with the desired name already exists")
+ flags.BoolVarP(&manifestCreateOpts.Amend, "amend", "a", false, "modify an existing list if one with the desired name already exists")
flags.BoolVar(&manifestCreateOpts.Insecure, "insecure", false, "neither require HTTPS nor verify certificates when accessing the registry")
_ = flags.MarkHidden("insecure")
flags.BoolVar(&manifestCreateOpts.TLSVerifyCLI, "tls-verify", true, "require HTTPS and verify certificates when accessing the registry")
diff --git a/cmd/podman/registry/config.go b/cmd/podman/registry/config.go
index cae618b44..a118fdc4d 100644
--- a/cmd/podman/registry/config.go
+++ b/cmd/podman/registry/config.go
@@ -61,7 +61,7 @@ func newPodmanConfig() {
switch runtime.GOOS {
case "darwin", "windows":
mode = entities.TunnelMode
- case "linux":
+ case "linux", "freebsd":
// Some linux clients might only be compiled without ABI
// support (e.g., podman-remote).
if abiSupport && !IsRemote() {
diff --git a/docs/source/markdown/.gitignore b/docs/source/markdown/.gitignore
index 6689b5b71..70f1c2bd7 100644
--- a/docs/source/markdown/.gitignore
+++ b/docs/source/markdown/.gitignore
@@ -1,8 +1,17 @@
+podman-auto-update.1.md
podman-build.1.md
podman-container-clone.1.md
+podman-container-runlabel.1.md
podman-create.1.md
+podman-image-sign.1.md
podman-kube-play.1.md
+podman-login.1.md
+podman-logout.1.md
+podman-manifest-add.1.md
+podman-manifest-push.1.md
podman-pod-clone.1.md
podman-pod-create.1.md
podman-pull.1.md
+podman-push.1.md
podman-run.1.md
+podman-search.1.md
diff --git a/docs/source/markdown/options/arch.md b/docs/source/markdown/options/arch.md
index 005197707..76fb349a0 100644
--- a/docs/source/markdown/options/arch.md
+++ b/docs/source/markdown/options/arch.md
@@ -1,2 +1,3 @@
#### **--arch**=*ARCH*
Override the architecture of the image to be pulled; defaults to the host's architecture. For example, `arm`.
+Unless overridden, subsequent lookups of the same image in the local storage will match this architecture, regardless of the host.
diff --git a/docs/source/markdown/options/authfile.md b/docs/source/markdown/options/authfile.md
new file mode 100644
index 000000000..d6198aa24
--- /dev/null
+++ b/docs/source/markdown/options/authfile.md
@@ -0,0 +1,6 @@
+#### **--authfile**=*path*
+
+Path of the authentication file. Default is `${XDG_RUNTIME_DIR}/containers/auth.json`, which is set using **[podman login](podman-login.1.md)**.
+If the authorization state is not found there, `$HOME/.docker/config.json` is checked, which is set using **docker login**.
+
+Note: There is also the option to override the default path of the authentication file by setting the `REGISTRY_AUTH_FILE` environment variable. This can be done with **export REGISTRY_AUTH_FILE=_path_**.
diff --git a/docs/source/markdown/options/cert-dir.md b/docs/source/markdown/options/cert-dir.md
new file mode 100644
index 000000000..4d05075cf
--- /dev/null
+++ b/docs/source/markdown/options/cert-dir.md
@@ -0,0 +1,5 @@
+#### **--cert-dir**=*path*
+
+Use certificates at *path* (\*.crt, \*.cert, \*.key) to connect to the registry. (Default: /etc/containers/certs.d)
+Please refer to **[containers-certs.d(5)](https://github.com/containers/image/blob/main/docs/containers-certs.d.5.md)** for details.
+(This option is not available with the remote Podman client, including Mac and Windows (excluding WSL2) machines)
diff --git a/docs/source/markdown/options/cpu-period.md b/docs/source/markdown/options/cpu-period.md
index 8df6445e9..efbe6c2ab 100644
--- a/docs/source/markdown/options/cpu-period.md
+++ b/docs/source/markdown/options/cpu-period.md
@@ -5,6 +5,6 @@ duration in microseconds. Once the container's CPU quota is used up, it will
not be scheduled to run until the current period ends. Defaults to 100000
microseconds.
-On some systems, changing the CPU limits may not be allowed for non-root
+On some systems, changing the resource limits may not be allowed for non-root
users. For more details, see
-https://github.com/containers/podman/blob/main/troubleshooting.md#26-running-containers-with-cpu-limits-fails-with-a-permissions-error
+https://github.com/containers/podman/blob/main/troubleshooting.md#26-running-containers-with-resource-limits-fails-with-a-permissions-error
diff --git a/docs/source/markdown/options/cpu-quota.md b/docs/source/markdown/options/cpu-quota.md
index 67b9dee8c..753797bad 100644
--- a/docs/source/markdown/options/cpu-quota.md
+++ b/docs/source/markdown/options/cpu-quota.md
@@ -7,6 +7,6 @@ CPU resource. The limit is a number in microseconds. If a number is provided,
the container will be allowed to use that much CPU time until the CPU period
ends (controllable via **--cpu-period**).
-On some systems, changing the CPU limits may not be allowed for non-root
+On some systems, changing the resource limits may not be allowed for non-root
users. For more details, see
-https://github.com/containers/podman/blob/main/troubleshooting.md#26-running-containers-with-cpu-limits-fails-with-a-permissions-error
+https://github.com/containers/podman/blob/main/troubleshooting.md#26-running-containers-with-resource-limits-fails-with-a-permissions-error
diff --git a/docs/source/markdown/options/cpu-shares.md b/docs/source/markdown/options/cpu-shares.md
index a5aacd2ca..c2115c1bf 100644
--- a/docs/source/markdown/options/cpu-shares.md
+++ b/docs/source/markdown/options/cpu-shares.md
@@ -33,3 +33,7 @@ this can result in the following division of CPU shares:
| 100 | C0 | 0 | 100% of CPU0 |
| 101 | C1 | 1 | 100% of CPU1 |
| 102 | C1 | 2 | 100% of CPU2 |
+
+On some systems, changing the resource limits may not be allowed for non-root
+users. For more details, see
+https://github.com/containers/podman/blob/main/troubleshooting.md#26-running-containers-with-resource-limits-fails-with-a-permissions-error
diff --git a/docs/source/markdown/options/cpuset-cpus.md b/docs/source/markdown/options/cpuset-cpus.md
index d717516a0..a67766897 100644
--- a/docs/source/markdown/options/cpuset-cpus.md
+++ b/docs/source/markdown/options/cpuset-cpus.md
@@ -3,3 +3,7 @@
CPUs in which to allow execution. Can be specified as a comma-separated list
(e.g. **0,1**), as a range (e.g. **0-3**), or any combination thereof
(e.g. **0-3,7,11-15**).
+
+On some systems, changing the resource limits may not be allowed for non-root
+users. For more details, see
+https://github.com/containers/podman/blob/main/troubleshooting.md#26-running-containers-with-resource-limits-fails-with-a-permissions-error
diff --git a/docs/source/markdown/options/cpuset-mems.md b/docs/source/markdown/options/cpuset-mems.md
index d2d13eb54..1eeab7b13 100644
--- a/docs/source/markdown/options/cpuset-mems.md
+++ b/docs/source/markdown/options/cpuset-mems.md
@@ -6,3 +6,7 @@ NUMA systems.
If there are four memory nodes on the system (0-3), use **--cpuset-mems=0,1**
then processes in the container will only use memory from the first
two memory nodes.
+
+On some systems, changing the resource limits may not be allowed for non-root
+users. For more details, see
+https://github.com/containers/podman/blob/main/troubleshooting.md#26-running-containers-with-resource-limits-fails-with-a-permissions-error
diff --git a/docs/source/markdown/options/platform.md b/docs/source/markdown/options/platform.md
index edfa428ff..b66efdfb2 100644
--- a/docs/source/markdown/options/platform.md
+++ b/docs/source/markdown/options/platform.md
@@ -2,3 +2,4 @@
Specify the platform for selecting the image. (Conflicts with --arch and --os)
The `--platform` option can be used to override the current architecture and operating system.
+Unless overridden, subsequent lookups of the same image in the local storage will match this platform, regardless of the host.
diff --git a/docs/source/markdown/podman-auto-update.1.md b/docs/source/markdown/podman-auto-update.1.md.in
index 992c87432..bc92d6165 100644
--- a/docs/source/markdown/podman-auto-update.1.md
+++ b/docs/source/markdown/podman-auto-update.1.md.in
@@ -34,12 +34,8 @@ Systemd units that start and stop a container cannot run a new image.
Podman ships with a `podman-auto-update.service` systemd unit. This unit is triggered daily at midnight by the `podman-auto-update.timer` systemd timer. The timer can be altered for custom time-based updates if desired. The unit can further be invoked by other systemd units (e.g., via the dependency tree) or manually via **systemctl start podman-auto-update.service**.
## OPTIONS
-#### **--authfile**=*path*
-Path of the authentication file. Default is `${XDG_RUNTIME_DIR}/containers/auth.json`, which is set using **[podman login](podman-login.1.md)**.
-If the authorization state is not found there, `$HOME/.docker/config.json` is checked, which is set using **docker login**.
-
-Note: There is also the option to override the default path of the authentication file by setting the `REGISTRY_AUTH_FILE` environment variable. This can be done with **export REGISTRY_AUTH_FILE=_path_**.
+@@option authfile
#### **--dry-run**
diff --git a/docs/source/markdown/podman-build.1.md.in b/docs/source/markdown/podman-build.1.md.in
index c0cf08f3c..760396d0a 100644
--- a/docs/source/markdown/podman-build.1.md.in
+++ b/docs/source/markdown/podman-build.1.md.in
@@ -65,19 +65,11 @@ discarded when writing images in Docker formats.
Set the architecture of the image to be built, and that of the base image to be
pulled, if the build uses one, to the provided value instead of using the
-architecture of the build host. (Examples: arm, arm64, 386, amd64, ppc64le,
-s390x)
+architecture of the build host. Unless overridden, subsequent lookups of the
+same image in the local storage will match this architecture, regardless of the
+host. (Examples: arm, arm64, 386, amd64, ppc64le, s390x)
-#### **--authfile**=*path*
-
-Path of the authentication file. Default is
-${XDG\_RUNTIME\_DIR}/containers/auth.json, which is set using `podman login`.
-If the authorization state is not found there, $HOME/.docker/config.json is
-checked, which is set using `docker login`.
-
-Note: You can also override the default path of the authentication file by
-setting the REGISTRY\_AUTH\_FILE environment variable.
-`export REGISTRY_AUTH_FILE=path`
+@@option authfile
#### **--build-arg**=*arg=value*
@@ -173,10 +165,7 @@ If a capability is specified to both the **--cap-add** and **--cap-drop**
options, it will be dropped, regardless of the order in which the options were
given.
-#### **--cert-dir**=*path*
-
-Use certificates at *path* (\*.crt, \*.cert, \*.key) to connect to the registry. (Default: /etc/containers/certs.d)
-Please refer to containers-certs.d(5) for details. (This option is not available with the remote Podman client, including Mac and Windows (excluding WSL2) machines)
+@@option cert-dir
@@option cgroup-parent
@@ -463,7 +452,8 @@ do not include `History` information in their images.
Set the OS of the image to be built, and that of the base image to be pulled,
if the build uses one, instead of using the current operating system of the
-build host.
+build host. Unless overridden, subsequent lookups of the same image in the
+local storage will match this OS, regardless of the host.
#### **--os-feature**=*feature*
@@ -515,9 +505,12 @@ process.
Set the *os/arch* of the built image (and its base image, if your build uses one)
to the provided value instead of using the current operating system and
-architecture of the host (for example `linux/arm`). If `--platform` is set,
-then the values of the `--arch`, `--os`, and `--variant` options will be
-overridden.
+architecture of the host (for example `linux/arm`). Unless overridden,
+subsequent lookups of the same image in the local storage will match this
+platform, regardless of the host.
+
+If `--platform` is set, then the values of the `--arch`, `--os`, and
+`--variant` options will be overridden.
The `--platform` option can be specified more than once, or given a
comma-separated list of values as its argument. When more than one platform is
diff --git a/docs/source/markdown/podman-container-runlabel.1.md b/docs/source/markdown/podman-container-runlabel.1.md.in
index 40e5392ce..7f462bf70 100644
--- a/docs/source/markdown/podman-container-runlabel.1.md
+++ b/docs/source/markdown/podman-container-runlabel.1.md.in
@@ -29,11 +29,8 @@ As specified by the `--name` option. The format is identical to the one of the
Will be replaced with the current working directory.
## OPTIONS
-#### **--authfile**=*path*
-Path of the containers-auth.json(5) file. Default is ${XDG\_RUNTIME\_DIR}/containers/auth.json, which is set using `podman login`. If the authorization state is not found there, $HOME/.docker/config.json is checked, which is set using `docker login`.
-
-Note: You can also override the default path of the authentication file by setting the REGISTRY\_AUTH\_FILE environment variable. `export REGISTRY_AUTH_FILE=path`
+@@option authfile
#### **--cert-dir**=*path*
diff --git a/docs/source/markdown/podman-create.1.md.in b/docs/source/markdown/podman-create.1.md.in
index f5301c60a..7ec4fc66f 100644
--- a/docs/source/markdown/podman-create.1.md.in
+++ b/docs/source/markdown/podman-create.1.md.in
@@ -83,12 +83,7 @@ error. It can even pretend to be a TTY (this is what most command line
executables expect) and pass along signals. The **-a** option can be set for
each of stdin, stdout, and stderr.
-#### **--authfile**=*path*
-
-Path of the authentication file. Default is ${XDG\_RUNTIME\_DIR}/containers/auth.json
-
-Note: You can also override the default path of the authentication file by setting the REGISTRY\_AUTH\_FILE
-environment variable. `export REGISTRY_AUTH_FILE=path`
+@@option authfile
@@option blkio-weight
@@ -457,6 +452,7 @@ This option conflicts with **--add-host**.
#### **--os**=*OS*
Override the OS of the image to be pulled; defaults to the host's OS. For example, `windows`.
+Unless overridden, subsequent lookups of the same image in the local storage will match this OS, regardless of the host.
@@option passwd-entry
diff --git a/docs/source/markdown/podman-image-sign.1.md b/docs/source/markdown/podman-image-sign.1.md.in
index 035e10743..340cdbd21 100644
--- a/docs/source/markdown/podman-image-sign.1.md
+++ b/docs/source/markdown/podman-image-sign.1.md.in
@@ -19,12 +19,7 @@ By default, the signature will be written into `/var/lib/containers/sigstore` fo
Sign all the manifests of the multi-architecture image (default false).
-#### **--authfile**=*path*
-
-Path of the authentication file. Default is ${XDG\_RUNTIME\_DIR}/containers/auth.json
-
-Note: You can also override the default path of the authentication file by setting the REGISTRY\_AUTH\_FILE
-environment variable. `export REGISTRY_AUTH_FILE=path`
+@@option authfile
#### **--cert-dir**=*path*
diff --git a/docs/source/markdown/podman-kube-play.1.md.in b/docs/source/markdown/podman-kube-play.1.md.in
index 5fc183ee2..14c5f2498 100644
--- a/docs/source/markdown/podman-kube-play.1.md.in
+++ b/docs/source/markdown/podman-kube-play.1.md.in
@@ -112,22 +112,13 @@ and as a result environment variable `FOO` will be set to `bar` for container `c
@@option annotation.container
-#### **--authfile**=*path*
-
-Path of the authentication file. Default is ${XDG\_RUNTIME\_DIR}/containers/auth.json, which is set using `podman login`.
-If the authorization state is not found there, $HOME/.docker/config.json is checked, which is set using `docker login`.
-
-Note: You can also override the default path of the authentication file by setting the REGISTRY\_AUTH\_FILE
-environment variable. `export REGISTRY_AUTH_FILE=path`
+@@option authfile
#### **--build**
Build images even if they are found in the local storage. Use `--build=false` to completely disable builds. (This option is not available with the remote Podman client)
-#### **--cert-dir**=*path*
-
-Use certificates at *path* (\*.crt, \*.cert, \*.key) to connect to the registry. (Default: /etc/containers/certs.d)
-Please refer to containers-certs.d(5) for details. (This option is not available with the remote Podman client, including Mac and Windows (excluding WSL2) machines)
+@@option cert-dir
#### **--configmap**=*path*
diff --git a/docs/source/markdown/podman-login.1.md b/docs/source/markdown/podman-login.1.md.in
index c84b0cc99..6ec207a1e 100644
--- a/docs/source/markdown/podman-login.1.md
+++ b/docs/source/markdown/podman-login.1.md.in
@@ -28,12 +28,7 @@ For more details about format and configurations of the auth.json file, please r
## OPTIONS
-#### **--authfile**=*path*
-
-Path of the authentication file. Default is ${XDG\_RUNTIME\_DIR}/containers/auth.json.
-
-Note: You can also override the default path of the authentication file by setting the REGISTRY\_AUTH\_FILE
-environment variable. `export REGISTRY_AUTH_FILE=path`
+@@option authfile
#### **--cert-dir**=*path*
diff --git a/docs/source/markdown/podman-logout.1.md b/docs/source/markdown/podman-logout.1.md.in
index 96ac98f35..6997bb36e 100644
--- a/docs/source/markdown/podman-logout.1.md
+++ b/docs/source/markdown/podman-logout.1.md.in
@@ -25,12 +25,7 @@ All the cached credentials can be removed by setting the **all** flag.
Remove the cached credentials for all registries in the auth file
-#### **--authfile**=*path*
-
-Path of the authentication file. Default is ${XDG\_RUNTIME\_DIR}/containers/auth.json.
-
-Note: You can also override the default path of the authentication file by setting the REGISTRY\_AUTH\_FILE
-environment variable. `export REGISTRY_AUTH_FILE=path`
+@@option authfile
#### **--help**, **-h**
diff --git a/docs/source/markdown/podman-manifest-add.1.md b/docs/source/markdown/podman-manifest-add.1.md.in
index 5aa7f8341..a1c498e4f 100644
--- a/docs/source/markdown/podman-manifest-add.1.md
+++ b/docs/source/markdown/podman-manifest-add.1.md.in
@@ -33,13 +33,7 @@ the image. If *imageName* refers to a manifest list or image index, the
architecture information will be retrieved from it. Otherwise, it will be
retrieved from the image's configuration information.
-#### **--authfile**=*path*
-
-Path of the authentication file. Default is ${XDG\_RUNTIME\_DIR}/containers/auth.json, which is set using `podman login`.
-If the authorization state is not found there, $HOME/.docker/config.json is checked, which is set using `docker login`.
-
-Note: You can also override the default path of the authentication file by setting the REGISTRY\_AUTH\_FILE
-environment variable. `export REGISTRY_AUTH_FILE=path`
+@@option authfile
#### **--cert-dir**=*path*
diff --git a/docs/source/markdown/podman-manifest-create.1.md b/docs/source/markdown/podman-manifest-create.1.md
index f2aac6069..06a24da2b 100644
--- a/docs/source/markdown/podman-manifest-create.1.md
+++ b/docs/source/markdown/podman-manifest-create.1.md
@@ -22,7 +22,7 @@ If any of the images which should be added to the new list or index are
themselves lists or indexes, add all of their contents. By default, only one
image from such a list will be added to the newly-created list or index.
-#### **--amend**
+#### **--amend**, **-a**
If a manifest list named *listnameorindexname* already exists, modify the
preexisting list instead of exiting with an error. The contents of
diff --git a/docs/source/markdown/podman-manifest-push.1.md b/docs/source/markdown/podman-manifest-push.1.md.in
index cfe2b9230..631ead376 100644
--- a/docs/source/markdown/podman-manifest-push.1.md
+++ b/docs/source/markdown/podman-manifest-push.1.md.in
@@ -19,13 +19,7 @@ The list image's ID and the digest of the image's manifest.
Push the images mentioned in the manifest list or image index, in addition to
the list or index itself. (Default true)
-#### **--authfile**=*path*
-
-Path of the authentication file. Default is ${XDG\_RUNTIME\_DIR}/containers/auth.json, which is set using `podman login`.
-If the authorization state is not found there, $HOME/.docker/config.json is checked, which is set using `docker login`.
-
-Note: You can also override the default path of the authentication file by setting the REGISTRY\_AUTH\_FILE
-environment variable. `export REGISTRY_AUTH_FILE=path`
+@@option authfile
#### **--cert-dir**=*path*
diff --git a/docs/source/markdown/podman-pod-create.1.md.in b/docs/source/markdown/podman-pod-create.1.md.in
index 73b634548..c12f296b4 100644
--- a/docs/source/markdown/podman-pod-create.1.md.in
+++ b/docs/source/markdown/podman-pod-create.1.md.in
@@ -89,10 +89,10 @@ Set custom DNS search domains in the /etc/resolv.conf file that will be shared b
Set the exit policy of the pod when the last container exits. Supported policies are:
-| Exit Policy | Description |
-| ------------------ | --------------------------------------------------------------------------- |
-| *continue* | The pod continues running when the last container exits. Used by default. |
-| *stop* | The pod is stopped when the last container exits. Used in `kube play`. |
+| Exit Policy | Description |
+| ------------------ | -------------------------------------------------------------------------------------------------------------------------- |
+| *continue* | The pod continues running, by keeping its infra container alive, when the last container exits. Used by default. |
+| *stop* | The pod (including its infra container) is stopped when the last container exits. Used in `kube play`. |
#### **--gidmap**=*container_gid:host_gid:amount*
diff --git a/docs/source/markdown/podman-pull.1.md.in b/docs/source/markdown/podman-pull.1.md.in
index 29c4f865d..4753c38c7 100644
--- a/docs/source/markdown/podman-pull.1.md.in
+++ b/docs/source/markdown/podman-pull.1.md.in
@@ -51,18 +51,9 @@ All tagged images in the repository will be pulled.
@@option arch
-#### **--authfile**=*path*
+@@option authfile
-Path of the authentication file. If the authorization state is not found there, `$HOME/.docker/config.json` is checked, which is set using `docker login`.
-
-Default is `${XDG\_RUNTIME\_DIR}/containers/auth.json`, which is set using `podman login`.
-
-*IMPORTANT: The default path of the authentication file can be overwritten by setting the `REGISTRY\_AUTH\_FILE` environment variable. `export REGISTRY_AUTH_FILE=path`*
-
-#### **--cert-dir**=*path*
-
-Use certificates at *path* (\*.crt, \*.cert, \*.key) to connect to the registry. (Default: /etc/containers/certs.d)
-Please refer to **[containers-certs.d(5)](https://github.com/containers/image/blob/main/docs/containers-certs.d.5.md)** for details. (This option is not available with the remote Podman client, including Mac and Windows (excluding WSL2) machines)
+@@option cert-dir
#### **--creds**=*[username[:password]]*
@@ -83,6 +74,7 @@ Print the usage statement.
#### **--os**=*OS*
Override the OS, defaults to hosts, of the image to be pulled. For example, `windows`.
+Unless overridden, subsequent lookups of the same image in the local storage will match this OS, regardless of the host.
@@option platform
diff --git a/docs/source/markdown/podman-push.1.md b/docs/source/markdown/podman-push.1.md.in
index d674975b0..1c936cd66 100644
--- a/docs/source/markdown/podman-push.1.md
+++ b/docs/source/markdown/podman-push.1.md.in
@@ -47,13 +47,7 @@ $ podman push myimage oci-archive:/tmp/myimage
## OPTIONS
-#### **--authfile**=*path*
-
-Path of the authentication file. Default is ${XDG\_RUNTIME\_DIR}/containers/auth.json, which is set using `podman login`.
-If the authorization state is not found there, $HOME/.docker/config.json is checked, which is set using `docker login`.
-
-Note: You can also override the default path of the authentication file by setting the REGISTRY\_AUTH\_FILE
-environment variable. `export REGISTRY_AUTH_FILE=path`
+@@option authfile
#### **--cert-dir**=*path*
diff --git a/docs/source/markdown/podman-run.1.md.in b/docs/source/markdown/podman-run.1.md.in
index 81b635bc8..d10520e35 100644
--- a/docs/source/markdown/podman-run.1.md.in
+++ b/docs/source/markdown/podman-run.1.md.in
@@ -100,12 +100,7 @@ error. It can even pretend to be a TTY (this is what most commandline
executables expect) and pass along signals. The **-a** option can be set for
each of **stdin**, **stdout**, and **stderr**.
-#### **--authfile**=*[path]*
-
-Path to the authentication file. Default is *${XDG_RUNTIME_DIR}/containers/auth.json*.
-
-Note: You can also override the default path of the authentication file by setting the **REGISTRY_AUTH_FILE**
-environment variable.
+@@option authfile
@@option blkio-weight
@@ -470,6 +465,7 @@ This option conflicts with **--add-host**.
#### **--os**=*OS*
Override the OS, defaults to hosts, of the image to be pulled. For example, `windows`.
+Unless overridden, subsequent lookups of the same image in the local storage will match this OS, regardless of the host.
#### **--passwd**
diff --git a/docs/source/markdown/podman-search.1.md b/docs/source/markdown/podman-search.1.md.in
index 5b49d7f8e..9dd8cebf8 100644
--- a/docs/source/markdown/podman-search.1.md
+++ b/docs/source/markdown/podman-search.1.md.in
@@ -30,12 +30,7 @@ Further note that searching without a search term will only work for registries
## OPTIONS
-#### **--authfile**=*path*
-
-Path of the authentication file. Default is ${XDG\_RUNTIME\_DIR}/containers/auth.json
-
-Note: You can also override the default path of the authentication file by setting the REGISTRY\_AUTH\_FILE
-environment variable. `export REGISTRY_AUTH_FILE=path`
+@@option authfile
#### **--compatible**
diff --git a/docs/source/markdown/podman-stats.1.md b/docs/source/markdown/podman-stats.1.md
index d87da6a60..8d07be1a0 100644
--- a/docs/source/markdown/podman-stats.1.md
+++ b/docs/source/markdown/podman-stats.1.md
@@ -61,6 +61,10 @@ Do not clear the terminal/screen in between reporting intervals
Disable streaming stats and only pull the first result, default setting is false
+#### **--no-trunc**
+
+Do not truncate output
+
## EXAMPLE
```
@@ -77,6 +81,12 @@ a9f807ffaacd frosty_hodgkin -- 3.092MB / 16.7GB 0.02% -- / -- --
```
```
+$ podman stats --no-trunc 3667 --format 'table {{ .ID }} {{ .MemUsage }}'
+ID MEM USAGE / LIMIT
+3667c6aacb06aac2eaffce914c01736420023d56ef9b0f4cfe58b6d6a78b7503 49.15kB / 67.17GB
+```
+
+```
# podman stats --no-stream --format=json a9f80
[
{
diff --git a/go.mod b/go.mod
index 742c3c21d..635c0a17d 100644
--- a/go.mod
+++ b/go.mod
@@ -12,7 +12,7 @@ require (
github.com/containernetworking/cni v1.1.2
github.com/containernetworking/plugins v1.1.1
github.com/containers/buildah v1.27.0
- github.com/containers/common v0.49.2-0.20220809074359-b0ea008ba661
+ github.com/containers/common v0.49.2-0.20220817132854-f6679f170eca
github.com/containers/conmon v2.0.20+incompatible
github.com/containers/image/v5 v5.22.0
github.com/containers/ocicrypt v1.1.5
diff --git a/go.sum b/go.sum
index 644f03dd6..5053589c5 100644
--- a/go.sum
+++ b/go.sum
@@ -395,8 +395,8 @@ github.com/containernetworking/plugins v1.1.1/go.mod h1:Sr5TH/eBsGLXK/h71HeLfX19
github.com/containers/buildah v1.27.0 h1:LJ1ks7vKxwPzJGr5BWVvigbtVL9w7XeHtNEmiIOPJqI=
github.com/containers/buildah v1.27.0/go.mod h1:anH3ExvDXRNP9zLQCrOc1vWb5CrhqLF/aYFim4tslvA=
github.com/containers/common v0.49.1/go.mod h1:ueM5hT0itKqCQvVJDs+EtjornAQtrHYxQJzP2gxeGIg=
-github.com/containers/common v0.49.2-0.20220809074359-b0ea008ba661 h1:2Ldzg1st4REr5uUJRhjsye1zCbu0i/89RBh87Xc/cTY=
-github.com/containers/common v0.49.2-0.20220809074359-b0ea008ba661/go.mod h1:eT2iSsNzjOlF5VFLkyj9OU2SXznURvEYndsioQImuoE=
+github.com/containers/common v0.49.2-0.20220817132854-f6679f170eca h1:OjhEBVpFskIJ6Vq9nikYW7M6YXfkTxOBu+EQBoCyhuM=
+github.com/containers/common v0.49.2-0.20220817132854-f6679f170eca/go.mod h1:eT2iSsNzjOlF5VFLkyj9OU2SXznURvEYndsioQImuoE=
github.com/containers/conmon v2.0.20+incompatible h1:YbCVSFSCqFjjVwHTPINGdMX1F6JXHGTUje2ZYobNrkg=
github.com/containers/conmon v2.0.20+incompatible/go.mod h1:hgwZ2mtuDrppv78a/cOBNiCm6O0UMWGx1mu7P00nu5I=
github.com/containers/image/v5 v5.22.0 h1:KemxPmD4D2YYOFZN2SgoTk7nBFcnwPiPW0MqjYtknSE=
diff --git a/hack/markdown-preprocess-review b/hack/markdown-preprocess-review
index a487265ad..a3e237fb6 100755
--- a/hack/markdown-preprocess-review
+++ b/hack/markdown-preprocess-review
@@ -114,8 +114,60 @@ for my $i (0..$#all_opts) {
next if $ans =~ /^n/i;
exit 0 if $ans =~ /^q/i;
- system("diffuse", "-w", glob("*")) == 0
- or die "Diffuse failed\n";
+ # Try to cull the files (remove identical ones)
+ my @files = glob("*");
+ my $winner = pop @files;
+
+ for my $f (@files) {
+ system('cmp', '-s', $f, $winner);
+ if ($? == 0) {
+ print "[ $f is the one we went with; removing from list ]\n";
+ unlink $f;
+ next;
+ }
+
+ system('wdiff', '-1', '-2', '-3', $f, $winner);
+ if ($? == 0) {
+ print "[ $f is whitespace-identical with what we went with ]\n";
+ unlink $f;
+ next;
+ }
+ }
+
+ # Recompute @files, in case some were deleted above
+ @files = glob("*"); pop @files;
+
+ for (my $i=0; $i < $#files; $i++) {
+ my $f1 = $files[$i];
+ next unless -e $f1;
+
+ for (my $j=$i+1; $j <= $#files; $j++) {
+ my $f2 = $files[$j];
+ next unless -e $f2;
+
+ system('wdiff', '-1', '-2', '-3', $f1, $f2);
+ if ($? == 0) {
+ print "[ $f2 : removing, it =~ $f1 ]\n";
+ unlink $f2;
+ }
+ }
+ }
+
+ # Recompute @files, in case some were deleted above
+ @files = glob("*");
+
+ # diffuse works great for 3-4 files, passable for 5, not at all for >5
+ if (@files <= 5) {
+ system("diffuse", "-w", @files) == 0
+ or die "Diffuse failed\n";
+ }
+ else {
+ # Too many files. Go by threes.
+ my $winner = pop @files;
+ for (my $i=0; $i < @files; $i += 3) {
+ system("diffuse", "-w", @files[$i..$i+2], $winner);
+ }
+ }
}
diff --git a/libpod/events/events_freebsd.go b/libpod/events/events_freebsd.go
new file mode 100644
index 000000000..17d410089
--- /dev/null
+++ b/libpod/events/events_freebsd.go
@@ -0,0 +1,23 @@
+package events
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/sirupsen/logrus"
+)
+
+// NewEventer creates an eventer based on the eventer type
+func NewEventer(options EventerOptions) (Eventer, error) {
+ logrus.Debugf("Initializing event backend %s", options.EventerType)
+ switch strings.ToUpper(options.EventerType) {
+ case strings.ToUpper(LogFile.String()):
+ return EventLogFile{options}, nil
+ case strings.ToUpper(Null.String()):
+ return NewNullEventer(), nil
+ case strings.ToUpper(Memory.String()):
+ return NewMemoryEventer(), nil
+ default:
+ return nil, fmt.Errorf("unknown event logger type: %s", strings.ToUpper(options.EventerType))
+ }
+}
diff --git a/libpod/events/events_unsupported.go b/libpod/events/events_unsupported.go
index d766402a9..01031c225 100644
--- a/libpod/events/events_unsupported.go
+++ b/libpod/events/events_unsupported.go
@@ -1,5 +1,5 @@
-//go:build !linux
-// +build !linux
+//go:build !linux && !freebsd
+// +build !linux,!freebsd
package events
diff --git a/libpod/events/logfile.go b/libpod/events/logfile.go
index c7dbf4850..519e16629 100644
--- a/libpod/events/logfile.go
+++ b/libpod/events/logfile.go
@@ -1,5 +1,5 @@
-//go:build linux
-// +build linux
+//go:build linux || freebsd
+// +build linux freebsd
package events
diff --git a/libpod/networking_unsupported.go b/libpod/networking_unsupported.go
index 227b512cd..76ffabb5e 100644
--- a/libpod/networking_unsupported.go
+++ b/libpod/networking_unsupported.go
@@ -77,3 +77,10 @@ func (r *RootlessNetNS) Cleanup(runtime *Runtime) error {
func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) {
return nil, errors.New("not implemented (*Runtime) GetRootlessNetNs")
}
+
+// convertPortMappings will remove the HostIP part from the ports when running inside podman machine.
+// This is needed because a HostIP of 127.0.0.1 would not allow the gvproxy forwarder to reach the open ports.
+// For machine the HostIP must only be used by gvproxy and never in the VM.
+func (c *Container) convertPortMappings() []types.PortMapping {
+ return []types.PortMapping{}
+}
diff --git a/libpod/oci_conmon_attach_common.go b/libpod/oci_conmon_attach_common.go
new file mode 100644
index 000000000..a9e9b2bb5
--- /dev/null
+++ b/libpod/oci_conmon_attach_common.go
@@ -0,0 +1,305 @@
+//go:build linux || freebsd
+// +build linux freebsd
+
+package libpod
+
+import (
+ "errors"
+ "fmt"
+ "io"
+ "net"
+ "os"
+ "path/filepath"
+ "syscall"
+
+ "github.com/containers/common/pkg/config"
+ "github.com/containers/common/pkg/resize"
+ "github.com/containers/common/pkg/util"
+ "github.com/containers/podman/v4/libpod/define"
+ "github.com/containers/podman/v4/pkg/errorhandling"
+ "github.com/moby/term"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
+)
+
+/* Sync with stdpipe_t in conmon.c */
+const (
+ AttachPipeStdin = 1
+ AttachPipeStdout = 2
+ AttachPipeStderr = 3
+)
+
+// Attach to the given container.
+// Does not check if state is appropriate.
+// params.Started is only required if params.Start is true.
+func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
+ passthrough := c.LogDriver() == define.PassthroughLogging
+
+ if params == nil || params.Streams == nil {
+ return fmt.Errorf("must provide parameters to Attach: %w", define.ErrInternal)
+ }
+
+ if !params.Streams.AttachOutput && !params.Streams.AttachError && !params.Streams.AttachInput && !passthrough {
+ return fmt.Errorf("must provide at least one stream to attach to: %w", define.ErrInvalidArg)
+ }
+ if params.Start && params.Started == nil {
+ return fmt.Errorf("started chan not passed when startContainer set: %w", define.ErrInternal)
+ }
+
+ keys := config.DefaultDetachKeys
+ if params.DetachKeys != nil {
+ keys = *params.DetachKeys
+ }
+
+ detachKeys, err := processDetachKeys(keys)
+ if err != nil {
+ return err
+ }
+
+ var conn *net.UnixConn
+ if !passthrough {
+ logrus.Debugf("Attaching to container %s", c.ID())
+
+ // If we have a resize, do it.
+ if params.InitialSize != nil {
+ if err := r.AttachResize(c, *params.InitialSize); err != nil {
+ return err
+ }
+ }
+
+ attachSock, err := c.AttachSocketPath()
+ if err != nil {
+ return err
+ }
+
+ conn, err = openUnixSocket(attachSock)
+ if err != nil {
+ return fmt.Errorf("failed to connect to container's attach socket: %v: %w", attachSock, err)
+ }
+ defer func() {
+ if err := conn.Close(); err != nil {
+ logrus.Errorf("unable to close socket: %q", err)
+ }
+ }()
+ }
+
+ // If starting was requested, start the container and notify when that's
+ // done.
+ if params.Start {
+ if err := c.start(); err != nil {
+ return err
+ }
+ params.Started <- true
+ }
+
+ if passthrough {
+ return nil
+ }
+
+ receiveStdoutError, stdinDone := setupStdioChannels(params.Streams, conn, detachKeys)
+ if params.AttachReady != nil {
+ params.AttachReady <- true
+ }
+ return readStdio(conn, params.Streams, receiveStdoutError, stdinDone)
+}
+
+// Attach to the given container's exec session
+// attachFd and startFd must be open file descriptors
+// attachFd must be the output side of the fd. attachFd is used for two things:
+// conmon will first send a nonce value across the pipe indicating it has set up its side of the console socket
+// this ensures attachToExec gets all of the output of the called process
+// conmon will then send the exit code of the exec process, or an error in the exec session
+// startFd must be the input side of the fd.
+// newSize resizes the tty to this size before the process is started, must be nil if the exec session has no tty
+// conmon will wait to start the exec session until the parent process has set up the console socket.
+// Once attachToExec successfully attaches to the console socket, the child conmon process responsible for calling runtime exec
+// will read from the output side of start fd, thus learning to start the child process.
+// Thus, the order goes as follows:
+// 1. conmon parent process sets up its console socket. sends on attachFd
+// 2. attachToExec attaches to the console socket after reading on attachFd and resizes the tty
+// 3. child waits on startFd for attachToExec to attach to said console socket
+// 4. attachToExec sends on startFd, signalling it has attached to the socket and child is ready to go
+// 5. child receives on startFd, runs the runtime exec command
+// attachToExec is responsible for closing startFd and attachFd
+func (c *Container) attachToExec(streams *define.AttachStreams, keys *string, sessionID string, startFd, attachFd *os.File, newSize *resize.TerminalSize) error {
+ if !streams.AttachOutput && !streams.AttachError && !streams.AttachInput {
+ return fmt.Errorf("must provide at least one stream to attach to: %w", define.ErrInvalidArg)
+ }
+ if startFd == nil || attachFd == nil {
+ return fmt.Errorf("start sync pipe and attach sync pipe must be defined for exec attach: %w", define.ErrInvalidArg)
+ }
+
+ defer errorhandling.CloseQuiet(startFd)
+ defer errorhandling.CloseQuiet(attachFd)
+
+ detachString := config.DefaultDetachKeys
+ if keys != nil {
+ detachString = *keys
+ }
+ detachKeys, err := processDetachKeys(detachString)
+ if err != nil {
+ return err
+ }
+
+ logrus.Debugf("Attaching to container %s exec session %s", c.ID(), sessionID)
+
+ // set up the socket path, such that it is the correct length and location for exec
+ sockPath, err := c.execAttachSocketPath(sessionID)
+ if err != nil {
+ return err
+ }
+
+ // 2: read from attachFd that the parent process has set up the console socket
+ if _, err := readConmonPipeData(c.ociRuntime.Name(), attachFd, ""); err != nil {
+ return err
+ }
+
+ // resize before we start the container process
+ if newSize != nil {
+ err = c.ociRuntime.ExecAttachResize(c, sessionID, *newSize)
+ if err != nil {
+ logrus.Warnf("Resize failed: %v", err)
+ }
+ }
+
+ // 2: then attach
+ conn, err := openUnixSocket(sockPath)
+ if err != nil {
+ return fmt.Errorf("failed to connect to container's attach socket: %v: %w", sockPath, err)
+ }
+ defer func() {
+ if err := conn.Close(); err != nil {
+ logrus.Errorf("Unable to close socket: %q", err)
+ }
+ }()
+
+ // start listening on stdio of the process
+ receiveStdoutError, stdinDone := setupStdioChannels(streams, conn, detachKeys)
+
+ // 4: send start message to child
+ if err := writeConmonPipeData(startFd); err != nil {
+ return err
+ }
+
+ return readStdio(conn, streams, receiveStdoutError, stdinDone)
+}
+
+func processDetachKeys(keys string) ([]byte, error) {
+ // Check the validity of the provided keys first
+ if len(keys) == 0 {
+ return []byte{}, nil
+ }
+ detachKeys, err := term.ToBytes(keys)
+ if err != nil {
+ return nil, fmt.Errorf("invalid detach keys: %w", err)
+ }
+ return detachKeys, nil
+}
+
+func registerResizeFunc(r <-chan resize.TerminalSize, bundlePath string) {
+ resize.HandleResizing(r, func(size resize.TerminalSize) {
+ controlPath := filepath.Join(bundlePath, "ctl")
+ controlFile, err := os.OpenFile(controlPath, unix.O_WRONLY, 0)
+ if err != nil {
+ logrus.Debugf("Could not open ctl file: %v", err)
+ return
+ }
+ defer controlFile.Close()
+
+ logrus.Debugf("Received a resize event: %+v", size)
+ if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 1, size.Height, size.Width); err != nil {
+ logrus.Warnf("Failed to write to control file to resize terminal: %v", err)
+ }
+ })
+}
+
+func setupStdioChannels(streams *define.AttachStreams, conn *net.UnixConn, detachKeys []byte) (chan error, chan error) {
+ receiveStdoutError := make(chan error)
+ go func() {
+ receiveStdoutError <- redirectResponseToOutputStreams(streams.OutputStream, streams.ErrorStream, streams.AttachOutput, streams.AttachError, conn)
+ }()
+
+ stdinDone := make(chan error)
+ go func() {
+ var err error
+ if streams.AttachInput {
+ _, err = util.CopyDetachable(conn, streams.InputStream, detachKeys)
+ }
+ stdinDone <- err
+ }()
+
+ return receiveStdoutError, stdinDone
+}
+
+func redirectResponseToOutputStreams(outputStream, errorStream io.Writer, writeOutput, writeError bool, conn io.Reader) error {
+ var err error
+ buf := make([]byte, 8192+1) /* Sync with conmon STDIO_BUF_SIZE */
+ for {
+ nr, er := conn.Read(buf)
+ if nr > 0 {
+ var dst io.Writer
+ var doWrite bool
+ switch buf[0] {
+ case AttachPipeStdout:
+ dst = outputStream
+ doWrite = writeOutput
+ case AttachPipeStderr:
+ dst = errorStream
+ doWrite = writeError
+ default:
+ logrus.Infof("Received unexpected attach type %+d", buf[0])
+ }
+ if dst == nil {
+ return errors.New("output destination cannot be nil")
+ }
+
+ if doWrite {
+ nw, ew := dst.Write(buf[1:nr])
+ if ew != nil {
+ err = ew
+ break
+ }
+ if nr != nw+1 {
+ err = io.ErrShortWrite
+ break
+ }
+ }
+ }
+ if errors.Is(er, io.EOF) || errors.Is(er, syscall.ECONNRESET) {
+ break
+ }
+ if er != nil {
+ err = er
+ break
+ }
+ }
+ return err
+}
+
+func readStdio(conn *net.UnixConn, streams *define.AttachStreams, receiveStdoutError, stdinDone chan error) error {
+ var err error
+ select {
+ case err = <-receiveStdoutError:
+ if err := conn.CloseWrite(); err != nil {
+ logrus.Errorf("Failed to close stdin: %v", err)
+ }
+ return err
+ case err = <-stdinDone:
+ if err == define.ErrDetach {
+ if err := conn.CloseWrite(); err != nil {
+ logrus.Errorf("Failed to close stdin: %v", err)
+ }
+ return err
+ }
+ if err == nil {
+ // copy stdin is done, close it
+ if connErr := conn.CloseWrite(); connErr != nil {
+ logrus.Errorf("Unable to close conn: %v", connErr)
+ }
+ }
+ if streams.AttachOutput || streams.AttachError {
+ return <-receiveStdoutError
+ }
+ }
+ return nil
+}
diff --git a/libpod/oci_conmon_attach_freebsd.go b/libpod/oci_conmon_attach_freebsd.go
new file mode 100644
index 000000000..de0054381
--- /dev/null
+++ b/libpod/oci_conmon_attach_freebsd.go
@@ -0,0 +1,21 @@
+package libpod
+
+import (
+ "net"
+ "os"
+ "path/filepath"
+)
+
+func openUnixSocket(path string) (*net.UnixConn, error) {
+ // socket paths can be too long to fit into a sockaddr_un so we create a shorter symlink.
+ tmpdir, err := os.MkdirTemp("", "podman")
+ if err != nil {
+ return nil, err
+ }
+ defer os.RemoveAll(tmpdir)
+ tmpsockpath := filepath.Join(tmpdir, "sock")
+ if err := os.Symlink(path, tmpsockpath); err != nil {
+ return nil, err
+ }
+ return net.DialUnix("unixpacket", nil, &net.UnixAddr{Name: tmpsockpath, Net: "unixpacket"})
+}
diff --git a/libpod/oci_conmon_attach_linux.go b/libpod/oci_conmon_attach_linux.go
index aa55aa6f5..f1aa89d3e 100644
--- a/libpod/oci_conmon_attach_linux.go
+++ b/libpod/oci_conmon_attach_linux.go
@@ -1,34 +1,12 @@
-//go:build linux
-// +build linux
-
package libpod
import (
- "errors"
"fmt"
- "io"
"net"
- "os"
- "path/filepath"
- "syscall"
- "github.com/containers/common/pkg/config"
- "github.com/containers/common/pkg/resize"
- "github.com/containers/common/pkg/util"
- "github.com/containers/podman/v4/libpod/define"
- "github.com/containers/podman/v4/pkg/errorhandling"
- "github.com/moby/term"
- "github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
-/* Sync with stdpipe_t in conmon.c */
-const (
- AttachPipeStdin = 1
- AttachPipeStdout = 2
- AttachPipeStderr = 3
-)
-
func openUnixSocket(path string) (*net.UnixConn, error) {
fd, err := unix.Open(path, unix.O_PATH, 0)
if err != nil {
@@ -37,278 +15,3 @@ func openUnixSocket(path string) (*net.UnixConn, error) {
defer unix.Close(fd)
return net.DialUnix("unixpacket", nil, &net.UnixAddr{Name: fmt.Sprintf("/proc/self/fd/%d", fd), Net: "unixpacket"})
}
-
-// Attach to the given container.
-// Does not check if state is appropriate.
-// started is only required if startContainer is true.
-func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
- passthrough := c.LogDriver() == define.PassthroughLogging
-
- if params == nil || params.Streams == nil {
- return fmt.Errorf("must provide parameters to Attach: %w", define.ErrInternal)
- }
-
- if !params.Streams.AttachOutput && !params.Streams.AttachError && !params.Streams.AttachInput && !passthrough {
- return fmt.Errorf("must provide at least one stream to attach to: %w", define.ErrInvalidArg)
- }
- if params.Start && params.Started == nil {
- return fmt.Errorf("started chan not passed when startContainer set: %w", define.ErrInternal)
- }
-
- keys := config.DefaultDetachKeys
- if params.DetachKeys != nil {
- keys = *params.DetachKeys
- }
-
- detachKeys, err := processDetachKeys(keys)
- if err != nil {
- return err
- }
-
- var conn *net.UnixConn
- if !passthrough {
- logrus.Debugf("Attaching to container %s", c.ID())
-
- // If we have a resize, do it.
- if params.InitialSize != nil {
- if err := r.AttachResize(c, *params.InitialSize); err != nil {
- return err
- }
- }
-
- attachSock, err := c.AttachSocketPath()
- if err != nil {
- return err
- }
-
- conn, err = openUnixSocket(attachSock)
- if err != nil {
- return fmt.Errorf("failed to connect to container's attach socket: %v: %w", attachSock, err)
- }
- defer func() {
- if err := conn.Close(); err != nil {
- logrus.Errorf("unable to close socket: %q", err)
- }
- }()
- }
-
- // If starting was requested, start the container and notify when that's
- // done.
- if params.Start {
- if err := c.start(); err != nil {
- return err
- }
- params.Started <- true
- }
-
- if passthrough {
- return nil
- }
-
- receiveStdoutError, stdinDone := setupStdioChannels(params.Streams, conn, detachKeys)
- if params.AttachReady != nil {
- params.AttachReady <- true
- }
- return readStdio(conn, params.Streams, receiveStdoutError, stdinDone)
-}
-
-// Attach to the given container's exec session
-// attachFd and startFd must be open file descriptors
-// attachFd must be the output side of the fd. attachFd is used for two things:
-// conmon will first send a nonce value across the pipe indicating it has set up its side of the console socket
-// this ensures attachToExec gets all of the output of the called process
-// conmon will then send the exit code of the exec process, or an error in the exec session
-// startFd must be the input side of the fd.
-// newSize resizes the tty to this size before the process is started, must be nil if the exec session has no tty
-// conmon will wait to start the exec session until the parent process has set up the console socket.
-// Once attachToExec successfully attaches to the console socket, the child conmon process responsible for calling runtime exec
-// will read from the output side of start fd, thus learning to start the child process.
-// Thus, the order goes as follow:
-// 1. conmon parent process sets up its console socket. sends on attachFd
-// 2. attachToExec attaches to the console socket after reading on attachFd and resizes the tty
-// 3. child waits on startFd for attachToExec to attach to said console socket
-// 4. attachToExec sends on startFd, signalling it has attached to the socket and child is ready to go
-// 5. child receives on startFd, runs the runtime exec command
-// attachToExec is responsible for closing startFd and attachFd
-func (c *Container) attachToExec(streams *define.AttachStreams, keys *string, sessionID string, startFd, attachFd *os.File, newSize *resize.TerminalSize) error {
- if !streams.AttachOutput && !streams.AttachError && !streams.AttachInput {
- return fmt.Errorf("must provide at least one stream to attach to: %w", define.ErrInvalidArg)
- }
- if startFd == nil || attachFd == nil {
- return fmt.Errorf("start sync pipe and attach sync pipe must be defined for exec attach: %w", define.ErrInvalidArg)
- }
-
- defer errorhandling.CloseQuiet(startFd)
- defer errorhandling.CloseQuiet(attachFd)
-
- detachString := config.DefaultDetachKeys
- if keys != nil {
- detachString = *keys
- }
- detachKeys, err := processDetachKeys(detachString)
- if err != nil {
- return err
- }
-
- logrus.Debugf("Attaching to container %s exec session %s", c.ID(), sessionID)
-
- // set up the socket path, such that it is the correct length and location for exec
- sockPath, err := c.execAttachSocketPath(sessionID)
- if err != nil {
- return err
- }
-
- // 2: read from attachFd that the parent process has set up the console socket
- if _, err := readConmonPipeData(c.ociRuntime.Name(), attachFd, ""); err != nil {
- return err
- }
-
- // resize before we start the container process
- if newSize != nil {
- err = c.ociRuntime.ExecAttachResize(c, sessionID, *newSize)
- if err != nil {
- logrus.Warnf("Resize failed: %v", err)
- }
- }
-
- // 2: then attach
- conn, err := openUnixSocket(sockPath)
- if err != nil {
- return fmt.Errorf("failed to connect to container's attach socket: %v: %w", sockPath, err)
- }
- defer func() {
- if err := conn.Close(); err != nil {
- logrus.Errorf("Unable to close socket: %q", err)
- }
- }()
-
- // start listening on stdio of the process
- receiveStdoutError, stdinDone := setupStdioChannels(streams, conn, detachKeys)
-
- // 4: send start message to child
- if err := writeConmonPipeData(startFd); err != nil {
- return err
- }
-
- return readStdio(conn, streams, receiveStdoutError, stdinDone)
-}
-
-func processDetachKeys(keys string) ([]byte, error) {
- // Check the validity of the provided keys first
- if len(keys) == 0 {
- return []byte{}, nil
- }
- detachKeys, err := term.ToBytes(keys)
- if err != nil {
- return nil, fmt.Errorf("invalid detach keys: %w", err)
- }
- return detachKeys, nil
-}
-
-func registerResizeFunc(r <-chan resize.TerminalSize, bundlePath string) {
- resize.HandleResizing(r, func(size resize.TerminalSize) {
- controlPath := filepath.Join(bundlePath, "ctl")
- controlFile, err := os.OpenFile(controlPath, unix.O_WRONLY, 0)
- if err != nil {
- logrus.Debugf("Could not open ctl file: %v", err)
- return
- }
- defer controlFile.Close()
-
- logrus.Debugf("Received a resize event: %+v", size)
- if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 1, size.Height, size.Width); err != nil {
- logrus.Warnf("Failed to write to control file to resize terminal: %v", err)
- }
- })
-}
-
-func setupStdioChannels(streams *define.AttachStreams, conn *net.UnixConn, detachKeys []byte) (chan error, chan error) {
- receiveStdoutError := make(chan error)
- go func() {
- receiveStdoutError <- redirectResponseToOutputStreams(streams.OutputStream, streams.ErrorStream, streams.AttachOutput, streams.AttachError, conn)
- }()
-
- stdinDone := make(chan error)
- go func() {
- var err error
- if streams.AttachInput {
- _, err = util.CopyDetachable(conn, streams.InputStream, detachKeys)
- }
- stdinDone <- err
- }()
-
- return receiveStdoutError, stdinDone
-}
-
-func redirectResponseToOutputStreams(outputStream, errorStream io.Writer, writeOutput, writeError bool, conn io.Reader) error {
- var err error
- buf := make([]byte, 8192+1) /* Sync with conmon STDIO_BUF_SIZE */
- for {
- nr, er := conn.Read(buf)
- if nr > 0 {
- var dst io.Writer
- var doWrite bool
- switch buf[0] {
- case AttachPipeStdout:
- dst = outputStream
- doWrite = writeOutput
- case AttachPipeStderr:
- dst = errorStream
- doWrite = writeError
- default:
- logrus.Infof("Received unexpected attach type %+d", buf[0])
- }
- if dst == nil {
- return errors.New("output destination cannot be nil")
- }
-
- if doWrite {
- nw, ew := dst.Write(buf[1:nr])
- if ew != nil {
- err = ew
- break
- }
- if nr != nw+1 {
- err = io.ErrShortWrite
- break
- }
- }
- }
- if errors.Is(er, io.EOF) || errors.Is(er, syscall.ECONNRESET) {
- break
- }
- if er != nil {
- err = er
- break
- }
- }
- return err
-}
-
-func readStdio(conn *net.UnixConn, streams *define.AttachStreams, receiveStdoutError, stdinDone chan error) error {
- var err error
- select {
- case err = <-receiveStdoutError:
- if err := conn.CloseWrite(); err != nil {
- logrus.Errorf("Failed to close stdin: %v", err)
- }
- return err
- case err = <-stdinDone:
- if err == define.ErrDetach {
- if err := conn.CloseWrite(); err != nil {
- logrus.Errorf("Failed to close stdin: %v", err)
- }
- return err
- }
- if err == nil {
- // copy stdin is done, close it
- if connErr := conn.CloseWrite(); connErr != nil {
- logrus.Errorf("Unable to close conn: %v", connErr)
- }
- }
- if streams.AttachOutput || streams.AttachError {
- return <-receiveStdoutError
- }
- }
- return nil
-}
diff --git a/libpod/oci_conmon_common.go b/libpod/oci_conmon_common.go
new file mode 100644
index 000000000..c3725cdb4
--- /dev/null
+++ b/libpod/oci_conmon_common.go
@@ -0,0 +1,1600 @@
+//go:build linux || freebsd
+// +build linux freebsd
+
+package libpod
+
+import (
+ "bufio"
+ "bytes"
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "net"
+ "net/http"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "sync"
+ "syscall"
+ "text/template"
+ "time"
+
+ "github.com/containers/common/pkg/config"
+ "github.com/containers/common/pkg/resize"
+ cutil "github.com/containers/common/pkg/util"
+ conmonConfig "github.com/containers/conmon/runner/config"
+ "github.com/containers/podman/v4/libpod/define"
+ "github.com/containers/podman/v4/libpod/logs"
+ "github.com/containers/podman/v4/pkg/checkpoint/crutils"
+ "github.com/containers/podman/v4/pkg/errorhandling"
+ "github.com/containers/podman/v4/pkg/rootless"
+ "github.com/containers/podman/v4/pkg/specgenutil"
+ "github.com/containers/podman/v4/pkg/util"
+ "github.com/containers/podman/v4/utils"
+ "github.com/containers/storage/pkg/homedir"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
+)
+
+ const (
+ // bufferSize is Conmon's STDIO_BUF_SIZE, taken from conmon's Go config
+ // package (conmonConfig.BufSize), plus one.
+ // Important: The conmon attach socket uses an extra byte at the beginning of each
+ // message to specify the STREAM so we have to increase the buffer size by one
+ bufferSize = conmonConfig.BufSize + 1
+ )
+
+ // ConmonOCIRuntime is an OCI runtime managed by Conmon.
+ // TODO: Make all calls to OCI runtime have a timeout.
+ // All fields are populated by newConmonOCIRuntime.
+ type ConmonOCIRuntime struct {
+ // name is the runtime name given to newConmonOCIRuntime.
+ name string
+ // path is the resolved path of the OCI runtime executable.
+ path string
+ // conmonPath is the path of the conmon binary that gets executed.
+ conmonPath string
+ // conmonEnv is copied from Engine.ConmonEnvVars in the configuration.
+ conmonEnv []string
+ // tmpDir is copied from Engine.TmpDir in the configuration.
+ tmpDir string
+ // exitsDir is the "exits" directory below tmpDir; ExitFilePath places
+ // one entry per container ID in it.
+ exitsDir string
+ // logSizeMax is copied from Containers.LogSizeMax in the configuration.
+ logSizeMax int64
+ // noPivot is copied from Engine.NoPivotRoot; when set, conmon is passed
+ // --no-pivot.
+ noPivot bool
+ // reservePorts is copied from Engine.EnablePortReservation.
+ reservePorts bool
+ // runtimeFlags are placed in front of the subcommand on every OCI
+ // runtime invocation.
+ runtimeFlags []string
+ // supportsJSON, supportsKVM and supportsNoCgroups record whether the base
+ // name of the runtime is listed in Engine.RuntimeSupportsJSON,
+ // Engine.RuntimeSupportsKVM and Engine.RuntimeSupportsNoCgroups.
+ supportsJSON bool
+ supportsKVM bool
+ supportsNoCgroups bool
+ // enableKeyring is copied from Containers.EnableKeyring; when false,
+ // conmon is passed --no-new-keyring.
+ enableKeyring bool
+ }
+
+ // newConmonOCIRuntime builds a Conmon-based OCI runtime from the given options.
+ // Conmon wraps the named OCI runtime, which can be `runc`, `crun`, or any
+ // runtime with a runc-compatible CLI.
+ // The first entry of paths that is a regular file is used as the runtime
+ // executable; if none qualifies, name is looked up in $PATH.
+ // An empty name, or a runtime that cannot be located, yields an error wrapping
+ // define.ErrInvalidArg.
+ // Deliberately private. Someone should not be able to construct this outside of
+ // libpod.
+ func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtimeFlags []string, runtimeCfg *config.Config) (OCIRuntime, error) {
+ 	if name == "" {
+ 		return nil, fmt.Errorf("the OCI runtime must be provided a non-empty name: %w", define.ErrInvalidArg)
+ 	}
+
+ 	// TODO: probe OCI runtime for feature and enable automatically if
+ 	// available.
+
+ 	// Feature support is keyed on the base name of the runtime.
+ 	base := filepath.Base(name)
+ 	listed := func(runtimes []string) bool {
+ 		for _, candidate := range runtimes {
+ 			if candidate == base {
+ 				return true
+ 			}
+ 		}
+ 		return false
+ 	}
+
+ 	runtime := &ConmonOCIRuntime{
+ 		name:              name,
+ 		conmonPath:        conmonPath,
+ 		runtimeFlags:      runtimeFlags,
+ 		conmonEnv:         runtimeCfg.Engine.ConmonEnvVars,
+ 		tmpDir:            runtimeCfg.Engine.TmpDir,
+ 		logSizeMax:        runtimeCfg.Containers.LogSizeMax,
+ 		noPivot:           runtimeCfg.Engine.NoPivotRoot,
+ 		reservePorts:      runtimeCfg.Engine.EnablePortReservation,
+ 		enableKeyring:     runtimeCfg.Containers.EnableKeyring,
+ 		supportsJSON:      listed(runtimeCfg.Engine.RuntimeSupportsJSON),
+ 		supportsNoCgroups: listed(runtimeCfg.Engine.RuntimeSupportsNoCgroups),
+ 		supportsKVM:       listed(runtimeCfg.Engine.RuntimeSupportsKVM),
+ 	}
+
+ 	// Walk the configured candidates; missing files and non-regular files
+ 	// are skipped, any other stat failure is fatal.
+ 	located := false
+ 	for _, candidate := range paths {
+ 		info, err := os.Stat(candidate)
+ 		if err != nil && !os.IsNotExist(err) {
+ 			return nil, fmt.Errorf("cannot stat OCI runtime %s path: %w", name, err)
+ 		}
+ 		if err != nil || !info.Mode().IsRegular() {
+ 			continue
+ 		}
+ 		logrus.Tracef("found runtime %q", candidate)
+ 		runtime.path = candidate
+ 		located = true
+ 		break
+ 	}
+
+ 	// Search the $PATH as last fallback
+ 	if !located {
+ 		fromPath, err := exec.LookPath(name)
+ 		if err != nil {
+ 			return nil, fmt.Errorf("no valid executable found for OCI runtime %s: %w", name, define.ErrInvalidArg)
+ 		}
+ 		runtime.path = fromPath
+ 		logrus.Debugf("using runtime %q from $PATH: %q", name, fromPath)
+ 	}
+
+ 	// Create the exit files and attach sockets directories.
+ 	// The directory is allowed to exist.
+ 	runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits")
+ 	if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil && !os.IsExist(err) {
+ 		return nil, fmt.Errorf("error creating OCI runtime exit files directory: %w", err)
+ 	}
+ 	return runtime, nil
+ }
+
+ // Name returns the name of the runtime being wrapped by Conmon.
+ // This is the name that was handed to newConmonOCIRuntime.
+ func (r *ConmonOCIRuntime) Name() string {
+ return r.name
+ }
+
+ // Path returns the path of the OCI runtime being wrapped by Conmon.
+ // This is the executable path resolved by newConmonOCIRuntime, either from
+ // its list of candidate paths or from $PATH.
+ func (r *ConmonOCIRuntime) Path() string {
+ return r.path
+ }
+
+ // hasCurrentUserMapped reports whether the effective UID of this process falls
+ // inside one of the host ranges of the container's UID map. A container with
+ // neither UID nor GID mappings is treated as having the user mapped.
+ func hasCurrentUserMapped(ctr *Container) bool {
+ 	mappings := ctr.config.IDMappings
+ 	if len(mappings.UIDMap) == 0 && len(mappings.GIDMap) == 0 {
+ 		return true
+ 	}
+
+ 	euid := os.Geteuid()
+ 	for _, entry := range mappings.UIDMap {
+ 		if euid < entry.HostID {
+ 			continue
+ 		}
+ 		if euid < entry.HostID+entry.Size {
+ 			return true
+ 		}
+ 	}
+ 	return false
+ }
+
+ // CreateContainer creates a container.
+ // The run directory is always made accessible first. When the current user is
+ // not mapped into the container's user namespace, the container's directories
+ // are made accessible to the container root as well, and a non-privileged
+ // container created outside of rootless mode goes through
+ // createRootlessContainer; every other case uses createOCIContainer.
+ func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
+ 	// always make the run dir accessible to the current user so that the PID files can be read without
+ 	// being in the rootless user namespace.
+ 	if err := makeAccessible(ctr.state.RunDir, 0, 0); err != nil {
+ 		return 0, err
+ 	}
+ 	if hasCurrentUserMapped(ctr) {
+ 		return r.createOCIContainer(ctr, restoreOptions)
+ 	}
+
+ 	dirs := []string{
+ 		ctr.state.RunDir,
+ 		ctr.runtime.config.Engine.TmpDir,
+ 		ctr.config.StaticDir,
+ 		ctr.state.Mountpoint,
+ 		ctr.runtime.config.Engine.VolumePath,
+ 	}
+ 	for _, dir := range dirs {
+ 		if err := makeAccessible(dir, ctr.RootUID(), ctr.RootGID()); err != nil {
+ 			return 0, err
+ 		}
+ 	}
+
+ 	if ctr.config.Privileged || rootless.IsRootless() {
+ 		return r.createOCIContainer(ctr, restoreOptions)
+ 	}
+ 	// if we are running a non privileged container, be sure to umount some kernel paths so they are not
+ 	// bind mounted inside the container at all.
+ 	return r.createRootlessContainer(ctr, restoreOptions)
+ }
+
+ // UpdateContainerStatus retrieves the current status of the container from the
+ // runtime. It updates the container's state but does not save it.
+ // The status is obtained by running "<runtime> state <ctr ID>" and decoding
+ // the JSON printed on stdout. If the runtime cannot be started and its stderr
+ // mentions a missing container, the conmon files are removed and the container
+ // is marked exited with exit code -1.
+ // A status string the function does not know yields an error wrapping
+ // define.ErrInternal.
+ func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container) error {
+ 	runtimeDir, err := util.GetRuntimeDir()
+ 	if err != nil {
+ 		return err
+ 	}
+
+ 	// Store old state so we know if we were already stopped
+ 	oldState := ctr.state.State
+
+ 	state := new(spec.State)
+
+ 	cmd := exec.Command(r.path, "state", ctr.ID())
+ 	cmd.Env = append(cmd.Env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir))
+
+ 	outPipe, err := cmd.StdoutPipe()
+ 	if err != nil {
+ 		return fmt.Errorf("getting stdout pipe: %w", err)
+ 	}
+ 	errPipe, err := cmd.StderrPipe()
+ 	if err != nil {
+ 		return fmt.Errorf("getting stderr pipe: %w", err)
+ 	}
+
+ 	if err := cmd.Start(); err != nil {
+ 		out, err2 := ioutil.ReadAll(errPipe)
+ 		if err2 != nil {
+ 			return fmt.Errorf("error getting container %s state: %w", ctr.ID(), err)
+ 		}
+ 		if strings.Contains(string(out), "does not exist") || strings.Contains(string(out), "No such file") {
+ 			// Best effort: the container is gone either way, but keep
+ 			// the cause in the log so the failure can be diagnosed.
+ 			if err := ctr.removeConmonFiles(); err != nil {
+ 				logrus.Debugf("unable to remove conmon files for container %s: %v", ctr.ID(), err)
+ 			}
+ 			ctr.state.ExitCode = -1
+ 			ctr.state.FinishedTime = time.Now()
+ 			ctr.state.State = define.ContainerStateExited
+ 			return ctr.runtime.state.AddContainerExitCode(ctr.ID(), ctr.state.ExitCode)
+ 		}
+ 		return fmt.Errorf("error getting container %s state. stderr/out: %s: %w", ctr.ID(), out, err)
+ 	}
+ 	// Reap the child once we are done with its output.
+ 	defer func() {
+ 		_ = cmd.Wait()
+ 	}()
+
+ 	if err := errPipe.Close(); err != nil {
+ 		return err
+ 	}
+ 	out, err := ioutil.ReadAll(outPipe)
+ 	if err != nil {
+ 		return fmt.Errorf("error reading stdout: %s: %w", ctr.ID(), err)
+ 	}
+ 	if err := json.NewDecoder(bytes.NewBuffer(out)).Decode(state); err != nil {
+ 		return fmt.Errorf("error decoding container status for container %s: %w", ctr.ID(), err)
+ 	}
+ 	ctr.state.PID = state.Pid
+
+ 	switch state.Status {
+ 	case "created":
+ 		ctr.state.State = define.ContainerStateCreated
+ 	case "paused":
+ 		ctr.state.State = define.ContainerStatePaused
+ 	case "running":
+ 		ctr.state.State = define.ContainerStateRunning
+ 	case "stopped":
+ 		ctr.state.State = define.ContainerStateStopped
+ 	default:
+ 		return fmt.Errorf("unrecognized status returned by runtime for container %s: %s: %w",
+ 			ctr.ID(), state.Status, define.ErrInternal)
+ 	}
+
+ 	// Only grab exit status if we were not already stopped
+ 	// If we were, it should already be in the database
+ 	if ctr.state.State == define.ContainerStateStopped && oldState != define.ContainerStateStopped {
+ 		if _, err := ctr.Wait(context.Background()); err != nil {
+ 			logrus.Errorf("Waiting for container %s to exit: %v", ctr.ID(), err)
+ 		}
+ 		return nil
+ 	}
+
+ 	// Handle ContainerStateStopping - keep it unless the container
+ 	// transitioned to no longer running.
+ 	if oldState == define.ContainerStateStopping && (ctr.state.State == define.ContainerStatePaused || ctr.state.State == define.ContainerStateRunning) {
+ 		ctr.state.State = define.ContainerStateStopping
+ 	}
+
+ 	return nil
+ }
+
+ // StartContainer starts the given container by running
+ // "<runtime> [runtimeFlags...] start <ctr ID>".
+ // Sets time the container was started, but does not save it.
+ func (r *ConmonOCIRuntime) StartContainer(ctr *Container) error {
+ 	// TODO: streams should probably *not* be our STDIN/OUT/ERR - redirect to buffers?
+ 	runtimeDir, err := util.GetRuntimeDir()
+ 	if err != nil {
+ 		return err
+ 	}
+ 	env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+ 	if path, ok := os.LookupEnv("PATH"); ok {
+ 		env = append(env, fmt.Sprintf("PATH=%s", path))
+ 	}
+
+ 	// Build the argument list in a fresh slice. Appending directly to
+ 	// r.runtimeFlags could write into its backing array, which is shared
+ 	// by every call made through this runtime.
+ 	args := make([]string, 0, len(r.runtimeFlags)+2)
+ 	args = append(args, r.runtimeFlags...)
+ 	args = append(args, "start", ctr.ID())
+ 	if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...); err != nil {
+ 		return err
+ 	}
+
+ 	ctr.state.StartedTime = time.Now()
+
+ 	return nil
+ }
+
+ // KillContainer sends the given signal to the given container.
+ // If all is set, send to all PIDs in the container.
+ // All is only supported if the container created cgroups.
+ // When the runtime call fails, the container status is refreshed; a container
+ // found stopped or exited yields define.ErrCtrStateInvalid, anything else the
+ // wrapped runtime error.
+ func (r *ConmonOCIRuntime) KillContainer(ctr *Container, signal uint, all bool) error {
+ 	logrus.Debugf("Sending signal %d to container %s", signal, ctr.ID())
+ 	runtimeDir, err := util.GetRuntimeDir()
+ 	if err != nil {
+ 		return err
+ 	}
+
+ 	cmdArgs := append([]string{}, r.runtimeFlags...)
+ 	cmdArgs = append(cmdArgs, "kill")
+ 	if all {
+ 		cmdArgs = append(cmdArgs, "--all")
+ 	}
+ 	cmdArgs = append(cmdArgs, ctr.ID(), strconv.FormatUint(uint64(signal), 10))
+
+ 	environ := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+ 	killErr := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, environ, r.path, cmdArgs...)
+ 	if killErr == nil {
+ 		return nil
+ 	}
+
+ 	// Update container state - there's a chance we failed because
+ 	// the container exited in the meantime.
+ 	if statusErr := r.UpdateContainerStatus(ctr); statusErr != nil {
+ 		logrus.Infof("Error updating status for container %s: %v", ctr.ID(), statusErr)
+ 	}
+ 	if ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited) {
+ 		return define.ErrCtrStateInvalid
+ 	}
+ 	return fmt.Errorf("error sending signal to container %s: %w", ctr.ID(), killErr)
+ }
+
+ // StopContainer stops a container, first using its given stop signal (or
+ // SIGTERM if no signal was specified), then using SIGKILL.
+ // Timeout is given in seconds. If timeout is 0, the container will be
+ // immediately killed with SIGKILL.
+ // Does not set finished time for container, assumes you will run updateStatus
+ // after to pull the exit code.
+ // If sending a signal fails while the container process still exists, the
+ // error from sending the signal is returned.
+ func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool) error {
+ 	logrus.Debugf("Stopping container %s (PID %d)", ctr.ID(), ctr.state.PID)
+
+ 	// Ping the container to see if it's alive
+ 	// If it's not, it's already stopped, return
+ 	if err := unix.Kill(ctr.state.PID, 0); err == unix.ESRCH {
+ 		return nil
+ 	}
+
+ 	stopSignal := ctr.config.StopSignal
+ 	if stopSignal == 0 {
+ 		stopSignal = uint(syscall.SIGTERM)
+ 	}
+
+ 	if timeout > 0 {
+ 		if err := r.KillContainer(ctr, stopSignal, all); err != nil {
+ 			// Is the container gone?
+ 			// If so, it probably died between the first check and
+ 			// our sending the signal
+ 			// The container is stopped, so exit cleanly
+ 			// The probe result gets its own name so that it cannot
+ 			// replace the KillContainer error we report below.
+ 			if aliveErr := unix.Kill(ctr.state.PID, 0); aliveErr == unix.ESRCH {
+ 				return nil
+ 			}
+
+ 			return err
+ 		}
+
+ 		if err := waitContainerStop(ctr, time.Duration(timeout)*time.Second); err != nil {
+ 			logrus.Debugf("Timed out stopping container %s with %s, resorting to SIGKILL: %v", ctr.ID(), unix.SignalName(syscall.Signal(stopSignal)), err)
+ 			logrus.Warnf("StopSignal %s failed to stop container %s in %d seconds, resorting to SIGKILL", unix.SignalName(syscall.Signal(stopSignal)), ctr.Name(), timeout)
+ 		} else {
+ 			// No error, the container is dead
+ 			return nil
+ 		}
+ 	}
+
+ 	if err := r.KillContainer(ctr, uint(syscall.SIGKILL), all); err != nil {
+ 		// Again, check if the container is gone. If it is, exit cleanly.
+ 		if aliveErr := unix.Kill(ctr.state.PID, 0); aliveErr == unix.ESRCH {
+ 			return nil
+ 		}
+
+ 		return fmt.Errorf("error sending SIGKILL to container %s: %w", ctr.ID(), err)
+ 	}
+
+ 	// Give runtime a few seconds to make it happen
+ 	if err := waitContainerStop(ctr, killContainerTimeout); err != nil {
+ 		return err
+ 	}
+
+ 	return nil
+ }
+
+ // DeleteContainer deletes a container from the OCI runtime by running
+ // "<runtime> [runtimeFlags...] delete --force <ctr ID>".
+ func (r *ConmonOCIRuntime) DeleteContainer(ctr *Container) error {
+ 	runtimeDir, err := util.GetRuntimeDir()
+ 	if err != nil {
+ 		return err
+ 	}
+ 	env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+
+ 	// Use a fresh slice: appending directly to r.runtimeFlags could write
+ 	// into its backing array, which is shared by every call made through
+ 	// this runtime.
+ 	args := make([]string, 0, len(r.runtimeFlags)+3)
+ 	args = append(args, r.runtimeFlags...)
+ 	args = append(args, "delete", "--force", ctr.ID())
+ 	return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...)
+ }
+
+ // PauseContainer pauses the given container by running
+ // "<runtime> [runtimeFlags...] pause <ctr ID>".
+ func (r *ConmonOCIRuntime) PauseContainer(ctr *Container) error {
+ 	runtimeDir, err := util.GetRuntimeDir()
+ 	if err != nil {
+ 		return err
+ 	}
+ 	env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+
+ 	// Use a fresh slice: appending directly to r.runtimeFlags could write
+ 	// into its backing array, which is shared by every call made through
+ 	// this runtime.
+ 	args := make([]string, 0, len(r.runtimeFlags)+2)
+ 	args = append(args, r.runtimeFlags...)
+ 	args = append(args, "pause", ctr.ID())
+ 	return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...)
+ }
+
+ // UnpauseContainer unpauses the given container by running
+ // "<runtime> [runtimeFlags...] resume <ctr ID>".
+ func (r *ConmonOCIRuntime) UnpauseContainer(ctr *Container) error {
+ 	runtimeDir, err := util.GetRuntimeDir()
+ 	if err != nil {
+ 		return err
+ 	}
+ 	env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+
+ 	// Use a fresh slice: appending directly to r.runtimeFlags could write
+ 	// into its backing array, which is shared by every call made through
+ 	// this runtime.
+ 	args := make([]string, 0, len(r.runtimeFlags)+2)
+ 	args = append(args, r.runtimeFlags...)
+ 	args = append(args, "resume", ctr.ID())
+ 	return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...)
+ }
+
+ // HTTPAttach performs an attach for the HTTP API.
+ // The caller must handle closing the HTTP connection after this returns.
+ // The cancel channel is not closed; it is up to the caller to do so after
+ // this function returns.
+ // If this is a container with a terminal, we will stream raw. If it is not, we
+ // will stream with an 8-byte header to multiplex STDOUT and STDERR.
+ // A nil streams attaches STDIN, STDOUT and STDERR; a nil detachKeys uses the
+ // detach keys from the engine configuration.
+ // If streamLogs is set, the container's existing logs are written to the
+ // hijacked connection first; if streamAttach is unset the function returns
+ // once that is done.
+ // A value is sent on hijackDone as soon as the connection has been hijacked,
+ // so the caller must be receiving from it.
+ // Returns any errors that occurred; errors that occur after the hijack are
+ // additionally handed to hijackWriteErrorAndClose.
+ func (r *ConmonOCIRuntime) HTTPAttach(ctr *Container, req *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, detachKeys *string, cancel <-chan bool, hijackDone chan<- bool, streamAttach, streamLogs bool) (deferredErr error) {
+ 	isTerminal := false
+ 	if ctr.config.Spec.Process != nil {
+ 		isTerminal = ctr.config.Spec.Process.Terminal
+ 	}
+
+ 	if streams != nil {
+ 		if !streams.Stdin && !streams.Stdout && !streams.Stderr {
+ 			return fmt.Errorf("must specify at least one stream to attach to: %w", define.ErrInvalidArg)
+ 		}
+ 	}
+
+ 	attachSock, err := r.AttachSocketPath(ctr)
+ 	if err != nil {
+ 		return err
+ 	}
+
+ 	// conn stays nil when streamAttach is unset; it is only used after
+ 	// the streamAttach check further down.
+ 	var conn *net.UnixConn
+ 	if streamAttach {
+ 		newConn, err := openUnixSocket(attachSock)
+ 		if err != nil {
+ 			return fmt.Errorf("failed to connect to container's attach socket: %v: %w", attachSock, err)
+ 		}
+ 		conn = newConn
+ 		defer func() {
+ 			if err := conn.Close(); err != nil {
+ 				logrus.Errorf("Unable to close container %s attach socket: %q", ctr.ID(), err)
+ 			}
+ 		}()
+
+ 		logrus.Debugf("Successfully connected to container %s attach socket %s", ctr.ID(), attachSock)
+ 	}
+
+ 	detachString := ctr.runtime.config.Engine.DetachKeys
+ 	if detachKeys != nil {
+ 		detachString = *detachKeys
+ 	}
+ 	detach, err := processDetachKeys(detachString)
+ 	if err != nil {
+ 		return err
+ 	}
+
+ 	attachStdout := true
+ 	attachStderr := true
+ 	attachStdin := true
+ 	if streams != nil {
+ 		attachStdout = streams.Stdout
+ 		attachStderr = streams.Stderr
+ 		attachStdin = streams.Stdin
+ 	}
+
+ 	logrus.Debugf("Going to hijack container %s attach connection", ctr.ID())
+
+ 	// Alright, let's hijack.
+ 	hijacker, ok := w.(http.Hijacker)
+ 	if !ok {
+ 		return fmt.Errorf("unable to hijack connection")
+ 	}
+
+ 	httpCon, httpBuf, err := hijacker.Hijack()
+ 	if err != nil {
+ 		return fmt.Errorf("error hijacking connection: %w", err)
+ 	}
+
+ 	hijackDone <- true
+
+ 	writeHijackHeader(req, httpBuf)
+
+ 	// Force a flush after the header is written.
+ 	if err := httpBuf.Flush(); err != nil {
+ 		return fmt.Errorf("error flushing HTTP hijack header: %w", err)
+ 	}
+
+ 	// From here on, the named result is reported over the hijacked
+ 	// connection when the function returns.
+ 	defer func() {
+ 		hijackWriteErrorAndClose(deferredErr, ctr.ID(), isTerminal, httpCon, httpBuf)
+ 	}()
+
+ 	logrus.Debugf("Hijack for container %s attach session done, ready to stream", ctr.ID())
+
+ 	// TODO: This is gross. Really, really gross.
+ 	// I want to say we should read all the logs into an array before
+ 	// calling this, in container_api.go, but that could take a lot of
+ 	// memory...
+ 	// On the whole, we need to figure out a better way of doing this,
+ 	// though.
+ 	logSize := 0
+ 	if streamLogs {
+ 		logrus.Debugf("Will stream logs for container %s attach session", ctr.ID())
+
+ 		// Get all logs for the container
+ 		logChan := make(chan *logs.LogLine)
+ 		logOpts := new(logs.LogOptions)
+ 		logOpts.Tail = -1
+ 		logOpts.WaitGroup = new(sync.WaitGroup)
+ 		errChan := make(chan error)
+ 		go func() {
+ 			var err error
+ 			// In non-terminal mode we need to prepend with the
+ 			// stream header.
+ 			logrus.Debugf("Writing logs for container %s to HTTP attach", ctr.ID())
+ 			for logLine := range logChan {
+ 				if !isTerminal {
+ 					device := logLine.Device
+ 					var header []byte
+ 					headerLen := uint32(len(logLine.Msg))
+ 					logSize += len(logLine.Msg)
+ 					switch strings.ToLower(device) {
+ 					case "stdin":
+ 						header = makeHTTPAttachHeader(0, headerLen)
+ 					case "stdout":
+ 						header = makeHTTPAttachHeader(1, headerLen)
+ 					case "stderr":
+ 						header = makeHTTPAttachHeader(2, headerLen)
+ 					default:
+ 						logrus.Errorf("Unknown device for log line: %s", device)
+ 						header = makeHTTPAttachHeader(1, headerLen)
+ 					}
+ 					_, err = httpBuf.Write(header)
+ 					if err != nil {
+ 						break
+ 					}
+ 				}
+ 				_, err = httpBuf.Write([]byte(logLine.Msg))
+ 				if err != nil {
+ 					break
+ 				}
+ 				if !logLine.Partial() {
+ 					_, err = httpBuf.Write([]byte("\n"))
+ 					if err != nil {
+ 						break
+ 					}
+ 				}
+ 				err = httpBuf.Flush()
+ 				if err != nil {
+ 					break
+ 				}
+ 			}
+ 			errChan <- err
+ 		}()
+ 		// NOTE(review): if ReadLog fails, logChan is never closed and
+ 		// the writer goroutine above stays blocked on it. Whether it
+ 		// is safe to close logChan here depends on ReadLog, which is
+ 		// not visible from this function - confirm before changing.
+ 		if err := ctr.ReadLog(context.Background(), logOpts, logChan, 0); err != nil {
+ 			return err
+ 		}
+ 		go func() {
+ 			logOpts.WaitGroup.Wait()
+ 			close(logChan)
+ 		}()
+ 		// Wait for the writer goroutine before looking at logSize:
+ 		// the goroutine updates it, so reading it earlier is a data
+ 		// race and reports an arbitrary intermediate value.
+ 		logErr := <-errChan
+ 		logrus.Debugf("Done reading logs for container %s, %d bytes", ctr.ID(), logSize)
+ 		if logErr != nil {
+ 			return logErr
+ 		}
+ 	}
+ 	if !streamAttach {
+ 		logrus.Debugf("Done streaming logs for container %s attach, exiting as attach streaming not requested", ctr.ID())
+ 		return nil
+ 	}
+
+ 	logrus.Debugf("Forwarding attach output for container %s", ctr.ID())
+
+ 	// Each channel receives exactly one value. The one-slot buffer lets
+ 	// the copy goroutines finish even when this function has already
+ 	// returned (cancel, or the other direction ending first); with
+ 	// unbuffered channels they would block on the send forever.
+ 	stdoutChan := make(chan error, 1)
+ 	stdinChan := make(chan error, 1)
+
+ 	// Handle STDOUT/STDERR
+ 	go func() {
+ 		var err error
+ 		if isTerminal {
+ 			// Hack: return immediately if attachStdout not set to
+ 			// emulate Docker.
+ 			// Basically, when terminal is set, STDERR goes nowhere.
+ 			// Everything does over STDOUT.
+ 			// Therefore, if not attaching STDOUT - we'll never copy
+ 			// anything from here.
+ 			logrus.Debugf("Performing terminal HTTP attach for container %s", ctr.ID())
+ 			if attachStdout {
+ 				err = httpAttachTerminalCopy(conn, httpBuf, ctr.ID())
+ 			}
+ 		} else {
+ 			logrus.Debugf("Performing non-terminal HTTP attach for container %s", ctr.ID())
+ 			err = httpAttachNonTerminalCopy(conn, httpBuf, ctr.ID(), attachStdin, attachStdout, attachStderr)
+ 		}
+ 		stdoutChan <- err
+ 		logrus.Debugf("STDOUT/ERR copy completed")
+ 	}()
+ 	// Next, STDIN. Avoid entirely if attachStdin unset.
+ 	if attachStdin {
+ 		go func() {
+ 			_, err := cutil.CopyDetachable(conn, httpBuf, detach)
+ 			logrus.Debugf("STDIN copy completed")
+ 			stdinChan <- err
+ 		}()
+ 	}
+
+ 	for {
+ 		select {
+ 		case err := <-stdoutChan:
+ 			if err != nil {
+ 				return err
+ 			}
+
+ 			return nil
+ 		case err := <-stdinChan:
+ 			if err != nil {
+ 				return err
+ 			}
+ 			// copy stdin is done, close it
+ 			// Keep looping afterwards: output may still arrive.
+ 			if connErr := conn.CloseWrite(); connErr != nil {
+ 				logrus.Errorf("Unable to close conn: %v", connErr)
+ 			}
+ 		case <-cancel:
+ 			return nil
+ 		}
+ 	}
+ }
+
+ // isRetryable reports whether err carries a syscall.Errno of EINTR or EAGAIN,
+ // i.e. the syscall was interrupted or the operation on a non blocking file
+ // descriptor wasn't ready for completion. Any other error, including nil, is
+ // not retryable.
+ func isRetryable(err error) bool {
+ 	var code syscall.Errno
+ 	if !errors.As(err, &code) {
+ 		return false
+ 	}
+ 	switch code {
+ 	case syscall.EINTR, syscall.EAGAIN:
+ 		return true
+ 	default:
+ 		return false
+ 	}
+ }
+
+ // openControlFile opens the terminal control file "ctl" inside parentDir for
+ // non-blocking writes. Opens that fail with a retryable error are repeated
+ // every tenth of a second, up to 600 attempts; any other failure is returned
+ // immediately.
+ func openControlFile(ctr *Container, parentDir string) (*os.File, error) {
+ 	const (
+ 		maxAttempts = 600
+ 		retryDelay  = time.Second / 10
+ 	)
+
+ 	controlPath := filepath.Join(parentDir, "ctl")
+ 	for attempt := 0; attempt < maxAttempts; attempt++ {
+ 		ctl, openErr := os.OpenFile(controlPath, unix.O_WRONLY|unix.O_NONBLOCK, 0)
+ 		switch {
+ 		case openErr == nil:
+ 			return ctl, nil
+ 		case !isRetryable(openErr):
+ 			return nil, fmt.Errorf("could not open ctl file for terminal resize for container %s: %w", ctr.ID(), openErr)
+ 		}
+ 		time.Sleep(retryDelay)
+ 	}
+ 	return nil, fmt.Errorf("timeout waiting for %q", controlPath)
+ }
+
+ // AttachResize resizes the terminal used by the given container.
+ // It writes a "1 <height> <width>" line to the control file found in the
+ // container's bundle path.
+ func (r *ConmonOCIRuntime) AttachResize(ctr *Container, newSize resize.TerminalSize) error {
+ 	ctl, openErr := openControlFile(ctr, ctr.bundlePath())
+ 	if openErr != nil {
+ 		return openErr
+ 	}
+ 	defer ctl.Close()
+
+ 	logrus.Debugf("Received a resize event for container %s: %+v", ctr.ID(), newSize)
+ 	_, writeErr := fmt.Fprintf(ctl, "%d %d %d\n", 1, newSize.Height, newSize.Width)
+ 	if writeErr == nil {
+ 		return nil
+ 	}
+ 	return fmt.Errorf("failed to write to ctl file to resize terminal: %w", writeErr)
+ }
+
+ // CheckpointContainer checkpoints the given container by running the OCI
+ // runtime's "checkpoint" subcommand.
+ // Checkpoint images go to the container's checkpoint path, or to its
+ // pre-checkpoint path when options.PreCheckPoint is set; logs and statistics
+ // go to the bundle path.
+ // The returned value is the time spent in the runtime in microseconds when
+ // options.PrintStats is set, and 0 otherwise.
+ func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) {
+ 	// imagePath is used by CRIU to store the actual checkpoint files
+ 	imagePath := ctr.CheckpointPath()
+ 	if options.PreCheckPoint {
+ 		imagePath = ctr.PreCheckPointPath()
+ 	}
+ 	// workPath will be used to store dump.log and stats-dump
+ 	workPath := ctr.bundlePath()
+ 	logrus.Debugf("Writing checkpoint to %s", imagePath)
+ 	logrus.Debugf("Writing checkpoint logs to %s", workPath)
+ 	logrus.Debugf("Pre-dump the container %t", options.PreCheckPoint)
+ 	args := []string{}
+ 	args = append(args, r.runtimeFlags...)
+ 	args = append(args, "checkpoint")
+ 	args = append(args, "--image-path")
+ 	args = append(args, imagePath)
+ 	args = append(args, "--work-path")
+ 	args = append(args, workPath)
+ 	// --leave-running is added exactly once, whether or not this is a
+ 	// pre-checkpoint.
+ 	if options.KeepRunning {
+ 		args = append(args, "--leave-running")
+ 	}
+ 	if options.TCPEstablished {
+ 		args = append(args, "--tcp-established")
+ 	}
+ 	if options.FileLocks {
+ 		args = append(args, "--file-locks")
+ 	}
+ 	if options.PreCheckPoint {
+ 		args = append(args, "--pre-dump")
+ 	}
+ 	if !options.PreCheckPoint && options.WithPrevious {
+ 		args = append(
+ 			args,
+ 			"--parent-path",
+ 			filepath.Join("..", preCheckpointDir),
+ 		)
+ 	}
+
+ 	args = append(args, ctr.ID())
+ 	logrus.Debugf("the args to checkpoint: %s %s", r.path, strings.Join(args, " "))
+
+ 	runtimeDir, err := util.GetRuntimeDir()
+ 	if err != nil {
+ 		return 0, err
+ 	}
+ 	env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+ 	if path, ok := os.LookupEnv("PATH"); ok {
+ 		env = append(env, fmt.Sprintf("PATH=%s", path))
+ 	}
+
+ 	// The clock starts inside the callback so that only the runtime
+ 	// invocation itself is measured.
+ 	var runtimeCheckpointStarted time.Time
+ 	err = r.withContainerSocketLabel(ctr, func() error {
+ 		runtimeCheckpointStarted = time.Now()
+ 		return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...)
+ 	})
+
+ 	runtimeCheckpointDuration := func() int64 {
+ 		if options.PrintStats {
+ 			return time.Since(runtimeCheckpointStarted).Microseconds()
+ 		}
+ 		return 0
+ 	}()
+
+ 	return runtimeCheckpointDuration, err
+ }
+
+ // CheckConmonRunning reports whether the container's conmon process is
+ // running. A recorded conmon PID is probed with signal 0. Without a recorded
+ // PID, conmon is assumed to be running exactly when the container is running
+ // or paused.
+ func (r *ConmonOCIRuntime) CheckConmonRunning(ctr *Container) (bool, error) {
+ 	conmonPID := ctr.state.ConmonPID
+ 	if conmonPID != 0 {
+ 		// We have a conmon PID. Ping it with signal 0.
+ 		switch pingErr := unix.Kill(conmonPID, 0); {
+ 		case pingErr == nil:
+ 			return true, nil
+ 		case pingErr == unix.ESRCH:
+ 			return false, nil
+ 		default:
+ 			return false, fmt.Errorf("error pinging container %s conmon with signal 0: %w", ctr.ID(), pingErr)
+ 		}
+ 	}
+
+ 	// Container's not running, so conmon PID being unset is
+ 	// expected. Conmon is not running.
+ 	if !ctr.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
+ 		return false, nil
+ 	}
+
+ 	// If the container is running or paused, assume Conmon is
+ 	// running. We didn't record Conmon PID on some old versions, so
+ 	// that is likely what's going on...
+ 	// Unusual enough that we should print a warning message though.
+ 	logrus.Warnf("Conmon PID is not set, but container is running!")
+ 	return true, nil
+ }
+
+ // SupportsCheckpoint checks if the OCI runtime supports checkpointing
+ // containers.
+ // The decision is delegated to crutils.CRRuntimeSupportsCheckpointRestore,
+ // which is given the resolved path of the runtime executable.
+ func (r *ConmonOCIRuntime) SupportsCheckpoint() bool {
+ return crutils.CRRuntimeSupportsCheckpointRestore(r.path)
+ }
+
+ // SupportsJSONErrors checks if the OCI runtime supports JSON-formatted error
+ // messages.
+ // The value is fixed when the runtime is constructed: it is true when the
+ // base name of the runtime is listed in Engine.RuntimeSupportsJSON.
+ func (r *ConmonOCIRuntime) SupportsJSONErrors() bool {
+ return r.supportsJSON
+ }
+
+ // SupportsNoCgroups checks if the OCI runtime supports running containers
+ // without cgroups (the --cgroup-manager=disabled flag).
+ // The value is fixed when the runtime is constructed: it is true when the
+ // base name of the runtime is listed in Engine.RuntimeSupportsNoCgroups.
+ func (r *ConmonOCIRuntime) SupportsNoCgroups() bool {
+ return r.supportsNoCgroups
+ }
+
+ // SupportsKVM checks if the OCI runtime supports running containers
+ // without KVM separation
+ // NOTE(review): "without" reads inverted next to the method name; presumably
+ // this means "with KVM separation" - confirm.
+ // The value is fixed when the runtime is constructed: it is true when the
+ // base name of the runtime is listed in Engine.RuntimeSupportsKVM.
+ func (r *ConmonOCIRuntime) SupportsKVM() bool {
+ return r.supportsKVM
+ }
+
+ // AttachSocketPath returns the path of a single container's attach socket,
+ // which is the "attach" entry inside the container's bundle path.
+ // A nil container yields an error wrapping define.ErrInvalidArg.
+ func (r *ConmonOCIRuntime) AttachSocketPath(ctr *Container) (string, error) {
+ 	if ctr != nil {
+ 		socketPath := filepath.Join(ctr.bundlePath(), "attach")
+ 		return socketPath, nil
+ 	}
+ 	return "", fmt.Errorf("must provide a valid container to get attach socket path: %w", define.ErrInvalidArg)
+ }
+
+ // ExitFilePath returns the path of a container's exit file, which is named
+ // after the container ID and lives in the runtime's exits directory.
+ // A nil container yields an error wrapping define.ErrInvalidArg.
+ func (r *ConmonOCIRuntime) ExitFilePath(ctr *Container) (string, error) {
+ 	if ctr != nil {
+ 		exitFile := filepath.Join(r.exitsDir, ctr.ID())
+ 		return exitFile, nil
+ 	}
+ 	return "", fmt.Errorf("must provide a valid container to get exit file path: %w", define.ErrInvalidArg)
+ }
+
+ // RuntimeInfo provides information on the runtime: package, path and version
+ // of conmon, and name, path, package and version of the OCI runtime.
+ // Failing to obtain either version is an error.
+ func (r *ConmonOCIRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) {
+ 	ociPkg := packageVersion(r.path)
+ 	conmonPkg := packageVersion(r.conmonPath)
+
+ 	ociVersion, err := r.getOCIRuntimeVersion()
+ 	if err != nil {
+ 		return nil, nil, fmt.Errorf("error getting version of OCI runtime %s: %w", r.name, err)
+ 	}
+ 	conmonVersion, err := r.getConmonVersion()
+ 	if err != nil {
+ 		return nil, nil, fmt.Errorf("error getting conmon version: %w", err)
+ 	}
+
+ 	conmonInfo := &define.ConmonInfo{
+ 		Package: conmonPkg,
+ 		Path:    r.conmonPath,
+ 		Version: conmonVersion,
+ 	}
+ 	runtimeInfo := &define.OCIRuntimeInfo{
+ 		Name:    r.name,
+ 		Path:    r.path,
+ 		Package: ociPkg,
+ 		Version: ociVersion,
+ 	}
+ 	return conmonInfo, runtimeInfo, nil
+ }
+
+ // makeAccessible changes the path permission and each parent directory to have --x--x--x
+ // It walks from path towards the root. A component already owned by uid:gid
+ // is left alone; any other component gets the execute bits added if they are
+ // not all set. The walk ends quietly at the first component that does not
+ // exist, at "/", or - for a relative path - once filepath.Dir stops making
+ // progress (at ".").
+ func makeAccessible(path string, uid, gid int) error {
+ 	for path != "/" {
+ 		st, err := os.Stat(path)
+ 		if err != nil {
+ 			if os.IsNotExist(err) {
+ 				return nil
+ 			}
+ 			return err
+ 		}
+
+ 		owner := st.Sys().(*syscall.Stat_t)
+ 		ownedByTarget := int(owner.Uid) == uid && int(owner.Gid) == gid
+ 		if !ownedByTarget && st.Mode()&0111 != 0111 {
+ 			if err := os.Chmod(path, st.Mode()|0111); err != nil {
+ 				return err
+ 			}
+ 		}
+
+ 		// filepath.Dir of a relative path converges on "." and never
+ 		// reaches "/"; stop as soon as the parent equals the current
+ 		// path so the walk always terminates.
+ 		parent := filepath.Dir(path)
+ 		if parent == path {
+ 			return nil
+ 		}
+ 		path = parent
+ 	}
+ 	return nil
+ }
+
+ // Wait for a container which has been sent a signal to stop
+ // This waits on the PID recorded in the container state, via waitPidStop, and
+ // returns that function's result.
+ func waitContainerStop(ctr *Container, timeout time.Duration) error {
+ return waitPidStop(ctr.state.PID, timeout)
+ }
+
+ // waitPidStop waits for the given PID to stop. The PID is probed with signal 0
+ // every 100 milliseconds until the probe reports that no such process exists.
+ // If that has not happened when timeout elapses, the probing goroutine is told
+ // to quit and an error is returned.
+ func waitPidStop(pid int, timeout time.Duration) error {
+ 	exited := make(chan struct{})
+ 	abort := make(chan struct{})
+
+ 	poll := func() {
+ 		for {
+ 			select {
+ 			case <-abort:
+ 				return
+ 			default:
+ 			}
+
+ 			probeErr := unix.Kill(pid, 0)
+ 			if probeErr == unix.ESRCH {
+ 				close(exited)
+ 				return
+ 			}
+ 			if probeErr != nil {
+ 				logrus.Errorf("Pinging PID %d with signal 0: %v", pid, probeErr)
+ 			}
+ 			time.Sleep(100 * time.Millisecond)
+ 		}
+ 	}
+ 	go poll()
+
+ 	select {
+ 	case <-exited:
+ 		return nil
+ 	case <-time.After(timeout):
+ 		close(abort)
+ 		return fmt.Errorf("given PIDs did not die within timeout")
+ 	}
+ }
+
+ // getLogTag renders the container's log tag. The tag is parsed as a
+ // text/template and executed against the container's inspect data.
+ // An empty tag yields an empty string. A failure to inspect the container is
+ // currently swallowed and also yields an empty string; template parse and
+ // execution errors are returned.
+ func (r *ConmonOCIRuntime) getLogTag(ctr *Container) (string, error) {
+ 	tagTemplate := ctr.LogTag()
+ 	if tagTemplate == "" {
+ 		return "", nil
+ 	}
+
+ 	inspectData, err := ctr.inspectLocked(false)
+ 	if err != nil {
+ 		// FIXME: this error should probably be returned
+ 		return "", nil //nolint: nilerr
+ 	}
+
+ 	parsed, err := template.New("container").Parse(tagTemplate)
+ 	if err != nil {
+ 		return "", fmt.Errorf("template parsing error %s: %w", tagTemplate, err)
+ 	}
+
+ 	var rendered bytes.Buffer
+ 	if err := parsed.Execute(&rendered, inspectData); err != nil {
+ 		return "", err
+ 	}
+ 	return rendered.String(), nil
+ }
+
+// createOCIContainer generates this container's main conmon instance and
+// prepares it for starting.
+//
+// It assembles the conmon command line (shared arguments plus sd-notify,
+// terminal/stdin, timeout, keyring, pid file, pivot, exit command, preserved
+// FDs and checkpoint-restore options), starts conmon with the sync and start
+// pipes passed as extra files, asks moveConmonToCgroupAndSignal to release
+// it, waits for the initial conmon process to exit and finally reads the
+// container PID from the sync pipe into ctr.state.PID. The conmon PID is
+// recorded in ctr.state.ConmonPID when a conmon pid file is configured and
+// readable.
+//
+// restoreOptions is nil for a regular create. When it is non-nil the container
+// is restored from ctr.CheckpointPath().
+//
+// The returned int64 is the time spent since conmon was launched, in
+// microseconds, when restoreOptions is non-nil and restoreOptions.PrintStats
+// is set; otherwise it is 0.
+func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
+	var stderrBuf bytes.Buffer
+
+	runtimeDir, err := util.GetRuntimeDir()
+	if err != nil {
+		return 0, err
+	}
+
+	parentSyncPipe, childSyncPipe, err := newPipe()
+	if err != nil {
+		return 0, fmt.Errorf("error creating socket pair: %w", err)
+	}
+	defer errorhandling.CloseQuiet(parentSyncPipe)
+
+	childStartPipe, parentStartPipe, err := newPipe()
+	if err != nil {
+		// The sync pair already exists; release its child end as well.
+		errorhandling.CloseQuiet(childSyncPipe)
+		return 0, fmt.Errorf("error creating socket pair for start pipe: %w", err)
+	}
+	defer errorhandling.CloseQuiet(parentStartPipe)
+
+	// The child ends are only needed until conmon has been started. Close
+	// them exactly once: right after the start attempt on the normal path,
+	// or on return if we bail out before getting that far.
+	childPipesClosed := false
+	closeChildPipes := func() {
+		if childPipesClosed {
+			return
+		}
+		childPipesClosed = true
+		childSyncPipe.Close()
+		childStartPipe.Close()
+	}
+	defer closeChildPipes()
+
+	var ociLog string
+	if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON {
+		ociLog = filepath.Join(ctr.state.RunDir, "oci-log")
+	}
+
+	logTag, err := r.getLogTag(ctr)
+	if err != nil {
+		return 0, err
+	}
+
+	if ctr.config.CgroupsMode == cgroupSplit {
+		if err := utils.MoveUnderCgroupSubtree("runtime"); err != nil {
+			return 0, err
+		}
+	}
+
+	pidfile := ctr.config.PidFile
+	if pidfile == "" {
+		pidfile = filepath.Join(ctr.state.RunDir, "pidfile")
+	}
+
+	args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag)
+
+	if ctr.config.SdNotifyMode == define.SdNotifyModeContainer && ctr.config.SdNotifySocket != "" {
+		args = append(args, fmt.Sprintf("--sdnotify-socket=%s", ctr.config.SdNotifySocket))
+	}
+
+	if ctr.config.Spec.Process.Terminal {
+		args = append(args, "-t")
+	} else if ctr.config.Stdin {
+		args = append(args, "-i")
+	}
+
+	if ctr.config.Timeout > 0 {
+		args = append(args, fmt.Sprintf("--timeout=%d", ctr.config.Timeout))
+	}
+
+	if !r.enableKeyring {
+		args = append(args, "--no-new-keyring")
+	}
+	if ctr.config.ConmonPidFile != "" {
+		args = append(args, "--conmon-pidfile", ctr.config.ConmonPidFile)
+	}
+
+	if r.noPivot {
+		args = append(args, "--no-pivot")
+	}
+
+	exitCommand, err := specgenutil.CreateExitCommandArgs(ctr.runtime.storageConfig, ctr.runtime.config, logrus.IsLevelEnabled(logrus.DebugLevel), ctr.AutoRemove(), false)
+	if err != nil {
+		return 0, err
+	}
+	exitCommand = append(exitCommand, ctr.config.ID)
+
+	args = append(args, "--exit-command", exitCommand[0])
+	for _, arg := range exitCommand[1:] {
+		args = append(args, "--exit-command-arg", arg)
+	}
+
+	// Pass down the LISTEN_* environment (see #10443).
+	preserveFDs := ctr.config.PreserveFDs
+	if val := os.Getenv("LISTEN_FDS"); val != "" {
+		if ctr.config.PreserveFDs > 0 {
+			logrus.Warnf("Ignoring LISTEN_FDS to preserve custom user-specified FDs")
+		} else {
+			fds, err := strconv.Atoi(val)
+			if err != nil {
+				return 0, fmt.Errorf("converting LISTEN_FDS=%s: %w", val, err)
+			}
+			// A negative count would wrap around when converted to uint.
+			if fds < 0 {
+				return 0, fmt.Errorf("converting LISTEN_FDS=%s: negative value: %w", val, define.ErrInvalidArg)
+			}
+			preserveFDs = uint(fds)
+		}
+	}
+
+	if preserveFDs > 0 {
+		args = append(args, formatRuntimeOpts("--preserve-fds", fmt.Sprintf("%d", preserveFDs))...)
+	}
+
+	if restoreOptions != nil {
+		args = append(args, "--restore", ctr.CheckpointPath())
+		if restoreOptions.TCPEstablished {
+			args = append(args, "--runtime-opt", "--tcp-established")
+		}
+		if restoreOptions.FileLocks {
+			args = append(args, "--runtime-opt", "--file-locks")
+		}
+		if restoreOptions.Pod != "" {
+			mountLabel := ctr.config.MountLabel
+			processLabel := ctr.config.ProcessLabel
+			if mountLabel != "" {
+				args = append(
+					args,
+					"--runtime-opt",
+					fmt.Sprintf(
+						"--lsm-mount-context=%s",
+						mountLabel,
+					),
+				)
+			}
+			if processLabel != "" {
+				args = append(
+					args,
+					"--runtime-opt",
+					fmt.Sprintf(
+						"--lsm-profile=selinux:%s",
+						processLabel,
+					),
+				)
+			}
+		}
+	}
+
+	logrus.WithFields(logrus.Fields{
+		"args": args,
+	}).Debugf("running conmon: %s", r.conmonPath)
+
+	cmd := exec.Command(r.conmonPath, args...)
+	cmd.SysProcAttr = &syscall.SysProcAttr{
+		Setpgid: true,
+	}
+	// TODO this is probably a really bad idea for some uses
+	// Make this configurable
+	cmd.Stdin = os.Stdin
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	if ctr.config.Spec.Process.Terminal {
+		cmd.Stderr = &stderrBuf
+	}
+
+	conmonEnv := r.configureConmonEnv(runtimeDir)
+
+	// Files that are only opened so they can be handed to conmon. They are
+	// closed when this function returns, on success and on every error
+	// path, instead of only at the very end of the success path.
+	var filesToClose []*os.File
+	defer func() {
+		for _, f := range filesToClose {
+			errorhandling.CloseQuiet(f)
+		}
+	}()
+
+	// 0, 1 and 2 are stdin, stdout and stderr, so preserved fds start at 3.
+	if preserveFDs > 0 {
+		for fd := 3; fd < int(3+preserveFDs); fd++ {
+			f := os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd))
+			filesToClose = append(filesToClose, f)
+			cmd.ExtraFiles = append(cmd.ExtraFiles, f)
+		}
+	}
+
+	cmd.Env = r.conmonEnv
+	// we don't want to step on users fds they asked to preserve
+	// Since 0-2 are used for stdio, start the fds we pass in at preserveFDs+3
+	cmd.Env = append(cmd.Env, fmt.Sprintf("_OCI_SYNCPIPE=%d", preserveFDs+3), fmt.Sprintf("_OCI_STARTPIPE=%d", preserveFDs+4))
+	cmd.Env = append(cmd.Env, conmonEnv...)
+	cmd.ExtraFiles = append(cmd.ExtraFiles, childSyncPipe, childStartPipe)
+
+	if r.reservePorts && !rootless.IsRootless() && !ctr.config.NetMode.IsSlirp4netns() {
+		ports, err := bindPorts(ctr.convertPortMappings())
+		if err != nil {
+			return 0, err
+		}
+		filesToClose = append(filesToClose, ports...)
+
+		// Leak the port we bound in the conmon process. These fd's won't be used
+		// by the container and conmon will keep the ports busy so that another
+		// process cannot use them.
+		cmd.ExtraFiles = append(cmd.ExtraFiles, ports...)
+	}
+
+	if ctr.config.NetMode.IsSlirp4netns() || rootless.IsRootless() {
+		if ctr.config.PostConfigureNetNS {
+			havePortMapping := len(ctr.config.PortMappings) > 0
+			if havePortMapping {
+				ctr.rootlessPortSyncR, ctr.rootlessPortSyncW, err = os.Pipe()
+				if err != nil {
+					return 0, fmt.Errorf("failed to create rootless port sync pipe: %w", err)
+				}
+			}
+			ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe()
+			if err != nil {
+				return 0, fmt.Errorf("failed to create rootless network sync pipe: %w", err)
+			}
+		} else {
+			if ctr.rootlessSlirpSyncR != nil {
+				defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncR)
+			}
+			if ctr.rootlessSlirpSyncW != nil {
+				defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncW)
+			}
+		}
+		// Leak one end in conmon, the other one will be leaked into slirp4netns
+		cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessSlirpSyncW)
+
+		if ctr.rootlessPortSyncW != nil {
+			defer errorhandling.CloseQuiet(ctr.rootlessPortSyncW)
+			// Leak one end in conmon, the other one will be leaked into rootlessport
+			cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessPortSyncW)
+		}
+	}
+	var runtimeRestoreStarted time.Time
+	if restoreOptions != nil {
+		runtimeRestoreStarted = time.Now()
+	}
+	err = startCommand(cmd, ctr)
+
+	// regardless of whether we errored or not, we no longer need the children pipes
+	closeChildPipes()
+	if err != nil {
+		return 0, err
+	}
+	if err := r.moveConmonToCgroupAndSignal(ctr, cmd, parentStartPipe); err != nil {
+		return 0, err
+	}
+	/* Wait for initial setup and fork, and reap child */
+	err = cmd.Wait()
+	if err != nil {
+		return 0, err
+	}
+
+	pid, err := readConmonPipeData(r.name, parentSyncPipe, ociLog)
+	if err != nil {
+		if err2 := r.DeleteContainer(ctr); err2 != nil {
+			logrus.Errorf("Removing container %s from runtime after creation failed: %v", ctr.ID(), err2)
+		}
+		return 0, err
+	}
+	ctr.state.PID = pid
+
+	conmonPID, err := readConmonPidFile(ctr.config.ConmonPidFile)
+	if err != nil {
+		logrus.Warnf("Error reading conmon pid file for container %s: %v", ctr.ID(), err)
+	} else if conmonPID > 0 {
+		// conmon not having a pid file is a valid state, so don't set it if we don't have it
+		logrus.Infof("Got Conmon PID as %d", conmonPID)
+		ctr.state.ConmonPID = conmonPID
+	}
+
+	// Only report a duration when restore statistics were requested.
+	var runtimeRestoreDuration int64
+	if restoreOptions != nil && restoreOptions.PrintStats {
+		runtimeRestoreDuration = time.Since(runtimeRestoreStarted).Microseconds()
+	}
+
+	// The fds passed down to the runtime (filesToClose) are closed by the
+	// deferred cleanup above so that we do not interfere with them.
+	return runtimeRestoreDuration, nil
+}
+
+// configureConmonEnv gets the environment values to add to conmon's exec struct.
+// The result holds, in order: every LC_* variable of the current process,
+// CONTAINERS_CONF when it is set, XDG_RUNTIME_DIR pointing at runtimeDir, the
+// current values of _CONTAINERS_USERNS_CONFIGURED and _CONTAINERS_ROOTLESS_UID
+// (added even when empty) and HOME when homedir.Get returns a non-empty path.
+// TODO this may want to be less hardcoded/more configurable in the future
+func (r *ConmonOCIRuntime) configureConmonEnv(runtimeDir string) []string {
+	var conmonEnv []string
+
+	// Forward locale settings untouched.
+	for _, entry := range os.Environ() {
+		if !strings.HasPrefix(entry, "LC_") {
+			continue
+		}
+		conmonEnv = append(conmonEnv, entry)
+	}
+
+	if confPath, isSet := os.LookupEnv("CONTAINERS_CONF"); isSet {
+		conmonEnv = append(conmonEnv, fmt.Sprintf("CONTAINERS_CONF=%s", confPath))
+	}
+
+	conmonEnv = append(conmonEnv,
+		fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir),
+		fmt.Sprintf("_CONTAINERS_USERNS_CONFIGURED=%s", os.Getenv("_CONTAINERS_USERNS_CONFIGURED")),
+		fmt.Sprintf("_CONTAINERS_ROOTLESS_UID=%s", os.Getenv("_CONTAINERS_ROOTLESS_UID")),
+	)
+
+	if userHome := homedir.Get(); userHome != "" {
+		conmonEnv = append(conmonEnv, fmt.Sprintf("HOME=%s", userHome))
+	}
+
+	return conmonEnv
+}
+
+// sharedConmonArgs takes common arguments for exec and create/restore and
+// formats them for the conmon CLI.
+//
+// The returned slice starts with the API version, container ID, cuuid, runtime
+// path, bundle, pid file, container name and exit directory, followed by the
+// configured runtime flags, the systemd-cgroup switch, the log driver, log
+// level, optional maximum log size, optional OCI runtime JSON log, optional
+// log tag and, for containers without cgroups, the flags disabling the
+// runtime's cgroup manager.
+func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, ociLogPath, logDriver, logTag string) []string {
+	// set the conmon API version to be able to use the correct sync struct keys
+	conmonArgs := []string{
+		"--api-version", "1",
+		"-c", ctr.ID(),
+		"-u", cuuid,
+		"-r", r.path,
+		"-b", bundlePath,
+		"-p", pidPath,
+		"-n", ctr.Name(),
+		"--exit-dir", exitDir,
+		"--full-attach",
+	}
+	for _, flag := range r.runtimeFlags {
+		conmonArgs = append(conmonArgs, "--runtime-arg", flag)
+	}
+
+	usesSystemdCgroups := ctr.CgroupManager() == config.SystemdCgroupsManager
+	if usesSystemdCgroups && !ctr.config.NoCgroups && ctr.config.CgroupsMode != cgroupSplit {
+		conmonArgs = append(conmonArgs, "-s")
+	}
+
+	var logDriverArg string
+	switch logDriver {
+	case define.JournaldLogging, define.NoLogging, define.PassthroughLogging:
+		// These drivers are passed to conmon under their own name.
+		logDriverArg = logDriver
+	case "", define.JSONLogging, define.KubernetesLogging:
+		// An empty driver either comes from `--log-driver ""` or from another
+		// place in libpod; neither is treated as an error, so it silently
+		// becomes k8s-file logging just like the JSON driver.
+		logDriverArg = fmt.Sprintf("%s:%s", define.KubernetesLogging, logPath)
+	default:
+		// No case here should happen except JSONLogging, but keep this here in case the options are extended
+		logrus.Errorf("%s logging specified but not supported. Choosing k8s-file logging instead", ctr.LogDriver())
+		logDriverArg = fmt.Sprintf("%s:%s", define.KubernetesLogging, logPath)
+	}
+
+	level := logrus.GetLevel()
+	conmonArgs = append(conmonArgs, "-l", logDriverArg, "--log-level", level.String())
+
+	if level == logrus.DebugLevel {
+		logrus.Debugf("%s messages will be logged to syslog", r.conmonPath)
+		conmonArgs = append(conmonArgs, "--syslog")
+	}
+
+	// A positive per-container limit wins over the runtime-wide one.
+	maxLogSize := ctr.config.LogSize
+	if maxLogSize <= 0 {
+		maxLogSize = r.logSizeMax
+	}
+	if maxLogSize > 0 {
+		conmonArgs = append(conmonArgs, "--log-size-max", fmt.Sprintf("%v", maxLogSize))
+	}
+
+	if ociLogPath != "" {
+		conmonArgs = append(conmonArgs, "--runtime-arg", "--log-format=json", "--runtime-arg", "--log", fmt.Sprintf("--runtime-arg=%s", ociLogPath))
+	}
+	if logTag != "" {
+		conmonArgs = append(conmonArgs, "--log-tag", logTag)
+	}
+	if ctr.config.NoCgroups {
+		logrus.Debugf("Running with no Cgroups")
+		conmonArgs = append(conmonArgs, "--runtime-arg", "--cgroup-manager", "--runtime-arg", "disabled")
+	}
+	return conmonArgs
+}
+
+// startCommand starts cmd and returns the result of cmd.Start.
+//
+// For containers whose sd-notify mode is "container" or "ignore", a non-empty
+// NOTIFY_SOCKET is removed from this process's environment before the start
+// and put back when the function returns. Failures to unset or restore the
+// variable are only logged; they never change the returned error.
+func startCommand(cmd *exec.Cmd, ctr *Container) error {
+	// Make sure to unset the NOTIFY_SOCKET and reset it afterwards if needed.
+	switch ctr.config.SdNotifyMode {
+	case define.SdNotifyModeContainer, define.SdNotifyModeIgnore:
+		if prev := os.Getenv("NOTIFY_SOCKET"); prev != "" {
+			if err := os.Unsetenv("NOTIFY_SOCKET"); err != nil {
+				logrus.Warnf("Error unsetting NOTIFY_SOCKET %v", err)
+			}
+			defer func() {
+				if err := os.Setenv("NOTIFY_SOCKET", prev); err != nil {
+					// Include the cause; the original message dropped it.
+					logrus.Errorf("Resetting NOTIFY_SOCKET=%s: %v", prev, err)
+				}
+			}()
+		}
+	}
+
+	return cmd.Start()
+}
+
+// newPipe creates a close-on-exec SOCK_SEQPACKET unix socket pair for
+// communication and wraps both descriptors in *os.File values.
+// Returns two files - first is named "parent", second is named "child".
+func newPipe() (*os.File, *os.File, error) {
+	pair, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	parentEnd := os.NewFile(uintptr(pair[1]), "parent")
+	childEnd := os.NewFile(uintptr(pair[0]), "child")
+	return parentEnd, childEnd, nil
+}
+
+// readConmonPidFile attempts to read conmon's pid from its pid file.
+//
+// An empty pidFile means no pid file is configured and yields (0, nil).
+// Otherwise the file content is parsed as a decimal integer, ignoring
+// surrounding whitespace such as a trailing newline. A read or parse
+// failure yields (-1, err).
+func readConmonPidFile(pidFile string) (int, error) {
+	if pidFile == "" {
+		return 0, nil
+	}
+
+	contents, err := ioutil.ReadFile(pidFile)
+	if err != nil {
+		return -1, err
+	}
+
+	// strconv.Atoi rejects any surrounding whitespace, so trim it first.
+	conmonPID, err := strconv.Atoi(strings.TrimSpace(string(contents)))
+	if err != nil {
+		return -1, err
+	}
+	return conmonPID, nil
+}
+
+// readConmonPipeData attempts to read a syncInfo struct from the pipe.
+//
+// One newline-terminated JSON message is read from pipe in a separate
+// goroutine while this function waits for at most
+// define.ContainerCreateTimeout. On success the message's "data" field is
+// returned. A negative "data" value is treated as a failure and returned
+// together with an error.
+//
+// On failure the error is taken, in order of preference, from the OCI runtime
+// log at ociLog (when ociLog is non-empty and the file holds valid JSON), from
+// the "message" field sent by conmon, or from a generic error. Read, decode
+// and timeout failures return -1.
+func readConmonPipeData(runtimeName string, pipe *os.File, ociLog string) (int, error) {
+	// syncInfo is used to return data from monitor process to daemon
+	type syncInfo struct {
+		Data    int    `json:"data"`
+		Message string `json:"message,omitempty"`
+	}
+
+	// Wait to get container pid from conmon
+	type syncStruct struct {
+		si  *syncInfo
+		err error
+	}
+
+	// The channel is buffered so the reader goroutine can always hand over its
+	// single result and exit, even if we stopped listening after the timeout.
+	ch := make(chan syncStruct, 1)
+	go func() {
+		var si *syncInfo
+		rdr := bufio.NewReader(pipe)
+		b, err := rdr.ReadBytes('\n')
+		// ignore EOF here, error is returned even when data was read
+		// if it is no valid json unmarshal will fail below
+		if err != nil && !errors.Is(err, io.EOF) {
+			ch <- syncStruct{err: err}
+			// Exactly one result per goroutine: do not fall through and send again.
+			return
+		}
+		if err := json.Unmarshal(b, &si); err != nil {
+			ch <- syncStruct{err: fmt.Errorf("conmon bytes %q: %w", string(b), err)}
+			return
+		}
+		// A JSON "null" decodes without error but leaves the pointer nil.
+		if si == nil {
+			ch <- syncStruct{err: fmt.Errorf("conmon bytes %q: empty sync message", string(b))}
+			return
+		}
+		ch <- syncStruct{si: si}
+	}()
+
+	// runtimeLogError reports the error recorded in the OCI runtime log. The
+	// boolean is false when there is no log or it cannot be read or parsed.
+	runtimeLogError := func() (error, bool) {
+		if ociLog == "" {
+			return nil, false
+		}
+		ociLogData, err := ioutil.ReadFile(ociLog)
+		if err != nil {
+			return nil, false
+		}
+		var ociErr ociError
+		if err := json.Unmarshal(ociLogData, &ociErr); err != nil {
+			return nil, false
+		}
+		return getOCIRuntimeError(runtimeName, ociErr.Msg), true
+	}
+
+	select {
+	case ss := <-ch:
+		if ss.err != nil {
+			if logErr, ok := runtimeLogError(); ok {
+				return -1, logErr
+			}
+			return -1, fmt.Errorf("container create failed (no logs from conmon): %w", ss.err)
+		}
+		logrus.Debugf("Received: %d", ss.si.Data)
+		if ss.si.Data < 0 {
+			if logErr, ok := runtimeLogError(); ok {
+				return ss.si.Data, logErr
+			}
+			// If we failed to parse the JSON errors, then print the output as it is
+			if ss.si.Message != "" {
+				return ss.si.Data, getOCIRuntimeError(runtimeName, ss.si.Message)
+			}
+			return ss.si.Data, fmt.Errorf("container create failed: %w", define.ErrInternal)
+		}
+		return ss.si.Data, nil
+	case <-time.After(define.ContainerCreateTimeout):
+		return -1, fmt.Errorf("container creation timeout: %w", define.ErrInternal)
+	}
+}
+
+// writeConmonPipeData writes nonce data (a single zero byte) to a pipe and
+// returns the write error, if any.
+func writeConmonPipeData(pipe *os.File) error {
+	if _, err := pipe.Write([]byte{0}); err != nil {
+		return err
+	}
+	return nil
+}
+
+// formatRuntimeOpts prepends each opt passed to it with --runtime-opt for
+// passing to conmon. The result is always non-nil and holds two entries per
+// option, in the order the options were given.
+func formatRuntimeOpts(opts ...string) []string {
+	formatted := make([]string, 0, 2*len(opts))
+	for i := range opts {
+		formatted = append(formatted, "--runtime-opt", opts[i])
+	}
+	return formatted
+}
+
+// getConmonVersion returns a string representation of the conmon version.
+// It runs conmon with --version, replaces the first newline of the output
+// with ", " and strips one trailing newline.
+func (r *ConmonOCIRuntime) getConmonVersion() (string, error) {
+	raw, err := utils.ExecCmd(r.conmonPath, "--version")
+	if err != nil {
+		return "", err
+	}
+
+	joined := strings.Replace(raw, "\n", ", ", 1)
+	return strings.TrimSuffix(joined, "\n"), nil
+}
+
+// getOCIRuntimeVersion returns a string representation of the OCI runtime's
+// version: the output of running the runtime with --version, without one
+// trailing newline.
+func (r *ConmonOCIRuntime) getOCIRuntimeVersion() (string, error) {
+	raw, err := utils.ExecCmd(r.path, "--version")
+	if err != nil {
+		return "", err
+	}
+
+	version := strings.TrimSuffix(raw, "\n")
+	return version, nil
+}
+
+// httpAttachTerminalCopy copies data from the container to the HTTP
+// connection, for terminal attach.
+// container is the container's attach socket connection, http is a buffer for
+// the HTTP connection. cid is the ID of the container the attach session is
+// running for (used solely for log and error messages).
+// The first byte of every chunk read identifies the stream; only STDOUT chunks
+// are forwarded (without that byte) and each one is flushed immediately.
+// Returns nil once the container side reports io.EOF.
+func httpAttachTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter, cid string) error {
+	frame := make([]byte, bufferSize)
+	for {
+		n, readErr := container.Read(frame)
+		logrus.Debugf("Read fd(%d) %d/%d bytes for container %s", int(frame[0]), n, len(frame), cid)
+
+		if n > 0 {
+			if frame[0] != AttachPipeStdout {
+				logrus.Errorf("Received unexpected attach type %+d, discarding %d bytes", frame[0], n)
+				continue
+			}
+
+			written, writeErr := http.Write(frame[1:n])
+			if writeErr != nil {
+				if readErr != nil {
+					logrus.Errorf("Reading container %s STDOUT: %v", cid, readErr)
+				}
+				return writeErr
+			}
+			// The stream byte is not forwarded, hence the +1.
+			if written+1 != n {
+				return io.ErrShortWrite
+			}
+
+			// We need to force the buffer to write immediately, so
+			// there isn't a delay on the terminal side.
+			if flushErr := http.Flush(); flushErr != nil {
+				if readErr != nil {
+					logrus.Errorf("Reading container %s STDOUT: %v", cid, readErr)
+				}
+				return flushErr
+			}
+		}
+
+		switch readErr {
+		case nil:
+			// Keep copying.
+		case io.EOF:
+			return nil
+		default:
+			return readErr
+		}
+	}
+}
+
+// httpAttachNonTerminalCopy copies data from a container to an HTTP
+// connection, for non-terminal attach.
+// Every chunk read from the container is written to http as a header
+// (built by makeHTTPAttachHeader from the stream number and payload length)
+// followed by the payload, so that the streams are multiplexed.
+// container is the container's attach socket connection, http is a buffer for
+// the HTTP connection and cid is the container ID used in error messages.
+// stdin, stdout and stderr select which streams are forwarded; chunks of
+// streams that were not requested are dropped.
+// Returns nil once the container side reports io.EOF.
+func httpAttachNonTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter, cid string, stdin, stdout, stderr bool) error {
+ buf := make([]byte, bufferSize)
+ for {
+ numR, err := container.Read(buf)
+ if numR > 0 {
+ var headerBuf []byte
+
+ // Subtract 1 because we strip the first byte (used for
+ // multiplexing by Conmon).
+ headerLen := uint32(numR - 1)
+ // Practically speaking, we could make this buf[0] - 1,
+ // but we need to validate it anyway.
+ // Map the stream byte to the stream number used in the header
+ // (0, 1 or 2) and skip streams the caller did not ask for.
+ switch buf[0] {
+ case AttachPipeStdin:
+ headerBuf = makeHTTPAttachHeader(0, headerLen)
+ if !stdin {
+ continue
+ }
+ case AttachPipeStdout:
+ if !stdout {
+ continue
+ }
+ headerBuf = makeHTTPAttachHeader(1, headerLen)
+ case AttachPipeStderr:
+ if !stderr {
+ continue
+ }
+ headerBuf = makeHTTPAttachHeader(2, headerLen)
+ default:
+ logrus.Errorf("Received unexpected attach type %+d, discarding %d bytes", buf[0], numR)
+ continue
+ }
+
+ // A write failure takes precedence over a pending read error; the
+ // read error is only logged so it is not lost.
+ numH, err2 := http.Write(headerBuf)
+ if err2 != nil {
+ if err != nil {
+ logrus.Errorf("Reading container %s standard streams: %v", cid, err)
+ }
+
+ return err2
+ }
+ // Hardcoding header length is pretty gross, but
+ // fast. Should be safe, as this is a fixed part
+ // of the protocol.
+ if numH != 8 {
+ if err != nil {
+ logrus.Errorf("Reading container %s standard streams: %v", cid, err)
+ }
+
+ return io.ErrShortWrite
+ }
+
+ // Forward the payload without the leading stream byte.
+ numW, err2 := http.Write(buf[1:numR])
+ if err2 != nil {
+ if err != nil {
+ logrus.Errorf("Reading container %s standard streams: %v", cid, err)
+ }
+
+ return err2
+ } else if numW+1 != numR {
+ if err != nil {
+ logrus.Errorf("Reading container %s standard streams: %v", cid, err)
+ }
+
+ return io.ErrShortWrite
+ }
+ // We need to force the buffer to write immediately, so
+ // there isn't a delay on the terminal side.
+ if err2 := http.Flush(); err2 != nil {
+ if err != nil {
+ logrus.Errorf("Reading container %s STDOUT: %v", cid, err)
+ }
+ return err2
+ }
+ }
+ // The read error is handled only after any data returned alongside
+ // it has been forwarded.
+ if err != nil {
+ if err == io.EOF {
+ return nil
+ }
+
+ return err
+ }
+ }
+}
diff --git a/libpod/oci_conmon_exec_linux.go b/libpod/oci_conmon_exec_common.go
index 16cd7ef9f..16cd7ef9f 100644
--- a/libpod/oci_conmon_exec_linux.go
+++ b/libpod/oci_conmon_exec_common.go
diff --git a/libpod/oci_conmon_freebsd.go b/libpod/oci_conmon_freebsd.go
new file mode 100644
index 000000000..6f7ac7fc6
--- /dev/null
+++ b/libpod/oci_conmon_freebsd.go
@@ -0,0 +1,24 @@
+package libpod
+
+import (
+ "errors"
+ "os"
+ "os/exec"
+)
+
+// createRootlessContainer is a stub in this FreeBSD file: it always returns -1
+// and an "unsupported" error. ctr and restoreOptions are ignored.
+func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
+	unsupported := errors.New("unsupported (*ConmonOCIRuntime) createRootlessContainer")
+	return -1, unsupported
+}
+
+// withContainerSocketLabel runs the closure with the container's socket label
+// set. In this FreeBSD file no label is applied, so the closure is simply
+// invoked and its error returned unchanged.
+func (r *ConmonOCIRuntime) withContainerSocketLabel(ctr *Container, closure func() error) error {
+	// No label support yet
+	if err := closure(); err != nil {
+		return err
+	}
+	return nil
+}
+
+// moveConmonToCgroupAndSignal signals conmon to start by sending nonce data
+// down the start fd. On Linux this step also moves the conmon process into
+// the container's cgroup parent; that part has no equivalent on FreeBSD, so
+// ctr and cmd are unused here.
+// Returns the error from writing to startFd, if any.
+func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File) error {
+	// No cgroup equivalent on FreeBSD, but the start signal must still be
+	// sent: the caller waits for conmon right after this call, and conmon is
+	// given the other end of this pipe through _OCI_STARTPIPE.
+	return writeConmonPipeData(startFd)
+}
diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go
index 1b654ed33..0964d4ea3 100644
--- a/libpod/oci_conmon_linux.go
+++ b/libpod/oci_conmon_linux.go
@@ -1,46 +1,21 @@
-//go:build linux
-// +build linux
-
package libpod
import (
- "bufio"
- "bytes"
- "context"
- "errors"
"fmt"
- "io"
- "io/ioutil"
- "net"
- "net/http"
"os"
"os/exec"
"path/filepath"
"runtime"
- "strconv"
"strings"
- "sync"
- "syscall"
- "text/template"
- "time"
runcconfig "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/containers/common/pkg/cgroups"
"github.com/containers/common/pkg/config"
- "github.com/containers/common/pkg/resize"
- cutil "github.com/containers/common/pkg/util"
- conmonConfig "github.com/containers/conmon/runner/config"
- "github.com/containers/podman/v4/libpod/define"
- "github.com/containers/podman/v4/libpod/logs"
- "github.com/containers/podman/v4/pkg/checkpoint/crutils"
"github.com/containers/podman/v4/pkg/errorhandling"
"github.com/containers/podman/v4/pkg/rootless"
- "github.com/containers/podman/v4/pkg/specgenutil"
- "github.com/containers/podman/v4/pkg/util"
"github.com/containers/podman/v4/utils"
- "github.com/containers/storage/pkg/homedir"
pmount "github.com/containers/storage/pkg/mount"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux/label"
@@ -48,782 +23,70 @@ import (
"golang.org/x/sys/unix"
)
-const (
- // This is Conmon's STDIO_BUF_SIZE. I don't believe we have access to it
- // directly from the Go code, so const it here
- // Important: The conmon attach socket uses an extra byte at the beginning of each
- // message to specify the STREAM so we have to increase the buffer size by one
- bufferSize = conmonConfig.BufSize + 1
-)
-
-// ConmonOCIRuntime is an OCI runtime managed by Conmon.
-// TODO: Make all calls to OCI runtime have a timeout.
-type ConmonOCIRuntime struct {
- name string
- path string
- conmonPath string
- conmonEnv []string
- tmpDir string
- exitsDir string
- logSizeMax int64
- noPivot bool
- reservePorts bool
- runtimeFlags []string
- supportsJSON bool
- supportsKVM bool
- supportsNoCgroups bool
- enableKeyring bool
-}
-
-// Make a new Conmon-based OCI runtime with the given options.
-// Conmon will wrap the given OCI runtime, which can be `runc`, `crun`, or
-// any runtime with a runc-compatible CLI.
-// The first path that points to a valid executable will be used.
-// Deliberately private. Someone should not be able to construct this outside of
-// libpod.
-func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtimeFlags []string, runtimeCfg *config.Config) (OCIRuntime, error) {
- if name == "" {
- return nil, fmt.Errorf("the OCI runtime must be provided a non-empty name: %w", define.ErrInvalidArg)
- }
-
- // Make lookup tables for runtime support
- supportsJSON := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsJSON))
- supportsNoCgroups := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsNoCgroups))
- supportsKVM := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsKVM))
- for _, r := range runtimeCfg.Engine.RuntimeSupportsJSON {
- supportsJSON[r] = true
- }
- for _, r := range runtimeCfg.Engine.RuntimeSupportsNoCgroups {
- supportsNoCgroups[r] = true
- }
- for _, r := range runtimeCfg.Engine.RuntimeSupportsKVM {
- supportsKVM[r] = true
- }
-
- runtime := new(ConmonOCIRuntime)
- runtime.name = name
- runtime.conmonPath = conmonPath
- runtime.runtimeFlags = runtimeFlags
-
- runtime.conmonEnv = runtimeCfg.Engine.ConmonEnvVars
- runtime.tmpDir = runtimeCfg.Engine.TmpDir
- runtime.logSizeMax = runtimeCfg.Containers.LogSizeMax
- runtime.noPivot = runtimeCfg.Engine.NoPivotRoot
- runtime.reservePorts = runtimeCfg.Engine.EnablePortReservation
- runtime.enableKeyring = runtimeCfg.Containers.EnableKeyring
-
- // TODO: probe OCI runtime for feature and enable automatically if
- // available.
-
- base := filepath.Base(name)
- runtime.supportsJSON = supportsJSON[base]
- runtime.supportsNoCgroups = supportsNoCgroups[base]
- runtime.supportsKVM = supportsKVM[base]
-
- foundPath := false
- for _, path := range paths {
- stat, err := os.Stat(path)
- if err != nil {
- if os.IsNotExist(err) {
- continue
- }
- return nil, fmt.Errorf("cannot stat OCI runtime %s path: %w", name, err)
- }
- if !stat.Mode().IsRegular() {
- continue
- }
- foundPath = true
- logrus.Tracef("found runtime %q", path)
- runtime.path = path
- break
- }
-
- // Search the $PATH as last fallback
- if !foundPath {
- if foundRuntime, err := exec.LookPath(name); err == nil {
- foundPath = true
- runtime.path = foundRuntime
- logrus.Debugf("using runtime %q from $PATH: %q", name, foundRuntime)
- }
- }
-
- if !foundPath {
- return nil, fmt.Errorf("no valid executable found for OCI runtime %s: %w", name, define.ErrInvalidArg)
- }
-
- runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits")
-
- // Create the exit files and attach sockets directories
- if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil {
- // The directory is allowed to exist
- if !os.IsExist(err) {
- return nil, fmt.Errorf("error creating OCI runtime exit files directory: %w", err)
- }
+func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
+ type result struct {
+ restoreDuration int64
+ err error
}
- return runtime, nil
-}
-
-// Name returns the name of the runtime being wrapped by Conmon.
-func (r *ConmonOCIRuntime) Name() string {
- return r.name
-}
-
-// Path returns the path of the OCI runtime being wrapped by Conmon.
-func (r *ConmonOCIRuntime) Path() string {
- return r.path
-}
-
-// hasCurrentUserMapped checks whether the current user is mapped inside the container user namespace
-func hasCurrentUserMapped(ctr *Container) bool {
- if len(ctr.config.IDMappings.UIDMap) == 0 && len(ctr.config.IDMappings.GIDMap) == 0 {
- return true
- }
- uid := os.Geteuid()
- for _, m := range ctr.config.IDMappings.UIDMap {
- if uid >= m.HostID && uid < m.HostID+m.Size {
- return true
- }
- }
- return false
-}
-
-// CreateContainer creates a container.
-func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
- // always make the run dir accessible to the current user so that the PID files can be read without
- // being in the rootless user namespace.
- if err := makeAccessible(ctr.state.RunDir, 0, 0); err != nil {
- return 0, err
- }
- if !hasCurrentUserMapped(ctr) {
- for _, i := range []string{ctr.state.RunDir, ctr.runtime.config.Engine.TmpDir, ctr.config.StaticDir, ctr.state.Mountpoint, ctr.runtime.config.Engine.VolumePath} {
- if err := makeAccessible(i, ctr.RootUID(), ctr.RootGID()); err != nil {
+ ch := make(chan result)
+ go func() {
+ runtime.LockOSThread()
+ restoreDuration, err := func() (int64, error) {
+ fd, err := os.Open(fmt.Sprintf("/proc/%d/task/%d/ns/mnt", os.Getpid(), unix.Gettid()))
+ if err != nil {
return 0, err
}
- }
+ defer errorhandling.CloseQuiet(fd)
- // if we are running a non privileged container, be sure to umount some kernel paths so they are not
- // bind mounted inside the container at all.
- if !ctr.config.Privileged && !rootless.IsRootless() {
- type result struct {
- restoreDuration int64
- err error
+ // create a new mountns on the current thread
+ if err = unix.Unshare(unix.CLONE_NEWNS); err != nil {
+ return 0, err
}
- ch := make(chan result)
- go func() {
- runtime.LockOSThread()
- restoreDuration, err := func() (int64, error) {
- fd, err := os.Open(fmt.Sprintf("/proc/%d/task/%d/ns/mnt", os.Getpid(), unix.Gettid()))
- if err != nil {
- return 0, err
- }
- defer errorhandling.CloseQuiet(fd)
-
- // create a new mountns on the current thread
- if err = unix.Unshare(unix.CLONE_NEWNS); err != nil {
- return 0, err
- }
- defer func() {
- if err := unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS); err != nil {
- logrus.Errorf("Unable to clone new namespace: %q", err)
- }
- }()
-
- // don't spread our mounts around. We are setting only /sys to be slave
- // so that the cleanup process is still able to umount the storage and the
- // changes are propagated to the host.
- err = unix.Mount("/sys", "/sys", "none", unix.MS_REC|unix.MS_SLAVE, "")
- if err != nil {
- return 0, fmt.Errorf("cannot make /sys slave: %w", err)
- }
-
- mounts, err := pmount.GetMounts()
- if err != nil {
- return 0, err
- }
- for _, m := range mounts {
- if !strings.HasPrefix(m.Mountpoint, "/sys/kernel") {
- continue
- }
- err = unix.Unmount(m.Mountpoint, 0)
- if err != nil && !os.IsNotExist(err) {
- return 0, fmt.Errorf("cannot unmount %s: %w", m.Mountpoint, err)
- }
- }
- return r.createOCIContainer(ctr, restoreOptions)
- }()
- ch <- result{
- restoreDuration: restoreDuration,
- err: err,
+ defer func() {
+ if err := unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS); err != nil {
+ logrus.Errorf("Unable to clone new namespace: %q", err)
}
}()
- r := <-ch
- return r.restoreDuration, r.err
- }
- }
- return r.createOCIContainer(ctr, restoreOptions)
-}
-
-// UpdateContainerStatus retrieves the current status of the container from the
-// runtime. It updates the container's state but does not save it.
-// If useRuntime is false, we will not directly hit runc to see the container's
-// status, but will instead only check for the existence of the conmon exit file
-// and update state to stopped if it exists.
-func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container) error {
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return err
- }
-
- // Store old state so we know if we were already stopped
- oldState := ctr.state.State
- state := new(spec.State)
-
- cmd := exec.Command(r.path, "state", ctr.ID())
- cmd.Env = append(cmd.Env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir))
-
- outPipe, err := cmd.StdoutPipe()
- if err != nil {
- return fmt.Errorf("getting stdout pipe: %w", err)
- }
- errPipe, err := cmd.StderrPipe()
- if err != nil {
- return fmt.Errorf("getting stderr pipe: %w", err)
- }
-
- if err := cmd.Start(); err != nil {
- out, err2 := ioutil.ReadAll(errPipe)
- if err2 != nil {
- return fmt.Errorf("error getting container %s state: %w", ctr.ID(), err)
- }
- if strings.Contains(string(out), "does not exist") || strings.Contains(string(out), "No such file") {
- if err := ctr.removeConmonFiles(); err != nil {
- logrus.Debugf("unable to remove conmon files for container %s", ctr.ID())
- }
- ctr.state.ExitCode = -1
- ctr.state.FinishedTime = time.Now()
- ctr.state.State = define.ContainerStateExited
- return ctr.runtime.state.AddContainerExitCode(ctr.ID(), ctr.state.ExitCode)
- }
- return fmt.Errorf("error getting container %s state. stderr/out: %s: %w", ctr.ID(), out, err)
- }
- defer func() {
- _ = cmd.Wait()
- }()
-
- if err := errPipe.Close(); err != nil {
- return err
- }
- out, err := ioutil.ReadAll(outPipe)
- if err != nil {
- return fmt.Errorf("error reading stdout: %s: %w", ctr.ID(), err)
- }
- if err := json.NewDecoder(bytes.NewBuffer(out)).Decode(state); err != nil {
- return fmt.Errorf("error decoding container status for container %s: %w", ctr.ID(), err)
- }
- ctr.state.PID = state.Pid
-
- switch state.Status {
- case "created":
- ctr.state.State = define.ContainerStateCreated
- case "paused":
- ctr.state.State = define.ContainerStatePaused
- case "running":
- ctr.state.State = define.ContainerStateRunning
- case "stopped":
- ctr.state.State = define.ContainerStateStopped
- default:
- return fmt.Errorf("unrecognized status returned by runtime for container %s: %s: %w",
- ctr.ID(), state.Status, define.ErrInternal)
- }
-
- // Only grab exit status if we were not already stopped
- // If we were, it should already be in the database
- if ctr.state.State == define.ContainerStateStopped && oldState != define.ContainerStateStopped {
- if _, err := ctr.Wait(context.Background()); err != nil {
- logrus.Errorf("Waiting for container %s to exit: %v", ctr.ID(), err)
- }
- return nil
- }
-
- // Handle ContainerStateStopping - keep it unless the container
- // transitioned to no longer running.
- if oldState == define.ContainerStateStopping && (ctr.state.State == define.ContainerStatePaused || ctr.state.State == define.ContainerStateRunning) {
- ctr.state.State = define.ContainerStateStopping
- }
-
- return nil
-}
-
-// StartContainer starts the given container.
-// Sets time the container was started, but does not save it.
-func (r *ConmonOCIRuntime) StartContainer(ctr *Container) error {
- // TODO: streams should probably *not* be our STDIN/OUT/ERR - redirect to buffers?
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return err
- }
- env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
- if path, ok := os.LookupEnv("PATH"); ok {
- env = append(env, fmt.Sprintf("PATH=%s", path))
- }
- if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "start", ctr.ID())...); err != nil {
- return err
- }
-
- ctr.state.StartedTime = time.Now()
-
- return nil
-}
-
-// KillContainer sends the given signal to the given container.
-// If all is set, send to all PIDs in the container.
-// All is only supported if the container created cgroups.
-func (r *ConmonOCIRuntime) KillContainer(ctr *Container, signal uint, all bool) error {
- logrus.Debugf("Sending signal %d to container %s", signal, ctr.ID())
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return err
- }
- env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
- var args []string
- args = append(args, r.runtimeFlags...)
- if all {
- args = append(args, "kill", "--all", ctr.ID(), fmt.Sprintf("%d", signal))
- } else {
- args = append(args, "kill", ctr.ID(), fmt.Sprintf("%d", signal))
- }
- if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...); err != nil {
- // Update container state - there's a chance we failed because
- // the container exited in the meantime.
- if err2 := r.UpdateContainerStatus(ctr); err2 != nil {
- logrus.Infof("Error updating status for container %s: %v", ctr.ID(), err2)
- }
- if ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited) {
- return define.ErrCtrStateInvalid
- }
- return fmt.Errorf("error sending signal to container %s: %w", ctr.ID(), err)
- }
-
- return nil
-}
-
-// StopContainer stops a container, first using its given stop signal (or
-// SIGTERM if no signal was specified), then using SIGKILL.
-// Timeout is given in seconds. If timeout is 0, the container will be
-// immediately kill with SIGKILL.
-// Does not set finished time for container, assumes you will run updateStatus
-// after to pull the exit code.
-func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool) error {
- logrus.Debugf("Stopping container %s (PID %d)", ctr.ID(), ctr.state.PID)
-
- // Ping the container to see if it's alive
- // If it's not, it's already stopped, return
- err := unix.Kill(ctr.state.PID, 0)
- if err == unix.ESRCH {
- return nil
- }
-
- stopSignal := ctr.config.StopSignal
- if stopSignal == 0 {
- stopSignal = uint(syscall.SIGTERM)
- }
-
- if timeout > 0 {
- if err := r.KillContainer(ctr, stopSignal, all); err != nil {
- // Is the container gone?
- // If so, it probably died between the first check and
- // our sending the signal
- // The container is stopped, so exit cleanly
- err := unix.Kill(ctr.state.PID, 0)
- if err == unix.ESRCH {
- return nil
+ // don't spread our mounts around. We are setting only /sys to be slave
+ // so that the cleanup process is still able to umount the storage and the
+ // changes are propagated to the host.
+ err = unix.Mount("/sys", "/sys", "none", unix.MS_REC|unix.MS_SLAVE, "")
+ if err != nil {
+ return 0, fmt.Errorf("cannot make /sys slave: %w", err)
}
- return err
- }
-
- if err := waitContainerStop(ctr, time.Duration(timeout)*time.Second); err != nil {
- logrus.Debugf("Timed out stopping container %s with %s, resorting to SIGKILL: %v", ctr.ID(), unix.SignalName(syscall.Signal(stopSignal)), err)
- logrus.Warnf("StopSignal %s failed to stop container %s in %d seconds, resorting to SIGKILL", unix.SignalName(syscall.Signal(stopSignal)), ctr.Name(), timeout)
- } else {
- // No error, the container is dead
- return nil
- }
- }
-
- if err := r.KillContainer(ctr, 9, all); err != nil {
- // Again, check if the container is gone. If it is, exit cleanly.
- err := unix.Kill(ctr.state.PID, 0)
- if err == unix.ESRCH {
- return nil
- }
-
- return fmt.Errorf("error sending SIGKILL to container %s: %w", ctr.ID(), err)
- }
-
- // Give runtime a few seconds to make it happen
- if err := waitContainerStop(ctr, killContainerTimeout); err != nil {
- return err
- }
-
- return nil
-}
-
-// DeleteContainer deletes a container from the OCI runtime.
-func (r *ConmonOCIRuntime) DeleteContainer(ctr *Container) error {
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return err
- }
- env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
- return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "delete", "--force", ctr.ID())...)
-}
-
-// PauseContainer pauses the given container.
-func (r *ConmonOCIRuntime) PauseContainer(ctr *Container) error {
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return err
- }
- env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
- return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "pause", ctr.ID())...)
-}
-
-// UnpauseContainer unpauses the given container.
-func (r *ConmonOCIRuntime) UnpauseContainer(ctr *Container) error {
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return err
- }
- env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
- return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "resume", ctr.ID())...)
-}
-
-// HTTPAttach performs an attach for the HTTP API.
-// The caller must handle closing the HTTP connection after this returns.
-// The cancel channel is not closed; it is up to the caller to do so after
-// this function returns.
-// If this is a container with a terminal, we will stream raw. If it is not, we
-// will stream with an 8-byte header to multiplex STDOUT and STDERR.
-// Returns any errors that occurred, and whether the connection was successfully
-// hijacked before that error occurred.
-func (r *ConmonOCIRuntime) HTTPAttach(ctr *Container, req *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, detachKeys *string, cancel <-chan bool, hijackDone chan<- bool, streamAttach, streamLogs bool) (deferredErr error) {
- isTerminal := false
- if ctr.config.Spec.Process != nil {
- isTerminal = ctr.config.Spec.Process.Terminal
- }
-
- if streams != nil {
- if !streams.Stdin && !streams.Stdout && !streams.Stderr {
- return fmt.Errorf("must specify at least one stream to attach to: %w", define.ErrInvalidArg)
- }
- }
-
- attachSock, err := r.AttachSocketPath(ctr)
- if err != nil {
- return err
- }
-
- var conn *net.UnixConn
- if streamAttach {
- newConn, err := openUnixSocket(attachSock)
- if err != nil {
- return fmt.Errorf("failed to connect to container's attach socket: %v: %w", attachSock, err)
- }
- conn = newConn
- defer func() {
- if err := conn.Close(); err != nil {
- logrus.Errorf("Unable to close container %s attach socket: %q", ctr.ID(), err)
+ mounts, err := pmount.GetMounts()
+ if err != nil {
+ return 0, err
}
- }()
-
- logrus.Debugf("Successfully connected to container %s attach socket %s", ctr.ID(), attachSock)
- }
-
- detachString := ctr.runtime.config.Engine.DetachKeys
- if detachKeys != nil {
- detachString = *detachKeys
- }
- detach, err := processDetachKeys(detachString)
- if err != nil {
- return err
- }
-
- attachStdout := true
- attachStderr := true
- attachStdin := true
- if streams != nil {
- attachStdout = streams.Stdout
- attachStderr = streams.Stderr
- attachStdin = streams.Stdin
- }
-
- logrus.Debugf("Going to hijack container %s attach connection", ctr.ID())
-
- // Alright, let's hijack.
- hijacker, ok := w.(http.Hijacker)
- if !ok {
- return fmt.Errorf("unable to hijack connection")
- }
-
- httpCon, httpBuf, err := hijacker.Hijack()
- if err != nil {
- return fmt.Errorf("error hijacking connection: %w", err)
- }
-
- hijackDone <- true
-
- writeHijackHeader(req, httpBuf)
-
- // Force a flush after the header is written.
- if err := httpBuf.Flush(); err != nil {
- return fmt.Errorf("error flushing HTTP hijack header: %w", err)
- }
-
- defer func() {
- hijackWriteErrorAndClose(deferredErr, ctr.ID(), isTerminal, httpCon, httpBuf)
- }()
-
- logrus.Debugf("Hijack for container %s attach session done, ready to stream", ctr.ID())
-
- // TODO: This is gross. Really, really gross.
- // I want to say we should read all the logs into an array before
- // calling this, in container_api.go, but that could take a lot of
- // memory...
- // On the whole, we need to figure out a better way of doing this,
- // though.
- logSize := 0
- if streamLogs {
- logrus.Debugf("Will stream logs for container %s attach session", ctr.ID())
-
- // Get all logs for the container
- logChan := make(chan *logs.LogLine)
- logOpts := new(logs.LogOptions)
- logOpts.Tail = -1
- logOpts.WaitGroup = new(sync.WaitGroup)
- errChan := make(chan error)
- go func() {
- var err error
- // In non-terminal mode we need to prepend with the
- // stream header.
- logrus.Debugf("Writing logs for container %s to HTTP attach", ctr.ID())
- for logLine := range logChan {
- if !isTerminal {
- device := logLine.Device
- var header []byte
- headerLen := uint32(len(logLine.Msg))
- logSize += len(logLine.Msg)
- switch strings.ToLower(device) {
- case "stdin":
- header = makeHTTPAttachHeader(0, headerLen)
- case "stdout":
- header = makeHTTPAttachHeader(1, headerLen)
- case "stderr":
- header = makeHTTPAttachHeader(2, headerLen)
- default:
- logrus.Errorf("Unknown device for log line: %s", device)
- header = makeHTTPAttachHeader(1, headerLen)
- }
- _, err = httpBuf.Write(header)
- if err != nil {
- break
- }
- }
- _, err = httpBuf.Write([]byte(logLine.Msg))
- if err != nil {
- break
- }
- if !logLine.Partial() {
- _, err = httpBuf.Write([]byte("\n"))
- if err != nil {
- break
- }
+ for _, m := range mounts {
+ if !strings.HasPrefix(m.Mountpoint, "/sys/kernel") {
+ continue
}
- err = httpBuf.Flush()
- if err != nil {
- break
+ err = unix.Unmount(m.Mountpoint, 0)
+ if err != nil && !os.IsNotExist(err) {
+ return 0, fmt.Errorf("cannot unmount %s: %w", m.Mountpoint, err)
}
}
- errChan <- err
- }()
- if err := ctr.ReadLog(context.Background(), logOpts, logChan, 0); err != nil {
- return err
- }
- go func() {
- logOpts.WaitGroup.Wait()
- close(logChan)
+ return r.createOCIContainer(ctr, restoreOptions)
}()
- logrus.Debugf("Done reading logs for container %s, %d bytes", ctr.ID(), logSize)
- if err := <-errChan; err != nil {
- return err
+ ch <- result{
+ restoreDuration: restoreDuration,
+ err: err,
}
- }
- if !streamAttach {
- logrus.Debugf("Done streaming logs for container %s attach, exiting as attach streaming not requested", ctr.ID())
- return nil
- }
-
- logrus.Debugf("Forwarding attach output for container %s", ctr.ID())
-
- stdoutChan := make(chan error)
- stdinChan := make(chan error)
-
- // Handle STDOUT/STDERR
- go func() {
- var err error
- if isTerminal {
- // Hack: return immediately if attachStdout not set to
- // emulate Docker.
- // Basically, when terminal is set, STDERR goes nowhere.
- // Everything does over STDOUT.
- // Therefore, if not attaching STDOUT - we'll never copy
- // anything from here.
- logrus.Debugf("Performing terminal HTTP attach for container %s", ctr.ID())
- if attachStdout {
- err = httpAttachTerminalCopy(conn, httpBuf, ctr.ID())
- }
- } else {
- logrus.Debugf("Performing non-terminal HTTP attach for container %s", ctr.ID())
- err = httpAttachNonTerminalCopy(conn, httpBuf, ctr.ID(), attachStdin, attachStdout, attachStderr)
- }
- stdoutChan <- err
- logrus.Debugf("STDOUT/ERR copy completed")
}()
- // Next, STDIN. Avoid entirely if attachStdin unset.
- if attachStdin {
- go func() {
- _, err := cutil.CopyDetachable(conn, httpBuf, detach)
- logrus.Debugf("STDIN copy completed")
- stdinChan <- err
- }()
- }
-
- for {
- select {
- case err := <-stdoutChan:
- if err != nil {
- return err
- }
-
- return nil
- case err := <-stdinChan:
- if err != nil {
- return err
- }
- // copy stdin is done, close it
- if connErr := conn.CloseWrite(); connErr != nil {
- logrus.Errorf("Unable to close conn: %v", connErr)
- }
- case <-cancel:
- return nil
- }
- }
-}
-
-// isRetryable returns whether the error was caused by a blocked syscall or the
-// specified operation on a non blocking file descriptor wasn't ready for completion.
-func isRetryable(err error) bool {
- var errno syscall.Errno
- if errors.As(err, &errno) {
- return errno == syscall.EINTR || errno == syscall.EAGAIN
- }
- return false
+ res := <-ch
+ return res.restoreDuration, res.err
}
-// openControlFile opens the terminal control file.
-func openControlFile(ctr *Container, parentDir string) (*os.File, error) {
- controlPath := filepath.Join(parentDir, "ctl")
- for i := 0; i < 600; i++ {
- controlFile, err := os.OpenFile(controlPath, unix.O_WRONLY|unix.O_NONBLOCK, 0)
- if err == nil {
- return controlFile, nil
- }
- if !isRetryable(err) {
- return nil, fmt.Errorf("could not open ctl file for terminal resize for container %s: %w", ctr.ID(), err)
- }
- time.Sleep(time.Second / 10)
- }
- return nil, fmt.Errorf("timeout waiting for %q", controlPath)
-}
-
-// AttachResize resizes the terminal used by the given container.
-func (r *ConmonOCIRuntime) AttachResize(ctr *Container, newSize resize.TerminalSize) error {
- controlFile, err := openControlFile(ctr, ctr.bundlePath())
- if err != nil {
- return err
- }
- defer controlFile.Close()
-
- logrus.Debugf("Received a resize event for container %s: %+v", ctr.ID(), newSize)
- if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 1, newSize.Height, newSize.Width); err != nil {
- return fmt.Errorf("failed to write to ctl file to resize terminal: %w", err)
- }
-
- return nil
-}
-
-// CheckpointContainer checkpoints the given container.
-func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) {
- // imagePath is used by CRIU to store the actual checkpoint files
- imagePath := ctr.CheckpointPath()
- if options.PreCheckPoint {
- imagePath = ctr.PreCheckPointPath()
- }
- // workPath will be used to store dump.log and stats-dump
- workPath := ctr.bundlePath()
- logrus.Debugf("Writing checkpoint to %s", imagePath)
- logrus.Debugf("Writing checkpoint logs to %s", workPath)
- logrus.Debugf("Pre-dump the container %t", options.PreCheckPoint)
- args := []string{}
- args = append(args, r.runtimeFlags...)
- args = append(args, "checkpoint")
- args = append(args, "--image-path")
- args = append(args, imagePath)
- args = append(args, "--work-path")
- args = append(args, workPath)
- if options.KeepRunning {
- args = append(args, "--leave-running")
- }
- if options.TCPEstablished {
- args = append(args, "--tcp-established")
- }
- if options.FileLocks {
- args = append(args, "--file-locks")
- }
- if !options.PreCheckPoint && options.KeepRunning {
- args = append(args, "--leave-running")
- }
- if options.PreCheckPoint {
- args = append(args, "--pre-dump")
- }
- if !options.PreCheckPoint && options.WithPrevious {
- args = append(
- args,
- "--parent-path",
- filepath.Join("..", preCheckpointDir),
- )
- }
-
- args = append(args, ctr.ID())
- logrus.Debugf("the args to checkpoint: %s %s", r.path, strings.Join(args, " "))
-
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return 0, err
- }
- env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
- if path, ok := os.LookupEnv("PATH"); ok {
- env = append(env, fmt.Sprintf("PATH=%s", path))
- }
-
+// Run the closure with the container's socket label set
+func (r *ConmonOCIRuntime) withContainerSocketLabel(ctr *Container, closure func() error) error {
runtime.LockOSThread()
if err := label.SetSocketLabel(ctr.ProcessLabel()); err != nil {
- return 0, err
+ return err
}
-
- runtimeCheckpointStarted := time.Now()
- err = utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...)
+ err := closure()
// Ignore error returned from SetSocketLabel("") call,
// can't recover.
if labelErr := label.SetSocketLabel(""); labelErr == nil {
@@ -834,576 +97,7 @@ func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options Container
} else {
logrus.Errorf("Unable to reset socket label: %q", labelErr)
}
-
- runtimeCheckpointDuration := func() int64 {
- if options.PrintStats {
- return time.Since(runtimeCheckpointStarted).Microseconds()
- }
- return 0
- }()
-
- return runtimeCheckpointDuration, err
-}
-
-func (r *ConmonOCIRuntime) CheckConmonRunning(ctr *Container) (bool, error) {
- if ctr.state.ConmonPID == 0 {
- // If the container is running or paused, assume Conmon is
- // running. We didn't record Conmon PID on some old versions, so
- // that is likely what's going on...
- // Unusual enough that we should print a warning message though.
- if ctr.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
- logrus.Warnf("Conmon PID is not set, but container is running!")
- return true, nil
- }
- // Container's not running, so conmon PID being unset is
- // expected. Conmon is not running.
- return false, nil
- }
-
- // We have a conmon PID. Ping it with signal 0.
- if err := unix.Kill(ctr.state.ConmonPID, 0); err != nil {
- if err == unix.ESRCH {
- return false, nil
- }
- return false, fmt.Errorf("error pinging container %s conmon with signal 0: %w", ctr.ID(), err)
- }
- return true, nil
-}
-
-// SupportsCheckpoint checks if the OCI runtime supports checkpointing
-// containers.
-func (r *ConmonOCIRuntime) SupportsCheckpoint() bool {
- return crutils.CRRuntimeSupportsCheckpointRestore(r.path)
-}
-
-// SupportsJSONErrors checks if the OCI runtime supports JSON-formatted error
-// messages.
-func (r *ConmonOCIRuntime) SupportsJSONErrors() bool {
- return r.supportsJSON
-}
-
-// SupportsNoCgroups checks if the OCI runtime supports running containers
-// without cgroups (the --cgroup-manager=disabled flag).
-func (r *ConmonOCIRuntime) SupportsNoCgroups() bool {
- return r.supportsNoCgroups
-}
-
-// SupportsKVM checks if the OCI runtime supports running containers
-// without KVM separation
-func (r *ConmonOCIRuntime) SupportsKVM() bool {
- return r.supportsKVM
-}
-
-// AttachSocketPath is the path to a single container's attach socket.
-func (r *ConmonOCIRuntime) AttachSocketPath(ctr *Container) (string, error) {
- if ctr == nil {
- return "", fmt.Errorf("must provide a valid container to get attach socket path: %w", define.ErrInvalidArg)
- }
-
- return filepath.Join(ctr.bundlePath(), "attach"), nil
-}
-
-// ExitFilePath is the path to a container's exit file.
-func (r *ConmonOCIRuntime) ExitFilePath(ctr *Container) (string, error) {
- if ctr == nil {
- return "", fmt.Errorf("must provide a valid container to get exit file path: %w", define.ErrInvalidArg)
- }
- return filepath.Join(r.exitsDir, ctr.ID()), nil
-}
-
-// RuntimeInfo provides information on the runtime.
-func (r *ConmonOCIRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) {
- runtimePackage := packageVersion(r.path)
- conmonPackage := packageVersion(r.conmonPath)
- runtimeVersion, err := r.getOCIRuntimeVersion()
- if err != nil {
- return nil, nil, fmt.Errorf("error getting version of OCI runtime %s: %w", r.name, err)
- }
- conmonVersion, err := r.getConmonVersion()
- if err != nil {
- return nil, nil, fmt.Errorf("error getting conmon version: %w", err)
- }
-
- conmon := define.ConmonInfo{
- Package: conmonPackage,
- Path: r.conmonPath,
- Version: conmonVersion,
- }
- ocirt := define.OCIRuntimeInfo{
- Name: r.name,
- Path: r.path,
- Package: runtimePackage,
- Version: runtimeVersion,
- }
- return &conmon, &ocirt, nil
-}
-
-// makeAccessible changes the path permission and each parent directory to have --x--x--x
-func makeAccessible(path string, uid, gid int) error {
- for ; path != "/"; path = filepath.Dir(path) {
- st, err := os.Stat(path)
- if err != nil {
- if os.IsNotExist(err) {
- return nil
- }
- return err
- }
- if int(st.Sys().(*syscall.Stat_t).Uid) == uid && int(st.Sys().(*syscall.Stat_t).Gid) == gid {
- continue
- }
- if st.Mode()&0111 != 0111 {
- if err := os.Chmod(path, st.Mode()|0111); err != nil {
- return err
- }
- }
- }
- return nil
-}
-
-// Wait for a container which has been sent a signal to stop
-func waitContainerStop(ctr *Container, timeout time.Duration) error {
- return waitPidStop(ctr.state.PID, timeout)
-}
-
-// Wait for a given PID to stop
-func waitPidStop(pid int, timeout time.Duration) error {
- done := make(chan struct{})
- chControl := make(chan struct{})
- go func() {
- for {
- select {
- case <-chControl:
- return
- default:
- if err := unix.Kill(pid, 0); err != nil {
- if err == unix.ESRCH {
- close(done)
- return
- }
- logrus.Errorf("Pinging PID %d with signal 0: %v", pid, err)
- }
- time.Sleep(100 * time.Millisecond)
- }
- }
- }()
- select {
- case <-done:
- return nil
- case <-time.After(timeout):
- close(chControl)
- return fmt.Errorf("given PIDs did not die within timeout")
- }
-}
-
-func (r *ConmonOCIRuntime) getLogTag(ctr *Container) (string, error) {
- logTag := ctr.LogTag()
- if logTag == "" {
- return "", nil
- }
- data, err := ctr.inspectLocked(false)
- if err != nil {
- // FIXME: this error should probably be returned
- return "", nil //nolint: nilerr
- }
- tmpl, err := template.New("container").Parse(logTag)
- if err != nil {
- return "", fmt.Errorf("template parsing error %s: %w", logTag, err)
- }
- var b bytes.Buffer
- err = tmpl.Execute(&b, data)
- if err != nil {
- return "", err
- }
- return b.String(), nil
-}
-
-// createOCIContainer generates this container's main conmon instance and prepares it for starting
-func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
- var stderrBuf bytes.Buffer
-
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return 0, err
- }
-
- parentSyncPipe, childSyncPipe, err := newPipe()
- if err != nil {
- return 0, fmt.Errorf("error creating socket pair: %w", err)
- }
- defer errorhandling.CloseQuiet(parentSyncPipe)
-
- childStartPipe, parentStartPipe, err := newPipe()
- if err != nil {
- return 0, fmt.Errorf("error creating socket pair for start pipe: %w", err)
- }
-
- defer errorhandling.CloseQuiet(parentStartPipe)
-
- var ociLog string
- if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON {
- ociLog = filepath.Join(ctr.state.RunDir, "oci-log")
- }
-
- logTag, err := r.getLogTag(ctr)
- if err != nil {
- return 0, err
- }
-
- if ctr.config.CgroupsMode == cgroupSplit {
- if err := utils.MoveUnderCgroupSubtree("runtime"); err != nil {
- return 0, err
- }
- }
-
- pidfile := ctr.config.PidFile
- if pidfile == "" {
- pidfile = filepath.Join(ctr.state.RunDir, "pidfile")
- }
-
- args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag)
-
- if ctr.config.SdNotifyMode == define.SdNotifyModeContainer && ctr.config.SdNotifySocket != "" {
- args = append(args, fmt.Sprintf("--sdnotify-socket=%s", ctr.config.SdNotifySocket))
- }
-
- if ctr.config.Spec.Process.Terminal {
- args = append(args, "-t")
- } else if ctr.config.Stdin {
- args = append(args, "-i")
- }
-
- if ctr.config.Timeout > 0 {
- args = append(args, fmt.Sprintf("--timeout=%d", ctr.config.Timeout))
- }
-
- if !r.enableKeyring {
- args = append(args, "--no-new-keyring")
- }
- if ctr.config.ConmonPidFile != "" {
- args = append(args, "--conmon-pidfile", ctr.config.ConmonPidFile)
- }
-
- if r.noPivot {
- args = append(args, "--no-pivot")
- }
-
- exitCommand, err := specgenutil.CreateExitCommandArgs(ctr.runtime.storageConfig, ctr.runtime.config, logrus.IsLevelEnabled(logrus.DebugLevel), ctr.AutoRemove(), false)
- if err != nil {
- return 0, err
- }
- exitCommand = append(exitCommand, ctr.config.ID)
-
- args = append(args, "--exit-command", exitCommand[0])
- for _, arg := range exitCommand[1:] {
- args = append(args, []string{"--exit-command-arg", arg}...)
- }
-
- // Pass down the LISTEN_* environment (see #10443).
- preserveFDs := ctr.config.PreserveFDs
- if val := os.Getenv("LISTEN_FDS"); val != "" {
- if ctr.config.PreserveFDs > 0 {
- logrus.Warnf("Ignoring LISTEN_FDS to preserve custom user-specified FDs")
- } else {
- fds, err := strconv.Atoi(val)
- if err != nil {
- return 0, fmt.Errorf("converting LISTEN_FDS=%s: %w", val, err)
- }
- preserveFDs = uint(fds)
- }
- }
-
- if preserveFDs > 0 {
- args = append(args, formatRuntimeOpts("--preserve-fds", fmt.Sprintf("%d", preserveFDs))...)
- }
-
- if restoreOptions != nil {
- args = append(args, "--restore", ctr.CheckpointPath())
- if restoreOptions.TCPEstablished {
- args = append(args, "--runtime-opt", "--tcp-established")
- }
- if restoreOptions.FileLocks {
- args = append(args, "--runtime-opt", "--file-locks")
- }
- if restoreOptions.Pod != "" {
- mountLabel := ctr.config.MountLabel
- processLabel := ctr.config.ProcessLabel
- if mountLabel != "" {
- args = append(
- args,
- "--runtime-opt",
- fmt.Sprintf(
- "--lsm-mount-context=%s",
- mountLabel,
- ),
- )
- }
- if processLabel != "" {
- args = append(
- args,
- "--runtime-opt",
- fmt.Sprintf(
- "--lsm-profile=selinux:%s",
- processLabel,
- ),
- )
- }
- }
- }
-
- logrus.WithFields(logrus.Fields{
- "args": args,
- }).Debugf("running conmon: %s", r.conmonPath)
-
- cmd := exec.Command(r.conmonPath, args...)
- cmd.SysProcAttr = &syscall.SysProcAttr{
- Setpgid: true,
- }
- // TODO this is probably a really bad idea for some uses
- // Make this configurable
- cmd.Stdin = os.Stdin
- cmd.Stdout = os.Stdout
- cmd.Stderr = os.Stderr
- if ctr.config.Spec.Process.Terminal {
- cmd.Stderr = &stderrBuf
- }
-
- // 0, 1 and 2 are stdin, stdout and stderr
- conmonEnv := r.configureConmonEnv(runtimeDir)
-
- var filesToClose []*os.File
- if preserveFDs > 0 {
- for fd := 3; fd < int(3+preserveFDs); fd++ {
- f := os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd))
- filesToClose = append(filesToClose, f)
- cmd.ExtraFiles = append(cmd.ExtraFiles, f)
- }
- }
-
- cmd.Env = r.conmonEnv
- // we don't want to step on users fds they asked to preserve
- // Since 0-2 are used for stdio, start the fds we pass in at preserveFDs+3
- cmd.Env = append(cmd.Env, fmt.Sprintf("_OCI_SYNCPIPE=%d", preserveFDs+3), fmt.Sprintf("_OCI_STARTPIPE=%d", preserveFDs+4))
- cmd.Env = append(cmd.Env, conmonEnv...)
- cmd.ExtraFiles = append(cmd.ExtraFiles, childSyncPipe, childStartPipe)
-
- if r.reservePorts && !rootless.IsRootless() && !ctr.config.NetMode.IsSlirp4netns() {
- ports, err := bindPorts(ctr.convertPortMappings())
- if err != nil {
- return 0, err
- }
- filesToClose = append(filesToClose, ports...)
-
- // Leak the port we bound in the conmon process. These fd's won't be used
- // by the container and conmon will keep the ports busy so that another
- // process cannot use them.
- cmd.ExtraFiles = append(cmd.ExtraFiles, ports...)
- }
-
- if ctr.config.NetMode.IsSlirp4netns() || rootless.IsRootless() {
- if ctr.config.PostConfigureNetNS {
- havePortMapping := len(ctr.config.PortMappings) > 0
- if havePortMapping {
- ctr.rootlessPortSyncR, ctr.rootlessPortSyncW, err = os.Pipe()
- if err != nil {
- return 0, fmt.Errorf("failed to create rootless port sync pipe: %w", err)
- }
- }
- ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe()
- if err != nil {
- return 0, fmt.Errorf("failed to create rootless network sync pipe: %w", err)
- }
- } else {
- if ctr.rootlessSlirpSyncR != nil {
- defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncR)
- }
- if ctr.rootlessSlirpSyncW != nil {
- defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncW)
- }
- }
- // Leak one end in conmon, the other one will be leaked into slirp4netns
- cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessSlirpSyncW)
-
- if ctr.rootlessPortSyncW != nil {
- defer errorhandling.CloseQuiet(ctr.rootlessPortSyncW)
- // Leak one end in conmon, the other one will be leaked into rootlessport
- cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessPortSyncW)
- }
- }
- var runtimeRestoreStarted time.Time
- if restoreOptions != nil {
- runtimeRestoreStarted = time.Now()
- }
- err = startCommand(cmd, ctr)
-
- // regardless of whether we errored or not, we no longer need the children pipes
- childSyncPipe.Close()
- childStartPipe.Close()
- if err != nil {
- return 0, err
- }
- if err := r.moveConmonToCgroupAndSignal(ctr, cmd, parentStartPipe); err != nil {
- return 0, err
- }
- /* Wait for initial setup and fork, and reap child */
- err = cmd.Wait()
- if err != nil {
- return 0, err
- }
-
- pid, err := readConmonPipeData(r.name, parentSyncPipe, ociLog)
- if err != nil {
- if err2 := r.DeleteContainer(ctr); err2 != nil {
- logrus.Errorf("Removing container %s from runtime after creation failed", ctr.ID())
- }
- return 0, err
- }
- ctr.state.PID = pid
-
- conmonPID, err := readConmonPidFile(ctr.config.ConmonPidFile)
- if err != nil {
- logrus.Warnf("Error reading conmon pid file for container %s: %v", ctr.ID(), err)
- } else if conmonPID > 0 {
- // conmon not having a pid file is a valid state, so don't set it if we don't have it
- logrus.Infof("Got Conmon PID as %d", conmonPID)
- ctr.state.ConmonPID = conmonPID
- }
-
- runtimeRestoreDuration := func() int64 {
- if restoreOptions != nil && restoreOptions.PrintStats {
- return time.Since(runtimeRestoreStarted).Microseconds()
- }
- return 0
- }()
-
- // These fds were passed down to the runtime. Close them
- // and not interfere
- for _, f := range filesToClose {
- errorhandling.CloseQuiet(f)
- }
-
- return runtimeRestoreDuration, nil
-}
-
-// configureConmonEnv gets the environment values to add to conmon's exec struct
-// TODO this may want to be less hardcoded/more configurable in the future
-func (r *ConmonOCIRuntime) configureConmonEnv(runtimeDir string) []string {
- var env []string
- for _, e := range os.Environ() {
- if strings.HasPrefix(e, "LC_") {
- env = append(env, e)
- }
- }
- conf, ok := os.LookupEnv("CONTAINERS_CONF")
- if ok {
- env = append(env, fmt.Sprintf("CONTAINERS_CONF=%s", conf))
- }
- env = append(env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir))
- env = append(env, fmt.Sprintf("_CONTAINERS_USERNS_CONFIGURED=%s", os.Getenv("_CONTAINERS_USERNS_CONFIGURED")))
- env = append(env, fmt.Sprintf("_CONTAINERS_ROOTLESS_UID=%s", os.Getenv("_CONTAINERS_ROOTLESS_UID")))
- home := homedir.Get()
- if home != "" {
- env = append(env, fmt.Sprintf("HOME=%s", home))
- }
-
- return env
-}
-
-// sharedConmonArgs takes common arguments for exec and create/restore and formats them for the conmon CLI
-func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, ociLogPath, logDriver, logTag string) []string {
- // set the conmon API version to be able to use the correct sync struct keys
- args := []string{
- "--api-version", "1",
- "-c", ctr.ID(),
- "-u", cuuid,
- "-r", r.path,
- "-b", bundlePath,
- "-p", pidPath,
- "-n", ctr.Name(),
- "--exit-dir", exitDir,
- "--full-attach",
- }
- if len(r.runtimeFlags) > 0 {
- rFlags := []string{}
- for _, arg := range r.runtimeFlags {
- rFlags = append(rFlags, "--runtime-arg", arg)
- }
- args = append(args, rFlags...)
- }
-
- if ctr.CgroupManager() == config.SystemdCgroupsManager && !ctr.config.NoCgroups && ctr.config.CgroupsMode != cgroupSplit {
- args = append(args, "-s")
- }
-
- var logDriverArg string
- switch logDriver {
- case define.JournaldLogging:
- logDriverArg = define.JournaldLogging
- case define.NoLogging:
- logDriverArg = define.NoLogging
- case define.PassthroughLogging:
- logDriverArg = define.PassthroughLogging
- //lint:ignore ST1015 the default case has to be here
- default: //nolint:stylecheck,gocritic
- // No case here should happen except JSONLogging, but keep this here in case the options are extended
- logrus.Errorf("%s logging specified but not supported. Choosing k8s-file logging instead", ctr.LogDriver())
- fallthrough
- case "":
- // to get here, either a user would specify `--log-driver ""`, or this came from another place in libpod
- // since the former case is obscure, and the latter case isn't an error, let's silently fallthrough
- fallthrough
- case define.JSONLogging:
- fallthrough
- case define.KubernetesLogging:
- logDriverArg = fmt.Sprintf("%s:%s", define.KubernetesLogging, logPath)
- }
-
- args = append(args, "-l", logDriverArg)
- logLevel := logrus.GetLevel()
- args = append(args, "--log-level", logLevel.String())
-
- if logLevel == logrus.DebugLevel {
- logrus.Debugf("%s messages will be logged to syslog", r.conmonPath)
- args = append(args, "--syslog")
- }
-
- size := r.logSizeMax
- if ctr.config.LogSize > 0 {
- size = ctr.config.LogSize
- }
- if size > 0 {
- args = append(args, "--log-size-max", fmt.Sprintf("%v", size))
- }
-
- if ociLogPath != "" {
- args = append(args, "--runtime-arg", "--log-format=json", "--runtime-arg", "--log", fmt.Sprintf("--runtime-arg=%s", ociLogPath))
- }
- if logTag != "" {
- args = append(args, "--log-tag", logTag)
- }
- if ctr.config.NoCgroups {
- logrus.Debugf("Running with no Cgroups")
- args = append(args, "--runtime-arg", "--cgroup-manager", "--runtime-arg", "disabled")
- }
- return args
-}
-
-func startCommand(cmd *exec.Cmd, ctr *Container) error {
- // Make sure to unset the NOTIFY_SOCKET and reset it afterwards if needed.
- switch ctr.config.SdNotifyMode {
- case define.SdNotifyModeContainer, define.SdNotifyModeIgnore:
- if prev := os.Getenv("NOTIFY_SOCKET"); prev != "" {
- if err := os.Unsetenv("NOTIFY_SOCKET"); err != nil {
- logrus.Warnf("Error unsetting NOTIFY_SOCKET %v", err)
- }
- defer func() {
- if err := os.Setenv("NOTIFY_SOCKET", prev); err != nil {
- logrus.Errorf("Resetting NOTIFY_SOCKET=%s", prev)
- }
- }()
- }
- }
-
- return cmd.Start()
+ return err
}
// moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup
@@ -1475,271 +169,6 @@ func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec
return nil
}
-// newPipe creates a unix socket pair for communication.
-// Returns two files - first is parent, second is child.
-func newPipe() (*os.File, *os.File, error) {
- fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0)
- if err != nil {
- return nil, nil, err
- }
- return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil
-}
-
-// readConmonPidFile attempts to read conmon's pid from its pid file
-func readConmonPidFile(pidFile string) (int, error) {
- // Let's try reading the Conmon pid at the same time.
- if pidFile != "" {
- contents, err := ioutil.ReadFile(pidFile)
- if err != nil {
- return -1, err
- }
- // Convert it to an int
- conmonPID, err := strconv.Atoi(string(contents))
- if err != nil {
- return -1, err
- }
- return conmonPID, nil
- }
- return 0, nil
-}
-
-// readConmonPipeData attempts to read a syncInfo struct from the pipe
-func readConmonPipeData(runtimeName string, pipe *os.File, ociLog string) (int, error) {
- // syncInfo is used to return data from monitor process to daemon
- type syncInfo struct {
- Data int `json:"data"`
- Message string `json:"message,omitempty"`
- }
-
- // Wait to get container pid from conmon
- type syncStruct struct {
- si *syncInfo
- err error
- }
- ch := make(chan syncStruct)
- go func() {
- var si *syncInfo
- rdr := bufio.NewReader(pipe)
- b, err := rdr.ReadBytes('\n')
- // ignore EOF here, error is returned even when data was read
- // if it is no valid json unmarshal will fail below
- if err != nil && !errors.Is(err, io.EOF) {
- ch <- syncStruct{err: err}
- }
- if err := json.Unmarshal(b, &si); err != nil {
- ch <- syncStruct{err: fmt.Errorf("conmon bytes %q: %w", string(b), err)}
- return
- }
- ch <- syncStruct{si: si}
- }()
-
- data := -1 //nolint: wastedassign
- select {
- case ss := <-ch:
- if ss.err != nil {
- if ociLog != "" {
- ociLogData, err := ioutil.ReadFile(ociLog)
- if err == nil {
- var ociErr ociError
- if err := json.Unmarshal(ociLogData, &ociErr); err == nil {
- return -1, getOCIRuntimeError(runtimeName, ociErr.Msg)
- }
- }
- }
- return -1, fmt.Errorf("container create failed (no logs from conmon): %w", ss.err)
- }
- logrus.Debugf("Received: %d", ss.si.Data)
- if ss.si.Data < 0 {
- if ociLog != "" {
- ociLogData, err := ioutil.ReadFile(ociLog)
- if err == nil {
- var ociErr ociError
- if err := json.Unmarshal(ociLogData, &ociErr); err == nil {
- return ss.si.Data, getOCIRuntimeError(runtimeName, ociErr.Msg)
- }
- }
- }
- // If we failed to parse the JSON errors, then print the output as it is
- if ss.si.Message != "" {
- return ss.si.Data, getOCIRuntimeError(runtimeName, ss.si.Message)
- }
- return ss.si.Data, fmt.Errorf("container create failed: %w", define.ErrInternal)
- }
- data = ss.si.Data
- case <-time.After(define.ContainerCreateTimeout):
- return -1, fmt.Errorf("container creation timeout: %w", define.ErrInternal)
- }
- return data, nil
-}
-
-// writeConmonPipeData writes nonce data to a pipe
-func writeConmonPipeData(pipe *os.File) error {
- someData := []byte{0}
- _, err := pipe.Write(someData)
- return err
-}
-
-// formatRuntimeOpts prepends opts passed to it with --runtime-opt for passing to conmon
-func formatRuntimeOpts(opts ...string) []string {
- args := make([]string, 0, len(opts)*2)
- for _, o := range opts {
- args = append(args, "--runtime-opt", o)
- }
- return args
-}
-
-// getConmonVersion returns a string representation of the conmon version.
-func (r *ConmonOCIRuntime) getConmonVersion() (string, error) {
- output, err := utils.ExecCmd(r.conmonPath, "--version")
- if err != nil {
- return "", err
- }
- return strings.TrimSuffix(strings.Replace(output, "\n", ", ", 1), "\n"), nil
-}
-
-// getOCIRuntimeVersion returns a string representation of the OCI runtime's
-// version.
-func (r *ConmonOCIRuntime) getOCIRuntimeVersion() (string, error) {
- output, err := utils.ExecCmd(r.path, "--version")
- if err != nil {
- return "", err
- }
- return strings.TrimSuffix(output, "\n"), nil
-}
-
-// Copy data from container to HTTP connection, for terminal attach.
-// Container is the container's attach socket connection, http is a buffer for
-// the HTTP connection. cid is the ID of the container the attach session is
-// running for (used solely for error messages).
-func httpAttachTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter, cid string) error {
- buf := make([]byte, bufferSize)
- for {
- numR, err := container.Read(buf)
- logrus.Debugf("Read fd(%d) %d/%d bytes for container %s", int(buf[0]), numR, len(buf), cid)
-
- if numR > 0 {
- switch buf[0] {
- case AttachPipeStdout:
- // Do nothing
- default:
- logrus.Errorf("Received unexpected attach type %+d, discarding %d bytes", buf[0], numR)
- continue
- }
-
- numW, err2 := http.Write(buf[1:numR])
- if err2 != nil {
- if err != nil {
- logrus.Errorf("Reading container %s STDOUT: %v", cid, err)
- }
- return err2
- } else if numW+1 != numR {
- return io.ErrShortWrite
- }
- // We need to force the buffer to write immediately, so
- // there isn't a delay on the terminal side.
- if err2 := http.Flush(); err2 != nil {
- if err != nil {
- logrus.Errorf("Reading container %s STDOUT: %v", cid, err)
- }
- return err2
- }
- }
- if err != nil {
- if err == io.EOF {
- return nil
- }
- return err
- }
- }
-}
-
-// Copy data from a container to an HTTP connection, for non-terminal attach.
-// Appends a header to multiplex input.
-func httpAttachNonTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter, cid string, stdin, stdout, stderr bool) error {
- buf := make([]byte, bufferSize)
- for {
- numR, err := container.Read(buf)
- if numR > 0 {
- var headerBuf []byte
-
- // Subtract 1 because we strip the first byte (used for
- // multiplexing by Conmon).
- headerLen := uint32(numR - 1)
- // Practically speaking, we could make this buf[0] - 1,
- // but we need to validate it anyway.
- switch buf[0] {
- case AttachPipeStdin:
- headerBuf = makeHTTPAttachHeader(0, headerLen)
- if !stdin {
- continue
- }
- case AttachPipeStdout:
- if !stdout {
- continue
- }
- headerBuf = makeHTTPAttachHeader(1, headerLen)
- case AttachPipeStderr:
- if !stderr {
- continue
- }
- headerBuf = makeHTTPAttachHeader(2, headerLen)
- default:
- logrus.Errorf("Received unexpected attach type %+d, discarding %d bytes", buf[0], numR)
- continue
- }
-
- numH, err2 := http.Write(headerBuf)
- if err2 != nil {
- if err != nil {
- logrus.Errorf("Reading container %s standard streams: %v", cid, err)
- }
-
- return err2
- }
- // Hardcoding header length is pretty gross, but
- // fast. Should be safe, as this is a fixed part
- // of the protocol.
- if numH != 8 {
- if err != nil {
- logrus.Errorf("Reading container %s standard streams: %v", cid, err)
- }
-
- return io.ErrShortWrite
- }
-
- numW, err2 := http.Write(buf[1:numR])
- if err2 != nil {
- if err != nil {
- logrus.Errorf("Reading container %s standard streams: %v", cid, err)
- }
-
- return err2
- } else if numW+1 != numR {
- if err != nil {
- logrus.Errorf("Reading container %s standard streams: %v", cid, err)
- }
-
- return io.ErrShortWrite
- }
- // We need to force the buffer to write immediately, so
- // there isn't a delay on the terminal side.
- if err2 := http.Flush(); err2 != nil {
- if err != nil {
- logrus.Errorf("Reading container %s STDOUT: %v", cid, err)
- }
- return err2
- }
- }
- if err != nil {
- if err == io.EOF {
- return nil
- }
-
- return err
- }
- }
-}
-
// GetLimits converts spec resource limits to cgroup consumable limits
func GetLimits(resource *spec.LinuxResources) (runcconfig.Resources, error) {
if resource == nil {
diff --git a/libpod/oci_conmon_unsupported.go b/libpod/oci_conmon_unsupported.go
index c72dc0f0d..cc6d68e89 100644
--- a/libpod/oci_conmon_unsupported.go
+++ b/libpod/oci_conmon_unsupported.go
@@ -1,5 +1,5 @@
-//go:build !linux
-// +build !linux
+//go:build !linux && !freebsd
+// +build !linux,!freebsd
package libpod
diff --git a/libpod/runtime.go b/libpod/runtime.go
index ea4b34954..684f4abd7 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -2,15 +2,11 @@ package libpod
import (
"bufio"
- "bytes"
"context"
"errors"
"fmt"
"os"
- "os/exec"
"path/filepath"
- "regexp"
- "strconv"
"strings"
"sync"
"syscall"
@@ -44,17 +40,6 @@ import (
"github.com/sirupsen/logrus"
)
-const (
- // conmonMinMajorVersion is the major version required for conmon.
- conmonMinMajorVersion = 2
-
- // conmonMinMinorVersion is the minor version required for conmon.
- conmonMinMinorVersion = 0
-
- // conmonMinPatchVersion is the sub-minor version required for conmon.
- conmonMinPatchVersion = 24
-)
-
// A RuntimeOption is a functional option which alters the Runtime created by
// NewRuntime
type RuntimeOption func(*Runtime) error
@@ -308,7 +293,7 @@ func getLockManager(runtime *Runtime) (lock.Manager, error) {
// Sets up containers/storage, state store, OCI runtime
func makeRuntime(runtime *Runtime) (retErr error) {
// Find a working conmon binary
- cPath, err := findConmon(runtime.config.Engine.ConmonPath)
+ cPath, err := runtime.config.FindConmon()
if err != nil {
return err
}
@@ -670,102 +655,6 @@ func makeRuntime(runtime *Runtime) (retErr error) {
return nil
}
-// findConmon iterates over conmonPaths and returns the path
-// to the first conmon binary with a new enough version. If none is found,
-// we try to do a path lookup of "conmon".
-func findConmon(conmonPaths []string) (string, error) {
- foundOutdatedConmon := false
- for _, path := range conmonPaths {
- stat, err := os.Stat(path)
- if err != nil {
- continue
- }
- if stat.IsDir() {
- continue
- }
- if err := probeConmon(path); err != nil {
- logrus.Warnf("Conmon at %s invalid: %v", path, err)
- foundOutdatedConmon = true
- continue
- }
- logrus.Debugf("Using conmon: %q", path)
- return path, nil
- }
-
- // Search the $PATH as last fallback
- if path, err := exec.LookPath("conmon"); err == nil {
- if err := probeConmon(path); err != nil {
- logrus.Warnf("Conmon at %s is invalid: %v", path, err)
- foundOutdatedConmon = true
- } else {
- logrus.Debugf("Using conmon from $PATH: %q", path)
- return path, nil
- }
- }
-
- if foundOutdatedConmon {
- return "", fmt.Errorf(
- "please update to v%d.%d.%d or later: %w",
- conmonMinMajorVersion, conmonMinMinorVersion, conmonMinPatchVersion, define.ErrConmonOutdated)
- }
-
- return "", fmt.Errorf(
- "could not find a working conmon binary (configured options: %v): %w",
- conmonPaths, define.ErrInvalidArg)
-}
-
-// probeConmon calls conmon --version and verifies it is a new enough version for
-// the runtime expectations the container engine currently has.
-func probeConmon(conmonBinary string) error {
- cmd := exec.Command(conmonBinary, "--version")
- var out bytes.Buffer
- cmd.Stdout = &out
- err := cmd.Run()
- if err != nil {
- return err
- }
- r := regexp.MustCompile(`^conmon version (?P<Major>\d+).(?P<Minor>\d+).(?P<Patch>\d+)`)
-
- matches := r.FindStringSubmatch(out.String())
- if len(matches) != 4 {
- return fmt.Errorf("%v: %w", define.ErrConmonVersionFormat, err)
- }
- major, err := strconv.Atoi(matches[1])
- if err != nil {
- return fmt.Errorf("%v: %w", define.ErrConmonVersionFormat, err)
- }
- if major < conmonMinMajorVersion {
- return define.ErrConmonOutdated
- }
- if major > conmonMinMajorVersion {
- return nil
- }
-
- minor, err := strconv.Atoi(matches[2])
- if err != nil {
- return fmt.Errorf("%v: %w", define.ErrConmonVersionFormat, err)
- }
- if minor < conmonMinMinorVersion {
- return define.ErrConmonOutdated
- }
- if minor > conmonMinMinorVersion {
- return nil
- }
-
- patch, err := strconv.Atoi(matches[3])
- if err != nil {
- return fmt.Errorf("%v: %w", define.ErrConmonVersionFormat, err)
- }
- if patch < conmonMinPatchVersion {
- return define.ErrConmonOutdated
- }
- if patch > conmonMinPatchVersion {
- return nil
- }
-
- return nil
-}
-
// TmpDir gets the current Libpod temporary files directory.
func (r *Runtime) TmpDir() (string, error) {
if !r.valid {
diff --git a/pkg/api/handlers/compat/containers.go b/pkg/api/handlers/compat/containers.go
index ae063dc9f..0b82c48f6 100644
--- a/pkg/api/handlers/compat/containers.go
+++ b/pkg/api/handlers/compat/containers.go
@@ -467,6 +467,7 @@ func LibpodToContainerJSON(l *libpod.Container, sz bool) (*types.ContainerJSON,
if err := json.Unmarshal(h, &hc); err != nil {
return nil, err
}
+ sort.Strings(hc.Binds)
// k8s-file == json-file
if hc.LogConfig.Type == define.KubernetesLogging {
diff --git a/test/apiv2/10-images.at b/test/apiv2/10-images.at
index f03b95786..4fd954e37 100644
--- a/test/apiv2/10-images.at
+++ b/test/apiv2/10-images.at
@@ -203,7 +203,7 @@ t POST "build?dockerfile=containerfile" $CONTAINERFILE_TAR application/json 200
# Libpod: allow building from url: https://github.com/alpinelinux/docker-alpine.git and must ignore any provided tar
t POST "libpod/build?remote=https%3A%2F%2Fgithub.com%2Falpinelinux%2Fdocker-alpine.git" $CONTAINERFILE_TAR 200 \
- .stream~"STEP 1/5: FROM alpine:3.14"
+ .stream~"STEP 1/5: FROM alpine:"
# Build api response header must contain Content-type: application/json
t POST "build?dockerfile=containerfile" $CONTAINERFILE_TAR application/json 200
diff --git a/test/e2e/manifest_test.go b/test/e2e/manifest_test.go
index 145a016ea..1c4aad710 100644
--- a/test/e2e/manifest_test.go
+++ b/test/e2e/manifest_test.go
@@ -46,17 +46,23 @@ var _ = Describe("Podman manifest", func() {
processTestResult(f)
})
It("create w/o image", func() {
- session := podmanTest.Podman([]string{"manifest", "create", "foo"})
- session.WaitWithDefaultTimeout()
- Expect(session).Should(Exit(0))
-
- session = podmanTest.Podman([]string{"manifest", "create", "foo"})
- session.WaitWithDefaultTimeout()
- Expect(session).To(ExitWithError())
-
- session = podmanTest.Podman([]string{"manifest", "create", "--amend", "foo"})
- session.WaitWithDefaultTimeout()
- Expect(session).Should(Exit(0))
+ for _, amend := range []string{"--amend", "-a"} {
+ session := podmanTest.Podman([]string{"manifest", "create", "foo"})
+ session.WaitWithDefaultTimeout()
+ Expect(session).Should(Exit(0))
+
+ session = podmanTest.Podman([]string{"manifest", "create", "foo"})
+ session.WaitWithDefaultTimeout()
+ Expect(session).To(ExitWithError())
+
+ session = podmanTest.Podman([]string{"manifest", "create", amend, "foo"})
+ session.WaitWithDefaultTimeout()
+ Expect(session).Should(Exit(0))
+
+ session = podmanTest.Podman([]string{"manifest", "rm", "foo"})
+ session.WaitWithDefaultTimeout()
+ Expect(session).Should(Exit(0))
+ }
})
It("create w/ image", func() {
diff --git a/test/e2e/stats_test.go b/test/e2e/stats_test.go
index 3000a819f..981c00316 100644
--- a/test/e2e/stats_test.go
+++ b/test/e2e/stats_test.go
@@ -79,9 +79,10 @@ var _ = Describe("Podman stats", func() {
session := podmanTest.RunTopContainer("")
session.WaitWithDefaultTimeout()
Expect(session).Should(Exit(0))
- session = podmanTest.Podman([]string{"stats", "--all", "--no-stream", "--format", "\"{{.ID}}\""})
+ session = podmanTest.Podman([]string{"stats", "--all", "--no-trunc", "--no-stream", "--format", "\"{{.ID}}\""})
session.WaitWithDefaultTimeout()
Expect(session).Should(Exit(0))
+ Expect(len(session.OutputToStringArray()[0])).Should(BeEquivalentTo(66))
})
It("podman stats with GO template", func() {
diff --git a/test/system/710-kube.bats b/test/system/710-kube.bats
new file mode 100644
index 000000000..58e42148a
--- /dev/null
+++ b/test/system/710-kube.bats
@@ -0,0 +1,171 @@
+#!/usr/bin/env bats -*- bats -*-
+#
+# Test podman kube generate
+#
+
+load helpers
+
+# standard capability drop list
+capabilities='{"drop":["CAP_MKNOD","CAP_NET_RAW","CAP_AUDIT_WRITE"]}'
+
+# Warning that is emitted once on containers, multiple times on pods
+kubernetes_63='Truncation Annotation: .* Kubernetes only allows 63 characters'
+
+# filter: convert yaml to json, because bash+yaml=madness
+function yaml2json() {
+ egrep -v "$kubernetes_63" | python3 -c 'import yaml
+import json
+import sys
+json.dump(yaml.safe_load(sys.stdin), sys.stdout)'
+}
+
+###############################################################################
+# BEGIN tests
+
+@test "podman kube generate - usage message" {
+ run_podman kube generate --help
+ is "$output" ".*podman.* kube generate \[options\] {CONTAINER...|POD...|VOLUME...}"
+ run_podman generate kube --help
+ is "$output" ".*podman.* generate kube \[options\] {CONTAINER...|POD...|VOLUME...}"
+}
+
+@test "podman kube generate - container" {
+ cname=c$(random_string 15)
+ run_podman container create --name $cname $IMAGE top
+ run_podman kube generate $cname
+
+ # Convert yaml to json, and dump to stdout (to help in case of errors)
+ json=$(yaml2json <<<"$output")
+ jq . <<<"$json"
+
+ # What we expect to see. This is by necessity an incomplete list.
+ # For instance, it does not include org.opencontainers.image.base.*
+ # because sometimes we get that, sometimes we don't. No clue why.
+ #
+ # And, unfortunately, if new fields are added to the YAML, we won't
+ # test those unless a developer remembers to add them here.
+ #
+ # Reasons for doing it this way, instead of straight-comparing yaml:
+ # 1) the arbitrariness of the org.opencontainers.image.base annotations
+ # 2) YAML order is nondeterministic, so on a pod with two containers
+ # (as in the pod test below) we cannot rely on cname1/cname2.
+ expect="
+apiVersion | = | v1
+kind | = | Pod
+
+metadata.annotations.\"io.kubernetes.cri-o.TTY/$cname\" | = | false
+metadata.annotations.\"io.podman.annotations.autoremove/$cname\" | = | FALSE
+metadata.annotations.\"io.podman.annotations.init/$cname\" | = | FALSE
+metadata.annotations.\"io.podman.annotations.privileged/$cname\" | = | FALSE
+metadata.annotations.\"io.podman.annotations.publish-all/$cname\" | = | FALSE
+
+metadata.creationTimestamp | =~ | [0-9T:-]\\+Z
+metadata.labels.app | = | ${cname}-pod
+metadata.name | = | ${cname}-pod
+
+spec.containers[0].command | = | [\"top\"]
+spec.containers[0].image | = | $IMAGE
+spec.containers[0].name | = | $cname
+
+spec.containers[0].securityContext.capabilities | = | $capabilities
+
+status | = | null
+"
+
+ # Parse and check all those
+ while read key op expect; do
+ actual=$(jq -r -c ".$key" <<<"$json")
+ assert "$actual" $op "$expect" ".$key"
+ done < <(parse_table "$expect")
+
+ if ! is_remote; then
+ count=$(egrep -c "$kubernetes_63" <<<"$output")
+ assert "$count" = 1 "1 instance of the Kubernetes-63-char warning"
+ fi
+
+ run_podman rm $cname
+}
+
+@test "podman kube generate - pod" {
+ local pname=p$(random_string 15)
+ local cname1=c1$(random_string 15)
+ local cname2=c2$(random_string 15)
+
+ run_podman pod create --name $pname --publish 9999:8888
+
+ # Needs at least one container. Error is slightly different between
+ # regular and remote podman:
+ # regular: Error: pod ... only has...
+ # remote: Error: error generating YAML: pod ... only has...
+ run_podman 125 kube generate $pname
+ assert "$output" =~ "Error: .* only has an infra container"
+
+ run_podman container create --name $cname1 --pod $pname $IMAGE top
+ run_podman container create --name $cname2 --pod $pname $IMAGE bottom
+ run_podman kube generate $pname
+
+ json=$(yaml2json <<<"$output")
+ jq . <<<"$json"
+
+ # See container test above for description of this table
+ expect="
+apiVersion | = | v1
+kind | = | Pod
+
+metadata.annotations.\"io.kubernetes.cri-o.ContainerType/$cname1\" | = | container
+metadata.annotations.\"io.kubernetes.cri-o.ContainerType/$cname2\" | = | container
+metadata.annotations.\"io.kubernetes.cri-o.SandboxID/$cname1\" | =~ | [0-9a-f]\\{56\\}
+metadata.annotations.\"io.kubernetes.cri-o.SandboxID/$cname2\" | =~ | [0-9a-f]\\{56\\}
+metadata.annotations.\"io.kubernetes.cri-o.TTY/$cname1\" | = | false
+metadata.annotations.\"io.kubernetes.cri-o.TTY/$cname2\" | = | false
+metadata.annotations.\"io.podman.annotations.autoremove/$cname1\" | = | FALSE
+metadata.annotations.\"io.podman.annotations.autoremove/$cname2\" | = | FALSE
+metadata.annotations.\"io.podman.annotations.init/$cname1\" | = | FALSE
+metadata.annotations.\"io.podman.annotations.init/$cname2\" | = | FALSE
+metadata.annotations.\"io.podman.annotations.privileged/$cname1\" | = | FALSE
+metadata.annotations.\"io.podman.annotations.privileged/$cname2\" | = | FALSE
+metadata.annotations.\"io.podman.annotations.publish-all/$cname1\" | = | FALSE
+metadata.annotations.\"io.podman.annotations.publish-all/$cname2\" | = | FALSE
+
+metadata.creationTimestamp | =~ | [0-9T:-]\\+Z
+metadata.labels.app | = | ${pname}
+metadata.name | = | ${pname}
+
+spec.hostname | = | $pname
+spec.restartPolicy | = | Never
+
+spec.containers[0].command | = | [\"top\"]
+spec.containers[0].image | = | $IMAGE
+spec.containers[0].name | = | $cname1
+spec.containers[0].ports[0].containerPort | = | 8888
+spec.containers[0].ports[0].hostPort | = | 9999
+spec.containers[0].resources | = | {}
+
+spec.containers[1].command | = | [\"bottom\"]
+spec.containers[1].image | = | $IMAGE
+spec.containers[1].name | = | $cname2
+spec.containers[1].ports | = | null
+spec.containers[1].resources | = | {}
+
+spec.containers[0].securityContext.capabilities | = | $capabilities
+
+status | = | {}
+"
+
+ while read key op expect; do
+ actual=$(jq -r -c ".$key" <<<"$json")
+ assert "$actual" $op "$expect" ".$key"
+ done < <(parse_table "$expect")
+
+ # Why 4? Maybe two for each container?
+ if ! is_remote; then
+ count=$(egrep -c "$kubernetes_63" <<<"$output")
+ assert "$count" = 4 "instances of the Kubernetes-63-char warning"
+ fi
+
+ run_podman rm $cname1 $cname2
+ run_podman pod rm $pname
+ run_podman rmi $(pause_image)
+}
+
+# vim: filetype=sh
diff --git a/test/system/helpers.bash b/test/system/helpers.bash
index 5ff3fae6d..f2eb3016c 100644
--- a/test/system/helpers.bash
+++ b/test/system/helpers.bash
@@ -36,20 +36,6 @@ fi
# That way individual tests can override with their own setup/teardown,
# while retaining the ability to include these if they so desire.
-# Some CI systems set this to runc, overriding the default crun.
-if [[ -n $OCI_RUNTIME ]]; then
- if [[ -z $CONTAINERS_CONF ]]; then
- # FIXME: BATS provides no mechanism for end-of-run cleanup[1]; how
- # can we avoid leaving this file behind when we finish?
- # [1] https://github.com/bats-core/bats-core/issues/39
- export CONTAINERS_CONF=$(mktemp --tmpdir=${BATS_TMPDIR:-/tmp} podman-bats-XXXXXXX.containers.conf)
- cat >$CONTAINERS_CONF <<EOF
-[engine]
-runtime="$OCI_RUNTIME"
-EOF
- fi
-fi
-
# Setup helper: establish a test environment with exactly the images needed
function basic_setup() {
# Clean up all containers
diff --git a/troubleshooting.md b/troubleshooting.md
index 6d46a543f..c4ce191ca 100644
--- a/troubleshooting.md
+++ b/troubleshooting.md
@@ -678,23 +678,28 @@ $ podman run --rootfs /path/to/rootfs:O ....
Modifications to the mount point are destroyed when the container
finishes executing, similar to a tmpfs mount point being unmounted.
-### 26) Running containers with CPU limits fails with a permissions error
+### 26) Running containers with resource limits fails with a permissions error
-On some systemd-based systems, non-root users do not have CPU limit delegation
-permissions. This causes setting CPU limits to fail.
+On some systemd-based systems, non-root users do not have resource limit delegation
+permissions. This causes setting resource limits to fail.
#### Symptom
-Running a container with a CPU limit options such as `--cpus`, `--cpu-period`,
-or `--cpu-quota` will fail with an error similar to the following:
+Running a container with resource limit options will fail with an error similar to the following:
- Error: opening file `cpu.max` for writing: Permission denied: OCI runtime permission denied error
+`--cpus`, `--cpu-period`, `--cpu-quota`, `--cpu-shares`:
-This means that CPU limit delegation is not enabled for the current user.
+ Error: OCI runtime error: crun: the requested cgroup controller `cpu` is not available
+
+`--cpuset-cpus`, `--cpuset-mems`:
+
+ Error: OCI runtime error: crun: the requested cgroup controller `cpuset` is not available
+
+This means that resource limit delegation is not enabled for the current user.
#### Solution
-You can verify whether CPU limit delegation is enabled by running the following command:
+You can verify whether resource limit delegation is enabled by running the following command:
```console
$ cat "/sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/cgroup.controllers"
@@ -704,19 +709,19 @@ Example output might be:
memory pids
-In the above example, `cpu` is not listed, which means the current user does
-not have permission to set CPU limits.
+In the above example, `cpu` and `cpuset` are not listed, which means the current user does
+not have permission to set CPU or CPUSET limits.
-If you want to enable CPU limit delegation for all users, you can create the
+If you want to enable CPU or CPUSET limit delegation for all users, you can create the
file `/etc/systemd/system/user@.service.d/delegate.conf` with the contents:
```ini
[Service]
-Delegate=memory pids cpu io
+Delegate=memory pids cpu cpuset
```
-After logging out and logging back in, you should have permission to set CPU
-limits.
+After logging out and logging back in, you should have permission to set
+CPU and CPUSET limits.
### 26) `exec container process '/bin/sh': Exec format error` (or another binary than `bin/sh`)
diff --git a/vendor/github.com/containers/common/pkg/config/config.go b/vendor/github.com/containers/common/pkg/config/config.go
index a6276fbef..de1d91ae3 100644
--- a/vendor/github.com/containers/common/pkg/config/config.go
+++ b/vendor/github.com/containers/common/pkg/config/config.go
@@ -234,6 +234,10 @@ type EngineConfig struct {
// The first path pointing to a valid file will be used.
ConmonPath []string `toml:"conmon_path,omitempty"`
+ // ConmonRsPath is the path to the Conmon-rs binary used for managing containers.
+ // The first path pointing to a valid file will be used.
+ ConmonRsPath []string `toml:"conmonrs_path,omitempty"`
+
// CompatAPIEnforceDockerHub enforces using docker.io for completing
// short names in Podman's compatibility REST API. Note that this will
// ignore unqualified-search-registries and short-name aliases defined
@@ -915,8 +919,12 @@ func (c *NetworkConfig) Validate() error {
// to first (version) matching conmon binary. If non is found, we try
// to do a path lookup of "conmon".
func (c *Config) FindConmon() (string, error) {
+ return findConmonPath(c.Engine.ConmonPath, "conmon", _conmonMinMajorVersion, _conmonMinMinorVersion, _conmonMinPatchVersion)
+}
+
+func findConmonPath(paths []string, binaryName string, major int, minor int, patch int) (string, error) {
foundOutdatedConmon := false
- for _, path := range c.Engine.ConmonPath {
+ for _, path := range paths {
stat, err := os.Stat(path)
if err != nil {
continue
@@ -934,7 +942,7 @@ func (c *Config) FindConmon() (string, error) {
}
// Search the $PATH as last fallback
- if path, err := exec.LookPath("conmon"); err == nil {
+ if path, err := exec.LookPath(binaryName); err == nil {
if err := probeConmon(path); err != nil {
logrus.Warnf("Conmon at %s is invalid: %v", path, err)
foundOutdatedConmon = true
@@ -946,11 +954,18 @@ func (c *Config) FindConmon() (string, error) {
if foundOutdatedConmon {
return "", fmt.Errorf("please update to v%d.%d.%d or later: %w",
- _conmonMinMajorVersion, _conmonMinMinorVersion, _conmonMinPatchVersion, ErrConmonOutdated)
+ major, minor, patch, ErrConmonOutdated)
}
return "", fmt.Errorf("could not find a working conmon binary (configured options: %v: %w)",
- c.Engine.ConmonPath, ErrInvalidArg)
+ paths, ErrInvalidArg)
+}
+
+// FindConmonRs iterates over (*Config).ConmonRsPath and returns the path
+// to the first (version) matching conmonrs binary. If none is found, we try
+// to do a path lookup of "conmonrs".
+func (c *Config) FindConmonRs() (string, error) {
+ return findConmonPath(c.Engine.ConmonRsPath, "conmonrs", _conmonrsMinMajorVersion, _conmonrsMinMinorVersion, _conmonrsMinPatchVersion)
}
// GetDefaultEnv returns the environment variables for the container.
diff --git a/vendor/github.com/containers/common/pkg/config/default.go b/vendor/github.com/containers/common/pkg/config/default.go
index c7ddf90ee..6bca7312a 100644
--- a/vendor/github.com/containers/common/pkg/config/default.go
+++ b/vendor/github.com/containers/common/pkg/config/default.go
@@ -33,6 +33,15 @@ const (
// _conmonMinPatchVersion is the sub-minor version required for conmon.
_conmonMinPatchVersion = 1
+ // _conmonrsMinMajorVersion is the major version required for conmonrs.
+ _conmonrsMinMajorVersion = 0
+
+ // _conmonrsMinMinorVersion is the minor version required for conmonrs.
+ _conmonrsMinMinorVersion = 1
+
+ // _conmonrsMinPatchVersion is the sub-minor version required for conmonrs.
+ _conmonrsMinPatchVersion = 0
+
// _conmonVersionFormatErr is used when the expected versio-format of conmon
// has changed.
_conmonVersionFormatErr = "conmon version changed format: %w"
@@ -276,7 +285,9 @@ func defaultConfigFromMemory() (*EngineConfig, error) {
c.CompatAPIEnforceDockerHub = true
if path, ok := os.LookupEnv("CONTAINERS_STORAGE_CONF"); ok {
- types.SetDefaultConfigFilePath(path)
+ if err := types.SetDefaultConfigFilePath(path); err != nil {
+ return nil, err
+ }
}
storeOpts, err := types.DefaultStoreOptions(unshare.IsRootless(), unshare.GetRootlessUID())
if err != nil {
@@ -372,6 +383,16 @@ func defaultConfigFromMemory() (*EngineConfig, error) {
"/usr/local/sbin/conmon",
"/run/current-system/sw/bin/conmon",
}
+ c.ConmonRsPath = []string{
+ "/usr/libexec/podman/conmonrs",
+ "/usr/local/libexec/podman/conmonrs",
+ "/usr/local/lib/podman/conmonrs",
+ "/usr/bin/conmonrs",
+ "/usr/sbin/conmonrs",
+ "/usr/local/bin/conmonrs",
+ "/usr/local/sbin/conmonrs",
+ "/run/current-system/sw/bin/conmonrs",
+ }
c.PullPolicy = DefaultPullPolicy
c.RuntimeSupportsJSON = []string{
"crun",
@@ -434,42 +455,55 @@ func probeConmon(conmonBinary string) error {
if err := cmd.Run(); err != nil {
return err
}
- r := regexp.MustCompile(`^conmon version (?P<Major>\d+).(?P<Minor>\d+).(?P<Patch>\d+)`)
+ r := regexp.MustCompile(`^(version:|conmon version)? (?P<Major>\d+).(?P<Minor>\d+).(?P<Patch>\d+)`)
matches := r.FindStringSubmatch(out.String())
- if len(matches) != 4 {
- return errors.New(_conmonVersionFormatErr)
+ if len(matches) != 5 {
+ return fmt.Errorf(_conmonVersionFormatErr, errors.New("invalid version format"))
+ }
+ major, err := strconv.Atoi(matches[2])
+
+ var minMajor, minMinor, minPatch int
+ // conmon-rs returns "^version:"
+ if matches[1] == "version:" {
+ minMajor = _conmonrsMinMajorVersion
+ minMinor = _conmonrsMinMinorVersion
+ minPatch = _conmonrsMinPatchVersion
+ } else {
+ minMajor = _conmonMinMajorVersion
+ minMinor = _conmonMinMinorVersion
+ minPatch = _conmonMinPatchVersion
}
- major, err := strconv.Atoi(matches[1])
+
if err != nil {
return fmt.Errorf(_conmonVersionFormatErr, err)
}
- if major < _conmonMinMajorVersion {
+ if major < minMajor {
return ErrConmonOutdated
}
- if major > _conmonMinMajorVersion {
+ if major > minMajor {
return nil
}
- minor, err := strconv.Atoi(matches[2])
+ minor, err := strconv.Atoi(matches[3])
if err != nil {
return fmt.Errorf(_conmonVersionFormatErr, err)
}
- if minor < _conmonMinMinorVersion {
+ if minor < minMinor {
return ErrConmonOutdated
}
- if minor > _conmonMinMinorVersion {
+ if minor > minMinor {
return nil
}
- patch, err := strconv.Atoi(matches[3])
+ patch, err := strconv.Atoi(matches[4])
if err != nil {
return fmt.Errorf(_conmonVersionFormatErr, err)
}
- if patch < _conmonMinPatchVersion {
+ if patch < minPatch {
return ErrConmonOutdated
}
- if patch > _conmonMinPatchVersion {
+ if patch > minPatch {
return nil
}
diff --git a/vendor/modules.txt b/vendor/modules.txt
index 9cf63d41b..eb9c7a34d 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -114,7 +114,7 @@ github.com/containers/buildah/pkg/rusage
github.com/containers/buildah/pkg/sshagent
github.com/containers/buildah/pkg/util
github.com/containers/buildah/util
-# github.com/containers/common v0.49.2-0.20220809074359-b0ea008ba661
+# github.com/containers/common v0.49.2-0.20220817132854-f6679f170eca
## explicit
github.com/containers/common/libimage
github.com/containers/common/libimage/define
diff --git a/version/version.go b/version/version.go
index 0a84bb235..417f3a9b1 100644
--- a/version/version.go
+++ b/version/version.go
@@ -27,7 +27,7 @@ const (
// NOTE: remember to bump the version at the top
// of the top-level README.md file when this is
// bumped.
-var Version = semver.MustParse("4.2.0-dev")
+var Version = semver.MustParse("4.3.0-dev")
// See https://docs.docker.com/engine/api/v1.40/
// libpod compat handlers are expected to honor docker API versions