diff options
31 files changed, 551 insertions, 68 deletions
@@ -1894,6 +1894,8 @@ pod [?bool](#?bool) quiet [?bool](#?bool) +size [?bool](#?bool) + sort [?string](#?string) sync [?bool](#?bool) diff --git a/cmd/podman/checkpoint.go b/cmd/podman/checkpoint.go index 6755bb073..22cdb1f39 100644 --- a/cmd/podman/checkpoint.go +++ b/cmd/podman/checkpoint.go @@ -46,6 +46,7 @@ func init() { flags.BoolVarP(&checkpointCommand.All, "all", "a", false, "Checkpoint all running containers") flags.BoolVarP(&checkpointCommand.Latest, "latest", "l", false, "Act on the latest container podman is aware of") flags.StringVarP(&checkpointCommand.Export, "export", "e", "", "Export the checkpoint image to a tar.gz") + flags.BoolVar(&checkpointCommand.IgnoreRootfs, "ignore-rootfs", false, "Do not include root file-system changes when exporting") markFlagHiddenForRemoteClient("latest", flags) } diff --git a/cmd/podman/cliconfig/config.go b/cmd/podman/cliconfig/config.go index e3e2edb95..be380cda0 100644 --- a/cmd/podman/cliconfig/config.go +++ b/cmd/podman/cliconfig/config.go @@ -92,6 +92,7 @@ type CheckpointValues struct { All bool Latest bool Export string + IgnoreRootfs bool } type CommitValues struct { @@ -433,6 +434,7 @@ type RestoreValues struct { TcpEstablished bool Import string Name string + IgnoreRootfs bool } type RmValues struct { diff --git a/cmd/podman/common.go b/cmd/podman/common.go index 15f753d55..1e9092bd6 100644 --- a/cmd/podman/common.go +++ b/cmd/podman/common.go @@ -130,6 +130,10 @@ func getCreateFlags(c *cliconfig.PodmanCommand) { "Drop capabilities from the container", ) createFlags.String( + "cgroupns", "host", + "cgroup namespace to use", + ) + createFlags.String( "cgroup-parent", "", "Optional parent cgroup for the container", ) diff --git a/cmd/podman/create.go b/cmd/podman/create.go index 262cdffe4..3c24729c5 100644 --- a/cmd/podman/create.go +++ b/cmd/podman/create.go @@ -2,6 +2,7 @@ package main import ( "fmt" + "strings" "github.com/containers/libpod/cmd/podman/cliconfig" "github.com/containers/libpod/pkg/adapter" @@ 
-77,6 +78,16 @@ func createInit(c *cliconfig.PodmanCommand) error { logrus.Warn("setting security options with --privileged has no effect") } + var setNet string + if c.IsSet("network") { + setNet = c.String("network") + } else if c.IsSet("net") { + setNet = c.String("net") + } + if (c.IsSet("dns") || c.IsSet("dns-opt") || c.IsSet("dns-search")) && (setNet == "none" || strings.HasPrefix(setNet, "container:")) { + return errors.Errorf("conflicting options: dns and the network mode.") + } + // Docker-compatibility: the "-h" flag for run/create is reserved for // the hostname (see https://github.com/containers/libpod/issues/1367). diff --git a/cmd/podman/restore.go b/cmd/podman/restore.go index c4146eff0..3ae141d41 100644 --- a/cmd/podman/restore.go +++ b/cmd/podman/restore.go @@ -45,6 +45,7 @@ func init() { flags.BoolVar(&restoreCommand.TcpEstablished, "tcp-established", false, "Restore a container with established TCP connections") flags.StringVarP(&restoreCommand.Import, "import", "i", "", "Restore from exported checkpoint archive (tar.gz)") flags.StringVarP(&restoreCommand.Name, "name", "n", "", "Specify new name for container restored from exported checkpoint (only works with --import)") + flags.BoolVar(&restoreCommand.IgnoreRootfs, "ignore-rootfs", false, "Do not apply root file-system changes when importing from exported checkpoint") markFlagHiddenForRemoteClient("latest", flags) } @@ -60,8 +61,12 @@ func restoreCmd(c *cliconfig.RestoreValues, cmd *cobra.Command) error { } defer runtime.DeferredShutdown(false) + if c.Import == "" && c.IgnoreRootfs { + return errors.Errorf("--ignore-rootfs can only be used with --import") + } + if c.Import == "" && c.Name != "" { - return errors.Errorf("--name can only used with --import") + return errors.Errorf("--name can only be used with --import") } if c.Name != "" && c.TcpEstablished { diff --git a/cmd/podman/shared/create.go b/cmd/podman/shared/create.go index 5c37a1875..b14ce431d 100644 --- a/cmd/podman/shared/create.go 
+++ b/cmd/podman/shared/create.go @@ -400,11 +400,12 @@ func ParseCreateOpts(ctx context.Context, c *GenericCLIResults, runtime *libpod. namespaceNet = c.String("net") } namespaces = map[string]string{ - "pid": c.String("pid"), - "net": namespaceNet, - "ipc": c.String("ipc"), - "user": c.String("userns"), - "uts": c.String("uts"), + "cgroup": c.String("cgroupns"), + "pid": c.String("pid"), + "net": namespaceNet, + "ipc": c.String("ipc"), + "user": c.String("userns"), + "uts": c.String("uts"), } originalPodName := c.String("pod") @@ -462,6 +463,11 @@ func ParseCreateOpts(ctx context.Context, c *GenericCLIResults, runtime *libpod. return nil, errors.Errorf("--uts %q is not valid", namespaces["uts"]) } + cgroupMode := ns.CgroupMode(namespaces["cgroup"]) + if !cgroupMode.Valid() { + return nil, errors.Errorf("--cgroup %q is not valid", namespaces["cgroup"]) + } + ipcMode := ns.IpcMode(namespaces["ipc"]) if !cc.Valid(string(ipcMode), ipcMode) { return nil, errors.Errorf("--ipc %q is not valid", ipcMode) @@ -652,6 +658,7 @@ func ParseCreateOpts(ctx context.Context, c *GenericCLIResults, runtime *libpod. CapAdd: c.StringSlice("cap-add"), CapDrop: c.StringSlice("cap-drop"), CidFile: c.String("cidfile"), + Cgroupns: c.String("cgroupns"), CgroupParent: c.String("cgroup-parent"), Command: command, Detach: c.Bool("detach"), @@ -687,6 +694,7 @@ func ParseCreateOpts(ctx context.Context, c *GenericCLIResults, runtime *libpod. 
NetMode: netMode, UtsMode: utsMode, PidMode: pidMode, + CgroupMode: cgroupMode, Pod: podName, Privileged: c.Bool("privileged"), Publish: c.StringSlice("publish"), diff --git a/cmd/podman/shared/intermediate.go b/cmd/podman/shared/intermediate.go index 8337dc647..4062ac48a 100644 --- a/cmd/podman/shared/intermediate.go +++ b/cmd/podman/shared/intermediate.go @@ -370,6 +370,7 @@ func NewIntermediateLayer(c *cliconfig.PodmanCommand, remote bool) GenericCLIRes m["blkio-weight-device"] = newCRStringSlice(c, "blkio-weight-device") m["cap-add"] = newCRStringSlice(c, "cap-add") m["cap-drop"] = newCRStringSlice(c, "cap-drop") + m["cgroupns"] = newCRString(c, "cgroupns") m["cgroup-parent"] = newCRString(c, "cgroup-parent") m["cidfile"] = newCRString(c, "cidfile") m["conmon-pidfile"] = newCRString(c, "conmon-pidfile") diff --git a/cmd/podman/varlink/io.podman.varlink b/cmd/podman/varlink/io.podman.varlink index 9410b9459..0bf236b77 100644 --- a/cmd/podman/varlink/io.podman.varlink +++ b/cmd/podman/varlink/io.podman.varlink @@ -144,10 +144,11 @@ type PsOpts ( last: ?int, latest: ?bool, noTrunc: ?bool, - pod: ?bool, - quiet: ?bool, - sort: ?string, - sync: ?bool + pod: ?bool, + quiet: ?bool, + size: ?bool, + sort: ?string, + sync: ?bool ) type PsContainer ( diff --git a/completions/bash/podman b/completions/bash/podman index 0703029ea..969f1054e 100644 --- a/completions/bash/podman +++ b/completions/bash/podman @@ -758,6 +758,7 @@ _podman_container_checkpoint() { -R --leave-running --tcp-established + --ignore-rootfs " case "$prev" in -e|--export) @@ -870,6 +871,7 @@ _podman_container_restore() { -l --latest --tcp-established + --ignore-rootfs " case "$prev" in -i|--import) diff --git a/contrib/cirrus/notice_branch_failure.sh b/contrib/cirrus/notice_branch_failure.sh index 423231dfd..f030c12e5 100755 --- a/contrib/cirrus/notice_branch_failure.sh +++ b/contrib/cirrus/notice_branch_failure.sh @@ -12,7 +12,7 @@ NOR="$(echo -n -e '\x0f')" if [[ "$CIRRUS_BRANCH" = "$DEST_BRANCH" ]] 
then BURL="https://cirrus-ci.com/build/$CIRRUS_BUILD_ID" - ircmsg "${RED}[Action Recommended]: ${NOR}Post-merge testing on ${RED}$CIRRUS_BRANCH failed${NOR} in $CIRRUS_TASK_NAME on $(OS_RELEASE_ID)-$(OS_RELEASE_VER): $BURL. Please investigate, and re-run if appropriate." + ircmsg "${RED}[Action Recommended]: ${NOR}Post-merge testing on ${RED}$CIRRUS_BRANCH failed${NOR} in $CIRRUS_TASK_NAME on ${OS_RELEASE_ID}-${OS_RELEASE_VER}: $BURL. Please investigate, and re-run if appropriate." fi # This script assumed to be executed on failure diff --git a/docs/podman-container-checkpoint.1.md b/docs/podman-container-checkpoint.1.md index 90c3919a9..034d338bb 100644 --- a/docs/podman-container-checkpoint.1.md +++ b/docs/podman-container-checkpoint.1.md @@ -42,7 +42,15 @@ connections. Export the checkpoint to a tar.gz file. The exported checkpoint can be used to import the container on another system and thus enabling container live -migration. +migration. This checkpoint archive also includes all changes to the container's +root file-system, if not explicitly disabled using **--ignore-rootfs**. + +**--ignore-rootfs** + +This only works in combination with **--export, -e**. If a checkpoint is +exported to a tar.gz file it is possible with the help of **--ignore-rootfs** +to explicitly disable including changes to the root file-system in +the checkpoint archive file. ## EXAMPLE diff --git a/docs/podman-container-restore.1.md b/docs/podman-container-restore.1.md index c96a37f80..544a096d8 100644 --- a/docs/podman-container-restore.1.md +++ b/docs/podman-container-restore.1.md @@ -60,6 +60,13 @@ address to the container it was using before checkpointing as each IP address ca be used once and the restored container will have another IP address. This also means that **--name, -n** cannot be used in combination with **--tcp-established**. +**--ignore-rootfs** + +This is only available in combination with **--import, -i**. 
If a container is restored +from a checkpoint tar.gz file it is possible that it also contains all root file-system +changes. With **--ignore-rootfs** it is possible to explicitly disable applying these +root file-system changes to the restored container. + ## EXAMPLE podman container restore mywebserver diff --git a/docs/podman-create.1.md b/docs/podman-create.1.md index 89f146670..4008b64e6 100644 --- a/docs/podman-create.1.md +++ b/docs/podman-create.1.md @@ -63,6 +63,14 @@ Add Linux capabilities Drop Linux capabilities +**--cgroupns**=*mode* + +Set the cgroup namespace mode for the container, by default **host** is used. + **host**: use the host's cgroup namespace inside the container. + **container:<NAME|ID>**: join the namespace of the specified container. + **private**: create a new cgroup namespace. + **ns:<PATH>**: join the namespace at the specified path. + **--cgroup-parent**=*path* Path to cgroups under which the cgroup for the container will be created. If the path is not absolute, the path is considered to be relative to the cgroups path of the init process. Cgroups will be created if they do not already exist. @@ -206,7 +214,7 @@ Limit write rate (IO per second) to a device (e.g. --device-write-iops=/dev/sda: **--dns**=*dns* -Set custom DNS servers +Set custom DNS servers. Invalid if using **--dns** and **--network** that is set to 'none' or 'container:<name|id>'. This option can be used to override the DNS configuration passed to the container. Typically this is necessary when the @@ -218,11 +226,11 @@ The **/etc/resolv.conf** file in the image will be used without changes. **--dns-option**=*option* -Set custom DNS options +Set custom DNS options. Invalid if using **--dns-option** and **--network** that is set to 'none' or 'container:<name|id>'. **--dns-search**=*domain* -Set custom DNS search domains (Use --dns-search=. if you don't wish to set the search domain) +Set custom DNS search domains. 
Invalid if using **--dns-search** and **--network** that is set to 'none' or 'container:<name|id>'. (Use --dns-search=. if you don't wish to set the search domain) **--entrypoint**=*"command"* | *'["command", "arg1", ...]'* @@ -491,7 +499,7 @@ This works for both background and foreground containers. **--network**, **--net**="*bridge*" -Set the Network mode for the container +Set the Network mode for the container. Invalid if using **--dns**, **--dns-option**, or **--dns-search** with **--network** that is set to 'none' or 'container:<name|id>'. 'bridge': create a network stack on the default bridge 'none': no networking 'container:<name|id>': reuse another container's network stack diff --git a/docs/podman-run.1.md b/docs/podman-run.1.md index ebf774b24..6d95d6045 100644 --- a/docs/podman-run.1.md +++ b/docs/podman-run.1.md @@ -77,6 +77,14 @@ Add Linux capabilities Drop Linux capabilities +**--cgroupns**=*mode* + +Set the cgroup namespace mode for the container, by default **host** is used. + **host**: use the host's cgroup namespace inside the container. + **container:<NAME|ID>**: join the namespace of the specified container. + **private**: create a new cgroup namespace. + **ns:<PATH>**: join the namespace at the specified path. + **--cgroup-parent**=*cgroup* Path to cgroups under which the cgroup for the container will be created. If the path is not absolute, the path is considered to be relative to the cgroups path of the init process. Cgroups will be created if they do not already exist. @@ -212,7 +220,7 @@ Limit write rate (IO per second) to a device (e.g. --device-write-iops=/dev/sda: **--dns**=*dns* -Set custom DNS servers +Set custom DNS servers. Invalid if using **--dns** with **--network** that is set to 'none' or 'container:<name|id>'. This option can be used to override the DNS configuration passed to the container. Typically this is necessary when the @@ -224,11 +232,11 @@ The **/etc/resolv.conf** file in the image will be used without changes. 
**--dns-option**=*option* -Set custom DNS options +Set custom DNS options. Invalid if using **--dns-option** with **--network** that is set to 'none' or 'container:<name|id>'. **--dns-search**=*domain* -Set custom DNS search domains (Use --dns-search=. if you don't wish to set the search domain) +Set custom DNS search domains. Invalid if using **--dns-search** and **--network** that is set to 'none' or 'container:<name|id>'. (Use --dns-search=. if you don't wish to set the search domain) **--entrypoint**=*"command"* | *'["command", "arg1", ...]'* @@ -502,9 +510,9 @@ to the container with **--name** then it will generate a random string name. The name is useful any place you need to identify a container. This works for both background and foreground containers. -**--network**, **--net**=*mode* +**--network**, **--net**=*mode* -Set the Network mode for the container: +Set the Network mode for the container. Invalid if using **--dns**, **--dns-option**, or **--dns-search** with **--network** that is set to 'none' or 'container:<name|id>'. - `bridge`: create a network stack on the default bridge - `none`: no networking - `container:<name|id>`: reuse another container's network stack diff --git a/libpod/container_api.go b/libpod/container_api.go index 3577b8e8c..6f530f75f 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -305,6 +305,11 @@ func (c *Container) Exec(tty, privileged bool, env, cmd []string, user, workDir if err != nil { if exited { // If the runtime exited, propagate the error we got from the process. 
+ // We need to remove PID files to ensure no memory leaks + if err2 := os.Remove(pidFile); err2 != nil { + logrus.Errorf("Error removing exit file for container %s exec session %s: %v", c.ID(), sessionID, err2) + } + return err } return errors.Wrapf(err, "timed out waiting for runtime to create pidfile for exec session in container %s", c.ID()) @@ -312,6 +317,10 @@ func (c *Container) Exec(tty, privileged bool, env, cmd []string, user, workDir // Pidfile exists, read it contents, err := ioutil.ReadFile(pidFile) + // We need to remove PID files to ensure no memory leaks + if err2 := os.Remove(pidFile); err2 != nil { + logrus.Errorf("Error removing exit file for container %s exec session %s: %v", c.ID(), sessionID, err2) + } if err != nil { // We don't know the PID of the exec session // However, it may still be alive @@ -792,15 +801,16 @@ type ContainerCheckpointOptions struct { // TCPEstablished tells the API to checkpoint a container // even if it contains established TCP connections TCPEstablished bool - // Export tells the API to write the checkpoint image to - // the filename set in TargetFile - // Import tells the API to read the checkpoint image from - // the filename set in TargetFile + // TargetFile tells the API to read (or write) the checkpoint image + // from (or to) the filename set in TargetFile TargetFile string // Name tells the API that during restore from an exported // checkpoint archive a new name should be used for the // restored container Name string + // IgnoreRootfs tells the API to not export changes to + // the container's root file-system (or to not import) + IgnoreRootfs bool } // Checkpoint checkpoints a container diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index aa477611f..399220b9a 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -510,21 +510,44 @@ func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr return nil } -func (c 
*Container) exportCheckpoint(dest string) (err error) { +func (c *Container) exportCheckpoint(dest string, ignoreRootfs bool) (err error) { if (len(c.config.NamedVolumes) > 0) || (len(c.Dependencies()) > 0) { return errors.Errorf("Cannot export checkpoints of containers with named volumes or dependencies") } logrus.Debugf("Exporting checkpoint image of container %q to %q", c.ID(), dest) + + includeFiles := []string{ + "checkpoint", + "artifacts", + "ctr.log", + "config.dump", + "spec.dump", + "network.status"} + + // Get root file-system changes included in the checkpoint archive + rootfsDiffPath := filepath.Join(c.bundlePath(), "rootfs-diff.tar") + if !ignoreRootfs { + rootfsDiffFile, err := os.Create(rootfsDiffPath) + if err != nil { + return errors.Wrapf(err, "error creating root file-system diff file %q", rootfsDiffPath) + } + tarStream, err := c.runtime.GetDiffTarStream("", c.ID()) + if err != nil { + return errors.Wrapf(err, "error exporting root file-system diff to %q", rootfsDiffPath) + } + _, err = io.Copy(rootfsDiffFile, tarStream) + if err != nil { + return errors.Wrapf(err, "error exporting root file-system diff to %q", rootfsDiffPath) + } + tarStream.Close() + rootfsDiffFile.Close() + includeFiles = append(includeFiles, "rootfs-diff.tar") + } + input, err := archive.TarWithOptions(c.bundlePath(), &archive.TarOptions{ Compression: archive.Gzip, IncludeSourceDir: true, - IncludeFiles: []string{ - "checkpoint", - "artifacts", - "ctr.log", - "config.dump", - "spec.dump", - "network.status"}, + IncludeFiles: includeFiles, }) if err != nil { @@ -546,6 +569,8 @@ func (c *Container) exportCheckpoint(dest string) (err error) { return err } + os.Remove(rootfsDiffPath) + return nil } @@ -605,7 +630,7 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO } if options.TargetFile != "" { - if err = c.exportCheckpoint(options.TargetFile); err != nil { + if err = c.exportCheckpoint(options.TargetFile, options.IgnoreRootfs); err != nil { 
return err } } @@ -792,6 +817,23 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti if err := c.saveSpec(g.Spec()); err != nil { return err } + + // Before actually restarting the container, apply the root file-system changes + if !options.IgnoreRootfs { + rootfsDiffPath := filepath.Join(c.bundlePath(), "rootfs-diff.tar") + if _, err := os.Stat(rootfsDiffPath); err == nil { + // Only do this if a rootfs-diff.tar actually exists + rootfsDiffFile, err := os.Open(rootfsDiffPath) + if err != nil { + return errors.Wrapf(err, "Failed to open root file-system diff file %s", rootfsDiffPath) + } + if err := c.runtime.ApplyDiffTarStream(c.ID(), rootfsDiffFile); err != nil { + return errors.Wrapf(err, "Failed to apply root file-system diff file %s", rootfsDiffPath) + } + rootfsDiffFile.Close() + } + } + if err := c.ociRuntime.createContainer(c, c.config.CgroupParent, &options); err != nil { return err } @@ -809,7 +851,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti if err != nil { logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", c.CheckpointPath(), err) } - cleanup := [...]string{"restore.log", "dump.log", "stats-dump", "stats-restore", "network.status"} + cleanup := [...]string{"restore.log", "dump.log", "stats-dump", "stats-restore", "network.status", "rootfs-diff.tar"} for _, del := range cleanup { file := filepath.Join(c.bundlePath(), del) err = os.Remove(file) diff --git a/libpod/diff.go b/libpod/diff.go index f348e6b81..925bda927 100644 --- a/libpod/diff.go +++ b/libpod/diff.go @@ -1,6 +1,9 @@ package libpod import ( + "archive/tar" + "io" + "github.com/containers/libpod/libpod/layers" "github.com/containers/storage/pkg/archive" "github.com/pkg/errors" @@ -44,6 +47,59 @@ func (r *Runtime) GetDiff(from, to string) ([]archive.Change, error) { return rchanges, err } +// skipFileInTarAchive is an archive.TarModifierFunc function +// which tells archive.ReplaceFileTarWrapper to 
skip files +// from the tarstream +func skipFileInTarAchive(path string, header *tar.Header, content io.Reader) (*tar.Header, []byte, error) { + return nil, nil, nil +} + +// GetDiffTarStream returns the differences between the two images, layers, or containers. +// It is the same functionality as GetDiff() except that it returns a tarstream +func (r *Runtime) GetDiffTarStream(from, to string) (io.ReadCloser, error) { + toLayer, err := r.getLayerID(to) + if err != nil { + return nil, err + } + fromLayer := "" + if from != "" { + fromLayer, err = r.getLayerID(from) + if err != nil { + return nil, err + } + } + rc, err := r.store.Diff(fromLayer, toLayer, nil) + if err != nil { + return nil, err + } + + // Skip files in the tar archive which are listed + // in containerMounts map. Just as in the GetDiff() + // function from above + filterMap := make(map[string]archive.TarModifierFunc) + for key := range containerMounts { + filterMap[key[1:]] = skipFileInTarAchive + // In the tarstream directories always include a trailing '/'. + // For simplicity this duplicates every entry from + // containerMounts with a trailing '/', as containerMounts + // does not use trailing '/' for directories. 
+ filterMap[key[1:]+"/"] = skipFileInTarAchive + } + + filteredTarStream := archive.ReplaceFileTarWrapper(rc, filterMap) + return filteredTarStream, nil +} + +// ApplyDiffTarStream applies the changes stored in 'diff' to the layer 'to' +func (r *Runtime) ApplyDiffTarStream(to string, diff io.Reader) error { + toLayer, err := r.getLayerID(to) + if err != nil { + return err + } + _, err = r.store.ApplyDiff(toLayer, diff) + return err +} + // GetLayerID gets a full layer id given a full or partial id // If the id matches a container or image, the id of the top layer is returned // If the id matches a layer, the top layer id is returned diff --git a/pkg/adapter/checkpoint_restore.go b/pkg/adapter/checkpoint_restore.go index ec1464fb1..533e9e3a2 100644 --- a/pkg/adapter/checkpoint_restore.go +++ b/pkg/adapter/checkpoint_restore.go @@ -58,6 +58,7 @@ func crImportCheckpoint(ctx context.Context, runtime *libpod.Runtime, input stri "checkpoint", "artifacts", "ctr.log", + "rootfs-diff.tar", "network.status", }, } diff --git a/pkg/adapter/containers.go b/pkg/adapter/containers.go index 86e9c0266..7e2384e18 100644 --- a/pkg/adapter/containers.go +++ b/pkg/adapter/containers.go @@ -524,6 +524,10 @@ func (r *LocalRuntime) Checkpoint(c *cliconfig.CheckpointValues) error { KeepRunning: c.LeaveRunning, TCPEstablished: c.TcpEstablished, TargetFile: c.Export, + IgnoreRootfs: c.IgnoreRootfs, + } + if c.Export == "" && c.IgnoreRootfs { + return errors.Errorf("--ignore-rootfs can only be used with --export") } if c.All { containers, err = r.Runtime.GetRunningContainers() @@ -560,6 +564,7 @@ func (r *LocalRuntime) Restore(ctx context.Context, c *cliconfig.RestoreValues) TCPEstablished: c.TcpEstablished, TargetFile: c.Import, Name: c.Name, + IgnoreRootfs: c.IgnoreRootfs, } filterFuncs = append(filterFuncs, func(c *libpod.Container) bool { diff --git a/pkg/adapter/containers_remote.go b/pkg/adapter/containers_remote.go index 5836d0788..fc23381a4 100644 --- 
a/pkg/adapter/containers_remote.go +++ b/pkg/adapter/containers_remote.go @@ -493,6 +493,7 @@ func (r *LocalRuntime) Ps(c *cliconfig.PsValues, opts shared.PsOptions) ([]share NoTrunc: &c.NoTrunct, Pod: &c.Pod, Quiet: &c.Quiet, + Size: &c.Size, Sort: &c.Sort, Sync: &c.Sync, } @@ -668,6 +669,9 @@ func (r *LocalRuntime) Checkpoint(c *cliconfig.CheckpointValues) error { if c.Export != "" { return errors.New("the remote client does not support exporting checkpoints") } + if c.IgnoreRootfs { + return errors.New("the remote client does not support --ignore-rootfs") + } var lastError error ids, err := iopodman.GetContainersByContext().Call(r.Conn, c.All, c.Latest, c.InputArgs) @@ -708,6 +712,9 @@ func (r *LocalRuntime) Restore(ctx context.Context, c *cliconfig.RestoreValues) if c.Import != "" { return errors.New("the remote client does not support importing checkpoints") } + if c.IgnoreRootfs { + return errors.New("the remote client does not support --ignore-rootfs") + } var lastError error ids, err := iopodman.GetContainersByContext().Call(r.Conn, c.All, c.Latest, c.InputArgs) diff --git a/pkg/namespaces/namespaces.go b/pkg/namespaces/namespaces.go index ec9276344..7ed95bd0f 100644 --- a/pkg/namespaces/namespaces.go +++ b/pkg/namespaces/namespaces.go @@ -4,6 +4,63 @@ import ( "strings" ) +// CgroupMode represents cgroup mode in the container. +type CgroupMode string + +// IsHost indicates whether the container uses the host's cgroup. +func (n CgroupMode) IsHost() bool { + return n == "host" +} + +// IsNS indicates a cgroup namespace passed in by path (ns:<path>) +func (n CgroupMode) IsNS() bool { + return strings.HasPrefix(string(n), "ns:") +} + +// NS gets the path associated with a ns:<path> cgroup ns +func (n CgroupMode) NS() string { + parts := strings.SplitN(string(n), ":", 2) + if len(parts) > 1 { + return parts[1] + } + return "" +} + +// IsContainer indicates whether the container uses a new cgroup namespace. 
+func (n CgroupMode) IsContainer() bool { + parts := strings.SplitN(string(n), ":", 2) + return len(parts) > 1 && parts[0] == "container" +} + +// Container returns the name of the container whose cgroup namespace is going to be used. +func (n CgroupMode) Container() string { + parts := strings.SplitN(string(n), ":", 2) + if len(parts) > 1 { + return parts[1] + } + return "" +} + +// IsPrivate indicates whether the container uses the a private cgroup. +func (n CgroupMode) IsPrivate() bool { + return n == "private" +} + +// Valid indicates whether the Cgroup namespace is valid. +func (n CgroupMode) Valid() bool { + parts := strings.Split(string(n), ":") + switch mode := parts[0]; mode { + case "", "host", "private", "ns": + case "container": + if len(parts) != 2 || parts[1] == "" { + return false + } + default: + return false + } + return true +} + // UsernsMode represents userns mode in the container. type UsernsMode string diff --git a/pkg/spec/config_linux.go b/pkg/spec/config_linux.go index 9f6a4a058..a84e9a72f 100644 --- a/pkg/spec/config_linux.go +++ b/pkg/spec/config_linux.go @@ -8,6 +8,7 @@ import ( "path/filepath" "strings" + "github.com/containers/libpod/pkg/rootless" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/devices" spec "github.com/opencontainers/runtime-spec/specs-go" @@ -118,8 +119,44 @@ func (c *CreateConfig) addPrivilegedDevices(g *generate.Generator) error { return err } g.ClearLinuxDevices() - for _, d := range hostDevices { - g.AddDevice(Device(d)) + + if rootless.IsRootless() { + mounts := make(map[string]interface{}) + for _, m := range g.Mounts() { + mounts[m.Destination] = true + } + newMounts := []spec.Mount{} + for _, d := range hostDevices { + devMnt := spec.Mount{ + Destination: d.Path, + Type: TypeBind, + Source: d.Path, + Options: []string{"slave", "nosuid", "noexec", "rw", "rbind"}, + } + if d.Path == "/dev/ptmx" || strings.HasPrefix(d.Path, "/dev/tty") { + continue + } + if _, 
found := mounts[d.Path]; found { + continue + } + st, err := os.Stat(d.Path) + if err != nil { + if err == unix.EPERM { + continue + } + return errors.Wrapf(err, "stat %s", d.Path) + } + // Skip devices that the user has not access to. + if st.Mode()&0007 == 0 { + continue + } + newMounts = append(newMounts, devMnt) + } + g.Config.Mounts = append(newMounts, g.Config.Mounts...) + } else { + for _, d := range hostDevices { + g.AddDevice(Device(d)) + } } // Add resources device - need to clear the existing one first. diff --git a/pkg/spec/createconfig.go b/pkg/spec/createconfig.go index 0042ed401..1fb1f829b 100644 --- a/pkg/spec/createconfig.go +++ b/pkg/spec/createconfig.go @@ -63,6 +63,7 @@ type CreateConfig struct { CapDrop []string // cap-drop CidFile string ConmonPidFile string + Cgroupns string CgroupParent string // cgroup-parent Command []string Detach bool // detach @@ -101,6 +102,7 @@ type CreateConfig struct { NetworkAlias []string //network-alias PidMode namespaces.PidMode //pid Pod string //pod + CgroupMode namespaces.CgroupMode //cgroup PortBindings nat.PortMap Privileged bool //privileged Publish []string //publish @@ -268,6 +270,23 @@ func (c *CreateConfig) getContainerCreateOptions(runtime *libpod.Runtime, pod *l options = append(options, libpod.WithNetNS(portBindings, postConfigureNetNS, string(c.NetMode), networks)) } + if c.CgroupMode.IsNS() { + ns := c.CgroupMode.NS() + if ns == "" { + return nil, errors.Errorf("invalid empty user-defined network namespace") + } + _, err := os.Stat(ns) + if err != nil { + return nil, err + } + } else if c.CgroupMode.IsContainer() { + connectedCtr, err := runtime.LookupContainer(c.CgroupMode.Container()) + if err != nil { + return nil, errors.Wrapf(err, "container %q not found", c.CgroupMode.Container()) + } + options = append(options, libpod.WithCgroupNSFrom(connectedCtr)) + } + if c.PidMode.IsContainer() { connectedCtr, err := runtime.LookupContainer(c.PidMode.Container()) if err != nil { diff --git 
a/pkg/spec/spec.go b/pkg/spec/spec.go index 41054633f..824c99025 100644 --- a/pkg/spec/spec.go +++ b/pkg/spec/spec.go @@ -264,10 +264,8 @@ func (config *CreateConfig) createConfigToOCISpec(runtime *libpod.Runtime, userM // If privileged, we need to add all the host devices to the // spec. We do not add the user provided ones because we are // already adding them all. - if !rootless.IsRootless() { - if err := config.AddPrivilegedDevices(&g); err != nil { - return nil, err - } + if err := config.AddPrivilegedDevices(&g); err != nil { + return nil, err } } else { for _, devicePath := range config.Devices { @@ -325,6 +323,10 @@ func (config *CreateConfig) createConfigToOCISpec(runtime *libpod.Runtime, userM if err := addIpcNS(config, &g); err != nil { return nil, err } + + if err := addCgroupNS(config, &g); err != nil { + return nil, err + } configSpec := g.Config // HANDLE CAPABILITIES @@ -622,6 +624,23 @@ func addIpcNS(config *CreateConfig, g *generate.Generator) error { return nil } +func addCgroupNS(config *CreateConfig, g *generate.Generator) error { + cgroupMode := config.CgroupMode + if cgroupMode.IsNS() { + return g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), NS(string(cgroupMode))) + } + if cgroupMode.IsHost() { + return g.RemoveLinuxNamespace(spec.CgroupNamespace) + } + if cgroupMode.IsPrivate() { + return g.AddOrReplaceLinuxNamespace(spec.CgroupNamespace, "") + } + if cgroupMode.IsContainer() { + logrus.Debug("Using container cgroup mode") + } + return nil +} + func addRlimits(config *CreateConfig, g *generate.Generator) error { var ( kernelMax uint64 = 1048576 diff --git a/pkg/varlinkapi/containers.go b/pkg/varlinkapi/containers.go index 700e02b0c..6f6909fac 100644 --- a/pkg/varlinkapi/containers.go +++ b/pkg/varlinkapi/containers.go @@ -65,32 +65,34 @@ func (i *LibpodAPI) Ps(call iopodman.VarlinkCall, opts iopodman.PsOpts) error { for _, ctr := range psContainerOutputs { container := iopodman.PsContainer{ - Id: ctr.ID, - Image: ctr.Image, - 
Command: ctr.Command, - Created: ctr.Created, - Ports: ctr.Ports, - Names: ctr.Names, - IsInfra: ctr.IsInfra, - Status: ctr.Status, - State: ctr.State.String(), - PidNum: int64(ctr.Pid), - RootFsSize: ctr.Size.RootFsSize, - RwSize: ctr.Size.RwSize, - Pod: ctr.Pod, - CreatedAt: ctr.CreatedAt.Format(time.RFC3339Nano), - ExitedAt: ctr.ExitedAt.Format(time.RFC3339Nano), - StartedAt: ctr.StartedAt.Format(time.RFC3339Nano), - Labels: ctr.Labels, - NsPid: ctr.PID, - Cgroup: ctr.Cgroup, - Ipc: ctr.Cgroup, - Mnt: ctr.MNT, - Net: ctr.NET, - PidNs: ctr.PIDNS, - User: ctr.User, - Uts: ctr.UTS, - Mounts: ctr.Mounts, + Id: ctr.ID, + Image: ctr.Image, + Command: ctr.Command, + Created: ctr.Created, + Ports: ctr.Ports, + Names: ctr.Names, + IsInfra: ctr.IsInfra, + Status: ctr.Status, + State: ctr.State.String(), + PidNum: int64(ctr.Pid), + Pod: ctr.Pod, + CreatedAt: ctr.CreatedAt.Format(time.RFC3339Nano), + ExitedAt: ctr.ExitedAt.Format(time.RFC3339Nano), + StartedAt: ctr.StartedAt.Format(time.RFC3339Nano), + Labels: ctr.Labels, + NsPid: ctr.PID, + Cgroup: ctr.Cgroup, + Ipc: ctr.Cgroup, + Mnt: ctr.MNT, + Net: ctr.NET, + PidNs: ctr.PIDNS, + User: ctr.User, + Uts: ctr.UTS, + Mounts: ctr.Mounts, + } + if ctr.Size != nil { + container.RootFsSize = ctr.Size.RootFsSize + container.RwSize = ctr.Size.RwSize } containers = append(containers, container) } diff --git a/pkg/varlinkapi/util.go b/pkg/varlinkapi/util.go index e8f74e6aa..d3a41f7ab 100644 --- a/pkg/varlinkapi/util.go +++ b/pkg/varlinkapi/util.go @@ -191,7 +191,7 @@ func makePsOpts(inOpts iopodman.PsOpts) shared.PsOptions { Latest: derefBool(inOpts.Latest), NoTrunc: derefBool(inOpts.NoTrunc), Pod: derefBool(inOpts.Pod), - Size: true, + Size: derefBool(inOpts.Size), Sort: derefString(inOpts.Sort), Namespace: true, Sync: derefBool(inOpts.Sync), diff --git a/test/e2e/checkpoint_test.go b/test/e2e/checkpoint_test.go index 65daf5e94..b77c48c8e 100644 --- a/test/e2e/checkpoint_test.go +++ b/test/e2e/checkpoint_test.go @@ -416,6 +416,130 
@@ var _ = Describe("Podman checkpoint", func() { os.Remove(fileName) }) + It("podman checkpoint and restore container with root file-system changes", func() { + // Start the container + localRunString := getRunString([]string{"--rm", ALPINE, "top"}) + session := podmanTest.Podman(localRunString) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To(Equal(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1)) + cid := session.OutputToString() + fileName := "/tmp/checkpoint-" + cid + ".tar.gz" + + // Change the container's root file-system + result := podmanTest.Podman([]string{"exec", "-l", "/bin/sh", "-c", "echo test" + cid + "test > /test.output"}) + result.WaitWithDefaultTimeout() + Expect(result.ExitCode()).To(Equal(0)) + + // Checkpoint the container + result = podmanTest.Podman([]string{"container", "checkpoint", "-l", "-e", fileName}) + result.WaitWithDefaultTimeout() + + Expect(result.ExitCode()).To(Equal(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0)) + Expect(podmanTest.NumberOfContainers()).To(Equal(0)) + + // Restore the container + result = podmanTest.Podman([]string{"container", "restore", "-i", fileName}) + result.WaitWithDefaultTimeout() + + Expect(result.ExitCode()).To(Equal(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1)) + Expect(podmanTest.NumberOfContainers()).To(Equal(1)) + Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up")) + + // Verify the changes to the container's root file-system + result = podmanTest.Podman([]string{"exec", "-l", "cat", "/test.output"}) + result.WaitWithDefaultTimeout() + Expect(result.ExitCode()).To(Equal(0)) + Expect(result.OutputToString()).To(ContainSubstring("test" + cid + "test")) + + // Remove exported checkpoint + os.Remove(fileName) + }) + It("podman checkpoint and restore container with root file-system changes using --ignore-rootfs during restore", func() { + // Start the container + localRunString := getRunString([]string{"--rm", 
ALPINE, "top"}) + session := podmanTest.Podman(localRunString) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To(Equal(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1)) + cid := session.OutputToString() + fileName := "/tmp/checkpoint-" + cid + ".tar.gz" + + // Change the container's root file-system + result := podmanTest.Podman([]string{"exec", "-l", "/bin/sh", "-c", "echo test" + cid + "test > /test.output"}) + result.WaitWithDefaultTimeout() + Expect(result.ExitCode()).To(Equal(0)) + + // Checkpoint the container + result = podmanTest.Podman([]string{"container", "checkpoint", "-l", "-e", fileName}) + result.WaitWithDefaultTimeout() + + Expect(result.ExitCode()).To(Equal(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0)) + Expect(podmanTest.NumberOfContainers()).To(Equal(0)) + + // Restore the container + result = podmanTest.Podman([]string{"container", "restore", "--ignore-rootfs", "-i", fileName}) + result.WaitWithDefaultTimeout() + + Expect(result.ExitCode()).To(Equal(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1)) + Expect(podmanTest.NumberOfContainers()).To(Equal(1)) + Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up")) + + // Verify the changes to the container's root file-system + result = podmanTest.Podman([]string{"exec", "-l", "cat", "/test.output"}) + result.WaitWithDefaultTimeout() + Expect(result.ExitCode()).To(Equal(1)) + Expect(result.ErrorToString()).To(ContainSubstring("cat: can't open '/test.output': No such file or directory")) + + // Remove exported checkpoint + os.Remove(fileName) + }) + It("podman checkpoint and restore container with root file-system changes using --ignore-rootfs during checkpoint", func() { + // Start the container + localRunString := getRunString([]string{"--rm", ALPINE, "top"}) + session := podmanTest.Podman(localRunString) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To(Equal(0)) + 
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1)) + cid := session.OutputToString() + fileName := "/tmp/checkpoint-" + cid + ".tar.gz" + + // Change the container's root file-system + result := podmanTest.Podman([]string{"exec", "-l", "/bin/sh", "-c", "echo test" + cid + "test > /test.output"}) + result.WaitWithDefaultTimeout() + Expect(result.ExitCode()).To(Equal(0)) + + // Checkpoint the container + result = podmanTest.Podman([]string{"container", "checkpoint", "--ignore-rootfs", "-l", "-e", fileName}) + result.WaitWithDefaultTimeout() + + Expect(result.ExitCode()).To(Equal(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0)) + Expect(podmanTest.NumberOfContainers()).To(Equal(0)) + + // Restore the container + result = podmanTest.Podman([]string{"container", "restore", "-i", fileName}) + result.WaitWithDefaultTimeout() + + Expect(result.ExitCode()).To(Equal(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1)) + Expect(podmanTest.NumberOfContainers()).To(Equal(1)) + Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up")) + + // Verify the changes to the container's root file-system + result = podmanTest.Podman([]string{"exec", "-l", "cat", "/test.output"}) + result.WaitWithDefaultTimeout() + Expect(result.ExitCode()).To(Equal(1)) + Expect(result.ErrorToString()).To(ContainSubstring("cat: can't open '/test.output': No such file or directory")) + + // Remove exported checkpoint + os.Remove(fileName) + }) + It("podman checkpoint and run exec in restored container", func() { // Start the container localRunString := getRunString([]string{"--rm", ALPINE, "top"}) diff --git a/test/e2e/run_device_test.go b/test/e2e/run_device_test.go index 2e537a9f9..cf7ce9cdf 100644 --- a/test/e2e/run_device_test.go +++ b/test/e2e/run_device_test.go @@ -85,4 +85,13 @@ var _ = Describe("Podman run device", func() { session.WaitWithDefaultTimeout() Expect(session.ExitCode()).To(Equal(0)) }) + + It("podman run device host device with 
--privileged", func() { + if _, err := os.Stat("/dev/kvm"); err != nil { + Skip("/dev/kvm not available") + } + session := podmanTest.Podman([]string{"run", "--privileged", ALPINE, "ls", "/dev/kvm"}) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To(Equal(0)) + }) }) diff --git a/test/e2e/run_dns_test.go b/test/e2e/run_dns_test.go index f1196ff38..081fab3fd 100644 --- a/test/e2e/run_dns_test.go +++ b/test/e2e/run_dns_test.go @@ -94,4 +94,22 @@ var _ = Describe("Podman run dns", func() { Expect(session.ExitCode()).To(Equal(0)) Expect(session.LineInOutputContains("foobar")).To(BeTrue()) }) + + It("podman run mutually excludes --dns* and --network", func() { + session := podmanTest.Podman([]string{"run", "--dns=1.2.3.4", "--network", "container:ALPINE", ALPINE}) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To(Not(Equal(0))) + + session = podmanTest.Podman([]string{"run", "--dns-opt=1.2.3.4", "--network", "container:ALPINE", ALPINE}) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To(Not(Equal(0))) + + session = podmanTest.Podman([]string{"run", "--dns-search=foobar.com", "--network", "none", ALPINE}) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To(Not(Equal(0))) + + session = podmanTest.Podman([]string{"run", "--dns=1.2.3.4", "--network", "host", ALPINE}) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To((Equal(0))) + }) }) diff --git a/test/e2e/run_ns_test.go b/test/e2e/run_ns_test.go index 6ba0d1aba..e3e86fc66 100644 --- a/test/e2e/run_ns_test.go +++ b/test/e2e/run_ns_test.go @@ -51,6 +51,15 @@ var _ = Describe("Podman run ns", func() { Expect(session.ExitCode()).To(Not(Equal(0))) }) + It("podman run --cgroup private test", func() { + session := podmanTest.Podman([]string{"run", "--cgroupns=private", fedoraMinimal, "cat", "/proc/self/cgroup"}) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To(Equal(0)) + + output := session.OutputToString() + 
Expect(output).ToNot(ContainSubstring("slice")) + }) + It("podman run ipcns test", func() { setup := SystemExec("ls", []string{"--inode", "-d", "/dev/shm"}) Expect(setup.ExitCode()).To(Equal(0)) |