From 7e0a6a57ef0d35939a41a23f46747291593fa275 Mon Sep 17 00:00:00 2001
From: Matthew Heon <matthew.heon@pm.me>
Date: Wed, 10 Jul 2019 15:09:33 -0400
Subject: podman: fix memleak caused by renaming and not deleting the exit
 file

If the container exit code needs to be retained, it cannot be retained
in tmpfs, because libpod runs in a memcg itself so it can't leave
traces with a daemon-less design.

This wasn't a memleak detectable by kmemleak for example. The kernel
never lost track of the memory and there was no erroneous refcounting
either. The reference count dependencies however are not easy to track
because when a refcount is increased, there's no way to tell who's
still holding the reference. In this case it was a single page of
tmpfs pagecache holding a refcount that kept pinned a whole hierarchy
of dying memcg, slab kmem, cgroups, unreachable kernfs nodes and the
respective dentries and inodes. Such a problem wouldn't happen if the
exit file was stored in a regular filesystem because the pagecache
could be reclaimed in such case under memory pressure. The tmpfs page
can be swapped out, but that's not enough to release the memcg with
CONFIG_MEMCG_SWAP_ENABLED=y.

No amount of more aggressive kernel slab shrinking could have solved
this. Not even assigning slab kmem of dying cgroups to alive cgroup
would fully solve this. The only way to free the memory of a dying
cgroup when a struct page still references it, would be to loop over
all "struct page" in the kernel to find which one is associated with
the dying cgroup which is an O(N) operation (where N is the number of
pages and can reach billions). Linking all the tmpfs pages to the
memcg would cost less during memcg offlining, but it would waste lots
of memory and CPU globally. So this can't be optimized in the kernel.

A cronjob running this command can act as workaround and will allow
all slab cache to be released, not just the single tmpfs pages.

    rm -f /run/libpod/exits/*

This patch solved the memleak with a reproducer, booting with
cgroup.memory=nokmem and with selinux disabled. The reason memcg kmem
and selinux were disabled for testing of this fix, is because kmem
greatly decreases the kernel effectiveness in reusing partial slab
objects. cgroup.memory=nokmem is strongly recommended at least for
workstation usage. selinux needs to be further analyzed because it
causes further slab allocations.

The upstream podman commit used for testing is
1fe2965e4f672674f7b66648e9973a0ed5434bb4 (v1.4.4).

The upstream kernel commit used for testing is
f16fea666898dbdd7812ce94068c76da3e3fcf1e (v5.2-rc6).

Reported-by: Michele Baldessari <michele@redhat.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>

<Applied with small tweaks to comments>
Signed-off-by: Matthew Heon <matthew.heon@pm.me>

<Further tweaks to cherry pick into 1.4.2>
Signed-off-by: Matthew Heon <matthew.heon@pm.me>
---
 libpod/container_internal.go | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'libpod')
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index 9245a8840..f6fe9cd0c 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -544,19 +544,15 @@ func (c *Container) removeConmonFiles() error {
 		return errors.Wrapf(err, "error removing container %s OOM file", c.ID())
 	}
 
-	// Instead of outright deleting the exit file, rename it (if it exists).
-	// We want to retain it so we can get the exit code of containers which
-	// are removed (at least until we have a workable events system)
+	// Remove the exit file so we don't leak memory in tmpfs
 	exitFile := filepath.Join(c.runtime.ociRuntime.exitsDir, c.ID())
-	oldExitFile := filepath.Join(c.runtime.ociRuntime.exitsDir, fmt.Sprintf("%s-old", c.ID()))
 	if _, err := os.Stat(exitFile); err != nil {
 		if !os.IsNotExist(err) {
 			return errors.Wrapf(err, "error running stat on container %s exit file", c.ID())
 		}
-	} else if err == nil {
-		// Rename should replace the old exit file (if it exists)
-		if err := os.Rename(exitFile, oldExitFile); err != nil {
-			return errors.Wrapf(err, "error renaming container %s exit file", c.ID())
+	} else {
+		if err := os.Remove(exitFile); err != nil {
+			return errors.Wrapf(err, "error removing container %s exit file", c.ID())
 		}
 	}
 
-- 
cgit v1.2.3-54-g00ecf


From c115f4eca0973a42e33dd87a82f5b9166e5b6331 Mon Sep 17 00:00:00 2001
From: baude <bbaude@redhat.com>
Date: Sun, 7 Jul 2019 08:09:48 -0500
Subject: get last container event

an internal change in libpod will soon require the ability to look up
the last container event using the container name or id and the type of
event.  this pr is in preparation for that need.

Signed-off-by: baude <bbaude@redhat.com>
---
 libpod/events.go               | 55 ++++++++++++++++++++++++++++++++++++++++++
 libpod/events/config.go        | 11 +++++++++
 libpod/events/events.go        |  2 ++
 libpod/events/journal_linux.go |  4 ++-
 4 files changed, 71 insertions(+), 1 deletion(-)

(limited to 'libpod')

diff --git a/libpod/events.go b/libpod/events.go
index 13bb5bdde..be21e510a 100644
--- a/libpod/events.go
+++ b/libpod/events.go
@@ -1,7 +1,10 @@
 package libpod
 
 import (
+	"fmt"
+
 	"github.com/containers/libpod/libpod/events"
+	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 )
 
@@ -79,3 +82,55 @@ func (r *Runtime) Events(options events.ReadOptions) error {
 	}
 	return eventer.Read(options)
 }
+
+// GetEvents reads the event log and returns events based on input filters
+func (r *Runtime) GetEvents(filters []string) ([]*events.Event, error) {
+	var (
+		logEvents []*events.Event
+		readErr   error
+	)
+	eventChannel := make(chan *events.Event)
+	options := events.ReadOptions{
+		EventChannel: eventChannel,
+		Filters:      filters,
+		FromStart:    true,
+		Stream:       false,
+	}
+	eventer, err := r.newEventer()
+	if err != nil {
+		return nil, err
+	}
+	go func() {
+		readErr = eventer.Read(options)
+	}()
+	if readErr != nil {
+		return nil, readErr
+	}
+	for e := range eventChannel {
+		logEvents = append(logEvents, e)
+	}
+	return logEvents, nil
+}
+
+// GetLastContainerEvent takes a container name or ID and an event status and returns
+// the last occurrence of the container event
+func (r *Runtime) GetLastContainerEvent(nameOrID string, containerEvent events.Status) (*events.Event, error) {
+	// check to make sure the event.Status is valid
+	if _, err := events.StringToStatus(containerEvent.String()); err != nil {
+		return nil, err
+	}
+	filters := []string{
+		fmt.Sprintf("container=%s", nameOrID),
+		fmt.Sprintf("event=%s", containerEvent),
+		"type=container",
+	}
+	containerEvents, err := r.GetEvents(filters)
+	if err != nil {
+		return nil, err
+	}
+	if len(containerEvents) < 1 {
+		return nil, errors.Wrapf(events.ErrEventNotFound, "%s not found", containerEvent.String())
+	}
+	// return the last element in the slice
+	return containerEvents[len(containerEvents)-1], nil
+}
diff --git a/libpod/events/config.go b/libpod/events/config.go
index 810988205..b9f01f3a5 100644
--- a/libpod/events/config.go
+++ b/libpod/events/config.go
@@ -2,6 +2,8 @@ package events
 
 import (
 	"time"
+
+	"github.com/pkg/errors"
 )
 
 // EventerType ...
@@ -158,3 +160,12 @@ const (
 
 // EventFilter for filtering events
 type EventFilter func(*Event) bool
+
+var (
+	// ErrEventTypeBlank indicates the event log found something done by podman
+	// but it isn't likely an event
+	ErrEventTypeBlank = errors.New("event type blank")
+
+	// ErrEventNotFound indicates that the event was not found in the event log
+	ErrEventNotFound = errors.New("unable to find event")
+)
diff --git a/libpod/events/events.go b/libpod/events/events.go
index 1ec79bcd7..2bebff162 100644
--- a/libpod/events/events.go
+++ b/libpod/events/events.go
@@ -95,6 +95,8 @@ func StringToType(name string) (Type, error) {
 		return System, nil
 	case Volume.String():
 		return Volume, nil
+	case "":
+		return "", ErrEventTypeBlank
 	}
 	return "", errors.Errorf("unknown event type %q", name)
 }
diff --git a/libpod/events/journal_linux.go b/libpod/events/journal_linux.go
index 78a630e9a..d5bce4334 100644
--- a/libpod/events/journal_linux.go
+++ b/libpod/events/journal_linux.go
@@ -101,7 +101,9 @@ func (e EventJournalD) Read(options ReadOptions) error {
 			// We can't decode this event.
 			// Don't fail hard - that would make events unusable.
 			// Instead, log and continue.
-			logrus.Errorf("Unable to decode event: %v", err)
+			if errors.Cause(err) != ErrEventTypeBlank {
+				logrus.Errorf("Unable to decode event: %v", err)
+			}
 			continue
 		}
 		include := true
-- 
cgit v1.2.3-54-g00ecf


From c528d7a622267fcee0f38f25c94198b23deed9dd Mon Sep 17 00:00:00 2001
From: Matthew Heon <matthew.heon@pm.me>
Date: Thu, 11 Jul 2019 13:32:42 -0400
Subject: Use file-based eventer for integration tests

This adds several top-level Podman flags for specifying different
events backend types, which are then used in CI. It resolves a
number of serious issues with events-based testing.

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
---
 cmd/podman/cliconfig/config.go             |  1 +
 cmd/podman/libpodruntime/runtime.go        |  4 ++++
 cmd/podman/main_local.go                   |  1 +
 docs/libpod.conf.5.md                      |  2 +-
 docs/podman.1.md                           |  4 ++++
 libpod/container_api.go                    |  2 +-
 libpod/container_internal.go               |  8 +++++++-
 libpod/events/config.go                    |  2 ++
 libpod/events/events.go                    | 23 +++++++++++++++++++++--
 libpod/events/events_linux.go              |  4 +++-
 libpod/events/logfile.go                   |  2 +-
 libpod/options.go                          | 20 ++++++++++++++++++++
 libpod/runtime_ctr.go                      |  7 +------
 pkg/spec/createconfig.go                   |  3 +++
 test/e2e/common_test.go                    |  2 +-
 test/e2e/libpod_suite_remoteclient_test.go | 15 +++++++++++----
 test/e2e/libpod_suite_test.go              | 28 ++++++++++++++++++++--------
 test/e2e/run_test.go                       |  3 ++-
 test/utils/podmantest_test.go              |  2 +-
 test/utils/utils.go                        | 28 ++++++++++++++--------------
 test/utils/utils_suite_test.go             |  2 +-
 21 files changed, 120 insertions(+), 43 deletions(-)

(limited to 'libpod')

diff --git a/cmd/podman/cliconfig/config.go b/cmd/podman/cliconfig/config.go
index 4a4c839cc..cb809060a 100644
--- a/cmd/podman/cliconfig/config.go
+++ b/cmd/podman/cliconfig/config.go
@@ -16,6 +16,7 @@ type MainFlags struct {
 	CniConfigDir      string
 	ConmonPath        string
 	DefaultMountsFile string
+	EventsBackend     string
 	HooksDir          []string
 	MaxWorks          int
 	Namespace         string
diff --git a/cmd/podman/libpodruntime/runtime.go b/cmd/podman/libpodruntime/runtime.go
index d83a71250..5685e2c4b 100644
--- a/cmd/podman/libpodruntime/runtime.go
+++ b/cmd/podman/libpodruntime/runtime.go
@@ -109,6 +109,10 @@ func getRuntime(ctx context.Context, c *cliconfig.PodmanCommand, renumber bool,
 		options = append(options, libpod.WithNetworkCmdPath(c.GlobalFlags.NetworkCmdPath))
 	}
 
+	if c.Flags().Changed("events-backend") {
+		options = append(options, libpod.WithEventsLogger(c.GlobalFlags.EventsBackend))
+	}
+
 	if c.Flags().Changed("cgroup-manager") {
 		options = append(options, libpod.WithCgroupManager(c.GlobalFlags.CGroupManager))
 	} else {
diff --git a/cmd/podman/main_local.go b/cmd/podman/main_local.go
index 132f35ab5..e1f661fbe 100644
--- a/cmd/podman/main_local.go
+++ b/cmd/podman/main_local.go
@@ -35,6 +35,7 @@ func init() {
 	rootCmd.PersistentFlags().StringVar(&MainGlobalOpts.CniConfigDir, "cni-config-dir", "", "Path of the configuration directory for CNI networks")
 	rootCmd.PersistentFlags().StringVar(&MainGlobalOpts.DefaultMountsFile, "default-mounts-file", "", "Path to default mounts file")
 	rootCmd.PersistentFlags().MarkHidden("defaults-mount-file")
+	rootCmd.PersistentFlags().StringVar(&MainGlobalOpts.EventsBackend, "events-backend", "", "Events backend to use")
 	// Override default --help information of `--help` global flag
 	var dummyHelp bool
 	rootCmd.PersistentFlags().BoolVar(&dummyHelp, "help", false, "Help for podman")
diff --git a/docs/libpod.conf.5.md b/docs/libpod.conf.5.md
index cb08f0eb0..dad1dde90 100644
--- a/docs/libpod.conf.5.md
+++ b/docs/libpod.conf.5.md
@@ -96,7 +96,7 @@ libpod to manage containers.
   a slirp4netns network.  If "" is used then the binary is looked up using the $PATH environment variable.
 
 **events_logger**=""
-  Default method to use when logging events. Valid values are "journald" and "file".
+  Default method to use when logging events. Valid values are "file", "journald", and "none".
 
 ## FILES
   `/usr/share/containers/libpod.conf`, default libpod configuration path
diff --git a/docs/podman.1.md b/docs/podman.1.md
index c23075718..32b233971 100644
--- a/docs/podman.1.md
+++ b/docs/podman.1.md
@@ -33,6 +33,10 @@ CGroup manager to use for container cgroups. Supported values are cgroupfs or sy
 
 Path to where the cpu performance results should be written
 
+**--events-backend**=*type*
+
+Backend to use for storing events. Allowed values are **file**, **journald**, and **none**.
+
 **--hooks-dir**=*path*
 
 Each `*.json` file in the path configures a hook for Podman containers.  For more details on the syntax of the JSON files and the semantics of hook injection, see `oci-hooks(5)`.  Podman and libpod currently support both the 1.0.0 and 0.1.0 hook schemas, although the 0.1.0 schema is deprecated.
diff --git a/libpod/container_api.go b/libpod/container_api.go
index 0e877d04e..c52b27db3 100644
--- a/libpod/container_api.go
+++ b/libpod/container_api.go
@@ -187,7 +187,7 @@ func (c *Container) StopWithTimeout(timeout uint) error {
 		c.state.State == ContainerStateExited {
 		return ErrCtrStopped
 	}
-	defer c.newContainerEvent(events.Stop)
+
 	return c.stop(timeout)
 }
 
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index f6fe9cd0c..ab0ad6516 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -1044,7 +1044,13 @@ func (c *Container) stop(timeout uint) error {
 	}
 
 	// Wait until we have an exit file, and sync once we do
-	return c.waitForExitFileAndSync()
+	if err := c.waitForExitFileAndSync(); err != nil {
+		return err
+	}
+
+	c.newContainerEvent(events.Stop)
+
+	return nil
 }
 
 // Internal, non-locking function to pause a container
diff --git a/libpod/events/config.go b/libpod/events/config.go
index b9f01f3a5..96172d47b 100644
--- a/libpod/events/config.go
+++ b/libpod/events/config.go
@@ -14,6 +14,8 @@ const (
 	LogFile EventerType = iota
 	// Journald indicates journald should be used to log events
 	Journald EventerType = iota
+	// Null is a no-op events logger. It does not read or write events.
+	Null EventerType = iota
 )
 
 // Event describes the attributes of a libpod event
diff --git a/libpod/events/events.go b/libpod/events/events.go
index 2bebff162..5e828bc8a 100644
--- a/libpod/events/events.go
+++ b/libpod/events/events.go
@@ -16,11 +16,30 @@ var ErrNoJournaldLogging = errors.New("No support for journald logging")
 
 // String returns a string representation of EventerType
 func (et EventerType) String() string {
-	if et == LogFile {
+	switch et {
+	case LogFile:
 		return "file"
+	case Journald:
+		return "journald"
+	case Null:
+		return "none"
+	default:
+		return "invalid"
+	}
+}
 
+// IsValidEventer checks if the given string is a valid eventer type.
+func IsValidEventer(eventer string) bool {
+	switch eventer {
+	case LogFile.String():
+		return true
+	case Journald.String():
+		return true
+	case Null.String():
+		return true
+	default:
+		return false
 	}
-	return "journald"
 }
 
 // NewEvent creates a event struct and populates with
diff --git a/libpod/events/events_linux.go b/libpod/events/events_linux.go
index 11f309574..ffb100be8 100644
--- a/libpod/events/events_linux.go
+++ b/libpod/events/events_linux.go
@@ -18,8 +18,10 @@ func NewEventer(options EventerOptions) (eventer Eventer, err error) {
 		}
 	case strings.ToUpper(LogFile.String()):
 		eventer = EventLogFile{options}
+	case strings.ToUpper(Null.String()):
+		eventer = NewNullEventer()
 	default:
-		return eventer, errors.Errorf("unknown event logger type: %s", strings.ToUpper(options.EventerType))
+		return nil, errors.Errorf("unknown event logger type: %s", strings.ToUpper(options.EventerType))
 	}
 	return eventer, nil
 }
diff --git a/libpod/events/logfile.go b/libpod/events/logfile.go
index e5efc09bb..30d72b9fc 100644
--- a/libpod/events/logfile.go
+++ b/libpod/events/logfile.go
@@ -55,7 +55,7 @@ func (e EventLogFile) Read(options ReadOptions) error {
 			return err
 		}
 		switch event.Type {
-		case Image, Volume, Pod, Container:
+		case Image, Volume, Pod, System, Container:
 		//	no-op
 		default:
 			return errors.Errorf("event type %s is not valid in %s", event.Type.String(), e.options.LogFilePath)
diff --git a/libpod/options.go b/libpod/options.go
index cdac09654..62571957f 100644
--- a/libpod/options.go
+++ b/libpod/options.go
@@ -8,6 +8,7 @@ import (
 	"syscall"
 
 	"github.com/containers/image/manifest"
+	"github.com/containers/libpod/libpod/events"
 	"github.com/containers/libpod/pkg/namespaces"
 	"github.com/containers/libpod/pkg/rootless"
 	"github.com/containers/libpod/pkg/util"
@@ -421,6 +422,25 @@ func WithDefaultInfraCommand(cmd string) RuntimeOption {
 	}
 }
 
+// WithEventsLogger sets the events backend to use.
+// Currently supported values are "file" for file backend, "journald" for
+// journald backend, and "none" for the no-op backend.
+func WithEventsLogger(logger string) RuntimeOption {
+	return func(rt *Runtime) error {
+		if rt.valid {
+			return ErrRuntimeFinalized
+		}
+
+		if !events.IsValidEventer(logger) {
+			return errors.Wrapf(ErrInvalidArg, "%q is not a valid events backend", logger)
+		}
+
+		rt.config.EventsLogger = logger
+
+		return nil
+	}
+}
+
 // WithRenumber instructs libpod to perform a lock renumbering while
 // initializing. This will handle migrations from early versions of libpod with
 // file locks to newer versions with SHM locking, as well as changes in the
diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index 0871b83a7..7ef7b8828 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -376,14 +376,9 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool,
 
 	// Check that the container's in a good state to be removed
 	if c.state.State == ContainerStateRunning {
-		if err := r.ociRuntime.stopContainer(c, c.StopTimeout()); err != nil {
+		if err := c.stop(c.StopTimeout()); err != nil {
 			return errors.Wrapf(err, "cannot remove container %s as it could not be stopped", c.ID())
 		}
-
-		// Need to update container state to make sure we know it's stopped
-		if err := c.waitForExitFileAndSync(); err != nil {
-			return err
-		}
 	}
 
 	// Check that all of our exec sessions have finished
diff --git a/pkg/spec/createconfig.go b/pkg/spec/createconfig.go
index a8413d6c7..eaebf119d 100644
--- a/pkg/spec/createconfig.go
+++ b/pkg/spec/createconfig.go
@@ -165,6 +165,9 @@ func (c *CreateConfig) createExitCommand(runtime *libpod.Runtime) ([]string, err
 	for _, opt := range config.StorageConfig.GraphDriverOptions {
 		command = append(command, []string{"--storage-opt", opt}...)
 	}
+	if config.EventsLogger != "" {
+		command = append(command, []string{"--events-backend", config.EventsLogger}...)
+	}
 
 	if c.Syslog {
 		command = append(command, "--syslog", "true")
diff --git a/test/e2e/common_test.go b/test/e2e/common_test.go
index 8b6eab892..953e14f84 100644
--- a/test/e2e/common_test.go
+++ b/test/e2e/common_test.go
@@ -405,7 +405,7 @@ func (p *PodmanTestIntegration) BuildImage(dockerfile, imageName string, layers
 
 // PodmanPID execs podman and returns its PID
 func (p *PodmanTestIntegration) PodmanPID(args []string) (*PodmanSessionIntegration, int) {
-	podmanOptions := p.MakeOptions(args)
+	podmanOptions := p.MakeOptions(args, false)
 	fmt.Printf("Running: %s %s\n", p.PodmanBinary, strings.Join(podmanOptions, " "))
 	command := exec.Command(p.PodmanBinary, podmanOptions...)
 	session, err := gexec.Start(command, GinkgoWriter, GinkgoWriter)
diff --git a/test/e2e/libpod_suite_remoteclient_test.go b/test/e2e/libpod_suite_remoteclient_test.go
index c8210f7d1..7f33fec87 100644
--- a/test/e2e/libpod_suite_remoteclient_test.go
+++ b/test/e2e/libpod_suite_remoteclient_test.go
@@ -30,13 +30,20 @@ func SkipIfRootless() {
 
 // Podman is the exec call to podman on the filesystem
 func (p *PodmanTestIntegration) Podman(args []string) *PodmanSessionIntegration {
-	podmanSession := p.PodmanBase(args, false)
+	podmanSession := p.PodmanBase(args, false, false)
 	return &PodmanSessionIntegration{podmanSession}
 }
 
 // PodmanNoCache calls podman with out adding the imagecache
 func (p *PodmanTestIntegration) PodmanNoCache(args []string) *PodmanSessionIntegration {
-	podmanSession := p.PodmanBase(args, true)
+	podmanSession := p.PodmanBase(args, true, false)
+	return &PodmanSessionIntegration{podmanSession}
+}
+
+// PodmanNoEvents calls the Podman command without an imagecache and without an
+// events backend. It is used mostly for caching and uncaching images.
+func (p *PodmanTestIntegration) PodmanNoEvents(args []string) *PodmanSessionIntegration {
+	podmanSession := p.PodmanBase(args, true, true)
 	return &PodmanSessionIntegration{podmanSession}
 }
 
@@ -135,7 +142,7 @@ func (p *PodmanTestIntegration) StopVarlink() {
 }
 
 //MakeOptions assembles all the podman main options
-func (p *PodmanTestIntegration) makeOptions(args []string) []string {
+func (p *PodmanTestIntegration) makeOptions(args []string, noEvents bool) []string {
 	return args
 }
 
@@ -156,7 +163,7 @@ func (p *PodmanTestIntegration) RestoreArtifactToCache(image string) error {
 	dest := strings.Split(image, "/")
 	destName := fmt.Sprintf("/tmp/%s.tar", strings.Replace(strings.Join(strings.Split(dest[len(dest)-1], "/"), ""), ":", "-", -1))
 	p.CrioRoot = p.ImageCacheDir
-	restore := p.PodmanNoCache([]string{"load", "-q", "-i", destName})
+	restore := p.PodmanNoEvents([]string{"load", "-q", "-i", destName})
 	restore.WaitWithDefaultTimeout()
 	return nil
 }
diff --git a/test/e2e/libpod_suite_test.go b/test/e2e/libpod_suite_test.go
index 8d993ee72..1df59dbe3 100644
--- a/test/e2e/libpod_suite_test.go
+++ b/test/e2e/libpod_suite_test.go
@@ -23,19 +23,26 @@ func SkipIfRootless() {
 
 // Podman is the exec call to podman on the filesystem
 func (p *PodmanTestIntegration) Podman(args []string) *PodmanSessionIntegration {
-	podmanSession := p.PodmanBase(args, false)
+	podmanSession := p.PodmanBase(args, false, false)
 	return &PodmanSessionIntegration{podmanSession}
 }
 
 // PodmanNoCache calls the podman command with no configured imagecache
 func (p *PodmanTestIntegration) PodmanNoCache(args []string) *PodmanSessionIntegration {
-	podmanSession := p.PodmanBase(args, true)
+	podmanSession := p.PodmanBase(args, true, false)
+	return &PodmanSessionIntegration{podmanSession}
+}
+
+// PodmanNoEvents calls the Podman command without an imagecache and without an
+// events backend. It is used mostly for caching and uncaching images.
+func (p *PodmanTestIntegration) PodmanNoEvents(args []string) *PodmanSessionIntegration {
+	podmanSession := p.PodmanBase(args, true, true)
 	return &PodmanSessionIntegration{podmanSession}
 }
 
 // PodmanAsUser is the exec call to podman on the filesystem with the specified uid/gid and environment
 func (p *PodmanTestIntegration) PodmanAsUser(args []string, uid, gid uint32, cwd string, env []string) *PodmanSessionIntegration {
-	podmanSession := p.PodmanAsUserBase(args, uid, gid, cwd, env, false)
+	podmanSession := p.PodmanAsUserBase(args, uid, gid, cwd, env, false, false)
 	return &PodmanSessionIntegration{podmanSession}
 }
 
@@ -59,14 +66,19 @@ func PodmanTestCreate(tempDir string) *PodmanTestIntegration {
 }
 
 // MakeOptions assembles all the podman main options
-func (p *PodmanTestIntegration) makeOptions(args []string) []string {
+func (p *PodmanTestIntegration) makeOptions(args []string, noEvents bool) []string {
 	var debug string
 	if _, ok := os.LookupEnv("DEBUG"); ok {
 		debug = "--log-level=debug --syslog=true "
 	}
 
-	podmanOptions := strings.Split(fmt.Sprintf("%s--root %s --runroot %s --runtime %s --conmon %s --cni-config-dir %s --cgroup-manager %s --tmpdir %s",
-		debug, p.CrioRoot, p.RunRoot, p.OCIRuntime, p.ConmonBinary, p.CNIConfigDir, p.CgroupManager, p.TmpDir), " ")
+	eventsType := "file"
+	if noEvents {
+		eventsType = "none"
+	}
+
+	podmanOptions := strings.Split(fmt.Sprintf("%s--root %s --runroot %s --runtime %s --conmon %s --cni-config-dir %s --cgroup-manager %s --tmpdir %s --events-backend %s",
+		debug, p.CrioRoot, p.RunRoot, p.OCIRuntime, p.ConmonBinary, p.CNIConfigDir, p.CgroupManager, p.TmpDir, eventsType), " ")
 	if os.Getenv("HOOK_OPTION") != "" {
 		podmanOptions = append(podmanOptions, os.Getenv("HOOK_OPTION"))
 	}
@@ -81,7 +93,7 @@ func (p *PodmanTestIntegration) RestoreArtifact(image string) error {
 	fmt.Printf("Restoring %s...\n", image)
 	dest := strings.Split(image, "/")
 	destName := fmt.Sprintf("/tmp/%s.tar", strings.Replace(strings.Join(strings.Split(dest[len(dest)-1], "/"), ""), ":", "-", -1))
-	restore := p.PodmanNoCache([]string{"load", "-q", "-i", destName})
+	restore := p.PodmanNoEvents([]string{"load", "-q", "-i", destName})
 	restore.Wait(90)
 	return nil
 }
@@ -93,7 +105,7 @@ func (p *PodmanTestIntegration) RestoreArtifactToCache(image string) error {
 	destName := fmt.Sprintf("/tmp/%s.tar", strings.Replace(strings.Join(strings.Split(dest[len(dest)-1], "/"), ""), ":", "-", -1))
 
 	p.CrioRoot = p.ImageCacheDir
-	restore := p.PodmanNoCache([]string{"load", "-q", "-i", destName})
+	restore := p.PodmanNoEvents([]string{"load", "-q", "-i", destName})
 	restore.WaitWithDefaultTimeout()
 	return nil
 }
diff --git a/test/e2e/run_test.go b/test/e2e/run_test.go
index 3ba3c2bb3..c26a8f38d 100644
--- a/test/e2e/run_test.go
+++ b/test/e2e/run_test.go
@@ -750,9 +750,10 @@ USER mail`
 		match, _ := session.GrepString("1.2.3.4")
 		Expect(match).Should(BeTrue())
 
-		session = podmanTest.Podman([]string{"run", "--rm", "--http-proxy=false", ALPINE, "printenv", "http_proxy"})
+		session = podmanTest.Podman([]string{"run", "--http-proxy=false", ALPINE, "printenv", "http_proxy"})
 		session.WaitWithDefaultTimeout()
 		Expect(session.ExitCode()).To(Equal(1))
+		Expect(session.OutputToString()).To(Equal(""))
 		os.Unsetenv("http_proxy")
 	})
 
diff --git a/test/utils/podmantest_test.go b/test/utils/podmantest_test.go
index cb31d5548..9620898af 100644
--- a/test/utils/podmantest_test.go
+++ b/test/utils/podmantest_test.go
@@ -23,7 +23,7 @@ var _ = Describe("PodmanTest test", func() {
 		FakeOutputs["check"] = []string{"check"}
 		os.Setenv("HOOK_OPTION", "hook_option")
 		env := os.Environ()
-		session := podmanTest.PodmanAsUserBase([]string{"check"}, 1000, 1000, "", env, true)
+		session := podmanTest.PodmanAsUserBase([]string{"check"}, 1000, 1000, "", env, true, false)
 		os.Unsetenv("HOOK_OPTION")
 		session.WaitWithDefaultTimeout()
 		Expect(session.Command.Process).ShouldNot(BeNil())
diff --git a/test/utils/utils.go b/test/utils/utils.go
index 43819350c..028107d46 100644
--- a/test/utils/utils.go
+++ b/test/utils/utils.go
@@ -26,14 +26,14 @@ var (
 // PodmanTestCommon contains common functions will be updated later in
 // the inheritance structs
 type PodmanTestCommon interface {
-	MakeOptions(args []string) []string
+	MakeOptions(args []string, noEvents bool) []string
 	WaitForContainer() bool
 	WaitContainerReady(id string, expStr string, timeout int, step int) bool
 }
 
 // PodmanTest struct for command line options
 type PodmanTest struct {
-	PodmanMakeOptions  func(args []string) []string
+	PodmanMakeOptions  func(args []string, noEvents bool) []string
 	PodmanBinary       string
 	ArtifactPath       string
 	TempDir            string
@@ -59,15 +59,15 @@ type HostOS struct {
 }
 
 // MakeOptions assembles all podman options
-func (p *PodmanTest) MakeOptions(args []string) []string {
-	return p.PodmanMakeOptions(args)
+func (p *PodmanTest) MakeOptions(args []string, noEvents bool) []string {
+	return p.PodmanMakeOptions(args, noEvents)
 }
 
 // PodmanAsUserBase exec podman as user. uid and gid is set for credentials usage. env is used
 // to record the env for debugging
-func (p *PodmanTest) PodmanAsUserBase(args []string, uid, gid uint32, cwd string, env []string, nocache bool) *PodmanSession {
+func (p *PodmanTest) PodmanAsUserBase(args []string, uid, gid uint32, cwd string, env []string, nocache, noEvents bool) *PodmanSession {
 	var command *exec.Cmd
-	podmanOptions := p.MakeOptions(args)
+	podmanOptions := p.MakeOptions(args, noEvents)
 	podmanBinary := p.PodmanBinary
 	if p.RemoteTest {
 		podmanBinary = p.RemotePodmanBinary
@@ -105,8 +105,8 @@ func (p *PodmanTest) PodmanAsUserBase(args []string, uid, gid uint32, cwd string
 }
 
 // PodmanBase exec podman with default env.
-func (p *PodmanTest) PodmanBase(args []string, nocache bool) *PodmanSession {
-	return p.PodmanAsUserBase(args, 0, 0, "", nil, nocache)
+func (p *PodmanTest) PodmanBase(args []string, nocache, noEvents bool) *PodmanSession {
+	return p.PodmanAsUserBase(args, 0, 0, "", nil, nocache, noEvents)
 }
 
 // WaitForContainer waits on a started container
@@ -124,7 +124,7 @@ func (p *PodmanTest) WaitForContainer() bool {
 // containers are currently running.
 func (p *PodmanTest) NumberOfContainersRunning() int {
 	var containers []string
-	ps := p.PodmanBase([]string{"ps", "-q"}, true)
+	ps := p.PodmanBase([]string{"ps", "-q"}, true, false)
 	ps.WaitWithDefaultTimeout()
 	Expect(ps.ExitCode()).To(Equal(0))
 	for _, i := range ps.OutputToStringArray() {
@@ -139,7 +139,7 @@ func (p *PodmanTest) NumberOfContainersRunning() int {
 // containers are currently defined.
 func (p *PodmanTest) NumberOfContainers() int {
 	var containers []string
-	ps := p.PodmanBase([]string{"ps", "-aq"}, true)
+	ps := p.PodmanBase([]string{"ps", "-aq"}, true, false)
 	ps.WaitWithDefaultTimeout()
 	Expect(ps.ExitCode()).To(Equal(0))
 	for _, i := range ps.OutputToStringArray() {
@@ -154,7 +154,7 @@ func (p *PodmanTest) NumberOfContainers() int {
 // pods are currently defined.
 func (p *PodmanTest) NumberOfPods() int {
 	var pods []string
-	ps := p.PodmanBase([]string{"pod", "ps", "-q"}, true)
+	ps := p.PodmanBase([]string{"pod", "ps", "-q"}, true, false)
 	ps.WaitWithDefaultTimeout()
 	Expect(ps.ExitCode()).To(Equal(0))
 	for _, i := range ps.OutputToStringArray() {
@@ -170,7 +170,7 @@ func (p *PodmanTest) NumberOfPods() int {
 func (p *PodmanTest) GetContainerStatus() string {
 	var podmanArgs = []string{"ps"}
 	podmanArgs = append(podmanArgs, "--all", "--format={{.Status}}")
-	session := p.PodmanBase(podmanArgs, true)
+	session := p.PodmanBase(podmanArgs, true, false)
 	session.WaitWithDefaultTimeout()
 	return session.OutputToString()
 }
@@ -178,7 +178,7 @@ func (p *PodmanTest) GetContainerStatus() string {
 // WaitContainerReady waits process or service inside container start, and ready to be used.
 func (p *PodmanTest) WaitContainerReady(id string, expStr string, timeout int, step int) bool {
 	startTime := time.Now()
-	s := p.PodmanBase([]string{"logs", id}, true)
+	s := p.PodmanBase([]string{"logs", id}, true, false)
 	s.WaitWithDefaultTimeout()
 
 	for {
@@ -191,7 +191,7 @@ func (p *PodmanTest) WaitContainerReady(id string, expStr string, timeout int, s
 			return true
 		}
 		time.Sleep(time.Duration(step) * time.Second)
-		s = p.PodmanBase([]string{"logs", id}, true)
+		s = p.PodmanBase([]string{"logs", id}, true, false)
 		s.WaitWithDefaultTimeout()
 	}
 }
diff --git a/test/utils/utils_suite_test.go b/test/utils/utils_suite_test.go
index b1100892b..5904d37dc 100644
--- a/test/utils/utils_suite_test.go
+++ b/test/utils/utils_suite_test.go
@@ -32,7 +32,7 @@ func FakePodmanTestCreate() *FakePodmanTest {
 	return p
 }
 
-func (p *FakePodmanTest) makeOptions(args []string) []string {
+func (p *FakePodmanTest) makeOptions(args []string, noEvents bool) []string {
 	return FakeOutputs[strings.Join(args, " ")]
 }
 
-- 
cgit v1.2.3-54-g00ecf


From 9e5175b06ca432f3d8a657aeed946d7c8c486345 Mon Sep 17 00:00:00 2001
From: Matthew Heon <mheon@redhat.com>
Date: Thu, 18 Jul 2019 08:59:44 -0400
Subject: Remove exec PID files after use to prevent memory leaks

We have another patch running to do the same for exit files, with
a much more in-depth explanation of why it's necessary. Suffice
to say that persistent files in tmpfs tied to container CGroups
lead to significant memory allocations that last for the lifetime
of the file.

Based on a patch by Andrea Arcangeli (aarcange@redhat.com).

Signed-off-by: Matthew Heon <mheon@redhat.com>
---
 libpod/container_api.go | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'libpod')

diff --git a/libpod/container_api.go b/libpod/container_api.go
index c52b27db3..1de956521 100644
--- a/libpod/container_api.go
+++ b/libpod/container_api.go
@@ -301,6 +301,11 @@ func (c *Container) Exec(tty, privileged bool, env, cmd []string, user, workDir
 	if err != nil {
 		if exited {
 			// If the runtime exited, propagate the error we got from the process.
+			// We need to remove PID files to ensure no memory leaks
+			if err2 := os.Remove(pidFile); err2 != nil {
+				logrus.Errorf("Error removing exit file for container %s exec session %s: %v", c.ID(), sessionID, err2)
+			}
+
 			return err
 		}
 		return errors.Wrapf(err, "timed out waiting for runtime to create pidfile for exec session in container %s", c.ID())
@@ -308,6 +313,10 @@ func (c *Container) Exec(tty, privileged bool, env, cmd []string, user, workDir
 
 	// Pidfile exists, read it
 	contents, err := ioutil.ReadFile(pidFile)
+	// We need to remove PID files to ensure no memory leaks
+	if err2 := os.Remove(pidFile); err2 != nil {
+		logrus.Errorf("Error removing exit file for container %s exec session %s: %v", c.ID(), sessionID, err2)
+	}
 	if err != nil {
 		// We don't know the PID of the exec session
 		// However, it may still be alive
-- 
cgit v1.2.3-54-g00ecf


From a682cd3814cb4ea959a1e96e8b54cf068c1e4aed Mon Sep 17 00:00:00 2001
From: Matthew Heon <matthew.heon@pm.me>
Date: Thu, 20 Jun 2019 14:04:16 -0400
Subject: Make configuration validation not require a DB commit

If there are missing fields, we still require a commit, but that
should not happen often.

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
---
 libpod/boltdb_state_internal.go | 186 ++++++++++++++++++++++++++--------------
 1 file changed, 124 insertions(+), 62 deletions(-)

(limited to 'libpod')

diff --git a/libpod/boltdb_state_internal.go b/libpod/boltdb_state_internal.go
index 313e5f4d7..e06de631d 100644
--- a/libpod/boltdb_state_internal.go
+++ b/libpod/boltdb_state_internal.go
@@ -72,98 +72,160 @@ var (
 	volPathKey     = []byte(volPathName)
 )
 
+// This represents a field in the runtime configuration that will be validated
+// against the DB to ensure no configuration mismatches occur.
+type dbConfigValidation struct {
+	name         string // Only used for error messages
+	runtimeValue string
+	key          []byte
+	defaultValue string
+}
+
 // Check if the configuration of the database is compatible with the
 // configuration of the runtime opening it
 // If there is no runtime configuration loaded, load our own
 func checkRuntimeConfig(db *bolt.DB, rt *Runtime) error {
-	err := db.Update(func(tx *bolt.Tx) error {
+	storeOpts, err := storage.DefaultStoreOptions(rootless.IsRootless(), rootless.GetRootlessUID())
+	if err != nil {
+		return err
+	}
+
+	// We need to validate the following things
+	checks := []dbConfigValidation{
+		{
+			"OS",
+			runtime.GOOS,
+			osKey,
+			runtime.GOOS,
+		},
+		{
+			"libpod root directory (staticdir)",
+			rt.config.StaticDir,
+			staticDirKey,
+			"",
+		},
+		{
+			"libpod temporary files directory (tmpdir)",
+			rt.config.TmpDir,
+			tmpDirKey,
+			"",
+		},
+		{
+			"storage temporary directory (runroot)",
+			rt.config.StorageConfig.RunRoot,
+			runRootKey,
+			storeOpts.RunRoot,
+		},
+		{
+			"storage graph root directory (graphroot)",
+			rt.config.StorageConfig.GraphRoot,
+			graphRootKey,
+			storeOpts.GraphRoot,
+		},
+		{
+			"storage graph driver",
+			rt.config.StorageConfig.GraphDriverName,
+			graphDriverKey,
+			storeOpts.GraphDriverName,
+		},
+		{
+			"volume path",
+			rt.config.VolumePath,
+			volPathKey,
+			"",
+		},
+	}
+
+	// These fields were missing and will have to be recreated.
+	missingFields := []dbConfigValidation{}
+
+	// Let's try and validate read-only first
+	err = db.View(func(tx *bolt.Tx) error {
 		configBkt, err := getRuntimeConfigBucket(tx)
 		if err != nil {
 			return err
 		}
 
-		if err := validateDBAgainstConfig(configBkt, "OS", runtime.GOOS, osKey, runtime.GOOS); err != nil {
-			return err
+		for _, check := range checks {
+			exists, err := readOnlyValidateConfig(configBkt, check)
+			if err != nil {
+				return err
+			}
+			if !exists {
+				missingFields = append(missingFields, check)
+			}
 		}
 
-		if err := validateDBAgainstConfig(configBkt, "libpod root directory (staticdir)",
-			rt.config.StaticDir, staticDirKey, ""); err != nil {
-			return err
-		}
+		return nil
+	})
+	if err != nil {
+		return err
+	}
 
-		if err := validateDBAgainstConfig(configBkt, "libpod temporary files directory (tmpdir)",
-			rt.config.TmpDir, tmpDirKey, ""); err != nil {
-			return err
-		}
+	if len(missingFields) == 0 {
+		return nil
+	}
 
-		storeOpts, err := storage.DefaultStoreOptions(rootless.IsRootless(), rootless.GetRootlessUID())
+	// Populate missing fields
+	return db.Update(func(tx *bolt.Tx) error {
+		configBkt, err := getRuntimeConfigBucket(tx)
 		if err != nil {
 			return err
 		}
-		if err := validateDBAgainstConfig(configBkt, "storage temporary directory (runroot)",
-			rt.config.StorageConfig.RunRoot, runRootKey,
-			storeOpts.RunRoot); err != nil {
-			return err
-		}
 
-		if err := validateDBAgainstConfig(configBkt, "storage graph root directory (graphroot)",
-			rt.config.StorageConfig.GraphRoot, graphRootKey,
-			storeOpts.GraphRoot); err != nil {
-			return err
-		}
+		for _, missing := range missingFields {
+			dbValue := []byte(missing.runtimeValue)
+			if missing.runtimeValue == "" && missing.defaultValue != "" {
+				dbValue = []byte(missing.defaultValue)
+			}
 
-		if err := validateDBAgainstConfig(configBkt, "storage graph driver",
-			rt.config.StorageConfig.GraphDriverName,
-			graphDriverKey,
-			storeOpts.GraphDriverName); err != nil {
-			return err
+			if err := configBkt.Put(missing.key, dbValue); err != nil {
+				return errors.Wrapf(err, "error updating %s in DB runtime config", missing.name)
+			}
 		}
 
-		return validateDBAgainstConfig(configBkt, "volume path",
-			rt.config.VolumePath, volPathKey, "")
+		return nil
 	})
-
-	return err
 }
 
-// Validate a configuration entry in the DB against current runtime config
-// If the given configuration key does not exist it will be created
-// If the given runtimeValue or value retrieved from the database are the empty
-// string and defaultValue is not, defaultValue will be checked instead. This
-// ensures that we will not fail on configuration changes in configured c/storage.
-func validateDBAgainstConfig(bucket *bolt.Bucket, fieldName, runtimeValue string, keyName []byte, defaultValue string) error {
-	keyBytes := bucket.Get(keyName)
+// Attempt a read-only validation of a configuration entry in the DB against an
+// element of the current runtime configuration.
+// If the configuration key in question does not exist, (false, nil) will be
+// returned.
+// If the configuration key does exist, and matches the runtime configuration
+// successfully, (true, nil) is returned.
+// An error is only returned when validation fails.
+// If the given runtimeValue or the value retrieved from the database is empty,
+// and defaultValue is not, defaultValue will be checked instead. This ensures
+// that we will not fail on configuration changes in c/storage (where we may
+// pass the empty string to use defaults).
+func readOnlyValidateConfig(bucket *bolt.Bucket, toCheck dbConfigValidation) (bool, error) {
+	keyBytes := bucket.Get(toCheck.key)
 	if keyBytes == nil {
-		dbValue := []byte(runtimeValue)
-		if runtimeValue == "" && defaultValue != "" {
-			dbValue = []byte(defaultValue)
-		}
+		// False return indicates missing key
+		return false, nil
+	}
 
-		if err := bucket.Put(keyName, dbValue); err != nil {
-			return errors.Wrapf(err, "error updating %s in DB runtime config", fieldName)
-		}
-	} else {
-		if runtimeValue != string(keyBytes) {
-			// If runtimeValue is the empty string, check against
-			// the default
-			if runtimeValue == "" && defaultValue != "" &&
-				string(keyBytes) == defaultValue {
-				return nil
-			}
+	dbValue := string(keyBytes)
 
-			// If DB value is the empty string, check that the
-			// runtime value is the default
-			if string(keyBytes) == "" && defaultValue != "" &&
-				runtimeValue == defaultValue {
-				return nil
-			}
+	if toCheck.runtimeValue != dbValue {
+		// If the runtime value is the empty string and default is not,
+		// check against default.
+		if toCheck.runtimeValue == "" && toCheck.defaultValue != "" && dbValue == toCheck.defaultValue {
+			return true, nil
+		}
 
-			return errors.Wrapf(ErrDBBadConfig, "database %s %s does not match our %s %s",
-				fieldName, string(keyBytes), fieldName, runtimeValue)
+		// If the DB value is the empty string, check that the runtime
+		// value is the default.
+		if dbValue == "" && toCheck.defaultValue != "" && toCheck.runtimeValue == toCheck.defaultValue {
+			return true, nil
 		}
+
+		return true, errors.Wrapf(ErrDBBadConfig, "database %s %q does not match our %s %q",
+			toCheck.name, dbValue, toCheck.name, toCheck.runtimeValue)
 	}
 
-	return nil
+	return true, nil
 }
 
 // Open a connection to the database.
-- 
cgit v1.2.3-54-g00ecf


From 5255a1bb11da5e3a9310c672011efb2796ba9412 Mon Sep 17 00:00:00 2001
From: Matthew Heon <mheon@redhat.com>
Date: Thu, 20 Jun 2019 10:17:38 -0400
Subject: Avoid a read-write transaction on DB init

Instead, use a less expensive read-only transaction to see if the
DB is ready for use (it probably is), and only fire the expensive
RW transaction if absolutely necessary.

Signed-off-by: Matthew Heon <mheon@redhat.com>
---
 libpod/boltdb_state.go | 70 ++++++++++++++++++++++++++++----------------------
 1 file changed, 39 insertions(+), 31 deletions(-)

(limited to 'libpod')

diff --git a/libpod/boltdb_state.go b/libpod/boltdb_state.go
index 63e40a98f..12c364993 100644
--- a/libpod/boltdb_state.go
+++ b/libpod/boltdb_state.go
@@ -73,42 +73,50 @@ func NewBoltState(path string, runtime *Runtime) (State, error) {
 	// As such, just a db.Close() is fine here.
 	defer db.Close()
 
-	// Perform initial database setup
-	err = db.Update(func(tx *bolt.Tx) error {
-		if _, err := tx.CreateBucketIfNotExists(idRegistryBkt); err != nil {
-			return errors.Wrapf(err, "error creating id-registry bucket")
-		}
-		if _, err := tx.CreateBucketIfNotExists(nameRegistryBkt); err != nil {
-			return errors.Wrapf(err, "error creating name-registry bucket")
-		}
-		if _, err := tx.CreateBucketIfNotExists(nsRegistryBkt); err != nil {
-			return errors.Wrapf(err, "error creating ns-registry bucket")
-		}
-		if _, err := tx.CreateBucketIfNotExists(ctrBkt); err != nil {
-			return errors.Wrapf(err, "error creating containers bucket")
-		}
-		if _, err := tx.CreateBucketIfNotExists(allCtrsBkt); err != nil {
-			return errors.Wrapf(err, "error creating all containers bucket")
-		}
-		if _, err := tx.CreateBucketIfNotExists(podBkt); err != nil {
-			return errors.Wrapf(err, "error creating pods bucket")
-		}
-		if _, err := tx.CreateBucketIfNotExists(allPodsBkt); err != nil {
-			return errors.Wrapf(err, "error creating all pods bucket")
-		}
-		if _, err := tx.CreateBucketIfNotExists(volBkt); err != nil {
-			return errors.Wrapf(err, "error creating volume bucket")
-		}
-		if _, err := tx.CreateBucketIfNotExists(allVolsBkt); err != nil {
-			return errors.Wrapf(err, "error creating all volumes bucket")
+	createBuckets := [][]byte{
+		idRegistryBkt,
+		nameRegistryBkt,
+		nsRegistryBkt,
+		ctrBkt,
+		allCtrsBkt,
+		podBkt,
+		allPodsBkt,
+		volBkt,
+		allVolsBkt,
+		runtimeConfigBkt,
+	}
+
+	// Does the DB need an update?
+	needsUpdate := false
+	err = db.View(func(tx *bolt.Tx) error {
+		for _, bkt := range createBuckets {
+			if test := tx.Bucket(bkt); test == nil {
+				needsUpdate = true
+				break
+			}
 		}
-		if _, err := tx.CreateBucketIfNotExists(runtimeConfigBkt); err != nil {
-			return errors.Wrapf(err, "error creating runtime-config bucket")
+		return nil
+	})
+	if err != nil {
+		return nil, errors.Wrapf(err, "error checking DB schema")
+	}
+
+	if !needsUpdate {
+		state.valid = true
+		return state, nil
+	}
+
+	// Ensure schema is properly created in DB
+	err = db.Update(func(tx *bolt.Tx) error {
+		for _, bkt := range createBuckets {
+			if _, err := tx.CreateBucketIfNotExists(bkt); err != nil {
+				return errors.Wrapf(err, "error creating bucket %s", string(bkt))
+			}
 		}
 		return nil
 	})
 	if err != nil {
-		return nil, errors.Wrapf(err, "error creating initial database layout")
+		return nil, errors.Wrapf(err, "error creating buckets for DB")
 	}
 
 	state.valid = true
-- 
cgit v1.2.3-54-g00ecf