From ae5aac50aaacbdcc20e622e86f40c02136690fe7 Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Fri, 1 Dec 2017 13:26:58 -0500 Subject: Add handling for system restart in libpod Signed-off-by: Matthew Heon --- libpod/container.go | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++-- libpod/runtime.go | 45 ++++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 2 deletions(-) (limited to 'libpod') diff --git a/libpod/container.go b/libpod/container.go index bd7455147..e1b002548 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -407,8 +407,54 @@ func (c *Container) teardownStorage() error { return nil } +// Refresh refreshes the container's state after a restart +func (c *Container) refresh() error { + c.lock.Lock() + defer c.lock.Unlock() + + if !c.valid { + return errors.Wrapf(ErrCtrRemoved, "container %s has been removed from the state", c.ID()) + } + + // We need to get the container's temporary directory from c/storage + // It was lost in the reboot and must be recreated + dir, err := c.runtime.storageService.GetRunDir(c.ID()) + if err != nil { + return errors.Wrapf(err, "error retrieving temporary directory for container %s", c.ID()) + } + c.state.RunDir = dir + + // The container is no longer mounted + c.state.Mounted = false + c.state.Mountpoint = "" + + // The container is no longe running + c.state.PID = 0 + + // Check the container's state. If it's not created in runc yet, we're + // done + if c.state.State == ContainerStateConfigured { + if err := c.runtime.state.SaveContainer(c); err != nil { + return errors.Wrapf(err, "error refreshing state for container %s", c.ID()) + } + + return nil + } + + // The container must be recreated in runc + if err := c.init(); err != nil { + return err + } + + if err := c.runtime.state.SaveContainer(c); err != nil { + return errors.Wrapf(err, "error refreshing state for container %s", c.ID()) + } + + return nil +} + // Init creates a container in the OCI runtime -func (c *Container) Init() (err error) { +func (c *Container) Init() error { c.lock.Lock() defer c.lock.Unlock() @@ -420,7 +466,12 @@ func (c *Container) Init() (err error) { return errors.Wrapf(ErrCtrExists, "container %s has already been created in runtime", c.ID()) } - // Mount storage for the container + return c.init() +} + +// Creates container in OCI runtime +// Internal only - does not lock or check state +func (c *Container) init() (err error) { if err := c.mountStorage(); err != nil { return err } @@ -436,6 +487,17 @@ func (c *Container) Init() (err error) { // Save the OCI spec to disk jsonPath := filepath.Join(c.bundlePath(), "config.json") + // If the OCI spec already exists, replace it + if _, err := os.Stat(jsonPath); err != nil { + if !os.IsNotExist(err) { + return errors.Wrapf(err, "error doing stat on container %s spec", c.ID()) + } + } else { + // No error, the spec exists. Remove so we can replace. + if err := os.Remove(jsonPath); err != nil { + return errors.Wrapf(err, "error replacing spec of container %s", c.ID()) + } + } fileJSON, err := json.Marshal(c.runningSpec) if err != nil { return errors.Wrapf(err, "error exporting runtime spec for container %s to JSON", c.ID()) diff --git a/libpod/runtime.go b/libpod/runtime.go index b86e06546..7997679e2 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -173,6 +173,24 @@ func NewRuntime(options ...RuntimeOption) (runtime *Runtime, err error) { runtime.state = state } + // We now need to see if the system has restarted + // We check for the presence of a file in our tmp directory to verify this + runtimeAliveFile := filepath.Join(runtime.config.TmpDir, "alive") + _, err = os.Stat(runtimeAliveFile) + if err != nil { + // If the file doesn't exist, we need to refresh the state + // This will trigger on first use as well, but refreshing an + // empty state only creates a single file + // As such, it's not really a performance concern + if os.IsNotExist(err) { + if err2 := runtime.refresh(runtimeAliveFile); err2 != nil { + return nil, err2 + } + } else { + return nil, errors.Wrapf(err, "error reading runtime status file %s", runtimeAliveFile) + } + } + // Mark the runtime as valid - ready to be used, cannot be modified // further runtime.valid = true @@ -238,3 +256,30 @@ func (r *Runtime) Shutdown(force bool) error { return lastError } + +// Reconfigures the runtime after a reboot +// Refreshes the state, recreating temporary files +// Does not check validity as the runtime is not valid until after this has run +// TODO: there's a potential race here, where multiple libpods could be in this +// function before the runtime ready file is created +// This probably doesn't matter as the actual container operations are locked +func (r *Runtime) refresh(alivePath string) error { + // We need to refresh the state of all containers + ctrs, err := r.state.AllContainers() + if err != nil { + return errors.Wrapf(err, "error retrieving all containers from state") + } + for _, ctr := range ctrs { + if err := ctr.refresh(); err != nil { + return err + } + } + + file, err := os.OpenFile(alivePath, os.O_RDONLY|os.O_CREATE, 0644) + if err != nil { + return errors.Wrapf(err, "error creating runtime status file %s", alivePath) + } + defer file.Close() + + return nil +} -- cgit v1.2.3-54-g00ecf From 92bc448624e29fc84087bafcccd0e52b328a53e4 Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Mon, 4 Dec 2017 14:44:03 -0500 Subject: Fix potential race condition in initializing libpod Signed-off-by: Matthew Heon --- libpod/container.go | 2 +- libpod/runtime.go | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) (limited to 'libpod') diff --git a/libpod/container.go b/libpod/container.go index e1b002548..c53184ea3 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -413,7 +413,7 @@ func (c *Container) refresh() error { defer c.lock.Unlock() if !c.valid { - return errors.Wrapf(ErrCtrRemoved, "container %s has been removed from the state", c.ID()) + return errors.Wrapf(ErrCtrRemoved, "container %s is not valid - may have been removed", c.ID()) } // We need to get the container's temporary directory from c/storage diff --git a/libpod/runtime.go b/libpod/runtime.go index 7997679e2..103faf3f8 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -175,7 +175,19 @@ func NewRuntime(options ...RuntimeOption) (runtime *Runtime, err error) { // We now need to see if the system has restarted // We check for the presence of a file in our tmp directory to verify this + // This check must be locked to prevent races + runtimeAliveLock := filepath.Join(runtime.config.TmpDir, "alive.lck") runtimeAliveFile := filepath.Join(runtime.config.TmpDir, "alive") + aliveLock, err := storage.GetLockfile(runtimeAliveLock) + if err != nil { + return nil, errors.Wrapf(err, "error acquiring runtime init lock") + } + // Acquire the lock and hold it until we return + // This ensures that no two processes will be in runtime.refresh at once + // TODO: we can't close the FD in this lock, so we should keep it around + // and use it to lock important operations + aliveLock.Lock() + defer aliveLock.Unlock() _, err = os.Stat(runtimeAliveFile) if err != nil { // If the file doesn't exist, we need to refresh the state @@ -260,9 +272,6 @@ func (r *Runtime) Shutdown(force bool) error { // Reconfigures the runtime after a reboot // Refreshes the state, recreating temporary files // Does not check validity as the runtime is not valid until after this has run -// TODO: there's a potential race here, where multiple libpods could be in this -// function before the runtime ready file is created -// This probably doesn't matter as the actual container operations are locked func (r *Runtime) refresh(alivePath string) error { // We need to refresh the state of all containers ctrs, err := r.state.AllContainers() -- cgit v1.2.3-54-g00ecf From ceb7fb1c60e559337523259f1e0063135e686c95 Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Tue, 5 Dec 2017 09:30:51 -0500 Subject: Do not recreate OCI spec in init() if it exists Signed-off-by: Matthew Heon --- libpod/container.go | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) (limited to 'libpod') diff --git a/libpod/container.go b/libpod/container.go index c53184ea3..d12167d15 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -476,38 +476,37 @@ func (c *Container) init() (err error) { return err } - // Make the OCI runtime spec we will use - g := generate.NewFromSpec(c.config.Spec) - // Mount ShmDir from host into container - g.AddBindMount(c.config.ShmDir, "/dev/shm", []string{"rw"}) - c.runningSpec = g.Spec() - c.runningSpec.Root.Path = c.state.Mountpoint - c.runningSpec.Annotations[crioAnnotations.Created] = c.config.CreatedTime.Format(time.RFC3339Nano) - c.runningSpec.Annotations["org.opencontainers.image.stopSignal"] = fmt.Sprintf("%d", c.config.StopSignal) - // Save the OCI spec to disk jsonPath := filepath.Join(c.bundlePath(), "config.json") - // If the OCI spec already exists, replace it if _, err := os.Stat(jsonPath); err != nil { if !os.IsNotExist(err) { return errors.Wrapf(err, "error doing stat on container %s spec", c.ID()) } - } else { - // No error, the spec exists. Remove so we can replace. - if err := os.Remove(jsonPath); err != nil { - return errors.Wrapf(err, "error replacing spec of container %s", c.ID()) + + // The spec does not exist, needs to be created + g := generate.NewFromSpec(c.config.Spec) + // Mount ShmDir from host into container + g.AddBindMount(c.config.ShmDir, "/dev/shm", []string{"rw"}) + c.runningSpec = g.Spec() + c.runningSpec.Root.Path = c.state.Mountpoint + c.runningSpec.Annotations[crioAnnotations.Created] = c.config.CreatedTime.Format(time.RFC3339Nano) + c.runningSpec.Annotations["org.opencontainers.image.stopSignal"] = fmt.Sprintf("%d", c.config.StopSignal) + + fileJSON, err := json.Marshal(c.runningSpec) + if err != nil { + return errors.Wrapf(err, "error exporting runtime spec for container %s to JSON", c.ID()) } + if err := ioutil.WriteFile(jsonPath, fileJSON, 0644); err != nil { + return errors.Wrapf(err, "error writing runtime spec JSON to file for container %s", c.ID()) + } + + logrus.Debugf("Created OCI spec for container %s at %s", c.ID(), jsonPath) + } else { + // The spec exists + logrus.Debugf("Using existing OCI spec for container %s at %s", c.ID(), jsonPath) } - fileJSON, err := json.Marshal(c.runningSpec) - if err != nil { - return errors.Wrapf(err, "error exporting runtime spec for container %s to JSON", c.ID()) - } - if err := ioutil.WriteFile(jsonPath, fileJSON, 0644); err != nil { - return errors.Wrapf(err, "error writing runtime spec JSON to file for container %s", c.ID()) - } - c.state.ConfigPath = jsonPath - logrus.Debugf("Created OCI spec for container %s at %s", c.ID(), jsonPath) + c.state.ConfigPath = jsonPath // With the spec complete, do an OCI create // TODO set cgroup parent in a sane fashion -- cgit v1.2.3-54-g00ecf From b66287689a0c8c9f2f8c01cdca065fb1f6e8d872 Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Wed, 6 Dec 2017 11:15:17 -0500 Subject: Do not recreate containers in runc after restart Signed-off-by: Matthew Heon --- libpod/container.go | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) (limited to 'libpod') diff --git a/libpod/container.go b/libpod/container.go index d12167d15..adc52d85f 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -428,23 +428,11 @@ func (c *Container) refresh() error { c.state.Mounted = false c.state.Mountpoint = "" - // The container is no longe running + // The container is no longer running c.state.PID = 0 - // Check the container's state. If it's not created in runc yet, we're - // done - if c.state.State == ContainerStateConfigured { - if err := c.runtime.state.SaveContainer(c); err != nil { - return errors.Wrapf(err, "error refreshing state for container %s", c.ID()) - } - - return nil - } - - // The container must be recreated in runc - if err := c.init(); err != nil { - return err - } + // The container no longer exists in runc + c.state.State = ContainerStateConfigured if err := c.runtime.state.SaveContainer(c); err != nil { return errors.Wrapf(err, "error refreshing state for container %s", c.ID()) @@ -454,7 +442,7 @@ func (c *Container) refresh() error { } // Init creates a container in the OCI runtime -func (c *Container) Init() error { +func (c *Container) Init() (err error) { c.lock.Lock() defer c.lock.Unlock() @@ -466,12 +454,6 @@ func (c *Container) Init() error { return errors.Wrapf(ErrCtrExists, "container %s has already been created in runtime", c.ID()) } - return c.init() -} - -// Creates container in OCI runtime -// Internal only - does not lock or check state -func (c *Container) init() (err error) { if err := c.mountStorage(); err != nil { return err } -- cgit v1.2.3-54-g00ecf From b71cde19c8b511c054ee42084113ce97ed6d5b62 Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Thu, 7 Dec 2017 13:15:34 -0500 Subject: Add ability to refresh state in DB Also, ensure we always recreate runtime spec so our net namespace paths will be correct Signed-off-by: Matthew Heon --- libpod/container.go | 57 +++++++++++++++++++++-------------------------- libpod/in_memory_state.go | 6 +++++ libpod/runtime.go | 8 ++++++- libpod/sql_state.go | 46 ++++++++++++++++++++++++++++++++++++++ libpod/state.go | 3 +++ 5 files changed, 88 insertions(+), 32 deletions(-) (limited to 'libpod') diff --git a/libpod/container.go b/libpod/container.go index adc52d85f..ce4d6ea84 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -424,16 +424,6 @@ func (c *Container) refresh() error { } c.state.RunDir = dir - // The container is no longer mounted - c.state.Mounted = false - c.state.Mountpoint = "" - - // The container is no longer running - c.state.PID = 0 - - // The container no longer exists in runc - c.state.State = ContainerStateConfigured - if err := c.runtime.state.SaveContainer(c); err != nil { return errors.Wrapf(err, "error refreshing state for container %s", c.ID()) } @@ -458,35 +448,40 @@ func (c *Container) Init() (err error) { return err } - // Save the OCI spec to disk + // If the OCI spec already exists, we need to replace it + // Cannot guarantee some things, e.g. network namespaces, have the same + // paths jsonPath := filepath.Join(c.bundlePath(), "config.json") if _, err := os.Stat(jsonPath); err != nil { if !os.IsNotExist(err) { return errors.Wrapf(err, "error doing stat on container %s spec", c.ID()) } - - // The spec does not exist, needs to be created - g := generate.NewFromSpec(c.config.Spec) - // Mount ShmDir from host into container - g.AddBindMount(c.config.ShmDir, "/dev/shm", []string{"rw"}) - c.runningSpec = g.Spec() - c.runningSpec.Root.Path = c.state.Mountpoint - c.runningSpec.Annotations[crioAnnotations.Created] = c.config.CreatedTime.Format(time.RFC3339Nano) - c.runningSpec.Annotations["org.opencontainers.image.stopSignal"] = fmt.Sprintf("%d", c.config.StopSignal) - - fileJSON, err := json.Marshal(c.runningSpec) - if err != nil { - return errors.Wrapf(err, "error exporting runtime spec for container %s to JSON", c.ID()) - } - if err := ioutil.WriteFile(jsonPath, fileJSON, 0644); err != nil { - return errors.Wrapf(err, "error writing runtime spec JSON to file for container %s", c.ID()) + // The spec does not exist, we're fine + } else { + // The spec exists, need to remove it + if err := os.Remove(jsonPath); err != nil { + return errors.Wrapf(err, "error replacing runtime spec for container %s", c.ID()) } + } - logrus.Debugf("Created OCI spec for container %s at %s", c.ID(), jsonPath) - } else { - // The spec exists - logrus.Debugf("Using existing OCI spec for container %s at %s", c.ID(), jsonPath) + // Save OCI spec to disk + g := generate.NewFromSpec(c.config.Spec) + // Mount ShmDir from host into container + g.AddBindMount(c.config.ShmDir, "/dev/shm", []string{"rw"}) + c.runningSpec = g.Spec() + c.runningSpec.Root.Path = c.state.Mountpoint + c.runningSpec.Annotations[crioAnnotations.Created] = c.config.CreatedTime.Format(time.RFC3339Nano) + c.runningSpec.Annotations["org.opencontainers.image.stopSignal"] = fmt.Sprintf("%d", c.config.StopSignal) + + fileJSON, err := json.Marshal(c.runningSpec) + if err != nil { + return errors.Wrapf(err, "error exporting runtime spec for container %s to JSON", c.ID()) } + if err := ioutil.WriteFile(jsonPath, fileJSON, 0644); err != nil { + return errors.Wrapf(err, "error writing runtime spec JSON to file for container %s", c.ID()) + } + + logrus.Debugf("Created OCI spec for container %s at %s", c.ID(), jsonPath) c.state.ConfigPath = jsonPath diff --git a/libpod/in_memory_state.go b/libpod/in_memory_state.go index 5d03e62e6..4e4cbb664 100644 --- a/libpod/in_memory_state.go +++ b/libpod/in_memory_state.go @@ -38,6 +38,12 @@ func (s *InMemoryState) Close() error { return nil } +// Refresh clears container and pod stats after a reboot +// In-memory state won't survive a reboot so this is a no-op +func (s *InMemoryState) Refresh() error { + return nil +} + // Container retrieves a container from its full ID func (s *InMemoryState) Container(id string) (*Container, error) { if id == "" { diff --git a/libpod/runtime.go b/libpod/runtime.go index 103faf3f8..36225bf69 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -273,7 +273,13 @@ func (r *Runtime) Shutdown(force bool) error { // Refreshes the state, recreating temporary files // Does not check validity as the runtime is not valid until after this has run func (r *Runtime) refresh(alivePath string) error { - // We need to refresh the state of all containers + // First clear the state in the database + if err := r.state.Refresh(); err != nil { + return err + } + + // Next refresh the state of all containers to recreate dirs and + // namespaces ctrs, err := r.state.AllContainers() if err != nil { return errors.Wrapf(err, "error retrieving all containers from state") diff --git a/libpod/sql_state.go b/libpod/sql_state.go index fb6be85d7..bba697d18 100644 --- a/libpod/sql_state.go +++ b/libpod/sql_state.go @@ -103,6 +103,52 @@ func (s *SQLState) Close() error { return nil } +// Refresh clears the state after a reboot +// Resets mountpoint, PID, state for all containers +func (s *SQLState) Refresh() (err error) { + const refresh = `UPDATE containerState SET + State=?, + Mountpoint=?, + Pid=?;` + + s.lock.Lock() + defer s.lock.Unlock() + + if !s.valid { + return ErrDBClosed + } + + tx, err := s.db.Begin() + if err != nil { + return errors.Wrapf(err, "error beginning database transaction") + } + defer func() { + if err != nil { + if err2 := tx.Rollback(); err2 != nil { + logrus.Errorf("Error rolling back transaction to refresh state: %v", err2) + } + } + }() + + // Refresh container state + // The constants could be moved into the SQL, but keeping them here + // will keep us in sync in case ContainerStateConfigured ever changes in + // the container state + _, err = tx.Exec(refresh, + ContainerStateConfigured, + "", + 0) + if err != nil { + return errors.Wrapf(err, "error refreshing database state") + } + + if err := tx.Commit(); err != nil { + return errors.Wrapf(err, "error committing transaction to refresh database") + } + + return nil +} + // Container retrieves a container from its full ID func (s *SQLState) Container(id string) (*Container, error) { const query = `SELECT containers.*, diff --git a/libpod/state.go b/libpod/state.go index 4093f14f1..4a79b8d2d 100644 --- a/libpod/state.go +++ b/libpod/state.go @@ -6,6 +6,9 @@ type State interface { // connections) that may be required Close() error + // Refresh clears container and pod states after a reboot + Refresh() error + // Accepts full ID of container Container(id string) (*Container, error) // Accepts full or partial IDs (as long as they are unique) and names -- cgit v1.2.3-54-g00ecf