1 files changed, 54 insertions, 33 deletions
diff --git a/libpod/container_api.go b/libpod/container_api.go
index d99aec5b4..09bc46905 100644
--- a/libpod/container_api.go
+++ b/libpod/container_api.go
@@ -39,7 +39,7 @@ func (c *Container) Init(ctx context.Context) (err error) {
 
 	notRunning, err := c.checkDependenciesRunning()
 	if err != nil {
-		return errors.Wrapf(err, "error checking dependencies for container %s")
+		return errors.Wrapf(err, "error checking dependencies for container %s", c.ID())
 	}
 	if len(notRunning) > 0 {
 		depString := strings.Join(notRunning, ",")
@@ -93,7 +93,7 @@ func (c *Container) Start(ctx context.Context) (err error) {
 
 	notRunning, err := c.checkDependenciesRunning()
 	if err != nil {
-		return errors.Wrapf(err, "error checking dependencies for container %s")
+		return errors.Wrapf(err, "error checking dependencies for container %s", c.ID())
 	}
 	if len(notRunning) > 0 {
 		depString := strings.Join(notRunning, ",")
@@ -159,7 +159,7 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *AttachStreams,
 
 	notRunning, err := c.checkDependenciesRunning()
 	if err != nil {
-		return nil, errors.Wrapf(err, "error checking dependencies for container %s")
+		return nil, errors.Wrapf(err, "error checking dependencies for container %s", c.ID())
 	}
 	if len(notRunning) > 0 {
 		depString := strings.Join(notRunning, ",")
@@ -328,25 +328,25 @@ func (c *Container) Exec(tty, privileged bool, env, cmd []string, user string) e
 	if err != nil {
 		return errors.Wrapf(err, "error exec %s", c.ID())
 	}
+	chWait := make(chan error)
+	go func() {
+		chWait <- execCmd.Wait()
+	}()
+	defer close(chWait)
 
 	pidFile := c.execPidPath(sessionID)
-	// 1 second seems a reasonable time to wait
-	// See https://github.com/containers/libpod/issues/1495
-	const pidWaitTimeout = 1000
+	// 60 second seems a reasonable time to wait
+	// https://github.com/containers/libpod/issues/1495
+	// https://github.com/containers/libpod/issues/1816
+	const pidWaitTimeout = 60000
 
 	// Wait until the runtime makes the pidfile
-	// TODO: If runtime errors before the PID file is created, we have to
-	// wait for timeout here
-	if err := WaitForFile(pidFile, pidWaitTimeout*time.Millisecond); err != nil {
-		logrus.Debugf("Timed out waiting for pidfile from runtime for container %s exec", c.ID())
-
-		// Check if an error occurred in the process before we made a pidfile
-		// TODO: Wait() here is a poor choice - is there a way to see if
-		// a process has finished, instead of waiting for it to finish?
-		if err := execCmd.Wait(); err != nil {
+	exited, err := WaitForFile(pidFile, chWait, pidWaitTimeout*time.Millisecond)
+	if err != nil {
+		if exited {
+			// If the runtime exited, propagate the error we got from the process.
 			return err
 		}
-
 		return errors.Wrapf(err, "timed out waiting for runtime to create pidfile for exec session in container %s", c.ID())
 	}
 
@@ -388,7 +388,10 @@ func (c *Container) Exec(tty, privileged bool, env, cmd []string, user string) e
 		locked = false
 	}
 
-	waitErr := execCmd.Wait()
+	var waitErr error
+	if !exited {
+		waitErr = <-chWait
+	}
 
 	// Lock again
 	if !c.batched {
@@ -672,22 +675,27 @@ func (c *Container) Batch(batchFunc func(*Container) error) error {
 	return err
 }
 
-// Sync updates the current state of the container, checking whether its state
-// has changed
-// Sync can only be used inside Batch() - otherwise, it will be done
-// automatically.
-// When called outside Batch(), Sync() is a no-op
+// Sync updates the status of a container by querying the OCI runtime.
+// If the container has not been created inside the OCI runtime, nothing will be
+// done.
+// Most of the time, Podman does not explicitly query the OCI runtime for
+// container status, and instead relies upon exit files created by conmon.
+// This can cause a disconnect between running state and what Podman sees in
+// cases where Conmon was killed unexpected, or runc was upgraded.
+// Running a manual Sync() ensures that container state will be correct in
+// such situations.
 func (c *Container) Sync() error {
 	if !c.batched {
-		return nil
+		c.lock.Lock()
+		defer c.lock.Unlock()
 	}
 
 	// If runtime knows about the container, update its status in runtime
 	// And then save back to disk
 	if (c.state.State != ContainerStateUnknown) &&
-		(c.state.State != ContainerStateConfigured) {
+		(c.state.State != ContainerStateConfigured) &&
+		(c.state.State != ContainerStateExited) {
 		oldState := c.state.State
-		// TODO: optionally replace this with a stat for the exit file
 		if err := c.runtime.ociRuntime.updateContainerStatus(c, true); err != nil {
 			return err
 		}
@@ -715,7 +723,7 @@ func (c *Container) RestartWithTimeout(ctx context.Context, timeout uint) (err e
 
 	notRunning, err := c.checkDependenciesRunning()
 	if err != nil {
-		return errors.Wrapf(err, "error checking dependencies for container %s")
+		return errors.Wrapf(err, "error checking dependencies for container %s", c.ID())
 	}
 	if len(notRunning) > 0 {
 		depString := strings.Join(notRunning, ",")
@@ -800,7 +808,7 @@ func (c *Container) Refresh(ctx context.Context) error {
 		return err
 	}
 
-	logrus.Debugf("Successfully refresh container %s state")
+	logrus.Debugf("Successfully refresh container %s state", c.ID())
 
 	// Initialize the container if it was created in runc
 	if wasCreated || wasRunning || wasPaused {
@@ -829,9 +837,22 @@ func (c *Container) Refresh(ctx context.Context) error {
 	return nil
 }
 
+// ContainerCheckpointOptions is a struct used to pass the parameters
+// for checkpointing (and restoring) to the corresponding functions
+type ContainerCheckpointOptions struct {
+	// Keep tells the API to not delete checkpoint artifacts
+	Keep bool
+	// KeepRunning tells the API to keep the container running
+	// after writing the checkpoint to disk
+	KeepRunning bool
+	// TCPEstablished tells the API to checkpoint a container
+	// even if it contains established TCP connections
+	TCPEstablished bool
+}
+
 // Checkpoint checkpoints a container
-func (c *Container) Checkpoint(ctx context.Context, keep bool) error {
-	logrus.Debugf("Trying to checkpoint container %s", c)
+func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointOptions) error {
+	logrus.Debugf("Trying to checkpoint container %s", c.ID())
 	if !c.batched {
 		c.lock.Lock()
 		defer c.lock.Unlock()
@@ -841,12 +862,12 @@ func (c *Container) Checkpoint(ctx context.Context, keep bool) error {
 		}
 	}
 
-	return c.checkpoint(ctx, keep)
+	return c.checkpoint(ctx, options)
 }
 
 // Restore restores a container
-func (c *Container) Restore(ctx context.Context, keep bool) (err error) {
-	logrus.Debugf("Trying to restore container %s", c)
+func (c *Container) Restore(ctx context.Context, options ContainerCheckpointOptions) (err error) {
+	logrus.Debugf("Trying to restore container %s", c.ID())
 	if !c.batched {
 		c.lock.Lock()
 		defer c.lock.Unlock()
@@ -856,5 +877,5 @@ func (c *Container) Restore(ctx context.Context, keep bool) (err error) {
 		}
 	}
 
-	return c.restore(ctx, keep)
+	return c.restore(ctx, options)
 }