package libpod import ( "context" "fmt" "io" "io/ioutil" "os" "strconv" "sync" "time" "github.com/containers/libpod/libpod/events" "github.com/containers/libpod/pkg/lookup" "github.com/containers/storage/pkg/stringid" "github.com/docker/docker/oci/caps" "github.com/opentracing/opentracing-go" "github.com/pkg/errors" "github.com/sirupsen/logrus" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/tools/remotecommand" ) // Init creates a container in the OCI runtime func (c *Container) Init(ctx context.Context) (err error) { span, _ := opentracing.StartSpanFromContext(ctx, "containerInit") span.SetTag("struct", "container") defer span.Finish() if !c.batched { c.lock.Lock() defer c.lock.Unlock() if err := c.syncContainer(); err != nil { return err } } if !(c.state.State == ContainerStateConfigured || c.state.State == ContainerStateStopped || c.state.State == ContainerStateExited) { return errors.Wrapf(ErrCtrStateInvalid, "container %s has already been created in runtime", c.ID()) } // don't recursively start if err := c.checkDependenciesAndHandleError(ctx); err != nil { return err } if err := c.prepare(); err != nil { if err2 := c.cleanup(ctx); err2 != nil { logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2) } return err } if c.state.State == ContainerStateStopped { // Reinitialize the container return c.reinit(ctx, false) } // Initialize the container for the first time return c.init(ctx, false) } // Start starts a container. // Start can start configured, created or stopped containers. // For configured containers, the container will be initialized first, then // started. // Stopped containers will be deleted and re-created in runc, undergoing a fresh // Init(). // If recursive is set, Start will also start all containers this container depends on. func (c *Container) Start(ctx context.Context, recursive bool) (err error) { span, _ := opentracing.StartSpanFromContext(ctx, "containerStart") span.SetTag("struct", "container") defer span.Finish() if !c.batched { c.lock.Lock() defer c.lock.Unlock() if err := c.syncContainer(); err != nil { return err } } if err := c.prepareToStart(ctx, recursive); err != nil { return err } // Start the container return c.start() } // StartAndAttach starts a container and attaches to it. // StartAndAttach can start configured, created or stopped containers. // For configured containers, the container will be initialized first, then // started. // Stopped containers will be deleted and re-created in runc, undergoing a fresh // Init(). // If successful, an error channel will be returned containing the result of the // attach call. // The channel will be closed automatically after the result of attach has been // sent. // If recursive is set, StartAndAttach will also start all containers this container depends on. func (c *Container) StartAndAttach(ctx context.Context, streams *AttachStreams, keys string, resize <-chan remotecommand.TerminalSize, recursive bool) (attachResChan <-chan error, err error) { if !c.batched { c.lock.Lock() defer c.lock.Unlock() if err := c.syncContainer(); err != nil { return nil, err } } if err := c.prepareToStart(ctx, recursive); err != nil { return nil, err } attachChan := make(chan error) // We need to ensure that we don't return until start() fired in attach. // Use a WaitGroup to sync this. wg := new(sync.WaitGroup) wg.Add(1) // Attach to the container before starting it go func() { if err := c.attach(streams, keys, resize, true, wg); err != nil { attachChan <- err } close(attachChan) }() wg.Wait() c.newContainerEvent(events.Attach) return attachChan, nil } // RestartWithTimeout restarts a running container and takes a given timeout in uint func (c *Container) RestartWithTimeout(ctx context.Context, timeout uint) (err error) { if !c.batched { c.lock.Lock() defer c.lock.Unlock() if err := c.syncContainer(); err != nil { return err } } if err = c.checkDependenciesAndHandleError(ctx); err != nil { return err } return c.restartWithTimeout(ctx, timeout) } // Stop uses the container's stop signal (or SIGTERM if no signal was specified) // to stop the container, and if it has not stopped after container's stop // timeout, SIGKILL is used to attempt to forcibly stop the container // Default stop timeout is 10 seconds, but can be overridden when the container // is created func (c *Container) Stop() error { // Stop with the container's given timeout return c.StopWithTimeout(c.config.StopTimeout) } // StopWithTimeout is a version of Stop that allows a timeout to be specified // manually. If timeout is 0, SIGKILL will be used immediately to kill the // container. func (c *Container) StopWithTimeout(timeout uint) error { if !c.batched { c.lock.Lock() defer c.lock.Unlock() if err := c.syncContainer(); err != nil { return err } } if c.state.State == ContainerStateConfigured || c.state.State == ContainerStateUnknown || c.state.State == ContainerStatePaused { return errors.Wrapf(ErrCtrStateInvalid, "can only stop created, running, or stopped containers. %s in state %s", c.ID(), c.state.State.String()) } if c.state.State == ContainerStateStopped || c.state.State == ContainerStateExited { return ErrCtrStopped } defer c.newContainerEvent(events.Stop) return c.stop(timeout) } // Kill sends a signal to a container func (c *Container) Kill(signal uint) error { if !c.batched { c.lock.Lock() defer c.lock.Unlock() if err := c.syncContainer(); err != nil { return err } } if c.state.State != ContainerStateRunning { return errors.Wrapf(ErrCtrStateInvalid, "can only kill running containers. %s in state %s", c.ID(), c.state.State.String()) } defer c.newContainerEvent(events.Kill) if err := c.runtime.ociRuntime.killContainer(c, signal); err != nil { return err } c.state.StoppedByUser = true return c.save() } // Exec starts a new process inside the container // TODO investigate allowing exec without attaching func (c *Container) Exec(tty, privileged bool, env, cmd []string, user, workDir string, streams *AttachStreams, preserveFDs int) error { var capList []string locked := false if !c.batched { locked = true c.lock.Lock() defer func() { if locked { c.lock.Unlock() } }() if err := c.syncContainer(); err != nil { return err } } conState := c.state.State // TODO can probably relax this once we track exec sessions if conState != ContainerStateRunning { return errors.Wrapf(ErrCtrStateInvalid, "cannot exec into container that is not running") } if privileged || c.config.Privileged { capList = caps.GetAllCapabilities() } // If user was set, look it up in the container to get a UID to use on // the host hostUser := "" if user != "" { execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, user, nil) if err != nil { return err } // runc expects user formatted as uid:gid hostUser = fmt.Sprintf("%d:%d", execUser.Uid, execUser.Gid) } // Generate exec session ID // Ensure we don't conflict with an existing session ID sessionID := stringid.GenerateNonCryptoID() found := true // This really ought to be a do-while, but Go doesn't have those... for found { found = false for id := range c.state.ExecSessions { if id == sessionID { found = true break } } if found == true { sessionID = stringid.GenerateNonCryptoID() } } logrus.Debugf("Creating new exec session in container %s with session id %s", c.ID(), sessionID) execCmd, err := c.runtime.ociRuntime.execContainer(c, cmd, capList, env, tty, workDir, hostUser, sessionID, streams, preserveFDs) if err != nil { return errors.Wrapf(err, "error exec %s", c.ID()) } chWait := make(chan error) go func() { chWait <- execCmd.Wait() close(chWait) }() pidFile := c.execPidPath(sessionID) // 60 second seems a reasonable time to wait // https://github.com/containers/libpod/issues/1495 // https://github.com/containers/libpod/issues/1816 const pidWaitTimeout = 60000 // Wait until the runtime makes the pidfile exited, err := WaitForFile(pidFile, chWait, pidWaitTimeout*time.Millisecond) if err != nil { if exited { // If the runtime exited, propagate the error we got from the process. return err } return errors.Wrapf(err, "timed out waiting for runtime to create pidfile for exec session in container %s", c.ID()) } // Pidfile exists, read it contents, err := ioutil.ReadFile(pidFile) if err != nil { // We don't know the PID of the exec session // However, it may still be alive // TODO handle this better return errors.Wrapf(err, "could not read pidfile for exec session %s in container %s", sessionID, c.ID()) } pid, err := strconv.ParseInt(string(contents), 10, 32) if err != nil { // As above, we don't have a valid PID, but the exec session is likely still alive // TODO handle this better return errors.Wrapf(err, "error parsing PID of exec session %s in container %s", sessionID, c.ID()) } // We have the PID, add it to state if c.state.ExecSessions == nil { c.state.ExecSessions = make(map[string]*ExecSession) } session := new(ExecSession) session.ID = sessionID session.Command = cmd session.PID = int(pid) c.state.ExecSessions[sessionID] = session if err := c.save(); err != nil { // Now we have a PID but we can't save it in the DB // TODO handle this better return errors.Wrapf(err, "error saving exec sessions %s for container %s", sessionID, c.ID()) } c.newContainerEvent(events.Exec) logrus.Debugf("Successfully started exec session %s in container %s", sessionID, c.ID()) // Unlock so other processes can use the container if !c.batched { c.lock.Unlock() locked = false } var waitErr error if !exited { waitErr = <-chWait } // Lock again if !c.batched { locked = true c.lock.Lock() } // Sync the container again to pick up changes in state if err := c.syncContainer(); err != nil { return errors.Wrapf(err, "error syncing container %s state to remove exec session %s", c.ID(), sessionID) } // Remove the exec session from state delete(c.state.ExecSessions, sessionID) if err := c.save(); err != nil { logrus.Errorf("Error removing exec session %s from container %s state: %v", sessionID, c.ID(), err) } return waitErr } // AttachStreams contains streams that will be attached to the container type AttachStreams struct { // OutputStream will be attached to container's STDOUT OutputStream io.WriteCloser // ErrorStream will be attached to container's STDERR ErrorStream io.WriteCloser // InputStream will be attached to container's STDIN InputStream io.Reader // AttachOutput is whether to attach to STDOUT // If false, stdout will not be attached AttachOutput bool // AttachError is whether to attach to STDERR // If false, stdout will not be attached AttachError bool // AttachInput is whether to attach to STDIN // If false, stdout will not be attached AttachInput bool } // Attach attaches to a container func (c *Container) Attach(streams *AttachStreams, keys string, resize <-chan remotecommand.TerminalSize) error { if !c.batched { c.lock.Lock() if err := c.syncContainer(); err != nil { c.lock.Unlock() return err } c.lock.Unlock() } if c.state.State != ContainerStateCreated && c.state.State != ContainerStateRunning && c.state.State != ContainerStateExited { return errors.Wrapf(ErrCtrStateInvalid, "can only attach to created or running containers") } defer c.newContainerEvent(events.Attach) return c.attach(streams, keys, resize, false, nil) } // Mount mounts a container's filesystem on the host // The path where the container has been mounted is returned func (c *Container) Mount() (string, error) { if !c.batched { c.lock.Lock() defer c.lock.Unlock() if err := c.syncContainer(); err != nil { return "", err } } defer c.newContainerEvent(events.Mount) return c.mount() } // Unmount unmounts a container's filesystem on the host func (c *Container) Unmount(force bool) error { if !c.batched { c.lock.Lock() defer c.lock.Unlock() if err := c.syncContainer(); err != nil { return err } } if c.state.Mounted { mounted, err := c.runtime.storageService.MountedContainerImage(c.ID()) if err != nil { return errors.Wrapf(err, "can't determine how many times %s is mounted, refusing to unmount", c.ID()) } if mounted == 1 { if c.state.State == ContainerStateRunning || c.state.State == ContainerStatePaused { return errors.Wrapf(ErrCtrStateInvalid, "cannot unmount storage for container %s as it is running or paused", c.ID()) } if len(c.state.ExecSessions) != 0 { return errors.Wrapf(ErrCtrStateInvalid, "container %s has active exec sessions, refusing to unmount", c.ID()) } return errors.Wrapf(ErrInternal, "can't unmount %s last mount, it is still in use", c.ID()) } } defer c.newContainerEvent(events.Unmount) return c.unmount(force) } // Pause pauses a container func (c *Container) Pause() error { if !c.batched { c.lock.Lock() defer c.lock.Unlock() if err := c.syncContainer(); err != nil { return err } } if c.state.State == ContainerStatePaused { return errors.Wrapf(ErrCtrStateInvalid, "%q is already paused", c.ID()) } if c.state.State != ContainerStateRunning { return errors.Wrapf(ErrCtrStateInvalid, "%q is not running, can't pause", c.state.State) } defer c.newContainerEvent(events.Pause) return c.pause() } // Unpause unpauses a container func (c *Container) Unpause() error { if !c.batched { c.lock.Lock() defer c.lock.Unlock() if err := c.syncContainer(); err != nil { return err } } if c.state.State != ContainerStatePaused { return errors.Wrapf(ErrCtrStateInvalid, "%q is not paused, can't unpause", c.ID()) } defer c.newContainerEvent(events.Unpause) return c.unpause() } // Export exports a container's root filesystem as a tar archive // The archive will be saved as a file at the given path func (c *Container) Export(path string) error { if !c.batched { c.lock.Lock() defer c.lock.Unlock() if err := c.syncContainer(); err != nil { return err } } defer c.newContainerEvent(events.Export) return c.export(path) } // AddArtifact creates and writes to an artifact file for the container func (c *Container) AddArtifact(name string, data []byte) error { if !c.valid { return ErrCtrRemoved } return ioutil.WriteFile(c.getArtifactPath(name), data, 0740) } // GetArtifact reads the specified artifact file from the container func (c *Container) GetArtifact(name string) ([]byte, error) { if !c.valid { return nil, ErrCtrRemoved } return ioutil.ReadFile(c.getArtifactPath(name)) } // RemoveArtifact deletes the specified artifacts file func (c *Container) RemoveArtifact(name string) error { if !c.valid { return ErrCtrRemoved } return os.Remove(c.getArtifactPath(name)) } // Wait blocks until the container exits and returns its exit code. func (c *Container) Wait() (int32, error) { return c.WaitWithInterval(DefaultWaitInterval) } // WaitWithInterval blocks until the container to exit and returns its exit // code. The argument is the interval at which checks the container's status. func (c *Container) WaitWithInterval(waitTimeout time.Duration) (int32, error) { if !c.valid { return -1, ErrCtrRemoved } err := wait.PollImmediateInfinite(waitTimeout, func() (bool, error) { logrus.Debugf("Checking container %s status...", c.ID()) stopped, err := c.isStopped() if err != nil { return false, err } if !stopped { return false, nil } return true, nil }, ) if err != nil { return 0, err } exitCode := c.state.ExitCode return exitCode, nil } // Cleanup unmounts all mount points in container and cleans up container storage // It also cleans up the network stack func (c *Container) Cleanup(ctx context.Context) error { if !c.batched { c.lock.Lock() defer c.lock.Unlock() if err := c.syncContainer(); err != nil { return err } } // Check if state is good if c.state.State == ContainerStateRunning || c.state.State == ContainerStatePaused { return errors.Wrapf(ErrCtrStateInvalid, "container %s is running or paused, refusing to clean up", c.ID()) } // Handle restart policy. // Returns a bool indicating whether we actually restarted. // If we did, don't proceed to cleanup - just exit. didRestart, err := c.handleRestartPolicy(ctx) if err != nil { return err } if didRestart { return nil } // If we didn't restart, we perform a normal cleanup // Check if we have active exec sessions if len(c.state.ExecSessions) != 0 { return errors.Wrapf(ErrCtrStateInvalid, "container %s has active exec sessions, refusing to clean up", c.ID()) } defer c.newContainerEvent(events.Cleanup) return c.cleanup(ctx) } // Batch starts a batch operation on the given container // All commands in the passed function will execute under the same lock and // without syncronyzing state after each operation // This will result in substantial performance benefits when running numerous // commands on the same container // Note that the container passed into the Batch function cannot be removed // during batched operations. runtime.RemoveContainer can only be called outside // of Batch // Any error returned by the given batch function will be returned unmodified by // Batch // As Batch normally disables updating the current state of the container, the // Sync() function is provided to enable container state to be updated and // checked within Batch. func (c *Container) Batch(batchFunc func(*Container) error) error { c.lock.Lock() defer c.lock.Unlock() if err := c.syncContainer(); err != nil { return err } newCtr := new(Container) newCtr.config = c.config newCtr.state = c.state newCtr.runtime = c.runtime newCtr.lock = c.lock newCtr.valid = true newCtr.batched = true err := batchFunc(newCtr) newCtr.batched = false return err } // Sync updates the status of a container by querying the OCI runtime. // If the container has not been created inside the OCI runtime, nothing will be // done. // Most of the time, Podman does not explicitly query the OCI runtime for // container status, and instead relies upon exit files created by conmon. // This can cause a disconnect between running state and what Podman sees in // cases where Conmon was killed unexpected, or runc was upgraded. // Running a manual Sync() ensures that container state will be correct in // such situations. func (c *Container) Sync() error { if !c.batched { c.lock.Lock() defer c.lock.Unlock() } // If runtime knows about the container, update its status in runtime // And then save back to disk if (c.state.State != ContainerStateUnknown) && (c.state.State != ContainerStateConfigured) && (c.state.State != ContainerStateExited) { oldState := c.state.State if err := c.runtime.ociRuntime.updateContainerStatus(c, true); err != nil { return err } // Only save back to DB if state changed if c.state.State != oldState { if err := c.save(); err != nil { return err } } } defer c.newContainerEvent(events.Sync) return nil } // Refresh refreshes a container's state in the database, restarting the // container if it is running func (c *Container) Refresh(ctx context.Context) error { if !c.batched { c.lock.Lock() defer c.lock.Unlock() if err := c.syncContainer(); err != nil { return err } } wasCreated := false if c.state.State == ContainerStateCreated { wasCreated = true } wasRunning := false if c.state.State == ContainerStateRunning { wasRunning = true } wasPaused := false if c.state.State == ContainerStatePaused { wasPaused = true } // First, unpause the container if it's paused if c.state.State == ContainerStatePaused { if err := c.unpause(); err != nil { return err } } // Next, if the container is running, stop it if c.state.State == ContainerStateRunning { if err := c.stop(c.config.StopTimeout); err != nil { return err } } // If there are active exec sessions, we need to kill them if len(c.state.ExecSessions) > 0 { logrus.Infof("Killing %d exec sessions in container %s. They will not be restored after refresh.", len(c.state.ExecSessions), c.ID()) if err := c.runtime.ociRuntime.execStopContainer(c, c.config.StopTimeout); err != nil { return err } } // If the container is in ContainerStateStopped, we need to delete it // from the runtime and clear conmon state if c.state.State == ContainerStateStopped { if err := c.delete(ctx); err != nil { return err } if err := c.removeConmonFiles(); err != nil { return err } } // Fire cleanup code one more time unconditionally to ensure we are good // to refresh if err := c.cleanup(ctx); err != nil { return err } logrus.Debugf("Resetting state of container %s", c.ID()) // We've finished unwinding the container back to its initial state // Now safe to refresh container state if err := resetState(c.state); err != nil { return errors.Wrapf(err, "error resetting state of container %s", c.ID()) } if err := c.refresh(); err != nil { return err } logrus.Debugf("Successfully refresh container %s state", c.ID()) // Initialize the container if it was created in runc if wasCreated || wasRunning || wasPaused { if err := c.prepare(); err != nil { return err } if err := c.init(ctx, false); err != nil { return err } } // If the container was running before, start it if wasRunning || wasPaused { if err := c.start(); err != nil { return err } } // If the container was paused before, re-pause it if wasPaused { if err := c.pause(); err != nil { return err } } return nil } // ContainerCheckpointOptions is a struct used to pass the parameters // for checkpointing (and restoring) to the corresponding functions type ContainerCheckpointOptions struct { // Keep tells the API to not delete checkpoint artifacts Keep bool // KeepRunning tells the API to keep the container running // after writing the checkpoint to disk KeepRunning bool // TCPEstablished tells the API to checkpoint a container // even if it contains established TCP connections TCPEstablished bool // Export tells the API to write the checkpoint image to // the filename set in TargetFile // Import tells the API to read the checkpoint image from // the filename set in TargetFile TargetFile string // Name tells the API that during restore from an exported // checkpoint archive a new name should be used for the // restored container Name string } // Checkpoint checkpoints a container func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointOptions) error { logrus.Debugf("Trying to checkpoint container %s", c.ID()) if options.TargetFile != "" { if err := c.prepareCheckpointExport(); err != nil { return err } } if !c.batched { c.lock.Lock() defer c.lock.Unlock() if err := c.syncContainer(); err != nil { return err } } defer c.newContainerEvent(events.Checkpoint) return c.checkpoint(ctx, options) } // Restore restores a container func (c *Container) Restore(ctx context.Context, options ContainerCheckpointOptions) (err error) { logrus.Debugf("Trying to restore container %s", c.ID()) if !c.batched { c.lock.Lock() defer c.lock.Unlock() if err := c.syncContainer(); err != nil { return err } } defer c.newContainerEvent(events.Restore) return c.restore(ctx, options) }