diff options
Diffstat (limited to 'oci/oci.go')
-rw-r--r-- | oci/oci.go | 748 |
1 files changed, 0 insertions, 748 deletions
diff --git a/oci/oci.go b/oci/oci.go deleted file mode 100644 index 2e7e44b84..000000000 --- a/oci/oci.go +++ /dev/null @@ -1,748 +0,0 @@ -package oci - -import ( - "bytes" - "encoding/json" - "fmt" - "io/ioutil" - "os" - "os/exec" - "path/filepath" - "strconv" - "strings" - "syscall" - "time" - - "github.com/containerd/cgroups" - rspec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/projectatomic/libpod/utils" - "github.com/sirupsen/logrus" - "golang.org/x/net/context" - "golang.org/x/sys/unix" - kwait "k8s.io/apimachinery/pkg/util/wait" -) - -const ( - // ContainerStateCreated represents the created state of a container - ContainerStateCreated = "created" - // ContainerStatePaused represents the paused state of a container - ContainerStatePaused = "paused" - // ContainerStateRunning represents the running state of a container - ContainerStateRunning = "running" - // ContainerStateStopped represents the stopped state of a container - ContainerStateStopped = "stopped" - // ContainerCreateTimeout represents the value of container creating timeout - ContainerCreateTimeout = 240 * time.Second - - // CgroupfsCgroupsManager represents cgroupfs native cgroup manager - CgroupfsCgroupsManager = "cgroupfs" - // SystemdCgroupsManager represents systemd native cgroup manager - SystemdCgroupsManager = "systemd" - // ContainerExitsDir is the location of container exit dirs - ContainerExitsDir = "/var/run/crio/exits" - // ContainerAttachSocketDir is the location for container attach sockets - ContainerAttachSocketDir = "/var/run/crio" - - // killContainerTimeout is the timeout that we wait for the container to - // be SIGKILLed. - killContainerTimeout = 2 * time.Minute -) - -// New creates a new Runtime with options provided -func New(runtimeTrustedPath string, - runtimeUntrustedPath string, - trustLevel string, - conmonPath string, - conmonEnv []string, - cgroupManager string, - containerExitsDir string, - logSizeMax int64, - noPivot bool) (*Runtime, error) { - r := &Runtime{ - name: filepath.Base(runtimeTrustedPath), - trustedPath: runtimeTrustedPath, - untrustedPath: runtimeUntrustedPath, - trustLevel: trustLevel, - conmonPath: conmonPath, - conmonEnv: conmonEnv, - cgroupManager: cgroupManager, - containerExitsDir: containerExitsDir, - logSizeMax: logSizeMax, - noPivot: noPivot, - } - return r, nil -} - -// Runtime stores the information about a oci runtime -type Runtime struct { - name string - trustedPath string - untrustedPath string - trustLevel string - conmonPath string - conmonEnv []string - cgroupManager string - containerExitsDir string - logSizeMax int64 - noPivot bool -} - -// syncInfo is used to return data from monitor process to daemon -type syncInfo struct { - Pid int `json:"pid"` - Message string `json:"message,omitempty"` -} - -// exitCodeInfo is used to return the monitored process exit code to the daemon -type exitCodeInfo struct { - ExitCode int32 `json:"exit_code"` - Message string `json:"message,omitempty"` -} - -// Name returns the name of the OCI Runtime -func (r *Runtime) Name() string { - return r.name -} - -// Path returns the full path the OCI Runtime executable. -// Depending if the container is privileged and/or trusted, -// this will return either the trusted or untrusted runtime path. -func (r *Runtime) Path(c *Container) string { - if !c.trusted { - // We have an explicitly untrusted container. - if c.privileged { - logrus.Warnf("Running an untrusted but privileged container") - return r.trustedPath - } - - if r.untrustedPath != "" { - return r.untrustedPath - } - - return r.trustedPath - } - - // Our container is trusted. Let's look at the configured trust level. - if r.trustLevel == "trusted" { - return r.trustedPath - } - - // Our container is trusted, but we are running untrusted. - // We will use the untrusted container runtime if it's set - // and if it's not a privileged container. - if c.privileged || r.untrustedPath == "" { - return r.trustedPath - } - - return r.untrustedPath -} - -// Version returns the version of the OCI Runtime -func (r *Runtime) Version() (string, error) { - runtimeVersion, err := getOCIVersion(r.trustedPath, "-v") - if err != nil { - return "", err - } - return runtimeVersion, nil -} - -func getOCIVersion(name string, args ...string) (string, error) { - out, err := utils.ExecCmd(name, args...) - if err != nil { - return "", err - } - - firstLine := out[:strings.Index(out, "\n")] - v := firstLine[strings.LastIndex(firstLine, " ")+1:] - return v, nil -} - -// CreateContainer creates a container. -func (r *Runtime) CreateContainer(c *Container, cgroupParent string) (err error) { - var stderrBuf bytes.Buffer - parentPipe, childPipe, err := newPipe() - childStartPipe, parentStartPipe, err := newPipe() - if err != nil { - return fmt.Errorf("error creating socket pair: %v", err) - } - defer parentPipe.Close() - defer parentStartPipe.Close() - - var args []string - if r.cgroupManager == SystemdCgroupsManager { - args = append(args, "-s") - } - args = append(args, "-c", c.id) - args = append(args, "-u", c.id) - args = append(args, "-r", r.Path(c)) - args = append(args, "-b", c.bundlePath) - args = append(args, "-p", filepath.Join(c.bundlePath, "pidfile")) - args = append(args, "-l", c.logPath) - args = append(args, "--exit-dir", r.containerExitsDir) - args = append(args, "--socket-dir-path", ContainerAttachSocketDir) - if r.logSizeMax >= 0 { - args = append(args, "--log-size-max", fmt.Sprintf("%v", r.logSizeMax)) - } - if r.noPivot { - args = append(args, "--no-pivot") - } - if c.terminal { - args = append(args, "-t") - } else if c.stdin { - args = append(args, "-i") - } - logrus.WithFields(logrus.Fields{ - "args": args, - }).Debugf("running conmon: %s", r.conmonPath) - - cmd := exec.Command(r.conmonPath, args...) - cmd.Dir = c.bundlePath - cmd.SysProcAttr = &syscall.SysProcAttr{ - Setpgid: true, - } - cmd.Stdin = os.Stdin - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - if c.terminal { - cmd.Stderr = &stderrBuf - } - cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe, childStartPipe) - // 0, 1 and 2 are stdin, stdout and stderr - cmd.Env = append(r.conmonEnv, fmt.Sprintf("_OCI_SYNCPIPE=%d", 3)) - cmd.Env = append(cmd.Env, fmt.Sprintf("_OCI_STARTPIPE=%d", 4)) - - err = cmd.Start() - if err != nil { - childPipe.Close() - return err - } - - // We don't need childPipe on the parent side - childPipe.Close() - childStartPipe.Close() - - // Move conmon to specified cgroup - if r.cgroupManager == SystemdCgroupsManager { - logrus.Infof("Running conmon under slice %s and unitName %s", cgroupParent, createUnitName("crio-conmon", c.id)) - if err = utils.RunUnderSystemdScope(cmd.Process.Pid, cgroupParent, createUnitName("crio-conmon", c.id)); err != nil { - logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err) - } - } else { - control, err := cgroups.New(cgroups.V1, cgroups.StaticPath(filepath.Join(cgroupParent, "/crio-conmon-"+c.id)), &rspec.LinuxResources{}) - if err != nil { - logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) - } else { - // Here we should defer a crio-connmon- cgroup hierarchy deletion, but it will - // always fail as conmon's pid is still there. - // Fortunately, kubelet takes care of deleting this for us, so the leak will - // only happens in corner case where one does a manual deletion of the container - // through e.g. runc. This should be handled by implementing a conmon monitoring - // routine that does the cgroup cleanup once conmon is terminated. - if err := control.Add(cgroups.Process{Pid: cmd.Process.Pid}); err != nil { - logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) - } - } - } - - /* We set the cgroup, now the child can start creating children */ - someData := []byte{0} - _, err = parentStartPipe.Write(someData) - if err != nil { - return err - } - - /* Wait for initial setup and fork, and reap child */ - err = cmd.Wait() - if err != nil { - return err - } - - // We will delete all container resources if creation fails - defer func() { - if err != nil { - r.DeleteContainer(c) - } - }() - - // Wait to get container pid from conmon - type syncStruct struct { - si *syncInfo - err error - } - ch := make(chan syncStruct) - go func() { - var si *syncInfo - if err = json.NewDecoder(parentPipe).Decode(&si); err != nil { - ch <- syncStruct{err: err} - return - } - ch <- syncStruct{si: si} - }() - - select { - case ss := <-ch: - if ss.err != nil { - return fmt.Errorf("error reading container (probably exited) json message: %v", ss.err) - } - logrus.Debugf("Received container pid: %d", ss.si.Pid) - if ss.si.Pid == -1 { - if ss.si.Message != "" { - logrus.Errorf("Container creation error: %s", ss.si.Message) - return fmt.Errorf("container create failed: %s", ss.si.Message) - } - logrus.Errorf("Container creation failed") - return fmt.Errorf("container create failed") - } - case <-time.After(ContainerCreateTimeout): - logrus.Errorf("Container creation timeout (%v)", ContainerCreateTimeout) - return fmt.Errorf("create container timeout") - } - return nil -} - -func createUnitName(prefix string, name string) string { - return fmt.Sprintf("%s-%s.scope", prefix, name) -} - -// StartContainer starts a container. -func (r *Runtime) StartContainer(c *Container) error { - c.opLock.Lock() - defer c.opLock.Unlock() - if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "start", c.id); err != nil { - return err - } - c.state.Started = time.Now() - return nil -} - -// ExecSyncResponse is returned from ExecSync. -type ExecSyncResponse struct { - Stdout []byte - Stderr []byte - ExitCode int32 -} - -// ExecSyncError wraps command's streams, exit code and error on ExecSync error. -type ExecSyncError struct { - Stdout bytes.Buffer - Stderr bytes.Buffer - ExitCode int32 - Err error -} - -func (e ExecSyncError) Error() string { - return fmt.Sprintf("command error: %+v, stdout: %s, stderr: %s, exit code %d", e.Err, e.Stdout.Bytes(), e.Stderr.Bytes(), e.ExitCode) -} - -func prepareExec() (pidFile, parentPipe, childPipe *os.File, err error) { - parentPipe, childPipe, err = os.Pipe() - if err != nil { - return nil, nil, nil, err - } - - pidFile, err = ioutil.TempFile("", "pidfile") - if err != nil { - parentPipe.Close() - childPipe.Close() - return nil, nil, nil, err - } - - return -} - -func parseLog(log []byte) (stdout, stderr []byte) { - // Split the log on newlines, which is what separates entries. - lines := bytes.SplitAfter(log, []byte{'\n'}) - for _, line := range lines { - // Ignore empty lines. - if len(line) == 0 { - continue - } - - // The format of log lines is "DATE pipe REST". - parts := bytes.SplitN(line, []byte{' '}, 3) - if len(parts) < 3 { - // Ignore the line if it's formatted incorrectly, but complain - // about it so it can be debugged. - logrus.Warnf("hit invalid log format: %q", string(line)) - continue - } - - pipe := string(parts[1]) - content := parts[2] - - switch pipe { - case "stdout": - stdout = append(stdout, content...) - case "stderr": - stderr = append(stderr, content...) - default: - // Complain about unknown pipes. - logrus.Warnf("hit invalid log format [unknown pipe %s]: %q", pipe, string(line)) - continue - } - } - - return stdout, stderr -} - -// ExecSync execs a command in a container and returns it's stdout, stderr and return code. -func (r *Runtime) ExecSync(c *Container, command []string, timeout int64) (resp *ExecSyncResponse, err error) { - pidFile, parentPipe, childPipe, err := prepareExec() - if err != nil { - return nil, ExecSyncError{ - ExitCode: -1, - Err: err, - } - } - defer parentPipe.Close() - defer func() { - if e := os.Remove(pidFile.Name()); e != nil { - logrus.Warnf("could not remove temporary PID file %s", pidFile.Name()) - } - }() - - logFile, err := ioutil.TempFile("", "crio-log-"+c.id) - if err != nil { - return nil, ExecSyncError{ - ExitCode: -1, - Err: err, - } - } - logPath := logFile.Name() - defer func() { - logFile.Close() - os.RemoveAll(logPath) - }() - - f, err := ioutil.TempFile("", "exec-process") - if err != nil { - return nil, ExecSyncError{ - ExitCode: -1, - Err: err, - } - } - defer os.RemoveAll(f.Name()) - - var args []string - args = append(args, "-c", c.id) - args = append(args, "-r", r.Path(c)) - args = append(args, "-p", pidFile.Name()) - args = append(args, "-e") - if c.terminal { - args = append(args, "-t") - } - if timeout > 0 { - args = append(args, "-T") - args = append(args, fmt.Sprintf("%d", timeout)) - } - args = append(args, "-l", logPath) - args = append(args, "--socket-dir-path", ContainerAttachSocketDir) - - pspec := c.Spec().Process - pspec.Env = append(pspec.Env, r.conmonEnv...) - pspec.Args = command - processJSON, err := json.Marshal(pspec) - if err != nil { - return nil, ExecSyncError{ - ExitCode: -1, - Err: err, - } - } - - if err := ioutil.WriteFile(f.Name(), processJSON, 0644); err != nil { - return nil, ExecSyncError{ - ExitCode: -1, - Err: err, - } - } - - args = append(args, "--exec-process-spec", f.Name()) - - cmd := exec.Command(r.conmonPath, args...) - - var stdoutBuf, stderrBuf bytes.Buffer - cmd.Stdout = &stdoutBuf - cmd.Stderr = &stderrBuf - cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe) - // 0, 1 and 2 are stdin, stdout and stderr - cmd.Env = append(r.conmonEnv, fmt.Sprintf("_OCI_SYNCPIPE=%d", 3)) - - err = cmd.Start() - if err != nil { - childPipe.Close() - return nil, ExecSyncError{ - Stdout: stdoutBuf, - Stderr: stderrBuf, - ExitCode: -1, - Err: err, - } - } - - // We don't need childPipe on the parent side - childPipe.Close() - - err = cmd.Wait() - if err != nil { - if exitErr, ok := err.(*exec.ExitError); ok { - if status, ok := exitErr.Sys().(unix.WaitStatus); ok { - return nil, ExecSyncError{ - Stdout: stdoutBuf, - Stderr: stderrBuf, - ExitCode: int32(status.ExitStatus()), - Err: err, - } - } - } else { - return nil, ExecSyncError{ - Stdout: stdoutBuf, - Stderr: stderrBuf, - ExitCode: -1, - Err: err, - } - } - } - - var ec *exitCodeInfo - if err := json.NewDecoder(parentPipe).Decode(&ec); err != nil { - return nil, ExecSyncError{ - Stdout: stdoutBuf, - Stderr: stderrBuf, - ExitCode: -1, - Err: err, - } - } - - logrus.Infof("Received container exit code: %v, message: %s", ec.ExitCode, ec.Message) - - if ec.ExitCode == -1 { - return nil, ExecSyncError{ - Stdout: stdoutBuf, - Stderr: stderrBuf, - ExitCode: -1, - Err: fmt.Errorf(ec.Message), - } - } - - // The actual logged output is not the same as stdoutBuf and stderrBuf, - // which are used for getting error information. For the actual - // ExecSyncResponse we have to read the logfile. - // XXX: Currently runC dups the same console over both stdout and stderr, - // so we can't differentiate between the two. - - logBytes, err := ioutil.ReadFile(logPath) - if err != nil { - return nil, ExecSyncError{ - Stdout: stdoutBuf, - Stderr: stderrBuf, - ExitCode: -1, - Err: err, - } - } - - // We have to parse the log output into {stdout, stderr} buffers. - stdoutBytes, stderrBytes := parseLog(logBytes) - return &ExecSyncResponse{ - Stdout: stdoutBytes, - Stderr: stderrBytes, - ExitCode: ec.ExitCode, - }, nil -} - -func waitContainerStop(ctx context.Context, c *Container, timeout time.Duration) error { - done := make(chan struct{}) - // we could potentially re-use "done" channel to exit the loop on timeout - // but we use another channel "chControl" so that we won't never incur in the - // case the "done" channel is closed in the "default" select case and we also - // reach the timeout in the select below. If that happens we could raise - // a panic closing a closed channel so better be safe and use another new - // channel just to control the loop. - chControl := make(chan struct{}) - go func() { - for { - select { - case <-chControl: - return - default: - // Check if the process is still around - err := unix.Kill(c.state.Pid, 0) - if err == unix.ESRCH { - close(done) - return - } - time.Sleep(100 * time.Millisecond) - } - } - }() - select { - case <-done: - return nil - case <-ctx.Done(): - close(chControl) - return ctx.Err() - case <-time.After(timeout): - close(chControl) - err := unix.Kill(c.state.Pid, unix.SIGKILL) - if err != nil && err != unix.ESRCH { - return fmt.Errorf("failed to kill process: %v", err) - } - } - - c.state.Finished = time.Now() - return nil -} - -// StopContainer stops a container. Timeout is given in seconds. -func (r *Runtime) StopContainer(ctx context.Context, c *Container, timeout int64) error { - c.opLock.Lock() - defer c.opLock.Unlock() - - // Check if the process is around before sending a signal - err := unix.Kill(c.state.Pid, 0) - if err == unix.ESRCH { - c.state.Finished = time.Now() - return nil - } - - if timeout > 0 { - if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "kill", c.id, c.GetStopSignal()); err != nil { - return fmt.Errorf("failed to stop container %s, %v", c.id, err) - } - err = waitContainerStop(ctx, c, time.Duration(timeout)*time.Second) - if err == nil { - return nil - } - logrus.Warnf("Stop container %q timed out: %v", c.ID(), err) - } - - if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.Path(c), "kill", "--all", c.id, "KILL"); err != nil { - return fmt.Errorf("failed to stop container %s, %v", c.id, err) - } - - return waitContainerStop(ctx, c, killContainerTimeout) -} - -// DeleteContainer deletes a container. -func (r *Runtime) DeleteContainer(c *Container) error { - c.opLock.Lock() - defer c.opLock.Unlock() - _, err := utils.ExecCmd(r.Path(c), "delete", "--force", c.id) - return err -} - -// SetStartFailed sets the container state appropriately after a start failure -func (r *Runtime) SetStartFailed(c *Container, err error) { - c.opLock.Lock() - defer c.opLock.Unlock() - // adjust finished and started times - c.state.Finished, c.state.Started = c.state.Created, c.state.Created - c.state.Error = err.Error() -} - -// UpdateStatus refreshes the status of the container. -func (r *Runtime) UpdateStatus(c *Container) error { - c.opLock.Lock() - defer c.opLock.Unlock() - out, err := exec.Command(r.Path(c), "state", c.id).CombinedOutput() - if err != nil { - // there are many code paths that could lead to have a bad state in the - // underlying runtime. - // On any error like a container went away or we rebooted and containers - // went away we do not error out stopping kubernetes to recover. - // We always populate the fields below so kube can restart/reschedule - // containers failing. - c.state.Status = ContainerStateStopped - c.state.Finished = time.Now() - c.state.ExitCode = 255 - return nil - } - if err := json.NewDecoder(bytes.NewBuffer(out)).Decode(&c.state); err != nil { - return fmt.Errorf("failed to decode container status for %s: %s", c.id, err) - } - - if c.state.Status == ContainerStateStopped { - exitFilePath := filepath.Join(r.containerExitsDir, c.id) - var fi os.FileInfo - err = kwait.ExponentialBackoff( - kwait.Backoff{ - Duration: 500 * time.Millisecond, - Factor: 1.2, - Steps: 6, - }, - func() (bool, error) { - var err error - fi, err = os.Stat(exitFilePath) - if err != nil { - // wait longer - return false, nil - } - return true, nil - }) - if err != nil { - logrus.Warnf("failed to find container exit file: %v", err) - c.state.ExitCode = -1 - } else { - c.state.Finished = getFinishedTime(fi) - statusCodeStr, err := ioutil.ReadFile(exitFilePath) - if err != nil { - return fmt.Errorf("failed to read exit file: %v", err) - } - statusCode, err := strconv.Atoi(string(statusCodeStr)) - if err != nil { - return fmt.Errorf("status code conversion failed: %v", err) - } - c.state.ExitCode = int32(statusCode) - } - - oomFilePath := filepath.Join(c.bundlePath, "oom") - if _, err = os.Stat(oomFilePath); err == nil { - c.state.OOMKilled = true - } - } - - return nil -} - -// ContainerStatus returns the state of a container. -func (r *Runtime) ContainerStatus(c *Container) *ContainerState { - c.opLock.Lock() - defer c.opLock.Unlock() - return c.state -} - -// newPipe creates a unix socket pair for communication -func newPipe() (parent *os.File, child *os.File, err error) { - fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0) - if err != nil { - return nil, nil, err - } - return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil -} - -// RuntimeReady checks if the runtime is up and ready to accept -// basic containers e.g. container only needs host network. -func (r *Runtime) RuntimeReady() (bool, error) { - return true, nil -} - -// NetworkReady checks if the runtime network is up and ready to -// accept containers which require container network. -func (r *Runtime) NetworkReady() (bool, error) { - return true, nil -} - -// PauseContainer pauses a container. -func (r *Runtime) PauseContainer(c *Container) error { - c.opLock.Lock() - defer c.opLock.Unlock() - _, err := utils.ExecCmd(r.Path(c), "pause", c.id) - return err -} - -// UnpauseContainer unpauses a container. -func (r *Runtime) UnpauseContainer(c *Container) error { - c.opLock.Lock() - defer c.opLock.Unlock() - _, err := utils.ExecCmd(r.Path(c), "resume", c.id) - return err -} |