diff options
Diffstat (limited to 'libpod/oci.go')
-rw-r--r-- | libpod/oci.go | 273 |
1 files changed, 273 insertions, 0 deletions
diff --git a/libpod/oci.go b/libpod/oci.go new file mode 100644 index 000000000..0ed1c1f66 --- /dev/null +++ b/libpod/oci.go @@ -0,0 +1,273 @@ +package libpod + +import ( + "bytes" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "syscall" + "time" + + "github.com/containerd/cgroups" + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + + // TODO import these functions into libpod and remove the import + // Trying to keep libpod from depending on CRI-O code + "github.com/kubernetes-incubator/cri-o/utils" +) + +// OCI code is undergoing heavy rewrite + +const ( + // CgroupfsCgroupsManager represents cgroupfs native cgroup manager + CgroupfsCgroupsManager = "cgroupfs" + // SystemdCgroupsManager represents systemd native cgroup manager + SystemdCgroupsManager = "systemd" + + // ContainerCreateTimeout represents the value of container creating timeout + ContainerCreateTimeout = 240 * time.Second +) + +// OCIRuntime represents an OCI-compatible runtime that libpod can call into +// to perform container operations +type OCIRuntime struct { + name string + path string + conmonPath string + conmonEnv []string + cgroupManager string + tmpDir string + exitsDir string + socketsDir string + logSizeMax int64 + noPivot bool +} + +// syncInfo is used to return data from monitor process to daemon +type syncInfo struct { + Pid int `json:"pid"` + Message string `json:"message,omitempty"` +} + +// Make a new OCI runtime with provided options +func newOCIRuntime(name string, path string, conmonPath string, conmonEnv []string, cgroupManager string, tmpDir string, logSizeMax int64, noPivotRoot bool) (*OCIRuntime, error) { + runtime := new(OCIRuntime) + runtime.name = name + runtime.path = path + runtime.conmonPath = conmonPath + runtime.conmonEnv = conmonEnv + runtime.cgroupManager = cgroupManager + runtime.tmpDir = tmpDir + runtime.logSizeMax = logSizeMax + runtime.noPivot = noPivotRoot + + runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits") + runtime.socketsDir = filepath.Join(runtime.tmpDir, "socket") + + if cgroupManager != CgroupfsCgroupsManager && cgroupManager != SystemdCgroupsManager { + return nil, errors.Wrapf(ErrInvalidArg, "invalid cgroup manager specified: %s", cgroupManager) + } + + // Create the exit files and attach sockets directories + if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil { + // The directory is allowed to exist + if !os.IsExist(err) { + return nil, errors.Wrapf(err, "error creating OCI runtime exit files directory %s", + runtime.exitsDir) + } + } + if err := os.MkdirAll(runtime.socketsDir, 0750); err != nil { + // The directory is allowed to exist + if !os.IsExist(err) { + return nil, errors.Wrapf(err, "error creating OCI runtime attach sockets directory %s", + runtime.socketsDir) + } + } + + return runtime, nil +} + +// newPipe creates a unix socket pair for communication +func newPipe() (parent *os.File, child *os.File, err error) { + fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0) + if err != nil { + return nil, nil, err + } + return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil +} + +// Create systemd unit name for cgroup scopes +func createUnitName(prefix string, name string) string { + return fmt.Sprintf("%s-%s.scope", prefix, name) +} + +// CreateContainer creates a container in the OCI runtime +// TODO terminal support for container +// Presently just ignoring conmon opts related to it +func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) error { + var stderrBuf bytes.Buffer + + parentPipe, childPipe, err := newPipe() + if err != nil { + return errors.Wrapf(err, "error creating socket pair") + } + + childStartPipe, parentStartPipe, err := newPipe() + if err != nil { + return errors.Wrapf(err, "error creating socket pair for start pipe") + } + + defer parentPipe.Close() + defer parentStartPipe.Close() + + args := []string{} + if r.cgroupManager == SystemdCgroupsManager { + args = append(args, "-s") + } + args = append(args, "-c", ctr.ID()) + args = append(args, "-u", ctr.ID()) + args = append(args, "-r", r.path) + args = append(args, "-b", ctr.bundlePath()) + args = append(args, "-p", filepath.Join(ctr.state.RunDir, "pidfile")) + // TODO container log location should be configurable + // The default also likely shouldn't be this + args = append(args, "-l", filepath.Join(ctr.config.StaticDir, "ctr.log")) + args = append(args, "--exit-dir", r.exitsDir) + args = append(args, "--socket-dir-path", r.socketsDir) + if ctr.config.Spec.Process.Terminal { + args = append(args, "-t") + } else if ctr.config.Stdin { + args = append(args, "-i") + } + if r.logSizeMax >= 0 { + args = append(args, "--log-size-max", fmt.Sprintf("%v", r.logSizeMax)) + } + if r.noPivot { + args = append(args, "--no-pivot") + } + logrus.WithFields(logrus.Fields{ + "args": args, + }).Debugf("running conmon: %s", r.conmonPath) + + cmd := exec.Command(r.conmonPath, args...) + cmd.Dir = ctr.state.RunDir + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + } + // TODO this is probably a really bad idea for some uses + // Make this configurable + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if ctr.config.Spec.Process.Terminal { + cmd.Stderr = &stderrBuf + } + + cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe, childStartPipe) + // 0, 1 and 2 are stdin, stdout and stderr + cmd.Env = append(r.conmonEnv, fmt.Sprintf("_OCI_SYNCPIPE=%d", 3)) + cmd.Env = append(cmd.Env, fmt.Sprintf("_OCI_STARTPIPE=%d", 4)) + + err = cmd.Start() + if err != nil { + childPipe.Close() + return err + } + + // We don't need childPipe on the parent side + childPipe.Close() + childStartPipe.Close() + + // Move conmon to specified cgroup + if r.cgroupManager == SystemdCgroupsManager { + logrus.Infof("Running conmon under slice %s and unitName %s", cgroupParent, createUnitName("libpod-conmon", ctr.ID())) + if err = utils.RunUnderSystemdScope(cmd.Process.Pid, cgroupParent, createUnitName("libpod-conmon", ctr.ID())); err != nil { + logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err) + } + } else { + control, err := cgroups.New(cgroups.V1, cgroups.StaticPath(filepath.Join(cgroupParent, "/libpod-conmon-"+ctr.ID())), &spec.LinuxResources{}) + if err != nil { + logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + } else { + // XXX: this defer does nothing as the cgroup can't be deleted cause + // it contains the conmon pid in tasks + // we need to remove this defer and delete the cgroup once conmon exits + // maybe need a conmon monitor? + defer control.Delete() + if err := control.Add(cgroups.Process{Pid: cmd.Process.Pid}); err != nil { + logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + } + } + } + + /* We set the cgroup, now the child can start creating children */ + someData := []byte{0} + _, err = parentStartPipe.Write(someData) + if err != nil { + return err + } + + /* Wait for initial setup and fork, and reap child */ + err = cmd.Wait() + if err != nil { + return err + } + + // TODO should do a defer r.deleteContainer(ctr) here if err != nil + // Need deleteContainer to be working first, though... + + // Wait to get container pid from conmon + type syncStruct struct { + si *syncInfo + err error + } + ch := make(chan syncStruct) + go func() { + var si *syncInfo + if err = json.NewDecoder(parentPipe).Decode(&si); err != nil { + ch <- syncStruct{err: err} + return + } + ch <- syncStruct{si: si} + }() + + select { + case ss := <-ch: + if ss.err != nil { + return errors.Wrapf(ss.err, "error reading container (probably exited) json message") + } + logrus.Debugf("Received container pid: %d", ss.si.Pid) + if ss.si.Pid == -1 { + if ss.si.Message != "" { + return errors.Wrapf(ErrInternal, "container create failed: %s", ss.si.Message) + } + return errors.Wrapf(ErrInternal, "container create failed") + } + case <-time.After(ContainerCreateTimeout): + return errors.Wrapf(ErrInternal, "container creation timeout") + } + return nil +} + +// updateContainerStatus retrieves the current status of the container from the +// runtime +func (r *OCIRuntime) updateContainerStatus(ctr *Container) error { + return ErrNotImplemented +} + +// startContainer starts the given container +func (r *OCIRuntime) startContainer(ctr *Container) error { + // TODO: streams should probably *not* be our STDIN/OUT/ERR - redirect to buffers? + if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.path, "start", ctr.ID()); err != nil { + return err + } + + // TODO record start time in container struct + + return nil +} |