summary refs log tree commit diff
path: root/libpod/oci.go
diff options
context:
space:
mode:
Diffstat (limited to 'libpod/oci.go')
-rw-r--r-- libpod/oci.go 273
1 files changed, 273 insertions, 0 deletions
diff --git a/libpod/oci.go b/libpod/oci.go
new file mode 100644
index 000000000..0ed1c1f66
--- /dev/null
+++ b/libpod/oci.go
@@ -0,0 +1,273 @@
+package libpod
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "syscall"
+ "time"
+
+ "github.com/containerd/cgroups"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
+
+ // TODO import these functions into libpod and remove the import
+ // Trying to keep libpod from depending on CRI-O code
+ "github.com/kubernetes-incubator/cri-o/utils"
+)
+
+// OCI code is undergoing heavy rewrite
+
+// Identifiers of the cgroup managers accepted by newOCIRuntime.
+const (
+	// SystemdCgroupsManager selects the systemd native cgroup manager
+	SystemdCgroupsManager = "systemd"
+	// CgroupfsCgroupsManager selects the cgroupfs native cgroup manager
+	CgroupfsCgroupsManager = "cgroupfs"
+)
+
+// ContainerCreateTimeout bounds how long container creation may wait for
+// the monitor process to report the container's PID
+const ContainerCreateTimeout = 4 * time.Minute
+
+// OCIRuntime represents an OCI-compatible runtime that libpod can call into
+// to perform container operations.
+// Instances are built by newOCIRuntime, which also derives exitsDir and
+// socketsDir from tmpDir.
+type OCIRuntime struct {
+ // name is the runtime's name as given to newOCIRuntime
+ name string
+ // path is the runtime binary: handed to conmon via -r and invoked
+ // directly by startContainer
+ path string
+ // conmonPath is the conmon executable launched by createContainer
+ conmonPath string
+ // conmonEnv is the base environment for conmon; the sync/start pipe
+ // variables are appended to it on each launch
+ conmonEnv []string
+ // cgroupManager is CgroupfsCgroupsManager or SystemdCgroupsManager
+ cgroupManager string
+ // tmpDir is the parent directory of exitsDir and socketsDir
+ tmpDir string
+ // exitsDir is <tmpDir>/exits, passed to conmon as --exit-dir
+ exitsDir string
+ // socketsDir is <tmpDir>/socket, passed to conmon as --socket-dir-path
+ socketsDir string
+ // logSizeMax is passed to conmon as --log-size-max when it is >= 0
+ logSizeMax int64
+ // noPivot adds --no-pivot to the conmon command line when true
+ noPivot bool
+}
+
+// syncInfo is used to return data from monitor process to daemon.
+// It is decoded from the JSON message read off the sync pipe in
+// createContainer.
+type syncInfo struct {
+ // Pid is the reported container PID; createContainer treats -1 as a
+ // failed creation
+ Pid int `json:"pid"`
+ // Message is optional detail that createContainer includes in its
+ // error when Pid is -1
+ Message string `json:"message,omitempty"`
+}
+
+// newOCIRuntime makes a new OCI runtime with the provided options.
+//
+// cgroupManager must be CgroupfsCgroupsManager or SystemdCgroupsManager;
+// any other value yields an error wrapping ErrInvalidArg and nothing is
+// created on disk. On success the "exits" and "socket" directories exist
+// under tmpDir (mode 0750 if they had to be created).
+func newOCIRuntime(name string, path string, conmonPath string, conmonEnv []string, cgroupManager string, tmpDir string, logSizeMax int64, noPivotRoot bool) (*OCIRuntime, error) {
+	// Reject bad configuration before building anything.
+	if cgroupManager != CgroupfsCgroupsManager && cgroupManager != SystemdCgroupsManager {
+		return nil, errors.Wrapf(ErrInvalidArg, "invalid cgroup manager specified: %s", cgroupManager)
+	}
+
+	runtime := &OCIRuntime{
+		name:          name,
+		path:          path,
+		conmonPath:    conmonPath,
+		conmonEnv:     conmonEnv,
+		cgroupManager: cgroupManager,
+		tmpDir:        tmpDir,
+		exitsDir:      filepath.Join(tmpDir, "exits"),
+		socketsDir:    filepath.Join(tmpDir, "socket"),
+		logSizeMax:    logSizeMax,
+		noPivot:       noPivotRoot,
+	}
+
+	// Create the exit files and attach sockets directories.
+	// os.MkdirAll already succeeds when the directory exists, so every
+	// error it returns is genuine (for example a non-directory occupying
+	// the path) and must not be ignored.
+	if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil {
+		return nil, errors.Wrapf(err, "error creating OCI runtime exit files directory %s",
+			runtime.exitsDir)
+	}
+	if err := os.MkdirAll(runtime.socketsDir, 0750); err != nil {
+		return nil, errors.Wrapf(err, "error creating OCI runtime attach sockets directory %s",
+			runtime.socketsDir)
+	}
+
+	return runtime, nil
+}
+
+// newPipe opens a connected pair of close-on-exec AF_LOCAL stream sockets
+// and wraps each end in an *os.File. The second descriptor of the pair is
+// returned as parent and the first as child.
+func newPipe() (parent *os.File, child *os.File, err error) {
+	pair, sockErr := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
+	if sockErr != nil {
+		return nil, nil, sockErr
+	}
+
+	parent = os.NewFile(uintptr(pair[1]), "parent")
+	child = os.NewFile(uintptr(pair[0]), "child")
+	return parent, child, nil
+}
+
+// createUnitName builds the systemd scope unit name "<prefix>-<name>.scope"
+// used for cgroup scopes.
+func createUnitName(prefix string, name string) string {
+	base := fmt.Sprintf("%s-%s", prefix, name)
+	return base + ".scope"
+}
+
+// createContainer creates a container in the OCI runtime by launching conmon
+// and waiting for it to report the container's PID.
+//
+// Sequence: two socket pairs are created (a sync pipe on which conmon sends
+// a JSON syncInfo message, and a start pipe used to release conmon once it
+// has been placed in its cgroup); conmon is started with the child ends as
+// fds 3 and 4; conmon is moved into a cgroup under cgroupParent (failures
+// here are only logged); one byte is written to the start pipe; the conmon
+// process is waited on; finally the sync message is read with a
+// ContainerCreateTimeout deadline.
+//
+// Returns nil once a PID other than -1 has been received. Errors from
+// starting or waiting on conmon and from the start pipe write are returned
+// unwrapped; a failed or timed-out creation wraps ErrInternal.
+//
+// TODO terminal support for container
+// Presently just ignoring conmon opts related to it
+func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) error {
+	var stderrBuf bytes.Buffer
+
+	parentPipe, childPipe, err := newPipe()
+	if err != nil {
+		return errors.Wrapf(err, "error creating socket pair")
+	}
+	defer parentPipe.Close()
+
+	// Either end of a socket pair can play either role, so the swapped
+	// naming relative to newPipe's return order is harmless.
+	childStartPipe, parentStartPipe, err := newPipe()
+	if err != nil {
+		// parentPipe is closed by the defer above; childPipe is not.
+		childPipe.Close()
+		return errors.Wrapf(err, "error creating socket pair for start pipe")
+	}
+	defer parentStartPipe.Close()
+
+	var args []string
+	if r.cgroupManager == SystemdCgroupsManager {
+		args = append(args, "-s")
+	}
+	args = append(args, "-c", ctr.ID())
+	args = append(args, "-u", ctr.ID())
+	args = append(args, "-r", r.path)
+	args = append(args, "-b", ctr.bundlePath())
+	args = append(args, "-p", filepath.Join(ctr.state.RunDir, "pidfile"))
+	// TODO container log location should be configurable
+	// The default also likely shouldn't be this
+	args = append(args, "-l", filepath.Join(ctr.config.StaticDir, "ctr.log"))
+	args = append(args, "--exit-dir", r.exitsDir)
+	args = append(args, "--socket-dir-path", r.socketsDir)
+	if ctr.config.Spec.Process.Terminal {
+		args = append(args, "-t")
+	} else if ctr.config.Stdin {
+		args = append(args, "-i")
+	}
+	if r.logSizeMax >= 0 {
+		args = append(args, "--log-size-max", fmt.Sprintf("%v", r.logSizeMax))
+	}
+	if r.noPivot {
+		args = append(args, "--no-pivot")
+	}
+	logrus.WithFields(logrus.Fields{
+		"args": args,
+	}).Debugf("running conmon: %s", r.conmonPath)
+
+	cmd := exec.Command(r.conmonPath, args...)
+	cmd.Dir = ctr.state.RunDir
+	cmd.SysProcAttr = &syscall.SysProcAttr{
+		Setpgid: true,
+	}
+	// TODO this is probably a really bad idea for some uses
+	// Make this configurable
+	cmd.Stdin = os.Stdin
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	if ctr.config.Spec.Process.Terminal {
+		// With a terminal, conmon's stderr is captured in a buffer
+		// instead of being written to our own stderr.
+		cmd.Stderr = &stderrBuf
+	}
+
+	cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe, childStartPipe)
+	// 0, 1 and 2 are stdin, stdout and stderr, so the extra files land on
+	// fds 3 and 4. Build the environment in a fresh slice: appending to
+	// r.conmonEnv directly could write into its shared backing array.
+	env := make([]string, 0, len(r.conmonEnv)+2)
+	env = append(env, r.conmonEnv...)
+	env = append(env,
+		fmt.Sprintf("_OCI_SYNCPIPE=%d", 3),
+		fmt.Sprintf("_OCI_STARTPIPE=%d", 4))
+	cmd.Env = env
+
+	if err := cmd.Start(); err != nil {
+		childPipe.Close()
+		childStartPipe.Close()
+		return err
+	}
+
+	// We don't need the child ends on the parent side
+	childPipe.Close()
+	childStartPipe.Close()
+
+	// Move conmon to specified cgroup; failures are logged, not fatal
+	if r.cgroupManager == SystemdCgroupsManager {
+		unitName := createUnitName("libpod-conmon", ctr.ID())
+		logrus.Infof("Running conmon under slice %s and unitName %s", cgroupParent, unitName)
+		if err := utils.RunUnderSystemdScope(cmd.Process.Pid, cgroupParent, unitName); err != nil {
+			logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err)
+		}
+	} else {
+		control, err := cgroups.New(cgroups.V1, cgroups.StaticPath(filepath.Join(cgroupParent, "/libpod-conmon-"+ctr.ID())), &spec.LinuxResources{})
+		if err != nil {
+			logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err)
+		} else {
+			// XXX: this defer does nothing as the cgroup can't be deleted cause
+			// it contains the conmon pid in tasks
+			// we need to remove this defer and delete the cgroup once conmon exits
+			// maybe need a conmon monitor?
+			defer control.Delete()
+			if err := control.Add(cgroups.Process{Pid: cmd.Process.Pid}); err != nil {
+				logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err)
+			}
+		}
+	}
+
+	/* We set the cgroup, now the child can start creating children */
+	if _, err := parentStartPipe.Write([]byte{0}); err != nil {
+		return err
+	}
+
+	/* Wait for initial setup and fork, and reap child */
+	if err := cmd.Wait(); err != nil {
+		return err
+	}
+
+	// TODO should do a defer r.deleteContainer(ctr) here if err != nil
+	// Need deleteContainer to be working first, though...
+
+	// Wait to get container pid from conmon
+	type syncStruct struct {
+		si  *syncInfo
+		err error
+	}
+	// Buffered so the reader goroutine can always deliver its result and
+	// exit, even after the timeout branch below has already returned.
+	ch := make(chan syncStruct, 1)
+	go func() {
+		var si *syncInfo
+		// Use a goroutine-local error; writing to the enclosing
+		// function's err from here would be a data race.
+		if err := json.NewDecoder(parentPipe).Decode(&si); err != nil {
+			ch <- syncStruct{err: err}
+			return
+		}
+		ch <- syncStruct{si: si}
+	}()
+
+	select {
+	case ss := <-ch:
+		if ss.err != nil {
+			return errors.Wrapf(ss.err, "error reading container (probably exited) json message")
+		}
+		if ss.si == nil {
+			// A JSON "null" decodes successfully but leaves the pointer nil.
+			return errors.Wrapf(ErrInternal, "container create failed: empty sync message from conmon")
+		}
+		logrus.Debugf("Received container pid: %d", ss.si.Pid)
+		if ss.si.Pid == -1 {
+			if ss.si.Message != "" {
+				return errors.Wrapf(ErrInternal, "container create failed: %s", ss.si.Message)
+			}
+			return errors.Wrapf(ErrInternal, "container create failed")
+		}
+	case <-time.After(ContainerCreateTimeout):
+		return errors.Wrapf(ErrInternal, "container creation timeout")
+	}
+	return nil
+}
+
+// updateContainerStatus retrieves the current status of the container from the
+// runtime.
+// It is currently a stub: ctr is not inspected and ErrNotImplemented is
+// always returned.
+func (r *OCIRuntime) updateContainerStatus(ctr *Container) error {
+ return ErrNotImplemented
+}
+
+// startContainer runs "<runtime path> start <container ID>" with this
+// process's stdin, stdout and stderr attached, and returns whatever error
+// that command invocation produces (nil on success).
+func (r *OCIRuntime) startContainer(ctr *Container) error {
+	// TODO: streams should probably *not* be our STDIN/OUT/ERR - redirect to buffers?
+	// TODO record start time in container struct
+	return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.path, "start", ctr.ID())
+}