author     Matthew Heon <matthew.heon@gmail.com>  2017-11-01 11:24:59 -0400
committer  Matthew Heon <matthew.heon@gmail.com>  2017-11-01 11:24:59 -0400
commit     a031b83a09a8628435317a03f199cdc18b78262f (patch)
tree       bc017a96769ce6de33745b8b0b1304ccf38e9df0 /server
parent     2b74391cd5281f6fdf391ff8ad50fd1490f6bf89 (diff)
download   podman-a031b83a09a8628435317a03f199cdc18b78262f.tar.gz
           podman-a031b83a09a8628435317a03f199cdc18b78262f.tar.bz2
           podman-a031b83a09a8628435317a03f199cdc18b78262f.zip
Initial checkin from CRI-O repo
Signed-off-by: Matthew Heon <matthew.heon@gmail.com>
Diffstat (limited to 'server')
-rw-r--r--  server/apparmor/aaparser.go                 89
-rw-r--r--  server/apparmor/apparmor_common.go          14
-rw-r--r--  server/apparmor/apparmor_supported.go      145
-rw-r--r--  server/apparmor/apparmor_unsupported.go     18
-rw-r--r--  server/apparmor/template.go                 45
-rw-r--r--  server/config.go                           112
-rw-r--r--  server/container_attach.go                 147
-rw-r--r--  server/container_create.go                1215
-rw-r--r--  server/container_exec.go                   108
-rw-r--r--  server/container_execsync.go                46
-rw-r--r--  server/container_list.go                   112
-rw-r--r--  server/container_portforward.go             91
-rw-r--r--  server/container_remove.go                  20
-rw-r--r--  server/container_start.go                   43
-rw-r--r--  server/container_stats.go                   14
-rw-r--r--  server/container_stats_list.go              13
-rw-r--r--  server/container_status.go                 102
-rw-r--r--  server/container_stop.go                    19
-rw-r--r--  server/container_updateruntimeconfig.go     11
-rw-r--r--  server/image_fs_info.go                     13
-rw-r--r--  server/image_list.go                        41
-rw-r--r--  server/image_pull.go                       108
-rw-r--r--  server/image_remove.go                      52
-rw-r--r--  server/image_status.go                      53
-rw-r--r--  server/inspect.go                          105
-rw-r--r--  server/inspect_test.go                     235
-rw-r--r--  server/naming.go                            86
-rw-r--r--  server/runtime_status.go                    41
-rw-r--r--  server/sandbox_list.go                      94
-rw-r--r--  server/sandbox_network.go                   70
-rw-r--r--  server/sandbox_remove.go                    98
-rw-r--r--  server/sandbox_run.go                      615
-rw-r--r--  server/sandbox_status.go                    41
-rw-r--r--  server/sandbox_stop.go                     114
-rw-r--r--  server/seccomp/seccomp.go                  165
-rw-r--r--  server/seccomp/seccomp_unsupported.go       20
-rw-r--r--  server/seccomp/types.go                     93
-rw-r--r--  server/secrets.go                          162
-rw-r--r--  server/server.go                           423
-rw-r--r--  server/utils.go                            183
-rw-r--r--  server/version.go                           27
41 files changed, 5203 insertions, 0 deletions
diff --git a/server/apparmor/aaparser.go b/server/apparmor/aaparser.go
new file mode 100644
index 000000000..7f0f02ac5
--- /dev/null
+++ b/server/apparmor/aaparser.go
@@ -0,0 +1,89 @@
+// +build apparmor
+
+package apparmor
+
+import (
+ "fmt"
+ "os/exec"
+ "strconv"
+ "strings"
+)
+
+const (
+ binary = "apparmor_parser"
+)
+
+// GetVersion returns the major and minor version of apparmor_parser.
+func GetVersion() (int, error) {
+ output, err := cmd("", "--version")
+ if err != nil {
+ return -1, err
+ }
+
+ return parseVersion(output)
+}
+
+// LoadProfile runs `apparmor_parser -r` on a specified apparmor profile to
+// replace the profile.
+func LoadProfile(profilePath string) error {
+ _, err := cmd("", "-r", profilePath)
+ return err
+}
+
+// cmd runs `apparmor_parser` with the passed arguments.
+func cmd(dir string, arg ...string) (string, error) {
+ c := exec.Command(binary, arg...)
+ c.Dir = dir
+
+ output, err := c.CombinedOutput()
+ if err != nil {
+ return "", fmt.Errorf("running `%s %s` failed with output: %s\nerror: %v", c.Path, strings.Join(c.Args, " "), output, err)
+ }
+
+ return string(output), nil
+}
+
+// parseVersion takes the output from `apparmor_parser --version` and returns
+// a representation of the {major, minor, patch} version as a single number of
+// the form MMmmPPP, e.g. version 2.9.1 becomes 209001.
+func parseVersion(output string) (int, error) {
+ // output is in the form of the following:
+ // AppArmor parser version 2.9.1
+ // Copyright (C) 1999-2008 Novell Inc.
+ // Copyright 2009-2012 Canonical Ltd.
+
+ lines := strings.SplitN(output, "\n", 2)
+ words := strings.Split(lines[0], " ")
+ version := words[len(words)-1]
+
+ // split by major minor version
+ v := strings.Split(version, ".")
+ if len(v) == 0 || len(v) > 3 {
+ return -1, fmt.Errorf("parsing version failed for output: `%s`", output)
+ }
+
+ // Default the versions to 0.
+ var majorVersion, minorVersion, patchLevel int
+
+ majorVersion, err := strconv.Atoi(v[0])
+ if err != nil {
+ return -1, err
+ }
+
+ if len(v) > 1 {
+ minorVersion, err = strconv.Atoi(v[1])
+ if err != nil {
+ return -1, err
+ }
+ }
+ if len(v) > 2 {
+ patchLevel, err = strconv.Atoi(v[2])
+ if err != nil {
+ return -1, err
+ }
+ }
+
+ // major*10^5 + minor*10^3 + patch*10^0
+ numericVersion := majorVersion*1e5 + minorVersion*1e3 + patchLevel
+ return numericVersion, nil
+}
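
A worked example of the MMmmPPP encoding: "2.9.1" maps to 2*10^5 + 9*10^3 + 1 = 209001.
A minimal sketch as a Go example test (a hypothetical file, placed inside this package
since parseVersion is unexported, and gated on the same build tag):

    // +build apparmor

    package apparmor

    import "fmt"

    func Example_parseVersion() {
        // parseVersion only inspects the first line and takes the last
        // space-separated word as the version string.
        out := "AppArmor parser version 2.9.1\n" +
            "Copyright (C) 1999-2008 Novell Inc.\n"
        v, err := parseVersion(out)
        if err != nil {
            panic(err)
        }
        fmt.Println(v)
        // Output: 209001
    }
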
diff --git a/server/apparmor/apparmor_common.go b/server/apparmor/apparmor_common.go
new file mode 100644
index 000000000..6366a66e6
--- /dev/null
+++ b/server/apparmor/apparmor_common.go
@@ -0,0 +1,14 @@
+package apparmor
+
+const (
+ // DefaultApparmorProfile is the name of the default apparmor profile.
+ DefaultApparmorProfile = "crio-default"
+
+ // ContainerAnnotationKeyPrefix is the prefix to an annotation key specifying a container profile.
+ ContainerAnnotationKeyPrefix = "container.apparmor.security.beta.kubernetes.io/"
+
+ // ProfileRuntimeDefault is the profile specifying the runtime default.
+ ProfileRuntimeDefault = "runtime/default"
+ // ProfileNamePrefix is the prefix for specifying profiles loaded on the node.
+ ProfileNamePrefix = "localhost/"
+)
diff --git a/server/apparmor/apparmor_supported.go b/server/apparmor/apparmor_supported.go
new file mode 100644
index 000000000..d765c9de9
--- /dev/null
+++ b/server/apparmor/apparmor_supported.go
@@ -0,0 +1,145 @@
+// +build apparmor
+
+package apparmor
+
+import (
+ "bufio"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "os"
+ "path"
+ "strings"
+
+ "github.com/docker/docker/utils/templates"
+ "github.com/opencontainers/runc/libcontainer/apparmor"
+)
+
+const (
+ // profileDirectory is the file store for apparmor profiles and macros.
+ profileDirectory = "/etc/apparmor.d"
+)
+
+// profileData holds information about the given profile for generation.
+type profileData struct {
+ // Name is profile name.
+ Name string
+ // Imports defines the apparmor functions to import, before defining the profile.
+ Imports []string
+ // InnerImports defines the apparmor functions to import in the profile.
+ InnerImports []string
+ // Version is the {major, minor, patch} version of apparmor_parser as a single number.
+ Version int
+}
+
+// EnsureDefaultApparmorProfile loads default apparmor profile, if it is not loaded.
+func EnsureDefaultApparmorProfile() error {
+ if apparmor.IsEnabled() {
+ loaded, err := IsLoaded(DefaultApparmorProfile)
+ if err != nil {
+ return fmt.Errorf("Could not check if %s AppArmor profile was loaded: %s", DefaultApparmorProfile, err)
+ }
+
+ // Nothing to do.
+ if loaded {
+ return nil
+ }
+
+ // Load the profile.
+ if err := InstallDefault(DefaultApparmorProfile); err != nil {
+ return fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", DefaultApparmorProfile)
+ }
+ }
+
+ return nil
+}
+
+// IsEnabled returns true if apparmor is enabled for the host.
+func IsEnabled() bool {
+ return apparmor.IsEnabled()
+}
+
+// GetProfileNameFromPodAnnotations gets the name of the profile to use with the
+// container from the pod annotations.
+func GetProfileNameFromPodAnnotations(annotations map[string]string, containerName string) string {
+ return annotations[ContainerAnnotationKeyPrefix+containerName]
+}
+
+// InstallDefault generates a default profile in a temp directory determined by
+// os.TempDir(), then loads the profile into the kernel using 'apparmor_parser'.
+func InstallDefault(name string) error {
+ p := profileData{
+ Name: name,
+ }
+
+ // Install to a temporary directory.
+ f, err := ioutil.TempFile("", name)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+
+ if err := p.generateDefault(f); err != nil {
+ return err
+ }
+
+ return LoadProfile(f.Name())
+}
+
+// IsLoaded checks if a profile with the given name has been loaded into the
+// kernel.
+func IsLoaded(name string) (bool, error) {
+ file, err := os.Open("/sys/kernel/security/apparmor/profiles")
+ if err != nil {
+ return false, err
+ }
+ defer file.Close()
+
+ r := bufio.NewReader(file)
+ for {
+ p, err := r.ReadString('\n')
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ return false, err
+ }
+ if strings.HasPrefix(p, name+" ") {
+ return true, nil
+ }
+ }
+
+ return false, nil
+}
+
+// generateDefault creates an apparmor profile from ProfileData.
+func (p *profileData) generateDefault(out io.Writer) error {
+ compiled, err := templates.NewParse("apparmor_profile", baseTemplate)
+ if err != nil {
+ return err
+ }
+
+ if macroExists("tunables/global") {
+ p.Imports = append(p.Imports, "#include <tunables/global>")
+ } else {
+ p.Imports = append(p.Imports, "@{PROC}=/proc/")
+ }
+
+ if macroExists("abstractions/base") {
+ p.InnerImports = append(p.InnerImports, "#include <abstractions/base>")
+ }
+
+ ver, err := GetVersion()
+ if err != nil {
+ return err
+ }
+ p.Version = ver
+
+ return compiled.Execute(out, p)
+}
+
+// macroExists checks if the passed macro exists.
+func macroExists(m string) bool {
+ _, err := os.Stat(path.Join(profileDirectory, m))
+ return err == nil
+}
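
Tying the exported pieces together, a minimal usage sketch (the container name "web"
and the profile "localhost/my-profile" are made-up values; the import path is the one
used throughout this commit):

    package main

    import (
        "fmt"

        "github.com/kubernetes-incubator/cri-o/server/apparmor"
    )

    func main() {
        // Load the default profile once at startup, if apparmor is on.
        if apparmor.IsEnabled() {
            if err := apparmor.EnsureDefaultApparmorProfile(); err != nil {
                fmt.Println("ensure default profile:", err)
            }
        }

        // Resolve a per-container profile from pod annotations.
        anns := map[string]string{
            apparmor.ContainerAnnotationKeyPrefix + "web": "localhost/my-profile",
        }
        fmt.Println(apparmor.GetProfileNameFromPodAnnotations(anns, "web"))
        // Prints: localhost/my-profile
    }
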
diff --git a/server/apparmor/apparmor_unsupported.go b/server/apparmor/apparmor_unsupported.go
new file mode 100644
index 000000000..fbd1d87a0
--- /dev/null
+++ b/server/apparmor/apparmor_unsupported.go
@@ -0,0 +1,18 @@
+// +build !apparmor
+
+package apparmor
+
+// IsEnabled returns false when built without the apparmor build tag.
+func IsEnabled() bool {
+ return false
+}
+
+// EnsureDefaultApparmorProfile does nothing when built without the apparmor build tag.
+func EnsureDefaultApparmorProfile() error {
+ return nil
+}
+
+// GetProfileNameFromPodAnnotations returns an empty string when built without the apparmor build tag.
+func GetProfileNameFromPodAnnotations(annotations map[string]string, containerName string) string {
+ return ""
+}
diff --git a/server/apparmor/template.go b/server/apparmor/template.go
new file mode 100644
index 000000000..6656ff61c
--- /dev/null
+++ b/server/apparmor/template.go
@@ -0,0 +1,45 @@
+// +build apparmor
+
+package apparmor
+
+// baseTemplate defines the default apparmor profile for containers.
+const baseTemplate = `
+{{range $value := .Imports}}
+{{$value}}
+{{end}}
+
+profile {{.Name}} flags=(attach_disconnected,mediate_deleted) {
+{{range $value := .InnerImports}}
+ {{$value}}
+{{end}}
+
+ network,
+ capability,
+ file,
+ umount,
+
+ deny @{PROC}/* w, # deny write for all files directly in /proc (not in a subdir)
+ # deny write to files not in /proc/<number>/** or /proc/sys/**
+ deny @{PROC}/{[^1-9],[^1-9][^0-9],[^1-9s][^0-9y][^0-9s],[^1-9][^0-9][^0-9][^0-9]*}/** w,
+ deny @{PROC}/sys/[^k]** w, # deny /proc/sys except /proc/sys/k* (effectively /proc/sys/kernel)
+ deny @{PROC}/sys/kernel/{?,??,[^s][^h][^m]**} w, # deny everything except shm* in /proc/sys/kernel/
+ deny @{PROC}/sysrq-trigger rwklx,
+ deny @{PROC}/mem rwklx,
+ deny @{PROC}/kmem rwklx,
+ deny @{PROC}/kcore rwklx,
+
+ deny mount,
+
+ deny /sys/[^f]*/** wklx,
+ deny /sys/f[^s]*/** wklx,
+ deny /sys/fs/[^c]*/** wklx,
+ deny /sys/fs/c[^g]*/** wklx,
+ deny /sys/fs/cg[^r]*/** wklx,
+ deny /sys/firmware/** rwklx,
+ deny /sys/kernel/security/** rwklx,
+
+{{if ge .Version 208095}}
+ ptrace (trace,read) peer={{.Name}},
+{{end}}
+}
+`
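
Under parseVersion's encoding above (major*10^5 + minor*10^3 + patch), the template's
gate value reads 208095 = 2*10^5 + 8*10^3 + 95, i.e. apparmor_parser 2.8.95 or newer.
The ptrace rule is only emitted for parsers at least that new, presumably because older
parsers reject the ptrace keyword.
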
diff --git a/server/config.go b/server/config.go
new file mode 100644
index 000000000..6c2d26cd0
--- /dev/null
+++ b/server/config.go
@@ -0,0 +1,112 @@
+package server
+
+import (
+ "bytes"
+ "io/ioutil"
+
+ "github.com/BurntSushi/toml"
+ "github.com/kubernetes-incubator/cri-o/libkpod"
+)
+
+// CrioConfigPath is the default location for the conf file
+const CrioConfigPath = "/etc/crio/crio.conf"
+
+// Config represents the entire set of configuration values that can be set for
+// the server. This is intended to be loaded from a toml-encoded config file.
+type Config struct {
+ libkpod.Config
+ APIConfig
+}
+
+// APIConfig represents the "crio.api" TOML config table.
+type APIConfig struct {
+ // Listen is the path to the AF_LOCAL socket on which cri-o will listen.
+ // This may support proto://addr formats later, but currently this is just
+ // a path.
+ Listen string `toml:"listen"`
+
+ // StreamAddress is the IP address on which the stream server will listen.
+ StreamAddress string `toml:"stream_address"`
+
+ // StreamPort is the port on which the stream server will listen.
+ StreamPort string `toml:"stream_port"`
+}
+
+// tomlConfig is another way of looking at a Config, which is
+// TOML-friendly (it has all of the explicit tables). It's just used for
+// conversions.
+type tomlConfig struct {
+ Crio struct {
+ libkpod.RootConfig
+ API struct{ APIConfig } `toml:"api"`
+ Runtime struct{ libkpod.RuntimeConfig } `toml:"runtime"`
+ Image struct{ libkpod.ImageConfig } `toml:"image"`
+ Network struct{ libkpod.NetworkConfig } `toml:"network"`
+ } `toml:"crio"`
+}
+
+func (t *tomlConfig) toConfig(c *Config) {
+ c.RootConfig = t.Crio.RootConfig
+ c.APIConfig = t.Crio.API.APIConfig
+ c.RuntimeConfig = t.Crio.Runtime.RuntimeConfig
+ c.ImageConfig = t.Crio.Image.ImageConfig
+ c.NetworkConfig = t.Crio.Network.NetworkConfig
+}
+
+func (t *tomlConfig) fromConfig(c *Config) {
+ t.Crio.RootConfig = c.RootConfig
+ t.Crio.API.APIConfig = c.APIConfig
+ t.Crio.Runtime.RuntimeConfig = c.RuntimeConfig
+ t.Crio.Image.ImageConfig = c.ImageConfig
+ t.Crio.Network.NetworkConfig = c.NetworkConfig
+}
+
+// UpdateFromFile populates the Config from the TOML-encoded file at the given path.
+// Returns errors encountered when reading or parsing the files, or nil
+// otherwise.
+func (c *Config) UpdateFromFile(path string) error {
+ data, err := ioutil.ReadFile(path)
+ if err != nil {
+ return err
+ }
+
+ t := new(tomlConfig)
+ t.fromConfig(c)
+
+ _, err = toml.Decode(string(data), t)
+ if err != nil {
+ return err
+ }
+
+ t.toConfig(c)
+ return nil
+}
+
+// ToFile outputs the given Config as a TOML-encoded file at the given path.
+// Returns errors encountered when generating or writing the file, or nil
+// otherwise.
+func (c *Config) ToFile(path string) error {
+ var w bytes.Buffer
+ e := toml.NewEncoder(&w)
+
+ t := new(tomlConfig)
+ t.fromConfig(c)
+
+ if err := e.Encode(*t); err != nil {
+ return err
+ }
+
+ return ioutil.WriteFile(path, w.Bytes(), 0644)
+}
+
+// DefaultConfig returns the default configuration for crio.
+func DefaultConfig() *Config {
+ return &Config{
+ Config: *libkpod.DefaultConfig(),
+ APIConfig: APIConfig{
+ Listen: "/var/run/crio.sock",
+ StreamAddress: "",
+ StreamPort: "10010",
+ },
+ }
+}
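
A minimal sketch of loading this configuration (the import path is the one used
throughout this commit; error handling is trimmed to a print):

    package main

    import (
        "fmt"

        "github.com/kubernetes-incubator/cri-o/server"
    )

    func main() {
        c := server.DefaultConfig()
        // A file at CrioConfigPath shaped like:
        //   [crio.api]
        //   listen = "/var/run/crio.sock"
        //   stream_port = "10010"
        // overrides the corresponding defaults via the tomlConfig wrapper.
        if err := c.UpdateFromFile(server.CrioConfigPath); err != nil {
            fmt.Println("load config:", err)
            return
        }
        // APIConfig is embedded, so its fields are promoted.
        fmt.Println(c.Listen, c.StreamAddress, c.StreamPort)
    }
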
diff --git a/server/container_attach.go b/server/container_attach.go
new file mode 100644
index 000000000..6a822858d
--- /dev/null
+++ b/server/container_attach.go
@@ -0,0 +1,147 @@
+package server
+
+import (
+ "fmt"
+ "io"
+ "net"
+ "os"
+ "path/filepath"
+
+ "github.com/kubernetes-incubator/cri-o/oci"
+ "github.com/kubernetes-incubator/cri-o/utils"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ "golang.org/x/sys/unix"
+ "k8s.io/client-go/tools/remotecommand"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+ kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
+)
+
+/* Sync with stdpipe_t in conmon.c */
+const (
+ AttachPipeStdin = 1
+ AttachPipeStdout = 2
+ AttachPipeStderr = 3
+)
+
+// Attach prepares a streaming endpoint to attach to a running container.
+func (s *Server) Attach(ctx context.Context, req *pb.AttachRequest) (*pb.AttachResponse, error) {
+ logrus.Debugf("AttachRequest %+v", req)
+
+ resp, err := s.GetAttach(req)
+ if err != nil {
+ return nil, fmt.Errorf("unable to prepare attach endpoint")
+ }
+
+ return resp, nil
+}
+
+// Attach endpoint for streaming.Runtime
+func (ss streamService) Attach(containerID string, inputStream io.Reader, outputStream, errorStream io.WriteCloser, tty bool, resize <-chan remotecommand.TerminalSize) error {
+ c := ss.runtimeServer.GetContainer(containerID)
+
+ if c == nil {
+ return fmt.Errorf("could not find container %q", containerID)
+ }
+
+ if err := ss.runtimeServer.Runtime().UpdateStatus(c); err != nil {
+ return err
+ }
+
+ cState := ss.runtimeServer.Runtime().ContainerStatus(c)
+ if !(cState.Status == oci.ContainerStateRunning || cState.Status == oci.ContainerStateCreated) {
+ return fmt.Errorf("container is not created or running")
+ }
+
+ controlPath := filepath.Join(c.BundlePath(), "ctl")
+ controlFile, err := os.OpenFile(controlPath, unix.O_WRONLY, 0)
+ if err != nil {
+ return fmt.Errorf("failed to open container ctl file: %v", err)
+ }
+
+ kubecontainer.HandleResizing(resize, func(size remotecommand.TerminalSize) {
+ logrus.Infof("Got a resize event: %+v", size)
+ _, err := fmt.Fprintf(controlFile, "%d %d %d\n", 1, size.Height, size.Width)
+ if err != nil {
+ logrus.Infof("Failed to write to control file to resize terminal: %v", err)
+ }
+ })
+
+ attachSocketPath := filepath.Join(oci.ContainerAttachSocketDir, c.ID(), "attach")
+ conn, err := net.DialUnix("unixpacket", nil, &net.UnixAddr{Name: attachSocketPath, Net: "unixpacket"})
+ if err != nil {
+ return fmt.Errorf("failed to connect to container %s attach socket: %v", c.ID(), err)
+ }
+ defer conn.Close()
+
+ receiveStdout := make(chan error)
+ if outputStream != nil || errorStream != nil {
+ go func() {
+ receiveStdout <- redirectResponseToOutputStreams(outputStream, errorStream, conn)
+ }()
+ }
+
+ stdinDone := make(chan error)
+ go func() {
+ var err error
+ if inputStream != nil {
+ _, err = utils.CopyDetachable(conn, inputStream, nil)
+ conn.CloseWrite()
+ }
+ stdinDone <- err
+ }()
+
+ select {
+ case err := <-receiveStdout:
+ return err
+ case err := <-stdinDone:
+ if _, ok := err.(utils.DetachError); ok {
+ return nil
+ }
+ if outputStream != nil || errorStream != nil {
+ return <-receiveStdout
+ }
+ }
+
+ return nil
+}
+
+func redirectResponseToOutputStreams(outputStream, errorStream io.Writer, conn io.Reader) error {
+ var err error
+ buf := make([]byte, 8192+1) /* Sync with conmon STDIO_BUF_SIZE */
+
+ for {
+ nr, er := conn.Read(buf)
+ if nr > 0 {
+ var dst io.Writer
+ if buf[0] == AttachPipeStdout {
+ dst = outputStream
+ } else if buf[0] == AttachPipeStderr {
+ dst = errorStream
+ } else {
+ logrus.Infof("Got unexpected attach type %+d", buf[0])
+ }
+
+ if dst != nil {
+ nw, ew := dst.Write(buf[1:nr])
+ if ew != nil {
+ err = ew
+ break
+ }
+ if nr != nw+1 {
+ err = io.ErrShortWrite
+ break
+ }
+ }
+ }
+ if er == io.EOF {
+ break
+ }
+ if er != nil {
+ err = er
+ break
+ }
+ }
+
+ return err
+}
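
The attach socket speaks a simple packet protocol: the first byte of every packet is
the pipe identifier (synced with conmon's stdpipe_t, see the constants above) and the
rest is payload. A minimal demultiplexing sketch with a fabricated packet:

    package main

    import "fmt"

    const (
        AttachPipeStdin  = 1
        AttachPipeStdout = 2
        AttachPipeStderr = 3
    )

    func main() {
        // Prefix byte 2 (stdout) followed by the payload.
        pkt := append([]byte{AttachPipeStdout}, []byte("hello\n")...)
        switch pkt[0] {
        case AttachPipeStdout:
            fmt.Printf("stdout: %s", pkt[1:])
        case AttachPipeStderr:
            fmt.Printf("stderr: %s", pkt[1:])
        default:
            fmt.Printf("unexpected pipe %d\n", pkt[0])
        }
    }
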
diff --git a/server/container_create.go b/server/container_create.go
new file mode 100644
index 000000000..b28498c8d
--- /dev/null
+++ b/server/container_create.go
@@ -0,0 +1,1215 @@
+package server
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+ "regexp"
+ "sort"
+ "strconv"
+ "strings"
+ "time"
+
+ "github.com/docker/distribution/reference"
+ "github.com/docker/docker/pkg/stringid"
+ "github.com/docker/docker/pkg/symlink"
+ "github.com/kubernetes-incubator/cri-o/libkpod"
+ "github.com/kubernetes-incubator/cri-o/libkpod/sandbox"
+ "github.com/kubernetes-incubator/cri-o/oci"
+ "github.com/kubernetes-incubator/cri-o/pkg/annotations"
+ "github.com/kubernetes-incubator/cri-o/pkg/storage"
+ "github.com/kubernetes-incubator/cri-o/server/apparmor"
+ "github.com/kubernetes-incubator/cri-o/server/seccomp"
+ "github.com/opencontainers/image-spec/specs-go/v1"
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/devices"
+ "github.com/opencontainers/runc/libcontainer/user"
+ rspec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/opencontainers/runtime-tools/generate"
+ "github.com/opencontainers/selinux/go-selinux/label"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ "golang.org/x/sys/unix"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+const (
+ seccompUnconfined = "unconfined"
+ seccompRuntimeDefault = "runtime/default"
+ seccompDockerDefault = "docker/default"
+ seccompLocalhostPrefix = "localhost/"
+
+ scopePrefix = "crio"
+ defaultCgroupfsParent = "/crio"
+ defaultSystemdParent = "system.slice"
+)
+
+type orderedMounts []rspec.Mount
+
+// Len returns the number of mounts. Used in sorting.
+func (m orderedMounts) Len() int {
+ return len(m)
+}
+
+// Less returns true if the number of parts (a/b/c would be 3 parts) in the
+// mount indexed by parameter 1 is less than that of the mount indexed by
+// parameter 2. Used in sorting.
+func (m orderedMounts) Less(i, j int) bool {
+ return m.parts(i) < m.parts(j)
+}
+
+// Swap swaps two items in an array of mounts. Used in sorting
+func (m orderedMounts) Swap(i, j int) {
+ m[i], m[j] = m[j], m[i]
+}
+
+// parts returns the number of parts in the destination of a mount. Used in sorting.
+func (m orderedMounts) parts(i int) int {
+ return strings.Count(filepath.Clean(m[i].Destination), string(os.PathSeparator))
+}
+
+func addOCIBindMounts(mountLabel string, containerConfig *pb.ContainerConfig, specgen *generate.Generator) ([]oci.ContainerVolume, []rspec.Mount, error) {
+ volumes := []oci.ContainerVolume{}
+ ociMounts := []rspec.Mount{}
+ mounts := containerConfig.GetMounts()
+ for _, mount := range mounts {
+ dest := mount.ContainerPath
+ if dest == "" {
+ return nil, nil, fmt.Errorf("Mount.ContainerPath is empty")
+ }
+
+ src := mount.HostPath
+ if src == "" {
+ return nil, nil, fmt.Errorf("Mount.HostPath is empty")
+ }
+
+ if _, err := os.Stat(src); err != nil && os.IsNotExist(err) {
+ if err1 := os.MkdirAll(src, 0644); err1 != nil {
+ return nil, nil, fmt.Errorf("Failed to mkdir %s: %s", src, err)
+ }
+ }
+
+ src, err := resolveSymbolicLink(src)
+ if err != nil {
+ return nil, nil, fmt.Errorf("failed to resolve symlink %q: %v", src, err)
+ }
+
+ options := []string{"rw"}
+ if mount.Readonly {
+ options = []string{"ro"}
+ }
+ options = append(options, []string{"rbind", "rprivate"}...)
+
+ if mount.SelinuxRelabel {
+ // Need a way in kubernetes to determine if the volume is shared or private
+ if err := label.Relabel(src, mountLabel, true); err != nil && err != unix.ENOTSUP {
+ return nil, nil, fmt.Errorf("relabel failed %s: %v", src, err)
+ }
+ }
+
+ volumes = append(volumes, oci.ContainerVolume{
+ ContainerPath: dest,
+ HostPath: src,
+ Readonly: mount.Readonly,
+ })
+
+ ociMounts = append(ociMounts, rspec.Mount{
+ Source: src,
+ Destination: dest,
+ Options: options,
+ })
+ }
+
+ return volumes, ociMounts, nil
+}
+
+func addImageVolumes(rootfs string, s *Server, containerInfo *storage.ContainerInfo, specgen *generate.Generator, mountLabel string) ([]rspec.Mount, error) {
+ mounts := []rspec.Mount{}
+ for dest := range containerInfo.Config.Config.Volumes {
+ fp, err := symlink.FollowSymlinkInScope(filepath.Join(rootfs, dest), rootfs)
+ if err != nil {
+ return nil, err
+ }
+ switch s.config.ImageVolumes {
+ case libkpod.ImageVolumesMkdir:
+ if err1 := os.MkdirAll(fp, 0644); err1 != nil {
+ return nil, err1
+ }
+ case libkpod.ImageVolumesBind:
+ volumeDirName := stringid.GenerateNonCryptoID()
+ src := filepath.Join(containerInfo.RunDir, "mounts", volumeDirName)
+ if err1 := os.MkdirAll(src, 0644); err1 != nil {
+ return nil, err1
+ }
+ // Label the source with the sandbox selinux mount label
+ if mountLabel != "" {
+ if err1 := label.Relabel(src, mountLabel, true); err1 != nil && err1 != unix.ENOTSUP {
+ return nil, fmt.Errorf("relabel failed %s: %v", src, err1)
+ }
+ }
+
+ logrus.Debugf("Adding bind mounted volume: %s to %s", src, dest)
+ mounts = append(mounts, rspec.Mount{
+ Source: src,
+ Destination: dest,
+ Options: []string{"rw"},
+ })
+
+ case libkpod.ImageVolumesIgnore:
+ logrus.Debugf("Ignoring volume %v", dest)
+ default:
+ logrus.Fatalf("Unrecognized image volumes setting")
+ }
+ }
+ return mounts, nil
+}
+
+// resolveSymbolicLink resolves a possible symlink path. If the path is a symlink, returns the resolved
+// path; if not, returns the original path.
+func resolveSymbolicLink(path string) (string, error) {
+ info, err := os.Lstat(path)
+ if err != nil {
+ return "", err
+ }
+ if info.Mode()&os.ModeSymlink != os.ModeSymlink {
+ return path, nil
+ }
+ return filepath.EvalSymlinks(path)
+}
+
+func addDevices(sb *sandbox.Sandbox, containerConfig *pb.ContainerConfig, specgen *generate.Generator) error {
+ sp := specgen.Spec()
+ if containerConfig.GetLinux().GetSecurityContext().GetPrivileged() {
+ hostDevices, err := devices.HostDevices()
+ if err != nil {
+ return err
+ }
+ for _, hostDevice := range hostDevices {
+ rd := rspec.LinuxDevice{
+ Path: hostDevice.Path,
+ Type: string(hostDevice.Type),
+ Major: hostDevice.Major,
+ Minor: hostDevice.Minor,
+ UID: &hostDevice.Uid,
+ GID: &hostDevice.Gid,
+ }
+ if hostDevice.Major == 0 && hostDevice.Minor == 0 {
+ // Invalid device, most likely a symbolic link, skip it.
+ continue
+ }
+ specgen.AddDevice(rd)
+ }
+ sp.Linux.Resources.Devices = []rspec.LinuxDeviceCgroup{
+ {
+ Allow: true,
+ Access: "rwm",
+ },
+ }
+ return nil
+ }
+ for _, device := range containerConfig.GetDevices() {
+ path, err := resolveSymbolicLink(device.HostPath)
+ if err != nil {
+ return err
+ }
+ dev, err := devices.DeviceFromPath(path, device.Permissions)
+ // if there was no error, add the device
+ if err == nil {
+ rd := rspec.LinuxDevice{
+ Path: device.ContainerPath,
+ Type: string(dev.Type),
+ Major: dev.Major,
+ Minor: dev.Minor,
+ UID: &dev.Uid,
+ GID: &dev.Gid,
+ }
+ specgen.AddDevice(rd)
+ sp.Linux.Resources.Devices = append(sp.Linux.Resources.Devices, rspec.LinuxDeviceCgroup{
+ Allow: true,
+ Type: string(dev.Type),
+ Major: &dev.Major,
+ Minor: &dev.Minor,
+ Access: dev.Permissions,
+ })
+ continue
+ }
+ // if the device is not a device node
+ // try to see if it's a directory holding many devices
+ if err == devices.ErrNotADevice {
+
+ // check if it is a directory
+ if src, e := os.Stat(path); e == nil && src.IsDir() {
+
+ // mount the internal devices recursively
+ filepath.Walk(path, func(dpath string, f os.FileInfo, e error) error {
+ childDevice, e := devices.DeviceFromPath(dpath, device.Permissions)
+ if e != nil {
+ // ignore the device
+ return nil
+ }
+ cPath := strings.Replace(dpath, path, device.ContainerPath, 1)
+ rd := rspec.LinuxDevice{
+ Path: cPath,
+ Type: string(childDevice.Type),
+ Major: childDevice.Major,
+ Minor: childDevice.Minor,
+ UID: &childDevice.Uid,
+ GID: &childDevice.Gid,
+ }
+ specgen.AddDevice(rd)
+ sp.Linux.Resources.Devices = append(sp.Linux.Resources.Devices, rspec.LinuxDeviceCgroup{
+ Allow: true,
+ Type: string(childDevice.Type),
+ Major: &childDevice.Major,
+ Minor: &childDevice.Minor,
+ Access: childDevice.Permissions,
+ })
+
+ return nil
+ })
+ }
+ }
+ }
+ return nil
+}
+
+// buildOCIProcessArgs builds an OCI-compatible process arguments slice.
+func buildOCIProcessArgs(containerKubeConfig *pb.ContainerConfig, imageOCIConfig *v1.Image) ([]string, error) {
+ //# Start the nginx container using the default command, but use custom
+ //arguments (arg1 .. argN) for that command.
+ //kubectl run nginx --image=nginx -- <arg1> <arg2> ... <argN>
+
+ //# Start the nginx container using a different command and custom arguments.
+ //kubectl run nginx --image=nginx --command -- <cmd> <arg1> ... <argN>
+
+ kubeCommands := containerKubeConfig.Command
+ kubeArgs := containerKubeConfig.Args
+
+ // merge image config and kube config
+ // same as docker does today...
+ if imageOCIConfig != nil {
+ if len(kubeCommands) == 0 {
+ if len(kubeArgs) == 0 {
+ kubeArgs = imageOCIConfig.Config.Cmd
+ }
+ if kubeCommands == nil {
+ kubeCommands = imageOCIConfig.Config.Entrypoint
+ }
+ }
+ }
+
+ if len(kubeCommands) == 0 && len(kubeArgs) == 0 {
+ return nil, fmt.Errorf("no command specified")
+ }
+
+ // create entrypoint and args
+ var entrypoint string
+ var args []string
+ if len(kubeCommands) != 0 {
+ entrypoint = kubeCommands[0]
+ args = append(kubeCommands[1:], kubeArgs...)
+ } else {
+ entrypoint = kubeArgs[0]
+ args = kubeArgs[1:]
+ }
+
+ processArgs := append([]string{entrypoint}, args...)
+
+ logrus.Debugf("OCI process args %v", processArgs)
+
+ return processArgs, nil
+}
+
+// addOCIHook adds the given hook program to the spec for each lifecycle stage it registers
+func addOCIHook(specgen *generate.Generator, hook libkpod.HookParams) error {
+ logrus.Debugf("AddOCIHook", hook)
+ for _, stage := range hook.Stage {
+ switch stage {
+ case "prestart":
+ specgen.AddPreStartHook(hook.Hook, []string{hook.Hook, "prestart"})
+
+ case "poststart":
+ specgen.AddPostStartHook(hook.Hook, []string{hook.Hook, "poststart"})
+
+ case "poststop":
+ specgen.AddPostStopHook(hook.Hook, []string{hook.Hook, "poststop"})
+ }
+ }
+ return nil
+}
+
+// setupContainerUser sets the UID, GID and supplemental groups in OCI runtime config
+func setupContainerUser(specgen *generate.Generator, rootfs string, sc *pb.LinuxContainerSecurityContext, imageConfig *v1.Image) error {
+ if sc != nil {
+ containerUser := ""
+ // Case 1: run as user is set by kubelet
+ if sc.GetRunAsUser() != nil {
+ containerUser = strconv.FormatInt(sc.GetRunAsUser().Value, 10)
+ } else {
+ // Case 2: run as username is set by kubelet
+ userName := sc.GetRunAsUsername()
+ if userName != "" {
+ containerUser = userName
+ } else {
+ // Case 3: get user from image config
+ if imageConfig != nil {
+ imageUser := imageConfig.Config.User
+ if imageUser != "" {
+ containerUser = imageUser
+ }
+ }
+ }
+ }
+
+ logrus.Debugf("CONTAINER USER: %+v", containerUser)
+
+ // Add uid, gid and groups from user
+ uid, gid, addGroups, err1 := getUserInfo(rootfs, containerUser)
+ if err1 != nil {
+ return err1
+ }
+
+ logrus.Debugf("UID: %v, GID: %v, Groups: %+v", uid, gid, addGroups)
+ specgen.SetProcessUID(uid)
+ specgen.SetProcessGID(gid)
+ for _, group := range addGroups {
+ specgen.AddProcessAdditionalGid(group)
+ }
+
+ // Add groups from CRI
+ groups := sc.GetSupplementalGroups()
+ for _, group := range groups {
+ specgen.AddProcessAdditionalGid(uint32(group))
+ }
+ }
+ return nil
+}
+
+func hostNetwork(containerConfig *pb.ContainerConfig) bool {
+ securityContext := containerConfig.GetLinux().GetSecurityContext()
+ if securityContext == nil || securityContext.GetNamespaceOptions() == nil {
+ return false
+ }
+
+ return securityContext.GetNamespaceOptions().HostNetwork
+}
+
+// ensureSaneLogPath is a hack to fix https://issues.k8s.io/44043 which causes
+// logPath to be a broken symlink to some magical Docker path. Ideally we
+// wouldn't have to deal with this, but until that issue is fixed we have to
+// remove the path if it's a broken symlink.
+func ensureSaneLogPath(logPath string) error {
+ // If the path exists but the resolved path does not, then we have a broken
+ // symlink and we need to remove it.
+ fi, err := os.Lstat(logPath)
+ if err != nil || fi.Mode()&os.ModeSymlink == 0 {
+ // Non-existent files and non-symlinks aren't our problem.
+ return nil
+ }
+
+ _, err = os.Stat(logPath)
+ if os.IsNotExist(err) {
+ err = os.RemoveAll(logPath)
+ if err != nil {
+ return fmt.Errorf("ensureSaneLogPath remove bad logPath: %s", err)
+ }
+ }
+ return nil
+}
+
+// addSecretsBindMounts mounts user defined secrets to the container
+func addSecretsBindMounts(mountLabel, ctrRunDir string, defaultMounts []string, specgen generate.Generator) ([]rspec.Mount, error) {
+ containerMounts := specgen.Spec().Mounts
+ mounts, err := secretMounts(defaultMounts, mountLabel, ctrRunDir, containerMounts)
+ if err != nil {
+ return nil, err
+ }
+ return mounts, nil
+}
+
+// CreateContainer creates a new container in specified PodSandbox
+func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerRequest) (res *pb.CreateContainerResponse, err error) {
+ logrus.Debugf("CreateContainerRequest %+v", req)
+
+ s.updateLock.RLock()
+ defer s.updateLock.RUnlock()
+
+ sbID := req.PodSandboxId
+ if sbID == "" {
+ return nil, fmt.Errorf("PodSandboxId should not be empty")
+ }
+
+ sandboxID, err := s.PodIDIndex().Get(sbID)
+ if err != nil {
+ return nil, fmt.Errorf("PodSandbox with ID starting with %s not found: %v", sbID, err)
+ }
+
+ sb := s.getSandbox(sandboxID)
+ if sb == nil {
+ return nil, fmt.Errorf("specified sandbox not found: %s", sandboxID)
+ }
+
+ // The config of the container
+ containerConfig := req.GetConfig()
+ if containerConfig == nil {
+ return nil, fmt.Errorf("CreateContainerRequest.ContainerConfig is nil")
+ }
+
+ name := containerConfig.GetMetadata().Name
+ if name == "" {
+ return nil, fmt.Errorf("CreateContainerRequest.ContainerConfig.Name is empty")
+ }
+
+ containerID, containerName, err := s.generateContainerIDandName(sb.Metadata(), containerConfig)
+ if err != nil {
+ return nil, err
+ }
+
+ defer func() {
+ if err != nil {
+ s.ReleaseContainerName(containerName)
+ }
+ }()
+
+ container, err := s.createSandboxContainer(ctx, containerID, containerName, sb, req.GetSandboxConfig(), containerConfig)
+ if err != nil {
+ return nil, err
+ }
+ defer func() {
+ if err != nil {
+ err2 := s.StorageRuntimeServer().DeleteContainer(containerID)
+ if err2 != nil {
+ logrus.Warnf("Failed to cleanup container directory: %v", err2)
+ }
+ }
+ }()
+
+ if err = s.Runtime().CreateContainer(container, sb.CgroupParent()); err != nil {
+ return nil, err
+ }
+
+ s.addContainer(container)
+
+ if err = s.CtrIDIndex().Add(containerID); err != nil {
+ s.removeContainer(container)
+ return nil, err
+ }
+
+ s.ContainerStateToDisk(container)
+
+ resp := &pb.CreateContainerResponse{
+ ContainerId: containerID,
+ }
+
+ logrus.Debugf("CreateContainerResponse: %+v", resp)
+ return resp, nil
+}
+
+func (s *Server) setupOCIHooks(specgen *generate.Generator, sb *sandbox.Sandbox, containerConfig *pb.ContainerConfig, command string) error {
+ mounts := containerConfig.GetMounts()
+ addedHooks := map[string]struct{}{}
+ addHook := func(hook libkpod.HookParams) error {
+ // Only add a hook once
+ if _, ok := addedHooks[hook.Hook]; !ok {
+ if err := addOCIHook(specgen, hook); err != nil {
+ return err
+ }
+ addedHooks[hook.Hook] = struct{}{}
+ }
+ return nil
+ }
+ for _, hook := range s.Hooks() {
+ logrus.Debugf("SetupOCIHooks", hook)
+ if hook.HasBindMounts && len(mounts) > 0 {
+ if err := addHook(hook); err != nil {
+ return err
+ }
+ continue
+ }
+ for _, cmd := range hook.Cmds {
+ match, err := regexp.MatchString(cmd, command)
+ if err != nil {
+ logrus.Errorf("Invalid regex %q:%q", cmd, err)
+ continue
+ }
+ if match {
+ if err := addHook(hook); err != nil {
+ return err
+ }
+ }
+ }
+ for _, annotationRegex := range hook.Annotations {
+ for _, annotation := range sb.Annotations() {
+ match, err := regexp.MatchString(annotationRegex, annotation)
+ if err != nil {
+ logrus.Errorf("Invalid regex %q:%q", annotationRegex, err)
+ continue
+ }
+ if match {
+ if err := addHook(hook); err != nil {
+ return err
+ }
+ }
+ }
+ }
+ }
+ return nil
+}
+func (s *Server) createSandboxContainer(ctx context.Context, containerID string, containerName string, sb *sandbox.Sandbox, SandboxConfig *pb.PodSandboxConfig, containerConfig *pb.ContainerConfig) (*oci.Container, error) {
+ if sb == nil {
+ return nil, errors.New("createSandboxContainer needs a sandbox")
+ }
+
+ // TODO: simplify this function (cyclomatic complexity here is high)
+ // TODO: factor generating/updating the spec into something other projects can vendor
+
+ // creates a spec Generator with the default spec.
+ specgen := generate.New()
+ specgen.HostSpecific = true
+ specgen.ClearProcessRlimits()
+
+ var readOnlyRootfs bool
+ var privileged bool
+ if containerConfig.GetLinux().GetSecurityContext() != nil {
+ if containerConfig.GetLinux().GetSecurityContext().Privileged {
+ privileged = true
+ }
+
+ if containerConfig.GetLinux().GetSecurityContext().ReadonlyRootfs {
+ readOnlyRootfs = true
+ specgen.SetRootReadonly(true)
+ }
+ }
+
+ mountLabel := sb.MountLabel()
+ processLabel := sb.ProcessLabel()
+ selinuxConfig := containerConfig.GetLinux().GetSecurityContext().GetSelinuxOptions()
+ if selinuxConfig != nil {
+ var err error
+ processLabel, mountLabel, err = getSELinuxLabels(selinuxConfig, privileged)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ containerVolumes, ociMounts, err := addOCIBindMounts(mountLabel, containerConfig, &specgen)
+ if err != nil {
+ return nil, err
+ }
+
+ volumesJSON, err := json.Marshal(containerVolumes)
+ if err != nil {
+ return nil, err
+ }
+ specgen.AddAnnotation(annotations.Volumes, string(volumesJSON))
+
+ // Add cgroup mount so container process can introspect its own limits
+ specgen.AddCgroupsMount("ro")
+
+ if err := addDevices(sb, containerConfig, &specgen); err != nil {
+ return nil, err
+ }
+
+ labels := containerConfig.GetLabels()
+
+ metadata := containerConfig.GetMetadata()
+
+ kubeAnnotations := containerConfig.GetAnnotations()
+ if kubeAnnotations != nil {
+ for k, v := range kubeAnnotations {
+ specgen.AddAnnotation(k, v)
+ }
+ }
+ if labels != nil {
+ for k, v := range labels {
+ specgen.AddAnnotation(k, v)
+ }
+ }
+
+ // set this container's apparmor profile if it is set by sandbox
+ if s.appArmorEnabled && !privileged {
+ appArmorProfileName := s.getAppArmorProfileName(sb.Annotations(), metadata.Name)
+ if appArmorProfileName != "" {
+ // reload default apparmor profile if it is unloaded.
+ if s.appArmorProfile == apparmor.DefaultApparmorProfile {
+ if err := apparmor.EnsureDefaultApparmorProfile(); err != nil {
+ return nil, err
+ }
+ }
+
+ specgen.SetProcessApparmorProfile(appArmorProfileName)
+ }
+ }
+
+ logPath := containerConfig.LogPath
+ if logPath == "" {
+ // TODO: Should we use sandboxConfig.GetLogDirectory() here?
+ logPath = filepath.Join(sb.LogDir(), containerID+".log")
+ }
+ if !filepath.IsAbs(logPath) {
+ // XXX: It's not really clear what this should be versus the sbox logDirectory.
+ logrus.Warnf("requested logPath for ctr id %s is a relative path: %s", containerID, logPath)
+ logPath = filepath.Join(sb.LogDir(), logPath)
+ }
+
+ // Handle https://issues.k8s.io/44043
+ if err := ensureSaneLogPath(logPath); err != nil {
+ return nil, err
+ }
+
+ logrus.WithFields(logrus.Fields{
+ "sbox.logdir": sb.LogDir(),
+ "ctr.logfile": containerConfig.LogPath,
+ "log_path": logPath,
+ }).Debugf("setting container's log_path")
+
+ specgen.SetProcessTerminal(containerConfig.Tty)
+ if containerConfig.Tty {
+ specgen.AddProcessEnv("TERM", "xterm")
+ }
+
+ linux := containerConfig.GetLinux()
+ if linux != nil {
+ resources := linux.GetResources()
+ if resources != nil {
+ cpuPeriod := resources.CpuPeriod
+ if cpuPeriod != 0 {
+ specgen.SetLinuxResourcesCPUPeriod(uint64(cpuPeriod))
+ }
+
+ cpuQuota := resources.CpuQuota
+ if cpuQuota != 0 {
+ specgen.SetLinuxResourcesCPUQuota(cpuQuota)
+ }
+
+ cpuShares := resources.CpuShares
+ if cpuShares != 0 {
+ specgen.SetLinuxResourcesCPUShares(uint64(cpuShares))
+ }
+
+ memoryLimit := resources.MemoryLimitInBytes
+ if memoryLimit != 0 {
+ specgen.SetLinuxResourcesMemoryLimit(memoryLimit)
+ }
+
+ oomScoreAdj := resources.OomScoreAdj
+ specgen.SetProcessOOMScoreAdj(int(oomScoreAdj))
+ }
+
+ var cgPath string
+ parent := defaultCgroupfsParent
+ useSystemd := s.config.CgroupManager == oci.SystemdCgroupsManager
+ if useSystemd {
+ parent = defaultSystemdParent
+ }
+ if sb.CgroupParent() != "" {
+ parent = sb.CgroupParent()
+ }
+ if useSystemd {
+ cgPath = parent + ":" + scopePrefix + ":" + containerID
+ } else {
+ cgPath = filepath.Join(parent, scopePrefix+"-"+containerID)
+ }
+ specgen.SetLinuxCgroupsPath(cgPath)
+
+ capabilities := linux.GetSecurityContext().GetCapabilities()
+ if privileged {
+ // this is setting correct capabilities as well for privileged mode
+ specgen.SetupPrivileged(true)
+ setOCIBindMountsPrivileged(&specgen)
+ } else {
+ toCAPPrefixed := func(cap string) string {
+ if !strings.HasPrefix(strings.ToLower(cap), "cap_") {
+ return "CAP_" + strings.ToUpper(cap)
+ }
+ return cap
+ }
+
+ // Add/drop all capabilities if "all" is specified, so that
+ // following individual add/drop could still work. E.g.
+ // AddCapabilities: []string{"ALL"}, DropCapabilities: []string{"CHOWN"}
+ // will be all capabilities without `CAP_CHOWN`.
+ // see https://github.com/kubernetes/kubernetes/issues/51980
+ if inStringSlice(capabilities.GetAddCapabilities(), "ALL") {
+ for _, c := range getOCICapabilitiesList() {
+ if err := specgen.AddProcessCapability(c); err != nil {
+ return nil, err
+ }
+ }
+ }
+ if inStringSlice(capabilities.GetDropCapabilities(), "ALL") {
+ for _, c := range getOCICapabilitiesList() {
+ if err := specgen.DropProcessCapability(c); err != nil {
+ return nil, err
+ }
+ }
+ }
+
+ if capabilities != nil {
+ for _, cap := range capabilities.GetAddCapabilities() {
+ if strings.ToUpper(cap) == "ALL" {
+ continue
+ }
+ if err := specgen.AddProcessCapability(toCAPPrefixed(cap)); err != nil {
+ return nil, err
+ }
+ }
+
+ for _, cap := range capabilities.GetDropCapabilities() {
+ if strings.ToUpper(cap) == "ALL" {
+ continue
+ }
+ if err := specgen.DropProcessCapability(toCAPPrefixed(cap)); err != nil {
+ return nil, fmt.Errorf("failed to drop cap %s %v", toCAPPrefixed(cap), err)
+ }
+ }
+ }
+ }
+ specgen.SetProcessSelinuxLabel(processLabel)
+ specgen.SetLinuxMountLabel(mountLabel)
+
+ if containerConfig.GetLinux().GetSecurityContext() != nil &&
+ !containerConfig.GetLinux().GetSecurityContext().Privileged {
+ for _, mp := range []string{
+ "/proc/kcore",
+ "/proc/latency_stats",
+ "/proc/timer_list",
+ "/proc/timer_stats",
+ "/proc/sched_debug",
+ "/sys/firmware",
+ } {
+ specgen.AddLinuxMaskedPaths(mp)
+ }
+
+ for _, rp := range []string{
+ "/proc/asound",
+ "/proc/bus",
+ "/proc/fs",
+ "/proc/irq",
+ "/proc/sys",
+ "/proc/sysrq-trigger",
+ } {
+ specgen.AddLinuxReadonlyPaths(rp)
+ }
+ }
+ }
+ // Join the namespace paths for the pod sandbox container.
+ podInfraState := s.Runtime().ContainerStatus(sb.InfraContainer())
+
+ logrus.Debugf("pod container state %+v", podInfraState)
+
+ ipcNsPath := fmt.Sprintf("/proc/%d/ns/ipc", podInfraState.Pid)
+ if err := specgen.AddOrReplaceLinuxNamespace(string(rspec.IPCNamespace), ipcNsPath); err != nil {
+ return nil, err
+ }
+
+ utsNsPath := fmt.Sprintf("/proc/%d/ns/uts", podInfraState.Pid)
+ if err := specgen.AddOrReplaceLinuxNamespace(string(rspec.UTSNamespace), utsNsPath); err != nil {
+ return nil, err
+ }
+
+ // Do not share pid ns for now
+ if containerConfig.GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostPid() {
+ specgen.RemoveLinuxNamespace(string(rspec.PIDNamespace))
+ }
+
+ netNsPath := sb.NetNsPath()
+ if netNsPath == "" {
+ // The sandbox does not have a permanent namespace,
+ // it's on the host one.
+ netNsPath = fmt.Sprintf("/proc/%d/ns/net", podInfraState.Pid)
+ }
+
+ if err := specgen.AddOrReplaceLinuxNamespace(string(rspec.NetworkNamespace), netNsPath); err != nil {
+ return nil, err
+ }
+
+ imageSpec := containerConfig.GetImage()
+ if imageSpec == nil {
+ return nil, fmt.Errorf("CreateContainerRequest.ContainerConfig.Image is nil")
+ }
+
+ image := imageSpec.Image
+ if image == "" {
+ return nil, fmt.Errorf("CreateContainerRequest.ContainerConfig.Image.Image is empty")
+ }
+ images, err := s.StorageImageServer().ResolveNames(image)
+ if err != nil {
+ // This means we got an image ID
+ if strings.Contains(err.Error(), "cannot specify 64-byte hexadecimal strings") {
+ images = append(images, image)
+ } else {
+ return nil, err
+ }
+ }
+ image = images[0]
+
+ // Get imageName and imageRef that are requested in container status
+ imageName := image
+ status, err := s.StorageImageServer().ImageStatus(s.ImageContext(), image)
+ if err != nil {
+ return nil, err
+ }
+
+ imageRef := status.ID
+ //
+ // TODO: https://github.com/kubernetes-incubator/cri-o/issues/531
+ //
+ //for _, n := range status.Names {
+ //r, err := reference.ParseNormalizedNamed(n)
+ //if err != nil {
+ //return nil, fmt.Errorf("failed to normalize image name for ImageRef: %v", err)
+ //}
+ //if digested, isDigested := r.(reference.Canonical); isDigested {
+ //imageRef = reference.FamiliarString(digested)
+ //break
+ //}
+ //}
+ for _, n := range status.Names {
+ r, err := reference.ParseNormalizedNamed(n)
+ if err != nil {
+ return nil, fmt.Errorf("failed to normalize image name for Image: %v", err)
+ }
+ if tagged, isTagged := r.(reference.Tagged); isTagged {
+ imageName = reference.FamiliarString(tagged)
+ break
+ }
+ }
+
+ specgen.AddAnnotation(annotations.ImageName, imageName)
+ specgen.AddAnnotation(annotations.ImageRef, imageRef)
+ specgen.AddAnnotation(annotations.IP, sb.IP())
+
+ // bind mount the pod shm
+ specgen.AddBindMount(sb.ShmPath(), "/dev/shm", []string{"rw"})
+
+ options := []string{"rw"}
+ if readOnlyRootfs {
+ options = []string{"ro"}
+ }
+ if sb.ResolvPath() != "" {
+ if err := label.Relabel(sb.ResolvPath(), mountLabel, true); err != nil && err != unix.ENOTSUP {
+ return nil, err
+ }
+
+ // bind mount the pod resolver file
+ specgen.AddBindMount(sb.ResolvPath(), "/etc/resolv.conf", options)
+ }
+
+ if sb.HostnamePath() != "" {
+ if err := label.Relabel(sb.HostnamePath(), mountLabel, true); err != nil && err != unix.ENOTSUP {
+ return nil, err
+ }
+
+ specgen.AddBindMount(sb.HostnamePath(), "/etc/hostname", options)
+ }
+
+ // Bind mount /etc/hosts for host networking containers
+ if hostNetwork(containerConfig) {
+ specgen.AddBindMount("/etc/hosts", "/etc/hosts", options)
+ }
+
+ specgen.SetHostname(sb.Hostname())
+
+ specgen.AddAnnotation(annotations.Name, containerName)
+ specgen.AddAnnotation(annotations.ContainerID, containerID)
+ specgen.AddAnnotation(annotations.SandboxID, sb.ID())
+ specgen.AddAnnotation(annotations.SandboxName, sb.InfraContainer().Name())
+ specgen.AddAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer)
+ specgen.AddAnnotation(annotations.LogPath, logPath)
+ specgen.AddAnnotation(annotations.TTY, fmt.Sprintf("%v", containerConfig.Tty))
+ specgen.AddAnnotation(annotations.Stdin, fmt.Sprintf("%v", containerConfig.Stdin))
+ specgen.AddAnnotation(annotations.StdinOnce, fmt.Sprintf("%v", containerConfig.StdinOnce))
+ specgen.AddAnnotation(annotations.Image, image)
+ specgen.AddAnnotation(annotations.ResolvPath, sb.InfraContainer().CrioAnnotations()[annotations.ResolvPath])
+
+ created := time.Now()
+ specgen.AddAnnotation(annotations.Created, created.Format(time.RFC3339Nano))
+
+ metadataJSON, err := json.Marshal(metadata)
+ if err != nil {
+ return nil, err
+ }
+ specgen.AddAnnotation(annotations.Metadata, string(metadataJSON))
+
+ labelsJSON, err := json.Marshal(labels)
+ if err != nil {
+ return nil, err
+ }
+ specgen.AddAnnotation(annotations.Labels, string(labelsJSON))
+
+ kubeAnnotationsJSON, err := json.Marshal(kubeAnnotations)
+ if err != nil {
+ return nil, err
+ }
+ specgen.AddAnnotation(annotations.Annotations, string(kubeAnnotationsJSON))
+
+ metaname := metadata.Name
+ if !privileged {
+ if err = s.setupSeccomp(&specgen, metaname, sb.Annotations()); err != nil {
+ return nil, err
+ }
+ }
+
+ attempt := metadata.Attempt
+ containerInfo, err := s.StorageRuntimeServer().CreateContainer(s.ImageContext(),
+ sb.Name(), sb.ID(),
+ image, image,
+ containerName, containerID,
+ metaname,
+ attempt,
+ mountLabel,
+ nil)
+ if err != nil {
+ return nil, err
+ }
+
+ mountPoint, err := s.StorageRuntimeServer().StartContainer(containerID)
+ if err != nil {
+ return nil, fmt.Errorf("failed to mount container %s(%s): %v", containerName, containerID, err)
+ }
+ specgen.AddAnnotation(annotations.MountPoint, mountPoint)
+
+ containerImageConfig := containerInfo.Config
+ if containerImageConfig == nil {
+ return nil, fmt.Errorf("empty image config for %s", image)
+ }
+
+ if containerImageConfig.Config.StopSignal != "" {
+ // this key is defined in image-spec conversion document at https://github.com/opencontainers/image-spec/pull/492/files#diff-8aafbe2c3690162540381b8cdb157112R57
+ specgen.AddAnnotation("org.opencontainers.image.stopSignal", containerImageConfig.Config.StopSignal)
+ }
+
+ // Add image volumes
+ volumeMounts, err := addImageVolumes(mountPoint, s, &containerInfo, &specgen, mountLabel)
+ if err != nil {
+ return nil, err
+ }
+
+ processArgs, err := buildOCIProcessArgs(containerConfig, containerImageConfig)
+ if err != nil {
+ return nil, err
+ }
+ specgen.SetProcessArgs(processArgs)
+
+ // Add environment variables from CRI and image config
+ envs := containerConfig.GetEnvs()
+ if envs != nil {
+ for _, item := range envs {
+ key := item.Key
+ value := item.Value
+ if key == "" {
+ continue
+ }
+ specgen.AddProcessEnv(key, value)
+ }
+ }
+ if containerImageConfig != nil {
+ for _, item := range containerImageConfig.Config.Env {
+ parts := strings.SplitN(item, "=", 2)
+ if len(parts) != 2 {
+ return nil, fmt.Errorf("invalid env from image: %s", item)
+ }
+
+ if parts[0] == "" {
+ continue
+ }
+ specgen.AddProcessEnv(parts[0], parts[1])
+ }
+ }
+
+ // Set working directory
+ // Pick it up from image config first and override if specified in CRI
+ containerCwd := "/"
+ if containerImageConfig != nil {
+ imageCwd := containerImageConfig.Config.WorkingDir
+ if imageCwd != "" {
+ containerCwd = imageCwd
+ }
+ }
+ runtimeCwd := containerConfig.WorkingDir
+ if runtimeCwd != "" {
+ containerCwd = runtimeCwd
+ }
+ specgen.SetProcessCwd(containerCwd)
+
+ var secretMounts []rspec.Mount
+ if len(s.config.DefaultMounts) > 0 {
+ var err error
+ secretMounts, err = addSecretsBindMounts(mountLabel, containerInfo.RunDir, s.config.DefaultMounts, specgen)
+ if err != nil {
+ return nil, fmt.Errorf("failed to mount secrets: %v", err)
+ }
+ }
+
+ mounts := []rspec.Mount{}
+ mounts = append(mounts, ociMounts...)
+ mounts = append(mounts, volumeMounts...)
+ mounts = append(mounts, secretMounts...)
+
+ sort.Sort(orderedMounts(mounts))
+
+ for _, m := range mounts {
+ specgen.AddBindMount(m.Source, m.Destination, m.Options)
+ }
+
+ if err := s.setupOCIHooks(&specgen, sb, containerConfig, processArgs[0]); err != nil {
+ return nil, err
+ }
+
+ // Setup user and groups
+ if linux != nil {
+ if err = setupContainerUser(&specgen, mountPoint, linux.GetSecurityContext(), containerImageConfig); err != nil {
+ return nil, err
+ }
+ }
+
+ // Set up pids limit if pids cgroup is mounted
+ _, err = cgroups.FindCgroupMountpoint("pids")
+ if err == nil {
+ specgen.SetLinuxResourcesPidsLimit(s.config.PidsLimit)
+ }
+
+ // by default, the root path is an empty string. set it now.
+ specgen.SetRootPath(mountPoint)
+
+ saveOptions := generate.ExportOptions{}
+ if err = specgen.SaveToFile(filepath.Join(containerInfo.Dir, "config.json"), saveOptions); err != nil {
+ return nil, err
+ }
+ if err = specgen.SaveToFile(filepath.Join(containerInfo.RunDir, "config.json"), saveOptions); err != nil {
+ return nil, err
+ }
+
+ crioAnnotations := specgen.Spec().Annotations
+
+ container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.NetNs(), labels, crioAnnotations, kubeAnnotations, image, imageName, imageRef, metadata, sb.ID(), containerConfig.Tty, containerConfig.Stdin, containerConfig.StdinOnce, sb.Privileged(), sb.Trusted(), containerInfo.Dir, created, containerImageConfig.Config.StopSignal)
+ if err != nil {
+ return nil, err
+ }
+ container.SetSpec(specgen.Spec())
+ container.SetMountPoint(mountPoint)
+
+ for _, cv := range containerVolumes {
+ container.AddVolume(cv)
+ }
+
+ return container, nil
+}
+
+func (s *Server) setupSeccomp(specgen *generate.Generator, cname string, sbAnnotations map[string]string) error {
+ profile, ok := sbAnnotations["container.seccomp.security.alpha.kubernetes.io/"+cname]
+ if !ok {
+ profile, ok = sbAnnotations["seccomp.security.alpha.kubernetes.io/pod"]
+ if !ok {
+ // running w/o seccomp, aka unconfined
+ profile = seccompUnconfined
+ }
+ }
+ if !s.seccompEnabled {
+ if profile != seccompUnconfined {
+ return fmt.Errorf("seccomp is not enabled in your kernel, cannot run with a profile")
+ }
+ logrus.Warn("seccomp is not enabled in your kernel, running container without profile")
+ }
+ if profile == seccompUnconfined {
+ // running w/o seccomp, aka unconfined
+ specgen.Spec().Linux.Seccomp = nil
+ return nil
+ }
+ if profile == seccompRuntimeDefault || profile == seccompDockerDefault {
+ return seccomp.LoadProfileFromStruct(s.seccompProfile, specgen)
+ }
+ if !strings.HasPrefix(profile, seccompLocalhostPrefix) {
+ return fmt.Errorf("unknown seccomp profile option: %q", profile)
+ }
+ // FIXME: https://github.com/kubernetes/kubernetes/issues/39128
+ return nil
+}
+
+// getAppArmorProfileName gets the profile name for the given container.
+func (s *Server) getAppArmorProfileName(annotations map[string]string, ctrName string) string {
+ profile := apparmor.GetProfileNameFromPodAnnotations(annotations, ctrName)
+
+ if profile == "" {
+ return ""
+ }
+
+ if profile == apparmor.ProfileRuntimeDefault {
+ // If the value is runtime/default, then return default profile.
+ return s.appArmorProfile
+ }
+
+ return strings.TrimPrefix(profile, apparmor.ProfileNamePrefix)
+}
+
+// openContainerFile opens a file inside a container rootfs safely
+func openContainerFile(rootfs string, path string) (io.ReadCloser, error) {
+ fp, err := symlink.FollowSymlinkInScope(filepath.Join(rootfs, path), rootfs)
+ if err != nil {
+ return nil, err
+ }
+ return os.Open(fp)
+}
+
+// getUserInfo returns UID, GID and additional groups for specified user
+// by looking them up in /etc/passwd and /etc/group
+func getUserInfo(rootfs string, userName string) (uint32, uint32, []uint32, error) {
+ // We don't care if we can't open the file because
+ // not all images will have these files
+ passwdFile, err := openContainerFile(rootfs, "/etc/passwd")
+ if err != nil {
+ logrus.Warnf("Failed to open /etc/passwd: %v", err)
+ } else {
+ defer passwdFile.Close()
+ }
+
+ groupFile, err := openContainerFile(rootfs, "/etc/group")
+ if err != nil {
+ logrus.Warnf("Failed to open /etc/group: %v", err)
+ } else {
+ defer groupFile.Close()
+ }
+
+ execUser, err := user.GetExecUser(userName, nil, passwdFile, groupFile)
+ if err != nil {
+ return 0, 0, nil, err
+ }
+
+ uid := uint32(execUser.Uid)
+ gid := uint32(execUser.Gid)
+ var additionalGids []uint32
+ for _, g := range execUser.Sgids {
+ additionalGids = append(additionalGids, uint32(g))
+ }
+
+ return uid, gid, additionalGids, nil
+}
+
+func setOCIBindMountsPrivileged(g *generate.Generator) {
+ spec := g.Spec()
+ // clear readonly for /sys and cgroup
+ for i, m := range spec.Mounts {
+ if spec.Mounts[i].Destination == "/sys" && !spec.Root.Readonly {
+ clearReadOnly(&spec.Mounts[i])
+ }
+ if m.Type == "cgroup" {
+ clearReadOnly(&spec.Mounts[i])
+ }
+ }
+ spec.Linux.ReadonlyPaths = nil
+ spec.Linux.MaskedPaths = nil
+}
+
+func clearReadOnly(m *rspec.Mount) {
+ var opt []string
+ for _, o := range m.Options {
+ if o != "ro" {
+ opt = append(opt, o)
+ }
+ }
+ m.Options = opt
+}
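
buildOCIProcessArgs above implements the docker-style merge of CRI Command/Args with
the image's Entrypoint/Cmd. A standalone sketch of the same rule (the nginx values are
illustrative, not from this commit):

    package main

    import "fmt"

    // mergeArgs mirrors buildOCIProcessArgs: CRI Command/Args take
    // precedence; empty fields fall back to the image config.
    func mergeArgs(kubeCommand, kubeArgs, imageEntrypoint, imageCmd []string) []string {
        if len(kubeCommand) == 0 {
            if len(kubeArgs) == 0 {
                kubeArgs = imageCmd
            }
            kubeCommand = imageEntrypoint
        }
        return append(append([]string{}, kubeCommand...), kubeArgs...)
    }

    func main() {
        entrypoint := []string{"nginx"}
        cmd := []string{"-g", "daemon off;"}
        fmt.Println(mergeArgs(nil, nil, entrypoint, cmd))                             // [nginx -g daemon off;]
        fmt.Println(mergeArgs(nil, []string{"-v"}, entrypoint, cmd))                  // [nginx -v]
        fmt.Println(mergeArgs([]string{"sh", "-c"}, []string{"id"}, entrypoint, cmd)) // [sh -c id]
    }
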
diff --git a/server/container_exec.go b/server/container_exec.go
new file mode 100644
index 000000000..0cdb9579e
--- /dev/null
+++ b/server/container_exec.go
@@ -0,0 +1,108 @@
+package server
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "os/exec"
+
+ "github.com/docker/docker/pkg/pools"
+ "github.com/kubernetes-incubator/cri-o/oci"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ "k8s.io/client-go/tools/remotecommand"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+ kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
+ utilexec "k8s.io/kubernetes/pkg/util/exec"
+ "k8s.io/kubernetes/pkg/util/term"
+)
+
+// Exec prepares a streaming endpoint to execute a command in the container.
+func (s *Server) Exec(ctx context.Context, req *pb.ExecRequest) (*pb.ExecResponse, error) {
+ logrus.Debugf("ExecRequest %+v", req)
+
+ resp, err := s.GetExec(req)
+ if err != nil {
+ return nil, fmt.Errorf("unable to prepare exec endpoint")
+ }
+
+ return resp, nil
+}
+
+// Exec endpoint for streaming.Runtime
+func (ss streamService) Exec(containerID string, cmd []string, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool, resize <-chan remotecommand.TerminalSize) error {
+ c := ss.runtimeServer.GetContainer(containerID)
+
+ if c == nil {
+ return fmt.Errorf("could not find container %q", containerID)
+ }
+
+ if err := ss.runtimeServer.Runtime().UpdateStatus(c); err != nil {
+ return err
+ }
+
+ cState := ss.runtimeServer.Runtime().ContainerStatus(c)
+ if !(cState.Status == oci.ContainerStateRunning || cState.Status == oci.ContainerStateCreated) {
+ return fmt.Errorf("container is not created or running")
+ }
+
+ args := []string{"exec"}
+ if tty {
+ args = append(args, "-t")
+ }
+ args = append(args, c.ID())
+ args = append(args, cmd...)
+ execCmd := exec.Command(ss.runtimeServer.Runtime().Path(c), args...)
+ var cmdErr error
+ if tty {
+ p, err := kubecontainer.StartPty(execCmd)
+ if err != nil {
+ return err
+ }
+ defer p.Close()
+
+ // make sure to close the stdout stream
+ defer stdout.Close()
+
+ kubecontainer.HandleResizing(resize, func(size remotecommand.TerminalSize) {
+ term.SetSize(p.Fd(), size)
+ })
+
+ if stdin != nil {
+ go pools.Copy(p, stdin)
+ }
+
+ if stdout != nil {
+ go pools.Copy(stdout, p)
+ }
+
+ cmdErr = execCmd.Wait()
+ } else {
+ if stdin != nil {
+ // Use an os.Pipe here as it returns true *os.File objects.
+ // This way, if you run 'kubectl exec <pod> -i bash' (no tty) and type 'exit',
+ // the call below to execCmd.Run() can unblock because its Stdin is the read half
+ // of the pipe.
+ r, w, err := os.Pipe()
+ if err != nil {
+ return err
+ }
+ go pools.Copy(w, stdin)
+
+ execCmd.Stdin = r
+ }
+ if stdout != nil {
+ execCmd.Stdout = stdout
+ }
+ if stderr != nil {
+ execCmd.Stderr = stderr
+ }
+
+ cmdErr = execCmd.Run()
+ }
+
+ if exitErr, ok := cmdErr.(*exec.ExitError); ok {
+ return &utilexec.ExitErrorWrapper{ExitError: exitErr}
+ }
+ return cmdErr
+}
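
The os.Pipe trick above leans on a documented os/exec behavior: when Stdin is a real *os.File, the descriptor is handed directly to the child, so Wait() is not held up by a stdin-copying goroutine. A minimal sketch of that behavior, assuming a Unix-like host with a `true` binary on PATH:

package main

import (
	"io"
	"os"
	"os/exec"
)

func main() {
	cmd := exec.Command("true")
	r, w, err := os.Pipe()
	if err != nil {
		panic(err)
	}
	cmd.Stdin = r // a real *os.File: passed straight to the child
	if err := cmd.Start(); err != nil {
		panic(err)
	}
	go io.Copy(w, os.Stdin) // copier may block forever; Wait() below doesn't care
	_ = cmd.Wait()          // returns as soon as `true` exits
	w.Close()
}
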
diff --git a/server/container_execsync.go b/server/container_execsync.go
new file mode 100644
index 000000000..35f7896c5
--- /dev/null
+++ b/server/container_execsync.go
@@ -0,0 +1,46 @@
+package server
+
+import (
+ "fmt"
+
+ "github.com/kubernetes-incubator/cri-o/oci"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// ExecSync runs a command in a container synchronously.
+func (s *Server) ExecSync(ctx context.Context, req *pb.ExecSyncRequest) (*pb.ExecSyncResponse, error) {
+ logrus.Debugf("ExecSyncRequest %+v", req)
+ c, err := s.GetContainerFromRequest(req.ContainerId)
+ if err != nil {
+ return nil, err
+ }
+
+ if err = s.Runtime().UpdateStatus(c); err != nil {
+ return nil, err
+ }
+
+ cState := s.Runtime().ContainerStatus(c)
+ if !(cState.Status == oci.ContainerStateRunning || cState.Status == oci.ContainerStateCreated) {
+ return nil, fmt.Errorf("container is not created or running")
+ }
+
+ cmd := req.Cmd
+ if cmd == nil {
+ return nil, fmt.Errorf("exec command cannot be empty")
+ }
+
+ execResp, err := s.Runtime().ExecSync(c, cmd, req.Timeout)
+ if err != nil {
+ return nil, err
+ }
+ resp := &pb.ExecSyncResponse{
+ Stdout: execResp.Stdout,
+ Stderr: execResp.Stderr,
+ ExitCode: execResp.ExitCode,
+ }
+
+ logrus.Debugf("ExecSyncResponse: %+v", resp)
+ return resp, nil
+}
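
On the wire this is a plain unary call. A client-side fragment for context, with a hypothetical container ID and assuming an already-dialed CRI client `rsc` of type pb.RuntimeServiceClient (imports elided):

req := &pb.ExecSyncRequest{
	ContainerId: "abc123def456", // hypothetical
	Cmd:         []string{"cat", "/etc/hostname"},
	Timeout:     5, // seconds
}
resp, err := rsc.ExecSync(context.Background(), req)
if err != nil {
	return err
}
fmt.Printf("exit=%d stdout=%q stderr=%q\n", resp.ExitCode, resp.Stdout, resp.Stderr)
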
diff --git a/server/container_list.go b/server/container_list.go
new file mode 100644
index 000000000..42204ae1f
--- /dev/null
+++ b/server/container_list.go
@@ -0,0 +1,112 @@
+package server
+
+import (
+ "github.com/kubernetes-incubator/cri-o/oci"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ "k8s.io/apimachinery/pkg/fields"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// filterContainer returns whether the passed container matches the filtering criteria
+func filterContainer(c *pb.Container, filter *pb.ContainerFilter) bool {
+ if filter != nil {
+ if filter.State != nil {
+ if c.State != filter.State.State {
+ return false
+ }
+ }
+ if filter.LabelSelector != nil {
+ sel := fields.SelectorFromSet(filter.LabelSelector)
+ if !sel.Matches(fields.Set(c.Labels)) {
+ return false
+ }
+ }
+ }
+ return true
+}
+
+// ListContainers lists all containers by filters.
+func (s *Server) ListContainers(ctx context.Context, req *pb.ListContainersRequest) (*pb.ListContainersResponse, error) {
+ logrus.Debugf("ListContainersRequest %+v", req)
+ var ctrs []*pb.Container
+ filter := req.Filter
+ ctrList, err := s.ContainerServer.ListContainers()
+ if err != nil {
+ return nil, err
+ }
+
+ // Filter using container id and pod id first.
+	if filter != nil && filter.Id != "" {
+ id, err := s.CtrIDIndex().Get(filter.Id)
+ if err != nil {
+			// If we don't find a container ID with a filter, it should not
+			// be considered an error. Log a warning and return an empty struct.
+			logrus.Warnf("unable to find container ID %s", filter.Id)
+ return &pb.ListContainersResponse{}, nil
+ }
+ c := s.ContainerServer.GetContainer(id)
+ if c != nil {
+ if filter.PodSandboxId != "" {
+ if c.Sandbox() == filter.PodSandboxId {
+ ctrList = []*oci.Container{c}
+ } else {
+ ctrList = []*oci.Container{}
+				}
+			} else {
+ ctrList = []*oci.Container{c}
+ }
+ }
+	} else if filter != nil && filter.PodSandboxId != "" {
+		pod := s.ContainerServer.GetSandbox(filter.PodSandboxId)
+		if pod == nil {
+			ctrList = []*oci.Container{}
+		} else {
+			ctrList = pod.Containers().List()
+		}
+	}
+
+ for _, ctr := range ctrList {
+ podSandboxID := ctr.Sandbox()
+ cState := s.Runtime().ContainerStatus(ctr)
+ created := cState.Created.UnixNano()
+ rState := pb.ContainerState_CONTAINER_UNKNOWN
+ cID := ctr.ID()
+ img := &pb.ImageSpec{
+ Image: ctr.Image(),
+ }
+ c := &pb.Container{
+ Id: cID,
+ PodSandboxId: podSandboxID,
+ CreatedAt: created,
+ Labels: ctr.Labels(),
+ Metadata: ctr.Metadata(),
+ Annotations: ctr.Annotations(),
+ Image: img,
+ }
+
+ switch cState.Status {
+ case oci.ContainerStateCreated:
+ rState = pb.ContainerState_CONTAINER_CREATED
+ case oci.ContainerStateRunning:
+ rState = pb.ContainerState_CONTAINER_RUNNING
+ case oci.ContainerStateStopped:
+ rState = pb.ContainerState_CONTAINER_EXITED
+ }
+ c.State = rState
+
+ // Filter by other criteria such as state and labels.
+ if filterContainer(c, req.Filter) {
+ ctrs = append(ctrs, c)
+ }
+ }
+
+ resp := &pb.ListContainersResponse{
+ Containers: ctrs,
+ }
+ logrus.Debugf("ListContainersResponse: %+v", resp)
+ return resp, nil
+}
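
The label matching above goes through apimachinery's field selectors: a selector built from a set matches any label map containing those key/value pairs, so extra labels on the container are fine. A small demonstration:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/fields"
)

func main() {
	sel := fields.SelectorFromSet(fields.Set{"app": "web"})
	fmt.Println(sel.Matches(fields.Set{"app": "web", "tier": "frontend"})) // true: superset matches
	fmt.Println(sel.Matches(fields.Set{"app": "db"}))                      // false: value differs
}
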
diff --git a/server/container_portforward.go b/server/container_portforward.go
new file mode 100644
index 000000000..97dd53423
--- /dev/null
+++ b/server/container_portforward.go
@@ -0,0 +1,91 @@
+package server
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "os/exec"
+ "strings"
+
+ "github.com/docker/docker/pkg/pools"
+ "github.com/kubernetes-incubator/cri-o/oci"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// PortForward prepares a streaming endpoint to forward ports from a PodSandbox.
+func (s *Server) PortForward(ctx context.Context, req *pb.PortForwardRequest) (*pb.PortForwardResponse, error) {
+ logrus.Debugf("PortForwardRequest %+v", req)
+
+ resp, err := s.GetPortForward(req)
+
+ if err != nil {
+		return nil, fmt.Errorf("unable to prepare portforward endpoint: %v", err)
+ }
+
+ return resp, nil
+}
+
+func (ss streamService) PortForward(podSandboxID string, port int32, stream io.ReadWriteCloser) error {
+ c := ss.runtimeServer.GetSandboxContainer(podSandboxID)
+
+ if c == nil {
+ return fmt.Errorf("could not find container for sandbox %q", podSandboxID)
+ }
+
+ if err := ss.runtimeServer.Runtime().UpdateStatus(c); err != nil {
+ return err
+ }
+
+ cState := ss.runtimeServer.Runtime().ContainerStatus(c)
+ if !(cState.Status == oci.ContainerStateRunning || cState.Status == oci.ContainerStateCreated) {
+ return fmt.Errorf("container is not created or running")
+ }
+
+ containerPid := cState.Pid
+ socatPath, lookupErr := exec.LookPath("socat")
+ if lookupErr != nil {
+ return fmt.Errorf("unable to do port forwarding: socat not found")
+ }
+
+ args := []string{"-t", fmt.Sprintf("%d", containerPid), "-n", socatPath, "-", fmt.Sprintf("TCP4:localhost:%d", port)}
+
+ nsenterPath, lookupErr := exec.LookPath("nsenter")
+ if lookupErr != nil {
+ return fmt.Errorf("unable to do port forwarding: nsenter not found")
+ }
+
+ commandString := fmt.Sprintf("%s %s", nsenterPath, strings.Join(args, " "))
+ logrus.Debugf("executing port forwarding command: %s", commandString)
+
+ command := exec.Command(nsenterPath, args...)
+ command.Stdout = stream
+
+ stderr := new(bytes.Buffer)
+ command.Stderr = stderr
+
+ // If we use Stdin, command.Run() won't return until the goroutine that's copying
+ // from stream finishes. Unfortunately, if you have a client like telnet connected
+ // via port forwarding, as long as the user's telnet client is connected to the user's
+ // local listener that port forwarding sets up, the telnet session never exits. This
+ // means that even if socat has finished running, command.Run() won't ever return
+ // (because the client still has the connection and stream open).
+ //
+	// The workaround is to use StdinPipe(), as Wait() (called by Run()) closes the pipe
+ // when the command (socat) exits.
+ inPipe, err := command.StdinPipe()
+ if err != nil {
+ return fmt.Errorf("unable to do port forwarding: error creating stdin pipe: %v", err)
+ }
+ go func() {
+ pools.Copy(inPipe, stream)
+ inPipe.Close()
+ }()
+
+ if err := command.Run(); err != nil {
+ return fmt.Errorf("%v: %s", err, stderr.String())
+ }
+
+ return nil
+}
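
For concreteness, with hypothetical values the assembled command is socat running inside the sandbox's network namespace, bridging its stdio to the target port:

package main

import (
	"fmt"
	"strings"
)

func main() {
	containerPid, port := 4242, int32(8080) // hypothetical values
	args := []string{"-t", fmt.Sprintf("%d", containerPid), "-n",
		"/usr/bin/socat", "-", fmt.Sprintf("TCP4:localhost:%d", port)}
	fmt.Println("nsenter " + strings.Join(args, " "))
	// nsenter -t 4242 -n /usr/bin/socat - TCP4:localhost:8080
}
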
diff --git a/server/container_remove.go b/server/container_remove.go
new file mode 100644
index 000000000..871023726
--- /dev/null
+++ b/server/container_remove.go
@@ -0,0 +1,20 @@
+package server
+
+import (
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// RemoveContainer removes the container. If the container is running, the container
+// should be force removed.
+func (s *Server) RemoveContainer(ctx context.Context, req *pb.RemoveContainerRequest) (*pb.RemoveContainerResponse, error) {
+ _, err := s.ContainerServer.Remove(ctx, req.ContainerId, true)
+ if err != nil {
+ return nil, err
+ }
+
+ resp := &pb.RemoveContainerResponse{}
+ logrus.Debugf("RemoveContainerResponse: %+v", resp)
+ return resp, nil
+}
diff --git a/server/container_start.go b/server/container_start.go
new file mode 100644
index 000000000..85be09484
--- /dev/null
+++ b/server/container_start.go
@@ -0,0 +1,43 @@
+package server
+
+import (
+ "fmt"
+
+ "github.com/kubernetes-incubator/cri-o/oci"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// StartContainer starts the container.
+func (s *Server) StartContainer(ctx context.Context, req *pb.StartContainerRequest) (*pb.StartContainerResponse, error) {
+ logrus.Debugf("StartContainerRequest %+v", req)
+ c, err := s.GetContainerFromRequest(req.ContainerId)
+ if err != nil {
+ return nil, err
+ }
+ state := s.Runtime().ContainerStatus(c)
+ if state.Status != oci.ContainerStateCreated {
+ return nil, fmt.Errorf("container %s is not in created state: %s", c.ID(), state.Status)
+ }
+
+ defer func() {
+ // if the call to StartContainer fails below we still want to fill
+ // some fields of a container status. In particular, we're going to
+ // adjust container started/finished time and set an error to be
+ // returned in the Reason field for container status call.
+ if err != nil {
+ s.Runtime().SetStartFailed(c, err)
+ }
+ s.ContainerStateToDisk(c)
+ }()
+
+ err = s.Runtime().StartContainer(c)
+ if err != nil {
+ return nil, fmt.Errorf("failed to start container %s: %v", c.ID(), err)
+ }
+
+ resp := &pb.StartContainerResponse{}
+ logrus.Debugf("StartContainerResponse %+v", resp)
+ return resp, nil
+}
diff --git a/server/container_stats.go b/server/container_stats.go
new file mode 100644
index 000000000..22b87c453
--- /dev/null
+++ b/server/container_stats.go
@@ -0,0 +1,14 @@
+package server
+
+import (
+ "fmt"
+
+ "golang.org/x/net/context"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// ContainerStats returns stats of the container. If the container does not
+// exist, the call returns an error.
+func (s *Server) ContainerStats(ctx context.Context, req *pb.ContainerStatsRequest) (*pb.ContainerStatsResponse, error) {
+ return nil, fmt.Errorf("not implemented")
+}
diff --git a/server/container_stats_list.go b/server/container_stats_list.go
new file mode 100644
index 000000000..929220994
--- /dev/null
+++ b/server/container_stats_list.go
@@ -0,0 +1,13 @@
+package server
+
+import (
+ "fmt"
+
+ "golang.org/x/net/context"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// ListContainerStats returns stats of all running containers.
+func (s *Server) ListContainerStats(ctx context.Context, req *pb.ListContainerStatsRequest) (*pb.ListContainerStatsResponse, error) {
+ return nil, fmt.Errorf("not implemented")
+}
diff --git a/server/container_status.go b/server/container_status.go
new file mode 100644
index 000000000..b1512e0cd
--- /dev/null
+++ b/server/container_status.go
@@ -0,0 +1,102 @@
+package server
+
+import (
+ "github.com/kubernetes-incubator/cri-o/oci"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+const (
+ oomKilledReason = "OOMKilled"
+ completedReason = "Completed"
+ errorReason = "Error"
+)
+
+// ContainerStatus returns status of the container.
+func (s *Server) ContainerStatus(ctx context.Context, req *pb.ContainerStatusRequest) (*pb.ContainerStatusResponse, error) {
+ logrus.Debugf("ContainerStatusRequest %+v", req)
+ c, err := s.GetContainerFromRequest(req.ContainerId)
+ if err != nil {
+ return nil, err
+ }
+
+ containerID := c.ID()
+ resp := &pb.ContainerStatusResponse{
+ Status: &pb.ContainerStatus{
+ Id: containerID,
+ Metadata: c.Metadata(),
+ Labels: c.Labels(),
+ Annotations: c.Annotations(),
+ ImageRef: c.ImageRef(),
+ },
+ }
+ resp.Status.Image = &pb.ImageSpec{Image: c.ImageName()}
+
+ mounts := []*pb.Mount{}
+ for _, cv := range c.Volumes() {
+ mounts = append(mounts, &pb.Mount{
+ ContainerPath: cv.ContainerPath,
+ HostPath: cv.HostPath,
+ Readonly: cv.Readonly,
+ })
+ }
+ resp.Status.Mounts = mounts
+
+ cState := s.Runtime().ContainerStatus(c)
+ rStatus := pb.ContainerState_CONTAINER_UNKNOWN
+
+ imageName := c.Image()
+ status, err := s.StorageImageServer().ImageStatus(s.ImageContext(), imageName)
+ if err != nil {
+ return nil, err
+ }
+
+ resp.Status.ImageRef = status.ImageRef
+
+	// If we defaulted to exit code -1 earlier, then we attempt to
+	// get the exit code from the exit file again.
+ if cState.ExitCode == -1 {
+ err := s.Runtime().UpdateStatus(c)
+ if err != nil {
+ logrus.Warnf("Failed to UpdateStatus of container %s: %v", c.ID(), err)
+ }
+ cState = s.Runtime().ContainerStatus(c)
+ }
+
+ switch cState.Status {
+ case oci.ContainerStateCreated:
+ rStatus = pb.ContainerState_CONTAINER_CREATED
+ created := cState.Created.UnixNano()
+ resp.Status.CreatedAt = created
+ case oci.ContainerStateRunning:
+ rStatus = pb.ContainerState_CONTAINER_RUNNING
+ created := cState.Created.UnixNano()
+ resp.Status.CreatedAt = created
+ started := cState.Started.UnixNano()
+ resp.Status.StartedAt = started
+ case oci.ContainerStateStopped:
+ rStatus = pb.ContainerState_CONTAINER_EXITED
+ created := cState.Created.UnixNano()
+ resp.Status.CreatedAt = created
+ started := cState.Started.UnixNano()
+ resp.Status.StartedAt = started
+ finished := cState.Finished.UnixNano()
+ resp.Status.FinishedAt = finished
+ resp.Status.ExitCode = cState.ExitCode
+ switch {
+ case cState.OOMKilled:
+ resp.Status.Reason = oomKilledReason
+ case cState.ExitCode == 0:
+ resp.Status.Reason = completedReason
+ default:
+ resp.Status.Reason = errorReason
+ resp.Status.Message = cState.Error
+ }
+ }
+
+ resp.Status.State = rStatus
+
+ logrus.Debugf("ContainerStatusResponse: %+v", resp)
+ return resp, nil
+}
diff --git a/server/container_stop.go b/server/container_stop.go
new file mode 100644
index 000000000..f74ed86e0
--- /dev/null
+++ b/server/container_stop.go
@@ -0,0 +1,19 @@
+package server
+
+import (
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// StopContainer stops a running container with a grace period (i.e., timeout).
+func (s *Server) StopContainer(ctx context.Context, req *pb.StopContainerRequest) (*pb.StopContainerResponse, error) {
+ _, err := s.ContainerServer.ContainerStop(ctx, req.ContainerId, req.Timeout)
+ if err != nil {
+ return nil, err
+ }
+
+ resp := &pb.StopContainerResponse{}
+ logrus.Debugf("StopContainerResponse %s: %+v", req.ContainerId, resp)
+ return resp, nil
+}
diff --git a/server/container_updateruntimeconfig.go b/server/container_updateruntimeconfig.go
new file mode 100644
index 000000000..b900c9b18
--- /dev/null
+++ b/server/container_updateruntimeconfig.go
@@ -0,0 +1,11 @@
+package server
+
+import (
+ "golang.org/x/net/context"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// UpdateRuntimeConfig updates the runtime configuration; cri-o currently treats this as a no-op.
+func (s *Server) UpdateRuntimeConfig(ctx context.Context, req *pb.UpdateRuntimeConfigRequest) (*pb.UpdateRuntimeConfigResponse, error) {
+ return &pb.UpdateRuntimeConfigResponse{}, nil
+}
diff --git a/server/image_fs_info.go b/server/image_fs_info.go
new file mode 100644
index 000000000..969bdc342
--- /dev/null
+++ b/server/image_fs_info.go
@@ -0,0 +1,13 @@
+package server
+
+import (
+ "fmt"
+
+ "golang.org/x/net/context"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// ImageFsInfo returns information about the filesystem that is used to store images.
+func (s *Server) ImageFsInfo(ctx context.Context, req *pb.ImageFsInfoRequest) (*pb.ImageFsInfoResponse, error) {
+ return nil, fmt.Errorf("not implemented")
+}
diff --git a/server/image_list.go b/server/image_list.go
new file mode 100644
index 000000000..ebcc6f6a1
--- /dev/null
+++ b/server/image_list.go
@@ -0,0 +1,41 @@
+package server
+
+import (
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// ListImages lists existing images.
+func (s *Server) ListImages(ctx context.Context, req *pb.ListImagesRequest) (*pb.ListImagesResponse, error) {
+ logrus.Debugf("ListImagesRequest: %+v", req)
+ filter := ""
+ reqFilter := req.GetFilter()
+ if reqFilter != nil {
+ filterImage := reqFilter.GetImage()
+ if filterImage != nil {
+ filter = filterImage.Image
+ }
+ }
+ results, err := s.StorageImageServer().ListImages(s.ImageContext(), filter)
+ if err != nil {
+ return nil, err
+ }
+ response := pb.ListImagesResponse{}
+ for _, result := range results {
+ if result.Size != nil {
+ response.Images = append(response.Images, &pb.Image{
+ Id: result.ID,
+ RepoTags: result.Names,
+ Size_: *result.Size,
+ })
+ } else {
+ response.Images = append(response.Images, &pb.Image{
+ Id: result.ID,
+ RepoTags: result.Names,
+ })
+ }
+ }
+ logrus.Debugf("ListImagesResponse: %+v", response)
+ return &response, nil
+}
diff --git a/server/image_pull.go b/server/image_pull.go
new file mode 100644
index 000000000..26d08912f
--- /dev/null
+++ b/server/image_pull.go
@@ -0,0 +1,108 @@
+package server
+
+import (
+ "encoding/base64"
+ "strings"
+
+ "github.com/containers/image/copy"
+ "github.com/containers/image/types"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// PullImage pulls an image with authentication config.
+func (s *Server) PullImage(ctx context.Context, req *pb.PullImageRequest) (*pb.PullImageResponse, error) {
+ logrus.Debugf("PullImageRequest: %+v", req)
+ // TODO: what else do we need here? (Signatures when the story isn't just pulling from docker://)
+ image := ""
+ img := req.GetImage()
+ if img != nil {
+ image = img.Image
+ }
+
+ var (
+ images []string
+ pulled string
+ err error
+ )
+ images, err = s.StorageImageServer().ResolveNames(image)
+ if err != nil {
+ return nil, err
+ }
+ for _, img := range images {
+ var (
+ username string
+ password string
+ )
+ if req.GetAuth() != nil {
+ username = req.GetAuth().Username
+ password = req.GetAuth().Password
+ if req.GetAuth().Auth != "" {
+ username, password, err = decodeDockerAuth(req.GetAuth().Auth)
+ if err != nil {
+ logrus.Debugf("error decoding authentication for image %s: %v", img, err)
+ continue
+ }
+ }
+ }
+ options := &copy.Options{
+ SourceCtx: &types.SystemContext{},
+ }
+ // Specifying a username indicates the user intends to send authentication to the registry.
+ if username != "" {
+ options.SourceCtx = &types.SystemContext{
+ DockerAuthConfig: &types.DockerAuthConfig{
+ Username: username,
+ Password: password,
+ },
+ }
+ }
+
+ var canPull bool
+ canPull, err = s.StorageImageServer().CanPull(img, options)
+ if err != nil && !canPull {
+ logrus.Debugf("error checking image %s: %v", img, err)
+ continue
+ }
+
+		// Match docker's behavior: don't re-pull if the image is already in the store.
+ _, err = s.StorageImageServer().ImageStatus(s.ImageContext(), img)
+ if err == nil {
+ logrus.Debugf("image %s already in store, skipping pull", img)
+ pulled = img
+ break
+ }
+
+ _, err = s.StorageImageServer().PullImage(s.ImageContext(), img, options)
+ if err != nil {
+ logrus.Debugf("error pulling image %s: %v", img, err)
+ continue
+ }
+ pulled = img
+ break
+ }
+ if pulled == "" && err != nil {
+ return nil, err
+ }
+ resp := &pb.PullImageResponse{
+ ImageRef: pulled,
+ }
+ logrus.Debugf("PullImageResponse: %+v", resp)
+ return resp, nil
+}
+
+func decodeDockerAuth(s string) (string, string, error) {
+ decoded, err := base64.StdEncoding.DecodeString(s)
+ if err != nil {
+ return "", "", err
+ }
+ parts := strings.SplitN(string(decoded), ":", 2)
+ if len(parts) != 2 {
+ // if it's invalid just skip, as docker does
+ return "", "", nil
+ }
+ user := parts[0]
+ password := strings.Trim(parts[1], "\x00")
+ return user, password, nil
+}
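
A round-trip sanity check of the decoding, with hypothetical credentials; note that SplitN with a limit of 2 keeps any ':' inside the password intact. This fragment assumes it runs inside this package, e.g. in a test:

enc := base64.StdEncoding.EncodeToString([]byte("alice:s3:cret"))
user, pass, err := decodeDockerAuth(enc)
// user == "alice", pass == "s3:cret", err == nil
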
diff --git a/server/image_remove.go b/server/image_remove.go
new file mode 100644
index 000000000..d15296ccc
--- /dev/null
+++ b/server/image_remove.go
@@ -0,0 +1,52 @@
+package server
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// RemoveImage removes the image.
+func (s *Server) RemoveImage(ctx context.Context, req *pb.RemoveImageRequest) (*pb.RemoveImageResponse, error) {
+ logrus.Debugf("RemoveImageRequest: %+v", req)
+ image := ""
+ img := req.GetImage()
+ if img != nil {
+ image = img.Image
+ }
+ if image == "" {
+ return nil, fmt.Errorf("no image specified")
+ }
+ var (
+ images []string
+ err error
+ deleted bool
+ )
+ images, err = s.StorageImageServer().ResolveNames(image)
+ if err != nil {
+ // This means we got an image ID
+ if strings.Contains(err.Error(), "cannot specify 64-byte hexadecimal strings") {
+ images = append(images, image)
+ } else {
+ return nil, err
+ }
+ }
+ for _, img := range images {
+ err = s.StorageImageServer().UntagImage(s.ImageContext(), img)
+ if err != nil {
+ logrus.Debugf("error deleting image %s: %v", img, err)
+ continue
+ }
+ deleted = true
+ break
+ }
+ if !deleted && err != nil {
+ return nil, err
+ }
+ resp := &pb.RemoveImageResponse{}
+ logrus.Debugf("RemoveImageResponse: %+v", resp)
+ return resp, nil
+}
diff --git a/server/image_status.go b/server/image_status.go
new file mode 100644
index 000000000..5571c3023
--- /dev/null
+++ b/server/image_status.go
@@ -0,0 +1,53 @@
+package server
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/containers/storage"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// ImageStatus returns the status of the image.
+func (s *Server) ImageStatus(ctx context.Context, req *pb.ImageStatusRequest) (*pb.ImageStatusResponse, error) {
+ logrus.Debugf("ImageStatusRequest: %+v", req)
+ image := ""
+ img := req.GetImage()
+ if img != nil {
+ image = img.Image
+ }
+ if image == "" {
+ return nil, fmt.Errorf("no image specified")
+ }
+ images, err := s.StorageImageServer().ResolveNames(image)
+ if err != nil {
+ // This means we got an image ID
+ if strings.Contains(err.Error(), "cannot specify 64-byte hexadecimal strings") {
+ images = append(images, image)
+ } else {
+ return nil, err
+ }
+ }
+	// Use only the first resolved name, as that's the image kube meant.
+ image = images[0]
+ status, err := s.StorageImageServer().ImageStatus(s.ImageContext(), image)
+ if err != nil {
+ if errors.Cause(err) == storage.ErrImageUnknown {
+ return &pb.ImageStatusResponse{}, nil
+ }
+ return nil, err
+ }
+	resp := &pb.ImageStatusResponse{
+		Image: &pb.Image{
+			Id:          status.ID,
+			RepoTags:    status.Names,
+			RepoDigests: status.Digests,
+		},
+	}
+	// Guard against a nil Size, as ListImages does.
+	if status.Size != nil {
+		resp.Image.Size_ = *status.Size
+	}
+ logrus.Debugf("ImageStatusResponse: %+v", resp)
+ return resp, nil
+}
diff --git a/server/inspect.go b/server/inspect.go
new file mode 100644
index 000000000..6e3e813cc
--- /dev/null
+++ b/server/inspect.go
@@ -0,0 +1,105 @@
+package server
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "net/http"
+
+ "github.com/go-zoo/bone"
+ "github.com/kubernetes-incubator/cri-o/libkpod/sandbox"
+ "github.com/kubernetes-incubator/cri-o/oci"
+ "github.com/kubernetes-incubator/cri-o/types"
+ "github.com/sirupsen/logrus"
+)
+
+func (s *Server) getInfo() types.CrioInfo {
+ return types.CrioInfo{
+ StorageDriver: s.config.Config.Storage,
+ StorageRoot: s.config.Config.Root,
+ CgroupDriver: s.config.Config.CgroupManager,
+ }
+}
+
+var (
+ errCtrNotFound = errors.New("container not found")
+ errCtrStateNil = errors.New("container state is nil")
+ errSandboxNotFound = errors.New("sandbox for container not found")
+)
+
+func (s *Server) getContainerInfo(id string, getContainerFunc func(id string) *oci.Container, getInfraContainerFunc func(id string) *oci.Container, getSandboxFunc func(id string) *sandbox.Sandbox) (types.ContainerInfo, error) {
+ ctr := getContainerFunc(id)
+ if ctr == nil {
+ ctr = getInfraContainerFunc(id)
+ if ctr == nil {
+ return types.ContainerInfo{}, errCtrNotFound
+ }
+ }
+ // TODO(mrunalp): should we call UpdateStatus()?
+ ctrState := ctr.State()
+ if ctrState == nil {
+ return types.ContainerInfo{}, errCtrStateNil
+ }
+ sb := getSandboxFunc(ctr.Sandbox())
+ if sb == nil {
+ logrus.Debugf("can't find sandbox %s for container %s", ctr.Sandbox(), id)
+ return types.ContainerInfo{}, errSandboxNotFound
+ }
+ return types.ContainerInfo{
+ Name: ctr.Name(),
+ Pid: ctrState.Pid,
+ Image: ctr.Image(),
+ CreatedTime: ctrState.Created.UnixNano(),
+ Labels: ctr.Labels(),
+ Annotations: ctr.Annotations(),
+ CrioAnnotations: ctr.CrioAnnotations(),
+ Root: ctr.MountPoint(),
+ LogPath: ctr.LogPath(),
+ Sandbox: ctr.Sandbox(),
+ IP: sb.IP(),
+ }, nil
+}
+
+// GetInfoMux returns the mux used to serve info requests
+func (s *Server) GetInfoMux() *bone.Mux {
+ mux := bone.New()
+
+ mux.Get("/info", http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
+ ci := s.getInfo()
+ js, err := json.Marshal(ci)
+ if err != nil {
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return
+ }
+ w.Header().Set("Content-Type", "application/json")
+ w.Write(js)
+ }))
+
+ mux.Get("/containers/:id", http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
+ containerID := bone.GetValue(req, "id")
+ ci, err := s.getContainerInfo(containerID, s.GetContainer, s.getInfraContainer, s.getSandbox)
+ if err != nil {
+ switch err {
+ case errCtrNotFound:
+ http.Error(w, fmt.Sprintf("can't find the container with id %s", containerID), http.StatusNotFound)
+ case errCtrStateNil:
+ http.Error(w, fmt.Sprintf("can't find container state for container with id %s", containerID), http.StatusInternalServerError)
+ case errSandboxNotFound:
+ http.Error(w, fmt.Sprintf("can't find the sandbox for container id %s", containerID), http.StatusNotFound)
+ default:
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ }
+ return
+ }
+ js, err := json.Marshal(ci)
+ if err != nil {
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return
+ }
+ w.Header().Set("Content-Type", "application/json")
+ w.Write(js)
+ }))
+
+ return mux
+}
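
Because bone.Mux implements http.Handler, the endpoints can be exercised directly with httptest. A fragment for use inside a test function, assuming a populated *Server s (imports elided):

srv := httptest.NewServer(s.GetInfoMux())
defer srv.Close()

resp, err := http.Get(srv.URL + "/info")
if err != nil {
	t.Fatal(err)
}
defer resp.Body.Close()

var info types.CrioInfo
if err := json.NewDecoder(resp.Body).Decode(&info); err != nil {
	t.Fatal(err)
}
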
diff --git a/server/inspect_test.go b/server/inspect_test.go
new file mode 100644
index 000000000..7be46c4ed
--- /dev/null
+++ b/server/inspect_test.go
@@ -0,0 +1,235 @@
+package server
+
+import (
+ "testing"
+ "time"
+
+ "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+
+ "github.com/containernetworking/plugins/pkg/ns"
+ "github.com/kubernetes-incubator/cri-o/libkpod"
+ "github.com/kubernetes-incubator/cri-o/libkpod/sandbox"
+ "github.com/kubernetes-incubator/cri-o/oci"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+func TestGetInfo(t *testing.T) {
+ c := libkpod.DefaultConfig()
+ c.RootConfig.Storage = "afoobarstorage"
+ c.RootConfig.Root = "afoobarroot"
+ c.RuntimeConfig.CgroupManager = "systemd"
+ apiConfig := APIConfig{}
+ s := &Server{
+ config: Config{*c, apiConfig},
+ }
+ ci := s.getInfo()
+ if ci.CgroupDriver != "systemd" {
+ t.Fatalf("expected 'systemd', got %q", ci.CgroupDriver)
+ }
+ if ci.StorageDriver != "afoobarstorage" {
+ t.Fatalf("expected 'afoobarstorage', got %q", ci.StorageDriver)
+ }
+ if ci.StorageRoot != "afoobarroot" {
+ t.Fatalf("expected 'afoobarroot', got %q", ci.StorageRoot)
+ }
+}
+
+type mockNetNS struct {
+}
+
+func (ns mockNetNS) Close() error {
+ return nil
+}
+func (ns mockNetNS) Fd() uintptr {
+ ptr := new(uintptr)
+ return *ptr
+}
+func (ns mockNetNS) Do(toRun func(ns.NetNS) error) error {
+ return nil
+}
+func (ns mockNetNS) Set() error {
+ return nil
+}
+func (ns mockNetNS) Path() string {
+ return ""
+}
+
+func TestGetContainerInfo(t *testing.T) {
+ s := &Server{}
+ created := time.Now()
+ labels := map[string]string{
+ "io.kubernetes.container.name": "POD",
+ "io.kubernetes.test2": "value2",
+ "io.kubernetes.test3": "value3",
+ }
+ annotations := map[string]string{
+ "io.kubernetes.test": "value",
+ "io.kubernetes.test1": "value1",
+ }
+ getContainerFunc := func(id string) *oci.Container {
+ container, err := oci.NewContainer("testid", "testname", "", "/container/logs", mockNetNS{}, labels, annotations, annotations, "imageName", "imageName", "imageRef", &runtime.ContainerMetadata{}, "testsandboxid", false, false, false, false, false, "/root/for/container", created, "SIGKILL")
+ if err != nil {
+ t.Fatal(err)
+ }
+ container.SetMountPoint("/var/foo/container")
+ cstate := &oci.ContainerState{}
+ cstate.State = specs.State{
+ Pid: 42,
+ }
+ cstate.Created = created
+ container.SetState(cstate)
+ return container
+ }
+ getInfraContainerFunc := func(id string) *oci.Container {
+ return nil
+ }
+ getSandboxFunc := func(id string) *sandbox.Sandbox {
+ s := &sandbox.Sandbox{}
+ s.AddIP("1.1.1.42")
+ return s
+ }
+ ci, err := s.getContainerInfo("", getContainerFunc, getInfraContainerFunc, getSandboxFunc)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if ci.CreatedTime != created.UnixNano() {
+ t.Fatalf("expected same created time %d, got %d", created.UnixNano(), ci.CreatedTime)
+ }
+ if ci.Pid != 42 {
+ t.Fatalf("expected pid 42, got %v", ci.Pid)
+ }
+ if ci.Name != "testname" {
+ t.Fatalf("expected name testname, got %s", ci.Name)
+ }
+ if ci.Image != "imageName" {
+ t.Fatalf("expected image name imageName, got %s", ci.Image)
+ }
+ if ci.Root != "/var/foo/container" {
+ t.Fatalf("expected root to be /var/foo/container, got %s", ci.Root)
+ }
+ if ci.LogPath != "/container/logs" {
+		t.Fatalf("expected log path to be /container/logs, got %s", ci.LogPath)
+ }
+ if ci.Sandbox != "testsandboxid" {
+ t.Fatalf("expected sandbox to be testsandboxid, got %s", ci.Sandbox)
+ }
+ if ci.IP != "1.1.1.42" {
+ t.Fatalf("expected ip 1.1.1.42, got %s", ci.IP)
+ }
+ if len(ci.Annotations) == 0 {
+ t.Fatal("annotations are empty")
+ }
+ if len(ci.Labels) == 0 {
+ t.Fatal("labels are empty")
+ }
+ if len(ci.Annotations) != len(annotations) {
+ t.Fatalf("container info annotations len (%d) isn't the same as original annotations len (%d)", len(ci.Annotations), len(annotations))
+ }
+ if len(ci.Labels) != len(labels) {
+ t.Fatalf("container info labels len (%d) isn't the same as original labels len (%d)", len(ci.Labels), len(labels))
+ }
+ var found bool
+ for k, v := range annotations {
+ found = false
+ for key, value := range ci.Annotations {
+ if k == key && v == value {
+ found = true
+ break
+ }
+ }
+ if !found {
+ t.Fatalf("key %s with value %v wasn't in container info annotations", k, v)
+ }
+ }
+ for k, v := range labels {
+ found = false
+ for key, value := range ci.Labels {
+ if k == key && v == value {
+ found = true
+ break
+ }
+ }
+ if !found {
+ t.Fatalf("key %s with value %v wasn't in container info labels", k, v)
+ }
+ }
+}
+
+func TestGetContainerInfoCtrNotFound(t *testing.T) {
+ s := &Server{}
+ getContainerFunc := func(id string) *oci.Container {
+ return nil
+ }
+ getInfraContainerFunc := func(id string) *oci.Container {
+ return nil
+ }
+ getSandboxFunc := func(id string) *sandbox.Sandbox {
+ return nil
+ }
+ _, err := s.getContainerInfo("", getContainerFunc, getInfraContainerFunc, getSandboxFunc)
+ if err == nil {
+ t.Fatal("expected an error but got nothing")
+ }
+ if err != errCtrNotFound {
+ t.Fatalf("expected errCtrNotFound error, got %v", err)
+ }
+}
+
+func TestGetContainerInfoCtrStateNil(t *testing.T) {
+ s := &Server{}
+ created := time.Now()
+ labels := map[string]string{}
+ annotations := map[string]string{}
+ getContainerFunc := func(id string) *oci.Container {
+ container, err := oci.NewContainer("testid", "testname", "", "/container/logs", mockNetNS{}, labels, annotations, annotations, "imageName", "imageName", "imageRef", &runtime.ContainerMetadata{}, "testsandboxid", false, false, false, false, false, "/root/for/container", created, "SIGKILL")
+ if err != nil {
+ t.Fatal(err)
+ }
+ container.SetMountPoint("/var/foo/container")
+ container.SetState(nil)
+ return container
+ }
+ getInfraContainerFunc := func(id string) *oci.Container {
+ return nil
+ }
+ getSandboxFunc := func(id string) *sandbox.Sandbox {
+ s := &sandbox.Sandbox{}
+ s.AddIP("1.1.1.42")
+ return s
+ }
+ _, err := s.getContainerInfo("", getContainerFunc, getInfraContainerFunc, getSandboxFunc)
+ if err == nil {
+ t.Fatal("expected an error but got nothing")
+ }
+ if err != errCtrStateNil {
+ t.Fatalf("expected errCtrStateNil error, got %v", err)
+ }
+}
+
+func TestGetContainerInfoSandboxNotFound(t *testing.T) {
+ s := &Server{}
+ created := time.Now()
+ labels := map[string]string{}
+ annotations := map[string]string{}
+ getContainerFunc := func(id string) *oci.Container {
+ container, err := oci.NewContainer("testid", "testname", "", "/container/logs", mockNetNS{}, labels, annotations, annotations, "imageName", "imageName", "imageRef", &runtime.ContainerMetadata{}, "testsandboxid", false, false, false, false, false, "/root/for/container", created, "SIGKILL")
+ if err != nil {
+ t.Fatal(err)
+ }
+ container.SetMountPoint("/var/foo/container")
+ return container
+ }
+ getInfraContainerFunc := func(id string) *oci.Container {
+ return nil
+ }
+ getSandboxFunc := func(id string) *sandbox.Sandbox {
+ return nil
+ }
+ _, err := s.getContainerInfo("", getContainerFunc, getInfraContainerFunc, getSandboxFunc)
+ if err == nil {
+ t.Fatal("expected an error but got nothing")
+ }
+ if err != errSandboxNotFound {
+ t.Fatalf("expected errSandboxNotFound error, got %v", err)
+ }
+}
diff --git a/server/naming.go b/server/naming.go
new file mode 100644
index 000000000..cc18ba409
--- /dev/null
+++ b/server/naming.go
@@ -0,0 +1,86 @@
+package server
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/docker/docker/pkg/stringid"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+const (
+ kubePrefix = "k8s"
+ infraName = "POD"
+ nameDelimiter = "_"
+)
+
+func makeSandboxName(sandboxConfig *pb.PodSandboxConfig) string {
+ return strings.Join([]string{
+ kubePrefix,
+ sandboxConfig.Metadata.Name,
+ sandboxConfig.Metadata.Namespace,
+ sandboxConfig.Metadata.Uid,
+ fmt.Sprintf("%d", sandboxConfig.Metadata.Attempt),
+ }, nameDelimiter)
+}
+
+func makeSandboxContainerName(sandboxConfig *pb.PodSandboxConfig) string {
+ return strings.Join([]string{
+ kubePrefix,
+ infraName,
+ sandboxConfig.Metadata.Name,
+ sandboxConfig.Metadata.Namespace,
+ sandboxConfig.Metadata.Uid,
+ fmt.Sprintf("%d", sandboxConfig.Metadata.Attempt),
+ }, nameDelimiter)
+}
+
+func makeContainerName(sandboxMetadata *pb.PodSandboxMetadata, containerConfig *pb.ContainerConfig) string {
+ return strings.Join([]string{
+ kubePrefix,
+ containerConfig.Metadata.Name,
+ sandboxMetadata.Name,
+ sandboxMetadata.Namespace,
+ sandboxMetadata.Uid,
+ fmt.Sprintf("%d", containerConfig.Metadata.Attempt),
+ }, nameDelimiter)
+}
+
+func (s *Server) generatePodIDandName(sandboxConfig *pb.PodSandboxConfig) (string, string, error) {
+ var (
+ err error
+ id = stringid.GenerateNonCryptoID()
+ )
+ if sandboxConfig.Metadata.Namespace == "" {
+ return "", "", fmt.Errorf("cannot generate pod ID without namespace")
+ }
+ name, err := s.ReservePodName(id, makeSandboxName(sandboxConfig))
+ if err != nil {
+ return "", "", err
+ }
+ return id, name, err
+}
+
+func (s *Server) generateContainerIDandNameForSandbox(sandboxConfig *pb.PodSandboxConfig) (string, string, error) {
+ var (
+ err error
+ id = stringid.GenerateNonCryptoID()
+ )
+ name, err := s.ReserveContainerName(id, makeSandboxContainerName(sandboxConfig))
+ if err != nil {
+ return "", "", err
+ }
+ return id, name, err
+}
+
+func (s *Server) generateContainerIDandName(sandboxMetadata *pb.PodSandboxMetadata, containerConfig *pb.ContainerConfig) (string, string, error) {
+ var (
+ err error
+ id = stringid.GenerateNonCryptoID()
+ )
+ name, err := s.ReserveContainerName(id, makeContainerName(sandboxMetadata, containerConfig))
+ if err != nil {
+ return "", "", err
+ }
+ return id, name, err
+}
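
For a hypothetical pod, the reserved names come out as follows (the IDs themselves are random). This fragment assumes it runs inside this package, since the helpers are unexported:

cfg := &pb.PodSandboxConfig{Metadata: &pb.PodSandboxMetadata{
	Name: "nginx", Namespace: "default", Uid: "7d9ad0d2", Attempt: 0, // hypothetical
}}
makeSandboxName(cfg)          // "k8s_nginx_default_7d9ad0d2_0"
makeSandboxContainerName(cfg) // "k8s_POD_nginx_default_7d9ad0d2_0"
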
diff --git a/server/runtime_status.go b/server/runtime_status.go
new file mode 100644
index 000000000..67fc87b6f
--- /dev/null
+++ b/server/runtime_status.go
@@ -0,0 +1,41 @@
+package server
+
+import (
+ "golang.org/x/net/context"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// Status returns the status of the runtime.
+func (s *Server) Status(ctx context.Context, req *pb.StatusRequest) (*pb.StatusResponse, error) {
+ // Deal with Runtime conditions
+ runtimeReady, err := s.Runtime().RuntimeReady()
+ if err != nil {
+ return nil, err
+ }
+ networkReady, err := s.Runtime().NetworkReady()
+ if err != nil {
+ return nil, err
+ }
+
+ // Use vendored strings
+ runtimeReadyConditionString := pb.RuntimeReady
+ networkReadyConditionString := pb.NetworkReady
+
+ resp := &pb.StatusResponse{
+ Status: &pb.RuntimeStatus{
+ Conditions: []*pb.RuntimeCondition{
+ {
+ Type: runtimeReadyConditionString,
+ Status: runtimeReady,
+ },
+ {
+ Type: networkReadyConditionString,
+ Status: networkReady,
+ },
+ },
+ },
+ }
+
+ return resp, nil
+}
diff --git a/server/sandbox_list.go b/server/sandbox_list.go
new file mode 100644
index 000000000..fbc5eafa8
--- /dev/null
+++ b/server/sandbox_list.go
@@ -0,0 +1,94 @@
+package server
+
+import (
+ "github.com/kubernetes-incubator/cri-o/libkpod/sandbox"
+ "github.com/kubernetes-incubator/cri-o/oci"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ "k8s.io/apimachinery/pkg/fields"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// filterSandbox returns whether the passed sandbox matches the filtering criteria
+func filterSandbox(p *pb.PodSandbox, filter *pb.PodSandboxFilter) bool {
+ if filter != nil {
+ if filter.State != nil {
+ if p.State != filter.State.State {
+ return false
+ }
+ }
+ if filter.LabelSelector != nil {
+ sel := fields.SelectorFromSet(filter.LabelSelector)
+ if !sel.Matches(fields.Set(p.Labels)) {
+ return false
+ }
+ }
+ }
+ return true
+}
+
+// ListPodSandbox returns a list of pod sandboxes.
+func (s *Server) ListPodSandbox(ctx context.Context, req *pb.ListPodSandboxRequest) (*pb.ListPodSandboxResponse, error) {
+ logrus.Debugf("ListPodSandboxRequest %+v", req)
+ var pods []*pb.PodSandbox
+ var podList []*sandbox.Sandbox
+ for _, sb := range s.ContainerServer.ListSandboxes() {
+ podList = append(podList, sb)
+ }
+
+ filter := req.Filter
+ // Filter by pod id first.
+ if filter != nil {
+ if filter.Id != "" {
+ id, err := s.PodIDIndex().Get(filter.Id)
+ if err != nil {
+				// Not finding an ID in a filtered list should not be considered
+				// an error; it might have been deleted when stop was done.
+				// Log and return an empty struct.
+				logrus.Warnf("unable to find pod %s with filter", filter.Id)
+ return &pb.ListPodSandboxResponse{}, nil
+ }
+ sb := s.getSandbox(id)
+ if sb == nil {
+ podList = []*sandbox.Sandbox{}
+ } else {
+ podList = []*sandbox.Sandbox{sb}
+ }
+ }
+ }
+
+ for _, sb := range podList {
+ podInfraContainer := sb.InfraContainer()
+ if podInfraContainer == nil {
+			// this can't really happen, but if it does because of a bug,
+			// it's better not to panic
+ continue
+ }
+ cState := s.Runtime().ContainerStatus(podInfraContainer)
+ created := cState.Created.UnixNano()
+ rStatus := pb.PodSandboxState_SANDBOX_NOTREADY
+ if cState.Status == oci.ContainerStateRunning {
+ rStatus = pb.PodSandboxState_SANDBOX_READY
+ }
+
+ pod := &pb.PodSandbox{
+ Id: sb.ID(),
+ CreatedAt: created,
+ State: rStatus,
+ Labels: sb.Labels(),
+ Annotations: sb.Annotations(),
+ Metadata: sb.Metadata(),
+ }
+
+ // Filter by other criteria such as state and labels.
+ if filterSandbox(pod, req.Filter) {
+ pods = append(pods, pod)
+ }
+ }
+
+ resp := &pb.ListPodSandboxResponse{
+ Items: pods,
+ }
+ logrus.Debugf("ListPodSandboxResponse %+v", resp)
+ return resp, nil
+}
diff --git a/server/sandbox_network.go b/server/sandbox_network.go
new file mode 100644
index 000000000..15cf99c8f
--- /dev/null
+++ b/server/sandbox_network.go
@@ -0,0 +1,70 @@
+package server
+
+import (
+ "fmt"
+ "net"
+
+ "github.com/kubernetes-incubator/cri-o/libkpod/sandbox"
+ "github.com/sirupsen/logrus"
+ "k8s.io/kubernetes/pkg/kubelet/network/hostport"
+)
+
+// networkStart sets up the sandbox's network and returns the pod IP on success
+// or an error
+func (s *Server) networkStart(hostNetwork bool, sb *sandbox.Sandbox) (string, error) {
+ if hostNetwork {
+ return s.BindAddress(), nil
+ }
+
+ podNetwork := newPodNetwork(sb)
+ err := s.netPlugin.SetUpPod(podNetwork)
+ if err != nil {
+ return "", fmt.Errorf("failed to create pod network sandbox %s(%s): %v", sb.Name(), sb.ID(), err)
+ }
+
+ var ip string
+ if ip, err = s.netPlugin.GetPodNetworkStatus(podNetwork); err != nil {
+ return "", fmt.Errorf("failed to get network status for pod sandbox %s(%s): %v", sb.Name(), sb.ID(), err)
+ }
+
+ if len(sb.PortMappings()) > 0 {
+ ip4 := net.ParseIP(ip).To4()
+ if ip4 == nil {
+ return "", fmt.Errorf("failed to get valid ipv4 address for sandbox %s(%s)", sb.Name(), sb.ID())
+ }
+
+ if err = s.hostportManager.Add(sb.ID(), &hostport.PodPortMapping{
+ Name: sb.Name(),
+ PortMappings: sb.PortMappings(),
+ IP: ip4,
+ HostNetwork: false,
+ }, "lo"); err != nil {
+ return "", fmt.Errorf("failed to add hostport mapping for sandbox %s(%s): %v", sb.Name(), sb.ID(), err)
+ }
+
+ }
+ return ip, nil
+}
+
+// networkStop cleans up and removes a pod's network. It is best-effort and
+// must call the network plugin even if the network namespace is already gone
+func (s *Server) networkStop(hostNetwork bool, sb *sandbox.Sandbox) error {
+ if !hostNetwork {
+ if err := s.hostportManager.Remove(sb.ID(), &hostport.PodPortMapping{
+ Name: sb.Name(),
+ PortMappings: sb.PortMappings(),
+ HostNetwork: false,
+ }); err != nil {
+ logrus.Warnf("failed to remove hostport for pod sandbox %s(%s): %v",
+ sb.Name(), sb.ID(), err)
+ }
+
+ podNetwork := newPodNetwork(sb)
+ if err := s.netPlugin.TearDownPod(podNetwork); err != nil {
+ logrus.Warnf("failed to destroy network for pod sandbox %s(%s): %v",
+ sb.Name(), sb.ID(), err)
+ }
+ }
+
+ return nil
+}
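
The To4 check in networkStart is what restricts hostport mappings to IPv4 pods; a quick illustration:

package main

import (
	"fmt"
	"net"
)

func main() {
	fmt.Println(net.ParseIP("10.88.0.5").To4() != nil) // true: eligible for hostport mappings
	fmt.Println(net.ParseIP("fd00::5").To4() != nil)   // false: IPv6 is rejected with an error above
}
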
diff --git a/server/sandbox_remove.go b/server/sandbox_remove.go
new file mode 100644
index 000000000..b0e073844
--- /dev/null
+++ b/server/sandbox_remove.go
@@ -0,0 +1,98 @@
+package server
+
+import (
+ "fmt"
+
+ "github.com/containers/storage"
+ "github.com/kubernetes-incubator/cri-o/libkpod/sandbox"
+ "github.com/kubernetes-incubator/cri-o/oci"
+ pkgstorage "github.com/kubernetes-incubator/cri-o/pkg/storage"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// RemovePodSandbox deletes the sandbox. If there are any running containers in the
+// sandbox, they should be force deleted.
+func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxRequest) (*pb.RemovePodSandboxResponse, error) {
+ logrus.Debugf("RemovePodSandboxRequest %+v", req)
+ sb, err := s.getPodSandboxFromRequest(req.PodSandboxId)
+ if err != nil {
+ if err == sandbox.ErrIDEmpty {
+ return nil, err
+ }
+
+		// If the sandbox isn't found we just return an empty response to adhere
+		// to the CRI interface, which expects us not to error out in not-found
+		// cases.
+
+ resp := &pb.RemovePodSandboxResponse{}
+ logrus.Warnf("could not get sandbox %s, it's probably been removed already: %v", req.PodSandboxId, err)
+ return resp, nil
+ }
+
+ podInfraContainer := sb.InfraContainer()
+ containers := sb.Containers().List()
+ containers = append(containers, podInfraContainer)
+
+ // Delete all the containers in the sandbox
+ for _, c := range containers {
+ if !sb.Stopped() {
+ cState := s.Runtime().ContainerStatus(c)
+ if cState.Status == oci.ContainerStateCreated || cState.Status == oci.ContainerStateRunning {
+ if err := s.Runtime().StopContainer(ctx, c, 10); err != nil {
+ // Assume container is already stopped
+ logrus.Warnf("failed to stop container %s: %v", c.Name(), err)
+ }
+ }
+ }
+
+ if err := s.Runtime().DeleteContainer(c); err != nil {
+ return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err)
+ }
+
+ if c.ID() == podInfraContainer.ID() {
+ continue
+ }
+
+ if err := s.StorageRuntimeServer().StopContainer(c.ID()); err != nil && err != storage.ErrContainerUnknown {
+			// assume container is already unmounted
+ logrus.Warnf("failed to stop container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err)
+ }
+ if err := s.StorageRuntimeServer().DeleteContainer(c.ID()); err != nil && err != storage.ErrContainerUnknown {
+ return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err)
+ }
+
+ s.ReleaseContainerName(c.Name())
+ s.removeContainer(c)
+ if err := s.CtrIDIndex().Delete(c.ID()); err != nil {
+ return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s from index: %v", c.Name(), sb.ID(), err)
+ }
+ }
+
+ s.removeInfraContainer(podInfraContainer)
+
+ // Remove the files related to the sandbox
+ if err := s.StorageRuntimeServer().StopContainer(sb.ID()); err != nil && errors.Cause(err) != storage.ErrContainerUnknown {
+ logrus.Warnf("failed to stop sandbox container in pod sandbox %s: %v", sb.ID(), err)
+ }
+ if err := s.StorageRuntimeServer().RemovePodSandbox(sb.ID()); err != nil && err != pkgstorage.ErrInvalidSandboxID {
+ return nil, fmt.Errorf("failed to remove pod sandbox %s: %v", sb.ID(), err)
+ }
+
+ s.ReleaseContainerName(podInfraContainer.Name())
+ if err := s.CtrIDIndex().Delete(podInfraContainer.ID()); err != nil {
+ return nil, fmt.Errorf("failed to delete infra container %s in pod sandbox %s from index: %v", podInfraContainer.ID(), sb.ID(), err)
+ }
+
+ s.ReleasePodName(sb.Name())
+ s.removeSandbox(sb.ID())
+ if err := s.PodIDIndex().Delete(sb.ID()); err != nil {
+ return nil, fmt.Errorf("failed to delete pod sandbox %s from index: %v", sb.ID(), err)
+ }
+
+ resp := &pb.RemovePodSandboxResponse{}
+ logrus.Debugf("RemovePodSandboxResponse %+v", resp)
+ return resp, nil
+}
diff --git a/server/sandbox_run.go b/server/sandbox_run.go
new file mode 100644
index 000000000..72afdb229
--- /dev/null
+++ b/server/sandbox_run.go
@@ -0,0 +1,615 @@
+package server
+
+import (
+ "encoding/json"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path"
+ "path/filepath"
+ "regexp"
+ "strconv"
+ "strings"
+ "time"
+
+ "github.com/containers/storage"
+ "github.com/kubernetes-incubator/cri-o/libkpod/sandbox"
+ "github.com/kubernetes-incubator/cri-o/oci"
+ "github.com/kubernetes-incubator/cri-o/pkg/annotations"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/opencontainers/runtime-tools/generate"
+ "github.com/opencontainers/selinux/go-selinux/label"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ "golang.org/x/sys/unix"
+ "k8s.io/kubernetes/pkg/api/v1"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+ "k8s.io/kubernetes/pkg/kubelet/leaky"
+ "k8s.io/kubernetes/pkg/kubelet/network/hostport"
+ "k8s.io/kubernetes/pkg/kubelet/types"
+)
+
+const (
+ // PodInfraOOMAdj is the value that we set for oom score adj for
+ // the pod infra container.
+ // TODO: Remove this const once this value is provided over CRI
+ // See https://github.com/kubernetes/kubernetes/issues/47938
+ PodInfraOOMAdj int = -998
+ // PodInfraCPUshares is default cpu shares for sandbox container.
+ PodInfraCPUshares = 2
+)
+
+// privilegedSandbox returns true if the sandbox configuration
+// requires additional host privileges for the sandbox.
+func (s *Server) privilegedSandbox(req *pb.RunPodSandboxRequest) bool {
+ securityContext := req.GetConfig().GetLinux().GetSecurityContext()
+ if securityContext == nil {
+ return false
+ }
+
+ if securityContext.Privileged {
+ return true
+ }
+
+ namespaceOptions := securityContext.GetNamespaceOptions()
+ if namespaceOptions == nil {
+ return false
+ }
+
+ if namespaceOptions.HostNetwork ||
+ namespaceOptions.HostPid ||
+ namespaceOptions.HostIpc {
+ return true
+ }
+
+ return false
+}
+
+// trustedSandbox returns true if the sandbox will run trusted workloads.
+func (s *Server) trustedSandbox(req *pb.RunPodSandboxRequest) bool {
+ kubeAnnotations := req.GetConfig().GetAnnotations()
+
+ trustedAnnotation, ok := kubeAnnotations[annotations.TrustedSandbox]
+ if !ok {
+ // A sandbox is trusted by default.
+ return true
+ }
+
+ return isTrue(trustedAnnotation)
+}
+
+func (s *Server) runContainer(container *oci.Container, cgroupParent string) error {
+ if err := s.Runtime().CreateContainer(container, cgroupParent); err != nil {
+ return err
+ }
+ return s.Runtime().StartContainer(container)
+}
+
+var (
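+	// conflictRE extracts the pod ID from a name-reservation conflict, e.g.
+	// `already reserved for pod "0123456789ab"` captures "0123456789ab".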
+ conflictRE = regexp.MustCompile(`already reserved for pod "([0-9a-z]+)"`)
+)
+
+// RunPodSandbox creates and runs a pod-level sandbox.
+func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (resp *pb.RunPodSandboxResponse, err error) {
+ s.updateLock.RLock()
+ defer s.updateLock.RUnlock()
+
+ logrus.Debugf("RunPodSandboxRequest %+v", req)
+ var processLabel, mountLabel, resolvPath string
+ // process req.Name
+ kubeName := req.GetConfig().GetMetadata().Name
+ if kubeName == "" {
+ return nil, fmt.Errorf("PodSandboxConfig.Name should not be empty")
+ }
+
+ namespace := req.GetConfig().GetMetadata().Namespace
+ attempt := req.GetConfig().GetMetadata().Attempt
+
+ id, name, err := s.generatePodIDandName(req.GetConfig())
+ if err != nil {
+ if strings.Contains(err.Error(), "already reserved for pod") {
+ matches := conflictRE.FindStringSubmatch(err.Error())
+ if len(matches) != 2 {
+ return nil, err
+ }
+ dupID := matches[1]
+ if _, err := s.StopPodSandbox(ctx, &pb.StopPodSandboxRequest{PodSandboxId: dupID}); err != nil {
+ return nil, err
+ }
+ if _, err := s.RemovePodSandbox(ctx, &pb.RemovePodSandboxRequest{PodSandboxId: dupID}); err != nil {
+ return nil, err
+ }
+ id, name, err = s.generatePodIDandName(req.GetConfig())
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ return nil, err
+ }
+ }
+
+ defer func() {
+ if err != nil {
+ s.ReleasePodName(name)
+ }
+ }()
+
+ _, containerName, err := s.generateContainerIDandNameForSandbox(req.GetConfig())
+ if err != nil {
+ return nil, err
+ }
+
+ defer func() {
+ if err != nil {
+ s.ReleaseContainerName(containerName)
+ }
+ }()
+
+ podContainer, err := s.StorageRuntimeServer().CreatePodSandbox(s.ImageContext(),
+ name, id,
+ s.config.PauseImage, "",
+ containerName,
+ req.GetConfig().GetMetadata().Name,
+ req.GetConfig().GetMetadata().Uid,
+ namespace,
+ attempt,
+ nil)
+ if errors.Cause(err) == storage.ErrDuplicateName {
+ return nil, fmt.Errorf("pod sandbox with name %q already exists", name)
+ }
+ if err != nil {
+ return nil, fmt.Errorf("error creating pod sandbox with name %q: %v", name, err)
+ }
+ defer func() {
+ if err != nil {
+ if err2 := s.StorageRuntimeServer().RemovePodSandbox(id); err2 != nil {
+ logrus.Warnf("couldn't cleanup pod sandbox %q: %v", id, err2)
+ }
+ }
+ }()
+
+ // TODO: factor generating/updating the spec into something other projects can vendor
+
+ // creates a spec Generator with the default spec.
+ g := generate.New()
+
+ // setup defaults for the pod sandbox
+ g.SetRootReadonly(true)
+ if s.config.PauseCommand == "" {
+ if podContainer.Config != nil {
+ g.SetProcessArgs(podContainer.Config.Config.Cmd)
+ } else {
+ g.SetProcessArgs([]string{sandbox.PodInfraCommand})
+ }
+ } else {
+ g.SetProcessArgs([]string{s.config.PauseCommand})
+ }
+
+ // set DNS options
+ if req.GetConfig().GetDnsConfig() != nil {
+ dnsServers := req.GetConfig().GetDnsConfig().Servers
+ dnsSearches := req.GetConfig().GetDnsConfig().Searches
+ dnsOptions := req.GetConfig().GetDnsConfig().Options
+ resolvPath = fmt.Sprintf("%s/resolv.conf", podContainer.RunDir)
+ err = parseDNSOptions(dnsServers, dnsSearches, dnsOptions, resolvPath)
+ if err != nil {
+			if err1 := removeFile(resolvPath); err1 != nil {
+				return nil, fmt.Errorf("%v; failed to remove %s: %v", err, resolvPath, err1)
+			}
+ return nil, err
+ }
+ if err := label.Relabel(resolvPath, mountLabel, true); err != nil && err != unix.ENOTSUP {
+ return nil, err
+ }
+
+ g.AddBindMount(resolvPath, "/etc/resolv.conf", []string{"ro"})
+ }
+
+ // add metadata
+ metadata := req.GetConfig().GetMetadata()
+ metadataJSON, err := json.Marshal(metadata)
+ if err != nil {
+ return nil, err
+ }
+
+ // add labels
+ labels := req.GetConfig().GetLabels()
+
+ // Add special container name label for the infra container
+ labelsJSON := []byte{}
+ if labels != nil {
+ labels[types.KubernetesContainerNameLabel] = leaky.PodInfraContainerName
+ labelsJSON, err = json.Marshal(labels)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ // add annotations
+ kubeAnnotations := req.GetConfig().GetAnnotations()
+ kubeAnnotationsJSON, err := json.Marshal(kubeAnnotations)
+ if err != nil {
+ return nil, err
+ }
+
+ // set log directory
+ logDir := req.GetConfig().LogDirectory
+ if logDir == "" {
+ logDir = filepath.Join(s.config.LogDir, id)
+ }
+	// This should always be absolute from k8s; validate before creating anything.
+	if !filepath.IsAbs(logDir) {
+		return nil, fmt.Errorf("requested logDir for sandbox id %s is a relative path: %s", id, logDir)
+	}
+	if err = os.MkdirAll(logDir, 0700); err != nil {
+		return nil, err
+	}
+
+ privileged := s.privilegedSandbox(req)
+
+ securityContext := req.GetConfig().GetLinux().GetSecurityContext()
+ if securityContext == nil {
+ logrus.Warn("no security context found in config.")
+ }
+
+ processLabel, mountLabel, err = getSELinuxLabels(securityContext.GetSelinuxOptions(), privileged)
+ if err != nil {
+ return nil, err
+ }
+
+ // Don't use SELinux separation with Host Pid or IPC Namespace or privileged.
+ if securityContext.GetNamespaceOptions().GetHostPid() || securityContext.GetNamespaceOptions().GetHostIpc() {
+ processLabel, mountLabel = "", ""
+ }
+ g.SetProcessSelinuxLabel(processLabel)
+ g.SetLinuxMountLabel(mountLabel)
+
+ // create shm mount for the pod containers.
+ var shmPath string
+ if securityContext.GetNamespaceOptions().GetHostIpc() {
+ shmPath = "/dev/shm"
+ } else {
+ shmPath, err = setupShm(podContainer.RunDir, mountLabel)
+ if err != nil {
+ return nil, err
+ }
+ defer func() {
+ if err != nil {
+ if err2 := unix.Unmount(shmPath, unix.MNT_DETACH); err2 != nil {
+ logrus.Warnf("failed to unmount shm for pod: %v", err2)
+ }
+ }
+ }()
+ }
+
+ err = s.setPodSandboxMountLabel(id, mountLabel)
+ if err != nil {
+ return nil, err
+ }
+
+ if err = s.CtrIDIndex().Add(id); err != nil {
+ return nil, err
+ }
+
+ defer func() {
+ if err != nil {
+ if err2 := s.CtrIDIndex().Delete(id); err2 != nil {
+ logrus.Warnf("couldn't delete ctr id %s from idIndex", id)
+ }
+ }
+ }()
+
+ // set log path inside log directory
+ logPath := filepath.Join(logDir, id+".log")
+
+ // Handle https://issues.k8s.io/44043
+ if err := ensureSaneLogPath(logPath); err != nil {
+ return nil, err
+ }
+
+ hostNetwork := securityContext.GetNamespaceOptions().GetHostNetwork()
+
+ hostname, err := getHostname(id, req.GetConfig().Hostname, hostNetwork)
+ if err != nil {
+ return nil, err
+ }
+ g.SetHostname(hostname)
+
+ trusted := s.trustedSandbox(req)
+ g.AddAnnotation(annotations.Metadata, string(metadataJSON))
+ g.AddAnnotation(annotations.Labels, string(labelsJSON))
+ g.AddAnnotation(annotations.Annotations, string(kubeAnnotationsJSON))
+ g.AddAnnotation(annotations.LogPath, logPath)
+ g.AddAnnotation(annotations.Name, name)
+ g.AddAnnotation(annotations.ContainerType, annotations.ContainerTypeSandbox)
+ g.AddAnnotation(annotations.SandboxID, id)
+ g.AddAnnotation(annotations.ContainerName, containerName)
+ g.AddAnnotation(annotations.ContainerID, id)
+ g.AddAnnotation(annotations.ShmPath, shmPath)
+ g.AddAnnotation(annotations.PrivilegedRuntime, fmt.Sprintf("%v", privileged))
+ g.AddAnnotation(annotations.TrustedSandbox, fmt.Sprintf("%v", trusted))
+ g.AddAnnotation(annotations.ResolvPath, resolvPath)
+ g.AddAnnotation(annotations.HostName, hostname)
+ g.AddAnnotation(annotations.KubeName, kubeName)
+ if podContainer.Config.Config.StopSignal != "" {
+ // this key is defined in image-spec conversion document at https://github.com/opencontainers/image-spec/pull/492/files#diff-8aafbe2c3690162540381b8cdb157112R57
+ g.AddAnnotation("org.opencontainers.image.stopSignal", podContainer.Config.Config.StopSignal)
+ }
+
+ created := time.Now()
+ g.AddAnnotation(annotations.Created, created.Format(time.RFC3339Nano))
+
+ portMappings := convertPortMappings(req.GetConfig().GetPortMappings())
+
+ // setup cgroup settings
+ cgroupParent := req.GetConfig().GetLinux().GetCgroupParent()
+ if cgroupParent != "" {
+ if s.config.CgroupManager == oci.SystemdCgroupsManager {
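+ // ".slice" itself is 6 characters, so anything not longer than that
+ // cannot be a valid slice name.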
+ if len(cgroupParent) <= 6 || !strings.HasSuffix(path.Base(cgroupParent), ".slice") {
+ return nil, fmt.Errorf("cri-o configured with systemd cgroup manager, but did not receive slice as parent: %s", cgroupParent)
+ }
+ cgPath, err := convertCgroupFsNameToSystemd(cgroupParent)
+ if err != nil {
+ return nil, err
+ }
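+ // runc parses a systemd cgroups path as "slice:prefix:name", so the
+ // infra container ends up under <slice> in a crio-<id>.scope unit.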
+ g.SetLinuxCgroupsPath(cgPath + ":" + "crio" + ":" + id)
+ cgroupParent = cgPath
+ } else {
+ if strings.HasSuffix(path.Base(cgroupParent), ".slice") {
+ return nil, fmt.Errorf("cri-o configured with cgroupfs cgroup manager, but received systemd slice as parent: %s", cgroupParent)
+ }
+ cgPath := filepath.Join(cgroupParent, scopePrefix+"-"+id)
+ g.SetLinuxCgroupsPath(cgPath)
+ }
+ }
+
+ sb, err := sandbox.New(id, namespace, name, kubeName, logDir, labels, kubeAnnotations, processLabel, mountLabel, metadata, shmPath, cgroupParent, privileged, trusted, resolvPath, hostname, portMappings)
+ if err != nil {
+ return nil, err
+ }
+
+ s.addSandbox(sb)
+ defer func() {
+ if err != nil {
+ s.removeSandbox(id)
+ }
+ }()
+
+ if err = s.PodIDIndex().Add(id); err != nil {
+ return nil, err
+ }
+
+ defer func() {
+ if err != nil {
+ if err := s.PodIDIndex().Delete(id); err != nil {
+ logrus.Warnf("couldn't delete pod id %s from idIndex", id)
+ }
+ }
+ }()
+
+ for k, v := range kubeAnnotations {
+ g.AddAnnotation(k, v)
+ }
+ for k, v := range labels {
+ g.AddAnnotation(k, v)
+ }
+
+ // extract linux sysctls from annotations and pass down to oci runtime
+ safe, unsafe, err := SysctlsFromPodAnnotations(kubeAnnotations)
+ if err != nil {
+ return nil, err
+ }
+ for _, sysctl := range safe {
+ g.AddLinuxSysctl(sysctl.Name, sysctl.Value)
+ }
+ for _, sysctl := range unsafe {
+ g.AddLinuxSysctl(sysctl.Name, sysctl.Value)
+ }
+
+ // Set OOM score adjust of the infra container to be very low
+ // so it doesn't get killed.
+ g.SetProcessOOMScoreAdj(PodInfraOOMAdj)
+
+ g.SetLinuxResourcesCPUShares(PodInfraCPUshares)
+
+ // set up namespaces
+ if hostNetwork {
+ err = g.RemoveLinuxNamespace(string(runtimespec.NetworkNamespace))
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ // Create the sandbox network namespace
+ if err = sb.NetNsCreate(); err != nil {
+ return nil, err
+ }
+
+ defer func() {
+ if err == nil {
+ return
+ }
+
+ if netnsErr := sb.NetNsRemove(); netnsErr != nil {
+ logrus.Warnf("Failed to remove networking namespace: %v", netnsErr)
+ }
+ }()
+
+ // Pass the created namespace path to the runtime
+ err = g.AddOrReplaceLinuxNamespace(string(runtimespec.NetworkNamespace), sb.NetNsPath())
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ if securityContext.GetNamespaceOptions().GetHostPid() {
+ err = g.RemoveLinuxNamespace(string(runtimespec.PIDNamespace))
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ if securityContext.GetNamespaceOptions().GetHostIpc() {
+ err = g.RemoveLinuxNamespace(string(runtimespec.IPCNamespace))
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ if !s.seccompEnabled {
+ g.Spec().Linux.Seccomp = nil
+ }
+
+ saveOptions := generate.ExportOptions{}
+ mountPoint, err := s.StorageRuntimeServer().StartContainer(id)
+ if err != nil {
+ return nil, fmt.Errorf("failed to mount container %s in pod sandbox %s(%s): %v", containerName, sb.Name(), id, err)
+ }
+ g.AddAnnotation(annotations.MountPoint, mountPoint)
+ g.SetRootPath(mountPoint)
+
+ hostnamePath := filepath.Join(podContainer.RunDir, "hostname")
+ if err := ioutil.WriteFile(hostnamePath, []byte(hostname+"\n"), 0644); err != nil {
+ return nil, err
+ }
+ if err := label.Relabel(hostnamePath, mountLabel, true); err != nil && err != unix.ENOTSUP {
+ return nil, err
+ }
+ g.AddBindMount(hostnamePath, "/etc/hostname", []string{"ro"})
+ g.AddAnnotation(annotations.HostnamePath, hostnamePath)
+ sb.AddHostnamePath(hostnamePath)
+
+ container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.NetNs(), labels, g.Spec().Annotations, kubeAnnotations, "", "", "", nil, id, false, false, false, sb.Privileged(), sb.Trusted(), podContainer.Dir, created, podContainer.Config.Config.StopSignal)
+ if err != nil {
+ return nil, err
+ }
+ container.SetSpec(g.Spec())
+ container.SetMountPoint(mountPoint)
+
+ sb.SetInfraContainer(container)
+
+ var ip string
+ ip, err = s.networkStart(hostNetwork, sb)
+ if err != nil {
+ return nil, err
+ }
+ defer func() {
+ if err != nil {
+ s.networkStop(hostNetwork, sb)
+ }
+ }()
+
+ g.AddAnnotation(annotations.IP, ip)
+ sb.AddIP(ip)
+
+ err = g.SaveToFile(filepath.Join(podContainer.Dir, "config.json"), saveOptions)
+ if err != nil {
+ return nil, fmt.Errorf("failed to save template configuration for pod sandbox %s(%s): %v", sb.Name(), id, err)
+ }
+ if err = g.SaveToFile(filepath.Join(podContainer.RunDir, "config.json"), saveOptions); err != nil {
+ return nil, fmt.Errorf("failed to write runtime configuration for pod sandbox %s(%s): %v", sb.Name(), id, err)
+ }
+
+ if err = s.runContainer(container, sb.CgroupParent()); err != nil {
+ return nil, err
+ }
+
+ s.addInfraContainer(container)
+
+ s.ContainerStateToDisk(container)
+
+ resp = &pb.RunPodSandboxResponse{PodSandboxId: id}
+ logrus.Debugf("RunPodSandboxResponse: %+v", resp)
+ return resp, nil
+}
+
+func convertPortMappings(in []*pb.PortMapping) []*hostport.PortMapping {
+ if in == nil {
+ return nil
+ }
+ out := make([]*hostport.PortMapping, len(in))
+ for i, v := range in {
+ out[i] = &hostport.PortMapping{
+ HostPort: v.HostPort,
+ ContainerPort: v.ContainerPort,
+ Protocol: v1.Protocol(v.Protocol.String()),
+ HostIP: v.HostIp,
+ }
+ }
+ return out
+}
+
+func getHostname(id, hostname string, hostNetwork bool) (string, error) {
+ if hostNetwork {
+ if hostname == "" {
+ h, err := os.Hostname()
+ if err != nil {
+ return "", err
+ }
+ hostname = h
+ }
+ } else {
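+ // Without host networking, default to the first 12 characters of
+ // the sandbox ID, Docker-style.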
+ if hostname == "" {
+ hostname = id[:12]
+ }
+ }
+ return hostname, nil
+}
+
+func (s *Server) setPodSandboxMountLabel(id, mountLabel string) error {
+ storageMetadata, err := s.StorageRuntimeServer().GetContainerMetadata(id)
+ if err != nil {
+ return err
+ }
+ storageMetadata.SetMountLabel(mountLabel)
+ return s.StorageRuntimeServer().SetContainerMetadata(id, storageMetadata)
+}
+
+func getSELinuxLabels(selinuxOptions *pb.SELinuxOption, privileged bool) (processLabel string, mountLabel string, err error) {
+ if privileged {
+ return "", "", nil
+ }
+ labels := []string{}
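+ // Build "key:value" options in the form understood by label.InitLabels.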
+ if selinuxOptions != nil {
+ if selinuxOptions.User != "" {
+ labels = append(labels, "user:"+selinuxOptions.User)
+ }
+ if selinuxOptions.Role != "" {
+ labels = append(labels, "role:"+selinuxOptions.Role)
+ }
+ if selinuxOptions.Type != "" {
+ labels = append(labels, "type:"+selinuxOptions.Type)
+ }
+ if selinuxOptions.Level != "" {
+ labels = append(labels, "level:"+selinuxOptions.Level)
+ }
+ }
+ return label.InitLabels(labels)
+}
+
+func setupShm(podSandboxRunDir, mountLabel string) (shmPath string, err error) {
+ shmPath = filepath.Join(podSandboxRunDir, "shm")
+ if err = os.Mkdir(shmPath, 0700); err != nil {
+ return "", err
+ }
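+ // Mount a dedicated tmpfs with the usual /dev/shm semantics: mode 1777,
+ // noexec/nosuid/nodev, sized at sandbox.DefaultShmSize and labeled for
+ // the pod.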
+ shmOptions := "mode=1777,size=" + strconv.Itoa(sandbox.DefaultShmSize)
+ if err = unix.Mount("shm", shmPath, "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV,
+ label.FormatMountLabel(shmOptions, mountLabel)); err != nil {
+ return "", fmt.Errorf("failed to mount shm tmpfs for pod: %v", err)
+ }
+ return shmPath, nil
+}
+
+// convertCgroupFsNameToSystemd converts an expanded cgroupfs name to its systemd name.
+// For example, it will convert test.slice/test-a.slice/test-a-b.slice to become test-a-b.slice
+// NOTE: the upstream copy of this helper is public to allow its usage in dockermanager and
+// dockershim; ideally both those code areas could use something from libcontainer if we get
+// this style function upstream.
+func convertCgroupFsNameToSystemd(cgroupfsName string) (string, error) {
+ // TODO: see if libcontainer systemd implementation could use something similar, and if so, move
+ // this function up to that library. At that time, it would most likely do validation specific to systemd
+ // above and beyond the simple assumption here that the base of the path encodes the hierarchy
+ // per systemd convention.
+ return path.Base(cgroupfsName), nil
+}
diff --git a/server/sandbox_status.go b/server/sandbox_status.go
new file mode 100644
index 000000000..f5b6dd09a
--- /dev/null
+++ b/server/sandbox_status.go
@@ -0,0 +1,41 @@
+package server
+
+import (
+ "github.com/kubernetes-incubator/cri-o/oci"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// PodSandboxStatus returns the Status of the PodSandbox.
+func (s *Server) PodSandboxStatus(ctx context.Context, req *pb.PodSandboxStatusRequest) (*pb.PodSandboxStatusResponse, error) {
+ logrus.Debugf("PodSandboxStatusRequest %+v", req)
+ sb, err := s.getPodSandboxFromRequest(req.PodSandboxId)
+ if err != nil {
+ return nil, err
+ }
+
+ podInfraContainer := sb.InfraContainer()
+ cState := s.Runtime().ContainerStatus(podInfraContainer)
+
+ rStatus := pb.PodSandboxState_SANDBOX_NOTREADY
+ if cState.Status == oci.ContainerStateRunning {
+ rStatus = pb.PodSandboxState_SANDBOX_READY
+ }
+
+ sandboxID := sb.ID()
+ resp := &pb.PodSandboxStatusResponse{
+ Status: &pb.PodSandboxStatus{
+ Id: sandboxID,
+ CreatedAt: podInfraContainer.CreatedAt().UnixNano(),
+ Network: &pb.PodSandboxNetworkStatus{Ip: sb.IP()},
+ State: rStatus,
+ Labels: sb.Labels(),
+ Annotations: sb.Annotations(),
+ Metadata: sb.Metadata(),
+ },
+ }
+
+ logrus.Debugf("PodSandboxStatusResponse: %+v", resp)
+ return resp, nil
+}
diff --git a/server/sandbox_stop.go b/server/sandbox_stop.go
new file mode 100644
index 000000000..9d6a5aa3c
--- /dev/null
+++ b/server/sandbox_stop.go
@@ -0,0 +1,114 @@
+package server
+
+import (
+ "fmt"
+
+ "github.com/containers/storage"
+ "github.com/docker/docker/pkg/mount"
+ "github.com/docker/docker/pkg/symlink"
+ "github.com/kubernetes-incubator/cri-o/libkpod/sandbox"
+ "github.com/kubernetes-incubator/cri-o/oci"
+ "github.com/opencontainers/selinux/go-selinux/label"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ "golang.org/x/sys/unix"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// StopPodSandbox stops the sandbox. If there are any running containers in the
+// sandbox, they should be force terminated.
+func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxRequest) (*pb.StopPodSandboxResponse, error) {
+ logrus.Debugf("StopPodSandboxRequest %+v", req)
+ sb, err := s.getPodSandboxFromRequest(req.PodSandboxId)
+ if err != nil {
+ if err == sandbox.ErrIDEmpty {
+ return nil, err
+ }
+
+ // If the sandbox isn't found we just return an empty response to adhere
+ // to the CRI interface, which expects us not to error out in not-found
+ // cases.
+
+ resp := &pb.StopPodSandboxResponse{}
+ logrus.Warnf("could not get sandbox %s, it's probably been stopped already: %v", req.PodSandboxId, err)
+ logrus.Debugf("StopPodSandboxResponse %s: %+v", req.PodSandboxId, resp)
+ return resp, nil
+ }
+
+ if sb.Stopped() {
+ resp := &pb.StopPodSandboxResponse{}
+ logrus.Debugf("StopPodSandboxResponse %s: %+v", sb.ID(), resp)
+ return resp, nil
+ }
+
+ // Clean up sandbox networking and close its network namespace.
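+ // An empty netns path means the sandbox was created with host networking.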
+ hostNetwork := sb.NetNsPath() == ""
+ s.networkStop(hostNetwork, sb)
+ if err := sb.NetNsRemove(); err != nil {
+ return nil, err
+ }
+
+ podInfraContainer := sb.InfraContainer()
+ containers := sb.Containers().List()
+ containers = append(containers, podInfraContainer)
+
+ for _, c := range containers {
+ cStatus := s.Runtime().ContainerStatus(c)
+ if cStatus.Status != oci.ContainerStateStopped {
+ if err := s.Runtime().StopContainer(ctx, c, 10); err != nil {
+ return nil, fmt.Errorf("failed to stop container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err)
+ }
+ if c.ID() == podInfraContainer.ID() {
+ continue
+ }
+ if err := s.StorageRuntimeServer().StopContainer(c.ID()); err != nil && errors.Cause(err) != storage.ErrContainerUnknown {
+ // assume the container is already unmounted
+ logrus.Warnf("failed to stop container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err)
+ }
+ }
+ s.ContainerStateToDisk(c)
+ }
+
+ if err := label.ReleaseLabel(sb.ProcessLabel()); err != nil {
+ return nil, err
+ }
+
+ // unmount the shm for the pod
+ if sb.ShmPath() != "/dev/shm" {
+ // We get paths in the form of
+ // /var/run/containers/storage/overlay-containers/CID/userdata/shm,
+ // but /var/run on most systems is symlinked to /run, so we first
+ // resolve the symlink and then check whether it is mounted.
+ fp, err := symlink.FollowSymlinkInScope(sb.ShmPath(), "/")
+ if err != nil {
+ return nil, err
+ }
+ if mounted, err := mount.Mounted(fp); err == nil && mounted {
+ if err := unix.Unmount(fp, unix.MNT_DETACH); err != nil {
+ return nil, err
+ }
+ }
+ }
+ if err := s.StorageRuntimeServer().StopContainer(sb.ID()); err != nil && errors.Cause(err) != storage.ErrContainerUnknown {
+ logrus.Warnf("failed to stop sandbox container in pod sandbox %s: %v", sb.ID(), err)
+ }
+
+ sb.SetStopped()
+ resp := &pb.StopPodSandboxResponse{}
+ logrus.Debugf("StopPodSandboxResponse %s: %+v", sb.ID(), resp)
+ return resp, nil
+}
+
+// StopAllPodSandboxes stops all pod sandboxes
+func (s *Server) StopAllPodSandboxes() {
+ logrus.Debugf("StopAllPodSandboxes")
+ for _, sb := range s.ContainerServer.ListSandboxes() {
+ pod := &pb.StopPodSandboxRequest{
+ PodSandboxId: sb.ID(),
+ }
+ if _, err := s.StopPodSandbox(nil, pod); err != nil {
+ logrus.Warnf("could not StopPodSandbox %s: %v", sb.ID(), err)
+ }
+ }
+}
diff --git a/server/seccomp/seccomp.go b/server/seccomp/seccomp.go
new file mode 100644
index 000000000..cf77c8274
--- /dev/null
+++ b/server/seccomp/seccomp.go
@@ -0,0 +1,165 @@
+// +build seccomp
+
+package seccomp
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+
+ "github.com/docker/docker/pkg/stringutils"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/opencontainers/runtime-tools/generate"
+ libseccomp "github.com/seccomp/libseccomp-golang"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
+)
+
+// IsEnabled returns true if seccomp is enabled for the host.
+func IsEnabled() bool {
+ enabled := false
+ // Check if Seccomp is supported, via CONFIG_SECCOMP.
+ if err := unix.Prctl(unix.PR_GET_SECCOMP, 0, 0, 0, 0); err != unix.EINVAL {
+ // Make sure the kernel has CONFIG_SECCOMP_FILTER.
+ if err := unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0); err != unix.EINVAL {
+ enabled = true
+ }
+ }
+ logrus.Debugf("seccomp status: %v", enabled)
+ return enabled
+}
+
+// LoadProfileFromStruct takes a Seccomp struct and sets up seccomp in the spec.
+func LoadProfileFromStruct(config Seccomp, specgen *generate.Generator) error {
+ return setupSeccomp(&config, specgen)
+}
+
+// LoadProfileFromBytes takes a byte slice and decodes the seccomp profile.
+func LoadProfileFromBytes(body []byte, specgen *generate.Generator) error {
+ var config Seccomp
+ if err := json.Unmarshal(body, &config); err != nil {
+ return fmt.Errorf("decoding seccomp profile failed: %v", err)
+ }
+ return setupSeccomp(&config, specgen)
+}
+
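+// nativeToSeccomp maps native architecture names as reported by
+// libseccomp (e.g. "amd64") to their seccomp profile Arch constants.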
+var nativeToSeccomp = map[string]Arch{
+ "amd64": ArchX86_64,
+ "arm64": ArchAARCH64,
+ "mips64": ArchMIPS64,
+ "mips64n32": ArchMIPS64N32,
+ "mipsel64": ArchMIPSEL64,
+ "mipsel64n32": ArchMIPSEL64N32,
+ "s390x": ArchS390X,
+}
+
+func setupSeccomp(config *Seccomp, specgen *generate.Generator) error {
+ if config == nil {
+ return nil
+ }
+
+ // No default action specified, no syscalls listed, assume seccomp disabled
+ if config.DefaultAction == "" && len(config.Syscalls) == 0 {
+ return nil
+ }
+
+ var arch string
+ var native, err = libseccomp.GetNativeArch()
+ if err == nil {
+ arch = native.String()
+ }
+
+ if len(config.Architectures) != 0 && len(config.ArchMap) != 0 {
+ return errors.New("'architectures' and 'archMap' were specified in the seccomp profile, use either 'architectures' or 'archMap'")
+ }
+
+ customspec := specgen.Spec()
+ customspec.Linux.Seccomp = &specs.LinuxSeccomp{}
+
+ // if len(config.Architectures) == 0, libseccomp will figure out the architecture to use
+ if len(config.Architectures) != 0 {
+ for _, a := range config.Architectures {
+ customspec.Linux.Seccomp.Architectures = append(customspec.Linux.Seccomp.Architectures, specs.Arch(a))
+ }
+ }
+
+ if len(config.ArchMap) != 0 {
+ for _, a := range config.ArchMap {
+ seccompArch, ok := nativeToSeccomp[arch]
+ if ok {
+ if a.Arch == seccompArch {
+ customspec.Linux.Seccomp.Architectures = append(customspec.Linux.Seccomp.Architectures, specs.Arch(a.Arch))
+ for _, sa := range a.SubArches {
+ customspec.Linux.Seccomp.Architectures = append(customspec.Linux.Seccomp.Architectures, specs.Arch(sa))
+ }
+ break
+ }
+ }
+ }
+ }
+
+ customspec.Linux.Seccomp.DefaultAction = specs.LinuxSeccompAction(config.DefaultAction)
+
+Loop:
+ // Loop through all syscall blocks and convert them to libcontainer format after filtering them
+ for _, call := range config.Syscalls {
+ if len(call.Excludes.Arches) > 0 {
+ if stringutils.InSlice(call.Excludes.Arches, arch) {
+ continue Loop
+ }
+ }
+ if len(call.Excludes.Caps) > 0 {
+ for _, c := range call.Excludes.Caps {
+ if stringutils.InSlice(customspec.Process.Capabilities.Permitted, c) {
+ continue Loop
+ }
+ }
+ }
+ if len(call.Includes.Arches) > 0 {
+ if !stringutils.InSlice(call.Includes.Arches, arch) {
+ continue Loop
+ }
+ }
+ if len(call.Includes.Caps) > 0 {
+ for _, c := range call.Includes.Caps {
+ if !stringutils.InSlice(customspec.Process.Capabilities.Permitted, c) {
+ continue Loop
+ }
+ }
+ }
+
+ if call.Name != "" && len(call.Names) != 0 {
+ return errors.New("'name' and 'names' were specified in the seccomp profile, use either 'name' or 'names'")
+ }
+
+ if call.Name != "" {
+ customspec.Linux.Seccomp.Syscalls = append(customspec.Linux.Seccomp.Syscalls, createSpecsSyscall(call.Name, call.Action, call.Args))
+ }
+
+ for _, n := range call.Names {
+ customspec.Linux.Seccomp.Syscalls = append(customspec.Linux.Seccomp.Syscalls, createSpecsSyscall(n, call.Action, call.Args))
+ }
+ }
+
+ return nil
+}
+
+func createSpecsSyscall(name string, action Action, args []*Arg) specs.LinuxSyscall {
+ newCall := specs.LinuxSyscall{
+ Names: []string{name},
+ Action: specs.LinuxSeccompAction(action),
+ }
+
+ // Loop through all the arguments of the syscall and convert them
+ for _, arg := range args {
+ newArg := specs.LinuxSeccompArg{
+ Index: arg.Index,
+ Value: arg.Value,
+ ValueTwo: arg.ValueTwo,
+ Op: specs.LinuxSeccompOperator(arg.Op),
+ }
+
+ newCall.Args = append(newCall.Args, newArg)
+ }
+ return newCall
+}
diff --git a/server/seccomp/seccomp_unsupported.go b/server/seccomp/seccomp_unsupported.go
new file mode 100644
index 000000000..efb36bdf9
--- /dev/null
+++ b/server/seccomp/seccomp_unsupported.go
@@ -0,0 +1,20 @@
+// +build !seccomp
+
+package seccomp
+
+import "github.com/opencontainers/runtime-tools/generate"
+
+// IsEnabled returns false when built without the seccomp build tag.
+func IsEnabled() bool {
+ return false
+}
+
+// LoadProfileFromStruct takes a Seccomp struct and sets up seccomp in the spec;
+// without the seccomp build tag it is a no-op.
+func LoadProfileFromStruct(config Seccomp, specgen *generate.Generator) error {
+ return nil
+}
+
+// LoadProfileFromBytes takes a byte slice and decodes the seccomp profile;
+// without the seccomp build tag it is a no-op.
+func LoadProfileFromBytes(body []byte, specgen *generate.Generator) error {
+ return nil
+}
diff --git a/server/seccomp/types.go b/server/seccomp/types.go
new file mode 100644
index 000000000..5b07f8c03
--- /dev/null
+++ b/server/seccomp/types.go
@@ -0,0 +1,93 @@
+package seccomp
+
+// Seccomp represents the config for a seccomp profile for syscall restriction.
+type Seccomp struct {
+ DefaultAction Action `json:"defaultAction"`
+ // Architectures is kept to maintain backward compatibility with the old
+ // seccomp profile.
+ Architectures []Arch `json:"architectures,omitempty"`
+ ArchMap []Architecture `json:"archMap,omitempty"`
+ Syscalls []*Syscall `json:"syscalls"`
+}
+
+// Architecture is used to represent a specific architecture
+// and its sub-architectures.
+type Architecture struct {
+ Arch Arch `json:"architecture"`
+ SubArches []Arch `json:"subArchitectures"`
+}
+
+// Arch used for architectures
+type Arch string
+
+// Additional architectures permitted to be used for system calls
+// By default only the native architecture of the kernel is permitted
+const (
+ ArchX86 Arch = "SCMP_ARCH_X86"
+ ArchX86_64 Arch = "SCMP_ARCH_X86_64"
+ ArchX32 Arch = "SCMP_ARCH_X32"
+ ArchARM Arch = "SCMP_ARCH_ARM"
+ ArchAARCH64 Arch = "SCMP_ARCH_AARCH64"
+ ArchMIPS Arch = "SCMP_ARCH_MIPS"
+ ArchMIPS64 Arch = "SCMP_ARCH_MIPS64"
+ ArchMIPS64N32 Arch = "SCMP_ARCH_MIPS64N32"
+ ArchMIPSEL Arch = "SCMP_ARCH_MIPSEL"
+ ArchMIPSEL64 Arch = "SCMP_ARCH_MIPSEL64"
+ ArchMIPSEL64N32 Arch = "SCMP_ARCH_MIPSEL64N32"
+ ArchPPC Arch = "SCMP_ARCH_PPC"
+ ArchPPC64 Arch = "SCMP_ARCH_PPC64"
+ ArchPPC64LE Arch = "SCMP_ARCH_PPC64LE"
+ ArchS390 Arch = "SCMP_ARCH_S390"
+ ArchS390X Arch = "SCMP_ARCH_S390X"
+)
+
+// Action taken upon Seccomp rule match
+type Action string
+
+// Define actions for Seccomp rules
+const (
+ ActKill Action = "SCMP_ACT_KILL"
+ ActTrap Action = "SCMP_ACT_TRAP"
+ ActErrno Action = "SCMP_ACT_ERRNO"
+ ActTrace Action = "SCMP_ACT_TRACE"
+ ActAllow Action = "SCMP_ACT_ALLOW"
+)
+
+// Operator used to match syscall arguments in Seccomp
+type Operator string
+
+// Define operators for syscall arguments in Seccomp
+const (
+ OpNotEqual Operator = "SCMP_CMP_NE"
+ OpLessThan Operator = "SCMP_CMP_LT"
+ OpLessEqual Operator = "SCMP_CMP_LE"
+ OpEqualTo Operator = "SCMP_CMP_EQ"
+ OpGreaterEqual Operator = "SCMP_CMP_GE"
+ OpGreaterThan Operator = "SCMP_CMP_GT"
+ OpMaskedEqual Operator = "SCMP_CMP_MASKED_EQ"
+)
+
+// Arg used for matching specific syscall arguments in Seccomp
+type Arg struct {
+ Index uint `json:"index"`
+ Value uint64 `json:"value"`
+ ValueTwo uint64 `json:"valueTwo"`
+ Op Operator `json:"op"`
+}
+
+// Filter is used to conditionally apply Seccomp rules
+type Filter struct {
+ Caps []string `json:"caps,omitempty"`
+ Arches []string `json:"arches,omitempty"`
+}
+
+// Syscall is used to match a group of syscalls in Seccomp
+type Syscall struct {
+ Name string `json:"name,omitempty"`
+ Names []string `json:"names,omitempty"`
+ Action Action `json:"action"`
+ Args []*Arg `json:"args"`
+ Comment string `json:"comment"`
+ Includes Filter `json:"includes"`
+ Excludes Filter `json:"excludes"`
+}
diff --git a/server/secrets.go b/server/secrets.go
new file mode 100644
index 000000000..56d3ba81a
--- /dev/null
+++ b/server/secrets.go
@@ -0,0 +1,162 @@
+package server
+
+import (
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "strings"
+
+ rspec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/opencontainers/selinux/go-selinux/label"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+)
+
+// SecretData info
+type SecretData struct {
+ Name string
+ Data []byte
+}
+
+// SaveTo saves the secret data to the given directory
+func (s SecretData) SaveTo(dir string) error {
+ path := filepath.Join(dir, s.Name)
+ if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil && !os.IsExist(err) {
+ return err
+ }
+ return ioutil.WriteFile(path, s.Data, 0700)
+}
+
+func readAll(root, prefix string) ([]SecretData, error) {
+ path := filepath.Join(root, prefix)
+
+ data := []SecretData{}
+
+ files, err := ioutil.ReadDir(path)
+ if err != nil {
+ if os.IsNotExist(err) {
+ return data, nil
+ }
+
+ return nil, err
+ }
+
+ for _, f := range files {
+ fileData, err := readFile(root, filepath.Join(prefix, f.Name()))
+ if err != nil {
+ // If the file does not exist, it might be a dangling symlink;
+ // ignore the error.
+ if os.IsNotExist(err) {
+ continue
+ }
+ return nil, err
+ }
+ data = append(data, fileData...)
+ }
+
+ return data, nil
+}
+
+func readFile(root, name string) ([]SecretData, error) {
+ path := filepath.Join(root, name)
+
+ s, err := os.Stat(path)
+ if err != nil {
+ return nil, err
+ }
+
+ if s.IsDir() {
+ dirData, err := readAll(root, name)
+ if err != nil {
+ return nil, err
+ }
+ return dirData, nil
+ }
+ bytes, err := ioutil.ReadFile(path)
+ if err != nil {
+ return nil, err
+ }
+ return []SecretData{{Name: name, Data: bytes}}, nil
+}
+
+// getMountsMap separates a "host:container" path pair into its host and container directories
+func getMountsMap(path string) (string, string, error) {
+ arr := strings.SplitN(path, ":", 2)
+ if len(arr) == 2 {
+ return arr[0], arr[1], nil
+ }
+ return "", "", errors.Errorf("unable to get host and container dir")
+}
+
+func getHostSecretData(hostDir string) ([]SecretData, error) {
+ var allSecrets []SecretData
+ hostSecrets, err := readAll(hostDir, "")
+ if err != nil {
+ return nil, errors.Wrapf(err, "failed to read secrets from %q", hostDir)
+ }
+ return append(allSecrets, hostSecrets...), nil
+}
+
+// secretMounts copies the contents of each host directory into the corresponding
+// container directory and returns the resulting list of mounts
+func secretMounts(defaultMountsPaths []string, mountLabel, containerWorkingDir string, runtimeMounts []rspec.Mount) ([]rspec.Mount, error) {
+ var mounts []rspec.Mount
+ for _, path := range defaultMountsPaths {
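+ // Each entry is a "host-dir:container-dir" pair, e.g.
+ // "/usr/share/rhel/secrets:/run/secrets".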
+ hostDir, ctrDir, err := getMountsMap(path)
+ if err != nil {
+ return nil, err
+ }
+ // skip if the hostDir path doesn't exist
+ if _, err := os.Stat(hostDir); os.IsNotExist(err) {
+ logrus.Warnf("%q doesn't exist, skipping", hostDir)
+ continue
+ }
+
+ ctrDirOnHost := filepath.Join(containerWorkingDir, ctrDir)
+ // skip if ctrDir has already been mounted by caller
+ if isAlreadyMounted(runtimeMounts, ctrDir) {
+ logrus.Warnf("%q has already been mounted; cannot override mount", ctrDir)
+ continue
+ }
+
+ if err := os.RemoveAll(ctrDirOnHost); err != nil {
+ return nil, fmt.Errorf("remove container directory failed: %v", err)
+ }
+
+ if err := os.MkdirAll(ctrDirOnHost, 0755); err != nil {
+ return nil, fmt.Errorf("making container directory failed: %v", err)
+ }
+
+ hostDir, err = resolveSymbolicLink(hostDir)
+ if err != nil {
+ return nil, err
+ }
+
+ data, err := getHostSecretData(hostDir)
+ if err != nil {
+ return nil, errors.Wrapf(err, "getting host secret data failed")
+ }
+ for _, s := range data {
+ if err := s.SaveTo(ctrDirOnHost); err != nil {
+ return nil, errors.Wrapf(err, "saving secret %q failed", s.Name)
+ }
+ }
+ if err := label.Relabel(ctrDirOnHost, mountLabel, false); err != nil {
+ return nil, errors.Wrapf(err, "relabeling %q failed", ctrDirOnHost)
+ }
+
+ m := rspec.Mount{
+ Source: ctrDirOnHost,
+ Destination: ctrDir,
+ }
+
+ mounts = append(mounts, m)
+ }
+ return mounts, nil
+}
+
+func isAlreadyMounted(mounts []rspec.Mount, mountPath string) bool {
+ for _, mount := range mounts {
+ if mount.Destination == mountPath {
+ return true
+ }
+ }
+ return false
+}
diff --git a/server/server.go b/server/server.go
new file mode 100644
index 000000000..a308e7d29
--- /dev/null
+++ b/server/server.go
@@ -0,0 +1,423 @@
+package server
+
+import (
+ "encoding/json"
+ "fmt"
+ "io/ioutil"
+ "net"
+ "net/http"
+ "os"
+ "path/filepath"
+ "runtime/debug"
+ "strconv"
+ "strings"
+ "sync"
+
+ "github.com/cri-o/ocicni/pkg/ocicni"
+ "github.com/fsnotify/fsnotify"
+ "github.com/kubernetes-incubator/cri-o/libkpod"
+ "github.com/kubernetes-incubator/cri-o/libkpod/sandbox"
+ "github.com/kubernetes-incubator/cri-o/oci"
+ "github.com/kubernetes-incubator/cri-o/pkg/storage"
+ "github.com/kubernetes-incubator/cri-o/server/apparmor"
+ "github.com/kubernetes-incubator/cri-o/server/seccomp"
+ "github.com/pkg/errors"
+ "github.com/prometheus/client_golang/prometheus"
+ "github.com/sirupsen/logrus"
+ knet "k8s.io/apimachinery/pkg/util/net"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+ "k8s.io/kubernetes/pkg/kubelet/network/hostport"
+ "k8s.io/kubernetes/pkg/kubelet/server/streaming"
+ iptablesproxy "k8s.io/kubernetes/pkg/proxy/iptables"
+ utildbus "k8s.io/kubernetes/pkg/util/dbus"
+ utilexec "k8s.io/kubernetes/pkg/util/exec"
+ utiliptables "k8s.io/kubernetes/pkg/util/iptables"
+)
+
+const (
+ shutdownFile = "/var/lib/crio/crio.shutdown"
+)
+
+func isTrue(annotation string) bool {
+ return annotation == "true"
+}
+
+// streamService implements streaming.Runtime.
+type streamService struct {
+ runtimeServer *Server // needed by Exec() endpoint
+ streamServer streaming.Server
+ streamServerCloseCh chan struct{}
+ streaming.Runtime
+}
+
+// Server implements the RuntimeService and ImageService
+type Server struct {
+ *libkpod.ContainerServer
+ config Config
+
+ updateLock sync.RWMutex
+ netPlugin ocicni.CNIPlugin
+ hostportManager hostport.HostPortManager
+
+ seccompEnabled bool
+ seccompProfile seccomp.Seccomp
+
+ appArmorEnabled bool
+ appArmorProfile string
+
+ bindAddress string
+ stream streamService
+ exitMonitorChan chan struct{}
+}
+
+// StopStreamServer stops the stream server
+func (s *Server) StopStreamServer() error {
+ return s.stream.streamServer.Stop()
+}
+
+// StreamingServerCloseChan returns the close channel for the streaming server
+func (s *Server) StreamingServerCloseChan() chan struct{} {
+ return s.stream.streamServerCloseCh
+}
+
+// GetExec returns exec stream request
+func (s *Server) GetExec(req *pb.ExecRequest) (*pb.ExecResponse, error) {
+ return s.stream.streamServer.GetExec(req)
+}
+
+// GetAttach returns attach stream request
+func (s *Server) GetAttach(req *pb.AttachRequest) (*pb.AttachResponse, error) {
+ return s.stream.streamServer.GetAttach(req)
+}
+
+// GetPortForward returns port forward stream request
+func (s *Server) GetPortForward(req *pb.PortForwardRequest) (*pb.PortForwardResponse, error) {
+ return s.stream.streamServer.GetPortForward(req)
+}
+
+func (s *Server) restore() {
+ containers, err := s.Store().Containers()
+ if err != nil && !os.IsNotExist(errors.Cause(err)) {
+ logrus.Warnf("could not read containers and sandboxes: %v", err)
+ }
+ pods := map[string]*storage.RuntimeContainerMetadata{}
+ podContainers := map[string]*storage.RuntimeContainerMetadata{}
+ for _, container := range containers {
+ metadata, err2 := s.StorageRuntimeServer().GetContainerMetadata(container.ID)
+ if err2 != nil {
+ logrus.Warnf("error parsing metadata for %s: %v, ignoring", container.ID, err2)
+ continue
+ }
+ if metadata.Pod {
+ pods[container.ID] = &metadata
+ } else {
+ podContainers[container.ID] = &metadata
+ }
+ }
+ for containerID, metadata := range pods {
+ if err = s.LoadSandbox(containerID); err != nil {
+ logrus.Warnf("could not restore sandbox %s container %s: %v", metadata.PodID, containerID, err)
+ }
+ }
+ for containerID := range podContainers {
+ if err := s.LoadContainer(containerID); err != nil {
+ logrus.Warnf("could not restore container %s: %v", containerID, err)
+ }
+ }
+}
+
+// Update makes changes to the server's state (lists of pods and containers) to
+// reflect the list of pods and containers that are stored on disk, possibly
+// having been modified by other parties
+func (s *Server) Update() {
+ logrus.Debugf("updating sandbox and container information")
+ if err := s.ContainerServer.Update(); err != nil {
+ logrus.Errorf("error updating sandbox and container information: %v", err)
+ }
+}
+
+// cleanupSandboxesOnShutdown removes all running sandboxes on system shutdown
+func (s *Server) cleanupSandboxesOnShutdown() {
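+ // The shutdown file is expected to be created on system halt (e.g. by a
+ // crio-shutdown systemd unit); its presence tells us to stop all sandboxes
+ // rather than leave their state behind.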
+ _, err := os.Stat(shutdownFile)
+ if err == nil || !os.IsNotExist(err) {
+ logrus.Debugf("shutting down all sandboxes, on shutdown")
+ s.StopAllPodSandboxes()
+ err = os.Remove(shutdownFile)
+ if err != nil {
+ logrus.Warnf("Failed to remove %q", shutdownFile)
+ }
+
+ }
+}
+
+// Shutdown attempts to shut down the server's storage cleanly
+func (s *Server) Shutdown() error {
+ // TODO: do we really want this on every clean shutdown? Arguably containers
+ // should be left running while crio is down for whatever reason. Note that
+ // this triggers not just on system halt but also on a normal crio.service
+ // restart.
+ s.cleanupSandboxesOnShutdown()
+ return s.ContainerServer.Shutdown()
+}
+
+// configureMaxThreads sets the Go runtime max threads threshold
+// which is 90% of the kernel setting from /proc/sys/kernel/threads-max
+func configureMaxThreads() error {
+ mt, err := ioutil.ReadFile("/proc/sys/kernel/threads-max")
+ if err != nil {
+ return err
+ }
+ mtint, err := strconv.Atoi(strings.TrimSpace(string(mt)))
+ if err != nil {
+ return err
+ }
+ maxThreads := (mtint / 100) * 90
+ debug.SetMaxThreads(maxThreads)
+ logrus.Debugf("Golang's threads limit set to %d", maxThreads)
+ return nil
+}
+
+// New creates a new Server with options provided
+func New(config *Config) (*Server, error) {
+ if err := os.MkdirAll("/var/run/crio", 0755); err != nil {
+ return nil, err
+ }
+
+ config.ContainerExitsDir = oci.ContainerExitsDir
+
+ // This is used to monitor container exits using inotify
+ if err := os.MkdirAll(config.ContainerExitsDir, 0755); err != nil {
+ return nil, err
+ }
+ containerServer, err := libkpod.New(&config.Config)
+ if err != nil {
+ return nil, err
+ }
+
+ netPlugin, err := ocicni.InitCNI(config.NetworkDir, config.PluginDir)
+ if err != nil {
+ return nil, err
+ }
+ iptInterface := utiliptables.New(utilexec.New(), utildbus.New(), utiliptables.ProtocolIpv4)
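+ // Best effort: make sure the KUBE-MARK-MASQ chain exists before the
+ // hostport manager installs rules that jump to it.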
+ iptInterface.EnsureChain(utiliptables.TableNAT, iptablesproxy.KubeMarkMasqChain)
+ hostportManager := hostport.NewHostportManager()
+
+ s := &Server{
+ ContainerServer: containerServer,
+ netPlugin: netPlugin,
+ hostportManager: hostportManager,
+ config: *config,
+ seccompEnabled: seccomp.IsEnabled(),
+ appArmorEnabled: apparmor.IsEnabled(),
+ appArmorProfile: config.ApparmorProfile,
+ exitMonitorChan: make(chan struct{}),
+ }
+
+ if s.seccompEnabled {
+ seccompProfile, fileErr := ioutil.ReadFile(config.SeccompProfile)
+ if fileErr != nil {
+ return nil, fmt.Errorf("opening seccomp profile (%s) failed: %v", config.SeccompProfile, fileErr)
+ }
+ var seccompConfig seccomp.Seccomp
+ if jsonErr := json.Unmarshal(seccompProfile, &seccompConfig); jsonErr != nil {
+ return nil, fmt.Errorf("decoding seccomp profile failed: %v", jsonErr)
+ }
+ s.seccompProfile = seccompConfig
+ }
+
+ if s.appArmorEnabled && s.appArmorProfile == apparmor.DefaultApparmorProfile {
+ if apparmorErr := apparmor.EnsureDefaultApparmorProfile(); apparmorErr != nil {
+ return nil, fmt.Errorf("ensuring the default apparmor profile is installed failed: %v", apparmorErr)
+ }
+ }
+
+ if err := configureMaxThreads(); err != nil {
+ return nil, err
+ }
+
+ s.restore()
+ s.cleanupSandboxesOnShutdown()
+
+ bindAddress := net.ParseIP(config.StreamAddress)
+ if bindAddress == nil {
+ bindAddress, err = knet.ChooseBindAddress(net.IP{0, 0, 0, 0})
+ if err != nil {
+ return nil, err
+ }
+ }
+ s.bindAddress = bindAddress.String()
+
+ _, err = net.LookupPort("tcp", config.StreamPort)
+ if err != nil {
+ return nil, err
+ }
+
+ // Prepare streaming server
+ streamServerConfig := streaming.DefaultConfig
+ streamServerConfig.Addr = net.JoinHostPort(bindAddress.String(), config.StreamPort)
+ s.stream.runtimeServer = s
+ s.stream.streamServer, err = streaming.NewServer(streamServerConfig, s.stream)
+ if err != nil {
+ return nil, fmt.Errorf("unable to create streaming server")
+ }
+
+ s.stream.streamServerCloseCh = make(chan struct{})
+ go func() {
+ defer close(s.stream.streamServerCloseCh)
+ if err := s.stream.streamServer.Start(true); err != nil {
+ logrus.Errorf("Failed to start streaming server: %v", err)
+ }
+ }()
+
+ logrus.Debugf("sandboxes: %v", s.ContainerServer.ListSandboxes())
+ return s, nil
+}
+
+func (s *Server) addSandbox(sb *sandbox.Sandbox) {
+ s.ContainerServer.AddSandbox(sb)
+}
+
+func (s *Server) getSandbox(id string) *sandbox.Sandbox {
+ return s.ContainerServer.GetSandbox(id)
+}
+
+func (s *Server) hasSandbox(id string) bool {
+ return s.ContainerServer.HasSandbox(id)
+}
+
+func (s *Server) removeSandbox(id string) {
+ s.ContainerServer.RemoveSandbox(id)
+}
+
+func (s *Server) addContainer(c *oci.Container) {
+ s.ContainerServer.AddContainer(c)
+}
+
+func (s *Server) addInfraContainer(c *oci.Container) {
+ s.ContainerServer.AddInfraContainer(c)
+}
+
+func (s *Server) getContainer(id string) *oci.Container {
+ return s.ContainerServer.GetContainer(id)
+}
+
+func (s *Server) getInfraContainer(id string) *oci.Container {
+ return s.ContainerServer.GetInfraContainer(id)
+}
+
+// BindAddress is used to retrieve the host's IP address
+func (s *Server) BindAddress() string {
+ return s.bindAddress
+}
+
+// GetSandboxContainer returns the infra container for a given sandbox
+func (s *Server) GetSandboxContainer(id string) *oci.Container {
+ return s.ContainerServer.GetSandboxContainer(id)
+}
+
+// GetContainer returns a container by its ID
+func (s *Server) GetContainer(id string) *oci.Container {
+ return s.getContainer(id)
+}
+
+func (s *Server) removeContainer(c *oci.Container) {
+ s.ContainerServer.RemoveContainer(c)
+}
+
+func (s *Server) removeInfraContainer(c *oci.Container) {
+ s.ContainerServer.RemoveInfraContainer(c)
+}
+
+func (s *Server) getPodSandboxFromRequest(podSandboxID string) (*sandbox.Sandbox, error) {
+ if podSandboxID == "" {
+ return nil, sandbox.ErrIDEmpty
+ }
+
+ sandboxID, err := s.PodIDIndex().Get(podSandboxID)
+ if err != nil {
+ return nil, fmt.Errorf("PodSandbox with ID starting with %s not found: %v", podSandboxID, err)
+ }
+
+ sb := s.getSandbox(sandboxID)
+ if sb == nil {
+ return nil, fmt.Errorf("specified pod sandbox not found: %s", sandboxID)
+ }
+ return sb, nil
+}
+
+// CreateMetricsEndpoint creates a /metrics endpoint
+// for prometheus monitoring
+func (s *Server) CreateMetricsEndpoint() (*http.ServeMux, error) {
+ mux := &http.ServeMux{}
+ mux.Handle("/metrics", prometheus.Handler())
+ return mux, nil
+}
+
+// StopExitMonitor stops the exit monitor
+func (s *Server) StopExitMonitor() {
+ close(s.exitMonitorChan)
+}
+
+// ExitMonitorCloseChan returns the close chan for the exit monitor
+func (s *Server) ExitMonitorCloseChan() chan struct{} {
+ return s.exitMonitorChan
+}
+
+// StartExitMonitor starts a routine that monitors container exits
+// and updates the container status
+func (s *Server) StartExitMonitor() {
+ watcher, err := fsnotify.NewWatcher()
+ if err != nil {
+ logrus.Fatalf("Failed to create new watch: %v", err)
+ }
+ defer watcher.Close()
+
+ done := make(chan struct{})
+ go func() {
+ for {
+ select {
+ case event := <-watcher.Events:
+ logrus.Debugf("event: %v", event)
+ if event.Op&fsnotify.Create == fsnotify.Create {
+ containerID := filepath.Base(event.Name)
+ logrus.Debugf("container or sandbox exited: %v", containerID)
+ c := s.GetContainer(containerID)
+ if c != nil {
+ logrus.Debugf("container exited and found: %v", containerID)
+ err := s.Runtime().UpdateStatus(c)
+ if err != nil {
+ logrus.Warnf("Failed to update container status %s: %v", c, err)
+ } else {
+ s.ContainerStateToDisk(c)
+ }
+ } else {
+ sb := s.GetSandbox(containerID)
+ if sb != nil {
+ c := sb.InfraContainer()
+ logrus.Debugf("sandbox exited and found: %v", containerID)
+ err := s.Runtime().UpdateStatus(c)
+ if err != nil {
+ logrus.Warnf("Failed to update sandbox infra container status %s: %v", c, err)
+ } else {
+ s.ContainerStateToDisk(c)
+ }
+ }
+ }
+ }
+ case err := <-watcher.Errors:
+ logrus.Debugf("watch error: %v", err)
+ close(done)
+ return
+ case <-s.exitMonitorChan:
+ logrus.Debug("closing exit monitor...")
+ close(done)
+ return
+ }
+ }
+ }()
+ if err := watcher.Add(s.config.ContainerExitsDir); err != nil {
+ logrus.Errorf("watcher.Add(%q) failed: %s", s.config.ContainerExitsDir, err)
+ close(done)
+ }
+ <-done
+}
diff --git a/server/utils.go b/server/utils.go
new file mode 100644
index 000000000..195942d38
--- /dev/null
+++ b/server/utils.go
@@ -0,0 +1,183 @@
+package server
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "strings"
+
+ "github.com/cri-o/ocicni/pkg/ocicni"
+ "github.com/kubernetes-incubator/cri-o/libkpod/sandbox"
+ "github.com/opencontainers/runtime-tools/validate"
+ "github.com/syndtr/gocapability/capability"
+)
+
+const (
+ // According to http://man7.org/linux/man-pages/man5/resolv.conf.5.html:
+ // "The search list is currently limited to six domains with a total of 256 characters."
+ maxDNSSearches = 6
+)
+
+func copyFile(src, dest string) error {
+ in, err := os.Open(src)
+ if err != nil {
+ return err
+ }
+ defer in.Close()
+
+ out, err := os.Create(dest)
+ if err != nil {
+ return err
+ }
+ defer out.Close()
+
+ _, err = io.Copy(out, in)
+ return err
+}
+
+func removeFile(path string) error {
+ if _, err := os.Stat(path); err == nil {
+ if err := os.Remove(path); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func parseDNSOptions(servers, searches, options []string, path string) error {
+ nServers := len(servers)
+ nSearches := len(searches)
+ nOptions := len(options)
+ if nServers == 0 && nSearches == 0 && nOptions == 0 {
+ return copyFile("/etc/resolv.conf", path)
+ }
+
+ if nSearches > maxDNSSearches {
+ return fmt.Errorf("DNSOption.Searches has more than 6 domains")
+ }
+
+ f, err := os.Create(path)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
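+ // The generated file follows the standard resolv.conf layout, e.g.:
+ //   search svc.cluster.local cluster.local
+ //   nameserver 10.96.0.10
+ //   options ndots:5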
+
+ if nSearches > 0 {
+ data := fmt.Sprintf("search %s\n", strings.Join(searches, " "))
+ _, err = f.Write([]byte(data))
+ if err != nil {
+ return err
+ }
+ }
+
+ if nServers > 0 {
+ data := fmt.Sprintf("nameserver %s\n", strings.Join(servers, "\nnameserver "))
+ _, err = f.Write([]byte(data))
+ if err != nil {
+ return err
+ }
+ }
+
+ if nOptions > 0 {
+ data := fmt.Sprintf("options %s\n", strings.Join(options, " "))
+ _, err = f.Write([]byte(data))
+ if err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+// TODO: remove sysctl extraction related code here, instead we import from k8s directly.
+
+const (
+ // SysctlsPodAnnotationKey represents the key of sysctls which are set for the infrastructure
+ // container of a pod. The annotation value is a comma separated list of sysctl_name=value
+ // key-value pairs. Only a limited set of whitelisted and isolated sysctls is supported by
+ // the kubelet. Pods with other sysctls will fail to launch.
+ SysctlsPodAnnotationKey string = "security.alpha.kubernetes.io/sysctls"
+
+ // UnsafeSysctlsPodAnnotationKey represents the key of sysctls which are set for the infrastructure
+ // container of a pod. The annotation value is a comma separated list of sysctl_name=value
+ // key-value pairs. Unsafe sysctls must be explicitly enabled for a kubelet. They are properly
+ // namespaced to a pod or a container, but their isolation is usually unclear or weak. Their use
+ // is at-your-own-risk. Pods that attempt to set an unsafe sysctl that is not enabled for a kubelet
+ // will fail to launch.
+ UnsafeSysctlsPodAnnotationKey string = "security.alpha.kubernetes.io/unsafe-sysctls"
+)
+
+// Sysctl defines a kernel parameter to be set
+type Sysctl struct {
+ // Name of a property to set
+ Name string `json:"name"`
+ // Value of a property to set
+ Value string `json:"value"`
+}
+
+// SysctlsFromPodAnnotations parses the sysctl annotations into a slice of safe Sysctls
+// and a slice of unsafe Sysctls. This is only a convenience wrapper around
+// SysctlsFromPodAnnotation.
+func SysctlsFromPodAnnotations(a map[string]string) ([]Sysctl, []Sysctl, error) {
+ safe, err := SysctlsFromPodAnnotation(a[SysctlsPodAnnotationKey])
+ if err != nil {
+ return nil, nil, err
+ }
+ unsafe, err := SysctlsFromPodAnnotation(a[UnsafeSysctlsPodAnnotationKey])
+ if err != nil {
+ return nil, nil, err
+ }
+
+ return safe, unsafe, nil
+}
+
+// SysctlsFromPodAnnotation parses an annotation value into a slice of Sysctls.
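+// For example, "kernel.shm_rmid_forced=1,net.core.somaxconn=1024" yields two entries.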
+func SysctlsFromPodAnnotation(annotation string) ([]Sysctl, error) {
+ if len(annotation) == 0 {
+ return nil, nil
+ }
+
+ kvs := strings.Split(annotation, ",")
+ sysctls := make([]Sysctl, len(kvs))
+ for i, kv := range kvs {
+ cs := strings.Split(kv, "=")
+ if len(cs) != 2 || len(cs[0]) == 0 {
+ return nil, fmt.Errorf("sysctl %q not of the format sysctl_name=value", kv)
+ }
+ sysctls[i].Name = cs[0]
+ sysctls[i].Value = cs[1]
+ }
+ return sysctls, nil
+}
+
+func newPodNetwork(sb *sandbox.Sandbox) ocicni.PodNetwork {
+ return ocicni.PodNetwork{
+ Name: sb.KubeName(),
+ Namespace: sb.Namespace(),
+ ID: sb.ID(),
+ NetNS: sb.NetNsPath(),
+ }
+}
+
+// inStringSlice checks whether a string is inside a string slice.
+// Comparison is case insensitive.
+func inStringSlice(ss []string, str string) bool {
+ for _, s := range ss {
+ if strings.EqualFold(s, str) {
+ return true
+ }
+ }
+ return false
+}
+
+// getOCICapabilitiesList returns a list of all available capabilities.
+func getOCICapabilitiesList() []string {
+ var caps []string
+ for _, cap := range capability.List() {
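+ // Skip capabilities newer than the ones the runtime-spec validator
+ // knows about, so the generated spec stays valid.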
+ if cap > validate.LastCap() {
+ continue
+ }
+ caps = append(caps, "CAP_"+strings.ToUpper(cap.String()))
+ }
+ return caps
+}
diff --git a/server/version.go b/server/version.go
new file mode 100644
index 000000000..5f98e5f0c
--- /dev/null
+++ b/server/version.go
@@ -0,0 +1,27 @@
+package server
+
+import (
+ "github.com/kubernetes-incubator/cri-o/version"
+ "golang.org/x/net/context"
+ pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+const (
+ // kubeAPIVersion is the api version of kubernetes.
+ // TODO: Track upstream code. For now it expects 0.1.0
+ kubeAPIVersion = "0.1.0"
+ // containerName is the name prepended in kubectl describe->Container ID:
+ // cri-o://<CONTAINER_ID>
+ containerName = "cri-o"
+ runtimeAPIVersion = "v1alpha1"
+)
+
+// Version returns the runtime name, runtime version and runtime API version
+func (s *Server) Version(ctx context.Context, req *pb.VersionRequest) (*pb.VersionResponse, error) {
+ return &pb.VersionResponse{
+ Version: kubeAPIVersion,
+ RuntimeName: containerName,
+ RuntimeVersion: version.Version,
+ RuntimeApiVersion: runtimeAPIVersion,
+ }, nil
+}