author     Matthew Heon <matthew.heon@gmail.com>	2017-11-01 11:24:59 -0400
committer  Matthew Heon <matthew.heon@gmail.com>	2017-11-01 11:24:59 -0400
commit     a031b83a09a8628435317a03f199cdc18b78262f (patch)
tree       bc017a96769ce6de33745b8b0b1304ccf38e9df0 /server
parent     2b74391cd5281f6fdf391ff8ad50fd1490f6bf89 (diff)
Initial checkin from CRI-O repo
Signed-off-by: Matthew Heon <matthew.heon@gmail.com>
Diffstat (limited to 'server')
41 files changed, 5203 insertions, 0 deletions
diff --git a/server/apparmor/aaparser.go b/server/apparmor/aaparser.go
new file mode 100644
index 000000000..7f0f02ac5
--- /dev/null
+++ b/server/apparmor/aaparser.go
@@ -0,0 +1,89 @@
+// +build apparmor
+
+package apparmor
+
+import (
+	"fmt"
+	"os/exec"
+	"strconv"
+	"strings"
+)
+
+const (
+	binary = "apparmor_parser"
+)
+
+// GetVersion returns the apparmor_parser version encoded as a single integer (see parseVersion).
+func GetVersion() (int, error) {
+	output, err := cmd("", "--version")
+	if err != nil {
+		return -1, err
+	}
+
+	return parseVersion(output)
+}
+
+// LoadProfile runs `apparmor_parser -r` on a specified apparmor profile to
+// replace the profile.
+func LoadProfile(profilePath string) error {
+	_, err := cmd("", "-r", profilePath)
+	return err
+}
+
+// cmd runs `apparmor_parser` with the passed arguments.
+func cmd(dir string, arg ...string) (string, error) {
+	c := exec.Command(binary, arg...)
+	c.Dir = dir
+
+	output, err := c.CombinedOutput()
+	if err != nil {
+		return "", fmt.Errorf("running `%s %s` failed with output: %s\nerror: %v", c.Path, strings.Join(c.Args, " "), output, err)
+	}
+
+	return string(output), nil
+}
+
+// parseVersion takes the output from `apparmor_parser --version` and returns
+// a representation of the {major, minor, patch} version as a single number
+// of the form MMmmPPP.
+func parseVersion(output string) (int, error) {
+	// output is in the form of the following:
+	// AppArmor parser version 2.9.1
+	// Copyright (C) 1999-2008 Novell Inc.
+	// Copyright 2009-2012 Canonical Ltd.
+
+	lines := strings.SplitN(output, "\n", 2)
+	words := strings.Split(lines[0], " ")
+	version := words[len(words)-1]
+
+	// split by major minor version
+	v := strings.Split(version, ".")
+	if len(v) == 0 || len(v) > 3 {
+		return -1, fmt.Errorf("parsing version failed for output: `%s`", output)
+	}
+
+	// Default the versions to 0.
+	var majorVersion, minorVersion, patchLevel int
+
+	majorVersion, err := strconv.Atoi(v[0])
+	if err != nil {
+		return -1, err
+	}
+
+	if len(v) > 1 {
+		minorVersion, err = strconv.Atoi(v[1])
+		if err != nil {
+			return -1, err
+		}
+	}
+	if len(v) > 2 {
+		patchLevel, err = strconv.Atoi(v[2])
+		if err != nil {
+			return -1, err
+		}
+	}
+
+	// major*10^5 + minor*10^3 + patch*10^0
+	numericVersion := majorVersion*1e5 + minorVersion*1e3 + patchLevel
+	return numericVersion, nil
+}
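A worked example of the MMmmPPP encoding above (version strings illustrative):

	// "AppArmor parser version 2.9.1"  -> 2*1e5 + 9*1e3 + 1  = 209001
	// "AppArmor parser version 2.8.95" -> 2*1e5 + 8*1e3 + 95 = 208095
	//
	// template.go later in this patch gates its ptrace rule on
	// `ge .Version 208095`, i.e. apparmor_parser >= 2.8.95.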
diff --git a/server/apparmor/apparmor_common.go b/server/apparmor/apparmor_common.go
new file mode 100644
index 000000000..6366a66e6
--- /dev/null
+++ b/server/apparmor/apparmor_common.go
@@ -0,0 +1,14 @@
+package apparmor
+
+const (
+	// DefaultApparmorProfile is the name of the default apparmor profile.
+	DefaultApparmorProfile = "crio-default"
+
+	// ContainerAnnotationKeyPrefix is the prefix to an annotation key specifying a container profile.
+	ContainerAnnotationKeyPrefix = "container.apparmor.security.beta.kubernetes.io/"
+
+	// ProfileRuntimeDefault is the profile specifying the runtime default.
+	ProfileRuntimeDefault = "runtime/default"
+	// ProfileNamePrefix is the prefix for specifying profiles loaded on the node.
+	ProfileNamePrefix = "localhost/"
+)
diff --git a/server/apparmor/apparmor_supported.go b/server/apparmor/apparmor_supported.go
new file mode 100644
index 000000000..d765c9de9
--- /dev/null
+++ b/server/apparmor/apparmor_supported.go
@@ -0,0 +1,145 @@
+// +build apparmor
+
+package apparmor
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"path"
+	"strings"
+
+	"github.com/docker/docker/utils/templates"
+	"github.com/opencontainers/runc/libcontainer/apparmor"
+)
+
+const (
+	// profileDirectory is the file store for apparmor profiles and macros.
+	profileDirectory = "/etc/apparmor.d"
+)
+
+// profileData holds information about the given profile for generation.
+type profileData struct {
+	// Name is the profile name.
+	Name string
+	// Imports defines the apparmor functions to import, before defining the profile.
+	Imports []string
+	// InnerImports defines the apparmor functions to import in the profile.
+	InnerImports []string
+	// Version is the {major, minor, patch} version of apparmor_parser as a single number.
+	Version int
+}
+
+// EnsureDefaultApparmorProfile loads the default apparmor profile, if it is not already loaded.
+func EnsureDefaultApparmorProfile() error {
+	if apparmor.IsEnabled() {
+		loaded, err := IsLoaded(DefaultApparmorProfile)
+		if err != nil {
+			return fmt.Errorf("Could not check if %s AppArmor profile was loaded: %s", DefaultApparmorProfile, err)
+		}
+
+		// Nothing to do.
+		if loaded {
+			return nil
+		}
+
+		// Load the profile.
+		if err := InstallDefault(DefaultApparmorProfile); err != nil {
+			return fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", DefaultApparmorProfile)
+		}
+	}
+
+	return nil
+}
+
+// IsEnabled returns true if apparmor is enabled for the host.
+func IsEnabled() bool {
+	return apparmor.IsEnabled()
+}
+
+// GetProfileNameFromPodAnnotations gets the name of the profile to use with the container from
+// the pod annotations.
+func GetProfileNameFromPodAnnotations(annotations map[string]string, containerName string) string {
+	return annotations[ContainerAnnotationKeyPrefix+containerName]
+}
+
+// InstallDefault generates a default profile in a temp directory determined by
+// os.TempDir(), then loads the profile into the kernel using 'apparmor_parser'.
+func InstallDefault(name string) error {
+	p := profileData{
+		Name: name,
+	}
+
+	// Install to a temporary directory.
+	f, err := ioutil.TempFile("", name)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	if err := p.generateDefault(f); err != nil {
+		return err
+	}
+
+	return LoadProfile(f.Name())
+}
+
+// IsLoaded checks if a profile with the given name has been loaded into the
+// kernel.
+func IsLoaded(name string) (bool, error) {
+	file, err := os.Open("/sys/kernel/security/apparmor/profiles")
+	if err != nil {
+		return false, err
+	}
+	defer file.Close()
+
+	r := bufio.NewReader(file)
+	for {
+		p, err := r.ReadString('\n')
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return false, err
+		}
+		if strings.HasPrefix(p, name+" ") {
+			return true, nil
+		}
+	}
+
+	return false, nil
+}
+
+// generateDefault creates an apparmor profile from profileData.
+func (p *profileData) generateDefault(out io.Writer) error {
+	compiled, err := templates.NewParse("apparmor_profile", baseTemplate)
+	if err != nil {
+		return err
+	}
+
+	if macroExists("tunables/global") {
+		p.Imports = append(p.Imports, "#include <tunables/global>")
+	} else {
+		p.Imports = append(p.Imports, "@{PROC}=/proc/")
+	}
+
+	if macroExists("abstractions/base") {
+		p.InnerImports = append(p.InnerImports, "#include <abstractions/base>")
+	}
+
+	ver, err := GetVersion()
+	if err != nil {
+		return err
+	}
+	p.Version = ver
+
+	return compiled.Execute(out, p)
+}
+
+// macroExists checks if the passed macro exists.
+func macroExists(m string) bool {
+	_, err := os.Stat(path.Join(profileDirectory, m))
+	return err == nil
+}
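A minimal usage sketch for the exported API above — hypothetical caller code,
not part of this commit (assumes a build with the apparmor tag):

	// At daemon startup: make sure the default profile is resident
	// before any container requests it.
	if err := apparmor.EnsureDefaultApparmorProfile(); err != nil {
		logrus.Warnf("cannot ensure default AppArmor profile: %v", err)
	}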
diff --git a/server/apparmor/apparmor_unsupported.go b/server/apparmor/apparmor_unsupported.go
new file mode 100644
index 000000000..fbd1d87a0
--- /dev/null
+++ b/server/apparmor/apparmor_unsupported.go
@@ -0,0 +1,18 @@
+// +build !apparmor
+
+package apparmor
+
+// IsEnabled returns false when built without the apparmor build tag.
+func IsEnabled() bool {
+	return false
+}
+
+// EnsureDefaultApparmorProfile does nothing when built without the apparmor build tag.
+func EnsureDefaultApparmorProfile() error {
+	return nil
+}
+
+// GetProfileNameFromPodAnnotations does nothing when built without the apparmor build tag.
+func GetProfileNameFromPodAnnotations(annotations map[string]string, containerName string) string {
+	return ""
+}
diff --git a/server/apparmor/template.go b/server/apparmor/template.go
new file mode 100644
index 000000000..6656ff61c
--- /dev/null
+++ b/server/apparmor/template.go
@@ -0,0 +1,45 @@
+// +build apparmor
+
+package apparmor
+
+// baseTemplate defines the default apparmor profile for containers.
+const baseTemplate = `
+{{range $value := .Imports}}
+{{$value}}
+{{end}}
+
+profile {{.Name}} flags=(attach_disconnected,mediate_deleted) {
+{{range $value := .InnerImports}}
+  {{$value}}
+{{end}}
+
+  network,
+  capability,
+  file,
+  umount,
+
+  deny @{PROC}/* w,   # deny write for all files directly in /proc (not in a subdir)
+  # deny write to files not in /proc/<number>/** or /proc/sys/**
+  deny @{PROC}/{[^1-9],[^1-9][^0-9],[^1-9s][^0-9y][^0-9s],[^1-9][^0-9][^0-9][^0-9]*}/** w,
+  deny @{PROC}/sys/[^k]** w,  # deny /proc/sys except /proc/sys/k* (effectively /proc/sys/kernel)
+  deny @{PROC}/sys/kernel/{?,??,[^s][^h][^m]**} w,  # deny everything except shm* in /proc/sys/kernel/
+  deny @{PROC}/sysrq-trigger rwklx,
+  deny @{PROC}/mem rwklx,
+  deny @{PROC}/kmem rwklx,
+  deny @{PROC}/kcore rwklx,
+
+  deny mount,
+
+  deny /sys/[^f]*/** wklx,
+  deny /sys/f[^s]*/** wklx,
+  deny /sys/fs/[^c]*/** wklx,
+  deny /sys/fs/c[^g]*/** wklx,
+  deny /sys/fs/cg[^r]*/** wklx,
+  deny /sys/firmware/** rwklx,
+  deny /sys/kernel/security/** rwklx,
+
+{{if ge .Version 208095}}
+  ptrace (trace,read) peer={{.Name}},
+{{end}}
+}
+`
diff --git a/server/config.go b/server/config.go
new file mode 100644
index 000000000..6c2d26cd0
--- /dev/null
+++ b/server/config.go
@@ -0,0 +1,112 @@
+package server
+
+import (
+	"bytes"
+	"io/ioutil"
+
+	"github.com/BurntSushi/toml"
+	"github.com/kubernetes-incubator/cri-o/libkpod"
+)
+
+// CrioConfigPath is the default location for the conf file
+const CrioConfigPath = "/etc/crio/crio.conf"
+
+// Config represents the entire set of configuration values that can be set for
+// the server. This is intended to be loaded from a toml-encoded config file.
+type Config struct { + libkpod.Config + APIConfig +} + +// APIConfig represents the "crio.api" TOML config table. +type APIConfig struct { + // Listen is the path to the AF_LOCAL socket on which cri-o will listen. + // This may support proto://addr formats later, but currently this is just + // a path. + Listen string `toml:"listen"` + + // StreamAddress is the IP address on which the stream server will listen. + StreamAddress string `toml:"stream_address"` + + // StreamPort is the port on which the stream server will listen. + StreamPort string `toml:"stream_port"` +} + +// tomlConfig is another way of looking at a Config, which is +// TOML-friendly (it has all of the explicit tables). It's just used for +// conversions. +type tomlConfig struct { + Crio struct { + libkpod.RootConfig + API struct{ APIConfig } `toml:"api"` + Runtime struct{ libkpod.RuntimeConfig } `toml:"runtime"` + Image struct{ libkpod.ImageConfig } `toml:"image"` + Network struct{ libkpod.NetworkConfig } `toml:"network"` + } `toml:"crio"` +} + +func (t *tomlConfig) toConfig(c *Config) { + c.RootConfig = t.Crio.RootConfig + c.APIConfig = t.Crio.API.APIConfig + c.RuntimeConfig = t.Crio.Runtime.RuntimeConfig + c.ImageConfig = t.Crio.Image.ImageConfig + c.NetworkConfig = t.Crio.Network.NetworkConfig +} + +func (t *tomlConfig) fromConfig(c *Config) { + t.Crio.RootConfig = c.RootConfig + t.Crio.API.APIConfig = c.APIConfig + t.Crio.Runtime.RuntimeConfig = c.RuntimeConfig + t.Crio.Image.ImageConfig = c.ImageConfig + t.Crio.Network.NetworkConfig = c.NetworkConfig +} + +// UpdateFromFile populates the Config from the TOML-encoded file at the given path. +// Returns errors encountered when reading or parsing the files, or nil +// otherwise. +func (c *Config) UpdateFromFile(path string) error { + data, err := ioutil.ReadFile(path) + if err != nil { + return err + } + + t := new(tomlConfig) + t.fromConfig(c) + + _, err = toml.Decode(string(data), t) + if err != nil { + return err + } + + t.toConfig(c) + return nil +} + +// ToFile outputs the given Config as a TOML-encoded file at the given path. +// Returns errors encountered when generating or writing the file, or nil +// otherwise. +func (c *Config) ToFile(path string) error { + var w bytes.Buffer + e := toml.NewEncoder(&w) + + t := new(tomlConfig) + t.fromConfig(c) + + if err := e.Encode(*t); err != nil { + return err + } + + return ioutil.WriteFile(path, w.Bytes(), 0644) +} + +// DefaultConfig returns the default configuration for crio. +func DefaultConfig() *Config { + return &Config{ + Config: *libkpod.DefaultConfig(), + APIConfig: APIConfig{ + Listen: "/var/run/crio.sock", + StreamAddress: "", + StreamPort: "10010", + }, + } +} diff --git a/server/container_attach.go b/server/container_attach.go new file mode 100644 index 000000000..6a822858d --- /dev/null +++ b/server/container_attach.go @@ -0,0 +1,147 @@ +package server + +import ( + "fmt" + "io" + "net" + "os" + "path/filepath" + + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/kubernetes-incubator/cri-o/utils" + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + "golang.org/x/sys/unix" + "k8s.io/client-go/tools/remotecommand" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" + kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" +) + +/* Sync with stdpipe_t in conmon.c */ +const ( + AttachPipeStdin = 1 + AttachPipeStdout = 2 + AttachPipeStderr = 3 +) + +// Attach prepares a streaming endpoint to attach to a running container. 
+func (s *Server) Attach(ctx context.Context, req *pb.AttachRequest) (*pb.AttachResponse, error) { + logrus.Debugf("AttachRequest %+v", req) + + resp, err := s.GetAttach(req) + if err != nil { + return nil, fmt.Errorf("unable to prepare attach endpoint") + } + + return resp, nil +} + +// Attach endpoint for streaming.Runtime +func (ss streamService) Attach(containerID string, inputStream io.Reader, outputStream, errorStream io.WriteCloser, tty bool, resize <-chan remotecommand.TerminalSize) error { + c := ss.runtimeServer.GetContainer(containerID) + + if c == nil { + return fmt.Errorf("could not find container %q", containerID) + } + + if err := ss.runtimeServer.Runtime().UpdateStatus(c); err != nil { + return err + } + + cState := ss.runtimeServer.Runtime().ContainerStatus(c) + if !(cState.Status == oci.ContainerStateRunning || cState.Status == oci.ContainerStateCreated) { + return fmt.Errorf("container is not created or running") + } + + controlPath := filepath.Join(c.BundlePath(), "ctl") + controlFile, err := os.OpenFile(controlPath, unix.O_WRONLY, 0) + if err != nil { + return fmt.Errorf("failed to open container ctl file: %v", err) + } + + kubecontainer.HandleResizing(resize, func(size remotecommand.TerminalSize) { + logrus.Infof("Got a resize event: %+v", size) + _, err := fmt.Fprintf(controlFile, "%d %d %d\n", 1, size.Height, size.Width) + if err != nil { + logrus.Infof("Failed to write to control file to resize terminal: %v", err) + } + }) + + attachSocketPath := filepath.Join(oci.ContainerAttachSocketDir, c.ID(), "attach") + conn, err := net.DialUnix("unixpacket", nil, &net.UnixAddr{Name: attachSocketPath, Net: "unixpacket"}) + if err != nil { + return fmt.Errorf("failed to connect to container %s attach socket: %v", c.ID(), err) + } + defer conn.Close() + + receiveStdout := make(chan error) + if outputStream != nil || errorStream != nil { + go func() { + receiveStdout <- redirectResponseToOutputStreams(outputStream, errorStream, conn) + }() + } + + stdinDone := make(chan error) + go func() { + var err error + if inputStream != nil { + _, err = utils.CopyDetachable(conn, inputStream, nil) + conn.CloseWrite() + } + stdinDone <- err + }() + + select { + case err := <-receiveStdout: + return err + case err := <-stdinDone: + if _, ok := err.(utils.DetachError); ok { + return nil + } + if outputStream != nil || errorStream != nil { + return <-receiveStdout + } + } + + return nil +} + +func redirectResponseToOutputStreams(outputStream, errorStream io.Writer, conn io.Reader) error { + var err error + buf := make([]byte, 8192+1) /* Sync with conmon STDIO_BUF_SIZE */ + + for { + nr, er := conn.Read(buf) + if nr > 0 { + var dst io.Writer + if buf[0] == AttachPipeStdout { + dst = outputStream + } else if buf[0] == AttachPipeStderr { + dst = errorStream + } else { + logrus.Infof("Got unexpected attach type %+d", buf[0]) + } + + if dst != nil { + nw, ew := dst.Write(buf[1:nr]) + if ew != nil { + err = ew + break + } + if nr != nw+1 { + err = io.ErrShortWrite + break + } + } + } + if er == io.EOF { + break + } + if er != nil { + err = er + break + } + } + + return err +} diff --git a/server/container_create.go b/server/container_create.go new file mode 100644 index 000000000..b28498c8d --- /dev/null +++ b/server/container_create.go @@ -0,0 +1,1215 @@ +package server + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "regexp" + "sort" + "strconv" + "strings" + "time" + + "github.com/docker/distribution/reference" + "github.com/docker/docker/pkg/stringid" + 
"github.com/docker/docker/pkg/symlink" + "github.com/kubernetes-incubator/cri-o/libkpod" + "github.com/kubernetes-incubator/cri-o/libkpod/sandbox" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/kubernetes-incubator/cri-o/pkg/annotations" + "github.com/kubernetes-incubator/cri-o/pkg/storage" + "github.com/kubernetes-incubator/cri-o/server/apparmor" + "github.com/kubernetes-incubator/cri-o/server/seccomp" + "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/devices" + "github.com/opencontainers/runc/libcontainer/user" + rspec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" + "github.com/opencontainers/selinux/go-selinux/label" + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + "golang.org/x/sys/unix" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +const ( + seccompUnconfined = "unconfined" + seccompRuntimeDefault = "runtime/default" + seccompDockerDefault = "docker/default" + seccompLocalhostPrefix = "localhost/" + + scopePrefix = "crio" + defaultCgroupfsParent = "/crio" + defaultSystemdParent = "system.slice" +) + +type orderedMounts []rspec.Mount + +// Len returns the number of mounts. Used in sorting. +func (m orderedMounts) Len() int { + return len(m) +} + +// Less returns true if the number of parts (a/b/c would be 3 parts) in the +// mount indexed by parameter 1 is less than that of the mount indexed by +// parameter 2. Used in sorting. +func (m orderedMounts) Less(i, j int) bool { + return m.parts(i) < m.parts(j) +} + +// Swap swaps two items in an array of mounts. Used in sorting +func (m orderedMounts) Swap(i, j int) { + m[i], m[j] = m[j], m[i] +} + +// parts returns the number of parts in the destination of a mount. Used in sorting. +func (m orderedMounts) parts(i int) int { + return strings.Count(filepath.Clean(m[i].Destination), string(os.PathSeparator)) +} + +func addOCIBindMounts(mountLabel string, containerConfig *pb.ContainerConfig, specgen *generate.Generator) ([]oci.ContainerVolume, []rspec.Mount, error) { + volumes := []oci.ContainerVolume{} + ociMounts := []rspec.Mount{} + mounts := containerConfig.GetMounts() + for _, mount := range mounts { + dest := mount.ContainerPath + if dest == "" { + return nil, nil, fmt.Errorf("Mount.ContainerPath is empty") + } + + src := mount.HostPath + if src == "" { + return nil, nil, fmt.Errorf("Mount.HostPath is empty") + } + + if _, err := os.Stat(src); err != nil && os.IsNotExist(err) { + if err1 := os.MkdirAll(src, 0644); err1 != nil { + return nil, nil, fmt.Errorf("Failed to mkdir %s: %s", src, err) + } + } + + src, err := resolveSymbolicLink(src) + if err != nil { + return nil, nil, fmt.Errorf("failed to resolve symlink %q: %v", src, err) + } + + options := []string{"rw"} + if mount.Readonly { + options = []string{"ro"} + } + options = append(options, []string{"rbind", "rprivate"}...) 
+ + if mount.SelinuxRelabel { + // Need a way in kubernetes to determine if the volume is shared or private + if err := label.Relabel(src, mountLabel, true); err != nil && err != unix.ENOTSUP { + return nil, nil, fmt.Errorf("relabel failed %s: %v", src, err) + } + } + + volumes = append(volumes, oci.ContainerVolume{ + ContainerPath: dest, + HostPath: src, + Readonly: mount.Readonly, + }) + + ociMounts = append(ociMounts, rspec.Mount{ + Source: src, + Destination: dest, + Options: options, + }) + } + + return volumes, ociMounts, nil +} + +func addImageVolumes(rootfs string, s *Server, containerInfo *storage.ContainerInfo, specgen *generate.Generator, mountLabel string) ([]rspec.Mount, error) { + mounts := []rspec.Mount{} + for dest := range containerInfo.Config.Config.Volumes { + fp, err := symlink.FollowSymlinkInScope(filepath.Join(rootfs, dest), rootfs) + if err != nil { + return nil, err + } + switch s.config.ImageVolumes { + case libkpod.ImageVolumesMkdir: + if err1 := os.MkdirAll(fp, 0644); err1 != nil { + return nil, err1 + } + case libkpod.ImageVolumesBind: + volumeDirName := stringid.GenerateNonCryptoID() + src := filepath.Join(containerInfo.RunDir, "mounts", volumeDirName) + if err1 := os.MkdirAll(src, 0644); err1 != nil { + return nil, err1 + } + // Label the source with the sandbox selinux mount label + if mountLabel != "" { + if err1 := label.Relabel(src, mountLabel, true); err1 != nil && err1 != unix.ENOTSUP { + return nil, fmt.Errorf("relabel failed %s: %v", src, err1) + } + } + + logrus.Debugf("Adding bind mounted volume: %s to %s", src, dest) + mounts = append(mounts, rspec.Mount{ + Source: src, + Destination: dest, + Options: []string{"rw"}, + }) + + case libkpod.ImageVolumesIgnore: + logrus.Debugf("Ignoring volume %v", dest) + default: + logrus.Fatalf("Unrecognized image volumes setting") + } + } + return mounts, nil +} + +// resolveSymbolicLink resolves a possbile symlink path. If the path is a symlink, returns resolved +// path; if not, returns the original path. +func resolveSymbolicLink(path string) (string, error) { + info, err := os.Lstat(path) + if err != nil { + return "", err + } + if info.Mode()&os.ModeSymlink != os.ModeSymlink { + return path, nil + } + return filepath.EvalSymlinks(path) +} + +func addDevices(sb *sandbox.Sandbox, containerConfig *pb.ContainerConfig, specgen *generate.Generator) error { + sp := specgen.Spec() + if containerConfig.GetLinux().GetSecurityContext().GetPrivileged() { + hostDevices, err := devices.HostDevices() + if err != nil { + return err + } + for _, hostDevice := range hostDevices { + rd := rspec.LinuxDevice{ + Path: hostDevice.Path, + Type: string(hostDevice.Type), + Major: hostDevice.Major, + Minor: hostDevice.Minor, + UID: &hostDevice.Uid, + GID: &hostDevice.Gid, + } + if hostDevice.Major == 0 && hostDevice.Minor == 0 { + // Invalid device, most likely a symbolic link, skip it. 
+ continue + } + specgen.AddDevice(rd) + } + sp.Linux.Resources.Devices = []rspec.LinuxDeviceCgroup{ + { + Allow: true, + Access: "rwm", + }, + } + return nil + } + for _, device := range containerConfig.GetDevices() { + path, err := resolveSymbolicLink(device.HostPath) + if err != nil { + return err + } + dev, err := devices.DeviceFromPath(path, device.Permissions) + // if there was no error, return the device + if err == nil { + rd := rspec.LinuxDevice{ + Path: device.ContainerPath, + Type: string(dev.Type), + Major: dev.Major, + Minor: dev.Minor, + UID: &dev.Uid, + GID: &dev.Gid, + } + specgen.AddDevice(rd) + sp.Linux.Resources.Devices = append(sp.Linux.Resources.Devices, rspec.LinuxDeviceCgroup{ + Allow: true, + Type: string(dev.Type), + Major: &dev.Major, + Minor: &dev.Minor, + Access: dev.Permissions, + }) + continue + } + // if the device is not a device node + // try to see if it's a directory holding many devices + if err == devices.ErrNotADevice { + + // check if it is a directory + if src, e := os.Stat(path); e == nil && src.IsDir() { + + // mount the internal devices recursively + filepath.Walk(path, func(dpath string, f os.FileInfo, e error) error { + childDevice, e := devices.DeviceFromPath(dpath, device.Permissions) + if e != nil { + // ignore the device + return nil + } + cPath := strings.Replace(dpath, path, device.ContainerPath, 1) + rd := rspec.LinuxDevice{ + Path: cPath, + Type: string(childDevice.Type), + Major: childDevice.Major, + Minor: childDevice.Minor, + UID: &childDevice.Uid, + GID: &childDevice.Gid, + } + specgen.AddDevice(rd) + sp.Linux.Resources.Devices = append(sp.Linux.Resources.Devices, rspec.LinuxDeviceCgroup{ + Allow: true, + Type: string(childDevice.Type), + Major: &childDevice.Major, + Minor: &childDevice.Minor, + Access: childDevice.Permissions, + }) + + return nil + }) + } + } + } + return nil +} + +// buildOCIProcessArgs build an OCI compatible process arguments slice. +func buildOCIProcessArgs(containerKubeConfig *pb.ContainerConfig, imageOCIConfig *v1.Image) ([]string, error) { + //# Start the nginx container using the default command, but use custom + //arguments (arg1 .. argN) for that command. + //kubectl run nginx --image=nginx -- <arg1> <arg2> ... <argN> + + //# Start the nginx container using a different command and custom arguments. + //kubectl run nginx --image=nginx --command -- <cmd> <arg1> ... <argN> + + kubeCommands := containerKubeConfig.Command + kubeArgs := containerKubeConfig.Args + + // merge image config and kube config + // same as docker does today... + if imageOCIConfig != nil { + if len(kubeCommands) == 0 { + if len(kubeArgs) == 0 { + kubeArgs = imageOCIConfig.Config.Cmd + } + if kubeCommands == nil { + kubeCommands = imageOCIConfig.Config.Entrypoint + } + } + } + + if len(kubeCommands) == 0 && len(kubeArgs) == 0 { + return nil, fmt.Errorf("no command specified") + } + + // create entrypoint and args + var entrypoint string + var args []string + if len(kubeCommands) != 0 { + entrypoint = kubeCommands[0] + args = append(kubeCommands[1:], kubeArgs...) + } else { + entrypoint = kubeArgs[0] + args = kubeArgs[1:] + } + + processArgs := append([]string{entrypoint}, args...) 
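// For illustration, how the Command/Args merge above plays out for a
// hypothetical image with ENTRYPOINT ["nginx"] and CMD ["-g", "daemon off;"]:
//
//   kube Command nil,      Args nil    -> processArgs ["nginx", "-g", "daemon off;"]
//   kube Command nil,      Args ["-v"] -> processArgs ["nginx", "-v"]
//   kube Command ["echo"], Args ["hi"] -> processArgs ["echo", "hi"]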
+ + logrus.Debugf("OCI process args %v", processArgs) + + return processArgs, nil +} + +// addOCIHook look for hooks programs installed in hooksDirPath and add them to spec +func addOCIHook(specgen *generate.Generator, hook libkpod.HookParams) error { + logrus.Debugf("AddOCIHook", hook) + for _, stage := range hook.Stage { + switch stage { + case "prestart": + specgen.AddPreStartHook(hook.Hook, []string{hook.Hook, "prestart"}) + + case "poststart": + specgen.AddPostStartHook(hook.Hook, []string{hook.Hook, "poststart"}) + + case "poststop": + specgen.AddPostStopHook(hook.Hook, []string{hook.Hook, "poststop"}) + } + } + return nil +} + +// setupContainerUser sets the UID, GID and supplemental groups in OCI runtime config +func setupContainerUser(specgen *generate.Generator, rootfs string, sc *pb.LinuxContainerSecurityContext, imageConfig *v1.Image) error { + if sc != nil { + containerUser := "" + // Case 1: run as user is set by kubelet + if sc.GetRunAsUser() != nil { + containerUser = strconv.FormatInt(sc.GetRunAsUser().Value, 10) + } else { + // Case 2: run as username is set by kubelet + userName := sc.GetRunAsUsername() + if userName != "" { + containerUser = userName + } else { + // Case 3: get user from image config + if imageConfig != nil { + imageUser := imageConfig.Config.User + if imageUser != "" { + containerUser = imageUser + } + } + } + } + + logrus.Debugf("CONTAINER USER: %+v", containerUser) + + // Add uid, gid and groups from user + uid, gid, addGroups, err1 := getUserInfo(rootfs, containerUser) + if err1 != nil { + return err1 + } + + logrus.Debugf("UID: %v, GID: %v, Groups: %+v", uid, gid, addGroups) + specgen.SetProcessUID(uid) + specgen.SetProcessGID(gid) + for _, group := range addGroups { + specgen.AddProcessAdditionalGid(group) + } + + // Add groups from CRI + groups := sc.GetSupplementalGroups() + for _, group := range groups { + specgen.AddProcessAdditionalGid(uint32(group)) + } + } + return nil +} + +func hostNetwork(containerConfig *pb.ContainerConfig) bool { + securityContext := containerConfig.GetLinux().GetSecurityContext() + if securityContext == nil || securityContext.GetNamespaceOptions() == nil { + return false + } + + return securityContext.GetNamespaceOptions().HostNetwork +} + +// ensureSaneLogPath is a hack to fix https://issues.k8s.io/44043 which causes +// logPath to be a broken symlink to some magical Docker path. Ideally we +// wouldn't have to deal with this, but until that issue is fixed we have to +// remove the path if it's a broken symlink. +func ensureSaneLogPath(logPath string) error { + // If the path exists but the resolved path does not, then we have a broken + // symlink and we need to remove it. + fi, err := os.Lstat(logPath) + if err != nil || fi.Mode()&os.ModeSymlink == 0 { + // Non-existent files and non-symlinks aren't our problem. 
+ return nil + } + + _, err = os.Stat(logPath) + if os.IsNotExist(err) { + err = os.RemoveAll(logPath) + if err != nil { + return fmt.Errorf("ensureSaneLogPath remove bad logPath: %s", err) + } + } + return nil +} + +// addSecretsBindMounts mounts user defined secrets to the container +func addSecretsBindMounts(mountLabel, ctrRunDir string, defaultMounts []string, specgen generate.Generator) ([]rspec.Mount, error) { + containerMounts := specgen.Spec().Mounts + mounts, err := secretMounts(defaultMounts, mountLabel, ctrRunDir, containerMounts) + if err != nil { + return nil, err + } + return mounts, nil +} + +// CreateContainer creates a new container in specified PodSandbox +func (s *Server) CreateContainer(ctx context.Context, req *pb.CreateContainerRequest) (res *pb.CreateContainerResponse, err error) { + logrus.Debugf("CreateContainerRequest %+v", req) + + s.updateLock.RLock() + defer s.updateLock.RUnlock() + + sbID := req.PodSandboxId + if sbID == "" { + return nil, fmt.Errorf("PodSandboxId should not be empty") + } + + sandboxID, err := s.PodIDIndex().Get(sbID) + if err != nil { + return nil, fmt.Errorf("PodSandbox with ID starting with %s not found: %v", sbID, err) + } + + sb := s.getSandbox(sandboxID) + if sb == nil { + return nil, fmt.Errorf("specified sandbox not found: %s", sandboxID) + } + + // The config of the container + containerConfig := req.GetConfig() + if containerConfig == nil { + return nil, fmt.Errorf("CreateContainerRequest.ContainerConfig is nil") + } + + name := containerConfig.GetMetadata().Name + if name == "" { + return nil, fmt.Errorf("CreateContainerRequest.ContainerConfig.Name is empty") + } + + containerID, containerName, err := s.generateContainerIDandName(sb.Metadata(), containerConfig) + if err != nil { + return nil, err + } + + defer func() { + if err != nil { + s.ReleaseContainerName(containerName) + } + }() + + container, err := s.createSandboxContainer(ctx, containerID, containerName, sb, req.GetSandboxConfig(), containerConfig) + if err != nil { + return nil, err + } + defer func() { + if err != nil { + err2 := s.StorageRuntimeServer().DeleteContainer(containerID) + if err2 != nil { + logrus.Warnf("Failed to cleanup container directory: %v", err2) + } + } + }() + + if err = s.Runtime().CreateContainer(container, sb.CgroupParent()); err != nil { + return nil, err + } + + s.addContainer(container) + + if err = s.CtrIDIndex().Add(containerID); err != nil { + s.removeContainer(container) + return nil, err + } + + s.ContainerStateToDisk(container) + + resp := &pb.CreateContainerResponse{ + ContainerId: containerID, + } + + logrus.Debugf("CreateContainerResponse: %+v", resp) + return resp, nil +} + +func (s *Server) setupOCIHooks(specgen *generate.Generator, sb *sandbox.Sandbox, containerConfig *pb.ContainerConfig, command string) error { + mounts := containerConfig.GetMounts() + addedHooks := map[string]struct{}{} + addHook := func(hook libkpod.HookParams) error { + // Only add a hook once + if _, ok := addedHooks[hook.Hook]; !ok { + if err := addOCIHook(specgen, hook); err != nil { + return err + } + addedHooks[hook.Hook] = struct{}{} + } + return nil + } + for _, hook := range s.Hooks() { + logrus.Debugf("SetupOCIHooks", hook) + if hook.HasBindMounts && len(mounts) > 0 { + if err := addHook(hook); err != nil { + return err + } + continue + } + for _, cmd := range hook.Cmds { + match, err := regexp.MatchString(cmd, command) + if err != nil { + logrus.Errorf("Invalid regex %q:%q", cmd, err) + continue + } + if match { + if err := addHook(hook); err != nil 
{ + return err + } + } + } + for _, annotationRegex := range hook.Annotations { + for _, annotation := range sb.Annotations() { + match, err := regexp.MatchString(annotationRegex, annotation) + if err != nil { + logrus.Errorf("Invalid regex %q:%q", annotationRegex, err) + continue + } + if match { + if err := addHook(hook); err != nil { + return err + } + } + } + } + } + return nil +} +func (s *Server) createSandboxContainer(ctx context.Context, containerID string, containerName string, sb *sandbox.Sandbox, SandboxConfig *pb.PodSandboxConfig, containerConfig *pb.ContainerConfig) (*oci.Container, error) { + if sb == nil { + return nil, errors.New("createSandboxContainer needs a sandbox") + } + + // TODO: simplify this function (cyclomatic complexity here is high) + // TODO: factor generating/updating the spec into something other projects can vendor + + // creates a spec Generator with the default spec. + specgen := generate.New() + specgen.HostSpecific = true + specgen.ClearProcessRlimits() + + var readOnlyRootfs bool + var privileged bool + if containerConfig.GetLinux().GetSecurityContext() != nil { + if containerConfig.GetLinux().GetSecurityContext().Privileged { + privileged = true + } + + if containerConfig.GetLinux().GetSecurityContext().ReadonlyRootfs { + readOnlyRootfs = true + specgen.SetRootReadonly(true) + } + } + + mountLabel := sb.MountLabel() + processLabel := sb.ProcessLabel() + selinuxConfig := containerConfig.GetLinux().GetSecurityContext().GetSelinuxOptions() + if selinuxConfig != nil { + var err error + processLabel, mountLabel, err = getSELinuxLabels(selinuxConfig, privileged) + if err != nil { + return nil, err + } + } + + containerVolumes, ociMounts, err := addOCIBindMounts(mountLabel, containerConfig, &specgen) + if err != nil { + return nil, err + } + + volumesJSON, err := json.Marshal(containerVolumes) + if err != nil { + return nil, err + } + specgen.AddAnnotation(annotations.Volumes, string(volumesJSON)) + + // Add cgroup mount so container process can introspect its own limits + specgen.AddCgroupsMount("ro") + + if err := addDevices(sb, containerConfig, &specgen); err != nil { + return nil, err + } + + labels := containerConfig.GetLabels() + + metadata := containerConfig.GetMetadata() + + kubeAnnotations := containerConfig.GetAnnotations() + if kubeAnnotations != nil { + for k, v := range kubeAnnotations { + specgen.AddAnnotation(k, v) + } + } + if labels != nil { + for k, v := range labels { + specgen.AddAnnotation(k, v) + } + } + + // set this container's apparmor profile if it is set by sandbox + if s.appArmorEnabled && !privileged { + appArmorProfileName := s.getAppArmorProfileName(sb.Annotations(), metadata.Name) + if appArmorProfileName != "" { + // reload default apparmor profile if it is unloaded. + if s.appArmorProfile == apparmor.DefaultApparmorProfile { + if err := apparmor.EnsureDefaultApparmorProfile(); err != nil { + return nil, err + } + } + + specgen.SetProcessApparmorProfile(appArmorProfileName) + } + } + + logPath := containerConfig.LogPath + if logPath == "" { + // TODO: Should we use sandboxConfig.GetLogDirectory() here? + logPath = filepath.Join(sb.LogDir(), containerID+".log") + } + if !filepath.IsAbs(logPath) { + // XXX: It's not really clear what this should be versus the sbox logDirectory. 
+ logrus.Warnf("requested logPath for ctr id %s is a relative path: %s", containerID, logPath) + logPath = filepath.Join(sb.LogDir(), logPath) + } + + // Handle https://issues.k8s.io/44043 + if err := ensureSaneLogPath(logPath); err != nil { + return nil, err + } + + logrus.WithFields(logrus.Fields{ + "sbox.logdir": sb.LogDir(), + "ctr.logfile": containerConfig.LogPath, + "log_path": logPath, + }).Debugf("setting container's log_path") + + specgen.SetProcessTerminal(containerConfig.Tty) + if containerConfig.Tty { + specgen.AddProcessEnv("TERM", "xterm") + } + + linux := containerConfig.GetLinux() + if linux != nil { + resources := linux.GetResources() + if resources != nil { + cpuPeriod := resources.CpuPeriod + if cpuPeriod != 0 { + specgen.SetLinuxResourcesCPUPeriod(uint64(cpuPeriod)) + } + + cpuQuota := resources.CpuQuota + if cpuQuota != 0 { + specgen.SetLinuxResourcesCPUQuota(cpuQuota) + } + + cpuShares := resources.CpuShares + if cpuShares != 0 { + specgen.SetLinuxResourcesCPUShares(uint64(cpuShares)) + } + + memoryLimit := resources.MemoryLimitInBytes + if memoryLimit != 0 { + specgen.SetLinuxResourcesMemoryLimit(memoryLimit) + } + + oomScoreAdj := resources.OomScoreAdj + specgen.SetProcessOOMScoreAdj(int(oomScoreAdj)) + } + + var cgPath string + parent := defaultCgroupfsParent + useSystemd := s.config.CgroupManager == oci.SystemdCgroupsManager + if useSystemd { + parent = defaultSystemdParent + } + if sb.CgroupParent() != "" { + parent = sb.CgroupParent() + } + if useSystemd { + cgPath = parent + ":" + scopePrefix + ":" + containerID + } else { + cgPath = filepath.Join(parent, scopePrefix+"-"+containerID) + } + specgen.SetLinuxCgroupsPath(cgPath) + + capabilities := linux.GetSecurityContext().GetCapabilities() + if privileged { + // this is setting correct capabilities as well for privileged mode + specgen.SetupPrivileged(true) + setOCIBindMountsPrivileged(&specgen) + } else { + toCAPPrefixed := func(cap string) string { + if !strings.HasPrefix(strings.ToLower(cap), "cap_") { + return "CAP_" + strings.ToUpper(cap) + } + return cap + } + + // Add/drop all capabilities if "all" is specified, so that + // following individual add/drop could still work. E.g. + // AddCapabilities: []string{"ALL"}, DropCapabilities: []string{"CHOWN"} + // will be all capabilities without `CAP_CHOWN`. 
+ // see https://github.com/kubernetes/kubernetes/issues/51980 + if inStringSlice(capabilities.GetAddCapabilities(), "ALL") { + for _, c := range getOCICapabilitiesList() { + if err := specgen.AddProcessCapability(c); err != nil { + return nil, err + } + } + } + if inStringSlice(capabilities.GetDropCapabilities(), "ALL") { + for _, c := range getOCICapabilitiesList() { + if err := specgen.DropProcessCapability(c); err != nil { + return nil, err + } + } + } + + if capabilities != nil { + for _, cap := range capabilities.GetAddCapabilities() { + if strings.ToUpper(cap) == "ALL" { + continue + } + if err := specgen.AddProcessCapability(toCAPPrefixed(cap)); err != nil { + return nil, err + } + } + + for _, cap := range capabilities.GetDropCapabilities() { + if strings.ToUpper(cap) == "ALL" { + continue + } + if err := specgen.DropProcessCapability(toCAPPrefixed(cap)); err != nil { + return nil, fmt.Errorf("failed to drop cap %s %v", toCAPPrefixed(cap), err) + } + } + } + } + specgen.SetProcessSelinuxLabel(processLabel) + specgen.SetLinuxMountLabel(mountLabel) + + if containerConfig.GetLinux().GetSecurityContext() != nil && + !containerConfig.GetLinux().GetSecurityContext().Privileged { + for _, mp := range []string{ + "/proc/kcore", + "/proc/latency_stats", + "/proc/timer_list", + "/proc/timer_stats", + "/proc/sched_debug", + "/sys/firmware", + } { + specgen.AddLinuxMaskedPaths(mp) + } + + for _, rp := range []string{ + "/proc/asound", + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sys", + "/proc/sysrq-trigger", + } { + specgen.AddLinuxReadonlyPaths(rp) + } + } + } + // Join the namespace paths for the pod sandbox container. + podInfraState := s.Runtime().ContainerStatus(sb.InfraContainer()) + + logrus.Debugf("pod container state %+v", podInfraState) + + ipcNsPath := fmt.Sprintf("/proc/%d/ns/ipc", podInfraState.Pid) + if err := specgen.AddOrReplaceLinuxNamespace(string(rspec.IPCNamespace), ipcNsPath); err != nil { + return nil, err + } + + utsNsPath := fmt.Sprintf("/proc/%d/ns/uts", podInfraState.Pid) + if err := specgen.AddOrReplaceLinuxNamespace(string(rspec.UTSNamespace), utsNsPath); err != nil { + return nil, err + } + + // Do not share pid ns for now + if containerConfig.GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostPid() { + specgen.RemoveLinuxNamespace(string(rspec.PIDNamespace)) + } + + netNsPath := sb.NetNsPath() + if netNsPath == "" { + // The sandbox does not have a permanent namespace, + // it's on the host one. 
+ netNsPath = fmt.Sprintf("/proc/%d/ns/net", podInfraState.Pid) + } + + if err := specgen.AddOrReplaceLinuxNamespace(string(rspec.NetworkNamespace), netNsPath); err != nil { + return nil, err + } + + imageSpec := containerConfig.GetImage() + if imageSpec == nil { + return nil, fmt.Errorf("CreateContainerRequest.ContainerConfig.Image is nil") + } + + image := imageSpec.Image + if image == "" { + return nil, fmt.Errorf("CreateContainerRequest.ContainerConfig.Image.Image is empty") + } + images, err := s.StorageImageServer().ResolveNames(image) + if err != nil { + // This means we got an image ID + if strings.Contains(err.Error(), "cannot specify 64-byte hexadecimal strings") { + images = append(images, image) + } else { + return nil, err + } + } + image = images[0] + + // Get imageName and imageRef that are requested in container status + imageName := image + status, err := s.StorageImageServer().ImageStatus(s.ImageContext(), image) + if err != nil { + return nil, err + } + + imageRef := status.ID + // + // TODO: https://github.com/kubernetes-incubator/cri-o/issues/531 + // + //for _, n := range status.Names { + //r, err := reference.ParseNormalizedNamed(n) + //if err != nil { + //return nil, fmt.Errorf("failed to normalize image name for ImageRef: %v", err) + //} + //if digested, isDigested := r.(reference.Canonical); isDigested { + //imageRef = reference.FamiliarString(digested) + //break + //} + //} + for _, n := range status.Names { + r, err := reference.ParseNormalizedNamed(n) + if err != nil { + return nil, fmt.Errorf("failed to normalize image name for Image: %v", err) + } + if tagged, isTagged := r.(reference.Tagged); isTagged { + imageName = reference.FamiliarString(tagged) + break + } + } + + specgen.AddAnnotation(annotations.ImageName, imageName) + specgen.AddAnnotation(annotations.ImageRef, imageRef) + specgen.AddAnnotation(annotations.IP, sb.IP()) + + // bind mount the pod shm + specgen.AddBindMount(sb.ShmPath(), "/dev/shm", []string{"rw"}) + + options := []string{"rw"} + if readOnlyRootfs { + options = []string{"ro"} + } + if sb.ResolvPath() != "" { + if err := label.Relabel(sb.ResolvPath(), mountLabel, true); err != nil && err != unix.ENOTSUP { + return nil, err + } + + // bind mount the pod resolver file + specgen.AddBindMount(sb.ResolvPath(), "/etc/resolv.conf", options) + } + + if sb.HostnamePath() != "" { + if err := label.Relabel(sb.HostnamePath(), mountLabel, true); err != nil && err != unix.ENOTSUP { + return nil, err + } + + specgen.AddBindMount(sb.HostnamePath(), "/etc/hostname", options) + } + + // Bind mount /etc/hosts for host networking containers + if hostNetwork(containerConfig) { + specgen.AddBindMount("/etc/hosts", "/etc/hosts", options) + } + + specgen.SetHostname(sb.Hostname()) + + specgen.AddAnnotation(annotations.Name, containerName) + specgen.AddAnnotation(annotations.ContainerID, containerID) + specgen.AddAnnotation(annotations.SandboxID, sb.ID()) + specgen.AddAnnotation(annotations.SandboxName, sb.InfraContainer().Name()) + specgen.AddAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer) + specgen.AddAnnotation(annotations.LogPath, logPath) + specgen.AddAnnotation(annotations.TTY, fmt.Sprintf("%v", containerConfig.Tty)) + specgen.AddAnnotation(annotations.Stdin, fmt.Sprintf("%v", containerConfig.Stdin)) + specgen.AddAnnotation(annotations.StdinOnce, fmt.Sprintf("%v", containerConfig.StdinOnce)) + specgen.AddAnnotation(annotations.Image, image) + specgen.AddAnnotation(annotations.ResolvPath, 
sb.InfraContainer().CrioAnnotations()[annotations.ResolvPath]) + + created := time.Now() + specgen.AddAnnotation(annotations.Created, created.Format(time.RFC3339Nano)) + + metadataJSON, err := json.Marshal(metadata) + if err != nil { + return nil, err + } + specgen.AddAnnotation(annotations.Metadata, string(metadataJSON)) + + labelsJSON, err := json.Marshal(labels) + if err != nil { + return nil, err + } + specgen.AddAnnotation(annotations.Labels, string(labelsJSON)) + + kubeAnnotationsJSON, err := json.Marshal(kubeAnnotations) + if err != nil { + return nil, err + } + specgen.AddAnnotation(annotations.Annotations, string(kubeAnnotationsJSON)) + + metaname := metadata.Name + if !privileged { + if err = s.setupSeccomp(&specgen, metaname, sb.Annotations()); err != nil { + return nil, err + } + } + + attempt := metadata.Attempt + containerInfo, err := s.StorageRuntimeServer().CreateContainer(s.ImageContext(), + sb.Name(), sb.ID(), + image, image, + containerName, containerID, + metaname, + attempt, + mountLabel, + nil) + if err != nil { + return nil, err + } + + mountPoint, err := s.StorageRuntimeServer().StartContainer(containerID) + if err != nil { + return nil, fmt.Errorf("failed to mount container %s(%s): %v", containerName, containerID, err) + } + specgen.AddAnnotation(annotations.MountPoint, mountPoint) + + containerImageConfig := containerInfo.Config + if containerImageConfig == nil { + return nil, fmt.Errorf("empty image config for %s", image) + } + + if containerImageConfig.Config.StopSignal != "" { + // this key is defined in image-spec conversion document at https://github.com/opencontainers/image-spec/pull/492/files#diff-8aafbe2c3690162540381b8cdb157112R57 + specgen.AddAnnotation("org.opencontainers.image.stopSignal", containerImageConfig.Config.StopSignal) + } + + // Add image volumes + volumeMounts, err := addImageVolumes(mountPoint, s, &containerInfo, &specgen, mountLabel) + if err != nil { + return nil, err + } + + processArgs, err := buildOCIProcessArgs(containerConfig, containerImageConfig) + if err != nil { + return nil, err + } + specgen.SetProcessArgs(processArgs) + + // Add environment variables from CRI and image config + envs := containerConfig.GetEnvs() + if envs != nil { + for _, item := range envs { + key := item.Key + value := item.Value + if key == "" { + continue + } + specgen.AddProcessEnv(key, value) + } + } + if containerImageConfig != nil { + for _, item := range containerImageConfig.Config.Env { + parts := strings.SplitN(item, "=", 2) + if len(parts) != 2 { + return nil, fmt.Errorf("invalid env from image: %s", item) + } + + if parts[0] == "" { + continue + } + specgen.AddProcessEnv(parts[0], parts[1]) + } + } + + // Set working directory + // Pick it up from image config first and override if specified in CRI + containerCwd := "/" + if containerImageConfig != nil { + imageCwd := containerImageConfig.Config.WorkingDir + if imageCwd != "" { + containerCwd = imageCwd + } + } + runtimeCwd := containerConfig.WorkingDir + if runtimeCwd != "" { + containerCwd = runtimeCwd + } + specgen.SetProcessCwd(containerCwd) + + var secretMounts []rspec.Mount + if len(s.config.DefaultMounts) > 0 { + var err error + secretMounts, err = addSecretsBindMounts(mountLabel, containerInfo.RunDir, s.config.DefaultMounts, specgen) + if err != nil { + return nil, fmt.Errorf("failed to mount secrets: %v", err) + } + } + + mounts := []rspec.Mount{} + mounts = append(mounts, ociMounts...) + mounts = append(mounts, volumeMounts...) + mounts = append(mounts, secretMounts...) 
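// For illustration: the sort that follows orders mounts by destination
// depth, so parent directories are mounted before their children, e.g.
// (hypothetical destinations)
//   before: ["/etc/resolv.conf", "/data", "/etc"]
//   after:  ["/data", "/etc", "/etc/resolv.conf"]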
+ + sort.Sort(orderedMounts(mounts)) + + for _, m := range mounts { + specgen.AddBindMount(m.Source, m.Destination, m.Options) + } + + if err := s.setupOCIHooks(&specgen, sb, containerConfig, processArgs[0]); err != nil { + return nil, err + } + + // Setup user and groups + if linux != nil { + if err = setupContainerUser(&specgen, mountPoint, linux.GetSecurityContext(), containerImageConfig); err != nil { + return nil, err + } + } + + // Set up pids limit if pids cgroup is mounted + _, err = cgroups.FindCgroupMountpoint("pids") + if err == nil { + specgen.SetLinuxResourcesPidsLimit(s.config.PidsLimit) + } + + // by default, the root path is an empty string. set it now. + specgen.SetRootPath(mountPoint) + + saveOptions := generate.ExportOptions{} + if err = specgen.SaveToFile(filepath.Join(containerInfo.Dir, "config.json"), saveOptions); err != nil { + return nil, err + } + if err = specgen.SaveToFile(filepath.Join(containerInfo.RunDir, "config.json"), saveOptions); err != nil { + return nil, err + } + + crioAnnotations := specgen.Spec().Annotations + + container, err := oci.NewContainer(containerID, containerName, containerInfo.RunDir, logPath, sb.NetNs(), labels, crioAnnotations, kubeAnnotations, image, imageName, imageRef, metadata, sb.ID(), containerConfig.Tty, containerConfig.Stdin, containerConfig.StdinOnce, sb.Privileged(), sb.Trusted(), containerInfo.Dir, created, containerImageConfig.Config.StopSignal) + if err != nil { + return nil, err + } + container.SetSpec(specgen.Spec()) + container.SetMountPoint(mountPoint) + + for _, cv := range containerVolumes { + container.AddVolume(cv) + } + + return container, nil +} + +func (s *Server) setupSeccomp(specgen *generate.Generator, cname string, sbAnnotations map[string]string) error { + profile, ok := sbAnnotations["container.seccomp.security.alpha.kubernetes.io/"+cname] + if !ok { + profile, ok = sbAnnotations["seccomp.security.alpha.kubernetes.io/pod"] + if !ok { + // running w/o seccomp, aka unconfined + profile = seccompUnconfined + } + } + if !s.seccompEnabled { + if profile != seccompUnconfined { + return fmt.Errorf("seccomp is not enabled in your kernel, cannot run with a profile") + } + logrus.Warn("seccomp is not enabled in your kernel, running container without profile") + } + if profile == seccompUnconfined { + // running w/o seccomp, aka unconfined + specgen.Spec().Linux.Seccomp = nil + return nil + } + if profile == seccompRuntimeDefault || profile == seccompDockerDefault { + return seccomp.LoadProfileFromStruct(s.seccompProfile, specgen) + } + if !strings.HasPrefix(profile, seccompLocalhostPrefix) { + return fmt.Errorf("unknown seccomp profile option: %q", profile) + } + // FIXME: https://github.com/kubernetes/kubernetes/issues/39128 + return nil +} + +// getAppArmorProfileName gets the profile name for the given container. +func (s *Server) getAppArmorProfileName(annotations map[string]string, ctrName string) string { + profile := apparmor.GetProfileNameFromPodAnnotations(annotations, ctrName) + + if profile == "" { + return "" + } + + if profile == apparmor.ProfileRuntimeDefault { + // If the value is runtime/default, then return default profile. 
+ return s.appArmorProfile + } + + return strings.TrimPrefix(profile, apparmor.ProfileNamePrefix) +} + +// openContainerFile opens a file inside a container rootfs safely +func openContainerFile(rootfs string, path string) (io.ReadCloser, error) { + fp, err := symlink.FollowSymlinkInScope(filepath.Join(rootfs, path), rootfs) + if err != nil { + return nil, err + } + return os.Open(fp) +} + +// getUserInfo returns UID, GID and additional groups for specified user +// by looking them up in /etc/passwd and /etc/group +func getUserInfo(rootfs string, userName string) (uint32, uint32, []uint32, error) { + // We don't care if we can't open the file because + // not all images will have these files + passwdFile, err := openContainerFile(rootfs, "/etc/passwd") + if err != nil { + logrus.Warnf("Failed to open /etc/passwd: %v", err) + } else { + defer passwdFile.Close() + } + + groupFile, err := openContainerFile(rootfs, "/etc/group") + if err != nil { + logrus.Warnf("Failed to open /etc/group: %v", err) + } else { + defer groupFile.Close() + } + + execUser, err := user.GetExecUser(userName, nil, passwdFile, groupFile) + if err != nil { + return 0, 0, nil, err + } + + uid := uint32(execUser.Uid) + gid := uint32(execUser.Gid) + var additionalGids []uint32 + for _, g := range execUser.Sgids { + additionalGids = append(additionalGids, uint32(g)) + } + + return uid, gid, additionalGids, nil +} + +func setOCIBindMountsPrivileged(g *generate.Generator) { + spec := g.Spec() + // clear readonly for /sys and cgroup + for i, m := range spec.Mounts { + if spec.Mounts[i].Destination == "/sys" && !spec.Root.Readonly { + clearReadOnly(&spec.Mounts[i]) + } + if m.Type == "cgroup" { + clearReadOnly(&spec.Mounts[i]) + } + } + spec.Linux.ReadonlyPaths = nil + spec.Linux.MaskedPaths = nil +} + +func clearReadOnly(m *rspec.Mount) { + var opt []string + for _, o := range m.Options { + if o != "ro" { + opt = append(opt, o) + } + } + m.Options = opt +} diff --git a/server/container_exec.go b/server/container_exec.go new file mode 100644 index 000000000..0cdb9579e --- /dev/null +++ b/server/container_exec.go @@ -0,0 +1,108 @@ +package server + +import ( + "fmt" + "io" + "os" + "os/exec" + + "github.com/docker/docker/pkg/pools" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + "k8s.io/client-go/tools/remotecommand" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" + kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" + utilexec "k8s.io/kubernetes/pkg/util/exec" + "k8s.io/kubernetes/pkg/util/term" +) + +// Exec prepares a streaming endpoint to execute a command in the container. 
+func (s *Server) Exec(ctx context.Context, req *pb.ExecRequest) (*pb.ExecResponse, error) { + logrus.Debugf("ExecRequest %+v", req) + + resp, err := s.GetExec(req) + if err != nil { + return nil, fmt.Errorf("unable to prepare exec endpoint") + } + + return resp, nil +} + +// Exec endpoint for streaming.Runtime +func (ss streamService) Exec(containerID string, cmd []string, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool, resize <-chan remotecommand.TerminalSize) error { + c := ss.runtimeServer.GetContainer(containerID) + + if c == nil { + return fmt.Errorf("could not find container %q", containerID) + } + + if err := ss.runtimeServer.Runtime().UpdateStatus(c); err != nil { + return err + } + + cState := ss.runtimeServer.Runtime().ContainerStatus(c) + if !(cState.Status == oci.ContainerStateRunning || cState.Status == oci.ContainerStateCreated) { + return fmt.Errorf("container is not created or running") + } + + args := []string{"exec"} + if tty { + args = append(args, "-t") + } + args = append(args, c.ID()) + args = append(args, cmd...) + execCmd := exec.Command(ss.runtimeServer.Runtime().Path(c), args...) + var cmdErr error + if tty { + p, err := kubecontainer.StartPty(execCmd) + if err != nil { + return err + } + defer p.Close() + + // make sure to close the stdout stream + defer stdout.Close() + + kubecontainer.HandleResizing(resize, func(size remotecommand.TerminalSize) { + term.SetSize(p.Fd(), size) + }) + + if stdin != nil { + go pools.Copy(p, stdin) + } + + if stdout != nil { + go pools.Copy(stdout, p) + } + + cmdErr = execCmd.Wait() + } else { + if stdin != nil { + // Use an os.Pipe here as it returns true *os.File objects. + // This way, if you run 'kubectl exec <pod> -i bash' (no tty) and type 'exit', + // the call below to execCmd.Run() can unblock because its Stdin is the read half + // of the pipe. + r, w, err := os.Pipe() + if err != nil { + return err + } + go pools.Copy(w, stdin) + + execCmd.Stdin = r + } + if stdout != nil { + execCmd.Stdout = stdout + } + if stderr != nil { + execCmd.Stderr = stderr + } + + cmdErr = execCmd.Run() + } + + if exitErr, ok := cmdErr.(*exec.ExitError); ok { + return &utilexec.ExitErrorWrapper{ExitError: exitErr} + } + return cmdErr +} diff --git a/server/container_execsync.go b/server/container_execsync.go new file mode 100644 index 000000000..35f7896c5 --- /dev/null +++ b/server/container_execsync.go @@ -0,0 +1,46 @@ +package server + +import ( + "fmt" + + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +// ExecSync runs a command in a container synchronously. 
+func (s *Server) ExecSync(ctx context.Context, req *pb.ExecSyncRequest) (*pb.ExecSyncResponse, error) { + logrus.Debugf("ExecSyncRequest %+v", req) + c, err := s.GetContainerFromRequest(req.ContainerId) + if err != nil { + return nil, err + } + + if err = s.Runtime().UpdateStatus(c); err != nil { + return nil, err + } + + cState := s.Runtime().ContainerStatus(c) + if !(cState.Status == oci.ContainerStateRunning || cState.Status == oci.ContainerStateCreated) { + return nil, fmt.Errorf("container is not created or running") + } + + cmd := req.Cmd + if cmd == nil { + return nil, fmt.Errorf("exec command cannot be empty") + } + + execResp, err := s.Runtime().ExecSync(c, cmd, req.Timeout) + if err != nil { + return nil, err + } + resp := &pb.ExecSyncResponse{ + Stdout: execResp.Stdout, + Stderr: execResp.Stderr, + ExitCode: execResp.ExitCode, + } + + logrus.Debugf("ExecSyncResponse: %+v", resp) + return resp, nil +} diff --git a/server/container_list.go b/server/container_list.go new file mode 100644 index 000000000..42204ae1f --- /dev/null +++ b/server/container_list.go @@ -0,0 +1,112 @@ +package server + +import ( + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + "k8s.io/apimachinery/pkg/fields" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +// filterContainer returns whether passed container matches filtering criteria +func filterContainer(c *pb.Container, filter *pb.ContainerFilter) bool { + if filter != nil { + if filter.State != nil { + if c.State != filter.State.State { + return false + } + } + if filter.LabelSelector != nil { + sel := fields.SelectorFromSet(filter.LabelSelector) + if !sel.Matches(fields.Set(c.Labels)) { + return false + } + } + } + return true +} + +// ListContainers lists all containers by filters. +func (s *Server) ListContainers(ctx context.Context, req *pb.ListContainersRequest) (*pb.ListContainersResponse, error) { + logrus.Debugf("ListContainersRequest %+v", req) + var ctrs []*pb.Container + filter := req.Filter + ctrList, err := s.ContainerServer.ListContainers() + if err != nil { + return nil, err + } + + // Filter using container id and pod id first. + if filter.Id != "" { + id, err := s.CtrIDIndex().Get(filter.Id) + if err != nil { + // If we don't find a container ID with a filter, it should not + // be considered an error. 
Log a warning and return an empty struct.
+			logrus.Warnf("unable to find container ID %s", filter.Id)
+			return &pb.ListContainersResponse{}, nil
+		}
+		c := s.ContainerServer.GetContainer(id)
+		if c != nil {
+			if filter.PodSandboxId != "" {
+				if c.Sandbox() == filter.PodSandboxId {
+					ctrList = []*oci.Container{c}
+				} else {
+					ctrList = []*oci.Container{}
+				}
+
+			} else {
+				ctrList = []*oci.Container{c}
+			}
+		}
+	} else {
+		if filter.PodSandboxId != "" {
+			pod := s.ContainerServer.GetSandbox(filter.PodSandboxId)
+			if pod == nil {
+				ctrList = []*oci.Container{}
+			} else {
+				ctrList = pod.Containers().List()
+			}
+		}
+	}
+
+	for _, ctr := range ctrList {
+		podSandboxID := ctr.Sandbox()
+		cState := s.Runtime().ContainerStatus(ctr)
+		created := cState.Created.UnixNano()
+		rState := pb.ContainerState_CONTAINER_UNKNOWN
+		cID := ctr.ID()
+		img := &pb.ImageSpec{
+			Image: ctr.Image(),
+		}
+		c := &pb.Container{
+			Id:           cID,
+			PodSandboxId: podSandboxID,
+			CreatedAt:    created,
+			Labels:       ctr.Labels(),
+			Metadata:     ctr.Metadata(),
+			Annotations:  ctr.Annotations(),
+			Image:        img,
+		}
+
+		switch cState.Status {
+		case oci.ContainerStateCreated:
+			rState = pb.ContainerState_CONTAINER_CREATED
+		case oci.ContainerStateRunning:
+			rState = pb.ContainerState_CONTAINER_RUNNING
+		case oci.ContainerStateStopped:
+			rState = pb.ContainerState_CONTAINER_EXITED
+		}
+		c.State = rState
+
+		// Filter by other criteria such as state and labels.
+		if filterContainer(c, req.Filter) {
+			ctrs = append(ctrs, c)
+		}
+	}
+
+	resp := &pb.ListContainersResponse{
+		Containers: ctrs,
+	}
+	logrus.Debugf("ListContainersResponse: %+v", resp)
+	return resp, nil
+}
diff --git a/server/container_portforward.go b/server/container_portforward.go
new file mode 100644
index 000000000..97dd53423
--- /dev/null
+++ b/server/container_portforward.go
@@ -0,0 +1,91 @@
+package server
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"os/exec"
+	"strings"
+
+	"github.com/docker/docker/pkg/pools"
+	"github.com/kubernetes-incubator/cri-o/oci"
+	"github.com/sirupsen/logrus"
+	"golang.org/x/net/context"
+	pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// PortForward prepares a streaming endpoint to forward ports from a PodSandbox.
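As an aside on the list filtering above: the LabelSelector branch reduces to a set-containment check via apimachinery's fields package. A minimal sketch, assuming only that package (the helper name is ours):

package sketch

import "k8s.io/apimachinery/pkg/fields"

// matchesLabels mirrors the LabelSelector branch of filterContainer: every
// key/value pair in the selector must appear verbatim in the labels.
func matchesLabels(labels, selector map[string]string) bool {
	sel := fields.SelectorFromSet(fields.Set(selector))
	return sel.Matches(fields.Set(labels))
}

// matchesLabels(map[string]string{"app": "web", "tier": "fe"},
//               map[string]string{"app": "web"}) == true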
+func (s *Server) PortForward(ctx context.Context, req *pb.PortForwardRequest) (*pb.PortForwardResponse, error) { + logrus.Debugf("PortForwardRequest %+v", req) + + resp, err := s.GetPortForward(req) + + if err != nil { + return nil, fmt.Errorf("unable to prepare portforward endpoint") + } + + return resp, nil +} + +func (ss streamService) PortForward(podSandboxID string, port int32, stream io.ReadWriteCloser) error { + c := ss.runtimeServer.GetSandboxContainer(podSandboxID) + + if c == nil { + return fmt.Errorf("could not find container for sandbox %q", podSandboxID) + } + + if err := ss.runtimeServer.Runtime().UpdateStatus(c); err != nil { + return err + } + + cState := ss.runtimeServer.Runtime().ContainerStatus(c) + if !(cState.Status == oci.ContainerStateRunning || cState.Status == oci.ContainerStateCreated) { + return fmt.Errorf("container is not created or running") + } + + containerPid := cState.Pid + socatPath, lookupErr := exec.LookPath("socat") + if lookupErr != nil { + return fmt.Errorf("unable to do port forwarding: socat not found") + } + + args := []string{"-t", fmt.Sprintf("%d", containerPid), "-n", socatPath, "-", fmt.Sprintf("TCP4:localhost:%d", port)} + + nsenterPath, lookupErr := exec.LookPath("nsenter") + if lookupErr != nil { + return fmt.Errorf("unable to do port forwarding: nsenter not found") + } + + commandString := fmt.Sprintf("%s %s", nsenterPath, strings.Join(args, " ")) + logrus.Debugf("executing port forwarding command: %s", commandString) + + command := exec.Command(nsenterPath, args...) + command.Stdout = stream + + stderr := new(bytes.Buffer) + command.Stderr = stderr + + // If we use Stdin, command.Run() won't return until the goroutine that's copying + // from stream finishes. Unfortunately, if you have a client like telnet connected + // via port forwarding, as long as the user's telnet client is connected to the user's + // local listener that port forwarding sets up, the telnet session never exits. This + // means that even if socat has finished running, command.Run() won't ever return + // (because the client still has the connection and stream open). + // + // The work around is to use StdinPipe(), as Wait() (called by Run()) closes the pipe + // when the command (socat) exits. + inPipe, err := command.StdinPipe() + if err != nil { + return fmt.Errorf("unable to do port forwarding: error creating stdin pipe: %v", err) + } + go func() { + pools.Copy(inPipe, stream) + inPipe.Close() + }() + + if err := command.Run(); err != nil { + return fmt.Errorf("%v: %s", err, stderr.String()) + } + + return nil +} diff --git a/server/container_remove.go b/server/container_remove.go new file mode 100644 index 000000000..871023726 --- /dev/null +++ b/server/container_remove.go @@ -0,0 +1,20 @@ +package server + +import ( + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +// RemoveContainer removes the container. If the container is running, the container +// should be force removed. 
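For reference, the command assembled in the port-forward handler above expands to `nsenter -t <pid> -n <socat> - TCP4:localhost:<port>`: nsenter joins only the network namespace of the sandbox's pid, and socat bridges its stdio to the pod-local TCP port. A sketch of the same argument layout (helper name ours):

package sketch

import "fmt"

// portForwardArgs reproduces the nsenter argument list built above.
func portForwardArgs(pid, port int32, socatPath string) []string {
	return []string{
		"-t", fmt.Sprintf("%d", pid), // process whose namespaces to enter
		"-n",                         // join only the network namespace
		socatPath, "-",               // socat: stdio on one side...
		fmt.Sprintf("TCP4:localhost:%d", port), // ...pod-local TCP on the other
	}
}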
+func (s *Server) RemoveContainer(ctx context.Context, req *pb.RemoveContainerRequest) (*pb.RemoveContainerResponse, error) { + _, err := s.ContainerServer.Remove(ctx, req.ContainerId, true) + if err != nil { + return nil, err + } + + resp := &pb.RemoveContainerResponse{} + logrus.Debugf("RemoveContainerResponse: %+v", resp) + return resp, nil +} diff --git a/server/container_start.go b/server/container_start.go new file mode 100644 index 000000000..85be09484 --- /dev/null +++ b/server/container_start.go @@ -0,0 +1,43 @@ +package server + +import ( + "fmt" + + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +// StartContainer starts the container. +func (s *Server) StartContainer(ctx context.Context, req *pb.StartContainerRequest) (*pb.StartContainerResponse, error) { + logrus.Debugf("StartContainerRequest %+v", req) + c, err := s.GetContainerFromRequest(req.ContainerId) + if err != nil { + return nil, err + } + state := s.Runtime().ContainerStatus(c) + if state.Status != oci.ContainerStateCreated { + return nil, fmt.Errorf("container %s is not in created state: %s", c.ID(), state.Status) + } + + defer func() { + // if the call to StartContainer fails below we still want to fill + // some fields of a container status. In particular, we're going to + // adjust container started/finished time and set an error to be + // returned in the Reason field for container status call. + if err != nil { + s.Runtime().SetStartFailed(c, err) + } + s.ContainerStateToDisk(c) + }() + + err = s.Runtime().StartContainer(c) + if err != nil { + return nil, fmt.Errorf("failed to start container %s: %v", c.ID(), err) + } + + resp := &pb.StartContainerResponse{} + logrus.Debugf("StartContainerResponse %+v", resp) + return resp, nil +} diff --git a/server/container_stats.go b/server/container_stats.go new file mode 100644 index 000000000..22b87c453 --- /dev/null +++ b/server/container_stats.go @@ -0,0 +1,14 @@ +package server + +import ( + "fmt" + + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +// ContainerStats returns stats of the container. If the container does not +// exist, the call returns an error. +func (s *Server) ContainerStats(ctx context.Context, req *pb.ContainerStatsRequest) (*pb.ContainerStatsResponse, error) { + return nil, fmt.Errorf("not implemented") +} diff --git a/server/container_stats_list.go b/server/container_stats_list.go new file mode 100644 index 000000000..929220994 --- /dev/null +++ b/server/container_stats_list.go @@ -0,0 +1,13 @@ +package server + +import ( + "fmt" + + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +// ListContainerStats returns stats of all running containers. 
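StartContainer above relies on a Go idiom worth spelling out: the deferred closure reads the same err variable that every branch assigns, so all failure paths funnel through SetStartFailed and ContainerStateToDisk without repeating the bookkeeping. A stripped-down sketch (the type and function names are ours):

package sketch

import "fmt"

// record stands in for the status bookkeeping done above.
type record struct{ startFailure error }

// start shows the deferred error-capture pattern: any assignment to the
// named return err before a return is visible to the cleanup closure.
func start(doStart func() error, r *record) (err error) {
	defer func() {
		if err != nil {
			r.startFailure = err // like SetStartFailed above
		}
	}()
	if err = doStart(); err != nil {
		return fmt.Errorf("failed to start: %v", err)
	}
	return nil
}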
+func (s *Server) ListContainerStats(ctx context.Context, req *pb.ListContainerStatsRequest) (*pb.ListContainerStatsResponse, error) { + return nil, fmt.Errorf("not implemented") +} diff --git a/server/container_status.go b/server/container_status.go new file mode 100644 index 000000000..b1512e0cd --- /dev/null +++ b/server/container_status.go @@ -0,0 +1,102 @@ +package server + +import ( + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +const ( + oomKilledReason = "OOMKilled" + completedReason = "Completed" + errorReason = "Error" +) + +// ContainerStatus returns status of the container. +func (s *Server) ContainerStatus(ctx context.Context, req *pb.ContainerStatusRequest) (*pb.ContainerStatusResponse, error) { + logrus.Debugf("ContainerStatusRequest %+v", req) + c, err := s.GetContainerFromRequest(req.ContainerId) + if err != nil { + return nil, err + } + + containerID := c.ID() + resp := &pb.ContainerStatusResponse{ + Status: &pb.ContainerStatus{ + Id: containerID, + Metadata: c.Metadata(), + Labels: c.Labels(), + Annotations: c.Annotations(), + ImageRef: c.ImageRef(), + }, + } + resp.Status.Image = &pb.ImageSpec{Image: c.ImageName()} + + mounts := []*pb.Mount{} + for _, cv := range c.Volumes() { + mounts = append(mounts, &pb.Mount{ + ContainerPath: cv.ContainerPath, + HostPath: cv.HostPath, + Readonly: cv.Readonly, + }) + } + resp.Status.Mounts = mounts + + cState := s.Runtime().ContainerStatus(c) + rStatus := pb.ContainerState_CONTAINER_UNKNOWN + + imageName := c.Image() + status, err := s.StorageImageServer().ImageStatus(s.ImageContext(), imageName) + if err != nil { + return nil, err + } + + resp.Status.ImageRef = status.ImageRef + + // If we defaulted to exit code -1 earlier then we attempt to + // get the exit code from the exit file again. 
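+	// (An exit code of -1 is used as a sentinel for "unknown"; re-running
+	// UpdateStatus presumably re-reads the runtime state and the on-disk exit
+	// file, so a container that exited since the last poll reports its real
+	// exit code.)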
+ if cState.ExitCode == -1 { + err := s.Runtime().UpdateStatus(c) + if err != nil { + logrus.Warnf("Failed to UpdateStatus of container %s: %v", c.ID(), err) + } + cState = s.Runtime().ContainerStatus(c) + } + + switch cState.Status { + case oci.ContainerStateCreated: + rStatus = pb.ContainerState_CONTAINER_CREATED + created := cState.Created.UnixNano() + resp.Status.CreatedAt = created + case oci.ContainerStateRunning: + rStatus = pb.ContainerState_CONTAINER_RUNNING + created := cState.Created.UnixNano() + resp.Status.CreatedAt = created + started := cState.Started.UnixNano() + resp.Status.StartedAt = started + case oci.ContainerStateStopped: + rStatus = pb.ContainerState_CONTAINER_EXITED + created := cState.Created.UnixNano() + resp.Status.CreatedAt = created + started := cState.Started.UnixNano() + resp.Status.StartedAt = started + finished := cState.Finished.UnixNano() + resp.Status.FinishedAt = finished + resp.Status.ExitCode = cState.ExitCode + switch { + case cState.OOMKilled: + resp.Status.Reason = oomKilledReason + case cState.ExitCode == 0: + resp.Status.Reason = completedReason + default: + resp.Status.Reason = errorReason + resp.Status.Message = cState.Error + } + } + + resp.Status.State = rStatus + + logrus.Debugf("ContainerStatusResponse: %+v", resp) + return resp, nil +} diff --git a/server/container_stop.go b/server/container_stop.go new file mode 100644 index 000000000..f74ed86e0 --- /dev/null +++ b/server/container_stop.go @@ -0,0 +1,19 @@ +package server + +import ( + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +// StopContainer stops a running container with a grace period (i.e., timeout). +func (s *Server) StopContainer(ctx context.Context, req *pb.StopContainerRequest) (*pb.StopContainerResponse, error) { + _, err := s.ContainerServer.ContainerStop(ctx, req.ContainerId, req.Timeout) + if err != nil { + return nil, err + } + + resp := &pb.StopContainerResponse{} + logrus.Debugf("StopContainerResponse %s: %+v", req.ContainerId, resp) + return resp, nil +} diff --git a/server/container_updateruntimeconfig.go b/server/container_updateruntimeconfig.go new file mode 100644 index 000000000..b900c9b18 --- /dev/null +++ b/server/container_updateruntimeconfig.go @@ -0,0 +1,11 @@ +package server + +import ( + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +// UpdateRuntimeConfig updates the configuration of a running container. +func (s *Server) UpdateRuntimeConfig(ctx context.Context, req *pb.UpdateRuntimeConfigRequest) (*pb.UpdateRuntimeConfigResponse, error) { + return &pb.UpdateRuntimeConfigResponse{}, nil +} diff --git a/server/image_fs_info.go b/server/image_fs_info.go new file mode 100644 index 000000000..969bdc342 --- /dev/null +++ b/server/image_fs_info.go @@ -0,0 +1,13 @@ +package server + +import ( + "fmt" + + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +// ImageFsInfo returns information of the filesystem that is used to store images. 
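Both ListContainers and ContainerStatus above repeat the same OCI-to-CRI state switch. Assuming the oci state values are plain strings, as their use in the switch suggests, the mapping could be table-driven; a sketch (helper name ours):

package sketch

import (
	"github.com/kubernetes-incubator/cri-o/oci"
	pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
)

var criState = map[string]pb.ContainerState{
	oci.ContainerStateCreated: pb.ContainerState_CONTAINER_CREATED,
	oci.ContainerStateRunning: pb.ContainerState_CONTAINER_RUNNING,
	oci.ContainerStateStopped: pb.ContainerState_CONTAINER_EXITED,
}

// toCRIState collapses the switch used above; anything the table doesn't
// know stays CONTAINER_UNKNOWN.
func toCRIState(ociState string) pb.ContainerState {
	if s, ok := criState[ociState]; ok {
		return s
	}
	return pb.ContainerState_CONTAINER_UNKNOWN
}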
+func (s *Server) ImageFsInfo(ctx context.Context, req *pb.ImageFsInfoRequest) (*pb.ImageFsInfoResponse, error) { + return nil, fmt.Errorf("not implemented") +} diff --git a/server/image_list.go b/server/image_list.go new file mode 100644 index 000000000..ebcc6f6a1 --- /dev/null +++ b/server/image_list.go @@ -0,0 +1,41 @@ +package server + +import ( + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +// ListImages lists existing images. +func (s *Server) ListImages(ctx context.Context, req *pb.ListImagesRequest) (*pb.ListImagesResponse, error) { + logrus.Debugf("ListImagesRequest: %+v", req) + filter := "" + reqFilter := req.GetFilter() + if reqFilter != nil { + filterImage := reqFilter.GetImage() + if filterImage != nil { + filter = filterImage.Image + } + } + results, err := s.StorageImageServer().ListImages(s.ImageContext(), filter) + if err != nil { + return nil, err + } + response := pb.ListImagesResponse{} + for _, result := range results { + if result.Size != nil { + response.Images = append(response.Images, &pb.Image{ + Id: result.ID, + RepoTags: result.Names, + Size_: *result.Size, + }) + } else { + response.Images = append(response.Images, &pb.Image{ + Id: result.ID, + RepoTags: result.Names, + }) + } + } + logrus.Debugf("ListImagesResponse: %+v", response) + return &response, nil +} diff --git a/server/image_pull.go b/server/image_pull.go new file mode 100644 index 000000000..26d08912f --- /dev/null +++ b/server/image_pull.go @@ -0,0 +1,108 @@ +package server + +import ( + "encoding/base64" + "strings" + + "github.com/containers/image/copy" + "github.com/containers/image/types" + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +// PullImage pulls a image with authentication config. +func (s *Server) PullImage(ctx context.Context, req *pb.PullImageRequest) (*pb.PullImageResponse, error) { + logrus.Debugf("PullImageRequest: %+v", req) + // TODO: what else do we need here? (Signatures when the story isn't just pulling from docker://) + image := "" + img := req.GetImage() + if img != nil { + image = img.Image + } + + var ( + images []string + pulled string + err error + ) + images, err = s.StorageImageServer().ResolveNames(image) + if err != nil { + return nil, err + } + for _, img := range images { + var ( + username string + password string + ) + if req.GetAuth() != nil { + username = req.GetAuth().Username + password = req.GetAuth().Password + if req.GetAuth().Auth != "" { + username, password, err = decodeDockerAuth(req.GetAuth().Auth) + if err != nil { + logrus.Debugf("error decoding authentication for image %s: %v", img, err) + continue + } + } + } + options := ©.Options{ + SourceCtx: &types.SystemContext{}, + } + // Specifying a username indicates the user intends to send authentication to the registry. + if username != "" { + options.SourceCtx = &types.SystemContext{ + DockerAuthConfig: &types.DockerAuthConfig{ + Username: username, + Password: password, + }, + } + } + + var canPull bool + canPull, err = s.StorageImageServer().CanPull(img, options) + if err != nil && !canPull { + logrus.Debugf("error checking image %s: %v", img, err) + continue + } + + // let's be smart, docker doesn't repull if image already exists. 
+ _, err = s.StorageImageServer().ImageStatus(s.ImageContext(), img) + if err == nil { + logrus.Debugf("image %s already in store, skipping pull", img) + pulled = img + break + } + + _, err = s.StorageImageServer().PullImage(s.ImageContext(), img, options) + if err != nil { + logrus.Debugf("error pulling image %s: %v", img, err) + continue + } + pulled = img + break + } + if pulled == "" && err != nil { + return nil, err + } + resp := &pb.PullImageResponse{ + ImageRef: pulled, + } + logrus.Debugf("PullImageResponse: %+v", resp) + return resp, nil +} + +func decodeDockerAuth(s string) (string, string, error) { + decoded, err := base64.StdEncoding.DecodeString(s) + if err != nil { + return "", "", err + } + parts := strings.SplitN(string(decoded), ":", 2) + if len(parts) != 2 { + // if it's invalid just skip, as docker does + return "", "", nil + } + user := parts[0] + password := strings.Trim(parts[1], "\x00") + return user, password, nil +} diff --git a/server/image_remove.go b/server/image_remove.go new file mode 100644 index 000000000..d15296ccc --- /dev/null +++ b/server/image_remove.go @@ -0,0 +1,52 @@ +package server + +import ( + "fmt" + "strings" + + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +// RemoveImage removes the image. +func (s *Server) RemoveImage(ctx context.Context, req *pb.RemoveImageRequest) (*pb.RemoveImageResponse, error) { + logrus.Debugf("RemoveImageRequest: %+v", req) + image := "" + img := req.GetImage() + if img != nil { + image = img.Image + } + if image == "" { + return nil, fmt.Errorf("no image specified") + } + var ( + images []string + err error + deleted bool + ) + images, err = s.StorageImageServer().ResolveNames(image) + if err != nil { + // This means we got an image ID + if strings.Contains(err.Error(), "cannot specify 64-byte hexadecimal strings") { + images = append(images, image) + } else { + return nil, err + } + } + for _, img := range images { + err = s.StorageImageServer().UntagImage(s.ImageContext(), img) + if err != nil { + logrus.Debugf("error deleting image %s: %v", img, err) + continue + } + deleted = true + break + } + if !deleted && err != nil { + return nil, err + } + resp := &pb.RemoveImageResponse{} + logrus.Debugf("RemoveImageResponse: %+v", resp) + return resp, nil +} diff --git a/server/image_status.go b/server/image_status.go new file mode 100644 index 000000000..5571c3023 --- /dev/null +++ b/server/image_status.go @@ -0,0 +1,53 @@ +package server + +import ( + "fmt" + "strings" + + "github.com/containers/storage" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +// ImageStatus returns the status of the image. 
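The Auth field handled by decodeDockerAuth above follows the docker config.json convention of base64(username:password). A round trip with made-up credentials:

package main

import (
	"encoding/base64"
	"fmt"
)

func main() {
	auth := base64.StdEncoding.EncodeToString([]byte("alice:s3cret"))
	fmt.Println(auth) // YWxpY2U6czNjcmV0

	decoded, _ := base64.StdEncoding.DecodeString(auth)
	fmt.Println(string(decoded)) // alice:s3cret, split on the first ":"
}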
+func (s *Server) ImageStatus(ctx context.Context, req *pb.ImageStatusRequest) (*pb.ImageStatusResponse, error) { + logrus.Debugf("ImageStatusRequest: %+v", req) + image := "" + img := req.GetImage() + if img != nil { + image = img.Image + } + if image == "" { + return nil, fmt.Errorf("no image specified") + } + images, err := s.StorageImageServer().ResolveNames(image) + if err != nil { + // This means we got an image ID + if strings.Contains(err.Error(), "cannot specify 64-byte hexadecimal strings") { + images = append(images, image) + } else { + return nil, err + } + } + // match just the first registry as that's what kube meant + image = images[0] + status, err := s.StorageImageServer().ImageStatus(s.ImageContext(), image) + if err != nil { + if errors.Cause(err) == storage.ErrImageUnknown { + return &pb.ImageStatusResponse{}, nil + } + return nil, err + } + resp := &pb.ImageStatusResponse{ + Image: &pb.Image{ + Id: status.ID, + RepoTags: status.Names, + RepoDigests: status.Digests, + Size_: *status.Size, + }, + } + logrus.Debugf("ImageStatusResponse: %+v", resp) + return resp, nil +} diff --git a/server/inspect.go b/server/inspect.go new file mode 100644 index 000000000..6e3e813cc --- /dev/null +++ b/server/inspect.go @@ -0,0 +1,105 @@ +package server + +import ( + "encoding/json" + "errors" + "fmt" + "net/http" + + "github.com/go-zoo/bone" + "github.com/kubernetes-incubator/cri-o/libkpod/sandbox" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/kubernetes-incubator/cri-o/types" + "github.com/sirupsen/logrus" +) + +func (s *Server) getInfo() types.CrioInfo { + return types.CrioInfo{ + StorageDriver: s.config.Config.Storage, + StorageRoot: s.config.Config.Root, + CgroupDriver: s.config.Config.CgroupManager, + } +} + +var ( + errCtrNotFound = errors.New("container not found") + errCtrStateNil = errors.New("container state is nil") + errSandboxNotFound = errors.New("sandbox for container not found") +) + +func (s *Server) getContainerInfo(id string, getContainerFunc func(id string) *oci.Container, getInfraContainerFunc func(id string) *oci.Container, getSandboxFunc func(id string) *sandbox.Sandbox) (types.ContainerInfo, error) { + ctr := getContainerFunc(id) + if ctr == nil { + ctr = getInfraContainerFunc(id) + if ctr == nil { + return types.ContainerInfo{}, errCtrNotFound + } + } + // TODO(mrunalp): should we call UpdateStatus()? 
+ ctrState := ctr.State() + if ctrState == nil { + return types.ContainerInfo{}, errCtrStateNil + } + sb := getSandboxFunc(ctr.Sandbox()) + if sb == nil { + logrus.Debugf("can't find sandbox %s for container %s", ctr.Sandbox(), id) + return types.ContainerInfo{}, errSandboxNotFound + } + return types.ContainerInfo{ + Name: ctr.Name(), + Pid: ctrState.Pid, + Image: ctr.Image(), + CreatedTime: ctrState.Created.UnixNano(), + Labels: ctr.Labels(), + Annotations: ctr.Annotations(), + CrioAnnotations: ctr.CrioAnnotations(), + Root: ctr.MountPoint(), + LogPath: ctr.LogPath(), + Sandbox: ctr.Sandbox(), + IP: sb.IP(), + }, nil + +} + +// GetInfoMux returns the mux used to serve info requests +func (s *Server) GetInfoMux() *bone.Mux { + mux := bone.New() + + mux.Get("/info", http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + ci := s.getInfo() + js, err := json.Marshal(ci) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "application/json") + w.Write(js) + })) + + mux.Get("/containers/:id", http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + containerID := bone.GetValue(req, "id") + ci, err := s.getContainerInfo(containerID, s.GetContainer, s.getInfraContainer, s.getSandbox) + if err != nil { + switch err { + case errCtrNotFound: + http.Error(w, fmt.Sprintf("can't find the container with id %s", containerID), http.StatusNotFound) + case errCtrStateNil: + http.Error(w, fmt.Sprintf("can't find container state for container with id %s", containerID), http.StatusInternalServerError) + case errSandboxNotFound: + http.Error(w, fmt.Sprintf("can't find the sandbox for container id %s", containerID), http.StatusNotFound) + default: + http.Error(w, err.Error(), http.StatusInternalServerError) + } + return + } + js, err := json.Marshal(ci) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "application/json") + w.Write(js) + })) + + return mux +} diff --git a/server/inspect_test.go b/server/inspect_test.go new file mode 100644 index 000000000..7be46c4ed --- /dev/null +++ b/server/inspect_test.go @@ -0,0 +1,235 @@ +package server + +import ( + "testing" + "time" + + "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" + + "github.com/containernetworking/plugins/pkg/ns" + "github.com/kubernetes-incubator/cri-o/libkpod" + "github.com/kubernetes-incubator/cri-o/libkpod/sandbox" + "github.com/kubernetes-incubator/cri-o/oci" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func TestGetInfo(t *testing.T) { + c := libkpod.DefaultConfig() + c.RootConfig.Storage = "afoobarstorage" + c.RootConfig.Root = "afoobarroot" + c.RuntimeConfig.CgroupManager = "systemd" + apiConfig := APIConfig{} + s := &Server{ + config: Config{*c, apiConfig}, + } + ci := s.getInfo() + if ci.CgroupDriver != "systemd" { + t.Fatalf("expected 'systemd', got %q", ci.CgroupDriver) + } + if ci.StorageDriver != "afoobarstorage" { + t.Fatalf("expected 'afoobarstorage', got %q", ci.StorageDriver) + } + if ci.StorageRoot != "afoobarroot" { + t.Fatalf("expected 'afoobarroot', got %q", ci.StorageRoot) + } +} + +type mockNetNS struct { +} + +func (ns mockNetNS) Close() error { + return nil +} +func (ns mockNetNS) Fd() uintptr { + ptr := new(uintptr) + return *ptr +} +func (ns mockNetNS) Do(toRun func(ns.NetNS) error) error { + return nil +} +func (ns mockNetNS) Set() error { + return nil +} +func (ns mockNetNS) Path() string { + return "" +} + +func 
TestGetContainerInfo(t *testing.T) { + s := &Server{} + created := time.Now() + labels := map[string]string{ + "io.kubernetes.container.name": "POD", + "io.kubernetes.test2": "value2", + "io.kubernetes.test3": "value3", + } + annotations := map[string]string{ + "io.kubernetes.test": "value", + "io.kubernetes.test1": "value1", + } + getContainerFunc := func(id string) *oci.Container { + container, err := oci.NewContainer("testid", "testname", "", "/container/logs", mockNetNS{}, labels, annotations, annotations, "imageName", "imageName", "imageRef", &runtime.ContainerMetadata{}, "testsandboxid", false, false, false, false, false, "/root/for/container", created, "SIGKILL") + if err != nil { + t.Fatal(err) + } + container.SetMountPoint("/var/foo/container") + cstate := &oci.ContainerState{} + cstate.State = specs.State{ + Pid: 42, + } + cstate.Created = created + container.SetState(cstate) + return container + } + getInfraContainerFunc := func(id string) *oci.Container { + return nil + } + getSandboxFunc := func(id string) *sandbox.Sandbox { + s := &sandbox.Sandbox{} + s.AddIP("1.1.1.42") + return s + } + ci, err := s.getContainerInfo("", getContainerFunc, getInfraContainerFunc, getSandboxFunc) + if err != nil { + t.Fatal(err) + } + if ci.CreatedTime != created.UnixNano() { + t.Fatalf("expected same created time %d, got %d", created.UnixNano(), ci.CreatedTime) + } + if ci.Pid != 42 { + t.Fatalf("expected pid 42, got %v", ci.Pid) + } + if ci.Name != "testname" { + t.Fatalf("expected name testname, got %s", ci.Name) + } + if ci.Image != "imageName" { + t.Fatalf("expected image name imageName, got %s", ci.Image) + } + if ci.Root != "/var/foo/container" { + t.Fatalf("expected root to be /var/foo/container, got %s", ci.Root) + } + if ci.LogPath != "/container/logs" { + t.Fatalf("expected log path to be /containers/logs, got %s", ci.LogPath) + } + if ci.Sandbox != "testsandboxid" { + t.Fatalf("expected sandbox to be testsandboxid, got %s", ci.Sandbox) + } + if ci.IP != "1.1.1.42" { + t.Fatalf("expected ip 1.1.1.42, got %s", ci.IP) + } + if len(ci.Annotations) == 0 { + t.Fatal("annotations are empty") + } + if len(ci.Labels) == 0 { + t.Fatal("labels are empty") + } + if len(ci.Annotations) != len(annotations) { + t.Fatalf("container info annotations len (%d) isn't the same as original annotations len (%d)", len(ci.Annotations), len(annotations)) + } + if len(ci.Labels) != len(labels) { + t.Fatalf("container info labels len (%d) isn't the same as original labels len (%d)", len(ci.Labels), len(labels)) + } + var found bool + for k, v := range annotations { + found = false + for key, value := range ci.Annotations { + if k == key && v == value { + found = true + break + } + } + if !found { + t.Fatalf("key %s with value %v wasn't in container info annotations", k, v) + } + } + for k, v := range labels { + found = false + for key, value := range ci.Labels { + if k == key && v == value { + found = true + break + } + } + if !found { + t.Fatalf("key %s with value %v wasn't in container info labels", k, v) + } + } +} + +func TestGetContainerInfoCtrNotFound(t *testing.T) { + s := &Server{} + getContainerFunc := func(id string) *oci.Container { + return nil + } + getInfraContainerFunc := func(id string) *oci.Container { + return nil + } + getSandboxFunc := func(id string) *sandbox.Sandbox { + return nil + } + _, err := s.getContainerInfo("", getContainerFunc, getInfraContainerFunc, getSandboxFunc) + if err == nil { + t.Fatal("expected an error but got nothing") + } + if err != errCtrNotFound { + 
t.Fatalf("expected errCtrNotFound error, got %v", err) + } +} + +func TestGetContainerInfoCtrStateNil(t *testing.T) { + s := &Server{} + created := time.Now() + labels := map[string]string{} + annotations := map[string]string{} + getContainerFunc := func(id string) *oci.Container { + container, err := oci.NewContainer("testid", "testname", "", "/container/logs", mockNetNS{}, labels, annotations, annotations, "imageName", "imageName", "imageRef", &runtime.ContainerMetadata{}, "testsandboxid", false, false, false, false, false, "/root/for/container", created, "SIGKILL") + if err != nil { + t.Fatal(err) + } + container.SetMountPoint("/var/foo/container") + container.SetState(nil) + return container + } + getInfraContainerFunc := func(id string) *oci.Container { + return nil + } + getSandboxFunc := func(id string) *sandbox.Sandbox { + s := &sandbox.Sandbox{} + s.AddIP("1.1.1.42") + return s + } + _, err := s.getContainerInfo("", getContainerFunc, getInfraContainerFunc, getSandboxFunc) + if err == nil { + t.Fatal("expected an error but got nothing") + } + if err != errCtrStateNil { + t.Fatalf("expected errCtrStateNil error, got %v", err) + } +} + +func TestGetContainerInfoSandboxNotFound(t *testing.T) { + s := &Server{} + created := time.Now() + labels := map[string]string{} + annotations := map[string]string{} + getContainerFunc := func(id string) *oci.Container { + container, err := oci.NewContainer("testid", "testname", "", "/container/logs", mockNetNS{}, labels, annotations, annotations, "imageName", "imageName", "imageRef", &runtime.ContainerMetadata{}, "testsandboxid", false, false, false, false, false, "/root/for/container", created, "SIGKILL") + if err != nil { + t.Fatal(err) + } + container.SetMountPoint("/var/foo/container") + return container + } + getInfraContainerFunc := func(id string) *oci.Container { + return nil + } + getSandboxFunc := func(id string) *sandbox.Sandbox { + return nil + } + _, err := s.getContainerInfo("", getContainerFunc, getInfraContainerFunc, getSandboxFunc) + if err == nil { + t.Fatal("expected an error but got nothing") + } + if err != errSandboxNotFound { + t.Fatalf("expected errSandboxNotFound error, got %v", err) + } +} diff --git a/server/naming.go b/server/naming.go new file mode 100644 index 000000000..cc18ba409 --- /dev/null +++ b/server/naming.go @@ -0,0 +1,86 @@ +package server + +import ( + "fmt" + "strings" + + "github.com/docker/docker/pkg/stringid" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +const ( + kubePrefix = "k8s" + infraName = "POD" + nameDelimiter = "_" +) + +func makeSandboxName(sandboxConfig *pb.PodSandboxConfig) string { + return strings.Join([]string{ + kubePrefix, + sandboxConfig.Metadata.Name, + sandboxConfig.Metadata.Namespace, + sandboxConfig.Metadata.Uid, + fmt.Sprintf("%d", sandboxConfig.Metadata.Attempt), + }, nameDelimiter) +} + +func makeSandboxContainerName(sandboxConfig *pb.PodSandboxConfig) string { + return strings.Join([]string{ + kubePrefix, + infraName, + sandboxConfig.Metadata.Name, + sandboxConfig.Metadata.Namespace, + sandboxConfig.Metadata.Uid, + fmt.Sprintf("%d", sandboxConfig.Metadata.Attempt), + }, nameDelimiter) +} + +func makeContainerName(sandboxMetadata *pb.PodSandboxMetadata, containerConfig *pb.ContainerConfig) string { + return strings.Join([]string{ + kubePrefix, + containerConfig.Metadata.Name, + sandboxMetadata.Name, + sandboxMetadata.Namespace, + sandboxMetadata.Uid, + fmt.Sprintf("%d", containerConfig.Metadata.Attempt), + }, nameDelimiter) +} + +func (s *Server) 
generatePodIDandName(sandboxConfig *pb.PodSandboxConfig) (string, string, error) { + var ( + err error + id = stringid.GenerateNonCryptoID() + ) + if sandboxConfig.Metadata.Namespace == "" { + return "", "", fmt.Errorf("cannot generate pod ID without namespace") + } + name, err := s.ReservePodName(id, makeSandboxName(sandboxConfig)) + if err != nil { + return "", "", err + } + return id, name, err +} + +func (s *Server) generateContainerIDandNameForSandbox(sandboxConfig *pb.PodSandboxConfig) (string, string, error) { + var ( + err error + id = stringid.GenerateNonCryptoID() + ) + name, err := s.ReserveContainerName(id, makeSandboxContainerName(sandboxConfig)) + if err != nil { + return "", "", err + } + return id, name, err +} + +func (s *Server) generateContainerIDandName(sandboxMetadata *pb.PodSandboxMetadata, containerConfig *pb.ContainerConfig) (string, string, error) { + var ( + err error + id = stringid.GenerateNonCryptoID() + ) + name, err := s.ReserveContainerName(id, makeContainerName(sandboxMetadata, containerConfig)) + if err != nil { + return "", "", err + } + return id, name, err +} diff --git a/server/runtime_status.go b/server/runtime_status.go new file mode 100644 index 000000000..67fc87b6f --- /dev/null +++ b/server/runtime_status.go @@ -0,0 +1,41 @@ +package server + +import ( + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +// Status returns the status of the runtime +func (s *Server) Status(ctx context.Context, req *pb.StatusRequest) (*pb.StatusResponse, error) { + + // Deal with Runtime conditions + runtimeReady, err := s.Runtime().RuntimeReady() + if err != nil { + return nil, err + } + networkReady, err := s.Runtime().NetworkReady() + if err != nil { + return nil, err + } + + // Use vendored strings + runtimeReadyConditionString := pb.RuntimeReady + networkReadyConditionString := pb.NetworkReady + + resp := &pb.StatusResponse{ + Status: &pb.RuntimeStatus{ + Conditions: []*pb.RuntimeCondition{ + { + Type: runtimeReadyConditionString, + Status: runtimeReady, + }, + { + Type: networkReadyConditionString, + Status: networkReady, + }, + }, + }, + } + + return resp, nil +} diff --git a/server/sandbox_list.go b/server/sandbox_list.go new file mode 100644 index 000000000..fbc5eafa8 --- /dev/null +++ b/server/sandbox_list.go @@ -0,0 +1,94 @@ +package server + +import ( + "github.com/kubernetes-incubator/cri-o/libkpod/sandbox" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + "k8s.io/apimachinery/pkg/fields" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +// filterSandbox returns whether passed container matches filtering criteria +func filterSandbox(p *pb.PodSandbox, filter *pb.PodSandboxFilter) bool { + if filter != nil { + if filter.State != nil { + if p.State != filter.State.State { + return false + } + } + if filter.LabelSelector != nil { + sel := fields.SelectorFromSet(filter.LabelSelector) + if !sel.Matches(fields.Set(p.Labels)) { + return false + } + } + } + return true +} + +// ListPodSandbox returns a list of SandBoxes. +func (s *Server) ListPodSandbox(ctx context.Context, req *pb.ListPodSandboxRequest) (*pb.ListPodSandboxResponse, error) { + logrus.Debugf("ListPodSandboxRequest %+v", req) + var pods []*pb.PodSandbox + var podList []*sandbox.Sandbox + for _, sb := range s.ContainerServer.ListSandboxes() { + podList = append(podList, sb) + } + + filter := req.Filter + // Filter by pod id first. 
+	if filter != nil {
+		if filter.Id != "" {
+			id, err := s.PodIDIndex().Get(filter.Id)
+			if err != nil {
+				// Not finding an ID in a filtered list should not be considered
+				// an error; the pod might have been deleted after being stopped.
+				// Log and return an empty struct.
+				logrus.Warnf("unable to find pod %s with filter", filter.Id)
+				return &pb.ListPodSandboxResponse{}, nil
+			}
+			sb := s.getSandbox(id)
+			if sb == nil {
+				podList = []*sandbox.Sandbox{}
+			} else {
+				podList = []*sandbox.Sandbox{sb}
+			}
+		}
+	}
+
+	for _, sb := range podList {
+		podInfraContainer := sb.InfraContainer()
+		if podInfraContainer == nil {
+			// This can't really happen, but if it does because of a bug
+			// it's better not to panic.
+			continue
+		}
+		cState := s.Runtime().ContainerStatus(podInfraContainer)
+		created := cState.Created.UnixNano()
+		rStatus := pb.PodSandboxState_SANDBOX_NOTREADY
+		if cState.Status == oci.ContainerStateRunning {
+			rStatus = pb.PodSandboxState_SANDBOX_READY
+		}
+
+		pod := &pb.PodSandbox{
+			Id:          sb.ID(),
+			CreatedAt:   created,
+			State:       rStatus,
+			Labels:      sb.Labels(),
+			Annotations: sb.Annotations(),
+			Metadata:    sb.Metadata(),
+		}
+
+		// Filter by other criteria such as state and labels.
+		if filterSandbox(pod, req.Filter) {
+			pods = append(pods, pod)
+		}
+	}
+
+	resp := &pb.ListPodSandboxResponse{
+		Items: pods,
+	}
+	logrus.Debugf("ListPodSandboxResponse %+v", resp)
+	return resp, nil
+}
diff --git a/server/sandbox_network.go b/server/sandbox_network.go
new file mode 100644
index 000000000..15cf99c8f
--- /dev/null
+++ b/server/sandbox_network.go
@@ -0,0 +1,70 @@
+package server
+
+import (
+	"fmt"
+	"net"
+
+	"github.com/kubernetes-incubator/cri-o/libkpod/sandbox"
+	"github.com/sirupsen/logrus"
+	"k8s.io/kubernetes/pkg/kubelet/network/hostport"
+)
+
+// networkStart sets up the sandbox's network and returns the pod IP on success
+// or an error.
+func (s *Server) networkStart(hostNetwork bool, sb *sandbox.Sandbox) (string, error) {
+	if hostNetwork {
+		return s.BindAddress(), nil
+	}
+
+	podNetwork := newPodNetwork(sb)
+	err := s.netPlugin.SetUpPod(podNetwork)
+	if err != nil {
+		return "", fmt.Errorf("failed to create pod network sandbox %s(%s): %v", sb.Name(), sb.ID(), err)
+	}
+
+	var ip string
+	if ip, err = s.netPlugin.GetPodNetworkStatus(podNetwork); err != nil {
+		return "", fmt.Errorf("failed to get network status for pod sandbox %s(%s): %v", sb.Name(), sb.ID(), err)
+	}
+
+	if len(sb.PortMappings()) > 0 {
+		ip4 := net.ParseIP(ip).To4()
+		if ip4 == nil {
+			return "", fmt.Errorf("failed to get valid ipv4 address for sandbox %s(%s)", sb.Name(), sb.ID())
+		}
+
+		if err = s.hostportManager.Add(sb.ID(), &hostport.PodPortMapping{
+			Name:         sb.Name(),
+			PortMappings: sb.PortMappings(),
+			IP:           ip4,
+			HostNetwork:  false,
+		}, "lo"); err != nil {
+			return "", fmt.Errorf("failed to add hostport mapping for sandbox %s(%s): %v", sb.Name(), sb.ID(), err)
+		}
+
+	}
+	return ip, nil
+}
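The To4() check in networkStart is what rejects pods without an IPv4 address when host port mappings are requested; net.ParseIP(...).To4() returns nil for anything not representable as IPv4. A two-line demonstration:

package main

import (
	"fmt"
	"net"
)

func main() {
	fmt.Println(net.ParseIP("10.88.0.5").To4() != nil) // true
	fmt.Println(net.ParseIP("fd00::5").To4() != nil)   // false
}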
+
+// networkStop cleans up and removes a pod's network. It is best-effort and
+// must call the network plugin even if the network namespace is already gone.
+func (s *Server) networkStop(hostNetwork bool, sb *sandbox.Sandbox) error {
+	if !hostNetwork {
+		if err := s.hostportManager.Remove(sb.ID(), &hostport.PodPortMapping{
+			Name:         sb.Name(),
+			PortMappings: sb.PortMappings(),
+			HostNetwork:  false,
+		}); err != nil {
+			logrus.Warnf("failed to remove hostport for pod sandbox %s(%s): %v",
+				sb.Name(), sb.ID(), err)
+		}
+
+		podNetwork := newPodNetwork(sb)
+		if err := s.netPlugin.TearDownPod(podNetwork); err != nil {
+			logrus.Warnf("failed to destroy network for pod sandbox %s(%s): %v",
+				sb.Name(), sb.ID(), err)
+		}
+	}
+
+	return nil
+}
diff --git a/server/sandbox_remove.go b/server/sandbox_remove.go
new file mode 100644
index 000000000..b0e073844
--- /dev/null
+++ b/server/sandbox_remove.go
@@ -0,0 +1,98 @@
+package server
+
+import (
+	"fmt"
+
+	"github.com/containers/storage"
+	"github.com/kubernetes-incubator/cri-o/libkpod/sandbox"
+	"github.com/kubernetes-incubator/cri-o/oci"
+	pkgstorage "github.com/kubernetes-incubator/cri-o/pkg/storage"
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+	"golang.org/x/net/context"
+	pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+)
+
+// RemovePodSandbox deletes the sandbox. If there are any running containers in the
+// sandbox, they should be force deleted.
+func (s *Server) RemovePodSandbox(ctx context.Context, req *pb.RemovePodSandboxRequest) (*pb.RemovePodSandboxResponse, error) {
+	logrus.Debugf("RemovePodSandboxRequest %+v", req)
+	sb, err := s.getPodSandboxFromRequest(req.PodSandboxId)
+	if err != nil {
+		if err == sandbox.ErrIDEmpty {
+			return nil, err
+		}
+
+		// If the sandbox isn't found we just return an empty response to adhere
+		// to the CRI interface, which expects us not to error out in not-found
+		// cases.
+ + resp := &pb.RemovePodSandboxResponse{} + logrus.Warnf("could not get sandbox %s, it's probably been removed already: %v", req.PodSandboxId, err) + return resp, nil + } + + podInfraContainer := sb.InfraContainer() + containers := sb.Containers().List() + containers = append(containers, podInfraContainer) + + // Delete all the containers in the sandbox + for _, c := range containers { + if !sb.Stopped() { + cState := s.Runtime().ContainerStatus(c) + if cState.Status == oci.ContainerStateCreated || cState.Status == oci.ContainerStateRunning { + if err := s.Runtime().StopContainer(ctx, c, 10); err != nil { + // Assume container is already stopped + logrus.Warnf("failed to stop container %s: %v", c.Name(), err) + } + } + } + + if err := s.Runtime().DeleteContainer(c); err != nil { + return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err) + } + + if c.ID() == podInfraContainer.ID() { + continue + } + + if err := s.StorageRuntimeServer().StopContainer(c.ID()); err != nil && err != storage.ErrContainerUnknown { + // assume container already umounted + logrus.Warnf("failed to stop container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err) + } + if err := s.StorageRuntimeServer().DeleteContainer(c.ID()); err != nil && err != storage.ErrContainerUnknown { + return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err) + } + + s.ReleaseContainerName(c.Name()) + s.removeContainer(c) + if err := s.CtrIDIndex().Delete(c.ID()); err != nil { + return nil, fmt.Errorf("failed to delete container %s in pod sandbox %s from index: %v", c.Name(), sb.ID(), err) + } + } + + s.removeInfraContainer(podInfraContainer) + + // Remove the files related to the sandbox + if err := s.StorageRuntimeServer().StopContainer(sb.ID()); err != nil && errors.Cause(err) != storage.ErrContainerUnknown { + logrus.Warnf("failed to stop sandbox container in pod sandbox %s: %v", sb.ID(), err) + } + if err := s.StorageRuntimeServer().RemovePodSandbox(sb.ID()); err != nil && err != pkgstorage.ErrInvalidSandboxID { + return nil, fmt.Errorf("failed to remove pod sandbox %s: %v", sb.ID(), err) + } + + s.ReleaseContainerName(podInfraContainer.Name()) + if err := s.CtrIDIndex().Delete(podInfraContainer.ID()); err != nil { + return nil, fmt.Errorf("failed to delete infra container %s in pod sandbox %s from index: %v", podInfraContainer.ID(), sb.ID(), err) + } + + s.ReleasePodName(sb.Name()) + s.removeSandbox(sb.ID()) + if err := s.PodIDIndex().Delete(sb.ID()); err != nil { + return nil, fmt.Errorf("failed to delete pod sandbox %s from index: %v", sb.ID(), err) + } + + resp := &pb.RemovePodSandboxResponse{} + logrus.Debugf("RemovePodSandboxResponse %+v", resp) + return resp, nil +} diff --git a/server/sandbox_run.go b/server/sandbox_run.go new file mode 100644 index 000000000..72afdb229 --- /dev/null +++ b/server/sandbox_run.go @@ -0,0 +1,615 @@ +package server + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "os" + "path" + "path/filepath" + "regexp" + "strconv" + "strings" + "time" + + "github.com/containers/storage" + "github.com/kubernetes-incubator/cri-o/libkpod/sandbox" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/kubernetes-incubator/cri-o/pkg/annotations" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" + "github.com/opencontainers/selinux/go-selinux/label" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + 
"golang.org/x/sys/unix" + "k8s.io/kubernetes/pkg/api/v1" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" + "k8s.io/kubernetes/pkg/kubelet/leaky" + "k8s.io/kubernetes/pkg/kubelet/network/hostport" + "k8s.io/kubernetes/pkg/kubelet/types" +) + +const ( + // PodInfraOOMAdj is the value that we set for oom score adj for + // the pod infra container. + // TODO: Remove this const once this value is provided over CRI + // See https://github.com/kubernetes/kubernetes/issues/47938 + PodInfraOOMAdj int = -998 + // PodInfraCPUshares is default cpu shares for sandbox container. + PodInfraCPUshares = 2 +) + +// privilegedSandbox returns true if the sandbox configuration +// requires additional host privileges for the sandbox. +func (s *Server) privilegedSandbox(req *pb.RunPodSandboxRequest) bool { + securityContext := req.GetConfig().GetLinux().GetSecurityContext() + if securityContext == nil { + return false + } + + if securityContext.Privileged { + return true + } + + namespaceOptions := securityContext.GetNamespaceOptions() + if namespaceOptions == nil { + return false + } + + if namespaceOptions.HostNetwork || + namespaceOptions.HostPid || + namespaceOptions.HostIpc { + return true + } + + return false +} + +// trustedSandbox returns true if the sandbox will run trusted workloads. +func (s *Server) trustedSandbox(req *pb.RunPodSandboxRequest) bool { + kubeAnnotations := req.GetConfig().GetAnnotations() + + trustedAnnotation, ok := kubeAnnotations[annotations.TrustedSandbox] + if !ok { + // A sandbox is trusted by default. + return true + } + + return isTrue(trustedAnnotation) +} + +func (s *Server) runContainer(container *oci.Container, cgroupParent string) error { + if err := s.Runtime().CreateContainer(container, cgroupParent); err != nil { + return err + } + return s.Runtime().StartContainer(container) +} + +var ( + conflictRE = regexp.MustCompile(`already reserved for pod "([0-9a-z]+)"`) +) + +// RunPodSandbox creates and runs a pod-level sandbox. 
+func (s *Server) RunPodSandbox(ctx context.Context, req *pb.RunPodSandboxRequest) (resp *pb.RunPodSandboxResponse, err error) { + s.updateLock.RLock() + defer s.updateLock.RUnlock() + + logrus.Debugf("RunPodSandboxRequest %+v", req) + var processLabel, mountLabel, resolvPath string + // process req.Name + kubeName := req.GetConfig().GetMetadata().Name + if kubeName == "" { + return nil, fmt.Errorf("PodSandboxConfig.Name should not be empty") + } + + namespace := req.GetConfig().GetMetadata().Namespace + attempt := req.GetConfig().GetMetadata().Attempt + + id, name, err := s.generatePodIDandName(req.GetConfig()) + if err != nil { + if strings.Contains(err.Error(), "already reserved for pod") { + matches := conflictRE.FindStringSubmatch(err.Error()) + if len(matches) != 2 { + return nil, err + } + dupID := matches[1] + if _, err := s.StopPodSandbox(ctx, &pb.StopPodSandboxRequest{PodSandboxId: dupID}); err != nil { + return nil, err + } + if _, err := s.RemovePodSandbox(ctx, &pb.RemovePodSandboxRequest{PodSandboxId: dupID}); err != nil { + return nil, err + } + id, name, err = s.generatePodIDandName(req.GetConfig()) + if err != nil { + return nil, err + } + } else { + return nil, err + } + } + + defer func() { + if err != nil { + s.ReleasePodName(name) + } + }() + + _, containerName, err := s.generateContainerIDandNameForSandbox(req.GetConfig()) + if err != nil { + return nil, err + } + + defer func() { + if err != nil { + s.ReleaseContainerName(containerName) + } + }() + + podContainer, err := s.StorageRuntimeServer().CreatePodSandbox(s.ImageContext(), + name, id, + s.config.PauseImage, "", + containerName, + req.GetConfig().GetMetadata().Name, + req.GetConfig().GetMetadata().Uid, + namespace, + attempt, + nil) + if errors.Cause(err) == storage.ErrDuplicateName { + return nil, fmt.Errorf("pod sandbox with name %q already exists", name) + } + if err != nil { + return nil, fmt.Errorf("error creating pod sandbox with name %q: %v", name, err) + } + defer func() { + if err != nil { + if err2 := s.StorageRuntimeServer().RemovePodSandbox(id); err2 != nil { + logrus.Warnf("couldn't cleanup pod sandbox %q: %v", id, err2) + } + } + }() + + // TODO: factor generating/updating the spec into something other projects can vendor + + // creates a spec Generator with the default spec. 
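+	// (generate.New comes from opencontainers/runtime-tools and starts from a
+	// default runtime-spec config; every g.Set*/g.Add* call below mutates that
+	// spec before SaveToFile writes the sandbox's config.json.)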
+ g := generate.New() + + // setup defaults for the pod sandbox + g.SetRootReadonly(true) + if s.config.PauseCommand == "" { + if podContainer.Config != nil { + g.SetProcessArgs(podContainer.Config.Config.Cmd) + } else { + g.SetProcessArgs([]string{sandbox.PodInfraCommand}) + } + } else { + g.SetProcessArgs([]string{s.config.PauseCommand}) + } + + // set DNS options + if req.GetConfig().GetDnsConfig() != nil { + dnsServers := req.GetConfig().GetDnsConfig().Servers + dnsSearches := req.GetConfig().GetDnsConfig().Searches + dnsOptions := req.GetConfig().GetDnsConfig().Options + resolvPath = fmt.Sprintf("%s/resolv.conf", podContainer.RunDir) + err = parseDNSOptions(dnsServers, dnsSearches, dnsOptions, resolvPath) + if err != nil { + err1 := removeFile(resolvPath) + if err1 != nil { + err = err1 + return nil, fmt.Errorf("%v; failed to remove %s: %v", err, resolvPath, err1) + } + return nil, err + } + if err := label.Relabel(resolvPath, mountLabel, true); err != nil && err != unix.ENOTSUP { + return nil, err + } + + g.AddBindMount(resolvPath, "/etc/resolv.conf", []string{"ro"}) + } + + // add metadata + metadata := req.GetConfig().GetMetadata() + metadataJSON, err := json.Marshal(metadata) + if err != nil { + return nil, err + } + + // add labels + labels := req.GetConfig().GetLabels() + + // Add special container name label for the infra container + labelsJSON := []byte{} + if labels != nil { + labels[types.KubernetesContainerNameLabel] = leaky.PodInfraContainerName + labelsJSON, err = json.Marshal(labels) + if err != nil { + return nil, err + } + } + + // add annotations + kubeAnnotations := req.GetConfig().GetAnnotations() + kubeAnnotationsJSON, err := json.Marshal(kubeAnnotations) + if err != nil { + return nil, err + } + + // set log directory + logDir := req.GetConfig().LogDirectory + if logDir == "" { + logDir = filepath.Join(s.config.LogDir, id) + } + if err = os.MkdirAll(logDir, 0700); err != nil { + return nil, err + } + // This should always be absolute from k8s. + if !filepath.IsAbs(logDir) { + return nil, fmt.Errorf("requested logDir for sbox id %s is a relative path: %s", id, logDir) + } + + privileged := s.privilegedSandbox(req) + + securityContext := req.GetConfig().GetLinux().GetSecurityContext() + if securityContext == nil { + logrus.Warn("no security context found in config.") + } + + processLabel, mountLabel, err = getSELinuxLabels(securityContext.GetSelinuxOptions(), privileged) + if err != nil { + return nil, err + } + + // Don't use SELinux separation with Host Pid or IPC Namespace or privileged. + if securityContext.GetNamespaceOptions().GetHostPid() || securityContext.GetNamespaceOptions().GetHostIpc() { + processLabel, mountLabel = "", "" + } + g.SetProcessSelinuxLabel(processLabel) + g.SetLinuxMountLabel(mountLabel) + + // create shm mount for the pod containers. 
+ var shmPath string + if securityContext.GetNamespaceOptions().GetHostIpc() { + shmPath = "/dev/shm" + } else { + shmPath, err = setupShm(podContainer.RunDir, mountLabel) + if err != nil { + return nil, err + } + defer func() { + if err != nil { + if err2 := unix.Unmount(shmPath, unix.MNT_DETACH); err2 != nil { + logrus.Warnf("failed to unmount shm for pod: %v", err2) + } + } + }() + } + + err = s.setPodSandboxMountLabel(id, mountLabel) + if err != nil { + return nil, err + } + + if err = s.CtrIDIndex().Add(id); err != nil { + return nil, err + } + + defer func() { + if err != nil { + if err2 := s.CtrIDIndex().Delete(id); err2 != nil { + logrus.Warnf("couldn't delete ctr id %s from idIndex", id) + } + } + }() + + // set log path inside log directory + logPath := filepath.Join(logDir, id+".log") + + // Handle https://issues.k8s.io/44043 + if err := ensureSaneLogPath(logPath); err != nil { + return nil, err + } + + hostNetwork := securityContext.GetNamespaceOptions().GetHostNetwork() + + hostname, err := getHostname(id, req.GetConfig().Hostname, hostNetwork) + if err != nil { + return nil, err + } + g.SetHostname(hostname) + + trusted := s.trustedSandbox(req) + g.AddAnnotation(annotations.Metadata, string(metadataJSON)) + g.AddAnnotation(annotations.Labels, string(labelsJSON)) + g.AddAnnotation(annotations.Annotations, string(kubeAnnotationsJSON)) + g.AddAnnotation(annotations.LogPath, logPath) + g.AddAnnotation(annotations.Name, name) + g.AddAnnotation(annotations.ContainerType, annotations.ContainerTypeSandbox) + g.AddAnnotation(annotations.SandboxID, id) + g.AddAnnotation(annotations.ContainerName, containerName) + g.AddAnnotation(annotations.ContainerID, id) + g.AddAnnotation(annotations.ShmPath, shmPath) + g.AddAnnotation(annotations.PrivilegedRuntime, fmt.Sprintf("%v", privileged)) + g.AddAnnotation(annotations.TrustedSandbox, fmt.Sprintf("%v", trusted)) + g.AddAnnotation(annotations.ResolvPath, resolvPath) + g.AddAnnotation(annotations.HostName, hostname) + g.AddAnnotation(annotations.KubeName, kubeName) + if podContainer.Config.Config.StopSignal != "" { + // this key is defined in image-spec conversion document at https://github.com/opencontainers/image-spec/pull/492/files#diff-8aafbe2c3690162540381b8cdb157112R57 + g.AddAnnotation("org.opencontainers.image.stopSignal", podContainer.Config.Config.StopSignal) + } + + created := time.Now() + g.AddAnnotation(annotations.Created, created.Format(time.RFC3339Nano)) + + portMappings := convertPortMappings(req.GetConfig().GetPortMappings()) + + // setup cgroup settings + cgroupParent := req.GetConfig().GetLinux().GetCgroupParent() + if cgroupParent != "" { + if s.config.CgroupManager == oci.SystemdCgroupsManager { + if len(cgroupParent) <= 6 || !strings.HasSuffix(path.Base(cgroupParent), ".slice") { + return nil, fmt.Errorf("cri-o configured with systemd cgroup manager, but did not receive slice as parent: %s", cgroupParent) + } + cgPath, err := convertCgroupFsNameToSystemd(cgroupParent) + if err != nil { + return nil, err + } + g.SetLinuxCgroupsPath(cgPath + ":" + "crio" + ":" + id) + cgroupParent = cgPath + } else { + if strings.HasSuffix(path.Base(cgroupParent), ".slice") { + return nil, fmt.Errorf("cri-o configured with cgroupfs cgroup manager, but received systemd slice as parent: %s", cgroupParent) + } + cgPath := filepath.Join(cgroupParent, scopePrefix+"-"+id) + g.SetLinuxCgroupsPath(cgPath) + } + } + + sb, err := sandbox.New(id, namespace, name, kubeName, logDir, labels, kubeAnnotations, processLabel, mountLabel, metadata, shmPath, 
cgroupParent, privileged, trusted, resolvPath, hostname, portMappings) + if err != nil { + return nil, err + } + + s.addSandbox(sb) + defer func() { + if err != nil { + s.removeSandbox(id) + } + }() + + if err = s.PodIDIndex().Add(id); err != nil { + return nil, err + } + + defer func() { + if err != nil { + if err := s.PodIDIndex().Delete(id); err != nil { + logrus.Warnf("couldn't delete pod id %s from idIndex", id) + } + } + }() + + for k, v := range kubeAnnotations { + g.AddAnnotation(k, v) + } + for k, v := range labels { + g.AddAnnotation(k, v) + } + + // extract linux sysctls from annotations and pass down to oci runtime + safe, unsafe, err := SysctlsFromPodAnnotations(kubeAnnotations) + if err != nil { + return nil, err + } + for _, sysctl := range safe { + g.AddLinuxSysctl(sysctl.Name, sysctl.Value) + } + for _, sysctl := range unsafe { + g.AddLinuxSysctl(sysctl.Name, sysctl.Value) + } + + // Set OOM score adjust of the infra container to be very low + // so it doesn't get killed. + g.SetProcessOOMScoreAdj(PodInfraOOMAdj) + + g.SetLinuxResourcesCPUShares(PodInfraCPUshares) + + // set up namespaces + if hostNetwork { + err = g.RemoveLinuxNamespace(string(runtimespec.NetworkNamespace)) + if err != nil { + return nil, err + } + } else { + // Create the sandbox network namespace + if err = sb.NetNsCreate(); err != nil { + return nil, err + } + + defer func() { + if err == nil { + return + } + + if netnsErr := sb.NetNsRemove(); netnsErr != nil { + logrus.Warnf("Failed to remove networking namespace: %v", netnsErr) + } + }() + + // Pass the created namespace path to the runtime + err = g.AddOrReplaceLinuxNamespace(string(runtimespec.NetworkNamespace), sb.NetNsPath()) + if err != nil { + return nil, err + } + } + + if securityContext.GetNamespaceOptions().GetHostPid() { + err = g.RemoveLinuxNamespace(string(runtimespec.PIDNamespace)) + if err != nil { + return nil, err + } + } + + if securityContext.GetNamespaceOptions().GetHostIpc() { + err = g.RemoveLinuxNamespace(string(runtimespec.IPCNamespace)) + if err != nil { + return nil, err + } + } + + if !s.seccompEnabled { + g.Spec().Linux.Seccomp = nil + } + + saveOptions := generate.ExportOptions{} + mountPoint, err := s.StorageRuntimeServer().StartContainer(id) + if err != nil { + return nil, fmt.Errorf("failed to mount container %s in pod sandbox %s(%s): %v", containerName, sb.Name(), id, err) + } + g.AddAnnotation(annotations.MountPoint, mountPoint) + g.SetRootPath(mountPoint) + + hostnamePath := fmt.Sprintf("%s/hostname", podContainer.RunDir) + if err := ioutil.WriteFile(hostnamePath, []byte(hostname+"\n"), 0644); err != nil { + return nil, err + } + if err := label.Relabel(hostnamePath, mountLabel, true); err != nil && err != unix.ENOTSUP { + return nil, err + } + g.AddBindMount(hostnamePath, "/etc/hostname", []string{"ro"}) + g.AddAnnotation(annotations.HostnamePath, hostnamePath) + sb.AddHostnamePath(hostnamePath) + + container, err := oci.NewContainer(id, containerName, podContainer.RunDir, logPath, sb.NetNs(), labels, g.Spec().Annotations, kubeAnnotations, "", "", "", nil, id, false, false, false, sb.Privileged(), sb.Trusted(), podContainer.Dir, created, podContainer.Config.Config.StopSignal) + if err != nil { + return nil, err + } + container.SetSpec(g.Spec()) + container.SetMountPoint(mountPoint) + + sb.SetInfraContainer(container) + + var ip string + ip, err = s.networkStart(hostNetwork, sb) + if err != nil { + return nil, err + } + defer func() { + if err != nil { + s.networkStop(hostNetwork, sb) + } + }() + + 
g.AddAnnotation(annotations.IP, ip) + sb.AddIP(ip) + + err = g.SaveToFile(filepath.Join(podContainer.Dir, "config.json"), saveOptions) + if err != nil { + return nil, fmt.Errorf("failed to save template configuration for pod sandbox %s(%s): %v", sb.Name(), id, err) + } + if err = g.SaveToFile(filepath.Join(podContainer.RunDir, "config.json"), saveOptions); err != nil { + return nil, fmt.Errorf("failed to write runtime configuration for pod sandbox %s(%s): %v", sb.Name(), id, err) + } + + if err = s.runContainer(container, sb.CgroupParent()); err != nil { + return nil, err + } + + s.addInfraContainer(container) + + s.ContainerStateToDisk(container) + + resp = &pb.RunPodSandboxResponse{PodSandboxId: id} + logrus.Debugf("RunPodSandboxResponse: %+v", resp) + return resp, nil +} + +func convertPortMappings(in []*pb.PortMapping) []*hostport.PortMapping { + if in == nil { + return nil + } + out := make([]*hostport.PortMapping, len(in)) + for i, v := range in { + out[i] = &hostport.PortMapping{ + HostPort: v.HostPort, + ContainerPort: v.ContainerPort, + Protocol: v1.Protocol(v.Protocol.String()), + HostIP: v.HostIp, + } + } + return out +} + +func getHostname(id, hostname string, hostNetwork bool) (string, error) { + if hostNetwork { + if hostname == "" { + h, err := os.Hostname() + if err != nil { + return "", err + } + hostname = h + } + } else { + if hostname == "" { + hostname = id[:12] + } + } + return hostname, nil +} + +func (s *Server) setPodSandboxMountLabel(id, mountLabel string) error { + storageMetadata, err := s.StorageRuntimeServer().GetContainerMetadata(id) + if err != nil { + return err + } + storageMetadata.SetMountLabel(mountLabel) + return s.StorageRuntimeServer().SetContainerMetadata(id, storageMetadata) +} + +func getSELinuxLabels(selinuxOptions *pb.SELinuxOption, privileged bool) (processLabel string, mountLabel string, err error) { + if privileged { + return "", "", nil + } + labels := []string{} + if selinuxOptions != nil { + if selinuxOptions.User != "" { + labels = append(labels, "user:"+selinuxOptions.User) + } + if selinuxOptions.Role != "" { + labels = append(labels, "role:"+selinuxOptions.Role) + } + if selinuxOptions.Type != "" { + labels = append(labels, "type:"+selinuxOptions.Type) + } + if selinuxOptions.Level != "" { + labels = append(labels, "level:"+selinuxOptions.Level) + } + } + return label.InitLabels(labels) +} + +func setupShm(podSandboxRunDir, mountLabel string) (shmPath string, err error) { + shmPath = filepath.Join(podSandboxRunDir, "shm") + if err = os.Mkdir(shmPath, 0700); err != nil { + return "", err + } + shmOptions := "mode=1777,size=" + strconv.Itoa(sandbox.DefaultShmSize) + if err = unix.Mount("shm", shmPath, "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, + label.FormatMountLabel(shmOptions, mountLabel)); err != nil { + return "", fmt.Errorf("failed to mount shm tmpfs for pod: %v", err) + } + return shmPath, nil +} + +// convertCgroupFsNameToSystemd converts an expanded cgroupfs name to its systemd name. +// For example, it will convert test.slice/test-a.slice/test-a-b.slice to become test-a-b.slice +// NOTE: this is public right now to allow its usage in dockermanager and dockershim, ideally both those +// code areas could use something from libcontainer if we get this style function upstream. +func convertCgroupFsNameToSystemd(cgroupfsName string) (string, error) { + // TODO: see if libcontainer systemd implementation could use something similar, and if so, move + // this function up to that library. 
At that time, it would most likely do validation specific to systemd + // above and beyond the simple assumption here that the base of the path encodes the hierarchy + // per systemd convention. + return path.Base(cgroupfsName), nil +} diff --git a/server/sandbox_status.go b/server/sandbox_status.go new file mode 100644 index 000000000..f5b6dd09a --- /dev/null +++ b/server/sandbox_status.go @@ -0,0 +1,41 @@ +package server + +import ( + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +// PodSandboxStatus returns the Status of the PodSandbox. +func (s *Server) PodSandboxStatus(ctx context.Context, req *pb.PodSandboxStatusRequest) (*pb.PodSandboxStatusResponse, error) { + logrus.Debugf("PodSandboxStatusRequest %+v", req) + sb, err := s.getPodSandboxFromRequest(req.PodSandboxId) + if err != nil { + return nil, err + } + + podInfraContainer := sb.InfraContainer() + cState := s.Runtime().ContainerStatus(podInfraContainer) + + rStatus := pb.PodSandboxState_SANDBOX_NOTREADY + if cState.Status == oci.ContainerStateRunning { + rStatus = pb.PodSandboxState_SANDBOX_READY + } + + sandboxID := sb.ID() + resp := &pb.PodSandboxStatusResponse{ + Status: &pb.PodSandboxStatus{ + Id: sandboxID, + CreatedAt: podInfraContainer.CreatedAt().UnixNano(), + Network: &pb.PodSandboxNetworkStatus{Ip: sb.IP()}, + State: rStatus, + Labels: sb.Labels(), + Annotations: sb.Annotations(), + Metadata: sb.Metadata(), + }, + } + + logrus.Debugf("PodSandboxStatusResponse: %+v", resp) + return resp, nil +} diff --git a/server/sandbox_stop.go b/server/sandbox_stop.go new file mode 100644 index 000000000..9d6a5aa3c --- /dev/null +++ b/server/sandbox_stop.go @@ -0,0 +1,114 @@ +package server + +import ( + "fmt" + + "github.com/containers/storage" + "github.com/docker/docker/pkg/mount" + "github.com/docker/docker/pkg/symlink" + "github.com/kubernetes-incubator/cri-o/libkpod/sandbox" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/opencontainers/selinux/go-selinux/label" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/net/context" + "golang.org/x/sys/unix" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +// StopPodSandbox stops the sandbox. If there are any running containers in the +// sandbox, they should be force terminated. +func (s *Server) StopPodSandbox(ctx context.Context, req *pb.StopPodSandboxRequest) (*pb.StopPodSandboxResponse, error) { + logrus.Debugf("StopPodSandboxRequest %+v", req) + sb, err := s.getPodSandboxFromRequest(req.PodSandboxId) + if err != nil { + if err == sandbox.ErrIDEmpty { + return nil, err + } + + // If the sandbox isn't found we just return an empty response to adhere + // to the CRI interface, which expects no error in not-found cases. + + resp := &pb.StopPodSandboxResponse{} + logrus.Warnf("could not get sandbox %s, it's probably been stopped already: %v", req.PodSandboxId, err) + logrus.Debugf("StopPodSandboxResponse %s: %+v", req.PodSandboxId, resp) + return resp, nil + } + + if sb.Stopped() { + resp := &pb.StopPodSandboxResponse{} + logrus.Debugf("StopPodSandboxResponse %s: %+v", sb.ID(), resp) + return resp, nil + } +
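+ // Teardown below proceeds in order: stop sandbox networking and remove the network namespace + // first, then stop every container (the infra container included), release the SELinux process + // label, and finally unmount the pod-level /dev/shm tmpfs.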
+ // Clean up sandbox networking and close its network namespace. + hostNetwork := sb.NetNsPath() == "" + s.networkStop(hostNetwork, sb) + if err := sb.NetNsRemove(); err != nil { + return nil, err + } + + podInfraContainer := sb.InfraContainer() + containers := sb.Containers().List() + containers = append(containers, podInfraContainer) + + for _, c := range containers { + cStatus := s.Runtime().ContainerStatus(c) + if cStatus.Status != oci.ContainerStateStopped { + if err := s.Runtime().StopContainer(ctx, c, 10); err != nil { + return nil, fmt.Errorf("failed to stop container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err) + } + if c.ID() == podInfraContainer.ID() { + continue + } + if err := s.StorageRuntimeServer().StopContainer(c.ID()); err != nil && errors.Cause(err) != storage.ErrContainerUnknown { + // assume container already umounted + logrus.Warnf("failed to stop container %s in pod sandbox %s: %v", c.Name(), sb.ID(), err) + } + } + s.ContainerStateToDisk(c) + } + + if err := label.ReleaseLabel(sb.ProcessLabel()); err != nil { + return nil, err + } + + // unmount the shm for the pod + if sb.ShmPath() != "/dev/shm" { + // we get paths of the form + // /var/run/containers/storage/overlay-containers/CID/userdata/shm + // but /var/run on most systems is symlinked to /run so we first resolve + // the symlink and then check whether it's mounted + fp, err := symlink.FollowSymlinkInScope(sb.ShmPath(), "/") + if err != nil { + return nil, err + } + if mounted, err := mount.Mounted(fp); err == nil && mounted { + if err := unix.Unmount(fp, unix.MNT_DETACH); err != nil { + return nil, err + } + } + } + if err := s.StorageRuntimeServer().StopContainer(sb.ID()); err != nil && errors.Cause(err) != storage.ErrContainerUnknown { + logrus.Warnf("failed to stop sandbox container in pod sandbox %s: %v", sb.ID(), err) + } + + sb.SetStopped() + resp := &pb.StopPodSandboxResponse{} + logrus.Debugf("StopPodSandboxResponse %s: %+v", sb.ID(), resp) + return resp, nil +} + +// StopAllPodSandboxes stops all pod sandboxes +func (s *Server) StopAllPodSandboxes() { + logrus.Debugf("StopAllPodSandboxes") + for _, sb := range s.ContainerServer.ListSandboxes() { + pod := &pb.StopPodSandboxRequest{ + PodSandboxId: sb.ID(), + } + if _, err := s.StopPodSandbox(nil, pod); err != nil { + logrus.Warnf("could not StopPodSandbox %s: %v", sb.ID(), err) + } + } +} diff --git a/server/seccomp/seccomp.go b/server/seccomp/seccomp.go new file mode 100644 index 000000000..cf77c8274 --- /dev/null +++ b/server/seccomp/seccomp.go @@ -0,0 +1,165 @@ +// +build seccomp + +package seccomp + +import ( + "encoding/json" + "errors" + "fmt" + + "github.com/docker/docker/pkg/stringutils" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" + libseccomp "github.com/seccomp/libseccomp-golang" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +// IsEnabled returns true if seccomp is enabled for the host. +func IsEnabled() bool { + enabled := false + // Check if Seccomp is supported, via CONFIG_SECCOMP. + if err := unix.Prctl(unix.PR_GET_SECCOMP, 0, 0, 0, 0); err != unix.EINVAL { + // Make sure the kernel has CONFIG_SECCOMP_FILTER. + if err := unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0); err != unix.EINVAL { + enabled = true + } + } + logrus.Debugf("seccomp status: %v", enabled) + return enabled +}
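+ +// A note on the probe above: PR_GET_SECCOMP fails with EINVAL only when the kernel lacks +// CONFIG_SECCOMP. The PR_SET_SECCOMP call deliberately passes a null filter pointer; a kernel +// built with CONFIG_SECCOMP_FILTER rejects that with EFAULT rather than EINVAL, so support is +// detected without actually installing a filter.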
+ +// LoadProfileFromStruct takes a Seccomp struct and sets up seccomp in the spec. +func LoadProfileFromStruct(config Seccomp, specgen *generate.Generator) error { + return setupSeccomp(&config, specgen) +} + +// LoadProfileFromBytes takes a byte slice and decodes the seccomp profile. +func LoadProfileFromBytes(body []byte, specgen *generate.Generator) error { + var config Seccomp + if err := json.Unmarshal(body, &config); err != nil { + return fmt.Errorf("decoding seccomp profile failed: %v", err) + } + return setupSeccomp(&config, specgen) +} + +var nativeToSeccomp = map[string]Arch{ + "amd64": ArchX86_64, + "arm64": ArchAARCH64, + "mips64": ArchMIPS64, + "mips64n32": ArchMIPS64N32, + "mipsel64": ArchMIPSEL64, + "mipsel64n32": ArchMIPSEL64N32, + "s390x": ArchS390X, +} + +func setupSeccomp(config *Seccomp, specgen *generate.Generator) error { + if config == nil { + return nil + } + + // No default action specified, no syscalls listed, assume seccomp disabled + if config.DefaultAction == "" && len(config.Syscalls) == 0 { + return nil + } + + var arch string + native, err := libseccomp.GetNativeArch() + if err == nil { + arch = native.String() + } + + if len(config.Architectures) != 0 && len(config.ArchMap) != 0 { + return errors.New("'architectures' and 'archMap' were specified in the seccomp profile, use either 'architectures' or 'archMap'") + } + + customspec := specgen.Spec() + customspec.Linux.Seccomp = &specs.LinuxSeccomp{} + + // if len(config.Architectures) == 0 then libseccomp will figure out the architecture to use + if len(config.Architectures) != 0 { + for _, a := range config.Architectures { + customspec.Linux.Seccomp.Architectures = append(customspec.Linux.Seccomp.Architectures, specs.Arch(a)) + } + } + + if len(config.ArchMap) != 0 { + for _, a := range config.ArchMap { + seccompArch, ok := nativeToSeccomp[arch] + if ok { + if a.Arch == seccompArch { + customspec.Linux.Seccomp.Architectures = append(customspec.Linux.Seccomp.Architectures, specs.Arch(a.Arch)) + for _, sa := range a.SubArches { + customspec.Linux.Seccomp.Architectures = append(customspec.Linux.Seccomp.Architectures, specs.Arch(sa)) + } + break + } + } + } + } + + customspec.Linux.Seccomp.DefaultAction = specs.LinuxSeccompAction(config.DefaultAction) + +Loop: + // Loop through all syscall blocks and convert them to libcontainer format after filtering them + for _, call := range config.Syscalls { + if len(call.Excludes.Arches) > 0 { + if stringutils.InSlice(call.Excludes.Arches, arch) { + continue Loop + } + } + if len(call.Excludes.Caps) > 0 { + for _, c := range call.Excludes.Caps { + if stringutils.InSlice(customspec.Process.Capabilities.Permitted, c) { + continue Loop + } + } + } + if len(call.Includes.Arches) > 0 { + if !stringutils.InSlice(call.Includes.Arches, arch) { + continue Loop + } + } + if len(call.Includes.Caps) > 0 { + for _, c := range call.Includes.Caps { + if !stringutils.InSlice(customspec.Process.Capabilities.Permitted, c) { + continue Loop + } + } + } + + if call.Name != "" && len(call.Names) != 0 { + return errors.New("'name' and 'names' were specified in the seccomp profile, use either 'name' or 'names'") + } + + if call.Name != "" { + customspec.Linux.Seccomp.Syscalls = append(customspec.Linux.Seccomp.Syscalls, createSpecsSyscall(call.Name, call.Action, call.Args)) + } + + for _, n := range call.Names { + customspec.Linux.Seccomp.Syscalls = append(customspec.Linux.Seccomp.Syscalls, createSpecsSyscall(n, call.Action, call.Args)) + } + } + + return nil +}
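+ +// An illustrative profile (not one shipped with cri-o) that LoadProfileFromBytes would accept, +// returning an errno for a single syscall and allowing everything else: +// +// { +// "defaultAction": "SCMP_ACT_ALLOW", +// "syscalls": [{"name": "keyctl", "action": "SCMP_ACT_ERRNO"}] +// }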
+ +func createSpecsSyscall(name string, action Action, args []*Arg) specs.LinuxSyscall { + newCall := specs.LinuxSyscall{ + Names: []string{name}, + Action: specs.LinuxSeccompAction(action), + } + + // Loop through all the arguments of the syscall and convert them + for _, arg := range args { + newArg := specs.LinuxSeccompArg{ + Index: arg.Index, + Value: arg.Value, + ValueTwo: arg.ValueTwo, + Op: specs.LinuxSeccompOperator(arg.Op), + } + + newCall.Args = append(newCall.Args, newArg) + } + return newCall +} diff --git a/server/seccomp/seccomp_unsupported.go b/server/seccomp/seccomp_unsupported.go new file mode 100644 index 000000000..efb36bdf9 --- /dev/null +++ b/server/seccomp/seccomp_unsupported.go @@ -0,0 +1,20 @@ +// +build !seccomp + +package seccomp + +import "github.com/opencontainers/runtime-tools/generate" + +// IsEnabled returns false when built without the seccomp build tag. +func IsEnabled() bool { + return false +} + +// LoadProfileFromStruct takes a Seccomp struct and sets up seccomp in the spec. +func LoadProfileFromStruct(config Seccomp, specgen *generate.Generator) error { + return nil +} + +// LoadProfileFromBytes takes a byte slice and decodes the seccomp profile. +func LoadProfileFromBytes(body []byte, specgen *generate.Generator) error { + return nil +} diff --git a/server/seccomp/types.go b/server/seccomp/types.go new file mode 100644 index 000000000..5b07f8c03 --- /dev/null +++ b/server/seccomp/types.go @@ -0,0 +1,93 @@ +package seccomp + +// Seccomp represents the config for a seccomp profile for syscall restriction. +type Seccomp struct { + DefaultAction Action `json:"defaultAction"` + // Architectures is kept to maintain backward compatibility with the old + // seccomp profile. + Architectures []Arch `json:"architectures,omitempty"` + ArchMap []Architecture `json:"archMap,omitempty"` + Syscalls []*Syscall `json:"syscalls"` +} + +// Architecture is used to represent a specific architecture +// and its sub-architectures +type Architecture struct { + Arch Arch `json:"architecture"` + SubArches []Arch `json:"subArchitectures"` +} + +// Arch used for architectures +type Arch string + +// Additional architectures permitted to be used for system calls +// By default only the native architecture of the kernel is permitted +const ( + ArchX86 Arch = "SCMP_ARCH_X86" + ArchX86_64 Arch = "SCMP_ARCH_X86_64" + ArchX32 Arch = "SCMP_ARCH_X32" + ArchARM Arch = "SCMP_ARCH_ARM" + ArchAARCH64 Arch = "SCMP_ARCH_AARCH64" + ArchMIPS Arch = "SCMP_ARCH_MIPS" + ArchMIPS64 Arch = "SCMP_ARCH_MIPS64" + ArchMIPS64N32 Arch = "SCMP_ARCH_MIPS64N32" + ArchMIPSEL Arch = "SCMP_ARCH_MIPSEL" + ArchMIPSEL64 Arch = "SCMP_ARCH_MIPSEL64" + ArchMIPSEL64N32 Arch = "SCMP_ARCH_MIPSEL64N32" + ArchPPC Arch = "SCMP_ARCH_PPC" + ArchPPC64 Arch = "SCMP_ARCH_PPC64" + ArchPPC64LE Arch = "SCMP_ARCH_PPC64LE" + ArchS390 Arch = "SCMP_ARCH_S390" + ArchS390X Arch = "SCMP_ARCH_S390X" +) + +// Action taken upon Seccomp rule match +type Action string + +// Define actions for Seccomp rules +const ( + ActKill Action = "SCMP_ACT_KILL" + ActTrap Action = "SCMP_ACT_TRAP" + ActErrno Action = "SCMP_ACT_ERRNO" + ActTrace Action = "SCMP_ACT_TRACE" + ActAllow Action = "SCMP_ACT_ALLOW" +) + +// Operator used to match syscall arguments in Seccomp +type Operator string + +// Define operators for syscall arguments in Seccomp +const ( + OpNotEqual Operator = "SCMP_CMP_NE" + OpLessThan Operator = "SCMP_CMP_LT" + OpLessEqual Operator = "SCMP_CMP_LE" + OpEqualTo Operator = "SCMP_CMP_EQ" + OpGreaterEqual Operator = "SCMP_CMP_GE" + OpGreaterThan Operator = "SCMP_CMP_GT" + OpMaskedEqual Operator = "SCMP_CMP_MASKED_EQ" +)
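+ +// For example (illustrative values), matching a socket(2) call whose first argument equals +// AF_INET (2) would use Arg{Index: 0, Value: 2, Op: OpEqualTo}.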
+ +// Arg used for matching specific syscall arguments in Seccomp +type Arg struct { + Index uint `json:"index"` + Value uint64 `json:"value"` + ValueTwo uint64 `json:"valueTwo"` + Op Operator `json:"op"` +} + +// Filter is used to conditionally apply Seccomp rules +type Filter struct { + Caps []string `json:"caps,omitempty"` + Arches []string `json:"arches,omitempty"` +} + +// Syscall is used to match a group of syscalls in Seccomp +type Syscall struct { + Name string `json:"name,omitempty"` + Names []string `json:"names,omitempty"` + Action Action `json:"action"` + Args []*Arg `json:"args"` + Comment string `json:"comment"` + Includes Filter `json:"includes"` + Excludes Filter `json:"excludes"` +} diff --git a/server/secrets.go b/server/secrets.go new file mode 100644 index 000000000..56d3ba81a --- /dev/null +++ b/server/secrets.go @@ -0,0 +1,162 @@ +package server + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strings" + + rspec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/selinux/go-selinux/label" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +// SecretData info +type SecretData struct { + Name string + Data []byte +} + +// SaveTo saves secret data to the given directory +func (s SecretData) SaveTo(dir string) error { + path := filepath.Join(dir, s.Name) + if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil && !os.IsExist(err) { + return err + } + return ioutil.WriteFile(path, s.Data, 0700) +} + +func readAll(root, prefix string) ([]SecretData, error) { + path := filepath.Join(root, prefix) + + data := []SecretData{} + + files, err := ioutil.ReadDir(path) + if err != nil { + if os.IsNotExist(err) { + return data, nil + } + + return nil, err + } + + for _, f := range files { + fileData, err := readFile(root, filepath.Join(prefix, f.Name())) + if err != nil { + // If the file does not exist, it might be a dangling symlink; + // ignore the error + if os.IsNotExist(err) { + continue + } + return nil, err + } + data = append(data, fileData...) + } + + return data, nil +}
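+ +// readAll and readFile recurse into each other: readFile descends into directories via readAll, +// flattening the tree under root into SecretData entries whose Name keeps the relative path +// (e.g. a hypothetical directory "rhsm" holding "rhsm.conf" yields Name "rhsm/rhsm.conf").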
+ +func readFile(root, name string) ([]SecretData, error) { + path := filepath.Join(root, name) + + s, err := os.Stat(path) + if err != nil { + return nil, err + } + + if s.IsDir() { + dirData, err := readAll(root, name) + if err != nil { + return nil, err + } + return dirData, nil + } + bytes, err := ioutil.ReadFile(path) + if err != nil { + return nil, err + } + return []SecretData{{Name: name, Data: bytes}}, nil +} + +// getMountsMap separates the host:container paths +func getMountsMap(path string) (string, string, error) { + arr := strings.SplitN(path, ":", 2) + if len(arr) == 2 { + return arr[0], arr[1], nil + } + return "", "", errors.Errorf("unable to get host and container dir") +} + +func getHostSecretData(hostDir string) ([]SecretData, error) { + var allSecrets []SecretData + hostSecrets, err := readAll(hostDir, "") + if err != nil { + return nil, errors.Wrapf(err, "failed to read secrets from %q", hostDir) + } + return append(allSecrets, hostSecrets...), nil +} + +// secretMounts copies the contents of the host directory to the container directory +// and returns a list of mounts +func secretMounts(defaultMountsPaths []string, mountLabel, containerWorkingDir string, runtimeMounts []rspec.Mount) ([]rspec.Mount, error) { + var mounts []rspec.Mount + for _, path := range defaultMountsPaths { + hostDir, ctrDir, err := getMountsMap(path) + if err != nil { + return nil, err + } + // skip if the hostDir path doesn't exist + if _, err := os.Stat(hostDir); os.IsNotExist(err) { + logrus.Warnf("%q doesn't exist, skipping", hostDir) + continue + } + + ctrDirOnHost := filepath.Join(containerWorkingDir, ctrDir) + // skip if ctrDir has already been mounted by caller + if isAlreadyMounted(runtimeMounts, ctrDir) { + logrus.Warnf("%q has already been mounted; cannot override mount", ctrDir) + continue + } + + if err := os.RemoveAll(ctrDirOnHost); err != nil { + return nil, fmt.Errorf("remove container directory failed: %v", err) + } + + if err := os.MkdirAll(ctrDirOnHost, 0755); err != nil { + return nil, fmt.Errorf("making container directory failed: %v", err) + } + + hostDir, err = resolveSymbolicLink(hostDir) + if err != nil { + return nil, err + } + + data, err := getHostSecretData(hostDir) + if err != nil { + return nil, errors.Wrapf(err, "getting host secret data failed") + } + for _, s := range data { + if err := s.SaveTo(ctrDirOnHost); err != nil { + return nil, err + } + } + if err := label.Relabel(ctrDirOnHost, mountLabel, false); err != nil { + return nil, err + } + + m := rspec.Mount{ + Source: ctrDirOnHost, + Destination: ctrDir, + } + + mounts = append(mounts, m) + } + return mounts, nil +} + +func isAlreadyMounted(mounts []rspec.Mount, mountPath string) bool { + for _, mount := range mounts { + if mount.Destination == mountPath { + return true + } + } + return false +} diff --git a/server/server.go b/server/server.go new file mode 100644 index 000000000..a308e7d29 --- /dev/null +++ b/server/server.go @@ -0,0 +1,423 @@ +package server + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "net" + "net/http" + "os" + "path/filepath" + "runtime/debug" + "strconv" + "strings" + "sync" + + "github.com/cri-o/ocicni/pkg/ocicni" + "github.com/fsnotify/fsnotify" + "github.com/kubernetes-incubator/cri-o/libkpod" + "github.com/kubernetes-incubator/cri-o/libkpod/sandbox" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/kubernetes-incubator/cri-o/pkg/storage" + "github.com/kubernetes-incubator/cri-o/server/apparmor" + "github.com/kubernetes-incubator/cri-o/server/seccomp" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" +
"github.com/sirupsen/logrus" + knet "k8s.io/apimachinery/pkg/util/net" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" + "k8s.io/kubernetes/pkg/kubelet/network/hostport" + "k8s.io/kubernetes/pkg/kubelet/server/streaming" + iptablesproxy "k8s.io/kubernetes/pkg/proxy/iptables" + utildbus "k8s.io/kubernetes/pkg/util/dbus" + utilexec "k8s.io/kubernetes/pkg/util/exec" + utiliptables "k8s.io/kubernetes/pkg/util/iptables" +) + +const ( + shutdownFile = "/var/lib/crio/crio.shutdown" +) + +func isTrue(annotaton string) bool { + return annotaton == "true" +} + +// streamService implements streaming.Runtime. +type streamService struct { + runtimeServer *Server // needed by Exec() endpoint + streamServer streaming.Server + streamServerCloseCh chan struct{} + streaming.Runtime +} + +// Server implements the RuntimeService and ImageService +type Server struct { + *libkpod.ContainerServer + config Config + + updateLock sync.RWMutex + netPlugin ocicni.CNIPlugin + hostportManager hostport.HostPortManager + + seccompEnabled bool + seccompProfile seccomp.Seccomp + + appArmorEnabled bool + appArmorProfile string + + bindAddress string + stream streamService + exitMonitorChan chan struct{} +} + +// StopStreamServer stops the stream server +func (s *Server) StopStreamServer() error { + return s.stream.streamServer.Stop() +} + +// StreamingServerCloseChan returns the close channel for the streaming server +func (s *Server) StreamingServerCloseChan() chan struct{} { + return s.stream.streamServerCloseCh +} + +// GetExec returns exec stream request +func (s *Server) GetExec(req *pb.ExecRequest) (*pb.ExecResponse, error) { + return s.stream.streamServer.GetExec(req) +} + +// GetAttach returns attach stream request +func (s *Server) GetAttach(req *pb.AttachRequest) (*pb.AttachResponse, error) { + return s.stream.streamServer.GetAttach(req) +} + +// GetPortForward returns port forward stream request +func (s *Server) GetPortForward(req *pb.PortForwardRequest) (*pb.PortForwardResponse, error) { + return s.stream.streamServer.GetPortForward(req) +} + +func (s *Server) restore() { + containers, err := s.Store().Containers() + if err != nil && !os.IsNotExist(errors.Cause(err)) { + logrus.Warnf("could not read containers and sandboxes: %v", err) + } + pods := map[string]*storage.RuntimeContainerMetadata{} + podContainers := map[string]*storage.RuntimeContainerMetadata{} + for _, container := range containers { + metadata, err2 := s.StorageRuntimeServer().GetContainerMetadata(container.ID) + if err2 != nil { + logrus.Warnf("error parsing metadata for %s: %v, ignoring", container.ID, err2) + continue + } + if metadata.Pod { + pods[container.ID] = &metadata + } else { + podContainers[container.ID] = &metadata + } + } + for containerID, metadata := range pods { + if err = s.LoadSandbox(containerID); err != nil { + logrus.Warnf("could not restore sandbox %s container %s: %v", metadata.PodID, containerID, err) + } + } + for containerID := range podContainers { + if err := s.LoadContainer(containerID); err != nil { + logrus.Warnf("could not restore container %s: %v", containerID, err) + } + } +} + +// Update makes changes to the server's state (lists of pods and containers) to +// reflect the list of pods and containers that are stored on disk, possibly +// having been modified by other parties +func (s *Server) Update() { + logrus.Debugf("updating sandbox and container information") + if err := s.ContainerServer.Update(); err != nil { + logrus.Errorf("error updating sandbox and container information: %v", err) + 
+ +// cleanupSandboxesOnShutdown removes all running sandboxes on system shutdown +func (s *Server) cleanupSandboxesOnShutdown() { + _, err := os.Stat(shutdownFile) + if err == nil || !os.IsNotExist(err) { + logrus.Debugf("shutting down all sandboxes on system shutdown") + s.StopAllPodSandboxes() + err = os.Remove(shutdownFile) + if err != nil { + logrus.Warnf("Failed to remove %q: %v", shutdownFile, err) + } + } +} + +// Shutdown attempts to shut down the server's storage cleanly +func (s *Server) Shutdown() error { + // TODO: this cleanup runs on every clean shutdown, including ordinary + // crio.service restarts, not only system halt; we may instead want + // containers to keep running while crio itself is briefly down. + s.cleanupSandboxesOnShutdown() + return s.ContainerServer.Shutdown() +} + +// configureMaxThreads sets the Go runtime max threads threshold +// which is 90% of the kernel setting from /proc/sys/kernel/threads-max +func configureMaxThreads() error { + mt, err := ioutil.ReadFile("/proc/sys/kernel/threads-max") + if err != nil { + return err + } + mtint, err := strconv.Atoi(strings.TrimSpace(string(mt))) + if err != nil { + return err + } + maxThreads := (mtint / 100) * 90 + debug.SetMaxThreads(maxThreads) + logrus.Debugf("Golang's threads limit set to %d", maxThreads) + return nil +} + +// New creates a new Server with options provided +func New(config *Config) (*Server, error) { + if err := os.MkdirAll("/var/run/crio", 0755); err != nil { + return nil, err + } + + config.ContainerExitsDir = oci.ContainerExitsDir + + // This is used to monitor container exits using inotify + if err := os.MkdirAll(config.ContainerExitsDir, 0755); err != nil { + return nil, err + } + containerServer, err := libkpod.New(&config.Config) + if err != nil { + return nil, err + } + + netPlugin, err := ocicni.InitCNI(config.NetworkDir, config.PluginDir) + if err != nil { + return nil, err + } + iptInterface := utiliptables.New(utilexec.New(), utildbus.New(), utiliptables.ProtocolIpv4) + if _, err := iptInterface.EnsureChain(utiliptables.TableNAT, iptablesproxy.KubeMarkMasqChain); err != nil { + return nil, err + } + hostportManager := hostport.NewHostportManager() + + s := &Server{ + ContainerServer: containerServer, + netPlugin: netPlugin, + hostportManager: hostportManager, + config: *config, + seccompEnabled: seccomp.IsEnabled(), + appArmorEnabled: apparmor.IsEnabled(), + appArmorProfile: config.ApparmorProfile, + exitMonitorChan: make(chan struct{}), + } + + if s.seccompEnabled { + seccompProfile, fileErr := ioutil.ReadFile(config.SeccompProfile) + if fileErr != nil { + return nil, fmt.Errorf("opening seccomp profile (%s) failed: %v", config.SeccompProfile, fileErr) + } + var seccompConfig seccomp.Seccomp + if jsonErr := json.Unmarshal(seccompProfile, &seccompConfig); jsonErr != nil { + return nil, fmt.Errorf("decoding seccomp profile failed: %v", jsonErr) + } + s.seccompProfile = seccompConfig + } + + if s.appArmorEnabled && s.appArmorProfile == apparmor.DefaultApparmorProfile { + if apparmorErr := apparmor.EnsureDefaultApparmorProfile(); apparmorErr != nil { + return nil, fmt.Errorf("ensuring the default apparmor profile is installed failed: %v", apparmorErr) + } + } + + if err := configureMaxThreads(); err != nil { + return nil, err + } + + s.restore() + s.cleanupSandboxesOnShutdown() + + bindAddress := net.ParseIP(config.StreamAddress) + if bindAddress == nil { + bindAddress, err = knet.ChooseBindAddress(net.IP{0, 0, 0, 0}) + if err != nil { + return nil, err + } + } + s.bindAddress = bindAddress.String() +
+ _, err = net.LookupPort("tcp", config.StreamPort) + if err != nil { + return nil, err + } + + // Prepare streaming server + streamServerConfig := streaming.DefaultConfig + streamServerConfig.Addr = net.JoinHostPort(bindAddress.String(), config.StreamPort) + s.stream.runtimeServer = s + s.stream.streamServer, err = streaming.NewServer(streamServerConfig, s.stream) + if err != nil { + return nil, fmt.Errorf("unable to create streaming server: %v", err) + } + + s.stream.streamServerCloseCh = make(chan struct{}) + go func() { + defer close(s.stream.streamServerCloseCh) + if err := s.stream.streamServer.Start(true); err != nil { + logrus.Errorf("Failed to start streaming server: %v", err) + } + }() + + logrus.Debugf("sandboxes: %v", s.ContainerServer.ListSandboxes()) + return s, nil +} + +func (s *Server) addSandbox(sb *sandbox.Sandbox) { + s.ContainerServer.AddSandbox(sb) +} + +func (s *Server) getSandbox(id string) *sandbox.Sandbox { + return s.ContainerServer.GetSandbox(id) +} + +func (s *Server) hasSandbox(id string) bool { + return s.ContainerServer.HasSandbox(id) +} + +func (s *Server) removeSandbox(id string) { + s.ContainerServer.RemoveSandbox(id) +} + +func (s *Server) addContainer(c *oci.Container) { + s.ContainerServer.AddContainer(c) +} + +func (s *Server) addInfraContainer(c *oci.Container) { + s.ContainerServer.AddInfraContainer(c) +} + +func (s *Server) getContainer(id string) *oci.Container { + return s.ContainerServer.GetContainer(id) +} + +func (s *Server) getInfraContainer(id string) *oci.Container { + return s.ContainerServer.GetInfraContainer(id) +} + +// BindAddress is used to retrieve the host's IP +func (s *Server) BindAddress() string { + return s.bindAddress +} + +// GetSandboxContainer returns the infra container for a given sandbox +func (s *Server) GetSandboxContainer(id string) *oci.Container { + return s.ContainerServer.GetSandboxContainer(id) +} + +// GetContainer returns a container by its ID +func (s *Server) GetContainer(id string) *oci.Container { + return s.getContainer(id) +} + +func (s *Server) removeContainer(c *oci.Container) { + s.ContainerServer.RemoveContainer(c) +} + +func (s *Server) removeInfraContainer(c *oci.Container) { + s.ContainerServer.RemoveInfraContainer(c) +} + +func (s *Server) getPodSandboxFromRequest(podSandboxID string) (*sandbox.Sandbox, error) { + if podSandboxID == "" { + return nil, sandbox.ErrIDEmpty + } + + sandboxID, err := s.PodIDIndex().Get(podSandboxID) + if err != nil { + return nil, fmt.Errorf("PodSandbox with ID starting with %s not found: %v", podSandboxID, err) + } + + sb := s.getSandbox(sandboxID) + if sb == nil { + return nil, fmt.Errorf("specified pod sandbox not found: %s", sandboxID) + } + return sb, nil +} + +// CreateMetricsEndpoint creates a /metrics endpoint +// for prometheus monitoring +func (s *Server) CreateMetricsEndpoint() (*http.ServeMux, error) { + mux := &http.ServeMux{} + mux.Handle("/metrics", prometheus.Handler()) + return mux, nil +} + +// StopExitMonitor stops the exit monitor +func (s *Server) StopExitMonitor() { + close(s.exitMonitorChan) +} + +// ExitMonitorCloseChan returns the close chan for the exit monitor +func (s *Server) ExitMonitorCloseChan() chan struct{} { + return s.exitMonitorChan +} + +// StartExitMonitor starts a goroutine that monitors container exits +// and updates the container status +func (s *Server) StartExitMonitor() { + watcher, err := fsnotify.NewWatcher() + if err != nil { + logrus.Fatalf("Failed to create new watcher: %v", err) + } + defer watcher.Close() + + done := make(chan struct{})
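+ // Files that appear in ContainerExitsDir are named after container IDs (written, presumably, + // by the container monitor when the process exits), so a Create event below maps directly to + // an exited container or sandbox whose status is refreshed and persisted to disk.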
+ go func() { + for { + select { + case event := <-watcher.Events: + logrus.Debugf("event: %v", event) + if event.Op&fsnotify.Create == fsnotify.Create { + containerID := filepath.Base(event.Name) + logrus.Debugf("container or sandbox exited: %v", containerID) + c := s.GetContainer(containerID) + if c != nil { + logrus.Debugf("container exited and found: %v", containerID) + err := s.Runtime().UpdateStatus(c) + if err != nil { + logrus.Warnf("Failed to update container status %s: %v", c.ID(), err) + } else { + s.ContainerStateToDisk(c) + } + } else { + sb := s.GetSandbox(containerID) + if sb != nil { + c := sb.InfraContainer() + logrus.Debugf("sandbox exited and found: %v", containerID) + err := s.Runtime().UpdateStatus(c) + if err != nil { + logrus.Warnf("Failed to update sandbox infra container status %s: %v", c.ID(), err) + } else { + s.ContainerStateToDisk(c) + } + } + } + } + case err := <-watcher.Errors: + logrus.Debugf("watch error: %v", err) + close(done) + return + case <-s.exitMonitorChan: + logrus.Debug("closing exit monitor...") + close(done) + return + } + } + }() + if err := watcher.Add(s.config.ContainerExitsDir); err != nil { + logrus.Errorf("watcher.Add(%q) failed: %s", s.config.ContainerExitsDir, err) + close(done) + } + <-done +} diff --git a/server/utils.go b/server/utils.go new file mode 100644 index 000000000..195942d38 --- /dev/null +++ b/server/utils.go @@ -0,0 +1,183 @@ +package server + +import ( + "fmt" + "io" + "os" + "strings" + + "github.com/cri-o/ocicni/pkg/ocicni" + "github.com/kubernetes-incubator/cri-o/libkpod/sandbox" + "github.com/opencontainers/runtime-tools/validate" + "github.com/syndtr/gocapability/capability" +) + +const ( + // According to http://man7.org/linux/man-pages/man5/resolv.conf.5.html: + // "The search list is currently limited to six domains with a total of 256 characters." + maxDNSSearches = 6 +) + +func copyFile(src, dest string) error { + in, err := os.Open(src) + if err != nil { + return err + } + defer in.Close() + + out, err := os.Create(dest) + if err != nil { + return err + } + defer out.Close() + + _, err = io.Copy(out, in) + return err +} + +func removeFile(path string) error { + if _, err := os.Stat(path); err == nil { + if err := os.Remove(path); err != nil { + return err + } + } + return nil +} + +func parseDNSOptions(servers, searches, options []string, path string) error { + nServers := len(servers) + nSearches := len(searches) + nOptions := len(options) + if nServers == 0 && nSearches == 0 && nOptions == 0 { + return copyFile("/etc/resolv.conf", path) + } + + if nSearches > maxDNSSearches { + return fmt.Errorf("DNSOption.Searches has more than %d domains", maxDNSSearches) + } + + f, err := os.Create(path) + if err != nil { + return err + } + defer f.Close() + + if nSearches > 0 { + data := fmt.Sprintf("search %s\n", strings.Join(searches, " ")) + _, err = f.Write([]byte(data)) + if err != nil { + return err + } + } + + if nServers > 0 { + data := fmt.Sprintf("nameserver %s\n", strings.Join(servers, "\nnameserver ")) + _, err = f.Write([]byte(data)) + if err != nil { + return err + } + } + + if nOptions > 0 { + data := fmt.Sprintf("options %s\n", strings.Join(options, " ")) + _, err = f.Write([]byte(data)) + if err != nil { + return err + } + } + + return nil +}
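+ +// For illustration, parseDNSOptions with servers ["10.0.0.2"], searches ["cluster.local"] and +// options ["ndots:5"] writes: +// +// search cluster.local +// nameserver 10.0.0.2 +// options ndots:5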
Only a limited set of whitelisted and isolated sysctls is supported by + // the kubelet. Pods with other sysctls will fail to launch. + SysctlsPodAnnotationKey string = "security.alpha.kubernetes.io/sysctls" + + // UnsafeSysctlsPodAnnotationKey represents the key of sysctls which are set for the infrastructure + // container of a pod. The annotation value is a comma separated list of sysctl_name=value + // key-value pairs. Unsafe sysctls must be explicitly enabled for a kubelet. They are properly + // namespaced to a pod or a container, but their isolation is usually unclear or weak. Their use + // is at-your-own-risk. Pods that attempt to set an unsafe sysctl that is not enabled for a kubelet + // will fail to launch. + UnsafeSysctlsPodAnnotationKey string = "security.alpha.kubernetes.io/unsafe-sysctls" +) + +// Sysctl defines a kernel parameter to be set +type Sysctl struct { + // Name of a property to set + Name string `json:"name"` + // Value of a property to set + Value string `json:"value"` +} + +// SysctlsFromPodAnnotations parses the sysctl annotations into a slice of safe Sysctls +// and a slice of unsafe Sysctls. This is only a convenience wrapper around +// SysctlsFromPodAnnotation. +func SysctlsFromPodAnnotations(a map[string]string) ([]Sysctl, []Sysctl, error) { + safe, err := SysctlsFromPodAnnotation(a[SysctlsPodAnnotationKey]) + if err != nil { + return nil, nil, err + } + unsafe, err := SysctlsFromPodAnnotation(a[UnsafeSysctlsPodAnnotationKey]) + if err != nil { + return nil, nil, err + } + + return safe, unsafe, nil +} + +// SysctlsFromPodAnnotation parses an annotation value into a slice of Sysctls. +func SysctlsFromPodAnnotation(annotation string) ([]Sysctl, error) { + if len(annotation) == 0 { + return nil, nil + } + + kvs := strings.Split(annotation, ",") + sysctls := make([]Sysctl, len(kvs)) + for i, kv := range kvs { + cs := strings.Split(kv, "=") + if len(cs) != 2 || len(cs[0]) == 0 { + return nil, fmt.Errorf("sysctl %q not of the format sysctl_name=value", kv) + } + sysctls[i].Name = cs[0] + sysctls[i].Value = cs[1] + } + return sysctls, nil +} + +func newPodNetwork(sb *sandbox.Sandbox) ocicni.PodNetwork { + return ocicni.PodNetwork{ + Name: sb.KubeName(), + Namespace: sb.Namespace(), + ID: sb.ID(), + NetNS: sb.NetNsPath(), + } +} + +// inStringSlice checks whether a string is inside a string slice. +// Comparison is case insensitive. +func inStringSlice(ss []string, str string) bool { + for _, s := range ss { + if strings.ToLower(s) == strings.ToLower(str) { + return true + } + } + return false +} + +// getOCICapabilitiesList returns a list of all available capabilities. +func getOCICapabilitiesList() []string { + var caps []string + for _, cap := range capability.List() { + if cap > validate.LastCap() { + continue + } + caps = append(caps, "CAP_"+strings.ToUpper(cap.String())) + } + return caps +} diff --git a/server/version.go b/server/version.go new file mode 100644 index 000000000..5f98e5f0c --- /dev/null +++ b/server/version.go @@ -0,0 +1,27 @@ +package server + +import ( + "github.com/kubernetes-incubator/cri-o/version" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +const ( + // kubeAPIVersion is the api version of kubernetes. + // TODO: Track upstream code. 
 diff --git a/server/version.go b/server/version.go new file mode 100644 index 000000000..5f98e5f0c --- /dev/null +++ b/server/version.go @@ -0,0 +1,27 @@ +package server + +import ( + "github.com/kubernetes-incubator/cri-o/version" + "golang.org/x/net/context" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +const ( + // kubeAPIVersion is the api version of kubernetes. + // TODO: Track upstream code. For now it expects 0.1.0 + kubeAPIVersion = "0.1.0" + // containerName is the name prepended in kubectl describe->Container ID: + // cri-o://<CONTAINER_ID> + containerName = "cri-o" + runtimeAPIVersion = "v1alpha1" +) + +// Version returns the runtime name, runtime version and runtime API version +func (s *Server) Version(ctx context.Context, req *pb.VersionRequest) (*pb.VersionResponse, error) { + return &pb.VersionResponse{ + Version: kubeAPIVersion, + RuntimeName: containerName, + RuntimeVersion: version.Version, + RuntimeApiVersion: runtimeAPIVersion, + }, nil +}