Diffstat (limited to 'libkpod')
-rw-r--r-- | libkpod/config.go            | 308
-rw-r--r-- | libkpod/config_test.go       |  54
-rw-r--r-- | libkpod/container.go         | 157
-rw-r--r-- | libkpod/container_data.go    | 210
-rw-r--r-- | libkpod/container_server.go  | 775
-rw-r--r-- | libkpod/hooks.go             |  98
-rw-r--r-- | libkpod/kill.go              |  45
-rw-r--r-- | libkpod/logs.go              |  80
-rw-r--r-- | libkpod/pause.go             |  46
-rw-r--r-- | libkpod/remove.go            |  53
-rw-r--r-- | libkpod/rename.go            | 114
-rw-r--r-- | libkpod/sandbox/sandbox.go   | 484
-rw-r--r-- | libkpod/stats.go             | 111
-rw-r--r-- | libkpod/stop.go              |  36
-rw-r--r-- | libkpod/testdata/config.toml |  28
-rw-r--r-- | libkpod/wait.go              |  42
16 files changed, 2641 insertions, 0 deletions
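The diff below adds the libkpod package: configuration loading (config.go), the ContainerServer and its on-disk state handling (container_server.go), container inspection and lifecycle helpers (kill, pause, remove, rename, logs), OCI hook discovery, and the sandbox type. As an illustrative sketch only (not part of this commit), the snippet below shows how a caller might wire these pieces together using the APIs introduced here: DefaultConfig, Config.UpdateFromFile, New, and LookupContainer. The import path is assumed from the other packages in this repository, and the config path and container name are placeholders.

package main

import (
	"log"

	"github.com/kubernetes-incubator/cri-o/libkpod"
)

func main() {
	// Start from the built-in defaults, then optionally merge settings from
	// an on-disk TOML file (the path here is only an example).
	config := libkpod.DefaultConfig()
	if err := config.UpdateFromFile("/etc/crio/crio.conf"); err != nil {
		log.Printf("using defaults, could not read config file: %v", err)
	}

	// New wires up storage, the OCI runtime, and the hook configuration.
	server, err := libkpod.New(config)
	if err != nil {
		log.Fatalf("creating container server: %v", err)
	}
	defer server.Shutdown()

	// LookupContainer accepts a container name, a full ID, or an
	// unambiguous ID prefix ("my-container" is a placeholder).
	ctr, err := server.LookupContainer("my-container")
	if err != nil {
		log.Fatalf("looking up container: %v", err)
	}
	log.Printf("found container %s (%s)", ctr.Name(), ctr.ID())
}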
diff --git a/libkpod/config.go b/libkpod/config.go new file mode 100644 index 000000000..687b4b380 --- /dev/null +++ b/libkpod/config.go @@ -0,0 +1,308 @@ +package libkpod + +import ( + "bytes" + "io/ioutil" + + "github.com/BurntSushi/toml" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/opencontainers/selinux/go-selinux" +) + +// Default paths if none are specified +const ( + crioRoot = "/var/lib/containers/storage" + crioRunRoot = "/var/run/containers/storage" + conmonPath = "/usr/local/libexec/crio/conmon" + pauseImage = "kubernetes/pause" + pauseCommand = "/pause" + defaultTransport = "docker://" + seccompProfilePath = "/etc/crio/seccomp.json" + apparmorProfileName = "crio-default" + cniConfigDir = "/etc/cni/net.d/" + cniBinDir = "/opt/cni/bin/" + cgroupManager = oci.CgroupfsCgroupsManager + lockPath = "/run/crio.lock" + containerExitsDir = oci.ContainerExitsDir +) + +// Config represents the entire set of configuration values that can be set for +// the server. This is intended to be loaded from a toml-encoded config file. +type Config struct { + RootConfig + RuntimeConfig + ImageConfig + NetworkConfig +} + +// ImageVolumesType describes image volume handling strategies +type ImageVolumesType string + +const ( + // ImageVolumesMkdir option is for using mkdir to handle image volumes + ImageVolumesMkdir ImageVolumesType = "mkdir" + // ImageVolumesIgnore option is for ignoring image volumes altogether + ImageVolumesIgnore ImageVolumesType = "ignore" + // ImageVolumesBind option is for using bind mounted volumes + ImageVolumesBind ImageVolumesType = "bind" +) + +const ( + // DefaultPidsLimit is the default value for maximum number of processes + // allowed inside a container + DefaultPidsLimit = 1024 + + // DefaultLogSizeMax is the default value for the maximum log size + // allowed for a container. Negative values mean that no limit is imposed. + DefaultLogSizeMax = -1 +) + +// This structure is necessary to fake the TOML tables when parsing, +// while also not requiring a bunch of layered structs for no good +// reason. + +// RootConfig represents the root of the "crio" TOML config table. +type RootConfig struct { + // Root is a path to the "root directory" where data not + // explicitly handled by other options will be stored. + Root string `toml:"root"` + + // RunRoot is a path to the "run directory" where state information not + // explicitly handled by other options will be stored. + RunRoot string `toml:"runroot"` + + // Storage is the name of the storage driver which handles actually + // storing the contents of containers. + Storage string `toml:"storage_driver"` + + // StorageOption is a list of storage driver specific options. + StorageOptions []string `toml:"storage_option"` + + // LogDir is the default log directory were all logs will go unless kubelet + // tells us to put them somewhere else. + LogDir string `toml:"log_dir"` + + // FileLocking specifies whether to use file-based or in-memory locking + // File-based locking is required when multiple users of libkpod are + // present on the same system + FileLocking bool `toml:"file_locking"` +} + +// RuntimeConfig represents the "crio.runtime" TOML config table. +type RuntimeConfig struct { + // Runtime is the OCI compatible runtime used for trusted container workloads. + // This is a mandatory setting as this runtime will be the default one and + // will also be used for untrusted container workloads if + // RuntimeUntrustedWorkload is not set. 
+ Runtime string `toml:"runtime"` + + // RuntimeUntrustedWorkload is the OCI compatible runtime used for untrusted + // container workloads. This is an optional setting, except if + // DefaultWorkloadTrust is set to "untrusted". + RuntimeUntrustedWorkload string `toml:"runtime_untrusted_workload"` + + // DefaultWorkloadTrust is the default level of trust crio puts in container + // workloads. This can either be "trusted" or "untrusted" and the default + // is "trusted" + // Containers can be run through different container runtimes, depending on + // the trust hints we receive from kubelet: + // - If kubelet tags a container workload as untrusted, crio will try first + // to run it through the untrusted container workload runtime. If it is not + // set, crio will use the trusted runtime. + // - If kubelet does not provide any information about the container workload trust + // level, the selected runtime will depend on the DefaultWorkloadTrust setting. + // If it is set to "untrusted", then all containers except for the host privileged + // ones, will be run by the RuntimeUntrustedWorkload runtime. Host privileged + // containers are by definition trusted and will always use the trusted container + // runtime. If DefaultWorkloadTrust is set to "trusted", crio will use the trusted + // container runtime for all containers. + DefaultWorkloadTrust string `toml:"default_workload_trust"` + + // NoPivot instructs the runtime to not use `pivot_root`, but instead use `MS_MOVE` + NoPivot bool `toml:"no_pivot"` + + // Conmon is the path to conmon binary, used for managing the runtime. + Conmon string `toml:"conmon"` + + // ConmonEnv is the environment variable list for conmon process. + ConmonEnv []string `toml:"conmon_env"` + + // SELinux determines whether or not SELinux is used for pod separation. + SELinux bool `toml:"selinux"` + + // SeccompProfile is the seccomp json profile path which is used as the + // default for the runtime. + SeccompProfile string `toml:"seccomp_profile"` + + // ApparmorProfile is the apparmor profile name which is used as the + // default for the runtime. + ApparmorProfile string `toml:"apparmor_profile"` + + // CgroupManager is the manager implementation name which is used to + // handle cgroups for containers. + CgroupManager string `toml:"cgroup_manager"` + + // HooksDirPath location of oci hooks config files + HooksDirPath string `toml:"hooks_dir_path"` + + // DefaultMounts is the list of mounts to be mounted for each container + // The format of each mount is "host-path:container-path" + DefaultMounts []string `toml:"default_mounts"` + + // Hooks List of hooks to run with container + Hooks map[string]HookParams + + // PidsLimit is the number of processes each container is restricted to + // by the cgroup process number controller. + PidsLimit int64 `toml:"pids_limit"` + + // LogSizeMax is the maximum number of bytes after which the log file + // will be truncated. It can be expressed as a human-friendly string + // that is parsed to bytes. + // Negative values indicate that the log file won't be truncated. + LogSizeMax int64 `toml:"log_size_max"` + + // ContainerExitsDir is the directory in which container exit files are + // written to by conmon. + ContainerExitsDir string `toml:"container_exits_dir"` +} + +// ImageConfig represents the "crio.image" TOML config table. +type ImageConfig struct { + // DefaultTransport is a value we prefix to image names that fail to + // validate source references. 
+ DefaultTransport string `toml:"default_transport"` + // PauseImage is the name of an image which we use to instantiate infra + // containers. + PauseImage string `toml:"pause_image"` + // PauseCommand is the path of the binary we run in an infra + // container that's been instantiated using PauseImage. + PauseCommand string `toml:"pause_command"` + // SignaturePolicyPath is the name of the file which decides what sort + // of policy we use when deciding whether or not to trust an image that + // we've pulled. Outside of testing situations, it is strongly advised + // that this be left unspecified so that the default system-wide policy + // will be used. + SignaturePolicyPath string `toml:"signature_policy"` + // InsecureRegistries is a list of registries that must be contacted w/o + // TLS verification. + InsecureRegistries []string `toml:"insecure_registries"` + // ImageVolumes controls how volumes specified in image config are handled + ImageVolumes ImageVolumesType `toml:"image_volumes"` + // Registries holds a list of registries used to pull unqualified images + Registries []string `toml:"registries"` +} + +// NetworkConfig represents the "crio.network" TOML config table +type NetworkConfig struct { + // NetworkDir is where CNI network configuration files are stored. + NetworkDir string `toml:"network_dir"` + + // PluginDir is where CNI plugin binaries are stored. + PluginDir string `toml:"plugin_dir"` +} + +// tomlConfig is another way of looking at a Config, which is +// TOML-friendly (it has all of the explicit tables). It's just used for +// conversions. +type tomlConfig struct { + Crio struct { + RootConfig + Runtime struct{ RuntimeConfig } `toml:"runtime"` + Image struct{ ImageConfig } `toml:"image"` + Network struct{ NetworkConfig } `toml:"network"` + } `toml:"crio"` +} + +func (t *tomlConfig) toConfig(c *Config) { + c.RootConfig = t.Crio.RootConfig + c.RuntimeConfig = t.Crio.Runtime.RuntimeConfig + c.ImageConfig = t.Crio.Image.ImageConfig + c.NetworkConfig = t.Crio.Network.NetworkConfig +} + +func (t *tomlConfig) fromConfig(c *Config) { + t.Crio.RootConfig = c.RootConfig + t.Crio.Runtime.RuntimeConfig = c.RuntimeConfig + t.Crio.Image.ImageConfig = c.ImageConfig + t.Crio.Network.NetworkConfig = c.NetworkConfig +} + +// UpdateFromFile populates the Config from the TOML-encoded file at the given path. +// Returns errors encountered when reading or parsing the files, or nil +// otherwise. +func (c *Config) UpdateFromFile(path string) error { + data, err := ioutil.ReadFile(path) + if err != nil { + return err + } + + t := new(tomlConfig) + t.fromConfig(c) + + _, err = toml.Decode(string(data), t) + if err != nil { + return err + } + + t.toConfig(c) + return nil +} + +// ToFile outputs the given Config as a TOML-encoded file at the given path. +// Returns errors encountered when generating or writing the file, or nil +// otherwise. +func (c *Config) ToFile(path string) error { + var w bytes.Buffer + e := toml.NewEncoder(&w) + + t := new(tomlConfig) + t.fromConfig(c) + + if err := e.Encode(*t); err != nil { + return err + } + + return ioutil.WriteFile(path, w.Bytes(), 0644) +} + +// DefaultConfig returns the default configuration for crio. 
+func DefaultConfig() *Config { + return &Config{ + RootConfig: RootConfig{ + Root: crioRoot, + RunRoot: crioRunRoot, + LogDir: "/var/log/crio/pods", + FileLocking: true, + }, + RuntimeConfig: RuntimeConfig{ + Runtime: "/usr/bin/runc", + RuntimeUntrustedWorkload: "", + DefaultWorkloadTrust: "trusted", + + Conmon: conmonPath, + ConmonEnv: []string{ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + }, + SELinux: selinux.GetEnabled(), + SeccompProfile: seccompProfilePath, + ApparmorProfile: apparmorProfileName, + CgroupManager: cgroupManager, + PidsLimit: DefaultPidsLimit, + ContainerExitsDir: containerExitsDir, + HooksDirPath: DefaultHooksDirPath, + LogSizeMax: DefaultLogSizeMax, + }, + ImageConfig: ImageConfig{ + DefaultTransport: defaultTransport, + PauseImage: pauseImage, + PauseCommand: pauseCommand, + SignaturePolicyPath: "", + ImageVolumes: ImageVolumesMkdir, + }, + NetworkConfig: NetworkConfig{ + NetworkDir: cniConfigDir, + PluginDir: cniBinDir, + }, + } +} diff --git a/libkpod/config_test.go b/libkpod/config_test.go new file mode 100644 index 000000000..e6820d3c0 --- /dev/null +++ b/libkpod/config_test.go @@ -0,0 +1,54 @@ +package libkpod + +import ( + "io/ioutil" + "os" + "testing" +) + +// TestConfigToFile ensures Config.ToFile(..) encodes and writes out +// a Config instance toa a file on disk. +func TestConfigToFile(t *testing.T) { + // Test with a default configuration + c := DefaultConfig() + tmpfile, err := ioutil.TempFile("", "config") + if err != nil { + t.Fatalf("Unable to create temporary file: %+v", err) + } + // Clean up temporary file + defer os.Remove(tmpfile.Name()) + + // Make the ToFile calls + err = c.ToFile(tmpfile.Name()) + // Make sure no errors occurred while populating the file + if err != nil { + t.Fatalf("Unable to write to temporary file: %+v", err) + } + + // Make sure the file is on disk + if _, err := os.Stat(tmpfile.Name()); os.IsNotExist(err) { + t.Fatalf("The config file was not written to disk: %+v", err) + } +} + +// TestConfigUpdateFromFile ensures Config.UpdateFromFile(..) properly +// updates an already create Config instancec with new data. +func TestConfigUpdateFromFile(t *testing.T) { + // Test with a default configuration + c := DefaultConfig() + // Make the ToFile calls + err := c.UpdateFromFile("testdata/config.toml") + // Make sure no errors occurred while populating from the file + if err != nil { + t.Fatalf("Unable update config from file: %+v", err) + } + + // Check fields that should have changed after UpdateFromFile + if c.Storage != "overlay2" { + t.Fatalf("Update failed. Storage did not change to overlay2") + } + + if c.RuntimeConfig.PidsLimit != 2048 { + t.Fatalf("Update failed. 
RuntimeConfig.PidsLimit did not change to 2048") + } +} diff --git a/libkpod/container.go b/libkpod/container.go new file mode 100644 index 000000000..7835952d2 --- /dev/null +++ b/libkpod/container.go @@ -0,0 +1,157 @@ +package libkpod + +import ( + "fmt" + + cstorage "github.com/containers/storage" + "github.com/kubernetes-incubator/cri-o/libkpod/sandbox" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/kubernetes-incubator/cri-o/pkg/registrar" + "github.com/pkg/errors" +) + +// GetStorageContainer searches for a container with the given name or ID in the given store +func (c *ContainerServer) GetStorageContainer(container string) (*cstorage.Container, error) { + ociCtr, err := c.LookupContainer(container) + if err != nil { + return nil, err + } + return c.store.Container(ociCtr.ID()) +} + +// GetContainerTopLayerID gets the ID of the top layer of the given container +func (c *ContainerServer) GetContainerTopLayerID(containerID string) (string, error) { + ctr, err := c.GetStorageContainer(containerID) + if err != nil { + return "", err + } + return ctr.LayerID, nil +} + +// GetContainerRwSize Gets the size of the mutable top layer of the container +func (c *ContainerServer) GetContainerRwSize(containerID string) (int64, error) { + container, err := c.store.Container(containerID) + if err != nil { + return 0, err + } + + // Get the size of the top layer by calculating the size of the diff + // between the layer and its parent. The top layer of a container is + // the only RW layer, all others are immutable + layer, err := c.store.Layer(container.LayerID) + if err != nil { + return 0, err + } + return c.store.DiffSize(layer.Parent, layer.ID) +} + +// GetContainerRootFsSize gets the size of the container's root filesystem +// A container FS is split into two parts. The first is the top layer, a +// mutable layer, and the rest is the RootFS: the set of immutable layers +// that make up the image on which the container is based +func (c *ContainerServer) GetContainerRootFsSize(containerID string) (int64, error) { + container, err := c.store.Container(containerID) + if err != nil { + return 0, err + } + + // Ignore the size of the top layer. The top layer is a mutable RW layer + // and is not considered a part of the rootfs + rwLayer, err := c.store.Layer(container.LayerID) + if err != nil { + return 0, err + } + layer, err := c.store.Layer(rwLayer.Parent) + if err != nil { + return 0, err + } + + size := int64(0) + for layer.Parent != "" { + layerSize, err := c.store.DiffSize(layer.Parent, layer.ID) + if err != nil { + return size, errors.Wrapf(err, "getting diffsize of layer %q and its parent %q", layer.ID, layer.Parent) + } + size += layerSize + layer, err = c.store.Layer(layer.Parent) + if err != nil { + return 0, err + } + } + // Get the size of the last layer. 
Has to be outside of the loop + // because the parent of the last layer is "", andlstore.Get("") + // will return an error + layerSize, err := c.store.DiffSize(layer.Parent, layer.ID) + return size + layerSize, err +} + +// GetContainerFromRequest gets an oci container matching the specified full or partial id +func (c *ContainerServer) GetContainerFromRequest(cid string) (*oci.Container, error) { + if cid == "" { + return nil, fmt.Errorf("container ID should not be empty") + } + + containerID, err := c.ctrIDIndex.Get(cid) + if err != nil { + return nil, fmt.Errorf("container with ID starting with %s not found: %v", cid, err) + } + + ctr := c.GetContainer(containerID) + if ctr == nil { + return nil, fmt.Errorf("specified container not found: %s", containerID) + } + return ctr, nil +} + +func (c *ContainerServer) getSandboxFromRequest(pid string) (*sandbox.Sandbox, error) { + if pid == "" { + return nil, fmt.Errorf("pod ID should not be empty") + } + + podID, err := c.podIDIndex.Get(pid) + if err != nil { + return nil, fmt.Errorf("pod with ID starting with %s not found: %v", pid, err) + } + + sb := c.GetSandbox(podID) + if sb == nil { + return nil, fmt.Errorf("specified pod not found: %s", podID) + } + return sb, nil +} + +// LookupContainer returns the container with the given name or full or partial id +func (c *ContainerServer) LookupContainer(idOrName string) (*oci.Container, error) { + if idOrName == "" { + return nil, fmt.Errorf("container ID or name should not be empty") + } + + ctrID, err := c.ctrNameIndex.Get(idOrName) + if err != nil { + if err == registrar.ErrNameNotReserved { + ctrID = idOrName + } else { + return nil, err + } + } + + return c.GetContainerFromRequest(ctrID) +} + +// LookupSandbox returns the pod sandbox with the given name or full or partial id +func (c *ContainerServer) LookupSandbox(idOrName string) (*sandbox.Sandbox, error) { + if idOrName == "" { + return nil, fmt.Errorf("container ID or name should not be empty") + } + + podID, err := c.podNameIndex.Get(idOrName) + if err != nil { + if err == registrar.ErrNameNotReserved { + podID = idOrName + } else { + return nil, err + } + } + + return c.getSandboxFromRequest(podID) +} diff --git a/libkpod/container_data.go b/libkpod/container_data.go new file mode 100644 index 000000000..2ade63ba0 --- /dev/null +++ b/libkpod/container_data.go @@ -0,0 +1,210 @@ +package libkpod + +import ( + "encoding/json" + "os" + "time" + + "k8s.io/apimachinery/pkg/fields" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" + + "github.com/kubernetes-incubator/cri-o/libpod/driver" + "github.com/kubernetes-incubator/cri-o/libpod/images" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/opencontainers/image-spec/specs-go/v1" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" +) + +// ContainerData handles the data used when inspecting a container +type ContainerData struct { + ID string + Name string + LogPath string + Labels fields.Set + Annotations fields.Set + State *ContainerState + Metadata *pb.ContainerMetadata + BundlePath string + StopSignal string + FromImage string `json:"Image,omitempty"` + FromImageID string `json:"ImageID"` + MountPoint string `json:"Mountpoint,omitempty"` + MountLabel string + Mounts []specs.Mount + AppArmorProfile string + ImageAnnotations map[string]string `json:"Annotations,omitempty"` + ImageCreatedBy string `json:"CreatedBy,omitempty"` + Config v1.ImageConfig `json:"Config,omitempty"` + SizeRw uint `json:"SizeRw,omitempty"` + SizeRootFs uint 
`json:"SizeRootFs,omitempty"` + Args []string + ResolvConfPath string + HostnamePath string + HostsPath string + GraphDriver driverData +} + +type driverData struct { + Name string + Data map[string]string +} + +// ContainerState represents the status of a container. +type ContainerState struct { + specs.State + Created time.Time `json:"created"` + Started time.Time `json:"started,omitempty"` + Finished time.Time `json:"finished,omitempty"` + ExitCode int32 `json:"exitCode"` + OOMKilled bool `json:"oomKilled,omitempty"` + Error string `json:"error,omitempty"` +} + +// GetContainerData gets the ContainerData for a container with the given name in the given store. +// If size is set to true, it will also determine the size of the container +func (c *ContainerServer) GetContainerData(name string, size bool) (*ContainerData, error) { + ctr, err := c.inspectContainer(name) + if err != nil { + return nil, errors.Wrapf(err, "error reading build container %q", name) + } + container, err := c.store.Container(name) + if err != nil { + return nil, errors.Wrapf(err, "error reading container data") + } + + // The runtime configuration won't exist if the container has never been started by cri-o or kpod, + // so treat a not-exist error as non-fatal. + m := getBlankSpec() + config, err := c.store.FromContainerDirectory(ctr.ID(), "config.json") + if err != nil && !os.IsNotExist(errors.Cause(err)) { + return nil, err + } + if len(config) > 0 { + if err = json.Unmarshal(config, &m); err != nil { + return nil, err + } + } + + if container.ImageID == "" { + return nil, errors.Errorf("error reading container image data: container is not based on an image") + } + imageData, err := images.GetData(c.store, container.ImageID) + if err != nil { + return nil, errors.Wrapf(err, "error reading container image data") + } + + driverName, err := driver.GetDriverName(c.store) + if err != nil { + return nil, err + } + topLayer, err := c.GetContainerTopLayerID(ctr.ID()) + if err != nil { + return nil, err + } + layer, err := c.store.Layer(topLayer) + if err != nil { + return nil, err + } + driverMetadata, err := driver.GetDriverMetadata(c.store, topLayer) + if err != nil { + return nil, err + } + imageName := "" + if len(imageData.Tags) > 0 { + imageName = imageData.Tags[0] + } else if len(imageData.Digests) > 0 { + imageName = imageData.Digests[0] + } + data := &ContainerData{ + ID: ctr.ID(), + Name: ctr.Name(), + LogPath: ctr.LogPath(), + Labels: ctr.Labels(), + Annotations: ctr.Annotations(), + State: c.State(ctr), + Metadata: ctr.Metadata(), + BundlePath: ctr.BundlePath(), + StopSignal: ctr.GetStopSignal(), + Args: m.Process.Args, + FromImage: imageName, + FromImageID: container.ImageID, + MountPoint: layer.MountPoint, + ImageAnnotations: imageData.Annotations, + ImageCreatedBy: imageData.CreatedBy, + Config: imageData.Config, + GraphDriver: driverData{ + Name: driverName, + Data: driverMetadata, + }, + MountLabel: m.Linux.MountLabel, + Mounts: m.Mounts, + AppArmorProfile: m.Process.ApparmorProfile, + ResolvConfPath: "", + HostnamePath: "", + HostsPath: "", + } + + if size { + sizeRootFs, err := c.GetContainerRootFsSize(data.ID) + if err != nil { + + return nil, errors.Wrapf(err, "error reading size for container %q", name) + } + data.SizeRootFs = uint(sizeRootFs) + sizeRw, err := c.GetContainerRwSize(data.ID) + if err != nil { + return nil, errors.Wrapf(err, "error reading RWSize for container %q", name) + } + data.SizeRw = uint(sizeRw) + } + + return data, nil +} + +// Get an oci.Container and update its status +func 
(c *ContainerServer) inspectContainer(container string) (*oci.Container, error) { + ociCtr, err := c.LookupContainer(container) + if err != nil { + return nil, err + } + // call runtime.UpdateStatus() + err = c.Runtime().UpdateStatus(ociCtr) + if err != nil { + return nil, err + } + return ociCtr, nil +} + +func getBlankSpec() specs.Spec { + return specs.Spec{ + Process: &specs.Process{}, + Root: &specs.Root{}, + Mounts: []specs.Mount{}, + Hooks: &specs.Hooks{}, + Annotations: make(map[string]string), + Linux: &specs.Linux{}, + Solaris: &specs.Solaris{}, + Windows: &specs.Windows{}, + } +} + +// State copies the crio container state to ContainerState type for kpod +func (c *ContainerServer) State(ctr *oci.Container) *ContainerState { + crioState := ctr.State() + specState := specs.State{ + Version: crioState.Version, + ID: crioState.ID, + Status: crioState.Status, + Pid: crioState.Pid, + Bundle: crioState.Bundle, + Annotations: crioState.Annotations, + } + cState := &ContainerState{ + Started: crioState.Started, + Created: crioState.Created, + Finished: crioState.Finished, + } + cState.State = specState + return cState +} diff --git a/libkpod/container_server.go b/libkpod/container_server.go new file mode 100644 index 000000000..8da465fdf --- /dev/null +++ b/libkpod/container_server.go @@ -0,0 +1,775 @@ +package libkpod + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "sync" + "time" + + "github.com/containers/image/types" + cstorage "github.com/containers/storage" + "github.com/docker/docker/pkg/ioutils" + "github.com/docker/docker/pkg/truncindex" + "github.com/kubernetes-incubator/cri-o/libkpod/sandbox" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/kubernetes-incubator/cri-o/pkg/annotations" + "github.com/kubernetes-incubator/cri-o/pkg/registrar" + "github.com/kubernetes-incubator/cri-o/pkg/storage" + "github.com/opencontainers/runc/libcontainer" + rspec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/selinux/go-selinux" + "github.com/opencontainers/selinux/go-selinux/label" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" +) + +// ContainerServer implements the ImageServer +type ContainerServer struct { + runtime *oci.Runtime + store cstorage.Store + storageImageServer storage.ImageServer + storageRuntimeServer storage.RuntimeServer + updateLock sync.RWMutex + ctrNameIndex *registrar.Registrar + ctrIDIndex *truncindex.TruncIndex + podNameIndex *registrar.Registrar + podIDIndex *truncindex.TruncIndex + hooks map[string]HookParams + + imageContext *types.SystemContext + stateLock sync.Locker + state *containerServerState + config *Config +} + +// Runtime returns the oci runtime for the ContainerServer +func (c *ContainerServer) Runtime() *oci.Runtime { + return c.runtime +} + +// Hooks returns the oci hooks for the ContainerServer +func (c *ContainerServer) Hooks() map[string]HookParams { + return c.hooks +} + +// Store returns the Store for the ContainerServer +func (c *ContainerServer) Store() cstorage.Store { + return c.store +} + +// StorageImageServer returns the ImageServer for the ContainerServer +func (c *ContainerServer) StorageImageServer() storage.ImageServer { + return c.storageImageServer +} + +// CtrNameIndex returns the Registrar for the ContainerServer +func (c *ContainerServer) CtrNameIndex() *registrar.Registrar { + return c.ctrNameIndex +} + +// CtrIDIndex returns the TruncIndex for the ContainerServer +func (c *ContainerServer) 
CtrIDIndex() *truncindex.TruncIndex { + return c.ctrIDIndex +} + +// PodNameIndex returns the index of pod names +func (c *ContainerServer) PodNameIndex() *registrar.Registrar { + return c.podNameIndex +} + +// PodIDIndex returns the index of pod IDs +func (c *ContainerServer) PodIDIndex() *truncindex.TruncIndex { + return c.podIDIndex +} + +// ImageContext returns the SystemContext for the ContainerServer +func (c *ContainerServer) ImageContext() *types.SystemContext { + return c.imageContext +} + +// Config gets the configuration for the ContainerServer +func (c *ContainerServer) Config() *Config { + return c.config +} + +// StorageRuntimeServer gets the runtime server for the ContainerServer +func (c *ContainerServer) StorageRuntimeServer() storage.RuntimeServer { + return c.storageRuntimeServer +} + +// New creates a new ContainerServer with options provided +func New(config *Config) (*ContainerServer, error) { + store, err := cstorage.GetStore(cstorage.StoreOptions{ + RunRoot: config.RunRoot, + GraphRoot: config.Root, + GraphDriverName: config.Storage, + GraphDriverOptions: config.StorageOptions, + }) + if err != nil { + return nil, err + } + + imageService, err := storage.GetImageService(store, config.DefaultTransport, config.InsecureRegistries, config.Registries) + if err != nil { + return nil, err + } + + storageRuntimeService := storage.GetRuntimeService(imageService, config.PauseImage) + if err != nil { + return nil, err + } + + runtime, err := oci.New(config.Runtime, config.RuntimeUntrustedWorkload, config.DefaultWorkloadTrust, config.Conmon, config.ConmonEnv, config.CgroupManager, config.ContainerExitsDir, config.LogSizeMax, config.NoPivot) + if err != nil { + return nil, err + } + + var lock sync.Locker + if config.FileLocking { + fileLock, err := cstorage.GetLockfile(lockPath) + if err != nil { + return nil, fmt.Errorf("error obtaining lockfile: %v", err) + } + lock = fileLock + } else { + lock = new(sync.Mutex) + } + + hooks := make(map[string]HookParams) + // If hooks directory is set in config use it + if config.HooksDirPath != "" { + if err := readHooks(config.HooksDirPath, hooks); err != nil { + return nil, err + } + // If user overrode default hooks, this means it is in a test, so don't + // use OverrideHooksDirPath + if config.HooksDirPath == DefaultHooksDirPath { + if err := readHooks(OverrideHooksDirPath, hooks); err != nil { + return nil, err + } + } + } + + return &ContainerServer{ + runtime: runtime, + store: store, + storageImageServer: imageService, + storageRuntimeServer: storageRuntimeService, + ctrNameIndex: registrar.NewRegistrar(), + ctrIDIndex: truncindex.NewTruncIndex([]string{}), + podNameIndex: registrar.NewRegistrar(), + podIDIndex: truncindex.NewTruncIndex([]string{}), + imageContext: &types.SystemContext{SignaturePolicyPath: config.SignaturePolicyPath}, + hooks: hooks, + stateLock: lock, + state: &containerServerState{ + containers: oci.NewMemoryStore(), + infraContainers: oci.NewMemoryStore(), + sandboxes: make(map[string]*sandbox.Sandbox), + processLevels: make(map[string]int), + }, + config: config, + }, nil +} + +// Update makes changes to the server's state (lists of pods and containers) to +// reflect the list of pods and containers that are stored on disk, possibly +// having been modified by other parties +func (c *ContainerServer) Update() error { + c.updateLock.Lock() + defer c.updateLock.Unlock() + + containers, err := c.store.Containers() + if err != nil && !os.IsNotExist(errors.Cause(err)) { + logrus.Warnf("could not read containers and 
sandboxes: %v", err) + return err + } + newPods := map[string]*storage.RuntimeContainerMetadata{} + oldPods := map[string]string{} + removedPods := map[string]string{} + newPodContainers := map[string]*storage.RuntimeContainerMetadata{} + oldPodContainers := map[string]string{} + removedPodContainers := map[string]string{} + for _, container := range containers { + if c.HasSandbox(container.ID) { + // FIXME: do we need to reload/update any info about the sandbox? + oldPods[container.ID] = container.ID + oldPodContainers[container.ID] = container.ID + continue + } + if c.GetContainer(container.ID) != nil { + // FIXME: do we need to reload/update any info about the container? + oldPodContainers[container.ID] = container.ID + continue + } + // not previously known, so figure out what it is + metadata, err2 := c.storageRuntimeServer.GetContainerMetadata(container.ID) + if err2 != nil { + logrus.Errorf("error parsing metadata for %s: %v, ignoring", container.ID, err2) + continue + } + if metadata.Pod { + newPods[container.ID] = &metadata + } else { + newPodContainers[container.ID] = &metadata + } + } + c.ctrIDIndex.Iterate(func(id string) { + if _, ok := oldPodContainers[id]; !ok { + // this container's ID wasn't in the updated list -> removed + removedPodContainers[id] = id + } else { + ctr := c.GetContainer(id) + if ctr != nil { + // if the container exists, update its state + c.ContainerStateFromDisk(c.GetContainer(id)) + } + } + }) + for removedPodContainer := range removedPodContainers { + // forget this container + ctr := c.GetContainer(removedPodContainer) + if ctr == nil { + logrus.Warnf("bad state when getting container removed %+v", removedPodContainer) + continue + } + c.ReleaseContainerName(ctr.Name()) + c.RemoveContainer(ctr) + if err = c.ctrIDIndex.Delete(ctr.ID()); err != nil { + return err + } + logrus.Debugf("forgetting removed pod container %s", ctr.ID()) + } + c.PodIDIndex().Iterate(func(id string) { + if _, ok := oldPods[id]; !ok { + // this pod's ID wasn't in the updated list -> removed + removedPods[id] = id + } + }) + for removedPod := range removedPods { + // forget this pod + sb := c.GetSandbox(removedPod) + if sb == nil { + logrus.Warnf("bad state when getting pod to remove %+v", removedPod) + continue + } + podInfraContainer := sb.InfraContainer() + c.ReleaseContainerName(podInfraContainer.Name()) + c.RemoveContainer(podInfraContainer) + if err = c.ctrIDIndex.Delete(podInfraContainer.ID()); err != nil { + return err + } + sb.RemoveInfraContainer() + c.ReleasePodName(sb.Name()) + c.RemoveSandbox(sb.ID()) + if err = c.podIDIndex.Delete(sb.ID()); err != nil { + return err + } + logrus.Debugf("forgetting removed pod %s", sb.ID()) + } + for sandboxID := range newPods { + // load this pod + if err = c.LoadSandbox(sandboxID); err != nil { + logrus.Warnf("could not load new pod sandbox %s: %v, ignoring", sandboxID, err) + } else { + logrus.Debugf("loaded new pod sandbox %s", sandboxID, err) + } + } + for containerID := range newPodContainers { + // load this container + if err = c.LoadContainer(containerID); err != nil { + logrus.Warnf("could not load new sandbox container %s: %v, ignoring", containerID, err) + } else { + logrus.Debugf("loaded new pod container %s", containerID, err) + } + } + return nil +} + +// LoadSandbox loads a sandbox from the disk into the sandbox store +func (c *ContainerServer) LoadSandbox(id string) error { + config, err := c.store.FromContainerDirectory(id, "config.json") + if err != nil { + return err + } + var m rspec.Spec + if err = 
json.Unmarshal(config, &m); err != nil { + return err + } + labels := make(map[string]string) + if err = json.Unmarshal([]byte(m.Annotations[annotations.Labels]), &labels); err != nil { + return err + } + name := m.Annotations[annotations.Name] + name, err = c.ReservePodName(id, name) + if err != nil { + return err + } + defer func() { + if err != nil { + c.ReleasePodName(name) + } + }() + var metadata pb.PodSandboxMetadata + if err = json.Unmarshal([]byte(m.Annotations[annotations.Metadata]), &metadata); err != nil { + return err + } + + ip := m.Annotations[annotations.IP] + + processLabel, mountLabel, err := label.InitLabels(label.DupSecOpt(m.Process.SelinuxLabel)) + if err != nil { + return err + } + + kubeAnnotations := make(map[string]string) + if err = json.Unmarshal([]byte(m.Annotations[annotations.Annotations]), &kubeAnnotations); err != nil { + return err + } + + privileged := isTrue(m.Annotations[annotations.PrivilegedRuntime]) + trusted := isTrue(m.Annotations[annotations.TrustedSandbox]) + + sb, err := sandbox.New(id, name, m.Annotations[annotations.KubeName], filepath.Dir(m.Annotations[annotations.LogPath]), "", labels, kubeAnnotations, processLabel, mountLabel, &metadata, m.Annotations[annotations.ShmPath], "", privileged, trusted, m.Annotations[annotations.ResolvPath], "", nil) + if err != nil { + return err + } + sb.AddHostnamePath(m.Annotations[annotations.HostnamePath]) + sb.AddIP(ip) + + // We add a netNS only if we can load a permanent one. + // Otherwise, the sandbox will live in the host namespace. + netNsPath, err := configNetNsPath(m) + if err == nil { + nsErr := sb.NetNsJoin(netNsPath, sb.Name()) + // If we can't load the networking namespace + // because it's closed, we just set the sb netns + // pointer to nil. Otherwise we return an error. 
+ if nsErr != nil && nsErr != sandbox.ErrClosedNetNS { + return nsErr + } + } + + c.AddSandbox(sb) + + defer func() { + if err != nil { + c.RemoveSandbox(sb.ID()) + } + }() + + sandboxPath, err := c.store.ContainerRunDirectory(id) + if err != nil { + return err + } + + sandboxDir, err := c.store.ContainerDirectory(id) + if err != nil { + return err + } + + cname, err := c.ReserveContainerName(m.Annotations[annotations.ContainerID], m.Annotations[annotations.ContainerName]) + if err != nil { + return err + } + defer func() { + if err != nil { + c.ReleaseContainerName(cname) + } + }() + + created, err := time.Parse(time.RFC3339Nano, m.Annotations[annotations.Created]) + if err != nil { + return err + } + + scontainer, err := oci.NewContainer(m.Annotations[annotations.ContainerID], cname, sandboxPath, m.Annotations[annotations.LogPath], sb.NetNs(), labels, m.Annotations, kubeAnnotations, "", "", "", nil, id, false, false, false, privileged, trusted, sandboxDir, created, m.Annotations["org.opencontainers.image.stopSignal"]) + if err != nil { + return err + } + scontainer.SetSpec(&m) + scontainer.SetMountPoint(m.Annotations[annotations.MountPoint]) + + if m.Annotations[annotations.Volumes] != "" { + containerVolumes := []oci.ContainerVolume{} + if err = json.Unmarshal([]byte(m.Annotations[annotations.Volumes]), &containerVolumes); err != nil { + return fmt.Errorf("failed to unmarshal container volumes: %v", err) + } + if containerVolumes != nil { + for _, cv := range containerVolumes { + scontainer.AddVolume(cv) + } + } + } + + c.ContainerStateFromDisk(scontainer) + + if err = label.ReserveLabel(processLabel); err != nil { + return err + } + sb.SetInfraContainer(scontainer) + if err = c.ctrIDIndex.Add(scontainer.ID()); err != nil { + return err + } + if err = c.podIDIndex.Add(id); err != nil { + return err + } + return nil +} + +func configNetNsPath(spec rspec.Spec) (string, error) { + for _, ns := range spec.Linux.Namespaces { + if ns.Type != rspec.NetworkNamespace { + continue + } + + if ns.Path == "" { + return "", fmt.Errorf("empty networking namespace") + } + + return ns.Path, nil + } + + return "", fmt.Errorf("missing networking namespace") +} + +// LoadContainer loads a container from the disk into the container store +func (c *ContainerServer) LoadContainer(id string) error { + config, err := c.store.FromContainerDirectory(id, "config.json") + if err != nil { + return err + } + var m rspec.Spec + if err = json.Unmarshal(config, &m); err != nil { + return err + } + labels := make(map[string]string) + if err = json.Unmarshal([]byte(m.Annotations[annotations.Labels]), &labels); err != nil { + return err + } + name := m.Annotations[annotations.Name] + name, err = c.ReserveContainerName(id, name) + if err != nil { + return err + } + + defer func() { + if err != nil { + c.ReleaseContainerName(name) + } + }() + + var metadata pb.ContainerMetadata + if err = json.Unmarshal([]byte(m.Annotations[annotations.Metadata]), &metadata); err != nil { + return err + } + sb := c.GetSandbox(m.Annotations[annotations.SandboxID]) + if sb == nil { + return fmt.Errorf("could not get sandbox with id %s, skipping", m.Annotations[annotations.SandboxID]) + } + + tty := isTrue(m.Annotations[annotations.TTY]) + stdin := isTrue(m.Annotations[annotations.Stdin]) + stdinOnce := isTrue(m.Annotations[annotations.StdinOnce]) + + containerPath, err := c.store.ContainerRunDirectory(id) + if err != nil { + return err + } + + containerDir, err := c.store.ContainerDirectory(id) + if err != nil { + return err + } + + img, ok := 
m.Annotations[annotations.Image] + if !ok { + img = "" + } + + imgName, ok := m.Annotations[annotations.ImageName] + if !ok { + imgName = "" + } + + imgRef, ok := m.Annotations[annotations.ImageRef] + if !ok { + imgRef = "" + } + + kubeAnnotations := make(map[string]string) + if err = json.Unmarshal([]byte(m.Annotations[annotations.Annotations]), &kubeAnnotations); err != nil { + return err + } + + created, err := time.Parse(time.RFC3339Nano, m.Annotations[annotations.Created]) + if err != nil { + return err + } + + ctr, err := oci.NewContainer(id, name, containerPath, m.Annotations[annotations.LogPath], sb.NetNs(), labels, m.Annotations, kubeAnnotations, img, imgName, imgRef, &metadata, sb.ID(), tty, stdin, stdinOnce, sb.Privileged(), sb.Trusted(), containerDir, created, m.Annotations["org.opencontainers.image.stopSignal"]) + if err != nil { + return err + } + ctr.SetSpec(&m) + ctr.SetMountPoint(m.Annotations[annotations.MountPoint]) + + c.ContainerStateFromDisk(ctr) + + c.AddContainer(ctr) + return c.ctrIDIndex.Add(id) +} + +func isTrue(annotaton string) bool { + return annotaton == "true" +} + +// ContainerStateFromDisk retrieves information on the state of a running container +// from the disk +func (c *ContainerServer) ContainerStateFromDisk(ctr *oci.Container) error { + if err := ctr.FromDisk(); err != nil { + return err + } + // ignore errors, this is a best effort to have up-to-date info about + // a given container before its state gets stored + c.runtime.UpdateStatus(ctr) + + return nil +} + +// ContainerStateToDisk writes the container's state information to a JSON file +// on disk +func (c *ContainerServer) ContainerStateToDisk(ctr *oci.Container) error { + // ignore errors, this is a best effort to have up-to-date info about + // a given container before its state gets stored + c.Runtime().UpdateStatus(ctr) + + jsonSource, err := ioutils.NewAtomicFileWriter(ctr.StatePath(), 0644) + if err != nil { + return err + } + defer jsonSource.Close() + enc := json.NewEncoder(jsonSource) + return enc.Encode(c.runtime.ContainerStatus(ctr)) +} + +// ReserveContainerName holds a name for a container that is being created +func (c *ContainerServer) ReserveContainerName(id, name string) (string, error) { + if err := c.ctrNameIndex.Reserve(name, id); err != nil { + if err == registrar.ErrNameReserved { + id, err := c.ctrNameIndex.Get(name) + if err != nil { + logrus.Warnf("conflict, ctr name %q already reserved", name) + return "", err + } + return "", fmt.Errorf("conflict, name %q already reserved for ctr %q", name, id) + } + return "", fmt.Errorf("error reserving ctr name %s", name) + } + return name, nil +} + +// ReleaseContainerName releases a container name from the index so that it can +// be used by other containers +func (c *ContainerServer) ReleaseContainerName(name string) { + c.ctrNameIndex.Release(name) +} + +// ReservePodName holds a name for a pod that is being created +func (c *ContainerServer) ReservePodName(id, name string) (string, error) { + if err := c.podNameIndex.Reserve(name, id); err != nil { + if err == registrar.ErrNameReserved { + id, err := c.podNameIndex.Get(name) + if err != nil { + logrus.Warnf("conflict, pod name %q already reserved", name) + return "", err + } + return "", fmt.Errorf("conflict, name %q already reserved for pod %q", name, id) + } + return "", fmt.Errorf("error reserving pod name %q", name) + } + return name, nil +} + +// ReleasePodName releases a pod name from the index so it can be used by other +// pods +func (c *ContainerServer) 
ReleasePodName(name string) { + c.podNameIndex.Release(name) +} + +// Shutdown attempts to shut down the server's storage cleanly +func (c *ContainerServer) Shutdown() error { + _, err := c.store.Shutdown(false) + if err != nil && errors.Cause(err) != cstorage.ErrLayerUsedByContainer { + return err + } + return nil +} + +type containerServerState struct { + containers oci.ContainerStorer + infraContainers oci.ContainerStorer + sandboxes map[string]*sandbox.Sandbox + // processLevels The number of sandboxes using the same SELinux MCS level. Need to release MCS Level, when count reaches 0 + processLevels map[string]int +} + +// AddContainer adds a container to the container state store +func (c *ContainerServer) AddContainer(ctr *oci.Container) { + c.stateLock.Lock() + defer c.stateLock.Unlock() + sandbox := c.state.sandboxes[ctr.Sandbox()] + sandbox.AddContainer(ctr) + c.state.containers.Add(ctr.ID(), ctr) +} + +// AddInfraContainer adds a container to the container state store +func (c *ContainerServer) AddInfraContainer(ctr *oci.Container) { + c.stateLock.Lock() + defer c.stateLock.Unlock() + c.state.infraContainers.Add(ctr.ID(), ctr) +} + +// GetContainer returns a container by its ID +func (c *ContainerServer) GetContainer(id string) *oci.Container { + c.stateLock.Lock() + defer c.stateLock.Unlock() + return c.state.containers.Get(id) +} + +// GetInfraContainer returns a container by its ID +func (c *ContainerServer) GetInfraContainer(id string) *oci.Container { + c.stateLock.Lock() + defer c.stateLock.Unlock() + return c.state.infraContainers.Get(id) +} + +// HasContainer checks if a container exists in the state +func (c *ContainerServer) HasContainer(id string) bool { + c.stateLock.Lock() + defer c.stateLock.Unlock() + ctr := c.state.containers.Get(id) + return ctr != nil +} + +// RemoveContainer removes a container from the container state store +func (c *ContainerServer) RemoveContainer(ctr *oci.Container) { + c.stateLock.Lock() + defer c.stateLock.Unlock() + sbID := ctr.Sandbox() + sb := c.state.sandboxes[sbID] + sb.RemoveContainer(ctr) + c.state.containers.Delete(ctr.ID()) +} + +// RemoveInfraContainer removes a container from the container state store +func (c *ContainerServer) RemoveInfraContainer(ctr *oci.Container) { + c.stateLock.Lock() + defer c.stateLock.Unlock() + c.state.infraContainers.Delete(ctr.ID()) +} + +// listContainers returns a list of all containers stored by the server state +func (c *ContainerServer) listContainers() []*oci.Container { + c.stateLock.Lock() + defer c.stateLock.Unlock() + return c.state.containers.List() +} + +// ListContainers returns a list of all containers stored by the server state +// that match the given filter function +func (c *ContainerServer) ListContainers(filters ...func(*oci.Container) bool) ([]*oci.Container, error) { + containers := c.listContainers() + if len(filters) == 0 { + return containers, nil + } + filteredContainers := make([]*oci.Container, 0, len(containers)) + for _, container := range containers { + for _, filter := range filters { + if filter(container) { + filteredContainers = append(filteredContainers, container) + } + } + } + return filteredContainers, nil +} + +// AddSandbox adds a sandbox to the sandbox state store +func (c *ContainerServer) AddSandbox(sb *sandbox.Sandbox) { + c.stateLock.Lock() + defer c.stateLock.Unlock() + c.state.sandboxes[sb.ID()] = sb + c.state.processLevels[selinux.NewContext(sb.ProcessLabel())["level"]]++ +} + +// GetSandbox returns a sandbox by its ID +func (c *ContainerServer) 
GetSandbox(id string) *sandbox.Sandbox { + c.stateLock.Lock() + defer c.stateLock.Unlock() + return c.state.sandboxes[id] +} + +// GetSandboxContainer returns a sandbox's infra container +func (c *ContainerServer) GetSandboxContainer(id string) *oci.Container { + c.stateLock.Lock() + defer c.stateLock.Unlock() + sb, ok := c.state.sandboxes[id] + if !ok { + return nil + } + return sb.InfraContainer() +} + +// HasSandbox checks if a sandbox exists in the state +func (c *ContainerServer) HasSandbox(id string) bool { + c.stateLock.Lock() + defer c.stateLock.Unlock() + _, ok := c.state.sandboxes[id] + return ok +} + +// RemoveSandbox removes a sandbox from the state store +func (c *ContainerServer) RemoveSandbox(id string) { + c.stateLock.Lock() + defer c.stateLock.Unlock() + processLabel := c.state.sandboxes[id].ProcessLabel() + delete(c.state.sandboxes, id) + level := selinux.NewContext(processLabel)["level"] + c.state.processLevels[level]-- + if c.state.processLevels[level] == 0 { + label.ReleaseLabel(processLabel) + delete(c.state.processLevels, level) + } +} + +// ListSandboxes lists all sandboxes in the state store +func (c *ContainerServer) ListSandboxes() []*sandbox.Sandbox { + c.stateLock.Lock() + defer c.stateLock.Unlock() + sbArray := make([]*sandbox.Sandbox, 0, len(c.state.sandboxes)) + for _, sb := range c.state.sandboxes { + sbArray = append(sbArray, sb) + } + + return sbArray +} + +// LibcontainerStats gets the stats for the container with the given id from runc/libcontainer +func (c *ContainerServer) LibcontainerStats(ctr *oci.Container) (*libcontainer.Stats, error) { + // TODO: make this not hardcoded + // was: c.runtime.Path(ociContainer) but that returns /usr/bin/runc - how do we get /run/runc? + // runroot is /var/run/runc + // Hardcoding probably breaks ClearContainers compatibility + factory, err := loadFactory("/run/runc") + if err != nil { + return nil, err + } + container, err := factory.Load(ctr.ID()) + if err != nil { + return nil, err + } + return container.Stats() +} diff --git a/libkpod/hooks.go b/libkpod/hooks.go new file mode 100644 index 000000000..f353cdcde --- /dev/null +++ b/libkpod/hooks.go @@ -0,0 +1,98 @@ +package libkpod + +import ( + "encoding/json" + "io/ioutil" + "os" + "path/filepath" + "regexp" + "strings" + "syscall" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +const ( + // DefaultHooksDirPath Default directory containing hooks config files + DefaultHooksDirPath = "/usr/share/containers/oci/hooks.d" + // OverrideHooksDirPath Directory where admin can override the default configuration + OverrideHooksDirPath = "/etc/containers/oci/hooks.d" +) + +// HookParams is the structure returned from read the hooks configuration +type HookParams struct { + Hook string `json:"hook"` + Stage []string `json:"stage"` + Cmds []string `json:"cmd"` + Annotations []string `json:"annotation"` + HasBindMounts bool `json:"hasbindmounts"` +} + +// readHook reads hooks json files, verifies it and returns the json config +func readHook(hookPath string) (HookParams, error) { + var hook HookParams + raw, err := ioutil.ReadFile(hookPath) + if err != nil { + return hook, errors.Wrapf(err, "error Reading hook %q", hookPath) + } + if err := json.Unmarshal(raw, &hook); err != nil { + return hook, errors.Wrapf(err, "error Unmarshalling JSON for %q", hookPath) + } + if _, err := os.Stat(hook.Hook); err != nil { + return hook, errors.Wrapf(err, "unable to stat hook %q in hook config %q", hook.Hook, hookPath) + } + validStage := map[string]bool{"prestart": true, 
"poststart": true, "poststop": true} + for _, cmd := range hook.Cmds { + if _, err = regexp.Compile(cmd); err != nil { + return hook, errors.Wrapf(err, "invalid cmd regular expression %q defined in hook config %q", cmd, hookPath) + } + } + for _, cmd := range hook.Annotations { + if _, err = regexp.Compile(cmd); err != nil { + return hook, errors.Wrapf(err, "invalid cmd regular expression %q defined in hook config %q", cmd, hookPath) + } + } + for _, stage := range hook.Stage { + if !validStage[stage] { + return hook, errors.Wrapf(err, "unknown stage %q defined in hook config %q", stage, hookPath) + } + } + return hook, nil +} + +// readHooks reads hooks json files in directory to setup OCI Hooks +// adding hooks to the passedin hooks map. +func readHooks(hooksPath string, hooks map[string]HookParams) error { + if _, err := os.Stat(hooksPath); err != nil { + if os.IsNotExist(err) { + logrus.Warnf("hooks path: %q does not exist", hooksPath) + return nil + } + return errors.Wrapf(err, "unable to stat hooks path %q", hooksPath) + } + + files, err := ioutil.ReadDir(hooksPath) + if err != nil { + return err + } + + for _, file := range files { + if !strings.HasSuffix(file.Name(), ".json") { + continue + } + hook, err := readHook(filepath.Join(hooksPath, file.Name())) + if err != nil { + return err + } + for key, h := range hooks { + // hook.Hook can only be defined in one hook file, unless it has the + // same name in the override path. + if hook.Hook == h.Hook && key != file.Name() { + return errors.Wrapf(syscall.EINVAL, "duplicate path, hook %q from %q already defined in %q", hook.Hook, hooksPath, key) + } + } + hooks[file.Name()] = hook + } + return nil +} diff --git a/libkpod/kill.go b/libkpod/kill.go new file mode 100644 index 000000000..b2c3219a2 --- /dev/null +++ b/libkpod/kill.go @@ -0,0 +1,45 @@ +package libkpod + +import ( + "github.com/docker/docker/pkg/signal" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/kubernetes-incubator/cri-o/utils" + "github.com/pkg/errors" + "os" + "syscall" +) + +// Reverse lookup signal string from its map +func findStringInSignalMap(killSignal syscall.Signal) (string, error) { + for k, v := range signal.SignalMap { + if v == killSignal { + return k, nil + } + } + return "", errors.Errorf("unable to convert signal to string") + +} + +// ContainerKill sends the user provided signal to the containers primary process. +func (c *ContainerServer) ContainerKill(container string, killSignal syscall.Signal) (string, error) { // nolint + ctr, err := c.LookupContainer(container) + if err != nil { + return "", errors.Wrapf(err, "failed to find container %s", container) + } + c.runtime.UpdateStatus(ctr) + cStatus := c.runtime.ContainerStatus(ctr) + + // If the container is not running, error and move on. 
+ if cStatus.Status != oci.ContainerStateRunning { + return "", errors.Errorf("cannot kill container %s: it is not running", container) + } + signalString, err := findStringInSignalMap(killSignal) + if err != nil { + return "", err + } + if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, c.runtime.Path(ctr), "kill", ctr.ID(), signalString); err != nil { + return "", err + } + c.ContainerStateToDisk(ctr) + return ctr.ID(), nil +} diff --git a/libkpod/logs.go b/libkpod/logs.go new file mode 100644 index 000000000..00b0f0167 --- /dev/null +++ b/libkpod/logs.go @@ -0,0 +1,80 @@ +package libkpod + +import ( + "path" + "strings" + "time" + + "github.com/hpcloud/tail" +) + +// LogOptions contains all of the options for displaying logs in kpod +type LogOptions struct { + Details bool + Follow bool + SinceTime time.Time + Tail uint64 +} + +// GetLogs gets each line of a log file and, if it matches the criteria in logOptions, sends it down logChan +func (c *ContainerServer) GetLogs(container string, logChan chan string, opts LogOptions) error { + defer close(logChan) + // Get the full ID of the container + ctr, err := c.LookupContainer(container) + if err != nil { + return err + } + + containerID := ctr.ID() + sandbox := ctr.Sandbox() + if sandbox == "" { + sandbox = containerID + } + // Read the log line by line and pass it into the pipe + logsFile := path.Join(c.config.LogDir, sandbox, containerID+".log") + + seekInfo := &tail.SeekInfo{Offset: 0, Whence: 0} + if opts.Tail > 0 { + // seek to correct position in logs files + seekInfo.Offset = int64(opts.Tail) + seekInfo.Whence = 2 + } + + t, err := tail.TailFile(logsFile, tail.Config{Follow: false, ReOpen: false, Location: seekInfo}) + for line := range t.Lines { + if since, err := logSinceTime(opts.SinceTime, line.Text); err != nil || !since { + continue + } + logMessage := line.Text[secondSpaceIndex(line.Text):] + if opts.Details { + // add additional information to line + } + logChan <- logMessage + } + return err +} + +func logSinceTime(sinceTime time.Time, logStr string) (bool, error) { + timestamp := strings.Split(logStr, " ")[0] + logTime, err := time.Parse("2006-01-02T15:04:05.999999999-07:00", timestamp) + if err != nil { + return false, err + } + return logTime.After(sinceTime) || logTime.Equal(sinceTime), nil +} + +// secondSpaceIndex returns the index of the second space in a string +// In a line of the logs, the first two tokens are a timestamp and stdout/stderr, +// followed by the message itself. This allows us to get the index of the message +// and avoid sending the other information back to the caller of GetLogs() +func secondSpaceIndex(line string) int { + index := strings.Index(line, " ") + if index == -1 { + return 0 + } + index = strings.Index(line[index:], " ") + if index == -1 { + return 0 + } + return index +} diff --git a/libkpod/pause.go b/libkpod/pause.go new file mode 100644 index 000000000..29871d32b --- /dev/null +++ b/libkpod/pause.go @@ -0,0 +1,46 @@ +package libkpod + +import ( + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/pkg/errors" +) + +// ContainerPause pauses a running container. 
+func (c *ContainerServer) ContainerPause(container string) (string, error) { + ctr, err := c.LookupContainer(container) + if err != nil { + return "", errors.Wrapf(err, "failed to find container %s", container) + } + + cStatus := c.runtime.ContainerStatus(ctr) + if cStatus.Status != oci.ContainerStatePaused { + if err := c.runtime.PauseContainer(ctr); err != nil { + return "", errors.Wrapf(err, "failed to pause container %s", ctr.ID()) + } + c.ContainerStateToDisk(ctr) + } else { + return "", errors.Wrapf(err, "container %s is already paused", ctr.ID()) + } + + return ctr.ID(), nil +} + +// ContainerUnpause unpauses a running container with a grace period (i.e., timeout). +func (c *ContainerServer) ContainerUnpause(container string) (string, error) { + ctr, err := c.LookupContainer(container) + if err != nil { + return "", errors.Wrapf(err, "failed to find container %s", container) + } + + cStatus := c.runtime.ContainerStatus(ctr) + if cStatus.Status == oci.ContainerStatePaused { + if err := c.runtime.UnpauseContainer(ctr); err != nil { + return "", errors.Wrapf(err, "failed to unpause container %s", ctr.ID()) + } + c.ContainerStateToDisk(ctr) + } else { + return "", errors.Wrapf(err, "the container %s is not paused", ctr.ID()) + } + + return ctr.ID(), nil +} diff --git a/libkpod/remove.go b/libkpod/remove.go new file mode 100644 index 000000000..5df9e8f74 --- /dev/null +++ b/libkpod/remove.go @@ -0,0 +1,53 @@ +package libkpod + +import ( + "os" + "path/filepath" + + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/pkg/errors" + "golang.org/x/net/context" +) + +// Remove removes a container +func (c *ContainerServer) Remove(ctx context.Context, container string, force bool) (string, error) { + ctr, err := c.LookupContainer(container) + if err != nil { + return "", err + } + ctrID := ctr.ID() + + cStatus := c.runtime.ContainerStatus(ctr) + switch cStatus.Status { + case oci.ContainerStatePaused: + return "", errors.Errorf("cannot remove paused container %s", ctrID) + case oci.ContainerStateCreated, oci.ContainerStateRunning: + if force { + _, err = c.ContainerStop(ctx, container, 10) + if err != nil { + return "", errors.Wrapf(err, "unable to stop container %s", ctrID) + } + } else { + return "", errors.Errorf("cannot remove running container %s", ctrID) + } + } + + if err := c.runtime.DeleteContainer(ctr); err != nil { + return "", errors.Wrapf(err, "failed to delete container %s", ctrID) + } + if err := os.Remove(filepath.Join(c.Config().RuntimeConfig.ContainerExitsDir, ctrID)); err != nil && !os.IsNotExist(err) { + return "", errors.Wrapf(err, "failed to remove container exit file %s", ctrID) + } + c.RemoveContainer(ctr) + + if err := c.storageRuntimeServer.DeleteContainer(ctrID); err != nil { + return "", errors.Wrapf(err, "failed to delete storage for container %s", ctrID) + } + + c.ReleaseContainerName(ctr.Name()) + + if err := c.ctrIDIndex.Delete(ctrID); err != nil { + return "", err + } + return ctrID, nil +} diff --git a/libkpod/rename.go b/libkpod/rename.go new file mode 100644 index 000000000..7c0279bfe --- /dev/null +++ b/libkpod/rename.go @@ -0,0 +1,114 @@ +package libkpod + +import ( + "encoding/json" + "path/filepath" + + "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" + + "github.com/docker/docker/pkg/ioutils" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/kubernetes-incubator/cri-o/pkg/annotations" + "github.com/opencontainers/runtime-tools/generate" +) + +const configFile = "config.json" + +// ContainerRename renames the given container 
+func (c *ContainerServer) ContainerRename(container, name string) error { + ctr, err := c.LookupContainer(container) + if err != nil { + return err + } + + oldName := ctr.Name() + _, err = c.ReserveContainerName(ctr.ID(), name) + if err != nil { + return err + } + defer func() { + if err != nil { + c.ReleaseContainerName(name) + } else { + c.ReleaseContainerName(oldName) + } + }() + + // Update state.json + if err = c.updateStateName(ctr, name); err != nil { + return err + } + + // Update config.json + configRuntimePath := filepath.Join(ctr.BundlePath(), configFile) + if err = updateConfigName(configRuntimePath, name); err != nil { + return err + } + configStoragePath := filepath.Join(ctr.Dir(), configFile) + if err = updateConfigName(configStoragePath, name); err != nil { + return err + } + + // Update containers.json + if err = c.store.SetNames(ctr.ID(), []string{name}); err != nil { + return err + } + return nil +} + +func updateConfigName(configPath, name string) error { + specgen, err := generate.NewFromFile(configPath) + if err != nil { + return err + } + specgen.AddAnnotation(annotations.Name, name) + specgen.AddAnnotation(annotations.Metadata, updateMetadata(specgen.Spec().Annotations, name)) + + return specgen.SaveToFile(configPath, generate.ExportOptions{}) +} + +func (c *ContainerServer) updateStateName(ctr *oci.Container, name string) error { + ctr.State().Annotations[annotations.Name] = name + ctr.State().Annotations[annotations.Metadata] = updateMetadata(ctr.State().Annotations, name) + // This is taken directly from c.ContainerStateToDisk(), which can't be used because of the call to UpdateStatus() in the first line + jsonSource, err := ioutils.NewAtomicFileWriter(ctr.StatePath(), 0644) + if err != nil { + return err + } + defer jsonSource.Close() + enc := json.NewEncoder(jsonSource) + return enc.Encode(c.runtime.ContainerStatus(ctr)) +} + +// Attempts to update a metadata annotation +func updateMetadata(specAnnotations map[string]string, name string) string { + oldMetadata := specAnnotations[annotations.Metadata] + containerType := specAnnotations[annotations.ContainerType] + if containerType == "container" { + metadata := runtime.ContainerMetadata{} + err := json.Unmarshal([]byte(oldMetadata), metadata) + if err != nil { + return oldMetadata + } + metadata.Name = name + m, err := json.Marshal(metadata) + if err != nil { + return oldMetadata + } + return string(m) + } else if containerType == "sandbox" { + metadata := runtime.PodSandboxMetadata{} + err := json.Unmarshal([]byte(oldMetadata), metadata) + if err != nil { + return oldMetadata + } + metadata.Name = name + m, err := json.Marshal(metadata) + if err != nil { + return oldMetadata + } + return string(m) + } else { + return specAnnotations[annotations.Metadata] + } +} diff --git a/libkpod/sandbox/sandbox.go b/libkpod/sandbox/sandbox.go new file mode 100644 index 000000000..d7d6569d9 --- /dev/null +++ b/libkpod/sandbox/sandbox.go @@ -0,0 +1,484 @@ +package sandbox + +import ( + "crypto/rand" + "errors" + "fmt" + "os" + "path/filepath" + "sync" + + "github.com/containernetworking/plugins/pkg/ns" + "github.com/docker/docker/pkg/mount" + "github.com/docker/docker/pkg/symlink" + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + "k8s.io/apimachinery/pkg/fields" + pb "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime" + "k8s.io/kubernetes/pkg/kubelet/network/hostport" +) + +// NetNs handles data pertaining a network namespace +type NetNs struct { + sync.Mutex + ns 
ns.NetNS + symlink *os.File + closed bool + restored bool +} + +func (ns *NetNs) symlinkCreate(name string) error { + b := make([]byte, 4) + _, randErr := rand.Reader.Read(b) + if randErr != nil { + return randErr + } + + nsName := fmt.Sprintf("%s-%x", name, b) + symlinkPath := filepath.Join(NsRunDir, nsName) + + if err := os.Symlink(ns.ns.Path(), symlinkPath); err != nil { + return err + } + + fd, err := os.Open(symlinkPath) + if err != nil { + if removeErr := os.RemoveAll(symlinkPath); removeErr != nil { + return removeErr + } + + return err + } + + ns.symlink = fd + + return nil +} + +func (ns *NetNs) symlinkRemove() error { + if err := ns.symlink.Close(); err != nil { + return err + } + + return os.RemoveAll(ns.symlink.Name()) +} + +func isSymbolicLink(path string) (bool, error) { + fi, err := os.Lstat(path) + if err != nil { + return false, err + } + + return fi.Mode()&os.ModeSymlink == os.ModeSymlink, nil +} + +// NetNsGet returns the NetNs associated with the given nspath and name +func NetNsGet(nspath, name string) (*NetNs, error) { + if err := ns.IsNSorErr(nspath); err != nil { + return nil, ErrClosedNetNS + } + + symlink, symlinkErr := isSymbolicLink(nspath) + if symlinkErr != nil { + return nil, symlinkErr + } + + var resolvedNsPath string + if symlink { + path, err := os.Readlink(nspath) + if err != nil { + return nil, err + } + resolvedNsPath = path + } else { + resolvedNsPath = nspath + } + + netNS, err := ns.GetNS(resolvedNsPath) + if err != nil { + return nil, err + } + + netNs := &NetNs{ns: netNS, closed: false, restored: true} + + if symlink { + fd, err := os.Open(nspath) + if err != nil { + return nil, err + } + + netNs.symlink = fd + } else { + if err := netNs.symlinkCreate(name); err != nil { + return nil, err + } + } + + return netNs, nil +} + +// HostNetNsPath returns the current network namespace for the host +func HostNetNsPath() (string, error) { + netNS, err := ns.GetCurrentNS() + if err != nil { + return "", err + } + + defer netNS.Close() + return netNS.Path(), nil +} + +// Sandbox contains data surrounding kubernetes sandboxes on the server +type Sandbox struct { + id string + namespace string + // OCI pod name (eg "<namespace>-<name>-<attempt>") + name string + // Kubernetes pod name (eg, "<name>") + kubeName string + logDir string + labels fields.Set + annotations map[string]string + infraContainer *oci.Container + containers oci.ContainerStorer + processLabel string + mountLabel string + netns *NetNs + metadata *pb.PodSandboxMetadata + shmPath string + cgroupParent string + privileged bool + trusted bool + resolvPath string + hostnamePath string + hostname string + portMappings []*hostport.PortMapping + stopped bool + // ipv4 or ipv6 cache + ip string +} + +const ( + // DefaultShmSize is the default shm size + DefaultShmSize = 64 * 1024 * 1024 + // NsRunDir is the default directory in which running network namespaces + // are stored + NsRunDir = "/var/run/netns" + // PodInfraCommand is the default command when starting a pod infrastructure + // container + PodInfraCommand = "/pause" +) + +var ( + // ErrIDEmpty is the erro returned when the id of the sandbox is empty + ErrIDEmpty = errors.New("PodSandboxId should not be empty") + // ErrClosedNetNS is the error returned when the network namespace of the + // sandbox is closed + ErrClosedNetNS = errors.New("PodSandbox networking namespace is closed") +) + +// New creates and populates a new pod sandbox +// New sandboxes have no containers, no infra container, and no network namespaces associated with them +// 
An infra container must be attached before the sandbox is added to the state +func New(id, namespace, name, kubeName, logDir string, labels, annotations map[string]string, processLabel, mountLabel string, metadata *pb.PodSandboxMetadata, shmPath, cgroupParent string, privileged, trusted bool, resolvPath, hostname string, portMappings []*hostport.PortMapping) (*Sandbox, error) { + sb := new(Sandbox) + sb.id = id + sb.namespace = namespace + sb.name = name + sb.kubeName = kubeName + sb.logDir = logDir + sb.labels = labels + sb.annotations = annotations + sb.containers = oci.NewMemoryStore() + sb.processLabel = processLabel + sb.mountLabel = mountLabel + sb.metadata = metadata + sb.shmPath = shmPath + sb.cgroupParent = cgroupParent + sb.privileged = privileged + sb.trusted = trusted + sb.resolvPath = resolvPath + sb.hostname = hostname + sb.portMappings = portMappings + + return sb, nil +} + +// AddIP stores the ip in the sandbox +func (s *Sandbox) AddIP(ip string) { + s.ip = ip +} + +// IP returns the ip of the sandbox +func (s *Sandbox) IP() string { + return s.ip +} + +// ID returns the id of the sandbox +func (s *Sandbox) ID() string { + return s.id +} + +// Namespace returns the namespace for the sandbox +func (s *Sandbox) Namespace() string { + return s.namespace +} + +// Name returns the name of the sandbox +func (s *Sandbox) Name() string { + return s.name +} + +// KubeName returns the kubernetes name for the sandbox +func (s *Sandbox) KubeName() string { + return s.kubeName +} + +// LogDir returns the location of the logging directory for the sandbox +func (s *Sandbox) LogDir() string { + return s.logDir +} + +// Labels returns the labels associated with the sandbox +func (s *Sandbox) Labels() fields.Set { + return s.labels +} + +// Annotations returns a list of annotations for the sandbox +func (s *Sandbox) Annotations() map[string]string { + return s.annotations +} + +// InfraContainer returns the infrastructure container for the sandbox +func (s *Sandbox) InfraContainer() *oci.Container { + return s.infraContainer +} + +// Containers returns the ContainerStorer that contains information on all +// of the containers in the sandbox +func (s *Sandbox) Containers() oci.ContainerStorer { + return s.containers +} + +// ProcessLabel returns the process label for the sandbox +func (s *Sandbox) ProcessLabel() string { + return s.processLabel +} + +// MountLabel returns the mount label for the sandbox +func (s *Sandbox) MountLabel() string { + return s.mountLabel +} + +// Metadata returns a set of metadata about the sandbox +func (s *Sandbox) Metadata() *pb.PodSandboxMetadata { + return s.metadata +} + +// ShmPath returns the shm path of the sandbox +func (s *Sandbox) ShmPath() string { + return s.shmPath +} + +// CgroupParent returns the cgroup parent of the sandbox +func (s *Sandbox) CgroupParent() string { + return s.cgroupParent +} + +// Privileged returns whether or not the containers in the sandbox are +// privileged containers +func (s *Sandbox) Privileged() bool { + return s.privileged +} + +// Trusted returns whether or not the containers in the sandbox are trusted +func (s *Sandbox) Trusted() bool { + return s.trusted +} + +// ResolvPath returns the resolv path for the sandbox +func (s *Sandbox) ResolvPath() string { + return s.resolvPath +} + +// AddHostnamePath adds the hostname path to the sandbox +func (s *Sandbox) AddHostnamePath(hostname string) { + s.hostnamePath = hostname +} + +// HostnamePath retrieves the hostname path from a sandbox +func (s *Sandbox) HostnamePath() 
string { + return s.hostnamePath +} + +// Hostname returns the hsotname of the sandbox +func (s *Sandbox) Hostname() string { + return s.hostname +} + +// PortMappings returns a list of port mappings between the host and the sandbox +func (s *Sandbox) PortMappings() []*hostport.PortMapping { + return s.portMappings +} + +// AddContainer adds a container to the sandbox +func (s *Sandbox) AddContainer(c *oci.Container) { + s.containers.Add(c.Name(), c) +} + +// GetContainer retrieves a container from the sandbox +func (s *Sandbox) GetContainer(name string) *oci.Container { + return s.containers.Get(name) +} + +// RemoveContainer deletes a container from the sandbox +func (s *Sandbox) RemoveContainer(c *oci.Container) { + s.containers.Delete(c.Name()) +} + +// SetInfraContainer sets the infrastructure container of a sandbox +// Attempts to set the infrastructure container after one is already present will throw an error +func (s *Sandbox) SetInfraContainer(infraCtr *oci.Container) error { + if s.infraContainer != nil { + return fmt.Errorf("sandbox already has an infra container") + } else if infraCtr == nil { + return fmt.Errorf("must provide non-nil infra container") + } + + s.infraContainer = infraCtr + + return nil +} + +// RemoveInfraContainer removes the infrastructure container of a sandbox +func (s *Sandbox) RemoveInfraContainer() { + s.infraContainer = nil +} + +// NetNs retrieves the network namespace of the sandbox +// If the sandbox uses the host namespace, nil is returned +func (s *Sandbox) NetNs() ns.NetNS { + if s.netns == nil { + return nil + } + + return s.netns.ns +} + +// NetNsPath returns the path to the network namespace of the sandbox. +// If the sandbox uses the host namespace, nil is returned +func (s *Sandbox) NetNsPath() string { + if s.netns == nil { + return "" + } + + return s.netns.symlink.Name() +} + +// NetNsCreate creates a new network namespace for the sandbox +func (s *Sandbox) NetNsCreate() error { + if s.netns != nil { + return fmt.Errorf("net NS already created") + } + + netNS, err := ns.NewNS() + if err != nil { + return err + } + + s.netns = &NetNs{ + ns: netNS, + closed: false, + } + + if err := s.netns.symlinkCreate(s.name); err != nil { + logrus.Warnf("Could not create nentns symlink %v", err) + + if err1 := s.netns.ns.Close(); err1 != nil { + return err1 + } + + return err + } + + return nil +} + +// SetStopped sets the sandbox state to stopped. +// This should be set after a stop operation succeeds +// so that subsequent stops can return fast. +func (s *Sandbox) SetStopped() { + s.stopped = true +} + +// Stopped returns whether the sandbox state has been +// set to stopped. +func (s *Sandbox) Stopped() bool { + return s.stopped +} + +// NetNsJoin attempts to join the sandbox to an existing network namespace +// This will fail if the sandbox is already part of a network namespace +func (s *Sandbox) NetNsJoin(nspath, name string) error { + if s.netns != nil { + return fmt.Errorf("sandbox already has a network namespace, cannot join another") + } + + netNS, err := NetNsGet(nspath, name) + if err != nil { + return err + } + + s.netns = netNS + + return nil +} + +// NetNsRemove removes the network namespace associated with the sandbox +func (s *Sandbox) NetNsRemove() error { + if s.netns == nil { + logrus.Warn("no networking namespace") + return nil + } + + s.netns.Lock() + defer s.netns.Unlock() + + if s.netns.closed { + // netNsRemove() can be called multiple + // times without returning an error. 
+ return nil + } + + if err := s.netns.symlinkRemove(); err != nil { + return err + } + + if err := s.netns.ns.Close(); err != nil { + return err + } + + if s.netns.restored { + // we got namespaces in the form of + // /var/run/netns/cni-0d08effa-06eb-a963-f51a-e2b0eceffc5d + // but /var/run on most system is symlinked to /run so we first resolve + // the symlink and then try and see if it's mounted + fp, err := symlink.FollowSymlinkInScope(s.netns.ns.Path(), "/") + if err != nil { + return err + } + if mounted, err := mount.Mounted(fp); err == nil && mounted { + if err := unix.Unmount(fp, unix.MNT_DETACH); err != nil { + return err + } + } + + if err := os.RemoveAll(s.netns.ns.Path()); err != nil { + return err + } + } + + s.netns.closed = true + return nil +} diff --git a/libkpod/stats.go b/libkpod/stats.go new file mode 100644 index 000000000..f4d645d65 --- /dev/null +++ b/libkpod/stats.go @@ -0,0 +1,111 @@ +package libkpod + +import ( + "path/filepath" + "syscall" + "time" + + "strings" + + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/opencontainers/runc/libcontainer" +) + +// ContainerStats contains the statistics information for a running container +type ContainerStats struct { + Container string + CPU float64 + cpuNano uint64 + systemNano uint64 + MemUsage uint64 + MemLimit uint64 + MemPerc float64 + NetInput uint64 + NetOutput uint64 + BlockInput uint64 + BlockOutput uint64 + PIDs uint64 +} + +// GetContainerStats gets the running stats for a given container +func (c *ContainerServer) GetContainerStats(ctr *oci.Container, previousStats *ContainerStats) (*ContainerStats, error) { + previousCPU := previousStats.cpuNano + previousSystem := previousStats.systemNano + libcontainerStats, err := c.LibcontainerStats(ctr) + if err != nil { + return nil, err + } + cgroupStats := libcontainerStats.CgroupStats + stats := new(ContainerStats) + stats.Container = ctr.ID() + stats.CPU = calculateCPUPercent(libcontainerStats, previousCPU, previousSystem) + stats.MemUsage = cgroupStats.MemoryStats.Usage.Usage + stats.MemLimit = getMemLimit(cgroupStats.MemoryStats.Usage.Limit) + stats.MemPerc = float64(stats.MemUsage) / float64(stats.MemLimit) + stats.PIDs = cgroupStats.PidsStats.Current + stats.BlockInput, stats.BlockOutput = calculateBlockIO(libcontainerStats) + stats.NetInput, stats.NetOutput = getContainerNetIO(libcontainerStats) + + return stats, nil +} + +func loadFactory(root string) (libcontainer.Factory, error) { + abs, err := filepath.Abs(root) + if err != nil { + return nil, err + } + cgroupManager := libcontainer.Cgroupfs + return libcontainer.New(abs, cgroupManager, libcontainer.CriuPath("")) +} + +// getMemory limit returns the memory limit for a given cgroup +// If the configured memory limit is larger than the total memory on the sys, the +// physical system memory size is returned +func getMemLimit(cgroupLimit uint64) uint64 { + si := &syscall.Sysinfo_t{} + err := syscall.Sysinfo(si) + if err != nil { + return cgroupLimit + } + + physicalLimit := uint64(si.Totalram) + if cgroupLimit > physicalLimit { + return physicalLimit + } + return cgroupLimit +} + +// Returns the total number of bytes transmitted and received for the given container stats +func getContainerNetIO(stats *libcontainer.Stats) (received uint64, transmitted uint64) { + for _, iface := range stats.Interfaces { + received += iface.RxBytes + transmitted += iface.TxBytes + } + return +} + +func calculateCPUPercent(stats *libcontainer.Stats, previousCPU, previousSystem uint64) float64 { + var ( + cpuPercent = 
0.0 + cpuDelta = float64(stats.CgroupStats.CpuStats.CpuUsage.TotalUsage - previousCPU) + systemDelta = float64(uint64(time.Now().UnixNano()) - previousSystem) + ) + if systemDelta > 0.0 && cpuDelta > 0.0 { + // gets a ratio of container cpu usage total, multiplies it by the number of cores (4 cores running + // at 100% utilization should be 400% utilization), and multiplies that by 100 to get a percentage + cpuPercent = (cpuDelta / systemDelta) * float64(len(stats.CgroupStats.CpuStats.CpuUsage.PercpuUsage)) * 100 + } + return cpuPercent +} + +func calculateBlockIO(stats *libcontainer.Stats) (read uint64, write uint64) { + for _, blkIOEntry := range stats.CgroupStats.BlkioStats.IoServiceBytesRecursive { + switch strings.ToLower(blkIOEntry.Op) { + case "read": + read += blkIOEntry.Value + case "write": + write += blkIOEntry.Value + } + } + return +} diff --git a/libkpod/stop.go b/libkpod/stop.go new file mode 100644 index 000000000..86c9cbec2 --- /dev/null +++ b/libkpod/stop.go @@ -0,0 +1,36 @@ +package libkpod + +import ( + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/pkg/errors" + "golang.org/x/net/context" +) + +// ContainerStop stops a running container with a grace period (i.e., timeout). +func (c *ContainerServer) ContainerStop(ctx context.Context, container string, timeout int64) (string, error) { + ctr, err := c.LookupContainer(container) + if err != nil { + return "", errors.Wrapf(err, "failed to find container %s", container) + } + ctrID := ctr.ID() + + cStatus := c.runtime.ContainerStatus(ctr) + switch cStatus.Status { + + case oci.ContainerStatePaused: + return "", errors.Errorf("cannot stop paused container %s", ctrID) + default: + if cStatus.Status != oci.ContainerStateStopped { + if err := c.runtime.StopContainer(ctx, ctr, timeout); err != nil { + return "", errors.Wrapf(err, "failed to stop container %s", ctrID) + } + if err := c.storageRuntimeServer.StopContainer(ctrID); err != nil { + return "", errors.Wrapf(err, "failed to unmount container %s", ctrID) + } + } + } + + c.ContainerStateToDisk(ctr) + + return ctrID, nil +} diff --git a/libkpod/testdata/config.toml b/libkpod/testdata/config.toml new file mode 100644 index 000000000..318273674 --- /dev/null +++ b/libkpod/testdata/config.toml @@ -0,0 +1,28 @@ +[crio] + root = "/var/lib/containers/storage" + runroot = "/var/run/containers/storage" + storage_driver = "overlay2" + log_dir = "/var/log/crio/pods" + file_locking = true + [crio.runtime] + runtime = "/usr/bin/runc" + runtime_untrusted_workload = "" + default_workload_trust = "trusted" + conmon = "/usr/local/libexec/crio/conmon" + conmon_env = ["PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"] + selinux = true + seccomp_profile = "/etc/crio/seccomp.json" + apparmor_profile = "crio-default" + cgroup_manager = "cgroupfs" + hooks_dir_path = "/usr/share/containers/oci/hooks.d" + pids_limit = 2048 + container_exits_dir = "/var/run/kpod/exits" + [crio.image] + default_transport = "docker://" + pause_image = "kubernetes/pause" + pause_command = "/pause" + signature_policy = "" + image_volumes = "mkdir" + [crio.network] + network_dir = "/etc/cni/net.d/" + plugin_dir = "/opt/cni/bin/" diff --git a/libkpod/wait.go b/libkpod/wait.go new file mode 100644 index 000000000..c7ba57328 --- /dev/null +++ b/libkpod/wait.go @@ -0,0 +1,42 @@ +package libkpod + +import ( + "github.com/kubernetes-incubator/cri-o/oci" + "github.com/pkg/errors" + "k8s.io/apimachinery/pkg/util/wait" +) + +func isStopped(c *ContainerServer, ctr *oci.Container) bool { + 
	c.runtime.UpdateStatus(ctr)
+	cStatus := c.runtime.ContainerStatus(ctr)
+	return cStatus.Status == oci.ContainerStateStopped
+}
+
+// ContainerWait waits for a running container to stop and returns its exit code.
+func (c *ContainerServer) ContainerWait(container string) (int32, error) {
+	ctr, err := c.LookupContainer(container)
+	if err != nil {
+		return 0, errors.Wrapf(err, "failed to find container %s", container)
+	}
+
+	err = wait.PollImmediateInfinite(1,
+		func() (bool, error) {
+			return isStopped(c, ctr), nil
+		},
+	)
+
+	if err != nil {
+		return 0, err
+	}
+	exitCode := ctr.State().ExitCode
+	c.ContainerStateToDisk(ctr)
+	return exitCode, nil
+}
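
The stop/wait/remove and stats entry points added above are meant to be driven by kpod's CLI. As a rough, hedged sketch of the intended call order only: the *ContainerServer value is assumed to be obtained from this package's constructor (defined in container_server.go, outside this hunk), the import path is inferred from the other cri-o imports shown in the diff, and the helper names below (stopAndRemove, sampleCPU) are illustrative, not part of the package.

package main

import (
	"fmt"
	"time"

	"github.com/kubernetes-incubator/cri-o/libkpod"
	"golang.org/x/net/context"
)

// stopAndRemove shows the expected call order: stop with a grace period,
// wait for the exit code, then remove the container and its storage.
func stopAndRemove(ctx context.Context, cs *libkpod.ContainerServer, container string) error {
	// 10-second grace period before the runtime resorts to SIGKILL.
	if _, err := cs.ContainerStop(ctx, container, 10); err != nil {
		return err
	}
	// ContainerWait polls until the state is stopped and returns the exit code.
	exitCode, err := cs.ContainerWait(container)
	if err != nil {
		return err
	}
	fmt.Printf("container %s exited with code %d\n", container, exitCode)
	// force=false: removal fails if the container is somehow running again.
	_, err = cs.Remove(ctx, container, false)
	return err
}

// sampleCPU takes two stats samples one second apart; the second call gets
// the first sample as previousStats so the CPU percentage is computed over
// the elapsed interval rather than since container start.
func sampleCPU(cs *libkpod.ContainerServer, container string) (float64, error) {
	ctr, err := cs.LookupContainer(container)
	if err != nil {
		return 0, err
	}
	prev, err := cs.GetContainerStats(ctr, &libkpod.ContainerStats{})
	if err != nil {
		return 0, err
	}
	time.Sleep(time.Second)
	cur, err := cs.GetContainerStats(ctr, prev)
	if err != nil {
		return 0, err
	}
	return cur.CPU, nil
}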