diff options
Diffstat (limited to 'libpod')
-rw-r--r-- | libpod/container.go | 12 | ||||
-rw-r--r-- | libpod/container_api.go | 29 | ||||
-rw-r--r-- | libpod/container_internal_linux.go | 63 | ||||
-rw-r--r-- | libpod/container_path_resolution.go | 166 | ||||
-rw-r--r-- | libpod/image/search.go | 28 | ||||
-rw-r--r-- | libpod/kube.go | 128 | ||||
-rw-r--r-- | libpod/network/config.go | 5 | ||||
-rw-r--r-- | libpod/network/create.go | 31 | ||||
-rw-r--r-- | libpod/network/netconflist.go | 22 | ||||
-rw-r--r-- | libpod/network/network.go | 12 | ||||
-rw-r--r-- | libpod/networking_linux.go | 22 | ||||
-rw-r--r-- | libpod/oci_conmon_exec_linux.go | 48 | ||||
-rw-r--r-- | libpod/options.go | 29 | ||||
-rw-r--r-- | libpod/pod.go | 1 | ||||
-rw-r--r-- | libpod/rootless_cni_linux.go | 34 | ||||
-rw-r--r-- | libpod/runtime_pod_infra_linux.go | 14 | ||||
-rw-r--r-- | libpod/util.go | 17 |
17 files changed, 573 insertions, 88 deletions
diff --git a/libpod/container.go b/libpod/container.go index 58bf95470..ed7535bc8 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -1073,6 +1073,18 @@ func networkDisabled(c *Container) (bool, error) { return false, nil } +func (c *Container) HostNetwork() bool { + if c.config.CreateNetNS || c.config.NetNsCtr != "" { + return false + } + for _, ns := range c.config.Spec.Linux.Namespaces { + if ns.Type == spec.NetworkNamespace { + return false + } + } + return true +} + // ContainerState returns containerstate struct func (c *Container) ContainerState() (*ContainerState, error) { if !c.batched { diff --git a/libpod/container_api.go b/libpod/container_api.go index 0d62a2dd7..951227a4f 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -776,3 +776,32 @@ func (c *Container) ShouldRestart(ctx context.Context) bool { } return c.shouldRestart() } + +// ResolvePath resolves the specified path on the root for the container. The +// root must either be the mounted image of the container or the already +// mounted container storage. +// +// It returns the resolved root and the resolved path. Note that the path may +// resolve to the container's mount point or to a volume or bind mount. +func (c *Container) ResolvePath(ctx context.Context, root string, path string) (string, string, error) { + logrus.Debugf("Resolving path %q (root %q) on container %s", path, root, c.ID()) + + // Minimal sanity checks. + if len(root)*len(path) == 0 { + return "", "", errors.Wrapf(define.ErrInternal, "ResolvePath: root (%q) and path (%q) must be non empty", root, path) + } + if _, err := os.Stat(root); err != nil { + return "", "", errors.Wrapf(err, "cannot locate root to resolve path on container %s", c.ID()) + } + + if !c.batched { + c.lock.Lock() + defer c.lock.Unlock() + + if err := c.syncContainer(); err != nil { + return "", "", err + } + } + + return c.resolvePath(root, path) +} diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index b41a3fa38..6c9489a08 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -174,25 +174,60 @@ func (c *Container) prepare() error { return err } - // Ensure container entrypoint is created (if required) - if c.config.CreateWorkingDir { - workdir, err := securejoin.SecureJoin(c.state.Mountpoint, c.WorkingDir()) - if err != nil { - return errors.Wrapf(err, "error creating path to container %s working dir", c.ID()) - } - rootUID := c.RootUID() - rootGID := c.RootGID() + // Make sure the workdir exists + if err := c.resolveWorkDir(); err != nil { + return err + } - if err := os.MkdirAll(workdir, 0755); err != nil { - if os.IsExist(err) { - return nil + return nil +} + +// resolveWorkDir resolves the container's workdir and, depending on the +// configuration, will create it, or error out if it does not exist. +// Note that the container must be mounted before. +func (c *Container) resolveWorkDir() error { + workdir := c.WorkingDir() + + // If the specified workdir is a subdir of a volume or mount, + // we don't need to do anything. The runtime is taking care of + // that. + if isPathOnVolume(c, workdir) || isPathOnBindMount(c, workdir) { + logrus.Debugf("Workdir %q resolved to a volume or mount", workdir) + return nil + } + + _, resolvedWorkdir, err := c.resolvePath(c.state.Mountpoint, workdir) + if err != nil { + return err + } + logrus.Debugf("Workdir %q resolved to host path %q", workdir, resolvedWorkdir) + + // No need to create it (e.g., `--workdir=/foo`), so let's make sure + // the path exists on the container. + if !c.config.CreateWorkingDir { + if _, err := os.Stat(resolvedWorkdir); err != nil { + if os.IsNotExist(err) { + return errors.Errorf("workdir %q does not exist on container %s", workdir, c.ID()) } - return errors.Wrapf(err, "error creating container %s working dir", c.ID()) + // This might be a serious error (e.g., permission), so + // we need to return the full error. + return errors.Wrapf(err, "error detecting workdir %q on container %s", workdir, c.ID()) } + } + + // Ensure container entrypoint is created (if required). + rootUID := c.RootUID() + rootGID := c.RootGID() - if err := os.Chown(workdir, rootUID, rootGID); err != nil { - return errors.Wrapf(err, "error chowning container %s working directory to container root", c.ID()) + if err := os.MkdirAll(resolvedWorkdir, 0755); err != nil { + if os.IsExist(err) { + return nil } + return errors.Wrapf(err, "error creating container %s workdir", c.ID()) + } + + if err := os.Chown(resolvedWorkdir, rootUID, rootGID); err != nil { + return errors.Wrapf(err, "error chowning container %s workdir to container root", c.ID()) } return nil diff --git a/libpod/container_path_resolution.go b/libpod/container_path_resolution.go new file mode 100644 index 000000000..805b3b947 --- /dev/null +++ b/libpod/container_path_resolution.go @@ -0,0 +1,166 @@ +package libpod + +import ( + "path/filepath" + "strings" + + securejoin "github.com/cyphar/filepath-securejoin" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +// resolveContainerPaths resolves the container's mount point and the container +// path as specified by the user. Both may resolve to paths outside of the +// container's mount point when the container path hits a volume or bind mount. +// +// It returns a bool, indicating whether containerPath resolves outside of +// mountPoint (e.g., via a mount or volume), the resolved root (e.g., container +// mount, bind mount or volume) and the resolved path on the root (absolute to +// the host). +func (container *Container) resolvePath(mountPoint string, containerPath string) (string, string, error) { + // Let's first make sure we have a path relative to the mount point. + pathRelativeToContainerMountPoint := containerPath + if !filepath.IsAbs(containerPath) { + // If the containerPath is not absolute, it's relative to the + // container's working dir. To be extra careful, let's first + // join the working dir with "/", and the add the containerPath + // to it. + pathRelativeToContainerMountPoint = filepath.Join(filepath.Join("/", container.WorkingDir()), containerPath) + } + resolvedPathOnTheContainerMountPoint := filepath.Join(mountPoint, pathRelativeToContainerMountPoint) + pathRelativeToContainerMountPoint = strings.TrimPrefix(pathRelativeToContainerMountPoint, mountPoint) + pathRelativeToContainerMountPoint = filepath.Join("/", pathRelativeToContainerMountPoint) + + // Now we have an "absolute container Path" but not yet resolved on the + // host (e.g., "/foo/bar/file.txt"). As mentioned above, we need to + // check if "/foo/bar/file.txt" is on a volume or bind mount. To do + // that, we need to walk *down* the paths to the root. Assuming + // volume-1 is mounted to "/foo" and volume-2 is mounted to "/foo/bar", + // we must select "/foo/bar". Once selected, we need to rebase the + // remainder (i.e, "/file.txt") on the volume's mount point on the + // host. Same applies to bind mounts. + + searchPath := pathRelativeToContainerMountPoint + for { + volume, err := findVolume(container, searchPath) + if err != nil { + return "", "", err + } + if volume != nil { + logrus.Debugf("Container path %q resolved to volume %q on path %q", containerPath, volume.Name(), searchPath) + + // TODO: We really need to force the volume to mount + // before doing this, but that API is not exposed + // externally right now and doing so is beyond the scope + // of this commit. + mountPoint, err := volume.MountPoint() + if err != nil { + return "", "", err + } + if mountPoint == "" { + return "", "", errors.Errorf("volume %s is not mounted, cannot copy into it", volume.Name()) + } + + // We found a matching volume for searchPath. We now + // need to first find the relative path of our input + // path to the searchPath, and then join it with the + // volume's mount point. + pathRelativeToVolume := strings.TrimPrefix(pathRelativeToContainerMountPoint, searchPath) + absolutePathOnTheVolumeMount, err := securejoin.SecureJoin(mountPoint, pathRelativeToVolume) + if err != nil { + return "", "", err + } + return mountPoint, absolutePathOnTheVolumeMount, nil + } + + if mount := findBindMount(container, searchPath); mount != nil { + logrus.Debugf("Container path %q resolved to bind mount %q:%q on path %q", containerPath, mount.Source, mount.Destination, searchPath) + // We found a matching bind mount for searchPath. We + // now need to first find the relative path of our + // input path to the searchPath, and then join it with + // the source of the bind mount. + pathRelativeToBindMount := strings.TrimPrefix(pathRelativeToContainerMountPoint, searchPath) + absolutePathOnTheBindMount, err := securejoin.SecureJoin(mount.Source, pathRelativeToBindMount) + if err != nil { + return "", "", err + } + return mount.Source, absolutePathOnTheBindMount, nil + + } + + if searchPath == "/" { + // Cannot go beyond "/", so we're done. + break + } + + // Walk *down* the path (e.g., "/foo/bar/x" -> "/foo/bar"). + searchPath = filepath.Dir(searchPath) + } + + // No volume, no bind mount but just a normal path on the container. + return mountPoint, resolvedPathOnTheContainerMountPoint, nil +} + +// findVolume checks if the specified containerPath matches the destination +// path of a Volume. Returns a matching Volume or nil. +func findVolume(c *Container, containerPath string) (*Volume, error) { + runtime := c.Runtime() + cleanedContainerPath := filepath.Clean(containerPath) + for _, vol := range c.Config().NamedVolumes { + if cleanedContainerPath == filepath.Clean(vol.Dest) { + return runtime.GetVolume(vol.Name) + } + } + return nil, nil +} + +// isPathOnVolume returns true if the specified containerPath is a subdir of any +// Volume's destination. +func isPathOnVolume(c *Container, containerPath string) bool { + cleanedContainerPath := filepath.Clean(containerPath) + for _, vol := range c.Config().NamedVolumes { + if cleanedContainerPath == filepath.Clean(vol.Dest) { + return true + } + for dest := vol.Dest; dest != "/"; dest = filepath.Dir(dest) { + if cleanedContainerPath == dest { + return true + } + } + } + return false +} + +// findBindMounts checks if the specified containerPath matches the destination +// path of a Mount. Returns a matching Mount or nil. +func findBindMount(c *Container, containerPath string) *specs.Mount { + cleanedPath := filepath.Clean(containerPath) + for _, m := range c.Config().Spec.Mounts { + if m.Type != "bind" { + continue + } + if cleanedPath == filepath.Clean(m.Destination) { + mount := m + return &mount + } + } + return nil +} + +/// isPathOnBindMount returns true if the specified containerPath is a subdir of any +// Mount's destination. +func isPathOnBindMount(c *Container, containerPath string) bool { + cleanedContainerPath := filepath.Clean(containerPath) + for _, m := range c.Config().Spec.Mounts { + if cleanedContainerPath == filepath.Clean(m.Destination) { + return true + } + for dest := m.Destination; dest != "/"; dest = filepath.Dir(dest) { + if cleanedContainerPath == dest { + return true + } + } + } + return false +} diff --git a/libpod/image/search.go b/libpod/image/search.go index 6020fbca9..c5799219a 100644 --- a/libpod/image/search.go +++ b/libpod/image/search.go @@ -102,8 +102,8 @@ func SearchImages(term string, options SearchOptions) ([]SearchResult, error) { searchImageInRegistryHelper := func(index int, registry string) { defer sem.Release(1) defer wg.Done() - searchOutput := searchImageInRegistry(term, registry, options) - data[index] = searchOutputData{data: searchOutput} + searchOutput, err := searchImageInRegistry(term, registry, options) + data[index] = searchOutputData{data: searchOutput, err: err} } ctx := context.Background() @@ -116,13 +116,21 @@ func SearchImages(term string, options SearchOptions) ([]SearchResult, error) { wg.Wait() results := []SearchResult{} + var lastError error for _, d := range data { if d.err != nil { - return nil, d.err + if lastError != nil { + logrus.Errorf("%v", lastError) + } + lastError = d.err + continue } results = append(results, d.data...) } - return results, nil + if len(results) > 0 { + return results, nil + } + return results, lastError } // getRegistries returns the list of registries to search, depending on an optional registry specification @@ -140,7 +148,7 @@ func getRegistries(registry string) ([]string, error) { return registries, nil } -func searchImageInRegistry(term string, registry string, options SearchOptions) []SearchResult { +func searchImageInRegistry(term string, registry string, options SearchOptions) ([]SearchResult, error) { // Max number of queries by default is 25 limit := maxQueries if options.Limit > 0 { @@ -156,16 +164,14 @@ func searchImageInRegistry(term string, registry string, options SearchOptions) if options.ListTags { results, err := searchRepositoryTags(registry, term, sc, options) if err != nil { - logrus.Errorf("error listing registry tags %q: %v", registry, err) - return []SearchResult{} + return []SearchResult{}, err } - return results + return results, nil } results, err := docker.SearchRegistry(context.TODO(), sc, registry, term, limit) if err != nil { - logrus.Errorf("error searching registry %q: %v", registry, err) - return []SearchResult{} + return []SearchResult{}, err } index := registry arr := strings.Split(registry, ".") @@ -219,7 +225,7 @@ func searchImageInRegistry(term string, registry string, options SearchOptions) } paramsArr = append(paramsArr, params) } - return paramsArr + return paramsArr, nil } func searchRepositoryTags(registry, term string, sc *types.SystemContext, options SearchOptions) ([]SearchResult, error) { diff --git a/libpod/kube.go b/libpod/kube.go index 753c58099..bf314b9a3 100644 --- a/libpod/kube.go +++ b/libpod/kube.go @@ -49,6 +49,7 @@ func (p *Pod) GenerateForKube() (*v1.Pod, []v1.ServicePort, error) { } extraHost := make([]v1.HostAlias, 0) + hostNetwork := false if p.HasInfraContainer() { infraContainer, err := p.getInfraContainer() if err != nil { @@ -69,9 +70,9 @@ func (p *Pod) GenerateForKube() (*v1.Pod, []v1.ServicePort, error) { return nil, servicePorts, err } servicePorts = containerPortsToServicePorts(ports) - + hostNetwork = p.config.InfraContainer.HostNetwork } - pod, err := p.podWithContainers(allContainers, ports) + pod, err := p.podWithContainers(allContainers, ports, hostNetwork) if err != nil { return nil, servicePorts, err } @@ -167,13 +168,14 @@ func containersToServicePorts(containers []v1.Container) []v1.ServicePort { return sps } -func (p *Pod) podWithContainers(containers []*Container, ports []v1.ContainerPort) (*v1.Pod, error) { +func (p *Pod) podWithContainers(containers []*Container, ports []v1.ContainerPort, hostNetwork bool) (*v1.Pod, error) { deDupPodVolumes := make(map[string]*v1.Volume) first := true podContainers := make([]v1.Container, 0, len(containers)) + dnsInfo := v1.PodDNSConfig{} for _, ctr := range containers { if !ctr.IsInfra() { - ctr, volumes, err := containerToV1Container(ctr) + ctr, volumes, _, err := containerToV1Container(ctr) if err != nil { return nil, err } @@ -196,6 +198,22 @@ func (p *Pod) podWithContainers(containers []*Container, ports []v1.ContainerPor vol := vol deDupPodVolumes[vol.Name] = &vol } + } else { + _, _, infraDNS, err := containerToV1Container(ctr) + if err != nil { + return nil, err + } + if infraDNS != nil { + if servers := infraDNS.Nameservers; len(servers) > 0 { + dnsInfo.Nameservers = servers + } + if searches := infraDNS.Searches; len(searches) > 0 { + dnsInfo.Searches = searches + } + if options := infraDNS.Options; len(options) > 0 { + dnsInfo.Options = options + } + } } } podVolumes := make([]v1.Volume, 0, len(deDupPodVolumes)) @@ -203,10 +221,10 @@ func (p *Pod) podWithContainers(containers []*Container, ports []v1.ContainerPor podVolumes = append(podVolumes, *vol) } - return addContainersAndVolumesToPodObject(podContainers, podVolumes, p.Name()), nil + return addContainersAndVolumesToPodObject(podContainers, podVolumes, p.Name(), &dnsInfo, hostNetwork), nil } -func addContainersAndVolumesToPodObject(containers []v1.Container, volumes []v1.Volume, podName string) *v1.Pod { +func addContainersAndVolumesToPodObject(containers []v1.Container, volumes []v1.Volume, podName string, dnsOptions *v1.PodDNSConfig, hostNetwork bool) *v1.Pod { tm := v12.TypeMeta{ Kind: "Pod", APIVersion: "v1", @@ -225,8 +243,12 @@ func addContainersAndVolumesToPodObject(containers []v1.Container, volumes []v1. CreationTimestamp: v12.Now(), } ps := v1.PodSpec{ - Containers: containers, - Volumes: volumes, + Containers: containers, + Volumes: volumes, + HostNetwork: hostNetwork, + } + if dnsOptions != nil { + ps.DNSConfig = dnsOptions } p := v1.Pod{ TypeMeta: tm, @@ -241,32 +263,69 @@ func addContainersAndVolumesToPodObject(containers []v1.Container, volumes []v1. func simplePodWithV1Containers(ctrs []*Container) (*v1.Pod, error) { kubeCtrs := make([]v1.Container, 0, len(ctrs)) kubeVolumes := make([]v1.Volume, 0) + hostNetwork := true + podDNS := v1.PodDNSConfig{} for _, ctr := range ctrs { - kubeCtr, kubeVols, err := containerToV1Container(ctr) + if !ctr.HostNetwork() { + hostNetwork = false + } + kubeCtr, kubeVols, ctrDNS, err := containerToV1Container(ctr) if err != nil { return nil, err } kubeCtrs = append(kubeCtrs, kubeCtr) kubeVolumes = append(kubeVolumes, kubeVols...) - } - return addContainersAndVolumesToPodObject(kubeCtrs, kubeVolumes, strings.ReplaceAll(ctrs[0].Name(), "_", "")), nil + // Combine DNS information in sum'd structure + if ctrDNS != nil { + // nameservers + if servers := ctrDNS.Nameservers; servers != nil { + if podDNS.Nameservers == nil { + podDNS.Nameservers = make([]string, 0) + } + for _, s := range servers { + if !util.StringInSlice(s, podDNS.Nameservers) { // only append if it does not exist + podDNS.Nameservers = append(podDNS.Nameservers, s) + } + } + } + // search domains + if domains := ctrDNS.Searches; domains != nil { + if podDNS.Searches == nil { + podDNS.Searches = make([]string, 0) + } + for _, d := range domains { + if !util.StringInSlice(d, podDNS.Searches) { // only append if it does not exist + podDNS.Searches = append(podDNS.Searches, d) + } + } + } + // dns options + if options := ctrDNS.Options; options != nil { + if podDNS.Options == nil { + podDNS.Options = make([]v1.PodDNSConfigOption, 0) + } + podDNS.Options = append(podDNS.Options, options...) + } + } // end if ctrDNS + } + return addContainersAndVolumesToPodObject(kubeCtrs, kubeVolumes, strings.ReplaceAll(ctrs[0].Name(), "_", ""), &podDNS, hostNetwork), nil } // containerToV1Container converts information we know about a libpod container // to a V1.Container specification. -func containerToV1Container(c *Container) (v1.Container, []v1.Volume, error) { +func containerToV1Container(c *Container) (v1.Container, []v1.Volume, *v1.PodDNSConfig, error) { kubeContainer := v1.Container{} kubeVolumes := []v1.Volume{} kubeSec, err := generateKubeSecurityContext(c) if err != nil { - return kubeContainer, kubeVolumes, err + return kubeContainer, kubeVolumes, nil, err } if len(c.config.Spec.Linux.Devices) > 0 { // TODO Enable when we can support devices and their names kubeContainer.VolumeDevices = generateKubeVolumeDeviceFromLinuxDevice(c.Spec().Linux.Devices) - return kubeContainer, kubeVolumes, errors.Wrapf(define.ErrNotImplemented, "linux devices") + return kubeContainer, kubeVolumes, nil, errors.Wrapf(define.ErrNotImplemented, "linux devices") } if len(c.config.UserVolumes) > 0 { @@ -274,7 +333,7 @@ func containerToV1Container(c *Container) (v1.Container, []v1.Volume, error) { // Volume names need to be coordinated "globally" in the kube files. volumeMounts, volumes, err := libpodMountsToKubeVolumeMounts(c) if err != nil { - return kubeContainer, kubeVolumes, err + return kubeContainer, kubeVolumes, nil, err } kubeContainer.VolumeMounts = volumeMounts kubeVolumes = append(kubeVolumes, volumes...) @@ -282,16 +341,16 @@ func containerToV1Container(c *Container) (v1.Container, []v1.Volume, error) { envVariables, err := libpodEnvVarsToKubeEnvVars(c.config.Spec.Process.Env) if err != nil { - return kubeContainer, kubeVolumes, err + return kubeContainer, kubeVolumes, nil, err } portmappings, err := c.PortMappings() if err != nil { - return kubeContainer, kubeVolumes, err + return kubeContainer, kubeVolumes, nil, err } ports, err := ocicniPortMappingToContainerPort(portmappings) if err != nil { - return kubeContainer, kubeVolumes, err + return kubeContainer, kubeVolumes, nil, err } containerCommands := c.Command() @@ -355,7 +414,38 @@ func containerToV1Container(c *Container) (v1.Container, []v1.Volume, error) { } } - return kubeContainer, kubeVolumes, nil + // Obtain the DNS entries from the container + dns := v1.PodDNSConfig{} + + // DNS servers + if servers := c.config.DNSServer; len(servers) > 0 { + dnsServers := make([]string, 0) + for _, server := range servers { + dnsServers = append(dnsServers, server.String()) + } + dns.Nameservers = dnsServers + } + + // DNS search domains + if searches := c.config.DNSSearch; len(searches) > 0 { + dns.Searches = searches + } + + // DNS options + if options := c.config.DNSOption; len(options) > 0 { + dnsOptions := make([]v1.PodDNSConfigOption, 0) + for _, option := range options { + // the option can be "k:v" or just "k", no delimiter is required + opts := strings.SplitN(option, ":", 2) + dnsOpt := v1.PodDNSConfigOption{ + Name: opts[0], + Value: &opts[1], + } + dnsOptions = append(dnsOptions, dnsOpt) + } + dns.Options = dnsOptions + } + return kubeContainer, kubeVolumes, &dns, nil } // ocicniPortMappingToContainerPort takes an ocicni portmapping and converts diff --git a/libpod/network/config.go b/libpod/network/config.go index ce351129e..294e23509 100644 --- a/libpod/network/config.go +++ b/libpod/network/config.go @@ -103,7 +103,9 @@ func (p PortMapConfig) Bytes() ([]byte, error) { // IPAMDHCP describes the ipamdhcp config type IPAMDHCP struct { - DHCP string `json:"type"` + DHCP string `json:"type"` + Routes []IPAMRoute `json:"routes,omitempty"` + Ranges [][]IPAMLocalHostRangeConf `json:"ranges,omitempty"` } // MacVLANConfig describes the macvlan config @@ -111,6 +113,7 @@ type MacVLANConfig struct { PluginType string `json:"type"` Master string `json:"master"` IPAM IPAMDHCP `json:"ipam"` + MTU int `json:"mtu,omitempty"` } // Bytes outputs the configuration as []byte diff --git a/libpod/network/create.go b/libpod/network/create.go index a8f985af9..deacf487a 100644 --- a/libpod/network/create.go +++ b/libpod/network/create.go @@ -29,7 +29,7 @@ func Create(name string, options entities.NetworkCreateOptions, runtimeConfig *c return nil, err } defer l.releaseCNILock() - if len(options.MacVLAN) > 0 { + if len(options.MacVLAN) > 0 || options.Driver == MacVLANNetworkDriver { fileName, err = createMacVLAN(name, options, runtimeConfig) } else { fileName, err = createBridge(name, options, runtimeConfig) @@ -249,6 +249,7 @@ func createBridge(name string, options entities.NetworkCreateOptions, runtimeCon func createMacVLAN(name string, options entities.NetworkCreateOptions, runtimeConfig *config.Config) (string, error) { var ( + mtu int plugins []CNIPlugins ) liveNetNames, err := GetLiveNetworkNames() @@ -256,9 +257,17 @@ func createMacVLAN(name string, options entities.NetworkCreateOptions, runtimeCo return "", err } - // Make sure the host-device exists - if !util.StringInSlice(options.MacVLAN, liveNetNames) { - return "", errors.Errorf("failed to find network interface %q", options.MacVLAN) + // The parent can be defined with --macvlan or as an option (-o parent:device) + parentNetworkDevice := options.MacVLAN + if len(parentNetworkDevice) < 1 { + if parent, ok := options.Options["parent"]; ok { + parentNetworkDevice = parent + } + } + + // Make sure the host-device exists if provided + if len(parentNetworkDevice) > 0 && !util.StringInSlice(parentNetworkDevice, liveNetNames) { + return "", errors.Errorf("failed to find network interface %q", parentNetworkDevice) } if len(name) > 0 { netNames, err := GetNetworkNamesFromFileSystem(runtimeConfig) @@ -275,7 +284,19 @@ func createMacVLAN(name string, options entities.NetworkCreateOptions, runtimeCo } } ncList := NewNcList(name, version.Current(), options.Labels) - macvlan := NewMacVLANPlugin(options.MacVLAN) + if val, ok := options.Options["mtu"]; ok { + intVal, err := strconv.Atoi(val) + if err != nil { + return "", err + } + if intVal > 0 { + mtu = intVal + } + } + macvlan, err := NewMacVLANPlugin(parentNetworkDevice, options.Gateway, &options.Range, &options.Subnet, mtu) + if err != nil { + return "", err + } plugins = append(plugins, macvlan) ncList["plugins"] = plugins b, err := json.MarshalIndent(ncList, "", " ") diff --git a/libpod/network/netconflist.go b/libpod/network/netconflist.go index 165a9067b..9be98e78f 100644 --- a/libpod/network/netconflist.go +++ b/libpod/network/netconflist.go @@ -172,15 +172,31 @@ func HasDNSNamePlugin(paths []string) bool { } // NewMacVLANPlugin creates a macvlanconfig with a given device name -func NewMacVLANPlugin(device string) MacVLANConfig { +func NewMacVLANPlugin(device string, gateway net.IP, ipRange *net.IPNet, subnet *net.IPNet, mtu int) (MacVLANConfig, error) { i := IPAMDHCP{DHCP: "dhcp"} + if gateway != nil || ipRange != nil || subnet != nil { + ipam, err := NewIPAMLocalHostRange(subnet, ipRange, gateway) + if err != nil { + return MacVLANConfig{}, err + } + ranges := make([][]IPAMLocalHostRangeConf, 0) + ranges = append(ranges, ipam) + i.Ranges = ranges + } m := MacVLANConfig{ PluginType: "macvlan", - Master: device, IPAM: i, } - return m + if mtu > 0 { + m.MTU = mtu + } + // CNI is supposed to use the default route if a + // parent device is not provided + if len(device) > 0 { + m.Master = device + } + return m, nil } // IfPassesFilter filters NetworkListReport and returns true if the filter match the given config diff --git a/libpod/network/network.go b/libpod/network/network.go index 0fb878b18..0ff14c1f7 100644 --- a/libpod/network/network.go +++ b/libpod/network/network.go @@ -17,11 +17,17 @@ import ( "github.com/sirupsen/logrus" ) -// DefaultNetworkDriver is the default network type used -var DefaultNetworkDriver = "bridge" +var ( + // BridgeNetworkDriver defines the bridge cni driver + BridgeNetworkDriver = "bridge" + // DefaultNetworkDriver is the default network type used + DefaultNetworkDriver = BridgeNetworkDriver + // MacVLANNetworkDriver defines the macvlan cni driver + MacVLANNetworkDriver = "macvlan" +) // SupportedNetworkDrivers describes the list of supported drivers -var SupportedNetworkDrivers = []string{DefaultNetworkDriver} +var SupportedNetworkDrivers = []string{BridgeNetworkDriver, MacVLANNetworkDriver} // isSupportedDriver checks if the user provided driver is supported func isSupportedDriver(driver string) error { diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go index ef2f034ab..01e4102d1 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -15,6 +15,7 @@ import ( "path/filepath" "regexp" "sort" + "strconv" "strings" "syscall" "time" @@ -42,6 +43,9 @@ const ( // slirp4netnsDNS is the IP for the built-in DNS server in the slirp network slirp4netnsDNS = "10.0.2.3" + + // slirp4netnsMTU the default MTU override + slirp4netnsMTU = 65520 ) // Get an OCICNI network config @@ -282,6 +286,7 @@ func (r *Runtime) setupSlirp4netns(ctr *Container) error { enableIPv6 := false outboundAddr := "" outboundAddr6 := "" + mtu := slirp4netnsMTU if ctr.config.NetworkOptions != nil { slirpOptions = append(slirpOptions, ctr.config.NetworkOptions["slirp4netns"]...) @@ -345,6 +350,11 @@ func (r *Runtime) setupSlirp4netns(ctr *Container) error { } } outboundAddr6 = value + case "mtu": + mtu, err = strconv.Atoi(value) + if mtu < 68 || err != nil { + return errors.Errorf("invalid mtu %q", value) + } default: return errors.Errorf("unknown option for slirp4netns: %q", o) } @@ -358,8 +368,8 @@ func (r *Runtime) setupSlirp4netns(ctr *Container) error { if disableHostLoopback && slirpFeatures.HasDisableHostLoopback { cmdArgs = append(cmdArgs, "--disable-host-loopback") } - if slirpFeatures.HasMTU { - cmdArgs = append(cmdArgs, "--mtu", "65520") + if mtu > -1 && slirpFeatures.HasMTU { + cmdArgs = append(cmdArgs, fmt.Sprintf("--mtu=%d", mtu)) } if !noPivotRoot && slirpFeatures.HasEnableSandbox { cmdArgs = append(cmdArgs, "--enable-sandbox") @@ -977,7 +987,7 @@ func (c *Container) getContainerNetworkInfo() (*define.InspectNetworkSettings, e if c.state.NetNS == nil { // We still want to make dummy configurations for each CNI net // the container joined. - if len(networks) > 0 && !isDefault { + if len(networks) > 0 { settings.Networks = make(map[string]*define.InspectAdditionalNetwork, len(networks)) for _, net := range networks { cniNet := new(define.InspectAdditionalNetwork) @@ -998,7 +1008,7 @@ func (c *Container) getContainerNetworkInfo() (*define.InspectNetworkSettings, e } // If we have CNI networks - handle that here - if len(networks) > 0 && !isDefault { + if len(networks) > 0 { if len(networks) != len(c.state.NetworkStatus) { return nil, errors.Wrapf(define.ErrInternal, "network inspection mismatch: asked to join %d CNI network(s) %v, but have information on %d network(s)", len(networks), networks, len(c.state.NetworkStatus)) } @@ -1028,7 +1038,9 @@ func (c *Container) getContainerNetworkInfo() (*define.InspectNetworkSettings, e settings.Networks[name] = addedNet } - return settings, nil + if !isDefault { + return settings, nil + } } // If not joining networks, we should have at most 1 result diff --git a/libpod/oci_conmon_exec_linux.go b/libpod/oci_conmon_exec_linux.go index dc5dd03df..faf86ea5b 100644 --- a/libpod/oci_conmon_exec_linux.go +++ b/libpod/oci_conmon_exec_linux.go @@ -126,20 +126,25 @@ func (r *ConmonOCIRuntime) ExecContainerHTTP(ctr *Container, sessionID string, o }() attachChan := make(chan error) + conmonPipeDataChan := make(chan conmonPipeData) go func() { // attachToExec is responsible for closing pipes - attachChan <- attachExecHTTP(ctr, sessionID, req, w, streams, pipes, detachKeys, options.Terminal, cancel, hijackDone, holdConnOpen) + attachChan <- attachExecHTTP(ctr, sessionID, req, w, streams, pipes, detachKeys, options.Terminal, cancel, hijackDone, holdConnOpen, execCmd, conmonPipeDataChan, ociLog) close(attachChan) }() - // Wait for conmon to succeed, when return. - if err := execCmd.Wait(); err != nil { - return -1, nil, errors.Wrapf(err, "cannot run conmon") - } + // NOTE: the channel is needed to communicate conmon's data. In case + // of an error, the error will be written on the hijacked http + // connection such that remote clients will receive the error. + pipeData := <-conmonPipeDataChan - pid, err := readConmonPipeData(pipes.syncPipe, ociLog) + return pipeData.pid, attachChan, pipeData.err +} - return pid, attachChan, err +// conmonPipeData contains the data when reading from conmon's pipe. +type conmonPipeData struct { + pid int + err error } // ExecContainerDetached executes a command in a running container, but does @@ -488,9 +493,16 @@ func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *Ex } // Attach to a container over HTTP -func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, pipes *execPipes, detachKeys []byte, isTerminal bool, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool) (deferredErr error) { +func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, pipes *execPipes, detachKeys []byte, isTerminal bool, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, execCmd *exec.Cmd, conmonPipeDataChan chan<- conmonPipeData, ociLog string) (deferredErr error) { + // NOTE: As you may notice, the attach code is quite complex. + // Many things happen concurrently and yet are interdependent. + // If you ever change this function, make sure to write to the + // conmonPipeDataChan in case of an error. + if pipes == nil || pipes.startPipe == nil || pipes.attachPipe == nil { - return errors.Wrapf(define.ErrInvalidArg, "must provide a start and attach pipe to finish an exec attach") + err := errors.Wrapf(define.ErrInvalidArg, "must provide a start and attach pipe to finish an exec attach") + conmonPipeDataChan <- conmonPipeData{-1, err} + return err } defer func() { @@ -509,17 +521,20 @@ func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.Resp // set up the socket path, such that it is the correct length and location for exec sockPath, err := c.execAttachSocketPath(sessionID) if err != nil { + conmonPipeDataChan <- conmonPipeData{-1, err} return err } // 2: read from attachFd that the parent process has set up the console socket if _, err := readConmonPipeData(pipes.attachPipe, ""); err != nil { + conmonPipeDataChan <- conmonPipeData{-1, err} return err } // 2: then attach conn, err := openUnixSocket(sockPath) if err != nil { + conmonPipeDataChan <- conmonPipeData{-1, err} return errors.Wrapf(err, "failed to connect to container's attach socket: %v", sockPath) } defer func() { @@ -540,11 +555,13 @@ func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.Resp // Perform hijack hijacker, ok := w.(http.Hijacker) if !ok { + conmonPipeDataChan <- conmonPipeData{-1, err} return errors.Errorf("unable to hijack connection") } httpCon, httpBuf, err := hijacker.Hijack() if err != nil { + conmonPipeDataChan <- conmonPipeData{-1, err} return errors.Wrapf(err, "error hijacking connection") } @@ -555,10 +572,23 @@ func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.Resp // Force a flush after the header is written. if err := httpBuf.Flush(); err != nil { + conmonPipeDataChan <- conmonPipeData{-1, err} return errors.Wrapf(err, "error flushing HTTP hijack header") } go func() { + // Wait for conmon to succeed, when return. + if err := execCmd.Wait(); err != nil { + conmonPipeDataChan <- conmonPipeData{-1, err} + } else { + pid, err := readConmonPipeData(pipes.syncPipe, ociLog) + if err != nil { + hijackWriteError(err, c.ID(), isTerminal, httpBuf) + conmonPipeDataChan <- conmonPipeData{pid, err} + } else { + conmonPipeDataChan <- conmonPipeData{pid, err} + } + } // We need to hold the connection open until the complete exec // function has finished. This channel will be closed in a defer // in that function, so we can wait for it here. diff --git a/libpod/options.go b/libpod/options.go index c7bac7e1f..20f62ee37 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -2190,13 +2190,37 @@ func WithPodNetworks(networks []string) PodCreateOption { } } +// WithPodNoNetwork tells the pod to disable external networking. +func WithPodNoNetwork() PodCreateOption { + return func(pod *Pod) error { + if pod.valid { + return define.ErrPodFinalized + } + + if !pod.config.InfraContainer.HasInfraContainer { + return errors.Wrapf(define.ErrInvalidArg, "cannot disable pod networking as no infra container is being created") + } + + if len(pod.config.InfraContainer.PortBindings) > 0 || + pod.config.InfraContainer.StaticIP != nil || + pod.config.InfraContainer.StaticMAC != nil || + len(pod.config.InfraContainer.Networks) > 0 || + pod.config.InfraContainer.HostNetwork { + return errors.Wrapf(define.ErrInvalidArg, "cannot disable pod network if network-related configuration is specified") + } + + pod.config.InfraContainer.NoNetwork = true + + return nil + } +} + // WithPodHostNetwork tells the pod to use the host's network namespace. func WithPodHostNetwork() PodCreateOption { return func(pod *Pod) error { if pod.valid { return define.ErrPodFinalized } - if !pod.config.InfraContainer.HasInfraContainer { return errors.Wrapf(define.ErrInvalidArg, "cannot configure pod host networking as no infra container is being created") } @@ -2204,7 +2228,8 @@ func WithPodHostNetwork() PodCreateOption { if len(pod.config.InfraContainer.PortBindings) > 0 || pod.config.InfraContainer.StaticIP != nil || pod.config.InfraContainer.StaticMAC != nil || - len(pod.config.InfraContainer.Networks) > 0 { + len(pod.config.InfraContainer.Networks) > 0 || + pod.config.InfraContainer.NoNetwork { return errors.Wrapf(define.ErrInvalidArg, "cannot set host network if network-related configuration is specified") } diff --git a/libpod/pod.go b/libpod/pod.go index c8f62ca18..784c2cf5e 100644 --- a/libpod/pod.go +++ b/libpod/pod.go @@ -93,6 +93,7 @@ type podState struct { type InfraContainerConfig struct { ConmonPidFile string `json:"conmonPidFile"` HasInfraContainer bool `json:"makeInfraContainer"` + NoNetwork bool `json:"noNetwork,omitempty"` HostNetwork bool `json:"infraHostNetwork,omitempty"` PortBindings []ocicni.PortMapping `json:"infraPortBindings"` StaticIP net.IP `json:"staticIP,omitempty"` diff --git a/libpod/rootless_cni_linux.go b/libpod/rootless_cni_linux.go index 9a980750f..94ae062aa 100644 --- a/libpod/rootless_cni_linux.go +++ b/libpod/rootless_cni_linux.go @@ -25,7 +25,7 @@ import ( // Built from ../contrib/rootless-cni-infra. var rootlessCNIInfraImage = map[string]string{ - "amd64": "quay.io/libpod/rootless-cni-infra@sha256:304742d5d221211df4ec672807a5842ff11e3729c50bc424ea0cea858f69d7b7", // 3-amd64 + "amd64": "quay.io/libpod/rootless-cni-infra@sha256:adf352454666f7ce9ca3e1098448b5ee18f89c4516471ec99447ec9ece917f36", // 5-amd64 } const ( @@ -58,9 +58,33 @@ func AllocRootlessCNI(ctx context.Context, c *Container) (ns.NetNS, []*cnitypes. return nil, nil, err } k8sPodName := getCNIPodName(c) // passed to CNI as K8S_POD_NAME + ip := "" + if c.config.StaticIP != nil { + ip = c.config.StaticIP.String() + } + mac := "" + if c.config.StaticMAC != nil { + mac = c.config.StaticMAC.String() + } + aliases, err := c.runtime.state.GetAllNetworkAliases(c) + if err != nil { + return nil, nil, err + } + capArgs := "" + // add network aliases json encoded as capabilityArgs for cni + if len(aliases) > 0 { + capabilityArgs := make(map[string]interface{}) + capabilityArgs["aliases"] = aliases + b, err := json.Marshal(capabilityArgs) + if err != nil { + return nil, nil, err + } + capArgs = string(b) + } + cniResults := make([]*cnitypes.Result, len(networks)) for i, nw := range networks { - cniRes, err := rootlessCNIInfraCallAlloc(infra, c.ID(), nw, k8sPodName) + cniRes, err := rootlessCNIInfraCallAlloc(infra, c.ID(), nw, k8sPodName, ip, mac, capArgs) if err != nil { return nil, nil, err } @@ -137,11 +161,11 @@ func getCNIPodName(c *Container) string { return c.Name() } -func rootlessCNIInfraCallAlloc(infra *Container, id, nw, k8sPodName string) (*cnitypes.Result, error) { - logrus.Debugf("rootless CNI: alloc %q, %q, %q", id, nw, k8sPodName) +func rootlessCNIInfraCallAlloc(infra *Container, id, nw, k8sPodName, ip, mac, capArgs string) (*cnitypes.Result, error) { + logrus.Debugf("rootless CNI: alloc %q, %q, %q, %q, %q, %q", id, nw, k8sPodName, ip, mac, capArgs) var err error - _, err = rootlessCNIInfraExec(infra, "alloc", id, nw, k8sPodName) + _, err = rootlessCNIInfraExec(infra, "alloc", id, nw, k8sPodName, ip, mac, capArgs) if err != nil { return nil, err } diff --git a/libpod/runtime_pod_infra_linux.go b/libpod/runtime_pod_infra_linux.go index dd957527d..564851f4e 100644 --- a/libpod/runtime_pod_infra_linux.go +++ b/libpod/runtime_pod_infra_linux.go @@ -94,8 +94,16 @@ func (r *Runtime) makeInfraContainer(ctx context.Context, p *Pod, imgName, rawIm } } - // Since user namespace sharing is not implemented, we only need to check if it's rootless - if !p.config.InfraContainer.HostNetwork { + switch { + case p.config.InfraContainer.HostNetwork: + if err := g.RemoveLinuxNamespace(string(spec.NetworkNamespace)); err != nil { + return nil, errors.Wrapf(err, "error removing network namespace from pod %s infra container", p.ID()) + } + case p.config.InfraContainer.NoNetwork: + // Do nothing - we have a network namespace by default, + // but should not configure slirp. + default: + // Since user namespace sharing is not implemented, we only need to check if it's rootless netmode := "bridge" if isRootless || p.config.InfraContainer.Slirp4netns { netmode = "slirp4netns" @@ -106,8 +114,6 @@ func (r *Runtime) makeInfraContainer(ctx context.Context, p *Pod, imgName, rawIm // PostConfigureNetNS should not be set since user namespace sharing is not implemented // and rootless networking no longer supports post configuration setup options = append(options, WithNetNS(p.config.InfraContainer.PortBindings, false, netmode, p.config.InfraContainer.Networks)) - } else if err := g.RemoveLinuxNamespace(string(spec.NetworkNamespace)); err != nil { - return nil, errors.Wrapf(err, "error removing network namespace from pod %s infra container", p.ID()) } // For each option in InfraContainerConfig - if set, pass into diff --git a/libpod/util.go b/libpod/util.go index bf9bf2542..391208fb9 100644 --- a/libpod/util.go +++ b/libpod/util.go @@ -235,20 +235,16 @@ func checkDependencyContainer(depCtr, ctr *Container) error { return nil } -// hijackWriteErrorAndClose writes an error to a hijacked HTTP session and -// closes it. Intended to HTTPAttach function. -// If error is nil, it will not be written; we'll only close the connection. -func hijackWriteErrorAndClose(toWrite error, cid string, terminal bool, httpCon io.Closer, httpBuf *bufio.ReadWriter) { +// hijackWriteError writes an error to a hijacked HTTP session. +func hijackWriteError(toWrite error, cid string, terminal bool, httpBuf *bufio.ReadWriter) { if toWrite != nil { - errString := []byte(fmt.Sprintf("%v\n", toWrite)) + errString := []byte(fmt.Sprintf("Error: %v\n", toWrite)) if !terminal { // We need a header. header := makeHTTPAttachHeader(2, uint32(len(errString))) if _, err := httpBuf.Write(header); err != nil { logrus.Errorf("Error writing header for container %s attach connection error: %v", cid, err) } - // TODO: May want to return immediately here to avoid - // writing garbage to the socket? } if _, err := httpBuf.Write(errString); err != nil { logrus.Errorf("Error writing error to container %s HTTP attach connection: %v", cid, err) @@ -257,6 +253,13 @@ func hijackWriteErrorAndClose(toWrite error, cid string, terminal bool, httpCon logrus.Errorf("Error flushing HTTP buffer for container %s HTTP attach connection: %v", cid, err) } } +} + +// hijackWriteErrorAndClose writes an error to a hijacked HTTP session and +// closes it. Intended to HTTPAttach function. +// If error is nil, it will not be written; we'll only close the connection. +func hijackWriteErrorAndClose(toWrite error, cid string, terminal bool, httpCon io.Closer, httpBuf *bufio.ReadWriter) { + hijackWriteError(toWrite, cid, terminal, httpBuf) if err := httpCon.Close(); err != nil { logrus.Errorf("Error closing container %s HTTP attach connection: %v", cid, err) |