diff options
author | OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com> | 2022-09-12 21:14:40 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-12 21:14:40 +0200 |
commit | 8216d0ef4e8212413a650a55ce8fd02f2ca8d181 (patch) | |
tree | 41e4359ad59c1052a01f4f45857672a3f2f9dfe7 | |
parent | 6e545945a562b0116aa18ab0f38b36c59166838e (diff) | |
parent | 56c880a7e428c4f2f14317fe30bebaa735cd562d (diff) | |
download | podman-8216d0ef4e8212413a650a55ce8fd02f2ca8d181.tar.gz podman-8216d0ef4e8212413a650a55ce8fd02f2ca8d181.tar.bz2 podman-8216d0ef4e8212413a650a55ce8fd02f2ca8d181.zip |
Merge pull request #15749 from dfr/freebsd-networking
Add support for networking on FreeBSD
-rw-r--r-- | libpod/boltdb_state_freebsd.go | 14 | ||||
-rw-r--r-- | libpod/container_freebsd.go | 15 | ||||
-rw-r--r-- | libpod/container_internal_common.go | 19 | ||||
-rw-r--r-- | libpod/container_internal_freebsd.go | 33 | ||||
-rw-r--r-- | libpod/container_internal_linux.go | 13 | ||||
-rw-r--r-- | libpod/networking_common.go | 719 | ||||
-rw-r--r-- | libpod/networking_freebsd.go | 268 | ||||
-rw-r--r-- | libpod/networking_linux.go | 704 | ||||
-rw-r--r-- | libpod/networking_unsupported.go | 4 |
9 files changed, 1043 insertions, 746 deletions
diff --git a/libpod/boltdb_state_freebsd.go b/libpod/boltdb_state_freebsd.go index d7f2736fc..d0a2d4f28 100644 --- a/libpod/boltdb_state_freebsd.go +++ b/libpod/boltdb_state_freebsd.go @@ -6,12 +6,20 @@ package libpod // replaceNetNS handle network namespace transitions after updating a // container's state. func replaceNetNS(netNSPath string, ctr *Container, newState *ContainerState) error { - // On FreeBSD, we just record the network jail's name in our state. - newState.NetworkJail = netNSPath + if netNSPath != "" { + // On FreeBSD, we just record the network jail's name in our state. + newState.NetNS = &jailNetNS{Name: netNSPath} + } else { + newState.NetNS = nil + } return nil } // getNetNSPath retrieves the netns path to be stored in the database func getNetNSPath(ctr *Container) string { - return ctr.state.NetworkJail + if ctr.state.NetNS != nil { + return ctr.state.NetNS.Name + } else { + return "" + } } diff --git a/libpod/container_freebsd.go b/libpod/container_freebsd.go index 7292ba37a..87fb494dd 100644 --- a/libpod/container_freebsd.go +++ b/libpod/container_freebsd.go @@ -4,11 +4,20 @@ package libpod type containerPlatformState struct { - // NetworkJail is the name of the container's network VNET + // NetNS is the name of the container's network VNET // jail. Will only be set if config.CreateNetNS is true, or // the container was told to join another container's network // namespace. - NetworkJail string `json:"-"` + NetNS *jailNetNS `json:"-"` +} + +type jailNetNS struct { + Name string `json:"-"` +} + +func (ns *jailNetNS) Path() string { + // The jail name approximately corresponds to the Linux netns path + return ns.Name } func networkDisabled(c *Container) (bool, error) { @@ -16,7 +25,7 @@ func networkDisabled(c *Container) (bool, error) { return false, nil } if !c.config.PostConfigureNetNS { - return c.state.NetworkJail == "", nil + return c.state.NetNS != nil, nil } return false, nil } diff --git a/libpod/container_internal_common.go b/libpod/container_internal_common.go index f1d3f5e89..c7f59aba5 100644 --- a/libpod/container_internal_common.go +++ b/libpod/container_internal_common.go @@ -1766,16 +1766,6 @@ func (c *Container) makeBindMounts() error { } } - // Make /etc/hostname - // This should never change, so no need to recreate if it exists - if _, ok := c.state.BindMounts["/etc/hostname"]; !ok { - hostnamePath, err := c.writeStringToRundir("hostname", c.Hostname()) - if err != nil { - return fmt.Errorf("creating hostname file for container %s: %w", c.ID(), err) - } - c.state.BindMounts["/etc/hostname"] = hostnamePath - } - // Make /etc/localtime ctrTimezone := c.Timezone() if ctrTimezone != "" { @@ -1879,7 +1869,7 @@ rootless=%d } } - return nil + return c.makePlatformBindMounts() } // generateResolvConf generates a containers resolv.conf @@ -1939,11 +1929,16 @@ func (c *Container) generateResolvConf() error { destPath := filepath.Join(c.state.RunDir, "resolv.conf") + var namespaces []spec.LinuxNamespace + if c.config.Spec.Linux != nil { + namespaces = c.config.Spec.Linux.Namespaces + } + if err := resolvconf.New(&resolvconf.Params{ IPv6Enabled: ipv6, KeepHostServers: keepHostServers, Nameservers: nameservers, - Namespaces: c.config.Spec.Linux.Namespaces, + Namespaces: namespaces, Options: options, Path: destPath, Searches: search, diff --git a/libpod/container_internal_freebsd.go b/libpod/container_internal_freebsd.go index c6ed6147c..67f87a98d 100644 --- a/libpod/container_internal_freebsd.go +++ b/libpod/container_internal_freebsd.go @@ -4,7 +4,6 @@ package libpod import ( - "errors" "fmt" "os" "strings" @@ -24,20 +23,6 @@ var ( bindOptions = []string{} ) -// Network stubs to decouple container_internal_freebsd.go from -// networking_freebsd.go so they can be reviewed separately. -func (r *Runtime) createNetNS(ctr *Container) (netJail string, q map[string]types.StatusBlock, retErr error) { - return "", nil, errors.New("not implemented (*Runtime) createNetNS") -} - -func (r *Runtime) teardownNetNS(ctr *Container) error { - return errors.New("not implemented (*Runtime) teardownNetNS") -} - -func (r *Runtime) reloadContainerNetwork(ctr *Container) (map[string]types.StatusBlock, error) { - return nil, errors.New("not implemented (*Runtime) reloadContainerNetwork") -} - func (c *Container) mountSHM(shmOptions string) error { return nil } @@ -51,7 +36,7 @@ func (c *Container) unmountSHM(path string) error { func (c *Container) prepare() error { var ( wg sync.WaitGroup - jailName string + ctrNS *jailNetNS networkStatus map[string]types.StatusBlock createNetNSErr, mountStorageErr error mountPoint string @@ -63,9 +48,9 @@ func (c *Container) prepare() error { go func() { defer wg.Done() // Set up network namespace if not already set up - noNetNS := c.state.NetworkJail == "" + noNetNS := c.state.NetNS == nil if c.config.CreateNetNS && noNetNS && !c.config.PostConfigureNetNS { - jailName, networkStatus, createNetNSErr = c.runtime.createNetNS(c) + ctrNS, networkStatus, createNetNSErr = c.runtime.createNetNS(c) if createNetNSErr != nil { return } @@ -74,7 +59,7 @@ func (c *Container) prepare() error { defer tmpStateLock.Unlock() // Assign NetNS attributes to container - c.state.NetworkJail = jailName + c.state.NetNS = ctrNS c.state.NetworkStatus = networkStatus } }() @@ -164,7 +149,7 @@ func (c *Container) addNetworkContainer(g *generate.Generator, ctr string) error if err != nil { return fmt.Errorf("retrieving dependency %s of container %s from state: %w", ctr, c.ID(), err) } - g.AddAnnotation("org.freebsd.parentJail", nsCtr.state.NetworkJail) + g.AddAnnotation("org.freebsd.parentJail", nsCtr.state.NetNS.Name) return nil } @@ -187,7 +172,7 @@ func openDirectory(path string) (fd int, err error) { func (c *Container) addNetworkNamespace(g *generate.Generator) error { if c.config.CreateNetNS { - g.AddAnnotation("org.freebsd.parentJail", c.state.NetworkJail) + g.AddAnnotation("org.freebsd.parentJail", c.state.NetNS.Name) } return nil } @@ -272,7 +257,7 @@ func (c *Container) isSlirp4netnsIPv6() (bool, error) { // check for net=none func (c *Container) hasNetNone() bool { - return c.state.NetworkJail == "" + return c.state.NetNS == nil } func setVolumeAtime(mountPoint string, st os.FileInfo) error { @@ -283,3 +268,7 @@ func setVolumeAtime(mountPoint string, st os.FileInfo) error { } return nil } + +func (c *Container) makePlatformBindMounts() error { + return nil +} diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 0fec1a7d2..ef8649776 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -652,3 +652,16 @@ func setVolumeAtime(mountPoint string, st os.FileInfo) error { } return nil } + +func (c *Container) makePlatformBindMounts() error { + // Make /etc/hostname + // This should never change, so no need to recreate if it exists + if _, ok := c.state.BindMounts["/etc/hostname"]; !ok { + hostnamePath, err := c.writeStringToRundir("hostname", c.Hostname()) + if err != nil { + return fmt.Errorf("creating hostname file for container %s: %w", c.ID(), err) + } + c.state.BindMounts["/etc/hostname"] = hostnamePath + } + return nil +} diff --git a/libpod/networking_common.go b/libpod/networking_common.go new file mode 100644 index 000000000..fa444e26a --- /dev/null +++ b/libpod/networking_common.go @@ -0,0 +1,719 @@ +//go:build linux || freebsd +// +build linux freebsd + +package libpod + +import ( + "errors" + "fmt" + "regexp" + "sort" + + "github.com/containers/common/libnetwork/etchosts" + "github.com/containers/common/libnetwork/types" + "github.com/containers/common/pkg/config" + "github.com/containers/common/pkg/machine" + "github.com/containers/common/pkg/util" + "github.com/containers/podman/v4/libpod/define" + "github.com/containers/podman/v4/libpod/events" + "github.com/containers/podman/v4/pkg/namespaces" + "github.com/containers/podman/v4/pkg/rootless" + "github.com/containers/storage/pkg/lockfile" + "github.com/sirupsen/logrus" +) + +// convertPortMappings will remove the HostIP part from the ports when running inside podman machine. +// This is need because a HostIP of 127.0.0.1 would now allow the gvproxy forwarder to reach to open ports. +// For machine the HostIP must only be used by gvproxy and never in the VM. +func (c *Container) convertPortMappings() []types.PortMapping { + if !machine.IsGvProxyBased() || len(c.config.PortMappings) == 0 { + return c.config.PortMappings + } + // if we run in a machine VM we have to ignore the host IP part + newPorts := make([]types.PortMapping, 0, len(c.config.PortMappings)) + for _, port := range c.config.PortMappings { + port.HostIP = "" + newPorts = append(newPorts, port) + } + return newPorts +} + +func (c *Container) getNetworkOptions(networkOpts map[string]types.PerNetworkOptions) types.NetworkOptions { + opts := types.NetworkOptions{ + ContainerID: c.config.ID, + ContainerName: getCNIPodName(c), + } + opts.PortMappings = c.convertPortMappings() + + // If the container requested special network options use this instead of the config. + // This is the case for container restore or network reload. + if c.perNetworkOpts != nil { + opts.Networks = c.perNetworkOpts + } else { + opts.Networks = networkOpts + } + return opts +} + +// setUpNetwork will set up the the networks, on error it will also tear down the cni +// networks. If rootless it will join/create the rootless network namespace. +func (r *Runtime) setUpNetwork(ns string, opts types.NetworkOptions) (map[string]types.StatusBlock, error) { + rootlessNetNS, err := r.GetRootlessNetNs(true) + if err != nil { + return nil, err + } + var results map[string]types.StatusBlock + setUpPod := func() error { + results, err = r.network.Setup(ns, types.SetupOptions{NetworkOptions: opts}) + return err + } + // rootlessNetNS is nil if we are root + if rootlessNetNS != nil { + // execute the setup in the rootless net ns + err = rootlessNetNS.Do(setUpPod) + rootlessNetNS.Lock.Unlock() + } else { + err = setUpPod() + } + return results, err +} + +// getCNIPodName return the pod name (hostname) used by CNI and the dnsname plugin. +// If we are in the pod network namespace use the pod name otherwise the container name +func getCNIPodName(c *Container) string { + if c.config.NetMode.IsPod() || c.IsInfra() { + pod, err := c.runtime.state.Pod(c.PodID()) + if err == nil { + return pod.Name() + } + } + return c.Name() +} + +// Tear down a container's network configuration and joins the +// rootless net ns as rootless user +func (r *Runtime) teardownNetwork(ns string, opts types.NetworkOptions) error { + rootlessNetNS, err := r.GetRootlessNetNs(false) + if err != nil { + return err + } + tearDownPod := func() error { + if err := r.network.Teardown(ns, types.TeardownOptions{NetworkOptions: opts}); err != nil { + return fmt.Errorf("tearing down network namespace configuration for container %s: %w", opts.ContainerID, err) + } + return nil + } + + // rootlessNetNS is nil if we are root + if rootlessNetNS != nil { + // execute the cni setup in the rootless net ns + err = rootlessNetNS.Do(tearDownPod) + if cerr := rootlessNetNS.Cleanup(r); cerr != nil { + logrus.WithError(err).Error("failed to clean up rootless netns") + } + rootlessNetNS.Lock.Unlock() + } else { + err = tearDownPod() + } + return err +} + +// Tear down a container's CNI network configuration, but do not tear down the +// namespace itself. +func (r *Runtime) teardownCNI(ctr *Container) error { + if ctr.state.NetNS == nil { + // The container has no network namespace, we're set + return nil + } + + logrus.Debugf("Tearing down network namespace at %s for container %s", ctr.state.NetNS.Path(), ctr.ID()) + + networks, err := ctr.networks() + if err != nil { + return err + } + + if !ctr.config.NetMode.IsSlirp4netns() && len(networks) > 0 { + netOpts := ctr.getNetworkOptions(networks) + return r.teardownNetwork(ctr.state.NetNS.Path(), netOpts) + } + return nil +} + +// isBridgeNetMode checks if the given network mode is bridge. +// It returns nil when it is set to bridge and an error otherwise. +func isBridgeNetMode(n namespaces.NetworkMode) error { + if !n.IsBridge() { + return fmt.Errorf("%q is not supported: %w", n, define.ErrNetworkModeInvalid) + } + return nil +} + +// Reload only works with containers with a configured network. +// It will tear down, and then reconfigure, the network of the container. +// This is mainly used when a reload of firewall rules wipes out existing +// firewall configuration. +// Efforts will be made to preserve MAC and IP addresses, but this only works if +// the container only joined a single CNI network, and was only assigned a +// single MAC or IP. +// Only works on root containers at present, though in the future we could +// extend this to stop + restart slirp4netns +func (r *Runtime) reloadContainerNetwork(ctr *Container) (map[string]types.StatusBlock, error) { + if ctr.state.NetNS == nil { + return nil, fmt.Errorf("container %s network is not configured, refusing to reload: %w", ctr.ID(), define.ErrCtrStateInvalid) + } + if err := isBridgeNetMode(ctr.config.NetMode); err != nil { + return nil, err + } + logrus.Infof("Going to reload container %s network", ctr.ID()) + + err := r.teardownCNI(ctr) + if err != nil { + // teardownCNI will error if the iptables rules do not exists and this is the case after + // a firewall reload. The purpose of network reload is to recreate the rules if they do + // not exists so we should not log this specific error as error. This would confuse users otherwise. + // iptables-legacy and iptables-nft will create different errors make sure to match both. + b, rerr := regexp.MatchString("Couldn't load target `CNI-[a-f0-9]{24}':No such file or directory|Chain 'CNI-[a-f0-9]{24}' does not exist", err.Error()) + if rerr == nil && !b { + logrus.Error(err) + } else { + logrus.Info(err) + } + } + + networkOpts, err := ctr.networks() + if err != nil { + return nil, err + } + + // Set the same network settings as before.. + netStatus := ctr.getNetworkStatus() + for network, perNetOpts := range networkOpts { + for name, netInt := range netStatus[network].Interfaces { + perNetOpts.InterfaceName = name + perNetOpts.StaticMAC = netInt.MacAddress + for _, netAddress := range netInt.Subnets { + perNetOpts.StaticIPs = append(perNetOpts.StaticIPs, netAddress.IPNet.IP) + } + // Normally interfaces have a length of 1, only for some special cni configs we could get more. + // For now just use the first interface to get the ips this should be good enough for most cases. + break + } + networkOpts[network] = perNetOpts + } + ctr.perNetworkOpts = networkOpts + + return r.configureNetNS(ctr, ctr.state.NetNS) +} + +// Produce an InspectNetworkSettings containing information on the container +// network. +func (c *Container) getContainerNetworkInfo() (*define.InspectNetworkSettings, error) { + if c.config.NetNsCtr != "" { + netNsCtr, err := c.runtime.GetContainer(c.config.NetNsCtr) + if err != nil { + return nil, err + } + // see https://github.com/containers/podman/issues/10090 + // the container has to be locked for syncContainer() + netNsCtr.lock.Lock() + defer netNsCtr.lock.Unlock() + // Have to sync to ensure that state is populated + if err := netNsCtr.syncContainer(); err != nil { + return nil, err + } + logrus.Debugf("Container %s shares network namespace, retrieving network info of container %s", c.ID(), c.config.NetNsCtr) + + return netNsCtr.getContainerNetworkInfo() + } + + settings := new(define.InspectNetworkSettings) + settings.Ports = makeInspectPortBindings(c.config.PortMappings, c.config.ExposedPorts) + + networks, err := c.networks() + if err != nil { + return nil, err + } + + if c.state.NetNS == nil { + if networkNSPath := c.joinedNetworkNSPath(); networkNSPath != "" { + if result, err := c.inspectJoinedNetworkNS(networkNSPath); err == nil { + // fallback to dummy configuration + settings.InspectBasicNetworkConfig = resultToBasicNetworkConfig(result) + return settings, nil + } + // do not propagate error inspecting a joined network ns + logrus.Errorf("Inspecting network namespace: %s of container %s: %v", networkNSPath, c.ID(), err) + } + // We can't do more if the network is down. + + // We still want to make dummy configurations for each CNI net + // the container joined. + if len(networks) > 0 { + settings.Networks = make(map[string]*define.InspectAdditionalNetwork, len(networks)) + for net, opts := range networks { + cniNet := new(define.InspectAdditionalNetwork) + cniNet.NetworkID = net + cniNet.Aliases = opts.Aliases + settings.Networks[net] = cniNet + } + } + + return settings, nil + } + + // Set network namespace path + settings.SandboxKey = c.state.NetNS.Path() + + netStatus := c.getNetworkStatus() + // If this is empty, we're probably slirp4netns + if len(netStatus) == 0 { + return settings, nil + } + + // If we have networks - handle that here + if len(networks) > 0 { + if len(networks) != len(netStatus) { + return nil, fmt.Errorf("network inspection mismatch: asked to join %d network(s) %v, but have information on %d network(s): %w", len(networks), networks, len(netStatus), define.ErrInternal) + } + + settings.Networks = make(map[string]*define.InspectAdditionalNetwork) + + for name, opts := range networks { + result := netStatus[name] + addedNet := new(define.InspectAdditionalNetwork) + addedNet.NetworkID = name + addedNet.Aliases = opts.Aliases + addedNet.InspectBasicNetworkConfig = resultToBasicNetworkConfig(result) + + settings.Networks[name] = addedNet + } + + // if not only the default network is connected we can return here + // otherwise we have to populate the InspectBasicNetworkConfig settings + _, isDefaultNet := networks[c.runtime.config.Network.DefaultNetwork] + if !(len(networks) == 1 && isDefaultNet) { + return settings, nil + } + } + + // If not joining networks, we should have at most 1 result + if len(netStatus) > 1 { + return nil, fmt.Errorf("should have at most 1 network status result if not joining networks, instead got %d: %w", len(netStatus), define.ErrInternal) + } + + if len(netStatus) == 1 { + for _, status := range netStatus { + settings.InspectBasicNetworkConfig = resultToBasicNetworkConfig(status) + } + } + return settings, nil +} + +// resultToBasicNetworkConfig produces an InspectBasicNetworkConfig from a CNI +// result +func resultToBasicNetworkConfig(result types.StatusBlock) define.InspectBasicNetworkConfig { + config := define.InspectBasicNetworkConfig{} + interfaceNames := make([]string, 0, len(result.Interfaces)) + for interfaceName := range result.Interfaces { + interfaceNames = append(interfaceNames, interfaceName) + } + // ensure consistent inspect results by sorting + sort.Strings(interfaceNames) + for _, interfaceName := range interfaceNames { + netInt := result.Interfaces[interfaceName] + for _, netAddress := range netInt.Subnets { + size, _ := netAddress.IPNet.Mask.Size() + if netAddress.IPNet.IP.To4() != nil { + // ipv4 + if config.IPAddress == "" { + config.IPAddress = netAddress.IPNet.IP.String() + config.IPPrefixLen = size + config.Gateway = netAddress.Gateway.String() + } else { + config.SecondaryIPAddresses = append(config.SecondaryIPAddresses, define.Address{Addr: netAddress.IPNet.IP.String(), PrefixLength: size}) + } + } else { + // ipv6 + if config.GlobalIPv6Address == "" { + config.GlobalIPv6Address = netAddress.IPNet.IP.String() + config.GlobalIPv6PrefixLen = size + config.IPv6Gateway = netAddress.Gateway.String() + } else { + config.SecondaryIPv6Addresses = append(config.SecondaryIPv6Addresses, define.Address{Addr: netAddress.IPNet.IP.String(), PrefixLength: size}) + } + } + } + if config.MacAddress == "" { + config.MacAddress = netInt.MacAddress.String() + } else { + config.AdditionalMacAddresses = append(config.AdditionalMacAddresses, netInt.MacAddress.String()) + } + } + return config +} + +// NetworkDisconnect removes a container from the network +func (c *Container) NetworkDisconnect(nameOrID, netName string, force bool) error { + // only the bridge mode supports cni networks + if err := isBridgeNetMode(c.config.NetMode); err != nil { + return err + } + + c.lock.Lock() + defer c.lock.Unlock() + + networks, err := c.networks() + if err != nil { + return err + } + + // check if network exists and if the input is a ID we get the name + // CNI only uses names so it is important that we only use the name + netName, err = c.runtime.normalizeNetworkName(netName) + if err != nil { + return err + } + + _, nameExists := networks[netName] + if !nameExists && len(networks) > 0 { + return fmt.Errorf("container %s is not connected to network %s", nameOrID, netName) + } + + if err := c.syncContainer(); err != nil { + return err + } + // get network status before we disconnect + networkStatus := c.getNetworkStatus() + + if err := c.runtime.state.NetworkDisconnect(c, netName); err != nil { + return err + } + + c.newNetworkEvent(events.NetworkDisconnect, netName) + if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) { + return nil + } + + if c.state.NetNS == nil { + return fmt.Errorf("unable to disconnect %s from %s: %w", nameOrID, netName, define.ErrNoNetwork) + } + + opts := types.NetworkOptions{ + ContainerID: c.config.ID, + ContainerName: getCNIPodName(c), + } + opts.PortMappings = c.convertPortMappings() + opts.Networks = map[string]types.PerNetworkOptions{ + netName: networks[netName], + } + + if err := c.runtime.teardownNetwork(c.state.NetNS.Path(), opts); err != nil { + return err + } + + // update network status if container is running + oldStatus, statusExist := networkStatus[netName] + delete(networkStatus, netName) + c.state.NetworkStatus = networkStatus + err = c.save() + if err != nil { + return err + } + + // Reload ports when there are still connected networks, maybe we removed the network interface with the child ip. + // Reloading without connected networks does not make sense, so we can skip this step. + if rootless.IsRootless() && len(networkStatus) > 0 { + if err := c.reloadRootlessRLKPortMapping(); err != nil { + return err + } + } + + // Update resolv.conf if required + if statusExist { + stringIPs := make([]string, 0, len(oldStatus.DNSServerIPs)) + for _, ip := range oldStatus.DNSServerIPs { + stringIPs = append(stringIPs, ip.String()) + } + if len(stringIPs) > 0 { + logrus.Debugf("Removing DNS Servers %v from resolv.conf", stringIPs) + if err := c.removeNameserver(stringIPs); err != nil { + return err + } + } + + // update /etc/hosts file + if file, ok := c.state.BindMounts[config.DefaultHostsFile]; ok { + // sync the names with c.getHostsEntries() + names := []string{c.Hostname(), c.config.Name} + rm := etchosts.GetNetworkHostEntries(map[string]types.StatusBlock{netName: oldStatus}, names...) + if len(rm) > 0 { + // make sure to lock this file to prevent concurrent writes when + // this is used a net dependency container + lock, err := lockfile.GetLockfile(file) + if err != nil { + return fmt.Errorf("failed to lock hosts file: %w", err) + } + logrus.Debugf("Remove /etc/hosts entries %v", rm) + lock.Lock() + err = etchosts.Remove(file, rm) + lock.Unlock() + if err != nil { + return err + } + } + } + } + return nil +} + +// ConnectNetwork connects a container to a given network +func (c *Container) NetworkConnect(nameOrID, netName string, netOpts types.PerNetworkOptions) error { + // only the bridge mode supports cni networks + if err := isBridgeNetMode(c.config.NetMode); err != nil { + return err + } + + c.lock.Lock() + defer c.lock.Unlock() + + networks, err := c.networks() + if err != nil { + return err + } + + // check if network exists and if the input is a ID we get the name + // CNI only uses names so it is important that we only use the name + netName, err = c.runtime.normalizeNetworkName(netName) + if err != nil { + return err + } + + if err := c.syncContainer(); err != nil { + return err + } + + // get network status before we connect + networkStatus := c.getNetworkStatus() + + // always add the short id as alias for docker compat + netOpts.Aliases = append(netOpts.Aliases, c.config.ID[:12]) + + if netOpts.InterfaceName == "" { + netOpts.InterfaceName = getFreeInterfaceName(networks) + if netOpts.InterfaceName == "" { + return errors.New("could not find free network interface name") + } + } + + if err := c.runtime.state.NetworkConnect(c, netName, netOpts); err != nil { + // Docker compat: treat requests to attach already attached networks as a no-op, ignoring opts + if errors.Is(err, define.ErrNetworkConnected) && c.ensureState(define.ContainerStateConfigured) { + return nil + } + + return err + } + c.newNetworkEvent(events.NetworkConnect, netName) + if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) { + return nil + } + if c.state.NetNS == nil { + return fmt.Errorf("unable to connect %s to %s: %w", nameOrID, netName, define.ErrNoNetwork) + } + + opts := types.NetworkOptions{ + ContainerID: c.config.ID, + ContainerName: getCNIPodName(c), + } + opts.PortMappings = c.convertPortMappings() + opts.Networks = map[string]types.PerNetworkOptions{ + netName: netOpts, + } + + results, err := c.runtime.setUpNetwork(c.state.NetNS.Path(), opts) + if err != nil { + return err + } + if len(results) != 1 { + return errors.New("when adding aliases, results must be of length 1") + } + + // we need to get the old host entries before we add the new one to the status + // if we do not add do it here we will get the wrong existing entries which will throw of the logic + // we could also copy the map but this does not seem worth it + // sync the hostNames with c.getHostsEntries() + hostNames := []string{c.Hostname(), c.config.Name} + oldHostEntries := etchosts.GetNetworkHostEntries(networkStatus, hostNames...) + + // update network status + if networkStatus == nil { + networkStatus = make(map[string]types.StatusBlock, 1) + } + networkStatus[netName] = results[netName] + c.state.NetworkStatus = networkStatus + + err = c.save() + if err != nil { + return err + } + + // The first network needs a port reload to set the correct child ip for the rootlessport process. + // Adding a second network does not require a port reload because the child ip is still valid. + if rootless.IsRootless() && len(networks) == 0 { + if err := c.reloadRootlessRLKPortMapping(); err != nil { + return err + } + } + + ipv6, err := c.checkForIPv6(networkStatus) + if err != nil { + return err + } + + // Update resolv.conf if required + stringIPs := make([]string, 0, len(results[netName].DNSServerIPs)) + for _, ip := range results[netName].DNSServerIPs { + if (ip.To4() == nil) && !ipv6 { + continue + } + stringIPs = append(stringIPs, ip.String()) + } + if len(stringIPs) > 0 { + logrus.Debugf("Adding DNS Servers %v to resolv.conf", stringIPs) + if err := c.addNameserver(stringIPs); err != nil { + return err + } + } + + // update /etc/hosts file + if file, ok := c.state.BindMounts[config.DefaultHostsFile]; ok { + // make sure to lock this file to prevent concurrent writes when + // this is used a net dependency container + lock, err := lockfile.GetLockfile(file) + if err != nil { + return fmt.Errorf("failed to lock hosts file: %w", err) + } + new := etchosts.GetNetworkHostEntries(results, hostNames...) + logrus.Debugf("Add /etc/hosts entries %v", new) + // use special AddIfExists API to make sure we only add new entries if an old one exists + // see the AddIfExists() comment for more information + lock.Lock() + err = etchosts.AddIfExists(file, oldHostEntries, new) + lock.Unlock() + if err != nil { + return err + } + } + + return nil +} + +// get a free interface name for a new network +// return an empty string if no free name was found +func getFreeInterfaceName(networks map[string]types.PerNetworkOptions) string { + ifNames := make([]string, 0, len(networks)) + for _, opts := range networks { + ifNames = append(ifNames, opts.InterfaceName) + } + for i := 0; i < 100000; i++ { + ifName := fmt.Sprintf("eth%d", i) + if !util.StringInSlice(ifName, ifNames) { + return ifName + } + } + return "" +} + +// DisconnectContainerFromNetwork removes a container from its CNI network +func (r *Runtime) DisconnectContainerFromNetwork(nameOrID, netName string, force bool) error { + ctr, err := r.LookupContainer(nameOrID) + if err != nil { + return err + } + return ctr.NetworkDisconnect(nameOrID, netName, force) +} + +// ConnectContainerToNetwork connects a container to a CNI network +func (r *Runtime) ConnectContainerToNetwork(nameOrID, netName string, netOpts types.PerNetworkOptions) error { + ctr, err := r.LookupContainer(nameOrID) + if err != nil { + return err + } + return ctr.NetworkConnect(nameOrID, netName, netOpts) +} + +// normalizeNetworkName takes a network name, a partial or a full network ID and returns the network name. +// If the network is not found a errors is returned. +func (r *Runtime) normalizeNetworkName(nameOrID string) (string, error) { + net, err := r.network.NetworkInspect(nameOrID) + if err != nil { + return "", err + } + return net.Name, nil +} + +// ocicniPortsToNetTypesPorts convert the old port format to the new one +// while deduplicating ports into ranges +func ocicniPortsToNetTypesPorts(ports []types.OCICNIPortMapping) []types.PortMapping { + if len(ports) == 0 { + return nil + } + + newPorts := make([]types.PortMapping, 0, len(ports)) + + // first sort the ports + sort.Slice(ports, func(i, j int) bool { + return compareOCICNIPorts(ports[i], ports[j]) + }) + + // we already check if the slice is empty so we can use the first element + currentPort := types.PortMapping{ + HostIP: ports[0].HostIP, + HostPort: uint16(ports[0].HostPort), + ContainerPort: uint16(ports[0].ContainerPort), + Protocol: ports[0].Protocol, + Range: 1, + } + + for i := 1; i < len(ports); i++ { + if ports[i].HostIP == currentPort.HostIP && + ports[i].Protocol == currentPort.Protocol && + ports[i].HostPort-int32(currentPort.Range) == int32(currentPort.HostPort) && + ports[i].ContainerPort-int32(currentPort.Range) == int32(currentPort.ContainerPort) { + currentPort.Range++ + } else { + newPorts = append(newPorts, currentPort) + currentPort = types.PortMapping{ + HostIP: ports[i].HostIP, + HostPort: uint16(ports[i].HostPort), + ContainerPort: uint16(ports[i].ContainerPort), + Protocol: ports[i].Protocol, + Range: 1, + } + } + } + newPorts = append(newPorts, currentPort) + return newPorts +} + +// compareOCICNIPorts will sort the ocicni ports by +// 1) host ip +// 2) protocol +// 3) hostPort +// 4) container port +func compareOCICNIPorts(i, j types.OCICNIPortMapping) bool { + if i.HostIP != j.HostIP { + return i.HostIP < j.HostIP + } + + if i.Protocol != j.Protocol { + return i.Protocol < j.Protocol + } + + if i.HostPort != j.HostPort { + return i.HostPort < j.HostPort + } + + return i.ContainerPort < j.ContainerPort +} diff --git a/libpod/networking_freebsd.go b/libpod/networking_freebsd.go new file mode 100644 index 000000000..230efc99d --- /dev/null +++ b/libpod/networking_freebsd.go @@ -0,0 +1,268 @@ +//go:build freebsd +// +build freebsd + +package libpod + +import ( + "crypto/rand" + jdec "encoding/json" + "errors" + "fmt" + "net" + "os/exec" + "path/filepath" + + "github.com/containers/buildah/pkg/jail" + "github.com/containers/common/libnetwork/types" + "github.com/containers/storage/pkg/lockfile" + "github.com/sirupsen/logrus" +) + +type Netstat struct { + Statistics NetstatInterface `json:"statistics"` +} + +type NetstatInterface struct { + Interface []NetstatAddress `json:"interface"` +} + +type NetstatAddress struct { + Name string `json:"name"` + Flags string `json:"flags"` + Mtu int `json:"mtu"` + Network string `json:"network"` + Address string `json:"address"` + + ReceivedPackets uint64 `json:"received-packets"` + ReceivedBytes uint64 `json:"received-bytes"` + ReceivedErrors uint64 `json:"received-errors"` + + SentPackets uint64 `json:"sent-packets"` + SentBytes uint64 `json:"sent-bytes"` + SentErrors uint64 `json:"send-errors"` + + DroppedPackets uint64 `json:"dropped-packets"` + + Collisions uint64 `json:"collisions"` +} + +// copied from github.com/vishvanada/netlink which does not build on freebsd +type LinkStatistics64 struct { + RxPackets uint64 + TxPackets uint64 + RxBytes uint64 + TxBytes uint64 + RxErrors uint64 + TxErrors uint64 + RxDropped uint64 + TxDropped uint64 + Multicast uint64 + Collisions uint64 + RxLengthErrors uint64 + RxOverErrors uint64 + RxCrcErrors uint64 + RxFrameErrors uint64 + RxFifoErrors uint64 + RxMissedErrors uint64 + TxAbortedErrors uint64 + TxCarrierErrors uint64 + TxFifoErrors uint64 + TxHeartbeatErrors uint64 + TxWindowErrors uint64 + RxCompressed uint64 + TxCompressed uint64 +} + +type RootlessNetNS struct { + dir string + Lock lockfile.Locker +} + +// getPath will join the given path to the rootless netns dir +func (r *RootlessNetNS) getPath(path string) string { + return filepath.Join(r.dir, path) +} + +// Do - run the given function in the rootless netns. +// It does not lock the rootlessCNI lock, the caller +// should only lock when needed, e.g. for cni operations. +func (r *RootlessNetNS) Do(toRun func() error) error { + return errors.New("not supported on freebsd") +} + +// Cleanup the rootless network namespace if needed. +// It checks if we have running containers with the bridge network mode. +// Cleanup() expects that r.Lock is locked +func (r *RootlessNetNS) Cleanup(runtime *Runtime) error { + return errors.New("not supported on freebsd") +} + +// GetRootlessNetNs returns the rootless netns object. If create is set to true +// the rootless network namespace will be created if it does not exists already. +// If called as root it returns always nil. +// On success the returned RootlessCNI lock is locked and must be unlocked by the caller. +func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) { + return nil, nil +} + +func GetSlirp4netnsIP(subnet *net.IPNet) (*net.IP, error) { + return nil, errors.New("not implemented GetSlirp4netnsIP") +} + +// While there is code in container_internal.go which calls this, in +// my testing network creation always seems to go through createNetNS. +func (r *Runtime) setupNetNS(ctr *Container) error { + return errors.New("not implemented (*Runtime) setupNetNS") +} + +// Create and configure a new network namespace for a container +func (r *Runtime) configureNetNS(ctr *Container, ctrNS *jailNetNS) (status map[string]types.StatusBlock, rerr error) { + if err := r.exposeMachinePorts(ctr.config.PortMappings); err != nil { + return nil, err + } + defer func() { + // make sure to unexpose the gvproxy ports when an error happens + if rerr != nil { + if err := r.unexposeMachinePorts(ctr.config.PortMappings); err != nil { + logrus.Errorf("failed to free gvproxy machine ports: %v", err) + } + } + }() + networks, err := ctr.networks() + if err != nil { + return nil, err + } + // All networks have been removed from the container. + // This is effectively forcing net=none. + if len(networks) == 0 { + return nil, nil + } + + netOpts := ctr.getNetworkOptions(networks) + netStatus, err := r.setUpNetwork(ctrNS.Name, netOpts) + if err != nil { + return nil, err + } + + return netStatus, err +} + +// Create and configure a new network namespace for a container +func (r *Runtime) createNetNS(ctr *Container) (n *jailNetNS, q map[string]types.StatusBlock, retErr error) { + b := make([]byte, 16) + _, err := rand.Reader.Read(b) + if err != nil { + return nil, nil, fmt.Errorf("failed to generate random vnet name: %v", err) + } + ctrNS := &jailNetNS{Name: fmt.Sprintf("vnet-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:])} + + jconf := jail.NewConfig() + jconf.Set("name", ctrNS.Name) + jconf.Set("vnet", jail.NEW) + jconf.Set("children.max", 1) + jconf.Set("persist", true) + jconf.Set("enforce_statfs", 0) + jconf.Set("devfs_ruleset", 4) + jconf.Set("allow.raw_sockets", true) + jconf.Set("allow.chflags", true) + jconf.Set("securelevel", -1) + if _, err := jail.Create(jconf); err != nil { + logrus.Debugf("Failed to create vnet jail %s for container %s", ctrNS.Name, ctr.ID()) + } + + logrus.Debugf("Created vnet jail %s for container %s", ctrNS.Name, ctr.ID()) + + var networkStatus map[string]types.StatusBlock + networkStatus, err = r.configureNetNS(ctr, ctrNS) + return ctrNS, networkStatus, err +} + +// Tear down a network namespace, undoing all state associated with it. +func (r *Runtime) teardownNetNS(ctr *Container) error { + if err := r.unexposeMachinePorts(ctr.config.PortMappings); err != nil { + // do not return an error otherwise we would prevent network cleanup + logrus.Errorf("failed to free gvproxy machine ports: %v", err) + } + if err := r.teardownCNI(ctr); err != nil { + return err + } + + if ctr.state.NetNS != nil { + // Rather than destroying the jail immediately, reset the + // persist flag so that it will live until the container is + // done. + netjail, err := jail.FindByName(ctr.state.NetNS.Name) + if err != nil { + return fmt.Errorf("finding network jail %s: %w", ctr.state.NetNS.Name, err) + } + jconf := jail.NewConfig() + jconf.Set("persist", false) + if err := netjail.Set(jconf); err != nil { + return fmt.Errorf("releasing network jail %s: %w", ctr.state.NetNS.Name, err) + } + + ctr.state.NetNS = nil + } + + return nil +} + +func getContainerNetIO(ctr *Container) (*LinkStatistics64, error) { + if ctr.state.NetNS == nil { + // If NetNS is nil, it was set as none, and no netNS + // was set up this is a valid state and thus return no + // error, nor any statistics + return nil, nil + } + + // FIXME get the interface from the container netstatus + cmd := exec.Command("jexec", ctr.state.NetNS.Name, "netstat", "-bI", "eth0", "--libxo", "json") + out, err := cmd.Output() + if err != nil { + return nil, err + } + stats := Netstat{} + if err := jdec.Unmarshal(out, &stats); err != nil { + return nil, err + } + + // Find the link stats + for _, ifaddr := range stats.Statistics.Interface { + if ifaddr.Mtu > 0 { + return &LinkStatistics64{ + RxPackets: ifaddr.ReceivedPackets, + TxPackets: ifaddr.SentPackets, + RxBytes: ifaddr.ReceivedBytes, + TxBytes: ifaddr.SentBytes, + RxErrors: ifaddr.ReceivedErrors, + TxErrors: ifaddr.SentErrors, + RxDropped: ifaddr.DroppedPackets, + Collisions: ifaddr.Collisions, + }, nil + } + } + + return &LinkStatistics64{}, nil +} + +func (c *Container) joinedNetworkNSPath() string { + if c.state.NetNS != nil { + return c.state.NetNS.Name + } else { + return "" + } +} + +func (c *Container) inspectJoinedNetworkNS(networkns string) (q types.StatusBlock, retErr error) { + // TODO: extract interface information from the vnet jail + return types.StatusBlock{}, nil + +} + +func (c *Container) reloadRootlessRLKPortMapping() error { + return errors.New("unsupported (*Container).reloadRootlessRLKPortMapping") +} + +func (c *Container) setupRootlessNetwork() error { + return nil +} diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go index a8050d130..e27ec8e9d 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -13,25 +13,17 @@ import ( "os" "os/exec" "path/filepath" - "regexp" - "sort" "strconv" "strings" "syscall" "time" "github.com/containernetworking/plugins/pkg/ns" - "github.com/containers/common/libnetwork/etchosts" "github.com/containers/common/libnetwork/resolvconf" "github.com/containers/common/libnetwork/types" - "github.com/containers/common/pkg/config" - "github.com/containers/common/pkg/machine" "github.com/containers/common/pkg/netns" "github.com/containers/common/pkg/util" - "github.com/containers/podman/v4/libpod/define" - "github.com/containers/podman/v4/libpod/events" "github.com/containers/podman/v4/pkg/errorhandling" - "github.com/containers/podman/v4/pkg/namespaces" "github.com/containers/podman/v4/pkg/rootless" "github.com/containers/podman/v4/utils" "github.com/containers/storage/pkg/lockfile" @@ -59,39 +51,6 @@ const ( persistentCNIDir = "/var/lib/cni" ) -// convertPortMappings will remove the HostIP part from the ports when running inside podman machine. -// This is need because a HostIP of 127.0.0.1 would now allow the gvproxy forwarder to reach to open ports. -// For machine the HostIP must only be used by gvproxy and never in the VM. -func (c *Container) convertPortMappings() []types.PortMapping { - if !machine.IsGvProxyBased() || len(c.config.PortMappings) == 0 { - return c.config.PortMappings - } - // if we run in a machine VM we have to ignore the host IP part - newPorts := make([]types.PortMapping, 0, len(c.config.PortMappings)) - for _, port := range c.config.PortMappings { - port.HostIP = "" - newPorts = append(newPorts, port) - } - return newPorts -} - -func (c *Container) getNetworkOptions(networkOpts map[string]types.PerNetworkOptions) types.NetworkOptions { - opts := types.NetworkOptions{ - ContainerID: c.config.ID, - ContainerName: getCNIPodName(c), - } - opts.PortMappings = c.convertPortMappings() - - // If the container requested special network options use this instead of the config. - // This is the case for container restore or network reload. - if c.perNetworkOpts != nil { - opts.Networks = c.perNetworkOpts - } else { - opts.Networks = networkOpts - } - return opts -} - type RootlessNetNS struct { ns ns.NetNS dir string @@ -589,41 +548,6 @@ func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) { return rootlessNetNS, nil } -// setUpNetwork will set up the the networks, on error it will also tear down the cni -// networks. If rootless it will join/create the rootless network namespace. -func (r *Runtime) setUpNetwork(ns string, opts types.NetworkOptions) (map[string]types.StatusBlock, error) { - rootlessNetNS, err := r.GetRootlessNetNs(true) - if err != nil { - return nil, err - } - var results map[string]types.StatusBlock - setUpPod := func() error { - results, err = r.network.Setup(ns, types.SetupOptions{NetworkOptions: opts}) - return err - } - // rootlessNetNS is nil if we are root - if rootlessNetNS != nil { - // execute the setup in the rootless net ns - err = rootlessNetNS.Do(setUpPod) - rootlessNetNS.Lock.Unlock() - } else { - err = setUpPod() - } - return results, err -} - -// getCNIPodName return the pod name (hostname) used by CNI and the dnsname plugin. -// If we are in the pod network namespace use the pod name otherwise the container name -func getCNIPodName(c *Container) string { - if c.config.NetMode.IsPod() || c.IsInfra() { - pod, err := c.runtime.state.Pod(c.PodID()) - if err == nil { - return pod.Name() - } - } - return c.Name() -} - // Create and configure a new network namespace for a container func (r *Runtime) configureNetNS(ctr *Container, ctrNS ns.NetNS) (status map[string]types.StatusBlock, rerr error) { if err := r.exposeMachinePorts(ctr.config.PortMappings); err != nil { @@ -766,56 +690,6 @@ func (r *Runtime) closeNetNS(ctr *Container) error { return nil } -// Tear down a container's network configuration and joins the -// rootless net ns as rootless user -func (r *Runtime) teardownNetwork(ns string, opts types.NetworkOptions) error { - rootlessNetNS, err := r.GetRootlessNetNs(false) - if err != nil { - return err - } - tearDownPod := func() error { - if err := r.network.Teardown(ns, types.TeardownOptions{NetworkOptions: opts}); err != nil { - return fmt.Errorf("tearing down network namespace configuration for container %s: %w", opts.ContainerID, err) - } - return nil - } - - // rootlessNetNS is nil if we are root - if rootlessNetNS != nil { - // execute the cni setup in the rootless net ns - err = rootlessNetNS.Do(tearDownPod) - if cerr := rootlessNetNS.Cleanup(r); cerr != nil { - logrus.WithError(err).Error("failed to clean up rootless netns") - } - rootlessNetNS.Lock.Unlock() - } else { - err = tearDownPod() - } - return err -} - -// Tear down a container's CNI network configuration, but do not tear down the -// namespace itself. -func (r *Runtime) teardownCNI(ctr *Container) error { - if ctr.state.NetNS == nil { - // The container has no network namespace, we're set - return nil - } - - logrus.Debugf("Tearing down network namespace at %s for container %s", ctr.state.NetNS.Path(), ctr.ID()) - - networks, err := ctr.networks() - if err != nil { - return err - } - - if !ctr.config.NetMode.IsSlirp4netns() && len(networks) > 0 { - netOpts := ctr.getNetworkOptions(networks) - return r.teardownNetwork(ctr.state.NetNS.Path(), netOpts) - } - return nil -} - // Tear down a network namespace, undoing all state associated with it. func (r *Runtime) teardownNetNS(ctr *Container) error { if err := r.unexposeMachinePorts(ctr.config.PortMappings); err != nil { @@ -862,72 +736,6 @@ func getContainerNetNS(ctr *Container) (string, *Container, error) { return "", nil, nil } -// isBridgeNetMode checks if the given network mode is bridge. -// It returns nil when it is set to bridge and an error otherwise. -func isBridgeNetMode(n namespaces.NetworkMode) error { - if !n.IsBridge() { - return fmt.Errorf("%q is not supported: %w", n, define.ErrNetworkModeInvalid) - } - return nil -} - -// Reload only works with containers with a configured network. -// It will tear down, and then reconfigure, the network of the container. -// This is mainly used when a reload of firewall rules wipes out existing -// firewall configuration. -// Efforts will be made to preserve MAC and IP addresses, but this only works if -// the container only joined a single CNI network, and was only assigned a -// single MAC or IP. -// Only works on root containers at present, though in the future we could -// extend this to stop + restart slirp4netns -func (r *Runtime) reloadContainerNetwork(ctr *Container) (map[string]types.StatusBlock, error) { - if ctr.state.NetNS == nil { - return nil, fmt.Errorf("container %s network is not configured, refusing to reload: %w", ctr.ID(), define.ErrCtrStateInvalid) - } - if err := isBridgeNetMode(ctr.config.NetMode); err != nil { - return nil, err - } - logrus.Infof("Going to reload container %s network", ctr.ID()) - - err := r.teardownCNI(ctr) - if err != nil { - // teardownCNI will error if the iptables rules do not exists and this is the case after - // a firewall reload. The purpose of network reload is to recreate the rules if they do - // not exists so we should not log this specific error as error. This would confuse users otherwise. - // iptables-legacy and iptables-nft will create different errors make sure to match both. - b, rerr := regexp.MatchString("Couldn't load target `CNI-[a-f0-9]{24}':No such file or directory|Chain 'CNI-[a-f0-9]{24}' does not exist", err.Error()) - if rerr == nil && !b { - logrus.Error(err) - } else { - logrus.Info(err) - } - } - - networkOpts, err := ctr.networks() - if err != nil { - return nil, err - } - - // Set the same network settings as before.. - netStatus := ctr.getNetworkStatus() - for network, perNetOpts := range networkOpts { - for name, netInt := range netStatus[network].Interfaces { - perNetOpts.InterfaceName = name - perNetOpts.StaticMAC = netInt.MacAddress - for _, netAddress := range netInt.Subnets { - perNetOpts.StaticIPs = append(perNetOpts.StaticIPs, netAddress.IPNet.IP) - } - // Normally interfaces have a length of 1, only for some special cni configs we could get more. - // For now just use the first interface to get the ips this should be good enough for most cases. - break - } - networkOpts[network] = perNetOpts - } - ctr.perNetworkOpts = networkOpts - - return r.configureNetNS(ctr, ctr.state.NetNS) -} - // TODO (5.0): return the statistics per network interface // This would allow better compat with docker. func getContainerNetIO(ctr *Container) (*netlink.LinkStatistics, error) { @@ -981,110 +789,6 @@ func getContainerNetIO(ctr *Container) (*netlink.LinkStatistics, error) { return netStats, err } -// Produce an InspectNetworkSettings containing information on the container -// network. -func (c *Container) getContainerNetworkInfo() (*define.InspectNetworkSettings, error) { - if c.config.NetNsCtr != "" { - netNsCtr, err := c.runtime.GetContainer(c.config.NetNsCtr) - if err != nil { - return nil, err - } - // see https://github.com/containers/podman/issues/10090 - // the container has to be locked for syncContainer() - netNsCtr.lock.Lock() - defer netNsCtr.lock.Unlock() - // Have to sync to ensure that state is populated - if err := netNsCtr.syncContainer(); err != nil { - return nil, err - } - logrus.Debugf("Container %s shares network namespace, retrieving network info of container %s", c.ID(), c.config.NetNsCtr) - - return netNsCtr.getContainerNetworkInfo() - } - - settings := new(define.InspectNetworkSettings) - settings.Ports = makeInspectPortBindings(c.config.PortMappings, c.config.ExposedPorts) - - networks, err := c.networks() - if err != nil { - return nil, err - } - - if c.state.NetNS == nil { - if networkNSPath := c.joinedNetworkNSPath(); networkNSPath != "" { - if result, err := c.inspectJoinedNetworkNS(networkNSPath); err == nil { - // fallback to dummy configuration - settings.InspectBasicNetworkConfig = resultToBasicNetworkConfig(result) - return settings, nil - } - // do not propagate error inspecting a joined network ns - logrus.Errorf("Inspecting network namespace: %s of container %s: %v", networkNSPath, c.ID(), err) - } - // We can't do more if the network is down. - - // We still want to make dummy configurations for each CNI net - // the container joined. - if len(networks) > 0 { - settings.Networks = make(map[string]*define.InspectAdditionalNetwork, len(networks)) - for net, opts := range networks { - cniNet := new(define.InspectAdditionalNetwork) - cniNet.NetworkID = net - cniNet.Aliases = opts.Aliases - settings.Networks[net] = cniNet - } - } - - return settings, nil - } - - // Set network namespace path - settings.SandboxKey = c.state.NetNS.Path() - - netStatus := c.getNetworkStatus() - // If this is empty, we're probably slirp4netns - if len(netStatus) == 0 { - return settings, nil - } - - // If we have networks - handle that here - if len(networks) > 0 { - if len(networks) != len(netStatus) { - return nil, fmt.Errorf("network inspection mismatch: asked to join %d network(s) %v, but have information on %d network(s): %w", len(networks), networks, len(netStatus), define.ErrInternal) - } - - settings.Networks = make(map[string]*define.InspectAdditionalNetwork) - - for name, opts := range networks { - result := netStatus[name] - addedNet := new(define.InspectAdditionalNetwork) - addedNet.NetworkID = name - addedNet.Aliases = opts.Aliases - addedNet.InspectBasicNetworkConfig = resultToBasicNetworkConfig(result) - - settings.Networks[name] = addedNet - } - - // if not only the default network is connected we can return here - // otherwise we have to populate the InspectBasicNetworkConfig settings - _, isDefaultNet := networks[c.runtime.config.Network.DefaultNetwork] - if !(len(networks) == 1 && isDefaultNet) { - return settings, nil - } - } - - // If not joining networks, we should have at most 1 result - if len(netStatus) > 1 { - return nil, fmt.Errorf("should have at most 1 network status result if not joining networks, instead got %d: %w", len(netStatus), define.ErrInternal) - } - - if len(netStatus) == 1 { - for _, status := range netStatus { - settings.InspectBasicNetworkConfig = resultToBasicNetworkConfig(status) - } - } - return settings, nil -} - func (c *Container) joinedNetworkNSPath() string { for _, namespace := range c.config.Spec.Linux.Namespaces { if namespace.Type == specs.NetworkNamespace { @@ -1151,49 +855,6 @@ func (c *Container) inspectJoinedNetworkNS(networkns string) (q types.StatusBloc return result, err } -// resultToBasicNetworkConfig produces an InspectBasicNetworkConfig from a CNI -// result -func resultToBasicNetworkConfig(result types.StatusBlock) define.InspectBasicNetworkConfig { - config := define.InspectBasicNetworkConfig{} - interfaceNames := make([]string, 0, len(result.Interfaces)) - for interfaceName := range result.Interfaces { - interfaceNames = append(interfaceNames, interfaceName) - } - // ensure consistent inspect results by sorting - sort.Strings(interfaceNames) - for _, interfaceName := range interfaceNames { - netInt := result.Interfaces[interfaceName] - for _, netAddress := range netInt.Subnets { - size, _ := netAddress.IPNet.Mask.Size() - if netAddress.IPNet.IP.To4() != nil { - // ipv4 - if config.IPAddress == "" { - config.IPAddress = netAddress.IPNet.IP.String() - config.IPPrefixLen = size - config.Gateway = netAddress.Gateway.String() - } else { - config.SecondaryIPAddresses = append(config.SecondaryIPAddresses, define.Address{Addr: netAddress.IPNet.IP.String(), PrefixLength: size}) - } - } else { - // ipv6 - if config.GlobalIPv6Address == "" { - config.GlobalIPv6Address = netAddress.IPNet.IP.String() - config.GlobalIPv6PrefixLen = size - config.IPv6Gateway = netAddress.Gateway.String() - } else { - config.SecondaryIPv6Addresses = append(config.SecondaryIPv6Addresses, define.Address{Addr: netAddress.IPNet.IP.String(), PrefixLength: size}) - } - } - } - if config.MacAddress == "" { - config.MacAddress = netInt.MacAddress.String() - } else { - config.AdditionalMacAddresses = append(config.AdditionalMacAddresses, netInt.MacAddress.String()) - } - } - return config -} - type logrusDebugWriter struct { prefix string } @@ -1202,368 +863,3 @@ func (w *logrusDebugWriter) Write(p []byte) (int, error) { logrus.Debugf("%s%s", w.prefix, string(p)) return len(p), nil } - -// NetworkDisconnect removes a container from the network -func (c *Container) NetworkDisconnect(nameOrID, netName string, force bool) error { - // only the bridge mode supports cni networks - if err := isBridgeNetMode(c.config.NetMode); err != nil { - return err - } - - c.lock.Lock() - defer c.lock.Unlock() - - networks, err := c.networks() - if err != nil { - return err - } - - // check if network exists and if the input is a ID we get the name - // CNI only uses names so it is important that we only use the name - netName, err = c.runtime.normalizeNetworkName(netName) - if err != nil { - return err - } - - _, nameExists := networks[netName] - if !nameExists && len(networks) > 0 { - return fmt.Errorf("container %s is not connected to network %s", nameOrID, netName) - } - - if err := c.syncContainer(); err != nil { - return err - } - // get network status before we disconnect - networkStatus := c.getNetworkStatus() - - if err := c.runtime.state.NetworkDisconnect(c, netName); err != nil { - return err - } - - c.newNetworkEvent(events.NetworkDisconnect, netName) - if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) { - return nil - } - - if c.state.NetNS == nil { - return fmt.Errorf("unable to disconnect %s from %s: %w", nameOrID, netName, define.ErrNoNetwork) - } - - opts := types.NetworkOptions{ - ContainerID: c.config.ID, - ContainerName: getCNIPodName(c), - } - opts.PortMappings = c.convertPortMappings() - opts.Networks = map[string]types.PerNetworkOptions{ - netName: networks[netName], - } - - if err := c.runtime.teardownNetwork(c.state.NetNS.Path(), opts); err != nil { - return err - } - - // update network status if container is running - oldStatus, statusExist := networkStatus[netName] - delete(networkStatus, netName) - c.state.NetworkStatus = networkStatus - err = c.save() - if err != nil { - return err - } - - // Reload ports when there are still connected networks, maybe we removed the network interface with the child ip. - // Reloading without connected networks does not make sense, so we can skip this step. - if rootless.IsRootless() && len(networkStatus) > 0 { - if err := c.reloadRootlessRLKPortMapping(); err != nil { - return err - } - } - - // Update resolv.conf if required - if statusExist { - stringIPs := make([]string, 0, len(oldStatus.DNSServerIPs)) - for _, ip := range oldStatus.DNSServerIPs { - stringIPs = append(stringIPs, ip.String()) - } - if len(stringIPs) > 0 { - logrus.Debugf("Removing DNS Servers %v from resolv.conf", stringIPs) - if err := c.removeNameserver(stringIPs); err != nil { - return err - } - } - - // update /etc/hosts file - if file, ok := c.state.BindMounts[config.DefaultHostsFile]; ok { - // sync the names with c.getHostsEntries() - names := []string{c.Hostname(), c.config.Name} - rm := etchosts.GetNetworkHostEntries(map[string]types.StatusBlock{netName: oldStatus}, names...) - if len(rm) > 0 { - // make sure to lock this file to prevent concurrent writes when - // this is used a net dependency container - lock, err := lockfile.GetLockfile(file) - if err != nil { - return fmt.Errorf("failed to lock hosts file: %w", err) - } - logrus.Debugf("Remove /etc/hosts entries %v", rm) - lock.Lock() - err = etchosts.Remove(file, rm) - lock.Unlock() - if err != nil { - return err - } - } - } - } - return nil -} - -// ConnectNetwork connects a container to a given network -func (c *Container) NetworkConnect(nameOrID, netName string, netOpts types.PerNetworkOptions) error { - // only the bridge mode supports cni networks - if err := isBridgeNetMode(c.config.NetMode); err != nil { - return err - } - - c.lock.Lock() - defer c.lock.Unlock() - - networks, err := c.networks() - if err != nil { - return err - } - - // check if network exists and if the input is a ID we get the name - // CNI only uses names so it is important that we only use the name - netName, err = c.runtime.normalizeNetworkName(netName) - if err != nil { - return err - } - - if err := c.syncContainer(); err != nil { - return err - } - - // get network status before we connect - networkStatus := c.getNetworkStatus() - - // always add the short id as alias for docker compat - netOpts.Aliases = append(netOpts.Aliases, c.config.ID[:12]) - - if netOpts.InterfaceName == "" { - netOpts.InterfaceName = getFreeInterfaceName(networks) - if netOpts.InterfaceName == "" { - return errors.New("could not find free network interface name") - } - } - - if err := c.runtime.state.NetworkConnect(c, netName, netOpts); err != nil { - // Docker compat: treat requests to attach already attached networks as a no-op, ignoring opts - if errors.Is(err, define.ErrNetworkConnected) && c.ensureState(define.ContainerStateConfigured) { - return nil - } - - return err - } - c.newNetworkEvent(events.NetworkConnect, netName) - if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) { - return nil - } - if c.state.NetNS == nil { - return fmt.Errorf("unable to connect %s to %s: %w", nameOrID, netName, define.ErrNoNetwork) - } - - opts := types.NetworkOptions{ - ContainerID: c.config.ID, - ContainerName: getCNIPodName(c), - } - opts.PortMappings = c.convertPortMappings() - opts.Networks = map[string]types.PerNetworkOptions{ - netName: netOpts, - } - - results, err := c.runtime.setUpNetwork(c.state.NetNS.Path(), opts) - if err != nil { - return err - } - if len(results) != 1 { - return errors.New("when adding aliases, results must be of length 1") - } - - // we need to get the old host entries before we add the new one to the status - // if we do not add do it here we will get the wrong existing entries which will throw of the logic - // we could also copy the map but this does not seem worth it - // sync the hostNames with c.getHostsEntries() - hostNames := []string{c.Hostname(), c.config.Name} - oldHostEntries := etchosts.GetNetworkHostEntries(networkStatus, hostNames...) - - // update network status - if networkStatus == nil { - networkStatus = make(map[string]types.StatusBlock, 1) - } - networkStatus[netName] = results[netName] - c.state.NetworkStatus = networkStatus - - err = c.save() - if err != nil { - return err - } - - // The first network needs a port reload to set the correct child ip for the rootlessport process. - // Adding a second network does not require a port reload because the child ip is still valid. - if rootless.IsRootless() && len(networks) == 0 { - if err := c.reloadRootlessRLKPortMapping(); err != nil { - return err - } - } - - ipv6, err := c.checkForIPv6(networkStatus) - if err != nil { - return err - } - - // Update resolv.conf if required - stringIPs := make([]string, 0, len(results[netName].DNSServerIPs)) - for _, ip := range results[netName].DNSServerIPs { - if (ip.To4() == nil) && !ipv6 { - continue - } - stringIPs = append(stringIPs, ip.String()) - } - if len(stringIPs) > 0 { - logrus.Debugf("Adding DNS Servers %v to resolv.conf", stringIPs) - if err := c.addNameserver(stringIPs); err != nil { - return err - } - } - - // update /etc/hosts file - if file, ok := c.state.BindMounts[config.DefaultHostsFile]; ok { - // make sure to lock this file to prevent concurrent writes when - // this is used a net dependency container - lock, err := lockfile.GetLockfile(file) - if err != nil { - return fmt.Errorf("failed to lock hosts file: %w", err) - } - new := etchosts.GetNetworkHostEntries(results, hostNames...) - logrus.Debugf("Add /etc/hosts entries %v", new) - // use special AddIfExists API to make sure we only add new entries if an old one exists - // see the AddIfExists() comment for more information - lock.Lock() - err = etchosts.AddIfExists(file, oldHostEntries, new) - lock.Unlock() - if err != nil { - return err - } - } - - return nil -} - -// get a free interface name for a new network -// return an empty string if no free name was found -func getFreeInterfaceName(networks map[string]types.PerNetworkOptions) string { - ifNames := make([]string, 0, len(networks)) - for _, opts := range networks { - ifNames = append(ifNames, opts.InterfaceName) - } - for i := 0; i < 100000; i++ { - ifName := fmt.Sprintf("eth%d", i) - if !util.StringInSlice(ifName, ifNames) { - return ifName - } - } - return "" -} - -// DisconnectContainerFromNetwork removes a container from its CNI network -func (r *Runtime) DisconnectContainerFromNetwork(nameOrID, netName string, force bool) error { - ctr, err := r.LookupContainer(nameOrID) - if err != nil { - return err - } - return ctr.NetworkDisconnect(nameOrID, netName, force) -} - -// ConnectContainerToNetwork connects a container to a CNI network -func (r *Runtime) ConnectContainerToNetwork(nameOrID, netName string, netOpts types.PerNetworkOptions) error { - ctr, err := r.LookupContainer(nameOrID) - if err != nil { - return err - } - return ctr.NetworkConnect(nameOrID, netName, netOpts) -} - -// normalizeNetworkName takes a network name, a partial or a full network ID and returns the network name. -// If the network is not found a errors is returned. -func (r *Runtime) normalizeNetworkName(nameOrID string) (string, error) { - net, err := r.network.NetworkInspect(nameOrID) - if err != nil { - return "", err - } - return net.Name, nil -} - -// ocicniPortsToNetTypesPorts convert the old port format to the new one -// while deduplicating ports into ranges -func ocicniPortsToNetTypesPorts(ports []types.OCICNIPortMapping) []types.PortMapping { - if len(ports) == 0 { - return nil - } - - newPorts := make([]types.PortMapping, 0, len(ports)) - - // first sort the ports - sort.Slice(ports, func(i, j int) bool { - return compareOCICNIPorts(ports[i], ports[j]) - }) - - // we already check if the slice is empty so we can use the first element - currentPort := types.PortMapping{ - HostIP: ports[0].HostIP, - HostPort: uint16(ports[0].HostPort), - ContainerPort: uint16(ports[0].ContainerPort), - Protocol: ports[0].Protocol, - Range: 1, - } - - for i := 1; i < len(ports); i++ { - if ports[i].HostIP == currentPort.HostIP && - ports[i].Protocol == currentPort.Protocol && - ports[i].HostPort-int32(currentPort.Range) == int32(currentPort.HostPort) && - ports[i].ContainerPort-int32(currentPort.Range) == int32(currentPort.ContainerPort) { - currentPort.Range++ - } else { - newPorts = append(newPorts, currentPort) - currentPort = types.PortMapping{ - HostIP: ports[i].HostIP, - HostPort: uint16(ports[i].HostPort), - ContainerPort: uint16(ports[i].ContainerPort), - Protocol: ports[i].Protocol, - Range: 1, - } - } - } - newPorts = append(newPorts, currentPort) - return newPorts -} - -// compareOCICNIPorts will sort the ocicni ports by -// 1) host ip -// 2) protocol -// 3) hostPort -// 4) container port -func compareOCICNIPorts(i, j types.OCICNIPortMapping) bool { - if i.HostIP != j.HostIP { - return i.HostIP < j.HostIP - } - - if i.Protocol != j.Protocol { - return i.Protocol < j.Protocol - } - - if i.HostPort != j.HostPort { - return i.HostPort < j.HostPort - } - - return i.ContainerPort < j.ContainerPort -} diff --git a/libpod/networking_unsupported.go b/libpod/networking_unsupported.go index 9429287f9..e5a6d1456 100644 --- a/libpod/networking_unsupported.go +++ b/libpod/networking_unsupported.go @@ -1,5 +1,5 @@ -//go:build !linux -// +build !linux +//go:build !linux && !freebsd +// +build !linux,!freebsd package libpod |