Diffstat (limited to 'libpod')
-rw-r--r--libpod/boltdb_state_internal.go9
-rw-r--r--libpod/common_test.go4
-rw-r--r--libpod/container.go2
-rw-r--r--libpod/container_config.go16
-rw-r--r--libpod/container_copy_linux.go2
-rw-r--r--libpod/container_internal.go123
-rw-r--r--libpod/container_internal_linux.go77
-rw-r--r--libpod/container_log_linux.go39
-rw-r--r--libpod/container_path_resolution.go2
-rw-r--r--libpod/info.go2
-rw-r--r--libpod/kube.go151
-rw-r--r--libpod/network/cni/cni_conversion.go2
-rw-r--r--libpod/network/cni/cni_types.go4
-rw-r--r--libpod/network/cni/config_test.go20
-rw-r--r--libpod/network/cni/run.go8
-rw-r--r--libpod/network/cni/run_test.go14
-rw-r--r--libpod/network/types/network.go52
-rw-r--r--libpod/network/types/network_test.go82
-rw-r--r--libpod/networking_linux.go330
-rw-r--r--libpod/networking_linux_test.go323
-rw-r--r--libpod/networking_slirp4netns.go53
-rw-r--r--libpod/oci_conmon_linux.go2
-rw-r--r--libpod/oci_util.go169
-rw-r--r--libpod/options.go32
-rw-r--r--libpod/pod.go2
-rw-r--r--libpod/runtime.go2
-rw-r--r--libpod/runtime_ctr.go11
-rw-r--r--libpod/runtime_pod_linux.go52
-rw-r--r--libpod/runtime_volume_linux.go6
-rw-r--r--libpod/shutdown/handler.go18
-rw-r--r--libpod/state_test.go40
-rw-r--r--libpod/util.go22
32 files changed, 1251 insertions, 420 deletions
diff --git a/libpod/boltdb_state_internal.go b/libpod/boltdb_state_internal.go
index 3e3c17a9e..e71d82736 100644
--- a/libpod/boltdb_state_internal.go
+++ b/libpod/boltdb_state_internal.go
@@ -384,6 +384,15 @@ func (s *BoltState) getContainerConfigFromDB(id []byte, config *ContainerConfig,
return errors.Wrapf(err, "error unmarshalling container %s config", string(id))
}
+ // convert ports to the new format if needed
+ if len(config.ContainerNetworkConfig.OldPortMappings) > 0 && len(config.ContainerNetworkConfig.PortMappings) == 0 {
+ config.ContainerNetworkConfig.PortMappings = ocicniPortsToNetTypesPorts(config.ContainerNetworkConfig.OldPortMappings)
+ // keep the OldPortMappings in case an user has to downgrade podman
+
+ // indicate the the config was modified and should be written back to the db when possible
+ config.rewrite = true
+ }
+
return nil
}
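Review bot: The comment above the new `config.rewrite = true` assignment reads "indicate the the config was modified"; suggest `// indicate that the config was modified and should be written back to the db when possible`, and "an user" in the line above should be "a user". The migration deliberately leaves OldPortMappings populated next to the converted PortMappings, which only the field comments in container_config.go explain; a one-line pointer here, such as `// OldPortMappings is kept on purpose so an older podman can still read the record`, saves the next reader a lookup. getContainerConfigFromDB starts outside the hunk.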
diff --git a/libpod/common_test.go b/libpod/common_test.go
index 4662a33bd..67e29c265 100644
--- a/libpod/common_test.go
+++ b/libpod/common_test.go
@@ -41,18 +41,20 @@ func getTestContainer(id, name string, manager lock.Manager) (*Container, error)
ContainerNetworkConfig: ContainerNetworkConfig{
DNSServer: []net.IP{net.ParseIP("192.168.1.1"), net.ParseIP("192.168.2.2")},
DNSSearch: []string{"example.com", "example.example.com"},
- PortMappings: []types.OCICNIPortMapping{
+ PortMappings: []types.PortMapping{
{
HostPort: 80,
ContainerPort: 90,
Protocol: "tcp",
HostIP: "192.168.3.3",
+ Range: 1,
},
{
HostPort: 100,
ContainerPort: 110,
Protocol: "udp",
HostIP: "192.168.4.4",
+ Range: 1,
},
},
},
diff --git a/libpod/container.go b/libpod/container.go
index 4d15c04c5..86989a02f 100644
--- a/libpod/container.go
+++ b/libpod/container.go
@@ -465,7 +465,7 @@ func (c *Container) NewNetNS() bool {
// PortMappings returns the ports that will be mapped into a container if
// a new network namespace is created
// If NewNetNS() is false, this value is unused
-func (c *Container) PortMappings() ([]types.OCICNIPortMapping, error) {
+func (c *Container) PortMappings() ([]types.PortMapping, error) {
// First check if the container belongs to a network namespace (like a pod)
if len(c.config.NetNsCtr) > 0 {
netNsCtr, err := c.runtime.GetContainer(c.config.NetNsCtr)
diff --git a/libpod/container_config.go b/libpod/container_config.go
index 54d102a71..412be835f 100644
--- a/libpod/container_config.go
+++ b/libpod/container_config.go
@@ -78,6 +78,11 @@ type ContainerConfig struct {
// These containers must be started before this container is started.
Dependencies []string
+ // rewrite is an internal bool to indicate that the config was modified after
+ // a read from the db, e.g. to migrate config fields after an upgrade.
+ // This field should never be written to the db, the json tag ensures this.
+ rewrite bool `json:"-"`
+
// embedded sub-configs
ContainerRootFSConfig
ContainerSecurityConfig
@@ -153,6 +158,8 @@ type ContainerRootFSConfig struct {
Secrets []*ContainerSecret `json:"secrets,omitempty"`
// SecretPath is the secrets location in storage
SecretsPath string `json:"secretsPath"`
+ // StorageOpts to be used when creating rootfs
+ StorageOpts map[string]string `json:"storageOpts"`
// Volatile specifies whether the container storage can be optimized
// at the cost of not syncing all the dirty files in memory.
Volatile bool `json:"volatile,omitempty"`
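Review bot: The new StorageOpts field is tagged `json:"storageOpts"` without `omitempty`, unlike the neighbouring Secrets and Volatile fields, so every persisted config gains a `"storageOpts":null` entry for containers that never set it; adding `,omitempty` to the tag keeps the stored records as they were. Its doc comment could also say what the map holds, because setupStorage in container_internal.go (same diff) ranges over the map's values and splits each on "=", which only makes sense if that is the intended shape. The ContainerRootFSConfig struct continues outside the hunk.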
@@ -226,11 +233,16 @@ type ContainerNetworkConfig struct {
// StaticMAC is a static MAC to request for the container.
// This cannot be set unless CreateNetNS is set.
// If not set, the container will be dynamically assigned a MAC by CNI.
- StaticMAC net.HardwareAddr `json:"staticMAC"`
+ StaticMAC types.HardwareAddr `json:"staticMAC"`
// PortMappings are the ports forwarded to the container's network
// namespace
// These are not used unless CreateNetNS is true
- PortMappings []types.OCICNIPortMapping `json:"portMappings,omitempty"`
+ PortMappings []types.PortMapping `json:"newPortMappings,omitempty"`
+ // OldPortMappings are the ports forwarded to the container's network
+ // namespace. As of podman 4.0 this field is deprecated, use PortMappings
+ // instead. The db will convert the old ports to the new structure for you.
+ // These are not used unless CreateNetNS is true
+ OldPortMappings []types.OCICNIPortMapping `json:"portMappings,omitempty"`
// ExposedPorts are the ports which are exposed but not forwarded
// into the container.
// The map key is the port and the string slice contains the protocols,
diff --git a/libpod/container_copy_linux.go b/libpod/container_copy_linux.go
index 7d4dd0d46..954d54a1d 100644
--- a/libpod/container_copy_linux.go
+++ b/libpod/container_copy_linux.go
@@ -15,8 +15,8 @@ import (
"github.com/containers/buildah/util"
"github.com/containers/podman/v3/libpod/define"
"github.com/containers/podman/v3/pkg/rootless"
+ "github.com/containers/storage/pkg/archive"
"github.com/containers/storage/pkg/idtools"
- "github.com/docker/docker/pkg/archive"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index 3f9738411..fbc2c1f38 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -17,12 +17,14 @@ import (
"github.com/containers/buildah/copier"
"github.com/containers/buildah/pkg/overlay"
butil "github.com/containers/buildah/util"
+ "github.com/containers/common/pkg/chown"
"github.com/containers/podman/v3/libpod/define"
"github.com/containers/podman/v3/libpod/events"
"github.com/containers/podman/v3/pkg/cgroups"
"github.com/containers/podman/v3/pkg/ctime"
"github.com/containers/podman/v3/pkg/hooks"
"github.com/containers/podman/v3/pkg/hooks/exec"
+ "github.com/containers/podman/v3/pkg/lookup"
"github.com/containers/podman/v3/pkg/rootless"
"github.com/containers/podman/v3/pkg/selinux"
"github.com/containers/podman/v3/pkg/util"
@@ -443,10 +445,24 @@ func (c *Container) setupStorage(ctx context.Context) error {
},
LabelOpts: c.config.LabelOpts,
}
- if c.restoreFromCheckpoint && !c.config.Privileged {
- // If restoring from a checkpoint, the root file-system
- // needs to be mounted with the same SELinux labels as
- // it was mounted previously.
+
+ nopts := len(c.config.StorageOpts)
+ if nopts > 0 {
+ options.StorageOpt = make(map[string]string, nopts)
+ for _, opt := range c.config.StorageOpts {
+ split2 := strings.SplitN(opt, "=", 2)
+ if len(split2) > 2 {
+ return errors.Wrapf(define.ErrInvalidArg, "invalid storage options %q for %s", opt, c.ID())
+ }
+ options.StorageOpt[split2[0]] = split2[1]
+ }
+ }
+ if c.restoreFromCheckpoint && c.config.ProcessLabel != "" && c.config.MountLabel != "" {
+ // If restoring from a checkpoint, the root file-system needs
+ // to be mounted with the same SELinux labels as it was mounted
+ // previously. But only if both labels have been set. For
+ // privileged containers or '--ipc host' only ProcessLabel will
+ // be set and so we will skip it for cases like that.
if options.Flags == nil {
options.Flags = make(map[string]interface{})
}
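Review bot: The new StorageOpts loop cannot do what its guard suggests: `strings.SplitN(opt, "=", 2)` never yields more than two elements, so `len(split2) > 2` is dead, while an entry without "=" yields a single element and `split2[1]` panics with an index out of range. According to container_config.go in this same diff `c.config.StorageOpts` is a `map[string]string`, so `for _, opt := range` discards the keys and splits the values; unless the values are themselves `key=value` strings, the intended copy is `for key, value := range c.config.StorageOpts { options.StorageOpt[key] = value }`. If the split form is really wanted, the guard should be `if len(split2) != 2 {`. setupStorage continues on both sides of the hunk.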
@@ -480,13 +496,35 @@ func (c *Container) setupStorage(ctx context.Context) error {
c.setupStorageMapping(&options.IDMappingOptions, &c.config.IDMappings)
- containerInfo, err := c.runtime.storageService.CreateContainerStorage(ctx, c.runtime.imageContext, c.config.RootfsImageName, c.config.RootfsImageID, c.config.Name, c.config.ID, options)
- if err != nil {
- return errors.Wrapf(err, "error creating container storage")
+ // Unless the user has specified a name, use a randomly generated one.
+ // Note that name conflicts may occur (see #11735), so we need to loop.
+ generateName := c.config.Name == ""
+ var containerInfo ContainerInfo
+ var containerInfoErr error
+ for {
+ if generateName {
+ name, err := c.runtime.generateName()
+ if err != nil {
+ return err
+ }
+ c.config.Name = name
+ }
+ containerInfo, containerInfoErr = c.runtime.storageService.CreateContainerStorage(ctx, c.runtime.imageContext, c.config.RootfsImageName, c.config.RootfsImageID, c.config.Name, c.config.ID, options)
+
+ if !generateName || errors.Cause(containerInfoErr) != storage.ErrDuplicateName {
+ break
+ }
+ }
+ if containerInfoErr != nil {
+ return errors.Wrapf(containerInfoErr, "error creating container storage")
}
- c.config.IDMappings.UIDMap = containerInfo.UIDMap
- c.config.IDMappings.GIDMap = containerInfo.GIDMap
+ // only reconfig IDMappings if layer was mounted from storage
+ // if its a external overlay do not reset IDmappings
+ if !c.config.RootfsOverlay {
+ c.config.IDMappings.UIDMap = containerInfo.UIDMap
+ c.config.IDMappings.GIDMap = containerInfo.GIDMap
+ }
processLabel, err := c.processLabel(containerInfo.ProcessLabel)
if err != nil {
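Review bot: The retry loop around CreateContainerStorage has no upper bound: when generateName is true and storage keeps answering ErrDuplicateName the function never returns, and each iteration overwrites c.config.Name with a name that is not reverted on the eventual error. A small cap, e.g. `for attempt := 0; attempt < 10; attempt++ {` with the duplicate-name error returned once attempts are exhausted, keeps the fix for the name-conflict issue the comment cites while guaranteeing termination. The hunk sits in the middle of setupStorage.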
@@ -647,6 +685,19 @@ func (c *Container) refresh() error {
c.state.NetworkStatus = nil
c.state.NetworkStatusOld = nil
+ // Rewrite the config if necessary.
+ // Podman 4.0 uses a new port format in the config.
+ // getContainerConfigFromDB() already converted the old ports to the new one
+ // but it did not write the config to the db back for performance reasons.
+ // If a rewrite must happen the config.rewrite field is set to true.
+ if c.config.rewrite {
+ // SafeRewriteContainerConfig must be used with care. Make sure to not change config fields by accident.
+ if err := c.runtime.state.SafeRewriteContainerConfig(c, "", "", c.config); err != nil {
+ return errors.Wrapf(err, "failed to rewrite the config for container %s", c.config.ID)
+ }
+ c.config.rewrite = false
+ }
+
if err := c.save(); err != nil {
return errors.Wrapf(err, "error refreshing state for container %s", c.ID())
}
@@ -1493,8 +1544,8 @@ func (c *Container) mountStorage() (_ string, deferredErr error) {
mountPoint := c.config.Rootfs
// Check if overlay has to be created on top of Rootfs
if c.config.RootfsOverlay {
- overlayDest := c.runtime.store.GraphRoot()
- contentDir, err := overlay.GenerateStructure(c.runtime.store.GraphRoot(), c.ID(), "rootfs", c.RootUID(), c.RootGID())
+ overlayDest := c.runtime.RunRoot()
+ contentDir, err := overlay.GenerateStructure(overlayDest, c.ID(), "rootfs", c.RootUID(), c.RootGID())
if err != nil {
return "", errors.Wrapf(err, "rootfs-overlay: failed to create TempDir in the %s directory", overlayDest)
}
@@ -1515,6 +1566,19 @@ func (c *Container) mountStorage() (_ string, deferredErr error) {
}
mountPoint = overlayMount.Source
+ execUser, err := lookup.GetUserGroupInfo(mountPoint, c.config.User, nil)
+ if err != nil {
+ return "", err
+ }
+ hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid))
+ if err != nil {
+ return "", errors.Wrap(err, "unable to get host UID and host GID")
+ }
+
+ //note: this should not be recursive, if using external rootfs users should be responsible on configuring ownership.
+ if err := chown.ChangeHostPathOwnership(mountPoint, false, int(hostUID), int(hostGID)); err != nil {
+ return "", err
+ }
}
if mountPoint == "" {
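Review bot: The chown added after `mountPoint = overlayMount.Source` calls chown.ChangeHostPathOwnership directly, so it runs on every mountStorage of the container, whereas the rest of this commit routes the same operation through the new `c.ChangeHostPathOwnership` wrapper that skips the chown once the container is past the configured state and the owner already matches; `if err := c.ChangeHostPathOwnership(mountPoint, false, int(hostUID), int(hostGID)); err != nil {` would make the overlay rootfs path consistent (the wrapper is declared in container_internal_linux.go, so check the build tags of this file before switching). The comment `//note: this should not be recursive, ...` should be written `// Note: this should not be recursive; ...` to satisfy gofmt-style comment spacing. mountStorage continues outside the hunk.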
@@ -1690,13 +1754,27 @@ func (c *Container) cleanupStorage() error {
var cleanupErr error
+ markUnmounted := func() {
+ c.state.Mountpoint = ""
+ c.state.Mounted = false
+
+ if c.valid {
+ if err := c.save(); err != nil {
+ if cleanupErr != nil {
+ logrus.Errorf("Unmounting container %s: %v", c.ID(), cleanupErr)
+ }
+ cleanupErr = err
+ }
+ }
+ }
+
// umount rootfs overlay if it was created
if c.config.RootfsOverlay {
- overlayBasePath := c.runtime.store.GraphRoot()
- overlayBasePath = filepath.Join(overlayBasePath, "rootfs")
+ overlayBasePath := filepath.Dir(c.state.Mountpoint)
if err := overlay.Unmount(overlayBasePath); err != nil {
- // If the container can't remove content report the error
- logrus.Errorf("Failed to cleanup overlay mounts for %s: %v", c.ID(), err)
+ if cleanupErr != nil {
+ logrus.Errorf("Failed to cleanup overlay mounts for %s: %v", c.ID(), err)
+ }
cleanupErr = err
}
}
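Review bot: The overlay branch now derives the unmount path from `filepath.Dir(c.state.Mountpoint)`; when Mountpoint is empty (container never mounted, or state already reset) that is ".", and overlay.Unmount is handed the current working directory instead of the old GraphRoot-based path, so guard it with `if c.state.Mountpoint != "" {` before calling Unmount. In the same branch `if cleanupErr != nil { logrus.Errorf(...) }` can never fire, because cleanupErr is declared a few lines above and this is its first assignment, and it logs the new err rather than the previous cleanupErr the way the markUnmounted closure does; the old unconditional log of the overlay error should come back, with the overwrite following the closure's pattern. cleanupStorage continues outside the hunk.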
@@ -1717,6 +1795,7 @@ func (c *Container) cleanupStorage() error {
}
if c.config.Rootfs != "" {
+ markUnmounted()
return cleanupErr
}
@@ -1761,17 +1840,7 @@ func (c *Container) cleanupStorage() error {
}
}
- c.state.Mountpoint = ""
- c.state.Mounted = false
-
- if c.valid {
- if err := c.save(); err != nil {
- if cleanupErr != nil {
- logrus.Errorf("Unmounting container %s: %v", c.ID(), cleanupErr)
- }
- cleanupErr = err
- }
- }
+ markUnmounted()
return cleanupErr
}
@@ -2079,7 +2148,7 @@ func (c *Container) checkReadyForRemoval() error {
return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is in invalid state", c.ID())
}
- if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
+ if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) && !c.IsInfra() {
return errors.Wrapf(define.ErrCtrStateInvalid, "cannot remove container %s as it is %s - running or paused containers cannot be removed without force", c.ID(), c.state.State.String())
}
diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go
index 867ecc2ad..3187724ca 100644
--- a/libpod/container_internal_linux.go
+++ b/libpod/container_internal_linux.go
@@ -21,6 +21,7 @@ import (
"time"
metadata "github.com/checkpoint-restore/checkpointctl/lib"
+ "github.com/checkpoint-restore/go-criu/v5/stats"
cdi "github.com/container-orchestrated-devices/container-device-interface/pkg"
"github.com/containernetworking/plugins/pkg/ns"
"github.com/containers/buildah/pkg/chrootuser"
@@ -50,6 +51,7 @@ import (
runcuser "github.com/opencontainers/runc/libcontainer/user"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-tools/generate"
+ "github.com/opencontainers/selinux/go-selinux"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
@@ -321,7 +323,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
return nil, err
}
- g := generate.Generator{Config: c.config.Spec}
+ g := generate.NewFromSpec(c.config.Spec)
// If network namespace was requested, add it now
if c.config.CreateNetNS {
@@ -390,11 +392,11 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
for _, o := range namedVol.Options {
switch o {
case "U":
- if err := chown.ChangeHostPathOwnership(mountPoint, true, int(hostUID), int(hostGID)); err != nil {
+ if err := c.ChangeHostPathOwnership(mountPoint, true, int(hostUID), int(hostGID)); err != nil {
return nil, err
}
- if err := chown.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
+ if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
return nil, err
}
}
@@ -423,14 +425,15 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
if m.Type == "tmpfs" {
options = append(options, []string{fmt.Sprintf("uid=%d", execUser.Uid), fmt.Sprintf("gid=%d", execUser.Gid)}...)
} else {
- if err := chown.ChangeHostPathOwnership(m.Source, true, int(hostUID), int(hostGID)); err != nil {
+ // only chown on initial creation of container
+ if err := c.ChangeHostPathOwnership(m.Source, true, int(hostUID), int(hostGID)); err != nil {
return nil, err
}
}
case "z":
fallthrough
case "Z":
- if err := label.Relabel(m.Source, c.MountLabel(), label.IsShared(o)); err != nil {
+ if err := c.relabel(m.Source, c.MountLabel(), label.IsShared(o)); err != nil {
return nil, err
}
@@ -477,11 +480,11 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
for _, o := range overlayVol.Options {
switch o {
case "U":
- if err := chown.ChangeHostPathOwnership(overlayVol.Source, true, int(hostUID), int(hostGID)); err != nil {
+ if err := c.ChangeHostPathOwnership(overlayVol.Source, true, int(hostUID), int(hostGID)); err != nil {
return nil, err
}
- if err := chown.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
+ if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
return nil, err
}
}
@@ -1008,12 +1011,16 @@ func (c *Container) exportCheckpoint(options ContainerCheckpointOptions) error {
includeFiles := []string{
"artifacts",
- "ctr.log",
metadata.ConfigDumpFile,
metadata.SpecDumpFile,
metadata.NetworkStatusFile,
+ stats.StatsDump,
}
+ if c.LogDriver() == define.KubernetesLogging ||
+ c.LogDriver() == define.JSONLogging {
+ includeFiles = append(includeFiles, "ctr.log")
+ }
if options.PreCheckPoint {
includeFiles = append(includeFiles, preCheckpointDir)
} else {
@@ -1192,7 +1199,7 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO
if !options.Keep && !options.PreCheckPoint {
cleanup := []string{
"dump.log",
- "stats-dump",
+ stats.StatsDump,
metadata.ConfigDumpFile,
metadata.SpecDumpFile,
}
@@ -1214,7 +1221,8 @@ func (c *Container) importCheckpoint(input string) error {
}
// Make sure the newly created config.json exists on disk
- g := generate.Generator{Config: c.config.Spec}
+ g := generate.NewFromSpec(c.config.Spec)
+
if err := c.saveSpec(g.Config); err != nil {
return errors.Wrap(err, "saving imported container specification for restore failed")
}
@@ -1540,6 +1548,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
logrus.Debugf("Restored container %s", c.ID())
c.state.State = define.ContainerStateRunning
+ c.state.Checkpointed = false
if !options.Keep {
// Delete all checkpoint related files. At this point, in theory, all files
@@ -1557,8 +1566,8 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
cleanup := [...]string{
"restore.log",
"dump.log",
- "stats-dump",
- "stats-restore",
+ stats.StatsDump,
+ stats.StatsRestore,
metadata.NetworkStatusFile,
metadata.RootFsDiffTar,
metadata.DeletedFilesFile,
@@ -1706,13 +1715,13 @@ func (c *Container) makeBindMounts() error {
}
if c.state.BindMounts["/etc/hosts"] != "" {
- if err := label.Relabel(c.state.BindMounts["/etc/hosts"], c.config.MountLabel, true); err != nil {
+ if err := c.relabel(c.state.BindMounts["/etc/hosts"], c.config.MountLabel, true); err != nil {
return err
}
}
if c.state.BindMounts["/etc/resolv.conf"] != "" {
- if err := label.Relabel(c.state.BindMounts["/etc/resolv.conf"], c.config.MountLabel, true); err != nil {
+ if err := c.relabel(c.state.BindMounts["/etc/resolv.conf"], c.config.MountLabel, true); err != nil {
return err
}
}
@@ -1994,7 +2003,7 @@ func (c *Container) generateResolvConf() (string, error) {
}
// Relabel resolv.conf for the container
- if err := label.Relabel(destPath, c.config.MountLabel, true); err != nil {
+ if err := c.relabel(destPath, c.config.MountLabel, true); err != nil {
return "", err
}
@@ -2016,7 +2025,7 @@ func (c *Container) generateHosts(path string) (string, error) {
}
// based on networking mode we may want to append the localhost
-// if there isn't any record for it and also this shoud happen
+// if there isn't any record for it and also this should happen
// in slirp4netns and similar network modes.
func (c *Container) appendLocalhost(hosts string) string {
if !strings.Contains(hosts, "localhost") &&
@@ -2611,7 +2620,7 @@ func (c *Container) copyTimezoneFile(zonePath string) (string, error) {
if err != nil {
return "", err
}
- if err := label.Relabel(localtimeCopy, c.config.MountLabel, false); err != nil {
+ if err := c.relabel(localtimeCopy, c.config.MountLabel, false); err != nil {
return "", err
}
if err := dest.Chown(c.RootUID(), c.RootGID()); err != nil {
@@ -2746,3 +2755,37 @@ func (c *Container) fixVolumePermissions(v *ContainerNamedVolume) error {
}
return nil
}
+
+func (c *Container) relabel(src, mountLabel string, recurse bool) error {
+ if !selinux.GetEnabled() || mountLabel == "" {
+ return nil
+ }
+ // only relabel on initial creation of container
+ if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateUnknown) {
+ label, err := label.FileLabel(src)
+ if err != nil {
+ return err
+ }
+ // If labels are different, might be on a tmpfs
+ if label == mountLabel {
+ return nil
+ }
+ }
+ return label.Relabel(src, mountLabel, recurse)
+}
+
+func (c *Container) ChangeHostPathOwnership(src string, recurse bool, uid, gid int) error {
+ // only chown on initial creation of container
+ if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateUnknown) {
+ st, err := os.Stat(src)
+ if err != nil {
+ return err
+ }
+
+ // If labels are different, might be on a tmpfs
+ if int(st.Sys().(*syscall.Stat_t).Uid) == uid && int(st.Sys().(*syscall.Stat_t).Gid) == gid {
+ return nil
+ }
+ }
+ return chown.ChangeHostPathOwnership(src, recurse, uid, gid)
+}
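Review bot: The exported method ChangeHostPathOwnership has no doc comment, and the inline comment `// If labels are different, might be on a tmpfs` was copied from relabel although the code compares uid and gid, not labels; suggest `// ChangeHostPathOwnership chowns src to uid:gid. Once the container is past initial creation the call is skipped when src already has that owner, which may be the case on a tmpfs.` as the doc comment and `// Owner already matches; nothing to do.` inline. In relabel, `label, err := label.FileLabel(src)` shadows the label package for the rest of that block; it compiles, but `current, err := label.FileLabel(src)` avoids the confusion. Both wrappers use `!c.ensureState(define.ContainerStateConfigured, define.ContainerStateUnknown)` to mean "not initial creation", and the `// only relabel on initial creation` comments read as the opposite of what the code does (always relabel on creation, otherwise only when the label differs), so they should be reworded the same way.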
diff --git a/libpod/container_log_linux.go b/libpod/container_log_linux.go
index ca1e11ef5..4029d0af7 100644
--- a/libpod/container_log_linux.go
+++ b/libpod/container_log_linux.go
@@ -91,8 +91,12 @@ func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOption
var cursorError error
for i := 1; i <= 3; i++ {
cursor, cursorError = journal.GetCursor()
+ hundreds := 1
+ for j := 1; j < i; j++ {
+ hundreds *= 2
+ }
if cursorError != nil {
- time.Sleep(time.Duration(i*100) * time.Millisecond)
+ time.Sleep(time.Duration(hundreds*100) * time.Millisecond)
continue
}
break
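Review bot: The four-line loop that computes `hundreds` is a power of two, so the sleep can simply be `time.Sleep(time.Duration(100<<(i-1)) * time.Millisecond)`, which also makes the 100/200/400 ms backoff obvious. The sleep still runs after the third and final failed GetCursor, so the caller waits 400 ms for nothing before the cursor error is reported; guarding it with `if i < 3 {` (or looping on the retry count and sleeping at the top) avoids that. readFromJournal continues outside the hunk.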
@@ -117,7 +121,24 @@ func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOption
}()
tailQueue := []*logs.LogLine{} // needed for options.Tail
- doTail := options.Tail > 0
+ doTail := options.Tail >= 0
+ doTailFunc := func() {
+ // Flush *once* we hit the end of the journal.
+ startIndex := int64(len(tailQueue))
+ outputLines := int64(0)
+ for startIndex > 0 && outputLines < options.Tail {
+ startIndex--
+ for startIndex > 0 && tailQueue[startIndex].Partial() {
+ startIndex--
+ }
+ outputLines++
+ }
+ for i := startIndex; i < int64(len(tailQueue)); i++ {
+ logChannel <- tailQueue[i]
+ }
+ tailQueue = nil
+ doTail = false
+ }
lastReadCursor := ""
for {
select {
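Review bot: In doTailFunc the inner `for startIndex > 0 && tailQueue[startIndex].Partial()` loop runs after startIndex has already been decremented onto an entry, so when that entry is the final (non-partial) chunk of a line it never walks back over the partial chunks that precede it; with Tail of 1 and a multi-chunk last line only the closing chunk would be emitted. If Partial() is true for every chunk of a line except the last, the walk should test the element before the cursor: `for startIndex > 0 && tailQueue[startIndex-1].Partial() {`. Separately, `doTail := options.Tail >= 0` now makes Tail == 0 flush nothing at all; if that is the intended meaning a short comment on that line would help, since the hunk gives no hint. readFromJournal continues outside the hunk.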
@@ -148,16 +169,7 @@ func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOption
// Hit the end of the journal (so far?).
if cursor == lastReadCursor {
if doTail {
- // Flush *once* we hit the end of the journal.
- startIndex := int64(len(tailQueue)-1) - options.Tail
- if startIndex < 0 {
- startIndex = 0
- }
- for i := startIndex; i < int64(len(tailQueue)); i++ {
- logChannel <- tailQueue[i]
- }
- tailQueue = nil
- doTail = false
+ doTailFunc()
}
// Unless we follow, quit.
if !options.Follow {
@@ -190,6 +202,9 @@ func (c *Container) readFromJournal(ctx context.Context, options *logs.LogOption
return
}
if status == events.Exited {
+ if doTail {
+ doTailFunc()
+ }
return
}
continue
diff --git a/libpod/container_path_resolution.go b/libpod/container_path_resolution.go
index bb2ef1a73..7db23b783 100644
--- a/libpod/container_path_resolution.go
+++ b/libpod/container_path_resolution.go
@@ -161,7 +161,7 @@ func isPathOnBindMount(c *Container, containerPath string) bool {
if cleanedContainerPath == filepath.Clean(m.Destination) {
return true
}
- for dest := m.Destination; dest != "/"; dest = filepath.Dir(dest) {
+ for dest := m.Destination; dest != "/" && dest != "."; dest = filepath.Dir(dest) {
if cleanedContainerPath == dest {
return true
}
diff --git a/libpod/info.go b/libpod/info.go
index a2fd18491..8d0b19f23 100644
--- a/libpod/info.go
+++ b/libpod/info.go
@@ -332,7 +332,7 @@ func readKernelVersion() (string, error) {
return "", err
}
f := bytes.Fields(buf)
- if len(f) < 2 {
+ if len(f) < 3 {
return string(bytes.TrimSpace(buf)), nil
}
return string(f[2]), nil
diff --git a/libpod/kube.go b/libpod/kube.go
index 816fe9cc3..e0ed911af 100644
--- a/libpod/kube.go
+++ b/libpod/kube.go
@@ -11,6 +11,7 @@ import (
"strings"
"time"
+ "github.com/containers/common/pkg/config"
"github.com/containers/podman/v3/libpod/define"
"github.com/containers/podman/v3/libpod/network/types"
"github.com/containers/podman/v3/pkg/env"
@@ -25,6 +26,7 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
v12 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/util/intstr"
)
// GenerateForKube takes a slice of libpod containers and generates
@@ -73,7 +75,7 @@ func (p *Pod) GenerateForKube(ctx context.Context) (*v1.Pod, []v1.ServicePort, e
Hostnames: []string{hostSli[0]},
})
}
- ports, err = ocicniPortMappingToContainerPort(infraContainer.config.PortMappings)
+ ports, err = portMappingToContainerPort(infraContainer.config.PortMappings)
if err != nil {
return nil, servicePorts, err
}
@@ -196,10 +198,11 @@ func containerPortsToServicePorts(containerPorts []v1.ContainerPort) []v1.Servic
for _, cp := range containerPorts {
nodePort := 30000 + rand.Intn(32767-30000+1)
servicePort := v1.ServicePort{
- Protocol: cp.Protocol,
- Port: cp.ContainerPort,
- NodePort: int32(nodePort),
- Name: strconv.Itoa(int(cp.ContainerPort)),
+ Protocol: cp.Protocol,
+ Port: cp.ContainerPort,
+ NodePort: int32(nodePort),
+ Name: strconv.Itoa(int(cp.ContainerPort)),
+ TargetPort: intstr.Parse(strconv.Itoa(int(cp.ContainerPort))),
}
sps = append(sps, servicePort)
}
@@ -246,7 +249,7 @@ func (p *Pod) podWithContainers(ctx context.Context, containers []*Container, po
return nil, err
}
for k, v := range annotations {
- podAnnotations[define.BindMountPrefix+k] = v
+ podAnnotations[define.BindMountPrefix+k] = strings.TrimSpace(v)
}
// Since port bindings for the pod are handled by the
// infra container, wipe them here.
@@ -330,7 +333,7 @@ func newPodObject(podName string, annotations map[string]string, initCtrs, conta
InitContainers: initCtrs,
Volumes: volumes,
}
- if dnsOptions != nil {
+ if dnsOptions != nil && (len(dnsOptions.Nameservers)+len(dnsOptions.Searches)+len(dnsOptions.Options) > 0) {
ps.DNSConfig = dnsOptions
}
p := v1.Pod{
@@ -366,7 +369,7 @@ func simplePodWithV1Containers(ctx context.Context, ctrs []*Container) (*v1.Pod,
return nil, err
}
for k, v := range annotations {
- kubeAnnotations[define.BindMountPrefix+k] = v
+ kubeAnnotations[define.BindMountPrefix+k] = strings.TrimSpace(v)
}
if isInit {
kubeInitCtrs = append(kubeInitCtrs, kubeCtr)
@@ -445,16 +448,11 @@ func containerToV1Container(ctx context.Context, c *Container) (v1.Container, []
kubeVolumes = append(kubeVolumes, volumes...)
}
- envVariables, err := libpodEnvVarsToKubeEnvVars(c.config.Spec.Process.Env)
- if err != nil {
- return kubeContainer, kubeVolumes, nil, annotations, err
- }
-
portmappings, err := c.PortMappings()
if err != nil {
return kubeContainer, kubeVolumes, nil, annotations, err
}
- ports, err := ocicniPortMappingToContainerPort(portmappings)
+ ports, err := portMappingToContainerPort(portmappings)
if err != nil {
return kubeContainer, kubeVolumes, nil, annotations, err
}
@@ -471,25 +469,51 @@ func containerToV1Container(ctx context.Context, c *Container) (v1.Container, []
kubeContainer.Name = removeUnderscores(c.Name())
_, image := c.Image()
+
+ // The infra container may have been created with an overlay root FS
+ // instead of an infra image. If so, set the imageto the default K8s
+ // pause one and make sure it's in the storage by pulling it down if
+ // missing.
+ if image == "" && c.IsInfra() {
+ image = config.DefaultInfraImage
+ if _, err := c.runtime.libimageRuntime.Pull(ctx, image, config.PullPolicyMissing, nil); err != nil {
+ return kubeContainer, nil, nil, nil, err
+ }
+ }
+
kubeContainer.Image = image
kubeContainer.Stdin = c.Stdin()
img, _, err := c.runtime.libimageRuntime.LookupImage(image, nil)
if err != nil {
- return kubeContainer, kubeVolumes, nil, annotations, err
+ return kubeContainer, kubeVolumes, nil, annotations, fmt.Errorf("looking up image %q of container %q: %w", image, c.ID(), err)
}
imgData, err := img.Inspect(ctx, nil)
if err != nil {
return kubeContainer, kubeVolumes, nil, annotations, err
}
- if reflect.DeepEqual(imgData.Config.Cmd, kubeContainer.Command) {
+ // If the user doesn't set a command/entrypoint when creating the container with podman and
+ // is using the image command or entrypoint from the image, don't add it to the generated kube yaml
+ if reflect.DeepEqual(imgData.Config.Cmd, kubeContainer.Command) || reflect.DeepEqual(imgData.Config.Entrypoint, kubeContainer.Command) {
kubeContainer.Command = nil
}
- kubeContainer.WorkingDir = c.WorkingDir()
+ if c.WorkingDir() != "/" && imgData.Config.WorkingDir != c.WorkingDir() {
+ kubeContainer.WorkingDir = c.WorkingDir()
+ }
+
+ if imgData.User == c.User() {
+ kubeSec.RunAsGroup, kubeSec.RunAsUser = nil, nil
+ }
+
+ envVariables, err := libpodEnvVarsToKubeEnvVars(c.config.Spec.Process.Env, imgData.Config.Env)
+ if err != nil {
+ return kubeContainer, kubeVolumes, nil, annotations, err
+ }
+ kubeContainer.Env = envVariables
+
kubeContainer.Ports = ports
// This should not be applicable
//container.EnvFromSource =
- kubeContainer.Env = envVariables
kubeContainer.SecurityContext = kubeSec
kubeContainer.StdinOnce = false
kubeContainer.TTY = c.config.Spec.Process.Terminal
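Review bot: The new infra-image fallback returns `kubeContainer, nil, nil, nil, err` when the pull fails, while every other error path in this function returns `kubeContainer, kubeVolumes, nil, annotations, err`; `return kubeContainer, kubeVolumes, nil, annotations, err` keeps the error paths uniform. The pull is a network side effect inside what is otherwise a read-only YAML generation step, which the comment block should state plainly (and "imageto" should be "image to"). Clearing RunAsUser and RunAsGroup when `imgData.User == c.User()` silently changes the emitted SecurityContext, so a comment such as `// The image's USER already yields this uid/gid; omit it so the YAML follows the image.` would document why the explicit user is dropped. containerToV1Container continues outside the hunk.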
@@ -564,36 +588,49 @@ func containerToV1Container(ctx context.Context, c *Container) (v1.Container, []
return kubeContainer, kubeVolumes, &dns, annotations, nil
}
-// ocicniPortMappingToContainerPort takes an ocicni portmapping and converts
+// portMappingToContainerPort takes an portmapping and converts
// it to a v1.ContainerPort format for kube output
-func ocicniPortMappingToContainerPort(portMappings []types.OCICNIPortMapping) ([]v1.ContainerPort, error) {
+func portMappingToContainerPort(portMappings []types.PortMapping) ([]v1.ContainerPort, error) {
containerPorts := make([]v1.ContainerPort, 0, len(portMappings))
for _, p := range portMappings {
- var protocol v1.Protocol
- switch strings.ToUpper(p.Protocol) {
- case "TCP":
- protocol = v1.ProtocolTCP
- case "UDP":
- protocol = v1.ProtocolUDP
- default:
- return containerPorts, errors.Errorf("unknown network protocol %s", p.Protocol)
- }
- cp := v1.ContainerPort{
- // Name will not be supported
- HostPort: p.HostPort,
- HostIP: p.HostIP,
- ContainerPort: p.ContainerPort,
- Protocol: protocol,
- }
- containerPorts = append(containerPorts, cp)
+ protocols := strings.Split(p.Protocol, ",")
+ for _, proto := range protocols {
+ var protocol v1.Protocol
+ switch strings.ToUpper(proto) {
+ case "TCP":
+ // do nothing as it is the default protocol in k8s, there is no need to explicitly
+ // add it to the generated yaml
+ case "UDP":
+ protocol = v1.ProtocolUDP
+ case "SCTP":
+ protocol = v1.ProtocolSCTP
+ default:
+ return containerPorts, errors.Errorf("unknown network protocol %s", p.Protocol)
+ }
+ for i := uint16(0); i < p.Range; i++ {
+ cp := v1.ContainerPort{
+ // Name will not be supported
+ HostPort: int32(p.HostPort + i),
+ HostIP: p.HostIP,
+ ContainerPort: int32(p.ContainerPort + i),
+ Protocol: protocol,
+ }
+ containerPorts = append(containerPorts, cp)
+ }
+ }
}
return containerPorts, nil
}
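Review bot: The per-range expansion adds `i` to HostPort unconditionally, so a mapping whose HostPort is 0 (the OCICNI convention for "no fixed host port", if it carries over to types.PortMapping) yields host ports 1, 2, ... for a Range greater than one; guard it with `hostPort := int32(0); if p.HostPort != 0 { hostPort = int32(p.HostPort) + int32(i) }` and use hostPort in the literal. `p.HostPort + i` and `p.ContainerPort + i` are also uint16 additions that wrap at 65535 before the int32 conversion, unless the PortMapping validation in libpod/network/types (changed in this same diff) already bounds port plus range. The error in the default case reports `p.Protocol` rather than the offending `proto`, and the doc comment should read "takes a portmapping".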
// libpodEnvVarsToKubeEnvVars converts a key=value string slice to []v1.EnvVar
-func libpodEnvVarsToKubeEnvVars(envs []string) ([]v1.EnvVar, error) {
+func libpodEnvVarsToKubeEnvVars(envs []string, imageEnvs []string) ([]v1.EnvVar, error) {
defaultEnv := env.DefaultEnvVariables()
envVars := make([]v1.EnvVar, 0, len(envs))
+ imageMap := make(map[string]string, len(imageEnvs))
+ for _, ie := range envs {
+ split := strings.SplitN(ie, "=", 2)
+ imageMap[split[0]] = split[1]
+ }
for _, e := range envs {
split := strings.SplitN(e, "=", 2)
if len(split) != 2 {
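Review bot: The image-env map in libpodEnvVarsToKubeEnvVars is built from `envs` instead of the new `imageEnvs` parameter (`for _, ie := range envs {`), so every container variable is found equal to itself in `imageMap` and the comparison that follows in the next hunk drops all of them from the generated YAML; `imageEnvs` is never read. That same loop also indexes `split[1]` without the `len(split) != 2` guard the second loop has, so a malformed entry without `=` panics. Change the loop header to `for _, ie := range imageEnvs {` and add `if len(split) != 2 { continue }` before the map write. Minor: the unknown-protocol error in portMappingToContainerPort reports the whole `p.Protocol` list rather than the offending `proto`. libpodEnvVarsToKubeEnvVars continues past the end of this hunk.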
@@ -602,6 +639,9 @@ func libpodEnvVarsToKubeEnvVars(envs []string) ([]v1.EnvVar, error) {
if defaultEnv[split[0]] == split[1] {
continue
}
+ if imageMap[split[0]] == split[1] {
+ continue
+ }
ev := v1.EnvVar{
Name: split[0],
Value: split[1],
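Review bot: The new `imageMap[split[0]] == split[1]` check treats a missing key as an empty-string match, so a variable the user deliberately set to empty (`FOO=`) that the image does not define is silently dropped from the generated YAML. Use the two-value lookup so only real image defaults are skipped: `if v, ok := imageMap[split[0]]; ok && v == split[1] {`. The enclosing function starts and ends outside this hunk.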
@@ -799,33 +839,42 @@ func generateKubeSecurityContext(c *Container) (*v1.SecurityContext, error) {
capabilities = newCaps
}
+ sc := v1.SecurityContext{
+ // RunAsNonRoot is an optional parameter; our first implementations should be root only; however
+ // I'm leaving this as a bread-crumb for later
+ //RunAsNonRoot: &nonRoot,
+ }
+ if capabilities != nil {
+ sc.Capabilities = capabilities
+ }
var selinuxOpts v1.SELinuxOptions
opts := strings.SplitN(c.config.Spec.Annotations[define.InspectAnnotationLabel], ":", 2)
- if len(opts) == 2 {
+ switch len(opts) {
+ case 2:
switch opts[0] {
case "type":
selinuxOpts.Type = opts[1]
+ sc.SELinuxOptions = &selinuxOpts
case "level":
selinuxOpts.Level = opts[1]
+ sc.SELinuxOptions = &selinuxOpts
}
- }
- if len(opts) == 1 {
+ case 1:
if opts[0] == "disable" {
selinuxOpts.Type = "spc_t"
+ sc.SELinuxOptions = &selinuxOpts
}
}
- sc := v1.SecurityContext{
- Capabilities: capabilities,
- Privileged: &privileged,
- SELinuxOptions: &selinuxOpts,
- // RunAsNonRoot is an optional parameter; our first implementations should be root only; however
- // I'm leaving this as a bread-crumb for later
- //RunAsNonRoot: &nonRoot,
- ReadOnlyRootFilesystem: &ro,
- AllowPrivilegeEscalation: &allowPrivEscalation,
+ if !allowPrivEscalation {
+ sc.AllowPrivilegeEscalation = &allowPrivEscalation
+ }
+ if privileged {
+ sc.Privileged = &privileged
+ }
+ if ro {
+ sc.ReadOnlyRootFilesystem = &ro
}
-
if c.User() != "" {
if !c.batched {
c.lock.Lock()
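Review bot: The SELinux switch only handles `type` and `level` (plus the bare `disable`); any other `key:value` label annotation falls out of the inner switch without a log line, so the generated SecurityContext silently lacks a constraint the container was actually started with. Add a `default:` arm, e.g. `logrus.Warnf("SELinux label option %q is not represented in kube output", opts[0])` (kube.go's import block is not in view; use whatever logger the file already has), and treat the `case 1` arm the same way for values other than `disable`. Whether podman accepts label keys beyond these is not visible in this hunk, so the warning is cheap insurance either way. The enclosing function starts and ends outside this hunk.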
diff --git a/libpod/network/cni/cni_conversion.go b/libpod/network/cni/cni_conversion.go
index 93d871767..01e149114 100644
--- a/libpod/network/cni/cni_conversion.go
+++ b/libpod/network/cni/cni_conversion.go
@@ -103,7 +103,7 @@ func createNetworkFromCNIConfigList(conf *libcni.NetworkConfigList, confPath str
}
default:
- // A warning would be good but users would get this warning everytime so keep this at info level.
+ // A warning would be good but users would get this warning every time so keep this at info level.
logrus.Infof("Unsupported CNI config type %s in %s, this network can still be used but inspect or list cannot show all information",
firstPlugin.Network.Type, confPath)
}
diff --git a/libpod/network/cni/cni_types.go b/libpod/network/cni/cni_types.go
index fbf917c2d..c70cb92b6 100644
--- a/libpod/network/cni/cni_types.go
+++ b/libpod/network/cni/cni_types.go
@@ -176,13 +176,13 @@ func newIPAMLocalHostRange(subnet types.IPNet, leaseRange *types.LeaseRange, gw
Subnet: subnet.String(),
}
- // an user provided a range, we add it here
+ // a user provided a range, we add it here
if leaseRange != nil {
if leaseRange.StartIP != nil {
hostRange.RangeStart = leaseRange.StartIP.String()
}
if leaseRange.EndIP != nil {
- hostRange.RangeStart = leaseRange.EndIP.String()
+ hostRange.RangeEnd = leaseRange.EndIP.String()
}
}
diff --git a/libpod/network/cni/config_test.go b/libpod/network/cni/config_test.go
index 5b0feb859..0dfc6173c 100644
--- a/libpod/network/cni/config_test.go
+++ b/libpod/network/cni/config_test.go
@@ -621,7 +621,7 @@ var _ = Describe("Config", func() {
err = libpodNet.NetworkRemove(network1.Name)
Expect(err).To(BeNil())
- endIP := "10.0.0.10"
+ endIP := "10.0.0.30"
network = types.Network{
Driver: "bridge",
Subnets: []types.Subnet{
@@ -665,6 +665,22 @@ var _ = Describe("Config", func() {
Expect(network1.Subnets[0].Gateway.String()).To(Equal("10.0.0.1"))
Expect(network1.Subnets[0].LeaseRange.StartIP.String()).To(Equal(startIP))
Expect(network1.Subnets[0].LeaseRange.EndIP.String()).To(Equal(endIP))
+
+ // create a new interface to force a config load from disk
+ libpodNet, err = getNetworkInterface(cniConfDir, false)
+ Expect(err).To(BeNil())
+
+ network1, err = libpodNet.NetworkInspect(network1.Name)
+ Expect(err).To(BeNil())
+ Expect(network1.Name).ToNot(BeEmpty())
+ Expect(network1.ID).ToNot(BeEmpty())
+ Expect(network1.NetworkInterface).ToNot(BeEmpty())
+ Expect(network1.Driver).To(Equal("bridge"))
+ Expect(network1.Subnets).To(HaveLen(1))
+ Expect(network1.Subnets[0].Subnet.String()).To(Equal(subnet))
+ Expect(network1.Subnets[0].Gateway.String()).To(Equal("10.0.0.1"))
+ Expect(network1.Subnets[0].LeaseRange.StartIP.String()).To(Equal(startIP))
+ Expect(network1.Subnets[0].LeaseRange.EndIP.String()).To(Equal(endIP))
})
It("create bridge with subnet and invalid lease range", func() {
@@ -1313,7 +1329,7 @@ var _ = Describe("Config", func() {
Expect(networks).To(HaveLen(0))
})
- It("crate bridge network with used interface name", func() {
+ It("create bridge network with used interface name", func() {
network := types.Network{
NetworkInterface: "cni-podman9",
}
diff --git a/libpod/network/cni/run.go b/libpod/network/cni/run.go
index bd873f89b..7795dfeeb 100644
--- a/libpod/network/cni/run.go
+++ b/libpod/network/cni/run.go
@@ -160,7 +160,7 @@ func CNIResultToStatus(res cnitypes.Result) (types.StatusBlock, error) {
return result, err
}
interfaces[cniInt.Name] = types.NetInterface{
- MacAddress: mac,
+ MacAddress: types.HardwareAddr(mac),
Networks: []types.NetAddress{{
Subnet: types.IPNet{IPNet: ip.Address},
Gateway: ip.Gateway,
@@ -196,10 +196,8 @@ func getRuntimeConfig(netns, conName, conID, networkName string, ports []cniPort
IfName: opts.InterfaceName,
Args: [][2]string{
{"IgnoreUnknown", "1"},
- // FIXME: Should we set the K8S args?
- //{"K8S_POD_NAMESPACE", conName},
- //{"K8S_POD_INFRA_CONTAINER_ID", conID},
- // K8S_POD_NAME is used by dnsname to get the container name
+ // Do not set the K8S env vars, see https://github.com/containers/podman/issues/12083.
+ // Only K8S_POD_NAME is used by dnsname to get the container name.
{"K8S_POD_NAME", conName},
},
CapabilityArgs: map[string]interface{}{},
diff --git a/libpod/network/cni/run_test.go b/libpod/network/cni/run_test.go
index 965203c2a..3169cd0eb 100644
--- a/libpod/network/cni/run_test.go
+++ b/libpod/network/cni/run_test.go
@@ -398,7 +398,7 @@ var _ = Describe("run CNI", func() {
i, err := net.InterfaceByName(intName1)
Expect(err).To(BeNil())
Expect(i.Name).To(Equal(intName1))
- Expect(i.HardwareAddr).To(Equal(macInt1))
+ Expect(i.HardwareAddr).To(Equal((net.HardwareAddr)(macInt1)))
addrs, err := i.Addrs()
Expect(err).To(BeNil())
subnet := &net.IPNet{
@@ -448,7 +448,7 @@ var _ = Describe("run CNI", func() {
i, err := net.InterfaceByName(intName1)
Expect(err).To(BeNil())
Expect(i.Name).To(Equal(intName1))
- Expect(i.HardwareAddr).To(Equal(macInt1))
+ Expect(i.HardwareAddr).To(Equal(net.HardwareAddr(macInt1)))
addrs, err := i.Addrs()
Expect(err).To(BeNil())
subnet := &net.IPNet{
@@ -460,7 +460,7 @@ var _ = Describe("run CNI", func() {
i, err = net.InterfaceByName(intName2)
Expect(err).To(BeNil())
Expect(i.Name).To(Equal(intName2))
- Expect(i.HardwareAddr).To(Equal(macInt2))
+ Expect(i.HardwareAddr).To(Equal(net.HardwareAddr(macInt2)))
addrs, err = i.Addrs()
Expect(err).To(BeNil())
subnet = &net.IPNet{
@@ -600,7 +600,7 @@ var _ = Describe("run CNI", func() {
i, err := net.InterfaceByName(intName1)
Expect(err).To(BeNil())
Expect(i.Name).To(Equal(intName1))
- Expect(i.HardwareAddr).To(Equal(macInt1))
+ Expect(i.HardwareAddr).To(Equal(net.HardwareAddr(macInt1)))
addrs, err := i.Addrs()
Expect(err).To(BeNil())
subnet := &net.IPNet{
@@ -612,7 +612,7 @@ var _ = Describe("run CNI", func() {
i, err = net.InterfaceByName(intName2)
Expect(err).To(BeNil())
Expect(i.Name).To(Equal(intName2))
- Expect(i.HardwareAddr).To(Equal(macInt2))
+ Expect(i.HardwareAddr).To(Equal(net.HardwareAddr(macInt2)))
addrs, err = i.Addrs()
Expect(err).To(BeNil())
subnet = &net.IPNet{
@@ -690,7 +690,7 @@ var _ = Describe("run CNI", func() {
netName: {
InterfaceName: interfaceName,
StaticIPs: []net.IP{ip1, ip2},
- StaticMAC: mac,
+ StaticMAC: types.HardwareAddr(mac),
},
},
},
@@ -708,7 +708,7 @@ var _ = Describe("run CNI", func() {
Expect(res[netName].Interfaces[interfaceName].Networks[1].Subnet.IP.String()).To(Equal(ip2.String()))
Expect(res[netName].Interfaces[interfaceName].Networks[1].Subnet.Mask).To(Equal(subnet2.Mask))
Expect(res[netName].Interfaces[interfaceName].Networks[1].Gateway).To(Equal(net.ParseIP("fd41:0a75:2ca0:48a9::1")))
- Expect(res[netName].Interfaces[interfaceName].MacAddress).To(Equal(mac))
+ Expect(res[netName].Interfaces[interfaceName].MacAddress).To(Equal(types.HardwareAddr(mac)))
// default network has no dns
Expect(res[netName].DNSServerIPs).To(BeEmpty())
Expect(res[netName].DNSSearchDomains).To(BeEmpty())
diff --git a/libpod/network/types/network.go b/libpod/network/types/network.go
index 2fe4f3da2..ba5e018fd 100644
--- a/libpod/network/types/network.go
+++ b/libpod/network/types/network.go
@@ -1,6 +1,7 @@
package types
import (
+ "encoding/json"
"net"
"time"
)
@@ -94,6 +95,51 @@ func (n *IPNet) UnmarshalText(text []byte) error {
return nil
}
+// HardwareAddr is the same as net.HardwareAddr except
+// that it adds the json marshal/unmarshal methods.
+// This allows us to read the mac from a json string
+// and a byte array.
+// swagger:model MacAddress
+type HardwareAddr net.HardwareAddr
+
+func (h *HardwareAddr) String() string {
+ return (*net.HardwareAddr)(h).String()
+}
+
+func (h *HardwareAddr) MarshalText() ([]byte, error) {
+ return []byte((*net.HardwareAddr)(h).String()), nil
+}
+
+func (h *HardwareAddr) UnmarshalJSON(text []byte) error {
+ if len(text) == 0 {
+ *h = nil
+ return nil
+ }
+
+ // if the json string start with a quote we got a string
+ // unmarshal the string and parse the mac from this string
+ if string(text[0]) == `"` {
+ var macString string
+ err := json.Unmarshal(text, &macString)
+ if err == nil {
+ mac, err := net.ParseMAC(macString)
+ if err == nil {
+ *h = HardwareAddr(mac)
+ return nil
+ }
+ }
+ }
+ // not a string or got an error fallback to the normal parsing
+ mac := make(net.HardwareAddr, 0, 6)
+ // use the standard json unmarshal for backwards compat
+ err := json.Unmarshal(text, &mac)
+ if err != nil {
+ return err
+ }
+ *h = HardwareAddr(mac)
+ return nil
+}
+
type Subnet struct {
// Subnet for this Network in CIDR form.
// swagger:strfmt string
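Review bot: The fallback path in UnmarshalJSON accepts a byte array or base64 string of any length, so `"static_mac": [1,2,3]` or `"AQ=="` becomes a 1- or 3-byte HardwareAddr that is handed to the network backend unvalidated, while the string path insists on a parsable MAC; since `static_mac` is API input, validate the fallback result as well, e.g. `if len(mac) > 0 { if _, err := net.ParseMAC(mac.String()); err != nil { return err } }` right after the second `json.Unmarshal` (6/8/20-byte addresses pass, anything else is rejected, and `null` still yields nil). Separately, `MarshalText` and `String` use pointer receivers, so `json.Marshal` of a non-addressable value (a `NetInterface` passed by value rather than by pointer) cannot call them and falls back to base64-encoding the byte slice, giving two wire formats for the same field; value receivers avoid that. A test case with a wrong-length array in network_test.go would pin the first point.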
@@ -134,10 +180,10 @@ type NetInterface struct {
// Networks list of assigned subnets with their gateway.
Networks []NetAddress `json:"networks,omitempty"`
// MacAddress for this Interface.
- MacAddress net.HardwareAddr `json:"mac_address"`
+ MacAddress HardwareAddr `json:"mac_address"`
}
-// NetAddress contains the subnet and gatway.
+// NetAddress contains the subnet and gateway.
type NetAddress struct {
// Subnet of this NetAddress. Note that the subnet contains the
// actual ip of the net interface and not the network address.
@@ -157,7 +203,7 @@ type PerNetworkOptions struct {
// Optional.
Aliases []string `json:"aliases,omitempty"`
// StaticMac for this container. Optional.
- StaticMAC net.HardwareAddr `json:"static_mac,omitempty"`
+ StaticMAC HardwareAddr `json:"static_mac,omitempty"`
// InterfaceName for this container. Required.
InterfaceName string `json:"interface_name"`
}
diff --git a/libpod/network/types/network_test.go b/libpod/network/types/network_test.go
new file mode 100644
index 000000000..91ee93692
--- /dev/null
+++ b/libpod/network/types/network_test.go
@@ -0,0 +1,82 @@
+package types_test
+
+import (
+ "encoding/json"
+ "reflect"
+ "testing"
+
+ "github.com/containers/podman/v3/libpod/network/types"
+)
+
+func TestUnmarshalMacAddress(t *testing.T) {
+ tests := []struct {
+ name string
+ json string
+ want types.HardwareAddr
+ wantErr bool
+ }{
+ {
+ name: "mac as string with colon",
+ json: `"52:54:00:1c:2e:46"`,
+ want: types.HardwareAddr{0x52, 0x54, 0x00, 0x1c, 0x2e, 0x46},
+ },
+ {
+ name: "mac as string with dash",
+ json: `"52-54-00-1c-2e-46"`,
+ want: types.HardwareAddr{0x52, 0x54, 0x00, 0x1c, 0x2e, 0x46},
+ },
+ {
+ name: "mac as byte array",
+ json: `[82, 84, 0, 28, 46, 70]`,
+ want: types.HardwareAddr{0x52, 0x54, 0x00, 0x1c, 0x2e, 0x46},
+ },
+ {
+ name: "null value",
+ json: `null`,
+ want: nil,
+ },
+ {
+ name: "mac as base64",
+ json: `"qrvM3e7/"`,
+ want: types.HardwareAddr{0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff},
+ },
+ {
+ name: "invalid string",
+ json: `"52:54:00:1c:2e`,
+ wantErr: true,
+ },
+ {
+ name: "invalid array",
+ json: `[82, 84, 0, 28, 46`,
+ wantErr: true,
+ },
+
+ {
+ name: "invalid value",
+ json: `ab`,
+ wantErr: true,
+ },
+ {
+ name: "invalid object",
+ json: `{}`,
+ wantErr: true,
+ },
+ }
+ for _, tt := range tests {
+ test := tt
+ t.Run(test.name, func(t *testing.T) {
+ mac := types.HardwareAddr{}
+ err := json.Unmarshal([]byte(test.json), &mac)
+ if (err != nil) != test.wantErr {
+ t.Errorf("types.HardwareAddress Unmarshal() error = %v, wantErr %v", err, test.wantErr)
+ return
+ }
+ if test.wantErr {
+ return
+ }
+ if !reflect.DeepEqual(mac, test.want) {
+ t.Errorf("types.HardwareAddress Unmarshal() got = %v, want %v", mac, test.want)
+ }
+ })
+ }
+}
diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go
index e792a410c..ef261a438 100644
--- a/libpod/networking_linux.go
+++ b/libpod/networking_linux.go
@@ -11,6 +11,7 @@ import (
"os/exec"
"path/filepath"
"regexp"
+ "sort"
"strconv"
"strings"
"syscall"
@@ -41,8 +42,11 @@ const (
// default slirp4ns subnet
defaultSlirp4netnsSubnet = "10.0.2.0/24"
- // rootlessCNINSName is the file name for the rootless network namespace bind mount
- rootlessCNINSName = "rootless-cni-ns"
+ // rootlessNetNsName is the file name for the rootless network namespace bind mount
+ rootlessNetNsName = "rootless-netns"
+
+ // rootlessNetNsSilrp4netnsPidFile is the name of the rootless netns slirp4netns pid file
+ rootlessNetNsSilrp4netnsPidFile = "rootless-netns-slirp4netns.pid"
// persistentCNIDir is the directory where the CNI files are stored
persistentCNIDir = "/var/lib/cni"
@@ -88,10 +92,7 @@ func (c *Container) getNetworkOptions() (types.NetworkOptions, error) {
ContainerID: c.config.ID,
ContainerName: getCNIPodName(c),
}
- // TODO remove ocicni PortMappings from container config and store as types PortMappings
- if len(c.config.PortMappings) > 0 {
- opts.PortMappings = ocicniPortsToNetTypesPorts(c.config.PortMappings)
- }
+ opts.PortMappings = c.config.PortMappings
networks, _, err := c.networks()
if err != nil {
return opts, err
@@ -136,21 +137,21 @@ func (c *Container) getNetworkOptions() (types.NetworkOptions, error) {
return opts, nil
}
-type RootlessCNI struct {
+type RootlessNetNS struct {
ns ns.NetNS
dir string
Lock lockfile.Locker
}
-// getPath will join the given path to the rootless cni dir
-func (r *RootlessCNI) getPath(path string) string {
+// getPath will join the given path to the rootless netns dir
+func (r *RootlessNetNS) getPath(path string) string {
return filepath.Join(r.dir, path)
}
-// Do - run the given function in the rootless cni ns.
+// Do - run the given function in the rootless netns.
// It does not lock the rootlessCNI lock, the caller
// should only lock when needed, e.g. for cni operations.
-func (r *RootlessCNI) Do(toRun func() error) error {
+func (r *RootlessNetNS) Do(toRun func() error) error {
err := r.ns.Do(func(_ ns.NetNS) error {
// Before we can run the given function,
// we have to setup all mounts correctly.
@@ -161,11 +162,11 @@ func (r *RootlessCNI) Do(toRun func() error) error {
// Because the plugins also need access to XDG_RUNTIME_DIR/netns some special setup is needed.
// The following bind mounts are needed
- // 1. XDG_RUNTIME_DIR/netns -> XDG_RUNTIME_DIR/rootless-cni/XDG_RUNTIME_DIR/netns
- // 2. /run/systemd -> XDG_RUNTIME_DIR/rootless-cni/run/systemd (only if it exists)
- // 3. XDG_RUNTIME_DIR/rootless-cni/resolv.conf -> /etc/resolv.conf or XDG_RUNTIME_DIR/rootless-cni/run/symlink/target
- // 4. XDG_RUNTIME_DIR/rootless-cni/var/lib/cni -> /var/lib/cni (if /var/lib/cni does not exists use the parent dir)
- // 5. XDG_RUNTIME_DIR/rootless-cni/run -> /run
+ // 1. XDG_RUNTIME_DIR -> XDG_RUNTIME_DIR/rootless-netns/XDG_RUNTIME_DIR
+ // 2. /run/systemd -> XDG_RUNTIME_DIR/rootless-netns/run/systemd (only if it exists)
+ // 3. XDG_RUNTIME_DIR/rootless-netns/resolv.conf -> /etc/resolv.conf or XDG_RUNTIME_DIR/rootless-netns/run/symlink/target
+ // 4. XDG_RUNTIME_DIR/rootless-netns/var/lib/cni -> /var/lib/cni (if /var/lib/cni does not exists use the parent dir)
+ // 5. XDG_RUNTIME_DIR/rootless-netns/run -> /run
// Create a new mount namespace,
// this must happen inside the netns thread.
@@ -174,16 +175,16 @@ func (r *RootlessCNI) Do(toRun func() error) error {
return errors.Wrapf(err, "cannot create a new mount namespace")
}
- netNsDir, err := netns.GetNSRunDir()
+ xdgRuntimeDir, err := util.GetRuntimeDir()
if err != nil {
- return errors.Wrap(err, "could not get network namespace directory")
+ return errors.Wrap(err, "could not get runtime directory")
}
- newNetNsDir := r.getPath(netNsDir)
+ newXDGRuntimeDir := r.getPath(xdgRuntimeDir)
// 1. Mount the netns into the new run to keep them accessible.
// Otherwise cni setup will fail because it cannot access the netns files.
- err = unix.Mount(netNsDir, newNetNsDir, "none", unix.MS_BIND|unix.MS_SHARED|unix.MS_REC, "")
+ err = unix.Mount(xdgRuntimeDir, newXDGRuntimeDir, "none", unix.MS_BIND|unix.MS_SHARED|unix.MS_REC, "")
if err != nil {
- return errors.Wrap(err, "failed to mount netns directory for rootless cni")
+ return errors.Wrap(err, "failed to mount runtime directory for rootless netns")
}
// 2. Also keep /run/systemd if it exists.
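Review bot: The comment `// 1. Mount the netns into the new run to keep them accessible.` is now stale: the bind-mount source changed from the netns directory to the whole runtime directory, so everything under XDG_RUNTIME_DIR, not just the netns files, becomes visible to the plugins executed inside this mount namespace. Update it to say what is mounted and why, e.g. `// 1. Bind mount the whole XDG_RUNTIME_DIR so the netns files and other runtime state stay reachable at their original path inside the new mount namespace.`, and if only the netns files are actually needed, consider keeping the mount that narrow. The mount is owned by the same unprivileged user, so this widens what plugins can see rather than crossing a privilege boundary. The enclosing Do method starts and ends outside this hunk.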
@@ -194,7 +195,7 @@ func (r *RootlessCNI) Do(toRun func() error) error {
newRunSystemd := r.getPath(runSystemd)
err = unix.Mount(runSystemd, newRunSystemd, "none", unix.MS_BIND|unix.MS_REC, "")
if err != nil {
- return errors.Wrap(err, "failed to mount /run/systemd directory for rootless cni")
+ return errors.Wrap(err, "failed to mount /run/systemd directory for rootless netns")
}
}
@@ -242,25 +243,25 @@ func (r *RootlessCNI) Do(toRun func() error) error {
rsr := r.getPath("/run/systemd/resolve")
err = unix.Mount("", rsr, "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, "")
if err != nil {
- return errors.Wrapf(err, "failed to mount tmpfs on %q for rootless cni", rsr)
+ return errors.Wrapf(err, "failed to mount tmpfs on %q for rootless netns", rsr)
}
}
if strings.HasPrefix(resolvePath, "/run/") {
resolvePath = r.getPath(resolvePath)
err = os.MkdirAll(filepath.Dir(resolvePath), 0700)
if err != nil {
- return errors.Wrap(err, "failed to create rootless-cni resolv.conf directory")
+ return errors.Wrap(err, "failed to create rootless-netns resolv.conf directory")
}
// we want to bind mount on this file so we have to create the file first
_, err = os.OpenFile(resolvePath, os.O_CREATE|os.O_RDONLY, 0700)
if err != nil {
- return errors.Wrap(err, "failed to create rootless-cni resolv.conf file")
+ return errors.Wrap(err, "failed to create rootless-netns resolv.conf file")
}
}
// mount resolv.conf to make use of the host dns
err = unix.Mount(r.getPath("resolv.conf"), resolvePath, "none", unix.MS_BIND, "")
if err != nil {
- return errors.Wrap(err, "failed to mount resolv.conf for rootless cni")
+ return errors.Wrap(err, "failed to mount resolv.conf for rootless netns")
}
// 4. CNI plugins need access to /var/lib/cni and /run
@@ -285,14 +286,14 @@ func (r *RootlessCNI) Do(toRun func() error) error {
// make sure to mount var first
err = unix.Mount(varDir, varTarget, "none", unix.MS_BIND, "")
if err != nil {
- return errors.Wrapf(err, "failed to mount %s for rootless cni", varTarget)
+ return errors.Wrapf(err, "failed to mount %s for rootless netns", varTarget)
}
// 5. Mount the new prepared run dir to /run, it has to be recursive to keep the other bind mounts.
runDir := r.getPath("run")
err = unix.Mount(runDir, "/run", "none", unix.MS_BIND|unix.MS_REC, "")
if err != nil {
- return errors.Wrap(err, "failed to mount /run for rootless cni")
+ return errors.Wrap(err, "failed to mount /run for rootless netns")
}
// run the given function in the correct namespace
@@ -302,10 +303,11 @@ func (r *RootlessCNI) Do(toRun func() error) error {
return err
}
-// Cleanup the rootless cni namespace if needed.
+// Cleanup the rootless network namespace if needed.
// It checks if we have running containers with the bridge network mode.
-// Cleanup() will try to lock RootlessCNI, therefore you have to call it with an unlocked
-func (r *RootlessCNI) Cleanup(runtime *Runtime) error {
+// Cleanup() will try to lock RootlessNetNS, therefore you have to call
+// it with an unlocked lock.
+func (r *RootlessNetNS) Cleanup(runtime *Runtime) error {
_, err := os.Stat(r.dir)
if os.IsNotExist(err) {
// the directory does not exists no need for cleanup
@@ -314,8 +316,25 @@ func (r *RootlessCNI) Cleanup(runtime *Runtime) error {
r.Lock.Lock()
defer r.Lock.Unlock()
running := func(c *Container) bool {
+ // no bridge => no need to check
+ if !c.config.NetMode.IsBridge() {
+ return false
+ }
+
// we cannot use c.state() because it will try to lock the container
- // using c.state.State directly should be good enough for this use case
+ // locking is a problem because cleanup is called after net teardown
+ // at this stage the container is already locked.
+ // also do not try to lock only containers which are not currently in net
+ // teardown because this will result in an ABBA deadlock between the rootless
+ // cni lock and the container lock
+ // because we need to get the state we have to sync otherwise this will not
+ // work because the state is empty by default
+ // I do not like this but I do not see a better way at moment
+ err := c.syncContainer()
+ if err != nil {
+ return false
+ }
+
state := c.state.State
return state == define.ContainerStateRunning
}
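Review bot: In the `running` filter a failed `c.syncContainer()` is reported as "not running" (`return false`), which fails open: a transient state-read error makes Cleanup treat a live bridge container as absent and it proceeds to unmount the shared netns and kill slirp4netns underneath it. A sync error should err on the side of keeping the namespace, e.g. `logrus.Debugf("cannot sync container %s state, skipping rootless netns cleanup: %v", c.config.ID, err)` followed by `return true`, since the cost of a false positive is only a deferred cleanup. Whether the container listing that applies this filter tolerates the unlocked sync is not visible here. Cleanup starts and ends outside this hunk.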
@@ -323,101 +342,89 @@ func (r *RootlessCNI) Cleanup(runtime *Runtime) error {
if err != nil {
return err
}
- cleanup := true
- for _, ctr := range ctrs {
- if ctr.config.NetMode.IsBridge() {
- cleanup = false
- }
+ // no cleanup if we found containers
+ if len(ctrs) > 0 {
+ return nil
}
- if cleanup {
- // make sure the the cni results (cache) dir is empty
- // libpod instances with another root dir are not covered by the check above
- // this allows several libpod instances to use the same rootless cni ns
- contents, err := ioutil.ReadDir(r.getPath("var/lib/cni/results"))
- if (err == nil && len(contents) == 0) || os.IsNotExist(err) {
- logrus.Debug("Cleaning up rootless cni namespace")
- err = netns.UnmountNS(r.ns)
- if err != nil {
- return err
- }
- // make the following errors not fatal
- err = r.ns.Close()
- if err != nil {
- logrus.Error(err)
- }
- b, err := ioutil.ReadFile(r.getPath("rootless-cni-slirp4netns.pid"))
- if err == nil {
- var i int
- i, err = strconv.Atoi(string(b))
- if err == nil {
- // kill the slirp process so we do not leak it
- err = syscall.Kill(i, syscall.SIGTERM)
- }
- }
- if err != nil {
- logrus.Errorf("Failed to kill slirp4netns process: %s", err)
- }
- err = os.RemoveAll(r.dir)
- if err != nil {
- logrus.Error(err)
- }
- } else if err != nil && !os.IsNotExist(err) {
- logrus.Errorf("Could not read rootless cni directory, skipping cleanup: %s", err)
+ logrus.Debug("Cleaning up rootless network namespace")
+ err = netns.UnmountNS(r.ns)
+ if err != nil {
+ return err
+ }
+ // make the following errors not fatal
+ err = r.ns.Close()
+ if err != nil {
+ logrus.Error(err)
+ }
+ b, err := ioutil.ReadFile(r.getPath(rootlessNetNsSilrp4netnsPidFile))
+ if err == nil {
+ var i int
+ i, err = strconv.Atoi(string(b))
+ if err == nil {
+ // kill the slirp process so we do not leak it
+ err = syscall.Kill(i, syscall.SIGTERM)
}
}
+ if err != nil {
+ logrus.Errorf("Failed to kill slirp4netns process: %s", err)
+ }
+ err = os.RemoveAll(r.dir)
+ if err != nil {
+ logrus.Error(err)
+ }
return nil
}
-// GetRootlessCNINetNs returns the rootless cni object. If create is set to true
-// the rootless cni namespace will be created if it does not exists already.
+// GetRootlessNetNs returns the rootless netns object. If create is set to true
+// the rootless network namespace will be created if it does not exists already.
// If called as root it returns always nil.
// On success the returned RootlessCNI lock is locked and must be unlocked by the caller.
-func (r *Runtime) GetRootlessCNINetNs(new bool) (*RootlessCNI, error) {
+func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) {
if !rootless.IsRootless() {
return nil, nil
}
- var rootlessCNINS *RootlessCNI
+ var rootlessNetNS *RootlessNetNS
runDir, err := util.GetRuntimeDir()
if err != nil {
return nil, err
}
- lfile := filepath.Join(runDir, "rootless-cni.lock")
+ lfile := filepath.Join(runDir, "rootless-netns.lock")
lock, err := lockfile.GetLockfile(lfile)
if err != nil {
- return nil, errors.Wrap(err, "failed to get rootless-cni lockfile")
+ return nil, errors.Wrap(err, "failed to get rootless-netns lockfile")
}
lock.Lock()
defer func() {
- // In case of an error (early exit) rootlessCNINS will be nil.
+ // In case of an error (early exit) rootlessNetNS will be nil.
// Make sure to unlock otherwise we could deadlock.
- if rootlessCNINS == nil {
+ if rootlessNetNS == nil {
lock.Unlock()
}
}()
- cniDir := filepath.Join(runDir, "rootless-cni")
- err = os.MkdirAll(cniDir, 0700)
+ rootlessNetNsDir := filepath.Join(runDir, rootlessNetNsName)
+ err = os.MkdirAll(rootlessNetNsDir, 0700)
if err != nil {
- return nil, errors.Wrap(err, "could not create rootless-cni directory")
+ return nil, errors.Wrap(err, "could not create rootless-netns directory")
}
nsDir, err := netns.GetNSRunDir()
if err != nil {
return nil, err
}
- path := filepath.Join(nsDir, rootlessCNINSName)
+ path := filepath.Join(nsDir, rootlessNetNsName)
ns, err := ns.GetNS(path)
if err != nil {
if !new {
// return a error if we could not get the namespace and should no create one
- return nil, errors.Wrap(err, "error getting rootless cni network namespace")
+ return nil, errors.Wrap(err, "error getting rootless network namespace")
}
// create a new namespace
- logrus.Debug("creating rootless cni network namespace")
- ns, err = netns.NewNSWithName(rootlessCNINSName)
+ logrus.Debug("creating rootless network namespace")
+ ns, err = netns.NewNSWithName(rootlessNetNsName)
if err != nil {
- return nil, errors.Wrap(err, "error creating rootless cni network namespace")
+ return nil, errors.Wrap(err, "error creating rootless network namespace")
}
// setup slirp4netns here
path := r.config.Engine.NetworkCmdPath
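Review bot: Cleanup now unmounts the namespace as soon as this runtime sees no running bridge container, dropping the old check that `var/lib/cni/results` was empty; the removed comment explained that check was what protected other libpod instances (different root dirs) sharing the same rootless netns, so a second instance's containers now lose their network when the first one cleans up. If that sharing is still supported, keep a cross-instance guard before `netns.UnmountNS` (re-read the results dir, or the equivalent state of the backend in use) and return early when it is non-empty; if it is intentionally no longer supported, say so in the Cleanup doc comment. Also `syscall.Kill(i, syscall.SIGTERM)` trusts the pid file without checking that the process is still slirp4netns, so after pid reuse a stale file can signal an unrelated process of the same user; the 0700 runtime dir limits this to the owning user. GetRootlessNetNs begins in this hunk and continues past it.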
@@ -467,7 +474,7 @@ func (r *Runtime) GetRootlessCNINetNs(new bool) (*RootlessCNI, error) {
// Leak one end of the pipe in slirp4netns
cmd.ExtraFiles = append(cmd.ExtraFiles, syncW)
- logPath := filepath.Join(r.config.Engine.TmpDir, "slirp4netns-rootless-cni.log")
+ logPath := filepath.Join(r.config.Engine.TmpDir, "slirp4netns-rootless-netns.log")
logFile, err := os.Create(logPath)
if err != nil {
return nil, errors.Wrapf(err, "failed to open slirp4netns log file %s", logPath)
@@ -486,9 +493,9 @@ func (r *Runtime) GetRootlessCNINetNs(new bool) (*RootlessCNI, error) {
// create pid file for the slirp4netns process
// this is need to kill the process in the cleanup
pid := strconv.Itoa(cmd.Process.Pid)
- err = ioutil.WriteFile(filepath.Join(cniDir, "rootless-cni-slirp4netns.pid"), []byte(pid), 0700)
+ err = ioutil.WriteFile(filepath.Join(rootlessNetNsDir, rootlessNetNsSilrp4netnsPidFile), []byte(pid), 0700)
if err != nil {
- errors.Wrap(err, "unable to write rootless-cni slirp4netns pid file")
+ errors.Wrap(err, "unable to write rootless-netns slirp4netns pid file")
}
defer func() {
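Review bot: The result of `errors.Wrap(err, "unable to write rootless-netns slirp4netns pid file")` is discarded, so a failed pid-file write is silently ignored even though the comment above says the file is what Cleanup uses to kill slirp4netns; the process then leaks on cleanup with no log trail. Because the `defer func()` that follows (body not visible here) is registered only after this write, returning at this point would also leak the just-started process, so at minimum log it, `logrus.Errorf("unable to write rootless-netns slirp4netns pid file: %v", err)`, or move the write below that defer and return the wrapped error. GetRootlessNetNs starts and ends outside this hunk.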
@@ -529,43 +536,43 @@ func (r *Runtime) GetRootlessCNINetNs(new bool) (*RootlessCNI, error) {
dnsOptions := resolvconf.GetOptions(conf.Content)
nameServers := resolvconf.GetNameservers(conf.Content)
- _, err = resolvconf.Build(filepath.Join(cniDir, "resolv.conf"), append([]string{resolveIP.String()}, nameServers...), searchDomains, dnsOptions)
+ _, err = resolvconf.Build(filepath.Join(rootlessNetNsDir, "resolv.conf"), append([]string{resolveIP.String()}, nameServers...), searchDomains, dnsOptions)
if err != nil {
- return nil, errors.Wrap(err, "failed to create rootless cni resolv.conf")
+ return nil, errors.Wrap(err, "failed to create rootless netns resolv.conf")
}
// create cni directories to store files
// they will be bind mounted to the correct location in a extra mount ns
- err = os.MkdirAll(filepath.Join(cniDir, strings.TrimPrefix(persistentCNIDir, "/")), 0700)
+ err = os.MkdirAll(filepath.Join(rootlessNetNsDir, persistentCNIDir), 0700)
if err != nil {
- return nil, errors.Wrap(err, "could not create rootless-cni var directory")
+ return nil, errors.Wrap(err, "could not create rootless-netns var directory")
}
- runDir := filepath.Join(cniDir, "run")
+ runDir := filepath.Join(rootlessNetNsDir, "run")
err = os.MkdirAll(runDir, 0700)
if err != nil {
- return nil, errors.Wrap(err, "could not create rootless-cni run directory")
+ return nil, errors.Wrap(err, "could not create rootless-netns run directory")
}
// relabel the new run directory to the iptables /run label
// this is important, otherwise the iptables command will fail
err = label.Relabel(runDir, "system_u:object_r:iptables_var_run_t:s0", false)
if err != nil {
- return nil, errors.Wrap(err, "could not create relabel rootless-cni run directory")
+ return nil, errors.Wrap(err, "could not create relabel rootless-netns run directory")
}
// create systemd run directory
err = os.MkdirAll(filepath.Join(runDir, "systemd"), 0700)
if err != nil {
- return nil, errors.Wrap(err, "could not create rootless-cni systemd directory")
+ return nil, errors.Wrap(err, "could not create rootless-netns systemd directory")
}
// create the directory for the netns files at the same location
- // relative to the rootless-cni location
- err = os.MkdirAll(filepath.Join(cniDir, nsDir), 0700)
+ // relative to the rootless-netns location
+ err = os.MkdirAll(filepath.Join(rootlessNetNsDir, nsDir), 0700)
if err != nil {
- return nil, errors.Wrap(err, "could not create rootless-cni netns directory")
+ return nil, errors.Wrap(err, "could not create rootless-netns netns directory")
}
}
- // The CNI plugins need access to iptables in $PATH. As it turns out debian doesn't put
- // /usr/sbin in $PATH for rootless users. This will break rootless cni completely.
+ // The CNI plugins and netavark need access to iptables in $PATH. As it turns out debian doesn't put
+ // /usr/sbin in $PATH for rootless users. This will break rootless networking completely.
// We might break existing users and we cannot expect everyone to change their $PATH so
// lets add /usr/sbin to $PATH ourselves.
path = os.Getenv("PATH")
@@ -574,14 +581,14 @@ func (r *Runtime) GetRootlessCNINetNs(new bool) (*RootlessCNI, error) {
os.Setenv("PATH", path)
}
- // Important set rootlessCNINS as last step.
+ // Important set rootlessNetNS as last step.
// Do not return any errors after this.
- rootlessCNINS = &RootlessCNI{
+ rootlessNetNS = &RootlessNetNS{
ns: ns,
- dir: cniDir,
+ dir: rootlessNetNsDir,
Lock: lock,
}
- return rootlessCNINS, nil
+ return rootlessNetNS, nil
}
// setPrimaryMachineIP is used for podman-machine and it sets
@@ -603,14 +610,14 @@ func setPrimaryMachineIP() error {
}
// setUpNetwork will set up the the networks, on error it will also tear down the cni
-// networks. If rootless it will join/create the rootless cni namespace.
+// networks. If rootless it will join/create the rootless network namespace.
func (r *Runtime) setUpNetwork(ns string, opts types.NetworkOptions) (map[string]types.StatusBlock, error) {
if r.config.MachineEnabled() {
if err := setPrimaryMachineIP(); err != nil {
return nil, err
}
}
- rootlessCNINS, err := r.GetRootlessCNINetNs(true)
+ rootlessNetNS, err := r.GetRootlessNetNs(true)
if err != nil {
return nil, err
}
@@ -619,11 +626,11 @@ func (r *Runtime) setUpNetwork(ns string, opts types.NetworkOptions) (map[string
results, err = r.network.Setup(ns, types.SetupOptions{NetworkOptions: opts})
return err
}
- // rootlessCNINS is nil if we are root
- if rootlessCNINS != nil {
- // execute the cni setup in the rootless net ns
- err = rootlessCNINS.Do(setUpPod)
- rootlessCNINS.Lock.Unlock()
+ // rootlessNetNS is nil if we are root
+ if rootlessNetNS != nil {
+ // execute the setup in the rootless net ns
+ err = rootlessNetNS.Do(setUpPod)
+ rootlessNetNS.Lock.Unlock()
} else {
err = setUpPod()
}
@@ -697,10 +704,10 @@ func (r *Runtime) setupRootlessNetNS(ctr *Container) error {
return err
}
if len(networks) > 0 && len(ctr.config.PortMappings) > 0 {
- // set up port forwarder for CNI-in-slirp4netns
+ // set up port forwarder for rootless netns
netnsPath := ctr.state.NetNS.Path()
// TODO: support slirp4netns port forwarder as well
- // make sure to fix this container.handleRestartPolicy() as well
+ // make sure to fix this in container.handleRestartPolicy() as well
return r.setupRootlessPortMappingViaRLK(ctr, netnsPath)
}
return nil
@@ -719,7 +726,7 @@ func (r *Runtime) setupNetNS(ctr *Container) error {
if err != nil {
return err
}
- nsPath = filepath.Join(nsPath, fmt.Sprintf("cni-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:]))
+ nsPath = filepath.Join(nsPath, fmt.Sprintf("netns-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:]))
if err := os.MkdirAll(filepath.Dir(nsPath), 0711); err != nil {
return err
@@ -777,10 +784,10 @@ func (r *Runtime) closeNetNS(ctr *Container) error {
return nil
}
-// Tear down a container's CNI network configuration and joins the
+// Tear down a container's network configuration and joins the
// rootless net ns as rootless user
func (r *Runtime) teardownNetwork(ns string, opts types.NetworkOptions) error {
- rootlessCNINS, err := r.GetRootlessCNINetNs(false)
+ rootlessNetNS, err := r.GetRootlessNetNs(false)
if err != nil {
return err
}
@@ -789,13 +796,13 @@ func (r *Runtime) teardownNetwork(ns string, opts types.NetworkOptions) error {
return errors.Wrapf(err, "error tearing down network namespace configuration for container %s", opts.ContainerID)
}
- // rootlessCNINS is nil if we are root
- if rootlessCNINS != nil {
+ // rootlessNetNS is nil if we are root
+ if rootlessNetNS != nil {
// execute the cni setup in the rootless net ns
- err = rootlessCNINS.Do(tearDownPod)
- rootlessCNINS.Lock.Unlock()
+ err = rootlessNetNS.Do(tearDownPod)
+ rootlessNetNS.Lock.Unlock()
if err == nil {
- err = rootlessCNINS.Cleanup(r)
+ err = rootlessNetNS.Cleanup(r)
}
} else {
err = tearDownPod()
@@ -1200,9 +1207,7 @@ func (c *Container) NetworkDisconnect(nameOrID, netName string, force bool) erro
ContainerID: c.config.ID,
ContainerName: getCNIPodName(c),
}
- if len(c.config.PortMappings) > 0 {
- opts.PortMappings = ocicniPortsToNetTypesPorts(c.config.PortMappings)
- }
+ opts.PortMappings = c.config.PortMappings
eth, exists := c.state.NetInterfaceDescriptions.getInterfaceByName(netName)
if !exists {
return errors.Errorf("no network interface name for container %s on network %s", c.config.ID, netName)
@@ -1294,9 +1299,7 @@ func (c *Container) NetworkConnect(nameOrID, netName string, aliases []string) e
ContainerID: c.config.ID,
ContainerName: getCNIPodName(c),
}
- if len(c.config.PortMappings) > 0 {
- opts.PortMappings = ocicniPortsToNetTypesPorts(c.config.PortMappings)
- }
+ opts.PortMappings = c.config.PortMappings
eth, exists := c.state.NetInterfaceDescriptions.getInterfaceByName(netName)
if !exists {
return errors.Errorf("no network interface name for container %s on network %s", c.config.ID, netName)
@@ -1364,16 +1367,67 @@ func (r *Runtime) normalizeNetworkName(nameOrID string) (string, error) {
return net.Name, nil
}
+// ocicniPortsToNetTypesPorts convert the old port format to the new one
+// while deduplicating ports into ranges
func ocicniPortsToNetTypesPorts(ports []types.OCICNIPortMapping) []types.PortMapping {
+ if len(ports) == 0 {
+ return nil
+ }
+
newPorts := make([]types.PortMapping, 0, len(ports))
- for _, port := range ports {
- newPorts = append(newPorts, types.PortMapping{
- HostIP: port.HostIP,
- HostPort: uint16(port.HostPort),
- ContainerPort: uint16(port.ContainerPort),
- Protocol: port.Protocol,
- Range: 1,
- })
+
+ // first sort the ports
+ sort.Slice(ports, func(i, j int) bool {
+ return compareOCICNIPorts(ports[i], ports[j])
+ })
+
+ // we already check if the slice is empty so we can use the first element
+ currentPort := types.PortMapping{
+ HostIP: ports[0].HostIP,
+ HostPort: uint16(ports[0].HostPort),
+ ContainerPort: uint16(ports[0].ContainerPort),
+ Protocol: ports[0].Protocol,
+ Range: 1,
+ }
+
+ for i := 1; i < len(ports); i++ {
+ if ports[i].HostIP == currentPort.HostIP &&
+ ports[i].Protocol == currentPort.Protocol &&
+ ports[i].HostPort-int32(currentPort.Range) == int32(currentPort.HostPort) &&
+ ports[i].ContainerPort-int32(currentPort.Range) == int32(currentPort.ContainerPort) {
+ currentPort.Range = currentPort.Range + 1
+ } else {
+ newPorts = append(newPorts, currentPort)
+ currentPort = types.PortMapping{
+ HostIP: ports[i].HostIP,
+ HostPort: uint16(ports[i].HostPort),
+ ContainerPort: uint16(ports[i].ContainerPort),
+ Protocol: ports[i].Protocol,
+ Range: 1,
+ }
+ }
}
+ newPorts = append(newPorts, currentPort)
return newPorts
}
+
+// compareOCICNIPorts will sort the ocicni ports by
+// 1) host ip
+// 2) protocol
+// 3) hostPort
+// 4) container port
+func compareOCICNIPorts(i, j types.OCICNIPortMapping) bool {
+ if i.HostIP != j.HostIP {
+ return i.HostIP < j.HostIP
+ }
+
+ if i.Protocol != j.Protocol {
+ return i.Protocol < j.Protocol
+ }
+
+ if i.HostPort != j.HostPort {
+ return i.HostPort < j.HostPort
+ }
+
+ return i.ContainerPort < j.ContainerPort
+}
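Review bot: `sort.Slice(ports, …)` reorders the caller's slice in place, so the conversion has a side effect on its input; in the boltdb load path that input is the stored `OldPortMappings`, and in the new unit tests it is the table's `arg`. Sort a private copy instead, `ports = append([]types.OCICNIPortMapping(nil), ports...)` placed before the `sort.Slice` call. The `uint16(...)` conversions of `HostPort`/`ContainerPort` silently truncate any int32 outside 0–65535, so if the old format was not validated on the way in, a range check that logs and skips the entry would be safer than a wrapped value. The doc comment says "deduplicating" but identical entries are emitted twice (the merge test needs the next port to be exactly one higher), so `// while merging consecutive ports into ranges` describes the loop more accurately.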
diff --git a/libpod/networking_linux_test.go b/libpod/networking_linux_test.go
new file mode 100644
index 000000000..06bf05723
--- /dev/null
+++ b/libpod/networking_linux_test.go
@@ -0,0 +1,323 @@
+package libpod
+
+import (
+ "fmt"
+ "testing"
+
+ "github.com/containers/podman/v3/libpod/network/types"
+ "github.com/stretchr/testify/assert"
+)
+
+func Test_ocicniPortsToNetTypesPorts(t *testing.T) {
+ tests := []struct {
+ name string
+ arg []types.OCICNIPortMapping
+ want []types.PortMapping
+ }{
+ {
+ name: "no ports",
+ arg: nil,
+ want: nil,
+ },
+ {
+ name: "empty ports",
+ arg: []types.OCICNIPortMapping{},
+ want: nil,
+ },
+ {
+ name: "single port",
+ arg: []types.OCICNIPortMapping{
+ {
+ HostPort: 8080,
+ ContainerPort: 80,
+ Protocol: "tcp",
+ },
+ },
+ want: []types.PortMapping{
+ {
+ HostPort: 8080,
+ ContainerPort: 80,
+ Protocol: "tcp",
+ Range: 1,
+ },
+ },
+ },
+ {
+ name: "two separate ports",
+ arg: []types.OCICNIPortMapping{
+ {
+ HostPort: 8080,
+ ContainerPort: 80,
+ Protocol: "tcp",
+ },
+ {
+ HostPort: 9000,
+ ContainerPort: 90,
+ Protocol: "tcp",
+ },
+ },
+ want: []types.PortMapping{
+ {
+ HostPort: 8080,
+ ContainerPort: 80,
+ Protocol: "tcp",
+ Range: 1,
+ },
+ {
+ HostPort: 9000,
+ ContainerPort: 90,
+ Protocol: "tcp",
+ Range: 1,
+ },
+ },
+ },
+ {
+ name: "two ports joined",
+ arg: []types.OCICNIPortMapping{
+ {
+ HostPort: 8080,
+ ContainerPort: 80,
+ Protocol: "tcp",
+ },
+ {
+ HostPort: 8081,
+ ContainerPort: 81,
+ Protocol: "tcp",
+ },
+ },
+ want: []types.PortMapping{
+ {
+ HostPort: 8080,
+ ContainerPort: 80,
+ Protocol: "tcp",
+ Range: 2,
+ },
+ },
+ },
+ {
+ name: "three ports with different container port are not joined",
+ arg: []types.OCICNIPortMapping{
+ {
+ HostPort: 8080,
+ ContainerPort: 80,
+ Protocol: "tcp",
+ },
+ {
+ HostPort: 8081,
+ ContainerPort: 79,
+ Protocol: "tcp",
+ },
+ {
+ HostPort: 8082,
+ ContainerPort: 82,
+ Protocol: "tcp",
+ },
+ },
+ want: []types.PortMapping{
+ {
+ HostPort: 8080,
+ ContainerPort: 80,
+ Protocol: "tcp",
+ Range: 1,
+ },
+ {
+ HostPort: 8081,
+ ContainerPort: 79,
+ Protocol: "tcp",
+ Range: 1,
+ },
+ {
+ HostPort: 8082,
+ ContainerPort: 82,
+ Protocol: "tcp",
+ Range: 1,
+ },
+ },
+ },
+ {
+ name: "three ports joined (not sorted)",
+ arg: []types.OCICNIPortMapping{
+ {
+ HostPort: 8081,
+ ContainerPort: 81,
+ Protocol: "tcp",
+ },
+ {
+ HostPort: 8080,
+ ContainerPort: 80,
+ Protocol: "tcp",
+ },
+ {
+ HostPort: 8082,
+ ContainerPort: 82,
+ Protocol: "tcp",
+ },
+ },
+ want: []types.PortMapping{
+ {
+ HostPort: 8080,
+ ContainerPort: 80,
+ Protocol: "tcp",
+ Range: 3,
+ },
+ },
+ },
+ {
+ name: "different protocols ports are not joined",
+ arg: []types.OCICNIPortMapping{
+ {
+ HostPort: 8080,
+ ContainerPort: 80,
+ Protocol: "tcp",
+ },
+ {
+ HostPort: 8081,
+ ContainerPort: 81,
+ Protocol: "udp",
+ },
+ },
+ want: []types.PortMapping{
+ {
+ HostPort: 8080,
+ ContainerPort: 80,
+ Protocol: "tcp",
+ Range: 1,
+ },
+ {
+ HostPort: 8081,
+ ContainerPort: 81,
+ Protocol: "udp",
+ Range: 1,
+ },
+ },
+ },
+ {
+ name: "different host ip ports are not joined",
+ arg: []types.OCICNIPortMapping{
+ {
+ HostPort: 8080,
+ ContainerPort: 80,
+ Protocol: "tcp",
+ HostIP: "192.168.1.1",
+ },
+ {
+ HostPort: 8081,
+ ContainerPort: 81,
+ Protocol: "tcp",
+ HostIP: "192.168.1.2",
+ },
+ },
+ want: []types.PortMapping{
+ {
+ HostPort: 8080,
+ ContainerPort: 80,
+ Protocol: "tcp",
+ Range: 1,
+ HostIP: "192.168.1.1",
+ },
+ {
+ HostPort: 8081,
+ ContainerPort: 81,
+ Protocol: "tcp",
+ Range: 1,
+ HostIP: "192.168.1.2",
+ },
+ },
+ },
+ }
+ for _, tt := range tests {
+ tt := tt
+ t.Run(tt.name, func(t *testing.T) {
+ result := ocicniPortsToNetTypesPorts(tt.arg)
+ assert.Equal(t, tt.want, result, "ports do not match")
+ })
+ }
+}
+
+func benchmarkOCICNIPortsToNetTypesPorts(b *testing.B, ports []types.OCICNIPortMapping) {
+ for n := 0; n < b.N; n++ {
+ ocicniPortsToNetTypesPorts(ports)
+ }
+}
+
+func Benchmark_ocicniPortsToNetTypesPortsNoPorts(b *testing.B) {
+ benchmarkOCICNIPortsToNetTypesPorts(b, nil)
+}
+
+func Benchmark_ocicniPortsToNetTypesPorts1(b *testing.B) {
+ benchmarkOCICNIPortsToNetTypesPorts(b, []types.OCICNIPortMapping{
+ {
+ HostPort: 8080,
+ ContainerPort: 80,
+ Protocol: "tcp",
+ },
+ })
+}
+
+func Benchmark_ocicniPortsToNetTypesPorts10(b *testing.B) {
+ ports := make([]types.OCICNIPortMapping, 0, 10)
+ for i := int32(8080); i < 8090; i++ {
+ ports = append(ports, types.OCICNIPortMapping{
+ HostPort: i,
+ ContainerPort: i,
+ Protocol: "tcp",
+ })
+ }
+ b.ResetTimer()
+ benchmarkOCICNIPortsToNetTypesPorts(b, ports)
+}
+
+func Benchmark_ocicniPortsToNetTypesPorts100(b *testing.B) {
+ ports := make([]types.OCICNIPortMapping, 0, 100)
+ for i := int32(8080); i < 8180; i++ {
+ ports = append(ports, types.OCICNIPortMapping{
+ HostPort: i,
+ ContainerPort: i,
+ Protocol: "tcp",
+ })
+ }
+ b.ResetTimer()
+ benchmarkOCICNIPortsToNetTypesPorts(b, ports)
+}
+
+func Benchmark_ocicniPortsToNetTypesPorts1k(b *testing.B) {
+ ports := make([]types.OCICNIPortMapping, 0, 1000)
+ for i := int32(8080); i < 9080; i++ {
+ ports = append(ports, types.OCICNIPortMapping{
+ HostPort: i,
+ ContainerPort: i,
+ Protocol: "tcp",
+ })
+ }
+ b.ResetTimer()
+ benchmarkOCICNIPortsToNetTypesPorts(b, ports)
+}
+
+func Benchmark_ocicniPortsToNetTypesPorts10k(b *testing.B) {
+ ports := make([]types.OCICNIPortMapping, 0, 30000)
+ for i := int32(8080); i < 18080; i++ {
+ ports = append(ports, types.OCICNIPortMapping{
+ HostPort: i,
+ ContainerPort: i,
+ Protocol: "tcp",
+ })
+ }
+ b.ResetTimer()
+ benchmarkOCICNIPortsToNetTypesPorts(b, ports)
+}
+
+func Benchmark_ocicniPortsToNetTypesPorts1m(b *testing.B) {
+ ports := make([]types.OCICNIPortMapping, 0, 1000000)
+ for j := 0; j < 20; j++ {
+ for i := int32(1); i <= 50000; i++ {
+ ports = append(ports, types.OCICNIPortMapping{
+ HostPort: i,
+ ContainerPort: i,
+ Protocol: "tcp",
+ HostIP: fmt.Sprintf("192.168.1.%d", j),
+ })
+ }
+ }
+ b.ResetTimer()
+ benchmarkOCICNIPortsToNetTypesPorts(b, ports)
+}
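Review bot: Every benchmark hands the same `ports` slice to `ocicniPortsToNetTypesPorts` on each iteration, and the function sorts its input in place, so from the second iteration onward the benchmark measures already-sorted input (most visible in the 1m case, whose `HostIP` strings are built out of lexical order). Either clone the slice inside the loop in `benchmarkOCICNIPortsToNetTypesPorts` with `b.StopTimer()`/`b.StartTimer()` around the copy, or keep the sorted-input measurement deliberately and say so in a comment. The table has no case with identical duplicate entries and none with unordered `HostIP` or `Protocol`, which are exactly the branches the comparator decides; `Benchmark_ocicniPortsToNetTypesPorts10k` also reserves capacity 30000 for 10000 entries.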
diff --git a/libpod/networking_slirp4netns.go b/libpod/networking_slirp4netns.go
index 46cda89a9..760427f22 100644
--- a/libpod/networking_slirp4netns.go
+++ b/libpod/networking_slirp4netns.go
@@ -16,6 +16,7 @@ import (
"syscall"
"time"
+ "github.com/containernetworking/plugins/pkg/ns"
"github.com/containers/podman/v3/pkg/errorhandling"
"github.com/containers/podman/v3/pkg/rootless"
"github.com/containers/podman/v3/pkg/rootlessport"
@@ -37,9 +38,9 @@ type slirpFeatures struct {
type slirp4netnsCmdArg struct {
Proto string `json:"proto,omitempty"`
HostAddr string `json:"host_addr"`
- HostPort int32 `json:"host_port"`
+ HostPort uint16 `json:"host_port"`
GuestAddr string `json:"guest_addr"`
- GuestPort int32 `json:"guest_port"`
+ GuestPort uint16 `json:"guest_port"`
}
type slirp4netnsCmd struct {
@@ -58,6 +59,8 @@ type slirp4netnsNetworkOptions struct {
outboundAddr6 string
}
+const ipv6ConfDefaultAcceptDadSysctl = "/proc/sys/net/ipv6/conf/default/accept_dad"
+
func checkSlirpFlags(path string) (*slirpFeatures, error) {
cmd := exec.Command(path, "--help")
out, err := cmd.CombinedOutput()
@@ -297,6 +300,39 @@ func (r *Runtime) setupSlirp4netns(ctr *Container) error {
}
cmd.Stdout = logFile
cmd.Stderr = logFile
+
+ var slirpReadyChan (chan struct{})
+
+ if netOptions.enableIPv6 {
+ slirpReadyChan = make(chan struct{})
+ defer close(slirpReadyChan)
+ go func() {
+ err := ns.WithNetNSPath(netnsPath, func(_ ns.NetNS) error {
+ // Duplicate Address Detection slows the ipv6 setup down for 1-2 seconds.
+ // Since slirp4netns is run it is own namespace and not directly routed
+ // we can skip this to make the ipv6 address immediately available.
+ // We change the default to make sure the slirp tap interface gets the
+ // correct value assigned so DAD is disabled for it
+ // Also make sure to change this value back to the original after slirp4netns
+ // is ready in case users rely on this sysctl.
+ orgValue, err := ioutil.ReadFile(ipv6ConfDefaultAcceptDadSysctl)
+ if err != nil {
+ return err
+ }
+ err = ioutil.WriteFile(ipv6ConfDefaultAcceptDadSysctl, []byte("0"), 0644)
+ if err != nil {
+ return err
+ }
+ // wait for slirp to finish setup
+ <-slirpReadyChan
+ return ioutil.WriteFile(ipv6ConfDefaultAcceptDadSysctl, orgValue, 0644)
+ })
+ if err != nil {
+ logrus.Warnf("failed to set net.ipv6.conf.default.accept_dad sysctl: %v", err)
+ }
+ }()
+ }
+
if err := cmd.Start(); err != nil {
return errors.Wrapf(err, "failed to start slirp4netns process")
}
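Review bot: The goroutine can return before it ever receives from `slirpReadyChan` — `ns.WithNetNSPath` or the `ioutil.ReadFile`/`WriteFile` calls may fail — and the send `slirpReadyChan <- struct{}{}` in the next hunk (after `waitForSync`) is on an unbuffered channel, so `setupSlirp4netns` then blocks forever. Give the channel a one-slot buffer, `slirpReadyChan = make(chan struct{}, 1)`, so the send never blocks whatever state the goroutine is in; the deferred `close` still covers the early-return paths. Nothing orders the `WriteFile` of `0` before `cmd.Start()`, so slirp4netns may create its tap while DAD is still enabled; if that matters, have the goroutine signal on a second channel once the sysctl is written and wait for it before starting the command. setupSlirp4netns begins and ends outside the hunk.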
@@ -310,6 +346,9 @@ func (r *Runtime) setupSlirp4netns(ctr *Container) error {
if err := waitForSync(syncR, cmd, logFile, 1*time.Second); err != nil {
return err
}
+ if slirpReadyChan != nil {
+ slirpReadyChan <- struct{}{}
+ }
// Set a default slirp subnet. Parsing a string with the net helper is easier than building the struct myself
_, ctr.slirp4netnsSubnet, _ = net.ParseCIDR(defaultSlirp4netnsSubnet)
@@ -484,10 +523,14 @@ func (r *Runtime) setupRootlessPortMappingViaRLK(ctr *Container, netnsPath strin
}
cfgR := bytes.NewReader(cfgJSON)
var stdout bytes.Buffer
- cmd := exec.Command(fmt.Sprintf("/proc/%d/exe", os.Getpid()))
- cmd.Args = []string{rootlessport.ReexecKey}
- // Leak one end of the pipe in rootlessport process, the other will be sent to conmon
+ path, err := r.config.FindHelperBinary(rootlessport.BinaryName, false)
+ if err != nil {
+ return err
+ }
+ cmd := exec.Command(path)
+ cmd.Args = []string{rootlessport.BinaryName}
+ // Leak one end of the pipe in rootlessport process, the other will be sent to conmon
if ctr.rootlessPortSyncR != nil {
defer errorhandling.CloseQuiet(ctr.rootlessPortSyncR)
}
diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go
index 1719b2dfa..db906fabb 100644
--- a/libpod/oci_conmon_linux.go
+++ b/libpod/oci_conmon_linux.go
@@ -1016,7 +1016,7 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
}
if ctr.config.CgroupsMode == cgroupSplit {
- if err := utils.MoveUnderCgroupSubtree("supervisor"); err != nil {
+ if err := utils.MoveUnderCgroupSubtree("runtime"); err != nil {
return err
}
}
diff --git a/libpod/oci_util.go b/libpod/oci_util.go
index c1afc0d20..6d99d5836 100644
--- a/libpod/oci_util.go
+++ b/libpod/oci_util.go
@@ -32,93 +32,108 @@ func createUnitName(prefix string, name string) string {
}
// Bind ports to keep them closed on the host
-func bindPorts(ports []types.OCICNIPortMapping) ([]*os.File, error) {
+func bindPorts(ports []types.PortMapping) ([]*os.File, error) {
var files []*os.File
- notifySCTP := false
- for _, i := range ports {
- isV6 := net.ParseIP(i.HostIP).To4() == nil
- if i.HostIP == "" {
+ sctpWarning := true
+ for _, port := range ports {
+ isV6 := net.ParseIP(port.HostIP).To4() == nil
+ if port.HostIP == "" {
isV6 = false
}
- switch i.Protocol {
- case "udp":
- var (
- addr *net.UDPAddr
- err error
- )
- if isV6 {
- addr, err = net.ResolveUDPAddr("udp6", fmt.Sprintf("[%s]:%d", i.HostIP, i.HostPort))
- } else {
- addr, err = net.ResolveUDPAddr("udp4", fmt.Sprintf("%s:%d", i.HostIP, i.HostPort))
- }
- if err != nil {
- return nil, errors.Wrapf(err, "cannot resolve the UDP address")
+ protocols := strings.Split(port.Protocol, ",")
+ for _, protocol := range protocols {
+ for i := uint16(0); i < port.Range; i++ {
+ f, err := bindPort(protocol, port.HostIP, port.HostPort+i, isV6, &sctpWarning)
+ if err != nil {
+ return files, err
+ }
+ if f != nil {
+ files = append(files, f)
+ }
}
+ }
+ }
+ return files, nil
+}
- proto := "udp4"
- if isV6 {
- proto = "udp6"
- }
- server, err := net.ListenUDP(proto, addr)
- if err != nil {
- return nil, errors.Wrapf(err, "cannot listen on the UDP port")
- }
- f, err := server.File()
- if err != nil {
- return nil, errors.Wrapf(err, "cannot get file for UDP socket")
- }
- files = append(files, f)
- // close the listener
- // note that this does not affect the fd, see the godoc for server.File()
- err = server.Close()
- if err != nil {
- logrus.Warnf("Failed to close connection: %v", err)
- }
+func bindPort(protocol, hostIP string, port uint16, isV6 bool, sctpWarning *bool) (*os.File, error) {
+ var file *os.File
+ switch protocol {
+ case "udp":
+ var (
+ addr *net.UDPAddr
+ err error
+ )
+ if isV6 {
+ addr, err = net.ResolveUDPAddr("udp6", fmt.Sprintf("[%s]:%d", hostIP, port))
+ } else {
+ addr, err = net.ResolveUDPAddr("udp4", fmt.Sprintf("%s:%d", hostIP, port))
+ }
+ if err != nil {
+ return nil, errors.Wrapf(err, "cannot resolve the UDP address")
+ }
- case "tcp":
- var (
- addr *net.TCPAddr
- err error
- )
- if isV6 {
- addr, err = net.ResolveTCPAddr("tcp6", fmt.Sprintf("[%s]:%d", i.HostIP, i.HostPort))
- } else {
- addr, err = net.ResolveTCPAddr("tcp4", fmt.Sprintf("%s:%d", i.HostIP, i.HostPort))
- }
- if err != nil {
- return nil, errors.Wrapf(err, "cannot resolve the TCP address")
- }
+ proto := "udp4"
+ if isV6 {
+ proto = "udp6"
+ }
+ server, err := net.ListenUDP(proto, addr)
+ if err != nil {
+ return nil, errors.Wrapf(err, "cannot listen on the UDP port")
+ }
+ file, err = server.File()
+ if err != nil {
+ return nil, errors.Wrapf(err, "cannot get file for UDP socket")
+ }
+ // close the listener
+ // note that this does not affect the fd, see the godoc for server.File()
+ err = server.Close()
+ if err != nil {
+ logrus.Warnf("Failed to close connection: %v", err)
+ }
- proto := "tcp4"
- if isV6 {
- proto = "tcp6"
- }
- server, err := net.ListenTCP(proto, addr)
- if err != nil {
- return nil, errors.Wrapf(err, "cannot listen on the TCP port")
- }
- f, err := server.File()
- if err != nil {
- return nil, errors.Wrapf(err, "cannot get file for TCP socket")
- }
- files = append(files, f)
- // close the listener
- // note that this does not affect the fd, see the godoc for server.File()
- err = server.Close()
- if err != nil {
- logrus.Warnf("Failed to close connection: %v", err)
- }
+ case "tcp":
+ var (
+ addr *net.TCPAddr
+ err error
+ )
+ if isV6 {
+ addr, err = net.ResolveTCPAddr("tcp6", fmt.Sprintf("[%s]:%d", hostIP, port))
+ } else {
+ addr, err = net.ResolveTCPAddr("tcp4", fmt.Sprintf("%s:%d", hostIP, port))
+ }
+ if err != nil {
+ return nil, errors.Wrapf(err, "cannot resolve the TCP address")
+ }
- case "sctp":
- if !notifySCTP {
- notifySCTP = true
- logrus.Warnf("Port reservation for SCTP is not supported")
- }
- default:
- return nil, fmt.Errorf("unknown protocol %s", i.Protocol)
+ proto := "tcp4"
+ if isV6 {
+ proto = "tcp6"
+ }
+ server, err := net.ListenTCP(proto, addr)
+ if err != nil {
+ return nil, errors.Wrapf(err, "cannot listen on the TCP port")
+ }
+ file, err = server.File()
+ if err != nil {
+ return nil, errors.Wrapf(err, "cannot get file for TCP socket")
+ }
+ // close the listener
+ // note that this does not affect the fd, see the godoc for server.File()
+ err = server.Close()
+ if err != nil {
+ logrus.Warnf("Failed to close connection: %v", err)
}
+
+ case "sctp":
+ if *sctpWarning {
+ logrus.Info("Port reservation for SCTP is not supported")
+ *sctpWarning = false
+ }
+ default:
+ return nil, fmt.Errorf("unknown protocol %s", protocol)
}
- return files, nil
+ return file, nil
}
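Review bot: `port.HostPort+i` is uint16 arithmetic, so a range that runs past 65535 wraps to low port numbers (and port 0 makes `ListenTCP`/`ListenUDP` pick an ephemeral port) instead of failing; guard it before the inner loop with `if uint32(port.HostPort)+uint32(port.Range) > 65536 { return files, fmt.Errorf("port range starting at %d exceeds 65535", port.HostPort) }`. `bindPorts` now returns the files bound so far together with the error where the old code returned `nil`, so callers must close them on the error path or the reserved sockets leak — worth stating in the function comment, e.g. `// On error the files bound so far are returned and must be closed by the caller.` The `sctpWarning` flag is true while the notice has not yet been printed, which reads inverted; a name such as `sctpNotified` with the opposite sense would be clearer.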
func getOCIRuntimeError(runtimeMsg string) error {
diff --git a/libpod/options.go b/libpod/options.go
index 553af43fd..250b16556 100644
--- a/libpod/options.go
+++ b/libpod/options.go
@@ -310,6 +310,17 @@ func WithCDI(devices []string) CtrCreateOption {
}
}
+// WithStorageOpts sets the devices to check for for CDI configuration.
+func WithStorageOpts(storageOpts map[string]string) CtrCreateOption {
+ return func(ctr *Container) error {
+ if ctr.valid {
+ return define.ErrCtrFinalized
+ }
+ ctr.config.StorageOpts = storageOpts
+ return nil
+ }
+}
+
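Review bot: The doc comment on `WithStorageOpts` was copied from `WithCDI` ("sets the devices to check for for CDI configuration") and describes the wrong option; replace it with `// WithStorageOpts sets the storage options for the container (stored in ctr.config.StorageOpts).` The map is stored by reference, so a caller that mutates it after creation changes the container's config; copying the map into a fresh one keeps the option self-contained, if that isolation matters here.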
// WithDefaultMountsFile sets the file to look at for default mounts (mainly
// secrets).
// Note we are not saving this in the database as it is for testing purposes
@@ -958,7 +969,7 @@ func WithUserNSFrom(nsCtr *Container) CtrCreateOption {
if err := JSONDeepCopy(nsCtr.IDMappings(), &ctr.config.IDMappings); err != nil {
return err
}
- g := generate.Generator{Config: ctr.config.Spec}
+ g := generate.NewFromSpec(ctr.config.Spec)
g.ClearLinuxUIDMappings()
for _, uidmap := range nsCtr.config.IDMappings.UIDMap {
@@ -1040,7 +1051,7 @@ func WithDependencyCtrs(ctrs []*Container) CtrCreateOption {
// namespace with a minimal configuration.
// An optional array of port mappings can be provided.
// Conflicts with WithNetNSFrom().
-func WithNetNS(portMappings []nettypes.OCICNIPortMapping, exposedPorts map[uint16][]string, postConfigureNetNS bool, netmode string, networks []string) CtrCreateOption {
+func WithNetNS(portMappings []nettypes.PortMapping, exposedPorts map[uint16][]string, postConfigureNetNS bool, netmode string, networks []string) CtrCreateOption {
return func(ctr *Container) error {
if ctr.valid {
return define.ErrCtrFinalized
@@ -1093,7 +1104,7 @@ func WithNetworkOptions(options map[string][]string) CtrCreateOption {
// It cannot be set unless WithNetNS has already been passed.
// Further, it cannot be set if additional CNI networks to join have been
// specified.
-func WithStaticMAC(mac net.HardwareAddr) CtrCreateOption {
+func WithStaticMAC(mac nettypes.HardwareAddr) CtrCreateOption {
return func(ctr *Container) error {
if ctr.valid {
return define.ErrCtrFinalized
@@ -2073,21 +2084,6 @@ func WithInfraContainer() PodCreateOption {
}
}
-// WithInfraContainerPorts tells the pod to add port bindings to the pause container
-func WithInfraContainerPorts(bindings []nettypes.OCICNIPortMapping, infraSpec *specgen.SpecGenerator) []nettypes.PortMapping {
- bindingSpec := []nettypes.PortMapping{}
- for _, bind := range bindings {
- currBind := nettypes.PortMapping{}
- currBind.ContainerPort = uint16(bind.ContainerPort)
- currBind.HostIP = bind.HostIP
- currBind.HostPort = uint16(bind.HostPort)
- currBind.Protocol = bind.Protocol
- bindingSpec = append(bindingSpec, currBind)
- }
- infraSpec.PortMappings = bindingSpec
- return infraSpec.PortMappings
-}
-
// WithVolatile sets the volatile flag for the container storage.
// The option can potentially cause data loss when used on a container that must survive a machine reboot.
func WithVolatile() CtrCreateOption {
diff --git a/libpod/pod.go b/libpod/pod.go
index 068a835f6..0e5ac4906 100644
--- a/libpod/pod.go
+++ b/libpod/pod.go
@@ -390,7 +390,7 @@ func (p *Pod) InfraContainerID() (string, error) {
return p.infraContainerID()
}
-// infraContainer is the unlocked versio of InfraContainer which returns the infra container
+// infraContainer is the unlocked version of InfraContainer which returns the infra container
func (p *Pod) infraContainer() (*Container, error) {
id, err := p.infraContainerID()
if err != nil {
diff --git a/libpod/runtime.go b/libpod/runtime.go
index 855f3a9f9..b01f8dd13 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -543,6 +543,8 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (retErr error) {
return err
}
if became {
+ // Check if the pause process was created. If it was created, then
+ // move it to its own systemd scope.
utils.MovePauseProcessToScope(pausePid)
os.Exit(ret)
}
diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index 2256ba57c..114bf9315 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -326,15 +326,6 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
}
}
- if ctr.config.Name == "" {
- name, err := r.generateName()
- if err != nil {
- return nil, err
- }
-
- ctr.config.Name = name
- }
-
// Check CGroup parent sanity, and set it if it was not set.
// Only if we're actually configuring CGroups.
if !ctr.config.NoCgroups {
@@ -389,7 +380,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
if ctr.restoreFromCheckpoint {
// Remove information about bind mount
// for new container from imported checkpoint
- g := generate.Generator{Config: ctr.config.Spec}
+ g := generate.NewFromSpec(ctr.config.Spec)
g.RemoveMount("/dev/shm")
ctr.config.ShmDir = ""
g.RemoveMount("/etc/resolv.conf")
diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go
index 9c6f1539f..15050ef48 100644
--- a/libpod/runtime_pod_linux.go
+++ b/libpod/runtime_pod_linux.go
@@ -43,18 +43,6 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option
}
}
- if pod.config.Name == "" {
- name, err := r.generateName()
- if err != nil {
- return nil, err
- }
- pod.config.Name = name
- }
-
- if p.InfraContainerSpec != nil && p.InfraContainerSpec.Hostname == "" {
- p.InfraContainerSpec.Hostname = pod.config.Name
- }
-
// Allocate a lock for the pod
lock, err := r.lockManager.AllocateLock()
if err != nil {
@@ -131,9 +119,33 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option
logrus.Infof("Pod has an infra container, but shares no namespaces")
}
- if err := r.state.AddPod(pod); err != nil {
- return nil, errors.Wrapf(err, "error adding pod to state")
+ // Unless the user has specified a name, use a randomly generated one.
+ // Note that name conflicts may occur (see #11735), so we need to loop.
+ generateName := pod.config.Name == ""
+ var addPodErr error
+ for {
+ if generateName {
+ name, err := r.generateName()
+ if err != nil {
+ return nil, err
+ }
+ pod.config.Name = name
+ }
+
+ if p.InfraContainerSpec != nil && p.InfraContainerSpec.Hostname == "" {
+ p.InfraContainerSpec.Hostname = pod.config.Name
+ }
+ if addPodErr = r.state.AddPod(pod); addPodErr == nil {
+ return pod, nil
+ }
+ if !generateName || (errors.Cause(addPodErr) != define.ErrPodExists && errors.Cause(addPodErr) != define.ErrCtrExists) {
+ break
+ }
}
+ if addPodErr != nil {
+ return nil, errors.Wrapf(addPodErr, "error adding pod to state")
+ }
+
return pod, nil
}
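Review bot: On a name-collision retry, `p.InfraContainerSpec.Hostname` keeps the first generated name: it is set from `pod.config.Name` during the first pass and the `Hostname == ""` test no longer fires, so the infra container's hostname and the final pod name diverge. Decide before the loop whether the hostname follows the pod name, `setHostname := p.InfraContainerSpec != nil && p.InfraContainerSpec.Hostname == ""`, and inside the loop use `if setHostname { p.InfraContainerSpec.Hostname = pod.config.Name }`. The trailing `return pod, nil` after the `if addPodErr != nil` block is unreachable — every path out of the loop either returned or broke with a non-nil error — and can become a plain return of the wrapped error; the loop also has no upper bound, so a small retry cap would turn a persistent collision into an error rather than a spin.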
@@ -177,10 +189,9 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
if err != nil {
return err
}
-
numCtrs := len(ctrs)
- // If the only container in the pod is the pause container, remove the pod and container unconditionally.
+ // If the only running container in the pod is the pause container, remove the pod and container unconditionally.
pauseCtrID := p.state.InfraContainerID
if numCtrs == 1 && ctrs[0].ID() == pauseCtrID {
removeCtrs = true
@@ -264,6 +275,15 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
}
}
+ // Clear infra container ID before we remove the infra container.
+ // There is a potential issue if we don't do that, and removal is
+ // interrupted between RemoveAllContainers() below and the pod's removal
+ // later - we end up with a reference to a nonexistent infra container.
+ p.state.InfraContainerID = ""
+ if err := p.save(); err != nil {
+ return err
+ }
+
// Remove all containers in the pod from the state.
if err := r.state.RemovePodContainers(p); err != nil {
// If this fails, there isn't much more we can do.
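Review bot: Clearing `InfraContainerID` before `RemovePodContainers` trades one inconsistency for another: if the removal below fails, the infra container still exists in the state but the pod no longer references it, so a later infra-container lookup reports none. The comment should say that this direction is the accepted one, e.g. append `// If container removal fails after this point the infra container is left in the state unreferenced by the pod; that is preferred over a dangling ID.` to the comment block. removePod begins and ends outside the hunk.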
diff --git a/libpod/runtime_volume_linux.go b/libpod/runtime_volume_linux.go
index b08693529..ed3cc971c 100644
--- a/libpod/runtime_volume_linux.go
+++ b/libpod/runtime_volume_linux.go
@@ -230,11 +230,7 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool, timeo
logrus.Debugf("Removing container %s (depends on volume %q)", ctr.ID(), v.Name())
- // TODO: do we want to set force here when removing
- // containers?
- // I'm inclined to say no, in case someone accidentally
- // wipes a container they're using...
- if err := r.removeContainer(ctx, ctr, false, false, false, timeout); err != nil {
+ if err := r.removeContainer(ctx, ctr, force, false, false, timeout); err != nil {
return errors.Wrapf(err, "error removing container %s that depends on volume %s", ctr.ID(), v.Name())
}
}
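Review bot: Passing `force` through to `removeContainer` means a forced volume removal now also forcibly removes running containers that use the volume, which the deleted TODO deliberately avoided, and the rationale for the new behaviour went with it. Put a one-line comment in its place, `// A forced volume removal also forces removal of the containers that depend on it.`, and make sure the user-facing documentation of the force flag states that dependent containers are removed. removeVolume begins and ends outside the hunk.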
diff --git a/libpod/shutdown/handler.go b/libpod/shutdown/handler.go
index b0feafa0b..9add05c9c 100644
--- a/libpod/shutdown/handler.go
+++ b/libpod/shutdown/handler.go
@@ -5,9 +5,10 @@ import (
"os/signal"
"sync"
"syscall"
+ "time"
"github.com/pkg/errors"
- "github.com/sirupsen/logrus"
+ logrusImport "github.com/sirupsen/logrus"
)
var (
@@ -25,6 +26,7 @@ var (
// Ordering that on-shutdown handlers will be invoked.
handlerOrder []string
shutdownInhibit sync.RWMutex
+ logrus = logrusImport.WithField("PID", os.Getpid())
)
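Review bot: Declaring a package-level var named `logrus` that shadows the (now aliased) import is easy to misread — anywhere in this package, `logrus.X` looks like the library but is an `*Entry` carrying a PID field. A distinct name such as `log` or `logger`, with `logrus` left as the import name, keeps the distinction visible at the cost of a mechanical rename in this file; a short comment on the var, `// logger tags every message from this package with the process ID.`, would explain the field either way.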
// Start begins handling SIGTERM and SIGINT and will run the given on-signal
@@ -44,25 +46,31 @@ func Start() error {
go func() {
select {
case <-cancelChan:
+ logrus.Infof("Received shutdown.Stop(), terminating!")
signal.Stop(sigChan)
close(sigChan)
close(cancelChan)
stopped = true
return
case sig := <-sigChan:
- logrus.Infof("Received shutdown signal %v, terminating!", sig)
+ logrus.Infof("Received shutdown signal %q, terminating!", sig.String())
shutdownInhibit.Lock()
handlerLock.Lock()
+
for _, name := range handlerOrder {
handler, ok := handlers[name]
if !ok {
- logrus.Errorf("Shutdown handler %s definition not found!", name)
+ logrus.Errorf("Shutdown handler %q definition not found!", name)
continue
}
- logrus.Infof("Invoking shutdown handler %s", name)
+
+ logrus.Infof("Invoking shutdown handler %q", name)
+ start := time.Now()
if err := handler(sig); err != nil {
- logrus.Errorf("Running shutdown handler %s: %v", name, err)
+ logrus.Errorf("Running shutdown handler %q: %v", name, err)
}
+ logrus.Debugf("Completed shutdown handler %q, duration %v", name,
+ time.Since(start).Round(time.Second))
}
handlerLock.Unlock()
shutdownInhibit.Unlock()
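Review bot: `time.Since(start).Round(time.Second)` reports `0s` for any handler that finishes in under half a second, presumably the common case, so the new debug line rarely carries information; log the raw duration, `time.Since(start)`, or round to `time.Millisecond`. The `sig.String()` call in the signal log is unnecessary with `%q`, since fmt applies `String()` itself for that verb. Start begins and ends outside the hunk.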
diff --git a/libpod/state_test.go b/libpod/state_test.go
index 4799d7b8d..5c3b0d7f7 100644
--- a/libpod/state_test.go
+++ b/libpod/state_test.go
@@ -11,6 +11,7 @@ import (
"github.com/containers/common/pkg/config"
"github.com/containers/podman/v3/libpod/define"
"github.com/containers/podman/v3/libpod/lock"
+ "github.com/containers/podman/v3/libpod/network/types"
"github.com/containers/storage"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@@ -3705,3 +3706,42 @@ func TestGetContainerConfigNonExistentIDFails(t *testing.T) {
assert.Error(t, err)
})
}
+
+// Test that the state will convert the ports to the new format
+func TestConvertPortMapping(t *testing.T) {
+ runForAllStates(t, func(t *testing.T, state State, manager lock.Manager) {
+ testCtr, err := getTestCtr1(manager)
+ assert.NoError(t, err)
+
+ ports := testCtr.config.PortMappings
+
+ oldPorts := []types.OCICNIPortMapping{
+ {
+ HostPort: 80,
+ ContainerPort: 90,
+ Protocol: "tcp",
+ HostIP: "192.168.3.3",
+ },
+ {
+ HostPort: 100,
+ ContainerPort: 110,
+ Protocol: "udp",
+ HostIP: "192.168.4.4",
+ },
+ }
+
+ testCtr.config.OldPortMappings = oldPorts
+ testCtr.config.PortMappings = nil
+
+ err = state.AddContainer(testCtr)
+ assert.NoError(t, err)
+
+ retrievedCtr, err := state.Container(testCtr.ID())
+ assert.NoError(t, err)
+
+ // set values to expected ones
+ testCtr.config.PortMappings = ports
+
+ testContainersEqual(t, retrievedCtr, testCtr, true)
+ })
+}
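Review bot: `retrievedCtr` is dereferenced by `testContainersEqual` on L2798 right after an `assert.NoError` on L2793, so if `state.Container` fails the test dies with a nil-pointer panic instead of the assertion message; `require.NoError(t, err)` (the package is already imported at L2757) for the `AddContainer` and `Container` calls gives a clean failure. The expected value is also implicit: L2796 resets `PortMappings` to the fixture's original `ports`, so the test only passes if `getTestCtr1` happens to carry the converted form of `oldPorts` (80→90/tcp on 192.168.3.3, 100→110/udp on 192.168.4.4), which is not visible here; stating it directly, e.g. `testCtr.config.PortMappings = ocicniPortsToNetTypesPorts(oldPorts)` or a literal `[]types.PortMapping`, makes the test self-describing and keeps it from breaking when the fixture changes. A short comment on L2763 saying which reader path performs the conversion (the BoltDB config load) would also help someone reading the test cold.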
diff --git a/libpod/util.go b/libpod/util.go
index 8f8303ff2..5154a261e 100644
--- a/libpod/util.go
+++ b/libpod/util.go
@@ -295,19 +295,21 @@ func writeHijackHeader(r *http.Request, conn io.Writer) {
}
// Convert OCICNI port bindings into Inspect-formatted port bindings.
-func makeInspectPortBindings(bindings []types.OCICNIPortMapping, expose map[uint16][]string) map[string][]define.InspectHostPort {
+func makeInspectPortBindings(bindings []types.PortMapping, expose map[uint16][]string) map[string][]define.InspectHostPort {
portBindings := make(map[string][]define.InspectHostPort)
for _, port := range bindings {
- key := fmt.Sprintf("%d/%s", port.ContainerPort, port.Protocol)
- hostPorts := portBindings[key]
- if hostPorts == nil {
- hostPorts = []define.InspectHostPort{}
+ protocols := strings.Split(port.Protocol, ",")
+ for _, protocol := range protocols {
+ for i := uint16(0); i < port.Range; i++ {
+ key := fmt.Sprintf("%d/%s", port.ContainerPort+i, protocol)
+ hostPorts := portBindings[key]
+ hostPorts = append(hostPorts, define.InspectHostPort{
+ HostIP: port.HostIP,
+ HostPort: fmt.Sprintf("%d", port.HostPort+i),
+ })
+ portBindings[key] = hostPorts
+ }
}
- hostPorts = append(hostPorts, define.InspectHostPort{
- HostIP: port.HostIP,
- HostPort: fmt.Sprintf("%d", port.HostPort),
- })
- portBindings[key] = hostPorts
}
// add exposed ports without host port information to match docker
for port, protocols := range expose {
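Review bot: The inner loop at L2818 iterates `i < port.Range`, so a `types.PortMapping` whose `Range` is zero now yields no inspect entry at all, whereas the old code always emitted one binding per mapping; if the write side guarantees `Range >= 1` (that normalisation is not visible in this hunk) this is fine, otherwise a guard such as `n := port.Range; if n == 0 { n = 1 }` before the loop keeps the previous behaviour for legacy or hand-built configs. `port.ContainerPort+i` and `port.HostPort+i` are `uint16` additions and silently wrap past 65535, so the function relies on whoever validated the mapping having checked `HostPort+Range` and `ContainerPort+Range`; a comment stating that assumption would be useful. The header comment on L2807 still says "Convert OCICNI port bindings" although the parameter is now `[]types.PortMapping`; `// Convert libpod PortMappings (with protocol lists and port ranges) into Inspect-formatted port bindings.` matches the new signature. The function continues past the end of this hunk.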