diff options
-rw-r--r-- | .cirrus.yml | 38 | ||||
-rw-r--r-- | Makefile | 10 | ||||
-rw-r--r-- | cmd/podman/machine/list.go | 36 | ||||
-rwxr-xr-x | contrib/cirrus/cirrus_yaml_test.py | 2 | ||||
-rwxr-xr-x | contrib/cirrus/runner.sh | 7 | ||||
-rwxr-xr-x | contrib/cirrus/setup_environment.sh | 34 | ||||
-rw-r--r-- | pkg/domain/entities/machine.go | 18 | ||||
-rw-r--r-- | pkg/machine/e2e/list_test.go | 8 | ||||
-rw-r--r-- | pkg/machine/qemu/machine.go | 39 |
9 files changed, 148 insertions, 44 deletions
diff --git a/.cirrus.yml b/.cirrus.yml index 06f4a565c..81bbe7c8f 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -30,8 +30,10 @@ env: PRIOR_FEDORA_NAME: "fedora-35" UBUNTU_NAME: "ubuntu-2110" - # Google-cloud VM Images + # Image identifiers IMAGE_SUFFIX: "c6211193021923328" + FEDORA_AMI_ID: "ami-06a41d8a81ab56afa" + # Complete image names FEDORA_CACHE_IMAGE_NAME: "fedora-${IMAGE_SUFFIX}" PRIOR_FEDORA_CACHE_IMAGE_NAME: "prior-fedora-${IMAGE_SUFFIX}" UBUNTU_CACHE_IMAGE_NAME: "ubuntu-${IMAGE_SUFFIX}" @@ -66,6 +68,8 @@ timeout_in: 60m gcp_credentials: ENCRYPTED[a28959877b2c9c36f151781b0a05407218cda646c7d047fc556e42f55e097e897ab63ee78369dae141dcf0b46a9d0cdd] +aws_credentials: ENCRYPTED[4ca070bffe28eb9b27d63c568b52970dd46f119c3a83b8e443241e895dbf1737580b4d84eed27a311a2b74287ef9f79f] + # Attempt to prevent flakes by confirming all required external/3rd-party # services are available and functional. @@ -577,6 +581,35 @@ rootless_integration_test_task: always: *int_logs_artifacts +podman_machine_task: + name: *std_name_fmt + alias: podman_machine + only_if: *not_tag_branch_build_docs + # Manually-triggered task: This is "expensive" to run. + # DO NOT ADD THIS TASK AS DEPENDENCY FOR `success_task` + # it will cause 'success' to block. + trigger_type: manual + depends_on: + - build + - local_integration_test + - remote_integration_test + - container_integration_test + - rootless_integration_test + ec2_instance: + image: "${VM_IMAGE_NAME}" + type: m5zn.metal # Bare-metal instance is required + region: us-east-1 + env: + TEST_FLAVOR: "machine" + PRIV_NAME: "rootless" # intended use-case + DISTRO_NV: "${FEDORA_NAME}" + VM_IMAGE_NAME: "${FEDORA_AMI_ID}" + clone_script: *get_gosrc + setup_script: *setup + main_script: *main + always: *int_logs_artifacts + + # Always run subsequent to integration tests. While parallelism is lost # with runtime, debugging system-test failures can be more challenging # for some golang developers. Otherwise the following tasks run across @@ -842,6 +875,9 @@ success_task: - remote_integration_test - container_integration_test - rootless_integration_test + # Manually triggered task. If made automatic, remove bypass + # in contrib/cirrus/cirrus_yaml_test.py for this task. + # - podman_machine - local_system_test - remote_system_test - rootless_system_test @@ -112,6 +112,10 @@ LIBSECCOMP_COMMIT := v2.3.3 # Rarely if ever should integration tests take more than 50min, # caller may override in special circumstances if needed. GINKGOTIMEOUT ?= -timeout=90m +# By default, run test/e2e +GINKGOWHAT ?= test/e2e/. +# By default, run tests in parallel across 3 nodes. +GINKGONODES ?= 3 # Conditional required to produce empty-output if binary not built yet. RELEASE_VERSION = $(shell if test -x test/version/version; then test/version/version; fi) @@ -524,7 +528,7 @@ test: localunit localintegration remoteintegration localsystem remotesystem ## .PHONY: ginkgo-run ginkgo-run: ACK_GINKGO_RC=true ginkgo version - ACK_GINKGO_RC=true ginkgo -v $(TESTFLAGS) -tags "$(TAGS)" $(GINKGOTIMEOUT) -cover -flakeAttempts 3 -progress -trace -noColor -nodes 3 -debug test/e2e/. $(HACK) + ACK_GINKGO_RC=true ginkgo -v $(TESTFLAGS) -tags "$(TAGS)" $(GINKGOTIMEOUT) -cover -flakeAttempts 3 -progress -trace -noColor -nodes $(GINKGONODES) -debug $(GINKGOWHAT) $(HACK) .PHONY: ginkgo ginkgo: @@ -540,6 +544,10 @@ localintegration: test-binaries ginkgo .PHONY: remoteintegration remoteintegration: test-binaries ginkgo-remote +.PHONY: localmachine +localmachine: test-binaries + $(MAKE) ginkgo-run GINKGONODES=1 GINKGOWHAT=pkg/machine/e2e/. HACK= + .PHONY: localbenchmarks localbenchmarks: test-binaries PATH=$(PATH):$(shell pwd)/hack ACK_GINKGO_RC=true ginkgo \ diff --git a/cmd/podman/machine/list.go b/cmd/podman/machine/list.go index b1e31566f..dd4a86697 100644 --- a/cmd/podman/machine/list.go +++ b/cmd/podman/machine/list.go @@ -16,6 +16,7 @@ import ( "github.com/containers/podman/v4/cmd/podman/common" "github.com/containers/podman/v4/cmd/podman/registry" "github.com/containers/podman/v4/cmd/podman/validate" + "github.com/containers/podman/v4/pkg/domain/entities" "github.com/containers/podman/v4/pkg/machine" "github.com/docker/go-units" "github.com/spf13/cobra" @@ -44,23 +45,6 @@ type listFlagType struct { quiet bool } -type ListReporter struct { - Name string - Default bool - Created string - Running bool - Starting bool - LastUp string - Stream string - VMType string - CPUs uint64 - Memory string - DiskSize string - Port int - RemoteUsername string - IdentityPath string -} - func init() { registry.Commands = append(registry.Commands, registry.CliCommand{ Command: lsCmd, @@ -70,7 +54,7 @@ func init() { flags := lsCmd.Flags() formatFlagName := "format" flags.StringVar(&listFlag.format, formatFlagName, "{{.Name}}\t{{.VMType}}\t{{.Created}}\t{{.LastUp}}\t{{.CPUs}}\t{{.Memory}}\t{{.DiskSize}}\n", "Format volume output using JSON or a Go template") - _ = lsCmd.RegisterFlagCompletionFunc(formatFlagName, common.AutocompleteFormat(&ListReporter{})) + _ = lsCmd.RegisterFlagCompletionFunc(formatFlagName, common.AutocompleteFormat(&entities.ListReporter{})) flags.BoolVar(&listFlag.noHeading, "noheading", false, "Do not print headers") flags.BoolVarP(&listFlag.quiet, "quiet", "q", false, "Show only machine names") } @@ -123,8 +107,8 @@ func list(cmd *cobra.Command, args []string) error { return outputTemplate(cmd, machineReporter) } -func outputTemplate(cmd *cobra.Command, responses []*ListReporter) error { - headers := report.Headers(ListReporter{}, map[string]string{ +func outputTemplate(cmd *cobra.Command, responses []*entities.ListReporter) error { + headers := report.Headers(entities.ListReporter{}, map[string]string{ "LastUp": "LAST UP", "VmType": "VM TYPE", "CPUs": "CPUS", @@ -183,15 +167,15 @@ func streamName(imageStream string) string { return imageStream } -func toMachineFormat(vms []*machine.ListResponse) ([]*ListReporter, error) { +func toMachineFormat(vms []*machine.ListResponse) ([]*entities.ListReporter, error) { cfg, err := config.ReadCustomConfig() if err != nil { return nil, err } - machineResponses := make([]*ListReporter, 0, len(vms)) + machineResponses := make([]*entities.ListReporter, 0, len(vms)) for _, vm := range vms { - response := new(ListReporter) + response := new(entities.ListReporter) response.Default = vm.Name == cfg.Engine.ActiveService response.Name = vm.Name response.Running = vm.Running @@ -211,15 +195,15 @@ func toMachineFormat(vms []*machine.ListResponse) ([]*ListReporter, error) { return machineResponses, nil } -func toHumanFormat(vms []*machine.ListResponse) ([]*ListReporter, error) { +func toHumanFormat(vms []*machine.ListResponse) ([]*entities.ListReporter, error) { cfg, err := config.ReadCustomConfig() if err != nil { return nil, err } - humanResponses := make([]*ListReporter, 0, len(vms)) + humanResponses := make([]*entities.ListReporter, 0, len(vms)) for _, vm := range vms { - response := new(ListReporter) + response := new(entities.ListReporter) if vm.Name == cfg.Engine.ActiveService { response.Name = vm.Name + "*" response.Default = true diff --git a/contrib/cirrus/cirrus_yaml_test.py b/contrib/cirrus/cirrus_yaml_test.py index a7fff8d3f..3968b8b1b 100755 --- a/contrib/cirrus/cirrus_yaml_test.py +++ b/contrib/cirrus/cirrus_yaml_test.py @@ -26,7 +26,7 @@ class TestCaseBase(unittest.TestCase): class TestDependsOn(TestCaseBase): ALL_TASK_NAMES = None - SUCCESS_DEPS_EXCLUDE = set(['success', 'artifacts', + SUCCESS_DEPS_EXCLUDE = set(['success', 'artifacts', 'podman_machine', 'test_image_build', 'release', 'release_test']) def setUp(self): diff --git a/contrib/cirrus/runner.sh b/contrib/cirrus/runner.sh index d49286ad3..762a3b501 100755 --- a/contrib/cirrus/runner.sh +++ b/contrib/cirrus/runner.sh @@ -379,6 +379,13 @@ dotest() { |& logformatter } +_run_machine() { + # TODO: This is a manually-triggered task, if that ever changes need to + # add something like: + # _bail_if_test_can_be_skipped docs test/e2e test/system test/python + make localmachine |& logformatter +} + # Optimization: will exit if the only PR diffs are under docs/ or tests/ # with the exception of any given arguments. E.g., don't run e2e or upgrade # or bud tests if the only PR changes are in test/system. diff --git a/contrib/cirrus/setup_environment.sh b/contrib/cirrus/setup_environment.sh index 9bd35bd06..4952f8dd2 100755 --- a/contrib/cirrus/setup_environment.sh +++ b/contrib/cirrus/setup_environment.sh @@ -99,11 +99,28 @@ esac if ((CONTAINER==0)); then # Not yet running inside a container # Discovered reemergence of BFQ scheduler bug in kernel 5.8.12-200 # which causes a kernel panic when system is under heavy I/O load. - # Previously discovered in F32beta and confirmed fixed. It's been - # observed in F31 kernels as well. Deploy workaround for all VMs - # to ensure a more stable I/O scheduler (elevator). - echo "mq-deadline" > /sys/block/sda/queue/scheduler - warn "I/O scheduler: $(cat /sys/block/sda/queue/scheduler)" + # Disable the I/O scheduler (a.k.a. elevator) for all environments, + # leaving optimization up to underlying storage infrastructure. + testfs="/" # mountpoint that experiences the most I/O during testing + msg "Querying block device owning partition hosting the '$testfs' filesystem" + # Need --nofsroot b/c btrfs appends subvolume label to `source` name + testdev=$(findmnt --canonicalize --noheadings --nofsroot \ + --output source --mountpoint $testfs) + msg " found partition: '$testdev'" + testdisk=$(lsblk --noheadings --output pkname --paths $testdev) + msg " found block dev: '$testdisk'" + testsched="/sys/block/$(basename $testdisk)/queue/scheduler" + if [[ -n "$testdev" ]] && [[ -n "$testdisk" ]] && [[ -e "$testsched" ]]; then + msg " Found active I/O scheduler: $(cat $testsched)" + if [[ ! "$(<$testsched)" =~ \[none\] ]]; then + msg " Disabling elevator for '$testsched'" + echo "none" > "$testsched" + else + msg " Elevator already disabled" + fi + else + warn "Sys node for elevator doesn't exist: '$testsched'" + fi fi # Which distribution are we testing on. @@ -296,6 +313,13 @@ case "$TEST_FLAVOR" in install_test_configs ;; + machine) + rpm -ivh $PACKAGE_DOWNLOAD_DIR/podman-gvproxy* + remove_packaged_podman_files + make install.tools + make install PREFIX=/usr ETCDIR=/etc + install_test_configs + ;; gitlab) # This only runs on Ubuntu for now if [[ "$OS_RELEASE_ID" != "ubuntu" ]]; then diff --git a/pkg/domain/entities/machine.go b/pkg/domain/entities/machine.go new file mode 100644 index 000000000..6ba53dbd1 --- /dev/null +++ b/pkg/domain/entities/machine.go @@ -0,0 +1,18 @@ +package entities + +type ListReporter struct { + Name string + Default bool + Created string + Running bool + Starting bool + LastUp string + Stream string + VMType string + CPUs uint64 + Memory string + DiskSize string + Port int + RemoteUsername string + IdentityPath string +} diff --git a/pkg/machine/e2e/list_test.go b/pkg/machine/e2e/list_test.go index e2121e7bf..9e3e9956c 100644 --- a/pkg/machine/e2e/list_test.go +++ b/pkg/machine/e2e/list_test.go @@ -2,9 +2,10 @@ package e2e import ( "strings" + "time" "github.com/containers/common/pkg/util" - "github.com/containers/podman/v4/cmd/podman/machine" + "github.com/containers/podman/v4/pkg/domain/entities" jsoniter "github.com/json-iterator/go" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" @@ -87,7 +88,7 @@ var _ = Describe("podman machine list", func() { startSession, err := mb.setCmd(s).runWithoutWait() Expect(err).To(BeNil()) l := new(listMachine) - for { // needs to be infinite because we need to check if running when inspect returns to avoid race conditions. + for i := 0; i < 30; i++ { listSession, err := mb.setCmd(l).run() Expect(listSession).To(Exit(0)) Expect(err).To(BeNil()) @@ -96,6 +97,7 @@ var _ = Describe("podman machine list", func() { } else { break } + time.Sleep(3 * time.Second) } Expect(startSession).To(Exit(0)) listSession, err := mb.setCmd(l).run() @@ -132,7 +134,7 @@ var _ = Describe("podman machine list", func() { Expect(err).To(BeNil()) Expect(listSession2).To(Exit(0)) - var listResponse []*machine.ListReporter + var listResponse []*entities.ListReporter err = jsoniter.Unmarshal(listSession.Bytes(), &listResponse) Expect(err).To(BeNil()) diff --git a/pkg/machine/qemu/machine.go b/pkg/machine/qemu/machine.go index 322aa3a15..6134e69e1 100644 --- a/pkg/machine/qemu/machine.go +++ b/pkg/machine/qemu/machine.go @@ -5,6 +5,7 @@ package qemu import ( "bufio" + "bytes" "context" "encoding/base64" "encoding/json" @@ -138,8 +139,10 @@ func (p *Provider) NewMachine(opts machine.InitOptions) (machine.VM, error) { cmd = append(cmd, []string{ "-device", "virtio-serial", // qemu needs to establish the long name; other connections can use the symlink'd - "-chardev", "socket,path=" + vm.ReadySocket.Path + ",server=on,wait=off,id=" + vm.Name + "_ready", - "-device", "virtserialport,chardev=" + vm.Name + "_ready" + ",name=org.fedoraproject.port.0", + // Note both id and chardev start with an extra "a" because qemu requires that it + // starts with an letter but users can also use numbers + "-chardev", "socket,path=" + vm.ReadySocket.Path + ",server=on,wait=off,id=a" + vm.Name + "_ready", + "-device", "virtserialport,chardev=a" + vm.Name + "_ready" + ",name=org.fedoraproject.port.0", "-pidfile", vm.VMPidFilePath.GetPath()}...) vm.CmdLine = cmd return vm, nil @@ -571,15 +574,25 @@ func (v *MachineVM) Start(name string, _ machine.StartOptions) error { files := []*os.File{dnr, dnw, dnw, fd} attr.Files = files logrus.Debug(v.CmdLine) - cmd := v.CmdLine + cmdLine := v.CmdLine // Disable graphic window when not in debug mode // Done in start, so we're not suck with the debug level we used on init if !logrus.IsLevelEnabled(logrus.DebugLevel) { - cmd = append(cmd, "-display", "none") + cmdLine = append(cmdLine, "-display", "none") } - _, err = os.StartProcess(v.CmdLine[0], cmd, attr) + stderrBuf := &bytes.Buffer{} + + cmd := &exec.Cmd{ + Args: cmdLine, + Path: cmdLine[0], + Stdin: dnr, + Stdout: dnw, + Stderr: stderrBuf, + ExtraFiles: []*os.File{fd}, + } + err = cmd.Start() if err != nil { // check if qemu was not found if !errors.Is(err, os.ErrNotExist) { @@ -590,15 +603,17 @@ func (v *MachineVM) Start(name string, _ machine.StartOptions) error { if err != nil { return err } - cmd[0], err = cfg.FindHelperBinary(QemuCommand, true) + cmdLine[0], err = cfg.FindHelperBinary(QemuCommand, true) if err != nil { return err } - _, err = os.StartProcess(cmd[0], cmd, attr) + cmd.Path = cmdLine[0] + err = cmd.Start() if err != nil { return fmt.Errorf("unable to execute %q: %w", cmd, err) } } + defer cmd.Process.Release() //nolint:errcheck fmt.Println("Waiting for VM ...") socketPath, err := getRuntimeDir() if err != nil { @@ -613,6 +628,16 @@ func (v *MachineVM) Start(name string, _ machine.StartOptions) error { if err == nil { break } + // check if qemu is still alive + var status syscall.WaitStatus + pid, err := syscall.Wait4(cmd.Process.Pid, &status, syscall.WNOHANG, nil) + if err != nil { + return fmt.Errorf("failed to read qemu process status: %w", err) + } + if pid > 0 { + // child exited + return fmt.Errorf("qemu exited unexpectedly with exit code %d, stderr: %s", status.ExitStatus(), stderrBuf.String()) + } time.Sleep(wait) wait++ } |