9 files changed, 148 insertions, 44 deletions
diff --git a/.cirrus.yml b/.cirrus.yml
index 06f4a565c..81bbe7c8f 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -30,8 +30,10 @@ env:
     PRIOR_FEDORA_NAME: "fedora-35"
     UBUNTU_NAME: "ubuntu-2110"
 
-    # Google-cloud VM Images
+    # Image identifiers
     IMAGE_SUFFIX: "c6211193021923328"
+    FEDORA_AMI_ID: "ami-06a41d8a81ab56afa"
+    # Complete image names
     FEDORA_CACHE_IMAGE_NAME: "fedora-${IMAGE_SUFFIX}"
     PRIOR_FEDORA_CACHE_IMAGE_NAME: "prior-fedora-${IMAGE_SUFFIX}"
     UBUNTU_CACHE_IMAGE_NAME: "ubuntu-${IMAGE_SUFFIX}"
@@ -66,6 +68,8 @@ timeout_in: 60m
 
 gcp_credentials: ENCRYPTED[a28959877b2c9c36f151781b0a05407218cda646c7d047fc556e42f55e097e897ab63ee78369dae141dcf0b46a9d0cdd]
 
+aws_credentials: ENCRYPTED[4ca070bffe28eb9b27d63c568b52970dd46f119c3a83b8e443241e895dbf1737580b4d84eed27a311a2b74287ef9f79f]
+
 
 # Attempt to prevent flakes by confirming all required external/3rd-party
 # services are available and functional.
@@ -577,6 +581,35 @@ rootless_integration_test_task:
     always: *int_logs_artifacts
 
 
+podman_machine_task:
+    name: *std_name_fmt
+    alias: podman_machine
+    only_if: *not_tag_branch_build_docs
+    # Manually-triggered task: This is "expensive" to run.
+    # DO NOT ADD THIS TASK AS DEPENDENCY FOR `success_task`
+    # it will cause 'success' to block.
+    trigger_type: manual
+    depends_on:
+        - build
+        - local_integration_test
+        - remote_integration_test
+        - container_integration_test
+        - rootless_integration_test
+    ec2_instance:
+        image: "${VM_IMAGE_NAME}"
+        type: m5zn.metal  # Bare-metal instance is required
+        region: us-east-1
+    env:
+      TEST_FLAVOR: "machine"
+      PRIV_NAME: "rootless"  # intended use-case
+      DISTRO_NV: "${FEDORA_NAME}"
+      VM_IMAGE_NAME: "${FEDORA_AMI_ID}"
+    clone_script: *get_gosrc
+    setup_script: *setup
+    main_script: *main
+    always: *int_logs_artifacts
+
+
 # Always run subsequent to integration tests.  While parallelism is lost
 # with runtime, debugging system-test failures can be more challenging
 # for some golang developers.  Otherwise the following tasks run across
@@ -842,6 +875,9 @@ success_task:
         - remote_integration_test
         - container_integration_test
         - rootless_integration_test
+        # Manually triggered task. If made automatic, remove bypass
+        # in contrib/cirrus/cirrus_yaml_test.py for this task.
+        # - podman_machine
         - local_system_test
         - remote_system_test
         - rootless_system_test
diff --git a/Makefile b/Makefile
index 781e491fe..32de58856 100644
--- a/Makefile
+++ b/Makefile
@@ -112,6 +112,10 @@ LIBSECCOMP_COMMIT := v2.3.3
 # Rarely if ever should integration tests take more than 50min,
 # caller may override in special circumstances if needed.
 GINKGOTIMEOUT ?= -timeout=90m
+# By default, run test/e2e
+GINKGOWHAT ?= test/e2e/.
+# By default, run tests in parallel across 3 nodes.
+GINKGONODES ?= 3
 
 # Conditional required to produce empty-output if binary not built yet.
 RELEASE_VERSION = $(shell if test -x test/version/version; then test/version/version; fi)
@@ -524,7 +528,7 @@ test: localunit localintegration remoteintegration localsystem remotesystem  ##
 .PHONY: ginkgo-run
 ginkgo-run:
 	ACK_GINKGO_RC=true ginkgo version
-	ACK_GINKGO_RC=true ginkgo -v $(TESTFLAGS) -tags "$(TAGS)" $(GINKGOTIMEOUT) -cover -flakeAttempts 3 -progress -trace -noColor -nodes 3 -debug test/e2e/. $(HACK)
+	ACK_GINKGO_RC=true ginkgo -v $(TESTFLAGS) -tags "$(TAGS)" $(GINKGOTIMEOUT) -cover -flakeAttempts 3 -progress -trace -noColor -nodes $(GINKGONODES) -debug $(GINKGOWHAT) $(HACK)
 
 .PHONY: ginkgo
 ginkgo:
@@ -540,6 +544,10 @@ localintegration: test-binaries ginkgo
 .PHONY: remoteintegration
 remoteintegration: test-binaries ginkgo-remote
 
+.PHONY: localmachine
+localmachine: test-binaries
+	$(MAKE) ginkgo-run GINKGONODES=1 GINKGOWHAT=pkg/machine/e2e/. HACK=
+
 .PHONY: localbenchmarks
 localbenchmarks: test-binaries
 	PATH=$(PATH):$(shell pwd)/hack ACK_GINKGO_RC=true ginkgo \
diff --git a/cmd/podman/machine/list.go b/cmd/podman/machine/list.go
index b1e31566f..dd4a86697 100644
--- a/cmd/podman/machine/list.go
+++ b/cmd/podman/machine/list.go
@@ -16,6 +16,7 @@ import (
 	"github.com/containers/podman/v4/cmd/podman/common"
 	"github.com/containers/podman/v4/cmd/podman/registry"
 	"github.com/containers/podman/v4/cmd/podman/validate"
+	"github.com/containers/podman/v4/pkg/domain/entities"
 	"github.com/containers/podman/v4/pkg/machine"
 	"github.com/docker/go-units"
 	"github.com/spf13/cobra"
@@ -44,23 +45,6 @@ type listFlagType struct {
 	quiet     bool
 }
 
-type ListReporter struct {
-	Name           string
-	Default        bool
-	Created        string
-	Running        bool
-	Starting       bool
-	LastUp         string
-	Stream         string
-	VMType         string
-	CPUs           uint64
-	Memory         string
-	DiskSize       string
-	Port           int
-	RemoteUsername string
-	IdentityPath   string
-}
-
 func init() {
 	registry.Commands = append(registry.Commands, registry.CliCommand{
 		Command: lsCmd,
@@ -70,7 +54,7 @@ func init() {
 	flags := lsCmd.Flags()
 	formatFlagName := "format"
 	flags.StringVar(&listFlag.format, formatFlagName, "{{.Name}}\t{{.VMType}}\t{{.Created}}\t{{.LastUp}}\t{{.CPUs}}\t{{.Memory}}\t{{.DiskSize}}\n", "Format volume output using JSON or a Go template")
-	_ = lsCmd.RegisterFlagCompletionFunc(formatFlagName, common.AutocompleteFormat(&ListReporter{}))
+	_ = lsCmd.RegisterFlagCompletionFunc(formatFlagName, common.AutocompleteFormat(&entities.ListReporter{}))
 	flags.BoolVar(&listFlag.noHeading, "noheading", false, "Do not print headers")
 	flags.BoolVarP(&listFlag.quiet, "quiet", "q", false, "Show only machine names")
 }
@@ -123,8 +107,8 @@ func list(cmd *cobra.Command, args []string) error {
 	return outputTemplate(cmd, machineReporter)
 }
 
-func outputTemplate(cmd *cobra.Command, responses []*ListReporter) error {
-	headers := report.Headers(ListReporter{}, map[string]string{
+func outputTemplate(cmd *cobra.Command, responses []*entities.ListReporter) error {
+	headers := report.Headers(entities.ListReporter{}, map[string]string{
 		"LastUp":   "LAST UP",
 		"VmType":   "VM TYPE",
 		"CPUs":     "CPUS",
@@ -183,15 +167,15 @@ func streamName(imageStream string) string {
 	return imageStream
 }
 
-func toMachineFormat(vms []*machine.ListResponse) ([]*ListReporter, error) {
+func toMachineFormat(vms []*machine.ListResponse) ([]*entities.ListReporter, error) {
 	cfg, err := config.ReadCustomConfig()
 	if err != nil {
 		return nil, err
 	}
 
-	machineResponses := make([]*ListReporter, 0, len(vms))
+	machineResponses := make([]*entities.ListReporter, 0, len(vms))
 	for _, vm := range vms {
-		response := new(ListReporter)
+		response := new(entities.ListReporter)
 		response.Default = vm.Name == cfg.Engine.ActiveService
 		response.Name = vm.Name
 		response.Running = vm.Running
@@ -211,15 +195,15 @@ func toMachineFormat(vms []*machine.ListResponse) ([]*ListReporter, error) {
 	return machineResponses, nil
 }
 
-func toHumanFormat(vms []*machine.ListResponse) ([]*ListReporter, error) {
+func toHumanFormat(vms []*machine.ListResponse) ([]*entities.ListReporter, error) {
 	cfg, err := config.ReadCustomConfig()
 	if err != nil {
 		return nil, err
 	}
 
-	humanResponses := make([]*ListReporter, 0, len(vms))
+	humanResponses := make([]*entities.ListReporter, 0, len(vms))
 	for _, vm := range vms {
-		response := new(ListReporter)
+		response := new(entities.ListReporter)
 		if vm.Name == cfg.Engine.ActiveService {
 			response.Name = vm.Name + "*"
 			response.Default = true
diff --git a/contrib/cirrus/cirrus_yaml_test.py b/contrib/cirrus/cirrus_yaml_test.py
index a7fff8d3f..3968b8b1b 100755
--- a/contrib/cirrus/cirrus_yaml_test.py
+++ b/contrib/cirrus/cirrus_yaml_test.py
@@ -26,7 +26,7 @@ class TestCaseBase(unittest.TestCase):
 class TestDependsOn(TestCaseBase):
 
     ALL_TASK_NAMES = None
-    SUCCESS_DEPS_EXCLUDE = set(['success', 'artifacts',
+    SUCCESS_DEPS_EXCLUDE = set(['success', 'artifacts', 'podman_machine',
         'test_image_build', 'release', 'release_test'])
 
     def setUp(self):
diff --git a/contrib/cirrus/runner.sh b/contrib/cirrus/runner.sh
index d49286ad3..762a3b501 100755
--- a/contrib/cirrus/runner.sh
+++ b/contrib/cirrus/runner.sh
@@ -379,6 +379,13 @@ dotest() {
         |& logformatter
 }
 
+_run_machine() {
+    # Run the podman-machine e2e suite (`make localmachine`), log-formatted.
+    # TODO: manually-triggered today; if that ever changes, first add e.g.:
+    # _bail_if_test_can_be_skipped docs test/e2e test/system test/python
+    make localmachine |& logformatter
+}
+
 # Optimization: will exit if the only PR diffs are under docs/ or tests/
 # with the exception of any given arguments. E.g., don't run e2e or upgrade
 # or bud tests if the only PR changes are in test/system.
diff --git a/contrib/cirrus/setup_environment.sh b/contrib/cirrus/setup_environment.sh
index 9bd35bd06..4952f8dd2 100755
--- a/contrib/cirrus/setup_environment.sh
+++ b/contrib/cirrus/setup_environment.sh
@@ -99,11 +99,28 @@ esac
 if ((CONTAINER==0)); then  # Not yet running inside a container
     # Discovered reemergence of BFQ scheduler bug in kernel 5.8.12-200
     # which causes a kernel panic when system is under heavy I/O load.
-    # Previously discovered in F32beta and confirmed fixed. It's been
-    # observed in F31 kernels as well.  Deploy workaround for all VMs
-    # to ensure a more stable I/O scheduler (elevator).
-    echo "mq-deadline" > /sys/block/sda/queue/scheduler
-    warn "I/O scheduler: $(cat /sys/block/sda/queue/scheduler)"
+    # Disable the I/O scheduler (a.k.a. elevator) for all environments,
+    # leaving optimization up to underlying storage infrastructure.
+    testfs="/"  # mountpoint that experiences the most I/O during testing
+    msg "Querying block device owning partition hosting the '$testfs' filesystem"
+    # Need --nofsroot b/c btrfs appends subvolume label to `source` name
+    testdev=$(findmnt --canonicalize --noheadings --nofsroot --output source --mountpoint "$testfs")
+    msg "    found partition: '$testdev'"
+    testdisk=$(lsblk --noheadings --output pkname --paths "$testdev")
+    msg "    found block dev: '$testdisk'"
+    # pkname is empty when the device has no parent (e.g. a whole disk); quote so basename still succeeds
+    testsched="/sys/block/$(basename "$testdisk")/queue/scheduler"
+    if [[ -n "$testdev" ]] && [[ -n "$testdisk" ]] && [[ -e "$testsched" ]]; then
+        msg "    Found active I/O scheduler: $(cat "$testsched")"
+        if [[ ! "$(<"$testsched")" =~ \[none\]  ]]; then
+            msg "    Disabling elevator for '$testsched'"
+            echo "none" > "$testsched"
+        else
+            msg "    Elevator already disabled"
+        fi
+    else
+        warn "Sys node for elevator doesn't exist: '$testsched'"
+    fi
 fi
 
 # Which distribution are we testing on.
@@ -296,6 +313,13 @@ case "$TEST_FLAVOR" in
 
         install_test_configs
         ;;
+    machine)
+        rpm -ivh $PACKAGE_DOWNLOAD_DIR/podman-gvproxy*
+        remove_packaged_podman_files
+        make install.tools
+        make install PREFIX=/usr ETCDIR=/etc
+        install_test_configs
+        ;;
     gitlab)
         # This only runs on Ubuntu for now
         if [[ "$OS_RELEASE_ID" != "ubuntu" ]]; then
diff --git a/pkg/domain/entities/machine.go b/pkg/domain/entities/machine.go
new file mode 100644
index 000000000..6ba53dbd1
--- /dev/null
+++ b/pkg/domain/entities/machine.go
@@ -0,0 +1,18 @@
+package entities
+
+type ListReporter struct { // one machine's row in "podman machine list" output (Go template and JSON)
+	Name           string // machine name; the human-readable listing appends "*" to the default machine
+	Default        bool   // true when the machine name equals the configured Engine.ActiveService
+	Created        string
+	Running        bool
+	Starting       bool
+	LastUp         string
+	Stream         string
+	VMType         string
+	CPUs           uint64
+	Memory         string
+	DiskSize       string
+	Port           int
+	RemoteUsername string
+	IdentityPath   string
+}
diff --git a/pkg/machine/e2e/list_test.go b/pkg/machine/e2e/list_test.go
index e2121e7bf..9e3e9956c 100644
--- a/pkg/machine/e2e/list_test.go
+++ b/pkg/machine/e2e/list_test.go
@@ -2,9 +2,10 @@ package e2e
 
 import (
 	"strings"
+	"time"
 
 	"github.com/containers/common/pkg/util"
-	"github.com/containers/podman/v4/cmd/podman/machine"
+	"github.com/containers/podman/v4/pkg/domain/entities"
 	jsoniter "github.com/json-iterator/go"
 	. "github.com/onsi/ginkgo"
 	. "github.com/onsi/gomega"
@@ -87,7 +88,7 @@ var _ = Describe("podman machine list", func() {
 		startSession, err := mb.setCmd(s).runWithoutWait()
 		Expect(err).To(BeNil())
 		l := new(listMachine)
-		for { // needs to be infinite because we need to check if running when inspect returns to avoid race conditions.
+		for i := 0; i < 30; i++ {
 			listSession, err := mb.setCmd(l).run()
 			Expect(listSession).To(Exit(0))
 			Expect(err).To(BeNil())
@@ -96,6 +97,7 @@ var _ = Describe("podman machine list", func() {
 			} else {
 				break
 			}
+			time.Sleep(3 * time.Second)
 		}
 		Expect(startSession).To(Exit(0))
 		listSession, err := mb.setCmd(l).run()
@@ -132,7 +134,7 @@ var _ = Describe("podman machine list", func() {
 		Expect(err).To(BeNil())
 		Expect(listSession2).To(Exit(0))
 
-		var listResponse []*machine.ListReporter
+		var listResponse []*entities.ListReporter
 		err = jsoniter.Unmarshal(listSession.Bytes(), &listResponse)
 		Expect(err).To(BeNil())
 
diff --git a/pkg/machine/qemu/machine.go b/pkg/machine/qemu/machine.go
index 322aa3a15..6134e69e1 100644
--- a/pkg/machine/qemu/machine.go
+++ b/pkg/machine/qemu/machine.go
@@ -5,6 +5,7 @@ package qemu
 
 import (
 	"bufio"
+	"bytes"
 	"context"
 	"encoding/base64"
 	"encoding/json"
@@ -138,8 +139,10 @@ func (p *Provider) NewMachine(opts machine.InitOptions) (machine.VM, error) {
 	cmd = append(cmd, []string{
 		"-device", "virtio-serial",
 		// qemu needs to establish the long name; other connections can use the symlink'd
-		"-chardev", "socket,path=" + vm.ReadySocket.Path + ",server=on,wait=off,id=" + vm.Name + "_ready",
-		"-device", "virtserialport,chardev=" + vm.Name + "_ready" + ",name=org.fedoraproject.port.0",
+		// Note: both id and chardev are prefixed with an extra "a" because qemu requires
+		// them to start with a letter, while machine names may start with a digit.
+		"-chardev", "socket,path=" + vm.ReadySocket.Path + ",server=on,wait=off,id=a" + vm.Name + "_ready",
+		"-device", "virtserialport,chardev=a" + vm.Name + "_ready" + ",name=org.fedoraproject.port.0",
 		"-pidfile", vm.VMPidFilePath.GetPath()}...)
 	vm.CmdLine = cmd
 	return vm, nil
@@ -571,15 +574,25 @@ func (v *MachineVM) Start(name string, _ machine.StartOptions) error {
 	files := []*os.File{dnr, dnw, dnw, fd}
 	attr.Files = files
 	logrus.Debug(v.CmdLine)
-	cmd := v.CmdLine
+	cmdLine := v.CmdLine
 
 	// Disable graphic window when not in debug mode
 	// Done in start, so we're not suck with the debug level we used on init
 	if !logrus.IsLevelEnabled(logrus.DebugLevel) {
-		cmd = append(cmd, "-display", "none")
+		cmdLine = append(cmdLine, "-display", "none")
 	}
 
-	_, err = os.StartProcess(v.CmdLine[0], cmd, attr)
+	stderrBuf := &bytes.Buffer{}
+
+	cmd := &exec.Cmd{
+		Args:       cmdLine,
+		Path:       cmdLine[0],
+		Stdin:      dnr,
+		Stdout:     dnw,
+		Stderr:     stderrBuf,
+		ExtraFiles: []*os.File{fd},
+	}
+	err = cmd.Start()
 	if err != nil {
 		// check if qemu was not found
 		if !errors.Is(err, os.ErrNotExist) {
@@ -590,15 +603,17 @@ func (v *MachineVM) Start(name string, _ machine.StartOptions) error {
 		if err != nil {
 			return err
 		}
-		cmd[0], err = cfg.FindHelperBinary(QemuCommand, true)
+		cmdLine[0], err = cfg.FindHelperBinary(QemuCommand, true)
 		if err != nil {
 			return err
 		}
-		_, err = os.StartProcess(cmd[0], cmd, attr)
+		cmd.Path = cmdLine[0]
+		err = cmd.Start()
 		if err != nil {
 			return fmt.Errorf("unable to execute %q: %w", cmd, err)
 		}
 	}
+	defer cmd.Process.Release() //nolint:errcheck
 	fmt.Println("Waiting for VM ...")
 	socketPath, err := getRuntimeDir()
 	if err != nil {
@@ -613,6 +628,16 @@ func (v *MachineVM) Start(name string, _ machine.StartOptions) error {
 		if err == nil {
 			break
 		}
+		// check if qemu is still alive
+		var status syscall.WaitStatus
+		pid, err := syscall.Wait4(cmd.Process.Pid, &status, syscall.WNOHANG, nil)
+		if err != nil {
+			return fmt.Errorf("failed to read qemu process status: %w", err)
+		}
+		if pid > 0 {
+			// child exited
+			return fmt.Errorf("qemu exited unexpectedly with exit code %d, stderr: %s", status.ExitStatus(), stderrBuf.String())
+		}
 		time.Sleep(wait)
 		wait++
 	}