aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEd Santiago <santiago@redhat.com>2020-12-15 14:40:16 -0700
committerEd Santiago <santiago@redhat.com>2021-02-23 06:58:54 -0700
commit79eaadd3fb480255fa0b9fbfc5517425b7c1730b (patch)
treea596b0a5cdc7c3e60aa060e9fceaaee6095ec7d0
parenta6e7d19c463a55a7d3bf6e47faec1906f3f32380 (diff)
downloadpodman-79eaadd3fb480255fa0b9fbfc5517425b7c1730b.tar.gz
podman-79eaadd3fb480255fa0b9fbfc5517425b7c1730b.tar.bz2
podman-79eaadd3fb480255fa0b9fbfc5517425b7c1730b.zip
podman upgrade tests
Initial validation of using podman-in-podman to create an old-podman root, then use new-podman to play with the containers created therein. Signed-off-by: Ed Santiago <santiago@redhat.com>
-rw-r--r--.cirrus.yml33
-rwxr-xr-xcontrib/cirrus/runner.sh4
-rwxr-xr-xcontrib/cirrus/setup_environment.sh1
-rw-r--r--test/system/helpers.bash2
-rw-r--r--test/upgrade/README.md87
-rw-r--r--test/upgrade/helpers.bash11
-rw-r--r--test/upgrade/test-upgrade.bats313
7 files changed, 450 insertions, 1 deletions
diff --git a/.cirrus.yml b/.cirrus.yml
index a23595712..ae8a4dc3a 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -598,6 +598,38 @@ rootless_system_test_task:
main_script: *main
always: *logs_artifacts
+# FIXME: we may want to consider running this from nightly cron instead of CI.
+# The tests are actually pretty quick (less than a minute) but they do rely
+# on pulling images from quay.io, which means we're subject to network flakes.
+#
+# FIXME: how does this env matrix work, anyway? Does it spin up multiple VMs?
+# We might just want to encode the version matrix in runner.sh instead
+upgrade_test_task:
+ name: "Upgrade test: from $PODMAN_UPGRADE_FROM"
+ alias: upgrade_test
+ skip: *tags
+ only_if: *not_docs
+ depends_on:
+ - local_system_test
+ matrix:
+ - env:
+ PODMAN_UPGRADE_FROM: v1.9.0
+ - env:
+ PODMAN_UPGRADE_FROM: v2.0.6
+ - env:
+ PODMAN_UPGRADE_FROM: v2.1.1
+ gce_instance: *standardvm
+ env:
+ TEST_FLAVOR: upgrade_test
+ DISTRO_NV: ${FEDORA_NAME}
+ VM_IMAGE_NAME: ${FEDORA_CACHE_IMAGE_NAME}
+ # ID for re-use of build output
+ _BUILD_CACHE_HANDLE: ${FEDORA_NAME}-build-${CIRRUS_BUILD_ID}
+ clone_script: *noop
+ gopath_cache: *ro_gopath_cache
+ setup_script: *setup
+ main_script: *main
+ always: *logs_artifacts
# This task is critical. It updates the "last-used by" timestamp stored
# in metadata for all VM images. This mechanism functions in tandem with
@@ -654,6 +686,7 @@ success_task:
- local_system_test
- remote_system_test
- rootless_system_test
+ - upgrade_test
- meta
container: *smallcontainer
env:
diff --git a/contrib/cirrus/runner.sh b/contrib/cirrus/runner.sh
index ccbdb63b6..fca9aff93 100755
--- a/contrib/cirrus/runner.sh
+++ b/contrib/cirrus/runner.sh
@@ -70,6 +70,10 @@ function _run_sys() {
dotest system
}
+function _run_upgrade_test() {
+ bats test/upgrade |& logformatter
+}
+
function _run_bindings() {
# shellcheck disable=SC2155
export PATH=$PATH:$GOSRC/hack
diff --git a/contrib/cirrus/setup_environment.sh b/contrib/cirrus/setup_environment.sh
index 4c95d0254..64ea3b7b4 100755
--- a/contrib/cirrus/setup_environment.sh
+++ b/contrib/cirrus/setup_environment.sh
@@ -200,6 +200,7 @@ case "$TEST_FLAVOR" in
compose) ;&
int) ;&
sys) ;&
+ upgrade_test) ;&
bindings) ;&
endpoint)
# Use existing host bits when testing is to happen inside a container
diff --git a/test/system/helpers.bash b/test/system/helpers.bash
index 0572c6866..4a7823852 100644
--- a/test/system/helpers.bash
+++ b/test/system/helpers.bash
@@ -149,7 +149,7 @@ function run_podman() {
echo "$_LOG_PROMPT $PODMAN $*"
# BATS hangs if a subprocess remains and keeps FD 3 open; this happens
# if podman crashes unexpectedly without cleaning up subprocesses.
- run timeout --foreground -v --kill=10 $PODMAN_TIMEOUT $PODMAN "$@" 3>/dev/null
+ run timeout --foreground -v --kill=10 $PODMAN_TIMEOUT $PODMAN $_PODMAN_TEST_OPTS "$@" 3>/dev/null
# without "quotes", multiple lines are glommed together into one
if [ -n "$output" ]; then
echo "$output"
diff --git a/test/upgrade/README.md b/test/upgrade/README.md
new file mode 100644
index 000000000..2979a66d7
--- /dev/null
+++ b/test/upgrade/README.md
@@ -0,0 +1,87 @@
+Background
+==========
+
+For years we've been needing a way to test podman upgrades; this
+became much more critical on December 7, 2020, when Matt disclosed
+a bug he had found over the weekend
+([#8613](https://github.com/containers/podman/issues/8613))
+in which reuse of a previously-defined field name would
+result in fatal JSON decode failures if current-podman were
+to try reading containers created with podman <= 1.8 (FIXME: confirm)
+
+Upgrade testing is a daunting problem; but in the December 12
+Cabal meeting Dan suggested using podman-in-podman. This PR
+is the result of fleshing out that idea.
+
+Overview
+========
+
+The BATS script in this directory fetches and runs an old-podman
+container image from quay.io/podman, uses it to create and run
+a number of containers, then uses new-podman to interact with
+those containers.
+
+As of 2021-02-23 the available old-podman versions are:
+
+```console
+$ ./bin/podman search --list-tags quay.io/podman/stable | awk '$2 ~ /^v/ { print $2}' | sort | column -c 75
+v1.4.2 v1.5.0 v1.6 v1.9.0 v2.0.2 v2.1.1
+v1.4.4 v1.5.1 v1.6.2 v1.9.1 v2.0.6 v2.2.1
+```
+
+Test invocation is:
+```console
+$ sudo env PODMAN=bin/podman PODMAN_UPGRADE_FROM=v1.9.0 PODMAN_UPGRADE_TEST_DEBUG= bats test/upgrade
+```
+(Path assumes you're cd'ed to top-level podman repo). `PODMAN_UPGRADE_FROM`
+can be any of the versions above. `PODMAN_UPGRADE_TEST_DEBUG` is empty
+here, but listed so you can set it `=1` and leave the podman_parent
+container running. Interacting with this container is left as an
+exercise for the reader.
+
+The script will pull the given podman image, invoke it with a scratch
+root directory, and have it do a small set of podman stuff (pull an
+image, create/run some containers). This podman process stays running
+because if it exits, it kills containers running inside the container.
+
+We then invoke the current (host-installed) podman, using the same
+scratch root directory, and perform operations on those images and
+containers. Most of those operations are done in individual @tests.
+
+The goal is to have this upgrade test run in CI, iterating over a
+loop of known old versions. This list would need to be hand-maintained
+and updated on new releases. There might also need to be extra
+configuration defined, such as per-version commands (see below).
+
+Findings
+========
+
+Well, first, `v1.6.2` won't work on default f32/f33: the image
+does not include `crun`, so it can't work at all:
+
+ ERRO[0000] oci runtime "runc" does not support CGroups V2: use system migrate to mitigate
+
+I realize that it's kind of stupid not to test 1.6, since that's
+precisely the test that would've caught #8613 early, but I just
+don't think it's worth the hassle of setting up cgroupsv1 VMs.
+
+For posterity, in an earlier incantation of this script I tried
+booting f32 into cgroupsv1 and ran into the following warnings
+when running new-podman on old-containers:
+```
+ERRO[0000] error joining network namespace for container 322b66d94640e31b2e6921565445cf0dade4ec13cabc16ee5f29292bdc038341: error retrieving network namespace at /var/run/netns/cni-577e2289-2c05-2e28-3c3d-002a5596e7da: failed to Statfs "/var/run/netns/cni-577e2289
+```
+
+Where To Go From Here
+=====================
+
+* Tests are still (2021-02-23) incomplete, with several failing outright.
+ See FIXMEs in the code.
+
+* Figuring out how/if to run rootless. I think this is possible, perhaps
+ even necessary, but will be tricky to get right because of home-directory
+ mounting.
+
+* Figuring out how/if to run variations with different config files
+ (e.g. running OLD-PODMAN that creates a user libpod.conf, tweaking
+ that in the test, then running NEW-PODMAN upgrate tests)
diff --git a/test/upgrade/helpers.bash b/test/upgrade/helpers.bash
new file mode 100644
index 000000000..41d9279e6
--- /dev/null
+++ b/test/upgrade/helpers.bash
@@ -0,0 +1,11 @@
+# -*- bash -*-
+
+load "../system/helpers"
+
+setup() {
+ :
+}
+
+teardown() {
+ :
+}
diff --git a/test/upgrade/test-upgrade.bats b/test/upgrade/test-upgrade.bats
new file mode 100644
index 000000000..dd827b398
--- /dev/null
+++ b/test/upgrade/test-upgrade.bats
@@ -0,0 +1,313 @@
+# -*- bats -*-
+
+load helpers
+
+# Create a var-lib-containers dir for this podman. We need to bind-mount
+# this into the container, and use --root and --runroot and --tmpdir
+# options both in the container podman and out here: that's the only
+# way to share image and container storage.
+if [ -z "${PODMAN_UPGRADE_WORKDIR}" ]; then
+ # Much as I'd love a descriptive name like "podman-upgrade-tests.XXXXX",
+ # keep it short ("pu") because of the 100-character path length limit
+ # for UNIX sockets (needed by conmon)
+ export PODMAN_UPGRADE_WORKDIR=$(mktemp -d --tmpdir=${BATS_TMPDIR:-${TMPDIR:-/tmp}} pu.XXXXXX)
+
+ touch $PODMAN_UPGRADE_WORKDIR/status
+fi
+
+# Generate a set of random strings used for content verification
+if [ -z "${RANDOM_STRING_1}" ]; then
+ export RANDOM_STRING_1=$(random_string 15)
+ export LABEL_CREATED=$(random_string 16)
+ export LABEL_FAILED=$(random_string 17)
+ export LABEL_RUNNING=$(random_string 18)
+
+ # FIXME: randomize this
+ HOST_PORT=34567
+fi
+
+# Version string of the podman we're actually testing, e.g. '3.0.0-dev-d1a26013'
+PODMAN_VERSION=$($PODMAN version |awk '/^Version:/ { V=$2 } /^Git Commit:/ { G=$3 } END { print V "-" substr(G,0,8) }')
+
+setup() {
+ skip_if_rootless
+
+ # The podman-in-podman image (old podman)
+ if [[ -z "$PODMAN_UPGRADE_FROM" ]]; then
+ echo "# \$PODMAN_UPGRADE_FROM is undefined (should be e.g. v1.9.0)" >&3
+ false
+ fi
+
+ if [ "$(< $PODMAN_UPGRADE_WORKDIR/status)" = "failed" ]; then
+ # FIXME: exit instead?
+ echo "*** setup failed - no point in running tests"
+ false
+ fi
+
+ export _PODMAN_TEST_OPTS="--root=$PODMAN_UPGRADE_WORKDIR/root --runroot=$PODMAN_UPGRADE_WORKDIR/runroot --tmpdir=$PODMAN_UPGRADE_WORKDIR/tmp"
+}
+
+###############################################################################
+# BEGIN setup
+
+@test "initial setup: start $PODMAN_UPGRADE_FROM containers" {
+ echo failed >| $PODMAN_UPGRADE_WORKDIR/status
+
+ OLD_PODMAN=quay.io/podman/stable:$PODMAN_UPGRADE_FROM
+ $PODMAN pull $OLD_PODMAN
+
+ # Shortcut name, because we're referencing it a lot
+ pmroot=$PODMAN_UPGRADE_WORKDIR
+
+ # WWW content to share
+ mkdir -p $pmroot/var/www
+ echo $RANDOM_STRING_1 >$pmroot/var/www/index.txt
+
+ # podman tmpdir
+ mkdir -p $pmroot/tmp
+
+ #
+ # Script to run >>OLD<< podman commands.
+ #
+ # These commands will be run inside a podman container. The "podman"
+ # command in this script will be the desired old-podman version.
+ #
+ pmscript=$pmroot/setup
+ cat >| $pmscript <<EOF
+#!/bin/bash
+
+# cgroup-manager=systemd does not work inside a container
+opts="--cgroup-manager=cgroupfs --events-backend=file $_PODMAN_TEST_OPTS"
+
+set -ex
+
+# Try try again, because network flakiness makes this a point of failure
+podman \$opts pull $IMAGE \
+ || (sleep 10; podman \$opts pull $IMAGE) \
+ || (sleep 30; podman \$opts pull $IMAGE)
+
+
+podman \$opts create --name mycreatedcontainer --label mylabel=$LABEL_CREATED \
+ $IMAGE false
+
+podman \$opts run --name mydonecontainer $IMAGE echo ++$RANDOM_STRING_1++
+
+podman \$opts run --name myfailedcontainer --label mylabel=$LABEL_FAILED \
+ $IMAGE sh -c 'exit 17' || true
+
+# FIXME: add "-p $HOST_PORT:80"
+# ...I tried and tried, and could not get this to work. I could never
+# connect to the port from the host, nor even from the podman_parent
+# container; I could never see the port listed in 'ps' nor 'inspect'.
+# And, finally, I ended up in a state where the container wouldn't
+# even start, and via complicated 'podman logs' found out:
+# httpd: bind: Address in use
+# So I just give up for now.
+#
+podman \$opts run -d --name myrunningcontainer --label mylabel=$LABEL_RUNNING \
+ -v $pmroot/var/www:/var/www \
+ -w /var/www \
+ $IMAGE /bin/busybox-extras httpd -f -p 80
+
+echo READY
+while :;do
+ if [ -e /stop ]; then
+ echo STOPPING
+ podman \$opts stop -t 0 myrunningcontainer || true
+ podman \$opts rm -f myrunningcontainer || true
+ exit 0
+ fi
+ sleep 0.5
+done
+EOF
+ chmod 555 $pmscript
+
+ # Clean up vestiges of previous run
+ $PODMAN rm -f podman_parent || true
+
+ # Not entirely a NOP! This is just so we get /run/crun created on a CI VM
+ $PODMAN run --rm $OLD_PODMAN true
+
+ #
+ # Use new-podman to run the above script under old-podman.
+ #
+ # DO NOT USE run_podman HERE! That would use $_PODMAN_TEST_OPTS
+ # and would write into our shared test dir, which would then
+ # pollute it for use by old-podman. We must keep that pristine
+ # so old-podman is the first to write to it.
+ #
+ $PODMAN run -d --name podman_parent --pid=host \
+ --privileged \
+ --net=host \
+ --cgroupns=host \
+ -v /dev/fuse:/dev/fuse \
+ -v /run/crun:/run/crun \
+ -v $pmroot:$pmroot \
+ $OLD_PODMAN $pmroot/setup
+
+ _PODMAN_TEST_OPTS= wait_for_ready podman_parent
+
+ echo OK >| $PODMAN_UPGRADE_WORKDIR/status
+}
+
+# END setup
+###############################################################################
+# BEGIN actual tests
+
+# This is a NOP; used only so the version string will show up in logs
+@test "upgrade: $PODMAN_UPGRADE_FROM -> $PODMAN_VERSION" {
+ :
+}
+
+@test "images" {
+ run_podman images -a --format '{{.Names}}'
+ is "$output" "\[$IMAGE\]" "podman images"
+}
+
+@test "ps : one container running" {
+ run_podman ps --format '{{.Image}}--{{.Names}}'
+ is "$output" "$IMAGE--myrunningcontainer" "ps: one container running"
+}
+
+@test "ps -a : shows all containers" {
+ # IMPORTANT: we can't use --sort=created, because that requires #8427
+ # on the *creating* podman end.
+ run_podman ps -a \
+ --format '{{.Names}}--{{.Status}}--{{.Ports}}--{{.Labels.mylabel}}' \
+ --sort=names
+ is "${lines[0]}" "mycreatedcontainer--Created----$LABEL_CREATED" "created"
+ is "${lines[1]}" "mydonecontainer--Exited (0).*----<no value>" "done"
+ is "${lines[2]}" "myfailedcontainer--Exited (17) .*----$LABEL_FAILED" "fail"
+ is "${lines[3]}" "myrunningcontainer--Up .*----$LABEL_RUNNING" "running"
+
+ # For debugging: dump containers and IDs
+ if [[ -n "$PODMAN_UPGRADE_TEST_DEBUG" ]]; then
+ run_podman ps -a
+ for l in "${lines[@]}"; do
+ echo "# $l" >&3
+ done
+ fi
+}
+
+
+@test "inspect - all container status" {
+ tests="
+running | running | 0
+created | configured | 0
+done | exited | 0
+failed | exited | 17
+"
+ while read cname state exitstatus; do
+ run_podman inspect --format '{{.State.Status}}--{{.State.ExitCode}}' my${cname}container
+ is "$output" "$state--$exitstatus" "status of my${cname}container"
+ done < <(parse_table "$tests")
+}
+
+@test "logs" {
+ run_podman logs mydonecontainer
+ is "$output" "++$RANDOM_STRING_1++" "podman logs on stopped container"
+
+# run_podman logs myrunningcontainer
+# is "$output" "READY" "podman logs on running container"
+}
+
+@test "exec" {
+ run_podman exec myrunningcontainer cat /var/www/index.txt
+ is "$output" "$RANDOM_STRING_1" "exec into myrunningcontainer"
+}
+
+@test "load" {
+ # FIXME, is this really necessary?
+ skip "TBI. Not sure if there's any point to this."
+}
+
+@test "mount" {
+ skip "TBI"
+}
+
+@test "pods" {
+ skip "TBI"
+}
+
+# FIXME: commit? kill? network? pause? restart? top? volumes? What else?
+
+
+@test "start" {
+ skip "FIXME: this leaves a mount behind: root/overlay/sha/merged"
+ run_podman --cgroup-manager=cgroupfs start -a mydonecontainer
+ is "$output" "++$RANDOM_STRING_1++" "start on already-run container"
+}
+
+@test "rm a stopped container" {
+ # FIXME FIXME FIXME!
+ #
+ # I have no idea what's going on here. For most of my testing in this
+ # section, the code here was simply 'podman rm myfailedcontainer', and
+ # it would succeed, but then way down, in 'cleanup' below, the 'rm -f'
+ # step would fail:
+ #
+ # # podman rm -f podman_parent
+ # error freeing lock for container <sha>: no such file or directory
+ # ...where <sha> is the ID of the podman_parent container.
+ #
+ # I started playing with this section, by adding 'rm mydonecontainer',
+ # and now it always fails, the same way, but with the container we're
+ # removing right here:
+ #
+ # error freeing lock for container <sha>: no such file or directory
+ # ...where <sha> is the ID of mydonecontainer.
+ #
+ # I don't know. I give up for now, and am skip'ing the whole thing.
+ # If you want to play with it, try commenting out the 'myfailed' lines,
+ # or just the 'mydone' ones, or, I don't know.
+ skip "FIXME: error freeing lock for container <sha>: no such file or dir"
+
+ # For debugging, so we can see what 'error freeing lock' refers to
+ run_podman ps -a
+
+ run_podman rm myfailedcontainer
+ is "$output" "[0-9a-f]\\{64\\}" "podman rm myfailedcontainer"
+
+ run_podman rm mydonecontainer
+ is "$output" "[0-9a-f]\\{64\\}" "podman rm mydonecontainer"
+}
+
+
+@test "stop and rm" {
+ # About a ten-second pause, then:
+ # Error: timed out waiting for file /tmp/pu.nf747w/tmp/exits/<sha>: internal libpod error
+ # It doesn't seem to be a socket-length issue: the paths are ~80-88 chars.
+ # Leaving podman_parent running, and exec'ing into it, it doesn't look
+ # like the file is being written to the wrong place.
+ skip "FIXME: this doesn't work: timed out waiting for file tmpdir/exits/sha"
+ run_podman stop myrunningcontainer
+ run_podman rm myrunningcontainer
+}
+
+@test "clean up parent" {
+ if [[ -n "$PODMAN_UPGRADE_TEST_DEBUG" ]]; then
+ skip "workdir is $PODMAN_UPGRADE_WORKDIR"
+ fi
+
+ # We're done with shared environment. By clearing this, we can now
+ # use run_podman for actions on the podman_parent container
+ unset _PODMAN_TEST_OPTS
+
+ # (Useful for debugging the 'rm -f' step below, which, when it fails, only
+ # gives a container ID. This 'ps' confirms that the CID is podman_parent)
+ run_podman ps -a
+
+ # Stop the container gracefully
+ run_podman exec podman_parent touch /stop
+ run_podman wait podman_parent
+
+ run_podman logs podman_parent
+ run_podman rm -f podman_parent
+
+ # FIXME: why does this remain mounted?
+ umount $PODMAN_UPGRADE_WORKDIR/root/overlay || true
+
+ rm -rf $PODMAN_UPGRADE_WORKDIR
+}
+
+# FIXME: now clean up