From 1cf2b3eb28959a8e2c9ccd4b0d40186c6731104d Mon Sep 17 00:00:00 2001
From: Ed Santiago <santiago@redhat.com>
Date: Wed, 14 Apr 2021 10:43:19 -0600
Subject: compose test: ongoing efforts to diagnose flakes

Yay, we got a failure with the new code (#10017). It shows
one ECONNRESET followed by a lot of ECONNREFUSED over an 8-second
period (actually 15s because of the second curl retry).

My hunch: the container itself is dying. No amount of retrying
will get anything to work. So, instead of the second curl retry,
if curl fails, run 'docker-compose logs', 'podman ps -a', and
'ss -tulpn' and hope that one or more of those tells us something
useful when the test flakes again.

Also: DUH! Bitten by one of the most common bash pitfalls.
After 'local var=$(cmd)', the exit status is that of 'local'
itself, not of cmd, so checking it will always yield zero.
Split the declaration and the assignment into separate lines.

Also: if curl fails, return immediately. There's no point in
running the string output comparison.

Also: in _show_ok(), don't emit "actual/expect" messages
if both strings are empty.

Signed-off-by: Ed Santiago <santiago@redhat.com>
---
 test/compose/test-compose | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/test/compose/test-compose b/test/compose/test-compose
index abb957b43..c4c484190 100755
--- a/test/compose/test-compose
+++ b/test/compose/test-compose
@@ -136,8 +136,11 @@ function _show_ok() {
     local expect=$3
     local actual=$4
     printf "${red}not ok $count $testname${reset}\n"
-    printf "${red}#  expected: %s${reset}\n" "$expect"
-    printf "${red}#    actual: ${bold}%s${reset}\n" "$actual"
+    # Not all errors include actual/expect
+    if [[ -n "$expect" || -n "$actual" ]]; then
+        printf "${red}#  expected: %s${reset}\n" "$expect"
+        printf "${red}#    actual: ${bold}%s${reset}\n" "$actual"
+    fi
 
     echo    "not ok $count $testname" >>$LOG
     echo    "  expected: $expect"                     >>$LOG
@@ -164,20 +167,22 @@ function test_port() {
     local expect="$3"            # what to expect from curl output
 
     # -s -S means "silent, but show errors"
-    local actual=$(curl --retry 3 --retry-all-errors -s -S http://127.0.0.1:$port/)
+    local actual
+    actual=$(curl --retry 3 --retry-all-errors -s -S http://127.0.0.1:$port/)
     local curl_rc=$?
 
-    # FIXME 2021-04-13: test is flaking, curl succeeds but returns empty result.
-    # Could it be that the container is not actually ready? Wait, and retry.
-    if [[ $curl_rc -eq 0 && -z "$actual" ]]; then
-        sleep 1
-        echo "# Retrying curl:"
-        actual=$(curl --retry 3 --retry-all-errors -s -S http://127.0.0.1:$port/)
-        curl_rc=$?
-    fi
-
     if [ $curl_rc -ne 0 ]; then
-        _show_ok 0 "$testname - curl failed with status $curl_rc"
+        _show_ok 0 "$testname - curl (port $port) failed with status $curl_rc"
+        # FIXME: is this useful? What else can we do to diagnose?
+        echo "# docker-compose logs:"
+        docker-compose logs
+        echo "# podman ps -a:"
+        $PODMAN_BIN --root $WORKDIR/root --runroot $WORKDIR/runroot ps -a
+        if type -p ss; then
+            echo "# ss -tulpn:"
+            ss -tulpn
+        fi
+        return
     fi
 
     case "$op" in
-- 
cgit v1.2.3-54-g00ecf