summaryrefslogtreecommitdiff
path: root/contrib/cirrus/logformatter
blob: 3fa0e5618e9a2dfa66619c466d20697b5729d9b6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
#!/usr/bin/perl
#
# logformatter - highlight a Cirrus test log (ginkgo or bats)
#
# Adapted from https://raw.githubusercontent.com/edsantiago/greasemonkey/podman-ginkgo-highlight
#
package LibPod::CI::LogFormatter;

use v5.14;
use utf8;

# Grumble. CI system doesn't have 'open'
binmode STDIN,  ':utf8';
binmode STDOUT, ':utf8';

use strict;
use warnings;

(our $ME = $0) =~ s|.*/||;

our $VERSION = '0.1';

# For debugging, show data structures using DumpTree($var)
#use Data::TreeDumper; $Data::TreeDumper::Displayaddress = 0;

###############################################################################
# BEGIN user-customizable section

# Stylesheet for highlighting or de-highlighting parts of lines
our $CSS = <<'END_CSS';
/* wrap long lines - don't require user to scroll right */
pre        { line-break: normal; overflow-wrap: normal; white-space: pre-wrap; }

/* synopsis table at top */
table.synopsis { border: none; border-collapse: collapse; margin-left: 2em; margin-top: 2ex; }
.synopsis th   { font-weight: normal; font-size: 110%; text-align: right; }
.synopsis td   { font-weight: bold;   font-size: 120%; font-family: monospace; }

/* test results */
.boring    { color: #999; }
.timestamp { color: #999; }
.log-debug { color: #999; }
.log-info  { color: #333; }
.log-warn  { color: #f60; }
.log-error { color: #900; font-weight: bold; }
.subtest   { background: #eee; }
.subsubtest { color: #F39; font-weight: bold; }
.string    { color: #00c; }
.command   { font-weight: bold; color: #000; }
.changed   { color: #000; font-weight: bold; }

/* links to source files: not as prominent as links to errors */
a.codelink:link    { color: #000; }
a.codelink:visited { color: #666; }
a.codelink:hover   { background: #000; color: #999; }

/* The timing tests at bottom: remove underline, it's too cluttery. */
a.timing           { text-decoration: none; }
.timing:hover      { background: #FF9; }  /* highlight row for easy reading */

/* BATS styles */
.bats-passed    { color: #393; }
.bats-failed    { color: #F00; font-weight: bold; }
.bats-skipped   { color: #F90; }
.bats-log       { color: #900; }
.bats-log-esm   { color: #b00; font-weight: bold; }

.bats-summary   { font-size: 150%; }

/* error titles: display next to timestamp, not on separate line */
h2 { display: inline; }
END_CSS

# END   user-customizable section
###############################################################################

###############################################################################
# BEGIN boilerplate args checking, usage messages

sub usage {
    print  <<"END_USAGE";
Usage: $ME [OPTIONS] TEST_NAME

$ME is a filter; it HTMLifies an input stream (presumably
Ginkgo or BATS log results), writing HTML results to an output file
but passing stdin unmodified to stdout. It is intended to run in
the Cirrus CI environment.

Parameters:

    TEST_NAME   descriptive name; output file will be TEST_NAME.log.html

OPTIONS:

  --help         display this message
  --man          display program man page
  --version      display program name and version
END_USAGE

    exit;
}

# Command-line options.  Note that this operates directly on @ARGV !
our $debug   = 0;
our $force   = 0;
our $verbose = 0;
our $NOT     = '';              # print "blahing the blah$NOT\n" if $debug
sub handle_opts {
    use Getopt::Long;
    GetOptions(
        'debug!'     => \$debug,
        'dry-run|n!' => sub { $NOT = ' [NOT]' },
        'force'      => \$force,
        'verbose|v'  => \$verbose,

        help         => \&usage,
        version      => sub { print "$ME version $VERSION\n"; exit 0 },
    ) or die "Try `$ME --help' for help\n";
}

# END   boilerplate args checking, usage messages
###############################################################################

############################## CODE BEGINS HERE ###############################

# The term is "modulino".
__PACKAGE__->main()                                     unless caller();

# Main code.
sub main {
    # Note that we operate directly on @ARGV, not on function parameters.
    # This is deliberate: it's because Getopt::Long only operates on @ARGV
    # and there's no clean way to make it use @_.
    handle_opts();                      # will set package globals

    # In case someone is tempted to run us on the command line
    die "$ME: this is a filter, not an interactive script\n" if -t *STDIN;

    # Fetch command-line arguments.  Barf if too many.
    my $test_name = shift(@ARGV)
        or die "$ME: missing TEST_NAME argument; try $ME --help\n";
    warn "$ME: Too many arguments; ignoring extras. try $ME --help\n" if @ARGV;

    format_log($test_name);
}


sub format_log {
    my $test_name = shift;              # in: e.g. 'integration_test'

    my $outfile = "$test_name.log.html";
    my $out_tmp = "$outfile.tmp.$$";
    open my $out_fh, '>:utf8', $out_tmp
        or warn "$ME: Cannot create $out_tmp: $!\n";

    # Boilerplate: HTML headers for output file
    print { $out_fh } <<"END_HTML"      if $out_fh;
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html
        PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
         "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>$test_name</title>
<style type="text/css">
$CSS
</style>

<!-- on page load, go to bottom: that's where the error summary is -->
<script language="javascript">
function scrollToBottom() {
    if (window.scrollY < 10) {
        window.scrollTo(0, document.body.scrollHeight);
    }
}
window.addEventListener("load", scrollToBottom, false);
</script>
</head>
<body>
END_HTML

    # Synopsis of this job: show job environment, links to PR and Cirrus
    print { $out_fh } "<h2>Synopsis</h2>\n<hr/>\n",
        job_synopsis($test_name), "<hr/>\n";

    # FOR DEBUGGING: dump environment, but in HTML comments to not clutter
    print { $out_fh } "<!-- Environment: -->\n";
    for my $e (sort keys %ENV) {
        my $val = escapeHTML($ENV{$e});
        $val =~ s/--/-&#x002D;/g;       # double dash not valid in comments
        printf { $out_fh } "<!--  %-20s %s -->\n", $e, $val;
    }

    # State variables
    my $previous_timestamp = '';  # timestamp of previous line
    my $cirrus_task;              # Cirrus task number, used for linking
    my $git_commit;               # git SHA, used for linking to source files
    my $in_failure;               # binary flag: are we in an error dump?
    my $in_timing;                # binary flag: are we in the timing section?
    my $after_divider = 0;        # Count of lines after seeing '-----'
    my $current_output;           # for removing duplication
    my $looks_like_bats;          # binary flag: for detecting BATS results
    my %bats_count;               # For summary line: count of pass/fail/skip

    print { $out_fh } "<pre> <!-- begin processed output -->\n";

    # Main loop: read input, one line at a time, and write out reformatted
  LINE:
    while (my $line = <STDIN>) {
        print $line;                    # Immediately dump back to stdout

        # Remain robust in face of errors: always write stdout even if no HTML
        next LINE if ! $out_fh;

        chomp $line;
        $line =~ s/\0//g;               # Some log files have NULs????
        $line = escapeHTML($line);

        # Temporarily strip off leading timestamp
        $line =~ s/^(\[\+\d+s\]\s)//;
        my $timestamp = $1 || '';
        if ($previous_timestamp && $timestamp eq $previous_timestamp) {
            $timestamp = ' ' x length($timestamp);
        }
        elsif ($timestamp) {
            $previous_timestamp = $timestamp;
        }

        # Try to identify the git commit we're working with...
        if ($line =~ m!/define.gitCommit=([0-9a-f]+)!) {
            $git_commit = $1;
        }
        # ...so we can link to specific lines in source files
        if ($git_commit) {
            #           1  12  3                 34     4 5   526  6
            $line =~ s{^(.*)(\/(containers\/[^/]+)(\/\S+):(\d+))(.*)$}
                      {$1<a class="codelink" href='https://github.com/$3/blob/$git_commit$4#L$5'>$2</a>$6};
        }

        # Try to identify the cirrus task
        if ($line =~ /cirrus-task-(\d+)/) {
            $cirrus_task = $1;
        }

        # BATS handling (used also for apiv2 tests, which emit TAP output)
        if ($line =~ /^1\.\.(\d+)$/ || $line =~ m!/test-apiv2!) {
            $looks_like_bats = 1;
            $bats_count{expected_total} = $1;
        }
        if ($looks_like_bats) {
            my $css;

            # Readability: /long/path/to/podman -> podman (hover for full path)
            # Also make it boldface, to make commands stand out
            $line =~ s{^(#\s+(#|\$)\s+)(\S+/)(podman\S*)(\s.*)}
                      {$1<b><span title="$3$4">$4</span>$5</b>};

            if    ($line =~ /^ok\s.*\s# skip/) { $css = 'skipped' }
            elsif ($line =~ /^ok\s/)           { $css = 'passed'  }
            elsif ($line =~ /^not\s+ok\s/)     { $css = 'failed'  }
            elsif ($line =~ /^#\s#\|\s/)       { $css = 'log-esm' }
            elsif ($line =~ /^#\s/)            { $css = 'log'     }

            if ($css) {
                # Make it linkable, e.g. foo.html#t--00001
                if ($line =~ /^(not\s+)?ok\s+(\d+)/) {
                    $line = sprintf("<a name='t--%05d'>%s</a>", $2, $line);
                }
                $line = "<span class='bats-$css'>$line</span>";

                $bats_count{$css}++;
            }

            print { $out_fh } "<span class=\"timestamp\">$timestamp</span>"
                if $timestamp;
            print { $out_fh } $line, "\n";
            next LINE;
        }

        # Timing section at the bottom of the page
        if ($line =~ / timing results\s*$/) {
            $in_timing = 1;
        }
        elsif ($in_timing) {
            if ($line =~ /^(\S.*\S)\s+(\d+\.\d+)\s*$/) {
                my ($name, $time) = ($1, $2);
                my $id = make_id($1, 'timing');

                # Try to column-align the timing numbers. Some test names
                # will be longer than our max - oh well.
                my $spaces = 80 - length(unescapeHTML($name));
                $spaces = 1 if $spaces < 1;
                $spaces++ if $time < 10;
                my $spacing = ' ' x $spaces;
                $line = qq{<span class="timing"><a href="#t--$id">$name</a>$spacing$time</span>};
            }
            else {
                $in_timing = 0;
            }
        }

        #
        # Ginkgo error reformatting
        #
        if ($line =~ /^.{1,4} (Failure|Panic)( in .*)? \[/) {
            # Begins a block of multiple lines including a stack trace
            print { $out_fh } "<div class='log-error'>\n";
            $in_failure = 1;
        }
        elsif ($line =~ /^-----------/) {
            if ($in_failure) {
                # Ends a stack trace block
                $in_failure = 0;
                print { $out_fh } "</div>\n";
            }
            $after_divider = 1;

            print { $out_fh } "</pre>\n<hr />\n<pre>\n";
            # Always show timestamp at start of each new test
            $previous_timestamp = '';
            next LINE;
        }
        # (bindings test sometimes emits 'Running' with leading bullet char)
        elsif ($line =~ /^•?Running:/) {
            # Highlight the important (non-boilerplate) podman command.
            $line =~ s/\s+--remote\s+/ /g;      # --remote takes no args
            # Strip out the global podman options, but show them on hover
            $line =~ s{(\S+\/podman(-remote)?)((\s+--(root|runroot|runtime|tmpdir|storage-opt|conmon|cgroup-manager|cni-config-dir|storage-driver|events-backend|url) \S+)*)(.*)}{
                my ($full_path, $remote, $options, $args) = ($1, $2||'', $3, $6);

                $options =~ s/^\s+//;
                # Separate each '--foo bar' with newlines for readability
                $options =~ s/ --/\n--/g;
                qq{<span title="$full_path"><b>podman$remote</b></span> <span class=\"boring\" title=\"$options\">[options]</span><b>$args</b>};
            }e;
            $current_output = '';
        }
        # Grrr. 'output:' usually just tells us what we already know.
        elsif ($line =~ /^output:/) {
            $current_output =~ s!^\s+|\s+$!!g;  # Trim leading/trailing blanks
            $current_output =~ s/\s+/ /g;       # Collapse multiple spaces
            if ($line eq "output: $current_output" || $line eq 'output: ') {
                next LINE;
            }
        }
        elsif ($line =~ /^Error:/ || $line =~ / level=(warning|error) /) {
            $line = "<span class='log-warn'>" . $line . "</span>";
        }
        elsif ($line =~ /^panic:/) {
            $line = "<span class='log-error'>" . $line . "</span>";
        }
        else {
            $current_output .= ' ' . $line;
        }


        # Two lines after each divider, there's a test name. Make it
        # an anchor so we can link to it later.
        if ($after_divider++ == 2) {
            # Sigh. There is no actual marker. Assume that anything with
            ## two leading spaces then alpha (not slashes) is a test name.
            if ($line =~ /^  [a-zA-Z]/) {
                my $id = make_id($line, 'anchor');

                $line = "<a name='t--$id'><h2>$line</h2></a>";
            }
        }

        # Failure name corresponds to a previously-seen block.
        ## FIXME: sometimes there are three failures with the same name.
        ##        ...I have no idea why or how to link to the right ones.
        #              1  2           2        3             3   14          4
        if ($line =~ /^(\[(Fail|Panic!)\] .* \[(It|BeforeEach)\] )([A-Za-z].*)/) {
            my ($lhs, $type, $ginkgo_fluff, $testname) = ($1, $2, $3, $4);
            my $id = make_id($testname, 'link');

            $line = "<b>$lhs<a href='#t--$id'>$testname</a></b>";
        }

        print { $out_fh } "<span class=\"timestamp\">$timestamp</span>"
            if $timestamp;
        print { $out_fh } $line, "\n";
    }

    my $have_formatted_log;     # Set on success

    if ($out_fh) {
        # Summary line for BATS tests
        if (keys %bats_count) {
            print { $out_fh } "<hr/><span class='bats-summary'>Summary:";
            my $total = 0;
            my $comma = '';
            for my $class (qw(passed failed skipped)) {
                if (my $n = $bats_count{$class}) {
                    printf { $out_fh } "%s <span class='bats-%s'>%d %s</span>",
                        $comma, $class, $n, ucfirst($class);
                    $total += $n;
                    $comma = ',';
                }
            }

            printf { $out_fh } ". Total tests: $total";
            if (my $expected_total = $bats_count{expected_total}) {
                if ($total != $expected_total) {
                    print { $out_fh } " <span class='bats-failed'>(WARNING: expected $expected_total)</span>";
                }
            }
            print { $out_fh } "</span>\n";
        }

        print { $out_fh } "</pre>  <!-- end processed output -->\n";

        # Did we find a cirrus task? Link back.
        if ($cirrus_task) {
            print { $out_fh } <<"END_HTML";
<hr />
<h3>Cirrus <a href="https://cirrus-ci.com/task/$cirrus_task">task $cirrus_task</a></h3>
END_HTML
        }

        # FIXME: need a safe way to get TZ
        printf { $out_fh } <<"END_HTML", scalar(CORE::localtime);
<hr />
<small>Processed %s by $ME v$VERSION</small>
</body>
</html>
END_HTML

        if (close $out_fh) {
            if (rename $out_tmp => $outfile) {
                $have_formatted_log = 1;
            }
            else {
                warn "$ME: Could not rename $out_tmp: $!\n";
            }
        }
        else {
            warn "$ME: Error writing $out_tmp: $!\n";
        }
    }

    # If Cirrus magic envariables are available, write a link to results.
    # FIXME: it'd be so nice to make this a clickable live link.
    #
    # STATIC_MAGIC_BLOB is the name of a google-storage bucket. It is
    # unlikely to change often, but if it does you will suddenly start
    # seeing errors when trying to view formatted logs:
    #
    #    AccessDeniedAccess denied.Anonymous caller does not have storage.objects.get access to the Google Cloud Storage object.
    #
    # This happened in July 2020 when github.com/containers/libpod was
    # renamed to podman. If something like that ever happens again, you
    # will need to get the new magic blob value from:
    #
    #   https://console.cloud.google.com/storage/browser?project=libpod-218412
    #
    # You will also probably need to set the bucket Public by clicking on
    # the bucket name, then the Permissions tab. This is safe, since this
    # project is fully open-source.
    if ($have_formatted_log && $ENV{CIRRUS_TASK_ID}) {
        my $URL_BASE          = "https://storage.googleapis.com";
        my $STATIC_MAGIC_BLOB = "cirrus-ci-6707778565701632-fcae48";
        my $ARTIFACT_NAME     = "html";

        my $URL = "${URL_BASE}/${STATIC_MAGIC_BLOB}/artifacts/$ENV{CIRRUS_REPO_FULL_NAME}/$ENV{CIRRUS_TASK_ID}/${ARTIFACT_NAME}/${outfile}";

        print "\n\nAnnotated results:\n  $URL\n";
    }
}


#############
#  make_id  #  Given a test name, generate an anchor link name
#############
sub make_id {
    my $name = shift;                   # in: test title
    my $type = shift;                   # in: differentiator (anchor, link)

    state %counter;

    $name =~ s/^\s+|\s+$//g;            # strip leading/trailing whitespace
    $name =~ s/[^a-zA-Z0-9_-]/-/g;      # Convert non-alphanumeric to dash

    # Keep a running tally of how many times we've seen this identifier
    # for this given type! This lets us cross-match, in the bottom of
    # the page, the first/second/third failure of a given test.
    $name .= "--" . ++$counter{$type}{$name};

    $name;
}


###############################################################################
# BEGIN job_synopsis and related helpers

##################
#  job_synopsis  #  Job details, links to github/cirrus
##################
sub job_synopsis {
    my $subtest_name = shift;           # e.g. integration_test

    my $s = <<"END_SYNOPSIS";
<table class="synopsis">
END_SYNOPSIS

    # PR 1234 - title of the pr
    my $pr_title = escapeHTML(_env_replace("{CIRRUS_CHANGE_TITLE}"));
    $s .= _tr("GitHub PR", sprintf("%s - %s",
                                   _a("{CIRRUS_PR}", "https://{CIRRUS_REPO_CLONE_HOST}/{CIRRUS_REPO_FULL_NAME}/pull/{CIRRUS_PR}"),
                                   $pr_title));

    # PR author, if signed-off-by
    if (my $msg = _env_replace("{CIRRUS_COMMIT_MESSAGE}")) {
        while ($msg =~ /^Signed-off-by:\s+(\S.*\S)$/gmi) {
            $s .= _tr("Author", escapeHTML($1));
        }
    }

    # eg "test fedora", "special_testing_rootless"
    # WARNING: As of 2020-10-05, $CIRRUS_TASK_NAME reflects the same
    # descriptive content as our $subtest_name argument (confirm via
    # cross-checking runner.sh:logformatter() vs cirrus.yml:&std_name_fmt).
    # If this ever becomes untrue, just add _tr("Subtest", $subtest_name).
    my $test_name = _env_replace("{CIRRUS_TASK_NAME}");
    # (Special-case cleanup: Cirrus\ quotes\ spaces; remove for readability).
    $test_name =~ s/\\\s+/ /g;
    $s .= _tr("Test name", $test_name);

    # Link to further Cirrus results, e.g. other runs.
    # Build is mostly boring, it's usually TASK that we want to see.
    $s .= _tr("Cirrus", sprintf("<small>Build %s</small> / <b>Task %s</b>",
                                _a("{CIRRUS_BUILD_ID}", "https://cirrus-ci.com/build/{CIRRUS_BUILD_ID}"),
                                _a("{CIRRUS_TASK_ID}", "https://cirrus-ci.com/task/{CIRRUS_TASK_ID}")));

    $s .= "</table>\n";
    return $s;
}


sub _tr {
    my ($th, $td) = @_;
    return "<tr><th>$th:</th><td>$td</td></tr>\n";
}

sub _a {
    my ($name, $href) = map { _env_replace($_) } @_;

    if ($href =~ /UNDEFINED/) {
        return "$name ($href)";
    }
    return "<a href='$href'>$name</a>";
}

sub _env_replace {
    my $s_in = shift;

    $s_in =~ s[\{(.*?)\}][$ENV{$1} || "[$1 UNDEFINED]"]ge;

    return $s_in;
}

# END   job_synopsis and related helpers
###############################################################################
# BEGIN html-formatting helpers

sub escapeHTML {
    my $s = shift;

    state %chars;
    %chars = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;', "'" => '&#39;')
        if keys(%chars) == 0;
    my $class = join('', sort keys %chars);
    $s =~ s/([$class])/$chars{$1}/ge;

    return $s;
}

sub unescapeHTML {
    my $s = shift;

    # We don't actually care about the character, only its length
    $s =~ s/\&\#?[a-z0-9]+;/./g;

    return $s;
}

# END   html-formatting helpers
###############################################################################

1;