summaryrefslogtreecommitdiff
path: root/hack/parse-localbenchmarks
diff options
context:
space:
mode:
author: Ed Santiago <santiago@redhat.com> 2022-05-11 11:44:17 -0600
committer: Ed Santiago <santiago@redhat.com> 2022-05-11 11:44:17 -0600
commit: 1834afd3e927dfef69a09330693f316469018571 (patch)
tree: 8673d67cbec7cbf1e323920e0a34287e0425c8e9 /hack/parse-localbenchmarks
parent: c379014ee4e57dc19669ae92f45f8e4c0814295b (diff)
downloadpodman-1834afd3e927dfef69a09330693f316469018571.tar.gz
podman-1834afd3e927dfef69a09330693f316469018571.tar.bz2
podman-1834afd3e927dfef69a09330693f316469018571.zip
[CI:DOCS] Benchmarks: new tool for parsing results
New script for use with Valentin's benchmarks. Converts ginkgo timing results to CSV format suitable for (TBI) saving and comparing. Signed-off-by: Ed Santiago <santiago@redhat.com>
Diffstat (limited to 'hack/parse-localbenchmarks')
-rwxr-xr-x hack/parse-localbenchmarks 104
1 files changed, 104 insertions, 0 deletions
diff --git a/hack/parse-localbenchmarks b/hack/parse-localbenchmarks
new file mode 100755
index 000000000..6e22cabbb
--- /dev/null
+++ b/hack/parse-localbenchmarks
@@ -0,0 +1,104 @@
+#!/usr/bin/perl
+#
+# parse-localbenchmarks - convert localbenchmarks output to CSV
+#
+# This is a filter. It transforms data from one format to another. Usage:
+#
+# $ make localbenchmarks &> mylogfile
+# $ hack/parse-localbenchmarks <mylogfile > benchmarks.csv
+#
+# To be more precise, this is a very stupid simpleminded filter. It is
+# not a complete solution to the benchmarks problem. In particular,
+# other tools are still needed to:
+#
+# * Actually _run_ the benchmarks in some standard production environment
+# * Run this script on the results
+# * Save results, with identifying tags (datetime, git hash, PR id, ...)
+# * Compare two or more sets of CSVs
+#
+(our $ME = $0) =~ s|^.*/||;     # script name, minus any leading directories
+use v5.14;                      # implies 'use strict'; enables 'say'
+use warnings;                   # not implied by 'use v5.14', so ask for it
+use utf8;
+
+# FIXME: add --help. Some day. Not urgent.
+# Refuse to sit waiting on a terminal: input must be redirected or piped in.
+die "$ME: This is a filter, not an interactive tool\n" if -t *STDIN;
+
+my $n_samples;                  # Number of timing runs (FIXME: unused)
+my %results;                    # Timing results: {test name}{benchmark} = value
+my @benchmarks;                 # Names of benchmarks, in first-seen order
+my ($type, $testname);          # Current context, from the last "[TYPE] name:"
+
+#
+# Pass 1: read in timings
+#
+my %seen_benchmark;             # Benchmark names already in @benchmarks
+while (my $line = <STDIN>) {
+    # Log will have lots of ginkgo output. The only thing we care about is
+    # the summary at the end, which will look something like:
+    #
+    #    * [MEASUREMENT]
+    #    Podman Benchmark Suite
+    #    ....
+    #      Ran 3 samples:
+    #      [CPU] podman images:
+    #        Fastest Time: 0.265s
+    #        Slowest Time: 0.322s
+    #        Average Time: 0.302s ± 0.018s
+    #      [MEM] podman images:
+    #        Smallest: 44076.0KB
+    #        Largest: 44616.0KB
+    #        Average: 44338.7KB ± 171.2KB
+    #      [CPU] podman push:
+    #      ....repeat [CPU] and [MEM] for each test
+    #    --------------------------
+    #    SSSSSSSSSSSSSSSSSSSSS (and more ginkgo output we don't care about)
+    #
+    chomp $line;
+    next unless $line =~ /^.{1,3}\s+\[MEASUREMENT\]/ .. $line =~ /^-{20,}$/;
+
+    # Trim leading & trailing whitespace
+    $line =~ s/(^\s+|\s+$)//g;
+
+    # FIXME: we don't actually emit this. What would be a good way to do so?
+    if ($line =~ /^Ran\s+(\d+)\s+samples/) {
+        $n_samples = $1;
+    }
+
+    # e.g., [CPU] podman foo:
+    elsif ($line =~ /^\[([A-Z]+)\]\s+(\S.*\S):$/) {
+        ($type, $testname) = ($1, $2);
+    }
+
+    # e.g., 'Fastest Time: 0.265s'. Only meaningful once a [TYPE] header has
+    # set the context; an earlier "key: value" line falls through to 'else'.
+    elsif (defined $testname && $line =~ /^(\S.*?\S):\s+(.*)/) {
+        my $benchmark = "$type $1";
+        $results{$testname}{$benchmark} = $2;
+
+        # Remember benchmark names in first-seen order, each only once
+        push @benchmarks, $benchmark unless $seen_benchmark{$benchmark}++;
+    }
+
+    else {
+        warn "Cannot grok '$line'\n" if $ENV{DEBUG_PARSELOCALBENCHMARKS};
+    }
+}
+
+#
+# Pass 2: write out CSV
+#
+
+# Quote one CSV field: undef becomes "", embedded double quotes are doubled
+sub csv_field {
+    my $field = $_[0] // '';
+    return '"' . ($field =~ s/"/""/gr) . '"';
+}
+
+# Headings, then one row per test. A missing value is an empty field; note
+# '//' (not '||') in csv_field, so a legitimate "0" is not blanked out.
+say join ', ', map { csv_field($_) } 'Test Name', @benchmarks;
+for my $t (sort keys %results) {
+    my @row = ($t, map { $results{$t}{$_} } @benchmarks);
+    say join ', ', map { csv_field($_) } @row;
+}