#!/usr/bin/perl
#
# xref-helpmsgs-manpages - cross-reference --help options against man pages
#
package LibPod::CI::XrefHelpmsgsManpages;

use v5.14;
use utf8;

use strict;
use warnings;

(our $ME = $0) =~ s|.*/||;
our $VERSION = '0.1';

# For debugging, show data structures using DumpTree($var)
#use Data::TreeDumper; $Data::TreeDumper::Displayaddress = 0;

# unbuffer output
$| = 1;

###############################################################################
# BEGIN user-customizable section

# Path to podman executable
my $Default_Podman = './bin/podman';
my $PODMAN = $ENV{PODMAN} || $Default_Podman;

# Path to all doc files, including .rst and (down one level) markdown
my $Docs_Path = 'docs/source';

# Path to podman markdown source files (of the form podman-*.1.md)
my $Markdown_Path = "$Docs_Path/markdown";

# Global error count
my $Errs = 0;

# END   user-customizable section
###############################################################################

use FindBin;

###############################################################################
# BEGIN boilerplate args checking, usage messages

sub usage {
    print  <<"END_USAGE";
Usage: $ME [OPTIONS]

$ME recursively runs 'podman --help' against
all subcommands; and recursively reads podman-*.1.md files
in $Markdown_Path, then cross-references that each --help
option is listed in the appropriate man page and vice-versa.

$ME invokes '\$PODMAN' (default: $Default_Podman).

Exit status is zero if no inconsistencies found, one otherwise

OPTIONS:

  -v, --verbose  show verbose progress indicators
  -n, --dry-run  make no actual changes

  --help         display this message
  --version      display program name and version
END_USAGE

    exit;
}

# Command-line options.  Note that this operates directly on @ARGV !
our $debug   = 0;
our $verbose = 0;
sub handle_opts {
    use Getopt::Long;
    GetOptions(
        'debug!'     => \$debug,
        'verbose|v'  => \$verbose,

        help         => \&usage,
        version      => sub { print "$ME version $VERSION\n"; exit 0 },
    ) or die "Try `$ME --help' for help\n";
}

# END   boilerplate args checking, usage messages
###############################################################################

############################## CODE BEGINS HERE ###############################

# The term is "modulino".
__PACKAGE__->main()                                     unless caller();

# Main code.
sub main {
    # Note that we operate directly on @ARGV, not on function parameters.
    # This is deliberate: it's because Getopt::Long only operates on @ARGV
    # and there's no clean way to make it use @_.
    handle_opts();                      # will set package globals

    # Fetch command-line arguments.  Barf if too many.
    die "$ME: Too many arguments; try $ME --help\n"                 if @ARGV;

    my $help = podman_help();
    my $man  = podman_man('podman');
    my $rst  = podman_rst();

    xref_by_help($help, $man);
    xref_by_man($help, $man);

    xref_rst($help, $rst);

    exit !!$Errs;
}

###############################################################################
# BEGIN cross-referencing

##################
#  xref_by_help  #  Find keys in '--help' but not in man
##################
sub xref_by_help {
    my ($help, $man, @subcommand) = @_;

    for my $k (sort keys %$help) {
        if (exists $man->{$k}) {
            if (ref $help->{$k}) {
                xref_by_help($help->{$k}, $man->{$k}, @subcommand, $k);
            }
            # Otherwise, non-ref is leaf node such as a --option
        }
        else {
            my $man = $man->{_path} || 'man';
            warn "$ME: podman @subcommand --help lists $k, but $k not in $man\n";
            ++$Errs;
        }
    }
}

#################
#  xref_by_man  #  Find keys in man pages but not in --help
#################
#
# In an ideal world we could share the functionality in one function; but
# there are just too many special cases in man pages.
#
sub xref_by_man {
    my ($help, $man, @subcommand) = @_;

    # FIXME: this generates way too much output
    for my $k (grep { $_ ne '_path' } sort keys %$man) {
        if (exists $help->{$k}) {
            if (ref $man->{$k}) {
                xref_by_man($help->{$k}, $man->{$k}, @subcommand, $k);
            }
        }
        elsif ($k ne '--help' && $k ne '-h') {
            my $man = $man->{_path} || 'man';

            # Special case: podman-inspect serves dual purpose (image, ctr)
            my %ignore = map { $_ => 1 } qw(-l -s -t --latest --size --type);
            next if $man =~ /-inspect/ && $ignore{$k};

            # Special case: podman-diff serves dual purpose (image, ctr)
            my %diffignore = map { $_ => 1 } qw(-l --latest );
            next if $man =~ /-diff/ && $diffignore{$k};

            # Special case: the 'trust' man page is a mess
            next if $man =~ /-trust/;

            # Special case: '--net' is an undocumented shortcut
            next if $k eq '--net' && $help->{'--network'};

            # Special case: these are actually global options
            next if $k =~ /^--(cni-config-dir|runtime)$/ && $man =~ /-build/;

            # Special case: weirdness with Cobra and global/local options
            next if $k eq '--namespace' && $man =~ /-ps/;

            # Special case: these require compiling with 'varlink' tag,
            # which doesn't happen in CI gating task.
            next if $k eq 'varlink';
            next if "@subcommand" eq 'system' && $k eq 'service';

            warn "$ME: podman @subcommand: $k in $man, but not --help\n";
            ++$Errs;
        }
    }
}

##############
#  xref_rst  #  Cross-check *.rst files against help
##############
sub xref_rst {
    my ($help, $rst, @subcommand) = @_;

    # Cross-check against rst (but only subcommands, not options).
    # We key on $help because that is Absolute Truth: anything in podman --help
    # must be referenced in an rst (the converse is not true).
    for my $k (sort grep { $_ !~ /^-/ } keys %$help) {
        # Check for subcommands, if any (eg podman system -> connection -> add)
        if (ref $help->{$k}) {
            xref_rst($help->{$k}, $rst->{$k}, @subcommand, $k);
        }

        # Check that command is mentioned in at least one .rst file
        if (! exists $rst->{$k}{_desc}) {
            my @podman = ("podman", @subcommand, $k);
            warn "$ME: no link in *.rst for @podman\n";
            ++$Errs;
        }
    }
}

# END   cross-referencing
###############################################################################
# BEGIN data gathering

#################
#  podman_help  #  Parse output of 'podman [subcommand] --help'
#################
sub podman_help {
    my %help;
    open my $fh, '-|', $PODMAN, @_, '--help'
        or die "$ME: Cannot fork: $!\n";
    my $section = '';
    while (my $line = <$fh>) {
        # Cobra is blessedly consistent in its output:
        #    Usage: ...
        #    Available Commands:
        #       ....
        #    Flags:
        #       ....
        #
        # Start by identifying the section we're in...
        if ($line =~ /^Available\s+(Commands):/) {
            $section = lc $1;
        }
        elsif ($line =~ /^(Flags):/) {
            $section = lc $1;
        }

        # ...then track commands and options. For subcommands, recurse.
        elsif ($section eq 'commands') {
            if ($line =~ /^\s{1,4}(\S+)\s/) {
                my $subcommand = $1;
                print "> podman @_ $subcommand\n"               if $debug;
                $help{$subcommand} = podman_help(@_, $subcommand)
                    unless $subcommand eq 'help';       # 'help' not in man
            }
        }
        elsif ($section eq 'flags') {
            # Handle '--foo' or '-f, --foo'
            if ($line =~ /^\s{1,10}(--\S+)\s/) {
                print "> podman @_ $1\n"                        if $debug;
                $help{$1} = 1;
            }
            elsif ($line =~ /^\s{1,10}(-\S),\s+(--\S+)\s/) {
                print "> podman @_ $1, $2\n"                    if $debug;
                $help{$1} = $help{$2} = 1;
            }
        }
    }
    close $fh
        or die "$ME: Error running 'podman @_ --help'\n";

    return \%help;
}


################
#  podman_man  #  Parse contents of podman-*.1.md
################
sub podman_man {
    my $command = shift;
    my $subpath = "$Markdown_Path/$command.1.md";
    my $manpath = "$FindBin::Bin/../$subpath";
    print "** $subpath \n"                              if $debug;

    my %man = (_path => $subpath);
    open my $fh, '<', $manpath
        or die "$ME: Cannot read $manpath: $!\n";
    my $section = '';
    my @most_recent_flags;
    my $previous_subcmd = '';
    while (my $line = <$fh>) {
        chomp $line;
        next unless $line;		# skip empty lines

        # .md files designate sections with leading double hash
        if ($line =~ /^##\s*(GLOBAL\s+)?OPTIONS/) {
            $section = 'flags';
        }
        elsif ($line =~ /^\#\#\s+(SUB)?COMMANDS/) {
            $section = 'commands';
        }
        elsif ($line =~ /^\#\#/) {
            $section = '';
        }

        # This will be a table containing subcommand names, links to man pages.
        # The format is slightly different between podman.1.md and subcommands.
        elsif ($section eq 'commands') {
            # In podman.1.md
            if ($line =~ /^\|\s*\[podman-(\S+?)\(\d\)\]/) {
                # $1 will be changed by recursion _*BEFORE*_ left-hand assignment
                my $subcmd = $1;
                $man{$subcmd} = podman_man("podman-$1");
            }

            # In podman-<subcommand>.1.md
            elsif ($line =~ /^\|\s+(\S+)\s+\|\s+\[\S+\]\((\S+)\.1\.md\)/) {
                # $1 will be changed by recursion _*BEFORE*_ left-hand assignment
                my $subcmd = $1;
                if ($previous_subcmd gt $subcmd) {
                    warn "$ME: $subpath: '$previous_subcmd' and '$subcmd' are out of order\n";
                    ++$Errs;
                }
                $previous_subcmd = $subcmd;
                $man{$subcmd} = podman_man($2);
            }
        }

        # Flags should always be of the form '**-f**' or '**--flag**',
        # possibly separated by comma-space.
        elsif ($section eq 'flags') {
            # e.g. 'podman run --ip6', documented in man page, but nonexistent
            if ($line =~ /^not\s+implemented/i) {
                delete $man{$_} for @most_recent_flags;
            }

            @most_recent_flags = ();
            # Handle any variation of '**--foo**, **-f**'
            while ($line =~ s/^\*\*((--[a-z0-9-]+)|(-.))\*\*(,\s+)?//g) {
                $man{$1} = 1;

                # Keep track of them, in case we see 'Not implemented' below
                push @most_recent_flags, $1;
            }
        }
    }
    close $fh;

    # Special case: the 'image trust' man page tries hard to cover both set
    # and show, which means it ends up not being machine-readable.
    if ($command eq 'podman-image-trust') {
        my %set  = %man;
        my %show = %man;
        $show{$_} = 1 for qw(--raw -j --json);
        return +{ set => \%set, show => \%show }
    }

    return \%man;
}


################
#  podman_rst  #  Parse contents of docs/source/*.rst
################
sub podman_rst {
    my %rst;

    # Read all .rst files, looking for ":doc:`subcmd <target>` description"
    for my $rst (glob "$Docs_Path/*.rst") {
        open my $fh, '<', $rst
            or die "$ME: Cannot read $rst: $!\n";

        # The basename of foo.rst is usually, but not always, the name of
        # a podman subcommand. There are a few special cases:
        (my $command = $rst) =~ s!^.*/(.*)\.rst!$1!;

        my $subcommand_href = \%rst;
        if ($command eq 'Commands') {
            ;
        }
        elsif ($command eq 'managecontainers') {
            $subcommand_href = $rst{container} //= { };
        }
        elsif ($command eq 'connection') {
            $subcommand_href = $rst{system}{connection} //= { };
        }
        else {
            $subcommand_href = $rst{$command} //= { };
        }

        my $previous_subcommand = '';
        while (my $line = <$fh>) {
            if ($line =~ /^:doc:`(\S+)\s+<(.*?)>`\s+(.*)/) {
                my ($subcommand, $target, $desc) = ($1, $2, $3);

                # Check that entries are in alphabetical order
                if ($subcommand lt $previous_subcommand) {
                    warn "$ME: $rst:$.: '$previous_subcommand' and '$subcommand' are out of order\n";
                    ++$Errs;
                }
                $previous_subcommand = $subcommand;

                # Mark this subcommand as documented.
                $subcommand_href->{$subcommand}{_desc} = $desc;

                # Check for invalid links. These will be one of two forms:
                #    <markdown/foo.1>     -> markdown/foo.1.md
                #    <foo>                -> foo.rst
                if ($target =~ m!^markdown/!) {
                    if (! -e "$Docs_Path/$target.md") {
                        warn "$ME: $rst:$.: '$subcommand' links to nonexistent $target\n";
                        ++$Errs;
                    }
                }
                else {
                    if (! -e "$Docs_Path/$target.rst") {
                        warn "$ME: $rst:$.: '$subcommand' links to nonexistent $target.rst\n";
                    }
                }
            }
        }
        close $fh;
    }

    # Special case: 'image trust set/show' are documented in image-trust.1
    $rst{image}{trust}{$_} = { _desc => 'ok' } for (qw(set show));

    return \%rst;
}

# END   data gathering
###############################################################################

1;