From 6236be4ff9ec706926d415a1cb4305ebf49048a7 Mon Sep 17 00:00:00 2001 From: Ed Santiago Date: Wed, 10 Nov 2021 08:44:44 -0700 Subject: [CI:DOCS] Add CI check for SEE ALSO in man pages Add new CI check to confirm that links and references in SEE ALSO sections are properly formatted and that links are valid (at least in theory: we do no actual URL fetching to test for 404). The check is piggybacked into existing xref-helpmsgs-manpages script. It could conceivably be more elegant to write a separate tool for this purpose, but I don't wish to duplicate the logic for finding and reading markdown files. Script identified various problems, which I fix in this PR: . missing '**' (asterisks) around some references, or '**' in the wrong place. . links pointing to github.com/.../tree/ instead of /blob/ (github redirects those automatically, but I like consistency) . a few copy-paste errors, e.g. subgid linking to subuid. Signed-off-by: Ed Santiago --- hack/xref-helpmsgs-manpages | 127 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) (limited to 'hack/xref-helpmsgs-manpages') diff --git a/hack/xref-helpmsgs-manpages b/hack/xref-helpmsgs-manpages index af54f05f3..6a2d627bb 100755 --- a/hack/xref-helpmsgs-manpages +++ b/hack/xref-helpmsgs-manpages @@ -54,6 +54,11 @@ option is listed in the appropriate man page and vice-versa. $ME invokes '\$PODMAN' (default: $Default_Podman). +In the spirit of shoehorning functionality where it wasn't intended, +$ME also checks the SEE ALSO section of each man page +to ensure that references and links are properly formatted +and valid. 
# BEGIN sanity checking of SEE ALSO links

##########################
#  _check_seealso_links  #  Check formatting and link validity.
##########################
#
# Called from podman_man() for lines read while the current man page
# section is 'see-also' (i.e. after a '## SEE ALSO' heading).
#
# Arguments:
#   $path - location prefix for diagnostics; the caller passes "$subpath:$."
#   $line - one line of the SEE ALSO section; empty/false lines are ignored
#
# Reads the file-level $ME, $Markdown_Path and $verbose. Every problem is
# reported with warn() and counted in $Errs. Returns nothing useful.
#
sub _check_seealso_links {
    my $path = shift;
    my $line = shift;

    return if ! $line;

    # Line must be a comma-separated list of man page references, e.g.
    #    **foo(1)**, **[podman-bar(1)](podman-bar.1.md)**, **[xxx(8)](http...)**
  TOKEN:
    for my $token (split /,\s+/, $line) {
        # Elements must be separated by comma and space. (We don't do further
        # checks here, so it's possible for the dev to add the space and then
        # have us fail on the next iteration. I choose not to address that.)
        if ($token =~ /,/) {
            warn "$ME: $path: please add space after comma: '$token'\n";
            ++$Errs;
            next TOKEN;
        }

        # Each token must be of the form '**something**'. On success the
        # substitution strips the asterisks, leaving just 'something'.
        if ($token !~ s/^\*\*(.*)\*\*$/$1/) {
            if ($token =~ /\*\*/) {
                warn "$ME: $path: '$token' has asterisks in the wrong place\n";
            }
            else {
                warn "$ME: $path: '$token' should be bracketed by '**'\n";
            }
            ++$Errs;
            next TOKEN;
        }

        # Is it a markdown link?
        if ($token =~ /^\[(\S+)\]\((\S+)\)$/) {
            my ($name, $link) = ($1, $2);
            if ($name =~ /^(.*)\((\d)\)$/) {
                my ($base, $section) = ($1, $2);

                # A page that exists in our own tree (as markdown source or
                # under links/) must be linked by its relative .md name.
                if (-e "$Markdown_Path/$base.$section.md" || -e "$Markdown_Path/links/$base.$section") {
                    if ($link ne "$base.$section.md") {
                        warn "$ME: $path: inconsistent link $name -> $link, expected $base.$section.md\n";
                        ++$Errs;
                    }
                }
                else {
                    if (! _is_valid_external_link($base, $section, $link)) {
                        warn "$ME: $path: invalid link $name -> $link\n";
                        ++$Errs;
                    }
                }
            }
            else {
                warn "$ME: $path: could not parse '$name' as 'manpage(N)'\n";
                ++$Errs;
            }
        }

        # Not a markdown link; it must be a plain man reference, e.g. 'foo(5)'
        elsif ($token =~ m!^(\S+)\((\d+)\)$!) {
            my ($base, $section) = ($1, $2);

            # Unadorned 'podman-foo(1)' must be a link.
            if (-e "$Markdown_Path/$base.$section.md" || -e "$Markdown_Path/links/$base.$section") {
                warn "$ME: $path: '$token' should be '[$token]($base.$section.md)'\n";
                ++$Errs;
            }

            # Link to man page foo(5) but without a link. This is not an error
            # but Ed may sometimes want to see those on a manual test run.
            # Only shown when the token was not already reported as an error
            # above, so one token never yields two conflicting messages.
            elsif ($verbose) {
                warn "$ME: $path: plain '$token' would be so much nicer as a link\n";
            }
        }
        else {
            warn "$ME: $path: invalid token '$token'\n";
            ++$Errs;
        }
    }
}

#############################
#  _is_valid_external_link  #  Tries to validate links to external man pages
#############################
#
# This performs no actual fetches, so we can't actually check for 404.
# All we do is ensure that links conform to standard patterns. This is
# good for catching things like 'conmon(8)' pointing to a .5 URL, or
# linking to .md instead of .html.
#
# FIXME: we could actually rewrite this so as to offer hints on what to fix.
# That's a lot of work, and a lot of convoluted code, for questionable ROI.
#
# Arguments:
#   $base    - man page name without its section, e.g. 'conmon'
#   $section - man section number, e.g. 8
#   $link    - link target to validate
#
# Returns 1 when $link matches one of the accepted URL shapes for
# $base($section); otherwise a bare return (false). No network access.
#
sub _is_valid_external_link {
    my ($base, $section, $link) = @_;

    # Markdown source on github: must be a /blob/ URL on main or master and
    # must end in <base>.<section>.md (an optional ?query or #fragment is
    # tolerated), so that e.g. '.mdx' or a wrong section is rejected.
    return 1 if $link =~ m!^https://github\.com/\S+/blob/(?:main|master)(?:/.*)?/\Q$base\E\.$section\.md(?:[?#]\S*)?$!;

    # $base comes from man page text and may contain regex metacharacters
    # ('.', '+', ...); \Q...\E makes it match literally.
    return 1 if $link =~ m!^https://.*unix\.com/man-page/(?:linux|redhat)/$section/\Q$base\E$!;

    # Exact string comparison; no escaping is needed inside a plain string.
    return 1 if $link eq "https://man7.org/linux/man-pages/man$section/$base.$section.html";

    # systemd pages on freedesktop.org carry no section in the file name.
    if ($base =~ /systemd/) {
        return 1 if $link eq "https://www.freedesktop.org/software/systemd/man/$base.html";
    }

    return;
}

# END   sanity checking of SEE ALSO links
###############################################################################

1;