summaryrefslogtreecommitdiff
path: root/tools/docs/documentation-file-ref-check
diff options
context:
space:
mode:
authorJonathan Corbet <corbet@lwn.net>2025-11-03 16:25:22 -0700
committerJonathan Corbet <corbet@lwn.net>2025-11-03 16:25:22 -0700
commit77a22121fe17fe78123d345350e0e301de7aed99 (patch)
treeab34024a9005f367ff84bdbfd666473ce25b844a /tools/docs/documentation-file-ref-check
parente849217cf376ece0f43e7a454d9e80a1a337d9b0 (diff)
parent683e8cbaba7f0baf94a774ee17a1c0ddf3b243b4 (diff)
Merge branch 'tools-final2' into docs-mw
Our documentation-related tools are spread out over various directories; several are buried in the scripts/ dumping ground. That makes them harder to discover and harder to maintain. Recent work has started accumulating our documentation-related tools in /tools/docs. This series nearly completes that task, moving most of the rest of our various utilities there, hopefully fixing up all of the relevant references in the process. The one exception is scripts/kernel-doc; that move turned up some other problems, so I have dropped it until those are ironed out. At the end, rather than move the old, Perl kernel-doc, I simply removed it.
Diffstat (limited to 'tools/docs/documentation-file-ref-check')
-rwxr-xr-xtools/docs/documentation-file-ref-check245
1 files changed, 245 insertions, 0 deletions
diff --git a/tools/docs/documentation-file-ref-check b/tools/docs/documentation-file-ref-check
new file mode 100755
index 000000000000..0cad42f6943b
--- /dev/null
+++ b/tools/docs/documentation-file-ref-check
@@ -0,0 +1,245 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+#
+# Treewide grep for references to files under Documentation, and report
+# non-existing files in stderr.
+
+use warnings;
+use strict;
+use Getopt::Long qw(:config no_auto_abbrev);
+
+# NOTE: only add things here when the file was gone, but the text wants
+# to mention a past documentation file, for example, to give credits for
+# the original work.
+my %false_positives = (
+ "Documentation/scsi/scsi_mid_low_api.rst" => "Documentation/Configure.help",
+ "drivers/vhost/vhost.c" => "Documentation/virtual/lguest/lguest.c",
+);
+
+my $scriptname = $0;
+$scriptname =~ s,tools/docs/([^/]+/),$1,;
+
+# Parse arguments
+my $help = 0;
+my $fix = 0;
+my $warn = 0;
+
+if (! -e ".git") {
+ printf "Warning: can't check if file exists, as this is not a git tree\n";
+ exit 0;
+}
+
+GetOptions(
+ 'fix' => \$fix,
+ 'warn' => \$warn,
+ 'h|help|usage' => \$help,
+);
+
+if ($help != 0) {
+ print "$scriptname [--help] [--fix]\n";
+ exit -1;
+}
+
+# Step 1: find broken references
+print "Finding broken references. This may take a while... " if ($fix);
+
+my %broken_ref;
+
+my $doc_fix = 0;
+
+open IN, "git grep ':doc:\`' Documentation/|"
+ or die "Failed to run git grep";
+while (<IN>) {
+ next if (!m,^([^:]+):.*\:doc\:\`([^\`]+)\`,);
+ next if (m,sphinx/,);
+
+ my $file = $1;
+ my $d = $1;
+ my $doc_ref = $2;
+
+ my $f = $doc_ref;
+
+ $d =~ s,(.*/).*,$1,;
+ $f =~ s,.*\<([^\>]+)\>,$1,;
+
+ if ($f =~ m,^/,) {
+ $f = "$f.rst";
+ $f =~ s,^/,Documentation/,;
+ } else {
+ $f = "$d$f.rst";
+ }
+
+ next if (grep -e, glob("$f"));
+
+ if ($fix && !$doc_fix) {
+ print STDERR "\nWARNING: Currently, can't fix broken :doc:`` fields\n";
+ }
+ $doc_fix++;
+
+ print STDERR "$file: :doc:`$doc_ref`\n";
+}
+close IN;
+
+open IN, "git grep 'Documentation/'|"
+ or die "Failed to run git grep";
+while (<IN>) {
+ next if (!m/^([^:]+):(.*)/);
+
+ my $f = $1;
+ my $ln = $2;
+
+ # On linux-next, discard the Next/ directory
+ next if ($f =~ m,^Next/,);
+
+ # Makefiles and scripts contain nasty expressions to parse docs
+ next if ($f =~ m/Makefile/ || $f =~ m/\.(sh|py|pl|~|rej|org|orig)$/);
+
+ # It doesn't make sense to parse hidden files
+ next if ($f =~ m#/\.#);
+
+ # Skip this script
+ next if ($f eq $scriptname);
+
+ # Ignore the dir where documentation will be built
+ next if ($ln =~ m,\b(\S*)Documentation/output,);
+
+ if ($ln =~ m,\b(\S*)(Documentation/[A-Za-z0-9\_\.\,\~/\*\[\]\?+-]*)(.*),) {
+ my $prefix = $1;
+ my $ref = $2;
+ my $base = $2;
+ my $extra = $3;
+
+ # some file references are like:
+ # /usr/src/linux/Documentation/DMA-{API,mapping}.txt
+ # For now, ignore them
+ next if ($extra =~ m/^{/);
+
+ # Remove footnotes at the end like:
+ # Documentation/devicetree/dt-object-internal.txt[1]
+ $ref =~ s/(txt|rst)\[\d+]$/$1/;
+
+ # Remove ending ']' without any '['
+ $ref =~ s/\].*// if (!($ref =~ m/\[/));
+
+ # Remove puntuation marks at the end
+ $ref =~ s/[\,\.]+$//;
+
+ my $fulref = "$prefix$ref";
+
+ $fulref =~ s/^(\<file|ref)://;
+ $fulref =~ s/^[\'\`]+//;
+ $fulref =~ s,^\$\(.*\)/,,;
+ $base =~ s,.*/,,;
+
+ # Remove URL false-positives
+ next if ($fulref =~ m/^http/);
+
+ # Remove sched-pelt false-positive
+ next if ($fulref =~ m,^Documentation/scheduler/sched-pelt$,);
+
+ # Discard some build examples from Documentation/target/tcm_mod_builder.rst
+ next if ($fulref =~ m,mnt/sdb/lio-core-2.6.git/Documentation/target,);
+
+ # Check if exists, evaluating wildcards
+ next if (grep -e, glob("$ref $fulref"));
+
+ # Accept relative Documentation patches for tools/
+ if ($f =~ m/tools/) {
+ my $path = $f;
+ $path =~ s,(.*)/.*,$1,;
+ $path =~ s,testing/selftests/bpf,bpf/bpftool,;
+ next if (grep -e, glob("$path/$ref $path/../$ref $path/$fulref"));
+ }
+
+ # Discard known false-positives
+ if (defined($false_positives{$f})) {
+ next if ($false_positives{$f} eq $fulref);
+ }
+
+ if ($fix) {
+ if (!($ref =~ m/(scripts|Kconfig|Kbuild)/)) {
+ $broken_ref{$ref}++;
+ }
+ } elsif ($warn) {
+ print STDERR "Warning: $f references a file that doesn't exist: $fulref\n";
+ } else {
+ print STDERR "$f: $fulref\n";
+ }
+ }
+}
+close IN;
+
+exit 0 if (!$fix);
+
+# Step 2: Seek for file name alternatives
+print "Auto-fixing broken references. Please double-check the results\n";
+
+foreach my $ref (keys %broken_ref) {
+ my $new =$ref;
+
+ my $basedir = ".";
+ # On translations, only seek inside the translations directory
+ $basedir = $1 if ($ref =~ m,(Documentation/translations/[^/]+),);
+
+ # get just the basename
+ $new =~ s,.*/,,;
+
+ my $f="";
+
+ # usual reason for breakage: DT file moved around
+ if ($ref =~ /devicetree/) {
+ # usual reason for breakage: DT file renamed to .yaml
+ if (!$f) {
+ my $new_ref = $ref;
+ $new_ref =~ s/\.txt$/.yaml/;
+ $f=$new_ref if (-f $new_ref);
+ }
+
+ if (!$f) {
+ my $search = $new;
+ $search =~ s,^.*/,,;
+ $f = qx(find Documentation/devicetree/ -iname "*$search*") if ($search);
+ if (!$f) {
+ # Manufacturer name may have changed
+ $search =~ s/^.*,//;
+ $f = qx(find Documentation/devicetree/ -iname "*$search*") if ($search);
+ }
+ }
+ }
+
+ # usual reason for breakage: file renamed to .rst
+ if (!$f) {
+ $new =~ s/\.txt$/.rst/;
+ $f=qx(find $basedir -iname $new) if ($new);
+ }
+
+ # usual reason for breakage: use dash or underline
+ if (!$f) {
+ $new =~ s/[-_]/[-_]/g;
+ $f=qx(find $basedir -iname $new) if ($new);
+ }
+
+ # Wild guess: seek for the same name on another place
+ if (!$f) {
+ $f = qx(find $basedir -iname $new) if ($new);
+ }
+
+ my @find = split /\s+/, $f;
+
+ if (!$f) {
+ print STDERR "ERROR: Didn't find a replacement for $ref\n";
+ } elsif (scalar(@find) > 1) {
+ print STDERR "WARNING: Won't auto-replace, as found multiple files close to $ref:\n";
+ foreach my $j (@find) {
+ $j =~ s,^./,,;
+ print STDERR " $j\n";
+ }
+ } else {
+ $f = $find[0];
+ $f =~ s,^./,,;
+ print "INFO: Replacing $ref to $f\n";
+ foreach my $j (qx(git grep -l $ref)) {
+ qx(sed "s\@$ref\@$f\@g" -i $j);
+ }
+ }
+}