From 778b8ebe5192e7a7f00563a7456517dfa63e1d90 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 10 Nov 2025 15:04:29 -0700 Subject: docs: Move the python libraries to tools/lib/python "scripts/lib" was always a bit of an awkward place for Python modules. We already have tools/lib; create a tools/lib/python, move the libraries there, and update the users accordingly. While at it, move the contents of tools/docs/lib. Rather than make another directory, just put these documentation-oriented modules under "kdoc". Signed-off-by: Jonathan Corbet Message-ID: <20251110220430.726665-2-corbet@lwn.net> --- tools/lib/python/kdoc/kdoc_re.py | 270 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 tools/lib/python/kdoc/kdoc_re.py (limited to 'tools/lib/python/kdoc/kdoc_re.py') diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py new file mode 100644 index 000000000000..612223e1e723 --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab . + +""" +Regular expression ancillary classes. + +Those help caching regular expressions and do matching for kernel-doc. +""" + +import re + +# Local cache for regular expressions +re_cache = {} + + +class KernRe: + """ + Helper class to simplify regex declaration and usage, + + It calls re.compile for a given pattern. It also allows adding + regular expressions and define sub at class init time. + + Regular expressions can be cached via an argument, helping to speedup + searches. + """ + + def _add_regex(self, string, flags): + """ + Adds a new regex or re-use it from the cache. + """ + self.regex = re_cache.get(string, None) + if not self.regex: + self.regex = re.compile(string, flags=flags) + if self.cache: + re_cache[string] = self.regex + + def __init__(self, string, cache=True, flags=0): + """ + Compile a regular expression and initialize internal vars. + """ + + self.cache = cache + self.last_match = None + + self._add_regex(string, flags) + + def __str__(self): + """ + Return the regular expression pattern. + """ + return self.regex.pattern + + def __add__(self, other): + """ + Allows adding two regular expressions into one. + """ + + return KernRe(str(self) + str(other), cache=self.cache or other.cache, + flags=self.regex.flags | other.regex.flags) + + def match(self, string): + """ + Handles a re.match storing its results + """ + + self.last_match = self.regex.match(string) + return self.last_match + + def search(self, string): + """ + Handles a re.search storing its results + """ + + self.last_match = self.regex.search(string) + return self.last_match + + def findall(self, string): + """ + Alias to re.findall + """ + + return self.regex.findall(string) + + def split(self, string): + """ + Alias to re.split + """ + + return self.regex.split(string) + + def sub(self, sub, string, count=0): + """ + Alias to re.sub + """ + + return self.regex.sub(sub, string, count=count) + + def group(self, num): + """ + Returns the group results of the last match + """ + + return self.last_match.group(num) + + +class NestedMatch: + """ + Finding nested delimiters is hard with regular expressions. It is + even harder on Python with its normal re module, as there are several + advanced regular expressions that are missing. + + This is the case of this pattern: + + '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;' + + which is used to properly match open/close parenthesis of the + string search STRUCT_GROUP(), + + Add a class that counts pairs of delimiters, using it to match and + replace nested expressions. + + The original approach was suggested by: + https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex + + Although I re-implemented it to make it more generic and match 3 types + of delimiters. The logic checks if delimiters are paired. If not, it + will ignore the search string. + """ + + # TODO: make NestedMatch handle multiple match groups + # + # Right now, regular expressions to match it are defined only up to + # the start delimiter, e.g.: + # + # \bSTRUCT_GROUP\( + # + # is similar to: STRUCT_GROUP\((.*)\) + # except that the content inside the match group is delimiter's aligned. + # + # The content inside parenthesis are converted into a single replace + # group (e.g. r`\1'). + # + # It would be nice to change such definition to support multiple + # match groups, allowing a regex equivalent to. + # + # FOO\((.*), (.*), (.*)\) + # + # it is probably easier to define it not as a regular expression, but + # with some lexical definition like: + # + # FOO(arg1, arg2, arg3) + + DELIMITER_PAIRS = { + '{': '}', + '(': ')', + '[': ']', + } + + RE_DELIM = re.compile(r'[\{\}\[\]\(\)]') + + def _search(self, regex, line): + """ + Finds paired blocks for a regex that ends with a delimiter. + + The suggestion of using finditer to match pairs came from: + https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex + but I ended using a different implementation to align all three types + of delimiters and seek for an initial regular expression. + + The algorithm seeks for open/close paired delimiters and place them + into a stack, yielding a start/stop position of each match when the + stack is zeroed. + + The algorithm shoud work fine for properly paired lines, but will + silently ignore end delimiters that preceeds an start delimiter. + This should be OK for kernel-doc parser, as unaligned delimiters + would cause compilation errors. So, we don't need to rise exceptions + to cover such issues. + """ + + stack = [] + + for match_re in regex.finditer(line): + start = match_re.start() + offset = match_re.end() + + d = line[offset - 1] + if d not in self.DELIMITER_PAIRS: + continue + + end = self.DELIMITER_PAIRS[d] + stack.append(end) + + for match in self.RE_DELIM.finditer(line[offset:]): + pos = match.start() + offset + + d = line[pos] + + if d in self.DELIMITER_PAIRS: + end = self.DELIMITER_PAIRS[d] + + stack.append(end) + continue + + # Does the end delimiter match what it is expected? + if stack and d == stack[-1]: + stack.pop() + + if not stack: + yield start, offset, pos + 1 + break + + def search(self, regex, line): + """ + This is similar to re.search: + + It matches a regex that it is followed by a delimiter, + returning occurrences only if all delimiters are paired. + """ + + for t in self._search(regex, line): + + yield line[t[0]:t[2]] + + def sub(self, regex, sub, line, count=0): + """ + This is similar to re.sub: + + It matches a regex that it is followed by a delimiter, + replacing occurrences only if all delimiters are paired. + + if r'\1' is used, it works just like re: it places there the + matched paired data with the delimiter stripped. + + If count is different than zero, it will replace at most count + items. + """ + out = "" + + cur_pos = 0 + n = 0 + + for start, end, pos in self._search(regex, line): + out += line[cur_pos:start] + + # Value, ignoring start/end delimiters + value = line[end:pos - 1] + + # replaces \1 at the sub string, if \1 is used there + new_sub = sub + new_sub = new_sub.replace(r'\1', value) + + out += new_sub + + # Drop end ';' if any + if line[pos] == ';': + pos += 1 + + cur_pos = pos + n += 1 + + if count and count >= n: + break + + # Append the remaining string + l = len(line) + out += line[cur_pos:l] + + return out -- cgit v1.2.3 From 5f88f44d8427a97347afda3a6114aed0df472a0b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 23 Nov 2025 20:10:11 -0800 Subject: docs: kdoc: various fixes for grammar, spelling, punctuation Correct grammar, spelling, and punctuation in comments, strings, print messages, logs. Change two instances of two spaces between words to just one space. codespell was used to find misspelled words. Signed-off-by: Randy Dunlap Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <20251124041011.3030571-1-rdunlap@infradead.org> --- tools/lib/python/kdoc/kdoc_files.py | 4 ++-- tools/lib/python/kdoc/kdoc_output.py | 8 ++++---- tools/lib/python/kdoc/kdoc_parser.py | 14 +++++++------- tools/lib/python/kdoc/kdoc_re.py | 24 ++++++++++++------------ tools/lib/python/kdoc/python_version.py | 2 +- 5 files changed, 26 insertions(+), 26 deletions(-) (limited to 'tools/lib/python/kdoc/kdoc_re.py') diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index 562cdf5261c3..bfe02baf1606 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -64,7 +64,7 @@ class GlobSourceFiles: def parse_files(self, file_list, file_not_found_cb): """ - Define an interator to parse all source files from file_list, + Define an iterator to parse all source files from file_list, handling directories if any """ @@ -229,7 +229,7 @@ class KernelFiles(): Return output messages from a file name using the output style filtering. - If output type was not handled by the syler, return None. + If output type was not handled by the styler, return None. """ # NOTE: we can add rules here to filter out unwanted parts, diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index 14378953301b..b1aaa7fc3604 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -8,7 +8,7 @@ Implement output filters to print kernel-doc documentation. The implementation uses a virtual base class (OutputFormat) which -contains a dispatches to virtual methods, and some code to filter +contains dispatches to virtual methods, and some code to filter out output messages. The actual implementation is done on one separate class per each type @@ -59,7 +59,7 @@ class OutputFormat: OUTPUT_EXPORTED = 2 # output exported symbols OUTPUT_INTERNAL = 3 # output non-exported symbols - # Virtual member to be overriden at the inherited classes + # Virtual member to be overridden at the inherited classes highlights = [] def __init__(self): @@ -85,7 +85,7 @@ class OutputFormat: def set_filter(self, export, internal, symbol, nosymbol, function_table, enable_lineno, no_doc_sections): """ - Initialize filter variables according with the requested mode. + Initialize filter variables according to the requested mode. Only one choice is valid between export, internal and symbol. @@ -208,7 +208,7 @@ class OutputFormat: return self.data # Warn if some type requires an output logic - self.config.log.warning("doesn't now how to output '%s' block", + self.config.log.warning("doesn't know how to output '%s' block", dtype) return None diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index a32682bc050b..500aafc50032 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -22,8 +22,8 @@ from kdoc.kdoc_item import KdocItem # # Regular expressions used to parse kernel-doc markups at KernelDoc class. # -# Let's declare them in lowercase outside any class to make easier to -# convert from the python script. +# Let's declare them in lowercase outside any class to make it easier to +# convert from the Perl script. # # As those are evaluated at the beginning, no need to cache them # @@ -135,7 +135,7 @@ struct_xforms = [ # TODO: use NestedMatch for FOO($1, $2, ...) matches # # it is better to also move those to the NestedMatch logic, - # to ensure that parenthesis will be properly matched. + # to ensure that parentheses will be properly matched. # (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), @@ -155,7 +155,7 @@ struct_xforms = [ (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), ] # -# Regexes here are guaranteed to have the end limiter matching +# Regexes here are guaranteed to have the end delimiter matching # the start delimiter. Yet, right now, only one replace group # is allowed. # @@ -815,7 +815,7 @@ class KernelDoc: def dump_struct(self, ln, proto): """ - Store an entry for an struct or union + Store an entry for a struct or union """ # # Do the basic parse to get the pieces of the declaration. @@ -944,7 +944,7 @@ class KernelDoc: def dump_function(self, ln, prototype): """ - Stores a function of function macro inside self.entries array. + Stores a function or function macro inside self.entries array. """ found = func_macro = False @@ -1179,7 +1179,7 @@ class KernelDoc: # else: self.emit_msg(ln, - f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") + f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}") self.state = state.NORMAL return # diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 612223e1e723..2dfa1bf83d64 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -16,7 +16,7 @@ re_cache = {} class KernRe: """ - Helper class to simplify regex declaration and usage, + Helper class to simplify regex declaration and usage. It calls re.compile for a given pattern. It also allows adding regular expressions and define sub at class init time. @@ -27,7 +27,7 @@ class KernRe: def _add_regex(self, string, flags): """ - Adds a new regex or re-use it from the cache. + Adds a new regex or reuses it from the cache. """ self.regex = re_cache.get(string, None) if not self.regex: @@ -114,7 +114,7 @@ class NestedMatch: '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;' - which is used to properly match open/close parenthesis of the + which is used to properly match open/close parentheses of the string search STRUCT_GROUP(), Add a class that counts pairs of delimiters, using it to match and @@ -136,13 +136,13 @@ class NestedMatch: # \bSTRUCT_GROUP\( # # is similar to: STRUCT_GROUP\((.*)\) - # except that the content inside the match group is delimiter's aligned. + # except that the content inside the match group is delimiter-aligned. # - # The content inside parenthesis are converted into a single replace + # The content inside parentheses is converted into a single replace # group (e.g. r`\1'). # # It would be nice to change such definition to support multiple - # match groups, allowing a regex equivalent to. + # match groups, allowing a regex equivalent to: # # FOO\((.*), (.*), (.*)\) # @@ -168,14 +168,14 @@ class NestedMatch: but I ended using a different implementation to align all three types of delimiters and seek for an initial regular expression. - The algorithm seeks for open/close paired delimiters and place them - into a stack, yielding a start/stop position of each match when the + The algorithm seeks for open/close paired delimiters and places them + into a stack, yielding a start/stop position of each match when the stack is zeroed. - The algorithm shoud work fine for properly paired lines, but will - silently ignore end delimiters that preceeds an start delimiter. + The algorithm should work fine for properly paired lines, but will + silently ignore end delimiters that precede a start delimiter. This should be OK for kernel-doc parser, as unaligned delimiters - would cause compilation errors. So, we don't need to rise exceptions + would cause compilation errors. So, we don't need to raise exceptions to cover such issues. """ @@ -203,7 +203,7 @@ class NestedMatch: stack.append(end) continue - # Does the end delimiter match what it is expected? + # Does the end delimiter match what is expected? if stack and d == stack[-1]: stack.pop() diff --git a/tools/lib/python/kdoc/python_version.py b/tools/lib/python/kdoc/python_version.py index 4fde1b882164..e83088013db2 100644 --- a/tools/lib/python/kdoc/python_version.py +++ b/tools/lib/python/kdoc/python_version.py @@ -139,7 +139,7 @@ class PythonVersion: available_versions = PythonVersion.find_python(min_version) if not available_versions: - print(f"ERROR: Python version {python_ver} is not spported anymore\n") + print(f"ERROR: Python version {python_ver} is not supported anymore\n") print(" Can't find a new version. This script may fail") return -- cgit v1.2.3