From 7d11c7aa5d627c8d85e2ac2d40bfafcc78f07e95 Mon Sep 17 00:00:00 2001
From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com>
Date: Sat, 25 Apr 2026 18:15:15 +0300
Subject: [PATCH 1/3] Add colour to tokenize CLI output

---
 Doc/library/tokenize.rst                      | 12 +++--
 Doc/whatsnew/3.15.rst                         |  9 ++++
 Lib/_colorize.py                              | 12 +++++
 Lib/test/test_tokenize.py                     |  1 +
 Lib/tokenize.py                               | 52 +++++++++++++++++--
 ...-04-25-18-09-16.gh-issue-148991.AZ64Et.rst |  1 +
 6 files changed, 79 insertions(+), 8 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2026-04-25-18-09-16.gh-issue-148991.AZ64Et.rst

diff --git a/Doc/library/tokenize.rst b/Doc/library/tokenize.rst
index 3db4cf42c17f3d..72fbcaba160660 100644
--- a/Doc/library/tokenize.rst
+++ b/Doc/library/tokenize.rst
@@ -28,7 +28,7 @@ type can be determined by checking the ``exact_type`` property on the
 **undefined** when providing invalid Python code and it can change at any
 point.
 
-Tokenizing Input
+Tokenizing input
 ----------------
 
 The primary entry point is a :term:`generator`:
@@ -146,7 +146,7 @@ function it uses to do this is available:
 
 .. _tokenize-cli:
 
-Command-Line Usage
+Command-line usage
 ------------------
 
 .. versionadded:: 3.3
@@ -173,8 +173,12 @@ The following options are accepted:
    If :file:`filename.py` is specified its contents are tokenized to stdout.
    Otherwise, tokenization is performed on stdin.
 
+.. versionadded:: next
+   Output is in color by default and can be
+   :ref:`controlled using environment variables <using-on-controlling-color>`.
+
 Examples
-------------------
+--------
 
 Example of a script rewriter that transforms float literals into Decimal
 objects::
@@ -227,7 +231,7 @@ Example of tokenizing from the command line.  The script::
 
 will be tokenized to the following output where the first column is the range
 of the line/column coordinates where the token is found, the second column is
-the name of the token, and the final column is the value of the token (if any)
+the name of the token, and the final column is the value of the token (if any):
 
 .. code-block:: shell-session
 
diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst
index 9ccd63bd8795f9..1c0f7f117898c7 100644
--- a/Doc/whatsnew/3.15.rst
+++ b/Doc/whatsnew/3.15.rst
@@ -1217,6 +1217,15 @@ tkinter
   (Contributed by Matthias Kievernagel and Serhiy Storchaka in :gh:`47655`.)
 
+tokenize
+--------
+
+* The output of the :mod:`tokenize` :ref:`command-line interface
+  <tokenize-cli>` is colored by default. This can be controlled with
+  :ref:`environment variables <using-on-controlling-color>`.
+  (Contributed by Hugo van Kemenade in :gh:`148991`.)
+
+
 .. _whatsnew315-tomllib-1-1-0:
 
 tomllib
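
Note: the next hunks give the private `_colorize` module a new `Tokenize`
theme section alongside the existing `Syntax`, `Timeit`, etc. sections.
A minimal sketch of how a theme section is consumed once the patch lands
(illustration only, not part of the patch; `_colorize` is CPython-private
and may change):

    import _colorize  # CPython-private helper, not a public API

    # get_theme() resolves NO_COLOR/FORCE_COLOR (and tty detection) once,
    # so callers never inspect the environment themselves.
    theme = _colorize.get_theme()
    t = theme.tokenize  # the section added below
    # Attributes are plain ANSI escape strings ("" when colour is off):
    print(f"{t.position}1{t.delimiter},{t.position}0{t.delimiter}: NAME")
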
diff --git a/Lib/_colorize.py b/Lib/_colorize.py
index 852ad38f08618e..435af0bb56ae9f 100644
--- a/Lib/_colorize.py
+++ b/Lib/_colorize.py
@@ -375,6 +375,14 @@ class Timeit(ThemeSection):
     reset: str = ANSIColors.RESET
 
 
+@dataclass(frozen=True, kw_only=True)
+class Tokenize(ThemeSection):
+    whitespace: str = ANSIColors.GREY
+    error: str = ANSIColors.BOLD_RED
+    position: str = ANSIColors.GREY
+    delimiter: str = ANSIColors.RESET
+
+
 @dataclass(frozen=True, kw_only=True)
 class Traceback(ThemeSection):
     type: str = ANSIColors.BOLD_MAGENTA
@@ -411,6 +419,7 @@ class Theme:
     live_profiler: LiveProfiler = field(default_factory=LiveProfiler)
     syntax: Syntax = field(default_factory=Syntax)
     timeit: Timeit = field(default_factory=Timeit)
+    tokenize: Tokenize = field(default_factory=Tokenize)
     traceback: Traceback = field(default_factory=Traceback)
     unittest: Unittest = field(default_factory=Unittest)
@@ -424,6 +433,7 @@ def copy_with(
         live_profiler: LiveProfiler | None = None,
         syntax: Syntax | None = None,
         timeit: Timeit | None = None,
+        tokenize: Tokenize | None = None,
         traceback: Traceback | None = None,
         unittest: Unittest | None = None,
     ) -> Self:
@@ -440,6 +450,7 @@ def copy_with(
             live_profiler=live_profiler or self.live_profiler,
             syntax=syntax or self.syntax,
             timeit=timeit or self.timeit,
+            tokenize=tokenize or self.tokenize,
             traceback=traceback or self.traceback,
             unittest=unittest or self.unittest,
         )
@@ -460,6 +471,7 @@ def no_colors(cls) -> Self:
             live_profiler=LiveProfiler.no_colors(),
             syntax=Syntax.no_colors(),
             timeit=Timeit.no_colors(),
+            tokenize=Tokenize.no_colors(),
             traceback=Traceback.no_colors(),
             unittest=Unittest.no_colors(),
         )
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index ca67e381958757..ab53a20cff5539 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -3326,6 +3326,7 @@ def test_newline_at_the_end_of_buffer(self):
         run_test_script(file_name)
 
 
+@support.force_not_colorized_test_class
 class CommandLineTest(unittest.TestCase):
     def setUp(self):
         self.filename = tempfile.mktemp()
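
Note: the tokenize.py changes below hinge on `_get_token_colors()`, which
keys colours by token type number; anything unlisted falls back to
`syntax.reset` at the lookup site. The pattern, sketched with stand-in
escape codes rather than the real theme attributes:

    from token import NAME, NUMBER, tok_name

    colors = {NUMBER: "\x1b[33m"}   # stand-in for syntax.number
    default = "\x1b[0m"             # stand-in for syntax.reset (no colour)
    for tok_type in (NUMBER, NAME):
        # Unmapped token types get the default, exactly as in the patch.
        color = colors.get(tok_type, default)
        print(f"{color}{tok_name[tok_type]}{default}")
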
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 11c134482db024..180b89dcd90335 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -35,6 +35,7 @@
 from token import *
 from token import EXACT_TOKEN_TYPES
 import _tokenize
+lazy import _colorize
 
 cookie_re = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
 blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
@@ -505,6 +506,32 @@ def generate_tokens(readline):
     """
     return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True)
 
+
+def _get_token_colors(syntax, tokenize):
+    """Map token type numbers to theme colors."""
+    return frozendict({
+        COMMENT: syntax.comment,
+        DEDENT: tokenize.whitespace,
+        ENCODING: tokenize.whitespace,
+        ENDMARKER: tokenize.whitespace,
+        ERRORTOKEN: tokenize.error,
+        FSTRING_START: syntax.string,
+        FSTRING_MIDDLE: syntax.string,
+        FSTRING_END: syntax.string,
+        INDENT: tokenize.whitespace,
+        NAME: syntax.reset,
+        NEWLINE: tokenize.whitespace,
+        NL: tokenize.whitespace,
+        NUMBER: syntax.number,
+        OP: syntax.op,
+        SOFT_KEYWORD: syntax.soft_keyword,
+        STRING: syntax.string,
+        TSTRING_START: syntax.string,
+        TSTRING_MIDDLE: syntax.string,
+        TSTRING_END: syntax.string,
+    })
+
+
 def _main(args=None):
     import argparse
@@ -524,7 +551,7 @@ def error(message, filename=None, location=None):
         sys.exit(1)
 
     # Parse the arguments and options
-    parser = argparse.ArgumentParser(color=True)
+    parser = argparse.ArgumentParser()
     parser.add_argument(dest='filename', nargs='?',
                         metavar='filename.py',
                         help='the file to tokenize; defaults to stdin')
@@ -545,13 +572,30 @@ def error(message, filename=None, location=None):
 
         # Output the tokenization
+        _theme = _colorize.get_theme()
+        s = _theme.syntax
+        t = _theme.tokenize
+        _token_colors = _get_token_colors(s, t)
         for token in tokens:
             token_type = token.type
             if args.exact:
                 token_type = token.exact_type
-            token_range = "%d,%d-%d,%d:" % (token.start + token.end)
-            print("%-20s%-15s%-15r" %
-                  (token_range, tok_name[token_type], token.string))
+            token_range = (
+                f"{t.position}{token.start[0]}"
+                f"{t.delimiter},{t.position}{token.start[1]}"
+                f"{t.delimiter}-"
+                f"{t.position}{token.end[0]}"
+                f"{t.delimiter},{t.position}{token.end[1]}"
+                f"{t.delimiter}:"
+            )
+            color = _token_colors.get(token_type, s.reset)
+            token_name = tok_name[token_type]
+            visible_range = f"{token.start[0]},{token.start[1]}-{token.end[0]},{token.end[1]}:"
+            print(
+                f"{token_range}{' ' * (20 - len(visible_range))}"
+                f"{color}{token_name:<15}"
+                f"{s.reset}{token.string!r:<15}"
+            )
     except IndentationError as err:
         line, column = err.args[1][1:3]
         error(err.args[0], filename, (line, column))
diff --git a/Misc/NEWS.d/next/Library/2026-04-25-18-09-16.gh-issue-148991.AZ64Et.rst b/Misc/NEWS.d/next/Library/2026-04-25-18-09-16.gh-issue-148991.AZ64Et.rst
new file mode 100644
index 00000000000000..336ed42e51f1b8
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-04-25-18-09-16.gh-issue-148991.AZ64Et.rst
@@ -0,0 +1 @@
+Add colour to :mod:`tokenize` CLI output. Patch by Hugo van Kemenade.
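
Note on the formatting above: the old `"%-20s"` padded `token_range`
directly, but the coloured range now contains ANSI escapes, which count
towards `len()` while having zero display width. Hence `visible_range`,
an escape-free copy of the same text used only to size the padding.
A minimal demonstration of the pitfall (stand-in escape codes, not the
theme attributes):

    GREY, RESET = "\x1b[90m", "\x1b[0m"
    plain = "1,0-1,4:"
    colored = f"{GREY}1,0{RESET}-{GREY}1,4{RESET}:"
    assert len(colored) > len(plain)   # escapes inflate len() ...
    pad = " " * (20 - len(plain))      # ... so pad from the plain text
    print(colored + pad + "NAME")
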
From 1a0039ab8431ea71b898435586618f755d9c34c5 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych
Date: Sat, 25 Apr 2026 16:41:18 +0100
Subject: [PATCH 2/3] Move to `_print_tokens()`

---
 Lib/tokenize.py | 50 +++++++++++++++++++++++++------------------------
 1 file changed, 26 insertions(+), 24 deletions(-)

diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 180b89dcd90335..47d79c653c938b 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -532,6 +532,31 @@ def _get_token_colors(syntax, tokenize):
     })
 
 
+def _print_tokens(tokens, *, color=False, exact=False):
+    theme = _colorize.get_theme(force_no_color=not color)
+    s = theme.syntax
+    t = theme.tokenize
+    token_colors = _get_token_colors(s, t)
+    for token in tokens:
+        token_type = token.exact_type if exact else token.type
+        token_range = (
+            f"{t.position}{token.start[0]}"
+            f"{t.delimiter},{t.position}{token.start[1]}"
+            f"{t.delimiter}-"
+            f"{t.position}{token.end[0]}"
+            f"{t.delimiter},{t.position}{token.end[1]}"
+            f"{t.delimiter}:"
+        )
+        token_color = token_colors.get(token_type, s.reset)
+        token_name = tok_name[token_type]
+        visible_range = f"{token.start[0]},{token.start[1]}-{token.end[0]},{token.end[1]}:"
+        print(
+            f"{token_range}{' ' * (20 - len(visible_range))}"
+            f"{token_color}{token_name:<15}"
+            f"{s.reset}{token.string!r:<15}"
+        )
+
+
 def _main(args=None):
     import argparse
@@ -572,30 +597,7 @@ def error(message, filename=None, location=None):
 
         # Output the tokenization
-        _theme = _colorize.get_theme()
-        s = _theme.syntax
-        t = _theme.tokenize
-        _token_colors = _get_token_colors(s, t)
-        for token in tokens:
-            token_type = token.type
-            if args.exact:
-                token_type = token.exact_type
-            token_range = (
-                f"{t.position}{token.start[0]}"
-                f"{t.delimiter},{t.position}{token.start[1]}"
-                f"{t.delimiter}-"
-                f"{t.position}{token.end[0]}"
-                f"{t.delimiter},{t.position}{token.end[1]}"
-                f"{t.delimiter}:"
-            )
-            color = _token_colors.get(token_type, s.reset)
-            token_name = tok_name[token_type]
-            visible_range = f"{token.start[0]},{token.start[1]}-{token.end[0]},{token.end[1]}:"
-            print(
-                f"{token_range}{' ' * (20 - len(visible_range))}"
-                f"{color}{token_name:<15}"
-                f"{s.reset}{token.string!r:<15}"
-            )
+        _print_tokens(tokens, color=True, exact=args.exact)
     except IndentationError as err:
         line, column = err.args[1][1:3]
         error(err.args[0], filename, (line, column))

From 1dda979834571ab5bf7403546cac21f8c4f62978 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych
Date: Sat, 25 Apr 2026 17:38:09 +0100
Subject: [PATCH 3/3] Yield instead

---
 Lib/tokenize.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 47d79c653c938b..9839c94d45e9ae 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -532,7 +532,7 @@ def _get_token_colors(syntax, tokenize):
     })
 
 
-def _print_tokens(tokens, *, color=False, exact=False):
+def _format_tokens(tokens, *, color=False, exact=False):
     theme = _colorize.get_theme(force_no_color=not color)
     s = theme.syntax
     t = theme.tokenize
@@ -550,7 +550,7 @@ def _print_tokens(tokens, *, color=False, exact=False):
         token_color = token_colors.get(token_type, s.reset)
         token_name = tok_name[token_type]
         visible_range = f"{token.start[0]},{token.start[1]}-{token.end[0]},{token.end[1]}:"
-        print(
+        yield (
             f"{token_range}{' ' * (20 - len(visible_range))}"
             f"{token_color}{token_name:<15}"
             f"{s.reset}{token.string!r:<15}"
         )
@@ -597,7 +597,8 @@ def error(message, filename=None, location=None):
 
         # Output the tokenization
-        _print_tokens(tokens, color=True, exact=args.exact)
+        for line in _format_tokens(tokens, color=True, exact=args.exact):
+            print(line)
     except IndentationError as err:
         line, column = err.args[1][1:3]
         error(err.args[0], filename, (line, column))
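
Note on the final shape: `_format_tokens()` is now a generator, so the CLI
prints lines as they are produced and tests can inspect the formatted
strings without capturing stdout. A sketch of driving it directly,
assuming the private helper keeps this name and signature:

    import io
    import tokenize

    # Tokenize a small snippet and format it with colour disabled.
    tokens = tokenize.generate_tokens(io.StringIO("x = 1\n").readline)
    for line in tokenize._format_tokens(tokens, color=False, exact=True):
        print(line)

From the command line, colour can still be disabled the usual way, e.g.
`NO_COLOR=1 python -m tokenize -e file.py`.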