From 7d11c7aa5d627c8d85e2ac2d40bfafcc78f07e95 Mon Sep 17 00:00:00 2001
From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com>
Date: Sat, 25 Apr 2026 18:15:15 +0300
Subject: [PATCH 1/3] Add colour to tokenize CLI output

---
 Doc/library/tokenize.rst                      | 12 +++--
 Doc/whatsnew/3.15.rst                         |  9 ++++
 Lib/_colorize.py                              | 12 +++++
 Lib/test/test_tokenize.py                     |  1 +
 Lib/tokenize.py                               | 52 +++++++++++++++++--
 ...-04-25-18-09-16.gh-issue-148991.AZ64Et.rst |  1 +
 6 files changed, 79 insertions(+), 8 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2026-04-25-18-09-16.gh-issue-148991.AZ64Et.rst

diff --git a/Doc/library/tokenize.rst b/Doc/library/tokenize.rst
index 3db4cf42c17f3d..72fbcaba160660 100644
--- a/Doc/library/tokenize.rst
+++ b/Doc/library/tokenize.rst
@@ -28,7 +28,7 @@ type can be determined by checking the ``exact_type`` property on the
 **undefined** when providing invalid Python code and it can change at any
 point.
 
-Tokenizing Input
+Tokenizing input
 ----------------
 
 The primary entry point is a :term:`generator`:
@@ -146,7 +146,7 @@ function it uses to do this is available:
 
 .. _tokenize-cli:
 
-Command-Line Usage
+Command-line usage
 ------------------
 
 .. versionadded:: 3.3
@@ -173,8 +173,12 @@ The following options are accepted:
    If :file:`filename.py` is specified its contents are tokenized to stdout.
    Otherwise, tokenization is performed on stdin.
 
+.. versionadded:: next
+   Output is in color by default and can be
+   :ref:`controlled using environment variables <using-on-controlling-color>`.
+
 Examples
-------------------
+--------
 
 Example of a script rewriter that transforms float literals into Decimal
 objects::
@@ -227,7 +231,7 @@ Example of tokenizing from the command line.  The script::
 
 will be tokenized to the following output where the first column is the range
 of the line/column coordinates where the token is found, the second column is
-the name of the token, and the final column is the value of the token (if any)
+the name of the token, and the final column is the value of the token (if any):
 
 .. code-block:: shell-session
 
diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst
index 9ccd63bd8795f9..1c0f7f117898c7 100644
--- a/Doc/whatsnew/3.15.rst
+++ b/Doc/whatsnew/3.15.rst
@@ -1217,6 +1217,15 @@ tkinter
   (Contributed by Matthias Kievernagel and Serhiy Storchaka in :gh:`47655`.)
 
+tokenize
+--------
+
+* The output of the :mod:`tokenize` :ref:`command-line interface
+  <tokenize-cli>` is colored by default. This can be controlled with
+  :ref:`environment variables <using-on-controlling-color>`.
+  (Contributed by Hugo van Kemenade in :gh:`148991`.)
+
+
 .. _whatsnew315-tomllib-1-1-0:
 
 tomllib
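
Note: the next hunks give the private `_colorize` module a new `Tokenize`
theme section alongside the existing `Syntax`, `Timeit`, etc. sections.
A minimal sketch of how a theme section is consumed once the patch lands
(illustration only, not part of the patch; `_colorize` is CPython-private
and may change):

    import _colorize  # CPython-private helper, not a public API

    # get_theme() resolves NO_COLOR/FORCE_COLOR (and tty detection) once,
    # so callers never inspect the environment themselves.
    theme = _colorize.get_theme()
    t = theme.tokenize  # the section added below
    # Attributes are plain ANSI escape strings ("" when colour is off):
    print(f"{t.position}1{t.delimiter},{t.position}0{t.delimiter}: NAME")
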
diff --git a/Lib/_colorize.py b/Lib/_colorize.py
index 852ad38f08618e..435af0bb56ae9f 100644
--- a/Lib/_colorize.py
+++ b/Lib/_colorize.py
@@ -375,6 +375,14 @@ class Timeit(ThemeSection):
     reset: str = ANSIColors.RESET
 
 
+@dataclass(frozen=True, kw_only=True)
+class Tokenize(ThemeSection):
+    whitespace: str = ANSIColors.GREY
+    error: str = ANSIColors.BOLD_RED
+    position: str = ANSIColors.GREY
+    delimiter: str = ANSIColors.RESET
+
+
 @dataclass(frozen=True, kw_only=True)
 class Traceback(ThemeSection):
     type: str = ANSIColors.BOLD_MAGENTA
@@ -411,6 +419,7 @@ class Theme:
     live_profiler: LiveProfiler = field(default_factory=LiveProfiler)
     syntax: Syntax = field(default_factory=Syntax)
     timeit: Timeit = field(default_factory=Timeit)
+    tokenize: Tokenize = field(default_factory=Tokenize)
     traceback: Traceback = field(default_factory=Traceback)
     unittest: Unittest = field(default_factory=Unittest)
@@ -424,6 +433,7 @@ def copy_with(
         live_profiler: LiveProfiler | None = None,
         syntax: Syntax | None = None,
         timeit: Timeit | None = None,
+        tokenize: Tokenize | None = None,
         traceback: Traceback | None = None,
         unittest: Unittest | None = None,
     ) -> Self:
@@ -440,6 +450,7 @@ def copy_with(
             live_profiler=live_profiler or self.live_profiler,
             syntax=syntax or self.syntax,
             timeit=timeit or self.timeit,
+            tokenize=tokenize or self.tokenize,
             traceback=traceback or self.traceback,
             unittest=unittest or self.unittest,
         )
@@ -460,6 +471,7 @@ def no_colors(cls) -> Self:
             live_profiler=LiveProfiler.no_colors(),
             syntax=Syntax.no_colors(),
             timeit=Timeit.no_colors(),
+            tokenize=Tokenize.no_colors(),
             traceback=Traceback.no_colors(),
             unittest=Unittest.no_colors(),
         )
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index ca67e381958757..ab53a20cff5539 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -3326,6 +3326,7 @@ def test_newline_at_the_end_of_buffer(self):
         run_test_script(file_name)
 
 
+@support.force_not_colorized_test_class
 class CommandLineTest(unittest.TestCase):
     def setUp(self):
         self.filename = tempfile.mktemp()
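
Note: the tokenize.py changes below hinge on `_get_token_colors()`, which
keys colours by token type number; anything unlisted falls back to
`syntax.reset` at the lookup site. The pattern, sketched with stand-in
escape codes rather than the real theme attributes:

    from token import NAME, NUMBER, tok_name

    colors = {NUMBER: "\x1b[33m"}   # stand-in for syntax.number
    default = "\x1b[0m"             # stand-in for syntax.reset (no colour)
    for tok_type in (NUMBER, NAME):
        # Unmapped token types get the default, exactly as in the patch.
        color = colors.get(tok_type, default)
        print(f"{color}{tok_name[tok_type]}{default}")
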
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 11c134482db024..180b89dcd90335 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -35,6 +35,7 @@
 from token import *
 from token import EXACT_TOKEN_TYPES
 import _tokenize
+lazy import _colorize
 
 cookie_re = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
 blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
@@ -505,6 +506,32 @@ def generate_tokens(readline):
     """
     return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True)
 
+
+def _get_token_colors(syntax, tokenize):
+    """Map token type numbers to theme colors."""
+    return frozendict({
+        COMMENT: syntax.comment,
+        DEDENT: tokenize.whitespace,
+        ENCODING: tokenize.whitespace,
+        ENDMARKER: tokenize.whitespace,
+        ERRORTOKEN: tokenize.error,
+        FSTRING_START: syntax.string,
+        FSTRING_MIDDLE: syntax.string,
+        FSTRING_END: syntax.string,
+        INDENT: tokenize.whitespace,
+        NAME: syntax.reset,
+        NEWLINE: tokenize.whitespace,
+        NL: tokenize.whitespace,
+        NUMBER: syntax.number,
+        OP: syntax.op,
+        SOFT_KEYWORD: syntax.soft_keyword,
+        STRING: syntax.string,
+        TSTRING_START: syntax.string,
+        TSTRING_MIDDLE: syntax.string,
+        TSTRING_END: syntax.string,
+    })
+
+
 def _main(args=None):
     import argparse
@@ -524,7 +551,7 @@ def error(message, filename=None, location=None):
         sys.exit(1)
 
     # Parse the arguments and options
-    parser = argparse.ArgumentParser(color=True)
+    parser = argparse.ArgumentParser()
     parser.add_argument(dest='filename', nargs='?',
                         metavar='filename.py',
                         help='the file to tokenize; defaults to stdin')
@@ -545,13 +572,30 @@ def error(message, filename=None, location=None):
 
         # Output the tokenization
+        _theme = _colorize.get_theme()
+        s = _theme.syntax
+        t = _theme.tokenize
+        _token_colors = _get_token_colors(s, t)
         for token in tokens:
             token_type = token.type
             if args.exact:
                 token_type = token.exact_type
-            token_range = "%d,%d-%d,%d:" % (token.start + token.end)
-            print("%-20s%-15s%-15r" %
-                  (token_range, tok_name[token_type], token.string))
+            token_range = (
+                f"{t.position}{token.start[0]}"
+                f"{t.delimiter},{t.position}{token.start[1]}"
+                f"{t.delimiter}-"
+                f"{t.position}{token.end[0]}"
+                f"{t.delimiter},{t.position}{token.end[1]}"
+                f"{t.delimiter}:"
+            )
+            color = _token_colors.get(token_type, s.reset)
+            token_name = tok_name[token_type]
+            visible_range = f"{token.start[0]},{token.start[1]}-{token.end[0]},{token.end[1]}:"
+            print(
+                f"{token_range}{' ' * (20 - len(visible_range))}"
+                f"{color}{token_name:<15}"
+                f"{s.reset}{token.string!r:<15}"
+            )
     except IndentationError as err:
         line, column = err.args[1][1:3]
         error(err.args[0], filename, (line, column))
diff --git a/Misc/NEWS.d/next/Library/2026-04-25-18-09-16.gh-issue-148991.AZ64Et.rst b/Misc/NEWS.d/next/Library/2026-04-25-18-09-16.gh-issue-148991.AZ64Et.rst
new file mode 100644
index 00000000000000..336ed42e51f1b8
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-04-25-18-09-16.gh-issue-148991.AZ64Et.rst
@@ -0,0 +1 @@
+Add colour to :mod:`tokenize` CLI output. Patch by Hugo van Kemenade.
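
Note on the formatting above: the old `"%-20s"` padded `token_range`
directly, but the coloured range now contains ANSI escapes, which count
towards `len()` while having zero display width. Hence `visible_range`,
an escape-free copy of the same text used only to size the padding.
A minimal demonstration of the pitfall (stand-in escape codes, not the
theme attributes):

    GREY, RESET = "\x1b[90m", "\x1b[0m"
    plain = "1,0-1,4:"
    colored = f"{GREY}1,0{RESET}-{GREY}1,4{RESET}:"
    assert len(colored) > len(plain)   # escapes inflate len() ...
    pad = " " * (20 - len(plain))      # ... so pad from the plain text
    print(colored + pad + "NAME")
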
From 1a0039ab8431ea71b898435586618f755d9c34c5 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych
Date: Sat, 25 Apr 2026 16:41:18 +0100
Subject: [PATCH 2/3] Move to `_print_tokens()`

---
 Lib/tokenize.py | 50 +++++++++++++++++++++++++------------------------
 1 file changed, 26 insertions(+), 24 deletions(-)

diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 180b89dcd90335..47d79c653c938b 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -532,6 +532,31 @@ def _get_token_colors(syntax, tokenize):
     })
 
 
+def _print_tokens(tokens, *, color=False, exact=False):
+    theme = _colorize.get_theme(force_no_color=not color)
+    s = theme.syntax
+    t = theme.tokenize
+    token_colors = _get_token_colors(s, t)
+    for token in tokens:
+        token_type = token.exact_type if exact else token.type
+        token_range = (
+            f"{t.position}{token.start[0]}"
+            f"{t.delimiter},{t.position}{token.start[1]}"
+            f"{t.delimiter}-"
+            f"{t.position}{token.end[0]}"
+            f"{t.delimiter},{t.position}{token.end[1]}"
+            f"{t.delimiter}:"
+        )
+        token_color = token_colors.get(token_type, s.reset)
+        token_name = tok_name[token_type]
+        visible_range = f"{token.start[0]},{token.start[1]}-{token.end[0]},{token.end[1]}:"
+        print(
+            f"{token_range}{' ' * (20 - len(visible_range))}"
+            f"{token_color}{token_name:<15}"
+            f"{s.reset}{token.string!r:<15}"
+        )
+
+
 def _main(args=None):
     import argparse
@@ -572,30 +597,7 @@ def error(message, filename=None, location=None):
 
         # Output the tokenization
-        _theme = _colorize.get_theme()
-        s = _theme.syntax
-        t = _theme.tokenize
-        _token_colors = _get_token_colors(s, t)
-        for token in tokens:
-            token_type = token.type
-            if args.exact:
-                token_type = token.exact_type
-            token_range = (
-                f"{t.position}{token.start[0]}"
-                f"{t.delimiter},{t.position}{token.start[1]}"
-                f"{t.delimiter}-"
-                f"{t.position}{token.end[0]}"
-                f"{t.delimiter},{t.position}{token.end[1]}"
-                f"{t.delimiter}:"
-            )
-            color = _token_colors.get(token_type, s.reset)
-            token_name = tok_name[token_type]
-            visible_range = f"{token.start[0]},{token.start[1]}-{token.end[0]},{token.end[1]}:"
-            print(
-                f"{token_range}{' ' * (20 - len(visible_range))}"
-                f"{color}{token_name:<15}"
-                f"{s.reset}{token.string!r:<15}"
-            )
+        _print_tokens(tokens, color=True, exact=args.exact)
     except IndentationError as err:
         line, column = err.args[1][1:3]
         error(err.args[0], filename, (line, column))

From 1dda979834571ab5bf7403546cac21f8c4f62978 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych
Date: Sat, 25 Apr 2026 17:38:09 +0100
Subject: [PATCH 3/3] Yield instead

---
 Lib/tokenize.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 47d79c653c938b..9839c94d45e9ae 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -532,7 +532,7 @@ def _get_token_colors(syntax, tokenize):
     })
 
 
-def _print_tokens(tokens, *, color=False, exact=False):
+def _format_tokens(tokens, *, color=False, exact=False):
     theme = _colorize.get_theme(force_no_color=not color)
     s = theme.syntax
     t = theme.tokenize
@@ -550,7 +550,7 @@ def _print_tokens(tokens, *, color=False, exact=False):
         token_color = token_colors.get(token_type, s.reset)
         token_name = tok_name[token_type]
         visible_range = f"{token.start[0]},{token.start[1]}-{token.end[0]},{token.end[1]}:"
-        print(
+        yield (
             f"{token_range}{' ' * (20 - len(visible_range))}"
             f"{token_color}{token_name:<15}"
             f"{s.reset}{token.string!r:<15}"
         )
@@ -597,7 +597,8 @@ def error(message, filename=None, location=None):
 
         # Output the tokenization
-        _print_tokens(tokens, color=True, exact=args.exact)
+        for line in _format_tokens(tokens, color=True, exact=args.exact):
+            print(line)
     except IndentationError as err:
         line, column = err.args[1][1:3]
         error(err.args[0], filename, (line, column))
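
Note on the final shape: `_format_tokens()` is now a generator, so the CLI
prints lines as they are produced and tests can inspect the formatted
strings without capturing stdout. A sketch of driving it directly,
assuming the private helper keeps this name and signature:

    import io
    import tokenize

    # Tokenize a small snippet and format it with colour disabled.
    tokens = tokenize.generate_tokens(io.StringIO("x = 1\n").readline)
    for line in tokenize._format_tokens(tokens, color=False, exact=True):
        print(line)

From the command line, colour can still be disabled the usual way, e.g.
`NO_COLOR=1 python -m tokenize -e file.py`.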