diff --git a/Doc/library/tokenize.rst b/Doc/library/tokenize.rst index 3db4cf42c17f3d..72fbcaba160660 100644 --- a/Doc/library/tokenize.rst +++ b/Doc/library/tokenize.rst @@ -28,7 +28,7 @@ type can be determined by checking the ``exact_type`` property on the **undefined** when providing invalid Python code and it can change at any point. -Tokenizing Input +Tokenizing input ---------------- The primary entry point is a :term:`generator`: @@ -146,7 +146,7 @@ function it uses to do this is available: .. _tokenize-cli: -Command-Line Usage +Command-line usage ------------------ .. versionadded:: 3.3 @@ -173,8 +173,12 @@ The following options are accepted: If :file:`filename.py` is specified its contents are tokenized to stdout. Otherwise, tokenization is performed on stdin. +.. versionchanged:: next + Output is in color by default and can be + :ref:`controlled using environment variables <using-on-controlling-color>`. + Examples ------------------- +-------- Example of a script rewriter that transforms float literals into Decimal objects:: @@ -227,7 +231,7 @@ Example of tokenizing from the command line. The script:: will be tokenized to the following output where the first column is the range of the line/column coordinates where the token is found, the second column is -the name of the token, and the final column is the value of the token (if any) +the name of the token, and the final column is the value of the token (if any): .. code-block:: shell-session diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 9ccd63bd8795f9..1c0f7f117898c7 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -1217,6 +1217,15 @@ tkinter (Contributed by Matthias Kievernagel and Serhiy Storchaka in :gh:`47655`.) +tokenize +-------- + +* The output of the :mod:`tokenize` :ref:`command-line interface + <tokenize-cli>` is colored by default. This can be controlled with + :ref:`environment variables <using-on-controlling-color>`. + (Contributed by Hugo van Kemenade in :gh:`148991`.) + + .. 
_whatsnew315-tomllib-1-1-0: tomllib diff --git a/Lib/_colorize.py b/Lib/_colorize.py index 852ad38f08618e..435af0bb56ae9f 100644 --- a/Lib/_colorize.py +++ b/Lib/_colorize.py @@ -375,6 +375,14 @@ class Timeit(ThemeSection): reset: str = ANSIColors.RESET +@dataclass(frozen=True, kw_only=True) +class Tokenize(ThemeSection): + whitespace: str = ANSIColors.GREY + error: str = ANSIColors.BOLD_RED + position: str = ANSIColors.GREY + delimiter: str = ANSIColors.RESET + + @dataclass(frozen=True, kw_only=True) class Traceback(ThemeSection): type: str = ANSIColors.BOLD_MAGENTA @@ -411,6 +419,7 @@ class Theme: live_profiler: LiveProfiler = field(default_factory=LiveProfiler) syntax: Syntax = field(default_factory=Syntax) timeit: Timeit = field(default_factory=Timeit) + tokenize: Tokenize = field(default_factory=Tokenize) traceback: Traceback = field(default_factory=Traceback) unittest: Unittest = field(default_factory=Unittest) @@ -424,6 +433,7 @@ def copy_with( live_profiler: LiveProfiler | None = None, syntax: Syntax | None = None, timeit: Timeit | None = None, + tokenize: Tokenize | None = None, traceback: Traceback | None = None, unittest: Unittest | None = None, ) -> Self: @@ -440,6 +450,7 @@ def copy_with( live_profiler=live_profiler or self.live_profiler, syntax=syntax or self.syntax, timeit=timeit or self.timeit, + tokenize=tokenize or self.tokenize, traceback=traceback or self.traceback, unittest=unittest or self.unittest, ) @@ -460,6 +471,7 @@ def no_colors(cls) -> Self: live_profiler=LiveProfiler.no_colors(), syntax=Syntax.no_colors(), timeit=Timeit.no_colors(), + tokenize=Tokenize.no_colors(), traceback=Traceback.no_colors(), unittest=Unittest.no_colors(), ) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index ca67e381958757..ab53a20cff5539 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -3326,6 +3326,7 @@ def test_newline_at_the_end_of_buffer(self): run_test_script(file_name) +@support.force_not_colorized_test_class 
class CommandLineTest(unittest.TestCase): def setUp(self): self.filename = tempfile.mktemp() diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 11c134482db024..9839c94d45e9ae 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -35,6 +35,7 @@ from token import * from token import EXACT_TOKEN_TYPES import _tokenize +lazy import _colorize cookie_re = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) @@ -505,6 +506,57 @@ def generate_tokens(readline): """ return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True) + +def _get_token_colors(syntax, tokenize): + """Map token type numbers to theme colors.""" + return { + COMMENT: syntax.comment, + DEDENT: tokenize.whitespace, + ENCODING: tokenize.whitespace, + ENDMARKER: tokenize.whitespace, + ERRORTOKEN: tokenize.error, + FSTRING_START: syntax.string, + FSTRING_MIDDLE: syntax.string, + FSTRING_END: syntax.string, + INDENT: tokenize.whitespace, + NAME: syntax.reset, + NEWLINE: tokenize.whitespace, + NL: tokenize.whitespace, + NUMBER: syntax.number, + OP: syntax.op, + SOFT_KEYWORD: syntax.soft_keyword, + STRING: syntax.string, + TSTRING_START: syntax.string, + TSTRING_MIDDLE: syntax.string, + TSTRING_END: syntax.string, + } + + +def _format_tokens(tokens, *, color=False, exact=False): + theme = _colorize.get_theme(force_no_color=not color) + s = theme.syntax + t = theme.tokenize + token_colors = _get_token_colors(s, t) + for token in tokens: + token_type = token.exact_type if exact else token.type + token_range = ( + f"{t.position}{token.start[0]}" + f"{t.delimiter},{t.position}{token.start[1]}" + f"{t.delimiter}-" + f"{t.position}{token.end[0]}" + f"{t.delimiter},{t.position}{token.end[1]}" + f"{t.delimiter}:" + ) + token_color = token_colors.get(token_type, s.reset) + token_name = tok_name[token_type] + visible_range = f"{token.start[0]},{token.start[1]}-{token.end[0]},{token.end[1]}:" + yield ( + f"{token_range}{' ' * (20 
- len(visible_range))}" + f"{token_color}{token_name:<15}" + f"{s.reset}{token.string!r:<15}" + ) + + def _main(args=None): import argparse @@ -524,7 +576,7 @@ def error(message, filename=None, location=None): sys.exit(1) # Parse the arguments and options - parser = argparse.ArgumentParser(color=True) + parser = argparse.ArgumentParser() parser.add_argument(dest='filename', nargs='?', metavar='filename.py', help='the file to tokenize; defaults to stdin') @@ -545,13 +597,8 @@ def error(message, filename=None, location=None): # Output the tokenization - for token in tokens: - token_type = token.type - if args.exact: - token_type = token.exact_type - token_range = "%d,%d-%d,%d:" % (token.start + token.end) - print("%-20s%-15s%-15r" % - (token_range, tok_name[token_type], token.string)) + for line in _format_tokens(tokens, color=True, exact=args.exact): + print(line) except IndentationError as err: line, column = err.args[1][1:3] error(err.args[0], filename, (line, column)) diff --git a/Misc/NEWS.d/next/Library/2026-04-25-18-09-16.gh-issue-148991.AZ64Et.rst b/Misc/NEWS.d/next/Library/2026-04-25-18-09-16.gh-issue-148991.AZ64Et.rst new file mode 100644 index 00000000000000..336ed42e51f1b8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-25-18-09-16.gh-issue-148991.AZ64Et.rst @@ -0,0 +1 @@ +Add colour to :mod:`tokenize` CLI output. Patch by Hugo van Kemenade.