Skip to content

Commit 7d11c7a

Browse files
committed
Add colour to tokenize CLI output
1 parent 5ea3ae7 commit 7d11c7a

6 files changed

Lines changed: 79 additions & 8 deletions

File tree

Doc/library/tokenize.rst

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ type can be determined by checking the ``exact_type`` property on the
2828
**undefined** when providing invalid Python code and it can change at any
2929
point.
3030

31-
Tokenizing Input
31+
Tokenizing input
3232
----------------
3333

3434
The primary entry point is a :term:`generator`:
@@ -146,7 +146,7 @@ function it uses to do this is available:
146146

147147
.. _tokenize-cli:
148148

149-
Command-Line Usage
149+
Command-line usage
150150
------------------
151151

152152
.. versionadded:: 3.3
@@ -173,8 +173,12 @@ The following options are accepted:
173173
If :file:`filename.py` is specified its contents are tokenized to stdout.
174174
Otherwise, tokenization is performed on stdin.
175175

176+
.. versionadded:: next
177+
Output is in color by default and can be
178+
:ref:`controlled using environment variables <using-on-controlling-color>`.
179+
176180
Examples
177-
------------------
181+
--------
178182

179183
Example of a script rewriter that transforms float literals into Decimal
180184
objects::
@@ -227,7 +231,7 @@ Example of tokenizing from the command line. The script::
227231

228232
will be tokenized to the following output where the first column is the range
229233
of the line/column coordinates where the token is found, the second column is
230-
the name of the token, and the final column is the value of the token (if any)
234+
the name of the token, and the final column is the value of the token (if any):
231235

232236
.. code-block:: shell-session
233237

Doc/whatsnew/3.15.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1217,6 +1217,15 @@ tkinter
12171217
(Contributed by Matthias Kievernagel and Serhiy Storchaka in :gh:`47655`.)
12181218

12191219

1220+
tokenize
1221+
--------
1222+
1223+
* The output of the :mod:`tokenize` :ref:`command-line interface
1224+
<tokenize-cli>` is colored by default. This can be controlled with
1225+
:ref:`environment variables <using-on-controlling-color>`.
1226+
(Contributed by Hugo van Kemenade in :gh:`148991`.)
1227+
1228+
12201229
.. _whatsnew315-tomllib-1-1-0:
12211230

12221231
tomllib

Lib/_colorize.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,14 @@ class Timeit(ThemeSection):
375375
reset: str = ANSIColors.RESET
376376

377377

378+
@dataclass(frozen=True, kw_only=True)
class Tokenize(ThemeSection):
    # Theme section for the ``python -m tokenize`` CLI output.
    # Dim color for structural/non-printing tokens (NL, NEWLINE, INDENT,
    # DEDENT, ENCODING, ENDMARKER — see _get_token_colors in Lib/tokenize.py).
    whitespace: str = ANSIColors.GREY
    # Highlight for ERRORTOKEN (invalid input).
    error: str = ANSIColors.BOLD_RED
    # Line/column numbers in the "row,col-row,col:" range prefix.
    position: str = ANSIColors.GREY
    # Punctuation between coordinates ("," / "-" / ":") — left uncolored.
    delimiter: str = ANSIColors.RESET
384+
385+
378386
@dataclass(frozen=True, kw_only=True)
379387
class Traceback(ThemeSection):
380388
type: str = ANSIColors.BOLD_MAGENTA
@@ -411,6 +419,7 @@ class Theme:
411419
live_profiler: LiveProfiler = field(default_factory=LiveProfiler)
412420
syntax: Syntax = field(default_factory=Syntax)
413421
timeit: Timeit = field(default_factory=Timeit)
422+
tokenize: Tokenize = field(default_factory=Tokenize)
414423
traceback: Traceback = field(default_factory=Traceback)
415424
unittest: Unittest = field(default_factory=Unittest)
416425

@@ -424,6 +433,7 @@ def copy_with(
424433
live_profiler: LiveProfiler | None = None,
425434
syntax: Syntax | None = None,
426435
timeit: Timeit | None = None,
436+
tokenize: Tokenize | None = None,
427437
traceback: Traceback | None = None,
428438
unittest: Unittest | None = None,
429439
) -> Self:
@@ -440,6 +450,7 @@ def copy_with(
440450
live_profiler=live_profiler or self.live_profiler,
441451
syntax=syntax or self.syntax,
442452
timeit=timeit or self.timeit,
453+
tokenize=tokenize or self.tokenize,
443454
traceback=traceback or self.traceback,
444455
unittest=unittest or self.unittest,
445456
)
@@ -460,6 +471,7 @@ def no_colors(cls) -> Self:
460471
live_profiler=LiveProfiler.no_colors(),
461472
syntax=Syntax.no_colors(),
462473
timeit=Timeit.no_colors(),
474+
tokenize=Tokenize.no_colors(),
463475
traceback=Traceback.no_colors(),
464476
unittest=Unittest.no_colors(),
465477
)

Lib/test/test_tokenize.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3326,6 +3326,7 @@ def test_newline_at_the_end_of_buffer(self):
33263326
run_test_script(file_name)
33273327

33283328

3329+
@support.force_not_colorized_test_class
33293330
class CommandLineTest(unittest.TestCase):
33303331
def setUp(self):
33313332
self.filename = tempfile.mktemp()

Lib/tokenize.py

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from token import *
3636
from token import EXACT_TOKEN_TYPES
3737
import _tokenize
38+
lazy import _colorize
3839

3940
cookie_re = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
4041
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
@@ -505,6 +506,32 @@ def generate_tokens(readline):
505506
"""
506507
return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True)
507508

509+
510+
def _get_token_colors(syntax, tokenize):
511+
"""Map token type numbers to theme colors."""
512+
return frozendict({
513+
COMMENT: syntax.comment,
514+
DEDENT: tokenize.whitespace,
515+
ENCODING: tokenize.whitespace,
516+
ENDMARKER: tokenize.whitespace,
517+
ERRORTOKEN: tokenize.error,
518+
FSTRING_START: syntax.string,
519+
FSTRING_MIDDLE: syntax.string,
520+
FSTRING_END: syntax.string,
521+
INDENT: tokenize.whitespace,
522+
NAME: syntax.reset,
523+
NEWLINE: tokenize.whitespace,
524+
NL: tokenize.whitespace,
525+
NUMBER: syntax.number,
526+
OP: syntax.op,
527+
SOFT_KEYWORD: syntax.soft_keyword,
528+
STRING: syntax.string,
529+
TSTRING_START: syntax.string,
530+
TSTRING_MIDDLE: syntax.string,
531+
TSTRING_END: syntax.string,
532+
})
533+
534+
508535
def _main(args=None):
509536
import argparse
510537

@@ -524,7 +551,7 @@ def error(message, filename=None, location=None):
524551
sys.exit(1)
525552

526553
# Parse the arguments and options
527-
parser = argparse.ArgumentParser(color=True)
554+
parser = argparse.ArgumentParser()
528555
parser.add_argument(dest='filename', nargs='?',
529556
metavar='filename.py',
530557
help='the file to tokenize; defaults to stdin')
@@ -545,13 +572,30 @@ def error(message, filename=None, location=None):
545572

546573

547574
# Output the tokenization
575+
_theme = _colorize.get_theme()
576+
s = _theme.syntax
577+
t = _theme.tokenize
578+
_token_colors = _get_token_colors(s, t)
548579
for token in tokens:
549580
token_type = token.type
550581
if args.exact:
551582
token_type = token.exact_type
552-
token_range = "%d,%d-%d,%d:" % (token.start + token.end)
553-
print("%-20s%-15s%-15r" %
554-
(token_range, tok_name[token_type], token.string))
583+
token_range = (
584+
f"{t.position}{token.start[0]}"
585+
f"{t.delimiter},{t.position}{token.start[1]}"
586+
f"{t.delimiter}-"
587+
f"{t.position}{token.end[0]}"
588+
f"{t.delimiter},{t.position}{token.end[1]}"
589+
f"{t.delimiter}:"
590+
)
591+
color = _token_colors.get(token_type, s.reset)
592+
token_name = tok_name[token_type]
593+
visible_range = f"{token.start[0]},{token.start[1]}-{token.end[0]},{token.end[1]}:"
594+
print(
595+
f"{token_range}{' ' * (20 - len(visible_range))}"
596+
f"{color}{token_name:<15}"
597+
f"{s.reset}{token.string!r:<15}"
598+
)
555599
except IndentationError as err:
556600
line, column = err.args[1][1:3]
557601
error(err.args[0], filename, (line, column))
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add colour to :mod:`tokenize` CLI output. Patch by Hugo van Kemenade.

0 commit comments

Comments
 (0)