Skip to content
This repository was archived by the owner on Mar 26, 2026. It is now read-only.

Commit 9898807

Browse files
authored
feat: add syntax highlighting support for Markdown pages (#170)
* feat: add syntax highlight support for markdown pages * test: add unit test for syntax highlighting * test: remove unneeded files * fix: apply commit suggestion * feat: handle code blocks with language indicators * test: update unittest with language indicator support * test: apply review suggestions * test: update to use temporary file. * feat: update with review suggestions. * chore: code cleanup * test: update to use context manager for temporary file
1 parent 5ce356b commit 9898807

8 files changed

Lines changed: 145 additions & 0 deletions

docfx_yaml/extension.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1268,6 +1268,52 @@ def extract_header_from_markdown(mdfile_iterator):
12681268
return mdfile_name
12691269

12701270

1271+
# For a given markdown file, adds syntax highlighting to code blocks.
1272+
def highlight_md_codeblocks(mdfile):
    """Add Python syntax highlighting to unlabeled fenced code blocks.

    Every opening fence in the markdown file that carries no language
    indicator is rewritten from ``` to ```python. Fences that already name a
    language (for example ```java) are left untouched. The file is modified in
    place, and only when at least one fence needs to change.

    Args:
        mdfile: Path of the markdown file to update.

    Returns:
        None. If the file contains an odd number of fences, a message is
        printed and the file is left unmodified.
    """
    fence = '```'
    fence_with_python = '```python'

    with open(mdfile) as mdfile_iterator:
        file_content = mdfile_iterator.read()

    # An odd number of fences means at least one code block is never closed,
    # so opening and closing fences cannot be paired reliably.
    if file_content.count(fence) % 2 != 0:
        print(f'{mdfile} contains wrong format of code blocks. Skipping syntax highlighting.')
        return

    # Pieces of the rewritten file, joined once at the end.
    new_lines = []
    prev_end = 0

    for index, match in enumerate(re.finditer(re.escape(fence), file_content)):
        # Fences alternate between opening (even index) and closing (odd
        # index); only opening fences can carry a language indicator.
        if index % 2:
            continue

        start, end = match.span()
        line_end = file_content.find('\n', end)
        if line_end == -1:
            line_end = len(file_content)

        # Anything other than whitespace after the fence is a language
        # indicator (or inline content), so the fence is kept as written.
        if file_content[end:line_end].strip():
            continue

        # Keep the untouched text before this fence, then swap the fence.
        new_lines.append(file_content[prev_end:start])
        new_lines.append(fence_with_python)
        prev_end = end

    # Nothing to highlight: avoid rewriting the file with identical content.
    if not new_lines:
        return

    # Include rest of the content.
    new_lines.append(file_content[prev_end:])

    # Overwrite with newly parsed content.
    with open(mdfile, 'w') as mdfile_iterator:
        mdfile_iterator.write(''.join(new_lines))
1315+
1316+
12711317
# Given generated markdown files, incorporate them into the docfx_yaml output.
12721318
# The markdown file metadata will be added to top level of the TOC.
12731319
def find_markdown_pages(app, outdir):
@@ -1294,6 +1340,7 @@ def find_markdown_pages(app, outdir):
12941340
# For each file, if it is a markdown file move to the top level pages.
12951341
for mdfile in markdown_dir.iterdir():
12961342
if mdfile.is_file() and mdfile.name.lower() not in files_to_ignore:
1343+
highlight_md_codeblocks(markdown_dir / mdfile.name)
12971344
shutil.copy(mdfile, f"{outdir}/{mdfile.name.lower()}")
12981345

12991346
# Extract the header name for TOC.

tests/markdown_mixed_highlight.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
```python
2+
These code blocks should not be highlighted.
3+
```
4+
5+
```py
6+
As these come with a language indicator.
7+
```
8+
9+
```java
10+
Shouldn't matter which langauge indicator is used.
11+
```
12+
13+
```
14+
But this block should get highlighted.
15+
```
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
```python
2+
These code blocks should not be highlighted.
3+
```
4+
5+
```py
6+
As these come with a language indicator.
7+
```
8+
9+
```java
10+
Shouldn't matter which langauge indicator is used.
11+
```
12+
13+
```python
14+
But this block should get highlighted.
15+
```

tests/markdown_no_highlight.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
```
2+
File with missing codeblock
3+
```
4+
5+
```
6+
with no closing bracket
7+
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
```
2+
File with missing codeblock
3+
```
4+
5+
```
6+
with no closing bracket
7+

tests/markdown_syntax_highlight.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
```
2+
test markdown file for
3+
highlighing markdown codeblocks
4+
```
5+
6+
```
7+
all code blocks
8+
should be highlighted
9+
```
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
```python
2+
test markdown file for
3+
highlighing markdown codeblocks
4+
```
5+
6+
```python
7+
all code blocks
8+
should be highlighted
9+
```

tests/test_helpers.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,15 @@
44
from docfx_yaml.extension import search_cross_references
55
from docfx_yaml.extension import format_code
66
from docfx_yaml.extension import extract_product_name
7+
from docfx_yaml.extension import highlight_md_codeblocks
78

89
import unittest
910
from parameterized import parameterized
1011

1112
from yaml import load, Loader
1213

14+
import tempfile
15+
1316
class TestGenerate(unittest.TestCase):
1417
def test_indent_code_left(self):
1518
# Check that the code indents to left based on first line.
@@ -190,5 +193,38 @@ def test_extract_product_name(self):
190193

191194
self.assertEqual(short_name_want, short_product_name)
192195

196+
197+
# Filenames to test markdown syntax highlight with.
198+
test_markdown_filenames = [
199+
[
200+
"tests/markdown_syntax_highlight.md",
201+
"tests/markdown_syntax_highlight_want.md"
202+
],
203+
[
204+
"tests/markdown_no_highlight.md",
205+
"tests/markdown_no_highlight_want.md"
206+
],
207+
[
208+
"tests/markdown_mixed_highlight.md",
209+
"tests/markdown_mixed_highlight_want.md"
210+
],
211+
]
212+
@parameterized.expand(test_markdown_filenames)
def test_highlight_md_codeblocks(self, base_filename, want_filename):
    # Test to ensure codeblocks in markdown files are correctly highlighted.
    import os

    # Work on a copy inside a temporary directory so the fixture file is
    # never modified and nothing is left behind once the test finishes.
    with tempfile.TemporaryDirectory() as temp_dir:
        test_filename = os.path.join(temp_dir, "test.md")

        # Copy the base file we'll need to test.
        with open(base_filename) as base_file:
            base_content = base_file.read()
        with open(test_filename, 'w') as test_file:
            test_file.write(base_content)

        highlight_md_codeblocks(test_filename)

        # Reopen the file so the comparison reads what was written to disk.
        with open(test_filename) as test_file, open(want_filename) as mdfile_want:
            self.assertEqual(test_file.read(), mdfile_want.read())
227+
228+
193229
if __name__ == '__main__':
194230
unittest.main()

0 commit comments

Comments
 (0)