feat: add syntax highlighting support for Markdown pages (#170)

dandhlee · web-flow · commit 98988072c3a3 · 2022-01-28T11:55:27.000-05:00
* feat: add syntax highlight support for markdown pages

* test: add unit test for syntax highlighting

* test: remove unneeded files

* fix: apply commit suggestion

* feat: handle code blocks with language indicators

* test: update unittest with language indicator support

* test: apply review suggestions

* test: update to use temporary file.

* feat: update with review suggestions.

* chore: code cleanup

* test: update to use context manager for temporary file
diff --git a/docfx_yaml/extension.py b/docfx_yaml/extension.py
@@ -1268,6 +1268,52 @@ def extract_header_from_markdown(mdfile_iterator):
     return mdfile_name
 
 
+# For a given markdown file, adds syntax highlighting to code blocks.
+def highlight_md_codeblocks(mdfile):
+    fence = '```'
+    fence_with_python = '```python'
+    new_lines = []
+
+    with open(mdfile) as mdfile_iterator:
+        file_content = mdfile_iterator.read()
+        # If there is an odd number of code block annotations, do not syntax
+        # highlight.
+        if file_content.count(fence) % 2 != 0:
+            print(f'{mdfile_iterator.name} contains wrong format of code blocks. Skipping syntax highlighting.')
+            return
+        # Retrieve code block positions to replace
+        codeblocks = [[m.start(), m.end()] for m in re.finditer(
+                                                      fence,
+                                                      file_content)]
+
+        # Keep every other match (indices 0, 2, ...), i.e. the opening fences.
+        codeblocks = codeblocks[::2]
+        # Used to store code blocks that come without language indicators.
+        blocks_without_indicators = []
+
+        # Check if the fence comes without a language indicator. If so, add it
+        # to the list of blocks to rewrite.
+        for start, end in codeblocks:
+            if file_content[end] == '\n':
+                blocks_without_indicators.append([start, end])
+
+        # Stitch content that does not need to be parsed, and replace with
+        # `fence_with_python` for parsed portions.
+        prev_start = prev_end = 0
+        for start, end in blocks_without_indicators:
+            new_lines.append(file_content[prev_end:start])
+            new_lines.append(fence_with_python)
+            prev_start, prev_end = start, end
+
+        # Include rest of the content.
+        new_lines.append(file_content[prev_end:])
+
+    # Overwrite with newly parsed content.
+    with open(mdfile, 'w') as mdfile_iterator:
+        new_content = ''.join(new_lines)
+        mdfile_iterator.write(new_content)
+
+
 # Given generated markdown files, incorporate them into the docfx_yaml output.
 # The markdown file metadata will be added to top level of the TOC.
 def find_markdown_pages(app, outdir):
@@ -1294,6 +1340,7 @@ def find_markdown_pages(app, outdir):
     # For each file, if it is a markdown file move to the top level pages.
     for mdfile in markdown_dir.iterdir():
         if mdfile.is_file() and mdfile.name.lower() not in files_to_ignore:
+            highlight_md_codeblocks(markdown_dir / mdfile.name)
             shutil.copy(mdfile, f"{outdir}/{mdfile.name.lower()}")
 
             # Extract the header name for TOC.
diff --git a/tests/markdown_mixed_highlight.md b/tests/markdown_mixed_highlight.md
@@ -0,0 +1,15 @@
+```python
+These code blocks should not be highlighted.
+```
+
+```py
+As these come with a language indicator.
+```
+
+```java
+Shouldn't matter which langauge indicator is used.
+```
+
+```
+But this block should get highlighted.
+```
diff --git a/tests/markdown_mixed_highlight_want.md b/tests/markdown_mixed_highlight_want.md
@@ -0,0 +1,15 @@
+```python
+These code blocks should not be highlighted.
+```
+
+```py
+As these come with a language indicator.
+```
+
+```java
+Shouldn't matter which langauge indicator is used.
+```
+
+```python
+But this block should get highlighted.
+```
diff --git a/tests/markdown_no_highlight.md b/tests/markdown_no_highlight.md
@@ -0,0 +1,7 @@
+```
+File with missing codeblock
+```
+
+```
+with no closing bracket
+
diff --git a/tests/markdown_no_highlight_want.md b/tests/markdown_no_highlight_want.md
@@ -0,0 +1,7 @@
+```
+File with missing codeblock
+```
+
+```
+with no closing bracket
+
diff --git a/tests/markdown_syntax_highlight.md b/tests/markdown_syntax_highlight.md
@@ -0,0 +1,9 @@
+```
+test markdown file for
+highlighing markdown codeblocks
+```
+
+```
+all code blocks
+should be highlighted
+```
diff --git a/tests/markdown_syntax_highlight_want.md b/tests/markdown_syntax_highlight_want.md
@@ -0,0 +1,9 @@
+```python
+test markdown file for
+highlighing markdown codeblocks
+```
+
+```python
+all code blocks
+should be highlighted
+```
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
@@ -4,12 +4,15 @@
 from docfx_yaml.extension import search_cross_references
 from docfx_yaml.extension import format_code
 from docfx_yaml.extension import extract_product_name
+from docfx_yaml.extension import highlight_md_codeblocks
 
 import unittest
 from parameterized import parameterized
 
 from yaml import load, Loader
 
+import tempfile
+
 class TestGenerate(unittest.TestCase):
     def test_indent_code_left(self):
         # Check that the code indents to left based on first line.
@@ -190,5 +193,38 @@ def test_extract_product_name(self):
 
         self.assertEqual(short_name_want, short_product_name)
 
+
+    # Filenames to test markdown syntax highlight with.
+    test_markdown_filenames = [
+        [
+            "tests/markdown_syntax_highlight.md",
+            "tests/markdown_syntax_highlight_want.md"
+        ],
+        [
+            "tests/markdown_no_highlight.md",
+            "tests/markdown_no_highlight_want.md"
+        ],
+        [
+            "tests/markdown_mixed_highlight.md",
+            "tests/markdown_mixed_highlight_want.md"
+        ],
+    ]
+    @parameterized.expand(test_markdown_filenames)
+    def test_highlight_md_codeblocks(self, base_filename, want_filename):
+        # Test to ensure codeblocks in markdown files are correctly highlighted.
+
+        # Copy the base file we'll need to test.
+        with tempfile.NamedTemporaryFile(mode='r+', delete=False) as test_file:
+            with open(base_filename) as base_file:
+                test_file.write(base_file.read())
+                test_file.flush()
+
+            highlight_md_codeblocks(test_file.name)
+            test_file.seek(0)
+
+            with open(want_filename) as mdfile_want:
+                self.assertEqual(test_file.read(), mdfile_want.read())
+
+
 if __name__ == '__main__':
     unittest.main()

-Original file line number
+Diff line change
@@ @@ -0,0 +1,7 @@ @@
 +```
 +File with missing codeblock
 +```
++
 +```
 +with no closing bracket
++