fix: add markdown header if it is missing (#203)

dandhlee · web-flow · commit ccd53bdba8cd · 2022-04-12T14:35:11.000-04:00
* fix: add a Markdown header level 1 if it is missing

* test: update unit test

* fix: update name from prepend_markdown_title to prepend_markdown_header

* test: update unit test

* fix: update comments based on code review

* test: update unit test with comments and parameterized tests

* test: update unit test

* fix: update docstring with types

* fix: update type hint for Iterables
diff --git a/docfx_yaml/extension.py b/docfx_yaml/extension.py
@@ -29,7 +29,7 @@
 from pathlib import Path
 from functools import partial
 from itertools import zip_longest
-from typing import List
+from typing import List, Iterable
 from black import InvalidInput
 
 try:
@@ -1286,12 +1286,18 @@ def parse_markdown_header(header_line, prev_line):
     return ""
 
 
-# For a given markdown file, extract its header line.
-def extract_header_from_markdown(mdfile_iterator):
-    mdfile_name = mdfile_iterator.name.split("/")[-1].split(".")[0].capitalize()
+def extract_header_from_markdown(mdfile: Iterable[str]) -> str:
+    """For a given markdown file, extract its header line.
+
+    Args:
+        mdfile: iterator to the markdown file.
+
+    Returns:
+        The header text, or an empty string if no header is found.
+    """
     prev_line = ""
 
-    for header_line in mdfile_iterator:
+    for header_line in mdfile:
 
         # Ignore licenses and other non-headers prior to the header.
         header = parse_markdown_header(header_line, prev_line)
@@ -1301,8 +1307,7 @@ def extract_header_from_markdown(mdfile_iterator):
 
         prev_line = header_line
 
-    print(f"Could not find a title for {mdfile_iterator.name}. Using {mdfile_name} as the title instead.")
-    return mdfile_name
+    return ""
 
 
 # For a given markdown file, adds syntax highlighting to code blocks.
@@ -1351,6 +1356,20 @@ def highlight_md_codeblocks(mdfile):
         mdfile_iterator.write(new_content)
 
 
+def prepend_markdown_header(filename: str, mdfile: Iterable[str]):
+    """Prepends the filename as a Markdown header.
+
+    Args:
+        filename: the name of the markdown file, prepended as the header.
+        mdfile: file object for the markdown file, opened for both
+          reading and writing.
+    """
+    file_content = f'# {filename}\n\n' + mdfile.read()
+    # Reset file position to the beginning to write
+    mdfile.seek(0)
+    mdfile.write(file_content)
+
+
 # Given generated markdown files, incorporate them into the docfx_yaml output.
 # The markdown file metadata will be added to top level of the TOC.
 def find_markdown_pages(app, outdir):
@@ -1374,13 +1393,24 @@ def find_markdown_pages(app, outdir):
     # For each file, if it is a markdown file move to the top level pages.
     for mdfile in markdown_dir.iterdir():
         if mdfile.is_file() and mdfile.name.lower() not in files_to_ignore:
+            mdfile_name = ""
             highlight_md_codeblocks(markdown_dir / mdfile.name)
-            shutil.copy(mdfile, f"{outdir}/{mdfile.name.lower()}")
 
             # Extract the header name for TOC.
             with open(mdfile) as mdfile_iterator:
                 name = extract_header_from_markdown(mdfile_iterator)
 
+            if not name:
+                with open(mdfile, 'r+') as mdfile_iterator:
+                    mdfile_name = mdfile_iterator.name.split("/")[-1].split(".")[0].capitalize()
+
+                    print(f"Could not find a title for {mdfile_iterator.name}. Using {mdfile_name} as the title instead.")
+                    name = mdfile_name
+
+                    prepend_markdown_header(name, mdfile_iterator)
+
+            shutil.copy(mdfile, f"{outdir}/{mdfile.name.lower()}")
+
             # Add the file to the TOC later.
             app.env.markdown_pages.append({
                 'name': name,
diff --git a/tests/markdown_example_alternate_bad_want.md b/tests/markdown_example_alternate_bad_want.md
@@ -0,0 +1,8 @@
+# Markdown_example_alternate_bad
+
+==============
+
+There should be a header line before the divider.
+
+##Content header
+This is a simple line followed by an h2 header.
diff --git a/tests/markdown_example_bad_header_want.md b/tests/markdown_example_bad_header_want.md
@@ -0,0 +1,6 @@
+# Markdown_example_bad_header
+
+ #Test header for a bad formatted markdown file.  
+
+##Content header
+This is a simple line followed by an h2 header.
diff --git a/tests/markdown_example_h2_want.md b/tests/markdown_example_h2_want.md
@@ -0,0 +1,6 @@
+# Markdown_example_h2
+
+## Test header for a simple markdown file.  
+
+##Content header
+This is a simple line followed by an h2 header.
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
@@ -5,6 +5,7 @@
 from docfx_yaml.extension import format_code
 from docfx_yaml.extension import extract_product_name
 from docfx_yaml.extension import highlight_md_codeblocks
+from docfx_yaml.extension import prepend_markdown_header
 
 import unittest
 from parameterized import parameterized
@@ -260,5 +261,40 @@ def test_highlight_md_codeblocks(self, base_filename, want_filename):
                 self.assertEqual(test_file.read(), mdfile_want.read())
 
 
+    # Filenames to test prepending the Markdown header.
+    test_markdown_filenames = [
+        [
+            "tests/markdown_example_bad_header.md",
+            "tests/markdown_example_bad_header_want.md"
+        ],
+        [
+            "tests/markdown_example_h2.md",
+            "tests/markdown_example_h2_want.md"
+        ],
+        [
+            "tests/markdown_example_alternate_bad.md",
+            "tests/markdown_example_alternate_bad_want.md"
+        ],
+    ]
+    @parameterized.expand(test_markdown_filenames)
+    def test_prepend_markdown_header(self, base_filename, want_filename):
+        # Ensure markdown titles are correctly prepended.
+
+        # Copy the base file we'll need to test.
+        with tempfile.NamedTemporaryFile(mode='r+', delete=False) as test_file:
+            with open(base_filename) as base_file:
+                # Use same file name extraction as original code.
+                file_name = base_file.name.split("/")[-1].split(".")[0].capitalize()
+                test_file.write(base_file.read())
+                test_file.flush()
+                test_file.seek(0)
+
+            prepend_markdown_header(file_name, test_file)
+            test_file.seek(0)
+
+            with open(want_filename) as mdfile_want:
+                self.assertEqual(test_file.read(), mdfile_want.read())
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/test_unit.py b/tests/test_unit.py
@@ -13,6 +13,7 @@
 from docfx_yaml.extension import parse_markdown_header
 
 import unittest
+from parameterized import parameterized
 
 from yaml import load, Loader
 
@@ -667,74 +668,70 @@ def test_parse_markdown_header_alternate(self):
         self.assertEqual(header_line_got, header_line_want)
 
 
-    def test_extract_header_from_markdown(self):
-        # Check the header for a normal markdown file.
+    test_markdown_filenames = [
+        [
+            # Check the header for a normal markdown file.
+            "tests/markdown_example.md"
+        ],
+        [
+            # The header should be the same even with the license header.
+            "tests/markdown_example_header.md"
+        ],
+    ]
+    @parameterized.expand(test_markdown_filenames)
+    def test_extract_header_from_markdown(self, markdown_filename):
+        # Check the header for markdown files.
         header_line_want = "Test header for a simple markdown file."
 
-        with open('tests/markdown_example.md', 'r') as mdfile:
+        with open(markdown_filename, 'r') as mdfile:
             header_line_got = extract_header_from_markdown(mdfile)
 
         self.assertEqual(header_line_got, header_line_want)
 
-        # The header should be the same even with the license header.
-        header_line_with_license_want = header_line_want
-
-        with open('tests/markdown_example_header.md', 'r') as mdfile_license:
-            header_line_with_license_got = extract_header_from_markdown(mdfile_license)
-
-        self.assertEqual(header_line_with_license_got, header_line_with_license_want)
 
-
-    def test_extract_header_from_markdown_alternate_header(self):
-        # Check the header for an alternate header style.
-        header_line_want = "This is a simple alternate header"
-
-        with open('tests/markdown_example_alternate.md', 'r') as mdfile:
-            header_line_got = extract_header_from_markdown(mdfile)
-
-        self.assertEqual(header_line_got, header_line_want)
-
-        # The header should be the same even with the license header.
-        header_line_with_license_want = header_line_want
-
-        with open('tests/markdown_example_alternate_header.md', 'r') as mdfile:
-            header_line_with_license_got = extract_header_from_markdown(mdfile)
-
-        self.assertEqual(header_line_with_license_got, header_line_with_license_want)
-
-        # Check the header for an alternate header style.
+    test_markdown_filenames = [
+        [
+            # Check the header for an alternate header style.
+            "tests/markdown_example_alternate.md"
+        ],
+        [
+            # The header should be the same even with the license header.
+            "tests/markdown_example_alternate_header.md"
+        ],
+        [
+            # Check the header for an alternate header style.
+            "tests/markdown_example_alternate_less.md"
+        ],
+    ]
+    @parameterized.expand(test_markdown_filenames)
+    def test_extract_header_from_markdown_alternate_header(self, markdown_filename):
+        # Check the header for different accepted styles.
         header_line_want = "This is a simple alternate header"
 
-        with open('tests/markdown_example_alternate_less.md', 'r') as mdfile:
+        with open(markdown_filename, 'r') as mdfile:
             header_line_got = extract_header_from_markdown(mdfile)
 
         self.assertEqual(header_line_got, header_line_want)
 
 
-    def test_extract_header_from_markdown_bad_headers(self):
-        # Check that the filename is used as header if no valid header is found.
-        header_line_want = "Markdown_example_bad_header"
-
-        with open('tests/markdown_example_bad_header.md', 'r') as mdfile:
-            header_line_got = extract_header_from_markdown(mdfile)
-
-        self.assertEqual(header_line_want, header_line_got)
-
-        # Check that only h1 headers are parsed.
-        header_line_want = "Markdown_example_h2"
-
-        with open('tests/markdown_example_h2.md', 'r') as mdfile:
-            header_line_got = extract_header_from_markdown(mdfile)
-
-        self.assertEqual(header_line_want, header_line_got)
-
-        # Check that there must be a line before the h1 header breaker.
-        header_line_want = "Markdown_example_alternate_bad"
-
-        with open('tests/markdown_example_alternate_bad.md', 'r') as mdfile:
+    test_markdown_filenames = [
+        [
+            "tests/markdown_example_bad_header.md"
+        ],
+        [
+            "tests/markdown_example_h2.md"
+        ],
+        [
+            "tests/markdown_example_alternate_bad.md"
+        ],
+    ]
+    @parameterized.expand(test_markdown_filenames)
+    def test_extract_header_from_markdown_bad_headers(self, markdown_filename):
+        # Check that an empty string is returned if no valid header is found.
+        with open(markdown_filename, 'r') as mdfile:
             header_line_got = extract_header_from_markdown(mdfile)
 
-        self.assertEqual(header_line_want, header_line_got)
+        self.assertFalse(header_line_got)
 
 
     def test_parse_docstring_summary(self):