Skip to content
This repository was archived by the owner on Mar 26, 2026. It is now read-only.

Commit ccd53bd

Browse files
authored
fix: add markdown header if it is missing (#203)
* fix: add a Markdown header level 1 if it is missing * test: update unit test * fix: update name from prepend_markdown_title to prepend_markdown_header * test: update unit test * fix: update comments based on code review * test: update unit test with comments and parameterized tests * test: update unit test * fix: update docstring with types * fix: update type hint for Iterables
1 parent 9ffe7e0 commit ccd53bd

6 files changed

Lines changed: 143 additions & 60 deletions

File tree

docfx_yaml/extension.py

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from pathlib import Path
3030
from functools import partial
3131
from itertools import zip_longest
32-
from typing import List
32+
from typing import List, Iterable
3333
from black import InvalidInput
3434

3535
try:
@@ -1286,12 +1286,18 @@ def parse_markdown_header(header_line, prev_line):
12861286
return ""
12871287

12881288

1289-
# For a given markdown file, extract its header line.
1290-
def extract_header_from_markdown(mdfile_iterator):
1291-
mdfile_name = mdfile_iterator.name.split("/")[-1].split(".")[0].capitalize()
1289+
def extract_header_from_markdown(mdfile: Iterable[str]) -> str:
1290+
"""For a given markdown file, extract its header line.
1291+
1292+
Args:
1293+
mdfile: iterator to the markdown file.
1294+
1295+
Returns:
1296+
A string for header or empty string if header is not found.
1297+
"""
12921298
prev_line = ""
12931299

1294-
for header_line in mdfile_iterator:
1300+
for header_line in mdfile:
12951301

12961302
# Ignore licenses and other non-headers prior to the header.
12971303
header = parse_markdown_header(header_line, prev_line)
@@ -1301,8 +1307,7 @@ def extract_header_from_markdown(mdfile_iterator):
13011307

13021308
prev_line = header_line
13031309

1304-
print(f"Could not find a title for {mdfile_iterator.name}. Using {mdfile_name} as the title instead.")
1305-
return mdfile_name
1310+
return ""
13061311

13071312

13081313
# For a given markdown file, adds syntax highlighting to code blocks.
@@ -1351,6 +1356,20 @@ def highlight_md_codeblocks(mdfile):
13511356
mdfile_iterator.write(new_content)
13521357

13531358

1359+
def prepend_markdown_header(filename: str, mdfile: Iterable[str]) -> None:
    """Prepends the filename as a Markdown level 1 header.

    The stream is rewritten in place as `# {filename}`, a blank line, and
    then the complete original content.

    Args:
        filename: the text to use for the prepended header.
        mdfile: the markdown file object. Despite the `Iterable[str]`
            annotation, it must support `read()`, `seek()` and `write()`,
            e.g. a text file opened in 'r+' mode.
    """
    # Rewind before reading so the complete content is captured even if the
    # caller has already consumed part of the stream; otherwise only the
    # unread tail would be kept and stale text would be left behind.
    mdfile.seek(0)
    file_content = f'# {filename}\n\n' + mdfile.read()
    # Reset file position to the beginning to write.
    mdfile.seek(0)
    mdfile.write(file_content)
1371+
1372+
13541373
# Given generated markdown files, incorporate them into the docfx_yaml output.
13551374
# The markdown file metadata will be added to top level of the TOC.
13561375
def find_markdown_pages(app, outdir):
@@ -1374,13 +1393,24 @@ def find_markdown_pages(app, outdir):
13741393
# For each file, if it is a markdown file move to the top level pages.
13751394
for mdfile in markdown_dir.iterdir():
13761395
if mdfile.is_file() and mdfile.name.lower() not in files_to_ignore:
1396+
mdfile_name = ""
13771397
highlight_md_codeblocks(markdown_dir / mdfile.name)
1378-
shutil.copy(mdfile, f"{outdir}/{mdfile.name.lower()}")
13791398

13801399
# Extract the header name for TOC.
13811400
with open(mdfile) as mdfile_iterator:
13821401
name = extract_header_from_markdown(mdfile_iterator)
13831402

1403+
if not name:
1404+
with open(mdfile, 'r+') as mdfile_iterator:
1405+
mdfile_name = mdfile_iterator.name.split("/")[-1].split(".")[0].capitalize()
1406+
1407+
print(f"Could not find a title for {mdfile_iterator.name}. Using {mdfile_name} as the title instead.")
1408+
name = mdfile_name
1409+
1410+
prepend_markdown_header(name, mdfile_iterator)
1411+
1412+
shutil.copy(mdfile, f"{outdir}/{mdfile.name.lower()}")
1413+
13841414
# Add the file to the TOC later.
13851415
app.env.markdown_pages.append({
13861416
'name': name,
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Markdown_example_alternate_bad
2+
3+
==============
4+
5+
There should be a header line before the divider.
6+
7+
##Content header
8+
This is a simple line followed by an h2 header.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Markdown_example_bad_header
2+
3+
#Test header for a bad formatted markdown file.
4+
5+
##Content header
6+
This is a simple line followed by an h2 header.

tests/markdown_example_h2_want.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Markdown_example_h2
2+
3+
## Test header for a simple markdown file.
4+
5+
##Content header
6+
This is a simple line followed by an h2 header.

tests/test_helpers.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from docfx_yaml.extension import format_code
66
from docfx_yaml.extension import extract_product_name
77
from docfx_yaml.extension import highlight_md_codeblocks
8+
from docfx_yaml.extension import prepend_markdown_header
89

910
import unittest
1011
from parameterized import parameterized
@@ -260,5 +261,40 @@ def test_highlight_md_codeblocks(self, base_filename, want_filename):
260261
self.assertEqual(test_file.read(), mdfile_want.read())
261262

262263

264+
# Filenames to test prepending the Markdown header.
265+
test_markdown_filenames = [
266+
[
267+
"tests/markdown_example_bad_header.md",
268+
"tests/markdown_example_bad_header_want.md"
269+
],
270+
[
271+
"tests/markdown_example_h2.md",
272+
"tests/markdown_example_h2_want.md"
273+
],
274+
[
275+
"tests/markdown_example_alternate_bad.md",
276+
"tests/markdown_example_alternate_bad_want.md"
277+
],
278+
]
279+
@parameterized.expand(test_markdown_filenames)
280+
def test_prepend_markdown_header(self, base_filename, want_filename):
281+
# Ensure markdown titles are correctly prepended.
282+
283+
# Copy the base file we'll need to test.
284+
with tempfile.NamedTemporaryFile(mode='r+', delete=False) as test_file:
285+
with open(base_filename) as base_file:
286+
# Use same file name extraction as original code.
287+
file_name = base_file.name.split("/")[-1].split(".")[0].capitalize()
288+
test_file.write(base_file.read())
289+
test_file.flush()
290+
test_file.seek(0)
291+
292+
prepend_markdown_header(file_name, test_file)
293+
test_file.seek(0)
294+
295+
with open(want_filename) as mdfile_want:
296+
self.assertEqual(test_file.read(), mdfile_want.read())
297+
298+
263299
if __name__ == '__main__':
264300
unittest.main()

tests/test_unit.py

Lines changed: 49 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from docfx_yaml.extension import parse_markdown_header
1414

1515
import unittest
16+
from parameterized import parameterized
1617

1718
from yaml import load, Loader
1819

@@ -667,74 +668,70 @@ def test_parse_markdown_header_alternate(self):
667668
self.assertEqual(header_line_got, header_line_want)
668669

669670

670-
def test_extract_header_from_markdown(self):
671-
# Check the header for a normal markdown file.
671+
test_markdown_filenames = [
672+
[
673+
# Check the header for a normal markdown file.
674+
"tests/markdown_example.md"
675+
],
676+
[
677+
# The header should be the same even with the license header.
678+
"tests/markdown_example_header.md"
679+
],
680+
]
681+
@parameterized.expand(test_markdown_filenames)
682+
def test_extract_header_from_markdown(self, markdown_filename):
683+
# Check the header for markdown files.
672684
header_line_want = "Test header for a simple markdown file."
673685

674-
with open('tests/markdown_example.md', 'r') as mdfile:
686+
with open(markdown_filename, 'r') as mdfile:
675687
header_line_got = extract_header_from_markdown(mdfile)
676688

677689
self.assertEqual(header_line_got, header_line_want)
678690

679-
# The header should be the same even with the license header.
680-
header_line_with_license_want = header_line_want
681-
682-
with open('tests/markdown_example_header.md', 'r') as mdfile_license:
683-
header_line_with_license_got = extract_header_from_markdown(mdfile_license)
684-
685-
self.assertEqual(header_line_with_license_got, header_line_with_license_want)
686691

687-
688-
def test_extract_header_from_markdown_alternate_header(self):
689-
# Check the header for an alternate header style.
690-
header_line_want = "This is a simple alternate header"
691-
692-
with open('tests/markdown_example_alternate.md', 'r') as mdfile:
693-
header_line_got = extract_header_from_markdown(mdfile)
694-
695-
self.assertEqual(header_line_got, header_line_want)
696-
697-
# The header should be the same even with the license header.
698-
header_line_with_license_want = header_line_want
699-
700-
with open('tests/markdown_example_alternate_header.md', 'r') as mdfile:
701-
header_line_with_license_got = extract_header_from_markdown(mdfile)
702-
703-
self.assertEqual(header_line_with_license_got, header_line_with_license_want)
704-
705-
# Check the header for an alternate header style.
692+
test_markdown_filenames = [
693+
[
694+
# Check the header for an alternate header style.
695+
"tests/markdown_example_alternate.md"
696+
],
697+
[
698+
# The header should be the same even with the license header.
699+
"tests/markdown_example_alternate_header.md"
700+
],
701+
[
702+
# Check the header for an alternate header style.
703+
"tests/markdown_example_alternate_less.md"
704+
],
705+
]
706+
@parameterized.expand(test_markdown_filenames)
707+
def test_extract_header_from_markdown_alternate_header(self, markdown_filename):
708+
# Check the header for different accepted styles.
706709
header_line_want = "This is a simple alternate header"
707710

708-
with open('tests/markdown_example_alternate_less.md', 'r') as mdfile:
711+
with open(markdown_filename, 'r') as mdfile:
709712
header_line_got = extract_header_from_markdown(mdfile)
710713

711714
self.assertEqual(header_line_got, header_line_want)
712715

713716

714-
def test_extract_header_from_markdown_bad_headers(self):
715-
# Check that the filename is used as header if no valid header is found.
716-
header_line_want = "Markdown_example_bad_header"
717-
718-
with open('tests/markdown_example_bad_header.md', 'r') as mdfile:
719-
header_line_got = extract_header_from_markdown(mdfile)
720-
721-
self.assertEqual(header_line_want, header_line_got)
722-
723-
# Check that only h1 headers are parsed.
724-
header_line_want = "Markdown_example_h2"
725-
726-
with open('tests/markdown_example_h2.md', 'r') as mdfile:
727-
header_line_got = extract_header_from_markdown(mdfile)
728-
729-
self.assertEqual(header_line_want, header_line_got)
730-
731-
# Check that there must be a line before the h1 header breaker.
732-
header_line_want = "Markdown_example_alternate_bad"
733-
734-
with open('tests/markdown_example_alternate_bad.md', 'r') as mdfile:
717+
test_markdown_filenames = [
718+
[
719+
"tests/markdown_example_bad_header.md"
720+
],
721+
[
722+
"tests/markdown_example_h2.md"
723+
],
724+
[
725+
"tests/markdown_example_alternate_bad.md"
726+
],
727+
]
728+
@parameterized.expand(test_markdown_filenames)
729+
def test_extract_header_from_markdown_bad_headers(self, markdown_filename):
730+
# Check that empty string is returned if no valid header is found.
731+
with open(markdown_filename, 'r') as mdfile:
735732
header_line_got = extract_header_from_markdown(mdfile)
736733

737-
self.assertEqual(header_line_want, header_line_got)
734+
self.assertFalse(header_line_got)
738735

739736

740737
def test_parse_docstring_summary(self):

0 commit comments

Comments
 (0)