Skip to content

Commit 2caeffe

Browse files
authored
Fix: handle AssertionError from html.parser on edge-case markup (#51)
fix: handle AssertionError from html.parser on edge-case markup. Python 3.13's html.parser raises AssertionError when it encounters certain edge-case markup, such as `<<>>`, in the content. This occurs in the _RawHTMLPreprocessor, which feeds raw Markdown into HTMLParser to extract anchor IDs. Wrap the parser.feed() and parser.close() calls in a try/except that catches AssertionError and RuntimeError, allowing the build to continue gracefully. Fixes #4001
1 parent f85d357 commit 2caeffe

2 files changed

Lines changed: 21 additions & 2 deletions

File tree

mkdocs/structure/pages.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -572,8 +572,15 @@ def __init__(self) -> None:
572572

573573
def run(self, lines: list[str]) -> list[str]:
574574
parser = _HTMLHandler()
575-
parser.feed("\n".join(lines))
576-
parser.close()
575+
try:
576+
parser.feed("\n".join(lines))
577+
parser.close()
578+
except (AssertionError, RuntimeError):
579+
# Python's html.parser can throw AssertionError on edge-case
580+
# markup such as "<<>>" (Python 3.13+ regression).
581+
# RuntimeError is raised when the parser encounters deeply
582+
# nested or otherwise problematic input.
583+
pass
577584
self.present_anchor_ids = parser.present_anchor_ids
578585
return lines
579586

mkdocs/tests/structure/page_tests.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,18 @@ def test_page_title_from_markdown_html_entity(self):
401401
def test_page_title_from_markdown_strip_raw_html(self):
402402
self._test_extract_title("""# Hello <b>world</b>""", expected="Hello world")
403403

404+
def test_raw_html_preprocessor_edge_case_markup(self):
405+
# Regression test for https://github.com/mkdocs/mkdocs/issues/4001
406+
# Python 3.13's html.parser throws AssertionError on "<<>>" markup.
407+
from mkdocs.structure.pages import _RawHTMLPreprocessor as RHP
408+
409+
proc = RHP()
410+
# The preprocessor should handle edge-case markup without raising.
411+
lines = ["# Title", "", "The PDF object as an `obj<</>>endobj` text block."]
412+
result = proc.run(lines)
413+
self.assertEqual(result, lines)
414+
self.assertEqual(proc.present_anchor_ids, set())
415+
404416
def test_page_title_from_markdown_strip_comments(self):
405417
self._test_extract_title(
406418
"""# foo <!-- comment with <em> --> bar""", expected="foo bar"

0 commit comments

Comments
 (0)