Skip to content

Commit e5a2c9c

Browse files
authored
Strip HTML tags from search entry titles (#53)
Sanitize page and section titles in the search index by stripping any HTML tags using the existing _strip_tags utility. This prevents raw HTML from appearing in search results, which is both a UI issue and a potential XSS vector when page titles contain inline HTML from Markdown rendering. Fixes mkdocs/mkdocs#3560 and mkdocs/mkdocs#3213.
1 parent 50c080c commit e5a2c9c

2 files changed

Lines changed: 36 additions & 0 deletions

File tree

mkdocs/contrib/search/search_index.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
from html.parser import HTMLParser
99
from typing import TYPE_CHECKING
1010

11+
from mkdocs.utils.rendering import _strip_tags
12+
1113
if TYPE_CHECKING:
1214
from mkdocs.structure.pages import Page
1315
from mkdocs.structure.toc import AnchorLink, TableOfContents
@@ -50,6 +52,11 @@ def _add_entry(self, title: str | None, text: str, loc: str) -> None:
5052
text = text.replace("\u00a0", " ")
5153
text = re.sub(r"[ \t\n\r\f\v]+", " ", text.strip())
5254

55+
# Strip HTML tags from the title to prevent raw HTML from appearing
56+
# in search results (which could also be an XSS vector).
57+
if title is not None:
58+
title = _strip_tags(title)
59+
5360
self._entries.append({"title": title, "text": text, "location": loc})
5461

5562
def add_entry_from_context(self, page: Page) -> None:

mkdocs/tests/search_tests.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,3 +651,32 @@ def test_prebuild_index_node(self, mock_popen):
651651
self.assertEqual(mock_popen.call_count, 1)
652652
self.assertEqual(mock_popen_obj.communicate.call_count, 1)
653653
self.assertEqual(result, expected)
654+
655+
def test_html_stripped_from_titles(self):
656+
"""HTML tags in page and section titles are stripped from search entries."""
657+
plugin = search.SearchPlugin()
658+
errors, warnings = plugin.load_config({})
659+
self.assertEqual(errors, [])
660+
self.assertEqual(warnings, [])
661+
662+
config = load_config(plugins=["search"])
663+
# A page title with inline HTML from Markdown (e.g. `<code>foo</code>`)
664+
page = Page(
665+
"The <code>mkdocs</code> Project",
666+
File(
667+
"index.md", config.docs_dir, config.site_dir, config.use_directory_urls
668+
),
669+
config,
670+
)
671+
page.content = """
672+
<h1 id="heading-1">Heading <em>one</em></h1>
673+
<p>Content</p>"""
674+
page.markdown = "# Heading 1\n\nContent"
675+
page.toc = get_toc(get_markdown_toc(page.markdown))
676+
677+
index = search_index.SearchIndex(**plugin.config)
678+
index.add_entry_from_context(page)
679+
680+
self.assertEqual(len(index._entries), 2)
681+
self.assertEqual(index._entries[0]["title"], "The mkdocs Project")
682+
self.assertEqual(index._entries[1]["title"], "Heading 1")

0 commit comments

Comments
 (0)