66import re
77import warnings
88from html import escape , unescape
9+ from html .parser import HTMLParser
910from typing import TYPE_CHECKING , Any , Callable , ClassVar , Match
1011from urllib .parse import urlsplit
1112from xml .etree .ElementTree import Element
@@ -44,7 +45,12 @@ def __getattr__(name: str) -> Any:
4445 rf"(?: class=(?P<class>{ _ATTR_VALUE } ))?(?P<attrs> [^<>]+)?>(?P<title>.*?)</span>" ,
4546 flags = re .DOTALL ,
4647)
47- """A regular expression to match mkdocs-autorefs' special reference markers
48+ """Deprecated. Use [`AUTOREF_RE`][mkdocs_autorefs.references.AUTOREF_RE] instead."""
49+
AUTOREF_RE = re.compile(
    r"<autoref (?P<attrs>.*?)>(?P<title>.*?)</autoref>",
    re.DOTALL,
)
"""The autoref HTML tag regular expression.

It matches mkdocs-autorefs' special reference markers, `<autoref ...>...</autoref>`,
in the [`on_post_page` hook][mkdocs_autorefs.plugin.AutorefsPlugin.on_post_page].
The raw attribute string is captured in the `attrs` group and the tag contents
in the `title` group; both may span several lines.
"""
5056
@@ -135,8 +141,8 @@ def _make_tag(self, identifier: str, text: str) -> Element:
135141 Returns:
136142 A new element.
137143 """
138- el = Element ("span " )
139- el .set ("data-autorefs- identifier" , identifier )
144+ el = Element ("autoref " )
145+ el .set ("identifier" , identifier )
140146 el .text = text
141147 return el
142148
@@ -167,7 +173,7 @@ def relative_url(url_a: str, url_b: str) -> str:
167173 return f"{ relative } #{ anchor } "
168174
169175
170- def fix_ref (url_mapper : Callable [[str ], str ], unmapped : list [str ]) -> Callable :
176+ def _legacy_fix_ref (url_mapper : Callable [[str ], str ], unmapped : list [str ]) -> Callable :
171177 """Return a `repl` function for [`re.sub`](https://docs.python.org/3/library/re.html#re.sub).
172178
173179 In our context, we match Markdown references and replace them with HTML links.
@@ -216,7 +222,84 @@ def inner(match: Match) -> str:
216222 return inner
217223
218224
219- def fix_refs (html : str , url_mapper : Callable [[str ], str ]) -> tuple [str , list [str ]]:
class _AutorefsAttrs(dict):
    """Attributes of an autoref tag, mapping attribute names to their values.

    A value of `None` denotes a valueless (boolean) attribute such as `optional`.
    """

    # Attributes interpreted by the reference fixer itself: they are never forwarded as-is.
    _handled_attrs: ClassVar[set[str]] = {"identifier", "optional", "hover", "class"}

    @property
    def remaining(self) -> str:
        """The attributes that are not handled specially, serialized as an HTML attribute string.

        Valueless attributes are emitted as a bare name, the others as `name="value"`.
        Values are HTML-escaped (quotes included) so that the result can be inserted
        verbatim into a tag, even when a value contains `"`, `&`, `<` or `>`.

        Returns:
            The space-separated attributes, or an empty string when there are none.
        """
        return " ".join(
            name if value is None else f'{name}="{escape(value)}"'
            for name, value in self.items()
            if name not in self._handled_attrs
        )
231+
232+
class _HTMLAttrsParser(HTMLParser):
    """HTML parser that only collects the attributes of the start tags it is fed."""

    def __init__(self) -> None:
        super().__init__()
        # Attributes seen since the last call to `parse`.
        self.attrs: dict[str, str | None] = {}

    def parse(self, html: str) -> _AutorefsAttrs:
        """Parse an HTML snippet and return the attributes of its start tag(s).

        Arguments:
            html: The HTML to parse, typically a single opening tag.

        Returns:
            A copy of the collected attributes.
        """
        # The instance is reused between calls: drop any input still buffered from a previous,
        # unterminated snippet, otherwise it would be prepended to this one.
        self.reset()
        self.attrs.clear()
        self.feed(html)
        return _AutorefsAttrs(self.attrs)

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:  # noqa: ARG002
        """Record the attributes of a start tag; the tag name itself is ignored."""
        self.attrs.update(attrs)


# Shared parser instance, reused for every parsed tag.
_html_attrs_parser = _HTMLAttrsParser()
248+
249+
def fix_ref(url_mapper: Callable[[str], str], unmapped: list[str]) -> Callable:
    """Return a `repl` function for [`re.sub`](https://docs.python.org/3/library/re.html#re.sub).

    In our context, we match `<autoref>` HTML tags and replace them with HTML links.

    When the matched reference's identifier is not mapped to a URL (the URL mapper raises `KeyError`):

    - an optional reference is replaced by its title alone, wrapped in a `<span>` showing
      the identifier as tooltip when the `hover` attribute is set too;
    - any other reference is turned back into Markdown reference syntax, and its identifier
      is appended to the outer `unmapped` list. It generally means the user is trying to
      cross-reference an object that was not collected and rendered, making it impossible
      to link to it. The caller can use this list to report such identifiers.

    Arguments:
        url_mapper: A callable that gets an object's site URL by its identifier,
            such as [mkdocs_autorefs.plugin.AutorefsPlugin.get_item_url][].
        unmapped: A list to store unmapped identifiers.

    Returns:
        The actual function accepting a [`Match` object](https://docs.python.org/3/library/re.html#match-objects)
        and returning the replacement strings.
    """

    def inner(match: Match) -> str:
        title = match["title"]
        # Wrap the raw attribute string in a dummy tag so that the HTML parser can split it.
        attrs = _html_attrs_parser.parse(f"<a {match['attrs']}>")
        identifier: str = attrs["identifier"]
        optional = "optional" in attrs
        hover = "hover" in attrs
        # The HTML parser decodes entities in attribute values:
        # escape the identifier again before writing it back into a `title` attribute.
        tooltip = escape(identifier)

        try:
            url = url_mapper(unescape(identifier))
        except KeyError:
            if optional:
                if hover:
                    return f'<span title="{tooltip}">{title}</span>'
                return title
            unmapped.append(identifier)
            if title == identifier:
                return f"[{identifier}][]"
            return f"[{title}][{identifier}]"

        # A URL with a scheme or a network location points outside of the site.
        parsed = urlsplit(url)
        external = parsed.scheme or parsed.netloc
        classes = (attrs.get("class") or "").strip().split()
        classes = ["autorefs", "autorefs-external" if external else "autorefs-internal", *classes]
        class_attr = " ".join(classes)
        if remaining := attrs.remaining:
            remaining = f" {remaining}"
        if optional and hover:
            return f'<a class="{class_attr}" title="{tooltip}" href="{escape(url)}"{remaining}>{title}</a>'
        return f'<a class="{class_attr}" href="{escape(url)}"{remaining}>{title}</a>'

    return inner
300+
301+
302+ def fix_refs (html : str , url_mapper : Callable [[str ], str ], * , _legacy_refs : bool = True ) -> tuple [str , list [str ]]:
220303 """Fix all references in the given HTML text.
221304
222305 Arguments:
@@ -228,7 +311,9 @@ def fix_refs(html: str, url_mapper: Callable[[str], str]) -> tuple[str, list[str
228311 The fixed HTML.
229312 """
230313 unmapped : list [str ] = []
231- html = AUTO_REF_RE .sub (fix_ref (url_mapper , unmapped ), html )
314+ html = AUTOREF_RE .sub (fix_ref (url_mapper , unmapped ), html )
315+ if _legacy_refs :
316+ html = AUTO_REF_RE .sub (_legacy_fix_ref (url_mapper , unmapped ), html )
232317 return html , unmapped
233318
234319
0 commit comments