From 0851b53aa32eb6134fb790aba3223dc226f9bdf6 Mon Sep 17 00:00:00 2001 From: stephffuller Date: Thu, 3 Jul 2025 13:15:59 -0400 Subject: [PATCH 1/5] Update plugin.py to add support for mkdocs-autorefs Caveat - I'm no developer, and this was written with Claude Sonnet 4 over several trial-and-error iterations. I tested in my output pdf, and the autoref links are working as expected. YMMV. --- src/mkdocs_print_site_plugin/plugin.py | 135 +++++++++++++++++++++++++ 1 file changed, 135 insertions(+) diff --git a/src/mkdocs_print_site_plugin/plugin.py b/src/mkdocs_print_site_plugin/plugin.py index 0597821..8248ce5 100644 --- a/src/mkdocs_print_site_plugin/plugin.py +++ b/src/mkdocs_print_site_plugin/plugin.py @@ -330,6 +330,141 @@ def on_post_build(self, config, **kwargs): if config.get("plugins", {}).get("drawio"): html = config.get("plugins", {}).get("drawio").render_drawio_diagrams(html, self.print_page) + # Compatibility with mkdocs-autorefs + # As this plugin processes cross-references in the on_env event, + # which happens after the print page is generated, it's necessary to + # manually execute the autorefs fix_refs function here. 
+ logger.info(f"[mkdocs-print-site] Available plugins: {list(config.get('plugins', {}).keys())}") + autorefs_plugin = config.get("plugins", {}).get("mkdocs-autorefs") or config.get("plugins", {}).get("autorefs") + if autorefs_plugin: + logger.info("[mkdocs-print-site] Processing autorefs cross-references...") + import functools + from mkdocs_autorefs._internal.references import fix_refs + + # First, extract all available anchors from the HTML + import re as regex_module + anchor_pattern = r'(?:id="([^"]+)"|name="([^"]+)")' + anchor_matches = regex_module.findall(anchor_pattern, html, regex_module.IGNORECASE) + available_anchors = set() + for match in anchor_matches: + # Each match is a tuple (id_value, name_value), one is empty + anchor = match[0] or match[1] + if anchor: + available_anchors.add(anchor) + + # Create custom url_mapper that converts cross-references to internal anchors + def print_page_url_mapper(identifier, from_url=None): + """ + Custom URL mapper for print page that converts all cross-references + to internal anchors in the same page instead of external URLs. 
+ """ + try: + # Get the original URL from autorefs + original_url, title = autorefs_plugin.get_item_url(identifier, from_url) + + # Check if identifier directly exists as anchor + if identifier in available_anchors: + return f"#{identifier}", title + + # Extract anchor part from URL if it exists + if '#' in original_url: + anchor = original_url.split('#')[-1] + + # Check if this anchor actually exists in the HTML + if anchor in available_anchors: + return f"#{anchor}", title + else: + # Try to find a similar anchor (case-insensitive, partial match) + anchor_lower = anchor.lower() + for available_anchor in available_anchors: + if (available_anchor.lower() == anchor_lower or + anchor_lower in available_anchor.lower() or + available_anchor.lower() in anchor_lower): + return f"#{available_anchor}", title + + return f"#{anchor}", title # Return original anchor anyway + else: + # If no anchor in original URL, try fuzzy matching with identifier + identifier_lower = identifier.lower() + for available_anchor in available_anchors: + if (available_anchor.lower() == identifier_lower or + identifier_lower in available_anchor.lower() or + available_anchor.lower() in identifier_lower): + return f"#{available_anchor}", title + + return f"#{identifier}", title # Return anyway, might work + + except Exception as e: + # Fallback: check if identifier exists as anchor or find fuzzy match + if identifier in available_anchors: + return f"#{identifier}", identifier + else: + # Try fuzzy matching as fallback + identifier_lower = identifier.lower() + for available_anchor in available_anchors: + if (available_anchor.lower() == identifier_lower or + identifier_lower in available_anchor.lower()): + return f"#{available_anchor}", identifier + return f"#{identifier}", identifier # Return anyway as last fallback + + # Apply cross-references to the HTML + html, unmapped = fix_refs( + html, + print_page_url_mapper, + link_titles=autorefs_plugin._link_titles, + 
strip_title_tags=autorefs_plugin._strip_title_tags, + _legacy_refs=autorefs_plugin.legacy_refs, + ) + if unmapped: + logger.warning(f"[mkdocs-print-site] Unmapped autorefs: {[ref for ref, _ in unmapped]}") + else: + logger.info("[mkdocs-print-site] No autorefs plugin found") + + # Alternative autorefs implementation when plugin isn't available + if not autorefs_plugin: + # Try alternative approach: look for autorefs patterns in HTML and process them + try: + from mkdocs_autorefs._internal.references import AUTOREF_RE, fix_refs + + # Check if there are any autorefs patterns in the HTML + autoref_matches = AUTOREF_RE.findall(html) + if autoref_matches: + # Extract available anchors (same as above) + import re as regex_module + anchor_pattern = r'(?:id="([^"]+)"|name="([^"]+)")' + anchor_matches = regex_module.findall(anchor_pattern, html, regex_module.IGNORECASE) + available_anchors = set() + for match in anchor_matches: + anchor = match[0] or match[1] + if anchor: + available_anchors.add(anchor) + + # Create url_mapper that converts to internal anchors + def simple_url_mapper(identifier): + # Check if identifier exists as anchor + if identifier in available_anchors: + return f"#{identifier}", identifier + + # Try to find similar anchor + identifier_lower = identifier.lower() + for available_anchor in available_anchors: + if (available_anchor.lower() == identifier_lower or + identifier_lower in available_anchor.lower() or + available_anchor.lower() in identifier_lower): + return f"#{available_anchor}", identifier + + # Fallback: use identifier anyway + return f"#{identifier}", identifier + + html, unmapped = fix_refs(html, simple_url_mapper) + if unmapped: + logger.warning(f"[mkdocs-print-site] Could not resolve: {[ref for ref, _ in unmapped]}") + + except ImportError: + logger.warning("[mkdocs-print-site] mkdocs-autorefs not available for processing") + except Exception as e: + logger.warning(f"[mkdocs-print-site] Error processing autorefs: {e}") + # Compatibility 
with https://github.com/g-provost/lightgallery-markdown # This plugin insert link hrefs with double dashes, f.e. # From f92aff30fbd057ae18d0fc28f294a5be964fff24 Mon Sep 17 00:00:00 2001 From: stephffuller Date: Wed, 9 Jul 2025 16:43:07 -0400 Subject: [PATCH 2/5] Changes as per tvink review of plugin.py --- src/mkdocs_print_site_plugin/plugin.py | 48 ++------------------------ 1 file changed, 3 insertions(+), 45 deletions(-) diff --git a/src/mkdocs_print_site_plugin/plugin.py b/src/mkdocs_print_site_plugin/plugin.py index 8248ce5..bc183bc 100644 --- a/src/mkdocs_print_site_plugin/plugin.py +++ b/src/mkdocs_print_site_plugin/plugin.py @@ -2,6 +2,8 @@ import os import re import sys +import functools + from mkdocs.config import config_options from mkdocs.exceptions import PluginError @@ -334,11 +336,8 @@ def on_post_build(self, config, **kwargs): # As this plugin processes cross-references in the on_env event, # which happens after the print page is generated, it's necessary to # manually execute the autorefs fix_refs function here. 
- logger.info(f"[mkdocs-print-site] Available plugins: {list(config.get('plugins', {}).keys())}") autorefs_plugin = config.get("plugins", {}).get("mkdocs-autorefs") or config.get("plugins", {}).get("autorefs") if autorefs_plugin: - logger.info("[mkdocs-print-site] Processing autorefs cross-references...") - import functools from mkdocs_autorefs._internal.references import fix_refs # First, extract all available anchors from the HTML @@ -418,48 +417,7 @@ def print_page_url_mapper(identifier, from_url=None): if unmapped: logger.warning(f"[mkdocs-print-site] Unmapped autorefs: {[ref for ref, _ in unmapped]}") else: - logger.info("[mkdocs-print-site] No autorefs plugin found") - - # Alternative autorefs implementation when plugin isn't available - if not autorefs_plugin: - # Try alternative approach: look for autorefs patterns in HTML and process them - try: - from mkdocs_autorefs._internal.references import AUTOREF_RE, fix_refs - - # Check if there are any autorefs patterns in the HTML - autoref_matches = AUTOREF_RE.findall(html) - if autoref_matches: - # Extract available anchors (same as above) - import re as regex_module - anchor_pattern = r'(?:id="([^"]+)"|name="([^"]+)")' - anchor_matches = regex_module.findall(anchor_pattern, html, regex_module.IGNORECASE) - available_anchors = set() - for match in anchor_matches: - anchor = match[0] or match[1] - if anchor: - available_anchors.add(anchor) - - # Create url_mapper that converts to internal anchors - def simple_url_mapper(identifier): - # Check if identifier exists as anchor - if identifier in available_anchors: - return f"#{identifier}", identifier - - # Try to find similar anchor - identifier_lower = identifier.lower() - for available_anchor in available_anchors: - if (available_anchor.lower() == identifier_lower or - identifier_lower in available_anchor.lower() or - available_anchor.lower() in identifier_lower): - return f"#{available_anchor}", identifier - - # Fallback: use identifier anyway - return 
f"#{identifier}", identifier - - html, unmapped = fix_refs(html, simple_url_mapper) - if unmapped: - logger.warning(f"[mkdocs-print-site] Could not resolve: {[ref for ref, _ in unmapped]}") - + logger.info("[mkdocs-print-site] No autorefs plugin found") except ImportError: logger.warning("[mkdocs-print-site] mkdocs-autorefs not available for processing") except Exception as e: From 7db997c8804f30d79a7ea503809c9f47085617ed Mon Sep 17 00:00:00 2001 From: stephffuller Date: Wed, 9 Jul 2025 16:44:43 -0400 Subject: [PATCH 3/5] One last change to plugin.py per tvink review --- src/mkdocs_print_site_plugin/plugin.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mkdocs_print_site_plugin/plugin.py b/src/mkdocs_print_site_plugin/plugin.py index bc183bc..3a56e4c 100644 --- a/src/mkdocs_print_site_plugin/plugin.py +++ b/src/mkdocs_print_site_plugin/plugin.py @@ -3,6 +3,7 @@ import re import sys import functools +import re as regex_module from mkdocs.config import config_options From 80326660306bd68eb3cd5da2b00d719fd42bbea1 Mon Sep 17 00:00:00 2001 From: stephffuller Date: Wed, 9 Jul 2025 18:01:38 -0400 Subject: [PATCH 4/5] Update plugin.py --- src/mkdocs_print_site_plugin/plugin.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/mkdocs_print_site_plugin/plugin.py b/src/mkdocs_print_site_plugin/plugin.py index 3a56e4c..dc86dd1 100644 --- a/src/mkdocs_print_site_plugin/plugin.py +++ b/src/mkdocs_print_site_plugin/plugin.py @@ -3,7 +3,6 @@ import re import sys import functools -import re as regex_module from mkdocs.config import config_options @@ -417,12 +416,6 @@ def print_page_url_mapper(identifier, from_url=None): ) if unmapped: logger.warning(f"[mkdocs-print-site] Unmapped autorefs: {[ref for ref, _ in unmapped]}") - else: - logger.info("[mkdocs-print-site] No autorefs plugin found") - except ImportError: - logger.warning("[mkdocs-print-site] mkdocs-autorefs not available for processing") - except Exception as e: - logger.warning(f"[mkdocs-print-site] 
Error processing autorefs: {e}") # Compatibility with https://github.com/g-provost/lightgallery-markdown # This plugin insert link hrefs with double dashes, f.e. From 9a09c27861e99d4921461f85dd0a5792f27c48ec Mon Sep 17 00:00:00 2001 From: stephffuller Date: Wed, 9 Jul 2025 18:03:51 -0400 Subject: [PATCH 5/5] Update plugin.py --- src/mkdocs_print_site_plugin/plugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mkdocs_print_site_plugin/plugin.py b/src/mkdocs_print_site_plugin/plugin.py index dc86dd1..020170f 100644 --- a/src/mkdocs_print_site_plugin/plugin.py +++ b/src/mkdocs_print_site_plugin/plugin.py @@ -3,6 +3,7 @@ import re import sys import functools +import re as regex_module from mkdocs.config import config_options @@ -341,7 +342,6 @@ def on_post_build(self, config, **kwargs): from mkdocs_autorefs._internal.references import fix_refs # First, extract all available anchors from the HTML - import re as regex_module anchor_pattern = r'(?:id="([^"]+)"|name="([^"]+)")' anchor_matches = regex_module.findall(anchor_pattern, html, regex_module.IGNORECASE) available_anchors = set()