Skip to content
This repository was archived by the owner on Mar 26, 2026. It is now read-only.

Commit 4bbd921

Browse files
authored
fix: render emphasis properly and markdown in tables (#258)
* fix: render emphasis properly and markdown in tables * test: add and update unit test * fix: address review comments
1 parent eecd0e5 commit 4bbd921

3 files changed

Lines changed: 191 additions & 14 deletions

File tree

docfx_yaml/extension.py

Lines changed: 118 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -83,12 +83,12 @@ class Bcolors:
8383
REFFUNCTION = 'func'
8484
INITPY = '__init__.py'
8585
# Regex expression for checking references of pattern like ":class:`~package_v1.module`"
86-
REF_PATTERN = ':(py:)?(func|class|meth|mod|ref|attr|exc):`~?[a-zA-Z0-9_\.<> ]*(\(\))?`'
86+
REF_PATTERN = ':(py:)?(func|class|meth|mod|ref|attr|exc):`~?[a-zA-Z0-9_.<> ]*(\(\))?`'
8787
# Regex expression for checking references of pattern like "~package_v1.subpackage.module"
8888
REF_PATTERN_LAST = '~([a-zA-Z0-9_<>]*\.)*[a-zA-Z0-9_<>]*(\(\))?'
8989
# Regex expression for checking references of pattern like
9090
# "[module][google.cloud.cloudkms_v1.module]"
91-
REF_PATTERN_BRACKETS = '\[[a-zA-Z0-9\_\<\>\-\. ]+\]\[[a-zA-Z0-9\_\<\>\-\. ]+\]'
91+
REF_PATTERN_BRACKETS = '\[[a-zA-Z0-9_<>\-. ]+\]\[[a-zA-Z0-9_<>\-. ]+\]'
9292

9393
REF_PATTERNS = [
9494
REF_PATTERN,
@@ -758,12 +758,103 @@ def _extract_docstring_info(summary_info, summary, name):
758758
words = []
759759
else:
760760
words.append(word)
761-
761+
762762
index += 1
763763

764764
return top_summary
765765

766766

767+
def _reformat_codeblocks(content: str) -> str:
768+
"""Formats codeblocks from ``` to <pre>."""
769+
triple_backtick = '```'
770+
current_tag = '<pre>'
771+
next_tag = '</pre>'
772+
# If there are no proper pairs of triple backticks, don't format docstring.
773+
if content.count(triple_backtick) % 2 != 0:
774+
print(f'Docstring is not formatted well, missing proper pairs of triple backticks (```): {content}')
775+
return content
776+
while triple_backtick in content:
777+
content = content.replace(triple_backtick, current_tag, 1)
778+
# Alternate between replacing with <pre> and </pre>.
779+
current_tag, next_tag = next_tag, current_tag
780+
781+
return content
782+
783+
784+
def _reformat_code(content: str) -> str:
785+
"""Formats code from ` to <code>."""
786+
reformatted_lines = []
787+
788+
code_pattern = '`[^`\n]+`'
789+
code_start = '<code>'
790+
code_end = '</code>'
791+
prev_start = prev_end = 0
792+
# Convert `text` to <code>text</code>
793+
for matched_obj in re.finditer(code_pattern, content):
794+
start = matched_obj.start()
795+
end = matched_obj.end()
796+
code_content = content[start+1:end-1]
797+
798+
reformatted_lines.append(content[prev_end:start])
799+
reformatted_lines.append(f'{code_start}{code_content}{code_end}')
800+
prev_start, prev_end = start, end
801+
802+
reformatted_lines.append(content[prev_end:])
803+
804+
return ''.join(reformatted_lines)
805+
806+
807+
def reformat_markdown_to_html(content: str) -> str:
    """Converts Markdown syntax in the content to the equivalent HTML.

    Wrapper that runs every Markdown-to-HTML pass in turn: fenced codeblocks
    are converted first, then inline code.

    Markdown syntax cannot be used within HTML elements, and must be converted
    at YAML level.

    Args:
        content: the string to be reformatted.

    Returns:
        Content that has been formatted with proper HTML.
    """
    # Codeblock pass runs before the inline-code pass.
    return _reformat_code(_reformat_codeblocks(content))
826+
827+
828+
def reformat_summary(summary: str) -> str:
    """Applies any style changes to be made specifically for DocFX YAML.

    Makes the following changes:
      - converts ``text`` to `text`

    Lines that contain a code fence (```) and lines inside a fenced codeblock
    are left untouched, so code samples are not altered.

    Args:
        summary: The summary to be modified.

    Returns:
        Converted summary suitable for DocFX YAML.
    """
    single_backtick = '`'
    double_backtick = '``'
    triple_backtick = '```'

    # Fence tracking is only reliable when every fence has a partner. With an
    # odd number of fences, fall back to protecting fence lines only, so a
    # stray fence cannot suppress conversion for the rest of the summary.
    fences_balanced = summary.count(triple_backtick) % 2 == 0
    inside_codeblock = False

    reformatted_lines = []
    for line in summary.split('\n'):
        fence_count = line.count(triple_backtick)
        if fence_count:
            # A line holding a fence is never rewritten. An odd number of
            # fences on the line opens or closes a codeblock.
            if fences_balanced and fence_count % 2:
                inside_codeblock = not inside_codeblock
        elif not inside_codeblock:
            line = line.replace(double_backtick, single_backtick)
        reformatted_lines.append(line)

    return '\n'.join(reformatted_lines)
856+
857+
767858
# Returns appropriate product name to display for given full name of entry.
768859
def extract_product_name(name):
769860
if 'google.cloud' in name:
@@ -953,6 +1044,7 @@ def _update_friendly_package_name(path):
9531044

9541045
# Extract summary info into respective sections.
9551046
if summary:
1047+
summary = reformat_summary(summary)
9561048
top_summary = _extract_docstring_info(summary_info, summary, name)
9571049
try:
9581050
datam['summary'], datam['attributes'] = _parse_docstring_summary(top_summary)
@@ -1645,81 +1737,94 @@ def search_cross_references(obj, current_object_name: str, known_uids: List[str]
16451737
if obj["syntax"].get("parameters"):
16461738
for param in obj["syntax"]["parameters"]:
16471739
if param.get("description"):
1648-
param["description"] = convert_cross_references(
1740+
param_description = convert_cross_references(
16491741
param["description"],
16501742
current_object_name,
16511743
known_uids
16521744
)
1745+
param["description"] = reformat_markdown_to_html(param_description)
16531746

16541747
if param.get("id"):
1655-
param["id"] = convert_cross_references(
1748+
param_id = convert_cross_references(
16561749
param["id"],
16571750
current_object_name,
16581751
known_uids
16591752
)
1753+
param["id"] = reformat_markdown_to_html(param_id)
16601754

16611755
if param.get("var_type"):
1662-
param["var_type"] = convert_cross_references(
1756+
param_type = convert_cross_references(
16631757
param["var_type"],
16641758
current_object_name,
16651759
known_uids
16661760
)
1761+
param["var_type"] = reformat_markdown_to_html(param_type)
16671762

16681763
if obj["syntax"].get("exceptions"):
16691764
for exception in obj["syntax"]["exceptions"]:
16701765
if exception.get("description"):
1671-
exception["description"] = convert_cross_references(
1766+
exception_description = convert_cross_references(
16721767
exception["description"],
16731768
current_object_name,
16741769
known_uids
16751770
)
1771+
exception["description"] = (
1772+
reformat_markdown_to_html(exception_description))
16761773

16771774
if exception.get("var_type"):
1678-
exception["var_type"] = convert_cross_references(
1775+
exception_type = convert_cross_references(
16791776
exception["var_type"],
16801777
current_object_name,
16811778
known_uids
16821779
)
1780+
exception["var_type"] = (
1781+
reformat_markdown_to_html(exception_type))
16831782

16841783
if obj["syntax"].get("returns"):
16851784
for ret in obj["syntax"]["returns"]:
16861785
if ret.get("description"):
1687-
ret["description"] = convert_cross_references(
1786+
ret_description = convert_cross_references(
16881787
ret["description"],
16891788
current_object_name,
16901789
known_uids
16911790
)
1791+
ret["description"] = reformat_markdown_to_html(ret_description)
16921792

16931793
if ret.get("var_type"):
1694-
ret["var_type"] = convert_cross_references(
1794+
ret_type = convert_cross_references(
16951795
ret["var_type"],
16961796
current_object_name,
16971797
known_uids
16981798
)
1799+
ret["var_type"] = reformat_markdown_to_html(ret_type)
16991800

17001801

17011802
if obj.get("attributes"):
17021803
for attribute in obj["attributes"]:
17031804
if attribute.get("description"):
1704-
attribute["description"] = convert_cross_references(
1805+
attribute_description = convert_cross_references(
17051806
attribute["description"],
17061807
current_object_name,
17071808
known_uids
17081809
)
1810+
attribute["description"] = (
1811+
reformat_markdown_to_html(attribute_description))
17091812

17101813
if attribute.get("id"):
1711-
attribute["id"] = convert_cross_references(
1814+
attribute_id = convert_cross_references(
17121815
attribute["id"],
17131816
current_object_name,
17141817
known_uids
17151818
)
1819+
attribute["id"] = reformat_markdown_to_html(attribute_id)
17161820

17171821
if attribute.get("var_type"):
1718-
attribute["var_type"] = convert_cross_references(
1822+
attribute_type = convert_cross_references(
17191823
attribute["var_type"],
17201824
current_object_name,
17211825
known_uids
17221826
)
1827+
attribute["var_type"] = reformat_markdown_to_html(attribute_type)
17231828

17241829

17251830
def build_finished(app, exception):

tests/cross_references_post.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ items:
4040
var_type: google.api_core.exceptions.GoogleAPICallError
4141
parameters:
4242
- description: Required. Name of the stream to start reading from, of the form
43-
`projects/{project_id}/locations/{location}/sessions/{session_id}/streams/{stream_id}`
43+
<code>projects/{project_id}/locations/{location}/sessions/{session_id}/streams/{stream_id}</code>
4444
with <xref uid="google.cloud.bigquery_storage_v1.types.SplitReadStreamResponse">google.cloud.bigquery_storage_v1.types.SplitReadStreamResponse</xref>
4545
id: row
4646
var_type: <xref uid="google.cloud.bigquery_storage_v1.types.AvroRows">google.cloud.bigquery_storage_v1.types.AvroRows</xref>

tests/test_helpers.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
from docfx_yaml.extension import highlight_md_codeblocks
99
from docfx_yaml.extension import prepend_markdown_header
1010
from docfx_yaml.extension import clean_image_links
11+
from docfx_yaml.extension import reformat_summary
12+
from docfx_yaml.extension import reformat_markdown_to_html
1113

1214
import unittest
1315
from parameterized import parameterized
@@ -390,6 +392,76 @@ def test_find_uid_to_convert(self, current_word, uids, visited_words, cross_refe
390392
)
391393
self.assertEqual(cross_reference_got, cross_reference_want)
392394

395+
test_summary = [
396+
[
397+
"""Retrieve the ``ID`` for the object.
398+
399+
See https://cloud.google.com/storage/docs/json_api/v1/objects
400+
401+
The ID consists of the bucket name, object name, and generation number.
402+
""",
403+
"""Retrieve the `ID` for the object.
404+
405+
See https://cloud.google.com/storage/docs/json_api/v1/objects
406+
407+
The ID consists of the bucket name, object name, and generation number.
408+
""",
409+
],
410+
[
411+
# Test that codeblocks don't get formatted.
412+
"""The ID of the blob or `None`.
413+
414+
if the blob's resource has not been loaded from the server.
415+
416+
For example:
417+
```
418+
ID or None
419+
```
420+
""",
421+
"""The ID of the blob or `None`.
422+
423+
if the blob's resource has not been loaded from the server.
424+
425+
For example:
426+
```
427+
ID or None
428+
```
429+
""",
430+
],
431+
]
432+
@parameterized.expand(test_summary)
433+
def test_reformat_summary(self, summary, summary_want):
    """Checks that reformat_summary returns the expected summary."""
    self.assertEqual(summary_want, reformat_summary(summary))
436+
437+
438+
test_markdown_content = [
439+
[
440+
"""The resource name or `None`
441+
442+
if no Cloud KMS key was used, or the blob's resource has not been loaded from the server.
443+
444+
For example:
445+
```
446+
kms_key_name: ID
447+
```
448+
""",
449+
"""The resource name or <code>None</code>
450+
451+
if no Cloud KMS key was used, or the blob's resource has not been loaded from the server.
452+
453+
For example:
454+
<pre>
455+
kms_key_name: ID
456+
</pre>
457+
""",
458+
],
459+
]
460+
@parameterized.expand(test_markdown_content)
461+
def test_reformat_markdown_to_html(self, content, content_want):
    """Checks that reformat_markdown_to_html returns the expected HTML."""
    self.assertEqual(content_want, reformat_markdown_to_html(content))
464+
393465

394466
if __name__ == '__main__':
395467
unittest.main()

0 commit comments

Comments
 (0)