fix: render emphasis properly and markdown in tables (#258)

dandhlee · web-flow · commit 4bbd921f116c · 2022-10-07T12:04:05.000-04:00
* fix: render emphasis properly and markdown in tables

* test: add and update unit test

* fix: address review comments
diff --git a/docfx_yaml/extension.py b/docfx_yaml/extension.py
@@ -83,12 +83,12 @@ class Bcolors:
 REFFUNCTION = 'func'
 INITPY = '__init__.py'
 # Regex expression for checking references of pattern like ":class:`~package_v1.module`"
-REF_PATTERN = ':(py:)?(func|class|meth|mod|ref|attr|exc):`~?[a-zA-Z0-9_\.<> ]*(\(\))?`'
+REF_PATTERN = ':(py:)?(func|class|meth|mod|ref|attr|exc):`~?[a-zA-Z0-9_.<> ]*(\(\))?`'
 # Regex expression for checking references of pattern like "~package_v1.subpackage.module"
 REF_PATTERN_LAST = '~([a-zA-Z0-9_<>]*\.)*[a-zA-Z0-9_<>]*(\(\))?'
 # Regex expression for checking references of pattern like
 # "[module][google.cloud.cloudkms_v1.module]"
-REF_PATTERN_BRACKETS = '\[[a-zA-Z0-9\_\<\>\-\. ]+\]\[[a-zA-Z0-9\_\<\>\-\. ]+\]'
+REF_PATTERN_BRACKETS = '\[[a-zA-Z0-9_<>\-. ]+\]\[[a-zA-Z0-9_<>\-. ]+\]'
 
 REF_PATTERNS = [
     REF_PATTERN,
@@ -758,12 +758,103 @@ def _extract_docstring_info(summary_info, summary, name):
             words = []
         else:
             words.append(word)
-    
+
         index += 1
 
     return top_summary
 
 
+def _reformat_codeblocks(content: str) -> str:
+    """Replaces paired ``` fences with <pre> and </pre>; unpaired input is returned unchanged."""
+    triple_backtick = '```'
+    current_tag = '<pre>'  # tag written at the next fence; the first fence opens a block
+    next_tag = '</pre>'
+    # An odd fence count means an unclosed block: report it and return content as-is.
+    if content.count(triple_backtick) % 2 != 0:
+        print(f'Docstring is not formatted well, missing proper pairs of triple backticks (```): {content}')
+        return content
+    while triple_backtick in content:  # each pass replaces only the leftmost remaining fence
+        content = content.replace(triple_backtick, current_tag, 1)
+        # Swap the tags so opening and closing tags alternate from fence to fence.
+        current_tag, next_tag = next_tag, current_tag
+
+    return content
+
+
+def _reformat_code(content: str) -> str:
+    """Wraps every single-line `text` span in <code>text</code>, dropping the backticks."""
+    reformatted_lines = []  # output fragments (not lines); joined with '' at the end
+
+    code_pattern = '`[^`\n]+`'  # backtick pair around 1+ chars, none a backtick or newline
+    code_start = '<code>'
+    code_end = '</code>'
+    prev_start = prev_end = 0  # NOTE(review): prev_start is assigned but never read
+    # Rebuild content piecewise: text between matches, then <code>text</code> per match.
+    for matched_obj in re.finditer(code_pattern, content):
+        start = matched_obj.start()
+        end = matched_obj.end()
+        code_content = content[start+1:end-1]  # the match without its two backticks
+
+        reformatted_lines.append(content[prev_end:start])  # untouched text before this match
+        reformatted_lines.append(f'{code_start}{code_content}{code_end}')
+        prev_start, prev_end = start, end
+
+    reformatted_lines.append(content[prev_end:])  # tail after the last match, or all of content
+
+    return ''.join(reformatted_lines)
+
+
+def reformat_markdown_to_html(content: str) -> str:
+    """Converts the Markdown code syntax in the given content to HTML tags.
+
+    Wraps _reformat_codeblocks (``` to <pre>) and _reformat_code (` to <code>).
+
+    Markdown syntax cannot be used within HTML elements, and must be converted
+    at YAML level.
+
+    Args:
+        content: the string to be reformatted.
+
+    Returns:
+        Content with code blocks and inline code rewritten as HTML tags.
+    """
+
+    content = _reformat_codeblocks(content)  # ``` fences first
+    content = _reformat_code(content)  # then single-backtick spans
+
+    return content
+
+
+def reformat_summary(summary: str) -> str:
+    """Applies any style changes to be made specifically for DocFX YAML.
+
+    Makes the following changes:
+      - converts ``text`` to `text` on lines that contain no ``` fence
+
+    Args:
+        summary: The summary to be modified.
+
+    Returns:
+        Converted summary suitable for DocFX YAML.
+    """
+
+    reformatted_lines = []
+
+    single_backtick = '`'
+    double_backtick = '``'
+    triple_backtick = '```'
+    for line in summary.split('\n'):  # checked per line; lines between fences are not exempt
+        # Only rewrite double backticks (``) on lines without a codeblock
+        # fence (```); lines holding a fence are kept exactly as they are.
+        if triple_backtick not in line and double_backtick in line:
+            reformatted_lines.append(line.replace(double_backtick, single_backtick))
+
+        else:
+            reformatted_lines.append(line)
+
+    return '\n'.join(reformatted_lines)
+
+
 # Returns appropriate product name to display for given full name of entry.
 def extract_product_name(name):
     if 'google.cloud' in name:
@@ -953,6 +1044,7 @@ def _update_friendly_package_name(path):
 
         # Extract summary info into respective sections.
         if summary:
+            summary = reformat_summary(summary)
             top_summary = _extract_docstring_info(summary_info, summary, name)
             try:
                 datam['summary'], datam['attributes'] = _parse_docstring_summary(top_summary)
@@ -1645,81 +1737,94 @@ def search_cross_references(obj, current_object_name: str, known_uids: List[str]
         if obj["syntax"].get("parameters"):
             for param in obj["syntax"]["parameters"]:
                 if param.get("description"):
-                    param["description"] = convert_cross_references(
+                    param_description = convert_cross_references(
                         param["description"],
                         current_object_name,
                         known_uids
                     )
+                    param["description"] = reformat_markdown_to_html(param_description)
 
                 if param.get("id"):
-                    param["id"] = convert_cross_references(
+                    param_id = convert_cross_references(
                         param["id"],
                         current_object_name,
                         known_uids
                     )
+                    param["id"] = reformat_markdown_to_html(param_id)
 
                 if param.get("var_type"):
-                    param["var_type"] = convert_cross_references(
+                    param_type = convert_cross_references(
                         param["var_type"],
                         current_object_name,
                         known_uids
                     )
+                    param["var_type"] = reformat_markdown_to_html(param_type)
 
         if obj["syntax"].get("exceptions"):
             for exception in obj["syntax"]["exceptions"]:
                 if exception.get("description"):
-                    exception["description"] = convert_cross_references(
+                    exception_description = convert_cross_references(
                         exception["description"],
                         current_object_name,
                         known_uids
                     )
+                    exception["description"] = (
+                        reformat_markdown_to_html(exception_description))
 
                 if exception.get("var_type"):
-                    exception["var_type"] = convert_cross_references(
+                    exception_type = convert_cross_references(
                         exception["var_type"],
                         current_object_name,
                         known_uids
                     )
+                    exception["var_type"] = (
+                        reformat_markdown_to_html(exception_type))
 
         if obj["syntax"].get("returns"):
             for ret in obj["syntax"]["returns"]:
                 if ret.get("description"):
-                    ret["description"] = convert_cross_references(
+                    ret_description = convert_cross_references(
                         ret["description"],
                         current_object_name,
                         known_uids
                     )
+                    ret["description"] = reformat_markdown_to_html(ret_description)
 
                 if ret.get("var_type"):
-                    ret["var_type"] = convert_cross_references(
+                    ret_type = convert_cross_references(
                         ret["var_type"],
                         current_object_name,
                         known_uids
                     )
+                    ret["var_type"] = reformat_markdown_to_html(ret_type)
 
 
     if obj.get("attributes"):
         for attribute in obj["attributes"]:
             if attribute.get("description"):
-                attribute["description"] = convert_cross_references(
+                attribute_description = convert_cross_references(
                     attribute["description"],
                     current_object_name,
                     known_uids
                 )
+                attribute["description"] = (
+                    reformat_markdown_to_html(attribute_description))
 
             if attribute.get("id"):
-                attribute["id"] = convert_cross_references(
+                attribute_id = convert_cross_references(
                     attribute["id"],
                     current_object_name,
                     known_uids
                 )
+                attribute["id"] = reformat_markdown_to_html(attribute_id)
 
             if attribute.get("var_type"):
-                attribute["var_type"] = convert_cross_references(
+                attribute_type = convert_cross_references(
                     attribute["var_type"],
                     current_object_name,
                     known_uids
                 )
+                attribute["var_type"] = reformat_markdown_to_html(attribute_type)
 
 
 def build_finished(app, exception):
diff --git a/tests/cross_references_post.yaml b/tests/cross_references_post.yaml
@@ -40,7 +40,7 @@ items:
       var_type: google.api_core.exceptions.GoogleAPICallError
     parameters:
     - description: Required. Name of the stream to start reading from, of the form
-        `projects/{project_id}/locations/{location}/sessions/{session_id}/streams/{stream_id}`
+        <code>projects/{project_id}/locations/{location}/sessions/{session_id}/streams/{stream_id}</code>
         with <xref uid="google.cloud.bigquery_storage_v1.types.SplitReadStreamResponse">google.cloud.bigquery_storage_v1.types.SplitReadStreamResponse</xref>
       id: row
       var_type: <xref uid="google.cloud.bigquery_storage_v1.types.AvroRows">google.cloud.bigquery_storage_v1.types.AvroRows</xref>
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
@@ -8,6 +8,8 @@
 from docfx_yaml.extension import highlight_md_codeblocks
 from docfx_yaml.extension import prepend_markdown_header
 from docfx_yaml.extension import clean_image_links
+from docfx_yaml.extension import reformat_summary
+from docfx_yaml.extension import reformat_markdown_to_html
 
 import unittest
 from parameterized import parameterized
@@ -390,6 +392,76 @@ def test_find_uid_to_convert(self, current_word, uids, visited_words, cross_refe
         )
         self.assertEqual(cross_reference_got, cross_reference_want)
 
+    test_summary = [  # each case: [input summary, expected reformat_summary output]
+        [
+            """Retrieve the ``ID`` for the object.
+
+See https://cloud.google.com/storage/docs/json_api/v1/objects
+
+The ID consists of the bucket name, object name, and generation number.
+            """,
+            """Retrieve the `ID` for the object.
+
+See https://cloud.google.com/storage/docs/json_api/v1/objects
+
+The ID consists of the bucket name, object name, and generation number.
+            """,
+        ],
+        [
+            # Single backticks and ``` codeblocks must come back unchanged.
+            """The ID of the blob or `None`.
+
+if the blob's resource has not been loaded from the server.
+
+For example:
+```
+    ID or None
+```
+            """,
+            """The ID of the blob or `None`.
+
+if the blob's resource has not been loaded from the server.
+
+For example:
+```
+    ID or None
+```
+            """,
+        ],
+    ]
+    @parameterized.expand(test_summary)  # one generated test per case above
+    def test_reformat_summary(self, summary, summary_want):
+        summary_got = reformat_summary(summary)
+        self.assertEqual(summary_want, summary_got)
+
+
+    test_markdown_content = [  # each case: [Markdown input, expected HTML output]
+        [
+            """The resource name or `None`
+
+if no Cloud KMS key was used, or the blob's resource has not been loaded from the server.
+
+For example:
+```
+    kms_key_name: ID
+```
+            """,
+            """The resource name or <code>None</code>
+
+if no Cloud KMS key was used, or the blob's resource has not been loaded from the server.
+
+For example:
+<pre>
+    kms_key_name: ID
+</pre>
+            """,
+        ],
+    ]
+    @parameterized.expand(test_markdown_content)  # one generated test per case above
+    def test_reformat_markdown_to_html(self, content, content_want):
+        content_got = reformat_markdown_to_html(content)
+        self.assertEqual(content_want, content_got)
+
 
 if __name__ == '__main__':
     unittest.main()