fix: add hardcoded IAM references temporarily (#209)

dandhlee · tbpg · web-flow · commit 5dc99d25532e · 2022-05-24T14:47:03.000-04:00
* fix: add hardcoded reference for IAM references

* test: update unit test

* fix: apply suggestion

Co-authored-by: Tyler Bui-Palsulich <26876514+tbpg@users.noreply.github.com>

* fix: separate code into helper function and add proper docstrings

* fix: ensure hard_coded_references is optional and not used if not found

* test: update unit test with new helper function

* fix: update method name and apply review suggestions

* test: update unit test

* fix: address review suggestion for adding return type

Co-authored-by: Tyler Bui-Palsulich <26876514+tbpg@users.noreply.github.com>

* fix: address review comments

* fix: update variable name from keyword to uid

* fix: update method names, variable names and refactor methods

* test: update unit test

* fix: update comments, move hardcoded bits to the back

Co-authored-by: Tyler Bui-Palsulich <26876514+tbpg@users.noreply.github.com>
diff --git a/docfx_yaml/extension.py b/docfx_yaml/extension.py
@@ -29,7 +29,7 @@
 from pathlib import Path
 from functools import partial
 from itertools import zip_longest
-from typing import List, Iterable
+from typing import Dict, Iterable, List, Optional
 from black import InvalidInput
 
 try:
@@ -1417,91 +1417,167 @@ def find_markdown_pages(app, outdir):
                 'href': mdfile.name.lower(),
             })
 
+def find_uid_to_convert(
+    current_word: str,
+    words: List[str],
+    index: int,
+    known_uids: List[str],
+    current_object_name: str,
+    processed_words: List[str],
+    hard_coded_references: Dict[str, str] = None
+) -> Optional[str]:
+    """Given `current_word`, returns the `uid` to convert to cross reference if found.
 
-# Finds and replaces occurrences which should be a cross reference in the given
-# content, except for the current name.
-def convert_cross_references(content: str, current_name: str, entry_names: List[str]):
+    Args:
+        current_word: current word being looked at
+        words: list of words used to check and compare content before and after `current_word`
+        index: index position of `current_word` within words
+        known_uids: list of uid references to look for
+        current_object_name: the name of the current Python object being processed
+        processed_words: list of words that have been processed so far
+        hard_coded_references: Optional dict mapping hard coded reference uids to their links
+
+    Returns:
+        None if current word does not contain any reference `uid`, or the `uid`
+          that should be converted.
+    """
+    for uid in known_uids:
+        # Do not convert references to itself or containing partial
+        # references. This could result in `storage.types.ReadSession` being
+        # prematurely converted to
+        # `<xref uid="storage.types">storage.types</xref>ReadSession`
+        # instead of
+        # `<xref uid="storage.types.ReadSession">storage.types.ReadSession</xref>`
+        if uid in current_object_name:
+            continue
+
+        if uid in current_word:
+            # If the cross reference has been processed already, "<xref" or
+            # "<a" will appear as the previous word.
+            # For hard coded references, we use "<a href" style.
+            if "<xref" not in words[index-1] and "<a" not in words[index-1]:
+                # Check to see if the reference has been converted already.
+                if not (processed_words and ( \
+                    f"<xref uid=\"{uid}" in processed_words[-1] or \
+                    (hard_coded_references and f"<a href=\"{hard_coded_references.get(uid)}" in processed_words[-1]))):
+                  return uid
+
+    return None
+
+
+def convert_cross_references(content: str, current_object_name: str, known_uids: List[str]) -> str:
+    """Finds and replaces references that should be a cross reference in given content.
+
+    This should not convert any references that contain `current_object_name`,
+    i.e. if we're processing docstring for `google.cloud.spanner.v1.services`,
+    references to `google.cloud.spanner.v1.services` should not be converted
+    to references.
+
+    Args:
+        content: body of content to parse and look for references in
+        current_object_name: the name of the current Python object being processed
+        known_uids: list of uid references to look for
+
+    Returns:
+        content that has been modified with proper cross references if found.
+    """
     words = content.split(" ")
-    new_words = []
-    # Using counter to check if the entry is already a cross reference.
+
+    # Contains the words processed so far: each entry is either the original
+    # word (no valid reference found) or the word with its reference converted.
+    processed_words = []
+
+    # TODO(https://github.com/googleapis/sphinx-docfx-yaml/issues/208):
+    # remove this in the future.
+    iam_policy_link = "http://github.com/googleapis/python-grpc-google-iam-v1/blob/8e73b45993f030f521c0169b380d0fbafe66630b/google/iam/v1/iam_policy_pb2_grpc.py"
+    hard_coded_references = {
+        "google.iam.v1.iam_policy_pb2.SetIamPolicyRequest": iam_policy_link + "#L103-L109",
+        "google.iam.v1.iam_policy_pb2.GetIamPolicyRequest": iam_policy_link + "#L111-L118",
+        "google.iam.v1.iam_policy_pb2.TestIamPermissionsRequest": iam_policy_link + "#L120-L131",
+        "google.iam.v1.iam_policy_pb2.TestIamPermissionsResponse": iam_policy_link + "#L120-L131"
+    }
+    known_uids.extend(hard_coded_references.keys())
+
     for index, word in enumerate(words):
-        cross_reference = ""
-        for keyword in entry_names:
-            if keyword != current_name and keyword not in current_name and keyword in word:
-                # If it is already processed as cross reference, skip over it.
-                if "<xref" in words[index-1] or (new_words and f"<xref uid=\"{keyword}" in new_words[-1]):
-                    continue
-                cross_reference = f"<xref uid=\"{keyword}\">{keyword}</xref>"
-                new_words.append(word.replace(keyword, cross_reference))
-                print(f"Converted {keyword} into cross reference in: \n{content}")
+        uid = find_uid_to_convert(
+            word, words, index, known_uids, current_object_name, processed_words, hard_coded_references
+        )
+
+        if uid:
+            cross_reference = f"<a href=\"{hard_coded_references[uid]}\">{uid}</a>" \
+                if uid in hard_coded_references else \
+                f"<xref uid=\"{uid}\">{uid}</xref>"
 
-        # If cross reference has not been found, add current unchanged content.
-        if not cross_reference:
-            new_words.append(word)
+            processed_words.append(word.replace(uid, cross_reference))
+            print(f"Converted {uid} into cross reference in: \n{content}")
+
+        else:
+            # If cross reference has not been found, add current unchanged content.
+            processed_words.append(word)
 
-    return " ".join(new_words)
+    return " ".join(processed_words)
 
 
 # Used to look for cross references in the obj's data where applicable.
 # For now, we inspect summary, syntax and attributes.
-def search_cross_references(obj, current_name: str, entry_names: List[str]):
+def search_cross_references(obj, current_object_name: str, known_uids: List[str]):
     if obj.get("summary"):
-        obj["summary"] = convert_cross_references(obj["summary"], current_name, entry_names)
+        obj["summary"] = convert_cross_references(obj["summary"], current_object_name, known_uids)
 
     if obj.get("syntax"):
         if obj["syntax"].get("parameters"):
             for param in obj["syntax"]["parameters"]:
                 if param.get("description"):
                     param["description"] = convert_cross_references(
                         param["description"],
-                        current_name,
-                        entry_names
+                        current_object_name,
+                        known_uids
                     )
 
                 if param.get("id"):
                     param["id"] = convert_cross_references(
                         param["id"],
-                        current_name,
-                        entry_names
+                        current_object_name,
+                        known_uids
                     )
 
                 if param.get("var_type"):
                     param["var_type"] = convert_cross_references(
                         param["var_type"],
-                        current_name,
-                        entry_names
+                        current_object_name,
+                        known_uids
                     )
 
         if obj["syntax"].get("exceptions"):
             for exception in obj["syntax"]["exceptions"]:
                 if exception.get("description"):
                     exception["description"] = convert_cross_references(
                         exception["description"],
-                        current_name,
-                        entry_names
+                        current_object_name,
+                        known_uids
                     )
 
                 if exception.get("var_type"):
                     exception["var_type"] = convert_cross_references(
                         exception["var_type"],
-                        current_name,
-                        entry_names
+                        current_object_name,
+                        known_uids
                     )
 
         if obj["syntax"].get("returns"):
             for ret in obj["syntax"]["returns"]:
                 if ret.get("description"):
                     ret["description"] = convert_cross_references(
                         ret["description"],
-                        current_name,
-                        entry_names
+                        current_object_name,
+                        known_uids
                     )
 
                 if ret.get("var_type"):
                     ret["var_type"] = convert_cross_references(
                         ret["var_type"],
-                        current_name,
-                        entry_names
+                        current_object_name,
+                        known_uids
                     )
 
 
@@ -1510,22 +1586,22 @@ def search_cross_references(obj, current_name: str, entry_names: List[str]):
             if attribute.get("description"):
                 attribute["description"] = convert_cross_references(
                     attribute["description"],
-                    current_name,
-                    entry_names
+                    current_object_name,
+                    known_uids
                 )
 
             if attribute.get("id"):
                 attribute["id"] = convert_cross_references(
                     attribute["id"],
-                    current_name,
-                    entry_names
+                    current_object_name,
+                    known_uids
                 )
 
             if attribute.get("var_type"):
                 attribute["var_type"] = convert_cross_references(
                     attribute["var_type"],
-                    current_name,
-                    entry_names
+                    current_object_name,
+                    known_uids
                 )
 
 
@@ -1728,11 +1804,11 @@ def convert_module_to_package_if_needed(obj):
                 #   (not from the same library)
                 #   google.cloud.aiplatform.AutoMLForecastingTrainingJob
 
-                current_name = obj["fullName"]
-                entry_names = sorted(app.env.docfx_uid_names.keys(), reverse=True)
+                current_object_name = obj["fullName"]
+                known_uids = sorted(app.env.docfx_uid_names.keys(), reverse=True)
                 # Currently we only need to look in summary, syntax and
                 # attributes for cross references.
-                search_cross_references(obj, current_name, entry_names)
+                search_cross_references(obj, current_object_name, known_uids)
 
             yaml_map[uid] = [yaml_data, references]
 
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
@@ -1,5 +1,6 @@
 from docfx_yaml.extension import extract_keyword
 from docfx_yaml.extension import indent_code_left
+from docfx_yaml.extension import find_uid_to_convert
 from docfx_yaml.extension import convert_cross_references
 from docfx_yaml.extension import search_cross_references
 from docfx_yaml.extension import format_code
@@ -115,16 +116,29 @@ def test_extract_keyword(self):
             "Response message for google.cloud.bigquery_storage_v1.types.SplitReadStreamResponse.",
             "Response message for <xref uid=\"google.cloud.bigquery_storage_v1.types.SplitReadStreamResponse\">google.cloud.bigquery_storage_v1.types.SplitReadStreamResponse</xref>."
         ],
+        # Testing for cross reference to not be converted for its own object.
+        [
+            "Response message for google.cloud.bigquery_storage_v1.types.SplitResponse.",
+            "Response message for google.cloud.bigquery_storage_v1.types.SplitResponse."
+        ],
+        # TODO(https://github.com/googleapis/sphinx-docfx-yaml/issues/208):
+        # remove this when it is not needed anymore.
+        # Testing for hardcoded reference.
+        [
+            "google.iam.v1.iam_policy_pb2.GetIamPolicyRequest",
+            "<a href=\"http://github.com/googleapis/python-grpc-google-iam-v1/blob/8e73b45993f030f521c0169b380d0fbafe66630b/google/iam/v1/iam_policy_pb2_grpc.py#L111-L118\">google.iam.v1.iam_policy_pb2.GetIamPolicyRequest</a>"
+        ]
     ]
     @parameterized.expand(cross_references_testdata)
     def test_convert_cross_references(self, content, content_want):
         # Check that entries correctly turns into cross references.
         keyword_map = [
-            "google.cloud.bigquery_storage_v1.types.SplitReadStreamResponse"
+            "google.cloud.bigquery_storage_v1.types.SplitReadStreamResponse",
+            "google.cloud.bigquery_storage_v1.types.SplitResponse"
         ]
-        current_name = "SplitRepsonse"
+        current_object_name = "google.cloud.bigquery_storage_v1.types.SplitResponse"
 
-        content_got = convert_cross_references(content, current_name, keyword_map)
+        content_got = convert_cross_references(content, current_object_name, keyword_map)
         self.assertEqual(content_got, content_want)
 
 
@@ -296,5 +310,58 @@ def test_prepend_markdown_header(self, base_filename, want_filename):
                 self.assertEqual(test_file.read(), mdfile_want.read())
 
 
+    test_reference_params = [
+        [
+            # If no reference keyword is found, check for None
+            "google.cloud.resourcemanager_v3.ProjectsClient",
+            ["google.cloud.resourcemanager_v1.ProjectsClient"],
+            ["The", "following", "constraints", "apply", "when", "using"],
+            None
+        ],
+        [
+            # If keyword reference is found, validate proper cross reference
+            "google.cloud.resourcemanager_v3.set_iam_policy",
+            ["google.cloud.resourcemanager_v3.set_iam_policy"],
+            ["A", "Policy", "is", "a", "collection", "of", "bindings", "from"],
+            "google.cloud.resourcemanager_v3.set_iam_policy"
+        ],
+        [
+            # If keyword reference has already been converted, do not convert
+            # again.
+            "uid=\"google.cloud.resourcemanager_v3.set_iam_policy\">documentation</xref>",
+            ["google.cloud.resourcemanager_v3.set_iam_policy"],
+            ["Take", "a", "look", "at", "<xref"],
+            None
+        ],
+        [
+            # If the reference is to the current object itself, check for None
+            "google.cloud.resourcemanager_v3.ProjectsClient",
+            ["google.cloud.resourcemanager_v3.ProjectsClient"],
+            ["The", "following", "constraints", "apply", "when", "using"],
+            None
+        ],
+    ]
+    @parameterized.expand(test_reference_params)
+    def test_find_uid_to_convert(self, current_word, uids, visited_words, cross_reference_want):
+        current_object_name = "google.cloud.resourcemanager_v3.ProjectsClient"
+        content ="""Sets the IAM access control policy for the specified project.
+
+The following constraints apply when using google.cloud.resourcemanager_v3.ProjectsClient
+
+A Policy is a collection of bindings from google.cloud.resourcemanager_v3.set_iam_policy
+
+Take a look at <xref uid="google.cloud.resourcemanager_v3.set_iam_policy">documentation</xref> for more information.
+"""
+        # Break up the paragraph into sanitized list of words as shown in Sphinx.
+        words = " ".join(content.split("\n")).split(" ")
+
+        index = words.index(current_word)
+
+        cross_reference_got = find_uid_to_convert(
+            current_word, words, index, uids, current_object_name, visited_words
+        )
+        self.assertEqual(cross_reference_got, cross_reference_want)
+
+
 if __name__ == '__main__':
     unittest.main()