Skip to content
This repository was archived by the owner on Mar 26, 2026. It is now read-only.

Commit 5dc99d2

Browse files
dandhleetbpg
andauthored
fix: add hardcoded IAM references temporarily (#209)
* fix: add hardcoded reference for IAM references * test: update unit test * fix: apply suggestion Co-authored-by: Tyler Bui-Palsulich <26876514+tbpg@users.noreply.github.com> * fix: separate code into helper function and add proper docstrings * fix: ensure hard_coded_references is optional and not used if not found * test: update unit test with new helper function * fix: update method name and apply review suggestions * test: update unit test * fix: address review suggestion for adding return type Co-authored-by: Tyler Bui-Palsulich <26876514+tbpg@users.noreply.github.com> * fix: address review comments * fix: update variable name from keyword to uid * fix: update method names, variable names and refactor methods * test: update unit test * fix: update comments, move hardcoded bits to the back Co-authored-by: Tyler Bui-Palsulich <26876514+tbpg@users.noreply.github.com>
1 parent dfedd36 commit 5dc99d2

2 files changed

Lines changed: 190 additions & 47 deletions

File tree

docfx_yaml/extension.py

Lines changed: 120 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from pathlib import Path
3030
from functools import partial
3131
from itertools import zip_longest
32-
from typing import List, Iterable
32+
from typing import Dict, Iterable, List, Optional
3333
from black import InvalidInput
3434

3535
try:
@@ -1417,91 +1417,167 @@ def find_markdown_pages(app, outdir):
14171417
'href': mdfile.name.lower(),
14181418
})
14191419

1420+
def find_uid_to_convert(
    current_word: str,
    words: List[str],
    index: int,
    known_uids: List[str],
    current_object_name: str,
    processed_words: List[str],
    hard_coded_references: Optional[Dict[str, str]] = None
) -> Optional[str]:
    """Given `current_word`, returns the `uid` to convert to cross reference if found.

    Args:
        current_word: current word being looked at
        words: list of words used to check and compare content before and after `current_word`
        index: index position of `current_word` within words
        known_uids: list of uid references to look for
        current_object_name: the name of the current Python object being processed
        processed_words: list of words that have been processed so far
        hard_coded_references: optional mapping of hard coded reference uids
            to the links they should point to

    Returns:
        None if current word does not contain any reference `uid`, or the `uid`
            that should be converted.
    """
    # The first word has no predecessor. Guard explicitly, because
    # `words[index - 1]` with `index == 0` silently wraps around to the last
    # word of the content.
    previous_word = words[index - 1] if index > 0 else ""

    # If the cross reference has been processed already, "<xref" or "<a" will
    # appear as the previous word.
    # For hard coded references, we use "<a href" style.
    if "<xref" in previous_word or "<a" in previous_word:
        return None

    last_processed_word = processed_words[-1] if processed_words else ""

    for uid in known_uids:
        # Do not convert references to itself or containing partial
        # references. This could result in `storage.types.ReadSession` being
        # prematurely converted to
        # `<xref uid="storage.types">storage.types</xref>ReadSession`
        # instead of
        # `<xref uid="storage.types.ReadSession">storage.types.ReadSession</xref>`
        if uid in current_object_name:
            continue

        if uid not in current_word:
            continue

        # Check to see if the reference has been converted already.
        if f"<xref uid=\"{uid}" in last_processed_word:
            continue

        # Only hard coded uids have a link to compare against.
        link = hard_coded_references.get(uid) if hard_coded_references else None
        if link is not None and f"<a href=\"{link}" in last_processed_word:
            continue

        return uid

    return None
1466+
1467+
1468+
def convert_cross_references(content: str, current_object_name: str, known_uids: List[str]) -> str:
    """Finds and replaces references that should be a cross reference in given content.

    This should not convert any references that contain `current_object_name`,
    i.e. if we're processing docstring for `google.cloud.spanner.v1.services`,
    references to `google.cloud.spanner.v1.services` should not be converted
    to references.

    Args:
        content: body of content to parse and look for references in
        current_object_name: the name of the current Python object being processed
        known_uids: list of uid references to look for. This list is only
            read; it is not modified.

    Returns:
        content that has been modified with proper cross references if found.
    """
    # TODO(https://github.com/googleapis/sphinx-docfx-yaml/issues/208):
    # remove this in the future.
    iam_policy_link = "http://github.com/googleapis/python-grpc-google-iam-v1/blob/8e73b45993f030f521c0169b380d0fbafe66630b/google/iam/v1/iam_policy_pb2_grpc.py"
    hard_coded_references = {
        "google.iam.v1.iam_policy_pb2.SetIamPolicyRequest": iam_policy_link + "#L103-L109",
        "google.iam.v1.iam_policy_pb2.GetIamPolicyRequest": iam_policy_link + "#L111-L118",
        "google.iam.v1.iam_policy_pb2.TestIamPermissionsRequest": iam_policy_link + "#L120-L131",
        "google.iam.v1.iam_policy_pb2.TestIamPermissionsResponse": iam_policy_link + "#L120-L131"
    }

    # Search a private copy with the hard coded uids appended at the back.
    # Extending `known_uids` in place would grow the caller's list by four
    # duplicate entries on every call, and this function is called once per
    # summary/parameter/attribute with the same list.
    uids_to_search = list(known_uids)
    uids_to_search.extend(
        uid for uid in hard_coded_references if uid not in known_uids
    )

    words = content.split(" ")

    # Contains a list of words that is not a valid reference or converted
    # references.
    processed_words = []

    for index, word in enumerate(words):
        uid = find_uid_to_convert(
            word, words, index, uids_to_search, current_object_name, processed_words, hard_coded_references
        )

        if not uid:
            # If cross reference has not been found, add current unchanged content.
            processed_words.append(word)
            continue

        # Hard coded references link out with "<a href"; everything else
        # becomes a regular "<xref".
        if uid in hard_coded_references:
            cross_reference = f"<a href=\"{hard_coded_references[uid]}\">{uid}</a>"
        else:
            cross_reference = f"<xref uid=\"{uid}\">{uid}</xref>"

        processed_words.append(word.replace(uid, cross_reference))
        print(f"Converted {uid} into cross reference in: \n{content}")

    return " ".join(processed_words)
14431519

14441520

14451521
# Used to look for cross references in the obj's data where applicable.
14461522
# For now, we inspect summary, syntax and attributes.
1447-
def search_cross_references(obj, current_name: str, entry_names: List[str]):
1523+
def search_cross_references(obj, current_object_name: str, known_uids: List[str]):
14481524
if obj.get("summary"):
1449-
obj["summary"] = convert_cross_references(obj["summary"], current_name, entry_names)
1525+
obj["summary"] = convert_cross_references(obj["summary"], current_object_name, known_uids)
14501526

14511527
if obj.get("syntax"):
14521528
if obj["syntax"].get("parameters"):
14531529
for param in obj["syntax"]["parameters"]:
14541530
if param.get("description"):
14551531
param["description"] = convert_cross_references(
14561532
param["description"],
1457-
current_name,
1458-
entry_names
1533+
current_object_name,
1534+
known_uids
14591535
)
14601536

14611537
if param.get("id"):
14621538
param["id"] = convert_cross_references(
14631539
param["id"],
1464-
current_name,
1465-
entry_names
1540+
current_object_name,
1541+
known_uids
14661542
)
14671543

14681544
if param.get("var_type"):
14691545
param["var_type"] = convert_cross_references(
14701546
param["var_type"],
1471-
current_name,
1472-
entry_names
1547+
current_object_name,
1548+
known_uids
14731549
)
14741550

14751551
if obj["syntax"].get("exceptions"):
14761552
for exception in obj["syntax"]["exceptions"]:
14771553
if exception.get("description"):
14781554
exception["description"] = convert_cross_references(
14791555
exception["description"],
1480-
current_name,
1481-
entry_names
1556+
current_object_name,
1557+
known_uids
14821558
)
14831559

14841560
if exception.get("var_type"):
14851561
exception["var_type"] = convert_cross_references(
14861562
exception["var_type"],
1487-
current_name,
1488-
entry_names
1563+
current_object_name,
1564+
known_uids
14891565
)
14901566

14911567
if obj["syntax"].get("returns"):
14921568
for ret in obj["syntax"]["returns"]:
14931569
if ret.get("description"):
14941570
ret["description"] = convert_cross_references(
14951571
ret["description"],
1496-
current_name,
1497-
entry_names
1572+
current_object_name,
1573+
known_uids
14981574
)
14991575

15001576
if ret.get("var_type"):
15011577
ret["var_type"] = convert_cross_references(
15021578
ret["var_type"],
1503-
current_name,
1504-
entry_names
1579+
current_object_name,
1580+
known_uids
15051581
)
15061582

15071583

@@ -1510,22 +1586,22 @@ def search_cross_references(obj, current_name: str, entry_names: List[str]):
15101586
if attribute.get("description"):
15111587
attribute["description"] = convert_cross_references(
15121588
attribute["description"],
1513-
current_name,
1514-
entry_names
1589+
current_object_name,
1590+
known_uids
15151591
)
15161592

15171593
if attribute.get("id"):
15181594
attribute["id"] = convert_cross_references(
15191595
attribute["id"],
1520-
current_name,
1521-
entry_names
1596+
current_object_name,
1597+
known_uids
15221598
)
15231599

15241600
if attribute.get("var_type"):
15251601
attribute["var_type"] = convert_cross_references(
15261602
attribute["var_type"],
1527-
current_name,
1528-
entry_names
1603+
current_object_name,
1604+
known_uids
15291605
)
15301606

15311607

@@ -1728,11 +1804,11 @@ def convert_module_to_package_if_needed(obj):
17281804
# (not from the same library)
17291805
# google.cloud.aiplatform.AutoMLForecastingTrainingJob
17301806

1731-
current_name = obj["fullName"]
1732-
entry_names = sorted(app.env.docfx_uid_names.keys(), reverse=True)
1807+
current_object_name = obj["fullName"]
1808+
known_uids = sorted(app.env.docfx_uid_names.keys(), reverse=True)
17331809
# Currently we only need to look in summary, syntax and
17341810
# attributes for cross references.
1735-
search_cross_references(obj, current_name, entry_names)
1811+
search_cross_references(obj, current_object_name, known_uids)
17361812

17371813
yaml_map[uid] = [yaml_data, references]
17381814

tests/test_helpers.py

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from docfx_yaml.extension import extract_keyword
22
from docfx_yaml.extension import indent_code_left
3+
from docfx_yaml.extension import find_uid_to_convert
34
from docfx_yaml.extension import convert_cross_references
45
from docfx_yaml.extension import search_cross_references
56
from docfx_yaml.extension import format_code
@@ -115,16 +116,29 @@ def test_extract_keyword(self):
115116
"Response message for google.cloud.bigquery_storage_v1.types.SplitReadStreamResponse.",
116117
"Response message for <xref uid=\"google.cloud.bigquery_storage_v1.types.SplitReadStreamResponse\">google.cloud.bigquery_storage_v1.types.SplitReadStreamResponse</xref>."
117118
],
119+
# Testing for cross reference to not be converted for its own object.
120+
[
121+
"Response message for google.cloud.bigquery_storage_v1.types.SplitResponse.",
122+
"Response message for google.cloud.bigquery_storage_v1.types.SplitResponse."
123+
],
124+
# TODO(https://github.com/googleapis/sphinx-docfx-yaml/issues/208):
125+
# remove this when it is not needed anymore.
126+
# Testing for hardcoded reference.
127+
[
128+
"google.iam.v1.iam_policy_pb2.GetIamPolicyRequest",
129+
"<a href=\"http://github.com/googleapis/python-grpc-google-iam-v1/blob/8e73b45993f030f521c0169b380d0fbafe66630b/google/iam/v1/iam_policy_pb2_grpc.py#L111-L118\">google.iam.v1.iam_policy_pb2.GetIamPolicyRequest</a>"
130+
]
118131
]
119132
@parameterized.expand(cross_references_testdata)
120133
def test_convert_cross_references(self, content, content_want):
121134
# Check that entries correctly turns into cross references.
122135
keyword_map = [
123-
"google.cloud.bigquery_storage_v1.types.SplitReadStreamResponse"
136+
"google.cloud.bigquery_storage_v1.types.SplitReadStreamResponse",
137+
"google.cloud.bigquery_storage_v1.types.SplitResponse"
124138
]
125-
current_name = "SplitRepsonse"
139+
current_object_name = "google.cloud.bigquery_storage_v1.types.SplitResponse"
126140

127-
content_got = convert_cross_references(content, current_name, keyword_map)
141+
content_got = convert_cross_references(content, current_object_name, keyword_map)
128142
self.assertEqual(content_got, content_want)
129143

130144

@@ -296,5 +310,58 @@ def test_prepend_markdown_header(self, base_filename, want_filename):
296310
self.assertEqual(test_file.read(), mdfile_want.read())
297311

298312

313+
test_reference_params = [
314+
[
315+
# If no reference keyword is found, check for None
316+
"google.cloud.resourcemanager_v3.ProjectsClient",
317+
["google.cloud.resourcemanager_v1.ProjectsClient"],
318+
["The", "following", "constraints", "apply", "when", "using"],
319+
None
320+
],
321+
[
322+
# If keyword reference is found, validate proper cross reference
323+
"google.cloud.resourcemanager_v3.set_iam_policy",
324+
["google.cloud.resourcemanager_v3.set_iam_policy"],
325+
["A", "Policy", "is", "a", "collection", "of", "bindings", "from"],
326+
"google.cloud.resourcemanager_v3.set_iam_policy"
327+
],
328+
[
329+
# If keyword reference has already been converted, do not convert
330+
# again.
331+
"uid=\"google.cloud.resourcemanager_v3.set_iam_policy\">documentation</xref>",
332+
["google.cloud.resourcemanager_v3.set_iam_policy"],
333+
["Take", "a", "look", "at", "<xref"],
334+
None
335+
],
336+
[
337+
# If the only matching uid is the current object itself, check for None
338+
"google.cloud.resourcemanager_v3.ProjectsClient",
339+
["google.cloud.resourcemanager_v3.ProjectsClient"],
340+
["The", "following", "constraints", "apply", "when", "using"],
341+
None
342+
],
343+
]
344+
@parameterized.expand(test_reference_params)
345+
def test_find_uid_to_convert(self, current_word, uids, visited_words, cross_reference_want):
346+
current_object_name = "google.cloud.resourcemanager_v3.ProjectsClient"
347+
content ="""Sets the IAM access control policy for the specified project.
348+
349+
The following constraints apply when using google.cloud.resourcemanager_v3.ProjectsClient
350+
351+
A Policy is a collection of bindings from google.cloud.resourcemanager_v3.set_iam_policy
352+
353+
Take a look at <xref uid="google.cloud.resourcemanager_v3.set_iam_policy">documentation</xref> for more information.
354+
"""
355+
# Break up the paragraph into sanitized list of words as shown in Sphinx.
356+
words = " ".join(content.split("\n")).split(" ")
357+
358+
index = words.index(current_word)
359+
360+
cross_reference_got = find_uid_to_convert(
361+
current_word, words, index, uids, current_object_name, visited_words
362+
)
363+
self.assertEqual(cross_reference_got, cross_reference_want)
364+
365+
299366
if __name__ == '__main__':
300367
unittest.main()

0 commit comments

Comments
 (0)