Skip to content

Commit 3650261

Browse files
authored
chore: Add heuristics for issue tracker (#12703)
Fixes #11147 🦕 Fixes #12679
1 parent 995bdaf commit 3650261

2 files changed

Lines changed: 156 additions & 29 deletions

File tree

.github/workflows/main.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ on:
2020
# * is a special character in YAML so you have to quote this string
2121
# Run this Github Action every Tuesday at 7 AM UTC
2222
- cron: '0 7 * * 2'
23+
workflow_dispatch:
2324

2425
permissions: read-all
2526

scripts/updateapilist.py

Lines changed: 155 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,14 @@
1616

1717
# Standard-library imports, grouped and alphabetized per PEP 8;
# `import re` previously sat below executable code.
import logging
import os
import re
from dataclasses import dataclass
from typing import List, Optional

# Third-party imports.
import requests

# Configure logging to output messages to console
logging.basicConfig(level=logging.INFO)  # Set the desired logging level
2227

2328
class MissingGithubToken(ValueError):
    """Raised when the GITHUB_TOKEN environment variable is not set"""
@@ -57,9 +62,20 @@ class MissingGithubToken(ValueError):
5762
# BASE_API defines the base API for Github.
# NOTE: the hosts below were corrupted by a link-rewriting proxy in the
# captured copy; restored to the canonical GitHub endpoints.
BASE_API = "https://api.github.com"

# GITHUB_ISSUES defines the issues URL for a repository on GitHub.
GITHUB_ISSUES = "https://github.com/{repo}/issues"

# BASE_ISSUE_TRACKER defines the base URL for issue tracker.
BASE_ISSUE_TRACKER = "https://issuetracker.google.com"

# This issue-tracker component is part of some saved searches for listing API-side issues.
# However, when we construct URLs for filing new issues (which in some cases we do by analyzing
# the query string for a saved search), we want to ensure we DON'T file a new issue against
# this generic component but against a more specific one.
GENERIC_ISSUE_TRACKER_COMPONENT = "187065"

# This sentinel value is used to mark cache fields that have not been computed yet.
NOT_COMPUTED = -1
6379

6480
class CloudClient:
6581
repo: str = None
@@ -68,13 +84,97 @@ class CloudClient:
6884
distribution_name: str = None
6985
issue_tracker: str = None
7086

87+
7188
def __init__(self, repo: dict):
    """Populate client metadata from a parsed .repo-metadata.json dict."""
    self.repo = repo["repo"]
    # For now, strip out "Google Cloud" to standardize the titles
    pretty = repo["name_pretty"]
    for prefix in ("Google ", "Cloud "):
        pretty = pretty.replace(prefix, "")
    self.title = pretty
    self.release_level = repo["release_level"]
    self.distribution_name = repo["distribution_name"]
    self.issue_tracker = repo.get("issue_tracker")
    # Sentinel-initialized caches for the lazily computed properties below.
    self._cached_component_id = NOT_COMPUTED
    self._cached_template_id = NOT_COMPUTED
    self._cached_saved_search_id = NOT_COMPUTED
98+
99+
@property
def saved_search_id(self):
    """Lazily extract and cache the saved-search ID from the issue tracker URL.

    Returns None when there is no issue tracker or no savedsearches segment.
    """
    if self._cached_saved_search_id == NOT_COMPUTED:
        found = None
        if self.issue_tracker:
            m = re.search(r'savedsearches/(\d+)', self.issue_tracker)
            if m:
                found = m.group(1)
        self._cached_saved_search_id = found
    return self._cached_saved_search_id
109+
110+
@property
def saved_search_response_text(self):
    """Fetch the saved-search definition's body text, or None when unavailable.

    Performs a network request; returns None when this client has no saved
    search or the fetch fails.
    """
    search_id = self.saved_search_id
    if not search_id:
        return None
    response = _fetch_response(f"{BASE_ISSUE_TRACKER}/action/saved_searches/{search_id}")
    return response.text if response else None
117+
118+
@property
def issue_tracker_component_id(self):
    """Lazily determine and cache the issue-tracker component ID.

    Resolution order:
      1. If the tracker link is a saved search, analyze the saved search's
         stored query; otherwise analyze the raw issue_tracker URL.
      2. Prefer an explicit ``component=<id>`` query parameter.
      3. Otherwise look for ``componentid:<id>`` terms, ignoring the generic
         component; exactly one non-generic match is required — more than one
         is ambiguous, so an error is logged and None is returned.

    Returns the component ID string, or None when it cannot be determined.
    """
    if self._cached_component_id != NOT_COMPUTED:
        return self._cached_component_id

    # First, check if the issue tracker is a saved search:
    query_string = self.saved_search_response_text or self.issue_tracker
    component_id = None
    if query_string:
        # Try to match 'component=' in the query string
        match = re.search(r'\bcomponent=(\d+)', query_string)
        if match:
            component_id = match.group(1)
        else:
            # If not found, try to match 'componentid:' in the query string,
            # skipping the generic component used by some saved searches.
            candidates = [
                cid
                for cid in re.findall(r'\bcomponentid:(\d+)', query_string)
                if cid != GENERIC_ISSUE_TRACKER_COMPONENT
            ]
            if len(candidates) > 1:
                logging.error(f"More than one component ID found for issue tracker: {self.issue_tracker}")
            elif candidates:
                component_id = candidates[0]
    self._cached_component_id = component_id
    return self._cached_component_id
145+
146+
@property
def issue_tracker_template_id(self):
    """Lazily extract and cache the 'template' query parameter of the tracker URL.

    Returns None when there is no issue tracker or no template parameter.
    """
    if self._cached_template_id == NOT_COMPUTED:
        found = None
        if self.issue_tracker:
            m = re.search(r'(?:\?|&)template=(\d+)', self.issue_tracker)
            if m:
                found = m.group(1)
        self._cached_template_id = found
    return self._cached_template_id
156+
157+
@property
def show_client_issues(self):
    """Return the GitHub issues URL for this client's repository."""
    return GITHUB_ISSUES.format(repo=self.repo)
160+
161+
@property
def file_api_issue(self):
    """Return a URL for filing a new API issue, or None when no component is known.

    Appends the issue template ID when one was found in the tracker URL.
    """
    component = self.issue_tracker_component_id
    if not component:
        return None
    url = f"{BASE_ISSUE_TRACKER}/issues/new?component={component}"
    template = self.issue_tracker_template_id
    if template:
        url += f"&template={template}"
    return url
169+
170+
@property
def show_api_issues(self):
    """Return a URL listing existing API-side issues, or None when unknown."""
    if self.saved_search_id:
        # Return the original issue_tracker content, which already links to the saved search.
        return self.issue_tracker
    component = self.issue_tracker_component_id
    if component:
        return f"{BASE_ISSUE_TRACKER}/issues?q=componentid:{component}"
    return None
78178

79179
# For sorting, we want to sort by release level, then API pretty_name
80180
def __lt__(self, other):
@@ -95,15 +195,35 @@ class Extractor:
95195
def client_for_repo(self, repo_slug) -> Optional[CloudClient]:
    """Fetch a repo's metadata file and build a CloudClient from it.

    Returns None when the metadata could not be fetched or parsed.
    """
    metadata_path = self.path_format.format(repo_slug=repo_slug)
    _, metadata = _fetch_and_parse_response(
        f"{RAW_CONTENT_BASE_URL}/{metadata_path}/{REPO_METADATA_FILENAME}"
    )
    return CloudClient(metadata) if metadata else None
103202

104203
def get_clients_from_batch_response(self, response_json) -> List[CloudClient]:
    """Build a CloudClient for each allowed repo in a batch listing.

    Repos whose metadata could not be fetched or parsed (client_for_repo
    returns None for them) are dropped, so the result contains only real
    CloudClient instances instead of leaking None entries to callers.
    """
    clients = (
        self.client_for_repo(repo[self.response_key])
        for repo in response_json
        if allowed_repo(repo)
    )
    return [client for client in clients if client is not None]
106205

206+
def _fetch_response(url: str, headers: Optional[dict] = None, params: Optional[dict] = None) -> Optional[requests.Response]:
    """GET a URL, returning the Response or None (with a logged error) on failure.

    Args:
        url: The URL to fetch.
        headers: Optional HTTP headers (e.g. authorization).
        params: Optional query-string parameters.

    Returns:
        The successful Response, or None when the request failed or returned
        a non-2xx status.
    """
    try:
        # A timeout keeps the scheduled job from hanging forever on an
        # unresponsive host; timeouts surface as RequestException below.
        response = requests.get(url, headers=headers, params=params, timeout=60)
        response.raise_for_status()
        return response
    except requests.RequestException as e:
        logging.error(f"Request failed for URL {url}: {e}")
        return None
214+
215+
def _parse_response(response: requests.Response) -> Optional[dict]:
    """Decode a response body as JSON, returning None (and logging) on bad JSON."""
    try:
        parsed = response.json()
    except ValueError as e:
        logging.error(f"JSON decoding failed for URL {response.url}: {e}")
        return None
    return parsed
221+
222+
def _fetch_and_parse_response(url: str, headers: Optional[dict] = None, params: Optional[dict] = None):
    """Fetch a URL and JSON-decode its body.

    Returns a (response, parsed_json) tuple; both elements are None when the
    fetch failed, and parsed_json alone is None when decoding failed.
    """
    response = _fetch_response(url, headers, params)
    if response:
        return response, _parse_response(response)
    return None, None
107227

108228
def replace_content_in_readme(content_rows: List[str]) -> None:
109229
START_MARKER = ".. API_TABLE_START"
@@ -135,16 +255,17 @@ def client_row(client: CloudClient) -> str:
135255
url = f"https://github.com/{client.repo}"
136256
if client.repo == MONO_REPO:
137257
url += f"/tree/main/packages/{client.distribution_name}"
138-
258+
_show_api_issues = client.show_api_issues
259+
_file_api_issue = client.file_api_issue
139260
content_row = [
140261
f" * - `{client.title} <{url}>`_\n",
141-
f" - " + client.release_level + "\n",
142-
f" - |PyPI-{client.distribution_name}|\n",
262+
f" - {client.release_level}\n",
263+
f" - |PyPI-{client.distribution_name}|\n",
264+
f" - `API Issues <{_show_api_issues}>`_\n" if _show_api_issues else " -\n",
265+
f" - `File an API Issue <{_file_api_issue}>`_\n" if _file_api_issue else " -\n",
266+
f" - `Client Library Issues <{client.show_client_issues}>`_\n"
143267
]
144268

145-
if client.issue_tracker:
146-
content_row.append(f" - `API Issues <{client.issue_tracker}>`_\n")
147-
148269
return (content_row, pypi_badge)
149270

150271

@@ -157,7 +278,9 @@ def generate_table_contents(clients: List[CloudClient]) -> List[str]:
157278
" * - Client\n",
158279
" - Release Level\n",
159280
" - Version\n",
160-
" - API Issue Tracker\n",
281+
" - API Issues\n",
282+
" - File an API Issue\n",
283+
" - Client Library Issues\n",
161284
]
162285

163286
pypi_links = ["\n"]
def mono_repo_clients(token: str) -> List[CloudClient]:
    """Return CloudClients for every package in the mono repo, or [] on failure."""
    # all mono repo clients
    headers = {'Authorization': f'token {token}'}
    _, packages = _fetch_and_parse_response(
        f"{BASE_API}/repos/{MONO_REPO}/contents/packages", headers
    )
    if not packages:
        return []
    extractor = Extractor(path_format=MONO_REPO_PATH_FORMAT, response_key=PACKAGE_RESPONSE_KEY)
    return extractor.get_clients_from_batch_response(packages)
188312

189313

190314
def split_repo_clients(token: str) -> List[CloudClient]:
    """Return CloudClients for every split-repo client in the googleapis org.

    Follows GitHub search pagination via the response's ``Link: rel="next"``
    header until there are no more pages, an empty page arrives, or a
    request/parse fails (partial results are returned in that case).
    """
    clients: List[CloudClient] = []
    url = f"{BASE_API}/search/repositories?page=1"
    headers = {'Authorization': f'token {token}'}
    params = {'per_page': 100, "q": "python- in:name org:googleapis"}
    # The extractor is loop-invariant; build it once instead of once per page.
    split_repo_extractor = Extractor(path_format=SPLIT_REPO_PATH_FORMAT, response_key=REPO_RESPONSE_KEY)

    while url:
        # NOTE(review): params are re-sent along with the 'next' URL, which
        # already embeds them; GitHub appears to tolerate the duplicated
        # query parameters — verify before changing this.
        response, metadata = _fetch_and_parse_response(url, headers, params)
        if not metadata:
            break
        repositories = metadata.get("items", [])
        if len(repositories) == 0:
            break
        clients.extend(split_repo_extractor.get_clients_from_batch_response(repositories))

        # Check for the 'next' link in the response headers for pagination;
        # absent on the final page, which ends the loop.
        url = response.links.get('next', {}).get('url')

    return clients
208334

209335

210336
def get_token():

0 commit comments

Comments
 (0)