Skip to content

Commit 7b5d4ca

Browse files
authored
KAFKA-20362 Fall back to GitHub handle when reviewer email is unavailable (#22108)
Fix #21928 (comment). When a reviewer has "Keep my email addresses private" enabled, their real email is unreachable through either the commits API (GitHub rewrites the author email to the `@users.noreply.github.com` form on squash-merge) or the GitHub user profile (which typically returns null). The previous script wrote that noreply email, or a `{login}@email-not-found` placeholder when both tiers missed, into the `Reviewers:` trailer -- neither identifies the reviewer. This PR adds two fallbacks, in order: 1. **Tier 3 -- past Reviewers: trailers in `git log`.** Once a reviewer has been credited with a real email in any earlier merged PR, match by name and reuse that email. This makes real-email attribution "sticky" even after a reviewer later enables email privacy on GitHub. 2. **`Name (@login)` fallback.** When no usable email can be resolved from any tier, fall back to the GitHub handle, which is stable, public, and unambiguously identifies the reviewer. Reviewers whose real email is recoverable from any tier continue to use the existing `Name <email>` form. Kafka committers typically count reviewer contributions by grepping `git log` for the reviewer's name, e.g., `git log | grep -i Reviewer | grep -i "Ming-Yen Chung" | wc -l`. Since the name portion is preserved in both `<email>` and `(@login)` forms, this change does not affect those counts. **For first-time reviewers who want to be credited by email instead of `(@login)` (subsequent PRs are handled automatically by Tier 3):** - **Tier 2 (works immediately, recommended):** set **Settings → Profile → Public email**. Verify with `gh api "users/<login>"` -- the `email` field should be non-null. - **Tier 1 (only after your first merged commit in apache/kafka):** disable **Settings → Emails → Keep my email addresses private** before committing. Verify with `gh api "repos/apache/kafka/commits?author=<login>&per_page=1"` -- returns `[]` if you have no commits in the repo yet, and the author email is the noreply form if privacy was on at commit time. Reviewers: Chia-Ping Tsai <chia7712@gmail.com>
1 parent a3f1732 commit 7b5d4ca

1 file changed

Lines changed: 56 additions & 17 deletions

File tree

.github/scripts/pr-format.py

Lines changed: 56 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -107,14 +107,24 @@ def split_paragraphs(text: str):
107107
def resolve_reviewer(login: str) -> tuple:
108108
"""Map a GitHub login to (name, email).
109109
110-
Tries the repo commit history first, then falls back to the GitHub user profile.
111-
If the display name is unavailable, the login is used as the name.
112-
If the email is unavailable, '{login}@email-not-found' is used as a placeholder.
110+
Tries three tiers in order: repo commit history, GitHub user profile,
111+
and past `Reviewers:` trailers in git log (matched by name).
112+
Noreply emails (@users.noreply.github.com) are treated as missing since
113+
they are GitHub privacy placeholders that do not identify the reviewer.
114+
Returns (name, None) when no usable email is found; the caller falls
115+
back to the '(@login)' form in the Reviewers trailer.
113116
"""
117+
def _usable_email(e):
118+
if not e or e.endswith("@users.noreply.github.com"):
119+
return None
120+
return e
121+
114122
name = None
115123
email = None
116124

117-
# Tier 1: find from repo commit history
125+
# Tier 1: find from repo commit history. Misses when the reviewer has no
126+
# merged commit in apache/kafka, or had "Keep my email private" enabled
127+
# at commit time (GitHub rewrites the author to the noreply form).
118128
try:
119129
cmd = f"gh api repos/apache/kafka/commits?author={login}&per_page=1"
120130
p = subprocess.run(shlex.split(cmd), capture_output=True, text=True)
@@ -123,11 +133,12 @@ def resolve_reviewer(login: str) -> tuple:
123133
if commits:
124134
author = commits[0].get("commit", {}).get("author", {})
125135
name = author.get("name")
126-
email = author.get("email")
136+
email = _usable_email(author.get("email"))
127137
except Exception as e:
128138
logger.debug(f"Failed to resolve {login} from commit history: {e}")
129139

130-
# Tier 2: GitHub user profile
140+
# Tier 2: GitHub user profile. Only exposes an email when the reviewer
141+
# has set a Public email in their profile settings.
131142
if not name or not email:
132143
try:
133144
cmd = f"gh api users/{login}"
@@ -137,23 +148,47 @@ def resolve_reviewer(login: str) -> tuple:
137148
if not name:
138149
name = user.get("name")
139150
if not email:
140-
email = user.get("email")
151+
email = _usable_email(user.get("email"))
141152
except Exception as e:
142153
logger.debug(f"Failed to resolve {login} from GitHub profile: {e}")
143154

155+
# Tier 3: past Reviewers: trailers in git log, matched by name. Catches
156+
# pure reviewers (no commits in apache/kafka, no public profile email)
157+
# who have been credited with a real email in an earlier merged PR.
158+
# git log is newest-first, so the first usable match is the most recent.
159+
if name and not email:
160+
try:
161+
p = subprocess.run(
162+
["git", "log",
163+
"--pretty=format:%(trailers:key=Reviewers,valueonly=true,unfold=true)"],
164+
capture_output=True, text=True,
165+
)
166+
if p.returncode == 0:
167+
pattern = re.compile(rf"{re.escape(name)}\s*<([^>]+)>")
168+
for line in p.stdout.splitlines():
169+
for m in pattern.finditer(line):
170+
candidate = _usable_email(m.group(1))
171+
if candidate:
172+
email = candidate
173+
break
174+
if email:
175+
break
176+
except Exception as e:
177+
logger.debug(f"Failed to resolve {login} from past Reviewers trailers: {e}")
178+
144179
if not name:
145180
name = login
146181

147-
if not email:
148-
email = f"{login}@email-not-found"
149-
150182
return (name, email)
151183

152184

153-
def already_exists(email: str, existing_reviewers: List[str]) -> bool:
154-
"""Check if a reviewer with the given email is already in the existing reviewers list."""
155-
existing_emails = re.findall(r'<(.+?)>', ", ".join(existing_reviewers))
156-
return email.lower() in [e.lower() for e in existing_emails]
185+
def already_exists(identity: str, existing_reviewers: List[str]) -> bool:
186+
"""Check if a reviewer identity is already in the existing reviewers list.
187+
188+
identity is the delimited token that uniquely identifies a reviewer, either
189+
'<email>' (for the email form) or '(@login)' (for the login fallback).
190+
"""
191+
return identity.lower() in ", ".join(existing_reviewers).lower()
157192

158193

159194
def update_reviewers_trailer(body: str, trailer: str) -> str:
@@ -207,11 +242,15 @@ def update_reviewers_trailer(body: str, trailer: str) -> str:
207242
reviewer_login = get_env("REVIEWER_LOGIN")
208243
pr_author = (gh_json.get("author") or {}).get("login")
209244
if reviewer_login and reviewer_login != pr_author:
210-
existing_reviewers = parse_trailers(title, body).get("Reviewers", [])
211245
name, email = resolve_reviewer(reviewer_login)
212-
if not already_exists(email, existing_reviewers):
246+
if email:
247+
identity = f"<{email}>"
248+
else:
249+
identity = f"(@{reviewer_login})"
250+
resolved = f"{name} {identity}"
251+
existing_reviewers = parse_trailers(title, body).get("Reviewers", [])
252+
if not already_exists(identity, existing_reviewers):
213253
existing_value = ", ".join(existing_reviewers)
214-
resolved = f"{name} <{email}>"
215254
new_value = f"{existing_value}, {resolved}" if existing_value else resolved
216255
body = update_reviewers_trailer(body, f"Reviewers: {new_value}")
217256

0 commit comments

Comments
 (0)