From 460e31ae0971a3c038ed2f4613e8fa2e7e17ace3 Mon Sep 17 00:00:00 2001 From: Sinatras Date: Fri, 1 May 2026 14:16:11 +0300 Subject: [PATCH] fix(detector): catch LAST_CALL_REPLAY when entrypoint is aliased to a closure-returned inner function Targets the case where the publicly-exposed custom_kernel is bound at module level to a function returned from a factory: def _make_kernel(): last_in = None last_out = None def k(data): nonlocal last_in, last_out if last_in is data: return last_out last_in = data last_out = data.clone() return last_out return k custom_kernel = _make_kernel() The replay logic lives in 'k', not in any function literally named custom_kernel, so the existing entrypoint matcher (is_entrypoint_name(node.name)) skipped it entirely. Same exploit semantics as the textbook LAST_CALL_REPLAY, only the storage moves from module globals to closure cells. Approach: walk the module top-level once before the existing entrypoint loop, collect any function names that are aliased to a known entrypoint name via either custom_kernel = some_function # direct alias custom_kernel = factory() # factory call For factory calls, the whole factory body is scanned with ast.walk for Return statements (including those inside nested functions, so names returned by the inner function are collected too) and any returned Name is added to the alias set. The main entrypoint loop then accepts both is_entrypoint_name(node.name) and node.name in entrypoint_aliases, so the existing identity-replay passes fire on the inner function as if it were custom_kernel. _looks_stateful_name already matches 'last_in'/'last_out' via the 'last' substring, so no other detector logic needed changing. Verified: closure_replay flagged, decorator_replay still misses (separate pattern, separate fix), and all benign factory/decorator/plain shapes stay valid. Existing classic LAST_CALL_REPLAY regression tests pass. 
--- kernelguard.py | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/kernelguard.py b/kernelguard.py index f086847..ae36091 100644 --- a/kernelguard.py +++ b/kernelguard.py @@ -2197,10 +2197,51 @@ def _has_ver(expr: ast.AST | None) -> bool: for n in ast.walk(expr) ) + # Collect inner-function names that are aliased to a top-level entrypoint + # via a factory return. Pattern: + # def _make_kernel(): + # ... + # def k(data): + # ... + # return k + # custom_kernel = _make_kernel() + # The replay logic lives in `k`, not `custom_kernel`. The straight + # ``is_entrypoint_name(node.name)`` check would skip it because + # ``k`` is not in ENTRYPOINT_NAMES, so the module-level alias is + # invisible to the analyzer. Trace those aliases here. + entrypoint_aliases: set[str] = set() + if isinstance(tree, ast.Module): + factory_returns: dict[str, set[str]] = {} + for stmt in tree.body: + if not isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)): + continue + returned: set[str] = set() + for sub in ast.walk(stmt): + if isinstance(sub, ast.Return) and isinstance(sub.value, ast.Name): + returned.add(sub.value.id) + if returned: + factory_returns[stmt.name] = returned + + for stmt in tree.body: + if not isinstance(stmt, ast.Assign): + continue + for target in stmt.targets: + if not (isinstance(target, ast.Name) and is_entrypoint_name(target.id)): + continue + value = stmt.value + if isinstance(value, ast.Name): + entrypoint_aliases.add(value.id) + elif ( + isinstance(value, ast.Call) + and isinstance(value.func, ast.Name) + and value.func.id in factory_returns + ): + entrypoint_aliases.update(factory_returns[value.func.id]) + for node in ast.walk(tree): if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): continue - if not is_entrypoint_name(node.name): + if not (is_entrypoint_name(node.name) or node.name in entrypoint_aliases): continue signature_features: dict[str, set[str]] = defaultdict(set)