From 92e5e4cc1a5c9e4c763c42cdebfcbfafb1be6e18 Mon Sep 17 00:00:00 2001 From: Sinatras Date: Fri, 1 May 2026 21:02:11 +0300 Subject: [PATCH] fix(detector): flag setattr/assign on __builtins__ as harness mutation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Targets red #126 (Evaluator state mutation): writes to Python's __builtins__ namespace let the kernel inject state visible to the entire benchmark harness. Three shapes flagged with HARNESS_RUNTIME_PATCHING (existing rule in the evaluator_state_mutation family, AUTO_FILTER): * setattr(__builtins__, name, value) * __builtins__.name = value (Attribute target) * __builtins__[name] = value (Subscript target) There is no legitimate reason for a compute kernel to write to __builtins__ — the 20 production nvfp4 false-positive cases do not touch it. --- kernelguard.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/kernelguard.py b/kernelguard.py index f086847..ed32419 100644 --- a/kernelguard.py +++ b/kernelguard.py @@ -2786,6 +2786,45 @@ def detect_dynamic_execution(code: str | SubmissionFacts) -> list[dict]: "evidence": "importlib.reload() (module state reset/manipulation)", }) + # Evaluator state mutation: any write to ``__builtins__`` is unambiguous + # adversarial activity in a compute kernel. Three shapes: + # * ``setattr(__builtins__, name, value)`` + # * ``__builtins__.name = value`` + # * ``__builtins__[name] = value`` + if "BUILTINS_MUTATION" not in seen: + for n in ast.walk(tree): + triggered = False + evidence = "" + if ( + isinstance(n, ast.Call) + and isinstance(n.func, ast.Name) + and n.func.id == "setattr" + and len(n.args) >= 1 + and isinstance(n.args[0], ast.Name) + and n.args[0].id == "__builtins__" + ): + triggered = True + evidence = ( + "setattr(__builtins__, ...) — writes to the Python builtins namespace" + ) + elif isinstance(n, ast.Assign): + for t in n.targets: + root = _ast_root_name(t) + if root == "__builtins__" and isinstance(t, (ast.Attribute, ast.Subscript)): + triggered = True + evidence = ( + "assignment to __builtins__ — writes to the Python builtins namespace" + ) + break + if triggered: + seen.add("BUILTINS_MUTATION") + matches.append({ + "pattern": "HARNESS_RUNTIME_PATCHING", + "severity": "critical", + "evidence": evidence, + }) + break + return matches