Skip to content

Commit 2bced6c

Browse files
committed
Remove non-deterministic files from OUT_DIR to fix TreeArtifact cache misses
Recursively remove files from OUT_DIR whose names appear in a known list (config.log, config.log.old, config.status, Makefile, Makefile.config, config.cache, commit_hash) or have a .d extension before Bazel captures OUT_DIR as a TreeArtifact. These files embed sandbox-specific paths, timestamps, or volatile values that make the TreeArtifact hash non-deterministic, causing cache misses for all downstream rustc compilations on every action run.
1 parent 85007f4 commit 2bced6c

4 files changed

Lines changed: 180 additions & 1 deletion

File tree

cargo/private/cargo_build_script_runner/bin.rs

Lines changed: 134 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
1818
use std::collections::BTreeMap;
1919
use std::env;
20-
use std::fs::{create_dir_all, read_to_string, write};
20+
use std::fs::{create_dir_all, read_dir, read_to_string, remove_file, write};
2121
use std::path::{Path, PathBuf};
2222
use std::process::Command;
2323

@@ -238,6 +238,13 @@ fn run_buildrs() -> Result<(), String> {
238238
.unwrap();
239239
}
240240

241+
// Remove non-deterministic configure-generated files from OUT_DIR before
242+
// Bazel captures it as a TreeArtifact. Files like config.log and
243+
// Makefile.config embed the Bazel sandbox path (which changes on every
244+
// action run), making the TreeArtifact hash non-deterministic and causing
245+
// cache misses for all downstream rustc compilations.
246+
remove_nondeterministic_out_dir_files(&out_dir_abs);
247+
241248
// If out_dir is empty add an empty file to the directory to avoid an upstream Bazel bug
242249
// https://github.com/bazelbuild/bazel/issues/28286
243250
if out_dir_abs.read_dir().map(|read| read.count()).unwrap_or(0) == 0 {
@@ -260,6 +267,62 @@ fn run_buildrs() -> Result<(), String> {
260267
Ok(())
261268
}
262269

270+
/// Exact file names that are deleted from OUT_DIR so that the _bs.out_dir
/// TreeArtifact is deterministic. Every entry records the Bazel sandbox path
/// or some other volatile value; the reason is given next to each name.
///
/// Make dependency files (`*.d`, generated by gcc/clang and listing every
/// included header with absolute sandbox paths; rdkafka-sys produces ~91 of
/// them) are matched by their `.d` suffix during the removal walk and are
/// therefore not part of this list.
const NONDETERMINISTIC_OUT_DIR_FILES: &[&str] = &[
    // autoconf and mklove debug logs.
    "config.log",
    "config.log.old",
    // Standard autoconf re-run script.
    "config.status",
    // mklove variable cache: CFLAGS/LDFLAGS plus a "generated at $(date)"
    // timestamp header.
    "config.cache",
    // Generated by standard autoconf from Makefile.in; embeds the --prefix
    // (OUT_DIR, sandbox-specific) and CFLAGS. Because removal walks the tree
    // recursively, sub-directory Makefiles (e.g. src/Makefile) match as well.
    "Makefile",
    // mklove output (rdkafka-sys/librdkafka): CFLAGS/CXXFLAGS/LDFLAGS with
    // sandbox-specific -I/-L paths.
    "Makefile.config",
    // librocksdb-sys writes the git HEAD SHA here to detect repo-level commit
    // changes. Bazel ignores the resulting cargo:rerun-if-changed directives,
    // yet the file differs across commits and would cause cache misses for
    // downstream compilations.
    "commit_hash",
];
297+
298+
/// Recursively walk `dir` and delete any file whose name is in
299+
/// `NONDETERMINISTIC_OUT_DIR_FILES` or has a .d extension (Make dependency files).
300+
/// Errors are silently ignored: if a file cannot be removed the worst
301+
/// outcome is a cache miss, not a build failure.
302+
fn remove_nondeterministic_out_dir_files(dir: &Path) {
303+
let entries = match read_dir(dir) {
304+
Ok(e) => e,
305+
Err(_) => return,
306+
};
307+
for entry in entries.flatten() {
308+
// Use file_type() which does not follow symlinks, so we never recurse
309+
// into symlink targets or traverse outside OUT_DIR.
310+
let Ok(file_type) = entry.file_type() else {
311+
continue;
312+
};
313+
let path = entry.path();
314+
if file_type.is_dir() {
315+
remove_nondeterministic_out_dir_files(&path);
316+
} else if file_type.is_file() {
317+
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
318+
if NONDETERMINISTIC_OUT_DIR_FILES.contains(&name) || name.ends_with(".d") {
319+
let _ = remove_file(&path);
320+
}
321+
}
322+
}
323+
}
324+
}
325+
263326
fn should_symlink_exec_root() -> bool {
264327
env::var("RULES_RUST_SYMLINK_EXEC_ROOT")
265328
.map(|s| s == "1")
@@ -449,6 +512,76 @@ fn main() {
449512
#[cfg(test)]
450513
mod test {
451514
use super::*;
515+
use std::fs::{create_dir_all, write};
516+
517+
/// Creates a scratch directory under the system temp dir and returns its path.
///
/// `label` identifies the calling test and becomes part of the directory
/// name. The name also carries the process id, the full nanosecond timestamp
/// and a per-process sequence number, so two calls never share a directory:
/// not within one test binary (sequence number), not across concurrently
/// running binaries (process id), and in practice not with a stale directory
/// left behind by an earlier failed run (timestamp).
///
/// # Panics
///
/// Panics if the directory cannot be created.
fn make_temp_dir(label: &str) -> PathBuf {
    // Only atomicity is needed to hand out distinct numbers, hence `Relaxed`.
    static NEXT_ID: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);
    let seq = NEXT_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);

    // A clock set before the epoch is not worth failing a test over; the
    // process id and sequence number still keep the name unique for this run.
    let nanos = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|elapsed| elapsed.as_nanos())
        .unwrap_or(0);

    let dir = std::env::temp_dir().join(format!(
        "rules_rust_bin_test_{}_{}_{}_{}",
        label,
        std::process::id(),
        nanos,
        seq
    ));
    create_dir_all(&dir).unwrap();
    dir
}
527+
528+
#[test]
fn remove_nondeterministic_named_files() {
    // Arrange: one file per listed name plus an unrelated artifact.
    let dir = make_temp_dir("named");
    let survivor = dir.join("libfoo.a");
    NONDETERMINISTIC_OUT_DIR_FILES
        .iter()
        .for_each(|name| write(dir.join(name), "content").unwrap());
    write(&survivor, "keep").unwrap();

    // Act.
    remove_nondeterministic_out_dir_files(&dir);

    // Assert: every listed name is gone, the unrelated artifact is not.
    for name in NONDETERMINISTIC_OUT_DIR_FILES {
        let still_there = dir.join(name).exists();
        assert!(!still_there, "{} should have been removed", name);
    }
    assert!(survivor.exists(), "libfoo.a should be kept");

    // Best-effort cleanup of the scratch directory.
    std::fs::remove_dir_all(&dir).ok();
}
544+
545+
#[test]
fn remove_dot_d_files() {
    // Two Make dependency files and one object file that must be preserved.
    let dir = make_temp_dir("dotd");
    let fixtures = [("foo.d", "deps"), ("bar.d", "deps"), ("output.o", "keep")];
    for &(name, contents) in fixtures.iter() {
        write(dir.join(name), contents).unwrap();
    }

    remove_nondeterministic_out_dir_files(&dir);

    let present = |name: &str| dir.join(name).exists();
    assert!(!present("foo.d"), "foo.d should be removed");
    assert!(!present("bar.d"), "bar.d should be removed");
    assert!(present("output.o"), "output.o should be kept");

    // Best-effort cleanup of the scratch directory.
    std::fs::remove_dir_all(&dir).ok();
}
559+
560+
#[test]
fn remove_nondeterministic_files_recursively() {
    // Layout under test:
    //   <dir>/Makefile            (volatile, top level)
    //   <dir>/subdir/config.log   (volatile, nested)
    //   <dir>/subdir/foo.d        (volatile, nested)
    //   <dir>/subdir/output.o     (must be preserved)
    let dir = make_temp_dir("recurse");
    let sub = dir.join("subdir");
    create_dir_all(&sub).unwrap();

    let nested_log = sub.join("config.log");
    let nested_dep = sub.join("foo.d");
    let nested_obj = sub.join("output.o");
    let top_makefile = dir.join("Makefile");

    write(&nested_log, "log").unwrap();
    write(&nested_dep, "deps").unwrap();
    write(&nested_obj, "keep").unwrap();
    write(&top_makefile, "top-level").unwrap();

    remove_nondeterministic_out_dir_files(&dir);

    assert!(!nested_log.exists());
    assert!(!nested_dep.exists());
    assert!(nested_obj.exists());
    assert!(!top_makefile.exists());

    // Best-effort cleanup of the scratch directory.
    std::fs::remove_dir_all(&dir).ok();
}
578+
579+
#[test]
fn remove_nondeterministic_nonexistent_dir_is_noop() {
    // The path is never created; the call is expected to return quietly
    // instead of panicking when the directory cannot be read.
    let missing = std::env::temp_dir().join("rules_rust_bin_test_nonexistent_999999999");
    remove_nondeterministic_out_dir_files(missing.as_path());
}
452585

453586
#[test]
454587
fn rustc_cfg_parsing() {
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
load("//cargo:defs.bzl", "cargo_build_script")
2+
load("//rust:defs.bzl", "rust_test")
3+
4+
# Runs build.rs through the cargo build script runner; the resulting OUT_DIR
# is what the test target below consumes.
cargo_build_script(
    name = "build_script",
    srcs = ["build.rs"],
    edition = "2021",
)

# Compiles test.rs against the output of :build_script.
rust_test(
    name = "test",
    srcs = ["test.rs"],
    edition = "2021",
    deps = [":build_script"],
)
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
use std::path::PathBuf;
2+
3+
/// Fills OUT_DIR with a mix of volatile files and one legitimate output.
///
/// The volatile files imitate what autoconf/cmake/mklove write: they embed
/// sandbox-specific paths and are expected to be stripped by the build script
/// runner before Bazel captures OUT_DIR as a TreeArtifact. `output.txt` is
/// the one file downstream consumers must still be able to read.
///
/// Panics if OUT_DIR is unset or any file cannot be written.
fn main() {
    let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap());
    let emit = |name: &str, contents: &str| {
        std::fs::write(out_dir.join(name), contents).unwrap();
    };

    // (file name, contents) pairs, written in this order.
    let volatile_files = [
        ("config.log", "configure log with /sandbox/path"),
        ("config.status", "configure status"),
        ("Makefile", "all:\n\t@echo sandbox path here"),
        ("Makefile.config", "CFLAGS=-I/sandbox/include"),
        ("config.cache", "# generated at Mon Jan 1 00:00:00 UTC 2024"),
        ("foo.d", "foo.o: foo.c /sandbox/include/bar.h"),
        ("baz.d", "baz.o: baz.c /sandbox/include/qux.h"),
    ];
    for &(name, contents) in volatile_files.iter() {
        emit(name, contents);
    }

    // The legitimate output that must survive the runner's cleanup.
    emit("output.txt", "legitimate output");

    println!("cargo:rerun-if-changed=build.rs");
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// include_str! resolves at compile time against the TreeArtifact captured by Bazel.
2+
// If the runner had removed output.txt this would not compile; if it had failed to
3+
// strip config.log / *.d files, the TreeArtifact hash would change on every run,
4+
// causing unnecessary rebuilds for all downstream crates.
5+
const OUTPUT: &str = include_str!(concat!(env!("OUT_DIR"), "/output.txt"));
6+
7+
#[test]
8+
fn legitimate_output_survives_nondeterministic_file_removal() {
9+
assert_eq!(OUTPUT, "legitimate output");
10+
}

0 commit comments

Comments
 (0)