Skip to content

Commit 94a2144

Browse files
committed
Remove non-deterministic files from OUT_DIR to fix TreeArtifact cache misses
Before Bazel captures OUT_DIR as a TreeArtifact, recursively remove every file in it whose name appears in a known list (config.log, config.log.old, config.status, Makefile, Makefile.config, config.cache, commit_hash) or that has a .d extension. These files embed sandbox-specific paths, timestamps, or other volatile values that make the TreeArtifact hash non-deterministic, causing cache misses for all downstream rustc compilations on every action run.
1 parent 85007f4 commit 94a2144

4 files changed

Lines changed: 191 additions & 1 deletion

File tree

cargo/private/cargo_build_script_runner/bin.rs

Lines changed: 137 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
1818
use std::collections::BTreeMap;
1919
use std::env;
20-
use std::fs::{create_dir_all, read_to_string, write};
20+
use std::fs::{create_dir_all, read_dir, read_to_string, remove_file, write};
2121
use std::path::{Path, PathBuf};
2222
use std::process::Command;
2323

@@ -238,6 +238,13 @@ fn run_buildrs() -> Result<(), String> {
238238
.unwrap();
239239
}
240240

241+
// Remove non-deterministic configure-generated files from OUT_DIR before
242+
// Bazel captures it as a TreeArtifact. Files like config.log and
243+
// Makefile.config embed the Bazel sandbox path (which changes on every
244+
// action run), making the TreeArtifact hash non-deterministic and causing
245+
// cache misses for all downstream rustc compilations.
246+
remove_nondeterministic_out_dir_files(&out_dir_abs);
247+
241248
// If out_dir is empty add an empty file to the directory to avoid an upstream Bazel bug
242249
// https://github.com/bazelbuild/bazel/issues/28286
243250
if out_dir_abs.read_dir().map(|read| read.count()).unwrap_or(0) == 0 {
@@ -260,6 +267,62 @@ fn run_buildrs() -> Result<(), String> {
260267
Ok(())
261268
}
262269

270+
/// OUT_DIR file names that embed the Bazel sandbox path or other volatile
/// values and must be removed to make the _bs.out_dir TreeArtifact deterministic.
///
/// - config.log / config.log.old: autoconf and mklove debug logs.
/// - config.status: standard autoconf re-run script.
/// - Makefile: standard autoconf generates this from Makefile.in, embedding
///   the --prefix (OUT_DIR, sandbox-specific) and CFLAGS. The recursive walk
///   catches sub-directory Makefiles (e.g. src/Makefile) too.
/// - Makefile.config: mklove output (rdkafka-sys/librdkafka); contains
///   CFLAGS/CXXFLAGS/LDFLAGS with sandbox-specific -I/-L paths.
/// - config.cache: mklove variable cache; contains CFLAGS/LDFLAGS plus
///   a "generated at $(date)" timestamp header.
/// - commit_hash: librocksdb-sys writes the git HEAD SHA here to detect
///   repo-level commit changes; Bazel ignores the resulting
///   cargo:rerun-if-changed directives but the file changes across commits,
///   causing cache misses for downstream compilations.
/// - *.d: Make dependency files generated by gcc/clang; list all included
///   headers with absolute sandbox paths. rdkafka-sys produces ~91 .d files.
///   (Matched by extension in `remove_nondeterministic_out_dir_files`, not
///   listed here.)
const NONDETERMINISTIC_OUT_DIR_FILES: &[&str] = &[
    "config.log",
    "config.log.old",
    "config.status",
    "Makefile",
    "Makefile.config",
    "config.cache",
    "commit_hash",
];

/// Recursively walk `dir` and delete every regular file whose name is listed
/// in `NONDETERMINISTIC_OUT_DIR_FILES` or whose extension is `d` (Make
/// dependency files).
///
/// File names are compared as `OsStr`, so a file whose name is not valid
/// UTF-8 (e.g. a dependency file derived from an oddly named source) is still
/// matched. The extension test uses `Path::extension`, so a dotfile named
/// exactly `.d` has no extension and is left in place.
///
/// Symlinks are neither followed nor removed, and directories themselves are
/// never deleted, only descended into.
///
/// Errors are silently ignored: if a directory cannot be read or a file
/// cannot be removed the worst outcome is a cache miss, not a build failure.
fn remove_nondeterministic_out_dir_files(dir: &Path) {
    let Ok(entries) = read_dir(dir) else {
        return;
    };
    for entry in entries.flatten() {
        // `DirEntry::file_type` does not follow symlinks, so we never recurse
        // into symlink targets or traverse outside OUT_DIR.
        let Ok(file_type) = entry.file_type() else {
            continue;
        };
        if file_type.is_dir() {
            remove_nondeterministic_out_dir_files(&entry.path());
            continue;
        }
        if !file_type.is_file() {
            continue;
        }

        // Compare the raw OS name rather than a UTF-8 view of it, so names
        // that are not valid UTF-8 are not silently skipped.
        let name = entry.file_name();
        let is_listed = NONDETERMINISTIC_OUT_DIR_FILES
            .iter()
            .any(|known| name == *known);
        let is_dep_file = Path::new(&name).extension() == Some(std::ffi::OsStr::new("d"));
        if is_listed || is_dep_file {
            // Best effort: a failed removal only costs a cache miss.
            let _ = remove_file(entry.path());
        }
    }
}
325+
263326
fn should_symlink_exec_root() -> bool {
264327
env::var("RULES_RUST_SYMLINK_EXEC_ROOT")
265328
.map(|s| s == "1")
@@ -449,6 +512,79 @@ fn main() {
449512
#[cfg(test)]
450513
mod test {
451514
use super::*;
515+
use std::fs::{create_dir_all, write};
516+
517+
/// Create a fresh scratch directory under the system temp dir and return its path.
///
/// `label` is embedded in the directory name to make leftovers easy to
/// attribute to a test. The name also carries the process id, the full
/// nanosecond timestamp and a per-process counter, so two calls never share a
/// directory even when they use the same label within one clock tick
/// (`create_dir_all` would otherwise silently accept an existing directory).
///
/// Panics if the system clock is before the UNIX epoch or the directory
/// cannot be created.
fn make_temp_dir(label: &str) -> PathBuf {
    static NEXT_ID: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);

    let nanos = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .expect("system clock is set before the UNIX epoch")
        .as_nanos();
    let seq = NEXT_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let dir = std::env::temp_dir().join(format!(
        "rules_rust_bin_test_{}_{}_{}_{}",
        label,
        std::process::id(),
        nanos,
        seq
    ));
    create_dir_all(&dir).expect("failed to create temporary test directory");
    dir
}
526+
527+
#[test]
fn remove_nondeterministic_named_files() {
    // Populate a scratch directory with every listed name plus one unrelated file.
    let out_dir = make_temp_dir("named");
    let survivor = out_dir.join("libfoo.a");
    NONDETERMINISTIC_OUT_DIR_FILES
        .iter()
        .for_each(|listed| write(out_dir.join(listed), "content").unwrap());
    write(&survivor, "keep").unwrap();

    remove_nondeterministic_out_dir_files(&out_dir);

    // Every listed name must be gone; the unrelated file must remain.
    for listed in NONDETERMINISTIC_OUT_DIR_FILES.iter() {
        let leftover = out_dir.join(listed);
        assert!(!leftover.exists(), "{} should have been removed", listed);
    }
    assert!(survivor.exists(), "libfoo.a should be kept");
    std::fs::remove_dir_all(&out_dir).ok();
}
547+
548+
#[test]
fn remove_dot_d_files() {
    // Two Make dependency files next to an object file that must survive.
    let out_dir = make_temp_dir("dotd");
    let dep_files = [out_dir.join("foo.d"), out_dir.join("bar.d")];
    let object_file = out_dir.join("output.o");
    for dep in &dep_files {
        write(dep, "deps").unwrap();
    }
    write(&object_file, "keep").unwrap();

    remove_nondeterministic_out_dir_files(&out_dir);

    assert!(!dep_files[0].exists(), "foo.d should be removed");
    assert!(!dep_files[1].exists(), "bar.d should be removed");
    assert!(object_file.exists(), "output.o should be kept");
    std::fs::remove_dir_all(&out_dir).ok();
}
563+
#[test]
fn remove_nondeterministic_files_recursively() {
    // Layout: <root>/Makefile and <root>/subdir/{config.log, foo.d, output.o}.
    let root = make_temp_dir("recurse");
    let nested = root.join("subdir");
    create_dir_all(&nested).unwrap();

    let nested_log = nested.join("config.log");
    let nested_dep = nested.join("foo.d");
    let nested_obj = nested.join("output.o");
    let top_makefile = root.join("Makefile");
    write(&nested_log, "log").unwrap();
    write(&nested_dep, "deps").unwrap();
    write(&nested_obj, "keep").unwrap();
    write(&top_makefile, "top-level").unwrap();

    remove_nondeterministic_out_dir_files(&root);

    // Matching files are removed at both levels; the object file is kept.
    assert!(!nested_log.exists());
    assert!(!nested_dep.exists());
    assert!(nested_obj.exists());
    assert!(!top_makefile.exists());
    std::fs::remove_dir_all(&root).ok();
}
582+
#[test]
fn remove_nondeterministic_nonexistent_dir_is_noop() {
    // The only expectation is that walking an unreadable/missing directory
    // returns without panicking.
    let missing = std::env::temp_dir().join("rules_rust_bin_test_nonexistent_999999999");
    remove_nondeterministic_out_dir_files(&missing);
}
452588

453589
#[test]
454590
fn rustc_cfg_parsing() {
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Fixture package: a build script plus a test that consumes its OUT_DIR.
load("//cargo:defs.bzl", "cargo_build_script")
2+
load("//rust:defs.bzl", "rust_test")
3+
4+
# Build script target compiled from build.rs.
cargo_build_script(
5+
name = "build_script",
6+
srcs = ["build.rs"],
7+
edition = "2021",
8+
)
9+
10+
# Test crate compiled from test.rs; it depends on the build script target
# (presumably so OUT_DIR is available to the test crate; confirm against test.rs).
rust_test(
11+
name = "test",
12+
srcs = ["test.rs"],
13+
edition = "2021",
14+
deps = [":build_script"],
15+
)
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
use std::path::PathBuf;
2+
3+
/// Fill OUT_DIR with files that mimic what autoconf/cmake/mklove leave behind,
/// plus one legitimate output, then emit the rerun directive for this script.
fn main() {
    let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap());

    // (file name, contents) pairs, written in this order. Everything except
    // `output.txt` stands in for a file that embeds sandbox-specific paths and
    // must be stripped by the build script runner before Bazel captures
    // OUT_DIR as a TreeArtifact; `output.txt` is the legitimate output that
    // downstream consumers must still be able to read.
    let fixtures = [
        ("config.log", "configure log with /sandbox/path"),
        ("config.status", "configure status"),
        ("Makefile", "all:\n\t@echo sandbox path here"),
        ("Makefile.config", "CFLAGS=-I/sandbox/include"),
        ("config.cache", "# generated at Mon Jan 1 00:00:00 UTC 2024"),
        ("foo.d", "foo.o: foo.c /sandbox/include/bar.h"),
        ("baz.d", "baz.o: baz.c /sandbox/include/qux.h"),
        ("output.txt", "legitimate output"),
    ];
    for &(file_name, contents) in &fixtures {
        std::fs::write(out_dir.join(file_name), contents).unwrap();
    }

    println!("cargo:rerun-if-changed=build.rs");
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// include_str! resolves at compile time against the TreeArtifact captured by Bazel.
2+
// If the runner had removed output.txt this would not compile; if it had failed to
3+
// strip config.log / *.d files, the TreeArtifact hash would change on every run,
4+
// causing unnecessary rebuilds for all downstream crates.
5+
const OUTPUT: &str = include_str!(concat!(env!("OUT_DIR"), "/output.txt"));
6+
7+
#[test]
8+
fn legitimate_output_survives_nondeterministic_file_removal() {
9+
assert_eq!(OUTPUT, "legitimate output");
10+
}

0 commit comments

Comments
 (0)