Skip to content

Commit 5ac1e80

Browse files
committed
Remove non-deterministic files from OUT_DIR to fix TreeArtifact cache misses
Recursively remove files from OUT_DIR whose names appear in the `//cargo/settings:out_dir_volatile_file_basenames` string_list_flag, or have a .d or .pc extension, before Bazel captures OUT_DIR as a TreeArtifact. The flag value (default: config.log, config.log.old, config.status, Makefile, Makefile.config, config.cache, commit_hash, jemalloc-config, jemalloc.sh) is passed to the runner via the `RULES_RUST_OUT_DIR_VOLATILE_BASENAMES` env var. The runner reads the env var at runtime; if absent, no named-file removal occurs (extension-based removal of .d and .pc files is unconditional). These files embed sandbox-specific paths, timestamps, or volatile values that make the TreeArtifact hash non-deterministic, causing cache misses for all downstream rustc compilations on every action run. An integration test verifies both that legitimate outputs survive and that each category of volatile file is absent from the captured TreeArtifact.
1 parent 82506df commit 5ac1e80

7 files changed

Lines changed: 334 additions & 1 deletion

File tree

cargo/private/cargo_build_script.bzl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -623,6 +623,10 @@ def _cargo_build_script_impl(ctx):
623623
if experimental_symlink_execroot:
624624
env["RULES_RUST_SYMLINK_EXEC_ROOT"] = "1"
625625

626+
out_dir_volatile_basenames = ctx.attr._out_dir_volatile_file_basenames[BuildSettingInfo].value
627+
if out_dir_volatile_basenames:
628+
env["RULES_RUST_OUT_DIR_VOLATILE_BASENAMES"] = ":".join(out_dir_volatile_basenames)
629+
626630
ctx.actions.run(
627631
executable = ctx.executable._cargo_build_script_runner,
628632
arguments = [args, runfiles_args],
@@ -796,6 +800,9 @@ cargo_build_script = rule(
796800
allow_files = True,
797801
default = Label("//cargo/private:no_cxx"),
798802
),
803+
"_out_dir_volatile_file_basenames": attr.label(
804+
default = Label("//cargo/settings:out_dir_volatile_file_basenames"),
805+
),
799806
},
800807
fragments = ["cpp"],
801808
toolchains = [

cargo/private/cargo_build_script_runner/bin.rs

Lines changed: 178 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
1818
use std::collections::BTreeMap;
1919
use std::env;
20-
use std::fs::{create_dir_all, read_to_string, write};
20+
use std::fs::{create_dir_all, read_dir, read_to_string, remove_file, write};
2121
use std::path::{Path, PathBuf};
2222
use std::process::Command;
2323

@@ -234,6 +234,13 @@ fn run_buildrs() -> Result<(), String> {
234234
.drain_runfiles_dir(&out_dir_abs)
235235
.unwrap();
236236

237+
// Remove non-deterministic configure-generated files from OUT_DIR before
238+
// Bazel captures it as a TreeArtifact. Files like config.log and
239+
// Makefile.config embed the Bazel sandbox path (which changes on every
240+
// action run), making the TreeArtifact hash non-deterministic and causing
241+
// cache misses for all downstream rustc compilations.
242+
remove_nondeterministic_out_dir_files(&out_dir_abs);
243+
237244
// If out_dir is empty add an empty file to the directory to avoid an upstream Bazel bug
238245
// https://github.com/bazelbuild/bazel/issues/28286
239246
if out_dir_abs.read_dir().map(|read| read.count()).unwrap_or(0) == 0 {
@@ -256,6 +263,45 @@ fn run_buildrs() -> Result<(), String> {
256263
Ok(())
257264
}
258265

266+
/// Recursively walk `dir` and delete any file whose basename appears in
267+
/// `RULES_RUST_OUT_DIR_VOLATILE_BASENAMES` (colon-separated, set by the
268+
/// `//cargo/settings:out_dir_volatile_file_basenames` flag) or has a `.d` or
269+
/// `.pc` extension. Errors are silently ignored: if a file cannot be removed
270+
/// the worst outcome is a cache miss, not a build failure.
271+
fn remove_nondeterministic_out_dir_files(dir: &Path) {
272+
let volatile_basenames: Vec<String> = env::var("RULES_RUST_OUT_DIR_VOLATILE_BASENAMES")
273+
.map(|v| v.split(':').map(String::from).collect())
274+
.unwrap_or_default();
275+
remove_nondeterministic_out_dir_files_with_list(dir, &volatile_basenames);
276+
}
277+
278+
fn remove_nondeterministic_out_dir_files_with_list(dir: &Path, volatile_basenames: &[String]) {
279+
let entries = match read_dir(dir) {
280+
Ok(e) => e,
281+
Err(_) => return,
282+
};
283+
for entry in entries.flatten() {
284+
// Use file_type() which does not follow symlinks, so we never recurse
285+
// into symlink targets or traverse outside OUT_DIR.
286+
let Ok(file_type) = entry.file_type() else {
287+
continue;
288+
};
289+
let path = entry.path();
290+
if file_type.is_dir() {
291+
remove_nondeterministic_out_dir_files_with_list(&path, volatile_basenames);
292+
} else if file_type.is_file() {
293+
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
294+
if volatile_basenames.iter().any(|b| b == name)
295+
|| name.ends_with(".d")
296+
|| name.ends_with(".pc")
297+
{
298+
let _ = remove_file(&path);
299+
}
300+
}
301+
}
302+
}
303+
}
304+
259305
fn should_symlink_exec_root() -> bool {
260306
env::var("RULES_RUST_SYMLINK_EXEC_ROOT")
261307
.map(|s| s == "1")
@@ -446,6 +492,137 @@ fn main() {
446492
#[cfg(test)]
447493
mod test {
448494
use super::*;
495+
use std::fs::{create_dir_all, write};
496+
497+
fn make_temp_dir(label: &str) -> PathBuf {
498+
let nanos = std::time::SystemTime::now()
499+
.duration_since(std::time::UNIX_EPOCH)
500+
.unwrap()
501+
.subsec_nanos();
502+
let dir = std::env::temp_dir().join(format!("rules_rust_bin_test_{}_{}", label, nanos));
503+
create_dir_all(&dir).unwrap();
504+
dir
505+
}
506+
507+
fn basenames(names: &[&str]) -> Vec<String> {
508+
names.iter().map(|s| s.to_string()).collect()
509+
}
510+
511+
#[test]
512+
fn remove_nondeterministic_named_files() {
513+
let names = &["config.log", "config.status", "Makefile", "commit_hash"];
514+
let dir = make_temp_dir("named");
515+
for name in names {
516+
write(dir.join(name), "content").unwrap();
517+
}
518+
write(dir.join("libfoo.a"), "keep").unwrap();
519+
520+
remove_nondeterministic_out_dir_files_with_list(&dir, &basenames(names));
521+
522+
for name in names {
523+
assert!(
524+
!dir.join(name).exists(),
525+
"{} should have been removed",
526+
name
527+
);
528+
}
529+
assert!(dir.join("libfoo.a").exists(), "libfoo.a should be kept");
530+
std::fs::remove_dir_all(&dir).ok();
531+
}
532+
533+
#[test]
534+
fn remove_dot_d_and_pc_files() {
535+
let dir = make_temp_dir("dotd");
536+
write(dir.join("foo.d"), "deps").unwrap();
537+
write(dir.join("bar.d"), "deps").unwrap();
538+
write(dir.join("jemalloc.pc"), "prefix=/sandbox/out").unwrap();
539+
write(dir.join("output.o"), "keep").unwrap();
540+
541+
remove_nondeterministic_out_dir_files_with_list(&dir, &[]);
542+
543+
assert!(!dir.join("foo.d").exists(), "foo.d should be removed");
544+
assert!(!dir.join("bar.d").exists(), "bar.d should be removed");
545+
assert!(
546+
!dir.join("jemalloc.pc").exists(),
547+
"jemalloc.pc should be removed"
548+
);
549+
assert!(dir.join("output.o").exists(), "output.o should be kept");
550+
std::fs::remove_dir_all(&dir).ok();
551+
}
552+
553+
#[test]
554+
fn remove_nondeterministic_files_recursively() {
555+
let dir = make_temp_dir("recurse");
556+
let sub = dir.join("subdir");
557+
create_dir_all(&sub).unwrap();
558+
write(sub.join("config.log"), "log").unwrap();
559+
write(sub.join("foo.d"), "deps").unwrap();
560+
write(sub.join("output.o"), "keep").unwrap();
561+
write(dir.join("Makefile"), "top-level").unwrap();
562+
563+
remove_nondeterministic_out_dir_files_with_list(
564+
&dir,
565+
&basenames(&["config.log", "Makefile"]),
566+
);
567+
568+
assert!(!sub.join("config.log").exists());
569+
assert!(!sub.join("foo.d").exists());
570+
assert!(sub.join("output.o").exists());
571+
assert!(!dir.join("Makefile").exists());
572+
std::fs::remove_dir_all(&dir).ok();
573+
}
574+
575+
#[test]
576+
fn remove_nondeterministic_nonexistent_dir_is_noop() {
577+
let dir = std::env::temp_dir().join("rules_rust_bin_test_nonexistent_999999999");
578+
// Must not panic.
579+
remove_nondeterministic_out_dir_files_with_list(&dir, &[]);
580+
}
581+
582+
#[test]
583+
fn remove_nondeterministic_custom_basenames() {
584+
let dir = make_temp_dir("custom");
585+
write(dir.join("custom_volatile.txt"), "bad").unwrap();
586+
write(dir.join("config.log"), "keep_this").unwrap();
587+
588+
remove_nondeterministic_out_dir_files_with_list(&dir, &basenames(&["custom_volatile.txt"]));
589+
590+
assert!(
591+
!dir.join("custom_volatile.txt").exists(),
592+
"custom file should be removed"
593+
);
594+
assert!(
595+
dir.join("config.log").exists(),
596+
"config.log should be kept with custom list"
597+
);
598+
std::fs::remove_dir_all(&dir).ok();
599+
}
600+
601+
#[test]
602+
fn remove_nondeterministic_env_var_override() {
603+
let dir = make_temp_dir("envvar");
604+
write(dir.join("config.log"), "would be removed by default").unwrap();
605+
write(dir.join("only_this.txt"), "should be removed").unwrap();
606+
607+
// Override via env var: only "only_this.txt" is volatile; config.log should survive.
608+
let prev = std::env::var("RULES_RUST_OUT_DIR_VOLATILE_BASENAMES").ok();
609+
std::env::set_var("RULES_RUST_OUT_DIR_VOLATILE_BASENAMES", "only_this.txt");
610+
remove_nondeterministic_out_dir_files(&dir);
611+
match prev {
612+
Some(v) => std::env::set_var("RULES_RUST_OUT_DIR_VOLATILE_BASENAMES", v),
613+
None => std::env::remove_var("RULES_RUST_OUT_DIR_VOLATILE_BASENAMES"),
614+
}
615+
616+
assert!(
617+
!dir.join("only_this.txt").exists(),
618+
"only_this.txt should be removed"
619+
);
620+
assert!(
621+
dir.join("config.log").exists(),
622+
"config.log should survive when not in env var list"
623+
);
624+
std::fs::remove_dir_all(&dir).ok();
625+
}
449626

450627
#[test]
451628
fn rustc_cfg_parsing() {

cargo/settings/BUILD.bazel

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ load(
44
"cargo_manifest_dir_filename_suffixes_to_retain",
55
"debug_std_streams_output_group",
66
"experimental_symlink_execroot",
7+
"out_dir_volatile_file_basenames",
78
"use_default_shell_env",
89
)
910

@@ -27,3 +28,5 @@ debug_std_streams_output_group()
2728
experimental_symlink_execroot()
2829

2930
use_default_shell_env()
31+
32+
out_dir_volatile_file_basenames()

cargo/settings/settings.bzl

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,26 @@ def use_default_shell_env():
4242
name = "use_default_shell_env",
4343
build_setting_default = True,
4444
)
45+
46+
def out_dir_volatile_file_basenames():
47+
"""A flag which determines what file basenames are removed from `OUT_DIR` by \
48+
`cargo_build_script` actions to make the `_bs.out_dir` TreeArtifact deterministic.
49+
50+
Files whose names appear in this list, as well as files with a `.d` or `.pc` \
51+
extension, are deleted from `OUT_DIR` after the build script runs and before Bazel \
52+
captures the directory. Files like `config.log` and `Makefile` embed the Bazel \
53+
sandbox path, so their content changes on every action invocation, causing cache \
54+
misses for all downstream `rustc` compilations.
55+
"""
56+
string_list_flag(
57+
name = "out_dir_volatile_file_basenames",
58+
build_setting_default = [
59+
"config.log",
60+
"config.log.old",
61+
"config.status",
62+
"Makefile",
63+
"Makefile.config",
64+
"config.cache",
65+
"commit_hash",
66+
],
67+
)
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
load("//cargo:defs.bzl", "cargo_build_script")
2+
load("//rust:defs.bzl", "rust_test")
3+
4+
cargo_build_script(
5+
name = "build_script",
6+
srcs = ["build.rs"],
7+
edition = "2021",
8+
)
9+
10+
rust_test(
11+
name = "test",
12+
srcs = ["test.rs"],
13+
edition = "2021",
14+
deps = [":build_script"],
15+
)
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
use std::path::PathBuf;
2+
3+
fn main() {
4+
let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap());
5+
6+
// Simulate files written by autoconf/cmake/mklove that embed sandbox-specific
7+
// paths and must be stripped by the build script runner before Bazel captures
8+
// OUT_DIR as a TreeArtifact.
9+
std::fs::write(
10+
out_dir.join("config.log"),
11+
"configure log with /sandbox/path",
12+
)
13+
.unwrap();
14+
std::fs::write(out_dir.join("config.status"), "configure status").unwrap();
15+
std::fs::write(out_dir.join("Makefile"), "all:\n\t@echo sandbox path here").unwrap();
16+
std::fs::write(out_dir.join("Makefile.config"), "CFLAGS=-I/sandbox/include").unwrap();
17+
std::fs::write(
18+
out_dir.join("config.cache"),
19+
"# generated at Mon Jan 1 00:00:00 UTC 2024",
20+
)
21+
.unwrap();
22+
std::fs::write(out_dir.join("foo.d"), "foo.o: foo.c /sandbox/include/bar.h").unwrap();
23+
std::fs::write(out_dir.join("baz.d"), "baz.o: baz.c /sandbox/include/qux.h").unwrap();
24+
std::fs::write(
25+
out_dir.join("foo.pc"),
26+
"prefix=/sandbox/out\nexec_prefix=${prefix}",
27+
)
28+
.unwrap();
29+
30+
// Write a legitimate output that downstream consumers must be able to read.
31+
std::fs::write(out_dir.join("output.txt"), "legitimate output").unwrap();
32+
33+
println!("cargo:rerun-if-changed=build.rs");
34+
}
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// include_str! resolves at compile time against the TreeArtifact captured by Bazel.
2+
// If the runner failed to strip config.log / *.d / *.pc files, the TreeArtifact hash
3+
// would change on every run, causing unnecessary rebuilds for all downstream crates.
4+
const OUTPUT: &str = include_str!(concat!(env!("OUT_DIR"), "/output.txt"));
5+
6+
#[test]
7+
fn legitimate_output_survives_nondeterministic_file_removal() {
8+
assert_eq!(OUTPUT, "legitimate output");
9+
}
10+
11+
// Verify that volatile files written by the build script are absent from the
12+
// captured OUT_DIR TreeArtifact. The build script wrote each of these; the
13+
// cargo_build_script_runner must have removed them before Bazel snapshotted
14+
// the directory.
15+
16+
#[test]
17+
fn config_log_removed() {
18+
assert!(
19+
!std::path::Path::new(concat!(env!("OUT_DIR"), "/config.log")).exists(),
20+
"config.log should have been removed from OUT_DIR"
21+
);
22+
}
23+
24+
#[test]
25+
fn config_status_removed() {
26+
assert!(
27+
!std::path::Path::new(concat!(env!("OUT_DIR"), "/config.status")).exists(),
28+
"config.status should have been removed from OUT_DIR"
29+
);
30+
}
31+
32+
#[test]
33+
fn makefile_removed() {
34+
assert!(
35+
!std::path::Path::new(concat!(env!("OUT_DIR"), "/Makefile")).exists(),
36+
"Makefile should have been removed from OUT_DIR"
37+
);
38+
}
39+
40+
#[test]
41+
fn makefile_config_removed() {
42+
assert!(
43+
!std::path::Path::new(concat!(env!("OUT_DIR"), "/Makefile.config")).exists(),
44+
"Makefile.config should have been removed from OUT_DIR"
45+
);
46+
}
47+
48+
#[test]
49+
fn config_cache_removed() {
50+
assert!(
51+
!std::path::Path::new(concat!(env!("OUT_DIR"), "/config.cache")).exists(),
52+
"config.cache should have been removed from OUT_DIR"
53+
);
54+
}
55+
56+
#[test]
57+
fn dot_d_files_removed() {
58+
assert!(
59+
!std::path::Path::new(concat!(env!("OUT_DIR"), "/foo.d")).exists(),
60+
"foo.d should have been removed from OUT_DIR"
61+
);
62+
assert!(
63+
!std::path::Path::new(concat!(env!("OUT_DIR"), "/baz.d")).exists(),
64+
"baz.d should have been removed from OUT_DIR"
65+
);
66+
}
67+
68+
#[test]
69+
fn dot_pc_file_removed() {
70+
assert!(
71+
!std::path::Path::new(concat!(env!("OUT_DIR"), "/foo.pc")).exists(),
72+
"foo.pc should have been removed from OUT_DIR"
73+
);
74+
}

0 commit comments

Comments
 (0)