Skip to content

Commit 6d85959

Browse files
jsharpeUebelAndre
andauthored
Remove non-deterministic files from OUT_DIR to fix TreeArtifact cache misses (#3973)
Recursively remove files from OUT_DIR whose names appear in a known list (config.log, config.log.old, config.status, Makefile, Makefile.config, config.cache, commit_hash) or have a .d extension before Bazel captures OUT_DIR as a TreeArtifact. These files embed sandbox-specific paths, timestamps, or volatile values that make the TreeArtifact hash non-deterministic, causing cache misses for all downstream rustc compilations on every action run. --------- Co-authored-by: UebelAndre <github@uebelandre.com>
1 parent e890f0d commit 6d85959

7 files changed

Lines changed: 334 additions & 1 deletion

File tree

cargo/private/cargo_build_script.bzl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -623,6 +623,10 @@ def _cargo_build_script_impl(ctx):
623623
if experimental_symlink_execroot:
624624
env["RULES_RUST_SYMLINK_EXEC_ROOT"] = "1"
625625

626+
out_dir_volatile_basenames = ctx.attr._out_dir_volatile_file_basenames[BuildSettingInfo].value
627+
if out_dir_volatile_basenames:
628+
env["RULES_RUST_OUT_DIR_VOLATILE_BASENAMES"] = ":".join(out_dir_volatile_basenames)
629+
626630
ctx.actions.run(
627631
executable = ctx.executable._cargo_build_script_runner,
628632
arguments = [args, runfiles_args],
@@ -796,6 +800,9 @@ cargo_build_script = rule(
796800
allow_files = True,
797801
default = Label("//cargo/private:no_cxx"),
798802
),
803+
"_out_dir_volatile_file_basenames": attr.label(
804+
default = Label("//cargo/settings:out_dir_volatile_file_basenames"),
805+
),
799806
},
800807
fragments = ["cpp"],
801808
toolchains = [

cargo/private/cargo_build_script_runner/bin.rs

Lines changed: 178 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
1818
use std::collections::BTreeMap;
1919
use std::env;
20-
use std::fs::{create_dir_all, read_to_string, write};
20+
use std::fs::{create_dir_all, read_dir, read_to_string, remove_file, write};
2121
use std::path::{Path, PathBuf};
2222
use std::process::Command;
2323

@@ -234,6 +234,13 @@ fn run_buildrs() -> Result<(), String> {
234234
.drain_runfiles_dir(&out_dir_abs)
235235
.unwrap();
236236

237+
// Remove non-deterministic configure-generated files from OUT_DIR before
238+
// Bazel captures it as a TreeArtifact. Files like config.log and
239+
// Makefile.config embed the Bazel sandbox path (which changes on every
240+
// action run), making the TreeArtifact hash non-deterministic and causing
241+
// cache misses for all downstream rustc compilations.
242+
remove_nondeterministic_out_dir_files(&out_dir_abs);
243+
237244
// If out_dir is empty add an empty file to the directory to avoid an upstream Bazel bug
238245
// https://github.com/bazelbuild/bazel/issues/28286
239246
if out_dir_abs.read_dir().map(|read| read.count()).unwrap_or(0) == 0 {
@@ -256,6 +263,45 @@ fn run_buildrs() -> Result<(), String> {
256263
Ok(())
257264
}
258265

266+
/// Recursively walk `dir` and delete any file whose basename appears in
267+
/// `RULES_RUST_OUT_DIR_VOLATILE_BASENAMES` (colon-separated, set by the
268+
/// `//cargo/settings:out_dir_volatile_file_basenames` flag) or has a `.d` or
269+
/// `.pc` extension. Errors are silently ignored: if a file cannot be removed
270+
/// the worst outcome is a cache miss, not a build failure.
271+
fn remove_nondeterministic_out_dir_files(dir: &Path) {
272+
let volatile_basenames: Vec<String> = env::var("RULES_RUST_OUT_DIR_VOLATILE_BASENAMES")
273+
.map(|v| v.split(':').map(String::from).collect())
274+
.unwrap_or_default();
275+
remove_nondeterministic_out_dir_files_with_list(dir, &volatile_basenames);
276+
}
277+
278+
fn remove_nondeterministic_out_dir_files_with_list(dir: &Path, volatile_basenames: &[String]) {
279+
let entries = match read_dir(dir) {
280+
Ok(e) => e,
281+
Err(_) => return,
282+
};
283+
for entry in entries.flatten() {
284+
// Use file_type() which does not follow symlinks, so we never recurse
285+
// into symlink targets or traverse outside OUT_DIR.
286+
let Ok(file_type) = entry.file_type() else {
287+
continue;
288+
};
289+
let path = entry.path();
290+
if file_type.is_dir() {
291+
remove_nondeterministic_out_dir_files_with_list(&path, volatile_basenames);
292+
} else if file_type.is_file() {
293+
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
294+
if volatile_basenames.iter().any(|b| b == name)
295+
|| name.ends_with(".d")
296+
|| name.ends_with(".pc")
297+
{
298+
let _ = remove_file(&path);
299+
}
300+
}
301+
}
302+
}
303+
}
304+
259305
fn should_symlink_exec_root() -> bool {
260306
env::var("RULES_RUST_SYMLINK_EXEC_ROOT")
261307
.map(|s| s == "1")
@@ -446,6 +492,137 @@ fn main() {
446492
#[cfg(test)]
447493
mod test {
448494
use super::*;
495+
use std::fs::{create_dir_all, write};
496+
497+
fn make_temp_dir(label: &str) -> PathBuf {
498+
let nanos = std::time::SystemTime::now()
499+
.duration_since(std::time::UNIX_EPOCH)
500+
.unwrap()
501+
.subsec_nanos();
502+
let dir = std::env::temp_dir().join(format!("rules_rust_bin_test_{}_{}", label, nanos));
503+
create_dir_all(&dir).unwrap();
504+
dir
505+
}
506+
507+
fn basenames(names: &[&str]) -> Vec<String> {
508+
names.iter().map(|s| s.to_string()).collect()
509+
}
510+
511+
#[test]
512+
fn remove_nondeterministic_named_files() {
513+
let names = &["config.log", "config.status", "Makefile", "commit_hash"];
514+
let dir = make_temp_dir("named");
515+
for name in names {
516+
write(dir.join(name), "content").unwrap();
517+
}
518+
write(dir.join("libfoo.a"), "keep").unwrap();
519+
520+
remove_nondeterministic_out_dir_files_with_list(&dir, &basenames(names));
521+
522+
for name in names {
523+
assert!(
524+
!dir.join(name).exists(),
525+
"{} should have been removed",
526+
name
527+
);
528+
}
529+
assert!(dir.join("libfoo.a").exists(), "libfoo.a should be kept");
530+
std::fs::remove_dir_all(&dir).ok();
531+
}
532+
533+
#[test]
534+
fn remove_dot_d_and_pc_files() {
535+
let dir = make_temp_dir("dotd");
536+
write(dir.join("foo.d"), "deps").unwrap();
537+
write(dir.join("bar.d"), "deps").unwrap();
538+
write(dir.join("jemalloc.pc"), "prefix=/sandbox/out").unwrap();
539+
write(dir.join("output.o"), "keep").unwrap();
540+
541+
remove_nondeterministic_out_dir_files_with_list(&dir, &[]);
542+
543+
assert!(!dir.join("foo.d").exists(), "foo.d should be removed");
544+
assert!(!dir.join("bar.d").exists(), "bar.d should be removed");
545+
assert!(
546+
!dir.join("jemalloc.pc").exists(),
547+
"jemalloc.pc should be removed"
548+
);
549+
assert!(dir.join("output.o").exists(), "output.o should be kept");
550+
std::fs::remove_dir_all(&dir).ok();
551+
}
552+
553+
#[test]
554+
fn remove_nondeterministic_files_recursively() {
555+
let dir = make_temp_dir("recurse");
556+
let sub = dir.join("subdir");
557+
create_dir_all(&sub).unwrap();
558+
write(sub.join("config.log"), "log").unwrap();
559+
write(sub.join("foo.d"), "deps").unwrap();
560+
write(sub.join("output.o"), "keep").unwrap();
561+
write(dir.join("Makefile"), "top-level").unwrap();
562+
563+
remove_nondeterministic_out_dir_files_with_list(
564+
&dir,
565+
&basenames(&["config.log", "Makefile"]),
566+
);
567+
568+
assert!(!sub.join("config.log").exists());
569+
assert!(!sub.join("foo.d").exists());
570+
assert!(sub.join("output.o").exists());
571+
assert!(!dir.join("Makefile").exists());
572+
std::fs::remove_dir_all(&dir).ok();
573+
}
574+
575+
#[test]
576+
fn remove_nondeterministic_nonexistent_dir_is_noop() {
577+
let dir = std::env::temp_dir().join("rules_rust_bin_test_nonexistent_999999999");
578+
// Must not panic.
579+
remove_nondeterministic_out_dir_files_with_list(&dir, &[]);
580+
}
581+
582+
#[test]
583+
fn remove_nondeterministic_custom_basenames() {
584+
let dir = make_temp_dir("custom");
585+
write(dir.join("custom_volatile.txt"), "bad").unwrap();
586+
write(dir.join("config.log"), "keep_this").unwrap();
587+
588+
remove_nondeterministic_out_dir_files_with_list(&dir, &basenames(&["custom_volatile.txt"]));
589+
590+
assert!(
591+
!dir.join("custom_volatile.txt").exists(),
592+
"custom file should be removed"
593+
);
594+
assert!(
595+
dir.join("config.log").exists(),
596+
"config.log should be kept with custom list"
597+
);
598+
std::fs::remove_dir_all(&dir).ok();
599+
}
600+
601+
#[test]
602+
fn remove_nondeterministic_env_var_override() {
603+
let dir = make_temp_dir("envvar");
604+
write(dir.join("config.log"), "would be removed by default").unwrap();
605+
write(dir.join("only_this.txt"), "should be removed").unwrap();
606+
607+
// Override via env var: only "only_this.txt" is volatile; config.log should survive.
608+
let prev = std::env::var("RULES_RUST_OUT_DIR_VOLATILE_BASENAMES").ok();
609+
std::env::set_var("RULES_RUST_OUT_DIR_VOLATILE_BASENAMES", "only_this.txt");
610+
remove_nondeterministic_out_dir_files(&dir);
611+
match prev {
612+
Some(v) => std::env::set_var("RULES_RUST_OUT_DIR_VOLATILE_BASENAMES", v),
613+
None => std::env::remove_var("RULES_RUST_OUT_DIR_VOLATILE_BASENAMES"),
614+
}
615+
616+
assert!(
617+
!dir.join("only_this.txt").exists(),
618+
"only_this.txt should be removed"
619+
);
620+
assert!(
621+
dir.join("config.log").exists(),
622+
"config.log should survive when not in env var list"
623+
);
624+
std::fs::remove_dir_all(&dir).ok();
625+
}
449626

450627
#[test]
451628
fn rustc_cfg_parsing() {

cargo/settings/BUILD.bazel

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ load(
44
"cargo_manifest_dir_filename_suffixes_to_retain",
55
"debug_std_streams_output_group",
66
"experimental_symlink_execroot",
7+
"out_dir_volatile_file_basenames",
78
"use_default_shell_env",
89
)
910

@@ -27,3 +28,5 @@ debug_std_streams_output_group()
2728
experimental_symlink_execroot()
2829

2930
use_default_shell_env()
31+
32+
out_dir_volatile_file_basenames()

cargo/settings/settings.bzl

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,25 @@ def use_default_shell_env():
4242
name = "use_default_shell_env",
4343
build_setting_default = True,
4444
)
45+
46+
def out_dir_volatile_file_basenames():
47+
"""A flag which determines what file basenames are removed from `OUT_DIR` by `cargo_build_script` actions to make the `_bs.out_dir` TreeArtifact deterministic.
48+
49+
Files whose names appear in this list, as well as files with a `.d` or `.pc`
50+
extension, are deleted from `OUT_DIR` after the build script runs and before Bazel
51+
captures the directory. Files like `config.log` and `Makefile` embed the Bazel
52+
sandbox path, so their content changes on every action invocation, causing cache
53+
misses for all downstream `rustc` compilations.
54+
"""
55+
string_list_flag(
56+
name = "out_dir_volatile_file_basenames",
57+
build_setting_default = [
58+
"config.log",
59+
"config.log.old",
60+
"config.status",
61+
"Makefile",
62+
"Makefile.config",
63+
"config.cache",
64+
"commit_hash",
65+
],
66+
)
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
load("//cargo:defs.bzl", "cargo_build_script")
2+
load("//rust:defs.bzl", "rust_test")
3+
4+
cargo_build_script(
5+
name = "build_script",
6+
srcs = ["build.rs"],
7+
edition = "2021",
8+
)
9+
10+
rust_test(
11+
name = "test",
12+
srcs = ["test.rs"],
13+
edition = "2021",
14+
deps = [":build_script"],
15+
)
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
use std::path::PathBuf;
2+
3+
fn main() {
4+
let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap());
5+
6+
// Simulate files written by autoconf/cmake/mklove that embed sandbox-specific
7+
// paths and must be stripped by the build script runner before Bazel captures
8+
// OUT_DIR as a TreeArtifact.
9+
std::fs::write(
10+
out_dir.join("config.log"),
11+
"configure log with /sandbox/path",
12+
)
13+
.unwrap();
14+
std::fs::write(out_dir.join("config.status"), "configure status").unwrap();
15+
std::fs::write(out_dir.join("Makefile"), "all:\n\t@echo sandbox path here").unwrap();
16+
std::fs::write(out_dir.join("Makefile.config"), "CFLAGS=-I/sandbox/include").unwrap();
17+
std::fs::write(
18+
out_dir.join("config.cache"),
19+
"# generated at Mon Jan 1 00:00:00 UTC 2024",
20+
)
21+
.unwrap();
22+
std::fs::write(out_dir.join("foo.d"), "foo.o: foo.c /sandbox/include/bar.h").unwrap();
23+
std::fs::write(out_dir.join("baz.d"), "baz.o: baz.c /sandbox/include/qux.h").unwrap();
24+
std::fs::write(
25+
out_dir.join("foo.pc"),
26+
"prefix=/sandbox/out\nexec_prefix=${prefix}",
27+
)
28+
.unwrap();
29+
30+
// Write a legitimate output that downstream consumers must be able to read.
31+
std::fs::write(out_dir.join("output.txt"), "legitimate output").unwrap();
32+
33+
println!("cargo:rerun-if-changed=build.rs");
34+
}
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
//! include_str! resolves at compile time against the TreeArtifact captured by Bazel.
2+
//! If the runner failed to strip config.log / *.d / *.pc files, the TreeArtifact hash
3+
//! would change on every run, causing unnecessary rebuilds for all downstream crates.
4+
5+
const OUTPUT: &str = include_str!(concat!(env!("OUT_DIR"), "/output.txt"));
6+
7+
#[test]
8+
fn legitimate_output_survives_nondeterministic_file_removal() {
9+
assert_eq!(OUTPUT, "legitimate output");
10+
}
11+
12+
// Verify that volatile files written by the build script are absent from the
13+
// captured OUT_DIR TreeArtifact. The build script wrote each of these; the
14+
// cargo_build_script_runner must have removed them before Bazel snapshotted
15+
// the directory.
16+
17+
#[test]
18+
fn config_log_removed() {
19+
assert!(
20+
!std::path::Path::new(concat!(env!("OUT_DIR"), "/config.log")).exists(),
21+
"config.log should have been removed from OUT_DIR"
22+
);
23+
}
24+
25+
#[test]
26+
fn config_status_removed() {
27+
assert!(
28+
!std::path::Path::new(concat!(env!("OUT_DIR"), "/config.status")).exists(),
29+
"config.status should have been removed from OUT_DIR"
30+
);
31+
}
32+
33+
#[test]
34+
fn makefile_removed() {
35+
assert!(
36+
!std::path::Path::new(concat!(env!("OUT_DIR"), "/Makefile")).exists(),
37+
"Makefile should have been removed from OUT_DIR"
38+
);
39+
}
40+
41+
#[test]
42+
fn makefile_config_removed() {
43+
assert!(
44+
!std::path::Path::new(concat!(env!("OUT_DIR"), "/Makefile.config")).exists(),
45+
"Makefile.config should have been removed from OUT_DIR"
46+
);
47+
}
48+
49+
#[test]
50+
fn config_cache_removed() {
51+
assert!(
52+
!std::path::Path::new(concat!(env!("OUT_DIR"), "/config.cache")).exists(),
53+
"config.cache should have been removed from OUT_DIR"
54+
);
55+
}
56+
57+
#[test]
58+
fn dot_d_files_removed() {
59+
assert!(
60+
!std::path::Path::new(concat!(env!("OUT_DIR"), "/foo.d")).exists(),
61+
"foo.d should have been removed from OUT_DIR"
62+
);
63+
assert!(
64+
!std::path::Path::new(concat!(env!("OUT_DIR"), "/baz.d")).exists(),
65+
"baz.d should have been removed from OUT_DIR"
66+
);
67+
}
68+
69+
#[test]
70+
fn dot_pc_file_removed() {
71+
assert!(
72+
!std::path::Path::new(concat!(env!("OUT_DIR"), "/foo.pc")).exists(),
73+
"foo.pc should have been removed from OUT_DIR"
74+
);
75+
}

0 commit comments

Comments
 (0)