diff --git a/cargo/private/cargo_build_script.bzl b/cargo/private/cargo_build_script.bzl index 4023621836..12dabbcd10 100644 --- a/cargo/private/cargo_build_script.bzl +++ b/cargo/private/cargo_build_script.bzl @@ -623,6 +623,10 @@ def _cargo_build_script_impl(ctx): if experimental_symlink_execroot: env["RULES_RUST_SYMLINK_EXEC_ROOT"] = "1" + out_dir_volatile_basenames = ctx.attr._out_dir_volatile_file_basenames[BuildSettingInfo].value + if out_dir_volatile_basenames: + env["RULES_RUST_OUT_DIR_VOLATILE_BASENAMES"] = ":".join(out_dir_volatile_basenames) + ctx.actions.run( executable = ctx.executable._cargo_build_script_runner, arguments = [args, runfiles_args], @@ -796,6 +800,9 @@ cargo_build_script = rule( allow_files = True, default = Label("//cargo/private:no_cxx"), ), + "_out_dir_volatile_file_basenames": attr.label( + default = Label("//cargo/settings:out_dir_volatile_file_basenames"), + ), }, fragments = ["cpp"], toolchains = [ diff --git a/cargo/private/cargo_build_script_runner/bin.rs b/cargo/private/cargo_build_script_runner/bin.rs index 48341b102c..8df5376cf7 100644 --- a/cargo/private/cargo_build_script_runner/bin.rs +++ b/cargo/private/cargo_build_script_runner/bin.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; use std::env; -use std::fs::{create_dir_all, read_to_string, write}; +use std::fs::{create_dir_all, read_dir, read_to_string, remove_file, write}; use std::path::{Path, PathBuf}; use std::process::Command; @@ -234,6 +234,13 @@ fn run_buildrs() -> Result<(), String> { .drain_runfiles_dir(&out_dir_abs) .unwrap(); + // Remove non-deterministic configure-generated files from OUT_DIR before + // Bazel captures it as a TreeArtifact. Files like config.log and + // Makefile.config embed the Bazel sandbox path (which changes on every + // action run), making the TreeArtifact hash non-deterministic and causing + // cache misses for all downstream rustc compilations. + remove_nondeterministic_out_dir_files(&out_dir_abs); + // If out_dir is empty add an empty file to the directory to avoid an upstream Bazel bug // https://github.com/bazelbuild/bazel/issues/28286 if out_dir_abs.read_dir().map(|read| read.count()).unwrap_or(0) == 0 { @@ -256,6 +263,45 @@ fn run_buildrs() -> Result<(), String> { Ok(()) } +/// Recursively walk `dir` and delete any file whose basename appears in +/// `RULES_RUST_OUT_DIR_VOLATILE_BASENAMES` (colon-separated, set by the +/// `//cargo/settings:out_dir_volatile_file_basenames` flag) or has a `.d` or +/// `.pc` extension. Errors are silently ignored: if a file cannot be removed +/// the worst outcome is a cache miss, not a build failure. +fn remove_nondeterministic_out_dir_files(dir: &Path) { + let volatile_basenames: Vec = env::var("RULES_RUST_OUT_DIR_VOLATILE_BASENAMES") + .map(|v| v.split(':').map(String::from).collect()) + .unwrap_or_default(); + remove_nondeterministic_out_dir_files_with_list(dir, &volatile_basenames); +} + +fn remove_nondeterministic_out_dir_files_with_list(dir: &Path, volatile_basenames: &[String]) { + let entries = match read_dir(dir) { + Ok(e) => e, + Err(_) => return, + }; + for entry in entries.flatten() { + // Use file_type() which does not follow symlinks, so we never recurse + // into symlink targets or traverse outside OUT_DIR. + let Ok(file_type) = entry.file_type() else { + continue; + }; + let path = entry.path(); + if file_type.is_dir() { + remove_nondeterministic_out_dir_files_with_list(&path, volatile_basenames); + } else if file_type.is_file() { + if let Some(name) = path.file_name().and_then(|n| n.to_str()) { + if volatile_basenames.iter().any(|b| b == name) + || name.ends_with(".d") + || name.ends_with(".pc") + { + let _ = remove_file(&path); + } + } + } + } +} + fn should_symlink_exec_root() -> bool { env::var("RULES_RUST_SYMLINK_EXEC_ROOT") .map(|s| s == "1") @@ -446,6 +492,137 @@ fn main() { #[cfg(test)] mod test { use super::*; + use std::fs::{create_dir_all, write}; + + fn make_temp_dir(label: &str) -> PathBuf { + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .subsec_nanos(); + let dir = std::env::temp_dir().join(format!("rules_rust_bin_test_{}_{}", label, nanos)); + create_dir_all(&dir).unwrap(); + dir + } + + fn basenames(names: &[&str]) -> Vec { + names.iter().map(|s| s.to_string()).collect() + } + + #[test] + fn remove_nondeterministic_named_files() { + let names = &["config.log", "config.status", "Makefile", "commit_hash"]; + let dir = make_temp_dir("named"); + for name in names { + write(dir.join(name), "content").unwrap(); + } + write(dir.join("libfoo.a"), "keep").unwrap(); + + remove_nondeterministic_out_dir_files_with_list(&dir, &basenames(names)); + + for name in names { + assert!( + !dir.join(name).exists(), + "{} should have been removed", + name + ); + } + assert!(dir.join("libfoo.a").exists(), "libfoo.a should be kept"); + std::fs::remove_dir_all(&dir).ok(); + } + + #[test] + fn remove_dot_d_and_pc_files() { + let dir = make_temp_dir("dotd"); + write(dir.join("foo.d"), "deps").unwrap(); + write(dir.join("bar.d"), "deps").unwrap(); + write(dir.join("jemalloc.pc"), "prefix=/sandbox/out").unwrap(); + write(dir.join("output.o"), "keep").unwrap(); + + remove_nondeterministic_out_dir_files_with_list(&dir, &[]); + + assert!(!dir.join("foo.d").exists(), "foo.d should be removed"); + assert!(!dir.join("bar.d").exists(), "bar.d should be removed"); + assert!( + !dir.join("jemalloc.pc").exists(), + "jemalloc.pc should be removed" + ); + assert!(dir.join("output.o").exists(), "output.o should be kept"); + std::fs::remove_dir_all(&dir).ok(); + } + + #[test] + fn remove_nondeterministic_files_recursively() { + let dir = make_temp_dir("recurse"); + let sub = dir.join("subdir"); + create_dir_all(&sub).unwrap(); + write(sub.join("config.log"), "log").unwrap(); + write(sub.join("foo.d"), "deps").unwrap(); + write(sub.join("output.o"), "keep").unwrap(); + write(dir.join("Makefile"), "top-level").unwrap(); + + remove_nondeterministic_out_dir_files_with_list( + &dir, + &basenames(&["config.log", "Makefile"]), + ); + + assert!(!sub.join("config.log").exists()); + assert!(!sub.join("foo.d").exists()); + assert!(sub.join("output.o").exists()); + assert!(!dir.join("Makefile").exists()); + std::fs::remove_dir_all(&dir).ok(); + } + + #[test] + fn remove_nondeterministic_nonexistent_dir_is_noop() { + let dir = std::env::temp_dir().join("rules_rust_bin_test_nonexistent_999999999"); + // Must not panic. + remove_nondeterministic_out_dir_files_with_list(&dir, &[]); + } + + #[test] + fn remove_nondeterministic_custom_basenames() { + let dir = make_temp_dir("custom"); + write(dir.join("custom_volatile.txt"), "bad").unwrap(); + write(dir.join("config.log"), "keep_this").unwrap(); + + remove_nondeterministic_out_dir_files_with_list(&dir, &basenames(&["custom_volatile.txt"])); + + assert!( + !dir.join("custom_volatile.txt").exists(), + "custom file should be removed" + ); + assert!( + dir.join("config.log").exists(), + "config.log should be kept with custom list" + ); + std::fs::remove_dir_all(&dir).ok(); + } + + #[test] + fn remove_nondeterministic_env_var_override() { + let dir = make_temp_dir("envvar"); + write(dir.join("config.log"), "would be removed by default").unwrap(); + write(dir.join("only_this.txt"), "should be removed").unwrap(); + + // Override via env var: only "only_this.txt" is volatile; config.log should survive. + let prev = std::env::var("RULES_RUST_OUT_DIR_VOLATILE_BASENAMES").ok(); + std::env::set_var("RULES_RUST_OUT_DIR_VOLATILE_BASENAMES", "only_this.txt"); + remove_nondeterministic_out_dir_files(&dir); + match prev { + Some(v) => std::env::set_var("RULES_RUST_OUT_DIR_VOLATILE_BASENAMES", v), + None => std::env::remove_var("RULES_RUST_OUT_DIR_VOLATILE_BASENAMES"), + } + + assert!( + !dir.join("only_this.txt").exists(), + "only_this.txt should be removed" + ); + assert!( + dir.join("config.log").exists(), + "config.log should survive when not in env var list" + ); + std::fs::remove_dir_all(&dir).ok(); + } #[test] fn rustc_cfg_parsing() { diff --git a/cargo/settings/BUILD.bazel b/cargo/settings/BUILD.bazel index db03d83588..1eae294cde 100644 --- a/cargo/settings/BUILD.bazel +++ b/cargo/settings/BUILD.bazel @@ -4,6 +4,7 @@ load( "cargo_manifest_dir_filename_suffixes_to_retain", "debug_std_streams_output_group", "experimental_symlink_execroot", + "out_dir_volatile_file_basenames", "use_default_shell_env", ) @@ -27,3 +28,5 @@ debug_std_streams_output_group() experimental_symlink_execroot() use_default_shell_env() + +out_dir_volatile_file_basenames() diff --git a/cargo/settings/settings.bzl b/cargo/settings/settings.bzl index c37c114484..a00147c6e6 100644 --- a/cargo/settings/settings.bzl +++ b/cargo/settings/settings.bzl @@ -42,3 +42,25 @@ def use_default_shell_env(): name = "use_default_shell_env", build_setting_default = True, ) + +def out_dir_volatile_file_basenames(): + """A flag which determines what file basenames are removed from `OUT_DIR` by `cargo_build_script` actions to make the `_bs.out_dir` TreeArtifact deterministic. + + Files whose names appear in this list, as well as files with a `.d` or `.pc` + extension, are deleted from `OUT_DIR` after the build script runs and before Bazel + captures the directory. Files like `config.log` and `Makefile` embed the Bazel + sandbox path, so their content changes on every action invocation, causing cache + misses for all downstream `rustc` compilations. + """ + string_list_flag( + name = "out_dir_volatile_file_basenames", + build_setting_default = [ + "config.log", + "config.log.old", + "config.status", + "Makefile", + "Makefile.config", + "config.cache", + "commit_hash", + ], + ) diff --git a/cargo/tests/cargo_build_script/nondeterministic_out_dir/BUILD.bazel b/cargo/tests/cargo_build_script/nondeterministic_out_dir/BUILD.bazel new file mode 100644 index 0000000000..36dd9d5581 --- /dev/null +++ b/cargo/tests/cargo_build_script/nondeterministic_out_dir/BUILD.bazel @@ -0,0 +1,15 @@ +load("//cargo:defs.bzl", "cargo_build_script") +load("//rust:defs.bzl", "rust_test") + +cargo_build_script( + name = "build_script", + srcs = ["build.rs"], + edition = "2021", +) + +rust_test( + name = "test", + srcs = ["test.rs"], + edition = "2021", + deps = [":build_script"], +) diff --git a/cargo/tests/cargo_build_script/nondeterministic_out_dir/build.rs b/cargo/tests/cargo_build_script/nondeterministic_out_dir/build.rs new file mode 100644 index 0000000000..2c87340258 --- /dev/null +++ b/cargo/tests/cargo_build_script/nondeterministic_out_dir/build.rs @@ -0,0 +1,34 @@ +use std::path::PathBuf; + +fn main() { + let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap()); + + // Simulate files written by autoconf/cmake/mklove that embed sandbox-specific + // paths and must be stripped by the build script runner before Bazel captures + // OUT_DIR as a TreeArtifact. + std::fs::write( + out_dir.join("config.log"), + "configure log with /sandbox/path", + ) + .unwrap(); + std::fs::write(out_dir.join("config.status"), "configure status").unwrap(); + std::fs::write(out_dir.join("Makefile"), "all:\n\t@echo sandbox path here").unwrap(); + std::fs::write(out_dir.join("Makefile.config"), "CFLAGS=-I/sandbox/include").unwrap(); + std::fs::write( + out_dir.join("config.cache"), + "# generated at Mon Jan 1 00:00:00 UTC 2024", + ) + .unwrap(); + std::fs::write(out_dir.join("foo.d"), "foo.o: foo.c /sandbox/include/bar.h").unwrap(); + std::fs::write(out_dir.join("baz.d"), "baz.o: baz.c /sandbox/include/qux.h").unwrap(); + std::fs::write( + out_dir.join("foo.pc"), + "prefix=/sandbox/out\nexec_prefix=${prefix}", + ) + .unwrap(); + + // Write a legitimate output that downstream consumers must be able to read. + std::fs::write(out_dir.join("output.txt"), "legitimate output").unwrap(); + + println!("cargo:rerun-if-changed=build.rs"); +} diff --git a/cargo/tests/cargo_build_script/nondeterministic_out_dir/test.rs b/cargo/tests/cargo_build_script/nondeterministic_out_dir/test.rs new file mode 100644 index 0000000000..be0069fc88 --- /dev/null +++ b/cargo/tests/cargo_build_script/nondeterministic_out_dir/test.rs @@ -0,0 +1,75 @@ +//! include_str! resolves at compile time against the TreeArtifact captured by Bazel. +//! If the runner failed to strip config.log / *.d / *.pc files, the TreeArtifact hash +//! would change on every run, causing unnecessary rebuilds for all downstream crates. + +const OUTPUT: &str = include_str!(concat!(env!("OUT_DIR"), "/output.txt")); + +#[test] +fn legitimate_output_survives_nondeterministic_file_removal() { + assert_eq!(OUTPUT, "legitimate output"); +} + +// Verify that volatile files written by the build script are absent from the +// captured OUT_DIR TreeArtifact. The build script wrote each of these; the +// cargo_build_script_runner must have removed them before Bazel snapshotted +// the directory. + +#[test] +fn config_log_removed() { + assert!( + !std::path::Path::new(concat!(env!("OUT_DIR"), "/config.log")).exists(), + "config.log should have been removed from OUT_DIR" + ); +} + +#[test] +fn config_status_removed() { + assert!( + !std::path::Path::new(concat!(env!("OUT_DIR"), "/config.status")).exists(), + "config.status should have been removed from OUT_DIR" + ); +} + +#[test] +fn makefile_removed() { + assert!( + !std::path::Path::new(concat!(env!("OUT_DIR"), "/Makefile")).exists(), + "Makefile should have been removed from OUT_DIR" + ); +} + +#[test] +fn makefile_config_removed() { + assert!( + !std::path::Path::new(concat!(env!("OUT_DIR"), "/Makefile.config")).exists(), + "Makefile.config should have been removed from OUT_DIR" + ); +} + +#[test] +fn config_cache_removed() { + assert!( + !std::path::Path::new(concat!(env!("OUT_DIR"), "/config.cache")).exists(), + "config.cache should have been removed from OUT_DIR" + ); +} + +#[test] +fn dot_d_files_removed() { + assert!( + !std::path::Path::new(concat!(env!("OUT_DIR"), "/foo.d")).exists(), + "foo.d should have been removed from OUT_DIR" + ); + assert!( + !std::path::Path::new(concat!(env!("OUT_DIR"), "/baz.d")).exists(), + "baz.d should have been removed from OUT_DIR" + ); +} + +#[test] +fn dot_pc_file_removed() { + assert!( + !std::path::Path::new(concat!(env!("OUT_DIR"), "/foo.pc")).exists(), + "foo.pc should have been removed from OUT_DIR" + ); +}