Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions cargo/private/cargo_build_script.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,10 @@ def _cargo_build_script_impl(ctx):
if experimental_symlink_execroot:
env["RULES_RUST_SYMLINK_EXEC_ROOT"] = "1"

out_dir_volatile_basenames = ctx.attr._out_dir_volatile_file_basenames[BuildSettingInfo].value
if out_dir_volatile_basenames:
env["RULES_RUST_OUT_DIR_VOLATILE_BASENAMES"] = ":".join(out_dir_volatile_basenames)

ctx.actions.run(
executable = ctx.executable._cargo_build_script_runner,
arguments = [args, runfiles_args],
Expand Down Expand Up @@ -796,6 +800,9 @@ cargo_build_script = rule(
allow_files = True,
default = Label("//cargo/private:no_cxx"),
),
"_out_dir_volatile_file_basenames": attr.label(
default = Label("//cargo/settings:out_dir_volatile_file_basenames"),
),
},
fragments = ["cpp"],
toolchains = [
Expand Down
179 changes: 178 additions & 1 deletion cargo/private/cargo_build_script_runner/bin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use std::collections::BTreeMap;
use std::env;
use std::fs::{create_dir_all, read_to_string, write};
use std::fs::{create_dir_all, read_dir, read_to_string, remove_file, write};
use std::path::{Path, PathBuf};
use std::process::Command;

Expand Down Expand Up @@ -234,6 +234,13 @@ fn run_buildrs() -> Result<(), String> {
.drain_runfiles_dir(&out_dir_abs)
.unwrap();

// Remove non-deterministic configure-generated files from OUT_DIR before
// Bazel captures it as a TreeArtifact. Files like config.log and
// Makefile.config embed the Bazel sandbox path (which changes on every
// action run), making the TreeArtifact hash non-deterministic and causing
// cache misses for all downstream rustc compilations.
remove_nondeterministic_out_dir_files(&out_dir_abs);

// If out_dir is empty add an empty file to the directory to avoid an upstream Bazel bug
// https://github.com/bazelbuild/bazel/issues/28286
if out_dir_abs.read_dir().map(|read| read.count()).unwrap_or(0) == 0 {
Expand All @@ -256,6 +263,45 @@ fn run_buildrs() -> Result<(), String> {
Ok(())
}

/// Recursively walk `dir` and delete any file whose basename appears in
/// `RULES_RUST_OUT_DIR_VOLATILE_BASENAMES` (colon-separated, set by the
/// `//cargo/settings:out_dir_volatile_file_basenames` flag) or has a `.d` or
/// `.pc` extension. Errors are silently ignored: if a file cannot be removed
/// the worst outcome is a cache miss, not a build failure.
fn remove_nondeterministic_out_dir_files(dir: &Path) {
let volatile_basenames: Vec<String> = env::var("RULES_RUST_OUT_DIR_VOLATILE_BASENAMES")
.map(|v| v.split(':').map(String::from).collect())
.unwrap_or_default();
remove_nondeterministic_out_dir_files_with_list(dir, &volatile_basenames);
}

fn remove_nondeterministic_out_dir_files_with_list(dir: &Path, volatile_basenames: &[String]) {
let entries = match read_dir(dir) {
Ok(e) => e,
Err(_) => return,
};
for entry in entries.flatten() {
// Use file_type() which does not follow symlinks, so we never recurse
// into symlink targets or traverse outside OUT_DIR.
let Ok(file_type) = entry.file_type() else {
continue;
};
let path = entry.path();
if file_type.is_dir() {
remove_nondeterministic_out_dir_files_with_list(&path, volatile_basenames);
} else if file_type.is_file() {
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
if volatile_basenames.iter().any(|b| b == name)
|| name.ends_with(".d")
|| name.ends_with(".pc")
{
let _ = remove_file(&path);
}
}
}
}
}

fn should_symlink_exec_root() -> bool {
env::var("RULES_RUST_SYMLINK_EXEC_ROOT")
.map(|s| s == "1")
Expand Down Expand Up @@ -446,6 +492,137 @@ fn main() {
#[cfg(test)]
mod test {
use super::*;
use std::fs::{create_dir_all, write};

fn make_temp_dir(label: &str) -> PathBuf {
let nanos = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.subsec_nanos();
let dir = std::env::temp_dir().join(format!("rules_rust_bin_test_{}_{}", label, nanos));
create_dir_all(&dir).unwrap();
dir
}

fn basenames(names: &[&str]) -> Vec<String> {
names.iter().map(|s| s.to_string()).collect()
}

#[test]
fn remove_nondeterministic_named_files() {
let names = &["config.log", "config.status", "Makefile", "commit_hash"];
let dir = make_temp_dir("named");
for name in names {
write(dir.join(name), "content").unwrap();
}
write(dir.join("libfoo.a"), "keep").unwrap();

remove_nondeterministic_out_dir_files_with_list(&dir, &basenames(names));

for name in names {
assert!(
!dir.join(name).exists(),
"{} should have been removed",
name
);
}
assert!(dir.join("libfoo.a").exists(), "libfoo.a should be kept");
std::fs::remove_dir_all(&dir).ok();
}

#[test]
fn remove_dot_d_and_pc_files() {
let dir = make_temp_dir("dotd");
write(dir.join("foo.d"), "deps").unwrap();
write(dir.join("bar.d"), "deps").unwrap();
write(dir.join("jemalloc.pc"), "prefix=/sandbox/out").unwrap();
write(dir.join("output.o"), "keep").unwrap();

remove_nondeterministic_out_dir_files_with_list(&dir, &[]);

assert!(!dir.join("foo.d").exists(), "foo.d should be removed");
assert!(!dir.join("bar.d").exists(), "bar.d should be removed");
assert!(
!dir.join("jemalloc.pc").exists(),
"jemalloc.pc should be removed"
);
assert!(dir.join("output.o").exists(), "output.o should be kept");
std::fs::remove_dir_all(&dir).ok();
}

#[test]
fn remove_nondeterministic_files_recursively() {
let dir = make_temp_dir("recurse");
let sub = dir.join("subdir");
create_dir_all(&sub).unwrap();
write(sub.join("config.log"), "log").unwrap();
write(sub.join("foo.d"), "deps").unwrap();
write(sub.join("output.o"), "keep").unwrap();
write(dir.join("Makefile"), "top-level").unwrap();

remove_nondeterministic_out_dir_files_with_list(
&dir,
&basenames(&["config.log", "Makefile"]),
);

assert!(!sub.join("config.log").exists());
assert!(!sub.join("foo.d").exists());
assert!(sub.join("output.o").exists());
assert!(!dir.join("Makefile").exists());
std::fs::remove_dir_all(&dir).ok();
}

#[test]
fn remove_nondeterministic_nonexistent_dir_is_noop() {
let dir = std::env::temp_dir().join("rules_rust_bin_test_nonexistent_999999999");
// Must not panic.
remove_nondeterministic_out_dir_files_with_list(&dir, &[]);
}

#[test]
fn remove_nondeterministic_custom_basenames() {
let dir = make_temp_dir("custom");
write(dir.join("custom_volatile.txt"), "bad").unwrap();
write(dir.join("config.log"), "keep_this").unwrap();

remove_nondeterministic_out_dir_files_with_list(&dir, &basenames(&["custom_volatile.txt"]));

assert!(
!dir.join("custom_volatile.txt").exists(),
"custom file should be removed"
);
assert!(
dir.join("config.log").exists(),
"config.log should be kept with custom list"
);
std::fs::remove_dir_all(&dir).ok();
}

#[test]
fn remove_nondeterministic_env_var_override() {
let dir = make_temp_dir("envvar");
write(dir.join("config.log"), "would be removed by default").unwrap();
write(dir.join("only_this.txt"), "should be removed").unwrap();

// Override via env var: only "only_this.txt" is volatile; config.log should survive.
let prev = std::env::var("RULES_RUST_OUT_DIR_VOLATILE_BASENAMES").ok();
std::env::set_var("RULES_RUST_OUT_DIR_VOLATILE_BASENAMES", "only_this.txt");
remove_nondeterministic_out_dir_files(&dir);
match prev {
Some(v) => std::env::set_var("RULES_RUST_OUT_DIR_VOLATILE_BASENAMES", v),
None => std::env::remove_var("RULES_RUST_OUT_DIR_VOLATILE_BASENAMES"),
}

assert!(
!dir.join("only_this.txt").exists(),
"only_this.txt should be removed"
);
assert!(
dir.join("config.log").exists(),
"config.log should survive when not in env var list"
);
std::fs::remove_dir_all(&dir).ok();
}

#[test]
fn rustc_cfg_parsing() {
Expand Down
3 changes: 3 additions & 0 deletions cargo/settings/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ load(
"cargo_manifest_dir_filename_suffixes_to_retain",
"debug_std_streams_output_group",
"experimental_symlink_execroot",
"out_dir_volatile_file_basenames",
"use_default_shell_env",
)

Expand All @@ -27,3 +28,5 @@ debug_std_streams_output_group()
experimental_symlink_execroot()

use_default_shell_env()

out_dir_volatile_file_basenames()
22 changes: 22 additions & 0 deletions cargo/settings/settings.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,25 @@ def use_default_shell_env():
name = "use_default_shell_env",
build_setting_default = True,
)

def out_dir_volatile_file_basenames():
"""A flag which determines what file basenames are removed from `OUT_DIR` by `cargo_build_script` actions to make the `_bs.out_dir` TreeArtifact deterministic.

Files whose names appear in this list, as well as files with a `.d` or `.pc`
extension, are deleted from `OUT_DIR` after the build script runs and before Bazel
captures the directory. Files like `config.log` and `Makefile` embed the Bazel
sandbox path, so their content changes on every action invocation, causing cache
misses for all downstream `rustc` compilations.
"""
string_list_flag(
name = "out_dir_volatile_file_basenames",
build_setting_default = [
"config.log",
"config.log.old",
"config.status",
"Makefile",
"Makefile.config",
"config.cache",
"commit_hash",
],
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
load("//cargo:defs.bzl", "cargo_build_script")
load("//rust:defs.bzl", "rust_test")

cargo_build_script(
name = "build_script",
srcs = ["build.rs"],
edition = "2021",
)

rust_test(
name = "test",
srcs = ["test.rs"],
edition = "2021",
deps = [":build_script"],
)
34 changes: 34 additions & 0 deletions cargo/tests/cargo_build_script/nondeterministic_out_dir/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
use std::path::PathBuf;

fn main() {
let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap());

// Simulate files written by autoconf/cmake/mklove that embed sandbox-specific
// paths and must be stripped by the build script runner before Bazel captures
// OUT_DIR as a TreeArtifact.
std::fs::write(
out_dir.join("config.log"),
"configure log with /sandbox/path",
)
.unwrap();
std::fs::write(out_dir.join("config.status"), "configure status").unwrap();
std::fs::write(out_dir.join("Makefile"), "all:\n\t@echo sandbox path here").unwrap();
std::fs::write(out_dir.join("Makefile.config"), "CFLAGS=-I/sandbox/include").unwrap();
std::fs::write(
out_dir.join("config.cache"),
"# generated at Mon Jan 1 00:00:00 UTC 2024",
)
.unwrap();
std::fs::write(out_dir.join("foo.d"), "foo.o: foo.c /sandbox/include/bar.h").unwrap();
std::fs::write(out_dir.join("baz.d"), "baz.o: baz.c /sandbox/include/qux.h").unwrap();
std::fs::write(
out_dir.join("foo.pc"),
"prefix=/sandbox/out\nexec_prefix=${prefix}",
)
.unwrap();

// Write a legitimate output that downstream consumers must be able to read.
std::fs::write(out_dir.join("output.txt"), "legitimate output").unwrap();

println!("cargo:rerun-if-changed=build.rs");
}
75 changes: 75 additions & 0 deletions cargo/tests/cargo_build_script/nondeterministic_out_dir/test.rs
Comment thread
jsharpe marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
//! include_str! resolves at compile time against the TreeArtifact captured by Bazel.
//! If the runner failed to strip config.log / *.d / *.pc files, the TreeArtifact hash
//! would change on every run, causing unnecessary rebuilds for all downstream crates.

const OUTPUT: &str = include_str!(concat!(env!("OUT_DIR"), "/output.txt"));

#[test]
fn legitimate_output_survives_nondeterministic_file_removal() {
assert_eq!(OUTPUT, "legitimate output");
}

// Verify that volatile files written by the build script are absent from the
// captured OUT_DIR TreeArtifact. The build script wrote each of these; the
// cargo_build_script_runner must have removed them before Bazel snapshotted
// the directory.

#[test]
fn config_log_removed() {
assert!(
!std::path::Path::new(concat!(env!("OUT_DIR"), "/config.log")).exists(),
"config.log should have been removed from OUT_DIR"
);
}

#[test]
fn config_status_removed() {
assert!(
!std::path::Path::new(concat!(env!("OUT_DIR"), "/config.status")).exists(),
"config.status should have been removed from OUT_DIR"
);
}

#[test]
fn makefile_removed() {
assert!(
!std::path::Path::new(concat!(env!("OUT_DIR"), "/Makefile")).exists(),
"Makefile should have been removed from OUT_DIR"
);
}

#[test]
fn makefile_config_removed() {
assert!(
!std::path::Path::new(concat!(env!("OUT_DIR"), "/Makefile.config")).exists(),
"Makefile.config should have been removed from OUT_DIR"
);
}

#[test]
fn config_cache_removed() {
assert!(
!std::path::Path::new(concat!(env!("OUT_DIR"), "/config.cache")).exists(),
"config.cache should have been removed from OUT_DIR"
);
}

#[test]
fn dot_d_files_removed() {
assert!(
!std::path::Path::new(concat!(env!("OUT_DIR"), "/foo.d")).exists(),
"foo.d should have been removed from OUT_DIR"
);
assert!(
!std::path::Path::new(concat!(env!("OUT_DIR"), "/baz.d")).exists(),
"baz.d should have been removed from OUT_DIR"
);
}

#[test]
fn dot_pc_file_removed() {
assert!(
!std::path::Path::new(concat!(env!("OUT_DIR"), "/foo.pc")).exists(),
"foo.pc should have been removed from OUT_DIR"
);
}
Loading