Skip to content

Commit 8495347

Browse files
committed
tmp: save it for now
1 parent 2e74a70 commit 8495347

8 files changed

Lines changed: 204 additions & 4 deletions

File tree

Cargo.lock

Lines changed: 78 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ serde = { version = "^1.0.219", optional = true }
6767
serde_json = { version = "^1.0.140", optional = true }
6868
sysinfo = "^0.35.2"
6969

70+
[target.'cfg(unix)'.dependencies]
71+
dashmap = "^6.1.0"
72+
7073
[dev-dependencies]
7174
build-fs-tree = "^0.7.1"
7275
command-extra = "^1.0.0"

src/data_tree.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,6 @@ pub struct DataTree<Name, Size: size::Size> {
3030

3131
mod constructors;
3232
mod getters;
33+
mod hardlink;
3334
mod retain;
3435
mod sort;

src/data_tree/hardlink.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
use super::DataTree;
2+
use crate::size;
3+
use assert_cmp::debug_assert_op;
4+
use rayon::prelude::*;
5+
use std::{ffi::OsStr, ops::Mul, path::Path};
6+
7+
impl<Name, Size> DataTree<Name, Size>
8+
where
9+
Self: Send,
10+
Name: AsRef<OsStr>,
11+
Size: size::Size + Mul<u64, Output = Size> + Sync,
12+
{
13+
/// Reduce the size of the directories that have hardlinks.
14+
pub fn par_deduplicate_hardlinks(&mut self, hardlink_info: &[(Size, Vec<&Path>)]) {
15+
if hardlink_info.is_empty() {
16+
return;
17+
}
18+
19+
let prefix = self.name().as_ref();
20+
let sub_hardlink_info: Vec<(Size, Vec<&Path>)> = hardlink_info
21+
.iter()
22+
.filter(|(_, link_paths)| link_paths.len() > 1)
23+
.map(|(size, link_paths)| {
24+
let link_suffices: Vec<&Path> = link_paths
25+
.iter()
26+
.map(|link_path| link_path.strip_prefix(prefix))
27+
.filter_map(Result::ok)
28+
.collect();
29+
(*size, link_suffices)
30+
})
31+
.filter(|(_, link_paths)| link_paths.len() > 1)
32+
.collect();
33+
34+
for (size, link_suffices) in &sub_hardlink_info {
35+
let number_of_links = link_suffices.len() as u64;
36+
debug_assert_op!(number_of_links > 1);
37+
self.size -= *size * (number_of_links - 1);
38+
}
39+
40+
self.children
41+
.par_iter_mut()
42+
.for_each(|child| child.par_deduplicate_hardlinks(&sub_hardlink_info))
43+
}
44+
}

src/hook.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
use std::{fs::Metadata, path::Path};
2+
3+
/// Argument to pass to [`Hook::run_hook`].
///
/// Bundles borrowed information about a single filesystem entry together with a size value.
#[derive(Debug, Clone, Copy)]
pub struct HookArgument<'a, Size> {
    /// Path of the entry the hook is being run for.
    pub path: &'a Path,
    /// Metadata corresponding to [`path`](Self::path).
    pub metadata: &'a Metadata,
    /// Size value supplied alongside the entry.
    // NOTE(review): presumably the measured size of `path` — confirm against the caller.
    pub size: Size,
}
10+
11+
/// Hook to run with a [`Path`] and its corresponding [`Metadata`].
12+
pub trait Hook<Size> {
13+
fn run_hook(&self, argument: HookArgument<Size>);
14+
}
15+
16+
/// A [hook](Hook) that does nothing.
17+
#[derive(Debug, Clone, Copy)]
18+
pub struct DoNothing;
19+
impl<Size> Hook<Size> for DoNothing {
20+
fn run_hook(&self, _: HookArgument<Size>) {}
21+
}
22+
23+
// `RecordHardLink` is POSIX-exclusive: Windows does have `MetadataExt::number_of_links`, but it requires Nightly.
24+
#[cfg(unix)]
25+
mod record_hardlink;
26+
#[cfg(unix)]
27+
pub use record_hardlink::*;

src/hook/record_hardlink.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
use super::{Hook, HookArgument};
2+
use dashmap::DashMap;
3+
use std::{fmt::Debug, os::unix::fs::MetadataExt, path::PathBuf};
4+
5+
/// Map an inode number to its size and detected paths.
6+
type RecordHardLinkStorage<Size> = DashMap<u64, (Size, Vec<PathBuf>)>; // TODO: benchmark against Mutex<HashMap<u64, (Size, Vec<PathBuf>)>>
7+
8+
/// A [hook](Hook) that record files with more than 1 links.
9+
#[derive(Debug, Clone, Copy)]
10+
pub struct RecordHardLink<'a, Size> {
11+
/// Map an inode number to its size and detected paths.
12+
storage: &'a RecordHardLinkStorage<Size>,
13+
}
14+
15+
impl<'a, Size> RecordHardLink<'a, Size> {
16+
/// Create a [hook](Hook) to record files with more than 1 links.
17+
pub fn new(storage: &'a RecordHardLinkStorage<Size>) -> Self {
18+
RecordHardLink { storage }
19+
}
20+
}
21+
22+
impl<'a, Size: Eq + Debug> Hook<Size> for RecordHardLink<'a, Size> {
23+
fn run_hook(&self, argument: HookArgument<Size>) {
24+
let HookArgument {
25+
path,
26+
metadata,
27+
size,
28+
} = argument;
29+
30+
if metadata.is_dir() || metadata.nlink() <= 1 {
31+
return;
32+
}
33+
34+
self.storage
35+
.entry(metadata.ino())
36+
.and_modify(|(expected_size, paths)| {
37+
assert_eq!(
38+
size, *expected_size,
39+
"same ino but different sizes: {size:?} vs {expected_size:?}",
40+
);
41+
paths.push(path.to_path_buf());
42+
})
43+
.or_insert_with(|| (size, Vec::new()));
44+
}
45+
}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ pub mod bytes_format;
3333
pub mod data_tree;
3434
pub mod fs_tree_builder;
3535
pub mod get_size;
36+
pub mod hook;
3637
pub mod json_data;
3738
pub mod os_string_display;
3839
pub mod reporter;

src/size.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
use super::bytes_format::{self, BytesFormat};
2-
use derive_more::{Add, AddAssign, From, Into, Sum};
2+
use derive_more::{Add, AddAssign, From, Into, Sub, SubAssign, Sum};
33
use std::{
44
fmt::{Debug, Display},
55
iter::Sum,
6-
ops::{Add, AddAssign, Mul, MulAssign},
6+
ops::{Add, AddAssign, Mul, MulAssign, Sub, SubAssign},
77
};
88

99
#[cfg(feature = "json")]
@@ -21,6 +21,8 @@ pub trait Size:
2121
+ Ord
2222
+ Add<Output = Self>
2323
+ AddAssign
24+
+ Sub<Output = Self>
25+
+ SubAssign
2426
+ Sum
2527
{
2628
/// Underlying type
@@ -40,7 +42,7 @@ macro_rules! newtype {
4042
display: ($display_format:ty) -> $display_output:ty = $display_impl:expr;
4143
) => {
4244
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
43-
#[derive(From, Into, Add, AddAssign, Sum)]
45+
#[derive(From, Into, Add, AddAssign, Sub, SubAssign, Sum)]
4446
#[cfg_attr(feature = "json", derive(Deserialize, Serialize))]
4547
$(#[$attribute])*
4648
pub struct $name($inner);

0 commit comments

Comments
 (0)