We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 2e74a70 commit 8495347Copy full SHA for 8495347
8 files changed
Cargo.lock
Cargo.toml
@@ -67,6 +67,9 @@ serde = { version = "^1.0.219", optional = true }
67
serde_json = { version = "^1.0.140", optional = true }
68
sysinfo = "^0.35.2"
69
70
+[target.'cfg(unix)'.dependencies]
71
+dashmap = "^6.1.0"
72
+
73
[dev-dependencies]
74
build-fs-tree = "^0.7.1"
75
command-extra = "^1.0.0"
src/data_tree.rs
@@ -30,5 +30,6 @@ pub struct DataTree<Name, Size: size::Size> {
30
31
mod constructors;
32
mod getters;
33
+mod hardlink;
34
mod retain;
35
mod sort;
src/data_tree/hardlink.rs
@@ -0,0 +1,44 @@
1
+use super::DataTree;
2
+use crate::size;
3
+use assert_cmp::debug_assert_op;
4
+use rayon::prelude::*;
5
+use std::{ffi::OsStr, ops::Mul, path::Path};
6
7
+impl<Name, Size> DataTree<Name, Size>
8
+where
9
+ Self: Send,
10
+ Name: AsRef<OsStr>,
11
+ Size: size::Size + Mul<u64, Output = Size> + Sync,
12
+{
13
+ /// Reduce the size of the directories that have hardlinks.
14
+ pub fn par_deduplicate_hardlinks(&mut self, hardlink_info: &[(Size, Vec<&Path>)]) {
15
+ if hardlink_info.is_empty() {
16
+ return;
17
+ }
18
19
+ let prefix = self.name().as_ref();
20
+ let sub_hardlink_info: Vec<(Size, Vec<&Path>)> = hardlink_info
21
+ .iter()
22
+ .filter(|(_, link_paths)| link_paths.len() > 1)
23
+ .map(|(size, link_paths)| {
24
+ let link_suffices: Vec<&Path> = link_paths
25
26
+ .map(|link_path| link_path.strip_prefix(prefix))
27
+ .filter_map(Result::ok)
28
+ .collect();
29
+ (*size, link_suffices)
+ })
+ for (size, link_suffices) in &sub_hardlink_info {
+ let number_of_links = link_suffices.len() as u64;
36
+ debug_assert_op!(number_of_links > 1);
37
+ self.size -= *size * (number_of_links - 1);
38
39
40
+ self.children
41
+ .par_iter_mut()
42
+ .for_each(|child| child.par_deduplicate_hardlinks(&sub_hardlink_info))
43
44
+}
src/hook.rs
@@ -0,0 +1,27 @@
+use std::{fs::Metadata, path::Path};
+/// Argument to pass to [`Hook::run_hook`].
+#[derive(Debug, Clone, Copy)]
+pub struct HookArgument<'a, Size> {
+ pub path: &'a Path,
+ pub metadata: &'a Metadata,
+ pub size: Size,
+/// Hook to run with a [`Path`] and its corresponding [`Metadata`].
+pub trait Hook<Size> {
+ fn run_hook(&self, argument: HookArgument<Size>);
+/// A [hook](Hook) that does nothing.
+pub struct DoNothing;
+impl<Size> Hook<Size> for DoNothing {
+ fn run_hook(&self, _: HookArgument<Size>) {}
+// `RecordHardLink` is POSIX-exclusive: whilst Windows does have `MetadataExt::number_of_links`, it is only available on Nightly.
+#[cfg(unix)]
+mod record_hardlink;
+pub use record_hardlink::*;
src/hook/record_hardlink.rs
@@ -0,0 +1,45 @@
+use super::{Hook, HookArgument};
+use dashmap::DashMap;
+use std::{fmt::Debug, os::unix::fs::MetadataExt, path::PathBuf};
+/// Map an inode number to its size and detected paths.
+type RecordHardLinkStorage<Size> = DashMap<u64, (Size, Vec<PathBuf>)>; // TODO: benchmark against Mutex<HashMap<u64, (Size, Vec<PathBuf>)>>
+/// A [hook](Hook) that records files with more than 1 link.
+pub struct RecordHardLink<'a, Size> {
+ /// Map an inode number to its size and detected paths.
+ storage: &'a RecordHardLinkStorage<Size>,
+impl<'a, Size> RecordHardLink<'a, Size> {
+ /// Create a [hook](Hook) to record files with more than 1 link.
+ pub fn new(storage: &'a RecordHardLinkStorage<Size>) -> Self {
+ RecordHardLink { storage }
+impl<'a, Size: Eq + Debug> Hook<Size> for RecordHardLink<'a, Size> {
+ fn run_hook(&self, argument: HookArgument<Size>) {
+ let HookArgument {
+ path,
+ metadata,
+ size,
+ } = argument;
+ if metadata.is_dir() || metadata.nlink() <= 1 {
+ self.storage
+ .entry(metadata.ino())
+ .and_modify(|(expected_size, paths)| {
+ assert_eq!(
+ size, *expected_size,
+ "same ino but different sizes: {size:?} vs {expected_size:?}",
+ );
+ paths.push(path.to_path_buf());
+ .or_insert_with(|| (size, Vec::new()));
45
src/lib.rs
@@ -33,6 +33,7 @@ pub mod bytes_format;
pub mod data_tree;
pub mod fs_tree_builder;
pub mod get_size;
+pub mod hook;
pub mod json_data;
pub mod os_string_display;
pub mod reporter;
src/size.rs
@@ -1,9 +1,9 @@
use super::bytes_format::{self, BytesFormat};
-use derive_more::{Add, AddAssign, From, Into, Sum};
+use derive_more::{Add, AddAssign, From, Into, Sub, SubAssign, Sum};
use std::{
fmt::{Debug, Display},
iter::Sum,
- ops::{Add, AddAssign, Mul, MulAssign},
+ ops::{Add, AddAssign, Mul, MulAssign, Sub, SubAssign},
};
#[cfg(feature = "json")]
@@ -21,6 +21,8 @@ pub trait Size:
+ Ord
+ Add<Output = Self>
+ AddAssign
+ + Sub<Output = Self>
+ + SubAssign
+ Sum
{
/// Underlying type
@@ -40,7 +42,7 @@ macro_rules! newtype {
display: ($display_format:ty) -> $display_output:ty = $display_impl:expr;
) => {
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
- #[derive(From, Into, Add, AddAssign, Sum)]
+ #[derive(From, Into, Add, AddAssign, Sub, SubAssign, Sum)]
46
#[cfg_attr(feature = "json", derive(Deserialize, Serialize))]
47
$(#[$attribute])*
48
pub struct $name($inner);
0 commit comments