Skip to content

Commit ed6ad76

Browse files
committed
feat!: deduplicate hardlinks
1 parent 9b71420 commit ed6ad76

5 files changed

Lines changed: 81 additions & 26 deletions

File tree

src/app.rs

Lines changed: 74 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -180,15 +180,54 @@ impl App {
180180
}
181181
}
182182

183+
/// Compile-time switch that picks the hook and hardlink record used for a run.
///
/// The `DEDUPLICATE_HARDLINKS` const parameter selects which implementation applies
/// to a given size getter: one implementation exists for `false` and, on unix only,
/// one for `true`.
///
/// * `Hook` is the hook type handed to `Sub`; it must also implement
///   `sub::DeduplicateHardlinkSizes`, whose `HardlinkRecord` associated type is the
///   record type shared by the two functions below.
/// * `init_hardlink_record` produces a fresh record.
/// * `create_hook` builds the hook from such a record.
trait HardlinkDeduplicationSystem<const DEDUPLICATE_HARDLINKS: bool>: GetSizeUtils {
184+
type Hook: hook::Hook<Self::Size> + sub::DeduplicateHardlinkSizes<Self::Size>;
185+
fn create_hook(
186+
record: <Self::Hook as sub::DeduplicateHardlinkSizes<Self::Size>>::HardlinkRecord,
187+
) -> Self::Hook;
188+
fn init_hardlink_record(
189+
) -> <Self::Hook as sub::DeduplicateHardlinkSizes<Self::Size>>::HardlinkRecord;
190+
}
191+
192+
/// Deduplication disabled: the hook is `hook::DoNothing` and the hardlink record is
/// the unit value `()`, so neither function carries any data.
///
/// This implementation has no `cfg` gate, so it is available on every platform.
impl<SizeGetter> HardlinkDeduplicationSystem<false> for SizeGetter
193+
where
194+
SizeGetter: GetSizeUtils,
195+
SizeGetter::Size: Send + Sync,
196+
{
197+
type Hook = hook::DoNothing;
198+
fn create_hook((): ()) -> Self::Hook {
199+
hook::DoNothing
200+
}
201+
fn init_hardlink_record() {}
202+
}
203+
204+
/// Deduplication enabled (unix only): the hook is `hook::RecordHardLink` borrowing a
/// `hook::RecordHardLinkStorage` for the `'static` lifetime.
///
/// `init_hardlink_record` obtains that `'static` borrow by boxing a new storage and
/// leaking the box, so the allocation is never freed.
/// NOTE(review): presumably acceptable because the record is created once per run of
/// the app — confirm against the caller.
#[cfg(unix)]
205+
impl<SizeGetter> HardlinkDeduplicationSystem<true> for SizeGetter
206+
where
207+
SizeGetter: GetSizeUtils,
208+
SizeGetter::Size: Send + Sync + 'static,
209+
{
210+
type Hook = hook::RecordHardLink<'static, Self::Size>;
211+
fn create_hook(record: &'static hook::RecordHardLinkStorage<Self::Size>) -> Self::Hook {
212+
hook::RecordHardLink::new(record)
213+
}
214+
fn init_hardlink_record() -> &'static hook::RecordHardLinkStorage<Self::Size> {
215+
hook::RecordHardLinkStorage::new()
216+
.pipe(Box::new)
217+
.pipe(Box::leak) // intentionally leaked: yields the `&'static` reference that `Hook` requires
218+
}
219+
}
220+
183221
macro_rules! run {
184222
($(
185223
$(#[$variant_attrs:meta])*
186-
$size_getter:ident, $progress:literal;
224+
$size_getter:ident, $progress:literal, $deduplicate_hardlinks:ident;
187225
)*) => { match self.args {$(
188226
$(#[$variant_attrs])*
189227
Args {
190228
quantity: <$size_getter as GetSizeUtils>::QUANTITY,
191229
progress: $progress,
230+
#[cfg(unix)] deduplicate_hardlinks: $deduplicate_hardlinks,
192231
files,
193232
json_output,
194233
bytes_format,
@@ -198,32 +237,44 @@ impl App {
198237
min_ratio,
199238
no_sort,
200239
..
201-
} => Sub {
202-
direction: Direction::from_top_down(top_down),
203-
bar_alignment: BarAlignment::from_align_right(align_right),
204-
size_getter: <$size_getter as GetSizeUtils>::INSTANCE,
205-
hook: hook::DoNothing, // TODO: change this
206-
hardlink_record: (), // TODO: change this
207-
reporter: <$size_getter as CreateReporter<$progress>>::create_reporter(report_error),
208-
bytes_format: <$size_getter as GetSizeUtils>::formatter(bytes_format),
209-
files,
210-
json_output,
211-
column_width_distribution,
212-
max_depth,
213-
min_ratio,
214-
no_sort,
215-
}
216-
.run(),
240+
} => {
241+
const DEDUPLICATE_HARDLINKS: bool = cfg!(unix) && $deduplicate_hardlinks;
242+
let hardlink_record = <$size_getter as HardlinkDeduplicationSystem<DEDUPLICATE_HARDLINKS>>::init_hardlink_record();
243+
let hook = <$size_getter as HardlinkDeduplicationSystem<DEDUPLICATE_HARDLINKS>>::create_hook(hardlink_record);
244+
245+
Sub {
246+
direction: Direction::from_top_down(top_down),
247+
bar_alignment: BarAlignment::from_align_right(align_right),
248+
size_getter: <$size_getter as GetSizeUtils>::INSTANCE,
249+
hook,
250+
hardlink_record,
251+
reporter: <$size_getter as CreateReporter<$progress>>::create_reporter(report_error),
252+
bytes_format: <$size_getter as GetSizeUtils>::formatter(bytes_format),
253+
files,
254+
json_output,
255+
column_width_distribution,
256+
max_depth,
257+
min_ratio,
258+
no_sort,
259+
}
260+
.run()
261+
},
217262
)*} };
218263
}
219264

220265
run! {
221-
GetApparentSize, false;
222-
GetApparentSize, true;
223-
#[cfg(unix)] GetBlockSize, false;
224-
#[cfg(unix)] GetBlockSize, true;
225-
#[cfg(unix)] GetBlockCount, false;
226-
#[cfg(unix)] GetBlockCount, true;
266+
GetApparentSize, false, false;
267+
GetApparentSize, true, false;
268+
#[cfg(unix)] GetBlockSize, false, false;
269+
#[cfg(unix)] GetBlockSize, true, false;
270+
#[cfg(unix)] GetBlockCount, false, false;
271+
#[cfg(unix)] GetBlockCount, true, false;
272+
#[cfg(unix)] GetApparentSize, false, true;
273+
#[cfg(unix)] GetApparentSize, true, true;
274+
#[cfg(unix)] GetBlockSize, false, true;
275+
#[cfg(unix)] GetBlockSize, true, true;
276+
#[cfg(unix)] GetBlockCount, false, true;
277+
#[cfg(unix)] GetBlockCount, true, true;
227278
}
228279
}
229280
}

src/app/sub.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ where
189189
DataTree<OsStringDisplay, Size>: Send,
190190
Size: size::Size + Sync,
191191
{
192-
type HardlinkRecord = &'a dashmap::DashMap<u64, (Size, Vec<PathBuf>)>;
192+
type HardlinkRecord = &'a hook::RecordHardLinkStorage<Size>;
193193
type DeduplicationReport = (); // TODO
194194

195195
fn deduplicate_hardlink_sizes(

src/args.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ pub struct Args {
9494
pub bytes_format: BytesFormat,
9595

9696
/// Detect duplicated hardlinks and remove their sizes from total.
97+
#[cfg(unix)]
9798
#[clap(long)]
9899
pub deduplicate_hardlinks: bool,
99100

src/hook.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,7 @@ impl<Size> Hook<Size> for DoNothing {
3030
#[cfg(unix)]
3131
mod record_hardlink;
3232
#[cfg(unix)]
33-
pub use record_hardlink::*;
33+
pub use record_hardlink::RecordHardLink;
34+
#[cfg(unix)]
35+
#[cfg(feature = "cli")]
36+
pub(crate) use record_hardlink::RecordHardLinkStorage;

src/hook/record_hardlink.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use dashmap::DashMap;
33
use std::{fmt::Debug, os::unix::fs::MetadataExt, path::PathBuf};
44

55
/// Map an inode number to its size and detected paths.
6-
type RecordHardLinkStorage<Size> = DashMap<u64, (Size, Vec<PathBuf>)>; // TODO: benchmark against Mutex<HashMap<u64, (Size, Vec<PathBuf>)>>
6+
pub type RecordHardLinkStorage<Size> = DashMap<u64, (Size, Vec<PathBuf>)>; // TODO: benchmark against Mutex<HashMap<u64, (Size, Vec<PathBuf>)>>
77

88
/// A [hook](Hook) that records files with more than 1 link.
99
#[derive(Debug, Clone, Copy)]

0 commit comments

Comments
 (0)