Skip to content

Commit cc55d02

Browse files
committed
feat!: make Sub executes hardlink deduplication
1 parent 7fe51eb commit cc55d02

4 files changed

Lines changed: 74 additions & 6 deletions

File tree

src/app.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ impl App {
203203
bar_alignment: BarAlignment::from_align_right(align_right),
204204
size_getter: <$size_getter as GetSizeUtils>::INSTANCE,
205205
hook: hook::DoNothing, // TODO: change this
206+
hardlink_record: (), // TODO: change this
206207
reporter: <$size_getter as CreateReporter<$progress>>::create_reporter(report_error),
207208
bytes_format: <$size_getter as GetSizeUtils>::formatter(bytes_format),
208209
files,

src/app/sub.rs

Lines changed: 68 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ where
2121
Report: ParallelReporter<Size> + Sync,
2222
Size: size::Size + Into<u64> + Serialize + Send + Sync,
2323
SizeGetter: GetSize<Size = Size> + Copy + Sync,
24-
Hook: hook::Hook<Size> + Copy + Sync,
24+
Hook: hook::Hook<Size> + DeduplicateHardlinkSizes<Size> + Copy + Sync,
2525
DataTreeReflection<String, Size>: Into<UnitAndTree>,
2626
{
2727
/// List of files and/or directories.
@@ -42,6 +42,8 @@ where
4242
pub size_getter: SizeGetter,
4343
/// Hook to run after [`Self::size_getter`].
4444
pub hook: Hook,
45+
/// Record of detected hardlinks.
46+
pub hardlink_record: Hook::HardlinkRecord,
4547
/// Reports measurement progress.
4648
pub reporter: Report,
4749
/// Minimal size proportion required to appear.
@@ -55,7 +57,7 @@ where
5557
Size: size::Size + Into<u64> + Serialize + Send + Sync,
5658
Report: ParallelReporter<Size> + Sync,
5759
SizeGetter: GetSize<Size = Size> + Copy + Sync,
58-
Hook: hook::Hook<Size> + Copy + Sync,
60+
Hook: hook::Hook<Size> + DeduplicateHardlinkSizes<Size> + Copy + Sync,
5961
DataTreeReflection<String, Size>: Into<UnitAndTree>,
6062
{
6163
/// Run the sub program.
@@ -70,6 +72,7 @@ where
7072
max_depth,
7173
size_getter,
7274
hook,
75+
hardlink_record,
7376
reporter,
7477
min_ratio,
7578
no_sort,
@@ -96,6 +99,7 @@ where
9699
return Sub {
97100
files: vec![".".into()],
98101
hook,
102+
hardlink_record,
99103
reporter,
100104
..self
101105
}
@@ -120,15 +124,17 @@ where
120124
}
121125

122126
let min_ratio: f32 = min_ratio.into();
123-
let data_tree = {
127+
let (data_tree, deduplication_record) = {
124128
let mut data_tree = data_tree;
125129
if min_ratio > 0.0 {
126130
data_tree.par_cull_insignificant_data(min_ratio);
127131
}
128132
if !no_sort {
129133
data_tree.par_sort_by(|left, right| left.size().cmp(&right.size()).reverse());
130134
}
131-
data_tree
135+
let deduplication_record =
136+
Hook::deduplicate_hardlink_sizes(&mut data_tree, hardlink_record);
137+
(data_tree, deduplication_record)
132138
};
133139

134140
GLOBAL_STATUS_BOARD.clear_line(0);
@@ -157,6 +163,64 @@ where
157163
};
158164

159165
print!("{visualizer}"); // visualizer already ends with "\n", println! isn't needed here.
166+
Hook::report_deduplication_results(deduplication_record);
160167
Ok(())
161168
}
162169
}
170+
171+
/// Subroutines used by [`Sub`] to deduplicate the sizes of detected hardlinks and to report the result.
///
/// Both functions are associated functions without a `self` receiver, so all the data they
/// need is passed in explicitly through [`Self::HardlinkRecord`] and [`Self::DeduplicationReport`].
172+
pub trait DeduplicateHardlinkSizes<Size: size::Size> {
173+
/// Record of detected hardlinks, passed by value to [`Self::deduplicate_hardlink_sizes`].
174+
type HardlinkRecord;
175+
/// Report created by [`DeduplicateHardlinkSizes::deduplicate_hardlink_sizes`]
/// and later handed to [`Self::report_deduplication_results`].
176+
type DeduplicationReport;
177+
/// Deduplicate the sizes of detected hardlinks and return a report object.
///
/// * `data_tree`: the tree whose sizes are to be adjusted; it is borrowed mutably.
/// * `record`: the hardlinks that were detected.
178+
fn deduplicate_hardlink_sizes(
179+
data_tree: &mut DataTree<OsStringDisplay, Size>,
180+
record: Self::HardlinkRecord,
181+
) -> Self::DeduplicationReport;
182+
/// Handle the report returned by [`Self::deduplicate_hardlink_sizes`].
183+
fn report_deduplication_results(report: Self::DeduplicationReport);
184+
}
185+
186+
/// Hardlink deduplication for the [`hook::RecordHardLink`] hook.
///
/// This implementation is only compiled on unix targets.
#[cfg(unix)]
187+
impl<'a, Size> DeduplicateHardlinkSizes<Size> for hook::RecordHardLink<'a, Size>
188+
where
189+
DataTree<OsStringDisplay, Size>: Send,
190+
Size: size::Size + Sync,
191+
{
192+
/// Shared reference to a concurrent map whose values pair a size with a list of paths.
// NOTE(review): the `u64` key is presumably the inode number — confirm against `hook::RecordHardLink`.
type HardlinkRecord = &'a dashmap::DashMap<u64, (Size, Vec<PathBuf>)>;
193+
/// Placeholder: no report is produced yet.
type DeduplicationReport = (); // TODO
194+
195+
/// Convert `record` into the slice form taken by `par_deduplicate_hardlinks` and apply it to `data_tree`.
fn deduplicate_hardlink_sizes(
196+
data_tree: &mut DataTree<OsStringDisplay, Size>,
197+
record: Self::HardlinkRecord,
198+
) -> Self::DeduplicationReport {
199+
use std::path::{Path, PathBuf};
200+
// First pass: copy each entry's size and clone its path list into an owned snapshot.
let hardlink_info: Vec<(Size, Vec<PathBuf>)> = record
201+
.iter()
202+
.map(|values| (values.0, values.1.clone()))
203+
.collect();
204+
// Second pass: build a borrowed `&Path` view over the snapshot above, because
// `par_deduplicate_hardlinks` takes `&[(Size, Vec<&Path>)]`.
let hardlink_info: Vec<(Size, Vec<&Path>)> = hardlink_info
205+
.iter()
206+
.map(|(size, paths)| (*size, paths.iter().map(AsRef::as_ref).collect()))
207+
.collect();
208+
data_tree.par_deduplicate_hardlinks(&hardlink_info);
209+
}
210+
/// Placeholder: the report is `()`, so there is nothing to handle yet.
fn report_deduplication_results((): Self::DeduplicationReport) {} // TODO
211+
}
212+
213+
/// No-op implementation for the [`hook::DoNothing`] hook: there is no record,
/// the data tree is left untouched, and nothing is reported.
impl<Size> DeduplicateHardlinkSizes<Size> for hook::DoNothing
214+
where
215+
DataTree<OsStringDisplay, Size>: Send,
216+
Size: size::Size + Sync,
217+
{
218+
/// This hook carries no hardlink record.
type HardlinkRecord = ();
219+
/// This hook produces no report.
type DeduplicationReport = ();
220+
/// Ignore both arguments and return `()`.
fn deduplicate_hardlink_sizes(
221+
_: &mut DataTree<OsStringDisplay, Size>,
222+
_: Self::HardlinkRecord,
223+
) -> Self::DeduplicationReport {
224+
}
225+
/// Do nothing with the (empty) report.
fn report_deduplication_results((): Self::DeduplicationReport) {}
226+
}

src/data_tree.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ pub struct DataTree<Name, Size: size::Size> {
3030

3131
mod constructors;
3232
mod getters;
33-
mod hardlink;
3433
mod retain;
3534
mod sort;
35+
36+
#[cfg(unix)] // for now, it is only available on Unix
37+
#[cfg(feature = "cli")] // this module exports a single function that is only used by CLI modules
38+
mod hardlink;

src/data_tree/hardlink.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ where
1111
Size: size::Size + Sync,
1212
{
1313
/// Reduce the size of the directories that have hardlinks.
14-
pub fn par_deduplicate_hardlinks(&mut self, hardlink_info: &[(Size, Vec<&Path>)]) {
14+
pub(crate) fn par_deduplicate_hardlinks(&mut self, hardlink_info: &[(Size, Vec<&Path>)]) {
1515
if hardlink_info.is_empty() {
1616
return;
1717
}

0 commit comments

Comments
 (0)