Skip to content

Commit de0dca8

Browse files
committed
feat!: hardlink deduplications (wip)
1 parent f692b91 commit de0dca8

20 files changed

Lines changed: 405 additions & 30 deletions

Cargo.lock

Lines changed: 78 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,9 @@ serde = { version = "^1.0.219", optional = true }
6767
serde_json = { version = "^1.0.140", optional = true }
6868
sysinfo = "^0.35.2"
6969

70+
[target.'cfg(unix)'.dependencies]
71+
dashmap = "^6.1.0"
72+
7073
[dev-dependencies]
7174
build-fs-tree = "^0.7.1"
7275
command-extra = "^1.0.0"

exports/completion.bash

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ _pdu() {
2323

2424
case "${cmd}" in
2525
pdu)
26-
opts="-h -V --json-input --json-output --bytes-format --top-down --align-right --quantity --max-depth --total-width --column-width --min-ratio --no-sort --silent-errors --progress --threads --help --version [FILES]..."
26+
opts="-h -V --json-input --json-output --bytes-format --deduplicate-hardlinks --top-down --align-right --quantity --max-depth --total-width --column-width --min-ratio --no-sort --silent-errors --progress --threads --help --version [FILES]..."
2727
if [[ ${cur} == -* || ${COMP_CWORD} -eq 1 ]] ; then
2828
COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") )
2929
return 0

exports/completion.elv

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ set edit:completion:arg-completer[pdu] = {|@words|
2727
cand --threads 'Set the maximum number of threads to spawn. Could be either "auto", "max", or a number'
2828
cand --json-input 'Read JSON data from stdin'
2929
cand --json-output 'Print JSON data instead of an ASCII chart'
30+
cand --deduplicate-hardlinks 'Detect duplicated hardlinks and remove their sizes from total'
3031
cand --top-down 'Print the tree top-down instead of bottom-up'
3132
cand --align-right 'Set the root of the bars to the right'
3233
cand --no-sort 'Preserve order of entries'

exports/completion.fish

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ complete -c pdu -l min-ratio -d 'Minimal size proportion required to appear' -r
1111
complete -c pdu -l threads -d 'Set the maximum number of threads to spawn. Could be either "auto", "max", or a number' -r
1212
complete -c pdu -l json-input -d 'Read JSON data from stdin'
1313
complete -c pdu -l json-output -d 'Print JSON data instead of an ASCII chart'
14+
complete -c pdu -l deduplicate-hardlinks -d 'Detect duplicated hardlinks and remove their sizes from total'
1415
complete -c pdu -l top-down -d 'Print the tree top-down instead of bottom-up'
1516
complete -c pdu -l align-right -d 'Set the root of the bars to the right'
1617
complete -c pdu -l no-sort -d 'Preserve order of entries'

exports/completion.ps1

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@ Register-ArgumentCompleter -Native -CommandName 'pdu' -ScriptBlock {
3030
[CompletionResult]::new('--threads', '--threads', [CompletionResultType]::ParameterName, 'Set the maximum number of threads to spawn. Could be either "auto", "max", or a number')
3131
[CompletionResult]::new('--json-input', '--json-input', [CompletionResultType]::ParameterName, 'Read JSON data from stdin')
3232
[CompletionResult]::new('--json-output', '--json-output', [CompletionResultType]::ParameterName, 'Print JSON data instead of an ASCII chart')
33+
[CompletionResult]::new('--deduplicate-hardlinks', '--deduplicate-hardlinks', [CompletionResultType]::ParameterName, 'Detect duplicated hardlinks and remove their sizes from total')
3334
[CompletionResult]::new('--top-down', '--top-down', [CompletionResultType]::ParameterName, 'Print the tree top-down instead of bottom-up')
3435
[CompletionResult]::new('--align-right', '--align-right', [CompletionResultType]::ParameterName, 'Set the root of the bars to the right')
3536
[CompletionResult]::new('--no-sort', '--no-sort', [CompletionResultType]::ParameterName, 'Preserve order of entries')

exports/completion.zsh

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ block-count\:"Count numbers of blocks"))' \
2828
'--threads=[Set the maximum number of threads to spawn. Could be either "auto", "max", or a number]:THREADS:_default' \
2929
'(--quantity)--json-input[Read JSON data from stdin]' \
3030
'--json-output[Print JSON data instead of an ASCII chart]' \
31+
'--deduplicate-hardlinks[Detect duplicated hardlinks and remove their sizes from total]' \
3132
'--top-down[Print the tree top-down instead of bottom-up]' \
3233
'--align-right[Set the root of the bars to the right]' \
3334
'--no-sort[Preserve order of entries]' \

src/app.rs

Lines changed: 75 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ use crate::{
66
args::{Args, Quantity, Threads},
77
bytes_format::BytesFormat,
88
get_size::{GetApparentSize, GetSize},
9+
hook,
910
json_data::{JsonData, UnitAndTree},
1011
reporter::{ErrorOnlyReporter, ErrorReport, ProgressAndErrorReporter, ProgressReport},
1112
runtime_error::RuntimeError,
@@ -179,15 +180,54 @@ impl App {
179180
}
180181
}
181182

183+
/// Compile-time selection of the hook and hardlink record that go with a size getter.
///
/// The `DEDUPLICATE_HARDLINKS` const parameter chooses which implementation applies,
/// so every (size getter, flag) pair resolves to one concrete `Hook` type.
trait HardlinkDeduplicationSystem<const DEDUPLICATE_HARDLINKS: bool>: GetSizeUtils {
184+
/// Hook type for this configuration; it must implement both `hook::Hook` and
/// `sub::DeduplicateHardlinkSizes` for the size type of the getter.
type Hook: hook::Hook<Self::Size> + sub::DeduplicateHardlinkSizes<Self::Size>;
185+
/// Builds the hook from a hardlink record (the value type is dictated by `Hook`).
fn create_hook(
186+
record: <Self::Hook as sub::DeduplicateHardlinkSizes<Self::Size>>::HardlinkRecord,
187+
) -> Self::Hook;
188+
/// Creates the hardlink record value that is later passed to `create_hook`.
fn init_hardlink_record(
189+
) -> <Self::Hook as sub::DeduplicateHardlinkSizes<Self::Size>>::HardlinkRecord;
190+
}
191+
192+
/// Implementation selected when the const flag is `false`: the hook is
/// `hook::DoNothing` and the hardlink record is the unit value.
impl<SizeGetter> HardlinkDeduplicationSystem<false> for SizeGetter
193+
where
194+
SizeGetter: GetSizeUtils,
195+
SizeGetter::Size: Send + Sync,
196+
{
197+
type Hook = hook::DoNothing;
198+
// The record is `()`, so there is nothing to store in the hook.
fn create_hook((): ()) -> Self::Hook {
199+
hook::DoNothing
200+
}
201+
// Returns `()` implicitly; no storage is allocated for this configuration.
fn init_hardlink_record() {}
202+
}
203+
204+
/// Implementation selected when the const flag is `true`; compiled on unix only.
///
/// The hook is `hook::RecordHardLink` and borrows a `hook::RecordHardLinkStorage`
/// through a static reference.
#[cfg(unix)]
205+
impl<SizeGetter> HardlinkDeduplicationSystem<true> for SizeGetter
206+
where
207+
SizeGetter: GetSizeUtils,
208+
SizeGetter::Size: Send + Sync + 'static,
209+
{
210+
type Hook = hook::RecordHardLink<'static, Self::Size>;
211+
// Wraps the shared storage reference in a recording hook.
fn create_hook(record: &'static hook::RecordHardLinkStorage<Self::Size>) -> Self::Hook {
212+
hook::RecordHardLink::new(record)
213+
}
214+
// Allocates fresh storage on the heap and leaks it to obtain a static reference.
// NOTE(review): the leaked allocation is never freed by this code; presumably
// acceptable because it is created once per run, confirm against the caller.
fn init_hardlink_record() -> &'static hook::RecordHardLinkStorage<Self::Size> {
215+
hook::RecordHardLinkStorage::new()
216+
.pipe(Box::new)
217+
.pipe(Box::leak)
218+
}
219+
}
220+
182221
macro_rules! run {
183222
($(
184223
$(#[$variant_attrs:meta])*
185-
$size_getter:ident, $progress:literal;
224+
$size_getter:ident, $progress:literal, $deduplicate_hardlinks:ident;
186225
)*) => { match self.args {$(
187226
$(#[$variant_attrs])*
188227
Args {
189228
quantity: <$size_getter as GetSizeUtils>::QUANTITY,
190229
progress: $progress,
230+
#[cfg(unix)] deduplicate_hardlinks: $deduplicate_hardlinks,
191231
files,
192232
json_output,
193233
bytes_format,
@@ -197,30 +237,44 @@ impl App {
197237
min_ratio,
198238
no_sort,
199239
..
200-
} => Sub {
201-
direction: Direction::from_top_down(top_down),
202-
bar_alignment: BarAlignment::from_align_right(align_right),
203-
size_getter: <$size_getter as GetSizeUtils>::INSTANCE,
204-
reporter: <$size_getter as CreateReporter<$progress>>::create_reporter(report_error),
205-
bytes_format: <$size_getter as GetSizeUtils>::formatter(bytes_format),
206-
files,
207-
json_output,
208-
column_width_distribution,
209-
max_depth,
210-
min_ratio,
211-
no_sort,
212-
}
213-
.run(),
240+
} => {
241+
const DEDUPLICATE_HARDLINKS: bool = cfg!(unix) && $deduplicate_hardlinks;
242+
let hardlink_record = <$size_getter as HardlinkDeduplicationSystem<DEDUPLICATE_HARDLINKS>>::init_hardlink_record();
243+
let hook = <$size_getter as HardlinkDeduplicationSystem<DEDUPLICATE_HARDLINKS>>::create_hook(hardlink_record);
244+
245+
Sub {
246+
direction: Direction::from_top_down(top_down),
247+
bar_alignment: BarAlignment::from_align_right(align_right),
248+
size_getter: <$size_getter as GetSizeUtils>::INSTANCE,
249+
hook,
250+
hardlink_record,
251+
reporter: <$size_getter as CreateReporter<$progress>>::create_reporter(report_error),
252+
bytes_format: <$size_getter as GetSizeUtils>::formatter(bytes_format),
253+
files,
254+
json_output,
255+
column_width_distribution,
256+
max_depth,
257+
min_ratio,
258+
no_sort,
259+
}
260+
.run()
261+
},
214262
)*} };
215263
}
216264

217265
run! {
218-
GetApparentSize, false;
219-
GetApparentSize, true;
220-
#[cfg(unix)] GetBlockSize, false;
221-
#[cfg(unix)] GetBlockSize, true;
222-
#[cfg(unix)] GetBlockCount, false;
223-
#[cfg(unix)] GetBlockCount, true;
266+
GetApparentSize, false, false;
267+
GetApparentSize, true, false;
268+
#[cfg(unix)] GetBlockSize, false, false;
269+
#[cfg(unix)] GetBlockSize, true, false;
270+
#[cfg(unix)] GetBlockCount, false, false;
271+
#[cfg(unix)] GetBlockCount, true, false;
272+
#[cfg(unix)] GetApparentSize, false, true;
273+
#[cfg(unix)] GetApparentSize, true, true;
274+
#[cfg(unix)] GetBlockSize, false, true;
275+
#[cfg(unix)] GetBlockSize, true, true;
276+
#[cfg(unix)] GetBlockCount, false, true;
277+
#[cfg(unix)] GetBlockCount, true, true;
224278
}
225279
}
226280
}

0 commit comments

Comments
 (0)