Skip to content

Commit e7e5b5c

Browse files
committed
fix(hardlink)!: include dev in ReflectionEntry and JSON schema
The Copilot PR kept dev out of the reflection/JSON layer, introducing a dev=0 placeholder hack, double-allocation sorting, and "unsupported edge case" disclaimers. Since the reflection is meant to mirror the internal HardlinkList, just include dev in ReflectionEntry. This simplifies From<HardlinkList> back to a single-pass map+collect, removes the dev=0 workaround in TryFrom<Reflection>, and makes multi-filesystem JSON round-tripping correct. Bump SCHEMA_VERSION to 2026-04-02 for the new field. https://claude.ai/code/session_01QP9wZyoZcGmJsEsA66ZRok
1 parent 65194d6 commit e7e5b5c

8 files changed

Lines changed: 88 additions & 42 deletions

File tree

src/hardlink/hardlink_list.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ use std::path::Path;
2727
/// the same inode number. Both du-dust and dua-cli track `(device, inode)` pairs
2828
/// for the same reason.
2929
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
30-
struct InodeKey {
30+
pub(crate) struct InodeKey {
3131
/// Device number of the filesystem the inode belongs to.
3232
dev: u64,
3333
/// Inode number within the device.

src/hardlink/hardlink_list/iter.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@ impl<'a, Size> Iterator for Iter<'a, Size> {
3030
}
3131

3232
impl<'a, Size> Item<'a, Size> {
33+
/// The device number of the filesystem the inode belongs to.
34+
#[inline]
35+
pub fn dev(&self) -> u64 {
36+
self.0.key().dev
37+
}
38+
3339
/// The inode number of the file.
3440
#[inline]
3541
pub fn ino(&self) -> InodeNumber {

src/hardlink/hardlink_list/reflection.rs

Lines changed: 17 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,8 @@ use serde::{Deserialize, Serialize};
1212
/// internal content.
1313
///
1414
/// **Guarantees:**
15-
/// * Every `(device, inode)` pair is unique within the scope of a single scan, but inode
16-
/// numbers alone are **not** guaranteed to be unique: when scanning multiple filesystems,
17-
/// two unrelated files on different devices can share the same inode number and will each
18-
/// produce a separate entry. The reflection stores only the inode number (the JSON format
19-
/// does not carry device information), so round-tripping a multi-filesystem scan through
20-
/// JSON is an unsupported edge case.
21-
/// * The internal list is always sorted by inode numbers (and by device number as a
22-
/// tie-breaker when two entries share the same inode number).
15+
/// * Every `(device, inode)` pair is unique.
16+
/// * The internal list is always sorted by inode numbers (with device number as tie-breaker).
2317
///
2418
/// **Equality:** `Reflection` implements `PartialEq` and `Eq` traits.
2519
///
@@ -54,6 +48,8 @@ impl<Size> Reflection<Size> {
5448
#[derive(Debug, Clone, PartialEq, Eq)]
5549
#[cfg_attr(feature = "json", derive(Deserialize, Serialize))]
5650
pub struct ReflectionEntry<Size> {
51+
/// Device number of the filesystem the inode belongs to.
52+
pub dev: u64,
5753
/// The inode number of the file.
5854
pub ino: InodeNumber,
5955
/// Size of the file.
@@ -67,51 +63,46 @@ pub struct ReflectionEntry<Size> {
6763
impl<Size> ReflectionEntry<Size> {
6864
/// Create a new entry.
6965
#[inline]
70-
fn new(ino: InodeNumber, Value { size, links, paths }: Value<Size>) -> Self {
66+
fn new(InodeKey { dev, ino }: InodeKey, Value { size, links, paths }: Value<Size>) -> Self {
7167
let paths = paths.into();
7268
ReflectionEntry {
69+
dev,
7370
ino,
7471
size,
7572
links,
7673
paths,
7774
}
7875
}
7976

80-
/// Dissolve [`ReflectionEntry`] into a pair of [`InodeNumber`] and [`Value`].
77+
/// Dissolve [`ReflectionEntry`] into a pair of [`InodeKey`] and [`Value`].
8178
#[inline]
82-
fn dissolve(self) -> (InodeNumber, Value<Size>) {
79+
fn dissolve(self) -> (InodeKey, Value<Size>) {
8380
let ReflectionEntry {
81+
dev,
8482
ino,
8583
size,
8684
links,
8785
paths,
8886
} = self;
8987
let paths = paths.into();
90-
(ino, Value { size, links, paths })
88+
(InodeKey { dev, ino }, Value { size, links, paths })
9189
}
9290
}
9391

9492
impl<Size> From<Vec<ReflectionEntry<Size>>> for Reflection<Size> {
95-
/// Sort the list by inode numbers, then create the reflection.
93+
/// Sort the list by `(inode, device)`, then create the reflection.
9694
fn from(list: Vec<ReflectionEntry<Size>>) -> Self {
97-
list.into_sorted_unstable_by_key(|entry| u64::from(entry.ino))
95+
list.into_sorted_unstable_by_key(|entry| (u64::from(entry.ino), entry.dev))
9896
.pipe(Reflection)
9997
}
10098
}
10199

102100
impl<Size> From<HardlinkList<Size>> for Reflection<Size> {
103101
fn from(HardlinkList(list): HardlinkList<Size>) -> Self {
104-
// Collect to a vec, sort by (ino, dev) for a stable, deterministic order, then
105-
// strip dev before wrapping. Sorting here (with dev still available) avoids the
106-
// nondeterminism that would arise from an unstable sort on ino alone when two
107-
// entries from different filesystems share the same inode number.
108-
let mut pairs: Vec<(InodeKey, Value<Size>)> = list.into_iter().collect();
109-
pairs.sort_unstable_by_key(|(key, _)| (u64::from(key.ino), key.dev));
110-
pairs
111-
.into_iter()
112-
.map(|(key, value)| ReflectionEntry::new(key.ino, value))
102+
list.into_iter()
103+
.map(|(key, value)| ReflectionEntry::new(key, value))
113104
.collect::<Vec<_>>()
114-
.pipe(Reflection)
105+
.pipe(Reflection::from)
115106
}
116107
}
117108

@@ -131,17 +122,9 @@ impl<Size> TryFrom<Reflection<Size>> for HardlinkList<Size> {
131122
let map = DashMap::with_capacity(entries.len());
132123

133124
for entry in entries {
134-
let (ino, value) = entry.dissolve();
135-
// Device number is unknown when loading from a reflection (e.g. JSON input);
136-
// use dev=0 as a placeholder. This means that when reloading JSON output that
137-
// was produced by scanning multiple filesystems, files from different devices
138-
// sharing the same inode number cannot be distinguished and therefore cannot
139-
// all be represented. Such duplicates cause a ConversionError::DuplicatedInode
140-
// and are treated as an unsupported edge case, since the JSON format does not
141-
// carry device information.
142-
let key = InodeKey { dev: 0, ino };
125+
let (key, value) = entry.dissolve();
143126
if map.insert(key, value).is_some() {
144-
return ino.pipe(ConversionError::DuplicatedInode).pipe(Err);
127+
return key.ino.pipe(ConversionError::DuplicatedInode).pipe(Err);
145128
}
146129
}
147130

src/hardlink/hardlink_list/test.rs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -174,12 +174,14 @@ fn same_ino_on_different_devices_are_treated_separately() {
174174
assert_eq!(list.len(), 2, "expected one entry per (dev, ino) pair");
175175

176176
let reflection = list.into_reflection();
177-
// Both entries expose ino=100 in the reflection (device is not part of the
178-
// public JSON format), so there are still 2 entries in the vector.
179177
assert_eq!(reflection.len(), 2);
180178

181-
// Paths are grouped per (dev, ino): each group has exactly 2 paths.
182-
for entry in reflection.iter() {
183-
assert_eq!(entry.paths.len(), 2);
184-
}
179+
// Sorted by (ino, dev), so dev=1 comes first.
180+
let entries: Vec<_> = reflection.iter().collect();
181+
assert_eq!(entries[0].dev, 1);
182+
assert_eq!(entries[0].ino, 100.into());
183+
assert_eq!(entries[0].paths.len(), 2);
184+
assert_eq!(entries[1].dev, 2);
185+
assert_eq!(entries[1].ino, 100.into());
186+
assert_eq!(entries[1].paths.len(), 2);
185187
}

src/json_data/schema_version.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize};
66
use std::convert::TryFrom;
77

88
/// Content of [`SchemaVersion`].
9-
pub const SCHEMA_VERSION: &str = "2024-11-02";
9+
pub const SCHEMA_VERSION: &str = "2026-04-02";
1010

1111
/// Verifying schema version.
1212
#[derive(Debug, Clone, Copy)]

tests/_utils.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -583,3 +583,12 @@ pub fn read_inode_number(path: &Path) -> u64 {
583583
.unwrap_or_else(|error| panic!("Can't read metadata at {path:?}: {error}"))
584584
.ino()
585585
}
586+
587+
/// Read [dev](std::os::unix::fs::MetadataExt::dev) of a path.
588+
#[cfg(unix)]
589+
pub fn read_device_number(path: &Path) -> u64 {
590+
use std::os::unix::fs::MetadataExt;
591+
path.pipe(symlink_metadata)
592+
.unwrap_or_else(|error| panic!("Can't read metadata at {path:?}: {error}"))
593+
.dev()
594+
}

tests/hardlinks_deduplication.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ fn simple_tree_with_some_hardlinks() {
8585
.pipe(InodeNumber::from)
8686
};
8787

88+
let dev = read_device_number(&workspace);
89+
8890
let shared_paths = |suffices: &[&str]| {
8991
suffices
9092
.iter()
@@ -190,6 +192,7 @@ fn simple_tree_with_some_hardlinks() {
190192
.collect();
191193
let expected_shared_details = [
192194
ReflectionEntry {
195+
dev,
193196
ino: file_inode("one-internal-hardlink.txt"),
194197
size: file_size("one-internal-hardlink.txt"),
195198
links: 1 + 1,
@@ -199,6 +202,7 @@ fn simple_tree_with_some_hardlinks() {
199202
]),
200203
},
201204
ReflectionEntry {
205+
dev,
202206
ino: file_inode("two-internal-hardlinks.txt"),
203207
size: file_size("two-internal-hardlinks.txt"),
204208
links: 1 + 2,
@@ -209,12 +213,14 @@ fn simple_tree_with_some_hardlinks() {
209213
]),
210214
},
211215
ReflectionEntry {
216+
dev,
212217
ino: file_inode("one-external-hardlink.txt"),
213218
size: file_size("one-external-hardlink.txt"),
214219
links: 1 + 1,
215220
paths: shared_paths(&["sources/one-external-hardlink.txt"]),
216221
},
217222
ReflectionEntry {
223+
dev,
218224
ino: file_inode("one-internal-one-external-hardlinks.txt"),
219225
size: file_size("one-internal-one-external-hardlinks.txt"),
220226
links: 1 + 1 + 1,
@@ -338,6 +344,8 @@ fn multiple_hardlinks_to_a_single_file() {
338344
.pipe_as_ref(read_inode_number)
339345
.pipe(InodeNumber::from);
340346

347+
let dev = read_device_number(&workspace);
348+
341349
let actual_size = tree.size;
342350
let expected_size = workspace
343351
.pipe_as_ref(read_apparent_size)
@@ -374,6 +382,7 @@ fn multiple_hardlinks_to_a_single_file() {
374382
.cloned()
375383
.collect();
376384
let expected_shared_details = [ReflectionEntry {
385+
dev,
377386
ino: file_inode,
378387
size: file_size,
379388
links: 1 + links,
@@ -470,6 +479,8 @@ fn complex_tree_with_shared_and_unique_files() {
470479
.pipe(Bytes::new)
471480
};
472481

482+
let dev = read_device_number(&workspace);
483+
473484
let actual_size = tree.size;
474485

475486
// The following formula treat the first file as "real" and
@@ -562,6 +573,7 @@ fn complex_tree_with_shared_and_unique_files() {
562573
.find(|item| starts_with_path(item, "some-hardlinks/file-0.txt"))
563574
.cloned();
564575
let expected = Some(ReflectionEntry {
576+
dev,
565577
ino: workspace
566578
.join("some-hardlinks/file-0.txt")
567579
.pipe_as_ref(read_inode_number)
@@ -591,6 +603,7 @@ fn complex_tree_with_shared_and_unique_files() {
591603
.find(|item| starts_with_path(item, &format!("some-hardlinks/file-{file_index}.txt")))
592604
.cloned();
593605
let expected = Some(ReflectionEntry {
606+
dev,
594607
ino: workspace
595608
.join(format!("some-hardlinks/file-{file_index}.txt"))
596609
.pipe_as_ref(read_inode_number)
@@ -695,6 +708,8 @@ fn hardlinks_and_non_hardlinks() {
695708
.pipe(InodeNumber::from)
696709
};
697710

711+
let dev = read_device_number(&workspace);
712+
698713
let shared_paths = |file_names: &[&str]| {
699714
file_names
700715
.iter()
@@ -717,37 +732,43 @@ fn hardlinks_and_non_hardlinks() {
717732
.collect();
718733
let expected_shared_details = [
719734
ReflectionEntry {
735+
dev,
720736
ino: file_inode("file-0.txt"),
721737
size: file_size,
722738
links: 3,
723739
paths: shared_paths(&["file-0.txt", "link0-file0.txt", "link1-file0.txt"]),
724740
},
725741
ReflectionEntry {
742+
dev,
726743
ino: file_inode("file-1.txt"),
727744
size: file_size,
728745
links: 2,
729746
paths: shared_paths(&["file-1.txt", "link0-file1.txt"]),
730747
},
731748
// ... file-2.txt and file-3.txt don't have hardlinks so they shouldn't appear here ...
732749
ReflectionEntry {
750+
dev,
733751
ino: file_inode("file-4.txt"),
734752
size: file_size,
735753
links: 2,
736754
paths: shared_paths(&["file-4.txt"]),
737755
},
738756
ReflectionEntry {
757+
dev,
739758
ino: file_inode("file-5.txt"),
740759
size: file_size,
741760
links: 2,
742761
paths: shared_paths(&["file-5.txt"]),
743762
},
744763
ReflectionEntry {
764+
dev,
745765
ino: file_inode("file-6.txt"),
746766
size: file_size,
747767
links: 2,
748768
paths: shared_paths(&["file-6.txt"]),
749769
},
750770
ReflectionEntry {
771+
dev,
751772
ino: file_inode("file-7.txt"),
752773
size: file_size,
753774
links: 2,
@@ -898,6 +919,8 @@ fn exclusive_hardlinks_only() {
898919
.pipe(InodeNumber::from)
899920
};
900921

922+
let dev = read_device_number(&workspace);
923+
901924
let shared_paths = |file_names: &[&str]| {
902925
file_names
903926
.iter()
@@ -921,6 +944,7 @@ fn exclusive_hardlinks_only() {
921944
let expected_shared_details = (0..files_per_branch)
922945
.par_bridge()
923946
.map(|index| ReflectionEntry {
947+
dev,
924948
ino: file_inode(&format!("file-{index}.txt")),
925949
size: file_size,
926950
links: 2,
@@ -1024,6 +1048,8 @@ fn exclusive_only_and_external_only_hardlinks() {
10241048
.pipe(InodeNumber::from)
10251049
};
10261050

1051+
let dev = read_device_number(&workspace);
1052+
10271053
let shared_paths = |file_names: &[&str]| {
10281054
file_names
10291055
.iter()
@@ -1050,6 +1076,7 @@ fn exclusive_only_and_external_only_hardlinks() {
10501076
(0..(files_per_branch / 2))
10511077
.par_bridge()
10521078
.map(|index| ReflectionEntry {
1079+
dev,
10531080
ino: file_inode(&format!("link0-{index}.txt")),
10541081
size: file_size,
10551082
links: 2,
@@ -1060,6 +1087,7 @@ fn exclusive_only_and_external_only_hardlinks() {
10601087
((files_per_branch / 2)..files_per_branch)
10611088
.par_bridge()
10621089
.map(|index| ReflectionEntry {
1090+
dev,
10631091
ino: file_inode(&format!("link0-{index}.txt")),
10641092
size: file_size,
10651093
links: 2,
@@ -1187,6 +1215,8 @@ fn external_hardlinks_only() {
11871215
.pipe(InodeNumber::from)
11881216
};
11891217

1218+
let dev = read_device_number(&workspace);
1219+
11901220
let shared_paths = |file_names: &[&str]| {
11911221
file_names
11921222
.iter()
@@ -1210,6 +1240,7 @@ fn external_hardlinks_only() {
12101240
let expected_shared_details = (0..files_per_branch)
12111241
.par_bridge()
12121242
.map(|index| ReflectionEntry {
1243+
dev,
12131244
ino: file_inode(&format!("linkX-{index}.txt")),
12141245
size: file_size,
12151246
links: 2,

0 commit comments

Comments
 (0)